I have a multi-threaded program and I want to have each thread read from the same file and have the same data
I know from pread(2) that it is thread-safe; however, my concern is speed rather than memory usage.
Will it be faster to have multiple threads read from the same file descriptor using pread or would it be faster for each individual thread to have its own file descriptor to the same file and read from there?
My thought is, if pread is atomic, then that prevents another thread from reading at the same time; however, if having lots of threads each with their own fd and the OS needs to service each read, then all that context switching might take up more time.
Having the same requirement I did a test for this. According to the test on a SSD and a HDD,
pread - improves the read speed, but if same FD is used for the writing, write becomes slow.
read (with a separate FD) - read operations are slower compared to pread. But it does NOT impact write operations (done using a separate FD).
So best option is to,
Use a separate FD for writing (Write Only mode)
Open a single FD (Read only mode) and use it across multiple threads using pread
Results on an SSD (read/write count 10,000,000):
| Operation | pread (same FD used for write and read) | read using a different FD |
|---|---|---|
| Read | 25 sec + | 27 sec + |
| Write | 38 sec + | 33 sec + |
Code used for the test
#include <string.h>
#include <iostream>
#include <unistd.h>
#include <sstream>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <errno.h>
#include <assert.h>
#include <sys/time.h>
#include <memory>
#include <atomic>
#include <thread>
/// Reports the current file offset of `fd` by seeking zero bytes from the
/// current position; yields whatever lseek() returns (-1 on failure).
off_t tell(int fd)
{
    const off_t currentOffset = lseek(fd, 0, SEEK_CUR);
    return currentOffset;
}
/// Returns the signed number of microseconds from `begin` to `end`
/// (negative when `end` precedes `begin`).
///
/// All arithmetic is carried out in int64_t: with an unsigned 32-bit
/// multiplier the product could be evaluated as an unsigned/32-bit value on
/// platforms where time_t is 32 bits wide, wrapping for negative or large
/// differences.
inline int64_t getMicroSecDiff(const timeval& begin, const timeval& end)
{
    constexpr int64_t MicroSecPerSec = 1000 * 1000;
    const int64_t wholeSeconds =
        static_cast<int64_t>(end.tv_sec) - static_cast<int64_t>(begin.tv_sec);
    const int64_t microRemainder =
        static_cast<int64_t>(end.tv_usec) - static_cast<int64_t>(begin.tv_usec);
    return wholeSeconds * MicroSecPerSec + microRemainder;
}
/// Microseconds elapsed between `begin` and the wall-clock time sampled
/// inside this call.
inline int64_t getMicroSecDiffFrom(const timeval& begin)
{
    timeval now;
    gettimeofday(&now, nullptr);
    const int64_t elapsed = getMicroSecDiff(begin, now);
    return elapsed;
}
using namespace std;
// Descriptors for ./A and ./B opened by main(). The writer always uses them;
// the readers use them too when they are not given their own descriptors.
int g_fileFd_A = -1;
int g_fileFd_B = -1;
// Offset of the most recent write to each file, published by writer() and
// polled by reader(). A negative value means nothing has been written yet.
std::atomic<int64_t> g_lastFileAPos = { -1 };
std::atomic<int64_t> g_lastFileBPos = { -1 };
// Set by writer() once its loop has finished.
std::atomic<bool> g_writeCompleted = { false };
// Number of bytes transferred by every individual write/read call.
const int WriteSz = 1;
// Number of iterations each thread performs; taken from argv[2] in main().
uint64_t g_writeCount = 0;
/// Writer thread body.
///
/// Performs g_writeCount rounds; each round appends one byte to `fdA` and one
/// to `fdB`, then publishes the offsets those bytes were written at so the
/// readers know which part of each file is valid. Prints the elapsed time
/// when finished.
void writer(int fdA, int fdB)
{
    timeval startTime;
    gettimeofday(&startTime, nullptr);

    size_t round = 0;
    while (round < g_writeCount)
    {
        // Offsets are sampled before writing, so they name the byte about to
        // be written rather than the end of the file.
        const int64_t offsetA = tell(fdA);
        const int64_t offsetB = tell(fdB);

        auto bytesOut = write(fdA, "A", WriteSz);
        assert(bytesOut == WriteSz);
        bytesOut = write(fdB, "b", WriteSz);
        assert(bytesOut == WriteSz);

        g_lastFileAPos.store(offsetA, std::memory_order_relaxed);
        g_lastFileBPos.store(offsetB, std::memory_order_relaxed);
        ++round;
    }
    g_writeCompleted = true;

    const auto elapsedUs = getMicroSecDiffFrom(startTime);
    // Build the line first so it reaches cout in a single insertion.
    std::ostringstream report;
    report << "Write time: " << elapsedUs / 1000000 << " sec " << elapsedUs % 1000000 << " us" << std::endl;
    std::cout << report.str();
}
/// Reader thread body.
///
/// Performs g_writeCount rounds of one-byte reads at random offsets that do
/// not exceed the last offset published by the writer, and prints the elapsed
/// time plus the number of short or failed reads.
///
/// @param duplicateFds  true: open private read-only descriptors for ./A and
///                      ./B and use lseek()+read(); false: share the global
///                      descriptors and use pread().
void reader(bool duplicateFds)
{
    char bufferA[8];
    char bufferB[8];
    int failedCount = 0;
    int fdA, fdB;
    if (duplicateFds)
    {
        fdA = open("./A", O_RDONLY);
        fdB = open("./B", O_RDONLY);
        if (fdA < 0 or fdB < 0)
        {
            // Without valid descriptors every read would fail and the timing
            // would be meaningless, so report the problem and stop.
            const int openErrno = errno;
            std::ostringstream err;
            err << "reader: cannot open ./A or ./B: " << strerror(openErrno) << std::endl;
            std::cerr << err.str();
            if (fdA >= 0)
            {
                close(fdA);
            }
            if (fdB >= 0)
            {
                close(fdB);
            }
            return;
        }
    }
    else
    {
        fdA = g_fileFd_A;
        fdB = g_fileFd_B;
    }
    timeval begin;
    gettimeofday(&begin, nullptr);
    size_t iteNo = 0;
    while (iteNo < g_writeCount)
    {
        off_t posA = g_lastFileAPos.load(std::memory_order_relaxed);
        off_t posB = g_lastFileBPos.load(std::memory_order_relaxed);
        if (posA < 0 or posB < 0)
        {
            // The writer has not published anything yet: do not count this
            // round and restart the clock so waiting time is not measured.
            gettimeofday(&begin, nullptr);
            continue;
        }
        int readSzA;
        int readSzB;
        // Pick a random offset, clamped to the last offset known to be written.
        off_t readPosA = rand();
        off_t readPosB = rand();
        if (readPosA > posA)
        {
            readPosA = posA;
        }
        if (readPosB > posB)
        {
            readPosB = posB;
        }
        if (duplicateFds)
        {
            // Private descriptors: moving their file offset affects nobody else.
            lseek(fdA, readPosA, SEEK_SET);
            lseek(fdB, readPosB, SEEK_SET);
            readSzA = read(fdA, bufferA, WriteSz);
            readSzB = read(fdB, bufferB, WriteSz);
        }
        else
        {
            // Shared descriptors: pread() takes the offset as an argument and
            // leaves the descriptor's own file offset untouched.
            readSzA = pread(fdA, bufferA, WriteSz, readPosA);
            readSzB = pread(fdB, bufferB, WriteSz, readPosB);
        }
        if (readSzA < WriteSz or readSzB < WriteSz)
        {
            failedCount++;
            if (failedCount % 1000 == 0)
            {
                std::cout << " " << failedCount;
                std::cout.flush();
            }
        }
        iteNo++;
    }
    auto diff = getMicroSecDiffFrom(begin);
    std::ostringstream oss;
    oss << "Read time: " << diff / 1000000 << " sec " << diff % 1000000 << " us" << " ReadCount=" << iteNo << std::endl;
    oss << "failedCount=" << failedCount << std::endl;
    std::cout << oss.str();
    if (duplicateFds)
    {
        close(fdA);
        close(fdB);
    }
}
/// Benchmark driver.
///
/// Usage: <prog> duplicateFds write-count
///   duplicateFds  non-zero: readers open their own descriptors and the
///                 writer's descriptors are write-only; zero: readers share
///                 the writer's read/write descriptors via pread().
///   write-count   number of rounds performed by the writer and each reader.
///
/// Returns 1 if ./A or ./B cannot be opened, 0 otherwise.
int main (int argc, char** argv)
{
    if (argc < 3)
    {
        std::cout << "usage: " << argv[0] << " duplicateFds" << " write-count" << std::endl;
        return 0;
    }
    bool duplicateFds = atoi(argv[1]);
    g_writeCount = atoi(argv[2]);
    std::cout << "duplicateFds=" << duplicateFds << " write-count=" << g_writeCount << std::endl;

    // Readers only need read access through these descriptors when they share them.
    const int accessMode = duplicateFds ? O_WRONLY : O_RDWR;
    const mode_t permissions = S_IREAD | S_IWRITE | S_IRGRP | S_IROTH;
    g_fileFd_A = open("./A", accessMode | O_CREAT, permissions);
    g_fileFd_B = open("./B", accessMode | O_CREAT, permissions);
    if (g_fileFd_A < 0 or g_fileFd_B < 0)
    {
        // Starting the threads with an invalid descriptor would only measure
        // failing system calls.
        std::cerr << "cannot open ./A or ./B: " << strerror(errno) << std::endl;
        if (g_fileFd_A >= 0)
        {
            close(g_fileFd_A);
        }
        if (g_fileFd_B >= 0)
        {
            close(g_fileFd_B);
        }
        return 1;
    }

    std::thread readerThread1(reader, duplicateFds);
    std::thread readerThread2(reader, duplicateFds);
    std::thread writerThread(writer, g_fileFd_A, g_fileFd_B);
    readerThread1.join();
    readerThread2.join();
    writerThread.join();
    close(g_fileFd_A);
    close(g_fileFd_B);
    return 0;
}
Related
I'm writing a Linux KVM hypervisor for x86 16-bit guests running in real mode. When doing interrupt calls (int ... instruction), I've encountered the KVM_INTERNAL_ERROR_SIMUL_EX error on Linux kernel 3.13.0. The same code is running fine on Linux kernel 3.16.0. Am I missing something? Is there a workaround I can add to my code to make it work with Linux kernel 3.13.0 (and possibly earlier)?
The test guest calls int 0x18 ... int 0x4f, all of which is handled in the hypervisor (C code after KVM_RUN has returned). When it's working correctly, all of the interrupt calls work. On Linux kernel 3.13.0, int 0x21 starts failing (and then int 0x22, int 0x23 and int 0x24 would also fail).
I was trying to write the shortest example C code to demonstrate the problem, here it is:
/* Based on: https://gist.github.com/zserge/d68683f17c68709818f8baab0ded2d15
* Based on: https://gist.githubusercontent.com/zserge/d68683f17c68709818f8baab0ded2d15/raw/b79033254b092ec9121bb891938b27dd128030d7/kvm-host-simple.c
*
* Compile: gcc -ansi -pedantic -s -O2 -W -Wall -o kvm16 kvm16.c && ./kvm16
*
* Expected correct output (e.g. on Linux 3.16.0 compiled for i386 (i686)):
*
* ...
* info: int 0x4f iret to: ...
* info: success, exiting
*
* Failure output (e.g. on Linux 3.13.0 compiled for amd64 (x86_64)):
*
* info: int 0x20 iret to: cs=0x0070 ip=0x0013
* fatal: KVM internal error suberror=2
*
* // Encounter unexpected simultaneous exceptions.
* #define KVM_INTERNAL_ERROR_SIMUL_EX 2
*/
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#define INT_NUM 0x21 /* Also works for INT_NUM == 0x20. */
/* Creates a KVM virtual machine with one 4 KiB page of guest memory and one
 * VCPU, loads a small real-mode program that executes `int 0x18' through
 * `int 0x4f', and services every interrupt in user space: each interrupt
 * vector points at a `hlt' instruction, so KVM_RUN returns with KVM_EXIT_HLT
 * and the loop below emulates the `iret'.
 *
 * Exit status: 0 on success, 1 on a system call failure, 2 on an unexpected
 * KVM exit, 4 on a KVM internal error, 5 on a `hlt' at an unexpected segment.
 */
int main(int argc, char *argv[]) {
  int kvm_fd, vm_fd, vcpu_fd;
  void *mem;
  struct kvm_userspace_memory_region region;
  struct kvm_run *run;
  struct kvm_regs regs;
  struct kvm_sregs sregs;
  (void)argc; (void)argv;
  if ((kvm_fd = open("/dev/kvm", O_RDWR)) < 0) {
    perror("failed to open /dev/kvm");
    return 1;
  }
  if ((vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0)) < 0) {
    perror("failed to create vm");
    return 1;
  }
  /* mmap reports failure by returning MAP_FAILED, not NULL. */
  if ((mem = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0)) == MAP_FAILED) {
    perror("mmap");
    return 1;
  }
  memset(&region, 0, sizeof(region));
  region.slot = 0;
  region.guest_phys_addr = 0;
  region.memory_size = 0x1000;
  region.userspace_addr = (uintptr_t)mem;
  if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0) {
    perror("KVM_SET_USER_MEMORY_REGION");
    return 1;
  }
  { /* 8086 real mode machine code. */
    char *p = (char*)mem + 0x700;
    unsigned int_num;
    for (int_num = 0; int_num < 0x100; ++int_num) {
      *(unsigned short*)((char*)mem + int_num * 4) = int_num; /* Interrupt vector INT_NUM offset := INT_NUM. */
      *(unsigned short*)((char*)mem + int_num * 4 + 2) = 0x54; /* Interrupt vector INT_NUM segment := 0x54. */
    }
    *p++ = (char)0xf4; /* hlt. */
    for (int_num = 0x18; int_num < 0x50; ++int_num) {
      *p++ = (char)0xcd; /* int int_num. */
      *p++ = (char)int_num;
    }
    *p++ = (char)0xf4;
  }
  memset((char*)mem + 0x540, '\xf4', 0x100); /* 256 times hlt. Interrupt vectors point here. */
  if ((vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0)) < 0) {
    perror("KVM_CREATE_VCPU");
    return 1;
  }
  {
    int kvm_run_mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (kvm_run_mmap_size < 0) {
      perror("KVM_GET_VCPU_MMAP_SIZE");
      return 1;
    }
    run = (struct kvm_run *)mmap(
        NULL, kvm_run_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
    if (run == (struct kvm_run *)MAP_FAILED) {
      perror("mmap kvm_run");
      return 1;
    }
  }
  memset(&regs, '\0', sizeof(regs));
  if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0) {
    perror("KVM_GET_SREGS");
    return 1;
  }
  { /* Dump the initial special registers to a file for inspection. */
    int fd = open("kvm16.sregs", O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd < 0) {
      perror("open");
      return 1;
    }
    if (write(fd, &sregs, sizeof(sregs)) != sizeof(sregs)) {
      perror("write");
      return 1;
    }
    if (close(fd) != 0) {
      perror("close");
      return 1;
    }
  }
  /* All segments use selector 0x70, i.e. base 0x700 where the guest code was written. */
  sregs.cs.base = (sregs.cs.selector = 0x70) << 4;
  sregs.ds.base = (sregs.ds.selector = sregs.cs.selector) << 4;
  sregs.es.base = (sregs.es.selector = sregs.cs.selector) << 4;
  sregs.ss.base = (sregs.ss.selector = sregs.cs.selector) << 4;
  if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0) {
    perror("KVM_GET_REGS");
    return 1;
  }
  regs.rflags = 1 << 1; /* Reserved bit in EFLAGS. Even needed after KVM_GET_REGS. */
  regs.rip = 0;
  regs.rsp = 0x1000 - 0x700; /* ss:sp == end of the guest page. */
  if (ioctl(vcpu_fd, KVM_SET_SREGS, &sregs) < 0) {
    perror("KVM_SET_SREGS");
    return 1;
  }
  if (ioctl(vcpu_fd, KVM_SET_REGS, &regs) < 0) {
    perror("KVM_SET_REGS");
    return 1;
  }
  for (;;) {
    int ret = ioctl(vcpu_fd, KVM_RUN, 0);
    unsigned short cs, ip;
    if (ret < 0) {
      perror("KVM_RUN");
      return 1;
    }
    if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs) < 0) {
      perror("KVM_GET_SREGS");
      return 1;
    }
    if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0) {
      perror("KVM_GET_REGS");
      return 1;
    }
    cs = sregs.cs.selector;
    ip = regs.rip;
    if (run->exit_reason == KVM_EXIT_HLT) {
      /* ip already points past the one-byte hlt, hence the `ip - 1'. */
      fprintf(stderr, "info: hlt: cs=0x%04x ip=0x%04x\n", cs, ip - 1);
      if (cs == 0x70) {
        if (ip != 0 + 1) {
          fprintf(stderr, "info: success, exiting\n");
          return 0; /* EXIT_SUCCESS after the second `hlt' in the code. */
        }
      } else if (cs == 0x54) { /* Simulate iret. */
        /* The interrupt pushed flags, cs and ip; they are read back from ss:sp. */
        const char *csip_ptr = (const char*)mem + ((unsigned short)sregs.ss.selector << 4) + (unsigned short)regs.rsp;
        const unsigned short int_ip = ((const unsigned short*)csip_ptr)[0];
        const unsigned short int_cs = ((const unsigned short*)csip_ptr)[1];
        const unsigned short int_flags = ((const unsigned short*)csip_ptr)[2];
        /* The vector offset equals the interrupt number, so ip - 1 identifies it. */
        fprintf(stderr, "info: int 0x%02x iret to: cs=0x%04x ip=0x%04x\n", ip - 1, int_cs, int_ip);
        sregs.cs.base = (sregs.cs.selector = int_cs) << 4;
        regs.rip = int_ip;
        if (int_flags & (1 << 9)) regs.rflags |= (1 << 9); /* Set IF back to 1 if it was 1. */
        regs.rsp += 6; /* pop ip, pop cs, popfw . */
        if (ioctl(vcpu_fd, KVM_SET_SREGS, &sregs) < 0) {
          perror("KVM_SET_SREGS");
          return 1;
        }
        if (ioctl(vcpu_fd, KVM_SET_REGS, &regs) < 0) {
          perror("KVM_SET_REGS");
          return 1;
        }
      } else {
        fprintf(stderr, "fatal: unexpected hlt: cs=0x%04x ip=0x%04x\n", cs, ip - 1);
        return 5;
      }
    } else if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) {
      fprintf(stderr, "fatal: KVM internal error suberror=%d\n", (unsigned)run->internal.suberror);
      return 4;
    } else {
      fprintf(stderr, "fatal: unexpected KVM exit: exit_reason=%d cs=0x%04x ip=0x%04x\n", run->exit_reason, cs, ip);
      return 2;
    }
  }
}
I am trying to run some analytics on PMU hardware/cache events on my Pi using perf_event.h. I get an error whenever I attempt to add more than 7 events to an event group. There are several things I don't understand.
The main questions are as follows:
Is there a limit to the number of events that can be multiplexed on the Pi (ARM cortex A53 I believe)? Like I said I am failing at 8 and that seems low.
When multiplexing events, does perf_event.h detect the number of PMU counters and utilize all of them?
If there are limits on number of events in event groups, can I access this information using perf_event.h capabilities?
Do you know of any good resources to help me better understand the functionality of perf_event.h?
I am going to include the entire file I am attempting to run because I am new to this and I am not sure which part will be significant. The macros N and M at the top of the code are the number of hw events and hw cache events to use respectively. The error occurs whenever N+M > 7
I believe this code will run on any Linux system (stress package is used apt-get install stress)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#define N 0 // number of hw events to monitor
#define M 8 // number of hw cache events to monitor
// Set to 1 by sighandler() when SIGCHLD arrives; polled by the parent's
// sampling loop. It is written from a signal handler, so it has to be a
// volatile sig_atomic_t rather than a plain int.
volatile sig_atomic_t global_sigchld_trip = 0; // catch end child
void sighandler(int);
// function to add perf event to event list
// based on example from:
// http://web.eece.maine.edu/~vweaver/projects/perf_events/perf_event_open.html
// Issues the perf_event_open system call through syscall() with the given
// arguments and hands back its result (a descriptor, or -1 on failure).
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    // The result is narrowed to int before being returned as long.
    const int event_fd = syscall(__NR_perf_event_open, hw_event,
                                 pid, cpu, group_fd, flags);
    return event_fd;
}
// no function for seeing if we can monitor kernel events
// Workload run by the child process: idles for `seconds`, announces itself,
// then runs the external `stress` tool. Always returns 0.
int no_function(int seconds)
{
    const char *stress_command = "stress -c 4 -t 10";

    sleep(seconds);
    printf("\n\n Start stress \n\n");
    system(stress_command);
    return 0;
}
// Opens N hardware events and M hardware-cache events as one perf event
// group (the first event opened is the leader), forks a child that runs the
// workload, and prints and resets every counter once per second until
// SIGCHLD is seen. Final counter values are printed before exiting.
int
main(int argc, char **argv){
    signal(SIGCHLD, sighandler);
    int num_hw_events = N;
    uint pe_hw[7] = {
        PERF_COUNT_HW_CPU_CYCLES,
        PERF_COUNT_HW_INSTRUCTIONS,
        PERF_COUNT_HW_CACHE_REFERENCES,
        PERF_COUNT_HW_CACHE_MISSES,
        PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
        PERF_COUNT_HW_BRANCH_MISSES,
        PERF_COUNT_HW_BUS_CYCLES,
    };
    char hw_name_arr[7][50] = {
        "PERF_COUNT_HW_CPU_CYCLES",
        "PERF_COUNT_HW_INSTRUCTIONS",
        "PERF_COUNT_HW_CACHE_REFERENCES",
        "PERF_COUNT_HW_CACHE_MISSES",
        "PERF_COUNT_HW_BRANCH_INSTRUCTIONS",
        "PERF_COUNT_HW_BRANCH_MISSES",
        "PERF_COUNT_HW_BUS_CYCLES",
    };
    // cache events: config is (cache id) | (operation << 8) | (result << 16)
    int num_hw_cache_events = M;
    uint pe_hw_cache[12] = {
        (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), //
        (PERF_COUNT_HW_CACHE_L1I) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
        (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
        (PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_READ << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
        (PERF_COUNT_HW_CACHE_L1D) | (PERF_COUNT_HW_CACHE_OP_WRITE<< 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), //
        (PERF_COUNT_HW_CACHE_LL) | (PERF_COUNT_HW_CACHE_OP_WRITE<< 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
        (PERF_COUNT_HW_CACHE_BPU) | (PERF_COUNT_HW_CACHE_OP_WRITE<< 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
    };
    char hw_cache_name_arr[12][50] = {
        "PERF_COUNT_HW_CACHE_L1D_read_miss",
        "PERF_COUNT_HW_CACHE_L1I_read_miss",
        "PERF_COUNT_HW_CACHE_LL_read_miss",
        "PERF_COUNT_HW_CACHE_BPU_read_miss",
        "PERF_COUNT_HW_CACHE_L1D_write_miss",
        "PERF_COUNT_HW_CACHE_LL_write_miss",
        "PERF_COUNT_HW_CACHE_BPU_write_miss",
    };
    // Hardware events occupy slots [0, num_hw_events); cache events follow.
    const int num_events = num_hw_events + num_hw_cache_events;
    struct perf_event_attr pat_arr[num_events];
    long long counts[num_events];
    int fd_arr[num_events];
    // initialize hw events
    for(int i=0; i<num_hw_events; i++)
    {
        memset(&pat_arr[i], 0, sizeof(struct perf_event_attr));
        pat_arr[i].type = PERF_TYPE_HARDWARE;
        pat_arr[i].size = sizeof(struct perf_event_attr);
        pat_arr[i].config = pe_hw[i];
        // only the group leader (slot 0) starts disabled
        if(i==0){pat_arr[i].disabled = 1;}
        else{pat_arr[i].disabled = 0;}
        pat_arr[i].exclude_kernel = 1;
        pat_arr[i].exclude_hv = 1;
        pat_arr[i].inherit = 1;
        // slot 0 creates the group (group_fd == -1); the rest join it
        if(i==0){fd_arr[i] = perf_event_open(&pat_arr[i],0,-1,-1,0);}
        else{fd_arr[i] = perf_event_open(&pat_arr[i],0,-1,fd_arr[0],0);}
        if (fd_arr[i] == -1){
            fprintf(stderr, "Error opening leader %llx\n", pat_arr[i].config);
            exit(EXIT_FAILURE);
        }
        printf("FD%d: %d \t ITEM: %s\n",i,fd_arr[i], hw_name_arr[i]);
    }
    // initialize hw cache events
    for(int i=0; i<num_hw_cache_events; i++)
    {
        const int slot = i+num_hw_events;
        memset(&pat_arr[slot], 0, sizeof(struct perf_event_attr));
        pat_arr[slot].type = PERF_TYPE_HW_CACHE;
        pat_arr[slot].size = sizeof(struct perf_event_attr);
        pat_arr[slot].config = pe_hw_cache[i];
        // a cache event becomes the leader only when there are no hw events
        if(slot==0){printf("dis=1");pat_arr[slot].disabled = 1;}
        else{printf("dis=0");pat_arr[slot].disabled = 0;}
        pat_arr[slot].exclude_kernel = 1;
        pat_arr[slot].exclude_hv = 1;
        pat_arr[slot].inherit = 1;
        if(slot==0){fd_arr[slot] = perf_event_open(&pat_arr[slot],0,-1,-1,0);}
        else{fd_arr[slot] = perf_event_open(&pat_arr[slot],0,-1,fd_arr[0],0);}
        if (fd_arr[slot] == -1){
            printf("\ni: %d\nnhe:%d\n",i,num_hw_events);
            fprintf(stderr, "Error opening leader %llx\n", pat_arr[slot].config);
            exit(EXIT_FAILURE);
        }
        printf("FD%d: %d \t ITEM: %s\n",slot,fd_arr[slot], hw_cache_name_arr[i]);
    }
    // reset and enable counters
    for(int i=0; i<num_events; i++){
        ioctl(fd_arr[i], PERF_EVENT_IOC_RESET,0);
        ioctl(fd_arr[i], PERF_EVENT_IOC_ENABLE,0);
    }
    /////////////////// ACTION ///////////////////////////
    /*-------- CHILD PROCESS BEING RECORDED -------*/
    printf("\nSHOULD FORK RIGHTE HERE\n");
    pid_t proc = fork();
    if(proc<0){
        // Without a child no SIGCHLD ever arrives and the loop below would spin forever.
        perror("fork");
        for(int i=0;i<num_events;i++){close(fd_arr[i]);}
        exit(EXIT_FAILURE);
    }
    if(proc==0){
        printf("entered child process\n");
        int no_sleep = 3;
        no_function(no_sleep);
        //x = silly_events(loop);
        printf("exiting child process\n");
        return 0;
    }
    /*-------- ACTION TAKEN DURING RECORDING -------*/
    else{
        while(!global_sigchld_trip){
            sleep(1);
            for(int i=0;i<num_events;i++){read(fd_arr[i], &counts[i], sizeof(long long));}
            for(int i=0;i<num_events;i++){ioctl(fd_arr[i], PERF_EVENT_IOC_RESET,0);}
            for(int i=0;i<num_hw_events;i++){printf("%lld %s\t\n", counts[i], hw_name_arr[i]);}
            printf("--------------------------------------\n");
            for(int i=0;i<num_hw_cache_events;i++){printf("%lld %s\t\n", counts[i+num_hw_events], hw_cache_name_arr[i]);}
            printf("\n\n");
        }
    }
    for(int i=0;i<num_events;i++){ioctl(fd_arr[i], PERF_EVENT_IOC_DISABLE,0);}
    for(int i=0;i<num_events;i++){read(fd_arr[i], &counts[i], sizeof(long long));}
    for(int i=0;i<num_hw_events;i++){printf("Used %lld %s\t", counts[i], hw_name_arr[i]);}
    for(int i=0;i<num_hw_cache_events;i++){printf("Used %lld %s\t", counts[i+num_hw_events], hw_cache_name_arr[i]);}
    // Close every descriptor that was opened, cache events included.
    for(int i=0;i<num_events;i++){close(fd_arr[i]);}
    return 0;
}
// SIGCHLD handler: reports the signal number and raises the flag polled by
// the sampling loop in main().
//
// printf() is not async-signal-safe, so the message is formatted by hand and
// emitted with a single write(). Note that the text therefore bypasses
// stdio's buffer for stdout.
void sighandler(int signum) {
    static const char prefix[] = "Caught signal ";
    static const char suffix[] = ", coming out...\n";
    char message[sizeof(prefix) + sizeof(suffix) + 12];
    char digits[12];
    size_t length = 0;
    size_t digit_count = 0;
    unsigned int magnitude = (signum < 0) ? 0u - (unsigned int)signum : (unsigned int)signum;

    for (size_t i = 0; i + 1 < sizeof(prefix); i++) {
        message[length++] = prefix[i];
    }
    if (signum < 0) {
        message[length++] = '-';
    }
    // Digits are produced least-significant first, then copied in reverse.
    do {
        digits[digit_count++] = (char)('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);
    while (digit_count > 0) {
        message[length++] = digits[--digit_count];
    }
    for (size_t i = 0; i + 1 < sizeof(suffix); i++) {
        message[length++] = suffix[i];
    }
    if (write(STDOUT_FILENO, message, length) < 0) {
        // Nothing safe can be done about a failed write inside a handler.
    }
    global_sigchld_trip = 1;
}
I need to write an RTSP stream from an IP camera to a file, and I use FFmpeg to do this. I found a code example in C++, but I need to use only C. Can anyone help me?
I have problems with the file operations. How can I write the stream to a file?
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sstream>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libswscale/swscale.h>
}
// Opens a fixed RTSP URL, decodes up to ~1000 video packets and, once more
// than 100 video packets have been seen, dumps each picture as a plain-text
// PPM file named test<N>.ppm. Returns EXIT_FAILURE when the input cannot be
// opened, has no stream information, or contains no video stream.
int main(int argc, char** argv) {
    // Open the initial context variables that are needed
    SwsContext *img_convert_ctx;
    AVFormatContext* format_ctx = avformat_alloc_context();
    AVCodecContext* codec_ctx = NULL;
    // -1 marks "no video stream found"; previously this was read uninitialised
    // when the input carried no video stream.
    int video_stream_index = -1;
    // Register everything
    av_register_all();
    avformat_network_init();
    //open RTSP
    if (avformat_open_input(&format_ctx, "rtsp://134.169.178.187:8554/h264.3gp",
                            NULL, NULL) != 0) {
        return EXIT_FAILURE;
    }
    if (avformat_find_stream_info(format_ctx, NULL) < 0) {
        return EXIT_FAILURE;
    }
    //search video stream (the last video stream wins, as before)
    for (unsigned int i = 0; i < format_ctx->nb_streams; i++) {
        if (format_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
            video_stream_index = static_cast<int>(i);
    }
    if (video_stream_index < 0) {
        // Nothing to decode: indexing streams[] below would be out of bounds.
        return EXIT_FAILURE;
    }
    AVPacket packet;
    av_init_packet(&packet);
    //open output file
    AVFormatContext* output_ctx = avformat_alloc_context();
    AVStream* stream = NULL;
    int cnt = 0;
    //start reading packets from stream and write them to file
    av_read_play(format_ctx); //play RTSP
    // Get the codec
    AVCodec *codec = NULL;
    codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    if (!codec) {
        exit(1);
    }
    // Add this to allocate the context by codec
    codec_ctx = avcodec_alloc_context3(codec);
    avcodec_get_context_defaults3(codec_ctx, codec);
    avcodec_copy_context(codec_ctx, format_ctx->streams[video_stream_index]->codec);
    std::ofstream output_file;
    if (avcodec_open2(codec_ctx, codec, NULL) < 0)
        exit(1);
    // Converter from the decoder's pixel format to packed RGB24 at the same size.
    img_convert_ctx = sws_getContext(codec_ctx->width, codec_ctx->height,
                                     codec_ctx->pix_fmt, codec_ctx->width, codec_ctx->height, AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC, NULL, NULL, NULL);
    int size = avpicture_get_size(AV_PIX_FMT_YUV420P, codec_ctx->width,
                                  codec_ctx->height);
    uint8_t* picture_buffer = (uint8_t*) (av_malloc(size));
    AVFrame* picture = av_frame_alloc();
    AVFrame* picture_rgb = av_frame_alloc();
    int size2 = avpicture_get_size(AV_PIX_FMT_RGB24, codec_ctx->width,
                                   codec_ctx->height);
    uint8_t* picture_buffer_2 = (uint8_t*) (av_malloc(size2));
    avpicture_fill((AVPicture *) picture, picture_buffer, AV_PIX_FMT_YUV420P,
                   codec_ctx->width, codec_ctx->height);
    avpicture_fill((AVPicture *) picture_rgb, picture_buffer_2, AV_PIX_FMT_RGB24,
                   codec_ctx->width, codec_ctx->height);
    while (av_read_frame(format_ctx, &packet) >= 0 && cnt < 1000) { //read ~ 1000 frames
        std::cout << "1 Frame: " << cnt << std::endl;
        if (packet.stream_index == video_stream_index) { //packet is video
            std::cout << "2 Is Video" << std::endl;
            if (stream == NULL) { //create stream in file
                std::cout << "3 create stream" << std::endl;
                stream = avformat_new_stream(output_ctx,
                                             format_ctx->streams[video_stream_index]->codec->codec);
                avcodec_copy_context(stream->codec,
                                     format_ctx->streams[video_stream_index]->codec);
                stream->sample_aspect_ratio =
                    format_ctx->streams[video_stream_index]->codec->sample_aspect_ratio;
            }
            int check = 0;
            packet.stream_index = stream->id;
            std::cout << "4 decoding" << std::endl;
            int result = avcodec_decode_video2(codec_ctx, picture, &check, &packet);
            std::cout << "Bytes decoded " << result << " check " << check
                      << std::endl;
            if (cnt > 100) //cnt < 0)
            {
                sws_scale(img_convert_ctx, picture->data, picture->linesize, 0,
                          codec_ctx->height, picture_rgb->data, picture_rgb->linesize);
                std::stringstream file_name;
                file_name << "test" << cnt << ".ppm";
                output_file.open(file_name.str().c_str());
                // Plain-text PPM header: magic, width, height, maximum value.
                output_file << "P3 " << codec_ctx->width << " " << codec_ctx->height
                            << " 255\n";
                for (int y = 0; y < codec_ctx->height; y++) {
                    // Each row holds width * 3 bytes (R, G, B per pixel).
                    for (int x = 0; x < codec_ctx->width * 3; x++)
                        output_file
                            << (int) (picture_rgb->data[0]
                                      + y * picture_rgb->linesize[0])[x] << " ";
                }
                output_file.close();
            }
            cnt++;
        }
        av_free_packet(&packet);
        av_init_packet(&packet);
    }
    av_free(picture);
    av_free(picture_rgb);
    av_free(picture_buffer);
    av_free(picture_buffer_2);
    av_read_pause(format_ctx);
    avio_close(output_ctx->pb);
    avformat_free_context(output_ctx);
    return (EXIT_SUCCESS);
}
please help me to compile this by C-compiler.
I'm trying to find a command to get the actual reserved memory for mqueue.
In /proc/self/limits is stored limit of total size for mqueue (in my case 819200).
It means that if I have 10 mqueues with a limit of 10 msg and size 8192, then total size is 10*10*8192=819200 which is number in system limit.
I know how to increase this limit but I don't know how to get the actual used memory for mqueue (for example if I'm currently using 6 mqueues).
If I create 6 mqueues with the settings 10 msg and size 8192,
then the allocated memory will be 6*10*8192 = 491520,
and my question is where I can find this size (491520).
#include <iostream>
#include <vector>
#include <cerrno>
#include <cstring>
#include <stdexcept>
#include <stdio.h>
#include <string>
#include "mqueue.h"
/// Runs `cmd` through popen() and returns everything it wrote to stdout.
///
/// @param cmd  shell command line passed verbatim to popen().
/// @return     the captured output (empty if the command printed nothing).
/// @throws std::runtime_error if popen() fails.
std::string exec(const char* cmd) {
    char buffer[128];
    std::string result;
    FILE* pipe = popen(cmd, "r");
    if (!pipe) throw std::runtime_error("popen() failed!");
    try {
        // Loop on fgets() itself: it returns NULL both at end-of-file and on a
        // read error, whereas polling feof() would spin forever after an error.
        while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
            result += buffer;
        }
    } catch (...) {
        // Appending may throw (e.g. bad_alloc); do not leak the pipe.
        pclose(pipe);
        throw;
    }
    pclose(pipe);
    return result;
}
/// Creates a POSIX message queue with a fixed message size of 8192 bytes and
/// keeps its descriptor. The destructor intentionally leaves the queue open
/// and linked (the cleanup line is commented out).
class mqueue {
public:
    /// @param n       queue name passed to mq_unlink()/mq_open().
    /// @param maxMsg  value for mq_maxmsg (maximum number of queued messages).
    mqueue(std::string n = "/dummy", int maxMsg = 10) : name(n), queue(), attr() {
        /* Set attributes */
        this->attr.mq_flags = 0;
        this->attr.mq_maxmsg = maxMsg; /* was: MAX_MSG_QUEUE */
        this->attr.mq_msgsize = 8192; /* was: MAX_MSG_SIZE */
        /* Destroy old message queue */
        mq_unlink(this->name.c_str());
        /* Open message queue */
        queue = mq_open(this->name.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRWXU | S_IRWXG, &this->attr);
        /* Report mq_open's own outcome: errno is only meaningful when the call
           failed, and could otherwise still hold mq_unlink's error. */
        const int openError = (queue == (mqd_t) -1) ? errno : 0;
        std::cout << "\n[info] in constructor class mqueue name: " << this->name << " | errno: " << std::strerror(openError);
    }
    ~ mqueue() {
        //std::cout << "\n[info] in destructor class mqueue name: " << this->name << " | close = " << mq_close(this->queue) << " | unlink = " << mq_unlink(this->name.c_str());
    }
    /// Prints the queue name and the descriptor returned by mq_open().
    void getInfo() const {
        std::cout << "\nname: " << this->name << " | mqueue: " << this->queue << "\n" ;
    }
private:
    // Members are initialised in this order, whatever the initializer list says.
    std::string name;
    mqd_t queue;
    mq_attr attr;
};
/// Creates seven message queues with different mq_maxmsg values, then probes
/// descriptor numbers 0..98 with mq_getattr() and prints the attributes of
/// every descriptor that refers to a message queue.
int main(int argc, char **argv)
{
    (void) argc;
    (void) argv;
    mqueue testMqueue("/dummy");
    testMqueue.getInfo();
    mqueue testMqueue1("/dummy1", 1);
    mqueue testMqueue2("/dummy2", 6);
    mqueue testMqueue3("/dummy3", 7);
    mqueue testMqueue4("/dummy4", 10);
    mqueue testMqueue5("/dummy5", 2);
    mqueue testMqueue6("/dummy6", 3);
    for (int i = 0; i < 99; i++) {
        const mqd_t mqdes = i;
        mq_attr mqstat = mq_attr();
        // mq_getattr() fails for descriptors that are not message queues; in
        // that case mqstat holds no meaningful data, so skip the entry.
        if (mq_getattr(mqdes, &mqstat) != 0) {
            continue;
        }
        std::cout << "\n" << i << "\tmaxmsg: " << mqstat.mq_maxmsg << "\tmsgsize: " << mqstat.mq_msgsize << "\tsize: " << mqstat.mq_maxmsg * mqstat.mq_msgsize;
    }
    return 0;
}
Thanks for answers.
Adrian
Example:
Compile and execute:
// Try to create 20 queues (each with the constructor's default maxMsg) and
// print the descriptor obtained for each one.
std::vector<mqueue> testMqueue;
for (i = 0; i < 20; i++) {
// push_back stores a copy of the temporary mqueue object.
testMqueue.push_back(mqueue("/dummy" + std::to_string(i)));
testMqueue.at(i).getInfo();
}
// Show what `ipcs -q` reports after the queues were created.
std::cout << exec("ipcs -q") ;
and as you can see:
[info] in constructor class mqueue name: /dummy19 | errno: Too many open files
name: /dummy19 | mqueue: -1
------ Message Queues --------
key msqid owner perms used-bytes messages
list is empty also when I executing ipcs -q in terminal
I want to record a video from a V4L2 device (from the Raspberry Pi camera) in C.
The recording itself works and I can save the video to a file.
However I need to change the bitrate of the video. From the strace output of the v4l2-ctl --set-ctrl video_bitrate=10000000 command I know that the extended controls API of v4l2 is used to achieve this.
Here's my code which doesn't work so far:
#include <iostream>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h> //mmap
#include <fcntl.h>
#include <unistd.h>
#include <linux/videodev2.h>
using namespace std;
// Number of capture buffers requested from the driver and mapped below.
#define numbuffers 3
// One memory-mapped capture buffer.
struct picturebuffer
{
// Address returned by mmap() for this buffer.
void *startadress;
// Size of the mapping in bytes, as reported by VIDIOC_QUERYBUF.
size_t length;
};
//array in which the buffer pointer are being stored
picturebuffer pb[numbuffers];
// Captures 100 H.264 buffers from /dev/video0 at 1920x1080 with a requested
// bitrate of 10 Mbit/s and appends them to /home/pi/image.h264.
//
// Returns 0 on success and 1 as soon as a required step fails.
int main()
{
    //open camera
    int fd;
    fd = open("/dev/video0", O_RDWR);
    if(fd < 0)
    {
        std::cout << "error during opening the camera device!";
        std::cout.flush();
        return 1;
    }
    std::cout << "camera opened";
    //read capabilities
    struct v4l2_capability caps;
    memset(&caps, 0, sizeof(caps));
    if(ioctl(fd, VIDIOC_QUERYCAP, &caps) < 0)
    {
        std::cout << "error while reading the capabilities!";
        std::cout.flush();
        close(fd);
        return 1;
    }
    std::cout << "Capabilities " << caps.capabilities << std::endl;
    //ToDo: check for required capabilities
    //set image data
    struct v4l2_format format;
    // Zero the whole structure so the fields not set below are not stack garbage.
    memset(&format, 0, sizeof(format));
    format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    format.fmt.pix.pixelformat = V4L2_PIX_FMT_H264;
    format.fmt.pix.width = 1920;
    format.fmt.pix.height = 1080;
    if(ioctl(fd, VIDIOC_S_FMT, &format) < 0)
    {
        std::cout << "error in the image format";
        std::cout.flush();
        close(fd);
        return 1;
    }
    std::cout << "Image properties set" << std::endl;
    //Todo: check if width and height fit together (VIDIOC_ENUM_FRAMESIZES)
    //set extended Controls
    struct v4l2_ext_controls ecs;
    struct v4l2_ext_control ec;
    memset(&ecs, 0, sizeof(ecs));
    memset(&ec, 0, sizeof(ec));
    ec.id = V4L2_CID_MPEG_VIDEO_BITRATE;
    ec.value = 10000000;
    ec.size = 0;
    ecs.controls = &ec;
    ecs.count = 1;
    ecs.ctrl_class = V4L2_CTRL_CLASS_MPEG;
    if(ioctl(fd, VIDIOC_S_EXT_CTRLS, &ecs) < 0)
    {
        // Not fatal: capture still works, only with the driver's own bitrate.
        std::cout << "error in extended controls bitrate";
        std::cout.flush();
    }
    //allocate buffer in the kernel
    struct v4l2_requestbuffers req;
    memset(&req, 0, sizeof(req));
    req.count = numbuffers;
    req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    req.memory = V4L2_MEMORY_MMAP;
    if(ioctl(fd, VIDIOC_REQBUFS, &req) < 0)
    {
        std::cout << "errro while allocating buffer";
        std::cout.flush();
        close(fd);
        return 1;
    }
    std::cout << "number of buffers: " << req.count << std::endl;
    std::cout.flush();
    // req.count holds the number actually granted; never use more than pb[] can hold.
    const unsigned int bufferCount = (req.count < numbuffers) ? req.count : numbuffers;
    //map buffers into userspace
    for(unsigned int i=0; i<bufferCount; i++)
    {
        struct v4l2_buffer bufferinfo;
        memset(&bufferinfo, 0, sizeof(bufferinfo));
        bufferinfo.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        bufferinfo.memory = V4L2_MEMORY_MMAP;
        bufferinfo.index = i;
        if(ioctl(fd, VIDIOC_QUERYBUF, &bufferinfo) < 0)
        {
            std::cout << "error while querying bufferinfo";
            std::cout.flush();
            close(fd);
            return 1;
        }
        pb[i].startadress = mmap(NULL, bufferinfo.length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, bufferinfo.m.offset);
        pb[i].length = bufferinfo.length;
        if(pb[i].startadress == MAP_FAILED)
        {
            // Touching MAP_FAILED below would crash, so stop here.
            std::cout << "error during mmap" << std::endl;
            close(fd);
            return 1;
        }
        memset(pb[i].startadress, 0, bufferinfo.length);
        std::cout << "size of buffer: " << bufferinfo.length << std::endl;
    }
    std::cout << "buffers mapped into userspace" << std::endl;
    std::cout.flush();
    //queue in the buffers
    for(unsigned int i=0; i<bufferCount; i++)
    {
        struct v4l2_buffer bufferinfo;
        memset(&bufferinfo, 0, sizeof(bufferinfo));
        bufferinfo.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        bufferinfo.memory = V4L2_MEMORY_MMAP;
        bufferinfo.index = i;
        if(ioctl(fd, VIDIOC_QBUF, &bufferinfo) < 0)
        {
            std::cout << "error while queueing the buffers in" << std::endl;
            close(fd);
            return 1;
        }
    }
    //since that point the driver starts capturing the pics
    int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    if(ioctl(fd, VIDIOC_STREAMON, &type) < 0)
    {
        std::cout << "error while starting the stream" << std::endl;
        close(fd);
        return 1;
    }
    int file;
    if((file = open("/home/pi/image.h264", O_WRONLY | O_CREAT, 0660)) < 0)
    {
        std::cout << "error while writing the file";
        std::cout.flush();
        ioctl(fd, VIDIOC_STREAMOFF, &type);
        close(fd);
        return 1;
    }
    //loop for managing the pics
    for(int i=0; i<100; i++)
    {
        struct v4l2_buffer bufferinfo;
        memset(&bufferinfo, 0, sizeof(bufferinfo));
        bufferinfo.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
        bufferinfo.memory = V4L2_MEMORY_MMAP;
        if(ioctl(fd, VIDIOC_DQBUF, &bufferinfo) < 0)
        {
            // bufferinfo.index is not valid after a failed dequeue.
            std::cout << "error while getting the buffer!" << std::endl;
            break;
        }
        std::cout << bufferinfo.index << std::endl;
        std::cout.flush();
        //write picture into the file
        // Only bytesused bytes of the buffer hold encoded data; writing the
        // full mapping length produced a file of the same size on every run,
        // whatever the bitrate.
        if(write(file, pb[bufferinfo.index].startadress, bufferinfo.bytesused) < 0)
        {
            std::cout << "error while writing the file";
            std::cout.flush();
        }
        if(ioctl(fd, VIDIOC_QBUF, &bufferinfo) < 0)
        {
            std::cout << "error while enqueuing the buffer" << std::endl;
        }
    }
    close(file);
    if(ioctl(fd, VIDIOC_STREAMOFF, &type) < 0)
    {
        std::cout << "error while stopping the stream" << std::endl;
    }
    //clean up
    for(unsigned int i=0; i<bufferCount; i++)
    {
        if(munmap(pb[i].startadress, pb[i].length) < 0)
        {
            std::cout << "error during unmap";
        }
    }
    //close camera file
    close(fd);
    std::cout << "!!!Hello World!!!" << std::endl;
    std::cout.flush();
    return 0;
}
The ioctl call seems to succeed, however my output file always has the same size as of 199,2 MB. Does someone know what´s wrong in the code ?
You need to check if the camera driver supports that IOCTL command. If the driver doesn't support the IOCTL command by not implementing it, you still can execute the command and it is routed to v4l2 default implementation, no actual changes are applied to the camera setting
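Try changing the length argument passed to write(). Replace:
pb[bufferinfo.index].length
with the number of bytes the driver actually stored in the dequeued buffer (note that this field belongs to the v4l2_buffer structure, not to picturebuffer):
bufferinfo.bytesused
For example:
write(file, pb[bufferinfo.index].startadress, bufferinfo.bytesused);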