My whole system (Ubuntu 18.04) always freezes after around one hour when my C program continuously writes logs to files. Each file created is around 100 to 200 MB, and the total size of these files before the system goes down is around 40-60 GB. I usually still have more than 150 GB of SSD space available at that moment.
I checked the system condition with System Monitor but couldn't find any problem. When my program runs, only one of the eight cores is at 100% usage; the others are pretty low. Before the system goes down, only 2.5 GB of the 15.5 GB of memory is used. Every time I reboot the machine, the latest 4-6 created files are empty, even though most of them were showing some size at the moment of freezing (it looks like they were never actually written to the SSD).
My C code can be simplified as below:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define MEM_LEN  50000
#define FILE_LEN (10000*300)
/* BUF_SIZE and OFFSET were not shown in the original; assumed values */
#define BUF_SIZE (MEM_LEN * sizeof(struct log_format))
#define OFFSET   0

struct log_format {
    long cnt;
    long tv_sec;
    long tv_nsec;
    unsigned int user;
    char rw;
    char pathbuffer[256];
    size_t count;
    long long pos;
};

int main(int argc, const char *argv[])
{
    int fd = 0;
    struct log_format *addr = NULL;
    int i = 0;
    FILE *file;
    char filestr[20];
    int data_cnt = 0;
    int file_cnt = 0;

    // open shared memory device
    fd = open("/dev/remap_pfn", O_RDWR);
    if (fd < 0) {
        perror("....open shared memory device1 failed");
        exit(-1);
    }

    // map the shared memory device
    addr = mmap(NULL, BUF_SIZE, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_LOCKED, fd, OFFSET);
    if (addr == MAP_FAILED) {   // mmap returns MAP_FAILED on error, not NULL
        perror("....mmap1 failed");
        exit(-1);
    }

    // open the first output file
    sprintf(filestr, "%d.csv", file_cnt);
    file = fopen(filestr, "w");
    printf("%s created\n", filestr);

    // continuously read the shared ring buffer and write records to the file
    while (1) {
        fprintf(file, "%ld,%ld,%ld,%u,%c,%s,%zu,%lld\n",
                addr[i].cnt, addr[i].tv_sec, addr[i].tv_nsec,
                addr[i].user, addr[i].rw, addr[i].pathbuffer,
                addr[i].count, addr[i].pos);
        i++;
        data_cnt++;
        if (i >= MEM_LEN)
            i = 0;
        // when reaching the threshold, start another file
        if (data_cnt >= FILE_LEN) {
            data_cnt = 0;
            fclose(file);
            file_cnt++;
            sprintf(filestr, "%d.csv", file_cnt);
            file = fopen(filestr, "w");
            printf("%s created\n", filestr);
        }
    }
    fclose(file);
    return 0;
}
I didn't find any error message in syslog or kern.log; it just freezes.
Does anyone have an idea what the problem could be? Thanks.
I tried to add a small delay to my while loop to slow down the writes (since even 1 nanosecond per iteration is too long for the loop, I make it sleep only once every 10 runs):

while(1){
    struct timespec ts = {0, 1L};
    if (data_cnt % 10 == 0)
        nanosleep(&ts, NULL);
    ......
}

The freeze problem seems to be gone now.
So... what might be the reason for this? For now, I only see the writes becoming slower and the CPU load for that core dropping to 50%. Is there a write buffer in between whose limit my program exceeded, crashing the system?
(I will also keep an eye on whether it is an overheating problem taking the machine down.)
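One way I could test the write-buffer theory is to bound the amount of un-flushed data by syncing periodically. A minimal sketch against the code above (FLUSH_EVERY is an arbitrary value I picked):

// after every FLUSH_EVERY records, push buffered data to the kernel
// and then to the device, so dirty pages cannot pile up unbounded
#define FLUSH_EVERY 100000

if (data_cnt % FLUSH_EVERY == 0) {
    fflush(file);              // flush the stdio buffer to the kernel
    fdatasync(fileno(file));   // block until the data reaches the SSD
}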
Related
I am trying to implement a file copy program with the POSIX asynchronous I/O APIs on Linux.
I tried this:
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <aio.h>

int main(void)
{
    char data[200];
    int fd = open("data.txt", O_RDONLY);  // text file on the disk
    struct aiocb aio;

    // zero the control block BEFORE filling it in,
    // otherwise memset wipes the fields out again
    memset(&aio, 0, sizeof(struct aiocb));
    aio.aio_fildes = fd;
    aio.aio_buf = data;
    aio.aio_nbytes = sizeof(data);
    aio.aio_offset = 0;

    aio_read(&aio);

    int counter = 0;
    while (aio_error(&aio) == EINPROGRESS) {
        printf("counter: %d\n", counter++);
    }
    ssize_t ret = aio_return(&aio);
    printf("ret value %zd\n", ret);
    return 0;
}
But counter gives different results every time I run it.
Is it possible to display the progress of aio_read and aio_write operations?
You get different results because each execution has its own context, which may differ from the others. (Do you always follow the exact same path from your house to the bank? Even if you do, is the elapsed time always exactly the same? The task is the same in the end, you are at the bank, but the executions differ.) What your program measures is not I/O completion but the number of times it managed to test for completion of an async I/O before it finished.
And no, there is no concept of percentage of completion of a given async I/O.
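If the goal is just to wait for the I/O without burning CPU, aio_suspend can block until the request completes. A minimal sketch, reusing the aiocb from the question:

const struct aiocb *list[1] = { &aio };

// block until the request completes instead of busy-polling aio_error()
if (aio_suspend(list, 1, NULL) == -1) {
    perror("aio_suspend");
}
ssize_t ret = aio_return(&aio);   // bytes read, or -1 on error
printf("ret value %zd\n", ret);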
The script file has over 6000 bytes, which is copied into a buffer. The contents of the buffer are then written to the device connected to the serial port. However, the write function only returns 4608 bytes, whereas the buffer contains 6117 bytes. I'm unable to understand why this happens.
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <termios.h>

int main(void)
{
    FILE *ptr;
    long numbytes;
    int fd;

    ptr = fopen("compass_script(1).4th", "r");  // opening the script file
    if (ptr == NULL)
        return 1;
    fseek(ptr, 0, SEEK_END);
    numbytes = ftell(ptr);  // number of bytes in the script
    printf("number of bytes in the calibration script %ld\n", numbytes);
    // number of bytes in the script is 6117
    fseek(ptr, 0, SEEK_SET);

    char writebuffer[numbytes];  // creating a buffer to copy the file into
    int s = fread(writebuffer, sizeof(char), numbytes, ptr);
    // transferring contents into the buffer
    perror("fread");
    fclose(ptr);

    fd = open("/dev/ttyUSB3", O_RDWR | O_NOCTTY | O_NONBLOCK);
    // opening serial port
    speed_t baud = B115200;
    struct termios serialset;  // setting a baud rate for communication
    tcgetattr(fd, &serialset);
    cfsetispeed(&serialset, baud);
    cfsetospeed(&serialset, baud);
    tcsetattr(fd, TCSANOW, &serialset);

    long bytesw = 0;
    tcflush(fd, TCIFLUSH);
    printf("\nnumbytes %ld", numbytes);
    bytesw = write(fd, writebuffer, numbytes);
    // writing the script to the device connected to the serial port
    printf("bytes written %ld\n", bytesw);  // only 4608 bytes are written
    close(fd);
    return 0;
}
Well, that's the specification. When you write to a regular file, your process normally blocks until the whole data is written, which means it runs again only once all the data has been handed to the disk buffers. This is not true for devices: the device driver is responsible for deciding how much data is written in one pass. So, depending on the device driver, you may get all your data accepted, only part of it, or even none at all. That simply depends on the device and how the driver implements its control.
Under the hood, device drivers normally have a limited amount of buffer memory and can accept only a limited amount of data at a time. There are two policies here: the driver can block the process until more buffer space is available, or it can return with a partial write.
It's your program's responsibility to accept a partial write and continue writing the rest of the buffer, or to pass the problem back to the calling module by returning a partial write itself. This approach is the most flexible one, and it is the one implemented everywhere. Now you have a reason for your partial write, but the ball is in your court: you have to decide what to do next.
Also, be careful: you store the fread() return value in an int and the write() return value in a long, while the return types of those calls are size_t and ssize_t respectively (like the speed_t type you use for the baud rate). Although your amount of data is small and these values probably convert fine, long can be a 32-bit type while size_t is a 64-bit type.
The best thing you can do is ensure the whole buffer gets written, with a code snippet like the following:
char *p = buffer;
while (numbytes > 0) {
    ssize_t n = write(fd, p, numbytes);
    if (n < 0) {
        perror("write");
        /* driver signals some error */
        return 1;
    }
    /* writing 0 bytes is weird, but possible; consider adding
     * code here to cope with that possibility. */
    /* n >= 0 */
    /* update pointer and numbytes */
    p += n;
    numbytes -= n;
}
/* if we get here, we have written all numbytes */
When I first made this project last semester, the code worked fine. Now I get a bus error when the mmapped memory shared between processes is written to, and I'm not sure why it stopped working.
Account_Info *mapData()
{
    int fd;

    // open/create file with read and write permission and check return value
    if ((fd = open("accounts", O_RDWR | O_CREAT, 0644)) == -1)
    {
        perror("Unable to open account list file.");
        exit(0);
    }

    // map data to be shared with different processes
    Account_Info *accounts = mmap((void*)0, (size_t)100*(sizeof(Account_Info)), PROT_WRITE,
                                  MAP_SHARED, fd, 0);
    int count = 0;

    // loop to initialize values of Account_Info struct
    while (count != 20)
    {
        // bus error occurs here
        accounts[count].CurrBalance = 0;
        accounts[count].flag = 0;
        int i = 0;
        while (i != 100)
        {
            // place NULL terminator into each element of AccName
            accounts[count].AccName[i] = '\0';
            i++;
        }
        count++;
    }
    close(fd);
    return accounts;
}
A documented cause for SIGBUS with mmap is
Attempted access to a portion of the buffer that does not correspond to the file (for example, beyond the end of the file, including the case where another process has truncated the file).
My guess is that the accounts file didn't exist, so open with O_CREAT created it. But it has zero size, so any attempt to read or write through the mapping will fault. You need to fill the file with enough zeroes (or something else) to cover the mapping, for example using ftruncate.
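A minimal sketch of that fix, using the 100-struct size from the question's mmap call (I've added PROT_READ alongside PROT_WRITE and abbreviated the error handling):

size_t len = 100 * sizeof(Account_Info);

// make sure the file is at least as large as the mapping,
// otherwise touching the mapped pages raises SIGBUS
if (ftruncate(fd, (off_t)len) == -1) {
    perror("ftruncate");
    exit(1);
}
Account_Info *accounts = mmap(NULL, len, PROT_READ | PROT_WRITE,
                              MAP_SHARED, fd, 0);
if (accounts == MAP_FAILED) {
    perror("mmap");
    exit(1);
}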
You will get SIGBUS if you attempt to write past the mapped region of the file.
Chances are pretty good that your backing-store file accounts is truncated or too short. For example, if the file has space for 10 struct entries and you write to the 11th, you'll get SIGBUS.
Do an fstat to get st_size and compare it against the length parameter you're giving to mmap.
You may want to consider using ftruncate to extend the file before doing the mmap, as in the sketch below.
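A minimal sketch of that check (len here is my placeholder for whatever length you pass to mmap):

struct stat sb;

if (fstat(fd, &sb) == -1) {
    perror("fstat");
    exit(1);
}
// any page of the mapping beyond EOF will fault with SIGBUS on access
if ((size_t)sb.st_size < len) {
    fprintf(stderr, "file is %lld bytes, mapping needs %zu\n",
            (long long)sb.st_size, len);
    // extend the backing file so the whole mapping is valid
    if (ftruncate(fd, (off_t)len) == -1) {
        perror("ftruncate");
        exit(1);
    }
}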
Currently, I am having a hard time discovering what the problem is with my multithreaded C program on the RPi. I have written an application relying on two pthreads, one of them reading data from a GPS device and writing it to a text file, and the second one doing exactly the same but with a temperature sensor. On my laptop (Intel® Core™ i3-380M, 2.53 GHz) the program works nicely, writing to my files at the frequencies at which both devices send information (10 Hz and 500 Hz respectively).
The real problem emerges when I compile and execute my C program on the RPi: the performance considerably decreases, with my GPS log file written at a frequency of 3 Hz and the temperature log file at a frequency of 17 Hz (17 measurements written per second).
I do not really know why I am getting these performance problems with my code running on the Pi. Is it because the RPi has only a 700 MHz ARM processor and cannot handle such a multithreaded application? Or is it because my two thread routines are disturbing the work normally carried out by the Pi? Thanks a lot in advance, guys!
Here is my code. I am posting just one thread function, because I tested the performance with just one thread and it still writes at a very low frequency (~4 Hz). First, the main function:
int main(int argc, char *argv[]) {
    int s1_hand = 0;
    pthread_t routines[2];

    printf("Creating Thread -> Main Thread Busy!\n");
    s1_hand = pthread_create(&(routines[1]), NULL, thread_2, (void *)&(routines[1]));
    if (s1_hand != 0) {
        printf("Not possible to create threads: [%s]\n", strerror(s1_hand));
        exit(EXIT_FAILURE);
    }
    // join exactly once; pthread_join returns an error number, not -1
    s1_hand = pthread_join(routines[1], NULL);
    if (s1_hand != 0) {
        printf("Cannot join thread 2: [%s]\n", strerror(s1_hand));
        exit(EXIT_FAILURE);
    }
    return 0;
}
Now, the thread 2 function:
void *thread_2(void *parameters) {
    printf("Thread 2 starting...\n");
    int fd, chars, parsing, c_1, parse, p_parse = 1;
    double array[3];

    fd = open("/dev/ttyUSB0", O_RDONLY | O_NOCTTY | O_SYNC);  // note the leading slash
    if (fd < 0) {
        perror("Unable to open the fd!");
        exit(EXIT_FAILURE);
    }

    FILE *stream_a, *stream_b;
    stream_a = fdopen(fd, "r");
    stream_b = fopen(FILE_I, "w+");
    if (stream_a == NULL || stream_b == NULL) {
        perror("IMPOSSIBLE TO CREATE STREAMS");
        exit(EXIT_FAILURE);
    }

    c_1 = fgetc(stream_a);
    parse = findit(p_parse, c_1, array);
    printf("First Parse Done -> (%i)\n", parse);

    while ((chars = fgetc(stream_a)) != EOF) {
        parsing = findit(0, (uint8_t)chars, array);
        if (parsing == 1) {
            printf("MESSAGE FOUND AND SAVED -> (%i)\n", parsing);
            fprintf(stream_b, "%.6f %.3f %.3f %.3f\n",
                    time_stamp(), array[0], array[1], array[2]);
        }
    }

    fflush(stream_b);
    fclose(stream_b);
    fclose(stream_a);  // also closes fd, since stream_a owns it after fdopen
    pthread_exit(NULL);
}
Note that in my thread 2 function I am using findit(), a function which returns 0 or 1 depending on whether a message from the GPS has been found and parsed, writing the parsed info into my array (0 = not found, 1 = found and parsed). The function time_stamp() just calls clock_gettime(CLOCK_MONOTONIC, ...) so that each written event has a time reference. I hope with this information you guys can help me. Thank you!
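(For reference, such a helper is presumably something like the sketch below; returning the time as seconds in a double is my assumption, not the original code:)

#include <time.h>

// hypothetical sketch: seconds since an arbitrary start, as a double
double time_stamp(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec + ts.tv_nsec / 1e9;
}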
Obviously the processor is capable of running 20 things a second. I'd first check your filesystem performance.
Write a small program that simulates the writes just the way you're doing them and see what the performance is like.
Beyond that, I'd suggest it's the task swapping that's causing delays. Try without one of the threads. What type of performance do you get?
I'd guess it's the filesystem, though. Try buffering your writes in memory and doing large (4 KB+) writes every few seconds; I bet that will make your system a lot happier.
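A minimal sketch of that buffering idea, assuming the stream_b log stream from the question (the 64 KB buffer and the flush period are arbitrary choices of mine):

static char logbuf[64 * 1024];
int records = 0;   // hypothetical counter used to pace the flushes

// call after fopen() but before the first write to stream_b:
// stdio then hands the SD card large chunks instead of tiny line writes
setvbuf(stream_b, logbuf, _IOFBF, sizeof(logbuf));

// in the logging loop, flush only occasionally
if (++records % 512 == 0)
    fflush(stream_b);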
Also, post your code. Otherwise all we can do is guess.
I'm facing quite a tricky problem. I'm trying to get two virtual memory areas pointing to the same physical memory. The point is to have different page protection parameters on the different memory areas.
On this forum, a user seems to have a solution, but it looks kind of hacky, and it's pretty clear that something better can be done performance-wise:
http://www.linuxforums.org/forum/programming-scripting/19491-map-two-virtual-memory-addres-same-physical-page.html
As I'm facing the same problem, I want to give it a shot here to see if somebody has a better idea. Don't be afraid to mention the dirty details under the hood; this is what this question is about.
Thanks in advance.
Since Linux kernel 3.17 (released in October 2014) you can use the memfd_create system call to create a file descriptor backed by anonymous memory, then mmap the same region several times, as mentioned in the other answers.
Note that the glibc wrapper for the memfd_create system call was added in glibc 2.27 (released in February 2018). The glibc manual also describes how the returned descriptor can be used to create multiple mappings of the same underlying memory.
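A minimal sketch of that approach (the name and the one-page size are placeholders, and the wrapper needs glibc 2.27+):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    size_t len = 4096;

    // anonymous memory, no filesystem entry needed
    int fd = memfd_create("twin-mapping", 0);
    if (fd == -1) { perror("memfd_create"); exit(1); }
    if (ftruncate(fd, len) == -1) { perror("ftruncate"); exit(1); }

    // two views of the same physical pages, with different protections
    char *rw = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    char *ro = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
    if (rw == MAP_FAILED || ro == MAP_FAILED) { perror("mmap"); exit(1); }

    rw[0] = 'x';
    printf("%c\n", ro[0]);   // prints 'x': same memory, read-only view
    return 0;
}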
I'm trying to get two virtual memory areas pointing to the same physical memory.
mmap the same region in the same file, twice, or use System V shared memory (which does not require mapping a file in memory).
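A minimal sketch of the System V variant (no file involved; SHM_RDONLY gives the second attachment a different protection):

#include <sys/ipc.h>
#include <sys/shm.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    // one System V segment, attached twice
    int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
    if (id == -1) { perror("shmget"); exit(1); }

    char *rw = shmat(id, NULL, 0);           // read-write view
    char *ro = shmat(id, NULL, SHM_RDONLY);  // read-only view of the same pages
    if (rw == (void *)-1 || ro == (void *)-1) { perror("shmat"); exit(1); }

    rw[0] = 'x';
    printf("%c\n", ro[0]);   // prints 'x'

    shmdt(ro);
    shmdt(rw);
    shmctl(id, IPC_RMID, NULL);  // mark the segment for removal
    return 0;
}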
I suppose if you dislike Sys V shared memory you could use POSIX shared memory objects. They're not very popular, but they are available on Linux and the BSDs at least.
Once you get an fd with shm_open, you can immediately call shm_unlink. Then no other process can attach to the same shared memory, and you can mmap it multiple times. There is still a small race window, though.
As suggested by #PerJohansson, I wrote and tested the following code. It works well on Linux: using mmap with the MAP_SHARED|MAP_FIXED flags, we can map the same physical page allocated by a POSIX shm object multiple times, contiguously, into a very large virtual memory region.
#include "stdio.h"
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h> /* For mode constants */
#include <fcntl.h> /* For O_* constants */
void * alloc_1page_mem(int size) {
int fd;
char * ptr_base;
char * rptr;
/* Create shared memory object and set its size */
fd = shm_open("/myregion", O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("error in shm_open");
return NULL;
}
if (ftruncate(fd, 4096) == -1) {
perror("error in ftruncate");
return NULL;
}
// following trick reserves big enough holes in VM space
ptr_base = rptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
munmap(rptr, size);
for(int i=0; i<size; i+=4096) {
rptr = mmap(rptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED|MAP_FIXED, fd, 0);
if (rptr == MAP_FAILED) {
perror("error in mmap");
return NULL;
}
rptr += 4096;
}
close(fd);
shm_unlink("/myregion");
return ptr_base;
}
void check(int *p, int total_cnt) {
    // write a pattern into the first page ...
    for (int i = 0; i < 4096/sizeof(int); i++) {
        p[i] = i;
    }
    // ... and verify it is visible through every aliased page
    int fail_cnt = 0;
    for (int k = 0; k < total_cnt; k += 4096/sizeof(int)) {
        for (int i = 0; i < 4096/sizeof(int); i++) {
            if (p[k+i] != i)
                fail_cnt++;
        }
    }
    printf("fail_cnt=%d\n", fail_cnt);
}
int main(int argc, const char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <1|0><c|w|r|p>...\n", argv[0]);
        return 1;
    }
    const char *cmd = argv[1];
    int sum;
    int total_cnt = 32*1024*1024;
    int *p = NULL;

    if (*cmd++ == '1')
        p = alloc_1page_mem(total_cnt*sizeof(int));
    else
        p = malloc(total_cnt*sizeof(int));

    sum = 0;
    while (*cmd) {
        switch (*cmd++) {
        case 'c':
            check(p, total_cnt);
            break;
        case 'w':
            // write only 4 bytes per cache line
            for (int k = 0; k < total_cnt; k += 64/sizeof(int)) {
                p[k] = sum;
            }
            break;
        case 'r':
            // read only 4 bytes per cache line
            for (int k = 0; k < total_cnt; k += 64/sizeof(int)) {
                sum += p[k];
            }
            break;
        case 'p':
            // prevent sum from being optimized away
            printf("sum=%d\n", sum);
        }
    }
    return 0;
}
You can observe a very low cache miss rate on memory allocated with this method:

$ sudo perf stat -e mem_load_retired.l3_miss -- ./a.out 0wrrrrr
# this produces L3 misses increasing linearly with the number of 'r' characters
$ sudo perf stat -e mem_load_retired.l3_miss -- ./a.out 1wrrrrr
# this produces an almost constant L3 miss count
If you are root, you can mmap("/dev/mem", ...) but there are caveats in the newer kernels, see accessing mmaped /dev/mem?