I have a piece of code written in POSIX compliant C and it doesn't seem to work correctly. The goal is to read from /dev/random, the interface to the Linux/BSD/Darwin kernel's random number generator and output the written byte to a file. I'm not quite sure what I'm overlooking as I'm sure I've covered every ground. Anyway, here it is:
int incinerate(int number, const char * names[]) {
if (number == 0) {
// this shouldn't happen, but if it does, print an error message
fprintf(stderr, "Incinerator: no input files\n");
return 1;
}
// declare some stuff we'll be using
long long lengthOfFile = 0, bytesRead = 0;
int myRandomInteger;
// open the random file block device
int zeroPoint = open("/dev/random", O_RDONLY);
// start looping through and nuking files
for (int i = 1; i < number; i++) {
int filePoint = open(names[i], O_WRONLY);
// get the file size
struct stat st;
stat(names[i], &st);
lengthOfFile = st.st_size;
printf("The size of the file is %llu bytes.\n", lengthOfFile);
while (lengthOfFile != bytesRead) {
read(zeroPoint, &myRandomInteger, sizeof myRandomInteger);
write(filePoint, (const void*) myRandomInteger, sizeof(myRandomInteger));
bytesRead++;
}
close(filePoint);
}
return 0;
}
Any ideas? This is being developed on OS X but I see no reason why it shouldn't also work on Linux or FreeBSD.
If it helps, I've included the following headers:
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
Instead of
write(filePoint, (const void*) myRandomInteger, sizeof(myRandomInteger));
you surely meant to write
write(filePoint, (const void*) &myRandomInteger, sizeof(myRandomInteger));
didn't you? If you use the random bytes read from /dev/random as a pointer, you're almost certain to encounter a segfault sooner or later.
Related
I am trying to read and copy its contents to another file asynchronously with POSIX threads in C. Assuming a file contains "aabbcc" and i have 4 threads, how can i copy "aabbcc" to another file with threads asynchronously in C. This part has been stuck in my head for the entire day. What i have done so far is shown below.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <pthread.h>
#include <aio.h>
#include <math.h> //for ceil() and floor()
#include <sys/types.h>
#include <unistd.h>
#define FILE_SIZE 1024 //in bytes
//>cc code.c -o code.out -lrt -lpthread
//>./code.out
char alphabets[52] = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z',
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W','X','Y','Z'};
long prepareInputFile(char* filename)
{
FILE *fp;
fp = fopen(filename, "w");
if(fp == NULL)
{
printf("Cannot write to input file\n");
return;
}
int index;
char str[FILE_SIZE];
int rand_size = (rand() % 1024)+1;
for(index = 0;index < rand_size;index++) /*Generate the file with random sizes in bytes*/
{
int num2 = (rand() % 52); /*Get a random char in char array*/
putc(alphabets[num2],fp); /*Write that char to the file pointed to by fp*/
}
putc('\n',fp);
fseek(fp, 0L, SEEK_END);
long size = ftell(fp);
fseek(fp, 0L, SEEK_SET);
return size;
}
//Perform main operation inside this function
void *writeToFileAsync(void *src_file, void *dest_file,
void *thread, void *t_count, void *filesize)
{
int readfd, writefd;
struct aiocb aio_write, aio_read;
memset(&aio_read, 0, sizeof(aio_read));
aio_read.aio_fildes = readfd;
aio_read.aio_nbytes = (int)filesize/(int)t_count;
readfd = open((char *)src_file, O_RDONLY);
if(readfd < 0)
{
printf("Cannot open the file for reading\n");
}
memset(&aio_write, 0, sizeof(aio_write));
aio_read.aio_fildes = writefd;
aio_read.aio_nbytes = (int)filesize/(int)t_count;
writefd = open((void *)dest_file, O_CREAT | O_WRONLY);
if(writefd < 0)
{
printf("Cannot open the file for writing\n");
}
return;
}
int main(int argc, char *argv[])
{
int i,threadCount;
char sourcePath[100], destPath[100];
strcpy(sourcePath,argv[1]);
if(strcmp(sourcePath, "-") == 0)
{
getcwd(sourcePath, sizeof(sourcePath));
strcpy(sourcePath, strcat(sourcePath, "/source.txt"));
}
else
{
strcpy(sourcePath, strcat(sourcePath, "source.txt"));
}
printf("Source path is: %s\n", sourcePath);
strcpy(destPath,argv[2]);
if(strcmp(destPath, "-") == 0)
{
getcwd(destPath, sizeof(destPath));
strcpy(destPath, strcat(destPath, "/destination.txt"));
}
else
{
strcpy(destPath, strcat(destPath, "destination.txt"));
}
printf("Dest path is: %s\n", destPath);
threadCount = strtol(argv[3],NULL,10);
long file_size = prepareInputFile(sourcePath);
pthread_t threads[threadCount];
for(i=0;i<threadCount;i++)
{
pthread_create(&threads[i],NULL,(void *)writeToFileAsync, NULL);
}
return 0;
}
Any help would be appreciated.
It is unlikely that parallelizing this operation would help, as it is probably bound by I/O rather than CPU time, and copying this way will certainly not be faster than simply copying via system call.
However, if you wanted to do this, one method would be: map the input file into memory (with mmap() or the equivalent), create the destination buffer or memory-mapped file, divide the source and destination files into equal slices, and have each thread copy its slice of the file. You might use memcpy(), but a modern compiler can see what your loop is doing and optimize it.
Even this is not going to be as fast as reading or mapping the source file into a buffer, then writing it back out from the same buffer with write(). If all you need to do is copy the file to disk, you don’t need to copy the bytes at all. In fact, you might even be able to make a second link to the file on disk.
This would probably work best if the slices are aligned to page boundaries. Be very careful about having two threads write to the same cache line, as this creates a race condition.
I am creating a model of Unix v6 File system. I have tried to first allocate the available free blocks by writing it to the file and then reading the same when need. I am having a free array of 100 blocks, so when the number of free blocks goes beyond hundred, the present free array will be written to the memory block in free[0] and the new free block will be assigned to free[0]. The following is the sample code that I have written
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
void splitCommand(char**,char*, char*);
unsigned short freeArr[100];
unsigned short nfree=1,fd;
int main()
{
fd = open("v6", O_RDWR | O_CREAT | O_TRUNC, S_IREAD | S_IEXEC | S_IWRITE);
freeArr[0] = 0;
for (int i = 28; i < 5000; i++)
{
addFreeBlock(i);
}
unsigned short free1=0 ;
lseek(fd, 3127 * 512, SEEK_SET);
read(fd, &(nfree), sizeof(unsigned short));
printf("%d\n",nfree);
for(int i=0; i<nfree; i++)
{
read(fd, &free1, sizeof(unsigned short));
printf("%d\n",free1);
}
}
void addFreeBlock(int block_no)
{
if(block_no==3127)
{
int a=0;
}
if (nfree == 100)
{
lseek(fd, block_no * 512, SEEK_SET);
write(fd, &(nfree), sizeof(unsigned short)); // copy nfree into free array
write(fd, freeArr, 200);// copy free array
nfree=0;
}
freeArr[nfree] = block_no;
nfree++;
}
Consider we have 5000 blocks in total. Each block is 512 bytes long. I am using the first 27 blocks for other purposes and so I am writing the blocks from 28 to 5000.
Now after writing all the blocks, I tried reading the blocks stored at random location. When I tried reading the blocks stored at 3027, I am able to read the 100 blocks numbered from 2927,2928,2929,....,3026. But when I read the blocks stored at 3127, I am able to read only the blocks from 3027, 3028, 3029,....,3081. The remaining is just random. I also tried at some other positions. It works for some of them.
Can anyone tell me where I went wrong?
Sorry, this is not really an answer but I cannot write this in a comment. It does however answer the question somehow.
Your code works correctly, I tested with valgrind and there are no errors, but you should think about it a lot.
There is no need for a single global variable in your code, you should only use a global variable when you really know there is no better solution, this is an improved version of your exact code,
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdint.h>
uint16_t add_free_block(int fd, int block_number, uint16_t *free_blocks, uint16_t free_block_count);
int main(void)
{
uint16_t free_blocks[100];
uint16_t free_block_count;
int fd;
fd = open("v6", O_RDWR | O_CREAT | O_TRUNC, S_IREAD | S_IEXEC | S_IWRITE);
if (fd == -1)
return -1;
free_blocks[0] = 0;
free_block_count = 1;
for (int i = 28; i < 5000; i++) {
free_block_count = add_free_block(fd, i, free_blocks, free_block_count);
}
lseek(fd, 3127 * 512, SEEK_SET);
read(fd, &free_block_count, sizeof(uint16_t));
printf("%d\n", free_block_count);
for (int i = 0; i < free_block_count; i++) {
uint16_t block_number;
if (read(fd, &block_number, sizeof(uint16_t)) == sizeof(uint16_t)) {
printf("%d\n", block_number);
}
}
close(fd);
return 0;
}
uint16_t
add_free_block(int fd, int block_number, uint16_t *free_blocks, uint16_t free_block_count)
{
if (free_block_count == 100) {
lseek(fd, block_number * 512, SEEK_SET);
write(fd, &free_block_count, sizeof(uint16_t));
write(fd, free_blocks, free_block_count * sizeof(uint16_t));
free_block_count = 0;
}
free_blocks[free_block_count] = block_number;
return free_block_count + 1;
}
with 0 global variables (also, minor but important I closed the file descriptor).
You should also check other return values like write(), I didn't because I didn't want to do it all, I just wanted check what was wrong.
The real problem lies somewhere else in the rest of your program, not in this code. So please POST the real code and stop guessing what the problem is.
In general, these are my recommendations
DO NOT USE GLOBAL VARIABLES, unless you are very, very sure you MUST.
Add the function prototype.
DO NOT IGNORE compiler warnings.
ALWAYS check the return value of a function that does return, if you don't know that it does return, then 5.
READ carefully all the documentation for every function you use.
I thought i had it figured out but i'm getting a bus error. All it has to do is take some text file, use mmap and then reverse the contents without a temp file. What i did was map it, and then erase the file and write it from memory by starting at the end of the mmap pointer. This worked when I did it with cout, but for some reason doing it to a file i get the error.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/io.h>
#include <sys/mman.h>
main(int argc, char *argv[])
{
unsigned char *f, *g;
int size;
struct stat s;
const char * file_name = argv[1];
int fd = open(argv[1], O_RDONLY);
int status = fstat(fd, &s);
size = s.st_size;
int i;
f = (char *) mmap (0, size, PROT_READ, MAP_PRIVATE, fd, 0);
//g = (char *) mmap (0, size, PROT_READ, MAP_PRIVATE, fd, 0);
for(i = 0; i < size; i++) {
char c;
c = f[i];
putchar(c);
}
//ABOVE THIS WORKS
// int z = 0;
//while(f[z] != NULL) {
//z++;
// printf("%d", z);
// }
int x;
int y = 0;
close(fd);
FILE *f1;
f1 = fopen(argv[1], "w+");
for(x = size - 1; x >= 0; x--)
{
char c;
c = f[x];
fputc(c, f1);
}
}
Because you fopened the file with w, you truncated the file to 0 length. The mmap man page says that:
The effect of changing the size of the underlying file of a mapping on the pages that correspond to added or removed regions of the file is unspecified.
Anyways, it seems to me that you should call mmap with PROT_WRITE also, so that you can just reverse the array f in memory. Then you don't have to open the file again. Make sure to use MMAP_SHARED, and to also call munmap() after you are finished modifying the shared memory. You need MMAP_SHARED because with MMAP_PRIVATE:
Updates to the mapping are not visible to other processes mapping the same file, and are not carried through to the underlying file.
You should call munmap() because:
The file may not actually be updated until msync(2) or munmap() is called.
If you exit the program without calling munmap(), the memory will automatically be unmapped for you. But it's a good habit to close/free/unmap things yourself instead of just exiting.
(Edit: Thanks Adam Rosenfield and EOF for the corrections to my original answer.)
For my OS class I have the assignment of implementing Unix's cat command with system calls (no scanf or printf). Here's what I got so far:
(Edited thanks to responses)
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
main(void)
{
int fd1;
int fd2;
char *buffer1;
buffer1 = (char *) calloc(100, sizeof(char));
char *buffer2;
buffer2 = (char *)calloc(100, sizeof(char));
fd1 = open("input.in", O_RDONLY);
fd2 = open("input2.in", O_RDONLY);
while(eof1){ //<-lseek condition to add here
read (fd1, buffer1, /*how much to read here?*/ );
write(1, buffer1, sizeof(buffer1)-1);
}
while (eof2){
read (fd2,buffer2, /*how much to read here?*/);
write(1, buffer2, sizeof(buffer2)-1);
}
}
The examples I have seen only show read with a known number of bytes. I don't know how much bytes each of the read files will have, so how do I specify read's last paramether?
Before you can read into a buffer, you have to allocate one. Either on the stack (easiest) or with mmap.
perror is a complicated library function, not a system call.
exit is not a system call on Linux. But _exit is.
Don't write more bytes than you have read before.
Or, in general: Read the documentation on all these system calls.
Edit: Here is my code, using only system calls. The error handling is somewhat limited, since I didn't want to re-implement perror.
#include <fcntl.h>
#include <unistd.h>
static int
cat_fd(int fd) {
char buf[4096];
ssize_t nread;
while ((nread = read(fd, buf, sizeof buf)) > 0) {
ssize_t ntotalwritten = 0;
while (ntotalwritten < nread) {
ssize_t nwritten = write(STDOUT_FILENO, buf + ntotalwritten, nread - ntotalwritten);
if (nwritten < 1)
return -1;
ntotalwritten += nwritten;
}
}
return nread == 0 ? 0 : -1;
}
static int
cat(const char *fname) {
int fd, success;
if ((fd = open(fname, O_RDONLY)) == -1)
return -1;
success = cat_fd(fd);
if (close(fd) != 0)
return -1;
return success;
}
int
main(int argc, char **argv) {
int i;
if (argc == 1) {
if (cat_fd(STDIN_FILENO) != 0)
goto error;
} else {
for (i = 1; i < argc; i++) {
if (cat(argv[i]) != 0)
goto error;
}
}
return 0;
error:
write(STDOUT_FILENO, "error\n", 6);
return 1;
}
You need to read as many bytes as will fit in the buffer. Right now, you don't have a buffer yet, all you got is a pointer to a buffer. That isn't initialized to anything. Chicken-and-egg, you therefore don't know how many bytes to read either.
Create a buffer.
There is usually no need to read the entire file in one gulp. Choosing a buffer size that is the same or a multiple of the host operating system's memory page size is a good way to go. 1 or 2 X the page size is probably good enough.
Using buffers that are too big can actually cause your program to run worse because they put pressure on the virtual memory system and can cause paging.
You could use open, fstat, mmap, madvise and write to make a very efficient cat command.
If Linux specific you could use open, fstat, fadvise and splice to make an even more efficient cat command.
The advise calls are to specify the SEQUENTIAL flags which will tell the kernel to do aggressive read-ahead on the file.
If you like to be polite to the rest of the system and minimize buffer cache use, you can do your copy in chunks of 32 megabytes or so and use the advise DONTNEED flags on the parts already read.
Note:
The above will only work if the source is a file. If the fstat fails to provide a size then you must fall back to using an allocated buffer and read, write. You can use splice too.
Use the stat function to find the size of your files before you read them. Alternatively, you can read chunks until you get an EOF.
I'm writing a high-loaded daemon that should be run on the FreeBSD 8.0 and on Linux as well. The main purpose of daemon is to pass files that are requested by their identifier. Identifier is converted into local filename/file size via request to db. And then I use sequential mmap() calls to pass file blocks with send().
However sometimes there are mismatch of filesize in db and filesize on filesystem (realsize < size in db). In this situation I've sent all real data blocks and when next data block is mapped -- mmap returns no errors, just usual address (I've checked errno variable also, it's equal to zero after mmap). And when daemon tries to send this block it gets Segmentation Fault. (This behaviour is guarantedly issued on FreeBSD 8.0 amd64)
I was using safe check before open to ensure size with stat() call. However real life shows to me that segfault still can be raised in rare situtaions.
So, my question is there a way to check whether pointer is accessible before dereferencing it? When I've opened core in gdb, gdb says that given address is out of bound.
Probably there is another solution somebody can propose.
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#define FILENAME "./datafile"
int main()
{
unsigned long i, j;
srand(time(NULL));
unsigned long pagesize = sysconf(_SC_PAGESIZE);
unsigned long basesize = 4 * pagesize;
unsigned long cropsize = 2 * pagesize;
// create 4*pagesize sized file
int f = creat(FILENAME, 0644);
for (i = 0; i < basesize; i++) {
unsigned char c = (unsigned char)rand();
if (write(f, &c, 1) < 1) { perror("write"); break; }
}
close(f);
f = open(FILENAME, O_RDONLY);
// walk trough file
unsigned char xor = 0;
unsigned long offset = 0;
for (j = 0; j < 4; j++) {
// trunc file to 2*pagesize
if (j == 2) truncate(FILENAME, cropsize);
char *data = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE, f, offset);
if (data == MAP_FAILED) { perror("mmap"); break; }
printf("mmap: %lu#%lu for %i\n", pagesize, offset, f);
for (i = 0; i < pagesize; i++) xor ^= data[i];
offset += pagesize;
}
close(f);
return 0;
}
Of course I can't prove it from here, but I strongly suspect that you just have a book-keeping bug in your code. If you call mmap and pass in a size, and it succeeds, you shouldn't get SIGSEGV.
I recommend that you apply valgrind to your investigation.
On many linux systems /proc/PID/maps will show you what regions are mapped with what access permissions.