Trying to use mmap to write to a file. Unfortunately the first write in the loop map[i] = i; will cause a bus error. Not sure why.
The PC runs Ubuntu 14.04 and the file /tmp/mmapped.bin has 12 bytes and the program is invoked with ./a.out 3.
Thanks
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
//#define NUMINTS (1000)
#define FILESIZE 0x400000000
int main(int argc, char *argv[])
{
int i;
int fd;
int *map; /* mmapped array of int's */
int size = atoi(argv[1]);
fd = open(FILEPATH, O_RDWR| O_CREAT | O_TRUNC);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
map = mmap(0, 4 * size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
for (i = 1; i <= size; ++i) {
map[i] = i;
}
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
In c you need to start at index 0. Because it will simply increment the pointer by the amount i and then dereference it. Your code dereferences the pointer beyond the allowed bound.
It should be,
for (i = 0; i < size; ++i) {
map[i] = i;
}
because it's equivalent to
for (i = 0; i < size; ++i) {
*(map + i) = i;
}
Also, use
map = mmap(0, size * sizeof *map, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
to ensure that enough space is allocated and that *(map + i) will be inside bounds. Don't use magic numbers.
According to the mmap man page a bus error (SIGBUS) happens when you read/write outside the bounds of the file.
The length of the mapping is separate from the length of the file. If your file is newly created its size will be 0, even if you specify a length with mmap. Resize the file with ftruncate after opening it.
Related
This mmap tutorial from 15 years ago ranks high in Google searches, but it actually runs subtly incorrectly on my Linux system.
mmap_write.c:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS (1000)
#define FILESIZE (NUMINTS * sizeof(int))
int main(int argc, char *argv[])
{
int i;
int fd;
int result;
int *map; /* mmapped array of int's */
/* Open a file for writing.
* - Creating the file if it doesn't exist.
* - Truncating it to 0 size if it already exists. (not really needed)
*
* Note: "O_WRONLY" mode is not sufficient when mmaping.
*/
fd = open(FILEPATH, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
if (fd == -1) {
perror("Error opening file for writing");
exit(EXIT_FAILURE);
}
/* Stretch the file size to the size of the (mmapped) array of ints
*/
result = lseek(fd, FILESIZE-1, SEEK_SET);
if (result == -1) {
close(fd);
perror("Error calling lseek() to 'stretch' the file");
exit(EXIT_FAILURE);
}
/* Something needs to be written at the end of the file to
* have the file actually have the new size.
* Just writing an empty string at the current file position will do.
*
* Note:
* - The current position in the file is at the end of the stretched
* file due to the call to lseek().
* - An empty string is actually a single '\0' character, so a zero-byte
* will be written at the last byte of the file.
*/
result = write(fd, "", 1);
if (result != 1) {
close(fd);
perror("Error writing last byte of the file");
exit(EXIT_FAILURE);
}
/* Now the file is ready to be mmapped.
*/
map = mmap(0, FILESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Now write int's to the file as if it were memory (an array of ints).
*/
for (i = 1; i <=NUMINTS; ++i) {
map[i] = 2 * i;
}
/* Don't forget to free the mmapped memory
*/
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
/* Decide here whether to close(fd) and exit() or not. Depends... */
}
/* Un-mmaping doesn't close the file, so we still need to do that.
*/
close(fd);
return 0;
}
mmap_read.c:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS (1000)
#define FILESIZE (NUMINTS * sizeof(int))
int main(int argc, char *argv[])
{
int i;
int fd;
int *map; /* mmapped array of int's */
fd = open(FILEPATH, O_RDONLY);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
map = mmap(0, FILESIZE, PROT_READ, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Read the file int-by-int from the mmap
*/
for (i = 1; i <=NUMINTS; ++i) {
printf("%d: %d\n", i, map[i]);
}
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
If the file does not already exist, the output of mmap_read is
...
998: 1996
999: 1998
1000: 2000
But if it does, the output is
...
998: 1996
999: 1998
1000: 0
Should the author have flushed the write? Or is GCC miscompiling the code?
Edit: I noticed that it's the prior existence or non-existence of the file that makes a difference, not the compilation flag.
You are starting at the second element, and writing 2000 after the end of the map.
for (i = 1; i <=NUMINTS; ++i) {
map[i] = 2 * i;
}
should be
for (i = 0; i < NUMINTS; ++i) {
map[i] = 2 * ( i + 1 );
}
Demo
It's not a buffering issue. write is a system call, so the data passed to the OS directly. It doesn't mean the data has been written to disk when write returns, but it is in the OS's hands, so it's as if it was on disk as far as OS functions are concerned, including its memory-mapping functionality.
In C indexes are from zero. Writing and reading index 1000 you invoke undefined behaviour
Change to in the write.:
for (i = 1; i <=NUMINTS; ++i) {
map[i - 1] = 2 * i;
}
and reading to:
for (i = 1; i <=NUMINTS; ++i) {
printf("%d: %d\n", i, map[i-1]);
}
I have a program which is another variant of copy program in linux(Actually I'm on Mac OSX).
In order to support copying large files, I wrote something like this:
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>
#define BUFFSIZE 65535
#define PAGESIZE 4096
int main(int argc, char **argv){
char *source, *destination;
int src_fd, dst_fd;
unsigned long long bytes_read;
int bytes = BUFFSIZE;
struct timeval start, end;
int overall_time = 0;
unsigned long long offset = 0;
struct stat statbuf;
if(argc < 3){
printf("copy <source> <destination>\n");
exit(EXIT_FAILURE);
}
source = argv[1];
destination = argv[2];
src_fd = open(source, O_RDONLY, 0777);
if(src_fd < 0){
perror("src_fd");
exit(EXIT_FAILURE);
}
//bytes_read = lseek(src_fd, 0, SEEK_END);
fstat(src_fd, &statbuf);
bytes_read = statbuf.st_size;
dst_fd = open(destination, O_RDWR | O_CREAT, 0777);
if(dst_fd < 0){
perror("dst_fd");
exit(EXIT_FAILURE);
}
lseek(dst_fd, bytes_read -1, SEEK_SET);
write(dst_fd, "", 1);
gettimeofday(&start, NULL);
while(bytes_read > 0){
if(bytes_read < BUFFSIZE){
bytes = bytes_read;
bytes_read = 0;
}
else{
bytes_read -= bytes;
}
void *src_map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, src_fd, (off_t)offset);
if(src_map == (void*) MAP_FAILED){
perror("src_map");
exit(EXIT_FAILURE);
}
void *dst_map = mmap(NULL, bytes, PROT_WRITE, MAP_SHARED, dst_fd, (off_t)offset);
if(dst_map == (void*) MAP_FAILED){
perror("dst_map");
exit(EXIT_FAILURE);
}
memcpy(dst_map, src_map, bytes);
int src_unmp = munmap(src_map, bytes);
if(src_unmp == -1){
perror("src_unmap");
exit(EXIT_FAILURE);
}
int dst_unmp = munmap(dst_map, bytes);
if(dst_unmp == -1){
perror("dst_unmap");
exit(EXIT_FAILURE);
}
offset += 4096;
bytes_read -= bytes;
}
gettimeofday(&end, NULL);
printf("overall = %d\n", (end.tv_usec - start.tv_usec));
close(src_fd);
close(dst_fd);
return 0;
}
The goal is to measure the amount of time elapsed to copy a large file with the use of mmap().
The above code is not working for transferring 1GB file.
Any hint for that?
Thank you
Yes. The problem is in offset value. The offset value should be a multiple of page size.
I'm trying to share a text file between forked processes on my Ubuntu x86_64: the file will not be absurdly large, since strings will be written only if there is not already another identical string in the file; strings will be hostnames of visited websites, so I'll assume no more than 255 bytes for each hostname.
When it is a process' turn to write in shared object, it is OK; once all the processes wrote in shared object, msync should make the writing effective on the disk, but the mapped.txt file created only contain one string from arrayString, i.e. the string the last process wrote in shared object.
Here's the code:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
// first forked process will write "first" in file, and so on
const char *arrayString[] = {
"first",
"second",
"third"
};
int main(void) {
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
char *data;
data = (char *)mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
sem_wait(sem);
size_t textsize = strlen(arrayString[index])+1;
if (ftruncate(fd, sizeof(textsize)) == -1) {
perror("ftruncate:");
return EXIT_FAILURE;
}
for (size_t i = 0; i < textsize; i++) {
printf("%d Writing character %c at %zu\n", getpid(), arrayString[index][i], i);
data[i] = arrayString[index][i];
}
printf("%d wrote ", getpid());
for (size_t i = 0; i < textsize; i++) {
printf("%c", data[i]);
}
printf("\n");
if (msync(data, textsize, MS_SYNC) == -1) {
perror("Could not sync the file to disk");
}
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
close(fd);
return EXIT_SUCCESS;
}
This is one possible output of the code above for three child processes (this is fine):
20373 Writing character s at 0
20373 Writing character e at 1
20373 Writing character c at 2
20373 Writing character o at 3
20373 Writing character n at 4
20373 Writing character d at 5
20373 Writing character at 6
20373 wrote second
20374 Writing character t at 0
20374 Writing character h at 1
20374 Writing character i at 2
20374 Writing character r at 3
20374 Writing character d at 4
20374 Writing character at 5
20374 wrote third
20372 Writing character f at 0
20372 Writing character i at 1
20372 Writing character r at 2
20372 Writing character s at 3
20372 Writing character t at 4
20372 Writing character at 5
20372 wrote first
And here's the content of mapped.txt (this is bad):
first^#^#^#
I expected:
second
third
first
but all I get is only the string of the last process, with those strange symbols. I'd like to keep this file persistent in memory, but because of the I/O slowness, I'm trying to use memory mapping.
Any idea why my file only contains the string written by the last process accessing the shared file?
Edit: I think I get it, it seems to work now: I hope it will be of help to someone. Compiled with g++ -g -o mapthis mapthis.cpp -lrt -pthread. Beware that some error checking are missing, like for fsync, snprintf and lseek.
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
const char *arrayString[] = {
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com"
};
int main(void) {
int index;
int children = sizeof(arrayString) / sizeof(char*);;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
struct stat filestats;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
if (fstat(fd, &filestats) < 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = (char *)mmap(NULL, filestats.st_size ? filestats.st_size : 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("first map:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
sleep(1);
pid_t pid = fork();
if (pid == 0) {
int nw = 0;
int hostnameSize = 0;
const size_t origsize = filestats.st_size;
char *hostPos = NULL;
char *numPos = NULL;
char *backslashPos = NULL;
char tempBuff[64];
memset((char *)tempBuff, 0, sizeof(tempBuff));
sem_wait(sem);
// remap to current file size if it changed
fstat(fd, &filestats);
// file empty, just insert
if (filestats.st_size == 0) {
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
else {
// file not empty, let's look for string
hostPos = strstr(data, arrayString[index]);
if (hostPos) {
// string is already inserted, search for offset of number of seconds
lseek(fd, hostPos-data, SEEK_SET);
numPos = strchr(hostPos, ' ')+1;
backslashPos = strchr(numPos, '\n');
long unsigned before = atoi(numPos);
long unsigned now = (unsigned long)time(NULL);
long unsigned difference = now - before;
printf("%s visited %ld seconds ago (%ld - %ld)\n",
arrayString[index], difference, now, before);
nw = snprintf(tempBuff, backslashPos-hostPos+1, "%s %010lu", arrayString[index], now);
write(fd, tempBuff, nw);
write(fd, "\n", 1);
fsync(fd);
}
else {
data = (char *)mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
lseek(fd, 0, SEEK_END);
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
else if (pid > 0) {
wait(NULL);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}
This line is problematic:
if (ftruncate(fd, sizeof(textsize)) == -1) {
textsize is a size_t, and taking its sizeof is just going to get 4 or 8 (on 32 and 64 bit systems). Looks like you're on a 64 bit system, so you're unconditionally truncating the file to 8 bytes in this case before every write. The "strange symbols" are just how your editor displays NUL/zero bytes. Even if you used ftruncate(fd, textsize), you'd still truncate down to just the string you're about to write, overwriting any data other children may have written; I doubt you want to ftruncate at all here.
For continual appends from separate processes (where they can't share information about the size or offset of the data they're adding), memory mapping just doesn't make sense; why aren't you just having each of them take the lock, lseek to end of file, then call write? You could still use memory mappings for the duplicate checking (some of it without locking), it would just be a bit different. Something like this:
int main(void) {
struct stat filestats;
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
// Mostly just to ensure it's mappable, we map the current size of the file
// If the file might already have values, and many child workers won't add
// to it, this might save some mapping work in the children; you could
// just map in the children when needed though
if (fstat(fd, &filestats) != 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = mmap(NULL, filestats.st_size, PROT_READ, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
const size_t origsize = filestats.st_size;
sem_wait(sem);
// remap to current file size if it changed
// If you're not on Linux, you'd just have to mmap from scratch
// since mremap isn't standard
fstat(fd, &filestats);
if (origsize != filestats.st_size) {
data = mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
}
// Not safe to use strstr since mapping might not end with NUL byte
// You'd need to workaround this, or implement a your own memstr-like function
if (!memstr(data, arrayString[index])) {
// Move fd to end of file, so we append new data
lseek(fd, 0, SEEK_END);
write(fd, arrayString[index], strlen(arrayString[index]));
write(fd, "\n", 1);
fsync(fd);
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}
That memstr I referenced would need to be hand-implemented (or you'd need to do terrible things like ensure the file always had a NUL byte at the end so you could use strstr on it); you can get some tips on that here.
You're writing all the strings at offset 0 of the file, each over the top of the previous. The core of your loop should be something like
struct stat status;
fstat(fd, &status);
size_t cursize = status.st_size;
ftruncate(fd, cursize + textsize);
for (size_t i = 0; i < textsize; i++) {
data[cursize + i] = arrayString[index][i];
}
I am trying to create a shared memory area using examples and documentation I found online. My goal is IPC , so I can make different processes talk to each other.
This my C file
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
int main (int argc, char *argv[])
{
struct stat sb;
off_t len;
char *p;
int fd;
fd = shm_open("test", O_RDWR | O_CREAT); //,S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("open");
return 1;
}
if (fstat(fd, &sb)==-1){
perror("fstat");
return 1;
}
/*if (!S_ISREG(sb.st_mode)){
fprintf(stderr, "%s is not a file\n",fileName);
return 1;
}*/
p = mmap(0, sb.st_size, PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED){
perror("mmap");
return 1;
}
if (close(fd)==-1) {
perror("close");
return 1;
}
for (len = 0; len < sb.st_size; len++) {
putchar(p[len]);
}
if (munmap(p, sb.st_size) == -1) {
perror("munmao");
return 1;
}
fprintf(stderr,"\n");
return 0;
}
The problem is that I am getting a mmap: Invalid argument. I assume something is wrong with fd but have no clue how to fix it, any help would be appreciated. I am on Yosemite using latest XCODE .
You need to extend the size of the shared memory mapping, at least the first time when you create it. Right now its size is 0, and mmap is not going to allow you to make a zero length mapping.
So instead of your fstat() call, do e.g.:
size_t len = 4096;
if (ftruncate(fd, len) == -1) {
perror("ftruncate");
return 1;
}
And pass this len to mmap().
Your addr parameter is set to 0, which might be reserved. Did you mean to use NULL? This would be different than 0.
So I'm using mmap to then write to another file. But the weird thing is, when my code hits mmap, what it does is clears the file. So I have a file that's populated with random characters (AB, HAA, JAK, etc...). What it's supposed to do is use mmap as read basically and then write that file to the new file. So that first if (argc == 3) is the normal read and write, the second if (argc ==4) is supposed to use mmap. Does anyone have any idea why on Earth this is happening?
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/io.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
int main(int argc, char const *argv[])
{
int nbyte = 512;
char buffer[nbyte];
unsigned char *f;
int bytesRead = 0;
int size;
int totalBuffer;
struct stat s;
const char * file_name = argv[1];
int fd = open (argv[1], O_RDONLY);
int i = 0;
char c;
int fileInput = open(argv[1], O_RDONLY);
int fileOutPut = open(argv[2], O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR);
fstat(fileInput, &s);
size = s.st_size;
printf("%d\n", size);
if (argc == 3)
{
printf("size: %d\n", size);
printf("nbyte: %d\n", nbyte);
while (size - bytesRead >= nbyte)
{
read(fileInput, buffer, nbyte);
bytesRead += nbyte;
write(fileOutPut, buffer, nbyte);
}
read(fileInput, buffer, size - bytesRead);
write(fileOutPut, buffer, size - bytesRead);
}
else if (argc == 4)
{
int i = 0;
printf("4 arg\n");
f = (char *) mmap (0, size, PROT_READ, MAP_PRIVATE, fileInput, 0);
/* This is where it is being wipped */
}
close(fileInput);
close(fileOutPut);
int who = RUSAGE_SELF;
struct rusage usage;
int ret;
/* Get the status of the file and print some. Easy to do what "ls" does with fstat system call... */
int status = fstat (fd, & s);
printf("File Size: %d bytes\n",s.st_size);
printf("Number of Links: %d\n",s.st_nlink);
return 0;
}
EDIT: I wanted to mention that the first read and write works perfectly, it is only when you try to do it through the mmap.
If you mean it's clearing your destination file, then yes, that's exactly what your code will do.
It opens the destination with truncation and then, in your argc==4 section, you map the input file but do absolutely nothing to transfer the data to the output file.
You'll need a while loop of some description, similar to the one in the argc==3 case, but which writes the bytes in mapped memory to the fileOutput descriptor.