I'm trying to store a graph in a file using mmap so that I can read and write it more quickly, but I can't read back struct fields that point to memory allocated with malloc (and I can't turn them into arrays).
The problem is that I can't read the field map[i].nodes->vertexKey back from the file
(I think this is because the node it points to was created using malloc).
my code is :
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
#define COUNT 10
#define FILESIZE ( COUNT * sizeof(struct vertex))
/* Singly linked list element carrying one vertex key. */
struct node{
int vertexKey ;
/* Link to the following element; writeMmap sets it to NULL on the single node it builds. */
struct node *nextNode;
};
/* Record type of the mapped file: FILESIZE is COUNT of these structs. */
struct vertex {
int vertexKey;
/* Address of a malloc'ed node list. NOTE(review): writeMmap copies the whole
 * struct into the mapping, so this raw address ends up in the file; it only
 * refers to a real node inside the process that allocated it. */
struct node *nodes;
};
int readMmap(){
const char *filepath = "/tmp/mmapped.bin";
int fd = open(filepath, O_RDWR , (mode_t)0600);
if (fd == -1)
{
perror("Error opening file for writing");
exit(EXIT_FAILURE);
}
struct stat fileInfo = {0};
if (fstat(fd, &fileInfo) == -1)
{
perror("Error getting the file size");
exit(EXIT_FAILURE);
}
if (fileInfo.st_size == 0)
{
fprintf(stderr, "Error: File is empty, nothing to do\n");
exit(EXIT_FAILURE);
}
printf("File size is %ji\n", (intmax_t)fileInfo.st_size);
struct vertex *map = mmap(0, FILESIZE , PROT_READ, MAP_SHARED, fd, 0);
if (map == MAP_FAILED)
{
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
for (off_t i = 0; i < COUNT; i++)
{
printf("%d |", map[i].vertexKey );
// i can't read map[i].nodes->vertexKey
printf("%d \n", map[i].nodes->vertexKey );
printf("\n" );
}
// Don't forget to free the mmapped memory
if (munmap(map, fileInfo.st_size) == -1)
{
close(fd);
perror("Error un-mmapping the file");
exit(EXIT_FAILURE);
}
// Un-mmaping doesn't close the file, so we still need to do that.
close(fd);
return 0;
}
/*
 * Create (or truncate) /tmp/mmapped.bin, size it to FILESIZE bytes, map it
 * read/write and fill it with COUNT vertex records whose keys are 0..COUNT-1.
 * Each record gets one freshly allocated node with key i*10.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any open, seek, write,
 * allocation, map or unmap failure.
 *
 * NOTE: each record's `nodes` field holds the heap address of its node, so
 * that address is what lands in the file. The nodes are intentionally not
 * freed here: the stored addresses stay usable for the rest of this process,
 * but they are meaningless to any other process that maps the file.
 */
int writeMmap(){
    const char *filepath = "/tmp/mmapped.bin";

    int fd = open(filepath, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
    if (fd == -1){
        perror("Error opening file for writing");
        exit(EXIT_FAILURE);
    }

    /* Stretch the file: seek to the last byte and write a single '\0'. */
    if (lseek(fd, (off_t)(FILESIZE - 1), SEEK_SET) == -1){
        perror("Error calling lseek() to 'stretch' the file");
        close(fd);
        exit(EXIT_FAILURE);
    }
    if (write(fd, "", 1) != 1){
        perror("Error writing last byte of the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    struct vertex *map = mmap(0, FILESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        perror("Error mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    for (size_t i = 0; i < COUNT; i++){
        struct node *n1 = malloc(sizeof *n1);
        if (n1 == NULL){
            perror("Error allocating a node");
            munmap(map, FILESIZE);
            close(fd);
            exit(EXIT_FAILURE);
        }
        n1->nextNode = NULL;
        n1->vertexKey = (int)i * 10;

        struct vertex ss;
        ss.vertexKey = (int)i;
        ss.nodes = n1;
        map[i] = ss;
    }

    /* Flush the whole mapping, not just its first 100 bytes. */
    if (msync(map, FILESIZE, MS_SYNC) == -1)
    {
        perror("Could not sync the file to disk");
    }

    /* Unmap with the same length that was mapped. */
    if (munmap(map, FILESIZE) == -1)
    {
        perror("Error un-mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    /* Un-mmaping doesn't close the file, so we still need to do that. */
    close(fd);
    return 0;
}
How fast does this really need to be? Using a memory image for your persistent format is a problematic practice -- you need it to be a pretty big win in the larger scheme of things for it to be worthwhile, if it is even possible at all.
If you want a persistent representation of your data, then that representation needs to be self-contained. Pointers per se cannot be supported, but in their place you can use indexes into tables (effectively arrays) of objects. Better would be if indexing were implicit, but that may not be sufficient for you. I apologize for being vague, but I'd need to understand the characteristics of your data much better before I could suggest any specifics.
Related
This mmap tutorial from 15 years ago ranks high in Google searches, but it actually runs subtly incorrectly on my Linux system.
mmap_write.c:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS (1000)
#define FILESIZE (NUMINTS * sizeof(int))
/*
 * mmap_write: create FILEPATH, stretch it to FILESIZE bytes, map it as an
 * array of int and store 2*i at index i for i = 1..NUMINTS.
 * Exits with EXIT_FAILURE when open, lseek, write or mmap fails; a munmap
 * failure is only reported.
 */
int main(int argc, char *argv[])
{
int i;
int fd;
int result;
int *map; /* mmapped array of int's */
/* Open a file for writing.
* - Creating the file if it doesn't exist.
* - Truncating it to 0 size if it already exists. (not really needed)
*
* Note: "O_WRONLY" mode is not sufficient when mmaping.
*/
fd = open(FILEPATH, O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
if (fd == -1) {
perror("Error opening file for writing");
exit(EXIT_FAILURE);
}
/* Stretch the file size to the size of the (mmapped) array of ints
*/
result = lseek(fd, FILESIZE-1, SEEK_SET);
if (result == -1) {
close(fd);
perror("Error calling lseek() to 'stretch' the file");
exit(EXIT_FAILURE);
}
/* Something needs to be written at the end of the file to
* have the file actually have the new size.
* Just writing an empty string at the current file position will do.
*
* Note:
* - The current position in the file is at the end of the stretched
* file due to the call to lseek().
* - An empty string is actually a single '\0' character, so a zero-byte
* will be written at the last byte of the file.
*/
result = write(fd, "", 1);
if (result != 1) {
close(fd);
perror("Error writing last byte of the file");
exit(EXIT_FAILURE);
}
/* Now the file is ready to be mmapped.
*/
map = mmap(0, FILESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Now write int's to the file as if it were memory (an array of ints).
*/
/* NOTE(review): i runs from 1 to NUMINTS inclusive, so map[0] is never
* written and map[NUMINTS] lies one int past the FILESIZE-byte mapping.
* Valid indexes are 0..NUMINTS-1.
*/
for (i = 1; i <=NUMINTS; ++i) {
map[i] = 2 * i;
}
/* Don't forget to free the mmapped memory
*/
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
/* Decide here whether to close(fd) and exit() or not. Depends... */
}
/* Un-mmaping doesn't close the file, so we still need to do that.
*/
close(fd);
return 0;
}
mmap_read.c:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
#define NUMINTS (1000)
#define FILESIZE (NUMINTS * sizeof(int))
/*
 * mmap_read: map FILEPATH read-only as an array of int and print
 * "index: value" for indexes 1..NUMINTS.
 * Exits with EXIT_FAILURE when open or mmap fails; a munmap failure is only
 * reported.
 */
int main(int argc, char *argv[])
{
int i;
int fd;
int *map; /* mmapped array of int's */
fd = open(FILEPATH, O_RDONLY);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
/* The mapping length is the fixed FILESIZE; the actual file size is not checked here. */
map = mmap(0, FILESIZE, PROT_READ, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Read the file int-by-int from the mmap
*/
/* NOTE(review): i runs from 1 to NUMINTS inclusive, so map[0] is skipped
* and map[NUMINTS] is read one int past the FILESIZE-byte mapping.
* Valid indexes are 0..NUMINTS-1.
*/
for (i = 1; i <=NUMINTS; ++i) {
printf("%d: %d\n", i, map[i]);
}
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
If the file does not already exist, the output of mmap_read is
...
998: 1996
999: 1998
1000: 2000
But if it does, the output is
...
998: 1996
999: 1998
1000: 0
Should the author have flushed the write? Or is GCC miscompiling the code?
Edit: I noticed that it's the prior existence or non-existence of the file that makes a difference, not the compilation flag.
You are starting at the second element, and writing 2000 after the end of the map.
for (i = 1; i <=NUMINTS; ++i) {
map[i] = 2 * i;
}
should be
for (i = 0; i < NUMINTS; ++i) {
map[i] = 2 * ( i + 1 );
}
Demo
It's not a buffering issue. write is a system call, so the data is passed to the OS directly. That doesn't mean the data has been written to disk when write returns, but it is in the OS's hands, so it's as if it were on disk as far as OS functions are concerned, including its memory-mapping functionality.
In C, array indexes start at zero, so the valid indexes of this mapping are 0 to 999. By writing and reading index 1000 you invoke undefined behaviour.
Change the loop in the writer to:
for (i = 1; i <=NUMINTS; ++i) {
map[i - 1] = 2 * i;
}
and reading to:
for (i = 1; i <=NUMINTS; ++i) {
printf("%d: %d\n", i, map[i-1]);
}
I am trying to create an empty file if it does not exist, and then map it using mmap() so that I can pass it to my other program for writing. I am not sure which arguments for mmap are suitable for an empty file. My code works for non-empty files but gives the error "Invalid argument" if the file is empty.
Code program1 (only creates an empty file if not exists)
/* Program 1 (function body only): create the dummy file if it is missing,
 * map it, then fork and exec program 2 and wait for it to finish. */
int i;
/* O_CREAT yields an empty file when none exists; the descriptor itself is read-only. */
int fd = open("/home/sungmin/dummy_programs/dummy.txt", O_RDONLY | O_CREAT, 0777);
char *pmap;
pid_t child;
if (fd == -1)
{
perror("Error opening file for writing");
exit(EXIT_FAILURE);
}
struct stat fileInfo = {0};
if (fstat(fd, &fileInfo) == -1)
{
perror("Error getting the file size");
exit(EXIT_FAILURE);
}
/*if (fileInfo.st_size == 0)
{
fprintf(stderr, "Error: File is empty, nothing to do\n");
exit(EXIT_FAILURE);
}*/
/* NOTE(review): the length is st_size, which is 0 for a freshly created file,
 * and the flags are MAP_ANONYMOUS alone (neither MAP_SHARED nor MAP_PRIVATE).
 * Either one looks sufficient for the reported "Invalid argument" - confirm
 * against mmap(2). */
pmap = mmap(0, fileInfo.st_size, PROT_READ | PROT_EXEC , MAP_ANONYMOUS, fd, 0);
if (pmap == MAP_FAILED)
{
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Calling fork function */
if((child=fork())==0){
printf("Iam Child process\n\n");
/* The child's argv[0] is the text to write and argv[1] is the target path;
 * program 2 reads them from exactly those slots. */
static char *argv[]={"This is some sample text. I need to write this text in my dummy file.","/home/sungmin/dummy_programs/dummy.txt",NULL};
execv("/home/sungmin/dummy_programs/pro2",argv);
/* Only reached when execv() itself fails. */
exit(127);
}
else {
printf("Iam parent, waiting for child process to exit\n\n");
waitpid(child,0,0);
printf("Existing parent\n\n");
}
/* Don't forget to free the mmapped memory*/
if (munmap(pmap, fileInfo.st_size) == -1)
{
close(fd);
perror("Error un-mmapping the file");
exit(EXIT_FAILURE);
}
/* Un-mmaping doesn't close the file, so we still need to do that.*/
close(fd);
Code program2 (opens same file as program1 and writes text passed by program1)
size_t i;
int fd;
char *pmap;
pid_t child;
struct stat fileInfo = {0};
const char *text = argv[0];
fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, (mode_t)0600);
if (fd == -1)
{
perror("Error opening file for writing");
exit(EXIT_FAILURE);
}
size_t textsize = strlen(text) + 1; // + \0 null character
if (lseek(fd, textsize-1, SEEK_SET) == -1)
{
close(fd);
perror("Error calling lseek() to 'stretch' the file");
exit(EXIT_FAILURE);
}
if (write(fd, "", 1) == -1)
{
close(fd);
perror("Error writing last byte of the file");
exit(EXIT_FAILURE);
}
pmap = mmap(0, textsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (pmap == MAP_FAILED)
{
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/* Writting users text to file */
for (i = 0; i < textsize; i++)
{
pmap[i] = text[i];
}
// Write it now to disk
if (msync(pmap, textsize, MS_SYNC) == -1)
{
perror("Could not sync the file to disk");
}
/* Don't forget to free the mmapped memory*/
if (munmap(pmap, textsize) == -1)
{
close(fd);
perror("Error un-mmapping the file");
exit(EXIT_FAILURE);
}
/* Un-mmaping doesn't close the file, so we still need to do that.*/
close(fd);
You need to use truncate to extend the file length after creating it before mapping it.
Yes, the function name sounds wrong, but truncate can actually set the file length to any number. Be sure to use a multiple of 4K for best results.
Then, if you want to keep the mapping open to see data between Program 1 and 2, you need to get rid of ANONYMOUS and map with MAP_SHARED in Program 1. A mapping that isn't shared will not show changes made by other programs. Or it might, if it has to reload from disk. It's weird, don't mix SHARED and not-SHARED mappings.
Once you've changed Program 1 to use truncate, take that lseek and write code out of Program 2. The file will already have been created and extended by Program 1.
Here is my code. I'm assuming this has something to do with improper use of pointers or maybe I'm not mapping and unmapping my memory correctly.
Could anyone please provide me with some insight into the issue?
#define _XOPEN_SOURCE 500
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <ftw.h>
#include <sys/stat.h>
#include <string.h>
/* File-scope state shared by main() and callback(). */
int size;
/* NOTE(review): map1 and map2 are handed to mmap() as file descriptors, but
 * no visible code ever assigns them (no open() call appears in this program). */
int map1, map2;
/* tar: mapping made in main(); temp: mapping made per visited file in callback(). */
void *tar, *temp;
/*
 * nftw() visitor. For an entry whose size equals sb1's size it maps the
 * entry, memcmp()s it against `tar` and prints the file name on a match.
 * Always returns 0 so the tree walk continues.
 *
 * NOTE(review): `sb1` is not declared at file scope in the visible code (the
 * only declaration is a local in main()), and it is used with `->` here.
 */
int callback(const char *filename,
const struct stat *sb2,
int filetype,
struct FTW *ftw)
{
printf("test");
if(sb2->st_size == sb1->st_size){
/* NOTE(review): PROT_NONE with flags 0, and the result is not compared with
 * MAP_FAILED before memcmp() reads through it - confirm against mmap(2). */
temp = mmap(NULL, sb2->st_size, PROT_NONE, 0, map2, 0);
int cmp = memcmp(tar, temp, sb2->st_size);
printf("%d\n", cmp);
if(cmp == 0){
printf("%s\n", filename);
}
if(munmap(temp,sb2->st_size) == -1){
fprintf(stderr, "Error in unmapping in callback function");
exit(EXIT_FAILURE);
}
}
return 0; //continue to walk the tree
}
/*
 * Entry point: stat argv[1], map it into `tar`, walk the tree at argv[2]
 * with callback(), then unmap `tar`.
 */
int main(int argc, char *argv[])
{
//check for correct arguments
// NOTE(review): this test lets argc == 2 through, in which case argv[2]
// (passed to nftw() below) is NULL.
if (argc == 1 || argc > 3) {
fprintf(stderr, "Syntax: %s filename dirname\n", argv[0]);
exit(EXIT_FAILURE);
}
//use stat to get size of filename
struct stat sb1;
if(stat(argv[1],&sb1) != 0){
fprintf(stderr, "Error in stat().");
exit(EXIT_FAILURE);
}
size = sb1.st_size;
//fd = mmap filename
// NOTE(review): sb1 is a struct object here, yet it is accessed with `->`
// below; map1 is used as the descriptor without any visible open(); and the
// failure test compares against 0 rather than MAP_FAILED.
tar = mmap(NULL,sb1->st_size, PROT_WRITE, MAP_SHARED, map1, 0);
if(tar == 0){
fprintf(stderr, "Main() mmap failed");
exit(EXIT_FAILURE);
}
//walk through the directory with callback function
// 20 is the third nftw() argument; the flags argument is 0.
nftw(argv[2], callback, 20, 0);
// use munmap to clear fd
if (munmap(tar,sb1->st_size) == -1) {
fprintf(stderr, "Error in unmapping");
exit(EXIT_FAILURE);
}
}
EDIT
I now declare my struct stat sb1 right before I use the stat function. After doing that I received a segmentation fault again. I then commented out my nftw() call and printed out the size variable (which has a reasonable value, so I believe that part is working). The new error is:
Error in unmapping.
You declare:
struct stat *sb1;
You use:
stat(argv[1],sb1);
You crash and burn because sb1 is a null pointer (since the variable is defined at file scope, it is initialized with 0).
You need to declare (at file scope):
struct stat sb1;
And then in main() you can use:
if (stat(argv[1], &sb1) != 0)
...oops...
You'll have to review all uses of sb1 to fix the status change from pointer to object, adding an & where necessary, and changing -> to . where necessary.
mmap() by example
This is a mildly edited version of a function I wrote that uses mmap() to map a file into memory:
/* Map named file into memory and validate that it is a MSG file */
/*
 * Visible part of the function: open `file` read-only, fstat() it for its
 * size and create a private read-only mapping of the whole file. Each
 * failure returns a distinct MSG_* code. The excerpt ends after the mmap()
 * check, so the validation step and the success return are not shown.
 */
static int msg_mapfile(const char *file)
{
int fd;
void *vp;
struct stat sb;
/* A null file name is rejected before anything is opened. */
if (file == 0)
return(MSG_NOMSGFILE);
if ((fd = open(file, O_RDONLY, 0)) < 0)
return(MSG_OPENFAIL);
/* fstat() supplies the mapping length (sb.st_size); close fd on failure. */
if (fstat(fd, &sb) != 0)
{
close(fd);
return(MSG_STATFAIL);
}
vp = mmap(0, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
/* The descriptor is closed on both paths before the mmap() result is
 * examined; per mmap(2) the mapping does not depend on the descriptor
 * staying open. */
close(fd);
if (vp == MAP_FAILED)
return(MSG_MMAPFAIL);
The MSG_xxxx constants are distinct error numbers applicable to the program it came from. It was only needing to read the file, hence the PROT_READ; I think you may be OK with that too.
if (argc == 1 || argc > 3) {
fprintf(stderr, "Syntax: %s filename dirname\n", argv[0]);
exit(EXIT_FAILURE);
}
/* ... */
nftw(argv[2], callback, 20, 0);
I see a possibility for argv[2] to be NULL. Perhaps you meant:
if (argc != 3) {
fprintf(stderr, "Syntax: %s filename dirname\n", argv[0]);
exit(EXIT_FAILURE);
}
Which book are you reading?
I have a tailq struct:
/* One tail-queue element; the snippet that follows pairs each element with a mapped file. */
struct entry {
int file; /* file descriptor, assigned from open() in the snippet below */
int *map; /* result of mmap() on `file` */
int pos; /* NOTE(review): not used in the visible code; presumably a cursor into map - confirm */
TAILQ_ENTRY(entry) tailq; /* Tail queue. */
};
And to each entry of the tailq I've a mmaped file, or a wish to:
#define NUMINTS (1000)
#define FILESIZE (NUMINTS * sizeof(u_int64_t))
/* Allocate one queue entry, open and map its backing file, then append the
 * entry to this thread's tail queue. `temp` (the path) and `thread_id` come
 * from code outside this snippet. */
struct entry *np;
int result;
if((np = malloc(sizeof(struct entry))) == NULL){
errx(1, "malloc");
}
/* NOTE(review): the file is opened write-only, while the mapping below asks
 * for PROT_READ | PROT_WRITE with MAP_SHARED. */
np->file = open(temp, O_WRONLY | O_CREAT, (mode_t)0600);
if (np->file == -1) {
errx(1, "Error opening file for writing");
}
/* NOTE(review): nothing visible extends the file to FILESIZE bytes before it is mapped. */
np->map = mmap(0, FILESIZE, PROT_READ | PROT_WRITE, MAP_SHARED, np->file, 0);
if (np->map == MAP_FAILED) {
close(np->file);
errx(1, "Error mmapping the file");
}
TAILQ_INSERT_TAIL(&tailq_head[thread_id], np, tailq);
I'm getting "Error mmapping the file", why?
You're opening the file write-only, and then trying to map read/write. Try opening the file O_RDWR
This question already has answers here:
How do you determine the size of a file in C?
(15 answers)
Closed 3 years ago.
How can I find out the size of a file I opened with an application written in C ?
I would like to know the size, because I want to put the content of the loaded file into a string, which I allocate using malloc(). Just writing malloc(10000*sizeof(char)); is IMHO a bad idea.
You need to seek to the end of the file and then ask for the position:
fseek(fp, 0L, SEEK_END);
sz = ftell(fp);
You can then seek back, e.g.:
fseek(fp, 0L, SEEK_SET);
or (if seeking to go to the beginning)
rewind(fp);
Using standard library:
Assuming that your implementation meaningfully supports SEEK_END:
fseek(f, 0, SEEK_END); // seek to end of file
size = ftell(f); // get current file pointer
fseek(f, 0, SEEK_SET); // seek back to beginning of file
// proceed with allocating memory and reading the file
Linux/POSIX:
You can use stat (if you know the filename), or fstat (if you have the file descriptor).
Here is an example for stat:
#include <sys/stat.h>
struct stat st;
stat(filename, &st);
size = st.st_size;
Win32:
You can use GetFileSize or GetFileSizeEx.
If you have the file descriptor fstat() returns a stat structure which contain the file size.
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
// fd = fileno(f); //if you have a stream (e.g. from fopen), not a file descriptor.
struct stat buf;
fstat(fd, &buf);
off_t size = buf.st_size;
I ended up just making a short and sweet fsize function(note, no error checking)
/*
 * Return the size in bytes of the file behind `fp`, leaving the stream
 * position where it was on entry.
 *
 * Returns -1 when ftell() or fseek() fails, or when the size does not fit
 * in the int return type.
 */
int fsize(FILE *fp){
    /* ftell() yields a long; keeping it in a long avoids truncating offsets. */
    long prev = ftell(fp);
    if (prev == -1L) {
        return -1;
    }
    if (fseek(fp, 0L, SEEK_END) != 0) {
        return -1;
    }
    long sz = ftell(fp);

    /* Go back to where we were, even if the size query itself failed. */
    if (fseek(fp, prev, SEEK_SET) != 0 || sz == -1L) {
        return -1;
    }

    /* Round-trip through int to detect sizes the return type cannot hold. */
    if ((long)(int)sz != sz) {
        return -1;
    }
    return (int)sz;
}
It's kind of silly that the standard C library doesn't have such a function, but I can see why it'd be difficult as not every "file" has a size(for instance /dev/null)
How to use lseek/fseek/stat/fstat to get filesize ?
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
/*
 * Print the size of `filename` as measured with stdio: open the file, seek
 * to its end and report the resulting ftell() offset.
 * Any failing step prints a message and exits with EXIT_FAILURE.
 */
void
fseek_filesize(const char *filename)
{
    FILE *stream = fopen(filename, "r");

    if (stream == NULL) {
        printf("failed to fopen %s\n", filename);
        exit(EXIT_FAILURE);
    }

    if (fseek(stream, 0, SEEK_END) == -1) {
        printf("failed to fseek %s\n", filename);
        exit(EXIT_FAILURE);
    }

    /* The offset of the end of the stream is the file size. */
    const long end_offset = ftell(stream);
    if (end_offset == -1) {
        printf("failed to ftell %s\n", filename);
        exit(EXIT_FAILURE);
    }

    printf("[*] fseek_filesize - file: %s, size: %ld\n", filename, end_offset);

    if (fclose(stream) != 0) {
        printf("failed to fclose %s\n", filename);
        exit(EXIT_FAILURE);
    }
}
/*
 * Print the size of `filename` as reported by fstat() on an open descriptor.
 * Any failing step prints a message and exits with EXIT_FAILURE.
 */
void
fstat_filesize(const char *filename)
{
    int fd;
    struct stat statbuf;

    fd = open(filename, O_RDONLY, S_IRUSR | S_IRGRP);
    if (fd == -1)
    {
        printf("failed to open %s\n", filename);
        exit(EXIT_FAILURE);
    }
    if (fstat(fd, &statbuf) == -1)
    {
        printf("failed to fstat %s\n", filename);
        exit(EXIT_FAILURE);
    }
    /* off_t is not necessarily long long; cast so the argument matches %lld
     * (same approach as seek_filesize). */
    printf("[*] fstat_filesize - file: %s, size: %lld\n", filename,
           (long long) statbuf.st_size);
    if (close(fd) == -1)
    {
        /* The failing call is close(), not fclose(). */
        printf("failed to close %s\n", filename);
        exit(EXIT_FAILURE);
    }
}
/*
 * Print the size of `filename` as reported by stat() on the path.
 * Prints a message and exits with EXIT_FAILURE when stat() fails.
 */
void
stat_filesize(const char *filename)
{
    struct stat statbuf;

    if (stat(filename, &statbuf) == -1)
    {
        printf("failed to stat %s\n", filename);
        exit(EXIT_FAILURE);
    }
    /* off_t is not necessarily long long; cast so the argument matches %lld. */
    printf("[*] stat_filesize - file: %s, size: %lld\n", filename,
           (long long) statbuf.st_size);
}
/*
 * Print the size of `filename` as measured with lseek(): open the file and
 * seek to its end; the returned offset is the size.
 * A NULL name or any failing step prints a message and exits with
 * EXIT_FAILURE.
 */
void
seek_filesize(const char *filename)
{
    if (filename == NULL) {
        printf("invalid filename\n");
        exit(EXIT_FAILURE);
    }

    const int descriptor = open(filename, O_RDONLY, S_IRUSR | S_IRGRP);
    if (descriptor == -1) {
        printf("failed to open %s\n", filename);
        exit(EXIT_FAILURE);
    }

    const off_t end_offset = lseek(descriptor, 0, SEEK_END);
    if (end_offset == -1) {
        printf("failed to lseek %s\n", filename);
        exit(EXIT_FAILURE);
    }

    printf("[*] seek_filesize - file: %s, size: %lld\n", filename, (long long) end_offset);

    if (close(descriptor) == -1) {
        printf("failed to close %s\n", filename);
        exit(EXIT_FAILURE);
    }
}
/*
 * Run all four size-measuring helpers on every path given on the command
 * line. With no paths, print a usage line and exit with status 0.
 */
int
main(int argc, const char *argv[])
{
    if (argc < 2) {
        printf("%s <file1> <file2>...\n", argv[0]);
        exit(0);
    }

    for (int arg = 1; arg < argc; ++arg) {
        const char *path = argv[arg];

        seek_filesize(path);
        stat_filesize(path);
        fstat_filesize(path);
        fseek_filesize(path);
    }

    return 0;
}
Have you considered not computing the file size and just growing the array if necessary? Here's an example (with error checking ommitted):
#define CHUNK 1024
/*
 * Read everything remaining on `fp` into a heap buffer that grows as needed,
 * so it also works for streams whose size cannot be queried up front.
 *
 * On success returns the number of bytes read and stores in *buf a buffer
 * obtained from malloc/realloc that the caller must free(); the buffer is
 * not '\0'-terminated. For an empty stream the return value is 0 and *buf is
 * still a valid buffer to free.
 *
 * On allocation failure, read error, or when the data would exceed what the
 * int return type can report, returns -1 and sets *buf to NULL.
 */
int read_file(FILE *fp, char **buf)
{
    /* Largest length the int return can carry (INT_MAX on mainstream ABIs),
     * computed without needing <limits.h>. */
    const size_t max_len = (size_t)(~0U >> 1);
    size_t cap = CHUNK;
    size_t len = 0;
    char *data = malloc(cap);

    if (data == NULL) {
        *buf = NULL;
        return -1;
    }

    for (;;) {
        /* Keep at least CHUNK free bytes so the next fread() cannot overflow. */
        if (cap - len < CHUNK) {
            char *grown = NULL;

            if (cap <= (size_t)-1 / 2) {
                grown = realloc(data, cap * 2);
            }
            if (grown == NULL) {
                free(data);
                *buf = NULL;
                return -1;
            }
            data = grown;
            cap *= 2;
        }

        /* Append after the bytes already collected. */
        size_t got = fread(data + len, sizeof(char), CHUNK, fp);
        if (got == 0) {
            break;
        }
        len += got;

        if (len > max_len) {
            free(data);
            *buf = NULL;
            return -1;
        }
    }

    /* fread() returning 0 means either end-of-file or an error. */
    if (ferror(fp)) {
        free(data);
        *buf = NULL;
        return -1;
    }

    *buf = data;
    return (int)len;
}
This has the advantage of working even for streams in which it is impossible to get the file size (like stdin).
If you're on Linux, seriously consider just using the g_file_get_contents function from glib. It handles all the code for loading a file, allocating memory, and handling errors.
#include <stdio.h>
#define MAXNUMBER 1024
/*
 * Run `du -b /bin/bash` and print the first field of its output, i.e. the
 * characters up to and including the first tab.
 *
 * Returns 0 on success, 1 when the command cannot be started.
 */
int main()
{
    char a[MAXNUMBER];
    size_t len = 0;
    int c;
    FILE *fp = popen("du -b /bin/bash", "r");

    if (fp == NULL) {
        perror("popen");
        return 1;
    }

    /* Stop at the tab, at end of output, or when the buffer is full
     * (one byte stays reserved for the terminator). */
    while (len < sizeof a - 1 && (c = getc(fp)) != EOF) {
        a[len++] = (char)c;
        if (c == '\t') {
            break;
        }
    }
    a[len] = '\0';

    printf(" a is %s\n", a);
    pclose(fp);
    return 0;
}
HTH