I have a program which is another variant of copy program in linux(Actually I'm on Mac OSX).
In order to support copying large files, I wrote something like this:
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>
#define BUFFSIZE 65535
#define PAGESIZE 4096
int main(int argc, char **argv){
char *source, *destination;
int src_fd, dst_fd;
unsigned long long bytes_read;
int bytes = BUFFSIZE;
struct timeval start, end;
int overall_time = 0;
unsigned long long offset = 0;
struct stat statbuf;
if(argc < 3){
printf("copy <source> <destination>\n");
exit(EXIT_FAILURE);
}
source = argv[1];
destination = argv[2];
src_fd = open(source, O_RDONLY, 0777);
if(src_fd < 0){
perror("src_fd");
exit(EXIT_FAILURE);
}
//bytes_read = lseek(src_fd, 0, SEEK_END);
fstat(src_fd, &statbuf);
bytes_read = statbuf.st_size;
dst_fd = open(destination, O_RDWR | O_CREAT, 0777);
if(dst_fd < 0){
perror("dst_fd");
exit(EXIT_FAILURE);
}
lseek(dst_fd, bytes_read -1, SEEK_SET);
write(dst_fd, "", 1);
gettimeofday(&start, NULL);
while(bytes_read > 0){
if(bytes_read < BUFFSIZE){
bytes = bytes_read;
bytes_read = 0;
}
else{
bytes_read -= bytes;
}
void *src_map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, src_fd, (off_t)offset);
if(src_map == (void*) MAP_FAILED){
perror("src_map");
exit(EXIT_FAILURE);
}
void *dst_map = mmap(NULL, bytes, PROT_WRITE, MAP_SHARED, dst_fd, (off_t)offset);
if(dst_map == (void*) MAP_FAILED){
perror("dst_map");
exit(EXIT_FAILURE);
}
memcpy(dst_map, src_map, bytes);
int src_unmp = munmap(src_map, bytes);
if(src_unmp == -1){
perror("src_unmap");
exit(EXIT_FAILURE);
}
int dst_unmp = munmap(dst_map, bytes);
if(dst_unmp == -1){
perror("dst_unmap");
exit(EXIT_FAILURE);
}
offset += 4096;
bytes_read -= bytes;
}
gettimeofday(&end, NULL);
printf("overall = %d\n", (end.tv_usec - start.tv_usec));
close(src_fd);
close(dst_fd);
return 0;
}
The goal is to measure the amount of time elapsed to copy a large file with the use of mmap().
The above code is not working for transferring 1GB file.
Any hint for that?
Thank you
Yes. The problem is in offset value. The offset value should be a multiple of page size.
Related
I'm trying to write a program that uses counting semaphores, a mutex, and two threads. One thread is a producer that writes items to shared memory. Each item has a sequence number, timestamp, checksum, and some data. The consumer thread copies the original checksum from an item then calculates its own checksum from the item's data and compares the two to make sure the data wasn't corrupted.
My program runs, however, it reports incorrect checksums far more than correct checksums. I did some print statements to see what was going on, and it looks like the item's data is changing between writing to shared memory and reading from it. The item's stored checksum is also changing, and I have no idea what is causing this.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <errno.h>
#include <stdint.h>
#include <semaphore.h>
#include <time.h>
#include <pthread.h>
typedef struct{
int seqNo;
uint16_t checksum;
uint32_t timeStamp;
uint8_t data[22];
}Item;
char* shm_name = "buffer";
int shm_fd;
uint8_t *shm_ptr;
pthread_t producers;
pthread_t consumers;
pthread_mutex_t mutex;
sem_t *empty, *full;
int shmSize;
int in = 0;
int out = 0;
//method for initializing shared memory
void CreateSharedMemory(){
shm_fd = shm_open(shm_name, O_CREAT | O_RDWR, 0644);
if (shm_fd == -1) {
fprintf(stderr, "Error unable to create shared memory, '%s, errno = %d (%s)\n", shm_name,
errno, strerror(errno));
return -1;
}
/* configure the size of the shared memory segment */
if (ftruncate(shm_fd, shmSize) == -1) {
fprintf(stderr, "Error configure create shared memory, '%s, errno = %d (%s)\n", shm_name,
errno, strerror(errno));
shm_unlink(shm_name);
return -1;
}
printf("shared memory create success, shm_fd = %d\n", shm_fd);
}
uint16_t checksum(char *addr, uint32_t count)
{
register uint32_t sum = 0;
uint16_t *buf = (uint16_t *) addr;
// Main summing loop
while(count > 1)
{
sum = sum + *(buf)++;
count = count - 2;
}
// Add left-over byte, if any
if (count > 0)
sum = sum + *addr;
// Fold 32-bit sum to 16 bits
while (sum>>16)
sum = (sum & 0xFFFF) + (sum >> 16);
return(~sum);
}
Item CreateItem(){
Item item;
uint16_t cksum;
int j = 0;
time_t seconds;
seconds = time(NULL);
item.seqNo = j;
item.timeStamp = seconds; //FIX this
for(int i = 0; i < 22; ++i){
item.data[i] = rand() % 256;
}
cksum = checksum(&item.data[0], shmSize-2);
item.checksum = cksum;
++j;
return item;
}
void* producer() {
shm_fd = shm_open(shm_name, O_RDWR, 0644);
shm_ptr = (uint8_t *)mmap(0, 32, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
while(1) {
Item tempItem = CreateItem();
tempItem.seqNo = in;
sem_wait(empty);
pthread_mutex_lock(&mutex);
while (((in + 1)%shmSize) == out)
; // waiting
if(in < shmSize) {
//&shm_ptr[counter] = item;
\
memcpy(&shm_ptr[in], &tempItem, 32);
printf("%d\n", tempItem.seqNo);
in = (in + 1) % shmSize;
printf("Producer: %x\n", tempItem.checksum);
}
sleep(1);
pthread_mutex_unlock(&mutex);
sem_post(full);
}
}
void* consumer() {
uint16_t cksum1, cksum2;
shm_fd = shm_open(shm_name, O_RDWR, 0644);
shm_ptr = (uint8_t *)mmap(0, shmSize, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
while(1) {
sem_wait(full);
pthread_mutex_lock(&mutex);
while (in == out)
; // waiting
if(out > 0) {
Item tempItem;
memcpy(&tempItem, &shm_ptr[out], 32);
cksum1 = tempItem.checksum;
cksum2 = checksum(&tempItem.data[0], shmSize-2);
if (cksum1 != cksum2) {
printf("Checksum mismatch: expected %02x, received %02x \n", cksum2, cksum1);
}
else{
printf("removed from shm\n");
}
//printf("Checksums match !!! \n");
out = (out + 1)%shmSize;
}
sleep(1);
pthread_mutex_unlock(&mutex);
sem_post(empty);
}
}
int main(int argc, char **argv){
sem_unlink(&empty);
sem_unlink(&full);
shm_unlink(shm_name);
shmSize = atoi(argv[1]);
out = shmSize;
if(shmSize < 0){
printf("Error: Size of buffer cannot be negative. ");
return -1;
}
pthread_mutex_init(&mutex, NULL);
empty = sem_open("/empty", O_CREAT, 0644, shmSize);
full = sem_open("/full", O_CREAT, 0644, 0);
CreateSharedMemory();
pthread_create(&producers, NULL, producer, NULL);
pthread_create(&consumers, NULL, consumer, NULL);
pthread_exit(NULL);
Trying to use mmap to write to a file. Unfortunately the first write in the loop map[i] = i; will cause a bus error. Not sure why.
The PC runs Ubuntu 14.04 and the file /tmp/mmapped.bin has 12 bytes and the program is invoked with ./a.out 3.
Thanks
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define FILEPATH "/tmp/mmapped.bin"
//#define NUMINTS (1000)
#define FILESIZE 0x400000000
int main(int argc, char *argv[])
{
int i;
int fd;
int *map; /* mmapped array of int's */
int size = atoi(argv[1]);
fd = open(FILEPATH, O_RDWR| O_CREAT | O_TRUNC);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
map = mmap(0, 4 * size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
for (i = 1; i <= size; ++i) {
map[i] = i;
}
if (munmap(map, FILESIZE) == -1) {
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}
In c you need to start at index 0. Because it will simply increment the pointer by the amount i and then dereference it. Your code dereferences the pointer beyond the allowed bound.
It should be,
for (i = 0; i < size; ++i) {
map[i] = i;
}
because it's equivalent to
for (i = 0; i < size; ++i) {
*(map + i) = i;
}
Also, use
map = mmap(0, size * sizeof *map, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
to ensure that enough space is allocated and that *(map + i) will be inside bounds. Don't use magic numbers.
According to the mmap man page a bus error (SIGBUS) happens when you read/write outside the bounds of the file.
The length of the mapping is separate from the length of the file. If your file is newly created its size will be 0, even if you specify a length with mmap. Resize the file with ftruncate after opening it.
I'm trying to share a text file between forked processes on my Ubuntu x86_64: the file will not be absurdly large, since strings will be written only if there is not already another identical string in the file; strings will be hostnames of visited websites, so I'll assume no more than 255 bytes for each hostname.
When it is a process' turn to write in shared object, it is OK; once all the processes wrote in shared object, msync should make the writing effective on the disk, but the mapped.txt file created only contain one string from arrayString, i.e. the string the last process wrote in shared object.
Here's the code:
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <string.h>
// first forked process will write "first" in file, and so on
const char *arrayString[] = {
"first",
"second",
"third"
};
int main(void) {
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
char *data;
data = (char *)mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
sem_wait(sem);
size_t textsize = strlen(arrayString[index])+1;
if (ftruncate(fd, sizeof(textsize)) == -1) {
perror("ftruncate:");
return EXIT_FAILURE;
}
for (size_t i = 0; i < textsize; i++) {
printf("%d Writing character %c at %zu\n", getpid(), arrayString[index][i], i);
data[i] = arrayString[index][i];
}
printf("%d wrote ", getpid());
for (size_t i = 0; i < textsize; i++) {
printf("%c", data[i]);
}
printf("\n");
if (msync(data, textsize, MS_SYNC) == -1) {
perror("Could not sync the file to disk");
}
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
close(fd);
return EXIT_SUCCESS;
}
This is one possible output of the code above for three child processes (this is fine):
20373 Writing character s at 0
20373 Writing character e at 1
20373 Writing character c at 2
20373 Writing character o at 3
20373 Writing character n at 4
20373 Writing character d at 5
20373 Writing character at 6
20373 wrote second
20374 Writing character t at 0
20374 Writing character h at 1
20374 Writing character i at 2
20374 Writing character r at 3
20374 Writing character d at 4
20374 Writing character at 5
20374 wrote third
20372 Writing character f at 0
20372 Writing character i at 1
20372 Writing character r at 2
20372 Writing character s at 3
20372 Writing character t at 4
20372 Writing character at 5
20372 wrote first
And here's the content of mapped.txt (this is bad):
first^#^#^#
I expected:
second
third
first
but all I get is only the string of the last process, with those strange symbols. I'd like to keep this file persistent in memory, but because of the I/O slowness, I'm trying to use memory mapping.
Any idea why my file only contains the string written by the last process accessing the shared file?
Edit: I think I get it, it seems to work now: I hope it will be of help to someone. Compiled with g++ -g -o mapthis mapthis.cpp -lrt -pthread. Beware that some error checking are missing, like for fsync, snprintf and lseek.
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#include <semaphore.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
const char *arrayString[] = {
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com",
"www.facebook.com",
"www.google.com",
"www.cnn.com",
"www.speechrepository.com",
"www.youtube.com"
};
int main(void) {
int index;
int children = sizeof(arrayString) / sizeof(char*);;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
struct stat filestats;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
if (fstat(fd, &filestats) < 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = (char *)mmap(NULL, filestats.st_size ? filestats.st_size : 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("first map:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
sleep(1);
pid_t pid = fork();
if (pid == 0) {
int nw = 0;
int hostnameSize = 0;
const size_t origsize = filestats.st_size;
char *hostPos = NULL;
char *numPos = NULL;
char *backslashPos = NULL;
char tempBuff[64];
memset((char *)tempBuff, 0, sizeof(tempBuff));
sem_wait(sem);
// remap to current file size if it changed
fstat(fd, &filestats);
// file empty, just insert
if (filestats.st_size == 0) {
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
else {
// file not empty, let's look for string
hostPos = strstr(data, arrayString[index]);
if (hostPos) {
// string is already inserted, search for offset of number of seconds
lseek(fd, hostPos-data, SEEK_SET);
numPos = strchr(hostPos, ' ')+1;
backslashPos = strchr(numPos, '\n');
long unsigned before = atoi(numPos);
long unsigned now = (unsigned long)time(NULL);
long unsigned difference = now - before;
printf("%s visited %ld seconds ago (%ld - %ld)\n",
arrayString[index], difference, now, before);
nw = snprintf(tempBuff, backslashPos-hostPos+1, "%s %010lu", arrayString[index], now);
write(fd, tempBuff, nw);
write(fd, "\n", 1);
fsync(fd);
}
else {
data = (char *)mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
lseek(fd, 0, SEEK_END);
nw = snprintf(tempBuff, sizeof(tempBuff), "%s %010lu\n", arrayString[index], (unsigned long)time(NULL));
write(fd, tempBuff, nw);
fsync(fd);
}
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
else if (pid > 0) {
wait(NULL);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}
This line is problematic:
if (ftruncate(fd, sizeof(textsize)) == -1) {
textsize is a size_t, and taking its sizeof is just going to get 4 or 8 (on 32 and 64 bit systems). Looks like you're on a 64 bit system, so you're unconditionally truncating the file to 8 bytes in this case before every write. The "strange symbols" are just how your editor displays NUL/zero bytes. Even if you used ftruncate(fd, textsize), you'd still truncate down to just the string you're about to write, overwriting any data other children may have written; I doubt you want to ftruncate at all here.
For continual appends from separate processes (where they can't share information about the size or offset of the data they're adding), memory mapping just doesn't make sense; why aren't you just having each of them take the lock, lseek to end of file, then call write? You could still use memory mappings for the duplicate checking (some of it without locking), it would just be a bit different. Something like this:
int main(void) {
struct stat filestats;
int index;
int children = 3;
const char *filepath = "mapped.txt";
sem_t *sem;
char *data;
sem = sem_open("semaphore", O_CREAT | O_EXCL, 0644, 1);
sem_unlink("semaphore");
int fd;
fd = open(filepath, O_RDWR | O_CREAT, 0644);
if (fd < 0) {
perror("open:");
return EXIT_FAILURE;
}
// Mostly just to ensure it's mappable, we map the current size of the file
// If the file might already have values, and many child workers won't add
// to it, this might save some mapping work in the children; you could
// just map in the children when needed though
if (fstat(fd, &filestats) != 0) {
close(fd);
perror("fstat:");
return EXIT_FAILURE;
}
data = mmap(NULL, filestats.st_size, PROT_READ, MAP_SHARED, fd, 0);
if (data == MAP_FAILED) {
close(fd);
perror("mmap:");
return EXIT_FAILURE;
}
for (index=0; index<children; index++) {
if (fork() == 0) {
const size_t origsize = filestats.st_size;
sem_wait(sem);
// remap to current file size if it changed
// If you're not on Linux, you'd just have to mmap from scratch
// since mremap isn't standard
fstat(fd, &filestats);
if (origsize != filestats.st_size) {
data = mremap(data, origsize, filestats.st_size, MREMAP_MAYMOVE);
if (data == MAP_FAILED) {
close(fd);
sem_post(sem);
perror("mmap:");
_exit(EXIT_FAILURE);
}
}
// Not safe to use strstr since mapping might not end with NUL byte
// You'd need to workaround this, or implement a your own memstr-like function
if (!memstr(data, arrayString[index])) {
// Move fd to end of file, so we append new data
lseek(fd, 0, SEEK_END);
write(fd, arrayString[index], strlen(arrayString[index]));
write(fd, "\n", 1);
fsync(fd);
}
munmap(data, filestats.st_size);
close(fd);
sem_post(sem);
_exit(EXIT_SUCCESS);
}
}
munmap(data, filestats.st_size);
close(fd);
return EXIT_SUCCESS;
}
That memstr I referenced would need to be hand-implemented (or you'd need to do terrible things like ensure the file always had a NUL byte at the end so you could use strstr on it); you can get some tips on that here.
You're writing all the strings at offset 0 of the file, each over the top of the previous. The core of your loop should be something like
struct stat status;
fstat(fd, &status);
size_t cursize = status.st_size;
ftruncate(fd, cursize + textsize);
for (size_t i = 0; i < textsize; i++) {
data[cursize + i] = arrayString[index][i];
}
I am trying to create a shared memory area using examples and documentation I found online. My goal is IPC , so I can make different processes talk to each other.
This my C file
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
int main (int argc, char *argv[])
{
struct stat sb;
off_t len;
char *p;
int fd;
fd = shm_open("test", O_RDWR | O_CREAT); //,S_IRUSR | S_IWUSR);
if (fd == -1) {
perror("open");
return 1;
}
if (fstat(fd, &sb)==-1){
perror("fstat");
return 1;
}
/*if (!S_ISREG(sb.st_mode)){
fprintf(stderr, "%s is not a file\n",fileName);
return 1;
}*/
p = mmap(0, sb.st_size, PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED){
perror("mmap");
return 1;
}
if (close(fd)==-1) {
perror("close");
return 1;
}
for (len = 0; len < sb.st_size; len++) {
putchar(p[len]);
}
if (munmap(p, sb.st_size) == -1) {
perror("munmao");
return 1;
}
fprintf(stderr,"\n");
return 0;
}
The problem is that I am getting a mmap: Invalid argument. I assume something is wrong with fd but have no clue how to fix it, any help would be appreciated. I am on Yosemite using latest XCODE .
You need to extend the size of the shared memory mapping, at least the first time when you create it. Right now its size is 0, and mmap is not going to allow you to make a zero length mapping.
So instead of your fstat() call, do e.g.:
size_t len = 4096;
if (ftruncate(fd, len) == -1) {
perror("ftruncate");
return 1;
}
And pass this len to mmap().
Your addr parameter is set to 0, which might be reserved. Did you mean to use NULL? This would be different than 0.
code:
#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/time.h>
void write_zero(char * file, unsigned long bytes)
{
printf("Zeroing %s\n", file);
unsigned int wrote = 0, total = 0;
int fd, i, buf;
char obj = 0x00;
fd = open(file, O_RDWR, DEFFILEMODE);
lseek(fd, 0, SEEK_SET);
write(fd, &obj, bytes);
}
int main(int argc, char * * argv)
{
int fd;
unsigned long blocks = 0;
char check = 0x0;
fd = open(argv[1], O_RDONLY);
ioctl(fd, BLKGETSIZE, &blocks);
close(fd);
printf("Blocks: %lu\tBytes: %lu\tGB: %.2f\n",
blocks, blocks * 512, (double)blocks * 512.0 / (1024 * 1024 * 1024));
do
{
printf("Write 0x0 to %s? [y/N] ", argv[1]);
fflush(stdout);
}
while (scanf("%c", &check) < 1);
if (check == 'y')
{
write_zero(argv[1], blocks * 512);
}
}
I get nothing actually written to the device.. I copied my open line from the 'dd' source code, thinking maybe it was not opened right. dd can zero the device, but this program does not. Any ideas?
It seems like this has been beaten to death but
char obj = 0x00;
fd = open(file, O_RDWR, DEFFILEMODE);
lseek(fd, 0, SEEK_SET);
write(fd, &obj, bytes);
Is not going to write zeros. It's going to write garbage from the stack.