I have a program that reads from a Random Access File and is to return the smallest and largest number in the file. One requirement is that this is done with 4 processes using fork() and piping the results. I divide the file up into 4 chunks and have each process evaluate a chunk of the file. I find the max and min of each chunk and write them to a pipe. At the end I will compare the piped values and find the largest and smallest of the values.
I am having trouble reading from the pipes as they are returning -1. Any insight on what I am doing wrong? Thanks!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
int findMin(int start, int end, const char * filename);
int findMax(int start, int end, const char * filename);
//Calculates minimum and maximum of a number
int main(int argc, char * argv[])
{
const char * filename; // name of file to read
FILE * ft; // file handle for the file
int pid, // process id of this process
num, // the number of integer values in the file
i, // loop control variable for reading values
temp=0; // used to store each value read from the file
long size; // size in bytes of the input file
/*********************************************************************/
filename = argv[1]; // read the file named on the command line
ft= fopen(filename, "rb");
if (ft)
{
pid = getpid();
fseek (ft,0,SEEK_END); //go to end of file
size = ftell(ft); //what byte in file am I at?
fseek (ft,0,SEEK_SET); //go to beginning of file
num = (int)size / (int)sizeof(int); // number of integer values
printf("file size: %li bytes\n", size);
printf("sizeof(int) = %i bytes\n",(int) sizeof(int));
printf("how many integers = %i\n\n", num);
fclose(ft);
}
//Split file size into quarters to make 4 processes
int increment = num/4;
int num1 = increment;
int num2 = num1 + increment;
int num3 = num2 + increment;
int num4 = num;
int status;
int pid1 = -1;
int pid2 = -1;
//Pipes
int fdmin1[2];
int fdmax1[2];
int fdmin2[2];
int fdmax2[2];
int fdmin3[2];
int fdmax3[2];
int fdmin4[2];
int fdmax4[2];
//initializing pipes
if(pipe(fdmin1) == -1)
{
perror("Piping fd1 failed");
return 0;
}
if(pipe(fdmax1) == -1)
{
perror("Piping fd2 failed");
return 0;
}
if(pipe(fdmin2) == -1)
{
perror("Piping fd3 failed");
return 0;
}
if(pipe(fdmax2) == -1)
{
perror("Piping fd4 failed");
return 0;
}
if(pipe(fdmin3) == -1)
{
perror("Piping fd3 failed");
return 0;
}
if(pipe(fdmax3) == -1)
{
perror("Piping fd4 failed");
return 0;
}
if(pipe(fdmin4) == -1)
{
perror("Piping fd3 failed");
return 0;
}
if(pipe(fdmax4) == -1)
{
perror("Piping fd4 failed");
return 0;
}
//temp variables for pipes
int temp1;
int temp2;
int temp3;
int temp4;
int temp5;
int temp6;
int temp7;
int temp8;
pid1 = fork();
printf("pid1: %d \n", pid1);
if(pid1 > 0)
{
//Process 1
temp1 = findMin(0, num1, filename);
temp2 = findMax(0, num1, filename);
close(fdmin1[0]);
if(write(fdmin1[1], &temp1, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmin1[1]);
close(fdmax1[0]);
if(write(fdmax1[1], &temp2, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmax1[1]);
}
else if(pid1 == 0)
{
//Process 2
temp3 = findMin(num1, num2, filename);
temp4 = findMax(num1, num2, filename);
close(fdmin2[0]);
if(write(fdmin2[1], &temp3, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmin2[1]);
close(fdmax2[0]);
if(write(fdmax2[1], &temp4, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmax2[1]);
pid2 = fork();
printf("pid2: %d \n", pid2);
if(pid2 > 0)
{
//Process 3
temp5 = findMin(num2, num3, filename);
temp6 = findMax(num2, num3, filename);
close(fdmin3[0]);
if(write(fdmin3[1], &temp5, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmin3[1]);
close(fdmax3[0]);
if(write(fdmax3[1], &temp6, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmax3[1]);
}
else if(pid2 == 0)
{
//Process 4
temp7 = findMin(num3, num4, filename);
temp8 = findMax(num3, num4, filename);
close(fdmin4[0]);
if(write(fdmin4[1], &temp7, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmin4[1]);
close(fdmax4[0]);
if(write(fdmax4[1], &temp8, sizeof(int)) == -1)
{
printf("Error writting to pipe");
}
close(fdmax4[1]);
}
}
//Close all pipe ends in all processes
close(fdmin1[0]);
close(fdmin1[1]);
close(fdmin2[0]);
close(fdmin2[1]);
close(fdmin3[0]);
close(fdmin3[1]);
close(fdmin4[0]);
close(fdmin4[1]);
close(fdmax1[0]);
close(fdmax1[1]);
close(fdmax2[0]);
close(fdmax2[1]);
close(fdmax3[0]);
close(fdmax3[1]);
close(fdmax4[0]);
close(fdmax4[1]);
//Wait for all processes to finish
int returnStatus;
waitpid(pid1, &returnStatus, 0);
int returnStatus2;
waitpid(pid2, &returnStatus2, 0);
//Make sure we are in parant process
if(pid1 > 0)
{
//Variables to compare min and max returned from processses
int min1;
int max1;
int min2;
int max2;
int min3;
int max3;
int min4;
int max4;
//read from pipe (error is occuring here)
close(fdmin1[1]);
if(read(fdmin1[0], &min1, sizeof(int)) == -1)
{
printf("Error reading");
}
close(fdmin1[0]);
printf("min1: %d \n", min1);
}
return 0;
}
//function to find the minimum in the file
int findMin(int start, int end, const char * filename)
{
int temp;
int smallestNum;
int i;
int length = end - start;
FILE * ft2;
ft2= fopen(filename, "rb");
fseek (ft2,start,SEEK_SET);
fread(&smallestNum,sizeof(int),1,ft2);
for(i = 0; i < length; i++)
{
fread(&temp,sizeof(int),1,ft2);
//printf("%d \n", temp);
if(temp < smallestNum)
{
smallestNum = temp;
}
/*
printf("%5i: %7i ",pid,temp);
if ((i+1)%5 == 0)
printf("\n");
*/
}
fclose(ft2);
printf("SmallestNum: %d \n", smallestNum);
return smallestNum;
}
//function to find maximum in file
int findMax(int start, int end, const char * filename)
{
int temp;
int largestNum;
int i;
int length = end - start;
FILE * ft3;
ft3= fopen(filename, "rb");
fseek (ft3,start,SEEK_SET);
fread(&largestNum,sizeof(int),1,ft3);
for(i = 0; i < length; i++)
{
fread(&temp,sizeof(int),1,ft3);
//printf("%d \n", temp);
if(temp > largestNum)
{
largestNum = temp;
}
/*
printf("%5i: %7i ",pid,temp);
if ((i+1)%5 == 0)
printf("\n");
*/
}
fclose(ft3);
printf("Largest Num: %d \n", largestNum);
return largestNum;
}
Here is the code for generating the Random Access File
/*
* This file generates a binary output file containing integers. It
* requires the output filename as a parameter and will take an
* argument indicating the number of values to generate as input.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define BIAS 0 // a bias value added to the numbers to "bias" the file
// contents to provide an offset to the min and max
int main(int argc, char * argv[]) {
const char * filename; // name of the output file
FILE * ft; // file handle for output file
int numtogen = 1000000; // default is to generate 1,000,000 numbers
int randomnum, i; // variables used in the loop generating numbers
if (argc<2) { // not enough arguments, need output file name
printf("Usage: gendata <filename> [number of numbers]\n");
return 1;
}
if (argc == 3) // optional third argument for number of numbers
numtogen = atoi(argv[2]);
filename=argv[1]; // use the filename entered to store numbers
srand(time(NULL)); // seed the random number generator
ft= fopen(filename, "wb") ;
if (ft) {
for (i = 0; i < numtogen; i++){
randomnum = rand() % numtogen + BIAS;
fwrite(&randomnum,sizeof(int),1,ft);
}
fclose(ft);
}
return 0;
}
I am having trouble reading from the pipes as they are returning -1. Any insight on what I am doing wrong? Thanks!
this is because in the main process you close two times the pipe, doing
printf("pid1: %d \n", pid1);
if(pid1 > 0)
{
...
close(fdmin1[0]); <<< HERE
and
//Close all pipe ends in all processes
close(fdmin1[0]); <<< HERE
so it is closed when you do :
if(read(fdmin1[0], &min1, sizeof(int)) == -1)
do not close fdmin1[0] before to read in but the reverse.
Note you also close two times fdmin1[1] and fdmax1[0] and fdmax1[1].
The usage of the pipes is very strange and probably no what you want :
fdmin1 is a pipe between the main process and itself, the main process does if(write(fdmin1[1], &temp1, sizeof(int)) == -1)and later if(read(fdmin1[0], &min1, sizeof(int)) == -1) so that pipe is useless and min1 is temp1
the main process does if(write(fdmax1[1], &temp2, sizeof(int)) == -1) but nobody read that value, that pipe is useless and temp2 = findMax(0, num1, filename); is done for nothing.
the main process child does if(write(fdmin2[1], &temp3, sizeof(int)) == -1) and if(write(fdmax2[1], &temp4, sizeof(int)) == -1) and if(write(fdmin3[1], &temp5, sizeof(int)) == -1) and if(write(fdmax3[1], &temp6, sizeof(int)) == -1) but nobody read, these four pipes are useless and all the min/max computing are done for nothing.
it is the same for the third created process doing if(write(fdmin4[1], &temp7, sizeof(int)) == -1) and if(write(fdmax4[1], &temp8, sizeof(int)) == -1) but nobody read, these two pipes are useless and the min/max computing are done for nothing.
That means at the end you do not get the right min/max value in the main process, but only the min value of the first quarter computing by the main process and all other computing are lost.
The code
//Wait for all processes to finish
int returnStatus;
waitpid(pid1, &returnStatus, 0);
int returnStatus2;
waitpid(pid2, &returnStatus2, 0);
is executed by all the child processes, because you do not exit or return when you have to do.
You also have an undefined behavior because you have a race condition between your processes, the execution is not the same depending on where I had usleep in your code. A parent process must wait for the end of its child process when needed, you do not at the right moment. Note your process numbering is wrong, there are only the main process and two children, so 3 processes rather than 4, //process4 does not exist and that comment is in process 2.
Except in the main process you do not read from the right position in the file because for findMin and findMax the parameter start correspond to a rank of int rather than a position in the file, you must replace
fseek (ft2,start,SEEK_SET);
fseek (ft3,start,SEEK_SET);
by
fseek (ft2,start*sizeof(int),SEEK_SET);
fseek (ft3,start*sizeof(int),SEEK_SET);
You also (try to) read one int too many doing
int length = end - start;
...
fread(&smallestNum,sizeof(int),1,ft2);
for(i = 0; i < length; i++)
{
fread(&temp,sizeof(int),1,ft2);
for instance replace the loop to have
for(i = 1; i < length; i++)
There also are a lot of useless variables in your program, if I compile with option -Wall :
bruno#bruno-XPS-8300:/tmp$ gcc -Wall -g p.c -o p
p.c: In function ‘main’:
p.c:250:16: warning: unused variable ‘max4’ [-Wunused-variable]
int max4;
^
p.c:249:16: warning: unused variable ‘min4’ [-Wunused-variable]
int min4;
^
p.c:248:16: warning: unused variable ‘max3’ [-Wunused-variable]
int max3;
^
p.c:247:16: warning: unused variable ‘min3’ [-Wunused-variable]
int min3;
^
p.c:246:16: warning: unused variable ‘max2’ [-Wunused-variable]
int max2;
^
p.c:245:16: warning: unused variable ‘min2’ [-Wunused-variable]
int min2;
^
p.c:244:16: warning: unused variable ‘max1’ [-Wunused-variable]
int max1;
^
p.c:48:12: warning: unused variable ‘status’ [-Wunused-variable]
int status;
^
p.c:20:8: warning: unused variable ‘temp’ [-Wunused-variable]
temp=0; // used to store each value read from the file
^
p.c:19:8: warning: unused variable ‘i’ [-Wunused-variable]
i, // loop control variable for reading values
^
p.c:17:8: warning: variable ‘pid’ set but not used [-Wunused-but-set-variable]
int pid, // process id of this process
^
bruno#bruno-XPS-8300:/tmp$
Out of that
You must check the value of argc before to do filename = argv[1];.
If fopen(filename, "rb"); fails you must stop the execution, currently you continue with an undefined behavior.
Note also your program can be simplified using array of pipe rather than separated variables for them, allowing you to use a loop rather than the sequence of if(pipe(fdmin1) == -1) ... if(pipe(fdmax4) == -1) .... It is the same to start the child processes, rather than to duplicate the code use a function to write it only one time. Doing that you can have a definition allowing any number of child process rather than dedicated to 4 only.
Going back to the statement
I divide the file up into 4 chunks and have each process evaluate a chunk of the file
This is an extreme case but you have to manage the case the file is too small to be divided by 4, this is not the case in your proposal.
this is done with 4 processes
Considering the main process is count among the 4, 3 children must be created. But rather than to have each child creating an other one if needed, it is more simple to have the 3 children created by the main process and the parallelism is a little better.
A program must be simple, I already said you have a lot of variables for nothing and lot of code is duplicated, also :
It is useless to have so many pipes, only one is enough to allow each child to send the min/max it computed because the pipe reads and writes are guaranteed to be atomic up to PIPE_BUF (larger than the size of 2 int)
It is useless to read the file so many times, you can search for the min and the max at the same time.
And finally a proposal :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#define N 4 /* including the main process */
/* to send/receive result atomicaly through the pipe */
typedef struct {
int min, max;
} MinMax;
void findMinMax(long offset, long n, FILE * fp, MinMax * minmax);
//Calculates minimum and maximum of a number
int main(int argc, char * argv[])
{
const char * filename; // name of file to read
FILE * fp; // file handle for the file
long num; // the number of integer values in the file
long size; // size in bytes of the input file
long offset; // offset in file
int pp[2]; // the unique pipe
int pids[N-1];
MinMax minmax;
int i;
if (argc != 2) {
fprintf(stderr, "Usage: %s <filename>\n", *argv);
exit(-1);
}
filename = argv[1];
fp = fopen(filename, "rb");
if (fp == NULL) {
perror("cannot open file");
exit(-1);
}
/* get file size */
if (fseek(fp, 0, SEEK_END) == -1) { //go to end of file
perror("cannot fseek");
fclose(fp); /* also done automaticaly when exiting program */
exit(-1);
}
size = ftell(fp); //what byte in file am I at?
num = size / sizeof(int); // number of integer values
printf("file size: %li bytes\n", size);
printf("how many integers = %li\n\n", num);
if (num < N) {
fprintf(stderr, "the input file is too small, it must contains at least %i int\n", N);
fclose(fp); /* also done automaticaly when exiting program */
exit(-1);
}
//initializing pipe
if(pipe(pp) == -1) {
perror("Piping failed");
exit(-1);
}
offset = 0;
for (i = 0; i != N-1; ++i) {
pids[i] = fork();
switch (pids[i]) {
case 0:
/* child */
{
FILE * fp2 = fopen(filename, "rb");
if (fp2 == NULL) {
perror("child cannot open file");
exit(-1);
}
findMinMax(offset, num/N, fp2, &minmax);
printf("min max child %d : %d %d\n", i, minmax.min, minmax.max);
if (write(pp[1], &minmax, sizeof(minmax)) != sizeof(minmax)) {
perror("Error writting to pipe");
exit(-1);
}
}
exit(0);
case -1:
/* parent */
perror("Cannot fork");
exit(-1);
default:
/* parent, no error */
offset += (num/N)*sizeof(int);
}
}
findMinMax(offset, (size - offset)/sizeof(int), fp, &minmax);
printf("min max main : %d %d\n", minmax.min, minmax.max);
for (i = 0; i != N-1; ++i) {
int status;
MinMax mm;
if ((waitpid(pids[i], &status, 0) != -1) &&
(status == 0) &&
(read(pp[0], &mm, sizeof(mm)) == sizeof(mm))) {
if (mm.min < minmax.min)
minmax.min = mm.min;
if (mm.max > minmax.max)
minmax.max = mm.max;
}
else
fprintf(stderr, "cannot get result for child %d\n", i);
}
printf("global min max : %d %d\n", minmax.min, minmax.max);
return 0;
}
// function to find the minimum and maximum in the file
// n > 1
void findMinMax(long offset, long n, FILE * fp, MinMax * minmax)
{
int v;
if (fseek(fp, offset, SEEK_SET) == -1) {
perror("cannot fseek");
exit(-1);
}
if (fread(&minmax->min, sizeof(minmax->min), 1, fp) != 1) {
fclose(fp); /* also done automaticaly when exiting program */
perror("cannot read int");
exit(-1);
}
minmax->max = minmax->min;
while (--n) {
if (fread(&v, sizeof(v), 1, fp) != 1) {
fclose(fp); /* also done automaticaly when exiting program */
perror("cannot read int");
exit(-1);
}
if (v < minmax->min)
minmax->min = v;
if (v > minmax->max)
minmax->max = v;
}
fclose(fp); /* also done automaticaly when exiting program */
}
As you can see the code is much simple and I just have to modify #define N 4 to an other value to change the number of processes working in parallel.
Using your second program to generate 1000000 int in aze, compilation and execution of my proposal :
bruno#bruno-XPS-8300:/tmp$ gcc -g -Wall p.c
bruno#bruno-XPS-8300:/tmp$ ./a.out aze
file size: 4000000 bytes
how many integers = 1000000
min max main : 2 999995
min max child 0 : 10 999994
min max child 2 : 0 999998
min max child 1 : 3 999999
global min max : 0 999999
bruno#bruno-XPS-8300:/tmp$
Related
I managed to compile ncat. I am using -k option to keep server open. Instead of accepting data to STDOUT, my goal is to write to files instead. So far I was able to write to a file instead of STDOUT but my goal is to loop through new files on each new connection. Right now it is appending to the same filename_0 and f++ is not incrementing. Here is what I have so far. The original code will be below. The difference is in the else clause, basically if n is actually greater than 0. On each loop, n is 512 bytes until the last chunk. I just want to be able to have new files from each new connection. filename_0, filename_1, filename_3, etc.
MODIFIED CODE:
/* Read from a client socket and write to stdout. Return the number of bytes
read from the socket, or -1 on error. */
int read_socket(int recv_fd)
{
char buf[DEFAULT_TCP_BUF_LEN];
struct fdinfo *fdn;
int nbytes, pending;
int f = 0;
fdn = get_fdinfo(&client_fdlist, recv_fd);
ncat_assert(fdn != NULL);
nbytes = 0;
do {
int n, s;
n = ncat_recv(fdn, buf, 512, &pending);
if (n <= 0) {
if (o.debug)
logdebug("Closing fd %d.\n", recv_fd);
#ifdef HAVE_OPENSSL
if (o.ssl && fdn->ssl) {
if (nbytes == 0)
SSL_shutdown(fdn->ssl);
SSL_free(fdn->ssl);
}
#endif
close(recv_fd);
checked_fd_clr(recv_fd, &master_readfds);
rm_fd(&client_fdlist, recv_fd);
checked_fd_clr(recv_fd, &master_broadcastfds);
rm_fd(&broadcast_fdlist, recv_fd);
conn_inc--;
if (get_conn_count() == 0)
checked_fd_clr(STDIN_FILENO, &master_readfds);
return n;
}
else {
char filename[20];
snprintf(filename, sizeof(char) * 20, "filename_%i", f);
FILE *fp = fopen(filename, "a");
if (fp == NULL)
{
printf("Could not open file");
return 0;
}
//Write(STDOUT_FILENO, buf, n);
s = fwrite(buf, 1, n, fp);
fclose(fp);
f++;
nbytes += n;
}
} while (pending);
return nbytes;
}
ORIGINAL CODE:
int read_socket(int recv_fd)
{
char buf[DEFAULT_TCP_BUF_LEN];
struct fdinfo *fdn;
int nbytes, pending;
fdn = get_fdinfo(&client_fdlist, recv_fd);
ncat_assert(fdn != NULL);
nbytes = 0;
do {
int n;
n = ncat_recv(fdn, buf, sizeof(buf), &pending);
if (n <= 0) {
if (o.debug)
logdebug("Closing fd %d.\n", recv_fd);
#ifdef HAVE_OPENSSL
if (o.ssl && fdn->ssl) {
if (nbytes == 0)
SSL_shutdown(fdn->ssl);
SSL_free(fdn->ssl);
}
#endif
close(recv_fd);
checked_fd_clr(recv_fd, &master_readfds);
rm_fd(&client_fdlist, recv_fd);
checked_fd_clr(recv_fd, &master_broadcastfds);
rm_fd(&broadcast_fdlist, recv_fd);
conn_inc--;
if (get_conn_count() == 0)
checked_fd_clr(STDIN_FILENO, &master_readfds);
return n;
}
else {
Write(STDOUT_FILENO, buf, n);
nbytes += n;
}
} while (pending);
return nbytes;
}
I was able to figure out using the other functions involved. i passed a pointer into this function to write to it. the handler is a function i added the open() file pointer to.
I have a large file (around 1,000,000 characters) in the format "AATACGTAGCTA" and a subsequent file, such as "CGTATC" (10,240 characters). I want to find the largest match of the subsequence within the main sequence. A full, 100% subsequence match may not exist, this is not guaranteed. For the sake of a smaller example, the above would output: Longest match is 4/6 starting at position 5.
I'm working on my C basics, and would like to implement it like so:
The user chooses how many processes they would like to split the work
into.
Each process does 1/nth of the work and updates the shared memory
values located in the struct.
The longest match (it may not be all characters) is reflected in the
struct, as well as it's starting position, and how many
characters were matched. See output below.
Code
#define _GNU_SOURCE
#include <limits.h>
#include <stdio.h>
#include <errno.h>
#include <semaphore.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/shm.h>
typedef struct memoryNeeded {
int start_pos, total_correct;
char sequence[1038336];
char subsequence[10240];
sem_t *sem;
} memoryNeeded;
// Used to check all arguments for validity
int checkArguments(char* p, int argc) {
char *prcs;
errno = 0;
int num;
long conv = strtol(p, &prcs, 10);
if (errno!= 0 || *prcs != '\0' || conv > INT_MAX || conv > 50) {
puts("Please input a valid integer for number of processes. (1-50)");
exit(1);
} else {
num = conv;
if (argc != 4) {
puts("\nPlease input the correct amount of command line arguments (4) in"
"the format: \n./DNA (processes) (sequence) (subsequence)\n");
exit(1);
} else
printf("Looking for string using %d processes...\n", num);
return(num);
}
}
int main (int argc, char* argv[]) {
int processes = checkArguments(argv[1], argc);
key_t shmkey;
int procNumber, shmid, pid;
FILE *sequence;
FILE *subsequence;
char *buf1, *buf2;
// Create shared memory
size_t region_size = sizeof(memoryNeeded);
shmkey = ftok("ckozeny", 5);
shmid = shmget(shmkey, region_size, 0644 | IPC_CREAT);
if (shmid < 0) {
perror("shmget\n");
exit(1);
}
// Create structure in shared memory, attach memory and open semaphore
memoryNeeded *mn;
mn = (memoryNeeded *)shmat(shmid, NULL, 0);
mn->sem = sem_open("sem", O_CREAT | O_EXCL, 0644, 1);
sequence = fopen(argv[2], "r");
subsequence = fopen(argv[3], "r");
// Get file sizes
fseek(sequence, 0L, SEEK_END);
int sz1 = ftell(sequence);
rewind(sequence);
fseek(subsequence, 0L, SEEK_END);
int sz2 = ftell(subsequence);
rewind(subsequence);
// Read files into 2 buffers, which are put into struct mn
buf1 = malloc(sz1);
buf2 = malloc(sz2);
if (sz1 != fread(buf1, 1, sz1, sequence)) {
free(buf1);
}
if (sz2 != fread(buf2, 1, sz2, subsequence)) {
free(buf2);
}
// Initialize struct with necessary values
mn->start_pos = 0;
mn->total_correct = 0;
strncpy(mn->sequence, buf1, sz1);
strncpy(mn->subsequence, buf2, sz2);
fclose(sequence);
fclose(subsequence);
// Begin n forks
for (procNumber = 0; procNumber < processes; procNumber++) {
pid = fork();
if (pid < 0) {
sem_unlink("sem");
sem_close(mn->sem);
printf ("Fork error.\n");
} else if (pid == 0)
break;
}
if (pid != 0) {
while ((pid = waitpid (-1, NULL, 0))){
if (errno == ECHILD)
break;
}
printf("Best match is at position %d with %d/10240 correct.", mn->start_pos, mn->total_correct);
printf ("\nParent: All children have exited.\n");
sem_unlink("sem");
sem_close(mn->sem);
shmdt(mn);
shmctl(shmid, IPC_RMID, 0);
exit(0);
} else {
// this child process will do its 1/nth of the work
sem_wait(mn->sem);
printf ("Child(%d) is in critical section.\n", procNumber);
sleep(1);
int i = 0;
int longest, count = 0;
for (i = 0; i < sz1; i += processes) {
for (int j = 0; j < sz2; j += processes) {
count = 0;
while (mn->sequence[i+j] == mn->subsequence[j]) {
count++;
j++;
}
if (count > longest) {
longest = count;
}
}
}
// If local match is longer than that of the struct, update and unlock
if (longest > mn->total_correct) {
mn->total_correct = count;
mn->start_pos = (i - count);
sem_post(mn->sem);
} else
// If not - unlock and let next process go
sem_post(mn->sem);
exit(0);
}
return 1;
}
The current child code is more or less "pseudocode". I've put it together how it makes sense in my head. (I'm aware this may not be correct or function as intended.) My question is in regard to the child code algorithm near the bottom.
How do I implement this so each child does 1/nth of the work, and finds the longest match, even though it may not match 100%?
Final output would be:
./DNA 6 sequence1 subsequence1
Looking for string using 6 processes...
Best match is at position 123456 with 9876/10240 correct.
Thanks.
I need to read two 1MB+ binary files byte by byte, compare them - If they're not equal, print out the next 16 bytes starting at the unequal byte. The requirement is that it all runs in just 5msecs. Currently, my program is taking 19msecs if the unequal bit is at the end of the two files. Are there any suggestions as to how I can optimize it?
#include <stdio.h> //printf
#include <unistd.h> //file open
#include <fcntl.h> //file read
#include <stdlib.h> //exit()
#include <time.h> //clock
#define SIZE 4096
void compare_binary(int fd1, int fd2)
{
int cmpflag = 0;
int errorbytes = 1;
char c1[SIZE], c2[SIZE];
int numberofbytesread = 1;
while(read(fd1, &c1, SIZE) == SIZE && read(fd2, &c2, SIZE) == SIZE && errorbytes < 17){
for (int i=0 ; i < SIZE ; i++) {
if (c1[i] != c2[i] && cmpflag == 0){
printf("Bytes not matching at offset %d\n",numberofbytesread);
cmpflag = 1;
}
if (cmpflag == 1){
printf("Byte Output %d: 0x%02x 0x%02x\n", errorbytes, c1[i], c2[i]);
errorbytes++;
}
if (errorbytes > 16){
break;
}
numberofbytesread++;
}
}
}
int main(int argc, char *argv[])
{
int fd[2];
if (argc < 3){
printf("Check the number of arguments passed.\n");
printf("Usage: ./compare_binary <binaryfile1> <binaryfile2>\n");
exit(0);
}
if (!((access(argv[1], F_OK) == 0) && (access(argv[2], F_OK) == 0))){
printf("Please check if the files passed in the argument exist.\n");
exit(0);
}
fd[0] = open(argv[1], O_RDONLY);
fd[1] = open(argv[2], O_RDONLY);
if (fd[0]< 0 && fd[1] < 0){
printf("Can't open file.\n");
exit(0);
}
clock_t t;
t = clock();
compare_binary(fd[0], fd[1]);
t = clock() - t;
double time_taken = ((double)t)/(CLOCKS_PER_SEC/1000);
printf("compare_binary took %f milliseconds to execute \n", time_taken);
}
Basically need the optimized way to read binary files over 1MB such that they can be done under 5msecs.
First, try reading larger blocks. There's no point in performing so many read calls when you can read everything at once. Using 2 MB of memory is not a deal nowadays. Disk I/O calls are inherently expensive, their overhead is significant too, but can be reduced.
Second, try comparing integers (or even 64-bit longs) instead of bytes in each iteration, that reduces the number of loops you need to do significantly. Once you find a missmatch, you can still switch to the byte-per-byte implementation. (of course, some extra trickery is required if the file length is not a multiple of 4 or 8).
first thing caught my eye is this
if (cmpflag == 1){
printf("Byte Output %d: 0x%02x 0x%02x\n", errorbytes, c1[i], c2[i]);
errorbytes++;
}
if (errorbytes > 16){
break;
}
yourcmpflag checking is useless maybe this thing do a little optimaztion
if (c1[i] != c2[i] && cmpflag == 0){
printf("Bytes not matching at offset %d\n",numberofbytesread);
printf("Byte Output %d: 0x%02x 0x%02x\n", errorbytes, c1[i], c2[i]);
errorbytes++;
if (errorbytes > 16){
break;
}
}
you can do array compare built in function, or increase your buffer too
So i'm currently trying to code in unix using shared memory and the fork() function, I have an array of 10 structs and I would like to put that array into shared memory so that it can be accessed by a client program. I was hoping someone could point me in the right direction on how to do this.
the code I currently have is:
// Compiler Directives
// Standard Library Inclusions
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <unistd.h>
#include <time.h>
//Other Inclusions
struct strProcess
{
int nPriority;
int nPid;
};
// Function Prototypes (if not included within a header file)
int frand (int nInput);
int finval (int nInput);
void fsortasc(struct strProcess pArray[],int nInput);
// Main
int main(void)
{
// Variable Declarations
int nShmid,i,arraySize,nRpriority,j, nInput;
key_t nKey;
char *ptrshm, *ptrs;
int nSize;
pid_t pid;
struct strProcess pArray[10];
struct strProcess *Array;
Array = pArray;
// Code start
nKey = 5678;
FILE *f = fopen("logfile.txt", "w");
if (f == NULL)
{
printf("Error opening file!\n");
exit(1);
}
printf("please enter the amount of processes to create for this cycle between 1 and 10 \n");
scanf("%d",&nInput);
if (nInput <= 0 || nInput > 10)
{
nInput = finval(nInput);
}
printf("%d", nInput);
nSize = sizeof(pArray) * 10;
//create segment
if ((nShmid = shmget(nKey,nSize, IPC_CREAT | 0666)) <0)
{
perror("shmget");
exit(1);
}
printf("segment created \n\n");
fprintf(f, "shared memory segment created");
Array *pArray = shmat(shmid,NULL, 0);
if (Array* pArray (-1))
{
perror("shmat");
exit(1);
}
printf("segment attached \n\n");
fprintf(f, "shared memory segment attached");
for(i = 0 ; i < nInput; i++)
{
if ((pid = fork()) < 0)
{
perror("fork");
exit(1);
}
if (pid == 0)
{
Array[i].nPid = getpid();
nRpriority = frand(nInput);
Array[i].nPriority = nRpriority;
printf("print job created with Pid %d and priority number %d",
getpid(), nRpriority);
fprintf(f, "print job created with Pid %d and priority number %d",
getpid(), nRpriority);
}
}
fprintf(f, " %d processes have been created", nInput);
fsortasc(pArray, nInput); /*sort array into ascending order by nRpriority values*/
// Function Definitions - in alphabetical order
int finval (int nInput)
{
while(nInput <= 0 || nInput > 10)
{
printf("please enter a number between 1 and 10 \n");
scanf("%d", &nInput);
}
return nInput;
}
int frand (int nInput)
{
int nRand;
nRand = (rand() % nInput)+1; /*set nRand == a random number
inbetween nInput and 1*/
return nRand; /*return the random number*/
}
void fsortasc(struct strProcess pArray[],int nInput)
{
struct strProcess temp; /*temporary storage for elements being swapped*/
int i, j;
for (i = 0; i < nInput - 1; i++)
{
for (j = 0; j < (nInput - 1-i); j++)
{
if (pArray[j].nPriority > pArray[j + 1].nPriority) /*if the current element is greater than the next element*/
{
temp = pArray[j];
pArray[j] = pArray[j + 1];
pArray[j + 1] = temp;
}
}
}
I have an array of 10 structs and I would like to put that array into shared memory ? It's very simple, first create array 10 struct variable and then create the shared memory using shmget of required size and then attach that shared memory with pointer and finally copy array of 10 structs into pointer attached with shmat. I added below simple code to understand your requirement.
typedef struct company {
int emp_id;
}cmp;
int main(int argc,char *argv[]) {
cmp cmp_info[10];
int shm_id, sze = sizeof(cmp_info) ,i;
/* I have an array of 10 structs -- with some data like emp_id*/
for(i=0 ;i<10 ;i++) {
printf("\n enter emp % Id \n",i);
scanf("%d",&cmp_info[i].emp_id);
}
/* create the shared memory of 'sze' size. */
shm_id = shmget(10,sze, IPC_CREAT | 0664);
perror("shmget");
/* attach the shared memory with shm_id */
cmp *shm_ptr = shmat(shm_id, NULL, 0);
perror("shmat");
/* I have an array of 10 structs and I would like to put that array into shared memory */
shm_ptr = cmp_info;//now shared memory contains array of 10 struct data
/** print using shm_ptr to verify **/
for(i=0;i<10;i++) {
printf("Employee[%d] Id is : [%d]\n",i,shm_ptr[i].emp_id);
}
/* once above things are done clients program can read from shared memory */
/** finaly de-atach the shared memory */
shmdt(shm_ptr);
}
Below snapshot is for your code, Explanation is in comments.
struct strProcess {
int nPriority;
int nPid;
};
int main(int argc,char *argv[]) {
// Variable Declarations
int nShmid,i,arraySize,nRpriority,j, nInput;
key_t nKey;
char *ptrshm, *ptrs;
int nSize;
struct strProcess pArray[10];//array of 10 structure
struct strProcess *Array;
//Array = pArray;
nKey = 5678;
FILE *f = fopen("logfile.txt", "w");
if(f == NULL) {
printf("Error opening file!\n");
exit(1);
}
nSize = sizeof(pArray);
//create segment
if((nShmid = shmget(nKey,nSize, IPC_CREAT | 0666)) < 0) {
perror("shmget");
exit(1);
}
else {
perror("shmget");
fprintf(f, "\n shared memory segment created\n");
}
Array = shmat(nShmid, NULL, 0);
perror("shmat");
/** loop to create exaCtly 10 process */
nInput = 10; /** call finval function **/
for(i = 0 ; i < nInput; i++) {
if(fork() == 0) {
srand(getpid());
Array[i].nPid = getpid();
nRpriority = rand()%10 + 1;//putting random no b/w 1 to 10..u can call your function also
Array[i].nPriority = nRpriority;
fprintf(f, "\nprint job created with Pid [%d] and priority number [%d]\n",
Array[i].nPid, Array[i].nPriority);
break;//must to avoid repeating
}
else {
;//parent does nothing
}
}
shmdt(Array);
//fprintf(f,"\n total [%d] processes have been created\n",nInput);
/* call fsortasc(pArray, nInput); */
fclose(f);
}
I hope it helps.
Scenario: Say I have 8 files that I want to sort all the data of numbers it has in order from least to greatest. Only leaf proceses can sort all the numbers that a file contains. These leaf processes must send the sorted data to a parent process via pipes. This parent process will compare the data is receives and send which ever number is smaller to the next process up. It will do this until all number in the pipe is empty.
So think of it as a tree. We have one master process. With 8 files to sort, the Master process will spawn 2 processes off of it (a left and a right). Those two new processes will spawn their own processes. This will happen until there are 8 leaf processes at the bottom. Internal nodes can only hold onto one number. These will pass their number along a series of pipes until they reach the master process. The master process will output its piped contents to a file.
I've included the code here (as it is a bit lengthy but straightforward).
This works if I have 2 files to sort. So we have 1 master process and then two children. The two children sort their file's numbers and then pass them up. The master process then prints out the data in order from the pipes. However if I add some complexity (4 files), the leaf processes still send their data up, however when the master process begins to read the from the internal nodes pipes, it thinks it is empty and finishes the program without any data.
Any idea why the master process is thinking that its left and right pipes are empty?
Like I said, works great when there is one parent and 2 children. Anymore processes and it fails. (assuming that processing will happen in powers of 2).
NOTE: perror is being used for debugging purposes.
full program here [very messy as I have been doing a lot with it but it will compile.
The updated code in Pastebin is not a compilable function - let alone the complete program. That makes it hard to say what's really wrong.
However, one of the problems is in the code fragment:
if (pipe(upPipe) < 0 || pipe(leftPipe) < 0 || pipe(rightPipe) < 0)
...error exit...
if ((leftPID = fork()) < 0)
...error exit...
if(leftPID == 0){
fMax = ((fMax)/2);
dup2(leftPipe[WRITE], upPipe[WRITE]);
pipe(leftPipe);
pipe(rightPipe);
The call to dup2() is odd; you carefully map the write channel of the left pipe to the write channel of the up pipe.
The two pipe() calls after the dup2() fairly promptly screw up everything in the left child, opening 4 more file descriptors but losing the previous values stored in leftPipe and rightPipe.
You need to make your problem statement clearer. I cannot fathom from what you've got what you're supposed to have. There's a call to convertToInt() which takes no arguments and returns no value; what on earth is that doing? There's a call to freeMem(); it is not clear what that does.
z.c:42: error: ‘numberChar’ undeclared (first use in this function)
z.c:42: error: ‘sizeNumbers’ undeclared (first use in this function)
z.c:43: warning: implicit declaration of function ‘readFile’
z.c:43: error: ‘fileNames’ undeclared (first use in this function)
z.c:45: warning: implicit declaration of function ‘convertToInt’
z.c:46: error: ‘i’ undeclared (first use in this function)
z.c:46: error: ‘numbs’ undeclared (first use in this function)
z.c:47: error: ‘numbers’ undeclared (first use in this function)
z.c:48: warning: implicit declaration of function ‘freeMem’
Sorry, your question is unanswerable because you are not giving us:
The accurate requirements.
The code you've actually got compiling.
Your code does not have a good clean break up of the functions. Do you use a VCS (version control system - such as git)? If not, you should. I made the changed version below - which is essentially a complete rewrite - in 9 check-ins, and should probably have made more smaller check-ins than that. But using a VCS was crucial to me; it allowed me to make changes with confidence, knowing I would not lose anything valuable. And I didn't have to comment code out; I removed the stuff I didn't want. The solution below is 261 lines; the original was about 687 lines in total, including a lot of commented out code; when I'd finished stripping out the comments, etc, it came down to 469 lines.
When I got your code running (and reporting on which files were being opened by each child), I found that there were 2 processes opening each of files 2 and 3 (and since the data files didn't exist at the time, they failed at that point).
The revised code has an almost clean structure; the odd bit is the 'convertToString()' phase which reads binary integers off a pipe and converts them to ASCII output again. It works; I'm not convinced it is elegant. Instead of using an array of hard-coded file names, it takes an arbitrary list of file names from the command line (it does not have to be 8; it has been tested with 0 through 8, and I've no reason to think it won't handle 20 or more). I did a fair amount of testing with:
./piped-merge-sort [1-8]
There is copious diagnostic output. I've used two functions that I find vastly helpful in my work - I have them packaged up with some other related code in a more complex package, but the simple versions of err_error() and err_remark() functions really help me. Note that these versions report the PID of the reporting process for each call. They're also careful to pre-format the message into a string and then write the string in one print to standard error; otherwise, I was getting a lot of interleaved output which was confusing at best.
'Nuff said - here's the code:
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>
/* Error reporting */
static void err_vremark(char *fmt, va_list args)
{
char buffer[256];
int errnum = errno;
int buflen = snprintf(buffer, sizeof(buffer), "%d: ", (int)getpid());
buflen += vsnprintf(buffer + buflen, sizeof(buffer) - buflen, fmt, args);
if (errnum != 0)
buflen += snprintf(buffer + buflen, sizeof(buffer) - buflen,
": errno = %d (%s)", errnum, strerror(errnum));
fprintf(stderr, "%s\n", buffer);
}
static void err_error(char *fmt, ...)
{
va_list args;
va_start(args, fmt);
err_vremark(fmt, args);
va_end(args);
exit(1);
}
static void err_remark(char *fmt, ...)
{
va_list args;
va_start(args, fmt);
err_vremark(fmt, args);
va_end(args);
}
enum { READ = 0 };
enum { WRITE = 1 };
enum { BUFFER_SIZE = 256 };
static int *a_data = 0;
static int a_used = 0;
static int a_size = 0;
void readFile(char const *fileName);
void freeMem(void);
void sortArray(void);
int intcmp(void const *n1, void const *n2);
static void sortMergeFiles(int fd, int number, char **names);
static void sortOneFile(int fd, const char *file);
static void convertToString(int fd, FILE *fp);
int main(int argc, char **argv)
{
int m_pipe[2];
pid_t pid;
if (pipe(m_pipe) < 0)
err_error("Failed to create master pipe");
if ((pid = fork()) < 0)
err_error("Failed to fork master");
else if (pid == 0)
{
close(m_pipe[READ]);
sortMergeFiles(m_pipe[WRITE], argc - 1, &argv[1]);
close(m_pipe[WRITE]);
}
else
{
close(m_pipe[WRITE]);
convertToString(m_pipe[READ], stdout);
close(m_pipe[READ]);
}
return 0;
}
static void convertToString(int fd, FILE *fp)
{
int value;
while (read(fd, &value, sizeof(int)) == sizeof(int))
fprintf(fp, "%d\n", value);
}
static int readInteger(int fd, int *value)
{
if (read(fd, value, sizeof(int)) != sizeof(int))
return EOF;
return 0;
}
static void writeInteger(int fd, int value)
{
if (write(fd, &value, sizeof(int)) != sizeof(int))
err_error("Failed to write integer to fd %d", fd);
}
static void mergeFiles(int fd_in1, int fd_in2, int fd_out)
{
int value_1;
int value_2;
int eof_1 = readInteger(fd_in1, &value_1);
int eof_2 = readInteger(fd_in2, &value_2);
while (eof_1 != EOF && eof_2 != EOF)
{
err_remark("v1: %d; v2: %d", value_1, value_2);
if (value_1 <= value_2)
{
writeInteger(fd_out, value_1);
eof_1 = readInteger(fd_in1, &value_1);
}
else
{
writeInteger(fd_out, value_2);
eof_2 = readInteger(fd_in2, &value_2);
}
}
while (eof_1 != EOF)
{
err_remark("v1: %d", value_1);
writeInteger(fd_out, value_1);
eof_1 = readInteger(fd_in1, &value_1);
}
while (eof_2 != EOF)
{
err_remark("v2: %d", value_2);
writeInteger(fd_out, value_2);
eof_2 = readInteger(fd_in2, &value_2);
}
}
static void sortMergeFiles(int fd, int number, char **names)
{
assert(number >= 0);
if (number == 0)
return;
else if (number == 1)
sortOneFile(fd, names[0]);
else
{
err_remark("Non-Leaf: processing %d files (%s .. %s)", number, names[0], names[number-1]);
int mid = number / 2;
int l_pipe[2];
int r_pipe[2];
pid_t l_pid;
pid_t r_pid;
if (pipe(l_pipe) < 0 || pipe(r_pipe) < 0)
err_error("Failed to create pipes");
if ((l_pid = fork()) < 0)
err_error("Failed to fork left child");
else if (l_pid == 0)
{
close(l_pipe[READ]);
close(r_pipe[READ]);
close(r_pipe[WRITE]);
sortMergeFiles(l_pipe[WRITE], mid, names);
close(l_pipe[WRITE]);
exit(0);
}
else if ((r_pid = fork()) < 0)
err_error("Failed to fork right child");
else if (r_pid == 0)
{
close(r_pipe[READ]);
close(l_pipe[READ]);
close(l_pipe[WRITE]);
sortMergeFiles(r_pipe[WRITE], number - mid, names + mid);
close(r_pipe[WRITE]);
exit(0);
}
else
{
close(l_pipe[WRITE]);
close(r_pipe[WRITE]);
mergeFiles(l_pipe[READ], r_pipe[READ], fd);
close(l_pipe[READ]);
close(r_pipe[READ]);
err_remark("Non-Leaf: finished %d files (%s .. %s)", number, names[0], names[number-1]);
}
}
}
static void addNumberToArray(int number)
{
assert(a_used >= 0 && a_used <= a_size);
if (a_used == a_size)
{
int n_size = (a_size + 1) * 2;
int *n_data = realloc(a_data, sizeof(*n_data) * n_size);
if (n_data == 0)
err_error("Failed to allocate space for %d numbers", n_size);
a_data = n_data;
a_size = n_size;
}
a_data[a_used++] = number;
}
/* Could be compressed to write(fd, a_data, a_used * sizeof(int)); */
/* Arguably should check for write errors - but not SIGPIPE */
static void writeArray(int fd)
{
for (int i = 0; i < a_used; i++)
{
err_remark("Write: %d", a_data[i]);
write(fd, &a_data[i], sizeof(int));
}
}
void readFile(char const *fileName)
{
char buffer[BUFFER_SIZE];
FILE *fp;
fp = fopen(fileName, "r");
if (fp == NULL)
err_error("Failed to open file %s for reading", fileName);
while (fgets(buffer, sizeof(buffer), fp) != NULL)
{
char *nl = strchr(buffer, '\n');
if (nl != 0)
*nl = '\0';
err_remark("Line: %s", buffer);
addNumberToArray(atoi(buffer));
}
fclose(fp);
}
int intcmp(const void *n1, const void *n2)
{
const int num1 = *(const int *) n1;
const int num2 = *(const int *) n2;
return (num1 < num2) ? -1 : (num1 > num2);
}
void sortArray(void)
{
qsort(a_data, a_used, sizeof(int), intcmp);
}
void freeMem(void)
{
free(a_data);
}
static void sortOneFile(int fd, const char *file)
{
err_remark("Leaf: processing file %s", file);
readFile(file);
sortArray();
writeArray(fd);
freeMem();
err_remark("Leaf: finished file %s", file);
}