C Multithreaded Word count - c

So I'm working on a multithreaded word count program in c and I was having some problems with my code, while searching here I found an old question that was similar to my own project. Rather than trying to rework my code which was full of problems, I decided to try and get this other one working, then modify it to make what I want.
The code takes a txt file as input. The problem is when you run the program there is a segmentation fault.
Here's the code:
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
/* Work description handed to each countFrequency() thread via pthread_create(). */
struct thread_data{
/* Stream to read from; main() stores the same FILE pointer in every element. */
FILE *fp;
/* Forwarded to fseek() as its offset argument. */
long int offset;
/* Forwarded to fseek() as its third (whence) argument. */
int start;
/* Byte count used for the malloc'd buffer and for each fread() item. */
int blockSize;
};
/* Global word total; every countFrequency() thread increments it, with no lock in this code. */
int words=0;
/*
 * Thread start routine. `data` is a struct thread_data *.
 * Seeks td->fp, then reads td->blockSize bytes per fread() call and looks only
 * at the first byte of each item: a non-whitespace byte that follows
 * whitespace bumps the global `words`.
 * Ends the thread with pthread_exit(NULL); the trailing return is never reached.
 */
void *countFrequency(void* data){
struct thread_data* td=data;
/* NOTE(review): the malloc result is used by fread() without a NULL check. */
char *buffer = malloc(td->blockSize);
/* i and c are set to 0 below and never read afterwards. */
int i,c;
i=0;c=0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
/* NOTE(review): td->start is passed where fseek() expects a whence constant
   (SEEK_SET / SEEK_CUR / SEEK_END) -- confirm the intended argument order. */
fseek(td->fp, td->offset, td->start);
/* last is updated on every iteration but its value is never used. */
char last = ' ';
/* Loop runs only while a complete blockSize-byte item can be read. */
while ((fread(buffer, td->blockSize, 1, td->fp))==1){
if ( buffer[0]== ' ' || buffer[0] == '\t' ){
state = WHITESPACE;
}
else if (buffer[0]=='\n'){
state = WHITESPACE;
}
else {
if ( state == WHITESPACE ){
/* Shared counter updated without any synchronisation between threads. */
words++;
}
state = WORD;
}
last = buffer[0];
}
free(buffer);
pthread_exit(NULL);
return NULL;
}
/*
 * Opens the file named by argv[1], asks for a thread count on stdin, starts
 * that many countFrequency() threads on the shared stream, joins them and
 * prints the global `words`.
 * Exits with -1 when no path is given or the file cannot be opened.
 */
int main(int argc, char **argv){
int nthreads, id, blockSize,len;
FILE *fp;
pthread_t *threads;
if (argc < 2){
fprintf(stderr, "Usage: ./a.out <file_path>");
exit(-1);
}
if((fp=fopen(argv[1],"r"))==NULL){
printf("Error opening file");
exit(-1);
}
printf("Enter the number of threads: ");
/* NOTE(review): scanf's return value is ignored, so nthreads may be left unset on bad input. */
scanf("%d",&nthreads);
/* Variable-length array sized by the value just read. */
struct thread_data data[nthreads];
/* Allocation result is not checked and the array is never freed. */
threads = malloc(nthreads*sizeof(pthread_t));
/* File length: seek to the end and take the position (long narrowed to int). */
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n",len);
/* Ceiling division so that nthreads blocks cover len bytes. */
blockSize=(len+nthreads-1)/nthreads;
printf("size= %d\n",blockSize);
/* NOTE(review): only fp, offset and start are filled in; data[id].blockSize is
   never assigned here, yet countFrequency() reads it. */
for(id = 0; id < nthreads; id++){
data[id].fp=fp;
data[id].offset = blockSize;
data[id].start = id*blockSize+1;
}
/* Stores the same value the loop above already wrote for the last element. */
data[nthreads-1].start=(nthreads-1)*blockSize+1;
/* Return values of pthread_create() are not checked. */
for(id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL, &countFrequency,&data[id]);
for(id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("%d\n",words);
return 0;
}
And here's a link to the original post: original

You invoked undefined behavior by using the indeterminate value of the uninitialized automatic-storage variable nthreads in struct thread_data data[nthreads];.
Try moving the line after scanf("%d",&nthreads);.

Related

How to fix a Segmentation Fault in pthread_create call

I am having an issue with my current code. I am working on a project where I am using threads to read a group of files from the terminal and tell how many lines there are in the individual and total grouping of files. My question is that when I run the code I get a core dump and when I run my code through gdb I get a segmentation fault at the pthread_create call. Is it because of my implementation or is it due to something else in my code?
/* Fixed number of slots in thread_data_array. */
#define NUM_THREADS 12
/* Argument record passed to each filecount() thread. */
struct thread_data{
/* File name; main() stores argv[t] here. */
char *thread_id;
/* Line-count slot; main() adds it to total_count. */
int count;
};
/* NOTE(review): main() indexes this by t in 1..argc-1, so more than
   NUM_THREADS-1 file arguments would index past the end. */
struct thread_data thread_data_array[NUM_THREADS];
/*
 * Thread start routine intended to count '\n' characters in one file.
 * thread_arg is a struct thread_data *. The tally is kept in the local
 * `count` and is not stored back into the struct.
 * Calls exit(-1) if the file cannot be opened; otherwise ends with pthread_exit(NULL).
 */
void* filecount(void * thread_arg){
char thread_id;
int count;
struct thread_data *thread;
thread = (struct thread_data *) thread_arg;
/* Copies only the first character of the file name. */
thread_id = *thread->thread_id;
count = thread->count;
/* NOTE(review): &thread_id points at a single char, not a NUL-terminated string. */
FILE *fp = fopen(&thread_id, "r");
if (fp == NULL) {
/* NOTE(review): %s is given a char here rather than a char *. */
fprintf(stderr, "Cannot open %s\n", thread_id);
exit(-1);
}
/* NOTE(review): getc() returns int; storing it in a char makes the EOF comparison unreliable. */
for (char c = getc(fp); c != EOF; c = getc(fp))
if (c == '\n')
count++;
fclose(fp);
pthread_exit(NULL);
}
/*
 * Starts one filecount() thread per command-line argument and prints a total.
 * Returns 0 immediately when no file names are given; otherwise ends the main
 * thread with pthread_exit(NULL).
 */
int main(int argc, char *argv[]){
if (argc == 1)
return 0;
/* Sized argc; slot 0 is unused because the loop starts at t=1. */
pthread_t threads[argc];
int t, total_count, count;
total_count = 0;
for(t=1; t<argc; t++){
thread_data_array[t].thread_id = argv[t];
/* NOTE(review): count has not been given a value at this point. */
thread_data_array[t].count = count;
printf("Creating thread for file: %s",thread_data_array[t].thread_id);
///This is the line in question///
pthread_create(&threads[t], NULL,filecount,(void *) &thread_data_array[t]);
/* NOTE(review): there is no pthread_join() before the result is read, so the
   thread may not have run yet. This prints total_count, not the per-file count. */
printf("File name: %s --- line count: %d", thread_data_array[t].thread_id, total_count);
total_count += thread_data_array[t].count;
}
printf("Total line count: %d", total_count);
pthread_exit(NULL);
}
To summarize some of the comments:
This
char thread_id;
thread_id = *thread->thread_id;
will give you the first character of the filename. So while &thread_id has the correct type (char *) for the first argument of fopen, it's not a pointer to a null-terminated string. This is undefined behaviour.
In
thread_data_array[t].count = count;
count is uninitialized, and its value is indeterminate. This is undefined behaviour.
You need to wait for each thread to finish before you use its result. pthread_join is the function to use here.
getc (fgetc) returns type int, which allows for the check against EOF. Narrowing to char removes the ability to properly test for EOF.
thread_data_array should match the threads array in size.
Here is a refactored program:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
/* One unit of work: the file to scan and the newline tally written back by the worker. */
struct thread_data {
    char *thread_id;    /* path handed to fopen() */
    int count;          /* incremented once per '\n' read; the caller sets the starting value */
};

/*
 * Thread start routine: counts newline characters in the file named by
 * thread_arg->thread_id and adds them to thread_arg->count.
 *
 * thread_arg  pointer to a struct thread_data owned by the caller.
 * Returns NULL. If the file cannot be opened, a message goes to stderr and
 * the calling thread is ended with pthread_exit(NULL), leaving count untouched.
 */
void *filecount(void *thread_arg)
{
    struct thread_data *job = thread_arg;
    FILE *in = fopen(job->thread_id, "r");

    if (in == NULL) {
        fprintf(stderr, "Cannot open %s\n", job->thread_id);
        pthread_exit(NULL);
    }

    /* int, so that EOF stays distinct from every valid byte value. */
    int ch;
    while ((ch = getc(in)) != EOF) {
        if (ch == '\n') {
            job->count++;
        }
    }

    fclose(in);
    return NULL;
}
/*
 * Counts newline characters across all files named on the command line,
 * running one filecount() thread per file, and prints the sum.
 *
 * Returns 0. With no file arguments nothing is printed.
 * A thread that cannot be started is reported on stderr and skipped, so it is
 * never joined and contributes nothing to the total.
 */
int main(int argc, char *argv[]){
    /* Also covers argc == 0, which would otherwise give the arrays a negative size. */
    if (argc < 2)
        return 0;
    /* Drop the program name so argv[0..argc-1] are exactly the file names. */
    argv++;
    argc--;
    pthread_t threads[argc];
    int started[argc];
    struct thread_data thread_data_array[argc];
    int total_count = 0;
    for (int i = 0; i < argc; i++) {
        thread_data_array[i].thread_id = argv[i];
        thread_data_array[i].count = 0;
        /* pthread_create() returns an error number directly (it does not set errno). */
        int rc = pthread_create(&threads[i], NULL, filecount, &thread_data_array[i]);
        started[i] = (rc == 0);
        if (rc != 0)
            fprintf(stderr, "Cannot start thread for %s (error %d)\n", argv[i], rc);
    }
    for (int i = 0; i < argc; i++) {
        /* threads[i] is only a valid handle when creation succeeded. */
        if (!started[i])
            continue;
        pthread_join(threads[i], NULL);
        total_count += thread_data_array[i].count;
    }
    printf("Total line count: %d\n", total_count);
    return 0;
}

Creating a multithreaded pthread program that counts the number of words in a text file using C on Linux

I'm pretty new to Linux and C. For my Operating Systems class we're supposed to write code that partitions the text file into 8 segments. The file can't be partitioned manually. I used gcc assign4.c -Wall -Werror -pthread on the command line and it compiled without errors. Then I entered ./a.out and what printed out was " Assign4.txt". Can someone please guide me as to what may be wrong?
Below is the code:
#include<stdio.h>
#include<pthread.h>
#include<stdlib.h>
/* Work description handed to each countFrequency() thread via pthread_create(). */
struct thread_data
{
/* Stream to read from; main() stores the same FILE pointer in every element. */
FILE *fp;
/* Forwarded to fseek() as its offset argument. */
long int offset;
/* Forwarded to fseek() as its third (whence) argument. */
int start;
/* Byte count used for the malloc'd buffer and for each fread() item. */
int blocksize;
};
/* Global word total; incremented by countFrequency() threads with no lock in this code. */
int words = 0;
/*
 * Thread start routine. `data` is a struct thread_data *.
 * Seeks td->fp, reads one td->blocksize-byte item and, if its first byte is
 * not whitespace, bumps the global `words`.
 * NOTE(review): free() and pthread_exit() sit inside the while body, so the
 * thread ends after the first successful fread(). If the first fread() fails,
 * the function returns NULL without freeing buffer.
 */
void *countFrequency(void* data)
{
struct thread_data* td=data;
/* NOTE(review): the malloc result is used by fread() without a NULL check. */
char *buffer = malloc(td->blocksize);
enum states {WHITESPACE, WORD};
int state = WHITESPACE;
/* NOTE(review): td->start is passed where fseek() expects a whence constant
   (SEEK_SET / SEEK_CUR / SEEK_END) -- confirm the intended argument order. */
fseek(td->fp,td->offset,td->start);
while ((fread(buffer,td->blocksize,1,td->fp))==1)
{
if (buffer[0]==' '||buffer[0]=='\t')
{
state = WHITESPACE;
}
else if (buffer[0]=='\n')
{
state = WHITESPACE;
}
else
{
if (state == WHITESPACE)
{
state = WORD;
/* Shared counter updated without any synchronisation between threads. */
words++;
}
}
/* Executed at the end of the first iteration: the loop never repeats. */
free(buffer);
pthread_exit(NULL);
}
return NULL;
}
/*
 * Opens the file named by argv[1], reads a thread count from stdin, starts
 * that many countFrequency() threads on the shared stream, joins them and
 * prints words+1.
 * Exits with -1 when no path is given or the file cannot be opened.
 */
int main(int argc,char **argv)
{
int nthreads, id, blockSize,len;
FILE *fp;
pthread_t *threads;
if (argc < 2)
{
/* Running without an argument prints this literal text and exits. */
fprintf(stderr, "Assign4.txt");
exit(-1);
}
if ((fp=fopen(argv[1],"r"))== NULL)
{
printf("Error opening file");
exit(-1);
}
/* NOTE(review): the prompt asks for a filename, but the scanf below reads an
   integer into nthreads and its return value is ignored. */
printf("Enter filename: ");
scanf("%d",&nthreads);
/* Variable-length array sized by the value just read. */
struct thread_data data[nthreads];
/* Allocation result is not checked and the array is never freed. */
threads = malloc(nthreads*sizeof(pthread_t));
/* File length: seek to the end and take the position (long narrowed to int). */
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n",len);
/* Ceiling division so that nthreads blocks cover len bytes. */
blockSize = (len+nthreads-1)/nthreads;
printf("size= %d\n",blockSize);
/* NOTE(review): only fp, offset and start are filled in; data[id].blocksize is
   never assigned here, yet countFrequency() reads it. */
for(id = 0; id < nthreads; id++)
{
data[id].fp=fp;
data[id].offset = blockSize;
data[id].start = id*blockSize+1;
}
/* Stores the same value the loop above already wrote for the last element. */
data[nthreads-1].start = (nthreads-1)*blockSize+1;
/* Return values of pthread_create() are not checked. */
for(id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL,
&countFrequency,&data[id]);
for(id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("Total: %d\n", words+1);
return 0;
}

How to shuffle 2 different text file into 1?

#include <stdio.h>
/*
 * Appends the contents of data/2.txt to the end of data/1.txt, copying in
 * chunks of at most 63 characters.
 * Returns 0 on success, 1 if either file cannot be opened or the final
 * flush of data/1.txt fails.
 */
int main(){
    char temp[64];
    FILE *fp1=fopen("data/1.txt","a");
    if(fp1==NULL){
        perror("data/1.txt");
        return 1;
    }
    FILE *fp2=fopen("data/2.txt","r");
    if(fp2==NULL){
        perror("data/2.txt");
        fclose(fp1);
        return 1;
    }
    /* fgets() stops at a newline or after sizeof temp - 1 characters, whichever comes first. */
    while(fgets(temp,sizeof temp,fp2)!=NULL){
        fputs(temp,fp1);
    }
    fclose(fp2);
    /* fclose() flushes buffered output, so a write failure surfaces here. */
    if(fclose(fp1)!=0){
        perror("data/1.txt");
        return 1;
    }
    return 0;
}
With such code I was able to combine 2 different text file into 1.
data/1.txt contents: abcdefghijk
data/2.txt contents: ABCDE
Outcome: abcdefghijkABCDE
However, I am struggling with shuffling 2 different text file.
Wanted result: aAbBcCdDeEfghijk
Followings are my current code.
#include <stdio.h>
#include <string.h>
/*
 * Harness for the interleaving exercise: opens data/1.txt and data/2.txt for
 * reading and data/out.txt for writing, leaves a placeholder where the
 * interleaving code goes, then re-reads data/out.txt and prints PASS when its
 * first line equals "aAbBcCdDeEfghijk", FAIL otherwise.
 * Returns 0 after the check, 1 if any file cannot be opened.
 */
int main(){
    FILE *fp1,*fp2,*fp_out;
    /* int rather than char so the EOF value returned by fgetc() stays distinguishable. */
    int ch1,ch2;
    int result=1;
    fp1=fopen("data/1.txt","r");
    if(fp1==NULL){
        perror("data/1.txt");
        return 1;
    }
    fp2=fopen("data/2.txt","r");
    if(fp2==NULL){
        perror("data/2.txt");
        fclose(fp1);
        return 1;
    }
    fp_out=fopen("data/out.txt","w");
    if(fp_out==NULL){
        perror("data/out.txt");
        fclose(fp1);
        fclose(fp2);
        return 1;
    }
    //shuffling code area//
    fclose(fp1);
    fclose(fp2);
    fclose(fp_out);
    char buf[64]={};
    fp_out=fopen("data/out.txt","r");
    if(fp_out==NULL){
        perror("data/out.txt");
        return 1;
    }
    /* If nothing can be read, buf stays empty and the comparison reports FAIL. */
    fgets(buf,64,fp_out);
    if(!strncmp("aAbBcCdDeEfghijk",buf,64))
        printf("PASS\n");
    else
        printf("FAIL\n");
    fclose(fp_out);
    return 0;
}
How can I design a code in "shuffling code area" in order to have outcomes like wanted result? I have thought about making 2 different FOR loops and combining but it kept showed an error.
This is some dirty way to do the job.
You can read the file which ever you want to write first character first and then read a character from second file and write both into third file one after the other.
Just adding extra code as per your need.
This just works for your case , not tested with many cases and corner cases.
#include <stdio.h>
#include <string.h>
/*
 * Interleaves data/1.txt and data/2.txt character by character into
 * data/out.txt (first file first). Once one input is exhausted, the rest of
 * the other is copied through. Then re-reads the output, prints its first
 * line, and prints PASS when it starts with "aAbBcCdDeEfghijk", FAIL otherwise.
 * Returns 0 after the check, 1 if any file cannot be opened.
 */
int main(){
    FILE *fp1,*fp2,*fp_out;
    /* fgetc() returns int; keeping it as int lets EOF be told apart from every byte value. */
    int ch1,ch2;
    int file1_content_over = 0;
    int file2_content_over = 0;
    fp1 = fopen("data/1.txt","r");
    if(fp1 == NULL){
        perror("data/1.txt");
        return 1;
    }
    fp2 = fopen("data/2.txt","r");
    if(fp2 == NULL){
        perror("data/2.txt");
        fclose(fp1);
        return 1;
    }
    fp_out=fopen("data/out.txt","w");
    if(fp_out == NULL){
        perror("data/out.txt");
        fclose(fp1);
        fclose(fp2);
        return 1;
    }
    //shuffling code area//
    // keep going until both inputs have reported end of file
    while(!file1_content_over || !file2_content_over)
    {
        // a stream that has already hit EOF is not read again
        if(!file1_content_over){
            ch1 = fgetc(fp1);
            if(ch1 != EOF)
                fputc(ch1,fp_out);
            else
                file1_content_over = 1;
        }
        if(!file2_content_over){
            ch2 = fgetc(fp2);
            if(ch2 != EOF)
                fputc(ch2,fp_out);
            else
                file2_content_over = 1;
        }
    }
    //shuffling code area//
    fclose(fp1);
    fclose(fp2);
    fclose(fp_out);
    char buf[64]={0};
    fp_out=fopen("data/out.txt","r");
    if(fp_out == NULL){
        perror("data/out.txt");
        return 1;
    }
    /* If nothing can be read, buf stays empty and the comparison reports FAIL. */
    fgets(buf,64,fp_out);
    printf("buf = %s\n", buf);
    /* Prefix comparison: characters after the expected text (e.g. a newline) are ignored. */
    if(!strncmp("aAbBcCdDeEfghijk",buf,strlen("aAbBcCdDeEfghijk")))
        printf("PASS\n");
    else
        printf("FAIL\n");
    fclose(fp_out);
    return 0;
}
This works for me! It is not the most optimized code; I didn't have much time to spend on that.
Main():
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX 100
int removingSPaces(char array[MAX], int sizeArray);
void orderChar(char bufFile1[MAX], char bufFile2[MAX], char bufOut[MAX], int maxSize, int sizeBuf1, int sizeBuf2);
int getChar(char buf[MAX], FILE *fp);
/*
 * Driver: loads text from file1.txt and file2.txt with getChar(), trims a
 * trailing newline from each with removingSPaces(), merges them with
 * orderChar() into fileOut.txt, then re-reads that file and prints PASS when
 * its first line equals "aAbBcCdDeEfghijk", FAIL otherwise.
 * Calls perror("") and exit(1) as soon as any fopen() fails.
 */
int main(){
    char firstText[MAX] = {0};
    char secondText[MAX] = {0};
    char merged[MAX] = {0};

    /* Opened one after another; the first failure stops the program. */
    FILE *firstIn = fopen("file1.txt","r");
    if(firstIn == NULL){
        perror("");
        exit(1);
    }
    FILE *secondIn = fopen("file2.txt","r");
    if(secondIn == NULL){
        perror("");
        exit(1);
    }
    FILE *out = fopen("fileOut.txt","w");
    if(out == NULL){
        perror("");
        exit(1);
    }

    /* First input: read, close, then strip the trailing newline if present. */
    int firstLen = getChar(firstText, firstIn);
    fclose(firstIn);
    firstLen = removingSPaces(firstText, firstLen);

    /* Second input: same steps. */
    int secondLen = getChar(secondText, secondIn);
    fclose(secondIn);
    secondLen = removingSPaces(secondText, secondLen);

    /* Total number of characters to place in the merged buffer. */
    int total = firstLen + secondLen;
    orderChar(firstText, secondText, merged, total, firstLen, secondLen);

    fprintf(out, "%s", merged);
    fclose(out);

    /* Verification step: read the result back and compare with the expected text. */
    char check[64] = {0};
    out = fopen("fileOut.txt", "r");
    if(out == NULL){
        perror("");
        exit(1);
    }
    fgets(check, 64, out);
    if(strncmp("aAbBcCdDeEfghijk", check, 64) == 0){
        printf("PASS\n");
    }else{
        printf("FAIL\n");
    }
    fclose(out);
    return 0;
}
Functions():
/*
 * Removes one trailing '\n' from the text held in array.
 *
 * array      buffer holding the text; modified in place when a newline is removed.
 * sizeArray  number of characters currently in array (as returned by getChar()).
 * Returns the new length: strlen(array) after the newline was overwritten with
 * '\0', or sizeArray unchanged when there was nothing to remove.
 */
int removingSPaces(char array[MAX], int sizeArray){
    /* With no characters there is no last element; indexing sizeArray - 1 would read before the buffer. */
    if(sizeArray <= 0){
        return sizeArray;
    }
    if(array[sizeArray - 1] != '\n'){
        return sizeArray;
    }
    array[sizeArray - 1] = '\0';
    return (int)strlen(array);
}
/*
 * Reads fp to the end with fgets() and copies the last chunk read into buf.
 * Every fgets() call overwrites the previous one, so buf ends up with the
 * final line only (or the final MAX-1 character piece of a longer line).
 *
 * buf  destination, at least MAX bytes.
 * fp   open input stream; not closed here.
 * Returns the length of the text copied into buf. When nothing could be read,
 * buf receives an empty string and 0 is returned.
 */
int getChar(char buf[MAX], FILE *fp){
    /* Zero-initialised so the strcpy() below is well defined even if fgets() never succeeds. */
    char bufAux[MAX] = {0};
    int size = 0;
    while(fgets(bufAux, sizeof(bufAux), fp)){
        size = (int)strlen(bufAux);
    }
    strcpy(buf, bufAux);
    return size;
}
void orderChar(char bufFile1[MAX], char bufFile2[MAX], char bufOut[MAX], int maxSize, int sizeBuf1, int sizeBuf2){
int positionsF1=0, positionsF2=0;
int aux = 0; //This will starts organization by the first file! If you want to change it just change to 1;
for(int i=0; i < maxSize; i++){
if(aux == 0 && positionsF1 != sizeBuf1){
bufOut[i]=bufFile1[positionsF1];
if(positionsF2!=sizeBuf2){
aux = 1;
}
positionsF1++;
}else if(aux == 1 && positionsF2 != sizeBuf2){
bufOut[i]=bufFile2[positionsF2];
if(positionsF1!=sizeBuf1){
aux = 0;
}
positionsF2++;
}
}
}
Content of file 1:
abcdefghijk
Content of file 2:
ABCDE

How to assign words from a file to a 2D array of strings in C

I'm trying to read words from a file (which has one word per line) and store these words in a 2D String array such that each row contains all the words that begins with same letter/character. But when I try to print the array all the elements are shown to be "null".
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
/* Not referenced by the code shown in this listing. */
#define numberOfMappers 2
/* Ten string slots; filled by pooler() and printed by printBuffer1(). */
char *buffer1[10];
/* Declared but not used by the code shown in this listing. */
char *buffer2[10];
/* add, rem and num are initialised here and not used by the code shown. */
int add = 0;
int rem = 0;
int num = 0;
/* Statically initialised mutex and condition variables; no visible code locks or waits on them. */
pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t c_mapper = PTHREAD_COND_INITIALIZER;
pthread_cond_t c_pooler = PTHREAD_COND_INITIALIZER;
void *pooler (void *param);
void *mapper (void *param);
void printBuffer1();
/*
 * Runs pooler() on its own thread, waits for it to finish, then prints the
 * contents of buffer1 via printBuffer1().
 * Exits with status 1 if the thread cannot be created; otherwise returns 0.
 */
int main(int argc, char *argv[])
{
    pthread_t pooler_thread;
    int create_status = pthread_create(&pooler_thread, NULL, pooler, NULL);

    if (create_status != 0) {
        fprintf(stderr, "Unable to create producer thread\n");
        exit(1);
    }

    /* buffer1 is only read after the worker has completed. */
    pthread_join(pooler_thread, NULL);
    printBuffer1();
    return 0;
}
/*
 * Thread start routine: opens file.txt and stores `line` into each of the ten
 * buffer1 slots. param is unused. Returns 0 (a null pointer).
 * Exits the process with EXIT_FAILURE if the file cannot be opened.
 * NOTE(review): nothing is ever read from fp, so `line` is still NULL when it
 * is stored; len and read are declared for a read call that is not present.
 */
void *pooler(void *param) {
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
fp = fopen("file.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
int i;
for (i = 0; i < 10; i ++) {
/* Every slot receives the same pointer value. */
buffer1[i] = line;
}
fflush(stdout);
/* fp is not closed before returning. */
return 0;
}
/* Prints the ten buffer1 entries in order, each followed by a tab. */
void printBuffer1() {
    char **slot = buffer1;
    char **past_end = buffer1 + 10;

    while (slot != past_end) {
        printf("%s\t", *slot);
        slot++;
    }
}
You forgot to call getline().
getline() reuses or reallocates the line buffer if you pass the previous one, thus the old line gets lost. You can prevent this by clearing the line pointer before each call.
So, insert
line = NULL, len = 0;
if (getline(&line, &len, fp) < 0) break;
before
buffer1[i] = line;

Why is my program not outputting the right count of words?

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
/* Work description handed to each countFrequency() thread via pthread_create(). */
struct thread_data {
/* Stream to read from; main() stores the same FILE pointer in every element. */
FILE *fp;
/* Forwarded to fseek() as its offset argument. */
long int offset;
/* Forwarded to fseek() as its third (whence) argument. */
int start;
/* Byte count used for the malloc'd buffer and for each fread() item. */
int blockSize;
//struct word maybe?
};
/* Global word total; incremented by countFrequency() threads with no lock in this code. */
int words = 0;
/*
 * Thread start routine. `data` is a struct thread_data *.
 * Seeks td->fp, then reads td->blockSize bytes per fread() call and looks only
 * at the first byte of each item: a non-whitespace byte that follows
 * whitespace bumps the global `words`.
 * Ends the thread with pthread_exit(NULL); the trailing return is never reached.
 */
void *countFrequency(void* data) {
struct thread_data* td = data;
/* NOTE(review): the malloc result is used by fread() without a NULL check. */
char *buffer = malloc(td->blockSize);
/* i and c are set to 0 below and never read afterwards. */
int i, c;
i = 0; c = 0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
/* NOTE(review): td->start is passed where fseek() expects a whence constant
   (SEEK_SET / SEEK_CUR / SEEK_END) -- confirm the intended argument order.
   All threads seek and read the same FILE object. */
fseek(td->fp, td->offset, td->start);
/* last is updated on every iteration but its value is never used. */
char last = ' ';
/* Loop runs only while a complete blockSize-byte item can be read. */
while ((fread(buffer, td->blockSize, 1, td->fp)) == 1) {
if (buffer[0]== ' ' || buffer[0] == '\t') {
state = WHITESPACE;
} else if (buffer[0] == '\n') {
//newLine++;
state = WHITESPACE;
} else {
if (state == WHITESPACE) {
/* Shared counter updated without any synchronisation between threads. */
words++;
}
state = WORD;
}
last = buffer[0];
}
free(buffer);
pthread_exit(NULL);
return NULL;
}
/*
 * Opens file1.txt, asks for a thread count on stdin, starts that many
 * countFrequency() threads on the shared stream, joins them and prints the
 * global `words`. argc and argv are not used.
 */
int main(int argc, char **argv) {
/* x is declared but never used. */
int nthreads, x, id, blockSize, len;
//void *state;
FILE *fp;
pthread_t *threads;
/* NOTE(review): fp is not checked for NULL before the fseek/ftell calls below. */
fp = fopen("file1.txt", "r");
printf("Enter the number of threads: ");
/* NOTE(review): scanf's return value is ignored, so nthreads may be left unset on bad input. */
scanf("%d", &nthreads);
/* Variable-length array sized by the value just read. */
struct thread_data data[nthreads];
/* Allocation result is not checked and the array is never freed. */
threads = malloc(nthreads * sizeof(pthread_t));
/* File length: seek to the end and take the position (long narrowed to int). */
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n", len);
/* Ceiling division so that nthreads blocks cover len bytes. */
blockSize = (len + nthreads - 1) / nthreads;
printf("size= %d\n", blockSize);
/* NOTE(review): only fp, offset and start are filled in; data[id].blockSize is
   never assigned here, yet countFrequency() reads it. */
for (id = 0; id < nthreads; id++) {
data[id].fp = fp;
data[id].offset = blockSize;
data[id].start = id * blockSize + 1;
//maybe data[id]. word struct
}
//LAST THREAD
/* Stores the same value the loop above already wrote for the last element. */
data[nthreads-1].start=(nthreads-1)*blockSize+1;
/* Return values of pthread_create() are not checked. */
for (id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL, &countFrequency,&data[id]);
for (id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("%d\n",words);
return 0;
}
I had a segmentation fault that I fixed in this program but now when I run it, I get 0 words, which is incorrect because there are about a million words in the text file.
Can anyone tell me why it is giving me an incorrect word count?
One problem you have is you are using the same file descriptor in each of the countFrequency threads, each thread performs an fseek once, and then attempts to loop reading. The last fseek wins.
This design flaw must be addressed first.

Resources