Why is my program not outputting the right count of words? - c

#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
struct thread_data {
FILE *fp;
long int offset;
int start;
int blockSize;
//struct word maybe?
};
int words = 0;
void *countFrequency(void* data) {
struct thread_data* td = data;
char *buffer = malloc(td->blockSize);
int i, c;
i = 0; c = 0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
fseek(td->fp, td->offset, td->start);
char last = ' ';
while ((fread(buffer, td->blockSize, 1, td->fp)) == 1) {
if (buffer[0]== ' ' || buffer[0] == '\t') {
state = WHITESPACE;
} else if (buffer[0] == '\n') {
//newLine++;
state = WHITESPACE;
} else {
if (state == WHITESPACE) {
words++;
}
state = WORD;
}
last = buffer[0];
}
free(buffer);
pthread_exit(NULL);
return NULL;
}
int main(int argc, char **argv) {
int nthreads, x, id, blockSize, len;
//void *state;
FILE *fp;
pthread_t *threads;
fp = fopen("file1.txt", "r");
printf("Enter the number of threads: ");
scanf("%d", &nthreads);
struct thread_data data[nthreads];
threads = malloc(nthreads * sizeof(pthread_t));
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n", len);
blockSize = (len + nthreads - 1) / nthreads;
printf("size= %d\n", blockSize);
for (id = 0; id < nthreads; id++) {
data[id].fp = fp;
data[id].offset = blockSize;
data[id].start = id * blockSize + 1;
//maybe data[id]. word struct
}
//LAST THREAD
data[nthreads-1].start=(nthreads-1)*blockSize+1;
for (id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL, &countFrequency,&data[id]);
for (id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("%d\n",words);
return 0;
}
I had a segmentation fault that I fixed in this program but now when I run it, I get 0 words, which is incorrect because there are about a million words in the text file.
Can anyone tell me why it is giving me an incorrect word count?

One problem you have is you are using the same file descriptor in each of the countFrequency threads, each thread performs an fseek once, and then attempts to loop reading. The last fseek wins.
This design flaw must be addressed first.

Related

Creating a multi thread pthread program that counts the number of words in a text fi using C on linux

I'm pretty new to linux and C. For my Operating System's class we're supposed to write a code that that partitions the text file into 8 segements. The program can't be partition manually. I used gcc assign4.c -Wall -Werror -pthread in the command line and it processed the code no mistakes. Then I entered ./a.out and what printed out was " Assign4.txt". Can someone please guide to what may be wrong?
Below is the code:
#include<stdio.h>
#include<pthread.h>
#include<stdlib.h>
struct thread_data
{
FILE *fp;
long int offset;
int start;
int blocksize;
};
int words = 0;
void *countFrequency(void* data)
{
struct thread_data* td=data;
char *buffer = malloc(td->blocksize);
enum states {WHITESPACE, WORD};
int state = WHITESPACE;
fseek(td->fp,td->offset,td->start);
while ((fread(buffer,td->blocksize,1,td->fp))==1)
{
if (buffer[0]==' '||buffer[0]=='\t')
{
state = WHITESPACE;
}
else if (buffer[0]=='\n')
{
state = WHITESPACE;
}
else
{
if (state == WHITESPACE)
{
state = WORD;
words++;
}
}
free(buffer);
pthread_exit(NULL);
}
return NULL;
}
int main(int argc,char **argv)
{
int nthreads, id, blockSize,len;
FILE *fp;
pthread_t *threads;
if (argc < 2)
{
fprintf(stderr, "Assign4.txt");
exit(-1);
}
if ((fp=fopen(argv[1],"r"))== NULL)
{
printf("Error opening file");
exit(-1);
}
printf("Enter filename: ");
scanf("%d",&nthreads);
struct thread_data data[nthreads];
threads = malloc(nthreads*sizeof(pthread_t));
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n",len);
blockSize = (len+nthreads-1)/nthreads;
printf("size= %d\n",blockSize);
for(id = 0; id < nthreads; id++)
{
data[id].fp=fp;
data[id].offset = blockSize;
data[id].start = id*blockSize+1;
}
data[nthreads-1].start = (nthreads-1)*blockSize+1;
for(id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL,
&countFrequency,&data[id]);
for(id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("Total: %d\n", words+1);
return 0;
}

How to loop a nested array in C

I've been developing a guessing game in which the goal is to guess the character selected by the user among specific characters, anyway, my first and only idea is to create an array with the questions to be asked, and each question has its options like in the code below I'm a newbie in C language so that I there are several things which I'm not sure how to handle. In short, I'd like to know how can I loop over the array showing to the user the questions with its questions to be answered? Here's the code.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#define ROW 500
#define LINE 200
//Read file and append to an array buffer
char *characters(){
char *source = NULL;
FILE *fp = fopen("file.txt", "r");
if (fp != NULL) {
/* Go to the end of the file. */
if (fseek(fp, 0L, SEEK_END) == 0) {
/* Get the size of the file. */
long bufsize = ftell(fp);
if (bufsize == -1) { /* Error */ }
/* Allocate our buffer to that size. */
source = malloc(sizeof(char) * (bufsize + 1));
/* Go back to the start of the file. */
if (fseek(fp, 0L, SEEK_SET) != 0) { /* Error */ }
/* Read the entire file into memory. */
size_t newLen = fread(source, sizeof(char), bufsize, fp);
if ( ferror( fp ) != 0 ) {
fputs("Error reading file", stderr);
} else {
source[newLen++] = '\0'; /* Just to be safe. */
}
}
fclose(fp);
}
return source;
}
char *strndup(const char *s, size_t n) {
char *p;
size_t n1;
for (n1 = 0; n1 < n && s[n1] != '\0'; n1++)
continue;
p = malloc(n + 1);
if (p != NULL) {
memcpy(p, s, n1);
p[n1] = '\0';
}
return p;
}
// User input
char *input(){
char *value;
char buffer[10];
int j = 0;
while( j < 1 && fgets(buffer, 10, stdin) != NULL){
value = strndup(buffer, 10);
j++;
}
return value;
}
// Main function
int main (void)
{
char *questions[] = {
"Genre",{"male","female"},
"Hair", {"black","red","blond"},
"Cloths",{"dress","shirt","pants"},
"pet", {"dog","cat","pig"}
};
int asked[4] = {0};
char *answers[5];
char buffer[6];
srand(time(NULL));
for (int i = 0; i < 4; i++) {
int q = rand() % 4;
while (asked[q])
q = rand() % 4;
asked[q]++;
printf ("%s\n", questions[q]);
answers[i] = input();
}
for(int i = 0; i < 4; i++)
{
printf(" %s ",answers[i]);
}
return 0;
}
That's the file's structure I'll compare as long as I have all the answers from the user.
female,blond,vestido,pig,character b
male,black,shirt,pants,dog,character c
male,black,shirt,pants,cat,character d
female,blond,dress,cat,character A
male,red,shirt,pants,pig,character e

How to empty Char Array and reuse the same in C?

I am trying to read a file. I want to read each line from the file and check if there are any spelling error in that line.
For that I have added condition that data from file will store in buffer until it gets a new line characher '\n'. And after getting this line I want to empty the buffer and re insert the values in that.
Code I am using for the same is as follows:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define W_COUNT 23800
#define MAX_LEN 100
char *dict[W_COUNT];
char buffer[MAX_LEN];
int num_words; //No of Words
char *statement[W_COUNT];
char buffer1[MAX_LEN];
void read_dictionary();
void file_read(char *);
void spell_check();
int word_search(char*);
int main(int argc, char*argv[]){
int i;
if(argc < 2){
printf("Expected Filename.\n");
exit(0);
}
read_dictionary();
file_read(argv[1]);
// spell_check();
}
void read_dictionary(){
FILE *fd;
int i = 0;
fd = fopen("dictionary", "r");
while ( fscanf(fd,"%s",buffer) != EOF)
dict[i++] = strdup(buffer);
num_words = i;
fclose(fd);
}
void file_read(char *filename){
FILE *fd;
int i = 0;
char c;
fd = fopen(filename,"r");
/*while ( fscanf(fd,"%s",buffer1) != EOF)
{
word[i++] = strdup(buffer1);
printf("File : %s\n", buffer1);
}*/
while ( ( c = fgetc(fd)) != EOF )
{
buffer1[i++] = tolower(c);
if ( c == '\n')
{
//printf("New Line\n");
spell_check();
buffer1[i] = 0;
}
//buffer1[i] = 0;
}
printf("Statement : %s\n", buffer1);
fclose(fd);
}
void spell_check(){
char *str;
str = strtok(buffer1," .?,!-");
while( str != NULL){
if(!word_search(str))
printf("%s Not found.\n",str);
str = strtok(0," .?,!-");
}
}
int word_search(char *word){
int high, low, mid;
high = num_words - 1;
low = 0;
int found = 0;
while (found == 0){
mid = (low + high) / 2;
if(strcmp(word, dict[mid]) == 0)
return 1;
else if(strcmp(word,dict[mid]) < 0)
high = mid - 1;
else
low = mid + 1;
if ( low > high)
return 0;
}
}
Any suggestions will be appreciated.
Thank you in advance.
while ( ( c = fgetc(fd)) != EOF )
{
buffer1[i++] = tolower(c);
if ( c == '\n')
{
//printf("New Line\n");
spell_check();
i = 0;
buffer1[i] = 0;
}
//buffer1[i] = 0;
}
For each line reading you have to assign the 0 to the i. After that you have to assign the null to the 0th position in the buffer.
You can try the above code for loop it will work.

C Multithreaded Word count

So I'm working on a multithreaded word count program in c and I was having some problems with my code, while searching here I found an old question that was similar to my own project. Rather than trying to rework my code which was full of problems, I decided to try and get this other one working, then modify it to make what I want.
The code takes a txt file as input. The problem is when you run the program there is a segmentation fault.
Here's the code:
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
struct thread_data{
FILE *fp;
long int offset;
int start;
int blockSize;
};
int words=0;
void *countFrequency(void* data){
struct thread_data* td=data;
char *buffer = malloc(td->blockSize);
int i,c;
i=0;c=0;
enum states { WHITESPACE, WORD };
int state = WHITESPACE;
fseek(td->fp, td->offset, td->start);
char last = ' ';
while ((fread(buffer, td->blockSize, 1, td->fp))==1){
if ( buffer[0]== ' ' || buffer[0] == '\t' ){
state = WHITESPACE;
}
else if (buffer[0]=='\n'){
state = WHITESPACE;
}
else {
if ( state == WHITESPACE ){
words++;
}
state = WORD;
}
last = buffer[0];
}
free(buffer);
pthread_exit(NULL);
return NULL;
}
int main(int argc, char **argv){
int nthreads, id, blockSize,len;
FILE *fp;
pthread_t *threads;
if (argc < 2){
fprintf(stderr, "Usage: ./a.out <file_path>");
exit(-1);
}
if((fp=fopen(argv[1],"r"))==NULL){
printf("Error opening file");
exit(-1);
}
printf("Enter the number of threads: ");
scanf("%d",&nthreads);
struct thread_data data[nthreads];
threads = malloc(nthreads*sizeof(pthread_t));
fseek(fp, 0, SEEK_END);
len = ftell(fp);
printf("len= %d\n",len);
blockSize=(len+nthreads-1)/nthreads;
printf("size= %d\n",blockSize);
for(id = 0; id < nthreads; id++){
data[id].fp=fp;
data[id].offset = blockSize;
data[id].start = id*blockSize+1;
}
data[nthreads-1].start=(nthreads-1)*blockSize+1;
for(id = 0; id < nthreads; id++)
pthread_create(&threads[id], NULL, &countFrequency,&data[id]);
for(id = 0; id < nthreads; id++)
pthread_join(threads[id],NULL);
fclose(fp);
printf("%d\n",words);
return 0;
}
And here's a link to the original post: original
You invoked undefined behavior by using indeterminate value of uninitalized variable having automatic storage duration nthreads in struct thread_data data[nthreads];.
Try moving the line after scanf("%d",&nthreads);.

How to assign words from a file to a 2D array of strings in C

I'm trying to read words from a file (which has one word per line) and store these words in a 2D String array such that each row contains all the words that begins with same letter/character. But when I try to print the array all the elements are shown to be "null".
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#define numberOfMappers 2
char *buffer1[10];
char *buffer2[10];
int add = 0;
int rem = 0;
int num = 0;
pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t c_mapper = PTHREAD_COND_INITIALIZER;
pthread_cond_t c_pooler = PTHREAD_COND_INITIALIZER;
void *pooler (void *param);
void *mapper (void *param);
void printBuffer1();
int main(int argc, char *argv[]) {
pthread_t tid1;
int i;
if(pthread_create(&tid1, NULL, pooler, NULL) != 0) {
fprintf(stderr, "Unable to create producer thread\n");
exit(1);
}
pthread_join(tid1, NULL);
printBuffer1();
return 0;
}
void *pooler(void *param) {
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
fp = fopen("file.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
int i;
for (i = 0; i < 10; i ++) {
buffer1[i] = line;
}
fflush(stdout);
return 0;
}
void printBuffer1() {
int i;
for (i = 0; i < 10; i ++) {
printf("%s\t", buffer1[i]);
}
}
You forgot to call getline().
getline() reuses or reallocates the line buffer if you pass the previous one, thus the old line gets lost. You can prevent this by clearing the line pointer before each call.
So, insert
line = NULL, len = 0;
if (getline(&line, &len, fp) < 0) break;
before
buffer1[i] = line;

Resources