I have a file which contains a list of filenames in which I wanna search for a word and replace it
I modified the code a little just to show only relevant parts here
The problem is that if I have only one file in that list, it won't process it with multi threads because the threads are working only if I have multiple files
So I want to keep the current threads configuration but I wanna add some threads at the processing part
I have this code:
struct words_list {
char word[20];
struct words_list * next;
};
FILE * INFILE;
int num_thread = 10;
// Mutex variables
pthread_mutex_t input_queue;
pthread_mutex_t word_list;
int main(int argc,char **argv)
{
//some code is missing
if((INFILE = fopen(myfile,"r")) == NULL) {
fprintf(stderr,"Can't open input file\n");
exit(0);
}
for(i = 0 ; i < number_thread; i++)
{
if(pthread_create(&thread_id[i],NULL,&search,NULL) != 0)
{
i--;
fprintf(stderr,RED "\nError in creating thread\n" NONE);
}
}
for(i = 0 ; i < number_thread; i++)
if(pthread_join(thread_id[i],NULL) != 0)
{
fprintf(stderr,RED "\nError in joining thread\n" NONE);
}
fflush(INFILE);
fclose(INFILE);
}
void * search(void * data)
{
char file[20];
while (!feof(INFILE))
{
if (fgets(file,sizeof(file),INFILE) != NULL)
{
if (strlen(file) < 8)
break;
if (file[strlen (file) - 1] == '\n')
file[strlen (file) - 1] = '\0';
}
process(file);
}
return NULL;
}
void process(char *filename)
{
char buff[512];
char word[20];
struct words_list * curr_word = first_word;
if(verbose != 0)
fprintf(stderr,"Processing: %s\n",filename);
while(curr_word != NULL)
{
//some code missing
pthread_mutex_lock(&word_list);
strncpy(word,curr_word->word,sizeof(word) - 1);
pthread_mutex_unlock(&word_list);
**//replace_word must run with multiple threads**
ret = replace_word(word,buff,sizeof(buff));
//end of threads part
//code missing
}
}
How can I add other pthreads at the bold part so it can process with multiple threads each file?
Instead of assigning one thread per file. Why shouldn't you allocate a thread to process portion of each file. As shown below, I allocated a thread for every 50 bytes of data in a file. In this technique, even if you have only one file in the list, you can still use multiple threads to parse it. Hope it helps.
#include "stdio.h"
#include "pthread.h"
#include "sys/stat.h"
#include "string.h"
#include "fcntl.h"
#define TOTAL_NUMBER_THREADS 100
struct words_list {
char word[20];
struct words_list * next;
};
struct file_segment {
char filename[50];
size_t foffset;
size_t size;
}fs[TOTAL_NUMBER_THREADS];
FILE * INFILE;
int num_thread=0;
// Mutex variables
pthread_mutex_t input_queue;
pthread_mutex_t word_list;
pthread_t thread_id[TOTAL_NUMBER_THREADS];
void *process( void *arg);
void segment_file(char *filename)
{
int fd;
int offset=0;
struct stat statbuf;
size_t size;
fd = open(filename, O_RDONLY);
if(fd < 0)
{
perror("fopen");
return;
}
fstat(fd, &statbuf);
size=statbuf.st_size;
while((offset < size) && (num_thread <= 100))
{
strncpy(fs[num_thread].filename, filename, sizeof(fs[num_thread].filename));
fs[num_thread].foffset=offset;
fs[num_thread].size=(size>50)?50:size;
offset+=fs[num_thread].size;
if(pthread_create(&thread_id[num_thread],NULL,&process,&fs[num_thread]) != 0)
{
fprintf(stderr,"\nError in creating thread\n");
}
num_thread++;
}
return;
}
void *process( void *arg)
{
char buf[50];
struct file_segment *fs;
char word[20];
//struct words_list * curr_word = first_word;
fs = (struct file_segment *) arg;
FILE *fp;
fp=fopen(fs->filename, "r");
fseek(fp, fs->foffset, SEEK_SET);
fread(buf,1,fs->size,fp);
while(curr_word != NULL)
{
//some code missing
pthread_mutex_lock(&word_list);
strncpy(word,curr_word->word,sizeof(word) - 1);
pthread_mutex_unlock(&word_list);
**//replace_word must run with multiple threads**
ret = replace_word(word,buff,sizeof(buff));
//end of threads part
//code missing
}
//printf("Filename: %s\n Info: %s\n", fs->filename, buf);
printf("%s", buf);
return;
}
int main(int argc,char **argv)
{
//some code is missing
char file[50];
int i;
if((INFILE = fopen("list.txt","r")) == NULL) {
fprintf(stderr,"Can't open input file\n");
return 0; }
while (!feof(INFILE))
{
if (fgets(file,sizeof(file),INFILE) != NULL)
{
if (strlen(file) < 8)
break;
if (file[strlen (file) - 1] == '\n')
file[strlen (file) - 1] = '\0';
}
segment_file(file);
}
for(i = 0 ; i < num_thread; i++)
if(pthread_join(thread_id[i],NULL) != 0)
{
fprintf(stderr,"\nError in joining thread\n");
}
fflush(INFILE);
fclose(INFILE);
}
Related
I am trying to read from file hw4.data and see if it has a name. The user inputs the name via a command line argument. Everything works fine but I can't get the file to be passed between the functions correctly. The assignment requires that I define the file in main and pass it between SCAN and LOAD.
#include <stdio.h>
#include <stdlib.h>
struct _data {
char name[20];
long number;
};
int SCAN(FILE *(*stream)) { // skim through the file and find how many entries there are
int size = 0;
char s_temp[100];
long l_temp;
while (1) {
fscanf(*stream, "%s %ld", s_temp, &l_temp);
if (feof(*stream)) break;
size++;
}
return size;
}
struct _data* LOAD(FILE *stream, int size) { // loop through the file and load the entries into the main data array
struct _data* d = malloc(size * sizeof(struct _data));
int i;
for (i = 0; i < size; i++) {
fscanf(stream, "%s %ld", d[i].name, &d[i].number);
}
return d;
}
void SEARCH(struct _data *BlackBox, char* name, int size) { // loop through the array and search for the right name
int i;
int found = 0;
for (i = 0; i < size; i++) {
printf("%s %s\n", BlackBox[i].name, name);
if (strcmp(BlackBox[i].name, name) == 0) {
printf("*******************************************\nThe name was found at the %d entry.\n*******************************************\n", i);
found = 1;
break;
}
}
if (found == 0) {
printf("*******************************************\nThe name was NOT found.\n*******************************************\n");
}
}
void FREE(struct _data* BlackBox, int size) { // free up the dynamic array
free(BlackBox);
}
int main(int argv, char* argc[]) {
if (argv == 2) {
printf("The argument supplied is %s\n", argc[1]);
FILE* file = fopen("./hw4.data", "r");
int size = SCAN(&file);
struct _data* data = LOAD(&file, size);
SEARCH(data, argc[1], size);
fclose(file);
return 0;
} else {
printf("*******************************************\n* You must include a name to search for.*\n*******************************************\n");
return 0;
}
}
Here's the format of hw4.data
ron 7774013
jon 7774014
tom 7774015
won 7774016
A few issues:
In SCAN, remove the feof. Replace with: if (fscanf(*stream, "%s %ld", s_temp, &l_temp) != 2) break;
Note that after calling SCAN, you should do: rewind(file);. Otherwise, LOAD will only see [immediate] EOF.
And, as others have mentioned, just pass file to SCAN/LOAD and not &file.
Add a check for null return from fopen (e.g.) if (file == NULL) { perror("fopen"); exit(1); }
Stylistically:
If you have a comment describing a function, put it on the line above the function.
Try to keep lines within 80 chars
Here is the refactored code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct _data {
char name[20];
long number;
};
// skim through the file and find how many entries there are
int
SCAN(FILE *stream)
{
int size = 0;
char s_temp[100];
long l_temp;
while (1) {
if (fscanf(stream, "%s %ld", s_temp, &l_temp) != 2)
break;
size++;
}
return size;
}
// loop through the file and load the entries into the main data array
struct _data *
LOAD(FILE *stream, int size)
{
struct _data *d = malloc(size * sizeof(struct _data));
int i;
for (i = 0; i < size; i++) {
fscanf(stream, "%s %ld", d[i].name, &d[i].number);
}
return d;
}
// loop through the array and search for the right name
void
SEARCH(struct _data *BlackBox, char *name, int size)
{
int i;
int found = 0;
for (i = 0; i < size; i++) {
printf("%s %s\n", BlackBox[i].name, name);
if (strcmp(BlackBox[i].name, name) == 0) {
printf("*******************************************\n");
printf("The name was found at the %d entry.\n", i);
printf("*******************************************\n");
found = 1;
break;
}
}
if (found == 0)
printf("*******************************************\n"
"The name was NOT found.\n"
"*******************************************\n");
}
// free up the dynamic array
void
FREE(struct _data *BlackBox, int size)
{
free(BlackBox);
}
int
main(int argv, char *argc[])
{
if (argv == 2) {
printf("The argument supplied is %s\n", argc[1]);
FILE *file = fopen("./hw4.data", "r");
if (file == NULL) {
perror("fopen");
exit(1);
}
int size = SCAN(file);
rewind(file);
struct _data *data = LOAD(file, size);
SEARCH(data, argc[1], size);
fclose(file);
}
else
printf("*******************************************\n"
"* You must include a name to search for.*\n"
"*******************************************\n");
return 0;
}
Using realloc, we can combine SCAN and LOAD into a single function:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct _data {
char name[20];
long number;
};
// loop through the file and load the entries into the main data array
struct _data *
LOAD(FILE *stream, int *sizep)
{
struct _data *all = NULL;
struct _data *d;
int size = 0;
int capacity = 0;
while (1) {
if (size >= capacity) {
capacity += 10;
all = realloc(all,sizeof(*all) * capacity);
if (all == NULL) {
perror("realloc");
exit(1);
}
}
d = &all[size++];
if (fscanf(stream, "%s %ld", d->name, &d->number) != 2)
break;
}
// trim to size actually used
all = realloc(all,sizeof(*all) * size);
*sizep = size;
return all;
}
// loop through the array and search for the right name
void
SEARCH(struct _data *BlackBox, char *name, int size)
{
int i;
int found = 0;
for (i = 0; i < size; i++) {
printf("%s %s\n", BlackBox[i].name, name);
if (strcmp(BlackBox[i].name, name) == 0) {
printf("*******************************************\n");
printf("The name was found at the %d entry.\n", i);
printf("*******************************************\n");
found = 1;
break;
}
}
if (found == 0)
printf("*******************************************\n"
"The name was NOT found.\n"
"*******************************************\n");
}
// free up the dynamic array
void
FREE(struct _data *BlackBox, int size)
{
free(BlackBox);
}
int
main(int argv, char *argc[])
{
if (argv == 2) {
printf("The argument supplied is %s\n", argc[1]);
FILE *file = fopen("./hw4.data", "r");
if (file == NULL) {
perror("fopen");
exit(1);
}
int size;
struct _data *data = LOAD(file, &size);
SEARCH(data, argc[1], size);
fclose(file);
}
else
printf("*******************************************\n"
"* You must include a name to search for.*\n"
"*******************************************\n");
return 0;
}
I had to use rewind() in order to reset the file so that LOAD() would read from the start of the file and give good data.
I wrote the next function that tries to read and enter each line from text file into a string array in c :
int main(int argc,char* argv[])
{
char ** lines;
readFile(argv[1],lines);
}
int readFile(char* filePath,char** lines)
{
char file_char;
int letter_in_line=0;
int line=1;
char* line_string=malloc(1024);
int j=1;
int fd=open(filePath,O_RDONLY);
if (fd < 0)
{
return 0;
}
while (read(fd,&file_char,1) >0)
{
if(file_char != '\n' && file_char != '0x0')
{
line_string[letter_in_line] = file_char;
letter_in_line++;
}
else
{
if(lines != NULL)
{
lines=(char**)realloc(lines,sizeof(char*)*line);
}
else
{
lines=(char**)malloc(sizeof(char*));
}
char* line_s_copy=strdup(line_string);
lines[line-1]=line_s_copy;
line++;
letter_in_line=0;
memset(line_string,0,strlen(line_string));
}
j++;
}
printf("cell 0 : %s",lines[0]);
return 1;
}
I have 2 questions :
1)Whenever the code reaches the print of cell 0, I'm getting
Segmentation fault (core dumped) error. What is wrong ?
2)In case I
want to see the changes in the lines array in my main, I should pass
&lines to the func and get char*** lines as an argument ? In
addition, I will need to replace every 'line' keyword with '*line' ?
*I know that I can use fopen,fget, etc... I decided to implement it in this way for a reason.
There is many issues that make your code core dump.
Here a version very similar to your code. I hope it will help you to understand this.
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
int read_file(const char *filename, char ***result)
{
/* open the file */
const int fd = open(filename, O_RDONLY);
if (fd < 0) {
*result = NULL;
return -1;
}
/* read the file characters by characters */
char *buffer = (char *)malloc(sizeof(char) * 1024);
char c;
int column = 0;
int line = 0;
*result = NULL;
/* for each characters in the file */
while (read(fd, &c, 1) > 0) {
/* check for end of line */
if (c != '\n' && c != 0 && column < 1024 - 1)
buffer[column++] = c;
else {
/* string are null terminated in C */
buffer[column] = 0;
column = 0;
/* alloc memory for this line in result */
*result = (char **)realloc(*result, sizeof(char *) *
(line + 1));
/* duplicate buffer and store it in result */
(*result)[line++] = strdup(buffer);
}
}
free(buffer);
return line;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "usage: %s [filename]", argv[0]);
return 1;
}
char **lines;
int line_count = read_file(argv[1], &lines);
if (line_count < 0) {
fprintf(stderr, "cannot open file %s\n", argv[1]);
return 1;
}
for(int i=0; i < line_count; i++)
printf("%s\n", lines[i]);
return 0;
}
Here an other version:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int read_file(const char *filename, char ***result)
{
/* init result */
*result = NULL;
/* open the file */
FILE *file = fopen(filename, "r");
if (file == NULL)
return -1;
/* read the file line by line */
char *buffer = (char *)malloc(sizeof(char) * 1024);
int line = 0;
while (fgets(buffer, 1024, file)) {
*result = (char **)realloc(*result, sizeof(char *) *
(line + 1));
(*result)[line++] = strdup(buffer);
}
free(buffer);
return line;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
fprintf(stderr, "usage: %s [filename]", argv[0]);
return 1;
}
char **lines;
int line_count = read_file(argv[1], &lines);
if (line_count < 0) {
fprintf(stderr, "cannot open file %s\n", argv[1]);
return 1;
}
for(int i=0; i < line_count; i++)
printf("%s\n", lines[i]);
return 0;
}
This is for a project for university (small replica of a Catan game) and I'm struggling a bit with this part, we have the read an INI file with fairly simple formatting, it only has some comments starting with ';' and then it's just tags with a value in front:
xdim=4
ydim=5
N=D
S=L2
E=S10
W=D
etc...
I have this function to read from an INI file and address the read values to the correct struct element. But it seems like it doesn't even read the file, the struct is a simple struct with xdim and ydim, after I call the func xdim is '&d&d&d&d etc...' and ydim is 0
I've tried placing in some printf's just to see if the values from the INI file itself where being read wrong, but nothing is printed.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX 128
typedef struct UNIT { /**struct used in an array for the rest of the INI values*/
char N[4];
char S[4];
char W[4];
char E[4];
char Building;
}UNIT;
typedef struct{ /**This is declared in main and passed to the functions*/
UNIT *grid;
unsigned int xdim;
unsigned int ydim;
} MAP_CONFIG;
void set_config_val(MAP_CONFIG *config, const char *key, int val) {
if (config == NULL)
return;
if (strcmp(key, "xdim") == 0){
printf("here");
config->xdim = val;
}
else if (strcmp(key, "ydim") == 0){
printf("here");
config->ydim = val;
}
else{
;
}
}
void read_config(MAP_CONFIG *config,FILE *f) {
char str[MAX];
char *token;
const char *delim = "=\n";
while (1) {
fgets(str, MAX, f);
if(feof(f)!= 0) break;
puts(str);
if (strchr(str, '=')!=NULL) {
char varname[MAX];
int value;
token = strtok(str, delim);
strcpy(varname, token);
token = strtok(NULL, delim);
value = atoi(token);
printf("&d", token);
set_config_val(config, varname, value);
}
}
config = malloc(sizeof(MAP_CONFIG));
config->grid = calloc(config->xdim * config->ydim, sizeof(UNIT));
close(f);
return;
}
open file function:
FILE *openFile(char *nome, char *mode) {
FILE *f;
printf("Opening file %s\n", nome);
f = fopen(nome, mode);
if (f == NULL) {
fprintf(stderr, "*** It was not possible to open the file %s.", nome);
exit(1);
}
return f;
}
test main im using:
int main(int argc, char **argv) {
MAP_CONFIG map;
MAP_CONFIG *mapa = ↦
FILE *f;
char *filename;
for (int i = 0; i < argc; i++)
printf("Parametro %d: %s\n", i, argv[i]);
if (argc >= 2) {
filename = argv[1];
}
else {
printf("Opening base map file..\n");
filename = "mapa.ini";
}
f = openFile(filename, "r");
read_config(mapa, f);
printf("%d %d", map.xdim, map.ydim);
return 0;
}
I just want it to read the xdim and ydim, and then repeat the process to an array of structs for each struct to get the correct value of the N,S,E,W present in the INI file... Help!
I'm trying to use linked list to store every line of one big file (from 1 GB to 70 GB), but that's the problem, I can't because it overflows my RAM and forces windows to stop the program execution.
The function I wrote are these:
struct Word {
char word[13];
};
typedef struct Nodo {
struct Word word;
struct Nodo *next;
} TNodo;
typedef TNodo *Nodo;
void NewWord(Nodo *p, struct Word s) {
Nodo temp;
temp = (Nodo)malloc(sizeof(TNodo));
temp->word = s;
temp->next = *p;
*p = temp;
}
void LoadList(Nodo *p) {
FILE *f;
struct Word s;
char *buffer = malloc(sizeof(struct Word));
if (!(f= fopen("wordlist.txt", "r"))) {
fclose(f);
exit(1);
}
while (fgets(buffer, sizeof(struct Word), f)) {
if (sscanf(buffer,"%s", s.word) == 1) {
NewWord(p, s);
}
}
fclose(f);
free(buffer);
}
Is there a better way to process data (like deleting lines of file) from very large text files without storing them?
the text file I'm trying to read has this simple structure:
Word
Worf
Worg
As far as I have read, I found the following 2 ways better than others:
1) Read a larger chunk into a large memory buffer, and then parse out the data from that buffer.
2)Another way may be to instead memory map the file, then the OS will put the file into your process virtual memory map, so you can read it like reading from memory.
I changed the function according to your answers, now the function NewWord just print the word into a second file, skipping unnecessary words according to the functions step1() and step2().
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 67
char letters[SIZE] = {'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',
'.','_','1','2','3','4','5','6','7','8','9','0','!','#','$'};
struct Word
{
char word[13];
};
_Bool step1(char * word)
{
for(int i = 0; i < SIZE; i++)
{
for(int j = 0, c = 0; j < strlen(word); j++)
{
if(word[j] == letters[i])
{
c++;
if(c > 3)
{
return 1;
}
}
}
}
return 0;
}
_Bool step2(char * word)
{
for(int i = 0; i < SIZE; i++)
{
for(int j = 0; j < strlen(word); j++)
{
if(word[j] == letters[i] && word[j+1] == letters[i] && word[j+2] == letters[i])
{
return 1;
}
}
}
return 0;
}
void NewWord(FILE *f, struct Word s)
{
if(step1(s.word ) == 1 || step2(s.word) == 1)
return;
fprintf(f, "%s\n", s.word);
}
void LoadList()
{
FILE * f1;
FILE * f2;
struct Word s;
char * buffer = malloc(sizeof(struct Word));
if(!(f1= fopen("wordlist.txt", "r")))
{
fclose(f1);
exit(1);
}
if(!(f2 = fopen("bb.txt", "w")))
{
fclose(f2);
exit(1);
}
while(fgets(buffer, sizeof(struct Word), f1))
{
if(sscanf(buffer,"%s", s.word) == 1)
{
NewWord(f2, s);
}
}
fclose(f1);
fclose(f2);
free(buffer);
}
int main()
{
LoadList();
exit(0);
}
Hi guys I am in the second weekend of trying to find the solution to this problem. I am new at c programming and I have been trying to read each individual line of a text file and pass each of them to their own variable, where I will be able to manipulate them(such as compare them, do calculations etc).
I have a code to read each individual lines but I am unsure how to pass each line to a variable, here is the code:
#include <stdlib.h>
#include <stdio.h>
struct line_reader {
FILE *f;
char *buf;
size_t siz;
};
void
lr_init(struct line_reader *lr, FILE *f)
{
lr->f = f;
lr->buf = NULL;
lr->siz = 0;
}
char *
next_line(struct line_reader *lr, size_t *len)
{
size_t newsiz;
int c;
char *newbuf;
*len = 0;
for (;;) {
c = fgetc(lr->f);
if (ferror(lr->f))
return NULL;
if (c == EOF) {
if (*len == 0)
return NULL;
else
return lr->buf;
} else {
if (*len == lr->siz) {
newsiz = lr->siz + 4096;
newbuf = realloc(lr->buf, newsiz);
if (newbuf == NULL)
return NULL;
lr->buf = newbuf;
lr->siz = newsiz;
}
lr->buf[(*len)++] = c;
if (c == '\n')
return lr->buf;
}
}
}
void
lr_free(struct line_reader *lr)
{
free(lr->buf);
lr->buf = NULL;
lr->siz = 0;
}
int
main()
{
struct line_reader lr;
FILE *f;
size_t len;
char *line;
f = fopen("file.txt", "r");
if (f == NULL) {
perror("foobar.txt");
exit(1);
}
lr_init(&lr, f);
while (line = next_line(&lr, &len)) {
fputs("1: ", stdout);
fwrite(line, len, 1, stdout);
}
if (!feof(f)) {
perror("next_line");
exit(1);
}
lr_free(&lr);
return 0;
}
Any help would be appreciated.
What about using an array simply as a suggestion
e.g.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char** readFile(const char *filename, size_t *lineCount){
FILE *fp;
char buff[4096];
size_t lines = 0, capacity=1024;
char **line;
if(NULL==(fp=fopen(filename, "r"))){
perror("file can't open.");
return NULL;
}
if(NULL==(line=(char**)malloc(sizeof(char*)*capacity))){
perror("can't memory allocate.");
fclose(fp);
return NULL;
}
while(NULL!=fgets(buff, sizeof(buff), fp)){
line[lines++] = strdup(buff);
if(lines == capacity){
capacity += 32;
if(NULL==(line=(char**)realloc(line, sizeof(char*)*capacity))){
perror("can't memory allocate.");
fclose(fp);
return NULL;
}
}
}
*lineCount = lines;
fclose(fp);
return (char**)realloc(line, sizeof(char*)*lines);
}
void freeMem(char** p, size_t size){
size_t i;
if(p==NULL) return;
for(i=0;i<size;++i)
free(p[i]);
free(p);
}
int main(){
size_t lines;
char **line;
if(NULL!=(line=readFile("file.txt", &lines))){//lines: set line count of file
printf("%s", line[25]);// 26th line of file, zero origin
}
freeMem(line, lines);
return 0;
}
On any POSIX-compliant system just use the m scan modifier:
for ( char *line, nl; scanf("%m[^\n]%c",&line,&nl) != EOF ; free(line) ) {
if ( !line )
strcpy(line=malloc(1),""), getchar();
// ...
}
m has been in the standard for five years now.