SegFault when switching from small test file to 31 mb file - c

Below is my (incomplete) code for a merge sort project. This worked fine for the parts I have implemented until I switched from the 128 line test file to the 31 mb file that is supposed to be sorted. Now getting a segfault and I'm not sure what to do in order to solve this.
Removed some lines I believe are inconsequential because "mostly code".
/* One fixed-width record: a key field and a data field, each with one extra
 * byte beyond KEYSIZE / DATASIZE. KEYSIZE and DATASIZE are defined in a part
 * of the file that is not shown. */
struct Record {
char key[KEYSIZE+1];
char data[DATASIZE+1];
};
/* Number of worker threads; assigned in main() before the threads are created. */
int threadCount;
/* Number of merge tiers; main() sets it to log2(threadCount). */
int tiers;
/* Argument block handed to each thread (see threadFunc()).
 * NOTE(review): `static` has no effect here because the declaration names no
 * object; compilers typically warn about a useless storage-class specifier. */
static struct ThdArg {
int thdNum; // Thread number 0,1,2,3
struct Record * lowRec; // First record of group or first index of record
struct Record * hiRec; // Last record of group or last index of record
};
/* Record count; main() derives it from the size of the input file. */
int lines;
/* Repeats the declaration of `tiers` above (a second tentative definition; redundant). */
int tiers;
void *threadFunc(void *var)
{
struct ThdArg temp2 = *((struct ThdArg*)var);
qsort((temp2.lowRec), lines/threadCount, sizeof(struct Record), comparator);
for(int k=0;k<tiers;k++)
if(temp2.thdNum%(int)(pow(2,k+1))==0)
{
qsort((temp2.lowRec), lines/(threadCount/(int)pow(2,k+1)), sizeof(struct Record),comparator);
}
}
/*
 * Reads fixed-width records from the file named on the command line into an
 * array, then starts threadCount threads that each sort one slice of it and
 * waits for all of them to finish.
 *
 * NOTE(review): argv is declared `char **argv[]`; the standard signature is
 * `char *argv[]` (or `char **argv`).
 */
int main(int argc, char **argv[])
{
if (argc!=2)
{
printf("Please enter a file name");
return 0;
}
threadCount =8;
tiers =(int)log2((double)threadCount);
pthread_t threads[threadCount];
// NOTE(review): the fopen() result is used below without a NULL check.
FILE *recordFile=fopen(argv[1], "r");
// NOTE(review): ch is never used in this function.
char ch;
// Find the file size in bytes by seeking to the end, then go back to the start.
fseek(recordFile, 0, SEEK_END);
lines = ftell(recordFile);
fseek(recordFile, 0, SEEK_SET);
// Convert the byte count into a record count, 64 bytes per record.
lines=lines/64;
// Variable-length array with automatic storage (typically the stack); its
// size is lines * sizeof(struct Record), so it grows with the input file.
struct Record recArr[lines];
char buffer[9];
char buffer2[57];
for(int j=0;j<lines;j++)
{
// Up to 8 characters of key.
fgets(buffer, 9, recordFile);
for(int i=0;i<8;i++)
{
recArr[j].key[i]=buffer[i];
}
recArr[j].key[8]='\0';
// Up to 56 characters of data.
fgets(buffer2, 57, recordFile);
for(int i=0;i<56;i++)
{
recArr[j].data[i]=buffer2[i];
}
// NOTE(review): the loop above fills indices 0..55, so index 56 is never
// written; whether index 57 is inside data[] depends on DATASIZE, which is
// not shown - confirm the intended terminator position.
recArr[j].data[57]='\0';
}
struct ThdArg temp[threadCount];
for(int i=0;i<threadCount;i++)
{
// Thread i gets the i-th run of lines/threadCount consecutive records.
temp[i].thdNum = i;
temp[i].lowRec=&recArr[(lines/threadCount)*i];
temp[i].hiRec=&recArr[(lines/threadCount)*(i+1)-1];
pthread_create(&threads[i],NULL, threadFunc, (void *)&temp[i]);
}
for(int i=0;i<threadCount;i++)
{
pthread_join(threads[i], NULL);
}
}

The following line:
struct Record recArr[lines];
allocates memory on the stack. Its size is restricted.
If you read a file which can be very big, use malloc:
#include <stdlib.h>
typedef struct {
char key[KEYSIZE +1];
char data[DATASIZE +1];
}Record;
...
recArr = malloc(sizeof(Record) * lines);
...
free(recArr);
You can index the pointer just like an array. (Arrays and pointers are not the same type, but recArr[i] works identically on both.)

Related

Array of struct from binary file

I have to write a function that will read an array of structures of type Product with data from a binary file.This file contains the number of products - nr and a number of articles of type Product. What's wrong? Thank you in advance!
#define SIZE 30
/* Product record as posted in the question.
 * NOTE(review): `struc` is a misspelling of `struct`. */
typedef struc{
int id;
/* NOTE(review): in C the array size follows the name: `char name[SIZE];`. */
char[SIZE] name;
float price;
}Product;
/*
 * Allocates one Product and fills it with sizeof(Product) bytes read from fptr.
 *
 * NOTE(review): p is a by-value parameter, so the pointer returned by malloc()
 * is stored only in this local copy. The caller's pointer is unchanged and the
 * allocation cannot be reached after the function returns.
 */
void create(Product *p, FILE *fptr)
{
p = malloc(sizeof(Product));
fread(p, 1, sizeof(Product), fptr);
}
/* Opens the file named by argv[1] for binary reading and calls create(). */
int main(int argc, char* argv[])
{
// NOTE(review): argv[1] is used without checking argc, and the fopen() result is not checked.
FILE *fptr = fopen(argv[1],"rb");
// p has no value yet and is passed to create() by value.
Product *p;
create(p, fptr);
return 0;
}
You have to modify it to something like this:
#include <stdio.h>
#include <stdlib.h>
#define SIZE 30

/* One product record; the file holds records in this in-memory layout. */
typedef struct{
    int id;
    char name[SIZE];
    float price;
}Product;

/*
 * Reads exactly nr Product records from fptr into p.
 *
 * p    - destination buffer with room for at least nr records
 * fptr - stream opened for binary reading
 * nr   - number of records to read
 *
 * Returns 0 when all nr records were read (trivially so for nr == 0), and
 * -1 on a short read, a read error, a negative nr, or a NULL p/fptr.
 */
int readproducts(Product *p, FILE *fptr, int nr)
{
    /* A negative count would convert to a huge size_t and let fread()
     * write past the end of p, so reject it before touching the buffer. */
    if (nr < 0) {
        return -1;
    }
    if (nr == 0) {
        return 0;
    }
    if (p == NULL || fptr == NULL) {
        return -1;
    }
    if (fread(p, sizeof *p, (size_t)nr, fptr) != (size_t)nr) {
        return -1;
    }
    return 0;
}
/*
 * Reads a product file: a leading int holding the record count, followed by
 * that many Product records.
 *
 * Returns 0 on success and -1 on any failure (missing argument, file that
 * cannot be opened, short read, negative count, allocation failure).
 */
int main(int argc, char* argv[])
{
    int rc = -1;
    int nr = 0;
    Product *p = NULL;
    FILE *fptr;

    if (argc < 2) {
        return -1;
    }

    fptr = fopen(argv[1], "rb");
    if (NULL == fptr) {
        return -1;
    }

    // First read number of products from file.
    // Assuming this number is written as a native int at the start of the
    // file; sizeof nr is used because int is not 4 bytes on every platform.
    if (fread(&nr, sizeof nr, 1, fptr) != 1 || nr < 0) {
        goto out;
    }

    // Now, read the products
    if (nr > 0) {
        // calloc() checks the nr * sizeof multiplication for overflow.
        p = calloc((size_t)nr, sizeof *p);
        if (NULL == p) {
            goto out;
        }
        if (-1 == readproducts(p, fptr, nr)) {
            goto out;
        }
    }
    rc = 0;

out:
    // Single exit path so the buffer and the stream are released on failure too.
    free(p);
    fclose(fptr);
    return rc;
}
The way you had used malloc in your function was wrong, see here why.
PS. That said, binary writing/reading might not be portable across different computers.

how to deal with Segmentation fault in dynamically allocated struct C

I have written a program that reads in words from a text file. There is one word per line. I need to find how many times each word repeats. To find this out so far i have read the words in from the file and placed them all in a dynamically allocated array of struct. My problem is that the program keeps segmentation faulting whenever i try to run it. I assume there is a problem with how i am dynamically allocating the data.
Code is as follows;
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
//struct
// One word read from the input file together with a counter.
struct _data {
char *word; // storage for the word's text; LOAD() assigns it with calloc()
int number; // counter incremented in LOAD()
};
//scan for size of file
/*
 * Counts the whitespace-separated words in data, then rewinds the stream so
 * the caller can read the same words again.
 *
 * data - stream opened for reading; it must be seekable for the rewind to
 *        take effect
 *
 * Returns the number of words found (0 for an empty or NULL stream). A word
 * longer than 49 characters is counted once per 49-character piece.
 */
int SCAN(FILE *data) {
    int size = 0;
    char s_temp[50];

    if (data == NULL) {
        return 0;
    }
    /* %49s bounds the write into s_temp. Looping on the conversion count
     * (rather than testing feof() after the read) also counts a final word
     * that is not followed by a newline. */
    while (fscanf(data, "%49s", s_temp) == 1) {
        size++;
    }
    /* Counting leaves the stream at end-of-file; return to the start. */
    rewind(data);
    return size;
}
//load content into struct
// Reads `size` words from data into Wordstruct[0..size-1] and returns size.
// Each word is copied into storage obtained from calloc().
int LOAD(FILE *data, int size, struct _data *Wordstruct){
int i;
char temp[50];
for (i=0; i <size; i++){
// NOTE(review): the format has a single %s, so only temp is filled and the
// two trailing arguments are never used. %s has no field width, so a word of
// 50 or more characters overruns temp.
fscanf(data, "%s", temp , &Wordstruct[i].word, &Wordstruct[i].number);
// NOTE(review): strlen(temp) bytes leave no room for the '\0' that strcpy() writes below.
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
strcpy(Wordstruct[i].word, temp);
// The copy is compared with its own source, so this branch is taken every time.
// NOTE(review): number is incremented without being set first in this function.
if(strcasecmp(Wordstruct[i].word, temp) ==0){
Wordstruct[i].number++;
}
}
return size;
}
//count how many times each word repeats
// NOTE(review): `i` and `word` are both read by the strcasecmp() call below
// before either has been assigned a value.
void COUNT(struct _data *Wordstruct, int size){
int i;
int count;
count =0;
char *word;
if (strcasecmp(Wordstruct[i].word, word)==0){
count++;
for(i=0; i<size; i++){
// NOTE(review): the format consumes only the first argument (%s);
// "occurs:\t" and count are passed but never printed.
printf("%s\n",Wordstruct[i].word,"occurs:\t",count);
}
}
}
//main routine
// Expects the input file name as argv[1]; counts its words with SCAN(),
// loads them with LOAD(), then calls COUNT().
int main(int argc, char *argv[]){
int size;
FILE *data;
// NOTE(review): Wordlist is never pointed at any storage before it is handed to LOAD().
struct _data *Wordlist;
if(argc <2){
printf("Not enough arguments\n");
}
else{
// This declaration shadows the outer `data`; the fopen() result is not checked.
FILE *data= fopen(argv[1],"r");
size =SCAN(data);
LOAD(data, size, Wordlist);
COUNT(Wordlist, size);
}
return 0;
}
You haven't allocated memory for Wordlist. Add
Wordlist = malloc(size*sizeof(*Wordlist));
before the call to LOAD.
And, as pointed out by #BLUEPIXY in comments, change
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
to
Wordstruct[i].word =calloc(strlen(temp)+1, sizeof(char));
Change this:
Wordstruct[i].word =calloc(strlen(temp), sizeof(char));
To this:
Wordstruct[i].word =calloc(strlen(temp)+1, sizeof(char));
You need to account for the NUL terminator ('\0'); strlen() does not count it.

Hash Table in C (find the frequency of every word)

I want to create a hash table for an exercise I have to send in my University.
The program will open a number of files, break each file's content to <<words>> (tokens) and it will save each <<word>> in a hash table with the frequency of each <<word>>.
In case the word is already in the hash table , the program will increase the word's frequency.
At the end the program will print the words and their frequencies accordingly.
Also the frequencies should be printed from the highest word frequency to the lowest.
The comparison of the <<words>> will ignore upper and lower case letters.
For example if a file contains : one two three four Two Three Four THREE FOUR FoUr
It should print:
four 4
three 3
two 2
one 1
The professor gave us a template that we should complete but I'm really confused on what to do with the insert_ht() and clear_ht() functions as well as the compare one.
Here is the code :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Number of buckets in htable. */
#define HTABLE_SIZ 1001
/* Size of the line buffer used by Process(). */
#define MAX_LINE_SIZ 1024
/* Hash Table */
/* `link` is a pointer to a list node. */
typedef struct node* link;
/* One entry: the token text, its occurrence count, and the next entry in the same bucket. */
struct node { char *token; int freq; link next; };
link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */
/* Maps a token to a bucket index. */
unsigned int hash (char *tok );
/* Records one occurrence of a token (left for the student to implement). */
void insert_ht (char *data);
/* Left for the student to implement. */
void clear_ht ( );
/* Prints every entry after sorting with compare(). */
void print_ht ( );
/* Tokenizes a file and inserts each token. */
void Process(FILE *fp);
/*
 * Program entry point: feeds every file named on the command line through
 * Process(), then prints the accumulated table and clears it.
 * A file that cannot be opened is reported on stderr and skipped.
 */
int main(int argc, char *argv[])
{
    int arg;
    FILE *in;

    for (arg = 1; arg < argc; ++arg) {
        in = fopen(argv[arg], "r");
        if (in != NULL) {
            Process(in);
            fclose(in);
        } else {
            fprintf(stderr, "Problem opening file: %s\n", argv[arg]);
        }
    }

    print_ht();
    clear_ht();
    return 0;
}
/*
 * Reads fp line by line (at most MAX_LINE_SIZ - 1 characters per read),
 * splits each line at the separator characters, and passes every resulting
 * token to insert_ht().
 */
void Process(FILE *fp)
{
    const char *delims = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
    char line[MAX_LINE_SIZ];
    char *tok;

    while (fgets(line, MAX_LINE_SIZ, fp) != NULL) {
        tok = strtok(line, delims);
        while (tok != NULL) {
            insert_ht(tok);
            tok = strtok(NULL, delims);
        }
    }
}
/* Hash Function */
/*
 * Maps tok to a bucket index in [0, HTABLE_SIZ).
 * Every character is upper-cased before it is mixed in, so tokens that
 * differ only in letter case land in the same bucket.
 */
unsigned int hash(char *tok)
{
    unsigned int hv = 0;

    while (*tok) {
        /* toupper() requires a value representable as unsigned char (or
         * EOF); a plain char can be negative for non-ASCII bytes. */
        hv = (hv << 4) | (unsigned int)toupper((unsigned char)*tok++);
    }
    return hv % HTABLE_SIZ;
}
void insert_ht(char *token)
{
……………………………………………
}
void clear_ht()
{
……………………………………………
}
int compare(const void *elem1, const void *elem2)
{
……………………………………………
}
/*
 * Prints every entry of the hash table, one "token  frequency" line each,
 * in the order produced by qsort() with compare().
 *
 * The nodes are first collected into a temporary array of `size` links.
 * Nothing is printed when the table is empty; an allocation failure is
 * reported on stderr.
 */
void print_ht()
{
    int i, j = 0;
    link l, *vector;

    if (size <= 0) {
        return;
    }
    vector = malloc(sizeof *vector * (size_t)size);
    if (vector == NULL) {
        fprintf(stderr, "print_ht: out of memory\n");
        return;
    }

    /* j < size keeps the writes inside vector even if `size` and the lists
     * have fallen out of step. */
    for (i = 0; i < HTABLE_SIZ; i++) {
        for (l = htable[i]; l && j < size; l = l->next) {
            vector[j++] = l;
        }
    }

    /* Sort and print only the j slots that were actually filled. */
    qsort(vector, (size_t)j, sizeof *vector, compare);
    for (i = 0; i < j; i++) {
        printf("%-50s\t%7d\n", vector[i]->token, vector[i]->freq);
    }
    free(vector);
}
I'll answer you in a new post because it's hard to be exhaustive in comments.
1. Malloc
Why would I need to use malloc then ? Shouldn't i write directly to the htable? (on the insert_ht() funtion)
You need to use malloc because you declare a char pointer in the struct (char *token). The thing is that you never initialize the pointer to anything, and since you don't know the size of the token in advance, you need to malloc storage for every token. But, as you use strdup(token), you don't need to malloc the token yourself because strdup does it for you. So don't forget to free every token in order to avoid memory leaks.
2. Segfault
I can't test you code, but it seems like the following line causes the segmentation fault :
list = htable[hashval]->token
Indeed, you try to access token while htable[hashval] is NULL, and to assign a char * to a link type (list).
You need to loop with this :
for(list = htable[hashval]; list != NULL; list = list->next) { ... }
3. Notes
if (x=1) should be if(x==1).
Don't malloc new_list if you don't need to.
Because new_list is used when htable[hashval] is NULL, new_list->next = htable[hashval]; will set new_list->next to NULL.
You should use the -Wall option in gcc (for warnings) and you may use valgrind to understand your segmentation faults. In this case, use gcc with debug mode (-g).
Double and Final edit : I found the solution. Apparently for some reason my compare function was wrong.
I still haven't figured out why but here is the correct one, hopefully someone else will find this post helpful!
int compare(const void *elem1, const void *elem2)
{
return (*(link*)elem2)->freq - (*(link*)elem1)->freq;
}
Edit: deleted old answer . Found the correct way I think but I have another problem right now.
The compare function doesn't work correctly. My printf is fine but it doesn't sort them by frequency. I want them to be sorted from the highest to the lowest.
In this example: the file contains -> one two three four Two Three Four THREE FOUR FoUr
And I get:
two 2
one 1
four 4
three 3
While I should be getting :
four 4
three 3
two 2
one 1
Here is the code. Feel free to help!
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
/* Number of buckets in htable. */
#define HTABLE_SIZ 1001
/* Size of the line buffer used by Process(). */
#define MAX_LINE_SIZ 1024
/* Hash Table */
/* `link` is a pointer to a list node. */
typedef struct node* link;
/* One entry: the token text, its occurrence count, and the next entry in the same bucket. */
struct node { char *token; int freq; link next; };
link htable[HTABLE_SIZ] = { NULL }; /* Table of lists (#buckets) */
int size = 0; /* Size (number of elements) of hash table */
/* Maps a token to a bucket index. */
unsigned int hash (char *tok );
/* Records one occurrence of a token. */
void insert_ht (char *data);
/* Declared only; no definition appears in this listing. */
void clear_ht ( );
/* Prints every entry after sorting with compare(). */
void print_ht ( );
/* Tokenizes a file and inserts each token. */
void Process(FILE *fp);
/*
 * Program entry point: feeds every file named on the command line through
 * Process() and then prints the accumulated table, emitting a trace line
 * before each step. A file that cannot be opened is reported on stderr and
 * skipped. clear_ht() is not called in this version.
 */
int main(int argc, char *argv[])
{
    int arg;
    FILE *in;

    printf("prin tin for \n");
    for (arg = 1; arg < argc; ++arg) {
        printf("prin tin fopen \n");
        in = fopen(argv[arg], "r");
        if (in != NULL) {
            printf("prin tin process \n");
            Process(in);
            fclose(in);
        } else {
            fprintf(stderr, "Problem opening file: %s\n", argv[arg]);
        }
    }

    print_ht();
    return 0;
}
/*
 * Reads fp line by line (at most MAX_LINE_SIZ - 1 characters per read),
 * splits each line at the separator characters, and passes every resulting
 * token to insert_ht(), printing a trace line before each insertion.
 */
void Process(FILE *fp)
{
    const char *delims = " ?!'\";,.:+-*&%(){}[]<>\\\t\n";
    char line[MAX_LINE_SIZ];
    char *tok;

    while (fgets(line, MAX_LINE_SIZ, fp) != NULL) {
        tok = strtok(line, delims);
        while (tok != NULL) {
            printf("prin tin insert %s \n", tok);
            insert_ht(tok);
            tok = strtok(NULL, delims);
        }
    }
}
/* Hash Function */
/*
 * Maps tok to a bucket index in [0, HTABLE_SIZ), printing trace output on
 * entry and before returning.
 * Every character is upper-cased before it is mixed in, so tokens that
 * differ only in letter case land in the same bucket.
 */
unsigned int hash(char *tok)
{
    printf("bike stin hash \n");
    unsigned int hv = 0;

    while (*tok) {
        /* toupper() requires a value representable as unsigned char (or
         * EOF); a plain char can be negative for non-ASCII bytes. */
        hv = (hv << 4) | (unsigned int)toupper((unsigned char)*tok++);
    }
    /* hv is unsigned, so it is printed with %u rather than %d. */
    printf("VGAINEIIIIIIIIIIIIII %u \n",hv);
    return hv % HTABLE_SIZ;
}
void insert_ht(char *token)
{
printf("bike stin insert %s \n",token);
unsigned int hashval = hash(token);
if (htable[hashval]==NULL){
printf("mesa stin prwti if %u %s \n",hashval,token);
//token = strdup(token);
htable[hashval] = malloc(sizeof(token));
htable[hashval]->token = token ;
htable[hashval]->freq = 1;
size++;
}else {
htable[hashval]->freq++;
}
printf("ta evale epitixws \n");
}
/*
 * qsort() comparator used by print_ht(); written to order by ascending freq.
 *
 * NOTE(review): print_ht() sorts an array of `link` (struct node *), so
 * qsort() hands this function pointers to those elements, i.e. `link *`.
 * Treating them as `struct node *` reads freq from the wrong memory; the
 * arguments need one dereference first, e.g.
 * `link p1 = *(const link *)elem1;`.
 * The order asked for in the question is highest frequency first, which
 * also needs the comparison reversed.
 */
int compare(const void *elem1, const void *elem2)
{
const struct node *p1 = elem1;
const struct node *p2 = elem2;
if ( p1->freq < p2->freq)
return -1;
else if (p1->freq > p2->freq)
return 1;
else
return 0;
}
/*
 * Prints every entry of the hash table, one "token  frequency" line each,
 * in the order produced by qsort() with compare().
 *
 * The nodes are first collected into a temporary array of `size` links.
 * Nothing is printed when the table is empty; an allocation failure is
 * reported on stderr.
 */
void print_ht()
{
    int i, j = 0;
    link l, *vector;

    if (size <= 0) {
        return;
    }
    vector = malloc(sizeof *vector * (size_t)size);
    if (vector == NULL) {
        fprintf(stderr, "print_ht: out of memory\n");
        return;
    }

    /* j < size keeps the writes inside vector even if `size` and the lists
     * have fallen out of step. */
    for (i = 0; i < HTABLE_SIZ; i++) {
        for (l = htable[i]; l && j < size; l = l->next) {
            vector[j++] = l;
        }
    }

    /* Sort and print only the j slots that were actually filled. */
    qsort(vector, (size_t)j, sizeof *vector, compare);
    for (i = 0; i < j; i++) {
        printf("%-50s\t%7d\n", vector[i]->token, vector[i]->freq);
    }
    free(vector);
}
Sorry for my bad english.
I think that :
insert_ht(char *token) takes a word of the file and puts it into the hash table. In brief, if the word exists in the hash table, you just have to increment its frequency. Otherwise, you need to create another node, set its frequency to 1, and then add it to the array. At the end, you will have one entry for each unique word.
compare(const void *elem1, const void *elem2) will be used by qsort. It returns 0 if elem1 = elem2, a negative number if elem1 < elem2 and a number > 0 if elem1 > elem2. By passing compare to qsort, you allow qsort to sort you array according to your own criteria.
clear_ht() may set all the values of the array to NULL, in order to restart another count ?

Struct arrays in C

Hi I'm having trouble trying to initializing each element of the struct array. When I try and assign the value ZERO to both 'bSize' and 'msgs', it doesn't work as it errors out when i get to malloc. In the printf statement it prints a -1852803823 number. Excuse the messy code as i'm playing around trying to figure it out.
/* Per-node message buffer used by getSchedFile(). */
struct message{
int *data; // buffer obtained from malloc()/realloc() in getSchedFile()
int bSize; // set to 0 in getSchedFile(); 0 there selects malloc() over realloc()
int msgs; // set to 0 in getSchedFile()
};
int main(int argc, char *argv[]) {
.....
}
void getSchedFile (FILE *file, int **schd) {
struct message sMsg[nodeCount];
const int pakSize = 6;
// Iniitialise message buffer
for (int i=0; i<nodeCount; i++){
sMsg[i].bSize = 0;
sMsg[i].msgs = 0;
printf("bSize %d\n",sMsg[i].bSize);
}
/* Get the number of bytes */
fseek(file, 0L, SEEK_SET);
int time;
while((fscanf(file, "%d", &time)) != EOF){
int src;
fscanf(file, "%d", &src); // get source node id
// These are here for easier reading code
int aPos = sMsg[src].bSize;
int nMsg = sMsg[src].msgs;
printf("size %d\n", sMsg[src].bSize);
if (sMsg[src].bSize==0){
sMsg[src].data = malloc( pakSize * sizeof(int));
}else{
sMsg[src].data = realloc(sMsg[src].data, (aPos+pakSize)*sizeof(int));
}
Where is the nodeCount value coming from? Is it a global variable? You should be very careful with global variables, and avoid using them if possible.
Pass the nodeCount in the method parameter and as Charlie mentioned, check it for > 0

C - struct problems - writing

I'm making a program in C, and I'm having some troubles with memory, I think.
So my problem is: I have 2 functions that return a struct. When I run only one function at a time I have no problem whatsoever. But when I run one after the other I always get an error when writing to the second struct.
Function struct item* ReadFileBIN(char *name) -- reads a binary file.
struct tables* getMesasInfo(char* Filename) -- reads a text file.
My code is this:
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Element count of the array returned by getMesasInfo(). */
int numberOfTables=0;
/* Set by ReadFileBIN() to the number of records it counted, minus one. */
int numberOfItems=0;
//struct tables* mesas;
//struct item* Menu;
/* One menu record as read from the binary file by ReadFileBIN().
 * NOTE(review): the typedef names no alias, so it only declares `struct item`. */
typedef struct item{
char nome[100]; // printed under the label "Nome" by ReadFileBIN()
int id;
float preco; // printed under the label "Preco" by ReadFileBIN()
};
/* One table entry filled in by getMesasInfo().
 * NOTE(review): this typedef names no alias either. */
typedef struct tables{
int id;
int capacity;
// NOTE(review): in C, `bool` needs <stdbool.h>, which is not among the
// includes shown - presumably supplied by stdafx.h or a C++ build; confirm.
bool inUse;
};
/*
 * Reads table capacities from the text file Filename and returns a
 * malloc'ed array with one struct tables per value read. The number of
 * entries filled in is stored in the global numberOfTables.
 * The file is opened twice: once to count tokens, once to load them.
 */
struct tables* getMesasInfo(char* Filename){
struct tables* mesas;
char *c;
int counter,numberOflines=0,temp=0;
char *filename=Filename;
FILE * G;
G = fopen(filename,"r");
if (G==NULL){
printf("Cannot open file.\n");
}
else{
while (!feof(G)){
// NOTE(review): &c is the address of the pointer variable itself, so %s
// writes the token's characters over c's own storage (and past it for
// longer tokens).
fscanf(G, "%s", &c);
numberOflines++;
}
fclose(G);
}
/* Memory allocate for input array */
// NOTE(review): sizeof(struct tables*) is the size of a pointer, not of a
// struct tables element.
mesas = (struct tables *)malloc(numberOflines* sizeof(struct tables*));
counter=0;
// NOTE(review): reopened without a NULL check, even when the first fopen() failed.
G=fopen(filename,"r");
while (!feof(G)){
// Table ids are simply the 0-based position in the file.
mesas[counter].id=counter;
fscanf(G, "%d", &mesas[counter].capacity);
mesas[counter].inUse= false;
counter++;
}
fclose(G);
numberOfTables = counter;
return mesas;
}
/*
 * Reads struct item records from the file `name`, prints each one, and
 * returns them in a calloc'ed array. The global numberOfItems is set to the
 * number of read attempts minus one.
 * The file is read twice: once to count records, once to load them.
 */
struct item* ReadFileBIN(char *name)
{
int total=0;
int counter;
FILE *ptr_myfile;
struct item my_record;
struct item* Menu;
// NOTE(review): opened in text mode ("r") although it is read with fread().
ptr_myfile=fopen(name,"r");
if (!ptr_myfile)
{
// NOTE(review): execution continues with a NULL stream after this message.
printf("Unable to open file!");
}
// Counts fread() attempts; the attempt that hits end-of-file is counted too.
while (!feof(ptr_myfile)){
fread(&my_record,sizeof(struct item),1,ptr_myfile);
total=total+1;
}
numberOfItems=total-1;
// NOTE(review): the calloc() result is used below without a NULL check.
Menu = (struct item *)calloc(numberOfItems , sizeof(struct item));
// The seek is immediately undone by rewind().
fseek(ptr_myfile, sizeof(struct item), SEEK_END);
rewind(ptr_myfile);
// NOTE(review): the index runs from 1 to total-1, but Menu has total-1
// elements (valid indices 0..total-2): Menu[0] is never filled and
// Menu[total-1] is past the end.
for ( counter=1; counter < total ; counter++)
{
fread(&my_record,sizeof(struct item),1,ptr_myfile);
Menu[counter] = my_record;
printf("Nome: %s\n",Menu[counter].nome);
printf("ID: %d\n",Menu[counter].id);
printf("Preco: %f\n",Menu[counter].preco);
}
fclose(ptr_myfile);
return Menu;
}
/*
 * Entry point: loads the menu from "menu.dat" and the table capacities from
 * "Capacity.txt", waits for a key press, then releases both arrays.
 *
 * Returns 0.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    struct item* tt = ReadFileBIN("menu.dat");
    struct tables* t = getMesasInfo("Capacity.txt");

    getchar();

    /* Both functions return heap arrays that the caller owns. */
    free(t);
    free(tt);
    return 0;
}
the error that im getting is :
"Unhandled exception at 0x00411700 in test.exe: 0xC0000005: Access violation writing location 0x00000000."
in "Menu[counter] = my_record;"
Thanks in advance.
You seem to allocate a memory block of the wrong size in getMesasInfo(): sizeof(struct tables*) gives you the size of the pointer, not that of the struct it is pointing to:
mesas = (struct tables *)malloc(numberOflines* sizeof(struct tables*));
So you can easily overwrite unallocated memory. The proper allocation should be
mesas = (struct tables *)malloc(numberOflines* sizeof(struct tables));
or, similar to how you allocate the other array in ReadFileBIN():
mesas = (struct tables *)calloc(numberOflines, sizeof(struct tables));
Moreover, I don't know whether it's intentional or not, but in ReadFileBIN() you are allocating (1) and reading (2) one less record than the total number of records:
numberOfItems=total-1; // 1
Menu = (struct item *)calloc(numberOfItems , sizeof(struct item)); // 1
fseek(ptr_myfile, sizeof(struct item), SEEK_END);
rewind(ptr_myfile);
for ( counter=1; counter < total ; counter++) // 2
...
Since the loop counter is started from 1 (instead of 0 as is normal in C), you effectively execute the loop total-1 times. That is, you read into elements 1 to total-1 of the array. However, the real elements of the array are indexed from 0 to total-2 so the very first element in the array is left uninitialized, and in the end you commit a buffer overflow error.
You have a problem in these lines:
numberOfItems=total-1;
Menu = (struct item *)calloc(numberOfItems , sizeof(struct item));
fseek(ptr_myfile, sizeof(struct item), SEEK_END);
rewind(ptr_myfile);
for ( counter=1; counter < total ; counter++)
First, you're setting numberOfItems to one less than the total. This is incorrect. You don't even need numberOfItems; since total has the number of lines in the file, the next line should really be Menu = (struct item*) calloc(total, sizeof(struct item));
Second, you're trying to use Menu as a one-based array in the for loop. C arrays are zero-based. You should have the for loop use for (counter = 0; counter < total; counter++).
Finally, as Peter pointed out, in the first function you're allocating the wrong size of object. You need to malloc numberOflines * sizeof(struct tables), not numberOflines * sizeof(struct tables*).
To further illustrate Peter's point:
/* Same fields as the question's struct, with inUse declared as int. */
struct tables {
int id;
int capacity;
int inUse; /* bool is available in C only through <stdbool.h> (C99 and later) */
};
/* Prints the size of a pointer to struct tables, then the size of the struct itself. */
int main(void)
{
    /* sizeof yields a size_t, which printf() expects as %zu; %d is only
     * correct where size_t happens to have the same representation as int. */
    printf("sizeof: %zu\n", sizeof(struct tables*));
    printf("sizeof: %zu\n", sizeof(struct tables));
    return 0;
}
Will output:
sizeof: 4
sizeof: 12

Resources