Multithreading going throught an array of struct in C - c

I have wrote a program that receives as input a text file and return as output another text file.
The text file is created with a script(python) inside a 3D app (Blender) , and it contains a list of vertex that are part of a square mesh. The program receives that data, stores it in a struct, and return a list of vertex that forms a smaller square. Than, the 3D app, again with a script, reads this vertices and separate them from the original mesh. Doing this several times, the original mesh will be divided in many squares of the same area.
BY NOW, IT WORKS ;)
But is terribly low.. When doing it on 200k vertices it takes a while, but running it on 1kk vertices it takes ages
Here the source:
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
typedef struct{
int index;
float x,y,z;
} vertex;
vertex *find_vertex(vertex *list, int len)
{
int i;
vertex lower,highter;
lower=list[0];
highter=list[1];
//find the lower lefter and the upper righter vertices
for(i=0;i<len;i++)
{
if ((list[i].x<=lower.x) && (list[i].y<=lower.y))
lower=list[i];
if ((list[i].x>=highter.x) && (list[i].y>=highter.y))
highter=list[i];
}
vertex *ret;//create a pointer for returning 2 structs
ret=(vertex*)malloc(sizeof(vertex)*2);
if (ret==NULL)
{
printf("Can't allocate the memory");
return 0;
}
ret[0]=lower;
ret[1]=highter;
return ret;
}
vertex *square_list_of_vertex(vertex *list,int len,vertex start, float size)
{
int i=0,a=0;
unsigned int *num;
num=(int*)malloc(sizeof(unsigned int)*len);
if (num==NULL)
{
printf("Can't allocate the memory");
return 0;
}
//controlls if point is in the right position and adds its index in the main list in another array
for(i=0;i<len;i++)
{
if ((list[i].x-start.x)<size && (list[i].y-start.y<size))
{
if (list[i].y-start.y>-size/100)//it was adding also wrong vertices. This line is to solve a bug
{
num[a]=i;
a++;//len of the return list
}
}
}
//create the list with the right vertices
vertex *retlist;
retlist=(vertex*)malloc(sizeof(vertex)*(a+1));
if (retlist==NULL)
{
printf("Can't allocate the memory");
return 0;
}
//the first index is used only as an info container
vertex infos;
infos.index=a+1;
retlist[0]=infos;
//set the value for the return pointer
for(i=1;i<=a;i++)
{
retlist[i]=list[num[i-1]];
}
return retlist;
}
//the function that pass the data to python
void return_funct_1(vertex lower,vertex highter)
{
FILE* ret;
ret=fopen("max_min.txt","w");
if (ret==NULL)
{
printf("Error opening the file\n");
return;
}
fprintf(ret,"%i\n",lower.index);
fprintf(ret,"%i\n",highter.index);
fclose(ret);
}
//the function that pass the data to python
void return_funct_2(vertex *squarelist)
{
FILE* ret;
int i,len;
ret=fopen("square_list.txt","w");
if (ret==NULL)
{
printf("Error opening the file\n");
return;
}
len=squarelist[0].index;
for(i=1;i<len;i++)
{
//return all the informations
//fprintf(ret,"%i %f %f %f\n",squarelist[i].index,squarelist[i].x,squarelist[i].y,squarelist[i].z);
//just return the index(it's enought for the python script)
fprintf(ret,"%i\n",squarelist[i].index);
}
fclose(ret);
}
//argv:
//function[1/2] number_of_vert(int) size_of_square(int) v_index(int) v_xcoord(float) v_ycoord(float) v_zcoord(float)...
//example of file: 2 4 2 0 1 2 3 1 1 2 3 2 1 2 3 3 1 2 3 4 1 2 3 //function 1, number of ver=4, size=2 and then the 4 vertex with their coords
int main(int argc, char *argv[])
{
if(argc==1)
{
printf("%s need a path to a vectorlist file\n",argv[0]);
return 0;
}
FILE* input;
input=fopen(argv[1],"r");
if (input==NULL)
{
printf("Error opening the file\n");
return(0);
}
int func=0,i=0,a=0,u=0;
char read;
char* argument;
argument=(char*)malloc(sizeof(char)*50);//yeah, i know, i should use list instead of an array, but when i first coded this i was quite in hurry (and i'm still learning )
//get the first paramater in the file
argument[0]=fgetc(input);
argument[1]='\0';
func=atoi(argument);
//skipp the space
read=fgetc(input);
//get the number of vertices;
i=0;
do {
read=fgetc(input);
argument[i]=read;
i++;
}while(read!=' ' && !feof(input) );
//set the end of the string
argument[i]='\0';
//set the variable to the correct integer value;
int vnumber=atoi(argument);
i=0;
do {
read=fgetc(input);
argument[i]=read;
i++;
} while(read!=' ' && !feof(input));
//set the end of the string
argument[i]='\0';
float sqsize=atof(argument);
vertex *list;
//allocate memory in the array to fit the number of vertex needed
list=(vertex*)malloc(sizeof(vertex)*vnumber);
//control if the memory get allocated
if (list==NULL)
{
printf("Can't allocate the memory");
return 0;
}
//do the cycle for each vertex
for(u=0;u<vnumber;u++)
{
//read the number and assign it to the proper value of the vertex
for(a=0;a<4;a++)
{
i=0;
do
{
read=fgetc(input);
argument[i]=read;
i++;
} while(read!=' ' && !feof(input));
argument[i]='\0';
if(a==0)
list[u].index=atoi(argument);
if(a==1)
list[u].x=atof(argument);
if(a==2)
list[u].y=atof(argument);
if(a==3)
list[u].z=atof(argument);
}
}
//close the file
fclose(input);
if (func==1)
{
//find the lowest vertex and the higtest vertex
vertex lower;
vertex highter;
vertex *lohi;
lohi=(vertex*)find_vertex(list, vnumber);
lower=lohi[0];
highter=lohi[1];
free(lohi);
return_funct_1(lower,highter);//the function that return the data to python
return 1;
}
if(func==2)
{
//find the list to return
vertex *lohi;
lohi=(vertex*)find_vertex(list, vnumber);
vertex lower;
lower=lohi[0];
free(lohi);
return_funct_2(square_list_of_vertex(list,vnumber, lower, sqsize));//the function that return the data to python
return 1;
}
printf("Function argument was wrong: nothing was done\n");
}
I would really appreciate any help for making this multithreaded.. It takes ages to work on really big data(today i've tried with a 50mb text file, and after 20 mins it had run only 30 times(on the 26000 i needed)), and since quite all pc that will use this will have at least 4 cores, i would really like to get it multithreaded!
Thanks in advice! :)
Ps: if you need, i can post the python script code too, but it's quite full of calls to the internal api of the program, so i don't really know if it would be usefull.

I am not going to work specifically through your code but your algorithm may be able to apply Map and Reduce.
This is an article of how you can use it in C:
http://pages.cs.wisc.edu/~gibson/mapReduceTutorial.html

When I profile your code running over a sample dataset of 2 million random vertexes, with the source file preloaded into the page cache the bottleneck is the conversion of strings to floats (it still runs in only 5 seconds, though - so it's not that slow).
It is possible to multithread the conversion of strings to floats, and with careful coding you will get some gains this way. However, you will get much more bang for your buck if instead you change the Python code to write the floating point numbers in a binary format that can be directly loaded by the C code (with fread()). I believe you can use struct.pack on the Python side to achieve this.
The processing part of your code can certainly be improved too, but until it is the bottleneck I wouldn't worry about it.

Related

Recovering colums/rows Matrix from a file

so i've this program that scan and print the matrix from a file. anyways the program i have works with normal matrix i mean Square matrix but now i want to make a manual matrix i mean i've to enter the number of lines/columns and then i call the lines and columns in the main.
So the program below explains the situation
int recuperation (int t[][20], char *nomFichier){
int nbElement=0 ,i,j,nbElement2=0;
FILE *fp;
fp=fopen(nomFichier,"r");
if(fp!=NULL)
{
fscanf(fp,"%d\n",&nbElement);
fscanf(fp,"%d\n",&nbElement2);
if(nbElement && nbElement2)
{
for(i=1;i<=nbElement;i++)
{
for(j=1;j<=nbElement2;j++)
{
fscanf(fp,"%d",&t[i-1][j-1]);
}
}
}
}
else
printf("\n Fichier vide \n");
return nbElement;
}
You see the return? nbElement that's the number of lines but i want to return the number of columns too, which is nbElement2.
Because later in main() i've to call this function by typing:
l= recuperation(t,txtfile)
but i can't call the columns since i returned only 1 value.
Hope that you got what i mean, thanks.
The best thing to do is to provide the columns and rows as pointers to the function. This way, when you assign values to those variables, they change outside of the function too.
int recuperation (int t[][20], char *nomFichier, int * rows, int * columns){
int i,j;
FILE *fp;
fp=fopen(nomFichier,"r");
if(fp!=NULL)
{
fscanf(fp,"%d\n",rows);
fscanf(fp,"%d\n",columns); // already a pointer
if(*rows && *columns) // dereference the pointer to get the value
{
for(i=1;i<=*rows;i++)
{
for(j=1;j<=*columns;j++)
{
fscanf(fp,"%d",&t[i-1][j-1]);
}
}
}
}
else
printf("\n Fichier vide \n");
return 0;
}

CUDA code not processing if block properly

Stuck at if block right below //step 5, the issue is that the code will not progress into or after the given if block. I need to figure out how to get this particular issue settled before starting the task of generating parallel code. If you run the code you will see one print statement that indicates the value of "one" and another two for "i" and "j". After the if block begins, none of the other print statements are hit. As a result I am quite stuck, I am aware that this is a specific issue, however, I cannot seem to determine it's cause.
Any help is appreciated!
Thanks in advance!
Input file sample.
>386.fasta.screen.Contig1
GAGTTTGATCCTGGCTCAGAATCAACGCTGGCGGCGCGCTTAACACATGC
AAGTCGAACGAGAAAGTGGAGCAATCCATGAGTACAGTGGCGTACGGGTG
AGTAACACGTGGGTAATCTACCTCTTAGTGGGGAATAACTTTGGGAAACC
GAAGCTAATACCGCATAAGCTCGAGAGAGGAAAGCAGCAATGCGCTGAGA
GAGGAGCCCGCGGCCGATTAGCTAGTTGGCAGGGTAAAAGCCTACCAAGG
CAGAGATCGGTAGCCGGCCTGAGAGGGCACACGGCCACACTGGCACTGAA
ACACGGGCCAGACTCCTACGGGAGGCAGCAGTGGGGAATCTTGCACAATG
GGGGCAACCCTGATGCAGCGACGCCGCGTGAGCGATGAAGCCCTTCGGGG
TGTAAAGCTCTTTCGTCAGGGAAGATAGTGACGGTACCTGGAGAAGCAGC
TGCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGCAGCGAGCGT
TGTTCGGAGTTACTGGGCGTAAAGGGTGTGTAGGCGGTTGTTTAAGTTTG
GTGTGAAATCTCCCGGCTCAACTGGGAGGGTGCGCCGAATACTGAGCGAC
TAGAGTGCGGGAGAGGAAAGTGGAATTCCTGGTGTAGCGGTGAAATGCGT
AGATATCAGGAGGAACACCGGTGGTGTAGACGGCTTTCTGGACCGTAACT
GACGCTGAGACACGAAAGCGTGGGTAGCAAACAGGATTAGATACCCTGGT
AGTCCACGCCCTAAACGATGCATATTTGGTGTGGGCAGTTCATTCTGTCC
GTGCCGGAGCTAACGCGTTAAATATGCCGCCTGGGGAGTACAGTCGCAAG
GCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGT
GGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGGCTCGAACGGCTTC
CCAACGCCGGTAGAAATATCGGTACCCCGCAAGGGGGTGGAATCGAGGTG
CTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGC
AACGAGCGCAACCCTTGTCCTGTGTTGCCATGCCGCAAGGCGGCACTCGC
AGGAGACCGCCAGCGATAAGCTGGAGGAAGGTGGGGATGACGTCAAGTCC
TCATGGCCTTTATGTCCAGGGCTACACACGTGCTACAATGGCCGGTACAA
AGCGTCGCTAACCTGCGAAGGGGAGCCAATCGCAAAAAACCGGTCTCAGT
TCGGATTGCAGGCTGCAACCCGCCTGCATGAAGCTGGAATCGCTAGTAAT
GGCAGATCAGCACGCTGCCGTGAATACGTTCCCGGGCCTTGTACACACAT
/********************************
Based on code by:
Lorenzo Seidenari (sixmoney#virgilio.it)
*********************************/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#define MAX_SEQUENCE_LENGTH 100000
int n;
int m;
int levenshtein_distance(char *s,char*t);
int minimum(int a,int b,int c);
//-----------------------------------------------------------------------------
void cleanString(char string[]) {
//Removes all spaces from string pointed to by "string", converts characters
//to uppercase, and deletes a terminating newline character.
int i, current;
int length = strlen(string);
current = 0;
for(i=0;i<length;i++) {
if(string[i]=='\n') {
string[current++] = '\0';
break;
}
else if(string[i]!=' ') {
string[current++] = toupper(string[i]);
}
}
}
//-----------------------------------------------------------------------------
int importFASTA(char *filename, char *sequence) {
//Reads a file, located at path specified by "filename", containing a FASTA
//sequence. It finds the first full, complete sequence in the file, stores
//it in "sequence", and returns the length of the sequence, or -1 on failure.
FILE *fastaFile;
char input[256];
int readFlag; //set to 1 once a sequence has been read in
int length;
//open the file
if((fastaFile = fopen(filename, "r")) == NULL) {
return -1;
}
sequence[0] = '\0';
//read the full first sequence, discarding unnecessary headers
readFlag=0;
length = 0;
while(fgets(input,256,fastaFile)!=NULL) {
//is it a header or a comment?
if(input[0]=='>' || input[0]==';') {
if(readFlag) break;
else continue;
}
else readFlag = 1;
cleanString(input);
length += strlen(input);
strncat(sequence,input,MAX_SEQUENCE_LENGTH-length - 1);
}
//Add a terminatng null character, just in case
sequence[length] = '\0';
fclose(fastaFile);
return length;
}
/****************************************/
/*Implementation of Levenshtein distance*/
/****************************************/
__global__ void levenshtein_distance(char *s,char*t, int one, int two)
/*Compute levenshtein distance between s and t*/
{
//Step 1
int k,i,j,cost,*d;
int distance = 0;
if(one!=0&&two!=0)
{
d=(int *)malloc((sizeof(int))*(two+1)*(one+1));
two++;
one++;
//Step 2
for(k=0;k<one;k++){
d[k]=k;
}
for(k=0;k<two;k++){
d[k*one]=k;
}
//Step 3 and 4
for(i=1;i<one;i++){
for(j=1;j<two;j++)
{
//Step 5
printf("%d %d %d\n", one, i, j);
if(s[i-1]==t[j-1]){
cost=0;
printf("%d %d %d\n", one, i, j);
}
else{
cost=1;
printf("%d %d %d\n", one, i, j);
}
printf("%d %d %d\n", one, i, j);
//Step 6
int min = d[(j-1)*one+i]+1;
if (d[j*one+i-1]+1 < min)
min = d[j*one+i-1]+1;
if (d[(j-1)*one+i-1]+cost < min)
min = d[(j-1)*one+i-1]+cost;
d[j*one+i] = min;
}
distance=d[one*two-1];
free(d);
printf("%d\n", distance);
}
}
else
printf ("-1");
}
int main(int argc, char *argv[]) {
char A[MAX_SEQUENCE_LENGTH+1];
char B[MAX_SEQUENCE_LENGTH+1];
if(argc < 3) {
printf("Usage: new_edit_distance <sequence1> <sequence2>\n");
printf("<sequence1>: file containing the first sequence, FASTA format\n");
printf("<sequence2>: file containing the second sequence, FASTA format\n");
return EXIT_FAILURE;
}
n = importFASTA(argv[1],A);
m = importFASTA(argv[2],B);
levenshtein_distance<<<1, 1>>>(A,B, n, m);
cudaDeviceSynchronize();
printf ("%s\n", cudaGetErrorString(cudaGetLastError()));
return EXIT_SUCCESS;
}
I get it now. You took straight serial C/C++ code, dropped it into a kernel, intended to run that kernel as a single thread, and then want to proceed from there.
The idea is plausible, but you're missing a key fact about CUDA and GPUs: they can't directly access host memory.
So when you set up A and B like this:
char A[MAX_SEQUENCE_LENGTH+1];
char B[MAX_SEQUENCE_LENGTH+1];
....
n = importFASTA(argv[1],A);
m = importFASTA(argv[2],B);
those are ordinary variables that live in host memory. GPU (ordinary CUDA) code can't directly access host memory. So when you pass those pointers to a kernel like this:
levenshtein_distance<<<1, 1>>>(A,B, n, m);
the GPU code will try and dereference those A and B pointers and will fault (unspecified launch failure).
Every CUDA program has the following basic sequence:
copy data to the GPU
perform computations on the GPU
copy results back
You've tried to do step 2 without step 1. It won't work.
Since I'm not able to run your program since I don't have valid input files, I'll make the following suggestion. I assume you know little or nothing about CUDA. Try adding lines like this:
n = importFASTA(argv[1],A); // no change
m = importFASTA(argv[2],B); // no change
char *d_A, *d_B; // add this line
cudaMalloc(&d_A, MAX_SEQUENCE_LENGTH+1); // add this line
cudaMalloc(&d_B, MAX_SEQUENCE_LENGTH+1); // add this line
cudaMemcpy(d_A, A, MAX_SEQUENCE_LENGTH+1, cudaMemcpyHostToDevice); // add
cudaMemcpy(d_B, B, MAX_SEQUENCE_LENGTH+1, cudaMemcpyHostToDevice); // add
levenshtein_distance<<<1, 1>>>(d_A,d_B, n, m); //modify parameters
n and m don't need to be handled any differently since you are passing those by value.
And add proper cuda error checking to your code.
EDIT: after some further analysis, it's clear that this sequence is not correct:
distance=d[one*two-1];
free(d);
printf("%d\n", distance);
}
}
You are freeing d on every iteration of the i loop. That cannot possibly be correct. I suggest you go back to square one and get your serial code working first, in ordinary serial C code, before dropping it into a cuda kernel this way. If you move that free statement outside the i loop, then your kernel runs for a very very long time. Be advised that in-kernel printf is limited in the amount of output that can be easily generated.
I'm not going to debug your code any further for you. Get your serial code working first, then figure out a way to create a kernel without massive quantities of printout.
A final comment: I said above your approach is "plausible". That it means it could be made to work, i.e produce the same behavior as the same code executing on the host. It does not mean it will run fast. This is not how you get acceleration out of a GPU (running a single block of a single thread). I assume you already know this based on your comment "how to get this particular issue settled before starting the task of generating parallel code." But I think the disclaimer is appropriate anyway.

libmp3lame encoding to char array slow

I am trying to encode pcm audio that i generated using "mplayer -ao pcm:nowaveheader" into mp3 with a c program. I don't want to write the mp3 to a file, I want to keep in in an array until i need to write it to a file, I wrote this, and it appears to work in a short .9 second test file, but it is very slow. What exactly is wrong?
#include <stdio.h>
#include <stdlib.h>
#include <lame/lame.h>
lame_global_flags *gfp;
int loopcount;
int inputSize;
FILE *fp=NULL;
FILE *fpo=NULL;
char *mp3buffer;
int mp3buffersize;
int countsize;
int x=0;
int y=0;
short *pcmbuffer;
short *lpcmbuffer;
short *rpcmbuffer;
int parse()
{
printf("loading PCM data...\n");
pcmbuffer=malloc(inputSize);
fread(pcmbuffer,2,(inputSize/2),fp);
printf("data in buffer\n");
printf("splitting left and right channels\n");
lpcmbuffer=malloc(inputSize/2);
countsize=((inputSize/4)-1);
while (x<=countsize)
{
lpcmbuffer[x]=pcmbuffer[x*2];
x++;
}
x=0;
rpcmbuffer=malloc(inputSize/2);
while (x<=countsize)
{
rpcmbuffer[x]=pcmbuffer[(x*2)+1];
x++;
}
x=0;
printf("starting lame\n");
gfp=lame_init();
lame_set_num_channels(gfp,2);
lame_set_in_samplerate(gfp,44100);
lame_set_brate(gfp,256);
lame_set_mode(gfp,1);
lame_set_quality(gfp,5);
if (lame_init_params(gfp)<0)
{
return 1;
}
}
encode()
{
x=0;
mp3buffersize=(1.25*countsize+7200);
mp3buffer=malloc(mp3buffersize);
while (x!=countsize)
{
lame_encode_buffer(gfp,lpcmbuffer,rpcmbuffer,x,mp3buffer,mp3buffersize);
x++;
y++;
if(y==1000)
{
printf("%d %d\n",countsize,x);
y=0;
}
}
x=0;
lame_encode_flush(gfp,mp3buffer,mp3buffersize);
fpo=fopen("test.mp3","w");
fwrite(mp3buffer,1,countsize,fpo);
}
decode()
{
}
bounty()
{
//the quicker picker upper
printf("closing files\n");
fclose(fpo);
fclose(fp);
printf("closing lame\n");
lame_close(gfp);
printf("freeing pcmbuffer\n");
free(pcmbuffer);
free(lpcmbuffer);
free(rpcmbuffer);
free(mp3buffer);
}
int main(int argc,char **argv)
{
loopcount=atoi(argv[1]);
fp=fopen(argv[2],"r");
if (fp==NULL)
{
printf("File Read Error\n");
return 0;
}
fseek(fp,0,SEEK_END);
inputSize=ftell(fp);
fseek(fp,0,SEEK_SET);
printf("detected a %d byte(s) file\n",inputSize);
printf("Proceeding with parsing and importing...\n");
if (parse()==1)
{
printf("lame init error\n");
}
printf("loopcount is %d\n",loopcount);
encode();
//the Quicker Picker Upper
bounty();
return 0;
}
Short answer, make this your encode function:
void encode()
{
mp3buffersize=(1.25*countsize+7200);
mp3buffer=malloc(mp3buffersize);
lame_encode_buffer(gfp, lpcmbuffer, rpcmbuffer, countsize, mp3buffer, mp3buffersize);
lame_encode_flush(gfp,mp3buffer,mp3buffersize);
fpo=fopen("test.mp3","w");
fwrite(mp3buffer,1,countsize,fpo);
}
I've never used lame, but, it looked like in your encode() function you were calling lame_encode_buffer() again and again, overwriting the result each time, and doing from 0 to countsize as the number of samples per channel (argument 4).
Other comments:
Why aren't you using lame_encode_buffer_interleaved()? Much of your parse() function is just undoing the existing interleaving of your file, seems like a waste.
IMO, the mass of global variables you're using are UGLY. Ideally your encode() would look more like: encode(lame_global_flags *gfp, const short * lpcmbuffer, const short * rpcmbuffer, const int countsize) this way it is clear from reading the parameter list the type of the variables, and that they must have come from/been set by the caller. const is nice to clarify that they're only for reading.
Finally, you really should have done some profiling, e.g. printing time differences between entry and exit of functions, to figure where your time sink was, and posted what you'd found. I ventured a guess looking at your loops, the encode() function had the only loop with any meat in it. I never ran your program, maybe I'm 100% wrong.

Coredump in selfmade arrayList

i'm current working on a homework assesment where i'm programming a program ment to stitch textfiles with a piece of an ascii image to create a complete image of all the pieces. The way i intended to write the code is having a while loop looking through a directory finding the parts and adding them to an array. However in my AddFile method(or when i call it to be precise) i get a coredump.. I just started working with c so i dont know if it is very obvious to some of you why i get a coredump or more complicated. Also, i originaly wrote the addFile method to use and accept int's instead of the FILE type, at that point it worked perfectly without any coredumps so i suspect (but hey i might be wrong) that it went wrong when i tried to implement it with the FILE type.
#include <stdio.h>
#include <stdlib.h>
typedef struct{
int listSize;
int listCapacity;
FILE *fileStream;
}FileList;
void addFile(FileList* list, FILE file)
{
if((*list).listSize<(*list).listCapacity)
{
(*list).fileStream[(*list).listSize]=file;
(*list).listSize+=1;
}
else
{
FILE *tempArray = calloc((*list).listSize,sizeof(FILE));
for(int i=0; i<(*list).listSize; i++)
{
tempArray[i]=(*list).fileStream[i];
}
//Do something with array...
free((*list).fileStream);
(*list).listCapacity=((*list).listCapacity)*2;
(*list).fileStream=calloc((*list).listCapacity,sizeof(FILE));
for(int i=0; i<(*list).listSize; i++)
{
(*list).fileStream[i]=tempArray[i];
}
(*list).fileStream[(*list).listSize]=file;
(*list).listSize+=1;
free(tempArray);
}
}
int main()
{
FileList intList;
intList.listSize=0;
intList.listCapacity=1;
intList.fileStream=calloc(intList.listCapacity,sizeof(int));
int fileYcoord=0;
int fileXcoord=0;
while(1)
{
char fileName [100];
int fileNameLength=sprintf(fileName,"part_%02d-%02d",fileXcoord,fileYcoord);
FILE * pFile = fopen (fileName,"r");
if(pFile!=NULL)
{
printf("- ! found file: %s - name length : %d \n",fileName,fileNameLength);
addFile(&intList,*pFile);
fclose(pFile);
fileXcoord++;
}
else
{
if(fileXcoord!=0)
{
fileYcoord+=1;
fileXcoord=0;
}
else
{
break;
}
}
}
printf("size %d , %d",fileXcoord, fileYcoord);
free(intList.fileStream);
return 0;
}
The call to addFile() is dereferencing a FILE *, producing a value of type FILE. This is wrong, this is an opaque type and should always be handled by pointers.

Hash Table: it doesn't save correctly [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 5 years ago.
Improve this question
What my program do..
read a text file of format
store name 1
itemcode quantity
itemcode quantity
.
.
store name 2
itemcode quantity
itemcode quantity
.
.
When you Run my code you will Ask to Enter a task.
there are three options
L itemcode quantity
entering the above sequence will print all the stores which contains that item with the given quantity.
U itemcode quantity storename
this option takes three arguments itemcode int quantity and storename
the Function for this option just update the given store with the amount quantity.
Q
this option call my Savefile method which save the current data structure back to the file.
Problem.
There is a problem I am facing.
whenever I update file it updates successfully but when Enter Command Q to quit and save it doesn't save correctly..
save_file(char *)
it lost whole data just the first store is save..
stores.txt
carrefour_Milan
12345678 12
23456766 16
carrefour_Torino
12345678 65
67676765 12
Carrefour_Vercelli
23456766 20
and also can you help me in finding the time complexity of
int listfile(char *)
and
int updatefile(char *,int ,char *)
I mean Big O.
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#define MAX_ITEM 1000
#define MAXS 129
#define MAXL 132
#define MAXC 9
FILE *fp;
typedef struct store{
char Storename[MAXS];
int quantity;
struct store *NEXT;
}STORE;
typedef struct item{
char item_code[MAXC];
struct store *Stores;
struct item *NEXT;
}ITEM;
ITEM *list_item[MAX_ITEM];
int readfile(char *fname);
int update_file(char *item_code,int qty,char *name);
int hash(char *item_code);
int save_file(char *fname);
void init();
void init(){
int i;
for( i=0;i<MAX_ITEM;i++)
list_item[i]=NULL;
}
int readfile(char *fname){
char *p,line[MAXL+1],storen[MAXL+1];
int pos;
ITEM *current=NULL,*prev=NULL;
STORE *s_cur=NULL,*s_prev=NULL;
char itemcode[MAXC];int qty;
if((fp=fopen(fname,"r"))==NULL)
return -1;
while(!feof(fp)){
if(fgets(line,MAXL+1,fp)==NULL)
break;
if((p=strchr(line,'\n'))==NULL)
;
else
*p='\0';
if(line[0]>='a' && line[0]<='z' ||line[0]>='A' && line[0]<='Z')
strcpy(storen,line);
else{
//fgets(line,MAXL,fp);
if(sscanf(line,"%s %d",itemcode,&qty)>0){
current=(ITEM *)malloc(sizeof(ITEM));
if(current==NULL)
return -1;
pos=hash(itemcode);
if(list_item[pos]==NULL){
list_item[pos]=current;
if((s_cur=(STORE *)malloc(sizeof(STORE)))==NULL)
return -1;
strcpy(s_cur->Storename,storen);
strcpy(current->item_code,itemcode);
s_cur->quantity=qty;
current->Stores=s_cur;
s_cur->NEXT=NULL;
current->NEXT=NULL;
}
else{
ITEM *q=list_item[pos];
if((s_cur=(STORE *)malloc(sizeof(STORE)))==NULL)
return -1;
while(q!=NULL){
if(strcmp(q->item_code,itemcode)==0){
STORE *temp=q->Stores,*temp_a=NULL;
if(temp==NULL){
q->Stores=s_cur;
strcpy(s_cur->Storename,storen);
s_cur->quantity=qty;
s_cur->NEXT=NULL;
}
else{
while(temp!=NULL){
temp_a=temp;
temp=temp->NEXT;
}
temp_a->NEXT=s_cur;
strcpy(s_cur->Storename,storen);
s_cur->quantity=qty;
s_cur->NEXT=NULL;
}
}
q=q->NEXT;
}
if(q==NULL){
q=current;
current->NEXT=NULL;
current->Stores=s_cur;
strcpy(s_cur->Storename,storen);
s_cur->quantity=qty;
s_cur->NEXT=NULL;
}
}
}
}
}
fclose(fp);
return 0;
}
int listfile(char *item_code,int qty){
int i;
ITEM *u=NULL;
item_code[strlen(item_code)]='\0';
if(list_item[hash(item_code)]==NULL)
return -1;
else{
u=list_item[hash(item_code)];
while(u!=NULL){
if(strcmp(u->item_code,item_code)==0){
STORE *temp=u->Stores;
while(temp!=NULL){
if(temp->quantity>=qty){
printf("STORE %s\n",temp->Storename);
}
temp=temp->NEXT;
}
}
u=u->NEXT;
}
}
return 0;
}
int update_file(char *item_code,int qty,char *name){
ITEM *u=NULL;
item_code[strlen(item_code)]='\0';
name[strlen(name)]='\0';
if(list_item[hash(item_code)]==NULL)
return -1;
u=list_item[hash(item_code)];
if(u==NULL)
return -1;
while(u!=NULL){
if(strcmp(u->item_code,item_code)==0){
STORE *temp=u->Stores;
while(temp!=NULL){
if(strcmp(temp->Storename,name)==0)
temp->quantity+=qty;
temp=temp->NEXT;
}
}
u=u->NEXT;
}
return 0;
}
int hash(char *item_code){
int sum=0,s=0;
while(item_code[s]!='\0'){
sum+=33*item_code[s];
s++;}
return sum%MAX_ITEM;
}
void clear(){
char c;
while(c!='\n')
scanf("%c",&c);
}
main(){
int y;
char fname[]="stores.txt",line[MAXL],command,z[MAXS];
char x[MAXC];
init();
if(readfile(fname)==-1)
printf("Error reading file!");
else{
do{
printf("Enter task:");
fgets(line,MAXL,stdin);
sscanf(line,"%c",&command);
switch(command){
case 'L': sscanf(line,"%c%s%d",&command,x,&y);
if(listfile(x,y)==-1)
printf("No items were found\n");
break;
case 'U':sscanf(line,"%c%s%d%s",&command,x,&y,z);
if(update_file(x,y,z)==0)
printf("Update OK\n");
else
printf("Error when updating\n");
break;
case 'Q':if(save_file(fname)==0)
printf("Done\n!");
break;
default:printf("Enter correct command\n");
break;
}
}while(command!='Q');
}
}
int save_file(char *fname){
ITEM *p=NULL,*q=NULL;
int num=0,i,j;
char str[MAXS];
if((fp=fopen(fname,"w"))==NULL)
return -1;
for( i=0;i<MAX_ITEM;i++){
if(list_item[i]==NULL)
;
else{
p=list_item[i];
while(p!=NULL){
STORE *s=p->Stores;
if(s==NULL)
;
else{
if(strcmp(s->Storename,"0000\0")!=0){
strcpy(str,s->Storename);
// puts(str);
fprintf(fp,"%s\n",str);
}
while(s!=NULL){
for( j=0;j<MAX_ITEM;j++){
if(list_item[j]==NULL)
;
else{
q=list_item[j];
while(q!=NULL){
STORE *st=q->Stores;
if(st==NULL)
;
else{
while(st!=NULL){
if(strcmp(st->Storename,str)==0 && strcmp(st->Storename,"0000\0")!=0){
printf("%s %d\n",q->item_code,st->quantity);
fprintf(fp,"%s %d\n",q->item_code,st->quantity);
strcpy(st->Storename,"0000\0");
}
st=st->NEXT;
}
}
q=q->NEXT;
}
}
}
s=s->NEXT;
}
}
p=p->NEXT;
}
}
}
fclose(fp);
return 0;
}
This is an inconsistent and unreadable mess. I suggest as first steps to refactor the layout.
Repair the indentation so it reflects the code structure. Chose a bracing style and use it consistently. Something like this
if(x){
;
}else{
foo();
}
should better look like this:
if (x) {
;
}
else {
foo();
}
That's a much better starting point for any debugging and maintenance. And there is a lot of maintenance necessary.
Your code is very inefficient. For example when reading the file, you malloc the store structure separately in both branches of the if statement, and copy the store name in three different places, again in all different code paths. Why not simply malloc the store structure and initialise it correctly before you work out where to put it?
Also in the read file function, if the hash table position corresponding to the item is not empty, the memory allocated to "current" gets leaked.
Furthermore, if you actually find a match for the item, you don't break out of the loop which means that the block of code beginning:
if(q==NULL){
q=current;
gets executed.
Lastly (for now), if a slot in the hash table is filled but there is no matching itemcode then the item won't get put into the hash table. Look at your code. At what point do you assign "current" to any part of the chain that starts at "list_item[pos]"? You don't. Doing "q = current" just stores one value in another variable. What you need is something like:
current->next = list_item[pos];
list_item[pos] = current;
To add it on at the beginning of the list.
I suggest you fix your file reading function before worrying about your file writing function.
P.s. an upvote and a request for more comments may get you some more help. Depending on how busy I am and whether others can also be bothered to help.

Resources