Reading data from csv file into struct, getting errors back - c

I'm trying to read data from a csv file into a struct. The struct contains int, char and float members. I'm getting errors back except for the char member. I'm fairly new to C so I appreciate your help!
Data from csv file "Order":
0, cafe, 3.90, 0
0, espresso, 3.50, 0
...
My struct:
typedef struct {
int position;
char name[20];
float price;
int counter;
}drink;
// Reads the file "Order" line by line and tries to fill pt[0], pt[1], ...
// NOTE(review): this is the code as posted in the question; the notes below
// mark the problems the question is about.
void init(drink *pt)
{
FILE *fp;
char buf[50];
int i = 0, j; // j is never used in this function
fp=fopen("Order", "r"); // the result is not checked before it is passed to fgets(); fp is never closed
while( fgets(buf,sizeof(buf),fp) != NULL)
{
// strcpy() copies characters into a char buffer. Only `name` is a char
// array; `position`/`counter` are int and `price` is float, so those three
// calls pass a non-pointer as the destination.
strcpy(pt[i].position, strtok(buf,","));
strcpy(pt[i].name, strtok(NULL,","));
strcpy(pt[i].price, strtok(NULL,","));
strcpy(pt[i].counter, strtok(NULL,","));
++i; // i is not compared against the size of the array pt points at
}
}
int main()
{
int number = NR;
int d=0;
drink bar[number];
drink *pt = &bar[0];
welcome();
init(pt);
...
return 0;
}

Wrong copy.
Do not use strcpy() to copy a string to an int. Rather convert it.
// strcpy(pt[i].position, strtok(buf,","));
char *endptr;
pt[i].position = strtol(strtok(buf,","), &endptr, 10);
// or
pt[i].position = atoi(strtok(buf,","));
...
pt[i].price = strtod(strtok(NULL,","), &endptr);
(Note: Various error checking omitted)
Enable all compiler warnings. This will save you time as your compiler should have caught this.
If you were getting errors, compile time or run time, post the error
rather than weakly describe the error as "getting errors back".

You are not using strcpy correctly.
You should only use it with char buffers, not with integers and floats.
Read man strcpy for more information about it.

// to extract a integer from a char buffer into a int value, use atoi() not strcpy
// to extract a float from a char buffer into a float value, use atof(), not strcpy
// the last field in a line probably does not have a trailing ','
// and the last field should already be '\0' terminated by the fgets
// so the code should use something else to get a pointer to the last field
// the calls to strtok() should be setting a char* field from the returned value
// then
// 1) that value can be checked for NULL
// 2) getting a pointer to the last field would be
// returnedValue+=2;
// (the 2 to skip over the intervening ' ' after the converted comma
// all the copying/converting of the fields need to advance the
// returnedValue by 1 to skip over the leading ' ',
// except the first field, which has no leading ' '
// the #define for 'NR' should be used in the function so as to
// not overflow the available number of input fields
// for most of the break; statements, you may want to add a printf
// so the user knows what happened
// suggest:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NR (20)
#define MAX_NAME_LEN (20)

/* One record of the "Order" file: position, name, price, counter. */
typedef struct
{
    int position;
    char name[MAX_NAME_LEN]; /* init() always stores a NUL-terminated string here */
    float price;
    int counter;
} drink;

/* Return a pointer to the first character of s that is not a space or a tab. */
static char *skip_blanks(char *s)
{
    while (*s == ' ' || *s == '\t')
    {
        s++;
    }
    return s;
}

/*
 * Read up to NR lines of the form "position, name, price, counter" from the
 * file "Order" into pt[0] .. pt[NR-1].
 *
 * pt  must point at an array of at least NR drink elements.
 *
 * Reading stops at end of file, after NR records, or at the first line that
 * has fewer than four comma-separated fields (the fields already converted
 * on that line stay stored).  A name that does not fit in MAX_NAME_LEN-1
 * characters is replaced by a string of '*' characters.
 * If the file cannot be opened the program exits with EXIT_FAILURE.
 */
void init(drink *pt)
{
    char buf[50];
    char *field = NULL;
    FILE *fp = fopen("Order", "r");

    if (NULL == fp)
    {   // then, fopen failed
        perror("fopen failed for: Order");
        exit(EXIT_FAILURE);
    }

    // implied else, fopen successful
    for (int i = 0; i < NR; i++)
    {
        if (NULL == fgets(buf, sizeof(buf), fp)) break;

        field = strtok(buf, ",");
        if (NULL == field) break;
        pt[i].position = atoi(field);

        field = strtok(NULL, ",");
        if (NULL == field) break;
        // skip the blank(s) after the comma only if they are really there,
        // so "1,espresso" does not lose its first letter
        field = skip_blanks(field);
        if (MAX_NAME_LEN <= strlen(field))
        {   // bad field, too long: mark it, but keep it a valid C string
            memset(pt[i].name, '*', MAX_NAME_LEN - 1);
            pt[i].name[MAX_NAME_LEN - 1] = '\0';
        }
        else
        {
            strcpy(pt[i].name, field);
        }

        field = strtok(NULL, ",");
        if (NULL == field) break;
        pt[i].price = (float)atof(field); // atof() skips leading white space itself

        // the last field ends at the newline (or at the end of the buffer),
        // so let strtok() find it instead of stepping the pointer by hand
        field = strtok(NULL, ",\n");
        if (NULL == field) break;
        pt[i].counter = atoi(field);
    } // end for

    fclose(fp);
} // end function: init

Related

Using strtok( ) to extract substring from double quotes

I'm trying to write code that reads all of the columns, row by row, in a csv file and stores them in a struct array, which is called movies in this code. I actually managed to store the data, but because of the delimiter I pass to strtok, some of my movies' data is stored incorrectly;
Example: Row in the file:
Synecdoche, New York - Charlie Kaufman - 2008 - Drama
has to be stored as;
id = 37
name = Synecdoche, New York
directorName = Charlie Kaufman
year = 2008
genre = Drama
but it is stored as;
id = 37
name = Synecdoche
directorName = New York
year = Charlie Kaufman
genre = 2008
I am aware that this is because the string of characters I need to separate contains a comma; but I couldn't find how to solve it. So how can I make the strtok only split the string inside the double quotes?
I don't know if anyone can understand; but still I leave my code like this below;
#include <conio.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
typedef struct movie{
int id;
char name[100];
char directorName[100];
int year;
char genre[30];
} movie;
movie movies[100];
/* Delete every occurrence of c from str, compacting the string in place. */
void remove_all_chars(char* str, char c) {
    char *dst = str;                 /* next position to keep a character at */

    for (const char *src = str; *src != '\0'; ++src) {
        if (*src != c) {
            *dst = *src;
            ++dst;
        }
    }
    *dst = '\0';
}
void fillMovies(FILE *filePointer)
{
char line[150];
int id = 1;
int arrIndex = 0;
while(!feof(filePointer))
{
fgets(line, sizeof(line), filePointer);
puts(line);
//sleep(1);
int i = 1;
char* value = strtok(line, ",");
struct movie movie = {
id,
"",
"",
0,
""
};
while(value != NULL)
{
//remove_all_chars(value, '\"');
printf("%s ", value);
if(i == 1)
{
//movie.name = value;
strcat(movie.name, value);
//movie.name += value;
i++;
value = strtok(NULL, ",");
continue;
}
if(i == 2)
{
//movie.directorName = value;
strcat(movie.directorName, value);
//movie.directorName += value;
i++;
value = strtok(NULL, ",");
continue;
}
if(i == 3)
{
movie.year = atoi(value);
i++;
value = strtok(NULL, ",");
continue;
}
if(i == 4)
{
//movie.genre = value;
strcat(movie.genre, value);
movies[arrIndex] = movie;
arrIndex++;
id++;
value = strtok(NULL, ",");
break;
}
}
printf("\n");
}
// Close the file
fclose(filePointer);
}
void printMovie(int i)
{
sleep(1);
printf("%d. ", movies[i].id);
printf("%s", movies[i].name);
printf(", ");
printf("%s", movies[i].directorName);
printf(", ");
printf("%d", movies[i].year);
printf(", ");
printf("%s", movies[i].genre);
}
// close, stringleri nasýl eþitleyeceðini bul.
int main()
{
// Buraya kendi dosya pathini lütfen yaz.
FILE* filePointer = fopen("movies.csv", "r");
if (!filePointer)
{
printf("Can't open file\n");
} else {
fillMovies(filePointer);
int i = 0;
while(i < 60){
printMovie(i);
i++;
}
}
return 0;
}
One of the sad truths to CSV files is that they look simple, and promise simplicity, but very quickly become nightmares to read. For any truly non-trivial CSV file, you need to build a state machine.
However, if we can put four significant constraints on your input then we can make life a whole lot easier:
Each record in your CSV is exactly N fields long
No quoted field will embed the quote character itself
Each field is known to be quoted or unquoted
No field is empty
If that is the case then you really only need a function to read characters from a file until one of a set of delimiters is encountered. That is fortunately very easy.
The other disheartening truth (and this applies to all computer languages) is that user input is really, really hard. Here is a function that crosses all the important ‘t’s and dots the ‘i’s.
int read_delimited( FILE * f, char * s, int n, const char * cs )
//
// Get text from a file.
//
//   f   File to read
//   s   Buffer to store characters read from f.
//       The resulting buffer will always be null-terminated.
//       May not be NULL.
//   n   Size of buffer, including room for the terminator. Must be at least 1.
//   cs  Delimiters.
//       May not be NULL. (But it may be empty.)
//
// Read terminates only when EOF or one of the delimiters is read.
// Read does not terminate when the buffer fills up! If your buffer
// is too small the entire field is still read, but only (n-1)
// characters from the file are stored.
//
// Returns the last character read (either EOF or a delimiter).
// The delimiter is consumed and is not stored in s.
//
{
  int count = 0;
  int c;                      // declared here so it is still in scope for the return

  for (;;)
  {
    c = fgetc( f );
    if ((c == EOF) || strchr( cs, c )) break;
    // keep one slot free for the terminator written below
    if (count < n - 1) s[count++] = (char)c;
  }
  s[count] = '\0';
  return c;
}
You can then use this and a little helper function in a loop to collect all your data for each record.
int skip_chars( FILE * f, const char * cs )
//
// Skips all characters in cs[].
//
//   f   File to read
//   cs  Characters to skip. May not be NULL.
//
// Returns the character last read (EOF or something not in cs[]).
// That character has been consumed from f: a caller that still needs
// it as part of the next field must push it back with ungetc().
//
{
  int c;
  do c = fgetc( f );
  while ((c != EOF) && (strchr( cs, c ) != NULL));
  return c;
}
bool read_movie( FILE * f, movie * m )
//
// Reads one record into *m. The call sequence below expects the fields in
// the order: id , "name" , "directorName" , year , genre <newline>
// where name and directorName are enclosed in double quotes.
//
// Returns false at EOF or when the genre field comes back empty,
// true otherwise.
//
{
  char s[100];
  int  c;

  c = skip_chars( f, " \t\n" );  // skip whitespace, including newlines
  if (c == EOF) return false;
  // skip_chars() has already consumed the first character of the id:
  // give it back, otherwise the id loses its leading digit
  ungetc( c, f );

  read_delimited( f, s, sizeof(s), "," );
  m->id = atoi( s );

  // here the consumed non-blank character is the opening quote, which is
  // exactly what we want to drop
  skip_chars( f, " \t" );
  read_delimited( f, m->name, sizeof(m->name), "\"" );

  skip_chars( f, " \t," );  // skip ws, the comma, and the opening quote
  read_delimited( f, m->directorName, sizeof(m->directorName), "\"" );

  c = skip_chars( f, " \t," );  // skip trailing ws, trailing comma, leading ws
  if (c != EOF) ungetc( c, f );  // first digit of the year
  read_delimited( f, s, sizeof(s), "," );
  m->year = atoi( s );

  c = skip_chars( f, " \t" );  // skip leading ws
  if (c != EOF) ungetc( c, f );  // first character of the genre (or the newline)
  read_delimited( f, m->genre, sizeof(m->genre), "\n" );

  return m->genre[0] != '\0';
}
After that you just need a loop to read all the records:
// Capacity of the table. An enum constant is an integer constant expression,
// so the array below is an ordinary fixed-size array wherever this is placed.
enum { max_movies = 100 };
movie movies[max_movies];
int num_movies = 0;
// Read records until the table is full or read_movie() returns false.
// (&& instead of `and`: the latter is only available in C via <iso646.h>.)
while ((num_movies < max_movies) && read_movie( f, &movies[num_movies] ))
{
  num_movies += 1;
}
As you can see, it gets less-than-one-line really fast. But I don’t think you can really make it any simpler.
Another useful option, if it is available to you, is to use a TAB character instead of a comma to separate fields. I assume that you are given to handle a CSV, though, and cannot change that.

Read a CSV file into a dynamic array of structs C

I'm fairly new to C. I'm trying to read a .CSV file, then parse each line, then store the data in a dynamic array of pointers to structs. Unfortunately I've gone wrong somewhere in my implementation which is resulting in an infinite loop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct dataSet {
char ID;
char postcode;
int population;
char contact;
double x;
double y;
}data;
int main(int argc, char* argv[]) {
char line[100] = "";
int count = 0;
int each = 0;
data *allData = NULL;
data *temp = NULL;
FILE *file = fopen("dataset.csv", "r");
if (file == NULL)
{
printf("Error! File null");
return 1;
}
while (fgets(line, sizeof line, file))
{
if(NULL == (temp = realloc(allData, sizeof(*allData) * (count + 1))))
{
fprintf(stderr, "realloc problem\n");
fclose(file);
free(allData);
return 0;
}
allData = temp;
if (6 == scanf(line, "%s, %s, %d, %s, %lf, %lf",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y)) {
count++;
}
else {
printf("Problem with data\n");
}
}
fclose(file);
for (each = 0; each < count; each++)
{
printf("%s, %s, %d, %s, %lf, %lf\n",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y);
}
free(allData);
return 0;
}
Any help or tips would be greatly appreciated.
[s]scanf() is a nasty function. You don't have enough control once it fails. Problem is: there are too many conditions: the input can be incorrect, or the destination is not large enough. Even reading complete lines with fgets(), and parsing them afterwards, will only allow you to skip complete lines; also: the line buffer is mostly fixed sized, and fgets() could read incomplete lines. A way to keep complete control is to read character-based. This might imply a Finite State machine.
A simpler reader (using a zero-state machine) could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One parsed record. */
struct omg {
    char o;      /* first character of field 0 ('\0' if the field is empty) */
    int m;       /* field 1 converted with sscanf("%d"); 0 if not convertible */
    char g[11];  /* field 2, truncated to 10 characters, NUL-terminated */
};
/* Growable array of records as returned by read_stuff(). */
struct wtf {
    unsigned size;      /* number of elements allocated in array */
    unsigned used;      /* number of elements filled in */
    struct omg *array;  /* realloc()ed storage; release with free() */
};
#define INITIAL_SIZE 7

/*
 * Read the file `name` one character at a time and split it into records
 * (terminated by '\n') of fields (terminated by '\t'; flip the #if below
 * to use ',' instead).  '\r' characters are ignored.
 *
 * Returns the records read.  If the file cannot be opened the result is
 * {0, 0, NULL}.  If growing the array fails, the records gathered so far
 * are returned.  A final record that is not followed by '\n' is kept.
 */
struct wtf read_stuff(char *name)
{
    FILE *fp;
    unsigned icol = 0, len = 0;
    char buff[123];
    struct wtf this = {0,0,NULL};

    fp = fopen(name, "rb" );
    if (!fp) return this;

    for (;;) {
        int ch;
        int last_record = 0;   /* set when EOF ends an unterminated record */

        /* Make sure there is room for the record being assembled */
        if (this.used >= this.size) {
            size_t newsize;
            struct omg *tmp;

            newsize = this.size? this.size*2: INITIAL_SIZE;
            fprintf(stderr, "Realloc(%zu)\n", newsize);
            tmp = realloc(this.array, sizeof *this.array * newsize);
            if (!tmp) goto done;   /* old block is still valid: return it */
            this.array = tmp;
            this.size = (unsigned)newsize;
        }

        ch = getc(fp);
        switch(ch) {
        case '\r' : continue;

        /* End of field or record: fall out of the switch to process it */
#if 0
        case ',' :
#else
        case '\t' :
#endif
        case '\n' :
            break;

        case EOF :
            /* Nothing pending: clean end of input */
            if (len == 0 && icol == 0) goto done;
            /* Otherwise the last line had no '\n': finish it like one */
            last_record = 1;
            break;

        /* Normal character: assign to buffer
        ** You may want to report too long fields here
        */
        default:
            if (len >= sizeof buff -2) continue;
            buff[len++] = ch;
            continue;
        }
        buff[len] = 0;

        /* When we arrive here, we have a new field. Let's process it ...*/
        if (icol == 0) {
            /* new record: give fields that may be missing a defined value */
            memset(&this.array[this.used], 0, sizeof *this.array);
        }
        switch (icol) {
        case 0: /* first field: keep its first character only */
            this.array[this.used].o = buff[0];
            break;
        case 1: /* second field: on conversion failure m keeps the 0 set above */
            sscanf(buff, "%d", &this.array[this.used].m );
            break;
        case 2: /* third field: truncate to what fits in g[] */
            if (len >= sizeof this.array[this.used].g)
                len = sizeof this.array[this.used].g -1;
            memcpy (this.array[this.used].g, buff, len);
            this.array[this.used].g[len] = 0;
            break;
        default: /* Ignore excess fields
                 ** You may want to report them.
                 */
            break;
        }

        /* Do some bookkeeping */
        len = 0;
        if (ch == '\n' || last_record) {
            /* You may want to check if icol==2, here */
            icol=0; this.used++;
            if (last_record) goto done;
        }
        else icol++;
    }
done:
    fclose(fp);
    /* You could do a final realloc() here */
    return this;
}
/* Read the file named by argv[1] with read_stuff() and print at most the
 * first 11 records to stdout; the used/allocated counts go to stderr. */
int main(int argc, char **argv)
{
    struct wtf result;
    unsigned idx;

    /* argv[1] is only valid when a file name was actually given */
    if (argc < 2) {
        fprintf(stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "read_stuff");
        return EXIT_FAILURE;
    }

    result = read_stuff(argv[1] );
    fprintf(stderr, "Result=%u/%u\n", result.used,result.size);
    for (idx=0; idx < result.used; idx++) {
        printf("%c %d %s\n"
            , result.array[idx].o
            , result.array[idx].m
            , result.array[idx].g);
        if (idx >= 10) break;
    }

    free(result.array);   /* storage was allocated inside read_stuff() */
    return 0;
}
You ask for tips...
1 - your struct is wrong if your plan was to use dynamic memory. The char members should be pointers to char, ( char * not char ) as shown below. But to reduce complexity, use char arrays instead of forcing dynamic allocation for struct members: i.e. do not use this:
typedef struct dataSet {
char *ID;
char *postcode;
int population;
char *contact;
double x;
double y;
}data;
Rather use this:
typedef struct dataSet {
char ID[80];
char postcode[11];
int population;
char contact[80];
double x;
double y;
}data;
If the lengths are not right, then make them bigger, but this will reduce calls to calloc() and free().
2 - suggested steps:
Count lines in file. (example here). This will essentially open the file, count the lines and close the file.
Use the count to allocate memory for that number of instances of data (i.e. data *records = malloc(sizeof(*records)*countOfLines); )
Open the file again. If file != NULL, then...
Begin to read file line by line in a loop, such as the fgets(...) loop you have.
In this loop, I suggest replacing scanf() with a series of calls to strtok(), making the appropriate conversion for each field one by one. It's a few more lines of code, but in the long run it makes it easier to see what parsing problems you might run into.
The following pseudo code illustrates...
data *record = malloc(CountOfLines*sizeof(*record));
if(record)
{
int i = 0;
while(fgets(line, sizeof line, file))
{
tok = strtok(line, ",");
if(tok)
{ //convert string
strncpy(record[i].ID, tok, sizeof(record[i].ID) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert string
strncpy(record[i].postcode, tok, sizeof(record[i].postcode) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert int
record[i].population = atoi(tok);
//and so on ...

Scanning group of integers in the form of (x,y) in c

I am trying to take inputs from the standard input in the form (a,b) (c,d) (e, f) (g,h) and will stop taking input if an empty line is added. I need these inputs tuple by tuple like (a,b) first then I perform some computation with it like add in the binary tree and add (c,d) then (e, f) and so on in the following way:
insert_in_tree(&tree->root,&tree->root,a, b);
I know how to accept integers till empty line is added which i do in the following way:
AVLTree *CreateAVLTree(const char *filename)
{
// put your code here
AVLTree *tree = newAVLTree();
int key, value;
if(strcmp(filename, "stdin") == 0){
char str[1024]={};
printf("Enter your values");
while( fgets(str, 1024, stdin) && strlen(str) && str[0] != '\n' ){
printf("string %s", str);
sscanf(str, "%d, %d", &key, &value);
//int key = atoi(str);
printf("This is key you entered %d\n", key);
printf("This is value you entered %d\n", value);
}
}else{
FILE* file = fopen(filename, "r"); // open a file
if(file == NULL) {
return NULL; // error checking
}
while (fscanf (file, " (%d,%d)", &key, &value) == 2) // check for number of conversions
// space added here ^
{
insert_in_tree_q5(&tree->root,&tree->root, key, value);
//printf("%d,%d\n", key, value);
}
fclose(file);
//node = tree->root;
}
return tree;
}
but i am not sure how use this to solve my problem stated above.
I'm not a fan of using scanf() et.al. to parse data, as a simple scanf("%d,%d") tends to be error prone with differing user input.
My general approach when dealing with known formatting characters (like (, ,, )), is to find them first with strchr(), validate they're somewhat sensible, and only then try to extract the value.
In the code below, I locate the parentheses and comma, then copy out the possibly numeric data in between, before handing it off to strtol() for converting the integer string to a numeric representation.
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_NUMBER_LEN 32
/*
** Given a string which contains (somewhere) a pair
** of numbers in the form "... (x, y) ...", parse the pair
** into val1 and val2 respectively.
**
** Returns the point at which the input ended successfully
** or NULL on error
*/
/*
** Convert the text in [begin, end) to an int.
** The text must be one base-10 integer, optionally surrounded by blanks,
** and must fit in an int.  *end must be a character that cannot be part
** of a number (here: ',' or ')'), so strtol() can never read past it.
** Returns 1 and stores the value in *out on success, 0 otherwise.
*/
static int parse_int_field(const char *begin, const char *end, int *out)
{
    char *stop;
    long value;

    errno = 0;
    value = strtol( begin, &stop, 10 );
    if ( stop == begin )                         // no digits at all
        return 0;
    if ( errno == ERANGE || value < INT_MIN || value > INT_MAX )
        return 0;                                // does not fit
    while ( stop < end && ( *stop == ' ' || *stop == '\t' ) )
        stop++;
    if ( stop != end )                           // trailing junk such as "12x"
        return 0;

    *out = (int)value;
    return 1;
}

/*
** Given a string which contains (somewhere) a pair
** of numbers in the form "... (x, y) ...", parse the pair
** into val1 and val2 respectively.
**
** The first '(' in input starts the tuple; the ',' and ')' are then
** searched for after it, so stray ',' or ')' characters in front of
** the tuple do not matter.
**
** Returns the point at which the input ended successfully
** (the character after the ')'), or NULL on error.  On error
** *val1 and *val2 are left untouched.
*/
const char *parseTuple(const char *input, int *val1, int *val2)
{
    const char *left_paren;
    const char *comma;
    const char *right_paren;
    int first;
    int second;

    left_paren = strchr( input, '(' );
    if ( left_paren == NULL )
        return NULL;

    comma = strchr( left_paren + 1, ',' );
    if ( comma == NULL )
        return NULL;

    right_paren = strchr( comma + 1, ')' );
    if ( right_paren == NULL )
        return NULL;

    // val1 lives between the '(' and the ',', val2 between the ',' and the ')'
    if ( !parse_int_field( left_paren + 1, comma, &first ) ||
         !parse_int_field( comma + 1, right_paren, &second ) )
        return NULL;

    *val1 = first;
    *val2 = second;
    return right_paren + 1;   // point to the next input location, so we can call again
}
int main(int argc, char **argv)
{
const char *input;
int val1;
int val2;
for (int i=1; i<argc; i++)
{
input = argv[i];
do
{
printf( "From input of: [%s]\n" , input );
input = parseTuple( input, &val1, &val2 );
if ( input != NULL )
printf( " Parsed out: (%3d,%3d)\n", val1, val2 );
} while ( input != NULL && strlen( input ) );
}
return 0;
}
Giving the test-run:
$ ./parse_tuple '(-3, 2)' '(1,1)(11111111111111111111111111111111111111111111111111111111111111111111,0) () (,)' '(' '()' ')' '(,)' '(-12,)' '(123,456)'
From input of: [(-3, 2)]
Parsed out: ( -3, 2)
From input of: [(1,1)(11111111111111111111111111111111111111111111111111111111111111111111,0) () (,)]
Parsed out: ( 1, 1)
From input of: [(11111111111111111111111111111111111111111111111111111111111111111111,0) () (,)]
From input of: [(]
From input of: [()]
From input of: [)]
From input of: [(,)]
From input of: [(-12,)]
From input of: [(123,456)]
Parsed out: (123,456)

How to load multiple "clones" of structure from FILE? C

I want to learn how to load multiple structures (many students: name, surname, index, address...) from a text file looking like:
Achilles, 9999
Hector, 9998
Menelaos, 9997
... and so on
Struct can be like:
struct student_t {
char *name;
int index;
}
My attempt (doesn't work; I'm not even sure if fgets+sscanf is a considerable option here):
int numStudents=3; //to simplify... I'd need a function to count num of lines, I imagine
int x, y=1000, err_code=1;
FILE *pfile = fopen("file.txt", "r");
if(pfile==0) {return 2;}
STUDENT* students = malloc(numStudents * sizeof *students);
char buffer[1024];
char *ptr[numStudents];
for (x = 0; x < numStudents; x++){ //loop for each student
students[x].name=malloc(100); //allocation of each *name field
fgets(buffer, 100, pfile); //reads 1 line containing data of 1 student, to buffer
if(x==0) *ptr[x] = strtok(buffer, ",");//cuts buffer into tokens: ptr[x] for *name
else *ptr[x] = strtok(NULL, ","); //cuts next part of buffer
sscanf(ptr[x], "%19s", students[x].name); //loads the token to struct field
*ptr[y] = strtok(NULL, ","); //cuts next part of the buffer
students[y].index = (int)strtol(ptr[y], NULL, 10); //loads int token to struct field
*buffer='\0';//resets buffer to the beginning for the next line from x++ fgets...
y++;//the idea with y=1000 is that I need another pointer to each struct field right?
}
for (x = 0; x < numStudents; x++)
printf("first name: %s, index: %d\n",students[x].name, students[x].index);
return students;
Then printf it to see what was loaded. (to simplify my real structure that has 6 fields). I know a nice method to load 1 student from user input...(How to scanf commas, but with commas not assigned to a structure? C) however to load multiple, I have this idea but I'm not sure if it's too clumsy to work or just terrybly written.
Later I'd try to sort students by name , and perhaps even try to do a realloc buffer that increases it's size along with new students being loaded to buffer... and then to sort what'd been loaded... but I imagine that first I need to load it from the file to buffer and from buffer to fill structure, to be able to sort it then?...
Thanks A LOT for all the help!
C is a little harsh. I use GNU getline below, which may be not portable, which you might end up implementing yourself. I use stdin for input FILE * just for simplicity.
The program reads the students list into the students array. Then I sort the students by comparing indexes, then by name, each time with printing out.
Your code is a bit of a mishmash - try to write a separate function for loading a single student. You don't need char *ptr[numStudents]; a single char *ptr is enough for the strtok function. strtok is a little messy, so I prefer just using strchr multiple times. I used memcpy to copy the name out of the line; remember to null-terminate it.
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
/* One student record. */
struct student_s {
    char *name;   /* NUL-terminated name */
    int index;    /* student index number */
};

/* qsort() comparator: order two struct student_s by name, using strcmp(). */
static int students_name_cmp(const void *a, const void *b)
{
    const struct student_s *s1 = a;
    const struct student_s *s2 = b;
    return strcmp(s1->name, s2->name);
}

/*
 * qsort() comparator: order two struct student_s by ascending index.
 * Returns -1, 0 or 1.  The two comparisons replace `s1->index - s2->index`,
 * which overflows (undefined behaviour) when the operands are far apart,
 * e.g. INT_MAX and -1.
 */
static int students_index_cmp(const void *a, const void *b)
{
    const struct student_s *s1 = a;
    const struct student_s *s2 = b;
    return (s1->index > s2->index) - (s1->index < s2->index);
}
/*
 * Read "name, index" lines from stdin into a growing array of students,
 * print them sorted by index and then sorted by name, and free everything.
 * Any malformed line or allocation failure ends the program with exit(-1).
 */
int main()
{
    struct student_s *students = NULL;
    size_t students_cnt = 0;
    FILE *fp = stdin;
    char *line = NULL;   // buffer owned by getline(), released after the loop
    size_t len = 0;

    // for each line (getline() returns -1 at end of input or on error)
    while (getline(&line, &len, fp) != -1) {
        // resize students! Keep the old pointer until we know realloc worked.
        struct student_s *grown = realloc(students, (students_cnt + 1) * sizeof(*students));
        // handle errors
        if (grown == NULL) {
            fprintf(stderr, "ERROR allocating students!\n");
            exit(-1);
        }
        students = grown;

        // find the comma in the line
        const char * const commapos = strchr(line, ',');
        if (commapos == NULL) {
            fprintf(stderr, "ERROR file is badly formatted!\n");
            exit(-1);
        }

        // the name runs from the start of the line up to the comma;
        // namelen counts those characters plus the terminating null
        const size_t namelen = (size_t)(commapos - line) + 1;

        // alloc memory for the name, copy it and null-terminate it
        students[students_cnt].name = malloc(namelen * sizeof(char));
        // handle errors
        if (students[students_cnt].name == NULL) {
            fprintf(stderr, "ERROR allocating students name!\n");
            exit(-1);
        }
        memcpy(students[students_cnt].name, line, namelen - 1);
        // the last valid index of a namelen-byte buffer is namelen - 1
        students[students_cnt].name[namelen - 1] = '\0';

        // convert the string after the comma to the number
        // strtol discards leading whitespace, which is what we want here;
        // &line[namelen] is the character right after the comma
        errno = 0;
        char *endptr;
        const long int tmp = strtol(&line[namelen], &endptr, 10);
        // handle strtol errors, including "no digits at all"
        if (errno || endptr == &line[namelen]) {
            fprintf(stderr, "ERROR converting student index into number\n");
            exit(-1);
        }
        // index is an int, so the value has to fit in [INT_MIN, INT_MAX]
        if (tmp < INT_MIN || INT_MAX < tmp) {
            fprintf(stderr, "ERROR index number is out of allowed range\n");
            exit(-1);
        }
        students[students_cnt].index = (int)tmp;

        // handle the case when the line has more characters after the number
        if (*endptr != '\n' && *endptr != '\0') {
            fprintf(stderr, "ERROR there are some rabbish characters after the index!");
            exit(-1);
        }

        // finally, increment students count
        students_cnt++;
    }
    free(line);

    // sort by index
    qsort(students, students_cnt, sizeof(*students), students_index_cmp);

    // print students out sorted by index
    printf("Students sorted by index:\n");
    for (size_t i = 0; i < students_cnt; ++i) {
        printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
    }

    // now we have students. We can sort them.
    qsort(students, students_cnt, sizeof(*students), students_name_cmp);

    // print students out sorted by name
    printf("Students sorted by name:\n");
    for (size_t i = 0; i < students_cnt; ++i) {
        printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
    }

    // free students, lucky them!
    for (size_t i = 0; i < students_cnt; ++i) {
        free(students[i].name);
    }
    free(students);

    return 0;
}
For the following input on stdin:
Achilles, 9999
Hector, 9998
Menelaos, 9997
the program outputs:
Students sorted by index:
student[0] = 'Menelaos', 9997
student[1] = 'Hector', 9998
student[2] = 'Achilles', 9999
Students sorted by name:
student[0] = 'Achilles', 9999
student[1] = 'Hector', 9998
student[2] = 'Menelaos', 9997
A test version available here on onlinegdb.

Word count problems

I'm trying to finishing one of my C homework questions. Here's the definition and sample IO:
Description
Given an article as input. You have to count the number of each word, and prints the list of words in alphabetical order.
Sample Input
It was the best of times, it was the worst of times, it was the age of
wisdom, it was the age of foolishness, it was the epoch of belief, it
was the epoch of incredulity, it was the season of Light, it was the
season of Darkness, it was the spring of hope, it was the winter of
despair, we had everything before us, we had nothing before us, we
were all going direct to Heaven, we were all going direct the other
way.
Sample Output
age 2
all 2
before 2
belief 1
best 1
darkness 1
despair 1
direct 2
epoch 2
everything 1
foolishness 1
going 2
had 2
heaven 1
hope 1
incredulity 1
it 10
light 1
nothing 1
of 10
other 1
season 2
spring 1
the 11
times 2
to 1
us 2
was 10
way 1
we 4
were 2
winter 1
wisdom 1
worst 1
And this is my code now:
in main.c:
#include <stdio.h>
#include <stdlib.h>
#include "function.h"
#include <string.h>
int main()
{
char wordcollected [3100] = {0};
char *word_ptr[100];
int countarray[100];
static char temp[31];
int nth_word = 0;
while(1){
int n = strlen(wordcollected);
word_ptr [nth_word] = wordcollected + strlen(wordcollected);
if(strcpy(temp, fetch_word()) == NULL){
for(n == strlen(wordcollected); n >= 0; n--){
if(wordcollected[n] == ','){
wordcollected[n] = '\0';
}
}
break;
}
strcat((wordcollected), temp);
strcat((wordcollected), ",");
nth_word ++;
}
}
Our TA have already finished partial codes for us:
in function.c:
#include "function.h"
#include <stdio.h>
// fetch words from stdin
// tell whether ch is one of the characters that separate words
static int is_separator( char ch ){
    static const char separators[] = " \t\n,.;:?()[]{}\"\"''" ;
    const char *p ;

    for( p = separators ; *p != '\0' ; p++ ){
        if( *p == ch )
            return 1 ;
    }
    return 0 ;
}

// Return the next word read from stdin, or NULL at End Of File.
// The returned pointer refers to a static line buffer that is reused
// (and modified) by later calls.
const char *fetch_word(){
    static char line_buffer[1024] ;
    static char *cursor = NULL ;   // position in line_buffer, NULL = need a new line
    char *start ;

    for( ;; ){
        // no current line: try to read one from stdin
        if( cursor == NULL ){
            cursor = fgets( line_buffer, sizeof(line_buffer), stdin ) ;
            // End Of File?
            if( cursor == NULL )
                return NULL ;
        }

        // step over separators in front of the word
        while( *cursor != '\0' && is_separator( *cursor ) )
            cursor++ ;

        // End Of Line? then go and get the next one
        if( *cursor == '\0' ){
            cursor = NULL ;
            continue ;
        }

        // walk to the end of the word
        start = cursor ;
        while( *cursor != '\0' && !is_separator( *cursor ) )
            cursor++ ;

        // cut the word off and remember where to resume
        if( *cursor != '\0' ){
            *cursor = '\0' ;
            cursor++ ;
        }
        return start ;
    }
}
In function.h:
#ifndef __FUNCTION_H__
#define __FUNCTION_H__
// fetch words from stdin
const char *fetch_word() ;
#endif
The function *fetch_word() will return a pointer points to each word in stdin while running, and will also return NULL if the function already reaches the End-Of-File. But everytime when it reach to EOF, it just keep saying segmentation fault and the system halted. How can I detect the return value of fetch_word(), know when I reached the End-Of-File, and also prevent from losing any words?
You need to break at the end sign before you do the cycle:
if(strcpy(temp, fetch_word()) == NULL){
break;
for(n == strlen(wordcollected); n >= 0; n--){
if(wordcollected[n] == ','){
wordcollected[n] = '\0';
}
}
}
But take my advice and refactor your code and make it more readable. You will save yourself a lot of time that way.
Also, algorithmically, for me it seems that you might want to create a linked list of words, paired with a number (implement the linked list data structure to achieve that) and whenever you read a word, try to find it in the linked list until you reach something lower than the word alphabetically or the end of the list, or a match. If you find a match, add 1 to the number. Otherwise insert the word with 1 as value at its corresponding place.
For fetching words, you can use following fetch_words().
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Example callback for fetch_words(): print one word as "[Word] <word>".
 * #word the word to print
 * #arg  NULL to print to stdout, otherwise a FILE * to print to
 * */
void print_word(const char *word, void *arg)
{
    FILE *out = arg ? (FILE *)arg : stdout;

    fprintf(out, "[Word] <%s>\n", word);
}
/*
 * Read fp to End-Of-File and call func once for every word, in order.
 * A word is a maximal run of characters that are not in the delimiter
 * set below.  Input is consumed one character at a time, so a word is
 * never split by a line or buffer boundary; a word longer than 255
 * characters is truncated to its first 255 characters.
 *
 * #fp file pointer
 * #func callback which has two params (word, arg); word is only valid
 *       during the call
 * #arg the second param of func
 * */
void fetch_words(FILE *fp, void (*func)(const char *, void *), void *arg)
{
    static const char delim[] = " \t\n,.;:?(){}\"'";
    char word[256];
    size_t len = 0;
    int ch;

    while ((ch = fgetc(fp)) != EOF)
    {
        /* strchr() also matches the terminator of delim, so a stray
         * NUL byte in the input simply ends the current word */
        if (strchr(delim, ch) == NULL)
        {
            if (len < sizeof(word) - 1)
            {
                word[len++] = (char)ch;
            }
            continue;
        }
        /* Delimiter: hand over the word collected so far, if any */
        if (len > 0)
        {
            word[len] = '\0';
            func(word, arg);
            len = 0;
        }
    }
    /* The input may end without a delimiter after the last word */
    if (len > 0)
    {
        word[len] = '\0';
        func(word, arg);
    }
}
/* Print every word found on stdin, one per line, via print_word(). */
int main(int argc, char **argv)
{
    /* the command line is not used */
    (void)argc;
    (void)argv;

    fetch_words(stdin, print_word, NULL);
    return 0;
}
For word counting, you can use hashmap (Key, Value). Key is word, and Value is count of word.
Here is a simple implementation of hashmap in C:
https://github.com/foreverpersist/hashmap

Resources