array of structs and members of it - c

I have a struct country and a dynamically allocated array of these structs. I grow the array with realloc, but there is a problem with the members of the struct that I cannot understand. Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One country record as described by the author below the program. */
struct country{
char* name; /* name of the country */
char* col; /* colour of the country */
int num; /* number of neighbors of the country */
char** nb; /* array with the names of the neighbors */
struct country* add; /* not referenced by main; the author says to ignore it */
};
/* Reads records of the form "colour name [neighbor ...]" from stdin into a
 * heap-allocated sequence of struct country that is grown with realloc.
 * Nothing is printed or freed, and no allocation or input call is checked. */
int main(void){
int ch, i = 0;
struct country* A = (struct country*)malloc(sizeof(struct country)); /* room for exactly one record */
while(1){
/* allocation for colour and name of record i */
A[i].col = (char*)malloc(16*sizeof(char));
A[i].name = (char*)malloc(16*sizeof(char)); /* the author marks this line as the place where the bug shows up */
A[i].nb = (char**)malloc(sizeof(char*)); /* one neighbor pointer to start with */
A[i].nb[0] = (char*)malloc(sizeof(char)); /* 1-byte block; replaced (and lost) by the 16-byte malloc below when a neighbor is read */
A[i].num = 0; /* neighbor count of this record, increased as names are read */
scanf("%s %s", A[i].col, A[i].name); /* colour and name of the current country; %s has no width, the buffers hold 16 bytes */
ch = getchar(); /* see if there are neighbors, a next country, or the end */
while((ch=='\t') || (ch==' ')){ /* a space or tab means a neighbor follows */
A[i].nb = (char**)realloc(A[i].nb, (A[i].num+1)*sizeof(char*)); /* num+1 pointers: just enough for index num */
A[i].nb[A[i].num] = (char*)malloc(16*sizeof(char)); /* storage for the name of neighbor number num */
scanf("%s", A[i].nb[A[i].num]);
(A[i].num)++;
ch = getchar();
}
(A[i].num)--; /* author's intent: undo one surplus increment; note that with no neighbors this leaves num at -1 */
if(ch!=EOF){ /* another line follows, hence another country */
A = (struct country*)realloc(A, (i+1)*sizeof(struct country)); /* requests i+1 records, which is the number already allocated, so A[i] after the i++ below is one past the end */
i++;
}else{
break; /* end of file: no more countries */
}
}
}
name==name of the country, col= colour of the county, num = number of neighbors of the country and **nb is an array with their names, ignore struct countr* add

You first need to understand that there is no Array present in the entirety of the code you have posted. Your struct and its members are pointers to blocks of memory you must allocate and manage.
The logic of your while (ch ...) loop is jumbled, leading to confusing expressions like:
(A[i].num)--; /* we have increased the number of neighbors one time more */
/* than the actal size */
And then multiple attempted reallocations without a single allocation/reallocation validation or any validation of input.
There is no need to cast the return of malloc, it is unnecessary. See: Do I cast the result of malloc?. When setting the size for your allocation, if you use the dereferenced type, you will always get your type size correct.
...
while (1) {
/*allocation for colour, name, */
A[i].col = malloc (16 * sizeof *A[i].col);
A[i].name = malloc (16 * sizeof *A[i].name);
A[i].nb = malloc (sizeof *A[i].nb);
A[i].nb[A->num] = malloc (sizeof *A[i].nb[A[i].num]);
/* validate EVERY allocation */
if (!A[i].col || !A[i].name || !A[i].nb || !A[i].nb[A->num]) {
perror ("malloc-col,name,nb");
return 1;
}
/* validate EVERY input - protect array bounds */
if (scanf ("%15s %15s", A[i].col, A[i].name) != 2) {
fputs ("error: failed to read col & name.\n", stderr);
return 1;
}
...
(note: you must validate EVERY allocation and EVERY reallocation, just as you must validate EVERY input)
Note the use of the field-width modifier to limit the read of character to 15 saving +1 for the nul-terminating character in
/* validate EVERY input - protect array bounds */
if (scanf ("%15s %15s", A[i].col, A[i].name) != 2) {
fputs ("error: failed to read col & name.\n", stderr);
return 1;
}
Further, you ALWAYS realloc using a temporary pointer. When realloc fails, it returns NULL, and if you fail to use a temporary pointer, you have just overwritten your pointer to your original block of memory with NULL, creating a memory leak because you have lost your reference to the original block, which now cannot be freed. Instead, you do something like:
/* ALWAYS realloc with a temprary pointer */
void *tmp = realloc (A[i].nb, (A[i].num + 1) * sizeof *A[i].nb);
if (!tmp) { /* validate EVERY reallocation */
perror ("realloc-A[i].nb");
return 1;
}
A[i].nb = tmp; /* assign new block to A[i].nb after validation */
For your control of your loop checking for '\t' and ' ', you can make the logic simpler by incorporating your EOF check in the loop itself and then realloc A after, e.g.
/* see if there are neighbors or next country or the end*/
while ((ch = getchar()) != EOF && (ch == '\t' || ch == ' ')) {
if (scanf("%15s", A[i].nb[A[i].num]) != 1) {
fputs ("error: EOF of read of A[i].nb[A[i].num].\n", stderr);
return 1;
}
A[i].num++;
/* ALWAYS realloc with a temporary pointer */
void *tmp = realloc (A[i].nb, (A[i].num + 1) * sizeof *A[i].nb);
if (!tmp) { /* validate EVERY reallocation */
perror ("realloc-A[i].nb");
return 1;
}
A[i].nb = tmp; /* assign new block to A[i].nb after validation */
A[i].nb[A[i].num] = malloc (16 * sizeof *A[i].nb[A[i].num]);
if (!A[i].nb[A[i].num]) {
perror ("malloc-A[i].nb[A[i].num]");
return 1;
}
}
/* realloc A */
void *tmp = realloc (A, (i + 1) * sizeof *A);
if (!tmp) { /* validate */
perror ("realloc-A");
return 1;
}
A = tmp; /* assign reallocated block */
i++; /* increment counter ('ncountry' better choice than i?) */
After going back through your example, my best guess at what you are trying to accomplish is contained in the following example. If you provide a bit of sample input I can validate further.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One country record: name, colour (col), neighbor count (num) and the
 * neighbor names (nb). The add member is not used by main below. */
struct country {
char *name; /* generally the '*' goes with the variable not the type */
char *col; /* char* a, b, c; does NOT create 3 pointers to char... */
int num; /* char *a, b, c; makes that clear */
char **nb;
struct country *add;
};
/* Duplicate s into freshly allocated storage; NULL on allocation failure. */
static char *dupstr (const char *s)
{
    size_t len = strlen (s) + 1;
    char *copy = malloc (len);

    if (copy)
        memcpy (copy, s, len);
    return copy;
}

/* Advance stdin to the next token on the current line. The tail of a token
 * that was cut short by the %15s width is discarded, then blanks are skipped.
 * Returns 1 if another token follows on this line, 0 at end of line or input. */
static int more_on_line (void)
{
    int ch = getchar();

    while (ch != EOF && ch != ' ' && ch != '\t' && ch != '\n')
        ch = getchar();             /* drop remainder of an over-long token */
    while (ch == ' ' || ch == '\t')
        ch = getchar();             /* skip blanks between tokens */
    if (ch == EOF || ch == '\n')
        return 0;
    ungetc (ch, stdin);             /* first character of the next token */
    return 1;
}

/* Release the n records in A, including a partially built last record
 * (unset pointers are NULL and num counts only stored neighbor names). */
static void free_countries (struct country *A, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        for (int j = 0; j < A[i].num; j++)
            free (A[i].nb[j]);
        free (A[i].nb);
        free (A[i].col);
        free (A[i].name);
    }
    free (A);
}

/* Read "colour name [neighbor ...]" records, one per line, from stdin into
 * a dynamically grown array of struct country, then free everything.
 * Returns 0 when the input was consumed up to EOF, 1 on malformed input or
 * allocation failure. */
int main (void) {
    struct country *A = NULL;       /* records read so far */
    size_t n = 0;                   /* number of valid elements in A */
    int status = 1;                 /* failure until EOF is reached cleanly */

    for (;;) {
        char col[16], name[16];
        struct country *c;
        void *tmp;
        int rtn = scanf ("%15s %15s", col, name);

        if (rtn == EOF) {           /* no further record: normal termination */
            status = 0;
            break;
        }
        if (rtn != 2) {
            fputs ("error: failed to read col & name.\n", stderr);
            break;
        }

        /* grow by ONE element beyond the n already stored */
        tmp = realloc (A, (n + 1) * sizeof *A);
        if (!tmp) {
            perror ("realloc-A");
            break;
        }
        A = tmp;
        c = &A[n++];
        *c = (struct country){ 0 }; /* num = 0, all pointers NULL */

        c->col = dupstr (col);
        c->name = dupstr (name);
        if (!c->col || !c->name) {
            perror ("malloc-col,name");
            break;
        }

        while (more_on_line ()) {   /* each further token is a neighbor */
            char nb[16];

            if (scanf ("%15s", nb) != 1) {
                fputs ("error: failed to read neighbor name.\n", stderr);
                goto done;
            }
            tmp = realloc (c->nb, ((size_t)c->num + 1) * sizeof *c->nb);
            if (!tmp) {
                perror ("realloc-nb");
                goto done;
            }
            c->nb = tmp;
            if (!(c->nb[c->num] = dupstr (nb))) {
                perror ("malloc-nb[num]");
                goto done;
            }
            c->num++;               /* count only names actually stored */
        }
    }
done:
    free_countries (A, n);
    return status;
}
Look things over and let me know if you have further questions.

One problem you have is how you are increasing the size of the A array (using the i variable)!
In this code:
if(ch!=EOF){ /* means that we have another line and so another country */
A = (struct country*)realloc(A, (i+1)*sizeof(struct country)); /* another country-->A should be bigger */
i++;
}else{
break; /* END OF FILE no more countries*/
}
you have an "off-by-one" error!
Thus, on the first run through the while loop, i will have a value of zero, so your realloc call will create a new buffer big enough for just one structure (i + 1) - but you already have a buffer for one such structure!
Solution: You need to put the i++ line before the realloc call!
EDIT: You have a very similar problem in your inner while loop with this line:
A[i].nb = (char**)realloc(A[i].nb, (A[i].num+1)*sizeof(char*));
but here it's probably better to fix it using (A[i].num+2) as you are using the (correct) A[i].num values several times before you increment it.

Related

Dynamic growing string array memory issues

I'm working on a crosswords program in which a word dictionary is necessary. I'm trying load a jspell dictionary file into an dynamic string array but i keep getting the
error malloc(): mismatching next->prev_size (unsorted)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"
/* Initialise dict: store a copy of the dictionary file path, allocate the
 * initial word-pointer array, reset the bookkeeping fields and load the words
 * via dict_load().
 *   dict     - dictionary object to fill in
 *   dict_dir - path of the dictionary file (copied)
 *   w_len    - word length limit forwarded to the parser through dict->w_len
 * Allocation failure is fatal: it is reported with perror and the program
 * exits, the same policy dict_load applies when the file cannot be opened. */
void dict_init(Dictionary * dict, char * dict_dir, size_t w_len)
{
    /*Adjust this value to control the initial array size*/
    size_t init_size = 1000;

    printf("dictionary.c (dict_init): initializing dictionary.\n");

    /*initialize dictionary file directory*/
    dict->dir = malloc(strlen(dict_dir) + 1);
    if (dict->dir == NULL)
    {
        perror("ERROR: malloc (dict->dir)");
        exit(EXIT_FAILURE);
    }
    strcpy(dict->dir, dict_dir);

    /*create memory for words array*/
    dict->words = malloc(init_size * sizeof *dict->words);
    if (dict->words == NULL)
    {
        perror("ERROR: malloc (dict->words)");
        free(dict->dir);
        exit(EXIT_FAILURE);
    }

    /*initialize array size, word length and word counter*/
    dict->size = init_size;
    dict->w_len = w_len;
    dict->counter = 0;

    /*load words into dictionary*/
    dict_load(dict);
    printf("dictionary.c (dict_init): dictionary initialized.\n");
}
/* Append a copy of word to dict->words, enlarging the pointer array when it
 * is full. The function frees word itself, so the caller must pass a heap
 * block and must not use it afterwards. */
void dict_add(Dictionary * dict, char * word)
{
char ** dictionary = dict->words; /* snapshot taken BEFORE the possible realloc below */
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increase size of dictionary*/
dict->size *= 1.5; /* size goes through a double multiplication and is converted back on assignment */
dict->words = realloc(dict->words, dict->size * sizeof(char *)); /* result is not checked; realloc may return a different block */
}
/*add word to dictionary*/
/* NOTE: the writes below go through the snapshot, not dict->words, so after
 * a realloc that returned a different block they target the old block */
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
free(word); /* ownership of word ends here */
}
/* Release the word-pointer array of dict. Only the array itself is freed
 * here; the individual word strings and dict->dir are not. */
void dict_free(Dictionary * dict)
{
free(dict->words);
}
/* Read the file named by dict->dir line by line and add every word that
 * parse_line() accepts to the dictionary. Lines containing "[CAT=punct" are
 * skipped, and the first line is discarded when the path contains ".dic".
 * Exits the program if the file cannot be opened. */
void dict_load(Dictionary * dict)
{
    char * buf = NULL;      /* line buffer owned by getline */
    size_t cap = 0;         /* capacity of buf, maintained by getline */
    FILE * in = fopen(dict->dir, "r");

    /*check if file exists*/
    if (!in)
    {
        perror("ERROR: File not found.");
        exit(EXIT_FAILURE);
    }

    /*discard first line*/
    if (strstr(dict->dir, ".dic") != NULL)
        getline(&buf, &cap, in);

    /*read file lines*/
    while (getline(&buf, &cap, in) != -1)
    {
        char * entry;

        if (strstr(buf, "[CAT=punct") != NULL)
            continue;                       /* punctuation entry */
        entry = parse_line(buf, dict->w_len);
        if (entry == NULL)
            continue;                       /* rejected by the parser */
        dict_add(dict, entry);
    }

    fclose(in);
    free(buf);
    printf("dictionary.c (dict_load): dictionary loaded %ld words.\n", dict->counter);
}
/* Extract the word at the start of line.
 * The word ends at the first '/', else the first ' ', else the first '\t'
 * found in line (in that order of preference); if none is present the word
 * is the whole line up to its newline, and that newline is overwritten with
 * '\0' in the caller's buffer.
 * The word is copied to a new heap block, lower-cased with lower_case() and
 * returned when is_valid() reports 0 and its length does not exceed w_len.
 * Returns NULL when the word is rejected or memory cannot be allocated.
 * The caller owns the returned block. */
char * parse_line(char * line, size_t w_len)
{
    size_t len;
    char * delim, * substring;

    /*get delimiter pointer: slash first, then space, then tab*/
    if((delim = strchr(line, '/')) != NULL
       || (delim = strchr(line, ' ')) != NULL
       || (delim = strchr(line, '\t')) != NULL)
        len = (size_t)(delim - line);
    else
    {
        /*replace '\n' with '\0'*/
        len = strcspn(line, "\n");
        line[len] = '\0';
    }

    /* copy exactly len bytes and terminate explicitly; no fixed-size
     * intermediate buffer, so long words cannot overflow anything */
    substring = malloc(len + 1);
    if(substring == NULL)
        return NULL;
    memcpy(substring, line, len);
    substring[len] = '\0';

    /*lowercase word*/
    lower_case(substring);
    if((is_valid(substring) == 0) && (strlen(substring) <= w_len))
        return substring;
    free(substring);
    return NULL;
}
Here's the basic problem, I think:
void dict_add(Dictionary * dict, char * word) {
char ** dictionary = dict->words; /* **** 1 **** */
/*check if word array is full*/
if(dict->counter == dict->size)
{
/*increrase size of dictionary*/
dict->size *= 1.5; /* **** 2 **** */
dict->words = realloc(dict->words, dict->size * sizeof(char *));
/* **** 3 **** */
}
/*add word to dictionary*/
This one is the problem:
dictionary[dict->counter] = malloc(strlen(word) * sizeof(char) + 1);
strcpy(dictionary[dict->counter], word);
dict->counter++;
free(word); /* **** 4 **** */
}
The problem is that dictionary was saved before you called realloc. realloc might make a brand-new memory allocation, in which case it will automatically free() the old one after copying its contents into the new one. So any copy of the pointer which you made before calling realloc might end up pointing to unallocated memory. Writing to unallocated memory is a big no-no; in this particular case, you're probably overwriting malloc's bookkeeping information about the unallocated block, which is why it detects the problem and complains. Count yourself lucky: lots of memory corruption problems go undetected for quite a while until the factory explodes.
Some other issues which I noticed while writing this, with numbered comments in the source:
There's actually no need for the variable dictionary at all.
dict->size is an integer. Forcing conversion to a floating point number and then truncating back to an integer is not very useful. Prefer dict->size += dict->size/2;. Even better would be to first make sure that dict->size isn't so big that increasing it will cause integer wraparound. (This is not undefined behaviour on unsigned types like size_t, but it's not going to produce correct results.)
Here you could actually use a temporary, because realloc might return NULL indicating a memory allocation failure. If that happens, the original allocation is not automatically freed, and you don't have a way to free it. (Actually you do, since you have a variable confusingly called dictionary, but in point 1 I recommended that you get rid of it.) A more idiomatic call would be:
if(dict->counter == dict->size) {
/*increrase size of dictionary*/
dict->size += dict->size / 2; /* See point 2, above */
char** new_words = realloc(dict->words, dict->size * sizeof(*new_words));
if (new_words == NULL) {
/* Report allocation error and free all the memory you've allocated */
/* Then probably exit(1) but if this were a library function, just
* return some kind of failure indication so that the caller can do
* their own clean-up.
*/
}
dict->words = new_words;
}
dict->words[dict->counter] = word; /* See point 4, below */
You're freeing word here because it was allocated in parse_line(). But if you know you're going to free it anyway, there wasn't much point making a copy of it first. You might as well just use it. (But you need to document the fact that this function takes ownership of the word passed as an argument.)
It might be considered cleaner to do the copy as you do but then not free the argument, leaving it for the caller to do that. That would have the advantage of allowing the caller to provide a word which hadn't been dynamically allocated, or use the word for some other purpose.
(Not indicated in this snippet, but nonetheless important). Every block of allocated memory must be freed. So your program should execute free exactly as many times as it executed malloc. But you don't do that; you just free the array of word pointers, and let the words pointed to in that array leak. You should fix that. (Note that you don't need an extra call to free for a call to realloc, since realloc itself frees the old block if it allocates a new one. You only need to match the initial malloc with a free.)

C Program to Convert a Text File into a CSV File

The question is to convert a text file into a CSV file using C programming. The input text file is formatted as the following:
JACK Maria Stephan Nora
20 34 45 28
London NewYork Toronto Berlin
The output CSV file should look like:
Jack,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
The following code is what I tried so far:
/* Copies characters from filename to "output.csv", writing the character
 * followed by ",\n" again after every space or newline. This is the author's
 * attempt; it does not transpose the rows into columns. Neither fopen result
 * is checked. */
void load_and_convert(const char* filename){
FILE *fp1, *fp2;
char ch; /* fgetc returns int; storing it in a char loses the distinction between EOF and a data byte */
fp1=fopen(filename,"r");
fp2=fopen("output.csv","w");
for(int i=0;i<1000;i++){ /* always 1000 reads, independent of the file length */
ch=fgetc(fp1);
fprintf(fp2,"%c",ch); /* every character, separators included, is copied through */
if(ch==' '|| ch=='\n')
fprintf(fp2,"%c,\n",ch); /* separator is written a second time, then a comma and a line break */
}
fclose(fp1);
fclose(fp2);
}
The output from my code looks like:
Jack,
Maria,
Stephan,
Nora,
20,
34,
45,
28,
London,
NewYork,
Toronto,
Berlin,
How should I modify my code to make it work correctly?
What's the idea to treat this question?
Since I have some time, here is a working solution for you (I tried my best to make it as elegant as I can):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_STRING_LENGTH 50
#define MAX_NUMBER_OF_PEOPLE 50
/* One output record; the three members are filled from the first, second
 * and third input line respectively. */
typedef struct
{
char name[MAX_STRING_LENGTH]; /* name text */
int age; /* age; main stops printing at the first record whose age is 0 */
char city[MAX_STRING_LENGTH]; /* city text */
} Person;
/* Split src on the characters in delim and store the k-th token as the name
 * of people[k]. src is modified by strtok. At most MAX_NUMBER_OF_PEOPLE
 * records are written (people is assumed to have that many elements, as in
 * main); each name is truncated to fit and always NUL-terminated. */
void getName(char *src, char *delim, Person *people) {
    int i = 0;

    for (char *ptr = strtok(src, delim);
         ptr != NULL && i < MAX_NUMBER_OF_PEOPLE;
         ptr = strtok(NULL, delim), i++)
    {
        /* snprintf, unlike strncpy, guarantees the terminator */
        snprintf(people[i].name, sizeof people[i].name, "%s", ptr);
    }
}
/* Split src on the characters in delim and store the k-th token, converted
 * with atoi, as the age of people[k]. src is modified by strtok. At most
 * MAX_NUMBER_OF_PEOPLE records are written (people is assumed to have that
 * many elements, as in main); surplus tokens are ignored. */
void getAge(char *src, char *delim, Person *people) {
    int i = 0;

    for (char *ptr = strtok(src, delim);
         ptr != NULL && i < MAX_NUMBER_OF_PEOPLE;
         ptr = strtok(NULL, delim), i++)
    {
        people[i].age = atoi(ptr);
    }
}
/* Split src on the characters in delim and store the k-th token as the city
 * of people[k]. src is modified by strtok. At most MAX_NUMBER_OF_PEOPLE
 * records are written (people is assumed to have that many elements, as in
 * main); each city is truncated to fit and always NUL-terminated. */
void getCity(char *src, char *delim, Person *people) {
    int i = 0;

    for (char *ptr = strtok(src, delim);
         ptr != NULL && i < MAX_NUMBER_OF_PEOPLE;
         ptr = strtok(NULL, delim), i++)
    {
        /* snprintf, unlike strncpy, guarantees the terminator */
        snprintf(people[i].city, sizeof people[i].city, "%s", ptr);
    }
}
/* Read ./test.txt, whose first line holds names, second line ages and third
 * line cities (space separated), and print one "name, age, city" line per
 * person. Lines after the third are ignored. Printing stops at the first
 * record whose age is 0, which also marks the unused tail of the array.
 * Returns -1 if the file cannot be opened, 0 otherwise. */
int main(void)
{
    Person somebody[MAX_NUMBER_OF_PEOPLE] = {0};   /* zeroed: age 0 == unused */
    char *line = NULL;      /* buffer owned by getline; never reassigned */
    size_t len = 0;
    int ln = 0;
    FILE *fp = fopen("./test.txt", "r");

    if (fp == NULL)
    {
        perror("./test.txt");
        return -1;
    }

    // Read every line: the first line is names, the second ages, the third cities
    while (getline(&line, &len, fp) != -1) {
        // remove the trailing newline in place, keeping the pointer getline manages
        line[strcspn(line, "\r\n")] = '\0';
        if (ln == 0) {
            getName(line, " ", somebody);
        } else if (ln == 1) {
            getAge(line, " ", somebody);
        } else if (ln == 2) {
            getCity(line, " ", somebody);
        }
        ln++;
    }

    for (int j = 0; j < MAX_NUMBER_OF_PEOPLE; j++) {
        if (somebody[j].age == 0)
            break;
        printf("%s, %d, %s\n", somebody[j].name, somebody[j].age, somebody[j].city);
    }

    fclose(fp);
    free(line);
    return 0;
}
What you are needing to do is non-trivial if you want to approach the problem holding all values in memory as you transform the 3-rows with 4-fields in each row, to a format of 4-rows with 3-fields per-row. So when you have your datafile containing:
Example Input File
$ cat dat/col2csv3x4.txt
JACK Maria Stephan Nora
20 34 45 28
London NewYork Toronto Berlin
You want to read each of the three lines and then transpose the columns into rows for .csv output. Meaning you will then end up with 4-rows of 3-csv fields each, e.g.
Expected Program Output
$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
There is nothing difficult in doing it, but it takes meticulous attention to handling the memory storage of object and allocating/reallocating to handle the transformation between 3-rows with 4-pieces of data to 4-rows with 3-pieces of data.
One approach is to read all original lines into a typical pointer-to-pointer to char setup, then transform/transpose the columns into rows. Since conceivably there could be 100 rows with 500 fields next time, you will want to approach the transformation using indexes and counters to track your allocation and reallocation requirements, so that your finished code can handle transposing a generic number of lines and fields into a fields-number of lines with as many values per row as you had original lines.
You can design your code to provide the transformation in two basic functions. The first reads and stores the lines (say `getlines`) and the second then transposes those lines into a new pointer-to-pointer to char so it can be output as comma-separated values.
One way to approach these two functions would be similar to the following that takes the filename to read as the first-arguments (or will read from stdin by default if no argument is given). The code isn't trivial, but it isn't difficult either. Just keep track of all your allocations, preserving a pointer to the beginning of each, so the memory may be freed when no longer needed, e.g.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NPTR 2
#define NWRD 128
#define MAXC 1024
/** getlines reads every line of fp into individually allocated strings.
 * the stream is rewound first. the pointer array starts with NPTR slots
 * and doubles whenever it is full; before returning it is trimmed to the
 * number of lines stored. line endings ("\r\n" or "\n") are removed.
 * on return *n holds the number of lines stored. returns NULL when the
 * initial allocation fails or no line could be stored; if a later
 * allocation fails, the lines stored up to that point are returned.
 */
char **getlines (size_t *n, FILE *fp)
{
    char buf[MAXC];                 /* holds one line (or MAXC-1 char piece) */
    size_t cap = NPTR,              /* pointers currently allocated */
           count = 0;               /* lines stored so far */
    char **lines = calloc (cap, sizeof *lines);

    if (lines == NULL) {
        perror ("calloc-lines");
        return NULL;
    }

    *n = 0;
    rewind (fp);                    /* also clears the stream error state */

    while (fgets (buf, MAXC, fp) != NULL) {
        size_t len;
        char *copy;

        if (count == cap) {         /* pointer array full: double it */
            char **grown = realloc (lines, 2 * cap * sizeof *lines);
            if (grown == NULL) {
                perror ("realloc-tmp");
                break;
            }
            lines = grown;
            memset (lines + cap, 0, cap * sizeof *lines);   /* zero new half */
            cap *= 2;
        }

        len = strcspn (buf, "\r\n");    /* length without the line ending */
        buf[len] = 0;

        copy = malloc (len + 1);
        lines[count] = copy;
        if (copy == NULL) {
            perror ("malloc-lines[*n]");
            break;
        }
        memcpy (copy, buf, len + 1);
        count++;
    }

    *n = count;
    if (count == 0) {               /* nothing stored: release the array */
        free (lines);
        return NULL;
    }

    /* optional trim of the pointer array to the exact line count */
    char **exact = realloc (lines, count * sizeof *lines);
    if (exact == NULL) {
        perror ("final-realloc");
        return lines;
    }
    return exact;
}
/** release the n blocks referenced by the pointer array p2p, then the
 * array itself. p2p is treated as an array of char pointers.
 */
void freep2p (void *p2p, size_t n)
{
    char **rows = p2p;
    size_t idx = 0;

    while (idx < n) {
        free (rows[idx]);
        idx++;
    }
    free (rows);
}
/** transpose the whitespace-separated fields of a file into csv rows.
 * all lines of fp are read with getlines(); csv row k is built from the
 * k-th field of every input line, joined with ','. building stops at the
 * first k for which some line has no k-th field (an incomplete row is
 * discarded).
 *
 * on entry *n is ignored; on success *n is set to the number of csv rows
 * and a non-NULL array of that many allocated strings is returned (free
 * with freep2p). on failure everything allocated here is released, *n is
 * set to 0 and NULL is returned. fields and rows may be of any length.
 */
char **transpose2csv (size_t *n, FILE *fp)
{
    static const char ws[] = " \t\n\v\f\r";    /* field separators */
    char **l = NULL,            /* input lines */
         **t = NULL;            /* csv rows */
    size_t nl = 0,              /* number of input lines */
           csvl = 0,            /* completed csv rows stored in t */
           ncsv = 0,            /* row pointers allocated in t */
           *offset = NULL;      /* per-line read position */

    if (!(l = getlines (n, fp))) {          /* read all lines to l */
        fputs ("error: getlines failed.\n", stderr);
        return NULL;
    }
    nl = *n;                                /* nl >= 1 when l is non-NULL */
#ifdef DEBUG
    for (size_t i = 0; i < nl; i++)
        puts (l[i]);
#endif

    ncsv = nl;                              /* initial guess, grown on demand */
    if (!(t = malloc (ncsv * sizeof *t))) {
        perror ("malloc-t");
        goto fail;
    }
    if (!(offset = calloc (nl, sizeof *offset))) {
        perror ("calloc-offsets");
        goto fail;
    }

    for (;;) {                  /* one iteration builds one csv row */
        size_t cap = MAXC,      /* bytes allocated for row */
               used = 0;        /* bytes written, excluding terminator */
        char *row = malloc (cap);

        if (!row) {
            perror ("malloc-t[i]");
            goto fail;
        }

        for (size_t i = 0; i < nl; i++) {   /* next field from each line */
            const char *w = l[i] + offset[i];
            size_t len, need;

            w += strspn (w, ws);            /* skip leading separators */
            len = strcspn (w, ws);          /* field length */
            if (!len) {                     /* this line is exhausted */
                free (row);                 /* drop the incomplete row */
                goto done;
            }
            offset[i] = (size_t)(w - l[i]) + len;

            /* bytes required: text so far, optional comma, field, '\0' */
            need = used + (used ? 1 : 0) + len + 1;
            if (need > cap) {
                size_t newcap = cap;
                void *tmp;

                while (newcap < need)
                    newcap *= 2;
                if (!(tmp = realloc (row, newcap))) {
                    perror ("realloc-t[i]");
                    free (row);
                    goto fail;
                }
                row = tmp;
                cap = newcap;
            }
            if (used)
                row[used++] = ',';
            memcpy (row + used, w, len);
            used += len;
            row[used] = 0;
        }

        if (csvl == ncsv) {                 /* row pointer array is full */
            void *tmp = realloc (t, 2 * ncsv * sizeof *t);

            if (!tmp) {
                perror ("realloc-t");
                free (row);
                goto fail;
            }
            t = tmp;
            ncsv *= 2;
        }
        t[csvl++] = row;                    /* t now owns the finished row */
    }

done:
    freep2p (l, nl);
    free (offset);
    *n = csvl;
    return t;

fail:
    freep2p (t, csvl);                      /* only completed rows are in t */
    freep2p (l, nl);
    free (offset);
    *n = 0;
    return NULL;
}
/* Transpose the file named by the first argument (stdin when no argument is
 * given) and print the resulting csv rows. Returns 1 if the file cannot be
 * opened or the transposition fails, 0 otherwise. */
int main (int argc, char **argv) {
    char **t;
    size_t n = 0;
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    t = transpose2csv (&n, fp);
    if (fp != stdin)            /* input is no longer needed on either path */
        fclose (fp);

    if (!t) {
        fputs ("error: transpose2csv failed.\n", stderr);
        return 1;
    }

    for (size_t i = 0; i < n; i++)
        if (t[i])
            puts (t[i]);
    freep2p (t, n);
    return 0;
}
Example Use/Output
$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to insure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/transpose2csv < dat/col2csv3x4.txt
==18604== Memcheck, a memory error detector
==18604== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==18604== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==18604== Command: ./bin/transpose2csv
==18604==
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
==18604==
==18604== HEAP SUMMARY:
==18604== in use at exit: 0 bytes in 0 blocks
==18604== total heap usage: 15 allocs, 15 frees, 4,371 bytes allocated
==18604==
==18604== All heap blocks were freed -- no leaks are possible
==18604==
==18604== For counts of detected and suppressed errors, rerun with: -v
==18604== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have further questions.

Printing the most frequent occurring words in a given text file, unable to sort by frequency in C

I am working on an assignment that requires me to print the top 10 most occurring words in a given text file. My code is printing the words from the file, but it is not sorting them according to their frequency.
Here is some of my code below. I use a hashtable to store each unique word and its frequency. I am currently sorting the words using the wordcmp function I wrote, which I pass to the built-in qsort function in main.
If anyone can guide me to fix my error, I'd be very grateful.
My current output:
the top 10 words (out of 10) are:
1 im
1 are
1 again
3 happy
2 hello
1 how
1 lets
1 you
1 try
1 this
Expected output (what I want):
The top 10 words (out of 10) are:
3 happy
2 hello
1 you
1 try
1 this
1 lets
1 im
1 how
1 are
1 again
Here is some of my code:
/* One hash-table entry: a word, its occurrence count and a link to another
 * entry. */
typedef struct word
{
char *s; /* the word */
int count; /* number of times word occurs */
struct word* next; /* next entry; set to NULL by add when an entry is created */
}word;
/* Hash table of word entries. */
struct hashtable
{
word **table; /* tablesize slots, each NULL or pointing to an entry */
int tablesize; /* number of slots in table */
int currentsize; /* number of entries created by add */
};
typedef struct hashtable hashtable;
/* Author's excerpt: counts the words of a file in a hash table, sorts the
 * table with qsort and prints up to top_words entries. Several names used
 * here (i, total_unique_words, getWord, freetable) are not declared in the
 * excerpt. */
int main(int argc, char *argv[])
{
int top_words = 10;
word *word = NULL; /* from here on "word" names this variable, not the typedef */
hashtable *hash = ht_create(5000);
char *file_name; /* no assignment to file_name is visible in this excerpt */
char *file_word;
FILE *fp;
struct word *present = NULL;
fp = fopen (file_name, "r");
if (fp == NULL)
{
fprintf (stderr,"%s: No such file or directory\n", file_name);
fprintf(stderr,"The top %d words (out of 0) are:\n", top_words);
exit(-1);
}
continue_program: /* no goto targeting this label is visible in the excerpt */
while ((file_word = getWord(fp)))
{
word = add(hash, file_word, 1);
}
fclose(fp);
/* sorts only the first currentsize slots of the table, although add places
 * entries at hashed indices across all tablesize slots; sizeof(word) is the
 * size of the pointer variable declared above; the comparator is cast from
 * a function taking word* although the elements being sorted are word* (so
 * qsort passes it pointers to those elements) */
qsort((void*)hash->table, hash->currentsize, sizeof(word),(int (*)(const void *, const void *)) wordcmp);
if(top_words > total_unique_words)
top_words = total_unique_words;
printf("the top %d words (out of %d) are:\n", top_words, total_unique_words);
int iterations =0;
/* i <= tablesize also visits index tablesize, one past the slots allocated
 * by ht_create */
for(i =0; i <= hash->tablesize && iterations< top_words; i++)
{
present = hash->table[i];
if(present != NULL)
{
printf(" %4d %s\n", present->count, present->s);
present = present->next; /* overwritten at the top of the next iteration, so only the first entry of a slot is printed */
iterations++;
}
}
freetable(hash);
return 0;
}
/* Comparison by descending count: returns +1 when a occurs less often than
 * b, -1 when it occurs more often, and 0 when the counts are equal or either
 * argument is NULL.
 * NOTE(review): main passes this to qsort over an array whose elements are
 * word*; qsort hands a comparator pointers to the elements, so the values
 * received here would be word** rather than word* - confirm against caller. */
int wordcmp (word *a, word *b)
{
if (a != NULL && b!= NULL) {
if (a->count < b->count)
{
return +1;
}
else if (a->count > b->count)
{
return -1;
}
else if (a->count == b->count)
{
/* alphabetical tie-break, disabled by the author: */
/*return strcmp(b->s, a->s);*/
return 0;
}
}
return 0;
}
/* Create a new hashtable with size empty slots.
 * Returns NULL when size is less than 1 or memory cannot be allocated;
 * nothing is leaked in that case. */
struct hashtable *ht_create( int size )
{
    int i;
    hashtable *table;

    if( size < 1 )
        return NULL;

    table = malloc(sizeof *table);
    if( table == NULL )         /* check BEFORE the first dereference */
        return NULL;

    table->table = malloc(sizeof *table->table * (size_t) size);
    if( table->table == NULL )
    {
        free(table);
        return NULL;
    }

    table->currentsize = 0;
    table->tablesize = size;
    for( i = 0; i < size; i++ )
    {
        table->table[i] = NULL;
    }
    return table;
}
/* Record one occurrence of key in the hash table.
 * If an entry for key already exists in its bucket, its count is incremented
 * and the entry is returned. Otherwise a new entry holding a copy of key and
 * the count freq is linked in at the head of the bucket, keeping the entries
 * already chained there, and the entry and unique-word counters are bumped.
 * Returns the entry, or NULL if memory for a new entry cannot be allocated
 * (the table is left unchanged in that case).
 * NOTE(review): assumes hashcode() never yields a negative value, otherwise
 * index would be negative - confirm against hashcode. */
word * add(hashtable *h, char *key, int freq)
{
    int index = hashcode(key) % h->tablesize;
    word *current;
    word *newnode;

    /* Search this bucket's chain for the key itself */
    for(current = h->table[index]; current != NULL; current = current->next) {
        if(strcmp(current->s, key) == 0) {
            current->count++;
            return current;
        }
    }

    /* Create new node if no duplicate is found */
    newnode = malloc(sizeof *newnode);
    if(newnode == NULL)
        return NULL;
    newnode->s = strdup(key);
    if(newnode->s == NULL) {
        free(newnode);
        return NULL;
    }
    newnode->count = freq;
    newnode->next = h->table[index];    /* keep the existing chain */
    h->table[index] = newnode;
    h->currentsize = h->currentsize + 1;
    total_unique_words++;
    return newnode;
}
The primary problem you are facing is attempting to sort a hashtable with linked-list chaining of buckets. When a hash collision occurs, your table is not resized, you simply use a linked-list to store the word causing the collision at the same table[index] linked to the word already stored there. That is what add does.
This can easily result in the contents of your hashtable looking like this:
table[ 0] = NULL
table[ 1] = foo
table[ 2] = NULL
table[ 3] = |some|->|words|->|that|->|collided| /* chained bucket */
table[ 4] = other
table[ 5] = words
table[ 6] = NULL
table[ 7] = NULL
...
You cannot simply qsort table and hope to get the correct word frequencies. qsort has no way to know that "some" is just the beginning word in a linked-list, all qsort gets is a pointer to "some" and sizeof(word).
To make life much easier, simply forget the hashtable, and use a dynamically allocated array of word**. You can use a similar add where you increment the number of occurrences for duplicates, and you avoid all problems with chained-buckets. (and if you provide automatic storage for each word, it leaves you with a simple free() of your pointers and you are done)
The following example takes 2 arguments: the first is the filename to read words from, and the (optional) second is an integer limiting the sorted output to that number of top words. The words_t struct uses automatic storage for word, limited to 32 chars (the largest word in the unabridged dictionary is 28 characters). You can change the way words are read to parse the input and ignore punctuation and plurals as desired. The following delimits words on all punctuation (except the hyphen), and discards the plural form of words (e.g. it stores "Mike" when "Mike's" is encountered, discarding the "'s").
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#define MAXC 32 /* max word length is 28-char, 29-char is sufficient */
#define MAXW 128 /* initial maximum number of words to allocate */
/* One distinct word together with its occurrence count. */
typedef struct {
char word[MAXC]; /* the word text, stored inline (at most MAXC - 1 chars plus NUL) */
size_t ninst; /* number of times this word has been seen */
} words_t;
/* function prototypes */
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw);
void *xrealloc (void *ptr, size_t psz, size_t *nelem);
/* qsort compare function for words_t (alphabetical) */
int cmpwrds (const void *a, const void *b)
{
return strcmp (((words_t *)a)->word, ((words_t *)b)->word);
}
/* qsort compare function for words_t (by occurrence - descending)
* and alphabetical (ascending) if occurrences are equal)
*/
int cmpinst (const void *a, const void *b)
{
int ndiff = (((words_t *)a)->ninst < ((words_t *)b)->ninst) -
(((words_t *)a)->ninst > ((words_t *)b)->ninst);
if (ndiff)
return ndiff;
return strcmp (((words_t *)a)->word, ((words_t *)b)->word);
}
/* Count the occurrences of each distinct word in a file and print them
 * sorted by descending frequency (ties broken alphabetically).
 *
 *   argv[1]  file to read (stdin is read when no filename is given)
 *   argv[2]  optional: limit the output to this many of the top words
 *
 * Returns 0 on success, 1 when the file cannot be opened, the initial
 * allocation fails, or a word does not fit in MAXC - 1 characters.
 */
int main (int argc, char **argv) {

    int c = 0, nc = 0, prev = ' ';
    size_t maxw = MAXW, wc = 0, top = 0, nout = 0, total = 0;
    char buf[MAXC] = "";
    words_t *words = NULL;
    /* only hand argv[1] to fopen when it was actually supplied */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
    const char *srcname = argc > 1 ? argv[1] : "stdin";

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    if (argc > 2) { /* if 2 args, convert argv[2] to number of top words */
        char *p = argv[2];
        size_t tmp;

        errno = 0;          /* strtoul sets errno only on failure */
        tmp = strtoul (argv[2], &p, 0);
        if (p != argv[2] && !errno)
            top = tmp;
    }

    /* allocate/validate initial words */
    if (!(words = calloc (maxw, sizeof *words))) {
        perror ("calloc-words");
        if (fp != stdin) fclose (fp);
        return 1;
    }

    while ((c = fgetc(fp)) != EOF) {        /* read each character in file */
        if (c != '-' && (isspace (c) || ispunct (c))) {    /* word-end found */
            if (!isspace (prev) && !ispunct (prev) &&  /* multiple ws/punct */
                !(prev == 's' && nc == 1)) {           /* exclude "'s" */
                buf[nc] = 0;                            /* nul-terminate */
                words = addword (words, buf, &wc, &maxw);   /* add word */
            }
            /* always restart the buffer at a word end; otherwise a skipped
             * fragment (the "s" of "'s", a trailing '-') would be glued
             * onto the front of the next word.
             */
            nc = 0;
        }
        else if (nc < MAXC - 1) {           /* add char to buf */
            buf[nc++] = c;
        }
        else {  /* chars exceed MAXC - 1; storage capability of struct */
            fprintf (stderr, "error: characters exceed %d.\n", MAXC);
            free (words);
            if (fp != stdin) fclose (fp);
            return 1;
        }
        prev = c;   /* save previous char */
    }

    /* handle input that does not end with whitespace/punctuation; buf must
     * be terminated here or it may still hold the tail of a longer word.
     */
    if (nc > 0 && !isspace (prev) && !ispunct (prev)) {
        buf[nc] = 0;
        words = addword (words, buf, &wc, &maxw);
    }

    if (fp != stdin) fclose (fp);       /* close file if not stdin */

    qsort (words, wc, sizeof *words, cmpinst);  /* sort words by frequency */

    printf ("'%s' contained '%zu' words.\n\n", /* output number of distinct words */
            srcname, wc);

    /* output top words (or all words in descending order if top not given);
     * never walk past the wc entries that were actually filled.
     */
    nout = (top != 0 && top < wc) ? top : wc;
    for (size_t i = 0; i < nout; i++) {
        printf (" %-28s %5zu\n", words[i].word, words[i].ninst);
        total += words[i].ninst;
    }
    printf ("%33s------\n%34s%5zu\n", " ", "Total: ", total);

    free (words);

    return 0;
}
/** record one occurrence of 'word' in the 'words' array.
 *  if the word is already present only its counter is incremented;
 *  otherwise it is copied into the slot at index '*wc' and '*wc' is
 *  advanced. when '*wc' has reached '*maxw' the array is first grown
 *  through xrealloc (which updates '*maxw'). returns the (possibly
 *  relocated) array, which the caller must assign back to its pointer.
 */
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw)
{
    size_t idx = 0;

    /* already known? then only the counter changes */
    while (idx < *wc) {
        if (!strcmp (words[idx].word, word)) {
            words[idx].ninst += 1;
            return words;
        }
        idx++;
    }

    /* no free slot left - grow before storing */
    if (*maxw == *wc)
        words = xrealloc (words, sizeof *words, maxw);

    /* new word goes into the first unused slot */
    strcpy (words[*wc].word, word);
    words[*wc].ninst++;
    *wc += 1;

    return words;
}
/** grow the array 'ptr' of '*nelem' elements of 'psz' bytes each to
 *  twice as many elements (an empty array grows to one element).
 *  the added elements are zero-filled and '*nelem' is updated to the
 *  new element count. returns the reallocated block, which must be
 *  assigned to the original pointer in the caller. does not return on
 *  failure: the process exits with EXIT_FAILURE if the new size cannot
 *  be represented in a size_t or the allocation fails.
 */
void *xrealloc (void *ptr, size_t psz, size_t *nelem)
{
    size_t oldn = *nelem;
    size_t newn = oldn ? oldn * 2 : 1;  /* doubling 0 would request 0 bytes */
    void *memptr = NULL;

    /* reject sizes whose byte count would wrap around */
    if (psz == 0 || newn < oldn || newn > (size_t)-1 / psz) {
        fputs ("xrealloc(): requested size overflows size_t.\n", stderr);
        exit (EXIT_FAILURE);
    }

    memptr = realloc (ptr, newn * psz);
    if (!memptr) {
        perror ("realloc(): virtual memory exhausted.");
        exit (EXIT_FAILURE);
    }

    /* zero only the newly added elements */
    memset ((char *)memptr + oldn * psz, 0, (newn - oldn) * psz);
    *nelem = newn;

    return memptr;
}
(note: the output is sorted in descending order of occurrence, and in alphabetical order if words have the same number of occurrences)
Example Use/Output
$ ./bin/getchar_wordcnt_top dat/damages.txt 10
'dat/damages.txt' contained '109' words.
the 12
a 10
in 7
of 7
and 5
anguish 4
injury 4
jury 4
mental 4
that 4
------
Total: 61
Note: to use your hashtable as your basis for storage, you would have to, at minimum, create an array of pointers to each word in your hashtable, and then sort the array of pointers. Otherwise you would need to duplicate storage and copy the words to a new array to sort. (that would be somewhat a memory inefficient approach). Creating a separate array of pointers to each word in your hashtable to sort is about the only way you have to then call qsort and avoid the chained-bucket problem.

reading large lists through stdin in C

If my program is going to have large lists of numbers passed in through stdin, what would be the most efficient way of reading this in?
The input I'm going to be passing into the program is going to be of the following format:
3,5;6,7;8,9;11,4;;
I need to process the input so that I can use the numbers between the semicolons (i.e. I want to be able to use 3 and 5, 6 and 7, etc.). The ;; indicates that it is the end of the line.
I was thinking of using a buffered reader to read entire lines and then using parseInt.
Would this be the most efficient way of doing it?
This is a working solution
One way to do this is to use strtok() and store the values in an array. Ideally, dynamically allocated.
int main(int argc, char *argv[])
{
int lst_size=100;
int line_size=255;
int lst[lst_size];
int count=0;
char buff[line_size];
char * token=NULL;
fgets (buff, line_size, stdin); //Get input
Use strtok, passing ',' and ';' as the delimiters.
token=strtok(buff, ";,");
lst[count++]=atoi(token);
while(token=strtok(NULL, ";,")){
lst[count++]=atoi(token);
}
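Finally, you have to drop the last stored value by reducing the count by 1. strtok treats the consecutive ";;" as a single delimiter, but the newline that fgets leaves at the end of the buffer comes back as one last token; atoi(token) returns 0 for it and that 0 is stored in the final index, which you don't want.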
count--;
}
One other fairly elegant way to handle this is to allow strtol to parse the input by advancing the string to be read to endptr as returned by strtol. Combined with an array allocated/reallocated as needed, you should be able to handle lines of any length (up to memory exhaustion). The example below uses a single array for the data. If you want to store multiple lines, each as a separate array, you can use the same approach, but start with a pointer to array of pointers to int. (i.e. int **numbers and allocate the pointers and then each array). Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
/* Read every line from stdin, extract each decimal integer found on the
 * line (characters that are not part of a number act as delimiters),
 * store them in a dynamically grown array and print them in input order.
 * Returns 0 on success and 1 if the initial allocation fails; exits with
 * EXIT_FAILURE if the array cannot be grown.
 */
int main () {

    char *ln = NULL;                /* NULL forces getline to allocate  */
    size_t n = 0;                   /* buffer size, managed by getline  */
    ssize_t nchr = 0;               /* number of chars actually read    */
    int *numbers = NULL;            /* array to hold numbers            */
    size_t nmax = NMAX;             /* current capacity of numbers      */
    size_t idx = 0;                 /* numbers array index              */
    size_t i = 0;

    if (!(numbers = calloc (NMAX, sizeof *numbers))) {
        fprintf (stderr, "error: memory allocation failed.");
        return 1;
    }

    /* read each line from stdin - dynamically allocated */
    while ((nchr = getline (&ln, &n, stdin)) != -1)
    {
        char *p = ln;               /* pointer for use with strtol */
        char *ep = NULL;

        errno = 0;
        while (errno == 0)
        {
            /* grow BEFORE storing so numbers[idx] is always in bounds,
             * even when the previous line ended exactly at capacity
             */
            if (idx == nmax)
            {
                int *tmp = realloc (numbers, 2 * nmax * sizeof *numbers);
                if (!tmp) {
                    fprintf (stderr, "Error: struct reallocation failure.\n");
                    exit (EXIT_FAILURE);
                }
                numbers = tmp;
                memset (numbers + nmax, 0, nmax * sizeof *numbers);
                nmax *= 2;
            }

            /* parse/convert each number on stdin */
            numbers[idx] = strtol (p, &ep, 10);
            /* note: overflow/underflow checks omitted */

            /* keep the value only if strtol consumed at least one digit */
            if (errno == 0 && p != ep)
                idx++;

            /* skip delimiters/move pointer to next digit; the comparisons
             * are strict so that '0' and '9' themselves count as digits
             */
            while (*ep && (*ep < '0' || *ep > '9')) ep++;

            if (!*ep)               /* end of line reached */
                break;
            p = ep;
        }
    }

    /* free mem allocated by getline */
    free (ln);

    /* show values stored in array */
    for (i = 0; i < idx; i++)
        printf (" numbers[%2zu] %d\n", i, numbers[i]);

    /* free mem allocated to numbers */
    free (numbers);

    return 0;
}
Output
$ echo "3,5;6,7;8,9;11,4;;" | ./bin/prsistdin
numbers[ 0] 3
numbers[ 1] 5
numbers[ 2] 6
numbers[ 3] 7
numbers[ 4] 8
numbers[ 5] 11
numbers[ 6] 4
Also works where the string is stored in a file as:
$ cat dat/numsemic.csv | ./bin/prsistdin
or
$ ./bin/prsistdin < dat/numsemic.csv
Using fgets and without size_t
It took a little reworking to come up with a revision I was happy with that eliminated getline and substituted fgets. getline is far more flexible, handling the allocation of space for you, with fgets it is up to you. (not to mention getline returning the actual number of chars read without having to call strlen).
My goal here was to preserve the ability to read any length line to meet your requirement. That either meant initially allocating some huge line buffer (wasteful) or coming up with a scheme that would reallocate the input line buffer as needed in the event it was longer than the space initially allocate to ln. (this is what getline does so well). I'm reasonably happy with the results. Note: I put the reallocation code in functions to keep main reasonably clean. footnote 2
Take a look at the following code. Note, I have left the DEBUG preprocessor directives in the code allowing you to compile with the -DDEBUG flag if you want to have it spit out each time it allocates. [footnote 1] You can compile the code with:
gcc -Wall -Wextra -o yourexename yourfilename.c
or if you want the debugging output (e.g. set LMAX to 2 or something less than the line length), use the following:
gcc -Wall -Wextra -o yourexename yourfilename.c -DDEBUG
Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
#define LMAX 1024
char *realloc_char (char *sp, unsigned int *n); /* reallocate char array */
int *realloc_int (int *sp, unsigned int *n); /* reallocate int array */
char *fixshortread (FILE *fp, char **s, unsigned int *n); /* read all stdin */
/* Read every line from stdin with fgets (extending the line buffer via
 * fixshortread when a line does not fit), extract each decimal integer
 * found on the line, store them in a dynamically grown array and print
 * them in input order.
 * Returns 0 on success, 1 if an initial allocation fails.
 */
int main () {

    char *ln = NULL;                /* dynamically allocated for fgets  */
    int *numbers = NULL;            /* array to hold numbers            */
    unsigned int nmax = NMAX;       /* numbers check for reallocation   */
    unsigned int lmax = LMAX;       /* ln check for reallocation        */
    unsigned int idx = 0;           /* numbers array index              */
    unsigned int i = 0;             /* simple counter variable          */
    char *nl = NULL;

    /* initial allocation for numbers */
    if (!(numbers = calloc (NMAX, sizeof *numbers))) {
        fprintf (stderr, "error: memory allocation failed (numbers).");
        return 1;
    }

    /* initial allocation for ln */
    if (!(ln = calloc (LMAX, sizeof *ln))) {
        fprintf (stderr, "error: memory allocation failed (ln).");
        free (numbers);
        return 1;
    }

    /* read each line from stdin - dynamically allocated */
    while (fgets (ln, lmax, stdin) != NULL)
    {
        /* provide a fallback to read remainder of line
           if the line length exceeds lmax */
        if (!(nl = strchr (ln, '\n')))
            fixshortread (stdin, &ln, &lmax);
        else
            *nl = 0;

        char *p = ln;               /* pointer for use with strtol */
        char *ep = NULL;

        errno = 0;
        while (errno == 0)
        {
            /* grow BEFORE storing so numbers[idx] is always in bounds;
             * realloc may move the block, so the returned pointer must
             * replace the old one
             */
            if (idx == nmax)
                numbers = realloc_int (numbers, &nmax);

            /* parse/convert each number on stdin */
            numbers[idx] = strtol (p, &ep, 10);
            /* note: overflow/underflow checks omitted */

            /* keep the value only if strtol consumed at least one digit */
            if (errno == 0 && p != ep)
                idx++;

            /* skip delimiters/move pointer to next digit; the comparisons
             * are strict so that '0' and '9' themselves count as digits
             */
            while (*ep && (*ep < '0' || *ep > '9')) ep++;

            if (!*ep)               /* end of line reached */
                break;
            p = ep;
        }
    }

    /* free the line buffer */
    free (ln);

    /* show values stored in array */
    for (i = 0; i < idx; i++)
        printf (" numbers[%2u] %d\n", (unsigned int)i, numbers[i]);

    /* free mem allocated to numbers */
    free (numbers);

    return 0;
}
/* double the size of a char buffer.
 * 'sp' is the current buffer and '*n' its element count; the buffer is
 * reallocated to hold 2 * '*n' chars, the added upper half is zeroed and
 * '*n' is updated to the new count. returns the reallocated buffer
 * (which may have moved); exits with EXIT_FAILURE if realloc fails.
 */
char *realloc_char (char *sp, unsigned int *n)
{
    const unsigned int old_count = *n;
    char *grown = realloc (sp, 2 * old_count * sizeof *grown);

#ifdef DEBUG
    printf ("\n reallocating %u to %u\n", old_count, old_count * 2);
#endif

    if (grown == NULL) {
        fprintf (stderr, "Error: char pointer reallocation failure.\n");
        exit (EXIT_FAILURE);
    }

    /* clear only the newly added elements */
    memset (grown + old_count, 0, old_count * sizeof *grown);
    *n = old_count * 2;

    return grown;
}
/* double the size of an int array.
 * 'sp' is the current array and '*n' its element count; the array is
 * reallocated to hold 2 * '*n' ints, the added upper half is zeroed and
 * '*n' is updated to the new count. returns the reallocated array
 * (which may have moved); exits with EXIT_FAILURE if realloc fails.
 */
int *realloc_int (int *sp, unsigned int *n)
{
    const unsigned int old_count = *n;
    int *grown = realloc (sp, 2 * old_count * sizeof *grown);

#ifdef DEBUG
    printf ("\n reallocating %u to %u\n", old_count, old_count * 2);
#endif

    if (grown == NULL) {
        fprintf (stderr, "Error: int pointer reallocation failure.\n");
        exit (EXIT_FAILURE);
    }

    /* clear only the newly added elements */
    memset (grown + old_count, 0, old_count * sizeof *grown);
    *n = old_count * 2;

    return grown;
}
/* if fgets fails to read entire line, fix short read.
 * '*s' holds the partial line already stored by fgets and '*n' is the
 * buffer's current size. the rest of the line is read from 'fp' one
 * character at a time and appended, doubling the buffer through
 * realloc_char whenever it fills up. the trailing newline is not stored.
 * on return '*s' points to the (possibly moved) buffer holding the whole
 * nul-terminated line and '*n' is its new size; '*s' is also returned.
 */
char *fixshortread (FILE *fp, char **s, unsigned int *n)
{
    /* continue right after what fgets stored, wherever that ends */
    unsigned int i = (unsigned int) strlen (*s);
    int c = 0;

    /* realloc may move the block: always store the returned pointer */
    *s = realloc_char (*s, n);

    while ((c = fgetc (fp)) != '\n' && c != EOF) {
        (*s)[i++] = (char) c;
        if (i == *n)                /* keep room for the terminator */
            *s = realloc_char (*s, n);
    }
    (*s)[i] = 0;

    return *s;
}
footnote 1
nothing special about the choice of the word DEBUG (it could have been DOG, etc..), the point to take away is if you want to conditionally include/exclude code, you can simply use preprocessor flags to do that. You just add -Dflagname to pass flagname to the compiler.
footnote 2
you can combine the reallocation functions into a single void* function that accepts a void pointer as its argument along with the size of the type to be reallocated and returns a void pointer to the reallocated space -- but we will leave that for a later date.
What you could do is read in from stdin using fgets or fgetc. You could also use getline() since you're reading in from stdin.
Once you have read in the line you can use strtok() with ";" as the delimiter to split the string into pieces at the semicolons. You can loop until strtok() returns NULL (here, once the final ";;" has been consumed). Also, in C you can use atoi() to convert the strings to integers.
For Example:
int length = 256;
char* str = (char*)malloc(length);
int err = getline(&str, &length, stdin);
I would read in the command args, then parse using the strtok() library method
http://man7.org/linux/man-pages/man3/strtok.3.html
(The web page referenced by the URL above even has a code sample of how to use it.)
I'm a little rusty at C, but could this work for you?
char[1000] remainder;
int first, second;
fp = fopen("C:\\file.txt", "r"); // Error check this, probably.
while (fgets(&remainder, 1000, fp) != null) { // Get a line.
while (sscanf(remainder, "%d,%d;%s", first, second, remainder) != null) {
// place first and second into a struct or something
}
}
getchar_unlocked() is what you are looking for.
Here is the code:
#include <stdio.h>
/* Read the next unsigned decimal integer from stdin into *x.
 * Every byte that is not an ASCII digit is skipped as a separator.
 * Returns 1 when a number was read and 0 when EOF was reached before
 * any digit (*x is then 0). Overflow is not checked.
 *
 * Declared `static inline`: since C99 a plain `inline` definition does
 * not provide an external definition, so a non-inlined call would fail
 * to link.
 */
static inline int fastRead_int(int * x)
{
    register int c = getchar_unlocked();
    *x = 0;
    // clean stuff in front of + look for EOF
    for(; ((c<'0' || c>'9') && c != EOF); c = getchar_unlocked());
    if(c == EOF)
        return 0;
    // build int; (*x<<1) + (*x<<3) is *x * 10
    for(; c>='0' && c<='9' ; c = getchar_unlocked()) {
        *x = (*x<<1) + (*x<<3) + c - '0';
    }
    return 1;
}
/* Print every integer fastRead_int can extract from stdin, each followed
 * by a single space, until it reports that no more input is available.
 */
int main()
{
    int value;

    for (;;) {
        if (!fastRead_int(&value))
            break;
        printf("%d ", value);
    }

    return 0;
}
For input 1;2;2;;3;;4;;;;;54;;;; the code above produces 1 2 2 3 4 54.
I guarantee, this solution is a lot faster than others presented in this topic. It is not only using getchar_unlocked(), but also uses register, inline as well as multiplying by 10 tricky way: (*x<<1) + (*x<<3).
I wish you good luck in finding better solution.

Read unknown number of lines from stdin, C

i have a problem with reading stdin of unknown size. In fact its a table in .txt file, which i get to stdin by calling parameter '<'table.txt. My code should look like this:
#include <stdio.h>
#include <string.h>
/* Echo lines from stdin, keeping each one in the fixed-size words[]
 * table. Stops at end of input or once all 10 rows have been used.
 */
int main(int argc,char *argv[])
{
    char words[10][1024];
    int i=0;
    /* Test fgets itself: feof() only becomes true AFTER a read has
     * failed, so looping on it processes one bogus extra "line".
     * The i<10 bound keeps writes inside the table, and sizeof words[i]
     * lets a line use the full 1024-byte row.
     */
    while(i<10 && fgets(words[i],sizeof words[i],stdin)!=NULL)
    {
        printf("%s", words[i]);
        i++;
    }
    return 0;
}
but there is the problem: I don't know the number of lines, which in this case is 10 (we do know the maximum number of characters in a line: 1024).
It would be great if someone know the solution. Thanks in advance.
You have hit on one of the issues that plagues all new C-programmers: how do I dynamically allocate all the memory I need, to free myself from static limits, while still keeping track of my collection of 'stuff' in memory? This problem usually presents itself when you need to read an unknown number of 'things' from an input. The initial options are (1) declare some limit big enough to work (defeating the purpose), or (2) dynamically allocate pointers as needed.
Obviously, the goal is (2). However, you then run into the problem of "How do I keep track of what I've allocated?" This in itself is an issue that dogs beginners. The problem being, If I dynamically allocate using a bunch of pointers, **How do I iterate over the list to get my 'stuff' back out? Also, you have to initialize some initial number of pointers (unless using an advanced data structure like a linked-list), so the next question is "what do I do when I run out?"
The usual solution is to allocate an initial set of pointers, then when the limit is reached, reallocate to twice as many as original, and keep going. (as Grayson indicated in his answer).
However, there is one more trick to iterate over the list to get your 'stuff' back out that is worth understanding. Yes, you can allocate with malloc and keep track of the number of pointers used, but you can free yourself from tying a counter to your list of pointers by initially allocating with calloc. That not only allocates space, but also sets the allocated pointers to NULL (or 0). This allows you to iterate over your list with a simple while (pointer != NULL). This provides many benefits when it comes to passing your collection of pointers to functions, etc.. The downside (a minimal one) is that you get to write a reallocation scheme that uses calloc to allocate new space when needed. (bummer, I get to get smarter -- but I have to work to do it...)
You can evaluate whether to use malloc/realloc off-the-shelf, or whether to reallocate using calloc and a custom reallocate function depending on what your requirements are. Regardless, understanding both, just adds more tools to your programming toolbox.
OK, enough jabber, where is the example in all this blather?
Both of the following examples simply read all lines from any text file and print the lines (with pointer index numbers) back to stdout. Both expect that you will provide the filename to read as the first argument on the command line. The only difference between the two is that the second has the reallocation with calloc done in a custom reallocation function. They both allocate 255 pointers initially and double the number of pointers each time the limit is hit. (For fun, you can set MAXLINES to something small like 10 and force repeated reallocations to test.)
first example with reallocation in main()
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
/* Release every string held in a NULL-terminated list of pointers and
 * then the list itself. The first NULL entry marks the end of the list.
 */
void free_buffer (char **buffer)
{
    char **entry;

    for (entry = buffer; *entry != NULL; entry++)
        free (*entry);

    free (buffer);
}
/* Read all lines of the file named by argv[1] into a dynamically grown,
 * NULL-terminated list of strings and print each line with its index.
 * The list is always allocated with calloc and grown before its last
 * slot is used, so at least one trailing NULL pointer marks its end.
 * Returns 0 on success, 1 on usage, open or allocation errors.
 */
int main (int argc, char **argv) {

    if (argc < 2) {
        fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
        return 1;
    }

    char *line = NULL;      /* forces getline to allocate space for buf */
    ssize_t nread = 0;      /* number of characters read by getline     */
    size_t n = 0;           /* buffer size, managed by getline          */
    char **filebuf = NULL;
    char **rtmp = NULL;
    int linecnt = 0;
    size_t limit = MAXLINES;
    size_t newlim = 0;

    FILE *ifp = fopen(argv[1],"r");
    if (!ifp)
    {
        fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
        return 1;
    }

    filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
    if (!filebuf)
    {
        fprintf (stderr, "error: filebuf allocation failed, exiting\n");
        fclose (ifp);
        return 1;
    }

    while ((nread = getline (&line, &n, ifp)) != -1)   /* read each line in file with getline */
    {
        if (nread > 0 && line[nread - 1] == '\n')       /* strip newline */
            line[--nread] = 0;

        if ((size_t)linecnt >= (limit - 1))     /* test if linecnt at limit, reallocate */
        {
            newlim = limit * 2;                 /* set new number of pointers to 2X old */
            rtmp = calloc (newlim, sizeof (*filebuf));  /* calloc to set to NULL */
            if (!rtmp)
            {
                fprintf (stderr, "error: rtmp allocation failed, exiting\n");
                goto fail;
            }
            /* memcpy cannot report failure (it always returns its
             * destination), so there is nothing to test here
             */
            memcpy (rtmp, filebuf, linecnt * sizeof (*filebuf));
            free (filebuf);                     /* free original filebuf */
            filebuf = rtmp;                     /* set filebuf equal to new rtmp */
            limit = newlim;                     /* update limit to new limit */
        }

        /* a NULL here would silently end the list early, so check it */
        if (!(filebuf[linecnt] = strdup (line)))    /* copy line (strdup allocates) */
        {
            fprintf (stderr, "error: strdup failed, exiting\n");
            goto fail;
        }
        linecnt++;                              /* increment linecnt */
    }

    fclose(ifp);
    free (line);                                /* free memory allocated to line */

    linecnt = 0;                                /* reset linecnt to iterate filebuf */

    printf ("\nLines read in filebuf buffer:\n\n");     /* output all lines read */
    while (filebuf[linecnt])
    {
        printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
        linecnt++;
    }
    printf ("\n");

    free_buffer (filebuf);                      /* free memory allocated to filebuf */

    return 0;

fail:                                           /* release everything acquired so far */
    fclose (ifp);
    free (line);
    free_buffer (filebuf);
    return 1;
}
second example with reallocation in custom function
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
/* free each string stored in 'buffer' up to (not including) the first
 * NULL entry, then free the pointer list itself
 */
void free_buffer (char **buffer)
{
    size_t pos = 0;

    while (buffer[pos] != NULL)
        free (buffer[pos++]);

    free (buffer);
}
/* custom realloc using calloc/memcpy.
 * allocates a zero-filled pointer list twice the size of '*lim' (an
 * empty list grows to one pointer), copies the '*lim' existing pointers
 * from 'buf' into it, frees 'buf' and stores the new size in '*lim'.
 * returns the new list. on failure returns NULL and leaves both 'buf'
 * and '*lim' untouched, so the caller still owns the original list.
 */
char **recalloc (size_t *lim, char **buf)
{
    size_t newlim = 0;      /* size_t, so large limits are not truncated */
    char **tmp = NULL;

    if (*lim > (size_t)-1 / 2)
    {
        fprintf (stderr, "%s(): error, new limit overflows, exiting\n", __func__);
        return NULL;
    }
    newlim = *lim ? *lim * 2 : 1;

    tmp = calloc (newlim, sizeof (*buf));
    if (!tmp)
    {
        fprintf (stderr, "%s(): error, tmp allocation failed, exiting\n", __func__);
        return NULL;
    }

    /* memcpy cannot fail (it always returns its destination), so its
     * result needs no check; skip it entirely for an absent list
     */
    if (buf)
    {
        memcpy (tmp, buf, *lim * sizeof (*buf));
        free (buf);
    }

    *lim = newlim;

    return tmp;
}
/* Read all lines of the file named by argv[1] into a dynamically grown,
 * NULL-terminated list of strings (grown through recalloc) and print
 * each line with its index.
 * Returns 0 on success, 1 on usage, open or allocation errors.
 */
int main (int argc, char **argv) {

    if (argc < 2) {
        fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
        return 1;
    }

    char *line = NULL;      /* forces getline to allocate space for buf */
    ssize_t nread = 0;      /* number of characters read by getline     */
    size_t n = 0;           /* buffer size, managed by getline          */
    char **filebuf = NULL;
    int linecnt = 0;
    size_t limit = MAXLINES;

    FILE *ifp = fopen(argv[1],"r");
    if (!ifp)
    {
        fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
        return 1;
    }

    filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
    if (!filebuf)
    {
        fprintf (stderr, "error: filebuf allocation failed, exiting\n");
        fclose (ifp);
        return 1;
    }

    while ((nread = getline (&line, &n, ifp)) != -1)   /* read each line in file with getline */
    {
        if (nread > 0 && line[nread - 1] == '\n')       /* strip newline */
            line[--nread] = 0;

        if ((size_t)linecnt >= (limit - 1))     /* test if linecnt at limit, reallocate */
        {
            /* go through a temporary so the existing list is not lost
             * (and can still be freed) when the reallocation fails
             */
            char **grown = recalloc (&limit, filebuf);  /* reallocate filebuf to 2X size */
            if (!grown)
            {
                fprintf (stderr, "error: recalloc failed, exiting.\n");
                goto fail;
            }
            filebuf = grown;
        }

        /* a NULL here would silently end the list early, so check it */
        if (!(filebuf[linecnt] = strdup (line)))    /* copy line (strdup allocates) */
        {
            fprintf (stderr, "error: strdup failed, exiting.\n");
            goto fail;
        }
        linecnt++;                              /* increment linecnt */
    }

    fclose(ifp);
    free (line);                                /* free memory allocated to line */

    linecnt = 0;                                /* reset linecnt to iterate filebuf */

    printf ("\nLines read in filebuf buffer:\n\n");     /* output all lines read */
    while (filebuf[linecnt])
    {
        printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
        linecnt++;
    }
    printf ("\n");

    free_buffer (filebuf);                      /* free memory allocated to filebuf */

    return 0;

fail:                                           /* release everything acquired so far */
    fclose (ifp);
    free (line);
    free_buffer (filebuf);
    return 1;
}
Take a look at both examples. Know that there are many, many ways to do this. These examples just give one approach, and show a few more tricks than you will normally find. Give them a try. Drop a comment if you need more help.
I suggest that you use malloc and realloc to manage your memory. Keep track of how big your array is or how many entries it has, and call realloc to double its size whenever the array is not big enough.
Op appears to need to store the data somewhere
#define N 100000u
char BABuffer[N];
/* Append every line read from stdin to the global BABuffer, stopping
 * early with a message on stderr once another line would no longer fit,
 * then report the line and character counts and echo the stored text.
 */
int main(int argc, char *argv[]) {
    char chunk[1024 + 2];
    size_t lines_read = 0;
    size_t bytes_used = 0;

    for (;;) {
        if (fgets(chunk, sizeof chunk, stdin) == NULL)
            break;

        const size_t chunk_len = strlen(chunk);

        /* refuse a line that would not leave room for the terminator */
        if (!(bytes_used + chunk_len < N - 1)) {
            fputs("Too much!\n", stderr);
            break;
        }

        memcpy(BABuffer + bytes_used, chunk, chunk_len);
        bytes_used += chunk_len;
        ++lines_read;
    }

    BABuffer[bytes_used] = '\0';

    printf("Read %zu lines.\n", lines_read);
    printf("Read %zu char.\n", bytes_used);
    fputs(BABuffer, stdout);

    return 0;
}
Note: ccount includes the end-of-line character(s).

Resources