Trouble reading a .txt file and storing into an array - c

I've been given a .txt file with a specific structure: each line has a String with 5 characters but with a random number of lines, and we should read the file and store it as we want.
I've tried doing it with a linked list and it worked just fine but as the size of the file grew up, the time it took to execute was too long. Since then i've been trying to store the Strings into an array of strings, so everything would be stored contiguously in memory. When executing, i get a segmentation fault error and i have no idea why. The code goes as follows:
int nLines (char *path)
{
int answer = 0;
FILE* fp;
fp = fopen(path,"r");
char line[6];
while (fgets(line, sizeof(line),fp))
{
answer++;
}
return answer;
}
int main (int argc, char *argv[])
{
FILE* fp;
fp = fopen(argv[1], "r");
int numberLines = nLines(argv[1]);
char **storage = malloc(numberLines * 6 * sizeof(char));
if(storage != NULL)
{
int i = 0;
char line [6];
while (fgets(line, sizeof(line),fp))
{
strcpy(storage[i], line);
i++;
}
}
free(storage);
}
The first function is supposed to return the number of lines there is in the file. With this information, i'm trying to allocate memory equal to the number of strings * the size of each string since i know before hand this value. I'm imagining the problem comes from the line:
char **storage = malloc (numberLines * 6 *sizeof(char));
I haven't touched C in a long time and i'm kinda rusty with the whole pointers and memory stuff. Can someone help please. Thank you!

your allocation is wrong
int main (int argc, char *argv[])
{
FILE* fp;
fp = fopen(argv[1], "r");
size_t numberLines = 0;
char **storage = NULL;
char line [8];
while (fgets(line, sizeof(line),fp))
{
storage = realloc(storage, (numberLines + 1) * sizeof(*storage));
storage[numberLines] = malloc(8);
strcpy(storage[numlines++], line);
}
/* ... */
}
you need to allocate space for the pointers, then space for the strings. It is demo only and you should implement the correct error handling (memory and file).

If one wants to truly have an on-line algorithm, one isn't going to have the number of lines available. The idiomatic way to have a contiguous dynamic container is to reallocate geometrically increasing capacity, like vector or ArrayList. C doesn't have that type built-in, but it's worth the extra code if one uses it a lot. For example, this reads from stdin until EOF and uses a Fibonacci sequence as it's capacities.
#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
/** One line of maximum 5 `char` plus 1 `NUL`. */
struct Line { char str[6]; };
/** A dynamic line array. */
struct LineArray {
struct Line *data; /* data -> (c0 < c1 || c0 == c1 == max_size) */
size_t capacity, next_capacity; /* !data -> !size, data -> size<=capacity */
size_t size;
};
/** Ensures `min_capacity` of `a`. Return success, otherwise, `errno` will be
set: `realloc` or `ERANGE` -- tried allocating more then can fit in `size_t`
or `realloc` doesn't follow [IEEE Std 1003.1-2001
](https://pubs.opengroup.org/onlinepubs/009695399/functions/realloc.html). */
static int reserve(struct LineArray *const a, const size_t min_capacity) {
size_t c0, c1;
struct Line *data;
const size_t max_size = (size_t)-1 / sizeof(struct Line *);
assert(a);
if(!a->data) {
if(!min_capacity) return 1;
c0 = 8, c1 = 13;
} else {
if(min_capacity <= a->capacity) return 1;
c0 = a->capacity, c1 = a->next_capacity;
}
if(min_capacity > max_size) return errno = ERANGE, 0;
assert(c0 < c1); /* Fibonacci: c0 ^= c1, c1 ^= c0, c0 ^= c1, c1 += c0. */
while(c0 < min_capacity) {
size_t temp = c0 + c1; c0 = c1; c1 = temp;
if(c1 > max_size || c1 < c0) c1 = max_size;
}
if(!(data = realloc(a->data, c0 * sizeof *a->data)))
{ if(!errno) errno = ERANGE; return 0; }
a->data = data;
a->capacity = c0;
a->next_capacity = c1;
return 1;
}
/** Adds one to the size of `a` and returns it (`push_back`.) Exceptional
return null and `errno` is `realloc` or `ERANGE`. */
static struct Line *new_line(struct LineArray *const a) {
assert(a);
if(a->size >= (size_t)-1) { errno = ERANGE; return 0; } /* Unlikely. */
if(!reserve(a, a->size + 1)) return 0; /* (Less) unlikely. */
return a->data + a->size++;
}
/** Destructor. */
static void linearray_(struct LineArray *const a) {
assert(a);
free(a->data);
a->data = 0, a->capacity = a->next_capacity = a->size = 0;
}
#include <string.h>
#include <stdio.h>
int main(void)
{
struct LineArray storage = { 0, 0, 0, 0 };
struct Line *s, *s_end;
size_t l = 0, line_len;
char line[7] = "";
int success = EXIT_FAILURE;
/* `line` must be capable of storing the "*[,5]\n\0". */
assert(sizeof line == sizeof ((struct Line *)0)->str + 1);
while (fgets(line, sizeof line, stdin))
{
l++;
line_len = strlen(line);
assert(line_len && line_len < sizeof line);
/* Too long. */
if(line[line_len - 1] != '\n') { errno = ERANGE; goto catch; }
/* Cut off the trailing new-line. */
line[line_len-- - 1] = '\0';
/* Store `line`. */
if(!(s = new_line(&storage))) goto catch;
strcpy(s->str, line);
}
if(ferror(stdin)) goto catch;
/* Print all. */
for(s = storage.data, s_end = s + storage.size; s < s_end; s++)
printf("stored: %s\n", s->str);
success = EXIT_SUCCESS;
goto finally;
catch:
perror("Error");
fprintf(stderr, "On line %lu: \"%s\".\n", (unsigned long)l, line);
finally:
linearray_(&storage);
return success;
}

Related

Open file is not read the text, Get_Next_Line.c

hello guys a just need help on this, not showing the text I wrote:
This program open the file and just show on command what is inside,
if buffer is > 0 show all the text contained in file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
char *ft_strncat(char *dst, const char *src, size_t n)
{
if (n != 0) {
char *d = dst;
const char *s = src;
while (*d != 0)
d++;
do {
if ((*d = *s++) == 0)
break;
d++;
} while (--n != 0);
*d = 0;
}
return (dst);
}
char *get_next_line(int fd)
{
char buffer[2] = "";
char **line;
if( !*line )
*line = malloc(100 * sizeof(char));
*line[0] = '\0';
while( read(fd, buffer, 1) > 0 ) {
ft_strncat(*line, buffer, 1);
if( buffer[0] == '\n' )
break;
}
return (0);
}
int main(void)
{
int fd;
int ret;
fd = open("ola.txt", O_RDONLY);
if (fd < 3 && fd != 0)
return (-1);
printf("%d\n", fd);
printf("%s\n", get_next_line(fd));
return (0);
}
im trying to see the error but I cant, im a noob on C yet
thank you for help me.
line should be char *, not char **. That would only be needed if it were a function parameter that should be updated by the function.
You need to return line from the function, not 0.
You should use realloc() to grow line if the input line is longer than the size of line. Use a variable capacity to hold the current size.
There's no good reason to use ft_strncat(). Use another variable to hold the current position in line, and write the character there directly.
char *get_next_line(int fd)
{
char buffer;
size_t capacity = 100;
char *line = malloc(capacity * sizeof(char));
size_t pos = 0;
*line[0] = '\0';
while( read(fd, &buffer, 1) > 0 ) {
if (pos > capacity - 2) {
capacity += 100;
line = realloc(line, capacity);
}
line[pos++] = buffer;
if( buffer == '\n' ) {
line[pos] = '\0';
break;
}
}
return line;
}
In addition, the caller should assign the result to a variable, so it can free the memory. Otherwise you'll create lots of memory leaks when you read all the lines of the file.

Read a CSV file into a dynamic array of structs C

I'm fairly new to C. I'm trying to read a .CSV file, then parse each line, then store the data in a dynamic array of pointers to structs. Unfortunately I've gone wrong somewhere in my implementation which is resulting in an infinite loop.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct dataSet {
char ID;
char postcode;
int population;
char contact;
double x;
double y;
}data;
int main(int argc, char* argv[]) {
char line[100] = "";
int count = 0;
int each = 0;
data *allData = NULL;
data *temp = NULL;
FILE *file = fopen("dataset.csv", "r");
if (file == NULL)
{
printf("Error! File null");
return 1;
}
while (fgets(line, sizeof line, file))
{
if(NULL == (temp = realloc(allData, sizeof(*allData) * (count + 1))))
{
fprintf(stderr, "realloc problem\n");
fclose(file);
free(allData);
return 0;
}
allData = temp;
if (6 == scanf(line, "%s, %s, %d, %s, %lf, %lf",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y)) {
count++;
}
else {
printf("Problem with data\n");
}
}
fclose(file);
for (each = 0; each < count; each++)
{
printf("%s, %s, %d, %s, %lf, %lf\n",
&allData[count].ID,
&allData[count].postcode,
&allData[count].population,
&allData[count].contact,
&allData[count].x,
&allData[count].y);
}
free(allData);
return 0;
}
Any help or tips would be greatly appreciated.
[s]scanf() is a nasty function. You don't have enough control once it fails. Problem is: there are too many conditions: the input can be incorrect, or the destination is not large enough. Even reading complete lines with fgets(), and parsing them afterwards, will only allow you to skip complete lines; also: the line buffer is mostly fixed sized, and fgets() could read incomplete lines. A way to keep complete control is to read character-based. This might imply a Finite State machine.
A simpler reader (using a zero-state machine) could be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct omg {
char o;
int m;
char g[11];
};
struct wtf {
unsigned size;
unsigned used;
struct omg *array;
};
#define INITIAL_SIZE 7
struct wtf read_stuff(char *name)
{
FILE *fp;
unsigned icol,irec,len;
char buff[123];
struct wtf this = {0,0,NULL};
fp = fopen(name, "rb" );
if (!fp) return this;
for (icol=irec=len=0; ; ) {
int ch;
if (this.used >= this.size) {
size_t newsize;
struct omg *tmp;
newsize = this.size? this.size*2: INITIAL_SIZE;
fprintf(stderr, "Realloc(%zu)\n", newsize);
tmp = realloc(this.array, sizeof *this.array * newsize);
this.array = tmp;
this.size = newsize;
}
ch = getc(fp);
switch(ch) {
case '\r' : continue;
/* End of field or record: terminate buffer */
#if 0
case ',' :
#else
case '\t' :
#endif
case '\n' :
buff[len] = 0;
break;
case EOF :
goto done;
/* Normal character: assign to buffer
** You may want to report too long fields here
*/
default:
if (len >= sizeof buff -2) continue;
buff[len++] = ch;
continue;
}
/* When we arrive here, we have a new field. Let's process it ...*/
switch (icol) {
case 0: /* Assign first field here from buff[], (dont forget to check len!) */
this.array[this.used].o = buff[0];
break;
case 1: /* Assign second field from buff[], this may need some additional checks
** You may want to avoid sscanf() here ...
*/
sscanf(buff, "%d", &this.array[this.used].m );
break;
case 2: /* Assign third field from buff[] */
if (len >= sizeof this.array[this.used].g)
len = sizeof this.array[this.used].g -1;
memcpy (this.array[this.used].g, buff, len);
this.array[this.used].g[len] = 0;
break;
default: /* Ignore excess fields
** You may want to report hem.
*/
break;
}
/* Do some bookkeeping */
len = 0;
if(ch == '\n') {
/* You may want to check if icol==2, here */
icol=0; irec++; this.used++;
}
else icol++;
}
done:
fclose(fp);
/* You could do a final realloc() here */
return this;
}
int main(int argc, char **argv)
{
struct wtf result;
unsigned idx;
result = read_stuff(argv[1] );
fprintf(stderr, "Result=%u/%u\n", result.used,result.size);
for (idx=0; idx < result.used; idx++) {
printf("%c %d %s\n"
, result.array[idx].o
, result.array[idx].m
, result.array[idx].g);
if (idx >= 10) break;
}
return 0;
}
You ask for tips...
1 - your struct is wrong if your plan was to use dynamic memory. The char members should be pointers to char, ( char * not char ) as shown below. But to reduce complexity, use char arrays instead of forcing dynamic allocation for struct members: i.e. do not use this:
typedef struct dataSet {
char *ID;
char *postcode;
int population;
char *contact;
double x;
double y;
}data;
Rather use this:
typedef struct dataSet {
char ID[80];
char postcode[11];
int population;
char contact[80];
double x;
double y;
}data;
If the lengths are not right, then make them bigger, but this will reduce calls to calloc() and free().
2 - suggested steps:
Count lines in file. (example here). This will essentially open the file, count the lines and close the file.
Use the count to allocate memory for that number of instances of data (i.e. data *records = malloc(sizeof(*records)*countOfLines); )
Open the file again. If file != NULL, then...
Begin to read file line by line in a loop, such as the fgets(...) loop you have.
In this loop, suggest replacing scanf() with a series of calls to strtok() making the appropriate conversion one-by-one. Its a few more lines of code, but is easier in the long run to see what parsing problems you might run into.
The following pseudo code illustrates...
data *record = malloc(CountOfLines*sizeof(*record));
if(record)
{
int i = 0;
while(fgets(line, sizeof line, file))
{
tok = strtok(line, ",");
if(tok)
{ //convert string
strncpy(record[i].ID, tok, sizeof(record[i].ID) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert string
strncpy(record[i].postcode, tok, sizeof(record[i].postcode) - 1);
tok = strtok(NULL, ",");
if(tok)
{//convert int
record[i].population = atoi(tok);
//and so on ...

How store each string of getline() inside a (dynamic) array of strings?

I'm using the getline() function to get every line of stdin. Every line is a string with different length:
#include <stdio.h>
#include <stdlib.h>
int main() {
char *line = NULL;
size_t foo = 0;
ssize_t reader;
while ((reader = getline(&line, &foo, stdin)) != -1) { // %zu of reader is length of line
printf("%s", line);
}
free(line);
return 0;
}
In every iteration, line is a string and is containing the current line. How can I take each string-line and store it inside an array? There are several things I have tried but none of them worked or they just lead to memory access failure :(
I hope my question is clear? If it's not, please tell me and I will change it!
Unless you know up front how many lines to expect, then you will have to allocate the array dynamically, eg:
#include <stdio.h>
#include <stdlib.h>
int main() {
char *line = NULL;
size_t foo = 0;
ssize_t reader;
int result = 0;
int numlines = 0, maxlines = 10;
char **lines = malloc(sizeof(char*) * maxlines);
if (!lines) {
printf("error allocating array\n");
}
else {
while ((reader = getline(&line, &foo, stdin)) != -1) { // %zu of reader is length of line
printf("%s", line);
if (numlines == maxlines) {
maxlines *= 2; // <-- or use whatever threshold makes sense for you
char **newlines = realloc(lines, sizeof(char*) * maxlines);
if (!newlines) {
printf("error reallocating array\n");
result = -1;
break;
}
lines = newlines;
}
lines[numlines] = line;
++numlines;
line = NULL;
foo = 0;
}
free(line); // <-- in case getline() or realloc() failed...
// use lines up to numlines as needed
// free lines
for(int i = 0; i < numlines; ++i) {
free(lines[i]);
}
free(lines);
}
return result;
}
You need to create an array of pointers that gets resized when needed:
#include <stdio.h>
#include <stdlib.h>
int main()
{
// start with an array that ends with a NULL pointer
// (like argv does)
size_t numLines = 0;
char **lines = malloc( ( numLines + 1 ) * sizeof( *lines ) );
lines[ numLines ] = NULL;
// break the loop explicitly - easier to handle and much less
// bug-prone than putting the assignment into a while statement
for ( ;; )
{
// get the next line
size_t bytes = 0UL;
char *line = NULL;
ssize_t result = getline( &line, &bytes, stdin );
if ( result < 0 )
{
break;
}
// enlarge the array by one
numLines++;
char **tmp = realloc( lines, ( numLines + 1 ) * sizeof( *tmp ) );
if ( !tmp )
{
break;
}
lines = tmp;
// add the new line to the end of the array
lines[ numLines ] = line;
lines[ numLines + 1 ] = NULL;
}
// use lines - then free them
return( 0 );
}
That can be optimized by doing the realloc() calls in chunks, such as every 32 or 64 lines. But given that you're already effectively calling malloc() once per line, that might not help much.

Printing the most frequent occurring words in a given text file, unable to sort by frequency in C

I am working on an assignment that requires me to print the top 10 most occurring words in a given text file. My code is printing the words from the file, but it is not sorting them according to their frequency.
Here is come of my code below. I use a hashtable to store each unique word and its frequency. I am currently sorting the words using the wordcmp function I wrote and calling it in the inbuilt qsort function in main.
If anyone can guide me to fix my error, I'd be very greatful.
My current output:
the top 10 words (out of 10) are:
1 im
1 are
1 again
3 happy
2 hello
1 how
1 lets
1 you
1 try
1 this
Expected output (what I want):
The top 10 words (out of 10) are:
3 happy
2 hello
1 you
1 try
1 this
1 lets
1 im
1 how
1 are
1 again
Here is some of my code:
typedef struct word
{
char *s; /* the word */
int count; /* number of times word occurs */
struct word* next;
}word;
struct hashtable
{
word **table;
int tablesize;
int currentsize;
};
typedef struct hashtable hashtable;
int main(int argc, char *argv[])
{
int top_words = 10;
word *word = NULL;
hashtable *hash = ht_create(5000);
char *file_name;
char *file_word;
FILE *fp;
struct word *present = NULL;
fp = fopen (file_name, "r");
if (fp == NULL)
{
fprintf (stderr,"%s: No such file or directory\n", file_name);
fprintf(stderr,"The top %d words (out of 0) are:\n", top_words);
exit(-1);
}
continue_program:
while ((file_word = getWord(fp)))
{
word = add(hash, file_word, 1);
}
fclose(fp);
qsort((void*)hash->table, hash->currentsize, sizeof(word),(int (*)(const void *, const void *)) wordcmp);
if(top_words > total_unique_words)
top_words = total_unique_words;
printf("the top %d words (out of %d) are:\n", top_words, total_unique_words);
int iterations =0;
for(i =0; i <= hash->tablesize && iterations< top_words; i++)
{
present = hash->table[i];
if(present != NULL)
{
printf(" %4d %s\n", present->count, present->s);
present = present->next;
iterations++;
}
}
freetable(hash);
return 0;
}
int wordcmp (word *a, word *b)
{
if (a != NULL && b!= NULL) {
if (a->count < b->count)
{
return +1;
}
else if (a->count > b->count)
{
return -1;
}
else if (a->count == b->count)
{
/*return strcmp(b->s, a->s);*/
return 0;
}
}
return 0;
}
/* Create a new hashtable. */
struct hashtable *ht_create( int size )
{
int i;
if( size < 1 )
return NULL;
hashtable *table = (hashtable *) malloc(sizeof(hashtable));
table->table = (word **) malloc(sizeof(word *) * size);
if(table != NULL)
{
table->currentsize = 0;
table->tablesize = size;
}
for( i = 0; i < size; i++ )
{
table->table[i] = NULL;
}
return table;
}
/* Adds a new node to the hash table*/
word * add(hashtable *h, char *key, int freq)
{
int index = hashcode(key) % h->tablesize;
word *current = h->table[index];
/* Search for duplicate value */
while(current != NULL) {
if(contains(h, key) == 1){
current->count++;
return current;
}
current = current->next;
}
/* Create new node if no duplicate is found */
word *newnode = (struct word*)malloc(sizeof(struct word));
if(newnode!=NULL){
newnode->s =strdup(key);
newnode-> count = freq;
newnode-> next = NULL;
}
h->table[index] = newnode;
h->currentsize = h->currentsize + 1;
total_unique_words++;
return newnode;
}
The primary problem you are facing is attempting to sort a hashtable with linked-list chaining of buckets. When a hash collision occurs, your table is not resized, you simply use a linked-list to store the word causing the collision at the same table[index] linked to the word already stored there. That is what add does.
This can easily result in the contents of your hashtable looking like this:
table[ 0] = NULL
table[ 1] = foo
table[ 2] = NULL
table[ 3] = |some|->|words|->|that|->|collided| /* chained bucket */
table[ 4] = other
table[ 5] = words
table[ 6] = NULL
table[ 7] = NULL
...
You cannot simply qsort table and hope to get the correct word frequencies. qsort has no way to know that "some" is just the beginning word in a linked-list, all qsort gets is a pointer to "some" and sizeof(word).
To make life much easier, simply forget the hashtable, and use a dynamically allocated array of word**. You can use a similar add where you increment the number of occurrences for duplicates, and you avoid all problems with chained-buckets. (and if you provide automatic storage for each word, it leaves you with a simple free() of your pointers and you are done)
The following example takes 2 arguments. The first the filename to read words from, and (optionally) a second integer value limiting the sorted output to the that top number of words. The words_t struct uses automatic storage for word limited to 32-chars (the largest word in the unabridged dictionary is 28-characters). You can change the way words or read to parse the input and ignore punctuation and plurals as desired. The following delimits words on all punctuation (except the hyphen), and discards the plural form of words (e.g. it stores "Mike" when "Mike's" is encountered, discarding the "'s")
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#define MAXC 32 /* max word length is 28-char, 29-char is sufficient */
#define MAXW 128 /* initial maximum number of words to allocate */
typedef struct {
char word[MAXC]; /* struct holding individual words */
size_t ninst; /* and the number of times they occur */
} words_t;
/* function prototypes */
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw);
void *xrealloc (void *ptr, size_t psz, size_t *nelem);
/* qsort compare function for words_t (alphabetical) */
int cmpwrds (const void *a, const void *b)
{
return strcmp (((words_t *)a)->word, ((words_t *)b)->word);
}
/* qsort compare function for words_t (by occurrence - descending)
* and alphabetical (ascending) if occurrences are equal)
*/
int cmpinst (const void *a, const void *b)
{
int ndiff = (((words_t *)a)->ninst < ((words_t *)b)->ninst) -
(((words_t *)a)->ninst > ((words_t *)b)->ninst);
if (ndiff)
return ndiff;
return strcmp (((words_t *)a)->word, ((words_t *)b)->word);
}
int main (int argc, char **argv) {
int c = 0, nc = 0, prev = ' ', total = 0;
size_t maxw = MAXW, wc = 0, top = 0;
char buf[MAXC] = "";
words_t *words = NULL;
FILE *fp = fopen (argv[1], "r");
if (!fp) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if (argc > 2) { /* if 2 args, convert argv[2] to number of top words */
char *p = argv[2];
size_t tmp = strtoul (argv[2], &p, 0);
if (p != argv[2] && !errno)
top = tmp;
}
/* allocate/validate initial words */
if (!(words = calloc (maxw, sizeof *words))) {
perror ("calloc-words");
return 1;
}
while ((c = fgetc(fp)) != EOF) { /* read each character in file */
if (c != '-' && (isspace (c) || ispunct (c))) { /* word-end found */
if (!isspace (prev) && !ispunct (prev) && /* multiple ws/punct */
!(prev == 's' && nc == 1)) { /* exclude "'s" */
buf[nc] = 0; /* nul-terminate */
words = addword (words, buf, &wc, &maxw); /* add word */
nc = 0; /* reset char count */
}
}
else if (nc < MAXC - 1) { /* add char to buf */
buf[nc++] = c;
}
else { /* chars exceed MAXC - 1; storage capability of struct */
fprintf (stderr, "error: characters exceed %d.\n", MAXC);
return 1;
}
prev = c; /* save previous char */
}
if (!isspace (prev) && !ispunct (prev)) /* handle non-POSIX end */
words = addword (words, buf, &wc, &maxw);
if (fp != stdin) fclose (fp); /* close file if not stdin */
qsort (words, wc, sizeof *words, cmpinst); /* sort words by frequency */
printf ("'%s' contained '%zu' words.\n\n", /* output total No. words */
fp == stdin ? "stdin" : argv[1], wc);
/* output top words (or all words in descending order if top not given) */
for (size_t i = 0; i < (top != 0 ? top : wc); i++) {
printf (" %-28s %5zu\n", words[i].word, words[i].ninst);
total += words[i].ninst;
}
printf ("%33s------\n%34s%5d\n", " ", "Total: ", total);
free (words);
return 0;
}
/** add word to words, updating pointer to word-count 'wc' and
* the maximum words allocated 'maxw' as needed. returns pointer
* to words (which must be assigned back in the caller).
*/
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw)
{
size_t i;
for (i = 0; i < *wc; i++)
if (strcmp (words[i].word, word) == 0) {
words[i].ninst++;
return words;
}
if (*wc == *maxw)
words = xrealloc (words, sizeof *words, maxw);
strcpy (words[*wc].word, word);
words[(*wc)++].ninst++;
return words;
}
/** realloc 'ptr' of 'nelem' of 'psz' to 'nelem * 2' of 'psz'.
* returns pointer to reallocated block of memory with new
* memory initialized to 0/NULL. return must be assigned to
* original pointer in caller.
*/
void *xrealloc (void *ptr, size_t psz, size_t *nelem)
{ void *memptr = realloc ((char *)ptr, *nelem * 2 * psz);
if (!memptr) {
perror ("realloc(): virtual memory exhausted.");
exit (EXIT_FAILURE);
} /* zero new memory (optional) */
memset ((char *)memptr + *nelem * psz, 0, *nelem * psz);
*nelem *= 2;
return memptr;
}
(note: the output is sorted in descending order of occurrence, and in alphabetical order if words have the same number of occurrences)
Example Use/Output
$ ./bin/getchar_wordcnt_top dat/damages.txt 10
'dat/damages.txt' contained '109' words.
the 12
a 10
in 7
of 7
and 5
anguish 4
injury 4
jury 4
mental 4
that 4
------
Total: 61
Note: to use your hashtable as your basis for storage, you would have to, at minimum, create an array of pointers to each word in your hashtable, and then sort the array of pointers. Otherwise you would need to duplicate storage and copy the words to a new array to sort. (that would be somewhat a memory inefficient approach). Creating a separate array of pointers to each word in your hashtable to sort is about the only way you have to then call qsort and avoid the chained-bucket problem.

How to compare strings of two files?

I am a new c-language programmer.
I have got two files.
One consists of lines like:
84:1b:5e:a8:bf:7f
00:8e:f2:c0:13:cc
Another consists of lines like:
00-22-39
8C-FD-F0
My question is how can I using C language compare first half of line in the first file with a line in the second file?
Like: is 84:1b:5e equals to 8C-FD-F0?
I know the way to create an arrays to store those lines for the further comparison. But do I really need to create arrays?
P.S: comparison is case-insensitive
You haven't been very clear about what rules constitute a match. But if you want to compare the byte values, then you need to parse each line, converting it to those byte values.
You could use variations of strtok() to get the values from each line. However, a variation of sscanf() might be easier. Once you have the binary values from each file, then you can compare them.
Read the second file completely and store the contents in a sorted array. Then for each line read from the first file, binary search the sorted array to locate the match.
Implementation is below. It compiles with gcc.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
int cmp(const void * s1, const void * s2)
{
return strcasecmp(*(char **)s1, *(char **)s2);
}
int cmp_half(const char * s1, const char * s2)
{
int i;
for (i = 0; i < 3; i++)
{
int res = strncasecmp((char *)s1+i*3, (char *)s2+i*3, 2);
if (res != 0) return res;
}
return 0;
}
char * line[1024];
int n = 0;
int search(const char * s)
{
int first, last, middle;
first = 0;
last = n - 1;
middle = (first+last)/2;
while( first <= last )
{
int res = cmp_half(s, line[middle]);
if (res == 0) return middle;
if (res > 0)
first = middle + 1;
else
last = middle - 1;
middle = (first + last)/2;
}
return -1;
}
int main()
{
FILE * f1, * f2;
char * s;
char buf[1024*1024], text[1024];
f1 = fopen("file1.txt", "rt");
f2 = fopen("file2.txt", "rt");
s = buf;
while (fgets(s, 1024, f2) != NULL)
{
line[n] = s;
s = s+strlen(s)+1;
n++;
}
qsort(line, n, sizeof(char *), cmp);
while (fgets(text, 1024, f1) != NULL)
{
text[strlen(text)-1] = 0;
int idx = search(text);
if (idx >= 0)
{
printf("%s matched %s\n", text, line[idx]);
}
else
{
printf("%s not matched\n", text);
}
}
return 0;
}

Resources