(char*)malloc(sizeof(char)) causing segmentation fault, how? - c

The code works fine on most inputs, but for userID's whih are very long I get a segmentation fault. My question is, how can malloc cause a segmentation fault? simply allocating memory shouldn't cause this. I found the problem area by using printf() statements, it seem the malloc within my read_line() function is where the problem is because the second "read_line" does not print, but the first before the malloc does.
thank you.
- Chris
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " " /* the delimiter */
#define MAX_CHANGE (10.0/86400.0) /* 10kg/day */
/* seconds in a day is 24 hours * 60 minutes * 60 seconds */
#define MEM_OUT printf("%s","out of memory");
/* Defines Node structure. */
struct Node{
char *id;
float weight;
int time;
struct Node *next;
} *head, *p, *t, *last;
/* Constructor which returns a pointer to a new node. */
struct Node * new_node(int time, char *id, float w)
{ /*note malloc returns a pointer */
struct Node *node = (struct Node *)malloc(sizeof(struct Node));
node->time = time;
node->id = (char *)malloc( (strlen(id) + 1) * sizeof(char));
strcpy(node->id, id); //duplicate the id, so new node has own copy.
node->weight = w;
node->next = NULL;
return node;
}
/* reads in line of characters until either a EOF or '\n' is encountered
then places a the terminator '\0' at the end */
char * read_line(FILE *stream)
{
printf("read_line");
char * temp = (char*)malloc(sizeof(char));
printf("read_line");
char * line = (char*)malloc(sizeof(char));
char c;
*line = '\0';
int i = 1;
//strchr()
while( (c = getc(stream)) != EOF && c != '\n')
{
//if(c == EOF) return NULL;
//realloc(line,++i);
strcpy(temp,line);
line = malloc(++i * sizeof(char));
strcpy(line,temp);
temp = malloc(i * sizeof(char));
*(line + (i-1)) = '\0';
*(line + (i-2)) = c;
}
free(temp);
if( i == 1) return NULL;
return line;
}
main() {
int lasttime = 0, timestamp, duration, tokens;
char * userID = NULL;
char * lastuserID = NULL;
char * line = NULL;
float weight,lastweight,change,changePerTime;
head = new_node(0,"",0.0);
last = head;
FILE *fp = fopen("C:\\Users\\chris\\Desktop\\School\\York\\cse\\2031 Software Tools\\Labs\\6\\input.txt","r");
while( (line = read_line(fp)) != '\0') {
printf("%s\n",line);
//free(userID);
line = strtok(line, " \n");
if (line == NULL || sscanf(line,"%d",&timestamp) < 1 || timestamp == 0){
printf("%s\n","Invalid time");
continue;
}
line = strtok(NULL, " \n");
if(line == NULL || isdigit(line[0]) || line[0] == '.') {
printf("Illegal userID");
//free(line);
continue;
}
userID = (char * )malloc( (strlen(line)+1) * sizeof(char));
strcpy(userID,line);
strcat(userID," ");
do{
line = strtok(NULL," \n");
if(line != NULL && !isdigit(line[0]) && line[0] != '.'){
strcat(userID,line ); // adds ' ' and '\0'
strcat(userID," ");
}
}while(line != NULL && line[0] != '.' && !isdigit(line[0]) );
userID[strlen(userID)-1] = '\0'; //erases the tailing space.
if(strlen(userID) > 179){
printf("Illegal userID\n");
printf("mid");
continue;
printf("%s\n","after" );
}
if(line != NULL)
tokens = sscanf(line,"%f", &weight);
if(line == NULL || tokens < 1 || weight < 30.0 || weight > 300.0)
{printf("Illegal weight\n"); continue; }
if (lasttime >= timestamp){
printf("Nonmonotonic timestamps\n");
continue;
}
lasttime = timestamp;
// record is valid apst this point.
/* t = last occurence of this userID, p = last userID*/
for(p = head, t = NULL; p != NULL; p = p->next)
{
if(strcmp(userID,p->id) == 0)
t=p;
last = p; // set last to last p.
}
if(t == NULL)
printf("OK newuser\n");
else if(t != NULL)
{
duration = timestamp - t->time;
change = weight - t->weight;
changePerTime = change / duration;
if(changePerTime < -MAX_CHANGE || changePerTime > MAX_CHANGE)
printf("Suspiciously large weight change\n");
else
printf("OK\n");
}
/* add new node to end of list */
last->next = new_node(timestamp,userID,weight);
/* update lastnode */
last = last->next;
free(line);
}
fclose(fp);
/* count sum of id's for last valid user*/
int count=0;
for(p = head->next; p !=NULL; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
count++;
}
//fclose(f); // use if input from file is uncommented
// adds 1 to both demensions to hole axis
int tHeight = 11;
int tWidth = count + 1;
int qHeight = 10;
int qWidth= count;
/* builds the barchart */
char bc[tHeight][tWidth]; // + 1 for y-axis
/* draws axis and orgin */
int a,b;
for(a=0; a<tHeight; a++)
{
for(b=0;b<tWidth; b++)
{
if(a == qHeight && b == 0)
bc[a][b] = '+';
else if(a < tHeight && b == 0)
bc[a][b] = '|';
else if(a == qHeight && b > 0)
bc[a][b] = '-';
}
}
/* prints the bars */
int j=1, i, k, bh;
for(p = head; p != NULL, j < tWidth; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
{
for(i = 9, k=0, bh = (int)(p->weight / 30);i >= 0; i--)
{
if(k < bh)
{
bc[i][j] = '*';
k++; // barheight
}
else
bc[i][j] = ' ';
}
j++;
}
}
/* prints the barchart */
int m, n;
for(m=0; m < tHeight; m++)
{
for(n=0; n < tWidth; n++)
{
printf("%c",bc[m][n]);
}
printf("%c",'\n');
}
}

The malloc calls are not causing a segmentation fault. But your use of them later on could be.
Some Items of Note
Your printf("read line") statements will not print out immediately when called because the output is buffered. If you want them to print right away, do printf("read line\n"). You'll then see that both execute and your code that uses the tiny buffer you allocated will cause the crash.
In your while loop, you are doing more malloc calls and assigning the returns to variables, like temp and line, without freeing the prior memory pointers that temp and line hold, thus causing some memory leaks. Your commented out realloc was the better thought process: line = realloc(line, ++i * sizeof(*line));. Similarly for temp.
Memory Allocation Problem
One very problematic area is here:
userID = (char * )malloc( (strlen(line)+1) * sizeof(char));
strcpy(userID,line);
strcat(userID," ");
userID can hold the length of the string in line (strlen(line)) plus one more byte. But that one more byte is needed for the null terminator. Your strcat(userID, " ") will write past the length of the allocated buffer for userID by one byte.

I solved the problem! The issue wasn't the read_line function at all, it was the memory allocation for the userID string. Moving the malloc() for the user to the beginning of the loop fixed the problem.
The amount of memory allocated for the userID portion of the line was based on the the length of the whole line. eg: malloc(strlen(line)+2). However this was done after calling strtok() on the line a few times, which would allocate a memory block shorter than the length of the entire line. This is because strtok() places null terminators '\0' 's at every instance of the specified delimiter in the line, and strlen() only counts the length from the passed character pointer to the first '\0' it encounters.
Anyway, thanks for your help guys!
-Chris
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " " /* the delimiter */
#define MAX_CHANGE (10.0/86400.0) /* 10kg/day */
/* seconds in a day is 24 hours * 60 minutes * 60 seconds */
/* Defines Node structure. */
struct Node{
char *id;
float weight;
int time;
struct Node *next;
} *head, *p, *t, *last;
/* Constructor which returns a pointer to a new node. */
struct Node * new_node(int time, char *id, float w)
{ /*note malloc returns a pointer */
struct Node *node = (struct Node *)malloc(sizeof(struct Node));
node->time = time;
node->id = malloc(strlen(id) + 1);
strcpy(node->id, id); //duplicate the id, so new node has own copy.
node->weight = w;
node->next = NULL;
return node;
}
/* reads in line of characters until either a EOF or '\n' is encountered
then places a the terminator '\0' at the end */
char * read_line(FILE *in)
{
int i = 1;
char * s = NULL;
char c;
do{
s = realloc(s,i); //strlen does not work on NULL strings
if(s == NULL || s == "")
{
printf("%s\n","out of memory");
exit(1);
}
*(s + (i-1)) = '\0'; // ensures null terminated
if(i > 1)
*(s + (i-2)) = c;
i++;
}
while( (c = getc(in)) != EOF && c != '\n' );
if (c == '\n')
return s;
else if(c == EOF)
return NULL;
}
main() {
int lasttime = 0, timestamp, duration, tokens;
char * userID = NULL;
char * lastuserID = NULL;
char * line = NULL;
float weight,lastweight,change,changePerTime;
head = new_node(0,"",0.0);
last = head;
FILE *fp = fopen("C:\\Users\\chris\\Desktop\\School\\York\\cse\\2031 Software Tools\\Labs\\6\\tests\\04.in","r");
while((line = read_line(fp)) != NULL) {
userID = malloc(strlen(line)+2); // max userID length is line length**
line = strtok(line, " \n");
if (line == NULL || sscanf(line,"%d",&timestamp) < 1 || timestamp == 0){
printf("%s\n","Invalid time");
continue;
}
line = strtok(NULL, " \n");
if(line == NULL || isdigit(line[0]) || line[0] == '.') {
printf("%s\n","Illegal userID");
//free(line);
continue;
}
strcpy(userID,line);
strcat(userID," ");
do{
line = strtok(NULL," \n");
if(line != NULL && !isdigit(line[0]) && line[0] != '.'){
strcat(userID,line ); // adds ' ' and '\0'
strcat(userID," ");
}
}while(line != NULL && line[0] != '.' && !isdigit(line[0]) );
userID[strlen(userID)-1] = '\0'; //erases the tailing space.
if(strlen(userID) > 179){
printf("Illegal userID\n");
free(userID);
free(line);
continue;
}
if(line != NULL)
tokens = sscanf(line,"%f", &weight);
if(line == NULL || tokens < 1 || weight < 30.0 || weight > 300.0)
{printf("Illegal weight\n"); continue; }
if (lasttime >= timestamp){
printf("Nonmonotonic timestamps\n");
continue;
}
lasttime = timestamp;
// record is valid apst this point.
/* t = last occurence of this userID, p = last userID*/
for(p = head, t = NULL; p != NULL; p = p->next)
{
if(strcmp(userID,p->id) == 0)
t=p;
last = p; // set last to last p.
}
if(t == NULL)
printf("OK newuser\n");
else if(t != NULL)
{
duration = timestamp - t->time;
change = weight - t->weight;
changePerTime = change / duration;
if(changePerTime < -MAX_CHANGE || changePerTime > MAX_CHANGE)
printf("Suspiciously large weight change\n");
else
printf("OK\n");
}
/* add new node to end of list */
last->next = new_node(timestamp,userID,weight);
/* update lastnode */
last = last->next;
free(line);
} // end of input loop
fclose(fp);
/* count sum of id's for last valid user*/
int count=0;
for(p = head->next; p !=NULL; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
count++;
}
//fclose(f); // use if input from file is uncommented
// adds 1 to both demensions to hole axis
int tHeight = 11;
int tWidth = count + 1;
int qHeight = 10;
int qWidth= count;
/* builds the barchart */
char bc[tHeight][tWidth]; // + 1 for y-axis
/* draws axis and orgin */
int a,b;
for(a=0; a<tHeight; a++)
{
for(b=0;b<tWidth; b++)
{
if(a == qHeight && b == 0)
bc[a][b] = '+';
else if(a < tHeight && b == 0)
bc[a][b] = '|';
else if(a == qHeight && b > 0)
bc[a][b] = '-';
}
}
/* prints the bars */
int j=1, i, k, bh;
for(p = head; p != NULL, j < tWidth; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
{
for(i = 9, k=0, bh = (int)(p->weight / 30);i >= 0; i--)
{
if(k < bh)
{
bc[i][j] = '*';
k++; // barheight
}
else
bc[i][j] = ' ';
}
j++;
}
}
/* prints the barchart */
int m, n;
for(m=0; m < tHeight; m++)
{
for(n=0; n < tWidth; n++)
{
printf("%c",bc[m][n]);
}
printf("%c",'\n');
}
}

Related

Heap block warning during the words counting of a program

I think there are some problems related to the memory and the heap corruption that don't allow my program to run properly (mainly because of some bug inside of it). The program just stops running, or crashes after its quit.
I'm trying to learn how trees work and for my case I have to write a cross-referencer that reads all the words in a document (in my case, the input line), and for each word, a list of the line numbers on which it occurs. For example:
foo
bar bar
foo bar
Should produce as output:
2 foo: [1, 3]
2 bar: [2, 3]
where the numbers inside the [] are the lines where our words are found.
There are 2 main issues with my code:
it only prints 1 inside the brackets, as if the program never checks the newline
if I try to run more than 10 lines of input it crashes. Without gdb it allows me to output all the lines I want, and won't crash until it reaches the 10 lines:
t
t
t
t
t
quit
5 t: [1, 1, 1, 1, 1]
When I run it with gdb, instead, it gives me this:
(gdb) r
Starting program: C:\...\6.exe
[New Thread 15276.0x14fc]
t
t
t
warning: HEAP[6.exe]:
warning: Heap block at 000001E191B97CA0 modified at 000001E191B97CB6 past requested size of 6
Thread 1 received signal SIGTRAP, Trace/breakpoint trap.
0x00007ff981f969ff in ntdll!RtlRegisterSecureMemoryCacheCallback () from C:\WINDOWS\SYSTEM32\ntdll.dl
(gdb) bt
#0 0x00007ff981f969ff in ntdll!RtlRegisterSecureMemoryCacheCallback ()
from C:\WINDOWS\SYSTEM32\ntdll.dll
#1 0x00007ff981f9288a in ntdll!RtlZeroHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#2 0x00007ff981f61357 in ntdll!EtwLogTraceEvent () from C:\WINDOWS\SYSTEM32\ntdll.dll
#3 0x00007ff981f95839 in ntdll!RtlRegisterSecureMemoryCacheCallback ()
from C:\WINDOWS\SYSTEM32\ntdll.dll
#4 0x00007ff981f4de29 in ntdll!EtwLogTraceEvent () from C:\WINDOWS\SYSTEM32\ntdll.dll
#5 0x00007ff981ed24b7 in ntdll!RtlReAllocateHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#6 0x00007ff981ed237a in ntdll!RtlReAllocateHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#7 0x00007ff97fb71a89 in ucrtbase!_realloc_base () from C:\WINDOWS\System32\ucrtbase.dll
#8 0x00007ff71ff81bbe in addtree ()
#9 0x00007ff71ff81a4e in main ()
I didn't even type quit (the word to break the loop) and it just stopped by giving me this warning.
I don't know how to fix this, because probably I am forgetting to free something (there is some heap allocation), but I have no idea on where the problem may be.
This is the code:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFSIZE 100
#define MAXWORD 100
#define IS_NOISE_WORD(word) \
(strcmp(word, "a") == 0 || \
strcmp(word, "an") == 0 || \
strcmp(word, "the") == 0 || \
strcmp(word, "and") == 0 || \
strcmp(word, "or") == 0 || \
strcmp(word, "in") == 0 || \
strcmp(word, "of") == 0 || \
strcmp(word, "to") == 0 || \
strcmp(word, "is") == 0 || \
strcmp(word, "are") == 0 || \
strcmp(word, "was") == 0 || \
strcmp(word, "were") == 0 || \
strcmp(word, "be") == 0 || \
strcmp(word, "been") == 0 || \
strcmp(word, "being") == 0 || \
strcmp(word, "have") == 0 || \
strcmp(word, "has") == 0 || \
strcmp(word, "had") == 0 || \
strcmp(word, "having") == 0)
/* etc. */
#define IS_NOT_NOISE_WORD(word) (!IS_NOISE_WORD(word))
/* the tree node */
struct tnode {
char *word; /* points to the text */
int count; /* number of occurrences */
int *lines; /* lines where the word occurs */
struct tnode *left; /* left child */
struct tnode *right; /* right child */
};
char buf[BUFSIZE]; /* buffer for ungetch */
int bufp = 0; /* next free position in buf */
/* char *strdup(char *); */
int getword(char *, int);
struct tnode *addtree(struct tnode *, char *, int);
void tfree(struct tnode *);
void treeprint(struct tnode *);
/* word frequency count */
int main(int argc, char *argv[])
{
struct tnode *root = NULL;
char word[MAXWORD];
int n = 1; /* number of lines */
while (getword(word, MAXWORD) != EOF)
{
if (word[0] == '\n')
n++;
/* if there is a word and it's not a noise */
if (isalpha(word[0]) && IS_NOT_NOISE_WORD(word) && strcmp(word, "quit") != 0 && strcmp(word, "exit") != 0)
root = addtree(root, word, n);
if (!strcmp(word, "quit") || !strcmp(word, "exit"))
break;
}
treeprint(root);
tfree(root);
return 0;
}
/* addtree: add a node with the word w at line l, at or below p */
struct tnode *addtree(struct tnode *p, char *w, int l)
{
int cond;
/* a new word has arrived */
if (p == NULL)
{
/* make a new node */
p = malloc(sizeof(struct tnode));
p->word = strdup(w);
p->count = 1;
p->lines = calloc(p->count + 1, sizeof(int));
p->lines[p->count - 1] = l;
p->left = p->right = NULL;
}
else {
cond = strcmp(w, p->word);
if (cond == 0) {
/* repeated word */
p->count++;
p->lines = realloc(p->lines, p->count + 1 * sizeof(int));
p->lines[p->count - 1] = l;
}
else if (cond < 0) {
/* less than into left subtree */
p->left = addtree(p->left, w, l);
}
else {
/* greater than into right subtree */
p->right = addtree(p->right, w, l);
}
}
return p;
}
/* tfree: free a tnode */
void tfree(struct tnode *p)
{
if (p == NULL)
return;
tfree(p->left);
tfree(p->right);
free(p);
if (p->word != NULL) {
free(p->word);
p->word = NULL;
}
if (p->lines != NULL) {
free(p->lines);
p->lines = NULL;
}
}
/* treeprint: in-order print of tree p */
void treeprint(struct tnode *p)
{
int i;
if (p != NULL) {
treeprint(p->left);
printf("%4d %s: [%d", p->count, p->word, p->lines[0]);
for (i = 1; i < p->count; i++)
printf(", %d", p->lines[i]);
printf("]\n");
treeprint(p->right);
}
}
/* getword: get next word or character from input */
int getword(char *word, int lim)
{
char *w = word;
int c, getch(void);
void ungetch(int);
int in_comment = 0; /* 1 if inside a comment */
int in_pp_line = 0; /* 1 if inside a preprocessor line */
int in_string = 0; /* 1 if inside a string */
/* skip spaces */
while (isspace(c = getch()))
;
if (c != EOF)
*w++ = c;
/* not underscore, pp line, comment, string */
if (!isalpha(c) && c != '_' && c != '\"' && c != '#' && c != '/') {
*w = '\0';
return c;
}
if (c == '\"')
in_string = 1;
if (c == '#')
in_pp_line = 1;
/* it only checks single line comments for now */
if (c == '/') {
if ((c = getch()) == '/')
in_comment = 1;
else
ungetch(c);
}
while (--lim > 0)
{
c = getch();
if (in_comment && (c == '\n'))
in_comment = 0;
if (in_pp_line && (c == '\n'))
in_pp_line = 0;
/* if the char is in a string or in a comment or in a pp line, and is not alphanumeric */
if (!isalnum(c) && c != '_' && (in_string == 1 || c != '\"') && !in_pp_line && !in_comment)
{
ungetch(c);
break;
}
if (c == '/' && *(w - 1) == '/')
in_comment = 1;
if (c == '\"')
in_string = (in_string == 1) ? 0 : 1;
*w++ = c;
}
*w = '\0';
return word[0];
}
/* get a (possibly pushed-back) character */
int getch(void) {
return (bufp > 0) ? buf[--bufp] : getchar();
}
/* push character back on input */
void ungetch(int c) {
if (bufp >= BUFSIZE)
printf("ungetch: too many characters\n");
else
buf[bufp++] = c;
}
Besides the crash issues I don't get why the n count doesn't increase. Is the getword function not returning '\n' at all?
tfree(): It's undefined behavior to deference a pointer after it's freed. Also, there is no point of setting p->word and p->lines to NULL when you free(p).
void tfree(struct tnode *p) {
if (!p)
return;
tfree(p->left);
tfree(p->right);
if (p->word)
free(p->word);
if (p->lines)
free(p->lines);
free(p);
}
addtree(): * has higher precedence than +. It should be:
p->lines = realloc(p->lines, (p->count + 1) * sizeof(int));
but as you increment p->count the line before you just want:
p->lines = realloc(p->lines, p->count * sizeof(int));
There is a similar logic error in the call to calloc().
valgrind is happy after I fix these these two issues, and I cannot reproduce the crash with 10 lines of input.
getword(): line numbers don't advance as you skip the \n from the last line with:
while (isspace(c = getch()))
yet caller expect word[0] to be a '\n' to advance the line number n. Here is a minimal fix:
do {
c = getch();
} while(c != '\n' && isspace(c));
and the output is now:
3 bar: [2, 2, 3]
2 foo: [1, 3]
That said I suggest you have caller read a line with fgets() then split that line into words with a revised version of getwork().
(not fixed) getword(): It's problematic that you 3 separate calls to getch() unless you really want to handle input differently in each case.
addtree(): You currently record duplicate lines for a given word but you want subsequent duplicates lines to be no-op it seems. Also, might as well just use malloc() instead of calloc() as you explicitly set p->lines right after. Prefer using variable instead of type to sizeof().
struct tnode *addtree(struct tnode *p, char *w, int l) {
if (!p) {
/* make a new node */
p = malloc(sizeof *p);
p->word = strdup(w);
p->count = 1;
p->lines = malloc(sizeof *p->lines);
p->lines[p->count - 1] = l;
p->left = NULL;
p->right = NULL;
return p;
}
int cond = strcmp(w, p->word);
if(cond < 0)
p->left = addtree(p->left, w, l);
else if(!cond && l != p->lines[p->count - 1]) {
p->count++;
p->lines = realloc(p->lines, p->count * sizeof(int));
p->lines[p->count - 1] = l;
} else if(cond > 0)
p->right = addtree(p->right, w, l);
return p;
}
and the output is now:
2 bar: [2, 3]
2 foo: [1, 3]
(not fixed) p = realloc(p, ...) leaks p if realloc() fails. It should be:
int *tmp = realloc(p->lines, (p->count + 1) * sizeof(int));
if(!tmp) {
// handle error
return NULL;
}
p->lines = tmp;
(not fixed) malloc(), calloc(), strdup() may fail and return NULL. You want to check for that:
p = malloc(sizeof(struct tnode));
if(!p) {
// handle error
return NULL;
}

malloc() in C returns populated memory

char *string = (char *) malloc(sizeof(char) * sz);
code right before this->void insert_word(word *root, char string1[], int linenumber) { int sz=strlen(string1)<=MAX_WORD_LENGTH?strlen(string1):MAX_WORD_LENGTH; Code block 3 has the entire context
Sometimes malloc() returns a populated memory location while using it.
What bothers me is that this is not random.
(This program consists of taking words from a file and passing them to this function. For THE SAME WORD, the function behaviour(in particular that of malloc()) is different.
For the inputs
string1=0x7fffffffdf10 "lol" root=BST, sz gets a value of 3
The value allocated to string by malloc() is 0x55555555c510 "\340\305UUUU" Why is malloc not pointing to an empty memory location? (This is not random behaviour, it is predictable and repeatable)
Furthermore,this loop runs an infinite amount of time for some reason
while(strcmp(string1,string)!=0)
{
free(string);
string=NULL;
string = (char *) malloc(sizeof(char) * sz);
strncpy(string,string1,sz);
}
MORE RELAVANT CODE
#define MAX_WORD_LENGTH 20
Definition of the structures
typedef struct linkedList
{
int number;
struct linkedList *next;
}list;
typedef struct word_with_count
{
char* string;
list *linenumbers;
struct word_with_count *left;
struct word_with_count *right;
}word;```
[3] ) The function
void insert_word(word *root, char string1[], int linenumber) {
int sz=strlen(string1)<=MAX_WORD_LENGTH?strlen(string1):MAX_WORD_LENGTH;
char *string = (char *) malloc(sizeof(char) * sz);
strncpy(string,string1,sz);
if (root==NULL) {
return;
} else if (strcmp(string, root->string) < 0) {
if (root->left == NULL) {
root->left = createword(string, linenumber);
} else {
insert_word(root->left, string, linenumber);
}
} else if (strcmp(string, root->string) > 0) {
if (root->right == NULL) {
root->right = createword(string, linenumber);
} else {
insert_word(root->right, string, linenumber);
}
} else {
append_list(linenumber, root->linenumbers);
}
free(string);
}
main() which calls this function
int main() {
char path[MAX_PATH_LENGTH];
FILE *fp;
fgets(path, MAX_PATH_LENGTH, stdin);
if (strlen(path) > 0 && path[strlen(path) - 1] == '\n')
path[strlen(path) - 1] = '\0';
fp = fopen(path, "r");
if (fp == NULL) {
printf("File not found\n");
return 0;
}
char ch;
int line_count = 1;
char current_word[MAX_WORD_LENGTH] = "";
word *root = NULL;
while (!feof(fp)) {
ch = fgetc(fp);
//printf("%c", ch);
if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
if (ch >= 'A' && ch <= 'Z')
ch = ch - 'A' + 'a';
strncat(current_word, &ch, 1);
} else if (ch == '-') {
continue;
} else {
if (strlen(current_word) > 2) {
if (root == NULL) {
root = createword(current_word, line_count);
} else {
insert_word(root, current_word, line_count);
}
}
memset(current_word, 0, sizeof(current_word));
if (ch == '\n') {
line_count++;
}
}
}
if (strlen(current_word) > 2) {
if (root == NULL) {
root = createword(current_word, line_count);
} else {
insert_word(root, current_word, line_count);
}
}
fclose(fp);
// print_tree(root);
//printf("\n");
//print_tree(root);
int status=delete_low_ocurrence(root, NULL, 3);
if (status == -1)root = NULL;
print_tree(root);
freetree(root);
return 0;
}
5)Auxilary function used by this function
word* createword(char string[], int linenumber)
{
word *newword = (word*)malloc(sizeof(word));
int sz=strlen(string)<=MAX_WORD_LENGTH?strlen(string):MAX_WORD_LENGTH;
newword->string = (char*)malloc(sizeof(char)*sz);
strncpy(newword->string, string,sz);
newword->linenumbers = (list*)malloc(sizeof(list));
newword->linenumbers->number = linenumber;
newword->linenumbers->next = NULL;
newword->left = NULL;
newword->right = NULL;
return newword;
}
Textfile given as input
much2f
much3f
lol
lol
lol
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
Why is malloc not pointing to an empty memory location?
Because it can. The content of the allocated memory via malloc() is not specified.
If code needs zeroed out memory, see calloc().
Bad code
strncpy(string,string1,sz) does not result in string being a string as it may lack null character termination. The following (strcmp(string... is then undefined behavior. Instead, do not use strncpy(), use strcpy() and make certain the prior allocation has enough room for the terminating null character.
strncpy(string,string1,sz);
...
} else if (strcmp(string, root->string) < 0) { // bad
Repaired code
word* createword(const char string[], int linenumber) {
word *newword = calloc(1, sizeof *newword);
size_t length = strlen(string);
if (length > MAX_WORD_LENGTH) {
length = MAX_WORD_LENGTH;
}
char *s = malloc(length + 1); // Include room for the \0
list *linenumbers = calloc(1, sizeof *linenumbers);
// Test allocation success
if (newword == NULL || s == NULL || linenumbers == NULL) {
free(newword);
free(s);
free(linenumbers);
return NULL;
}
memcpy(s, string, length); // Only copy the first 'length' characters.
s[length] = 0;
newword->string = s;
newword->linenumbers = linenumbers;
newword->linenumbers->number = linenumber;
newword->linenumbers->next = NULL;
newword->left = NULL;
newword->right = NULL;
return newword;
}
Why is “while ( !feof (file) )” always wrong?
feof(fp) improperly used here. fgetc() returns 257 different values. Do not use char ch.
//char ch;
//...
//while (!feof(fp)) {
// ch = fgetc(fp);
int ch;
...
while ((ch = fgetc(fp)) != EOF) {;
This is quite normal behaviour. 'malloc' just does the memory allocation, it makes no commitment on what's already in that memory location. What you probably need is 'calloc', which clears the memory and then allocates it to your program.

Memory leak caused by realloc in valgrind C

I have a my_string object made that contains a char pointer, a size, and a capacity. I am trying to read words from a dictionary text file into the my_string object and print them to the screen. The default capacity is 7, so when I read a word that is longer than 7 characters, I need to reallocate some space at the end of the string. I haven't been able to implement this functionality correctly yet. It seems to work correctly, but when I run it in valgrind, there is apparently a memory leak somewhere in my code. Not sure where this is coming from, because I free the entire string after the program runs. If someone could help me with this you'd be a lifesaver. Here is my driver code, and my my_string.c file:
main.c
#include <stdio.h>
#include <stdlib.h>
#include "my_string.h"
int main(int argc, char* argv[]) {
MY_STRING hMy_String = NULL;
FILE* fp;
hMy_String = my_string_init_default();
fp = fopen("dictionary.txt", "r");
int len;
while(my_string_extraction(hMy_String, fp)) {
len = my_string_get_size(hMy_String);
if(len == 8){
my_string_insertion(hMy_String, stdout);
printf("\n");
if(fgetc(fp) == ' '){
printf("Found a space after the string\n");
}
}
}
my_string_destroy(&hMy_String);
fclose(fp);
return 0;
}
my_string.c
#include <stdio.h>
#include <stdlib.h>
#include "my_string.h"
struct my_string {
int size;
int capacity;
char* data;
};
typedef struct my_string My_String;
MY_STRING my_string_init_default(void){
//default capacity of string is 7
//initializes pointer to My_String, set to NULL for good practice
My_String* pMy_String = NULL;
//allocates memory for default string
pMy_String = (My_String*)malloc(sizeof(My_String));
if(pMy_String != NULL) {
pMy_String->size = 0;
pMy_String->capacity = 7;
pMy_String->data = (char*)malloc(sizeof(char) * pMy_String->capacity);
if(pMy_String->data == NULL) {
free(pMy_String);
pMy_String = NULL;
}
}
//returns copy of address to default string
return pMy_String;
}
MY_STRING my_string_init_c_string(char* c_string) {
int i = 0;
//initializes a pointer to a My_String
My_String* theString = NULL;
//loops through string, for every character, i increases 1
while((*c_string) != '\0'){
c_string++;
i++;
}
//resets c_strings value from before the loop
c_string = c_string - i;
i++; //i needs to be 1 greater than the length of the string
//allocation
theString = malloc(sizeof(My_String) + i);
//if there was an error, return NULL
if(theString == NULL)
return NULL;
else {
//sets the values of the object to the given values
(*theString).size = (i - 1);
(*theString).capacity = i;
(*theString).data = c_string;
}
//returns address of the initialized string
return theString;
}
int my_string_get_capacity(MY_STRING hMy_string){
return sizeof(hMy_string);
}
int my_string_get_size(MY_STRING hMy_string) {
char* str = (char*) hMy_string;
int size = 0;
while(str[size] != '\0')
size++;
return size;
}
int my_string_compare(MY_STRING hLeft_string, MY_STRING hRight_string) {
// sets the strings to pointers to the My_String data type
My_String* Left = hLeft_string;
My_String* Right = hRight_string;
//variables for lexicographical value of each string
int left_lex = 0;
int right_lex = 0;
//loops through left string, adds up left_lex value
while((*Left->data) != '\0') {
left_lex += (*Left->data);
(*Left).data++;
}
//loops through right string, adds up right_lex value
while((*Right->data) != '\0') {
right_lex += (*Right->data);
(*Right).data++;
}
//does the comparison and returns the corrosponding value
if(left_lex < right_lex){
return -1;
}else if (left_lex == right_lex) {
return 0;
}else {
return 1;
}
}
Status my_string_extraction(MY_STRING hMy_string, FILE* fp) {
My_String* pMy_string = hMy_string;
int start = 0, end = 0;
char ch;
int strLength, capacity, i;
while(!feof(fp)){
ch = fgetc(fp);
if(ch == ' ' || ch == '\t' || ch == '\n' ||ch == '\r') {
continue;
}
else {
start = ftell(fp) - 1;
end = start + 1;
break;
}
}
if(end == 0){
return FAILURE;
}
while(!feof(fp)) {
ch = fgetc(fp);
end++;
if(ch == ' ' || ch == '\t' || ch == '\n' ||ch == '\r'){
break;
}
}
strLength = end - start - 1;
(*pMy_string).size = strLength;
if(strLength == 0){
return FAILURE;
}
capacity = my_string_get_capacity(hMy_string);
if(strLength >= capacity){
hMy_string = realloc(hMy_string, strLength + 1);
}
fseek(fp, start, SEEK_SET);
char* str = (char*)hMy_string;
for(i = 0; i < strLength; i++) {
str[i] = fgetc(fp);
}
str[i] = '\0';
return SUCCESS;
}
Status my_string_insertion(MY_STRING hMy_string, FILE* fp) {
char* str = (char*)hMy_string;
if(fprintf(fp, "%s", str))
return SUCCESS;
else
return FAILURE;
}
void my_string_destroy(MY_STRING* phMy_string){
free(*phMy_string);
*phMy_string = NULL;
}

Printing top 10 recurring words in a file

Edited question:
Hi guys, my goal is to print the top 10 occurring words in a file, I have managed to get everything to work from reading the file to counting word occurrences and printing it, but when I implement my qsort I get a segfault. I looked over my pointers and they look okay to me, I would appreciate any feedback.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX 51
struct words
{
char *ch;
int index;
struct words *pNext;
};
struct words* createWordCounter(char *ch)
{
struct words *pCounter = NULL;
pCounter = (struct words*)malloc(sizeof(char));
pCounter->ch = (char*)malloc(strlen(ch)+1);
strcpy(pCounter->ch, ch);
pCounter->index = 1;
pCounter->pNext = NULL;
return pCounter;
}
struct words *pStart = NULL;
char* removePunc(struct words* ch)
{
char *src = ch, *dst = ch;
while (*src)
{
if (ispunct((unsigned char)*src))
{
src++;
}
else if (isupper((unsigned char)*src))
{
*dst++ = tolower((unsigned char)*src);
src++;
}
else if (src == dst)
{
src++;
dst++;
}
else
{
*dst++ = *src++;
}
}
*dst = 0;
}
void addWord(char *word)
{
struct words *pCounter = NULL;
struct words *pLast = NULL;
if(pStart == NULL)
{
pStart = createWordCounter(word);
return;
}
pCounter = pStart;
while(pCounter != NULL)
{
if(strcmp(word, pCounter->ch) == 0)
{
++pCounter->index;
return;
}
pLast = pCounter;
pCounter = pCounter->pNext;
}
pLast->pNext = createWordCounter(word);
}
void printWord(struct words *pCounter)
{
printf("\n%-30s %5d\n", pCounter->ch, pCounter->index);
}
//sort
int compare (const void * a, const void * b){
struct words *A1 = (struct words *)a;
struct words *B1 = (struct words *)b;
return B1->index - A1->index;
/*
if ((A1->count - B1->count) > 0)
return -1;
else if ((A1->count - B2->count) < 0)
return 1;
else
return 0;
*/
}
int main(int argc, char * argv[])
{
struct words *pCounter = NULL;
char temp[MAX];
FILE *fpt;
if(argc == 2)
{
printf("File name is: %s\n",argv[1]);
fpt = fopen(argv[1], "r");
//fail test
if(fpt == NULL)
{
printf("cannot open file, exiting program...\n");
exit(0);
}
//get the data out of the file and insert in struct
int wordCounter = 0;
int i = 0;
int lines = 0;
while((fscanf(fpt, "%s ", &temp)) == 1)
{
removePunc(temp);
addWord(temp);
if(temp == ' ')
i++;
if(temp == '\n')
lines++;
wordCounter++;
}
/*
pCounter = pStart;
while(pCounter != NULL)
{
printWord(pCounter);
pCounter = pCounter->pNext;
}
*/
//sort
qsort(pCounter, wordCounter, sizeof(struct words), compare);
for(int j = 0; i < 10; i++)
{
printWord(pCounter);
}
}
fclose(fpt);
return 0;
}
First temp is already a pointer, so do not include '&' before it in fscanf. Second, don't skimp on buffer size (e.g. #define MAX 1024). Third, protect your array bounds with the field-width modifier and don't put trailing whitespace in your format-string.
Putting it altogether (presuming you use 1024 as MAX, you can use
fscanf(fpt, "1023%s", temp))
Well done on checking the return of fscanf during your read.
Adding to the things that have already been mentioned.
In createWordCounter(...)
pCounter = (struct words*)malloc(sizeof(char));
you are allocating memory for a char. Even though the pointer to a struct is the pointer to its first member, the first element of words is a pointer to a char. It is better to be careful and write
struct words *pCounter = malloc(sizeof *pCounter);
Also, be mindful of operator precedence.
In addWord(...) you have
++pCounter->index;
What that does is increment the pointer pCounter before accessing index. If you are trying to increment index, it should be
++(pCounter->index);
or
pCounter->index++;
I recommend striping your program down to its bare essentials and test each part one at a time systematically to narrow down the cause of your errors.
I think the main problem is the size of temp array when you try to using fscanf.
while((fscanf(fpt, "%s ", temp)) == 1)
When the length of one line is bigger than MAX, segmentation fault occur.
You can change your code like this
#define SCANF_LEN2(x) #x
#define SCANF_LEN(x) SCANF_LEN2(x)
//...
//your original code
//...
while((fscanf(fpt, "%"SCANF_LEN(MAX)"s ", temp)) == 1)
By the way, you should check
(1) compile warning about type
char* removePunc(struct words* ch)
should be char* removePunc(char *ch)
if(temp == ' ') should be if(temp[0] == ' ')
if(temp == '\n') should be if(temp[0] == '\n')
(2) malloc size
pCounter = (struct words*)malloc(sizeof(char)); should be pCounter = (struct words*)malloc(sizeof(struct words));
(3) remember free after using malloc

Dynamic memory allocation in C using realloc

I have read the other SO question about using realloc to get a new pointer to the beginning of a bigger memory address space but I cant figure out what I am doing wrong. It prints a backtrace and memory dump. I later try to access strhldr but I dont think it even gets that far.
char *strhldr = (char *)malloc(strsize);
int chrctr = 0;
if(chrctr == strsize - 3){ // when you get close
strsize = strsize*2; //double size
char *temp = realloc(strhldr, strsize); //make more room
if(temp == NULL)
printf("reallocate failed\n");
else{
strhldr = temp;
free(temp); // removed same issue
}
}
// Later attempt to add it to an array
cmdargs[i] = strhldr;
This is all within a while loop where chrctr and strsize get incremented
complete code
int argctr = 64;
char **cmdargs = (char **) malloc(argctr * sizeof(char*));
char c = getchar();
int i = 0;
while(c != '\n' && c != EOF){ //read through a String of stdin
int strsize = 32;
char *strhldr = (char *)malloc(strsize);
char *strstarthldr = strhldr;
if(c == ' ')
c = getchar();
while(c != ' ' && c != '\n' && c != EOF){
int chrctr = 0;
if(chrctr == strsize - 3){ // when you get close
strsize = strsize*2; //double size
char *temp = realloc(strhldr, strsize); //make more room
if(temp == NULL)
printf("reallocate failed\n");
else
strhldr = temp;
} //add that word to the array of strings
strhldr[chrctr] = c;
chrctr++;
c = getchar();
}
strhldr[charctr] = '\0';
//strhldr = strstarthldr;
cmdargs[i] = strhldr;
i++;
}
On success, realloc will free its argument if needed. So remove the call to free(temp).
It's not very clear to me what you are trying to do but I believe free(temp); invalidates strhldr and future read/write access to it will cause you trouble.
Second problem - your value charctr (not chrctr) is not set. Here is a version of your loop. I haven't tested it but it should be close.
if(c == ' ') {
c = getchar();
}
int chrctr = 0;
while(c != ' ' && c != '\n' && c != EOF){
if(chrctr == strsize - 3){ // when you get close
strsize = strsize*2; //double size
char *temp = realloc(strhldr, strsize); //make more room
if(temp == NULL) {
printf("reallocate failed\n");
break;
}
else {
strhldr = temp;
}
}
strhldr[chrctr] = c;
chrctr++;
c = getchar();
}
strhldr[chrctr] = 0;

Resources