Heap block warning during the words counting of a program - c

I think there are some problems related to the memory and the heap corruption that don't allow my program to run properly (mainly because of some bug inside of it). The program just stops running, or crashes after its quit.
I'm trying to learn how trees work and for my case I have to write a cross-referencer that reads all the words in a document (in my case, the input line), and for each word, a list of the line numbers on which it occurs. For example:
foo
bar bar
foo bar
Should produce as output:
2 foo: [1, 3]
2 bar: [2, 3]
where the numbers inside the [] are the lines where our words are found.
There are 2 main issues with my code:
it only prints 1 inside the brackets, as if the program never checks the newline
if I try to run more than 10 lines of input it crashes. Without gdb it allows me to output all the lines I want, and won't crash until it reaches the 10 lines:
t
t
t
t
t
quit
5 t: [1, 1, 1, 1, 1]
When I run it with gdb, instead, it gives me this:
(gdb) r
Starting program: C:\...\6.exe
[New Thread 15276.0x14fc]
t
t
t
warning: HEAP[6.exe]:
warning: Heap block at 000001E191B97CA0 modified at 000001E191B97CB6 past requested size of 6
Thread 1 received signal SIGTRAP, Trace/breakpoint trap.
0x00007ff981f969ff in ntdll!RtlRegisterSecureMemoryCacheCallback () from C:\WINDOWS\SYSTEM32\ntdll.dl
(gdb) bt
#0 0x00007ff981f969ff in ntdll!RtlRegisterSecureMemoryCacheCallback ()
from C:\WINDOWS\SYSTEM32\ntdll.dll
#1 0x00007ff981f9288a in ntdll!RtlZeroHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#2 0x00007ff981f61357 in ntdll!EtwLogTraceEvent () from C:\WINDOWS\SYSTEM32\ntdll.dll
#3 0x00007ff981f95839 in ntdll!RtlRegisterSecureMemoryCacheCallback ()
from C:\WINDOWS\SYSTEM32\ntdll.dll
#4 0x00007ff981f4de29 in ntdll!EtwLogTraceEvent () from C:\WINDOWS\SYSTEM32\ntdll.dll
#5 0x00007ff981ed24b7 in ntdll!RtlReAllocateHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#6 0x00007ff981ed237a in ntdll!RtlReAllocateHeap () from C:\WINDOWS\SYSTEM32\ntdll.dll
#7 0x00007ff97fb71a89 in ucrtbase!_realloc_base () from C:\WINDOWS\System32\ucrtbase.dll
#8 0x00007ff71ff81bbe in addtree ()
#9 0x00007ff71ff81a4e in main ()
I didn't even type quit (the word to break the loop) and it just stopped by giving me this warning.
I don't know how to fix this, because probably I am forgetting to free something (there is some heap allocation), but I have no idea on where the problem may be.
This is the code:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUFSIZE 100
#define MAXWORD 100
#define IS_NOISE_WORD(word) \
(strcmp(word, "a") == 0 || \
strcmp(word, "an") == 0 || \
strcmp(word, "the") == 0 || \
strcmp(word, "and") == 0 || \
strcmp(word, "or") == 0 || \
strcmp(word, "in") == 0 || \
strcmp(word, "of") == 0 || \
strcmp(word, "to") == 0 || \
strcmp(word, "is") == 0 || \
strcmp(word, "are") == 0 || \
strcmp(word, "was") == 0 || \
strcmp(word, "were") == 0 || \
strcmp(word, "be") == 0 || \
strcmp(word, "been") == 0 || \
strcmp(word, "being") == 0 || \
strcmp(word, "have") == 0 || \
strcmp(word, "has") == 0 || \
strcmp(word, "had") == 0 || \
strcmp(word, "having") == 0)
/* etc. */
#define IS_NOT_NOISE_WORD(word) (!IS_NOISE_WORD(word))
/* the tree node */
struct tnode {
char *word; /* points to the text */
int count; /* number of occurrences */
int *lines; /* lines where the word occurs */
struct tnode *left; /* left child */
struct tnode *right; /* right child */
};
char buf[BUFSIZE]; /* buffer for ungetch */
int bufp = 0; /* next free position in buf */
/* char *strdup(char *); */
int getword(char *, int);
struct tnode *addtree(struct tnode *, char *, int);
void tfree(struct tnode *);
void treeprint(struct tnode *);
/* word frequency count */
int main(int argc, char *argv[])
{
struct tnode *root = NULL;
char word[MAXWORD];
int n = 1; /* number of lines */
while (getword(word, MAXWORD) != EOF)
{
if (word[0] == '\n')
n++;
/* if there is a word and it's not a noise */
if (isalpha(word[0]) && IS_NOT_NOISE_WORD(word) && strcmp(word, "quit") != 0 && strcmp(word, "exit") != 0)
root = addtree(root, word, n);
if (!strcmp(word, "quit") || !strcmp(word, "exit"))
break;
}
treeprint(root);
tfree(root);
return 0;
}
/* addtree: add a node with the word w at line l, at or below p */
struct tnode *addtree(struct tnode *p, char *w, int l)
{
int cond;
/* a new word has arrived */
if (p == NULL)
{
/* make a new node */
p = malloc(sizeof(struct tnode));
p->word = strdup(w);
p->count = 1;
p->lines = calloc(p->count + 1, sizeof(int));
p->lines[p->count - 1] = l;
p->left = p->right = NULL;
}
else {
cond = strcmp(w, p->word);
if (cond == 0) {
/* repeated word */
p->count++;
p->lines = realloc(p->lines, p->count + 1 * sizeof(int));
p->lines[p->count - 1] = l;
}
else if (cond < 0) {
/* less than into left subtree */
p->left = addtree(p->left, w, l);
}
else {
/* greater than into right subtree */
p->right = addtree(p->right, w, l);
}
}
return p;
}
/* tfree: free a tnode */
void tfree(struct tnode *p)
{
if (p == NULL)
return;
tfree(p->left);
tfree(p->right);
free(p);
if (p->word != NULL) {
free(p->word);
p->word = NULL;
}
if (p->lines != NULL) {
free(p->lines);
p->lines = NULL;
}
}
/* treeprint: in-order print of tree p */
void treeprint(struct tnode *p)
{
int i;
if (p != NULL) {
treeprint(p->left);
printf("%4d %s: [%d", p->count, p->word, p->lines[0]);
for (i = 1; i < p->count; i++)
printf(", %d", p->lines[i]);
printf("]\n");
treeprint(p->right);
}
}
/* getword: get next word or character from input */
int getword(char *word, int lim)
{
char *w = word;
int c, getch(void);
void ungetch(int);
int in_comment = 0; /* 1 if inside a comment */
int in_pp_line = 0; /* 1 if inside a preprocessor line */
int in_string = 0; /* 1 if inside a string */
/* skip spaces */
while (isspace(c = getch()))
;
if (c != EOF)
*w++ = c;
/* not underscore, pp line, comment, string */
if (!isalpha(c) && c != '_' && c != '\"' && c != '#' && c != '/') {
*w = '\0';
return c;
}
if (c == '\"')
in_string = 1;
if (c == '#')
in_pp_line = 1;
/* it only checks single line comments for now */
if (c == '/') {
if ((c = getch()) == '/')
in_comment = 1;
else
ungetch(c);
}
while (--lim > 0)
{
c = getch();
if (in_comment && (c == '\n'))
in_comment = 0;
if (in_pp_line && (c == '\n'))
in_pp_line = 0;
/* if the char is in a string or in a comment or in a pp line, and is not alphanumeric */
if (!isalnum(c) && c != '_' && (in_string == 1 || c != '\"') && !in_pp_line && !in_comment)
{
ungetch(c);
break;
}
if (c == '/' && *(w - 1) == '/')
in_comment = 1;
if (c == '\"')
in_string = (in_string == 1) ? 0 : 1;
*w++ = c;
}
*w = '\0';
return word[0];
}
/* get a (possibly pushed-back) character */
int getch(void) {
return (bufp > 0) ? buf[--bufp] : getchar();
}
/* push character back on input */
void ungetch(int c) {
if (bufp >= BUFSIZE)
printf("ungetch: too many characters\n");
else
buf[bufp++] = c;
}
Besides the crash issues I don't get why the n count doesn't increase. Is the getword function not returning '\n' at all?

tfree(): It's undefined behavior to deference a pointer after it's freed. Also, there is no point of setting p->word and p->lines to NULL when you free(p).
void tfree(struct tnode *p) {
if (!p)
return;
tfree(p->left);
tfree(p->right);
if (p->word)
free(p->word);
if (p->lines)
free(p->lines);
free(p);
}
addtree(): * has higher precedence than +. It should be:
p->lines = realloc(p->lines, (p->count + 1) * sizeof(int));
but as you increment p->count the line before you just want:
p->lines = realloc(p->lines, p->count * sizeof(int));
There is a similar logic error in the call to calloc().
valgrind is happy after I fix these these two issues, and I cannot reproduce the crash with 10 lines of input.
getword(): line numbers don't advance as you skip the \n from the last line with:
while (isspace(c = getch()))
yet caller expect word[0] to be a '\n' to advance the line number n. Here is a minimal fix:
do {
c = getch();
} while(c != '\n' && isspace(c));
and the output is now:
3 bar: [2, 2, 3]
2 foo: [1, 3]
That said I suggest you have caller read a line with fgets() then split that line into words with a revised version of getwork().
(not fixed) getword(): It's problematic that you 3 separate calls to getch() unless you really want to handle input differently in each case.
addtree(): You currently record duplicate lines for a given word but you want subsequent duplicates lines to be no-op it seems. Also, might as well just use malloc() instead of calloc() as you explicitly set p->lines right after. Prefer using variable instead of type to sizeof().
struct tnode *addtree(struct tnode *p, char *w, int l) {
if (!p) {
/* make a new node */
p = malloc(sizeof *p);
p->word = strdup(w);
p->count = 1;
p->lines = malloc(sizeof *p->lines);
p->lines[p->count - 1] = l;
p->left = NULL;
p->right = NULL;
return p;
}
int cond = strcmp(w, p->word);
if(cond < 0)
p->left = addtree(p->left, w, l);
else if(!cond && l != p->lines[p->count - 1]) {
p->count++;
p->lines = realloc(p->lines, p->count * sizeof(int));
p->lines[p->count - 1] = l;
} else if(cond > 0)
p->right = addtree(p->right, w, l);
return p;
}
and the output is now:
2 bar: [2, 3]
2 foo: [1, 3]
(not fixed) p = realloc(p, ...) leaks p if realloc() fails. It should be:
int *tmp = realloc(p->lines, (p->count + 1) * sizeof(int));
if(!tmp) {
// handle error
return NULL;
}
p->lines = tmp;
(not fixed) malloc(), calloc(), strdup() may fail and return NULL. You want to check for that:
p = malloc(sizeof(struct tnode));
if(!p) {
// handle error
return NULL;
}

Related

Some problem un-static local variable with static fuction in C?

My process is terminated with segmentation fault rarely.
Segmentaion fault point is at line 15 (while (isspace (*b))).
When I analyze coredump, argument funcname is shorter than buff and has valid address.
How is it possible? What should I do?
static char *
strip_funcname (char *name, const char *funcname)
{
char buff[80];
char *e, *b = buff;
/* Sanity check. */
if (name == NULL || funcname == NULL)
return NULL;
strncpy (buff, funcname, sizeof (buff));
buff[sizeof (buff) - 1] = '\0';
/* Skip beginning space. */
while (isspace (*b)) <===== Segmentation fault point
++b;
if (*b == '(')
{
/* funcname == "(Word)". */
e = buff + strlen (buff) - 1;
while (*b == ' ' || *b == '(')
++b;
while (*e == ' ' || *e == ')')
--e;
e++;
*e = '\0';
}
else
{
/* funcname == "Word ()". */
e = strchr (b, '(');
if (e)
*e-- = '\0';
else
e = buff + strlen (buff) - 1;
while (isspace (*e))
*e-- = '\0';
}
strcpy (name, b);
return name;
}

malloc() in C returns populated memory

char *string = (char *) malloc(sizeof(char) * sz);
code right before this->void insert_word(word *root, char string1[], int linenumber) { int sz=strlen(string1)<=MAX_WORD_LENGTH?strlen(string1):MAX_WORD_LENGTH; Code block 3 has the entire context
Sometimes malloc() returns a populated memory location while using it.
What bothers me is that this is not random.
(This program consists of taking words from a file and passing them to this function. For THE SAME WORD, the function behaviour(in particular that of malloc()) is different.
For the inputs
string1=0x7fffffffdf10 "lol" root=BST, sz gets a value of 3
The value allocated to string by malloc() is 0x55555555c510 "\340\305UUUU" Why is malloc not pointing to an empty memory location? (This is not random behaviour, it is predictable and repeatable)
Furthermore,this loop runs an infinite amount of time for some reason
while(strcmp(string1,string)!=0)
{
free(string);
string=NULL;
string = (char *) malloc(sizeof(char) * sz);
strncpy(string,string1,sz);
}
MORE RELAVANT CODE
#define MAX_WORD_LENGTH 20
Definition of the structures
typedef struct linkedList
{
int number;
struct linkedList *next;
}list;
typedef struct word_with_count
{
char* string;
list *linenumbers;
struct word_with_count *left;
struct word_with_count *right;
}word;```
[3] ) The function
void insert_word(word *root, char string1[], int linenumber) {
int sz=strlen(string1)<=MAX_WORD_LENGTH?strlen(string1):MAX_WORD_LENGTH;
char *string = (char *) malloc(sizeof(char) * sz);
strncpy(string,string1,sz);
if (root==NULL) {
return;
} else if (strcmp(string, root->string) < 0) {
if (root->left == NULL) {
root->left = createword(string, linenumber);
} else {
insert_word(root->left, string, linenumber);
}
} else if (strcmp(string, root->string) > 0) {
if (root->right == NULL) {
root->right = createword(string, linenumber);
} else {
insert_word(root->right, string, linenumber);
}
} else {
append_list(linenumber, root->linenumbers);
}
free(string);
}
main() which calls this function
int main() {
char path[MAX_PATH_LENGTH];
FILE *fp;
fgets(path, MAX_PATH_LENGTH, stdin);
if (strlen(path) > 0 && path[strlen(path) - 1] == '\n')
path[strlen(path) - 1] = '\0';
fp = fopen(path, "r");
if (fp == NULL) {
printf("File not found\n");
return 0;
}
char ch;
int line_count = 1;
char current_word[MAX_WORD_LENGTH] = "";
word *root = NULL;
while (!feof(fp)) {
ch = fgetc(fp);
//printf("%c", ch);
if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
if (ch >= 'A' && ch <= 'Z')
ch = ch - 'A' + 'a';
strncat(current_word, &ch, 1);
} else if (ch == '-') {
continue;
} else {
if (strlen(current_word) > 2) {
if (root == NULL) {
root = createword(current_word, line_count);
} else {
insert_word(root, current_word, line_count);
}
}
memset(current_word, 0, sizeof(current_word));
if (ch == '\n') {
line_count++;
}
}
}
if (strlen(current_word) > 2) {
if (root == NULL) {
root = createword(current_word, line_count);
} else {
insert_word(root, current_word, line_count);
}
}
fclose(fp);
// print_tree(root);
//printf("\n");
//print_tree(root);
int status=delete_low_ocurrence(root, NULL, 3);
if (status == -1)root = NULL;
print_tree(root);
freetree(root);
return 0;
}
5)Auxilary function used by this function
word* createword(char string[], int linenumber)
{
word *newword = (word*)malloc(sizeof(word));
int sz=strlen(string)<=MAX_WORD_LENGTH?strlen(string):MAX_WORD_LENGTH;
newword->string = (char*)malloc(sizeof(char)*sz);
strncpy(newword->string, string,sz);
newword->linenumbers = (list*)malloc(sizeof(list));
newword->linenumbers->number = linenumber;
newword->linenumbers->next = NULL;
newword->left = NULL;
newword->right = NULL;
return newword;
}
Textfile given as input
much2f
much3f
lol
lol
lol
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
qwertyuiopasdfghjklzxcvbnmqwertyuiop
Why is malloc not pointing to an empty memory location?
Because it can. The content of the allocated memory via malloc() is not specified.
If code needs zeroed out memory, see calloc().
Bad code
strncpy(string,string1,sz) does not result in string being a string as it may lack null character termination. The following (strcmp(string... is then undefined behavior. Instead, do not use strncpy(), use strcpy() and make certain the prior allocation has enough room for the terminating null character.
strncpy(string,string1,sz);
...
} else if (strcmp(string, root->string) < 0) { // bad
Repaired code
word* createword(const char string[], int linenumber) {
word *newword = calloc(1, sizeof *newword);
size_t length = strlen(string);
if (length > MAX_WORD_LENGTH) {
length = MAX_WORD_LENGTH;
}
char *s = malloc(length + 1); // Include room for the \0
list *linenumbers = calloc(1, sizeof *linenumbers);
// Test allocation success
if (newword == NULL || s == NULL || linenumbers == NULL) {
free(newword);
free(s);
free(linenumbers);
return NULL;
}
memcpy(s, string, length); // Only copy the first 'length' characters.
s[length] = 0;
newword->string = s;
newword->linenumbers = linenumbers;
newword->linenumbers->number = linenumber;
newword->linenumbers->next = NULL;
newword->left = NULL;
newword->right = NULL;
return newword;
}
Why is “while ( !feof (file) )” always wrong?
feof(fp) improperly used here. fgetc() returns 257 different values. Do not use char ch.
//char ch;
//...
//while (!feof(fp)) {
// ch = fgetc(fp);
int ch;
...
while ((ch = fgetc(fp)) != EOF) {;
This is quite normal behaviour. 'malloc' just does the memory allocation, it makes no commitment on what's already in that memory location. What you probably need is 'calloc', which clears the memory and then allocates it to your program.

Printing top 10 recurring words in a file

Edited question:
Hi guys, my goal is to print the top 10 occurring words in a file, I have managed to get everything to work from reading the file to counting word occurrences and printing it, but when I implement my qsort I get a segfault. I looked over my pointers and they look okay to me, I would appreciate any feedback.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX 51
struct words
{
char *ch;
int index;
struct words *pNext;
};
struct words* createWordCounter(char *ch)
{
struct words *pCounter = NULL;
pCounter = (struct words*)malloc(sizeof(char));
pCounter->ch = (char*)malloc(strlen(ch)+1);
strcpy(pCounter->ch, ch);
pCounter->index = 1;
pCounter->pNext = NULL;
return pCounter;
}
struct words *pStart = NULL;
char* removePunc(struct words* ch)
{
char *src = ch, *dst = ch;
while (*src)
{
if (ispunct((unsigned char)*src))
{
src++;
}
else if (isupper((unsigned char)*src))
{
*dst++ = tolower((unsigned char)*src);
src++;
}
else if (src == dst)
{
src++;
dst++;
}
else
{
*dst++ = *src++;
}
}
*dst = 0;
}
void addWord(char *word)
{
struct words *pCounter = NULL;
struct words *pLast = NULL;
if(pStart == NULL)
{
pStart = createWordCounter(word);
return;
}
pCounter = pStart;
while(pCounter != NULL)
{
if(strcmp(word, pCounter->ch) == 0)
{
++pCounter->index;
return;
}
pLast = pCounter;
pCounter = pCounter->pNext;
}
pLast->pNext = createWordCounter(word);
}
void printWord(struct words *pCounter)
{
printf("\n%-30s %5d\n", pCounter->ch, pCounter->index);
}
//sort
int compare (const void * a, const void * b){
struct words *A1 = (struct words *)a;
struct words *B1 = (struct words *)b;
return B1->index - A1->index;
/*
if ((A1->count - B1->count) > 0)
return -1;
else if ((A1->count - B2->count) < 0)
return 1;
else
return 0;
*/
}
int main(int argc, char * argv[])
{
struct words *pCounter = NULL;
char temp[MAX];
FILE *fpt;
if(argc == 2)
{
printf("File name is: %s\n",argv[1]);
fpt = fopen(argv[1], "r");
//fail test
if(fpt == NULL)
{
printf("cannot open file, exiting program...\n");
exit(0);
}
//get the data out of the file and insert in struct
int wordCounter = 0;
int i = 0;
int lines = 0;
while((fscanf(fpt, "%s ", &temp)) == 1)
{
removePunc(temp);
addWord(temp);
if(temp == ' ')
i++;
if(temp == '\n')
lines++;
wordCounter++;
}
/*
pCounter = pStart;
while(pCounter != NULL)
{
printWord(pCounter);
pCounter = pCounter->pNext;
}
*/
//sort
qsort(pCounter, wordCounter, sizeof(struct words), compare);
for(int j = 0; i < 10; i++)
{
printWord(pCounter);
}
}
fclose(fpt);
return 0;
}
First temp is already a pointer, so do not include '&' before it in fscanf. Second, don't skimp on buffer size (e.g. #define MAX 1024). Third, protect your array bounds with the field-width modifier and don't put trailing whitespace in your format-string.
Putting it altogether (presuming you use 1024 as MAX, you can use
fscanf(fpt, "1023%s", temp))
Well done on checking the return of fscanf during your read.
Adding to the things that have already been mentioned.
In createWordCounter(...)
pCounter = (struct words*)malloc(sizeof(char));
you are allocating memory for a char. Even though the pointer to a struct is the pointer to its first member, the first element of words is a pointer to a char. It is better to be careful and write
struct words *pCounter = malloc(sizeof *pCounter);
Also, be mindful of operator precedence.
In addWord(...) you have
++pCounter->index;
What that does is increment the pointer pCounter before accessing index. If you are trying to increment index, it should be
++(pCounter->index);
or
pCounter->index++;
I recommend striping your program down to its bare essentials and test each part one at a time systematically to narrow down the cause of your errors.
I think the main problem is the size of temp array when you try to using fscanf.
while((fscanf(fpt, "%s ", temp)) == 1)
When the length of one line is bigger than MAX, segmentation fault occur.
You can change your code like this
#define SCANF_LEN2(x) #x
#define SCANF_LEN(x) SCANF_LEN2(x)
//...
//your original code
//...
while((fscanf(fpt, "%"SCANF_LEN(MAX)"s ", temp)) == 1)
By the way, you should check
(1) compile warning about type
char* removePunc(struct words* ch)
should be char* removePunc(char *ch)
if(temp == ' ') should be if(temp[0] == ' ')
if(temp == '\n') should be if(temp[0] == '\n')
(2) malloc size
pCounter = (struct words*)malloc(sizeof(char)); should be pCounter = (struct words*)malloc(sizeof(struct words));
(3) remember free after using malloc

(char*)malloc(sizeof(char)) causing segmentation fault, how?

The code works fine on most inputs, but for userID's whih are very long I get a segmentation fault. My question is, how can malloc cause a segmentation fault? simply allocating memory shouldn't cause this. I found the problem area by using printf() statements, it seem the malloc within my read_line() function is where the problem is because the second "read_line" does not print, but the first before the malloc does.
thank you.
- Chris
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " " /* the delimiter */
#define MAX_CHANGE (10.0/86400.0) /* 10kg/day */
/* seconds in a day is 24 hours * 60 minutes * 60 seconds */
#define MEM_OUT printf("%s","out of memory");
/* Defines Node structure. */
struct Node{
char *id;
float weight;
int time;
struct Node *next;
} *head, *p, *t, *last;
/* Constructor which returns a pointer to a new node. */
struct Node * new_node(int time, char *id, float w)
{ /*note malloc returns a pointer */
struct Node *node = (struct Node *)malloc(sizeof(struct Node));
node->time = time;
node->id = (char *)malloc( (strlen(id) + 1) * sizeof(char));
strcpy(node->id, id); //duplicate the id, so new node has own copy.
node->weight = w;
node->next = NULL;
return node;
}
/* reads in line of characters until either a EOF or '\n' is encountered
then places a the terminator '\0' at the end */
char * read_line(FILE *stream)
{
printf("read_line");
char * temp = (char*)malloc(sizeof(char));
printf("read_line");
char * line = (char*)malloc(sizeof(char));
char c;
*line = '\0';
int i = 1;
//strchr()
while( (c = getc(stream)) != EOF && c != '\n')
{
//if(c == EOF) return NULL;
//realloc(line,++i);
strcpy(temp,line);
line = malloc(++i * sizeof(char));
strcpy(line,temp);
temp = malloc(i * sizeof(char));
*(line + (i-1)) = '\0';
*(line + (i-2)) = c;
}
free(temp);
if( i == 1) return NULL;
return line;
}
main() {
int lasttime = 0, timestamp, duration, tokens;
char * userID = NULL;
char * lastuserID = NULL;
char * line = NULL;
float weight,lastweight,change,changePerTime;
head = new_node(0,"",0.0);
last = head;
FILE *fp = fopen("C:\\Users\\chris\\Desktop\\School\\York\\cse\\2031 Software Tools\\Labs\\6\\input.txt","r");
while( (line = read_line(fp)) != '\0') {
printf("%s\n",line);
//free(userID);
line = strtok(line, " \n");
if (line == NULL || sscanf(line,"%d",&timestamp) < 1 || timestamp == 0){
printf("%s\n","Invalid time");
continue;
}
line = strtok(NULL, " \n");
if(line == NULL || isdigit(line[0]) || line[0] == '.') {
printf("Illegal userID");
//free(line);
continue;
}
userID = (char * )malloc( (strlen(line)+1) * sizeof(char));
strcpy(userID,line);
strcat(userID," ");
do{
line = strtok(NULL," \n");
if(line != NULL && !isdigit(line[0]) && line[0] != '.'){
strcat(userID,line ); // adds ' ' and '\0'
strcat(userID," ");
}
}while(line != NULL && line[0] != '.' && !isdigit(line[0]) );
userID[strlen(userID)-1] = '\0'; //erases the tailing space.
if(strlen(userID) > 179){
printf("Illegal userID\n");
printf("mid");
continue;
printf("%s\n","after" );
}
if(line != NULL)
tokens = sscanf(line,"%f", &weight);
if(line == NULL || tokens < 1 || weight < 30.0 || weight > 300.0)
{printf("Illegal weight\n"); continue; }
if (lasttime >= timestamp){
printf("Nonmonotonic timestamps\n");
continue;
}
lasttime = timestamp;
// record is valid apst this point.
/* t = last occurence of this userID, p = last userID*/
for(p = head, t = NULL; p != NULL; p = p->next)
{
if(strcmp(userID,p->id) == 0)
t=p;
last = p; // set last to last p.
}
if(t == NULL)
printf("OK newuser\n");
else if(t != NULL)
{
duration = timestamp - t->time;
change = weight - t->weight;
changePerTime = change / duration;
if(changePerTime < -MAX_CHANGE || changePerTime > MAX_CHANGE)
printf("Suspiciously large weight change\n");
else
printf("OK\n");
}
/* add new node to end of list */
last->next = new_node(timestamp,userID,weight);
/* update lastnode */
last = last->next;
free(line);
}
fclose(fp);
/* count sum of id's for last valid user*/
int count=0;
for(p = head->next; p !=NULL; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
count++;
}
//fclose(f); // use if input from file is uncommented
// adds 1 to both demensions to hole axis
int tHeight = 11;
int tWidth = count + 1;
int qHeight = 10;
int qWidth= count;
/* builds the barchart */
char bc[tHeight][tWidth]; // + 1 for y-axis
/* draws axis and orgin */
int a,b;
for(a=0; a<tHeight; a++)
{
for(b=0;b<tWidth; b++)
{
if(a == qHeight && b == 0)
bc[a][b] = '+';
else if(a < tHeight && b == 0)
bc[a][b] = '|';
else if(a == qHeight && b > 0)
bc[a][b] = '-';
}
}
/* prints the bars */
int j=1, i, k, bh;
for(p = head; p != NULL, j < tWidth; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
{
for(i = 9, k=0, bh = (int)(p->weight / 30);i >= 0; i--)
{
if(k < bh)
{
bc[i][j] = '*';
k++; // barheight
}
else
bc[i][j] = ' ';
}
j++;
}
}
/* prints the barchart */
int m, n;
for(m=0; m < tHeight; m++)
{
for(n=0; n < tWidth; n++)
{
printf("%c",bc[m][n]);
}
printf("%c",'\n');
}
}
The malloc calls are not causing a segmentation fault. But your use of them later on could be.
Some Items of Note
Your printf("read line") statements will not print out immediately when called because the output is buffered. If you want them to print right away, do printf("read line\n"). You'll then see that both execute and your code that uses the tiny buffer you allocated will cause the crash.
In your while loop, you are doing more malloc calls and assigning the returns to variables, like temp and line, without freeing the prior memory pointers that temp and line hold, thus causing some memory leaks. Your commented out realloc was the better thought process: line = realloc(line, ++i * sizeof(*line));. Similarly for temp.
Memory Allocation Problem
One very problematic area is here:
userID = (char * )malloc( (strlen(line)+1) * sizeof(char));
strcpy(userID,line);
strcat(userID," ");
userID can hold the length of the string in line (strlen(line)) plus one more byte. But that one more byte is needed for the null terminator. Your strcat(userID, " ") will write past the length of the allocated buffer for userID by one byte.
I solved the problem! The issue wasn't the read_line function at all, it was the memory allocation for the userID string. Moving the malloc() for the user to the beginning of the loop fixed the problem.
The amount of memory allocated for the userID portion of the line was based on the the length of the whole line. eg: malloc(strlen(line)+2). However this was done after calling strtok() on the line a few times, which would allocate a memory block shorter than the length of the entire line. This is because strtok() places null terminators '\0' 's at every instance of the specified delimiter in the line, and strlen() only counts the length from the passed character pointer to the first '\0' it encounters.
Anyway, thanks for your help guys!
-Chris
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#define DELIM " " /* the delimiter */
#define MAX_CHANGE (10.0/86400.0) /* 10kg/day */
/* seconds in a day is 24 hours * 60 minutes * 60 seconds */
/* Defines Node structure. */
struct Node{
char *id;
float weight;
int time;
struct Node *next;
} *head, *p, *t, *last;
/* Constructor which returns a pointer to a new node. */
struct Node * new_node(int time, char *id, float w)
{ /*note malloc returns a pointer */
struct Node *node = (struct Node *)malloc(sizeof(struct Node));
node->time = time;
node->id = malloc(strlen(id) + 1);
strcpy(node->id, id); //duplicate the id, so new node has own copy.
node->weight = w;
node->next = NULL;
return node;
}
/* reads in line of characters until either a EOF or '\n' is encountered
then places a the terminator '\0' at the end */
char * read_line(FILE *in)
{
int i = 1;
char * s = NULL;
char c;
do{
s = realloc(s,i); //strlen does not work on NULL strings
if(s == NULL || s == "")
{
printf("%s\n","out of memory");
exit(1);
}
*(s + (i-1)) = '\0'; // ensures null terminated
if(i > 1)
*(s + (i-2)) = c;
i++;
}
while( (c = getc(in)) != EOF && c != '\n' );
if (c == '\n')
return s;
else if(c == EOF)
return NULL;
}
main() {
int lasttime = 0, timestamp, duration, tokens;
char * userID = NULL;
char * lastuserID = NULL;
char * line = NULL;
float weight,lastweight,change,changePerTime;
head = new_node(0,"",0.0);
last = head;
FILE *fp = fopen("C:\\Users\\chris\\Desktop\\School\\York\\cse\\2031 Software Tools\\Labs\\6\\tests\\04.in","r");
while((line = read_line(fp)) != NULL) {
userID = malloc(strlen(line)+2); // max userID length is line length**
line = strtok(line, " \n");
if (line == NULL || sscanf(line,"%d",&timestamp) < 1 || timestamp == 0){
printf("%s\n","Invalid time");
continue;
}
line = strtok(NULL, " \n");
if(line == NULL || isdigit(line[0]) || line[0] == '.') {
printf("%s\n","Illegal userID");
//free(line);
continue;
}
strcpy(userID,line);
strcat(userID," ");
do{
line = strtok(NULL," \n");
if(line != NULL && !isdigit(line[0]) && line[0] != '.'){
strcat(userID,line ); // adds ' ' and '\0'
strcat(userID," ");
}
}while(line != NULL && line[0] != '.' && !isdigit(line[0]) );
userID[strlen(userID)-1] = '\0'; //erases the tailing space.
if(strlen(userID) > 179){
printf("Illegal userID\n");
free(userID);
free(line);
continue;
}
if(line != NULL)
tokens = sscanf(line,"%f", &weight);
if(line == NULL || tokens < 1 || weight < 30.0 || weight > 300.0)
{printf("Illegal weight\n"); continue; }
if (lasttime >= timestamp){
printf("Nonmonotonic timestamps\n");
continue;
}
lasttime = timestamp;
// record is valid apst this point.
/* t = last occurence of this userID, p = last userID*/
for(p = head, t = NULL; p != NULL; p = p->next)
{
if(strcmp(userID,p->id) == 0)
t=p;
last = p; // set last to last p.
}
if(t == NULL)
printf("OK newuser\n");
else if(t != NULL)
{
duration = timestamp - t->time;
change = weight - t->weight;
changePerTime = change / duration;
if(changePerTime < -MAX_CHANGE || changePerTime > MAX_CHANGE)
printf("Suspiciously large weight change\n");
else
printf("OK\n");
}
/* add new node to end of list */
last->next = new_node(timestamp,userID,weight);
/* update lastnode */
last = last->next;
free(line);
} // end of input loop
fclose(fp);
/* count sum of id's for last valid user*/
int count=0;
for(p = head->next; p !=NULL; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
count++;
}
//fclose(f); // use if input from file is uncommented
// adds 1 to both demensions to hole axis
int tHeight = 11;
int tWidth = count + 1;
int qHeight = 10;
int qWidth= count;
/* builds the barchart */
char bc[tHeight][tWidth]; // + 1 for y-axis
/* draws axis and orgin */
int a,b;
for(a=0; a<tHeight; a++)
{
for(b=0;b<tWidth; b++)
{
if(a == qHeight && b == 0)
bc[a][b] = '+';
else if(a < tHeight && b == 0)
bc[a][b] = '|';
else if(a == qHeight && b > 0)
bc[a][b] = '-';
}
}
/* prints the bars */
int j=1, i, k, bh;
for(p = head; p != NULL, j < tWidth; p=p->next)
{
if(strcmp(last->id,p->id) == 0)
{
for(i = 9, k=0, bh = (int)(p->weight / 30);i >= 0; i--)
{
if(k < bh)
{
bc[i][j] = '*';
k++; // barheight
}
else
bc[i][j] = ' ';
}
j++;
}
}
/* prints the barchart */
int m, n;
for(m=0; m < tHeight; m++)
{
for(n=0; n < tWidth; n++)
{
printf("%c",bc[m][n]);
}
printf("%c",'\n');
}
}

String Search and format in C

Just a quick one: in C I have a buffer full of data like below:
char buffer[255]="CODE=12345-MODE-12453-CODE1-12355"
My question is how to search through this. For example for the CODE=12345, section bear in mind that the numbers change, so I would like to search like this CODE=***** using wildcard or preset amount of spaces after the CODE= part.
This method wont compile last one left to try
#include <stdio.h>
#include <string.h>
#include <windows.h>
int main ()
{
char buf[255]="CODE=12345-MODE-12453-CODE1-12355";
#define TRIMSPACES(p) while(*p != '\0' && isspace((unsigned char)*p) != 0) ++p
#define NSTRIP(p, n) p += n
#define STRIP(p) ++p
char* getcode(const char *input)
{
char *p = (char*) input, *buf, *pbuf;
if((buf = malloc(256)) == NULL)
return NULL;
pbuf = buf;
while(*p != '\0') {
if(strncmp(p, "CODE", 3) == 0) {
NSTRIP(p, 4); //remove 'code'
TRIMSPACES(p);//trim white-space after 'code'
if(*p != '=')
return NULL;
STRIP(p); // remove '='
TRIMSPACES(p); //trim white-spaces after '='
/* copy the value until found a '-'
note: you must be control the size of it,
for avoid overflow. we allocated size, that's 256
or do subsequent calls to realloc()
*/
while(*p != '\0' && *p != '-')
*pbuf ++ = *p++;
// break;
}
p ++;
}
//put 0-terminator.
*pbuf ++ = '\0';
return buf;
}
//
}
You could use the sscanf() function:
int number;
sscanf(buffer, "CODE = %i", &number);
for that to work well your buffer has to be null terminated.
Another way to do it instead of sscanf():
char *input, *code;
input = strstr(buf, "CODE");
if(input == NULL) {
printf("Not found CODE=\n");
return -1;
}
code = strtok(strdup(input), "=");
if(code != NULL) {
code = strtok(NULL, "-");
printf("%s\n", code); // code = atoi(code);
} else {
//not found '='
}
Or more robust way.. a bit more complex:
#define TRIMSPACES(p) while(*p != '\0' && isspace((unsigned char)*p) != 0) ++p
#define NSTRIP(p, n) p += n
#define STRIP(p) ++p
char* getcode(const char *input, size_t limit)
{
char *p = (char*) input, *buf, *pbuf;
size_t i = 0;
while(*p != '\0') {
if(strncmp(p, "CODE", 3) == 0) {
NSTRIP(p, 4); //remove 'code'
TRIMSPACES(p);//trim all white-spaces after 'code'
/* check we have a '=' after CODE (without spaces).
if there is not, returns NULL
*/
if(*p != '=')
return NULL;
/* ok. We have.. now we don't need of it
just remove it from we output string.
*/
STRIP(p);
/* remove again all white-spaces after '=' */
TRIMSPACES(p);
/* the rest of string is not valid,
because are white-spaces values.
*/
if(*p == '\0')
return NULL;
/* allocate space for store the value
between code= and -.
this limit is set into second parameter.
*/
if((buf = malloc(limit)) == NULL)
return NULL;
/* copy the value until found a '-'
note: you must be control the size of it,
for don't overflow. we allocated 256 bytes.
if the string is greater it, do implementation with
subjecents call to realloc()
*/
pbuf = buf;
while(*p != '\0' && *p != '-' && i < limit) {
*pbuf ++ = *p++;
i ++;
}
*pbuf ++ = '\0';
return buf;
}
p ++;
}
return NULL;
}
And then:
char buf[255] = "foo baa CODE = 12345-MODE-12453-CODE-12355";
char *code = getcode(buf,256);
if(code != NULL) {
printf("code = %s\n", code);
free(code);
} else {
printf("Not found code.\n");
}
output:
code = 12345
Check out this online.
if you want to don't differentiate case, you can use the strncasecmp() that's POSIX function.
Assuming the CODE= part always comes at the beginning of the string, it's pretty easy:
sscanf(buffer, "CODE = %d", &number);
...but you want buffer to be char[255], not unsigned long.
Edit: If the CODE= part isn't necessarily at the beginning of the string, you can use strstr to find CODE in the buffer, do your sscanf starting from that point, then look immediately following that:
int codes[256];
char *pos = buffer;
size_t current = 0;
while ((pos=strstr(pos, "CODE")) != NULL) {
if (sscanf(pos, "CODE = %d", codes+current))
++current;
pos += 4;
}
Edit2:
For example, you'd use this something like this:
#include <stdio.h>
#include <string.h>
#include <windows.h>
int main ()
{
// This is full of other junk as well
char buffer[255]="CODE=12345 MODE-12453 CODE=12355" ;
int i;
int codes[256];
char *pos = buffer;
size_t current = 0;
while ((pos=strstr(pos, "CODE")) != NULL) {
if (sscanf(pos, "CODE = %d", codes+current))
++current;
pos += 4;
}
for (i=0; i<current; i++)
printf("%d\n", codes[i]);
return 0;
}
For me, this produces the following output:
12345
12355
...correctly reading the two "CODE=xxx" sections, but skipings over the "MODE=yyy" section.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *getcode(const char *str, const char *pattern){
//pattern: char is match, space is skip, * is collect
static const char *p=NULL;
char *retbuf, *pat;
int i, match, skip, patlen;
if(str != NULL) p=str;
if(p==NULL || *p=='\0') return NULL;
if(NULL==(retbuf=(char*)malloc((strlen(p)+1)*sizeof(char))))
return NULL;
pat = (char*)pattern;
patlen = strlen(pat);
i = match = skip = 0;
while(*p){
if(isspace(*p)){
++p;
++skip;
continue;
}
if(*pat){
if(*p == *pat){
++match;
++p;
++pat;
} else if(*pat == '*'){
++match;
retbuf[i++]=*p++;
++pat;
} else {
if(match){//reset
pat=(char*)pattern;
p -= match + skip -1;
i = match = skip = 0;
} else //next
++p;
}
} else {
break;
}
}
if(i){//has match
retbuf[i++]='\0';
retbuf=realloc(retbuf, i);
return retbuf;
} else {
free(retbuf);
return NULL;
}
}
int main (){
char *code;
code=getcode("CODE=12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(" CODE = 12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode("CODE-12345-MODE-12453-CODE1-12355", "CODE=*****");
if(code==NULL)printf("not match\n");//not match
code=getcode("CODE=12345-MODE-12453-CODE=12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(NULL, "CODE=*****");
printf("\"%s\"\n",code);//"12355"
free(code);
code=getcode("CODE=12345-MODE-12453-CODE1-12355", "CODE=*****");
printf("\"%s\"\n",code);//"12345"
free(code);
code=getcode(NULL, "CODE1-*****");
printf("\"%s\"\n",code);//"12355"
free(code);
return 0;
}

Resources