While Loop stops before condition - c

I have a problem with my code. I want to load a dictionary which works fine with a small one. But when i try to load the larger version, my while loop stops at the 701th word which is " acclimatization" and then the programs continues. I searched a lot on forums and tried a lot of things, but i just can't find the reason this is caused. Does anyone have an idea of how this occurs?
Dictionary.c
bool load(const char* dictionary)
{
// reserve space for word
char* word = malloc(sizeof(char*));
// open file
FILE* dict = fopen(dictionary, "r");
if (dict == NULL)
{
fclose(dict);
fprintf(dict, "Could not load %s.\n", dictionary);
return 1;
}
root = (struct node *) malloc(sizeof(struct node));
root->is_word = false;
//Loops over word aslong the EOF is not reached
while (fgets(word,LENGTH,dict) != NULL)
{
printf("word = %s\n", word);
int word_length = strlen(word) -1;
node* current = root;
word_count++;
//Loops over letters
for (int i = 0; i < word_length; i++)
{
int index;
node *next_node;
// checks if letter isnt a apostrophe
if(word[i] == 39)
{
index = MAX_CHARS - 1;
}
// gets nummeric value of letter
else
{
index = tolower(word[i]) - 'a';
}
next_node = current->children[index];
// creates new node if letter didnt exists before
if(next_node == NULL)
{
next_node = malloc(sizeof(node));
current->children[index] = next_node;
current->is_word = false;
printf("new letter: %c\n", word[i]);
}
else
{
printf("letter: %c\n", word[i]);
}
// checks for end of the word
if(i == word_length - 1)
{
next_node->is_word = true;
}
current = next_node;
}
}
return true;
}
The node is defined by:
// node
typedef struct node
{
bool is_word;
struct node* children[27];
}
node;

char* word = malloc(sizeof(char*));
Depending on platform it can be 4 or 8 . You need to allocate more memory.
char* word;
word = malloc(LENGTH); // LENGTH as you use it here while (fgets(word,LENGTH,dict) != NULL)
if(word!=NULL){ // and checking if malloc is successful
// your code
free(word); // freeing allocated memory
return true;
}
else { // executed only if malloc fails
//handle error
}
You can give any desired size.
Note - Using function free() , you need to free every time you allocate memory.

You allocate very little space for word, it's probably 8 or 4 bytes depending on your platform.
You are allocating space for 1 char pointer, so when you read from the file LENGTH characters you can be storing bytes beyond the limits of the allocated buffer. The problem is, that the behavior is undefined thus the program might work or it might stop or anything can happen.
You don't need to allocate it dynamically, just like this it's ok
char word[100];
while (fgets(word, sizeof(word), file) != NULL) ...
/* ^ this only works with arrays, */
/* the benefit is that you can */
/* change the definition of word */
/* and resize it without changing */
/* this part. */
/* */
/* It will NOT work if you use `malloc()' */
Also, you would have a memory leak if fopen() failes, every malloc() requires a corresponding free().
Suggestion:
for (int i = 0; i < word_length; i++)
can be written like this too
for (int i = 0; ((word[i] != '\n') && (word[i] != '\0')); i++)
and you avoid calling strlen() which will also iterate through the characters.

Related

Count the number of occurrences of each word

I'm trying to count the number of occurrences of each word in the function countWords I believe i started the for loop in the function properly but how do I compare the words in the arrays together and count them and then delete the duplicates? Isn't it like a fibonacci series or am I mistaken? Also int n has the value of 756 because thats how many words are in the array and wordsArray are the elements in the array.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
int *countWords( char **words, int n);
int main(int argc, char *argv[])
{
char buffer[100]; //Maximum word size is 100 letters
FILE *textFile;
int numWords=0;
int nextWord;
int i, j, len, lastChar;
char *wordPtr;
char **wordArray;
int *countArray;
int *alphaCountArray;
char **alphaWordArray;
int *freqCountArray;
char **freqWordArray;
int choice=0;
//Check to see if command line argument (file name)
//was properly supplied. If not, terminate program
if(argc == 1)
{
printf ("Must supply a file name as command line argument\n");
return (0);
}
//Open the input file. Terminate program if open fails
textFile=fopen(argv[1], "r");
if(textFile == NULL)
{
printf("Error opening file. Program terminated.\n");
return (0);
}
//Read file to count the number of words
fscanf(textFile, "%s", buffer);
while(!feof(textFile))
{
numWords++;
fscanf(textFile, "%s", buffer);
}
printf("The total number of words is: %d\n", numWords);
//Create array to hold pointers to words
wordArray = (char **) malloc(numWords*sizeof(char *));
if (wordArray == NULL)
{
printf("malloc of word Array failed. Terminating program.\n");
return (0);
}
//Rewind file pointer and read file again to create
//wordArray
rewind(textFile);
for(nextWord=0; nextWord < numWords; nextWord++)
{
//read next word from file into buffer.
fscanf(textFile, "%s", buffer);
//Remove any punctuation at beginning of word
i=0;
while(!isalpha(buffer[i]))
{
i++;
}
if(i>0)
{
len = strlen(buffer);
for(j=i; j<=len; j++)
{
buffer[j-i] = buffer[j];
}
}
//Remove any punctuation at end of word
len = strlen(buffer);
lastChar = len -1;
while(!isalpha(buffer[lastChar]))
{
lastChar--;
}
buffer[lastChar+1] = '\0';
//make sure all characters are lower case
for(i=0; i < strlen(buffer); i++)
{
buffer[i] = tolower(buffer[i]);
}
//Now add the word to the wordArray.
//Need to malloc an array of chars to hold the word.
//Then copy the word from buffer into this array.
//Place pointer to array holding the word into next
//position of wordArray
wordPtr = (char *) malloc((strlen(buffer)+1)*sizeof(char));
if(wordPtr == NULL)
{
printf("malloc failure. Terminating program\n");
return (0);
}
strcpy(wordPtr, buffer);
wordArray[nextWord] = wordPtr;
}
//Call countWords() to create countArray and replace
//duplicate words in wordArray with NULL
countArray = countWords(wordArray, numWords);
if(countArray == NULL)
{
printf("countWords() function returned NULL; Terminating program\n");
return (0);
}
//Now call compress to remove NULL entries from wordArray
compress(&wordArray, &countArray, &numWords);
if(wordArray == NULL)
{
printf("compress() function failed; Terminating program.\n");
return(0);
}
printf("Number of words in wordArray after eliminating duplicates and compressing is: %d\n", numWords);
//Create copy of compressed countArray and wordArray and then sort them alphabetically
alphaCountArray = copyCountArray(countArray, numWords);
freqCountArray = copyCountArray(alphaCountArray, numWords);
int *countWords( char **wordArray, int n)
{
return NULL;
int i=0;
int n=0;
for(i=0;i<n;i++)
{
for(n=0;n<wordArray[i];n++)
{
}
}
}
Assuming you want the return value of countWords to be an array of integers with word counts for each unique word, you need to have a double loop. One loop goes over the whole array, the second loop goes through the rest of the array (after the current word), looking for duplicates.
You could do something like this pseudo code:
Allocate the return array countArray (n integers)
Loop over all words (as you currently do in your `for i` loop)
If the word at `i` is not null // Check we haven't already deleted this word
// Found a new word
Set countArray[i] to 1
Loop through the rest of the words e.g. for (j = i + 1; j < n; j++)
If the word at j is not NULL and matches the word at i (using strcmp)
// Found a duplicate word
Increment countArray[i] (the original word's count)
// We don't want wordArray[j] anymore, so
Free wordArray[j]
Set wordArray[j] to NULL
Else
// A null indicates this was a duplicate, set the count to 0 for consistency.
Set countArray[i] to 0
Return wordArray
I'm going to throw you a bit of a curve ball here.
Rather than fix your code, which can be easily fixed as it's pretty good on its own, but incomplete, I decided to write an example from scratch.
No need to read the file twice [first time just to get the maximum count]. This could be handled by a dynamic array and realloc.
The main point, I guess, is that it is much easier to ensure that word list has no duplicates while creating it, rather than removing duplicates at the end.
I opted for a few things.
I created a "word control" struct. You've got several separate arrays that are indexed the same way. That, sort of, "cries out" for a struct. That is, rather than [say] 5 separate arrays, have a single array of a struct that has 5 elements in it.
The word list is a linked list of these structs. It could be a dynamic array on the heap that gets realloced instead, but the linked list is actually easier to maintain for this particular usage.
Each struct has the [cleaned up] word text and a count of the occurrences (vs. your separate wordArray and countArray).
When adding a word, the list is scanned for an existing match. If one is found, the count is incremented, rather than creating a new word list element. That's the key to eliminating duplicates [i.e. don't create them in the first place].
Anyway, here it is:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#define sysfault(_fmt...) \
do { \
printf(_fmt); \
exit(1); \
} while (0)
// word control
typedef struct word {
struct word *next; // linked list pointer
char *str; // pointer to word string
int count; // word frequency count
} word_t;
word_t wordlist; // list of words
// cleanword -- strip chaff and clean up word
void
cleanword(char *dst,const char *src)
{
int chr;
// NOTE: using _two_ buffers in much easier than trying to clean one
// buffer in-place
for (chr = *src++; chr != 0; chr = *src++) {
if (! isalpha(chr))
continue;
chr = tolower(chr);
*dst++ = chr;
}
*dst = 0;
}
// addword -- add unique word to list and keep count of number of words
void
addword(const char *str)
{
word_t *cur;
word_t *prev;
char word[1000];
// get the cleaned up word
cleanword(word,str);
// find a match to a previous word [if it exists]
prev = NULL;
for (cur = wordlist.next; cur != NULL; cur = cur->next) {
if (strcmp(cur->str,word) == 0)
break;
prev = cur;
}
// found a match -- just increment the count (i.e. do _not_ create a
// duplicate that has to be removed later)
if (cur != NULL) {
cur->count += 1;
return;
}
// new unique word
cur = malloc(sizeof(word_t));
if (cur == NULL)
sysfault("addword: malloc failure -- %s\n",strerror(errno));
cur->count = 1;
cur->next = NULL;
// save off the word string
cur->str = strdup(word);
if (cur->str == NULL)
sysfault("addword: strdup failure -- %s\n",strerror(errno));
// add the new word to the end of the list
if (prev != NULL)
prev->next = cur;
// add the first word
else
wordlist.next = cur;
}
int
main(int argc,char **argv)
{
FILE *xf;
char buf[1000];
char *cp;
char *bp;
word_t *cur;
--argc;
++argv;
xf = fopen(*argv,"r");
if (xf == NULL)
sysfault("main: unable to open '%s' -- %s\n",*argv,strerror(errno));
while (1) {
// get next line
cp = fgets(buf,sizeof(buf),xf);
if (cp == NULL)
break;
// loop through all words on a line
bp = buf;
while (1) {
cp = strtok(bp," \t\n");
bp = NULL;
if (cp == NULL)
break;
// add this word to the list [avoiding duplicates]
addword(cp);
}
}
fclose(xf);
// print the words and their counts
for (cur = wordlist.next; cur != NULL; cur = cur->next)
printf("%s %d\n",cur->str,cur->count);
return 0;
}

How to debug a Tree Summing and Runtime error?

I am looking at problem 112 from UVa Online Judge.
For a couple of weeks ago, I got some homeworks from my university and the thing is that, though other problems are accepted on the UVa, I cannot figure out what is going wrong with this problem. I've already run the input from Udebug website and there was no problem. I double-checked the result and now, I'm sick and tired of solving this issue.
Here are details about what has happened. First of all, I increase the BUFSIZE to 2^20 in order to avoid any memory overflow. The result? Failed. Second, I downsized the size of the element in the stack I made. The result? Failed. Lastly, I removed an eol character of the result just in case. The result? Failed.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#define BUFSIZE 16384
typedef struct node {
int element[BUFSIZE];
int size;
int current;
}Stack;//This is a stack I made
static Stack *stack;
static int level;//This is a level of a node in the whole tree
static int integer;//This is an integer that should be came out from the sum() function
bool initialize(void) {
if (stack == NULL)
stack = (Stack *)malloc(sizeof(Stack));
stack->size = BUFSIZE;
stack->current = 0;
return true;
}
bool push(int number) {
if (stack == NULL)
return false;
if ((stack->current + 1) > stack->size)
return false;
stack->element[stack->current] = number;
stack->current++;
return true;
}
int pop() {
if (stack->current <= 0)
return 0xFFFFFFFF;
stack->current--;
return stack->element[stack->current];
}
int sum() {
int result = 0;
int i;
if (stack == NULL)
return 0xFFFFFFFF;
if (stack->current == 0)
return 0xFFFFFFFF;
for (i = 0; i < stack->current; i++)
result += stack->element[i];
return result;
}//Sum all the values in the stack and return it.
void replace(char * o_string, char * s_string, char * r_string) {
char *buffer = (char *)calloc(BUFSIZE, sizeof(char));
char * ch;
if (!(ch = strstr(o_string, s_string)))
return;
strncpy(buffer, o_string, ch - o_string);
buffer[ch - o_string] = 0;
sprintf(buffer + (ch - o_string), "%s%s", r_string, ch + strlen(s_string));
o_string[0] = 0;
strcpy(o_string, buffer);
free(buffer);
return replace(o_string, s_string, r_string);
}//This is a function I found on Google. Memory usage optimization is not guaranteed.
int main(void) {
char *buffer;
char *line;
char *restOfTheString;
char *token;
bool checked = false, found = false;
int i = 0, j = 0, scannedInteger, result = 0, array[4096];
buffer = (char *)calloc(BUFSIZE, sizeof(char));
restOfTheString = (char *)calloc(BUFSIZE, sizeof(char));
line = (char *)calloc(BUFSIZE, sizeof(char));
memset(buffer, 0, BUFSIZE);
for (i = 0; i < 4096; i++) {
array[i] = -1;
}
level = 0;
integer = 0;
while (fgets(line, sizeof(line), stdin) != NULL) {//Get input line by line
if (line[0] != '\n') {
token = strtok(line, "\n");
if (strlen(line) >= 1) {
strcat(buffer, token);
}
}
}
replace(buffer, " ", "");
replace(buffer, "()()", "K");
strcpy(restOfTheString, buffer);
i = 0;
while (restOfTheString[i] != 0) {
if (level == 0 && !checked) {//If the level of the node is 0, then it is clearly the summed value I need to find out on the whole tree.
initialize();
sscanf(&restOfTheString[i], "%d%s", &integer, &restOfTheString[0]);
i = -1;
checked = true;
}
if (restOfTheString[i] == '(') {
checked = false;
level++;
}//If there is an openning bracket, then increase the level of the node.
else if (restOfTheString[i] == ')') {
if (restOfTheString[i - 1] != '(')
if (pop() == 0xFFFFFFFF)
return 0;
level--;
if (!found && level == 0) {
array[j] = 0;
j++;
free(stack);
stack = NULL;
}//If there is a closing bracket, then it's time to check whether the level of the node is 0. If the level of the node is 0, then we need to report the result to the 'array' which is an integer array and move on to the next input.
else if (found && level == 0) {
array[j] = 1;
j++;
free(stack);
stack = NULL;
found = false;
}
}
else if (restOfTheString[i] == '-' && !checked) {
if (sscanf(&restOfTheString[i], "%d%s", &scannedInteger, &restOfTheString[0]) == 2) {
if (push(scannedInteger) == false)
return 0;
i = -1;
}
}//If there is a minus character, then it's obvious that the next couple of characters are a negative integer and I need to scan it out of the whole input.
else if (restOfTheString[i] >= 48 && restOfTheString[i] <= 57 && !checked) {
if (sscanf(&restOfTheString[i], "%d%s", &scannedInteger, &restOfTheString[0]) == 2) {
if (push(scannedInteger) == false)
return 0;
i = -1;
}
}//If there is a numerous character, then it's obvious that the next couple of characters are a negative integer and I need to scan it out of the whole input.
else if (restOfTheString[i] == 'K') {
if ((result = sum()) == 0xFFFFFFFF)
return 0;
if (result == integer) {
found = true;
}
}//The 'K' character means the integer scanned prior to this iteration is a value in a leaf. So I need to call the sum() function in order to figure it out the result.
i++;
}
i = 0;
while (array[i] != -1) {
if (array[i] == 1)
printf("yes\n");
else if (array[i] == 0)
printf("no\n");
i++;
}
return 0;
}
Though it is clearly suspicious about the memory usage, I don't know how to track the stack on my system.
You use many questionable practices.
You free and re-allocate the stack from scratch. The stack has a fixed size in your case; allocate one at the beginning of main and free once at the end.
You set the index i to −1 as indicator, but keep on accessing restOfString[i] later. restOfString is an allocated string and writing to bytes before the actual data might corrupt the internal information that thze system keeps for allocated memory. This might lead to errors when freeing. In any case, it's undefined behaviour.
You read the input line-wise and concatenate everything into one huge string. You use strcat for this, which will get slower as your string grows. If you must load everything into a large buffer, consider using fread.
Your recursive replace method also does a lot of copying of temporarily allocated buffers.
This:
sscanf(&rest[i], "%d%s", &integer, &rest[0]);
looks fishy. You store the result in the string that you are reading, albeit at different indices. Result and source may overlap, which probably is undefined behaviour. In any case, it entails a lot of copying. Instead of using sscanf, you could read the integer with strtol, which gives you the position of the string after parsing the number. Continue scanning the old string at the resulting offset.
Your problems seem to be not in the core algorithm but with reading the input. Ihe assignment does not mention a maximum line length. This may be a sign that you shouldn't read the input in a line context.
You can use the scanf functions which don't know about line breaks. You can make use of the fact that unsuccessful scanning with data conversion, e.g. scanning an integer, resets the input stream.
Such a strategy would require only storage for the current token. You don't even need a stack if you use recursion. I doubt that the test cases in the online judge will break the stack limit, even if there they contain degenerate trees with a large depths.

Segmantation Fault in C

This program will create link list from text alphabetically.
It is case-sensitive and it will eliminate the marks.
When I run the program, it gives a segmentation fault. I can't find where the problem is. I added the printf() in order to find the mistake but i can't.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
typedef struct NODE {
char *word;
int count;
struct NODE *next;
}NODE;
char *get_word(FILE *fp){
printf("getWord");
char *str = (char*)malloc(sizeof(char)*100);
char c;
do {
c = fgetc(fp);
if (c == EOF)
return 0;
} while (!isalpha(c));
do {
printf("getWord");
*str++ = tolower(c);
c = fgetc(fp);
printf("Word");
} while (isalpha(c));
return str;
}
void insert(NODE* sortedList, char *word) {
printf("INSERT ");
char *str = (char*)malloc(sizeof(char)*100);
if (sortedList == NULL || word < sortedList->word) {
NODE *ekle;
ekle=(NODE*)malloc(sizeof(NODE));
strcpy(ekle->word,word);
ekle->count = 1;
ekle->next = sortedList;
sortedList = ekle;
}
else {
//
NODE *current = sortedList->next;
NODE *pre = sortedList;
while (current != NULL && word > current->word) {
pre = current;
current = current->next;
}
if (current != NULL && word == current->word) {
(current->count)++;
}
else {
NODE *ekle;
ekle=(NODE*)malloc(sizeof(NODE));
strcpy(ekle->word,word);
ekle->count = 1;
ekle->next = current;
pre->next = ekle;
}
}
}
void createList(FILE* fp,NODE *n) {
printf("CREATELIST ");
char *word;
strcpy(word,get_word(fp));
puts(word);
while (strcmp(word,"")) {
printf("Create_LİST2");
insert(n,word);
word = get_word(fp);
}
}
NODE *head;
int main(){
NODE *list=NULL;;
FILE *fp;
fp=fopen( "text.txt", "r" );
head=list;
while(!feof(fp)){
createList(fp,list);
}
while(list->next != NULL){
printf("%s", list->word);
}
return 0;
}
A major problem is this line
*str++ = tolower(c);
This changes the pointer str, so when you return str from the function it actually points beyond the string. A string which you, by the way, do not terminate.
Another major problem are these lines:
NODE *ekle;
ekle=(NODE*)malloc(sizeof(NODE));
strcpy(ekle->word,word);
Here you allocate a NODE structure, but you do not allocate memory for ekle->word, so it points to indeterminate memory. You have the above code in two places.
Equal to the above allocation problem, you have
char *word;
strcpy(word,get_word(fp));
Here too you don't allocate memory for word, so you have a pointer to indeterminate memory.
Also, in C you should not cast the return of malloc. You should also look out for warnings from the compiler, and if you don't get any from the code you have then you need to enable more warnings. Compiler warnings are often a sign of undefined behavior which is what all of the above leads to. And finally, next to the compiler I would argue that a debugger is a developers best tool. Learn to use it, it would have helped you with some of the above problems.
Here's one problem:
char c;
do {
c = fgetc(fp);
if (c == EOF)
return 0;
This is wrong; fgetc() returns int, since EOF does not fit in a char. The first line should therefore be:
int c;
Fist you have to verify if the file is correctly open. Then AFAIK the strcpy requires that destination has enough space to store the data (line 74), instead of "char *word" use "char word[255]" for instance (if you know the size limit).
Your main problem is here:
*str++ = tolower(c);
First of all, once you increment str, you no longer hold a pointer to the dynamically allocated memory. Therefore, you will not be able to release that memory at a later point in the execution of your program, which will eventually lead to memory leaks. Second, when you return str at the end of the function, you are not returning a pointer to that string as you're probably hoping to.
Additional problems are:
You are not making sure that no more than 99 characters are stored.
You are not terminating the string pointed by str with a null-character.
You are not de-allocating the string pointed by str if an EOF is encountered.
You are not using an int in order to store the return value of function fgetc.
Here is how your function should look like:
#define MAX_WORD_LEN 101
char* get_word(FILE* fp)
{
char* str = (char*)malloc(sizeof(char)*MAX_WORD_LEN);
int c,i;
do
{
c = fgetc(fp);
if (c == EOF)
{
free(str);
return 0;
}
}
while (!isalpha((char)c));
i = 0;
do
{
str[i++] = tolower((char)c);
c = fgetc(fp);
}
while (isalpha((char)c) && i < MAX_WORD_LEN-1);
str[i] = 0;
return str;
}
Please note that if a word in your file is longer than MAX_WORD_LEN-1 characters, then you will essentially "lose" the last character that was read, because it will not be stored anywhere.
not the segfault, but: you malloc str, without using or freeing it
void insert(NODE* sortedList, char *word) {
printf("INSERT ");
char *str = (char*)malloc(sizeof(char)*100);
Here i write code which may helps you to understand problem. It's not exact what as your program does but somewhat similar and easier to understand and also find your all problems solution from below code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
typedef struct NODE
{
char word[100];
int count;
struct NODE *next;
}NODE;
NODE *head = NULL;
NODE *list = NULL;
void insert(char *word)
{
if (list == NULL)
{
list = calloc(1, sizeof(NODE));
if (NULL == list)
{
perror("Memory allocation failed");
return;
}
strncpy(list->word, word, 99);
list->count = 1;
list->next = NULL;
head = list;
}
else
{
list->next = calloc(1, sizeof(NODE));
if (NULL == list->next)
{
perror("Memory allocation failed");
return;
}
strncpy(list->next->word, word, 99);
list->next->count = 1;
list->next->next = NULL;
list = list->next;
}
}
void createList(FILE* fp)
{
char word[100] = {0};
while (EOF != fscanf(fp, "%99s", word))
{
if (0 < strlen(word))
{
insert(word);
}
memset(word, 0x00, 100);
}
}
int main()
{
FILE *fp = NULL;
fp = fopen("text.txt", "r");
if (NULL == fp)
{
//File is not readable
perror("text.txt file open failed");
return -1;
}
createList(fp);
list = head;
while(NULL != list)
{
printf("%s\n", list->word);
list = list->next;
}
if (NULL != fp)
{
fclose(fp);fp = NULL;
}
return 0;
}
And also create function to free all memory which is allocated in insert function.

Linked list value being changed [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 8 years ago.
Improve this question
Relevant code:
Constant Def:
#define MAX_NAME_LEN 15 /* Maximum length of any attribute or relation is 15. */
Struct Def:
typedef struct schnode {
char name[MAX_NAME_LEN + 1]; /* The name of the attribute. */
char type; /* The type of the attribute ('S' or 'I'). */
int len; /* The length of the attribute. */
struct schnode *next; /* A pointer to the next node. */
} SCHNODE;
Variable Def:
FILE *schemafile; /* A text file. */
SCHNODE *head = NULL; /* Head of the linked list. */
SCHNODE *ptrnode = NULL; /* A variable node in the linked list. */
SCHNODE *p = NULL; /* A variable node in the linked list. */
SCHNODE *tail = NULL; /* Tail of the linked list. */
char *attr_name; /* Holds the name of the current attribute. */
char *attr_type; /* Holds the current attribute type (S or I). */
int attr_len; /* Holds the current attribute length. */
int attr_loc = 0; /* Attribute location in respect to the tuple. */
int i; /* Index for loops. */
char attr_match = 0; /* 1 if the attribute is valid, 0 otherwise. */
Constant Def:
#define BUFFER_START_SIZE 20
#define BUFFER_INC_SIZE 10
NextLine Function:
char* NextLine (FILE *f) {
int size = BUFFER_START_SIZE;
char *buf; //buffer storing current line as a char array
int i = 0; //index for array
int c; //a character
if (f == NULL){ //check if the file pointer is NULL
printf("Unable to find the file.\n"); fflush(stdout);
return NULL; //nothing to read, so return NULL
}
if((c = getc(f)) == EOF) //check if the first 'char' in the line is EOF
return NULL; //if so, line is empty, so return NULL
if((buf = (char *) malloc(size)) == NULL){ //make room for the buffer
printf("Failed to create buffer.\n"); fflush(stdout); }
buf[0] = (char) c; //put the first char into the buffer string
i = 1; //and increment the index
while(1){
while(i < size){ //loop through the buffer char array
if((c = getc(f)) == '\n'){ //if end of line
buf[i] = '\0'; //instead put '\0' to terminate string
return buf; //and return the completed string
}
buf[i] = (char) c; //otherwise, just keep copying the characters
i++; //increment the index
}
//IF REACHED THIS POINT, LINE IS TOO LONG TO FIT IN BUFFER -> REALLOC
size += BUFFER_INC_SIZE;
if((buf = (char*) realloc(buf,size)) == NULL){ //make buffer bigger
printf("Unable to realloc memory.\n"); fflush(stdout); }
//loop and continue adding chars from where you left off
}
}
Executed Code:
for (i = 0; i < num_attr; i++) {
if ((i > 0) && !attr_match) attr_loc += attr_len;
attr_name = NextLine(schemafile);
attr_name = strtok(attr_name, " \t\n");
attr_type = strtok(NULL, " \t\n");
attr_len = atoi(strtok(NULL, " \t\n"));
/* Construct linked list of schema file data. */
/* If empty: */
if (head == NULL) {
if ((head = (SCHNODE*) malloc(sizeof(SCHNODE))) == NULL) {
fprintf(stderr, "Unable to Malloc space.\n");
return;
}
strcpy(head->name, attr_name);
head->type = attr_type[0];
head->len = attr_len;
head->next = NULL;
}
/* If the list already has a head defined: */
else {
/* Start from the head if tail is NULL (only 1 element) otherwise start from tail. */
if(tail == NULL) ptrnode = head;
else ptrnode = tail;
/* Malloc space for the tail node. */
if ((tail = (SCHNODE*) malloc(sizeof(SCHNODE))) == NULL) {
fprintf(stderr, "Unable to Malloc space.\n");
return;
}
strcpy(tail->name, attr_name);
tail->type = attr_type[0];
tail->len = attr_len;
tail->next = NULL;
/* Insert tail node at the end. */
ptrnode->next = tail;
if (strcmp(tail->name, "Instr") == 0)
p = tail;
if (p != NULL) printf("%d: %s\n", i, p->name);
}
Input:
CName S 25
CId S 8
Instr S 10
Credits I 4
Output:
2: Instr
3: Insur
No other values are changed (as far as I can tell). Can someone explain why this particular value is always altered? (Instr -> Insur). I just want the entry that I read (Instr) to stay the same throughout the entire process of reading.
In your program, the name field of your data structure is defined to be a pointer. Hence, when you strcpy, there will be memory corruption as no space has been allocated to the pointer. Hence, either malloc a space for name for every node or define name to be an array of n elements.

C: malloc() ignoring size requested?

I haven't used C in a long time and apparently I've forgotten more than I thought. While attempting to use malloc() to allocate a string, I keep getting the old data for that string, including it's old, longer length when the requested space is shorter. The circumstance do include the pointer to the string being free()'d and set to NULL. Here is a sample run of what I see in my terminal:
yes, quit, or other (<-message from program)
oooo (<-user input; this will be put to upper case and token'd)
------uIT LENGTH:4 (<-debug message showing length of userInputToken)
preC--tmp: (<-contents of tmp variable)
pstC--tmp:OOOO (<-contents of temp variable)
bad input (<-program response)
yes, quit, or other
yes
------uIT LENGTH:3
preC--tmp:OOOO (<-: tmp = malloc(sizeof(char)*(strlen(userInputToken)-1)); )
pstC--tmp:YESO (<-: strncpy(tmp,userInputToken,strlen(userInputToken)-1); )
bad input
yes, quit, or other
yes
------uIT LENGTH:3
preC--tmp:YESO
pstC--tmp:YESO
bad input
yes, quit, or other
quit
------uIT LENGTH:4
preC--tmp:YESO
pstC--tmp:QUIT (<-: Successful quit because I only did 4 chars; if 5 were used, this would have failed)
As you can see, strlen(userInputToken) gets the correct length and it is used to get the correct number of characters copied – but either free() or malloc() doesn't seem to care about it. I can't figure out what's going on here! Is this a punishment for leaving C for Python?
What's more, the tmp variable should be cleared regardless of free() because it is limited by its scope. Here is the code where everything goes down:
In main.c:
void run() {
outputFlagContainer *outputFlags = malloc(sizeof(outputFlagContainer));
while(true) {
puts("yes, quit, or other");
outputFlags = getUserInput(outputFlags);
if (outputFlags->YES) {
puts("It was a yes!");
} else if (outputFlags->QUIT) {
break;
} else {
puts("bad input");
}
}
free(outputFlags);
}
In messsageParserPieces.h:
outputFlagContainer *getUserInput(outputFlagContainer *outputFlags) {
outputFlags = resetOutputFlags(outputFlags);
char *userInput = NULL;
char user_input[MAX_INPUT];
char *userInputToken = NULL;
char *tmp = NULL;
char *finalCharacterCheck = NULL;
// Tokens to search for:
char QUIT[] = "QUIT";
char YES[] = "YES";
userInput = fgets(user_input, MAX_INPUT-1, stdin);
int i = 0;
while(userInput[i]) {
userInput[i] = toupper(userInput[i]);
i++;
}
userInputToken = strtok(userInput, " ");
if (userInputToken) {
finalCharacterCheck = strchr(userInputToken, '\n');
if (finalCharacterCheck) {
int MEOW = strlen(userInputToken)-1; // DEBUG LINE
printf("\n------uIT LENGTH:%d\n", MEOW); // DEBUG LINE
// The problem appears to happen here and under the circumstances that
// userInput is (for example) 4 characters and then after getUserInput()
// is called again, userInput is 3 characters long.
tmp = malloc(sizeof(char)*(strlen(userInputToken)-1));
if (tmp == NULL) {
exit(1);
}
printf("\npreC--tmp:%s\n", tmp); // This shows that the malloc DOES NOT use the given length.
strncpy(tmp,userInputToken,strlen(userInputToken)-1);
printf("\npstC--tmp:%s\n", tmp); // Copies in the correct number of characters.
userInputToken = tmp;
free(tmp);
tmp = NULL;
}
}
while (userInputToken != NULL) { // NULL = NO (more) tokens.
if (0 == strcmp(userInputToken, YES)) {
outputFlags->YES = true;
} else if (0 == strcmp(userInputToken, QUIT)) {
outputFlags->QUIT = true;
}
userInputToken = strtok(NULL, " ");
if (userInputToken) {
finalCharacterCheck = strchr(userInputToken, '\n');
if (finalCharacterCheck) {
tmp = malloc(sizeof(char)*(strlen(userInputToken)-1));
if (tmp == NULL) {
exit(1);
}
strncpy(tmp,userInputToken,strlen(userInputToken)-1);
userInputToken = tmp;
free(tmp);
tmp = NULL;
}
}
}
return outputFlags;
}
I'm assuming this is some kind of obvious error, but I've tried googling it for about 2 hours tonight. I can't think of how to search this that doesn't bring up a malloc() tutorial – and I have looked at a couple already.
Any insight at all would be greatly appreciated!
tmp = malloc(sizeof(char)*(strlen(userInputToken)-1));
if (tmp == NULL) {
exit(1);
}
printf("\npreC--tmp:%s\n", tmp); // This shows that the malloc DOES NOT use the given length.
strncpy(tmp,userInputToken,strlen(userInputToken)-1);
printf("\npstC--tmp:%s\n", tmp); // Copies in the correct number of characters.
This snippet shows that you expect tmp to be initialised with something. This is not true. You must initialise your memory after allocating it. That's what you do with strncpy.
There's also a problem because you are not allocating enough bytes to hold the string, therefore you cannot display it with a plain %s format specifier. You are allocating strlen(userInputToken)-1 bytes and copying that same number. That means there's no room for a null character, and strncpy will consequently not terminate your string. You should always add one more byte, and if the NULL character will not be copied by strncpy then you must set it yourself:
size_t length = strlen(userInputToken)-1;
tmp = malloc(length + 1);
strncpy(tmp, userInputToken, length);
tmp[length] = 0;
So, just to be clear, you have three issues:
You display the newly allocated 'string' before you initialise it;
You do not allocate enough memory to hold the string
You do not terminate the string (and neither does strncpy because it did not encounter a string terminator within the allowed number of bytes).
I just spotted something else in your while (userInputToken != NULL) loop... You always to a string compare using userInputToken at the beginning of the loop, but inside the loop (and also in the part above the loop) you do this:
userInputToken = tmp;
free(tmp);
That means userInputToken is a dangling pointer. It points to memory that has been freed, and you must NOT use it. You will have to rethink your approach, and allow it to live until it's no longer needed.
You should probably use calloc. You should also not use uninitialized memory like this. Malloc allocates memory in chunks. When you a free a chunk it may be reused. You don't get an exact size and until you mset the memory with malloc there is not guarantee as to what the value of its bytes will be. All you know is that you have a chunk of memory to use that is at least as big as the size you requested. So in this example, you are printing the old contents of the memory chunks before you write to it.
Your validation of allocated length with this line is not correct:
printf("\npreC--tmp:%s\n", tmp); // This shows that the malloc DOES NOT use the given length.
malloc will allocated requested number of bytes, but it does not initialize the allocated memory. So when you try to print it as character string which should be terminating with '\0', it will try to print all characters until it finds '\0' in the memory. The terminating character may not be from the same memory block. Presence of '\0' is non-deterministic.
I hope this helps
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define MAX_INPUT 128
#define true 1
#define false 0
typedef struct _outputFlagContainer{
int YES, QUIT;
}outputFlagContainer;
void run();
outputFlagContainer *getUserInput(outputFlagContainer *outputFlags);
outputFlagContainer *resetOutputFlags(outputFlagContainer *outputFlags);
int main(int argc, char *argv[]){
run();
return 0;
}
void run() {
outputFlagContainer *outputFlags = malloc(sizeof(outputFlagContainer));
while(true) {
puts("yes, quit, or other");
outputFlags = getUserInput(outputFlags);
if (outputFlags->YES)
{
puts("It was a yes!");
}
else if (outputFlags->QUIT)
{
break;
}
else
{
puts("bad input");
}
}
free(outputFlags);
}
outputFlagContainer *resetOutputFlags(outputFlagContainer *outputFlags) {
if(outputFlags!= NULL){
outputFlags->YES = false;
outputFlags->QUIT = false;
}
return outputFlags;
}
outputFlagContainer *getUserInput(outputFlagContainer *outputFlags) {
int len;
char user_input[MAX_INPUT]={0}; // Zero Initialization
char *userInput = NULL;
char *userInputToken = NULL;
char *tmp = NULL;
char *finalCharacterCheck = NULL;
// Tokens to search for: // Immutable Strings
char *QUIT = "QUIT";
char *YES = "YES";
// Reset The Structure
outputFlags = resetOutputFlags(outputFlags);
userInput = fgets(user_input, MAX_INPUT, stdin); // it copies one less than MAX_INPUT
// Converting to Upper Case
int i = 0;
while(userInput[i]) {
userInput[i] = toupper(userInput[i]);
i++;
}
userInputToken = strtok(userInput, " ");
if (userInputToken) {
finalCharacterCheck = strchr(userInputToken, '\n');
if (finalCharacterCheck) {
len = strlen(userInputToken);
printf("\n------uIT LENGTH:%d\n", len); // DEBUG LINE
tmp = malloc(sizeof(char)*(len+1));
if (tmp == NULL)
exit(1);
strncpy(tmp,userInputToken,len);
tmp[len]='\0';
printf("\npstC--tmp:%s\n", tmp); // Copies in the correct number of characters.
strcpy(user_input,tmp);
userInputToken = user_input;
free(tmp);
tmp = NULL;
}
}
while (userInputToken != NULL) { // NULL = NO (more) tokens.
if (0 == strcmp(userInputToken, YES)) {
outputFlags->YES = true;
}
else if (0 == strcmp(userInputToken, QUIT)) {
outputFlags->QUIT = true;
}
userInputToken = strtok(NULL, " ");
if (userInputToken) {
finalCharacterCheck = strchr(userInputToken, '\n');
if (finalCharacterCheck) {
len = strlen(userInputToken);
tmp = malloc(sizeof(char)*(len+1));
if (tmp == NULL) {
exit(1);
}
strncpy(tmp,userInputToken,len);
tmp[len]='\0';
strcpy(user_input,tmp);
userInputToken = user_input;
free(tmp);
tmp = NULL;
}
}
}
return outputFlags;
}

Resources