I am writing a function to read from a text file and extract the strings word by word and store them to a binary search tree. The function should ignore all punctuations and discard duplicate words(only adds to the word frequency).
My problem with the code now is that every time "while (fscanf(fp, "%s", line)!=EOF)" runs, my rootWord gets replaced by the newly read word. I cannot figure out how is it possible for fscanf to be able to do this.
typedef struct word * wordPtr;
typedef struct position * positionPtr;
typedef struct position
{
int position;
positionPtr nextPosition;
} Position;
typedef struct word
{
char * word;
unsigned freq;
positionPtr firstPosition;
wordPtr leftWord;
wordPtr rightWord;
} Word;
typedef struct bstWord
{
wordPtr rootWord;
unsigned wordCount;
} BSTWord;
int BSTCreate(BSTWord* bst, char* fileName)
{
FILE * fp = fopen(fileName,"r");
char line[MAX_WORD_LEN + 1];
int charCount = 0;
char * token;
char delimit[] = "\t\r\n\v\f,.-;:\"\' ";
while (fscanf(fp, "%s", line)!=EOF)
{
wordPtr prev = NULL, curr = bst->rootWord;
wordPtr newWord;
positionPtr newPosition;
int lessThen;
int status = 1;
token = strtok(line, delimit);
charCount = charCount + 1;
while(curr!=NULL)
{
prev = curr;
if(strcmp(token, curr->word)<0)
{
printf("\nless");
lessThen = 1;
curr = curr->leftWord;
status = 1;
}
else if(strcmp(token, curr->word)>0)
{
printf("\nmore");
lessThen = 0;
curr = curr->rightWord;
status = 1;
}
else if(strcmp(token, curr->word)==0) //If word is already in tree, add freq + update position
{
if ( ( newPosition = malloc( sizeof( Position ) ) ) == NULL )
return FAILURE;
newPosition->position = charCount;
newPosition->nextPosition = NULL;
positionPtr prevPosition = NULL, currPosition = curr->firstPosition;
while(currPosition!=NULL)
{
prevPosition = currPosition;
currPosition = currPosition->nextPosition;
}
prevPosition->nextPosition = newPosition;
status = 0;
curr = NULL;
break;
}
}
if(status == 1)
{
if ( ( newWord = malloc( sizeof( Word ) ) ) == NULL )
return FAILURE;
if ( ( newPosition = malloc( sizeof( Position ) ) ) == NULL )
return FAILURE;
newPosition->position = charCount;
newWord->word = token;
newWord->freq = 1;
newWord->firstPosition = newPosition;
newWord->leftWord = NULL;
newWord->rightWord = NULL;
if(bst->rootWord == NULL)
bst->rootWord = newWord;
else
{
if(lessThen)
{
prev->leftWord = newWord;
}
else
{
prev->rightWord = newWord;
}
}
}
bst->wordCount++;
}
fclose(fp);
free(fp);
return SUCCESS;
}
newWord->word = token;
Every token points to the same memory you allocated at:
char line[MAX_WORD_LEN + 1];
You need to allocate additional memory and copy the string there:
newword->word = malloc(strlen(token) + 1);
strcpy(newword->word, token);
Related
I am trying to insert Node to Binary tree. This is my function for creating Node (rest is done).
void BVSCreate_function(TNodef *rootPtr, function_save token) {
TNodef *newPtr = malloc(sizeof(struct tnodef));
if (newPtr == NULL) {
fprintf(stderr, "99");
return;
}
TNodef init;
string initStr;
initStr.str = NULL;
initStr.length = 0;
initStr.alloc = 0;
newPtr = &init;
newPtr->content = &initStr;
newPtr->leftPtr = NULL;
newPtr->rightPtr = NULL;
newPtr->return_type = token.ret_value;
newPtr->parameters = token.param_count;
strCpyStr(newPtr->content, token.content);
rootPtr = newPtr;
}
void BVSInsert_function(TNodef *rootPtr, function_save token) {
if (rootPtr == NULL) {
BVSCreate_function(rootPtr, token);
} else {
if ((strCmpStr(token.content, rootPtr->content)) < 0) {
BVSCreate_function(rootPtr->leftPtr, token);
} else
if ((strCmpStr(token.content, rootPtr->content)) > 0) {
BVSCreate_function(rootPtr->rightPtr, token);
}
}
}
When TNodef and function_save are structs:
typedef struct {
string *content;
int param_count;
int ret_value;
} function_save;
typedef struct tnodef {
string *content;
struct tnodef *leftPtr;
struct tnodef *rightPtr;
int parameters;
int return_type;
} TNodef;
Where string is defined as this struct:
typedef struct {
char *str; // content of string
int length; // length of string
int alloc; // amount of memory allocated
} string;
strCpystr function :
int strCpyStr(string *s1, string *s2) {
int len2 = s2->length;
if (len2 > s1->alloc) {
if (((s1->str) = (char *)realloc(s1->str, len2 + 1)) == NULL) {
return 1;
}
s1->alloc = len2 + 1;
}
strcpy(s1->str, s2->str);
s1->length = len2 + 1;
return 0;
}
I am trying to create a node in binary tree and put there information from struct function_save.
But when I try to print this tree after insert it shows me that tree is still empty.
Your code in BVSCreate_function has undefined behavior because:
newPtr = &init; discards the allocated node and instead uses a local structure that will become invalid as soon as the function returns.
newPtr->content = &initStr; is incorrect for the same reason: you should allocate memory for the string too or possibly modify the TNodeDef to make content a string object instead of a pointer.
Function BVSInsert_function does not return the updated root pointer, hence the caller's root node is never updated. You could change the API, passing the address of the pointer to be updated.
There is also a confusion in BVSInsert_function: it should call itself recursively when walking down the tree instead of calling BVSCreate_function.
Here is a modified version:
/* Allocate the node and return 1 if successful, -1 on failure */
int BVSCreate_function(TNodef **rootPtr, function_save token) {
TNodef *newPtr = malloc(sizeof(*newPtr));
string *newStr = malloc(sizeof(*content));
if (newPtr == NULL || newStr == NULL) {
fprintf(stderr, "99");
free(newPtr);
free(newStr);
return -1;
}
newStr->str = NULL;
newStr->length = 0;
newStr->alloc = 0;
newPtr->content = newStr;
newPtr->leftPtr = NULL;
newPtr->rightPtr = NULL;
newPtr->return_type = token.ret_value;
newPtr->parameters = token.param_count;
strCpyStr(newPtr->content, token.content);
*rootPtr = newPtr;
return 1;
}
int BVSInsert_function(TNodef **rootPtr, function_save token) {
if (*rootPtr == NULL) {
return BVSCreate_function(rootPtr, token);
} else {
if (strCmpStr(token.content, rootPtr->content) < 0) {
return BVSInsert_function(&rootPtr->leftPtr, token);
} else
if ((strCmpStr(token.content, rootPtr->content)) > 0) {
return BVSInsert_function(&rootPtr->rightPtr, token);
} else {
/* function is already present: return 0 */
return 0;
}
}
}
Note also that function strCpyStr may write beyond the end of the allocated area is len2 == s1->alloc, assuming s1->len is the length of the string, excluding the null terminator.
Here is a modified version:
int strCpyStr(string *s1, const string *s2) {
int len2 = s2->length;
if (len2 >= s1->alloc) {
char *newstr = (char *)realloc(s1->str, len2 + 1);
if (newstr == NULL) {
return 1;
}
s1->str = newstr;
s1->alloc = len2 + 1;
}
strcpy(s1->str, s2->str);
s1->length = len2;
return 0;
}
I am coding a local server, I need to parse a file to get the config of the server.
Problem : I have a heap buffer overflow indicated at on the while.
This probeme is shown when I run with -fsanitize but I don't have any trouble without.
Here is the code :
struct container *configParse(FILE *file)
{
char *line = NULL;
size_t n;
char *token = NULL;
char *saveptr = NULL;
struct container *head = NULL;
struct container *container = NULL;
int key = 0;
int first = 1;
while ((getline(&line, &n, file)) != -1)
{
token = strtok_r(saveptr, " =\n\r", &line);
while (token != NULL)
{
if (token[0] == '[')
{
if (first)
{
container = container_init();
container->title = token;
head = container;
first = 0;
}
else
{
container = container_add_back(container);
container = container->next;
container->item = NULL;
container->title = token;
}
key = 0;
}
else
{
if (key == 0)
{
if (container->item == NULL)
{
container->item = items_init();
container->item->key = token;
}
else
{
struct item *itemcpy = container->item;
while (itemcpy->next != NULL)
{
itemcpy = itemcpy->next;
}
itemcpy->next = items_init();
itemcpy->next->key = token;
}
key = 1;
}
else
{
struct item *itemcpy = container->item;
while (itemcpy->next != NULL)
{
itemcpy = itemcpy->next;
}
itemcpy->value = token;
key = 0;
}
}
token = strtok_r(NULL, " =\n\r", &line);
}
}
container_print(head);
printf("\n*****Parsing du .conf*****\n\n");
if (isvalid(head))
printf("Parsing OK\n");
else
{
printf("Parsing KO\n");
return NULL;
}
return head;
}
Thanks in advance.
As explained I try to run the program without -fsanitze, and everything was fine
I have to separate each input I receive into individual digits. Then I have to print all the values I get into the push function.
I have to separate each input I receive into individual digits. Then I have to print all the values I get into the push function.
int main(void)
{
struct Node* first = NULL;
struct Node* second = NULL;
//Take the input from the user
scanf("%d", &b);
int n1;
scanf("%d", &n1);
// I must the use push function with input by one to one
// push(&first, n1);
printf("First List is: ");
printList(first);
// Multiply the two lists and see result
struct Node* result = multiplyTwoLists(first, second);
printf("Resultant list is: ");
printList(result);
return 0;
}
The first questions we must ask are:
How many inputs goes, or for how long the inputs will last?
Should the lists have a minimun size?
In the case some lists are greater than other, what should we do?
Here goes something i Did.
Hope it helps
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<ctype.h>
typedef struct STRUCT_INPUT_LIST {
int inputValue;
struct STRUCT_INPUT_LIST *nextList;
}STRUCT_INPUT_LIST;
void pushInputToList(STRUCT_INPUT_LIST *inputList){
char read[32];
memset(read, 0, sizeof(read));
printf("Give a input\n");
if ( fgets(read, sizeof(read), stdin) == NULL )
return;
strtok(read, "\n\r");
if (strlen(read) > 0 && isdigit(read[0]))
inputList->inputValue = atoi(read);
}
STRUCT_INPUT_LIST *getLastUsedNode(STRUCT_INPUT_LIST *inputList){
STRUCT_INPUT_LIST* wrkList = NULL;
if ( inputList == NULL ) return NULL;
for( wrkList = inputList; wrkList->nextList != NULL; wrkList = wrkList->nextList );
return wrkList;
}
void freeList(STRUCT_INPUT_LIST *inputList){
STRUCT_INPUT_LIST* wrkList = NULL;
for ( wrkList = inputList; wrkList != NULL; ){
STRUCT_INPUT_LIST* lastList = NULL;
lastList = wrkList;
wrkList = wrkList->nextList;
free(lastList);
}
}
int getMinMultipliableSize(STRUCT_INPUT_LIST *listOne, STRUCT_INPUT_LIST *listTwo){
STRUCT_INPUT_LIST* wrkList= NULL;
STRUCT_INPUT_LIST* wrkListTwo= NULL;
int iSize = 0;
for( wrkList = listOne,
wrkListTwo = listTwo;
wrkList != NULL &&
wrkListTwo != NULL;
wrkList = wrkList->nextList,
wrkListTwo = wrkListTwo->nextList ){
iSize++;
}
return iSize;
}
void multiplyLists(STRUCT_INPUT_LIST *listOne, STRUCT_INPUT_LIST *listTwo,STRUCT_INPUT_LIST *resultList, int times){
STRUCT_INPUT_LIST* wrkList= NULL;
STRUCT_INPUT_LIST* wrkListTwo= NULL;
STRUCT_INPUT_LIST* wrkRsl= NULL;
int i = 0;
for( wrkList = listOne,
wrkListTwo = listTwo,
wrkRsl = resultList;
i < times;
wrkList = wrkList->nextList,
wrkListTwo = wrkListTwo->nextList,
wrkRsl = wrkRsl->nextList, i++ ){
wrkRsl->inputValue = wrkList->inputValue * wrkListTwo->inputValue;
}
return ;
}
void outputList(STRUCT_INPUT_LIST *inputList, char *listName){
STRUCT_INPUT_LIST* wrkList= NULL;
printf("==========================\n");
printf("+++ Listing Inputs of %s:\n", listName);
for( wrkList = inputList; wrkList != NULL; wrkList = wrkList->nextList ){
printf("[%d] ", wrkList->inputValue);
}
printf("+++ End of Inputs\n");
printf("==========================\n");
}
int main(void)
{
STRUCT_INPUT_LIST* firstList = NULL;
STRUCT_INPUT_LIST* wrkList = NULL;
STRUCT_INPUT_LIST* secondList = NULL;
STRUCT_INPUT_LIST* rslList = NULL;
int stop = 0;
int invalidanswer = 0;
int minMultipliable = 0;
char read[32];
firstList = (STRUCT_INPUT_LIST *) malloc (sizeof(STRUCT_INPUT_LIST));
secondList = (STRUCT_INPUT_LIST *) malloc (sizeof(STRUCT_INPUT_LIST));
memset(firstList, 0, sizeof(STRUCT_INPUT_LIST)); firstList->nextList = NULL;
memset(secondList, 0, sizeof(STRUCT_INPUT_LIST)); secondList->nextList = NULL;
wrkList = firstList;
do{
pushInputToList(wrkList);
memset(read, 0, sizeof(read));
do{
invalidanswer = 0;
printf("Read more inputs to the (first) list, to the (second) or (stop) ?\n");
if ( fgets(read, sizeof(read), stdin) == NULL )
return -1;
strtok(read, "\n\r");
if ( !strcasecmp(read, "first") ){
wrkList = getLastUsedNode(firstList);
}
else if ( !strcasecmp(read, "second") ){
wrkList = getLastUsedNode(secondList);
}
else if ( !strcasecmp(read, "stop") ){
stop = 1;
break;
}
else{
printf("invalid input\n\n");
invalidanswer = 1;
continue;
}
if ( wrkList->inputValue == 0 )
break;
wrkList->nextList = (STRUCT_INPUT_LIST *) malloc (sizeof(STRUCT_INPUT_LIST));
wrkList = wrkList->nextList;
memset(wrkList, 0, sizeof(STRUCT_INPUT_LIST)); wrkList->nextList = NULL;
} while ( invalidanswer );
} while ( !stop );
minMultipliable = getMinMultipliableSize(firstList, secondList);
{
int i;
STRUCT_INPUT_LIST *wrkRsl;
rslList = (STRUCT_INPUT_LIST *) malloc (sizeof(STRUCT_INPUT_LIST));
memset(rslList, 0, sizeof(STRUCT_INPUT_LIST));
wrkRsl = rslList;
for ( i = 0 ; i < (minMultipliable -1) ; i++){
wrkRsl->nextList = (STRUCT_INPUT_LIST *) malloc (sizeof(STRUCT_INPUT_LIST));
wrkRsl = wrkRsl-> nextList;
memset(wrkRsl, 0, sizeof(STRUCT_INPUT_LIST));
}
wrkRsl->nextList = NULL;
}
multiplyLists(firstList, secondList, rslList, minMultipliable);
outputList(firstList, "firstList");
outputList(secondList, "secondList");
outputList(rslList, "multipliedList");
freeList(firstList);
freeList(secondList);
freeList(rslList);
return 0;
}
I am new to C and am trying to code up a data structure, primarily, a ternary search tree. I am working under the assumption (for now) that valid char inputs are being passed in. I am having some issues with my insert function. Note that I am also inserting the original string in the last TSTnode where the last character of str will also be held.
Here is what I have so far
struct TSTnode {
char* word; // NULL if no word ends here
char self;
struct TSTnode *left, *sub, *right;
};
int insert_tst(struct TSTnode** tree, const char* str) {
return _insert(tree, str, 0);
}
int _insert(struct TSTnode** tree, const char* str, int position) {
if((*tree) == NULL) {
*tree = new_tst_node(*(str+position));
position = position + 1;
if(*(str+position) == '\0') {
(*tree)->word = strcpy((*tree)->word,str);
return 1;
}
}
else if ((*tree)->self > *(str+position)) {
position = position + 1;
_insert( &((*tree)->left), str, position);
}
else if ((*tree)->self < *(str+position)) {
position = position + 1;
_insert( &((*tree)->right), str, position);
}
else {
position = position + 1;
_insert( &((*tree)->sub), str, position);
}
return 0;
}
struct TSTnode* new_tst_node(char self) {
struct TSTnode* newNode = (struct TSTnode*) malloc(sizeof(struct
TSTnode));
if (newNode == NULL) {
return NULL;
}
newNode->word = NULL;
newNode->self = self;
newNode->left = NULL;
newNode->right = NULL;
newNode->sub = NULL;
return newNode;
}
Here is how I am testing:
struct TSTnode* tree = NULL;
char* words[1] = {"hello"};
for (int i = 0; i < 1; i++) {
if (insert_tst(&tree, words[i]) == 0) {
//print some error
}
else { //success }
EDIT - My issue is that none of my conditional branches are being taken and the insert function simply goes straight to return 0.
Note: You confusingly use tree for both TSTnode* and TSTnode**. I'm going to use tree_ptr for the latter, and pretend that you did the same.
Your claim is false. The body of if((*tree_ptr) == NULL) is executed. You do have a number of problems, though.
You don't handle the case where *tree_ptr == NULL && *(str+position+1) != '\0'.
You don't correctly handle the case where *tree_ptr != NULL && *(str+position+1) == '\0'.
You always return 0 when *tree_ptr != NULL || str[1] != '\0'.
You never allocate word, but you deference it. The thing is, you shouldn't be storing the string again anyway!
You don't handle the case where str[0] == '\0' (empty string).
Fixed:
int insert_tst(struct TSTnode** tree_ptr, const char* str) {
if (!*str)
return 0; /* Zero-length strings are not supported. */
return insert_tst_helper(tree_ptr, str, 0);
}
int insert_tst_helper(struct TSTnode** tree_ptr, const char* str, int position) {
if (*tree_ptr == NULL) {
*tree_ptr = new_tst_node(*(str+position));
if (*tree_ptr == NULL)
return 0; /* Memory allocation error. */
}
if (*(str+position+1) == '\0') { /* If the next char is a NUL */
(*tree_ptr)->is_word = 1;
return 1;
}
else if ((*tree_ptr)->self > *(str+position)) {
position = position + 1;
return insert_tst_helper( &((*tree_ptr)->left), str, position);
}
else if ((*tree_ptr)->self < *(str+position)) {
position = position + 1;
return insert_tst_helper( &((*tree_ptr)->right), str, position);
}
else {
position = position + 1;
return insert_tst_helper( &((*tree_ptr)->sub), str, position);
}
}
Untested.
Let's clean this up, though.
*(str+position)simplifies tostr[position]
ch == '\0'simplifies toch == 0then to!ch
position = position + 1; return insert_tst_helper(..., str, position);simplifies to++position; return insert_tst_helper(..., str, position);then toreturn insert_tst_helper(..., str, position+1);then toreturn insert_tst_helper(..., str+1, 0);then toreturn insert_tst(..., str+1);
Why is recursion being used at all???
Fixed:
int insert_tst(struct TSTnode** tree_ptr, const char* str) {
if (!*str)
return 0; /* Zero-length strings are not supported. */
while (1) {
if (*tree_ptr == NULL) {
*tree_ptr = new_tst_node(*str);
if (*tree_ptr == NULL)
return 0; /* Memory allocation error. */
}
if (!*(str+1)) { /* If the next char is a NUL */
(*tree_ptr)->is_word = 1;
return 1;
}
int cmp = *str - (*tree_ptr)->self;
if (cmp < 0) { tree_ptr = &( (*tree_ptr)->left ); }
else if (cmp > 0) { tree_ptr = &( (*tree_ptr)->right ); }
else { tree_ptr = &( (*tree_ptr)->sub ); }
++str;
}
}
Untested.
I want to make list made of lists and the inner list is made of items.
Union of datatype:
typedef union s_datatype {
int t_int;
char* t_char;
double t_double;
bool t_bool;
} t_datatype;
Structure of item:
typedef struct s_token {
int y;
int type;
t_datatype value;
struct s_token *next;
} t_token;
Structure of inner list:
typedef struct s_line {
int x;
int depth;
int type;
int number_of_tokens;
t_token*head;
struct s_line *next;
} t_line;
Structure of the final list:
typedef struct s_tokenized_code {
int number_of_lines;
t_line*head;
} t_tokenized_code;
Let's say that I want to add new "line" and into that line, I want to insert the "token". But I don't know, how to put this together. Can you help me? I'm not sure how to alloc this and how to work with this list of lists.
EDIT: Structure modified
Solved.
If you want to know, how to (i want to insert items to the end of list, not beginning):
t_token*init_token (int y, int type, t_datatype value) {
t_token*token = malloc(sizeof(struct s_token));
if (token == NULL) {
return NULL;
}
token->y = y;
token->type = type;
token->value = value;
token->next = NULL;
return token;
}
t_line*init_line (int x, int depth, int type) {
t_line*line = malloc(sizeof(struct s_line));
if (line == NULL) {
return NULL;
}
line->x = x;
line->depth = depth;
line->type = type;
line->number_of_tokens = 0;
line->head = NULL;
line->next = NULL;
return line;
}
t_tokenized_code*init_code (void) {
t_tokenized_code*code = malloc(sizeof(struct s_tokenized_code));
if (code == NULL) {
return NULL;
}
code->number_of_lines = 0;
code->head = NULL;
return code;
}
void insert_token (t_line*line, t_token*token) {
if (token != NULL) {
if (line->head == NULL){
line->head = token;
line->number_of_tokens += 1;
return;
}
t_token*tmp = line->head;
while (tmp->next != NULL) {
tmp = tmp->next;
}
tmp->next = token;
line->number_of_tokens += 1;
}
}
void insert_line (t_tokenized_code*code, t_line*line) {
if (line != NULL) {
if (code->head == NULL) {
code->head = line;
code->number_of_lines += 1;
return;
}
t_line*tmp = code->head;
while (tmp->next != NULL) {
tmp = tmp->next;
}
tmp->next = line;
code->number_of_lines += 1;
}
}
void free_code (t_tokenized_code*source) {
if (source == NULL) {
return;
}
t_line*line;
t_token*token;
while ((line = source->head) != NULL) {
while ((token = line->head) != NULL) {
line->head = token->next;
free(token);
}
source->head = line->next;
free(line);
}
free(source);
}
Let's make a simple test - Adding 2 tokens to the first line and 3 tokens to the second line:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
int main (void) {
t_tokenized_code*source = init_code();
t_line*line = init_line(0,0,3);
if (line == NULL) {
return 1;
}
insert_line(source, line);
t_datatype tt;
tt.t_char = "first";
t_token*token = init_token(0,6, tt);
if (token == NULL) {
return 1;
}
insert_token(line, token);
tt.t_char = "second";
token = init_token(1,6, tt);
insert_token(line, token);
line = init_line(1,0,3);
insert_line(source, line);
tt.t_char = "third";
token = init_token(0,6, tt);
insert_token(line, token);
tt.t_char = "fourth";
token = init_token(1,6, tt);
insert_token(line, token);
tt.t_char = "fifth";
token = init_token(2,6, tt);
insert_token(line, token);
line = source->head;
while (line != NULL) {
printf("****\n");
token = line->head;
while (token != NULL) {
printf("%s\n", token->value.t_char);
token = token->next;
}
line = line->next;
}
free_code (source);
return 0;
}
The output will be following:
****
first
second
****
third
fourth
fifth