segmentation fault in a linked list while loop? - c

I'm trying to setup a graph in C. I tried the graph with user input and it works perfectly. However, i am trying to implement a read from file. The last else statement is where the error is coming from because when i commented it out it compiles without any problems. I have included a comment over the block i think that has the problem. Please let me know if there is anything else needed for this question.
#include <stdio.h>
#include <stdlib.h>
struct node{
int data;
struct node* next;
};
//int counter and mainVertex would be used to determine if graph is connected.
// void graphConnection(){
//
//
//
//
//
//
// }
char* deblank(char* input)
{
int i,j;
char *output=input;
for (i = 0, j = 0; i<strlen(input); i++,j++)
{
if (input[i]!=' ')
output[j]=input[i];
else
j--;
}
output[j]=0;
return output;
}
struct node *G[1000];
int counter = 0;
char *mainVertex;
void readingEachLine(){
FILE * fp;
char * line = NULL;
size_t len = 0;
ssize_t read;
//Read file and exit if fail
fp = fopen("test.txt", "r");
if (fp == NULL)
exit(EXIT_FAILURE);
while ((read = getline(&line, &len, fp)) != -1) {
line = deblank(line);
int i = 0;
struct node* cursor = malloc(sizeof(struct node));
struct node* secondcursor = malloc(sizeof(struct node));
struct node* tempitem;
while(line[i] != '\n'){
//If its the first of the line look into the array and set struct cursor to the corresponding
//array position
if (i == 0){
mainVertex[counter] = line[0];
int convertor = line[i] - '0';
cursor = G[convertor];
counter++;
}
//if its not the first, then set a struct with that number as data
else{
tempitem = malloc(sizeof(struct node));
int convertor = line[i] - '0';
tempitem->data = convertor;
tempitem->next = NULL;
}
//if there is no element connected to the struct in array, connect the tempitem
if (cursor->next == NULL){
cursor->next = tempitem;
}
//If there are already connected elements, loop until the end of the linked list
//and append the tempitem
//ERROR: I GET SEGMENTATION FAULT FROM HERE. TRIED AFTER COMMENTING IT OUT
else{
secondcursor = cursor;
while(secondcursor->next != NULL){
secondcursor = secondcursor->next;
}
secondcursor->next = tempitem;
}
i++;
}
printf("\n");
}
}
int main(void){
for (int i = 1; i < 1000; i++)
{
G[i]= malloc(sizeof(struct node));
G[i]->data = i;
G[i]->next = NULL;
}
readingEachLine();
}
EDIT: This is how the text file looks like:
1 3 4
2 4
3 1 4
4 2 1 3

Your code has several misconceoptions:
Apparently, you can have a maximum of 1,000 nodes. You have an array G of 1,000 head pointers to linked lists. Don't allocate memory for all 1,000 nodes at the beginning. At the beginning, all lists are empty and an empty linked list is one that has no node and whose head is NULL.
In your example, cursor is used to iterate oer already existing pointers, so don't allocate memory for it. If you have code like this:
struct node *p = malloc(...);
// next use of p:
p = other_node;
you shouldn't allocate. You would overwrite p and lose the handle to the allocated memory. Not all pointers have to be initialised with malloc; allocate only if you create a node.
Your idea to strip all spaces from a line and then parse single digits will fail if you ever have more then 9 nodes. (But you cater for 1,000 node.) Don't try to parse the numbers yourself. There are library functions for that, for example strtol.
It is not clear what mainVertex is supposed to be. You use it only once, when you assign to it. You treat it like an array, but it is a global pointer, initialised to NULL. When you dereference it, you get undefined behaviour, which is where your segmentation fault probably comes from.
Here's a program that does what you want to do. (It always inserts nodes at the head for simplicity and it should have more allocation checks.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
enum {
maxNodes = 1000
};
struct node{
int data;
struct node* next;
};
struct node *G[maxNodes];
size_t nnode = 0;
int read_graph(const char *fn)
{
FILE * fp;
char * line = NULL;
size_t len = 0;
fp = fopen(fn, "r");
if (fp == NULL) return -1;
while (getline(&line, &len, fp) != -1) {
char *p;
char *end;
int id;
int n;
id = strtol(line, &end, 10);
if (end == line) continue;
if (id < 1 || id > maxNodes) break;
if (id > nnode) nnode = id;
id--;
p = end;
n = strtol(p, &end, 10);
while (p != end) {
struct node *nnew = malloc(sizeof(*nnew));
nnew->data = n - 1;
nnew->next = G[id];
G[id] = nnew;
p = end;
n = strtol(p, &end, 10);
}
}
fclose(fp);
free(line);
return 0;
}
int main(void)
{
if (read_graph("test.txt") < 0) {
fprintf(stderr, "Couldn't gread raph.\n");
exit(1);
}
for (int i = 0; i < nnode; i++) {
struct node *p = G[i];
if (p) {
printf("%d:", i + 1);
for (; p; p = p->next) {
printf(" %d", p->data + 1);
}
puts("");
}
}
for (int i = 0; i < nnode; i++) {
struct node *p = G[i];
while (p) {
struct node *old = p;
p = p->next;
free(old);
}
}
return 0;
}

Related

Creating a singly linked list from a .txt file and reversing odd numbers of each line in C

I have a project about linked lists but I'm having a hard time doing it. The teacher wants me to read a .txt file and create singly linked list from it. After that, I need to reverse odd numbers of every line. Then print it. Here is the code which I used for printing the linked list. But I need help to reverse the odd numbers of each line.
This is the code which I used to print the list:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct list {
char *string;
struct list *next;
};
typedef struct list LIST;
int main(void) {
FILE *fp;
char line[10];
LIST *current, *head;
head = current = NULL;
fp = fopen("data.txt", "r");
while(fgets(line, sizeof(line), fp)){
LIST *node = malloc(sizeof(LIST));
node->string = strdup(line);
node->next =NULL;
if(head == NULL){
current = head = node;
} else {
current = current->next = node;
}
}
fclose(fp);
for(current = head; current ; current=current->next){
printf("%s", current->string);
}
return 0;
}
Here is the content of the .txt file:
10
9,6,11,7,12,18,19,14,15,13
13,14,9,12,15,3,18,20,1,2
4,11,8,17,12,15,20,10,3,16
19,4,11,1,13,17,12,16,20,18
1,6,20,11,13,9,7,16,10,2
12,4,11,16,3,20,9,19,17,15
20,3,10,12,18,2,5,14,15,16
18,19,15,2,6,9,1,3,17,4
7,6,20,1,11,4,3,5,8,16
1,2,16,13,17,10,12,9,4,15
"But I need help to reverse the odd numbers of each line."
There are several other parts that need to be considered before this step can be developed.
Following are suggestions for a functions approach implementation using your problem description. A few items are simply suggestions to simplify the existing code. And a few other steps, are not mentioned as necessary, but should be considered:
Since you are not mandated to use char *string; in your problem description, choose to use a reasonable string length variable that does not require an additional layer of dynamic allocation, such as char string[260]; (or even smaller to fit your input file.) This will greatly simplify the code.
Because the input file is sized with lines ~30 char long, declare the variable line to be at least large enough to contain one line, eg 80 would allow larger values, and still allow enough space, but since memory is cheap, go with the same size as is used in the string member of your linked list.
Move the work of populating each new node to a function. It also will greatly simplify the program, and provide greater readability. Eg: void insert(LIST **head_ref, char *str);
Always test the return of fopen() before attempting to use the file descriptor.
To manipulate the contents of each odd row (eg 1, 3, 5, 7, 9), as numbers, the contents of each line read in from a file as a string, needs to first be converted to a collection of numbers. This suggests an additional member be added to the struct. For example int num[10].
The previous observation implicitly suggests the need of an additional function to parse and convert each comma delimited string into discrete integer values. Perhaps with the prototype: void parseIntArray(LIST **list);
The next and final task also suggests an additional function to reverse the contents of selected array member integer arrays. This one might use a prototype such as: void reverse_odd(LIST **list, size_t size);
Finally, because each node of LIST created required dynamically allocated memory, once finished using LIST, the memory must be given back to the OS to prevent memory leaks. An additional function to do this could be prototyped: void freeList(LIST **head);
Following are the main() function and preceding support declarations etc. It is intended here to illustrate the above suggested steps, and the benefits of breaking down a bigger problem into smaller problems, then implementing each smaller solution to support the whole. Benefits include for example readability and maintainability and potential re-use of code-base, (Note the similarity of argument lists in each supporting function.):
#define MAX_STRLEN 260 //use mnemonic values to avoid magic numbers in code
struct list {
char string[MAX_STRLEN];
int arr[10];
struct list *next;
};
typedef struct list LIST;
//Prototypes of 'smaller' solutions
void insert(LIST **head_ref, char *str);
void parseIntArray(LIST **list);
void reverse_odd(LIST **list, size_t size);
void freeList(LIST **head);
int main(void)
{
FILE *fp;
char line[MAX_STRLEN];
LIST *current, *head;
char *convPtr = NULL;
head = current = NULL;
fp = fopen("data.txt", "r");
if(fp)
{
//consume 1st line
if(fgets(line, sizeof(line), fp));//10
{
sizeArray = strtol(line, &convPtr, 10);
if(errno != ERANGE)
{
while(fgets(line, sizeof(line), fp))
{
//(see implementations of each below)
//create new node, insert num string
insert(&current, line);
//convert new->string to integers, place in new->array
parseIntArray(&current);
//reverse 'odd' contents of each array
reverse_odd(&current, sizeArray);
}
}else{//handle error and leave}
}
fclose(fp);
}else{//handle error and leave}
//At this point in code, entire file is captured into nodes of list.
//use list as needed
//When finished using list, memory must be freed to prevent memory leaks
head = current;
freeList(&head);
return 0;
}
The remaining code segments are the function implementations used above:
void freeList(LIST **head)
{
LIST *tmp;
while (*head != NULL)
{
tmp = (*head);
(*head) = (*head)->next;
free(tmp);
}
}
//create new node, insert num string
void insert(LIST **head_ref, char *str)
{
int *arr = malloc(numNodes * sizeof(*arr));
//allocate node
LIST* new = calloc(1, sizeof(*new));
//put in the data
strcpy(new->string, str);
//Make next of new node as head
new->next = (*head_ref);
//Move the head to point to the new node
(*head_ref) = new;
}
//convert new->string to integers, place in list->array
void parseIntArray(LIST **list)
{
char *tok = NULL;
int i = 0;
int tmp = 0;
char *sArray = strdup((*list)->string);
tok = strtok(sArray, ",\n ");
while(tok)
{
errno = 0;
tmp = atoi(tok);
if(errno == ERANGE)
{
printf("Error converting string to number\nExiting.");
return;
}
(*list)->arr[i] = tmp;
i++;
tok = strtok(NULL, ",\n ");
}
}
//reverse 'odd' contents of list->array
void reverse_odd(LIST **list, size_t size)
{
int *ptr = &((*list)->arr[0]);
int *tmp = malloc(size * sizeof(*tmp));
memset(tmp, -1, size*sizeof(*tmp));
for(int i=0;i<size;i++)
{
if(ptr[i]%2 != 0)
tmp[size-1-i] = ptr[i];
}
for(int i=0;i<size;i++)
{
if(tmp[i] < 0)
{
while((*ptr)%2 != 0 ) ptr++;
tmp[i] = *ptr;
ptr++;
}
}
memcpy((*list)->arr, tmp, size*sizeof(int));
}
This hope this code will do the job.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct line {
struct num *first;
struct line *next;
} LineNode;
typedef struct num {
int num;
int order;
struct num *next;
} NumNode;
int main() {
FILE *fp;
char ch;
int counter = 0;
NumNode *curr_num, *even_ptr, *odd_ptr, *odd_head, *even_head;
LineNode *curr_line, *line_head;
curr_num = even_head = odd_head = even_ptr = odd_ptr = NULL;
line_head = curr_line = NULL;
fp = fopen("data.txt", "r");
if (fp == NULL)
{
return 1;
}
ch = fgetc(fp);
while(ch != EOF){
if (ch >= 48 && ch <= 57)
{
int n = 0;
while (ch != EOF && ch != '\n' && ch >= 48 && ch <= 57)
{
int x = ch - 48;
n = n * 10 + x;
ch = fgetc(fp);
}
NumNode *node = malloc(sizeof(NumNode));
node->num = n;
node->order = counter;
node->next =NULL;
if (n % 2 == 0){
if(even_head == NULL){
even_head = even_ptr = node;
} else {
even_ptr = even_ptr->next = node;
}
}else{
if(odd_head == NULL){
odd_head = node;
} else {
node->next = odd_head;
odd_head = node;
}
}
counter++;
}
if (ch == '\n' || ch == EOF)
{
NumNode *num_node, *head;
num_node = head = NULL;
even_ptr = even_head;
odd_ptr = odd_head;
counter = 0;
if (even_head != NULL && even_head->order == counter){
head = num_node = even_ptr;
even_ptr = even_ptr->next;
} else {
head = num_node = odd_ptr;
odd_ptr = odd_ptr->next;
}
counter++;
while (even_ptr != NULL)
{
if (even_ptr->order == counter) {
num_node = num_node->next = even_ptr;
even_ptr = even_ptr->next;
}
else if (odd_ptr != NULL) {
num_node = num_node->next = odd_ptr;
odd_ptr = odd_ptr->next;
}
counter++;
}
while (odd_ptr != NULL)
{
num_node = num_node->next = odd_ptr;
odd_ptr = odd_ptr->next;
}
LineNode *node = malloc(sizeof(LineNode));
node->next =NULL;
node->first = head;
if (line_head == NULL)
line_head = curr_line = node;
else
curr_line = curr_line->next = node;
odd_head = even_head = NULL;
counter = 0;
}
ch = fgetc(fp);
}
fclose(fp);
for(curr_line = line_head; curr_line != NULL ; curr_line=curr_line->next) {
for(curr_num = curr_line->first; curr_num != NULL ; curr_num=curr_num->next) {
printf("%d", curr_num->num);
if (curr_num->next != NULL)
printf(",");
}
printf("\n");
}
return 0;
}

Problems regarding an multiple choice question program

I have created a program to generate the result of a multiple choice exam. The program was supposed to show the total number of mistakes, blank answers and the number of the question which were answered incorrectly. For the following input:
6
1..223
(Here . means blank answer)
123124
The output was supposed to be:
Your result:
Mistakes: 3
Blanks: 2
Your mistakes are following:
4 5 6
Your blanks are following:
2 3
But the code shows undefined behavior. It seems to go through infinite loop. Expecting solution to my problem shortly. Thanks in advance.
#include <stdio.h>
#include <stdlib.h>
typedef struct node
{
char data;
struct node* next;
}node;
void printNode(node* head)
{
node* local = head;
int i = 0;
if(local -> data == 0)
{
printf("0");
return;
}
while(local != NULL)
{
if(i == 3)
{
i = 0;
printf("\n");
}
printf("%d\t", local -> data);
local = local -> next;
++i;
}
}
void freeNode(node** head)
{
node* temp = (*head);
while((*head) != NULL)
{
(*head) = (*head) -> next;
free(temp);
temp = (*head);
}
}
int main()
{
int n, i, flagB, flagM, blnk, mstk;
blnk = mstk = flagB = flagM = 0;
printf("Enter the number of questions: ");
scanf("%d", &n);
char ques[n], ans[n];
if(n == 0)
return 0;
node* headM = (node*)malloc(sizeof(node));
node* nodeM;
node* headB = (node*)malloc(sizeof(node));
node* nodeB;
printf("Enter your given answers: ");
fflush(stdin);
for(i = 0; i < n; ++i)
{
scanf("%c", &ques[i]);
}
fflush(stdin);
ques[n] = '\0';
printf("Enter the solution: ");
for(i = 0; i < n; ++i)
{
scanf("%c", &ans[i]);
}
ans[n] = '\0';
for(i = 0; i < n; ++i)
{
if(ques[i] == '.')
{
++blnk;
if(flagB == 0)
{
headB -> data = i + 1;
headB -> next = NULL;
nodeB = headB;
continue;
}
nodeB -> next = (node*)malloc(sizeof(node));
nodeB = nodeB -> next;
nodeB -> data = i + 1;
nodeB-> next = NULL;
flagB = 1;
}
else if(ques[i] != ans[i])
{
++mstk;
if(flagM == 0)
{
headM -> data = i + 1;
headM -> next = NULL;
nodeM = headM;
continue;
}
nodeM -> next = (node*)malloc(sizeof(node));
nodeM = nodeM -> next;
nodeM -> data = i;
nodeM-> next = NULL;
flagM = 1;
}
}
printf("Your result:\n\tMistakes: %d\n\tBlanks: %d\n", mstk, blnk);
printf("Your mistakes are follwing:\n");
printNode(headM);
printf("\nYour blanks are follwing:\n");
printNode(headB);
freeNode(&headM);
freeNode(&headM);
return 0;
}
Here are some additional thoughts. What makes your code very convoluted and hard to debug and keep the logic straight is you are mixing your linked-list Add function within the logic of your blanks and mistakes and using special conditions to handle adding the first node and subsequent nodes. This make things difficult to test and debug. If you need to add nodes to a linked-list, then write an add() function that you can thoroughly test and debug before putting it to use in your code.
Your VLAs ques and ans are too short to hold a string of n characters, at minimum they must be n + 1 characters long to provide storage for the nul-termining character that marks the end of the string. Ideally, you will make them at least 2-character longer to also hold the '\n' which will allow you to take input with fgets() rather than looping scanf() a character at a time -- which is just nuts.
You do not need to pass the address of the pointer to freeNode() simply pass a pointer. Sure freeNode() will receive a copy of the pointer -- but it will contain the original address -- and since you don't have to make any changes to that pointer available back to the caller, there is no need to pass the address of the pointer (there won't be any list left to worry about when you are done...)
So putting those pieces together, adding an add() function to add to your linked lists (See Linus on Understanding Pointers for why a pointer-to-pointer is used to iterate to the end), and adding a simple empty_stdin() function to remove the '\n' left in stdin from reading n with scanf() before making calls to fgets() later for ques and ans, you could do:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* simple function to empty stdin to end-of-line */
void empty_stdin (void)
{
int c = getchar();
while (c != '\n' && c != EOF)
c = getchar();
}
typedef struct node
{
int data;
struct node *next;
} node;
node *add(node **head, int v)
{
node **ppn = head, /* pointer to pointer to head */
*pn = *head, /* pointer to head */
*newn = malloc (sizeof *newn); /* allocate new node */
if (!newn) { /* validate allocation */
perror ("malloc-node");
return NULL;
}
newn->data = v; /* initialize members values */
newn->next = NULL;
while (pn) { /* iterate to end of list */
ppn = &pn->next;
pn = pn->next;
}
return *ppn = newn; /* add & return new node */
}
void printNode (node *head)
{
for (; head; head = head->next)
printf (" %d", head->data);
putchar ('\n');
}
void freeNode(node *head)
{
while (head != NULL)
{
node *victim = head;
head = head->next;
free(victim);
}
}
int main()
{
int n, i, blnk, mstk;
blnk = mstk = 0;
node *headM = NULL; /* declare pointers and initialize NULL */
node *headB = NULL;
printf ("Enter the number of questions: ");
/* you must VALIDATE every user-input */
if (scanf ("%d", &n) != 1) {
fputs ("error: invalid integer input.\n", stderr);
return 1;
}
empty_stdin(); /* remove '\n' (and any other chars from user) */
/* before calling fgets() below */
if (n == 0) /* check 0 BEFORE VLA declaration */
return 0;
char ques[2*n], ans[2*n]; /* declare question/answer VLAs, don't skimp */
printf("Enter your given answers: ");
if (!fgets(ques, sizeof ques, stdin)) /* read ques from stdin */
return 1;
ques[strcspn(ques, "\r\n")] = 0; /* trim '\n' from end of ques */
printf("Enter the solution: ");
if (!fgets(ans, sizeof ans, stdin)) /* read ans from stdin */
return 1;
ans[strcspn(ans, "\r\n")] = 0; /* ditto for ans */
for(i = 0; i < n; ++i) /* loop n times */
{
if(ques[i] == '.') /* if blank */
{
add (&headB, i + 1); /* add to list headB */
++blnk; /* increment counter */
}
else if(ques[i] != ans[i]) /* if mistake */
{
add (&headM, i + 1); /* add to list headM */
++mstk; /* increment counter */
}
}
printf ("Your result:\n\tMistakes: %d\n\tBlanks: %d\n"
"Your mistakes are following:\n", mstk, blnk);
printNode(headM);
printf("\nYour blanks are following:\n");
printNode(headB);
freeNode(headM); /* no need to pass the address of the pointer to free */
freeNode(headB); /* there won't be a list left when freeNode is done */
return 0;
}
There is a lot there, so go through it slowly.
Example Use/Output
$ ./bin/llquestions
Enter the number of questions: 6
Enter your given answers: 1..223
Enter the solution: 123124
Your result:
Mistakes: 2
Blanks: 2
Your mistakes are following:
4 6
Your blanks are following:
2 3
(note: in 1..223 and 123124, 5 is not a mistake, the 2 is in the correct position at the end)
Look things over and let me know if you have further questions.
I made some changes to this code, check this out.
#include <stdio.h>
#include <stdlib.h>
typedef struct Node node;
struct Node
{
int data;
struct Node * next;
};
void printNode(node *head)
{
node *local = head;
while (local != NULL)
{
printf("%d ", local->data);
local = local->next;
}
}
void freeNode(node **head)
{
node *temp = (*head);
while ((*head) != NULL)
{
(*head) = (*head)->next;
free(temp);
temp = (*head);
}
}
int main()
{
int n, i, flagB = 0, flagM = 0, blnk = 0, mstk = 0;
blnk = mstk = flagB = flagM = 0;
printf("Enter the number of questions: ");
scanf("%d", &n);
char ques[n], ans[n];
if (n == 0)
return 0;
node *headM = (node*) malloc(sizeof(node));
headM->data = 0;
node *nodeM = headM;
node *headB = (node*) malloc(sizeof(node));
headB->next = 0;
node *nodeB = headB;
printf("Enter your given answers: ");
for (i = 0; i < n; ++i)
{
scanf("%s", &ques[i]);
}
ques[n] = '\0';
fflush(stdin);
printf("Enter the solution: ");
for (i = 0; i < n; ++i)
{
scanf("%s", &ans[i]);
}
ans[n] = '\0';
fflush(stdin);
for (i = 0; i < n; ++i)
{
if (ques[i] == '.')
{ ++blnk;
if (flagB == 0)
{
nodeB->data = i + 1;
nodeB->next = NULL;
flagB = 1;
continue;
}
nodeB->next = (node*) malloc(sizeof(node));
nodeB = nodeB->next;
nodeB->data = i + 1;
nodeB->next = NULL;
}
else if (ques[i] != ans[i])
{ ++mstk;
if (flagM == 0)
{
nodeM->data = i + 1;
nodeM->next = NULL;
flagM = 1;
continue;
}
nodeM->next = (node*) malloc(sizeof(node));
nodeM = nodeM->next;
nodeM->data = i + 1;
nodeM->next = NULL;
//flagM = 1; //You made a mistake here
}
}
nodeM = headM;
nodeB = headB;
printf("Your result:\n\tMistakes: %d\n\tBlanks: %d\n", mstk, blnk);
printf("Your mistakes are following question numbers:\n");
if (mstk != 0)
printNode(headM);
else
printf("No Mistakes\n");
printf("\nYour blanks are following question numbers:\n");
if (blnk != 0)
printNode(headB);
else
printf("No Blanks\n");
freeNode(&headM);
freeNode(&headM);
return 0;
}

Reading file into linked list

I am trying to read a text file I made into a linked list, the text file looks like this:
around 1 2 1
bread 2 4 3 5 1
four 1 3 2
head 3 1 2 2 1 5 1
has 2 3 1 5 2
Where the first string of each line are just words from a paragraph. The first number after the word is the number of lines the word was found in, in the paragraph. Then the following numbers are pairs of (line, occurrences) in the paragraph.
For example, for the word bread:
It was found in 2 lines in the paragraph. In the first line, line 4, it was found 3 times. Then in the second line, line 5, it was found 1 time.
I am trying to create a linked list from this text file, my program looks like this so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#define MAXWORD 999
typedef struct node node_t;
struct node {
char *word;
int num_lines;
int paragraph;
int freq;
node_t *next;
};
int
main(int argc, char *argv[]) {
FILE *fp;
char word[MAXWORD+1];
int ch, line_count = 0, len = 0;
node_t *node = (node_t*)malloc(sizeof(*node));
node_t *curr, *prev;
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "Error reading file\n");
exit(EXIT_FAILURE);
}
/* Just trying to store the string so far */
while ((ch = getc(fp)) != EOF) {
if (ch == '\n') {
line_count++;
strcpy(node->word, word);
}
if (isalpha(ch)) {
word[len] = ch;
len++;
word[len] = '\0';
}
if (isdigit(ch)) {
len = 0;
}
}
printf("line count = %d", line_count);
free(node)
fclose(fp);
return 0;
}
In this snippet, I have been trying to store the string in the linked list data structure, but I have not yet used dynamic arrays to store the numbers after the word which occur in the text file. I know I will need to build this data structure using malloc() and realloc(), but I am unsure of how to do this.
How should I do this?
My desired output would look like this:
There are five words in the text file,
and 9 pairs of (line, occurences)
Word: pairs
"around": 2,1
"bread": 4,3; 5,1
"four": 3,2
"head": 1,2; 2,1; 5,1
"has": 3,1; 5,2
UPDATE
I have been researching this and it seems to be very similar to the inverted index problem, where I have seen that using a binary search tree would be best.
Could I implement my binary search tree like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#define MAXWORD 999
typedef char word_t[MAXWORD+1];
typedef struct node node_t;
struct node {
void *data;
int *ints;
node_t *rght;
node_t *left;
};
typedef struct {
node_t *root;
int (*cmp)(void*, void*);
} tree_t;
int
main(int argc, char *argv[]) {
FILE *fp;
fp = fopen(argv[1], "r");
if (fp == NULL) {
fprintf(stderr, "Error reading file\n");
exit(EXIT_FAILURE);
}
while ((ch = getc(fp)) != EOF) {
if (ch == '\n') {
line_count++;
}
}
fclose(fp);
return 0;
}
You could do something like this:
typedef struct {
int paragraph;
int freq;
} stats_t;
struct node {
char *word;
int num_lines;
stats_t *stats;
node_t *next;
};
Then after you parse the string you can do:
ps = calloc(line_count, sizeof(stats_t));
to get a pointer to an array of stats_t structs, which you can fill with line locations and frequencies. Then you can store the pointer ps in your node struct.
I wrote a program that does what I think you are looking for. I modified the structs I was thinking about before:
typedef node node_t;
struct node {
char *word;
int num_lines;
int *location;
int *frequency;
node_t *next;
};
This way the nodes contain pointers to arrays of int to store the location and frequency information. Nodes and storage for the word strings, location arrays, and frequency arrays are all dynamically allocated. Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXLINE 1000
#define MAXWORD 30
typedef struct node node_t;
struct node {
char *word;
int num_lines;
int *location;
int *frequency;
node_t *next;
};
void strip(char *pln);
void normalize_word(char *pstr);
struct node * update_word(char *pwd, int lnum, struct node *phead);
struct node * find_in_list(char *pwd, struct node *phead);
int find_line_pair(int lnum, struct node *pwn);
int list_len(struct node *phead);
int num_pairs(struct node *phead);
int main(int argc, char *argv[])
{
FILE *fp;
struct node *head, *current;
char *pline, *pword;
char line[MAXLINE + 1];
char word[MAXWORD + 1];
int i, n, line_count = 0;
head = NULL;
if (argc < 2) {
fprintf(stderr, "Usage: %s filename\n", argv[0]);
exit(EXIT_FAILURE);
} else {
if ((fp = fopen(argv[1], "r")) == NULL) {
fprintf(stderr, "Unable to open file %s\n", argv[1]);
exit(EXIT_FAILURE);
}
}
/* Read in lines and process words */
pline = line;
pword = word;
while (fgets(pline, MAXLINE, fp) != NULL) {
++line_count;
strip(pline);
while ((pword = strtok(pline, " ")) != NULL) {
normalize_word(pword);
if (*pword != '\0') // don't add empty words
head = update_word(pword, line_count, head);
pline = NULL;
}
pline = line;
}
/* Display list contents */
printf("There are %d words in the text file,\n",
list_len(head));
printf("and %d pairs of (line, occurrences)\n",
num_pairs(head));
printf("Word: pairs\n");
current = head;
while (current != NULL) {
n = current->num_lines;
printf("%s:", current->word);
for (i = 0; i < n; i++) {
printf(" %d, %d;",
current->location[i], current->frequency[i]);
}
putchar('\n');
current = current->next;
}
/* Cleanup */
// close file
if (fclose(fp) != 0)
fprintf(stderr, "Error closing file %s\n", argv[1]);
// free all allocated memory
current = head;
while (current != NULL) {
free(current->word);
free(current->location);
free(current->frequency);
current = current->next;
free(head);
head = current;
}
return 0;
}
/* Remove trailing newlines */
void strip(char *pln)
{
while (*pln != '\0') {
if (*pln == '\n')
*pln = '\0';
++pln;
}
}
/* Convert word to lowercase and remove trailing
* non-alphanumeric characters */
void normalize_word(char *pstr)
{
int i = 0;
char ch;
while ((ch = pstr[i]) != '\0') {
pstr[i] = tolower(ch);
++i;
}
while ((--i >= 0) && !isalnum(pstr[i])) {
pstr[i] = '\0';
continue;
}
}
/* Update existing word node or create a new one, and return
* a pointer to the head of the list */
struct node * update_word(char *pwd, int lnum, struct node *phead)
{
struct node *found, *newnode;
char *pword;
int *ploc, *pfreq;
int index;
/* Modify existing node if word is in list */
if ((found = find_in_list(pwd, phead)) != NULL) {
// add new (location, freq) pair if word not in found line
if ((index = find_line_pair(lnum, found)) == -1) {
index = found->num_lines; // index for new pair
found->num_lines += 1; // increment number of lines
ploc = realloc(found->location, (index + 1) * sizeof(int));
pfreq = realloc(found->frequency, (index + 1) * sizeof(int));
ploc[index] = lnum; // new location
pfreq[index] = 1; // found once in this line so far
found->location = ploc; // point to new location array
found->frequency = pfreq; // point to new frequency array
}
else { // update frequency in existing line
found->frequency[index] += 1;
}
/* Set up a new node */
} else {
// allocate memory for new node
newnode = malloc(sizeof(struct node));
// allocate memory for string pointed to from node
pword = malloc((strlen (pwd) + 1) * sizeof(char));
strcpy(pword, pwd);
newnode->word = pword; // set word pointer
newnode->num_lines = 1; // only one line so far
ploc = malloc(sizeof(int));
pfreq = malloc(sizeof(int));
*ploc = lnum; // location was passed by caller
*pfreq = 1; // only one occurrence so far
newnode->location = ploc;
newnode->frequency = pfreq;
if (phead == NULL) { // if wordlist is empty
newnode->next = NULL; // only/last link in the list
phead = newnode; // newnode is the head
} else {
newnode->next = phead; // insert newnode at front of list
phead = newnode;
}
}
return phead;
}
/* Return pointer to node containing word, or NULL */
struct node * find_in_list(char *pwd, struct node *phead)
{
struct node *current = phead;
while (current != NULL) {
if (strcmp(current->word, pwd) == 0)
return current; // word already in list
current = current->next;
}
return NULL; // word not found
}
/* Return index of existing line location, or -1 */
int find_line_pair(int lnum, struct node *pwn)
{
int n = pwn->num_lines;
int index = 0;
while (index < n) {
if (pwn->location[index] == lnum)
return index; // word already found in this line
++index;
}
return -1; // word not yet found in this line
}
/* Find number of nodes in linked list */
int list_len(struct node *phead)
{
int length = 0;
struct node *current = phead;
while (current != NULL) {
++length;
current = current->next;
}
return length;
}
/* Find number of (line, occurrence) pairs */
int num_pairs(struct node *phead)
{
int num = 0;
struct node *current = phead;
while (current != NULL) {
num += current->num_lines;
current = current->next;
}
return num;
}
Note: I modified this from the previous version in the update_word() function. The original code inserted a new node at the end of the list, so the resulting list contained words in order of their first appearance in the input text. This version inserts a new node at the beginning of the list, so the resulting list contains words in reverse order of their first appearance. This speeds up node insertion and simplifies the node-insertion code from:
current = phead;
while (current->next != NULL) // find tail
current = current->next;
current->next = newnode; // add newnode to end
to:
newnode->next = phead; // insert newnode at front of list
I have no doubt that the code can be improved, but this does seem to work. I wouldn't say that this is exactly simple, but relatively straightforward. I ran it against this text file:
Three blind mice. Three blind mice.
See how they run. See how they run.
They all ran after the farmer's wife,
Who cut off their tails with a carving knife,
Did you ever see such a sight in your life,
As three blind mice?
Here are the results:
There are 31 words in the text file,
and 37 pairs of (line, occurrences)
Word: pairs
as: 6, 1;
life: 5, 1;
your: 5, 1;
in: 5, 1;
sight: 5, 1;
such: 5, 1;
ever: 5, 1;
you: 5, 1;
did: 5, 1;
knife: 4, 1;
carving: 4, 1;
a: 4, 1; 5, 1;
with: 4, 1;
tails: 4, 1;
their: 4, 1;
off: 4, 1;
cut: 4, 1;
who: 4, 1;
wife: 3, 1;
farmer's: 3, 1;
the: 3, 1;
after: 3, 1;
ran: 3, 1;
all: 3, 1;
run: 2, 2;
they: 2, 2; 3, 1;
how: 2, 2;
see: 2, 2; 5, 1;
mice: 1, 2; 6, 1;
blind: 1, 2; 6, 1;
three: 1, 2; 6, 1;
Here is my version using Binary Search Tree (BST):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
typedef struct internal_node in_node;
struct internal_node{
int line;
int freq;
in_node* next;
};
struct tree{
char *word;
int num_lines;
in_node* in_nodeptr;
in_node* current;
struct tree* right;
struct tree* left;
};
typedef struct tree* treeptr;
void free_list(in_node* in_nodeptr){
if(in_nodeptr!=NULL) {
free(in_nodeptr);
}
}
void free_bst(treeptr head){
if (head!=NULL) {
free_bst(head->right);
free_bst(head->left);
free_list(head->in_nodeptr);
free(head->word);
free(head);
}
}
void print_list(in_node* in_nodeptr){
while(in_nodeptr!=NULL){
printf("%d %d; ",in_nodeptr->line,in_nodeptr->freq);
in_nodeptr=in_nodeptr->next;
}
}
void print_bst(treeptr head){
if(head!=NULL){
printf("%s: ",head->word);
print_list(head->in_nodeptr);
printf("\n");
print_bst(head->right);
print_bst(head->left);
}
}
void input_to_bst(treeptr* head,char* word,int line){
if((*head)==NULL){
(*head)=(treeptr)malloc(sizeof(struct tree));
(*head)->word=(char*)malloc(50*sizeof(char));
strcpy(((*head)->word),word);
(*head)->num_lines=1;
(*head)->right=NULL;
(*head)->left=NULL;
(*head)->in_nodeptr=(in_node*)malloc(sizeof(in_node));
(*head)->in_nodeptr->line=line;
(*head)->in_nodeptr->freq=1;
(*head)->in_nodeptr->next=NULL;
(*head)->current=(*head)->in_nodeptr;
}
else{
int check=strcmp(((*head)->word),word);
if(check>0) input_to_bst(&((*head)->left),word,line);
else if(check<0) input_to_bst(&((*head)->right),word,line);
else{
if( (*head)->current->line==line) (*head)->current->freq++;
else {
(*head)->current->next=(in_node*)malloc(sizeof(in_node));
(*head)->current->next->line=line;
(*head)->current->next->freq=1;
(*head)->current->next->next=NULL;
}
}
}
}
int main(int argc, char *argv[]) {
treeptr head=NULL;
FILE *fp=fopen(argv[1], "r");
char word[50],ch;
int len=0,lines=1;
if (fp == NULL) {
fprintf(stderr, "Error reading file\n");
exit(1);
}
while ((ch = getc(fp)) != EOF) {
if (ch == '\n') {
word[len]='\0';
if(len>0) input_to_bst(&head,word,lines);
len=0;
lines++;
}
else if (ch==' '){
word[len]='\0';
if(len>0) input_to_bst(&head,word,lines);
len=0;
}
else if (isalpha(ch)){
word[len]=ch;
len++;
}
}
if(len>0) {
word[len]='\0';
input_to_bst(&head,word,lines);
}
print_bst(head);
fclose(fp);
free_bst(head);
return 0;
}
Every word is held as a node of the BST and also each node of BST except from the word, holds a list with all the appearances (lines and frequency ) of the word. In order to be as most efficient as possible we hold a pointer (in_node* current) to the last element of list of appearance so that we don't need to traverses every time we need to add an appearance.
As an example:
Text:
C is an imperative procedural language. It was designed to be compiled
using a relatively straightforward compiler and to require minimal
runtime support.
Output:
C: 1 1;
is: 1 1;
procedural: 1 1;
was: 1 1;
to: 1 1; 2 1;
using: 2 1;
relatively: 2 1;
straightforward: 2 1;
support: 3 1;
require: 2 1;
runtime: 3 1;
language: 1 1;
minimal: 2 1;
an: 1 1;
imperative: 1 1;
designed: 1 1;
be: 1 1;
compiled: 1 1;
compiler: 2 1;
and: 2 1;
It: 1 1;
a: 2 1;
Note that the above implementation is case sensitive for example "And" is different from "and".
If you don't wish to be case sensitive just replace the line word[len]=ch; with word[len]=tolower(ch); and works fine.
The complexity of the above algorithm is O(n^2) which would be the same if you used only linked lists but in the average case BST is O(nlogn) which is much better than linked lists and this is the reason that it is considered to be the better.
Also note that since we must keep a list for appearances of each word the complexity would be worst if we didn't keep the in_node* current pointer which gives us access to the end of each appearance list in constant time (O(1)). So I think that as terms of complexity you can't go better than O(nlogn).

Infinite loops with a linked list

I'm working on a program that takes an input of numbers from stdin and computes the median of the sequence and prints it out as a float. I'm currently getting an infinite loop in the function
len(struct node *)
at the for loop and I'm not sure why.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct node {
float *val;
struct node *next;
};
int len(struct node *list) {
int i = 0;
struct node *temp = list;
for (i = 0; temp != NULL; i++) {
temp = temp->next;
}
return i;
}
float median(int size, struct node list) {
struct node temp = list;
int i = 0;
if (size == 1) {
return *temp.val;
} else
if (size == 2) {
return (*(temp.val) + *(temp.next->val)) / 2;
} else {
if (size / 2 == 1) {
for (i = 3; i != (size / 2) - 1; i++) {
temp = *(temp.next);
}
return *temp.val;
} else {
for (i = 3; i != (size / 2); i++) {
temp = *(temp.next);
}
return (*(temp.val) + *(temp.next->val)) / 2;
}
}
}
int main() {
struct node *tmpnode;
tmpnode = malloc(sizeof(struct node));
tmpnode->next = NULL;
struct node *list = NULL;
list = tmpnode;
float temp = 0;
int err = 0;
int size = 0;
while ((err = scanf("%f", &temp)) != EOF) {
if (err < 1) {
fprintf(stderr, "Error: non-integer character inputted\n");
return 1;
}
tmpnode->val = &temp;
tmpnode->next = list;
list = tmpnode;
}
size = len(list);
if (size == 0) {
fprintf(stderr, "Error: no inputs found");
return 1;
}
printf("%f\n", median(size, *list));
return 0;
}
Edit: I've fixed the infinite loop, but now I'm getting a segfault at temp = *(temp.next) in median(). Do I need to allocate for temp?
You created only one node and assigned next of the node to itself, so this is cause of the infinite loop.
Create new nodes and link them in the input loop.
Assigning address of temp to all nodes is also not good.
Your main() function should be like this:
int main(void){
struct node *tmpnode;
tmpnode = malloc(sizeof(struct node));
if(tmpnode == NULL){
perror("malloc 1");
return 1;
}
tmpnode->next = NULL;
struct node *list = NULL;
list = tmpnode;
float temp = 0;
int err = 0;
int size = 0;
while((err = scanf("%f", &temp)) != EOF){
if(err < 1){
fprintf(stderr, "Error: non-integer character inputted\n");
return 1;
}
tmpnode->val = malloc(sizeof(float));
if(tmpnode->val == NULL){
perror("malloc 2");
return 1;
}
*tmpnode->val = temp;
tmpnode->next = malloc(sizeof(struct node));
if(tmpnode->next == NULL){
perror("malloc 3");
return 1;
}
tmpnode = tmpnode->next;
tmpnode->val = NULL;
tmpnode->next = NULL;
}
size = len(list);
if(size == 0){
fprintf(stderr, "Error: no inputs found");
return 1;
}
printf("%f\n", median(size, *list));
/* code to free the list should be here */
return 0;
}
(I gave input 1 2 3 4 5 and this program's output was 1.500000, which might be wrong)
If you're looking for the median you would have to arrange the nodes in order and get the number that is in the middle.If the number of nods is even and there is no middle you should add the two middlemost numbers and divide them by two.
Is the sequence in order? If not you're miscalculating the median.
Supposing the sequence is in order.
I didn't really understand the usefulness of this statement
if(size/2 == 1)
Maybe you're trying to see if the size is odd. In that case you should do:
> if(size%2 == 1)
Why the list is probably looping might be due to this
for(i = 3; i != (size/2); i++){
temp = *(temp.next);
}
Suppose you pass a 5 to the function size/2=2 (decimal part is lost), so it'll keep on going until an overflow occurs and it actually reaches 2, making your program most probably seg_fault in the process.
Start from i=0, because even though you started from 3 your current node is not the third one but the FIRST ONE.
Good luck hope this helps!!!!

Hashmap with Linked List to find word count

I have been working on this little project for quite some time and I can't figure out why I'm not getting the results that are expected. I am a beginner to C programming so my understanding with pointers and memory allocation/deallocation is novice. Anyways, I have constructed this segment of code by originally building a hash function, then adding a count to it. However, when I test it, sometimes the count works, sometimes it doesn't. I'm not sure whether it's the fault of the hash function, or the fault of the way I set up my count. The text file is read one line at a time and is a string consisting of a hexadecimal.
struct node {
char *data;
struct node *next;
int count; /* Implement count here for word frequencies */
};
#define H_SIZE 1024
struct node *hashtable[H_SIZE]; /* Declaration of hash table */
void h_lookup(void)
{
int i = 0;
struct node *tmp;
for(i = 0; i < H_SIZE; i++) {
for(tmp = hashtable[i]; tmp != NULL; tmp = tmp->next) {
if(tmp->data != 0) {
printf("Index: %d\nData: %s\nCount: %d\n\n", i,
tmp->data, tmp->count);
}
}
}
}
/* self explanatory */
void h_add(char *data)
{
unsigned int i = h_assign(data);
struct node *tmp;
char *strdup(const char *s);
/* Checks to see if data exists, consider inserting COUNT here */
for(tmp = hashtable[i]; tmp != NULL; tmp = tmp->next) {
if(tmp->data != 0) { /* root node */
int count = tmp->count;
if(!strcmp(data, tmp->data))
count= count+1;
tmp->count = count;
return;
}
}
for(tmp = hashtable[i]; tmp->next != NULL; tmp = tmp->next);
if(tmp->next == NULL) {
tmp->next = h_alloc();
tmp = tmp->next;
tmp->data = strdup(data);
tmp->next = NULL;
tmp->count = 1;
} else
exit(EXIT_FAILURE);
}
/* Hash function, takes value (string) and converts into an index into the array of linked lists) */
unsigned int h_assign(char *string)
{
unsigned int num = 0;
while(*string++ != '\0')
num += *string;
return num % H_SIZE;
}
/* h_initialize(void) initializes the array of linked lists. Allocates one node for each list by calling h_alloc which creates a new node and sets node.next to null */
void h_initialize(void)
{ int i;
for(i = 0; i <H_SIZE; i++) {
hashtable[i] = h_alloc();
}
}
/* h_alloc(void) is a method which creates a new node and sets it's pointer to null */
struct node *h_alloc(void)
{
struct node *tmp = calloc(1, sizeof(struct node));
if (tmp != NULL){
tmp->next = NULL;
return tmp;
}
else{
exit(EXIT_FAILURE);
}
}
/* Clean up hashtable and free up memory */
void h_free(void)
{
struct node *tmp;
struct node *fwd;
int x;
for(x = 0; x < H_SIZE; x++) {
tmp = hashtable[x];
while(tmp != NULL) {
fwd = tmp->next;
free(tmp->data);
free(tmp);
tmp = fwd;
}
}
}
I assume that the count is not being incremented when it does not work. It is possible that strdup is not able to allocate memory for the new string and is returning NULL. You should check the return value to and exit gracefully if it fails.

Resources