Adding strings to a linked list results in a seg fault - c

I am trying to add strings that I am reading from a text file to a linked list.
Since I don't know how long the file or the string is , I want to do this dynamically.
But somewhere along the line I get a segmentation fault.
I have tried everything but I think I overlooked something crucial.
Could someone tell me what I am doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
char *name;
int age;
struct node* next;
}node;
/* Append a new node holding a copy of n_m to the end of the list.
 * Returns the list head (the new node itself when head was NULL).
 * NOTE: the name buffer is a fixed 100 bytes and strcpy() does not bound
 * the copy; the node's age member is never assigned here. */
node *add(node *head, char* n_m){
node *new_node;
new_node = (node*)malloc(sizeof(node));
if(new_node == NULL)
printf("Fehler bei Speicher reservierung..."); /* only reports; execution continues */
new_node->name = (char*)malloc(100*sizeof(char));
if(new_node->name == NULL)
printf("Fehler bei Speicher reservierung..."); /* only reports; execution continues */
strcpy(new_node->name, n_m);
/* empty list: the new node becomes the head */
if(head == NULL){
head = new_node;
head->next = NULL;
return head;
}
/* otherwise walk to the last node and link the new node after it */
node *current;
current = head;
while(current->next != NULL){
current = current->next;
}
current->next = new_node;
new_node->next = NULL;
return head;
}
void print(node *head){
node *current;
current = head;
while(current != NULL){
printf("%s\n", current->name);
current = current->next;
}
}
/* Read space-separated words from test.txt, append each one to the list,
 * then print the list. */
int main(){
node *head = NULL;
char character; /* holds fgetc()'s int result and is then compared with EOF */
FILE *fp;
fp = fopen("test.txt", "r"); /* result is used below without a NULL check */
while ((character = fgetc(fp)) != EOF) {
char *n_m;
n_m = (char*)malloc(100 * sizeof(char)); /* scratch buffer for one word */
if(n_m == NULL)
printf("Fehler bei Speicher reservierung..."); /* only reports; execution continues */
int i = 0;
while (character != ' ') { /* ends only on a space: no EOF test, no bound on i */
n_m[i++] = character;
character = fgetc(fp);
}
n_m[++i] = '\0'; // NUL-terminate (the pre-increment skips index i, so n_m[i] is never written)
head = add(head, n_m); /* add() copies the text, so the scratch buffer can be freed */
free(n_m);
}
print(head);
return 0;
}

Your biggest issue is your read of characters does not trap EOF and will continue reading after EOF is encountered, causing i to exceed your array bounds invoking Undefined Behavior leading to your SegFault. The code in question is:
while (character != ' ') {
n_m[i++] = character;
character = fgetc(fp);
}
Since POSIX files do not end with a ' ' (space), but instead a '\n', your read of the last word in the file does not stop at EOF. You further have problems with multiple spaces together repeatedly writing nodes containing the empty-string to your list.
You also fail to handle any whitespace other than the space character, meaning you are including '\t', '\n', vertical tab, etc. within the words you store. Instead of comparing against ' ', use isspace(), declared in ctype.h.
n_m[++i] = '\0'; should be n_m[i] = '\0';. You increment i with n_m[i++] = character;. You do NOT want to increment i again with the pre-increment operator before nul-terminating your string. That results in the last character in the string being indeterminate (again invoking undefined behavior when a read of the string is attempted)
Fixing those issues (and using c instead of character, ndx instead of i and buf instead of n_m), your read and add() to your list would resemble:
while ((c = fgetc(fp)) != EOF) { /* read each char in file */
if (isspace(c) || ndx == MAXC - 1) { /* is space or buf full? */
if (in) { /* were we within word? */
buf[ndx] = 0; /* nul-terminate */
head = add (head, buf); /* add node to list */
}
if (ndx < MAXC - 1) /* buffer not full */
in = 0; /* set in flag zero */
ndx = 0; /* reset index zero */
}
else { /* otherwise */
buf[ndx++] = c; /* add char to buf */
in = 1; /* set in flag 1 */
}
}
(note: the variable in is used as an in/out flag that records whether you are currently inside a word reading characters or between words reading whitespace. This solves the problem you would otherwise have with several whitespace characters in a row, e.g. "hello     world", which would add empty strings to the list.)
Optional, but helpful, when allocating nodes that also contain members that need to be allocated is to write a createnode() function that fully Allocates and Initializes all node members rather than putting that code in add(). It keeps things clean and ensures every node you allocate is fully initialized before its use in add(). For example:
/** create new node, allocate and initialize all member values,
 *  return pointer to node on success, NULL otherwise.
 *  the string 's' is copied into newly allocated storage, so the caller
 *  must eventually free both node->name and the node itself.
 */
node *createnode (char *s)
{
    node *new_node = malloc (sizeof *new_node);     /* allocate/validate node */

    if (new_node == NULL) {
        perror ("malloc-Fehler bei Speicher reservierung...");
        return NULL;
    }

    new_node->name = malloc (strlen (s) + 1);       /* allocate/validate name */
    if (new_node->name == NULL) {
        perror ("malloc-Fehler bei Speicher reservierung...");
        free (new_node);            /* don't leak the node when name fails */
        return NULL;
    }

    strcpy (new_node->name, s);     /* initialize all node values */
    new_node->age = 0;
    new_node->next = NULL;

    return new_node;                /* return newly allocated/initialized node */
}
Then your add() function reduces to:
/** append a node holding a copy of string 'n_m' to the end of the list.
 *  returns the first node of the list (the new node when the list was
 *  empty); terminates the program with EXIT_FAILURE if the node cannot
 *  be created.
 */
node *add (node *head, char *n_m)
{
    node *fresh = createnode (n_m);     /* fully initialized node or NULL */
    node **link = &head;                /* address of the pointer to patch */

    if (!fresh)
        exit (EXIT_FAILURE);

    while (*link)                       /* find the terminating NULL link */
        link = &(*link)->next;

    *link = fresh;                      /* works for empty and non-empty list */

    return head;
}
Putting it all together and adding a del_list() function to free all allocated memory for the list, you could do the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
typedef struct node {
char *name;
int age;
struct node *next;
} node;
/** create new node, allocate and initialize all member values,
 *  return pointer to node on success, NULL otherwise.
 *  the string 's' is copied into newly allocated storage, so the caller
 *  must eventually free both node->name and the node itself.
 */
node *createnode (char *s)
{
    node *new_node = malloc (sizeof *new_node);     /* allocate/validate node */

    if (new_node == NULL) {
        perror ("malloc-Fehler bei Speicher reservierung...");
        return NULL;
    }

    new_node->name = malloc (strlen (s) + 1);       /* allocate/validate name */
    if (new_node->name == NULL) {
        perror ("malloc-Fehler bei Speicher reservierung...");
        free (new_node);            /* don't leak the node when name fails */
        return NULL;
    }

    strcpy (new_node->name, s);     /* initialize all node values */
    new_node->age = 0;
    new_node->next = NULL;

    return new_node;                /* return newly allocated/initialized node */
}
/** append a node holding a copy of string 'n_m' to the end of the list.
 *  returns the first node of the list (the new node when the list was
 *  empty); terminates the program with EXIT_FAILURE if the node cannot
 *  be created.
 */
node *add (node *head, char *n_m)
{
    node *fresh = createnode (n_m);     /* fully initialized node or NULL */
    node **link = &head;                /* address of the pointer to patch */

    if (!fresh)
        exit (EXIT_FAILURE);

    while (*link)                       /* find the terminating NULL link */
        link = &(*link)->next;

    *link = fresh;                      /* works for empty and non-empty list */

    return head;
}
/** print the name held by each node, one per line, from head to tail */
void print (node * head)
{
    for (node *it = head; it != NULL; it = it->next)
        printf ("%s\n", it->name);
}
/** delete all nodes in list, freeing each node's name and the node itself */
void del_list (node *head)
{
    while (head != NULL) {
        node *rest = head->next;    /* remember the remainder before freeing */

        free (head->name);
        free (head);
        head = rest;
    }
}
/** read whitespace-separated words from the file given as 1st argument
 *  (stdin by default), store each in the list, print the list and free it.
 *  a word longer than MAXC - 1 characters is stored as consecutive chunks.
 *  returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    char buf[MAXC];
    int c = 0, in = 0, ndx = 0;
    node *head = NULL;
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while ((c = fgetc(fp)) != EOF) {        /* read each char in file */
        if (isspace(c)) {                   /* whitespace ends a word */
            if (in) {                       /* were we within word? */
                buf[ndx] = 0;               /* nul-terminate */
                head = add (head, buf);     /* add node to list */
            }
            in = 0;                         /* set in flag zero */
            ndx = 0;                        /* reset index zero */
        }
        else {                              /* otherwise */
            if (ndx == MAXC - 1) {          /* buf full? store this chunk first */
                buf[ndx] = 0;
                head = add (head, buf);
                ndx = 0;
            }
            buf[ndx++] = (char)c;           /* add char to buf (c is never dropped) */
            in = 1;                         /* set in flag 1 */
        }
    }
    if (in) {                               /* input ended inside a word, */
        buf[ndx] = 0;                       /* i.e. no trailing whitespace: */
        head = add (head, buf);             /* store the final word as well */
    }

    if (fp != stdin)                /* close file if not stdin */
        fclose (fp);

    print (head);                   /* print list */
    del_list (head);                /* free all allocated memory */

    return 0;
}
Example Input File
$ cat dat/captnjack.txt
This is a tale
Of Captain Jack Sparrow
A Pirate So Brave
On the Seven Seas.
Example Use/Output
$ ./bin/ll_name_age dat/captnjack.txt
This
is
a
tale
Of
Captain
Jack
Sparrow
A
Pirate
So
Brave
On
the
Seven
Seas.
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/ll_name_age dat/captnjack.txt
==18265== Memcheck, a memory error detector
==18265== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==18265== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==18265== Command: ./bin/ll_name_age dat/captnjack.txt
==18265==
This
is
a
tale
Of
Captain
Jack
Sparrow
A
Pirate
So
Brave
On
the
Seven
Seas.
==18265==
==18265== HEAP SUMMARY:
==18265== in use at exit: 0 bytes in 0 blocks
==18265== total heap usage: 35 allocs, 35 frees, 6,132 bytes allocated
==18265==
==18265== All heap blocks were freed -- no leaks are possible
==18265==
==18265== For counts of detected and suppressed errors, rerun with: -v
==18265== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have further questions.

You need to guard against reading beyond the end of the file and against writing beyond the memory you've allocated. It is also possible that your file had a "long line" of 100 or more characters, and that by both postfix- and prefix-incrementing i you went past the end of your buffer.
// NOTE: character should be declared int so that EOF can be told apart
// from a valid character value.
while ((character = fgetc(fp)) != EOF) {
    char *n_m;
    n_m = (char*)malloc(100 * sizeof(char));
    if(n_m == NULL)
        printf("Fehler bei Speicher reservierung...");
    int i = 0;
    while ((character != ' ')
           && (character != EOF)
           && (i < 99)) {               // keep one byte free for the terminator
        n_m[i++] = character;
        character = fgetc(fp);
    }
    // i was already post-incremented above; do not pre-increment it again below
    if ((character != ' ') && (character != EOF)) {   // the word did not fit
        … there is a problem …
    }
    n_m[i] = '\0'; // NUL-terminate (i <= 99, always inside the buffer)
    head = add(head, n_m);
    free(n_m);
}
You might consider something more like this
#define BUFF_SIZE 100
// Single-loop variant: collect characters into buff and hand each
// space-terminated word to add(). character should be declared int so
// that EOF can be told apart from a valid character value.
char buff[BUFF_SIZE];
buff[0] = '\0';
int i = 0;
while ((character = fgetc(fp)) != EOF) {    // exactly one fgetc() per iteration
    if (character == ' ') {                 // comparison, not assignment
        buff[i] = '\0'; // NUL-terminate
        head = add(head, buff);
        i = 0;
        buff[0] = '\0';
    }
    else if (i < BUFF_SIZE - 1) {           // keep one slot for the terminator
        buff[i++] = character;
    }
    else { … there is a problem … }         // word longer than BUFF_SIZE - 1
}
if (i > 0) {                                // last word not followed by a space
    buff[i] = '\0';
    head = add(head, buff);
}
This does a couple of useful things. One is that your buffer is a fixed-size array rather than a block you malloc() and free() for every word, and its size is controlled by a single #define. Second, it reduces the number of loops involved, which can improve readability.

Related

Function to add a node to linked list not working | c

I have the following program which reads words from a text file and creates a single linked list, with each node containing: word, count, next.
When a word already exists in the linked list the count is updated, otherwise, a node is created at the end of the linked list.
All of my functions work, except for the one where I am adding a word to the end of the linked list. There is likely an error with linkage of the nodes?
with my following text file: line1 "my name is natalie", line 2 "my dog is niko"
I should be getting the following output: my(2), name(1), is(2), natalie(1), dog(1), niko(1)
but I am getting: my(2), dog(2), s(1), iko(1), is(1), niko(1)
WHERE IS MY ERROR?
//function to add word to linked list
// Allocates a node for `word` (count 1) and links it after the last node.
// NOTE: stores the caller's pointer; no copy of the text is made.
// NOTE: declared to return struct WordNode * but contains no return statement.
struct WordNode *addWord(char* word, struct WordNode *wordListHead){
//create new node
struct WordNode *current = malloc(sizeof(struct WordNode));
current->word = word;
current->count = 1;
current->next = NULL;
// walk forward until the node whose next is NULL (the tail) is found
while(1){
if((wordListHead != NULL)&&(wordListHead->next ==NULL)){
//connect
wordListHead->next=current;
break;
}
wordListHead = wordListHead->next; // NOTE: dereferenced even when wordListHead is NULL
}
}
called here in main:
char *filename = argv[1];
FILE *fp = fopen(filename, "r");
printf("%s\n", filename);
if (fp == NULL){
printf("Error: unable to open the file ");
printf("%s", filename);
return 1;
}
else {
char *delim = " \t\n,:;'\".?!#$-><(){}[]|\\/*&^%#!~+=_"; // These are our word delimiters.
char line[1000];
char * token;
int count = 0;//count used so that first word of the doc can be created as head node
//create head pointer
struct WordNode *wordListHead = NULL;
//create current pointer
struct WordNode *current = NULL;
//iterate through each line in file
while(fgets(line, 1000, fp)){
//seperate each word
//first word of line
token = strtok(line, delim);
printf("%s\n", token);
if(count == 0){
//first word of document, create first wordnode (head node)
wordListHead = malloc(sizeof(struct WordNode));
wordListHead->word = token;
wordListHead->count = 1;
wordListHead->next = NULL;
}
else{
//check if first word of line exists in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
//iterate through all the other words of line
while ((token=strtok(NULL, delim)) != NULL){
printf("%s\n", token);
//check if name is in linked list
if((doesWordExist(token, wordListHead)) == 0){
//update count
updateCount(token, wordListHead);
}
else{
//create node
addWord(token, wordListHead);
}
}
count++;
}
printWordList(wordListHead);
}
}
struct defined here:
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode{
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList( struct WordNode *wordListHead);
struct WordNode *addWord(char* word , struct WordNode *wordListead);
#endif
other functions for reference:
//function to check if word is in linked list
bool doesWordExist(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
return 0;
}
wordListHead= wordListHead-> next;
}
return 1;
}
//function to update the count of word
void updateCount(char* myword, struct WordNode *wordListHead){
while (wordListHead != NULL){
if(strcmp(wordListHead->word, myword) == 0){
//update count value
//capture current count and add 1
int curcount = wordListHead->count;
int newcount = curcount + 1;
wordListHead->count = newcount;
//printf("%d\n", newcount);
}
wordListHead= wordListHead-> next;
}
}
//function to print word list
//takes head node as argument
void printWordList( struct WordNode *wordListHead){
//WordNode *toyptr = wordListHead;
while (wordListHead != NULL){
printf("%s\n", wordListHead->word);
printf("%ld\n", wordListHead->count);
wordListHead = wordListHead -> next;
}
}
When you are storing token into your struct, you are using a pointer that is part of the input buffer.
On a new input line, the tokens gathered from previous lines will be corrupted/trashed.
You need to allocate space to store the token in the struct on the heap. Use strdup for that.
So, in addWord, you want:
current->word = strdup(word);
And in main, you want:
wordListHead->word = strdup(token);
UPDATE:
That's the primary issue. But, your code does a bunch of needless replication.
addWord doesn't handle an empty list. But, if it did there would be no need for main to have separate [replicated] code for the first word and subsequent words on the line.
The strcmp can be incorporated into addWord and it can "do it all". (i.e. a single scan of the list)
For doesWordExist, it returns a bool on a match. If it returned the pointer to the element that matched, updateCount would just have to increment the count [and not rescan the list]. I've updated those functions accordingly, but they are no longer needed due to the changes to addWord
Here's how I would simplify and refactor the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
struct WordNode {
char *word;
unsigned long count;
struct WordNode *next;
};
void printWordList(struct WordNode *wordListHead);
#endif
//function to add word to linked list
// Looks for `word` in *list. If found, its count is incremented; otherwise
// a new node holding a private copy of `word` (count 1) is appended, and
// *list is set when the list was empty.
// Returns the matching/new node, or NULL if memory could not be allocated
// (the list is left unchanged in that case).
struct WordNode *
addWord(char *word, struct WordNode **list)
{
    struct WordNode *curr;
    struct WordNode *prev = NULL;

    // single scan: search for a match while remembering the tail
    for (curr = *list; curr != NULL; curr = curr->next) {
        if (strcmp(curr->word,word) == 0) {
            curr->count += 1;
            return curr;
        }
        prev = curr;
    }

    // create new node -- every allocation is validated before use
    struct WordNode *newnode = malloc(sizeof(*newnode));
    if (newnode == NULL)
        return NULL;

    newnode->word = strdup(word);
    if (newnode->word == NULL) {
        free(newnode);
        return NULL;
    }
    newnode->count = 1;
    newnode->next = NULL;

    if (prev != NULL)
        prev->next = newnode;       // attach to tail of list
    else
        *list = newnode;            // first node -- set list pointer

    return newnode;
}
//function to check if word is in linked list
// Returns the first node whose word equals myword, or NULL if none does.
struct WordNode *
findWord(char *myword, struct WordNode *head)
{
    while (head != NULL && strcmp(head->word,myword) != 0)
        head = head->next;

    return head;
}
//function to update the count of word
// Adds one to the count of the node matching myword; no-op when absent.
void
updateCount(char *myword, struct WordNode *head)
{
    struct WordNode *hit = findWord(myword,head);

    if (hit == NULL)
        return;

    hit->count++;
}
//function to print word list
//takes head node as argument
// Prints one "word count" line per node, in list order.
void
printWordList(struct WordNode *head)
{
    struct WordNode *curr;

    for (curr = head; curr != NULL; curr = curr->next) {
        printf("%s", curr->word);
        // count is unsigned long, so the conversion must be %lu (not %ld)
        printf(" %lu\n", curr->count);
    }
}
// Count how often each word occurs in the file named by argv[1] and print
// the resulting list. Returns 0 on success, 1 when no file name was given,
// the file cannot be opened, or a word could not be stored.
int
main(int argc, char **argv)
{
    // argv[1] is only meaningful when an argument was actually supplied
    if (argc < 2) {
        printf("Error: no input file given\n");
        return 1;
    }

    char *filename = argv[1];
    FILE *fp = fopen(filename, "r");

    printf("FILE: %s\n", filename);

    if (fp == NULL) {
        printf("Error: unable to open the file ");
        printf("%s", filename);
        return 1;
    }

    // These are our word delimiters.
    const char *delim = " \t\n,:;'\".?!#$-><(){}[]|\\/*&^%#!~+=_";

    char line[1000];
    char *token;

    // create head pointer
    struct WordNode *wordListHead = NULL;

    // iterate through each line in file
    while (fgets(line, sizeof(line), fp)) {
        // separate each word: strtok gets the buffer on the first call
        // and NULL on every following call for the same line
        for (token = strtok(line, delim); token != NULL;
             token = strtok(NULL, delim)) {
            dbgprt("TOKEN1: %s\n", token);

            if (addWord(token,&wordListHead) == NULL) {
                printf("Error: unable to store word\n");
                fclose(fp);
                return 1;
            }
        }
    }

    fclose(fp);     // done reading; release the stream

    printWordList(wordListHead);

    return 0;
}
UPDATE #2:
Note that addWord and findWord replicate code. The first part of addWord is [essentially] duplicating what findWord does.
But, addWord can not just use findWord [which would be desirable] because findWord, if it fails to find a match returns NULL. In that case, it doesn't [have a way to] communicate back the last element (i.e. the "tail" of the list) which addWord needs to append to.
While we could add an extra argument to findWord to propagate this value back, a better solution is to create a different struct that defines a "list".
In the existing code, we are using a "double star" pointer to the head word node as a "list". Using a separate struct is cleaner and has some additional advantages.
We can just pass around a simple pointer to the list. We no longer need to worry about whether we should be passing a double star pointer or not.
Although we're only using a singly linked list, a separate list struct helps should we wish to convert the list to a doubly linked list [later on].
We just pass around a pointer to the list, and the list can keep track of the head of the list, the tail of the list, and the count of the number of elements in the list.
Linked lists lend themselves well to sorting with mergesort. The list count helps make that more efficient because it is much easier to find the "midpoint" of the list [which a mergesort would need to know].
To show the beginnings of the doubly linked list, I've added a prev element to the word struct. This isn't currently used, but it hints at the doubly linked version.
I've reworked all functions to take a pointer to a list, rather than a pointer to the head node.
Because the list struct has a tail pointer, addWord can now call findWord. If findWord does not find a match, addWord can simply use the head/tail pointers in the list to find the correct insertion point.
To simplify a bit more, I've changed the word node [and the list struct] to use some typedef statements.
Also, it's more usual/idiomatic to have the dominant struct pointer be the first argument, so I've reversed the order of the arguments on some of the functions.
Anyway, here's the [further] refactored code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef int bool;
#ifdef DEBUG
#define dbgprt(_fmt...) \
printf(_fmt)
#else
#define dbgprt(_fmt...) do { } while (0)
#endif
//structure definition of linked list node
#ifndef WORDLISTH
#define WORDLISTH
// word frequency control
typedef struct WordNode Word_t;
struct WordNode {
const char *word;
unsigned long count;
Word_t *next;
Word_t *prev;
};
// word list control
typedef struct {
Word_t *head;
Word_t *tail;
unsigned long count;
} List_t;
void printWordList(List_t *list);
#endif
// create a list
List_t *
newList(void)
{
List_t *list;
list = calloc(1,sizeof(*list));
return list;
}
// function to check if word is in linked list
// Returns the first node whose word equals myword, or NULL if none does.
Word_t *
findWord(List_t *list,const char *myword)
{
    Word_t *node = list->head;

    while (node != NULL && strcmp(node->word,myword) != 0)
        node = node->next;

    return node;
}
//function to add word to linked list
// If `word` is already in the list its frequency count is incremented.
// Otherwise a node holding a private copy of `word` (count 1) is appended
// at the tail and the list's element count is advanced.
// Returns the matching/new node, or NULL if memory could not be allocated
// (the list is left unchanged in that case).
Word_t *
addWord(List_t *list,const char *word)
{
    // try to find existing word
    Word_t *match = findWord(list,word);

    // word already exists -- just bump its frequency
    if (match != NULL) {
        match->count += 1;
        return match;
    }

    // create new node -- every allocation is validated before use
    match = malloc(sizeof(*match));
    if (match == NULL)
        return NULL;

    match->word = strdup(word);
    if (match->word == NULL) {
        free(match);
        return NULL;
    }
    match->count = 1;
    match->next = NULL;

    if (list->head == NULL) {
        // attach to head of list
        match->prev = NULL;
        list->head = match;
    }
    else {
        // append to tail of list (prev is kept valid for a doubly linked list)
        match->prev = list->tail;
        list->tail->next = match;
    }

    // set new tail of list
    list->tail = match;

    // advance list count
    list->count += 1;

    return match;
}
//function to update the count of word
// Adds one to the count of the node matching myword; no-op when absent.
void
updateCount(List_t *list,const char *myword)
{
    Word_t *hit = findWord(list,myword);

    if (hit == NULL)
        return;

    hit->count++;
}
//function to print word list
//takes the list (not a head node) as argument
// Prints one "word count" line per node, from list->head onward.
void
printWordList(List_t *list)
{
    Word_t *curr;

    for (curr = list->head; curr != NULL; curr = curr->next) {
        printf("%s", curr->word);
        // count is unsigned long, so the conversion must be %lu (not %ld)
        printf(" %lu\n", curr->count);
    }
}
// Count how often each word occurs in the file named by argv[1] and print
// the resulting list. Returns 0 on success, 1 when no file name was given,
// the file cannot be opened, or memory could not be allocated.
int
main(int argc, char **argv)
{
    // argv[1] is only meaningful when an argument was actually supplied
    if (argc < 2) {
        printf("Error: no input file given\n");
        return 1;
    }

    char *filename = argv[1];
    FILE *fp = fopen(filename, "r");

    printf("FILE: %s\n", filename);

    if (fp == NULL) {
        printf("Error: unable to open the file ");
        printf("%s", filename);
        return 1;
    }

    // These are our word delimiters.
    const char *delim = " \t\n,:;'\".?!#$-><(){}[]|\\/*&^%#!~+=_";

    char line[1000];
    char *token;

    // create list/head pointer
    List_t *list = newList();
    if (list == NULL) {
        printf("Error: unable to create list\n");
        fclose(fp);
        return 1;
    }

    // iterate through each line in file
    while (fgets(line, sizeof(line), fp) != NULL) {
        // separate each word: strtok gets the buffer on the first call
        // and NULL on every following call for the same line
        for (token = strtok(line, delim); token != NULL;
             token = strtok(NULL, delim)) {
            dbgprt("TOKEN1: %s\n", token);

            if (addWord(list,token) == NULL) {
                printf("Error: unable to store word\n");
                fclose(fp);
                return 1;
            }
        }
    }

    fclose(fp);     // done reading; release the stream

    printWordList(list);

    return 0;
}
@Craig Estey has provided a great answer for you, so don't change your answer selection, but rather than just leave you a link in the comments, there are a couple of important ways of looking at list operations that may help, and you must use a memory/error checking program to validate your use of allocated memory, especially when dealing with list operations.
Take a node holding a string with a reference count of the additional number of times the string occurs, as in your case, e.g.
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
Iterating With Address of Node & Pointer to Node Eliminates Special Cases
Using both the address of the node and pointer to node is discussed in Linus on Understanding Pointers.
For example, where you need to check if (list->head == NULL) to add the first node to the list, if iterating with both the address of the node and pointer to node, you simply assign the allocated pointer to your new node to the address of the current node. This works regardless whether it is the first, middle or last node. It also eliminates having to worry about what the previous node was when removing nodes from the list. At the node to delete, you simply assign the contents of the next node to the current address and free the node that was originally there. This reduces your add node function to something similar to:
/** add string 's' to list: if a node already holds an equal string its
 * refcnt is incremented, otherwise a new node is appended at the end.
 * returns pointer to the existing node on a match; otherwise returns
 * whatever create_node (s) returns for the newly appended node.
 */
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn; /* *ppn is pn here: the node that already holds s */
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* store new node in the final next (or *head) and return it */
}
(note: by delaying allocation for the new node and string (create_node (s) above), you avoid allocating until you know the string needs to be added -- simplifying memory handling)
As mentioned in the comments above, this combines your doesWordExist() traversal of the list and your addWord() traversal to find the end into a single traversal. If there are hundreds of thousands of nodes in your list, you don't want to traverse the list multiple times.
Using strdup() is Fine, but know it's POSIX not standard C
strdup() is handy for duplicating strings and assigning the result to a new pointer, but strdup() is provided by POSIX, so not all implementations will provide it. Additionally, strdup() allocates memory, so just as with any function that allocates memory, you must check that the result is not NULL before using the pointer that is returned. You can avoid the potential portability issue by writing a short equivalent. For example, the create_node() used above does:
/** helper: build one list node that owns a private copy of string s.
 * the node starts with refcnt zero and a NULL next pointer.
 * returns the node on success; on allocation failure reports the error
 * with perror() and returns NULL without leaking memory.
 */
node_t *create_node (const char *s)
{
    node_t *fresh = malloc (sizeof *fresh);     /* the node itself */

    if (fresh == NULL) {
        perror ("create_node: malloc node");
        return NULL;
    }

    size_t nbytes = strlen (s) + 1;             /* string plus terminator */

    fresh->s = malloc (nbytes);
    if (fresh->s == NULL) {
        perror ("create_node: malloc node->s");
        free (fresh);                           /* give the node back */
        return NULL;
    }

    memcpy (fresh->s, s, nbytes);               /* copies the '\0' as well */
    fresh->next = NULL;
    fresh->refcnt = 0;

    return fresh;
}
Freeing Allocated Memory
Any time you write code creating data structures, they need to be able to clean up after themselves in the event you want to remove a single node, or are done using the list. This becomes imperative when you create lists, etc. that are declared and used solely within functions below main() as the memory isn't freed on the function return. With main(), on return the program exits and will free all allocated memory. However if the list is created and used solely below main() a memory leak will result each time that function is called if the list memory is not freed before return. A function that frees all memory is short and easy to write, e.g.
/** delete all nodes in list, freeing each node's string and the node itself */
void del_list (node_t *head)
{
    while (head != NULL) {
        node_t *rest = head->next;  /* remember the remainder before freeing */

        free (head->s);
        free (head);
        head = rest;
    }
}
(note: there is no need to worry about the refcnt since you are deleting the whole list)
A short example including all of these points, as well as a function del_node() to remove a single node from the list (or reduce the refcnt without removing the node if the refcnt is non-zero) can be:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024 /* if you need a constant, #define one (or more) */
typedef struct node_t { /* list node */
char *s;
size_t refcnt;
struct node_t *next;
} node_t;
/** helper: build one list node that owns a private copy of string s.
 * the node starts with refcnt zero and a NULL next pointer.
 * returns the node on success; on allocation failure reports the error
 * with perror() and returns NULL without leaking memory.
 */
node_t *create_node (const char *s)
{
    node_t *fresh = malloc (sizeof *fresh);     /* the node itself */

    if (fresh == NULL) {
        perror ("create_node: malloc node");
        return NULL;
    }

    size_t nbytes = strlen (s) + 1;             /* string plus terminator */

    fresh->s = malloc (nbytes);
    if (fresh->s == NULL) {
        perror ("create_node: malloc node->s");
        free (fresh);                           /* give the node back */
        return NULL;
    }

    memcpy (fresh->s, s, nbytes);               /* copies the '\0' as well */
    fresh->next = NULL;
    fresh->refcnt = 0;

    return fresh;
}
/** add string 's' to list: if a node already holds an equal string its
 * refcnt is incremented, otherwise a new node is appended at the end.
 * returns pointer to the existing node on a match; otherwise returns
 * whatever create_node (s) returns for the newly appended node.
 */
node_t *add_node (node_t **head, const char *s)
{
node_t **ppn = head, /* address of current node */
*pn = *head; /* pointer to current node */
while (pn) { /* iterate to last node */
if (strcmp (pn->s, s) == 0) { /* compare node string with string */
pn->refcnt += 1; /* increment ref count */
return *ppn; /* *ppn is pn here: the node that already holds s */
}
ppn = &pn->next; /* ppn to address of next */
pn = pn->next; /* advance pointer to next */
}
return *ppn = create_node (s); /* store new node in the final next (or *head) and return it */
}
/** print every node as "string refcnt", or "list-empty" for a NULL list */
void prn_list (node_t *head)
{
    node_t *pn = head;

    if (pn == NULL) {               /* nothing stored */
        puts ("list-empty");
        return;
    }

    while (pn != NULL) {            /* visit nodes head to tail */
        printf ("%-24s %4zu\n", pn->s, pn->refcnt);
        pn = pn->next;
    }
}
/** delete node with string s from list (for loop).
 * if the matching node's refcnt is non-zero it is only decremented;
 * otherwise the node is unlinked and both its string and the node
 * itself are freed. does nothing when no node holds s.
 */
void del_node (node_t **head, const char *s)
{
    node_t **ppn = head;            /* address of node */
    node_t *pn = *head;             /* pointer to node */

    for (; pn; ppn = &pn->next, pn = pn->next) {
        if (strcmp (pn->s, s) == 0) {   /* does string match */
            if (pn->refcnt) {           /* ref count > 0 */
                pn->refcnt -= 1;        /* decrement ref count */
                return;                 /* done */
            }
            *ppn = pn->next;            /* set content at address to next */
            free (pn->s);               /* string is a separate allocation */
            free (pn);                  /* free original pointer */
            break;                      /* leave before the loop touches pn again */
        }
    }
}
/** delete all nodes in list, freeing each node's string and the node itself */
void del_list (node_t *head)
{
    while (head != NULL) {
        node_t *rest = head->next;  /* remember the remainder before freeing */

        free (head->s);
        free (head);
        head = rest;
    }
}
/** read lines from the file given as 1st argument (stdin by default),
 * split them into words and record each word in the list, then print
 * the list and free it.
 * returns 0 on success, 1 if the file cannot be opened or a word could
 * not be stored.
 */
int main (int argc, char **argv) {

    char buf[MAXC];                         /* read buffer */
    const char *delim = " \t\n.,;?!()";     /* strtok delimiters */
    node_t *list = NULL;                    /* pointer to list (must initialize NULL */
    int status = 0;                         /* becomes 1 when add_node fails */
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (status == 0 && fgets (buf, MAXC, fp))    /* read all lines in file */
        /* tokenize line based on delim */
        for (char *p = strtok (buf, delim); p; p = strtok (NULL, delim)) {
            /* <ctype.h> functions need an unsigned char value (or EOF) */
            if (ispunct ((unsigned char)*p))    /* if word is punctuation, skip */
                continue;
            if (!add_node (&list, p)) {         /* add node or increment refcnt */
                status = 1;                     /* allocation failed: stop reading */
                break;
            }
        }

    if (fp != stdin)                /* close file if not stdin */
        fclose (fp);

    if (status == 0) {
        puts ("string refcnt\n"     /* heading */
              "-------------------------------");
        prn_list (list);            /* print contents of list */
    }
    del_list (list);                /* free all list memory */

    return status;
}
Take a look at the characters included in delim to use with strtok() as well as the use of ispunct() to skip tokens that end up beginning with punctuation (which allows hyphenated words, but skips hyphens used alone as sentence continuations, etc....)
Example Input File
$ cat dat/tr_dec_3_1907.txt
No man is above the law and no man is below it;
nor do we ask any man's permission when we require him to obey it.
Obedience to the law is demanded as a right; not asked as a favor.
(Theodore Roosevelt - December 3, 1907)
Example Use/Output
$ ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528== Memcheck, a memory error detector
==8528== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==8528== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==8528== Command: ./bin/lls_string_nosort_refcnt dat/tr_dec_3_1907.txt
==8528==
string refcnt
-------------------------------
No 0
man 1
is 2
above 0
the 1
law 1
and 0
no 0
below 0
it 1
nor 0
do 0
we 1
ask 0
any 0
man's 0
permission 0
when 0
require 0
him 0
to 1
obey 0
Obedience 0
demanded 0
as 1
a 1
right 0
not 0
asked 0
favor 0
Theodore 0
Roosevelt 0
December 0
3 0
1907 0
==8528==
==8528== HEAP SUMMARY:
==8528== in use at exit: 0 bytes in 0 blocks
==8528== total heap usage: 73 allocs, 73 frees, 6,693 bytes allocated
==8528==
==8528== All heap blocks were freed -- no leaks are possible
==8528==
==8528== For counts of detected and suppressed errors, rerun with: -v
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
You already have a solution to your immediate problem, but going forward in your project consider some of the tips above to eliminate multiple traversals of your list, and the portability (and validation) issues surrounding strdup(). Good luck with your coding.

C: Linked list and free() - Mistake in example?

studying the book C Primer Plus by Prata, 6th edition, I came across the linked list example of Listing 17.2 which is copied below this text. I am confused about the part where he frees the memory again. Isn't he freeing the pointer to the next list in his example? In the end, he sets current to head, which makes current point to the starting memory address of the first structure. Then he frees current and sets current=current->next, but current->next shouldn't contain any address anymore because current is freed? Running the program via CodeBlocks works fine, but debugging the free() while loop in CodeBlocks, I get a segment fault.
I believe my understanding is correct that the program is faulty, right?
Thank you.
/* films2.c -- using a linked list of structures */
#include <stdio.h>
#include <stdlib.h> /* has the malloc prototype */
#include <string.h> /* has the strcpy prototype */
#define TSIZE 45 /* size of array to hold title */
/* One entry of the film list; entries are chained through next. */
struct film
{
char title[TSIZE]; /* film title, stored in a fixed array of TSIZE chars */
int rating; /* rating read from the user */
struct film * next; /* points to next struct in list */
};
char * s_gets(char * st, int n);
/* Reads film titles and ratings from standard input into a linked list,
 * prints the list, then releases the nodes. Always returns 0. */
int main(void)
{
struct film * head = NULL;
struct film * prev, * current;
char input[TSIZE];
/* Gather and store information */
puts("Enter first movie title:");
/* loop ends on end of input or when an empty title is entered */
while (s_gets(input, TSIZE) != NULL && input[0] != '\0')
{
/* NOTE(review): the malloc result is used below without a NULL check */
current = (struct film *) malloc(sizeof(struct film));
if (head == NULL) /* first structure */
head = current;
else /* subsequent structures */
prev->next = current;
current->next = NULL;
strcpy(current->title, input);
puts("Enter your rating <0-10>:");
/* NOTE(review): scanf's return value is ignored, so rating is left unset on bad input */
scanf("%d", &current->rating);
/* discard the rest of the rating line */
/* NOTE(review): this loop never ends if getchar() returns EOF */
while(getchar() != '\n')
continue;
puts("Enter next movie title (empty line to stop):");
prev = current;
}
/* Show list of movies */
if (head == NULL)
printf("No data entered. ");
else
printf ("Here is the movie list:\n");
current = head;
while (current != NULL)
{
printf("Movie: %s Rating: %d\n",
current->title, current->rating);
current = current->next;
}
/* Program done, so free allocated memory */
current = head;
while (current != NULL)
{
free(current);
/* NOTE(review): current->next is read after current was freed, which is
 * undefined behavior; the successor must be saved before calling free() */
current = current->next;
}
printf("Bye!\n");
return 0;
}
/* Read one line from stdin into st (at most n-1 characters).
 *
 * A newline that was read is replaced by a null character. When the line
 * did not fit, the remainder of the line is discarded so the next read
 * starts on a fresh line. Returns st on success and NULL when fgets()
 * reports end of input or an error.
 */
char * s_gets(char * st, int n)
{
    char * ret_val = fgets(st, n, stdin);

    if (ret_val)
    {
        char * find = strchr(st, '\n');     // look for newline
        if (find)                           // if the address is not NULL,
            *find = '\0';                   // place a null character there
        else
        {
            int ch;
            // dispose of rest of line; also stop at EOF, otherwise an
            // unterminated last line would make this loop spin forever
            while ((ch = getchar()) != '\n' && ch != EOF)
                continue;
        }
    }
    return ret_val;
}
current = head;
while (current != NULL)
{
free(current);
current = current->next;
}
but current->next shouldn't contain any address anymore because
current is freed?
Your assumptions are correct, you are trying to access an already deleted node and that's why your code segfaults, instead, use a temporary node, in this case you can reuse head:
current = head;
while (current != NULL)
{
head = current->next;
free(current);
current = head;
}
Running the program via CodeBlocks works fine ...
Pure luck ...

My linked list is printing the last word in my text file for all the elements

This code is reading a text file and inserting each word into the linked list.
I am new to linked lists and have been working on this for four hours now and cannot for the life of me figure this out.
So what is going on here? I have checked every way I know how to, and for the life of me cannot get the linked list to print correctly. I believe it has something to do with the push/append functions. Somehow it is overwriting everything previously in the linked list. Maybe the printlist function is overwriting everything but I don't see how it could possibly be doing that.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
// A complete working C program to demonstrate all insertion methods
// on Linked List
// A linked list node
struct Node;
void push(struct Node **head_ref, char *new_data);
void insertAfter(struct Node *prev_node, char *new_data);
void append(struct Node **head_ref, char *new_data);
void printList(struct Node *node);
int LINECOUNT(FILE *(*stream), char *filename);
/* A singly linked list node holding a string pointer. */
struct Node {
char *data; /* string pointer; push() and append() store it without copying */
struct Node *next; /* following node, NULL at the end of the list */
};
/* Given a reference (pointer to pointer) to the head of a list and
an int, inserts a new node on the front of the list. */
/* Insert a node for new_data at the front of the list.
   head_ref is the address of the caller's head pointer and is updated to
   the new node. new_data is stored as-is (the text is not copied), so the
   node shares the caller's buffer. The allocation is not checked. */
void push(struct Node **head_ref, char *new_data) {
    struct Node *front = malloc(sizeof *front);

    /* the old head becomes the successor of the new node */
    front->next = *head_ref;
    front->data = new_data;
    printf("push data:%s ", new_data);

    /* publish the new node as head */
    *head_ref = front;
}
/* Given a reference (pointer to pointer) to the head of a list and a
   string pointer, append a new node at the end. The pointer is stored
   as-is (the text is not copied). The allocation is not checked. */
void append(struct Node **head_ref, char *new_data) {
    struct Node **slot = head_ref;      /* pointer that will receive the node */
    struct Node *tail = malloc(sizeof *tail);

    tail->data = new_data;
    printf("push data:%s ", new_data);
    tail->next = NULL;                  /* the new node is the last one */

    /* advance to the NULL link that ends the list (the head itself when
       the list is empty) */
    while (*slot != NULL)
        slot = &(*slot)->next;

    *slot = tail;
}
// Print the data string of every node, starting at the given node,
// each one surrounded by single spaces.
void printList(struct Node *node) {
    for (struct Node *it = node; it != NULL; it = it->next)
        printf(" %s ", it->data);
}
/* Open filename for reading and count its lines.
 *
 * stream    receives the opened FILE pointer; on return it is rewound to
 *           the start of the file and left open for the caller.
 * filename  path of the file to open.
 *
 * Returns the number of lines. A final line that is not terminated by a
 * newline is counted too, so a caller sizing a per-line array from the
 * result has a slot for it. An empty file has 0 lines.
 * If the file cannot be opened a message is printed and the program exits
 * with a failure status.
 */
int LINECOUNT(FILE *(*stream), char *filename) {
    int size = 0;
    int ch;
    int last = '\n';    /* previous character; '\n' means "no open line" */

    if ((*stream = fopen(filename, "r")) == NULL) {
        printf("LC Could not open hw8 data file.\n");
        exit(EXIT_FAILURE);
    }

    /* count newline characters; no per-line buffer is needed for that */
    while ((ch = fgetc(*stream)) != EOF) {
        if (ch == '\n')
            size++;
        last = ch;
    }
    if (last != '\n')   /* unterminated last line */
        size++;

    rewind(*stream);
    return size;
}
/*int wordCount(FILE *(*stream), char *filename, int lineCount) {
char ch;
int wordcount = 0;
int charcount = 0;
*stream = fopen(filename, "r");
int x = 0;
int lineArr[lineCount];
for (int i = 0; i < lineCount; i++) {
lineArr[i] = 0;
}
if (*stream) {
while ((ch = getc(*stream)) != EOF) {
if (ch != ' ' && ch != '\n') {
charcount++;
}
if (ch == ' ' || ch == '\n') {
wordcount++;
lineArr[x] = lineArr[x] + 1;
}
if (ch == '\n') {
x++;
}
}
if (charcount > 0) {
wordcount++;
charcount++;
}
} else {
printf("Failed to open the file\n");
}
// rewind(*stream);
return lineArr;
}*/
/* Counts lines, words and characters of hw8data.txt, then reads the first
 * two words of the file and stores them in a linked list that is printed. */
int main(void) {
char *fn = "hw8data.txt";
int lineCount;
FILE *stream;
/* LINECOUNT opens the file and stores the handle in stream */
lineCount = LINECOUNT(&stream, fn);
//int lineArr[lineCount];
//int lineArr[];//lineArr[0] = 4 would say the first line has 4 words. using this data for strtok
//lineArr = wordCount(&stream, fn, lineCount);
//-------------------------------------
/* NOTE(review): getc() returns int; storing it in a char before comparing with EOF is unreliable */
char ch;
int wordcount = 0;
int charcount = 0;
/* NOTE(review): stream is overwritten here without closing the handle opened by LINECOUNT */
stream = fopen("./hw8data.txt", "r");
int x = 0; /* index of the line currently being counted */
int lineArr[lineCount]; /* per-line count of spaces and newlines */
for (int i = 0; i < lineCount; i++) {
lineArr[i] = 0;
}
if (stream) {
while ((ch = getc(stream)) != EOF) {
if (ch != ' ' && ch != '\n') {
charcount++;
}
if (ch == ' ' || ch == '\n') {
wordcount++;
/* NOTE(review): x is not checked against lineCount before this write */
lineArr[x] = lineArr[x] + 1;
}
if (ch == '\n') {
x++;
}
}
//if (charcount > 0) { wordcount++; charcount++; }
} else {
printf("Failed to open the file\n");
}
/* Start with the empty list */
struct Node *head = NULL;
/* NOTE(review): reached even when fopen failed above, i.e. with stream == NULL */
rewind(stream);
/* single 42-byte buffer that receives every word read below */
char *sArr = malloc(42 * sizeof(char));
/* NOTE(review): "%s" has no field width, so a long word overruns sArr */
fscanf(stream, "%s ", sArr);
printf("%s ", sArr);
push(&head, sArr);
/* the second read overwrites the buffer the first node already points to */
fscanf(stream, "%s ", sArr);
printf("%s ",sArr);
/* both nodes now store the same pointer, so both print the last word read */
append(&head, sArr);
printList(head);
return 0;
}
char* sArr=malloc(42*sizeof(char));
fscanf(stream,"%s ",sArr);
printf("%s ",sArr);
push(&head,sArr);
fscanf(stream,"%s ",sArr);
printf("%s ",sArr);
append(&head,sArr);
You add the same value to the list twice, the value you got back from your one and only call to malloc. If you want two nodes to hold different values, don't add the same value twice. One ugly fix is if after push(&head,sArr) you add another sArr = malloc(42*sizeof(char));. That way, your call to append will add a different value to the list.
If you don't see this, add code to output the value of node->data as you print the list. You'll see that both nodes have pointers to the same chunk of memory, the value you got back from that call to malloc.
But it would be much more elegant if your list entries owned their contents. That would require functions like push and append to allocate their own pointers, copy the strings into them, and use those new pointers. Your code to destroy a list could call free on the data pointed to as well as the nodes.
I would suggest a completely different approach.
I would use a C99 flexible array member for storing each word. Also, because I don't want my code to be submittable as a homework answer, I'll show how to do it with wide-character input. (On basically all OSes except possibly Windows, it treats non-ASCII characters like Ö and Ø as letters, if your locale says they are.)
/* One word of the list; the characters are stored inline after the link. */
struct word {
struct word *next; /* following word in the list */
wchar_t data[]; /* Flexible array member */
};
I would use a helper function that reads the next word from a wide stream, skipping any non-word characters (which I assume to be alphanumeric characters, i.e. letters and digits):
/* Read the next word (a run of alphanumeric wide characters) from input.
 *
 * Leading non-alphanumeric characters are skipped; the character that ends
 * the word is pushed back onto the stream.
 *
 * Returns a newly allocated struct word holding the null-terminated word,
 * with next set to NULL; the caller owns it and releases it with free().
 * Returns NULL with errno == 0 at end of input, and NULL with errno set to
 * EINVAL (input is NULL), EIO (stream error) or ENOMEM (out of memory)
 * on failure.
 */
struct word *wide_word(FILE *input)
{
    struct word *w = NULL, *tempw;
    size_t max = 0; /* No characters allocated in w yet */
    size_t len = 0; /* No characters in w yet */
    wint_t c;

    /* NULL input is not allowed. */
    if (!input) {
        errno = EINVAL;
        return NULL;
    }

    /* Also fail if the stream is already in an error state. */
    if (ferror(input)) {
        errno = EIO;
        return NULL;
    }

    c = getwc(input);

    /* Skip leading non-word characters. */
    while (c != WEOF && !iswalnum(c))
        c = getwc(input);

    /* No word found: WEOF is returned both at end of input and on a read
       or encoding error, so tell the two apart for the caller. */
    if (c == WEOF) {
        errno = ferror(input) ? EIO : 0;
        return NULL;
    }

    /* Append each wide word character. */
    while (c != WEOF && iswalnum(c)) {
        /* Need to reallocate? */
        if (len >= max) {
            /* Since words are usually short, let's allocate
               in chunks of 64 wide characters. */
            max = (len | 63) + 65;
            tempw = realloc(w, sizeof (struct word) + max * sizeof (wchar_t));
            if (!tempw) {
                /* Out of memory. */
                free(w);
                errno = ENOMEM;
                return NULL;
            }
            w = tempw;
        }

        /* Append. */
        w->data[len++] = c;
        c = getwc(input);
    }

    /* Although not useful for this particular case,
       we normally want to keep the separator intact. */
    if (c != WEOF)
        ungetwc(c, input);

    /* Optimize the memory allocated to the word:
       exactly len characters plus the terminator. */
    if (max != len + 1) {
        max = len + 1;
        tempw = realloc(w, sizeof (struct word) + max * sizeof (wchar_t));
        if (!tempw) {
            free(w);
            errno = ENOMEM;
            return NULL;
        }
        w = tempw;
    }

    /* Terminate the wide string in w. */
    w->data[len] = L'\0';

    /* Do not hand an indeterminate link to the caller. */
    w->next = NULL;

    /* Success! */
    return w;
}
I personally prefer to prepend new nodes to the list, then reverse the entire list afterwards:
/* Reverse the list in place by relinking its nodes.
   Returns the new first node (NULL for an empty list). */
struct word *reverse_list(struct word *oldlist)
{
    struct word *reversed = NULL;

    for (struct word *cur = oldlist, *rest; cur; cur = rest) {
        rest = cur->next;       /* remainder of the old list */
        cur->next = reversed;   /* move cur to the front of the new list */
        reversed = cur;
    }
    return reversed;
}
With the above, a program to read wide words from standard input is basically
#define _POSIX_C_SOURCE 200809L
#include <errno.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
/* Functions listed above */
/* Read all words from standard input into a linked list and print them,
   one per line, in input order. Exits with EXIT_FAILURE on a read error. */
int main(void)
{
    /* list must start out empty: the first node's next is copied from it */
    struct word *list = NULL, *node;

    if (!setlocale(LC_ALL, ""))
        fprintf(stderr, "Warning: Your C library does not support your current locale.\n");
    if (fwide(stdin, 1) < 1)
        fprintf(stderr, "Warning: Your C library does not support wide standard input.\n");
    if (fwide(stdout, 1) < 1)
        fprintf(stderr, "Warning: Your C library does not support wide standard output.\n");

    /* Read words from standard input into reversed list. */
    while (1) {
        node = wide_word(stdin);
        if (!node) {
            if (errno) {
                fprintf(stderr, "Error reading standard input: %s.\n", strerror(errno));
                exit(EXIT_FAILURE);
            }
            /* No error, just end of input. */
            break;
        }
        /* Prepend to list. */
        node->next = list;
        list = node;
    }

    /* Reverse the list so first word is first in list. */
    list = reverse_list(list);

    /* Print each word in the list to standard output, in order. */
    for (node = list; node != NULL; node = node->next)
        wprintf(L"%ls\n", node->data);

    /* Release every word; each one is a single allocation. */
    while (list != NULL) {
        node = list->next;
        free(list);
        list = node;
    }

    return EXIT_SUCCESS;
}

How to use char * in linked list

In a struct Node type, there is a char* data. And each node uses a linked list to combine.
If the type of "data" is INT it is ok. (ex. the age: 23,45,33....) But when the type turn to "char * ", ex. save name: “Jack”,"Jay","Jame". The value is all the same, the later will cover the fronter. ex:
First time input: Jack. The list is Jack
Second time input: Jay. The list is Jay Jay
Third time input:Jame. The list is Jame Jame Jame
The code is like:
#include <stdio.h>
#include <stdlib.h>
/* List node; ListNodePtr is a pointer to struct listNode. */
typedef struct listNode{
char *data; /* string pointer; insert_at_front stores it without copying */
struct listNode *next; /* following node, NULL at the end of the list */
} *ListNodePtr;
/* Wrapper holding the head of a list. */
typedef struct list {
ListNodePtr head; /* first node; new_list() sets it to NULL */
} List;
/* Return an empty list (head is NULL), by value. */
List new_list(){
    List empty = { NULL };
    return empty;
}
//Student Courses
/* Put a new node in front of *head and make it the new head.
   data is stored as-is; the characters are not copied.
   The allocation is not checked. */
void insert_at_front(ListNodePtr* head, char *data){
    ListNodePtr front = malloc(sizeof *front);

    front->next = *head;
    front->data = data;
    *head = front;
}
/* Print the data string of each node on its own line, followed by one
   blank line. */
void print_list(List *self)
{
    for (ListNodePtr it = self->head; it != NULL; it = it->next)
        printf("%s \n", it->data);
    printf("\n");
}
/* Repeatedly reads a name, inserts it at the front of the list and prints
   the list. */
int main()
{
char i = 'y'; /* loop flag; NOTE(review): never changed inside the loop, so the loop does not end */
char *name; /* NOTE(review): uninitialized pointer, it does not point to any storage */
List mylist = new_list();
while(i=='y'){
printf("your name is :");
/* NOTE(review): writes through the uninitialized pointer name -- undefined behavior */
scanf("%s",name);
/* every node receives the same pointer value, so all nodes share one string */
insert_at_front(&mylist.head,name);
print_list(&mylist);
}
return 0;
}
The good news is your thinking on list operation is not too far off... the bad news is it isn't right-on either. The biggest stumbling blocks you have are just handling the basics of user-input, and ensuring you have storage for each bit of data you want to store.
In main() when you declare char *name;, name is an uninitialized pointer. It does not point to any valid memory yet in which you can store the characters that make up name (plus the terminating nul-character). The only thing you can store in a pointer, is a memory address -- and, to be useful, that memory address must be the start of a valid block of memory sufficiently sized to hold what it is you are attempting to store.
In your code, since name does not point to any valid block of memory capable of storing the characters in name, you immediately invoke Undefined Behavior with scanf("%s",name); (Boom! - "Game Over" for your program).
Since you are reading a name, rarely over 64 characters, just use a fixed-size buffer to hold name to pass it to insert_at_front. Don't skimp on buffer size, so just to be sure, you can use something reasonable like 512 bytes. Don't use magic numbers in your code, so if you need a constant:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXN 512 /* if you need a constant, #define one (or more) */
...
int main (void) {
char i = 'y';
char name[MAXN] = ""; /* fixed size buf for reading name input */
...
Now for your structs, you are using a "wrapper" struct holding head that essentially wraps your list. It is fine to do, though not required. However, since you are using one, you need to either declare it using automatic storage (and pass it as a parameter) or dynamically allocate for it. Since you use a new_list function, if you were using automatic storage, you would have to declare mylist as type List in main() and pass it as a parameter to new_list() for initialization.
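Why? You cannot return the address of temp from new_list because temp was declared within new_list (it is local to new_list) and the function stack for new_list (the memory that contains the temp variable) is destroyed (released for reuse) when new_list returns. (Returning temp by value, as your new_list does, is valid C -- the struct is copied to the caller -- but any pointer to temp itself would be left dangling.)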
If you want to return a pointer from new_list, then you must either (1) pass a previously declared temp from main() to new_list -- or -- (2) dynamically allocate for temp in new_list giving temp allocated storage duration so that the memory containing temp survives until free() is called on that memory, or the program ends. The normal option is (2), though there is nothing wrong with (1) if you adequately account for the storage duration of the variable.
Making a few tweaks, avoiding typedef'ing a pointer (it is NOT a good idea to typedef pointers), and adding a convenient example of the flexibility you have to track list statistics in your wrapper struct, you could do something similar to:
/* list node holding one string */
typedef struct lnode { /* don't typedef pointers -- it will confuse you */
char *data; /* string stored in the node */
struct lnode *next; /* following node, NULL at the end of the list */
} lnode;
/* wrapper struct holding the head of the list and its statistics */
typedef struct list { /* you pass this list to insert_at_front */
lnode *head; /* first node, NULL for an empty list */
size_t size; /* you can track any list stats you like */
} list;
/* allocate an empty list struct (head NULL, size 0).
 * returns the new list, or NULL after reporting the failure with perror.
 */
list *new_list (void)
{
    list *fresh = malloc (sizeof *fresh);   /* storage for the wrapper */

    if (fresh != NULL) {                    /* initialize only on success */
        fresh->size = 0;
        fresh->head = NULL;
    }
    else
        perror ("malloc-new_list");

    return fresh;                           /* NULL when malloc failed */
}
To help make lists more logical while you are learning, it often helps to create a separate function that is responsible for creating each node you add to your list. This allows you to concentrate on which items within your node struct need storage allocated, and provides a convenient place to handle the allocation (and validation of that allocation), as well as initializing all values in a single place -- without cluttering your list logic. You could implement a create_node function similar to:
/* allocate a node that owns a private copy of data, with next set NULL.
 * returns the node, or NULL (after printing a diagnostic) when data is
 * NULL or an allocation fails.
 */
lnode *create_new_node (const char *data)
{
    if (data == NULL) {                     /* nothing to copy */
        fputs ("error: data is NULL in create_new_node.\n", stderr);
        return NULL;
    }

    lnode *node = malloc (sizeof *node);    /* the node itself */
    if (node == NULL) {
        perror ("malloc-new_node");
        return NULL;
    }

    size_t nbytes = strlen (data) + 1;      /* characters plus terminator */
    node->data = malloc (nbytes);
    if (node->data == NULL) {
        perror ("malloc-new_node->data");
        free (node);                        /* do not leak the node */
        return NULL;
    }

    memcpy (node->data, data, nbytes);      /* copy includes the terminator */
    node->next = NULL;

    return node;
}
(note: you have allocated for (1) the list, (2) the node, and (3) the data)
That leaves your logic for insert_at_front clean and readable. Additionally, you need to always use a proper type for any list operation, especially where any allocation is involved, that allows you to gauge success/failure of the list operation. Generally returning a pointer to the node added (a new head here) or NULL on failure, is all you need, e.g..
/* insert a copy of data in a new node at the front of mylist.
 * returns the new head on success, NULL when the node could not be
 * created (the list is then unchanged).
 */
lnode *insert_at_front (list *mylist, const char *data)
{
    lnode *front = create_new_node (data);

    if (front != NULL) {
        front->next = mylist->head;     /* old head follows the new node */
        mylist->head = front;
        mylist->size += 1;
    }

    return front;                       /* same as mylist->head on success */
}
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed. Get in the habit now to cleaning up after yourself -- it will pay big dividends as your programs become more complex. If you are dealing with a list, and nodes, then write a function to free all data, nodes and the list when you are done with it. Something simple is all that is needed, e.g.
/* you are responsible for freeing any memory you allocate.
 * frees every node, the string each node holds, and the list struct
 * itself. A NULL list (e.g. the result of a failed new_list) is accepted
 * and ignored, mirroring free(NULL).
 */
void free_list (list *mylist)
{
    if (!mylist)                        /* nothing was allocated */
        return;

    lnode *current = mylist->head;
    while (current) {
        lnode *victim = current;        /* node to release */
        current = current->next;        /* advance before freeing */
        free (victim->data);
        free (victim);
    }
    free (mylist);
}
With user input, you are responsible for validating that you received good input and that it satisfies any conditions you have placed on the input. You are also responsible for ensuring that the state of the input buffer is ready for the next input operation. That means clearing any extraneous characters that may be left in the input buffer (e.g. stdin) that would cause your next attempt at input to fail. A simple helper function to empty stdin can save you from a world of trouble.
/* discard characters from stdin up to and including the next newline,
 * or until EOF is reached.
 */
void empty_stdin (void)
{
    int ch;

    do {
        ch = getchar();
    } while (ch != EOF && ch != '\n');
}
Further, every input function you will use has a return. You must validate the return of any input function you use to determine if (1) valid input was read, (2) whether the user canceled input by generating a manual EOF, and (3) when using scanf whether a matching or input failure occurred. So always check the return! For example:
while (i == 'y' || i == '\n') { /* 'y' or (default '\n') */
fputs ("\nenter name: ", stdout);
if (scanf ("%511[^\n]", name) != 1) { /* ALWAYS CHECK RETURN! */
fputs ("error: invalid input or user canceled.", stderr);
return 1;
}
empty_stdin(); /* empty any chars that remain in stdin */
...
Putting it all together in a short example, you could do something like the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXN 512 /* if you need a constant, #define one (or more) */
/* list node holding one string */
typedef struct lnode { /* don't typedef pointers -- it will confuse you */
char *data; /* string owned by the node (allocated in create_new_node) */
struct lnode *next; /* following node, NULL at the end of the list */
} lnode;
/* wrapper struct holding the head of the list and its statistics */
typedef struct list { /* you pass this list to insert_at_front */
lnode *head; /* first node, NULL for an empty list */
size_t size; /* you can track any list stats you like */
} list;
/* allocate an empty list struct (head NULL, size 0).
 * returns the new list, or NULL after reporting the failure with perror.
 */
list *new_list (void)
{
    list *fresh = malloc (sizeof *fresh);   /* storage for the wrapper */

    if (fresh != NULL) {                    /* initialize only on success */
        fresh->size = 0;
        fresh->head = NULL;
    }
    else
        perror ("malloc-new_list");

    return fresh;                           /* NULL when malloc failed */
}
/* allocate a node that owns a private copy of data, with next set NULL.
 * returns the node, or NULL (after printing a diagnostic) when data is
 * NULL or an allocation fails.
 */
lnode *create_new_node (const char *data)
{
    if (data == NULL) {                     /* nothing to copy */
        fputs ("error: data is NULL in create_new_node.\n", stderr);
        return NULL;
    }

    lnode *node = malloc (sizeof *node);    /* the node itself */
    if (node == NULL) {
        perror ("malloc-new_node");
        return NULL;
    }

    size_t nbytes = strlen (data) + 1;      /* characters plus terminator */
    node->data = malloc (nbytes);
    if (node->data == NULL) {
        perror ("malloc-new_node->data");
        free (node);                        /* do not leak the node */
        return NULL;
    }

    memcpy (node->data, data, nbytes);      /* copy includes the terminator */
    node->next = NULL;

    return node;
}
/* insert a copy of data in a new node at the front of mylist.
 * returns the new head on success, NULL when the node could not be
 * created (the list is then unchanged).
 */
lnode *insert_at_front (list *mylist, const char *data)
{
    lnode *front = create_new_node (data);

    if (front != NULL) {
        front->next = mylist->head;     /* old head follows the new node */
        mylist->head = front;
        mylist->size += 1;
    }

    return front;                       /* same as mylist->head on success */
}
/* print_list - write all strings on one line: a leading space before the
 * first string, ", " before each following one, then a newline.
 */
void print_list (list *self)
{
    lnode *it = self->head;

    if (it != NULL) {                   /* head gets the leading space */
        printf (" %s", it->data);
        it = it->next;
    }
    for (; it != NULL; it = it->next)   /* the rest are comma separated */
        printf (", %s", it->data);

    putchar ('\n');
}
/* you are responsible for freeing any memory you allocate.
 * frees every node, the string each node holds, and the list struct
 * itself. A NULL list (e.g. the result of a failed new_list) is accepted
 * and ignored, mirroring free(NULL).
 */
void free_list (list *mylist)
{
    if (!mylist)                        /* nothing was allocated */
        return;

    lnode *current = mylist->head;
    while (current) {
        lnode *victim = current;        /* node to release */
        current = current->next;        /* advance before freeing */
        free (victim->data);
        free (victim);
    }
    free (mylist);
}
/* discard characters from stdin up to and including the next newline,
 * or until EOF is reached.
 */
void empty_stdin (void)
{
    int ch;

    do {
        ch = getchar();
    } while (ch != EOF && ch != '\n');
}
/* Prompt for names, insert each at the front of the list, then print and
 * free the list. Entering 'y' or just pressing Enter at the prompt
 * continues; any other answer (or end of input) stops.
 * Returns 0 on success, 1 on allocation failure or invalid name input.
 */
int main (void) {
    char i = 'y';
    char name[MAXN] = "";       /* fixed size buf for reading name input */
    list *mylist = new_list();

    if (!mylist)                /* new_list has already reported the error */
        return 1;

    while (i == 'y' || i == '\n') {     /* 'y' or (default '\n') */
        fputs ("\nenter name: ", stdout);
        if (scanf ("%511[^\n]", name) != 1) {   /* ALWAYS CHECK RETURN! */
            fputs ("error: invalid input or user canceled.", stderr);
            free_list (mylist);         /* don't leak names already stored */
            return 1;
        }
        empty_stdin();          /* empty any chars that remain in stdin */

        if (!insert_at_front (mylist, name)) {  /* insert name */
            free_list (mylist);
            return 1;
        }

        fputs ("continue (y)/n: ", stdout);     /* prompt to continue */
        if (scanf ("%c", &i) != 1)      /* end of input: stop rather than */
            break;                      /* loop again on the stale answer */
        if (i != '\n')
            empty_stdin();      /* a typed answer leaves its '\n' in stdin,
                                 * which would make the next name read fail */
    }

    printf ("\nfinal list (%zu nodes):", mylist->size);
    print_list (mylist);
    free_list (mylist);         /* don't forget to free memory you allocate */
    return 0;
}
(note: the prompt to continue allows the user to simply press Enter to indicate he wants to continue entering names, any other character will exit. That is why (y) is shown as the default in the prompt -- can you explain why and how that works?)
Example Use/Output
$ ./bin/llinshead
enter name: Mickey Mouse
continue (y)/n:
enter name: Donald Duck
continue (y)/n:
enter name: Pluto (the dog)
continue (y)/n:
enter name: Minnie Mouse
continue (y)/n: n
final list (4 nodes): Minnie Mouse, Pluto (the dog), Donald Duck, Mickey Mouse
Memory Use/Error Check
It is imperative that you use a memory error checking program to insure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/llinshead
==5635== Memcheck, a memory error detector
==5635== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==5635== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==5635== Command: ./bin/llinshead
==5635==
enter name: Mickey Mouse
continue (y)/n:
enter name: Donald Duck
continue (y)/n:
enter name: Pluto (the dog)
continue (y)/n:
enter name: Minnie Mouse
continue (y)/n: n
final list (4 nodes): Minnie Mouse, Pluto (the dog), Donald Duck, Mickey Mouse
==5635==
==5635== HEAP SUMMARY:
==5635== in use at exit: 0 bytes in 0 blocks
==5635== total heap usage: 9 allocs, 9 frees, 134 bytes allocated
==5635==
==5635== All heap blocks were freed -- no leaks are possible
==5635==
==5635== For counts of detected and suppressed errors, rerun with: -v
==5635== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have any further questions.

CSV file advanced parsing

I have a problem with parsing a .csv file. I have a struct world defined like this:
/* One world record; records are chained through next. */
typedef struct world
{
char worldName[30]; /* world name, fixed 30-byte array */
int worldId; /* numeric id of the world */
char *message; /* message text */
char **constellationArray; /* array of constellation name strings */
struct world *next; /* following world in the list */
} tWorld;
And I have a .csv file designed like this (so the 'c' is for 'semi-Colon'):
worldId;worldName;message;constellationArray
1;K'tau;Planeta pod ochranou Freyra;Aquarius;Crater;Orion;Sagittarius;Cetus;Gemini;Earth
2;Martin's homeworld;Znicena;Aries;Sagittarius;Monoceros;Serpens;Caput;Scutum;Hydra;Earth
3;...
The task seems simple: write a method loadWorlds(char *file). Load the file and parse it. The number of constellations is not guaranteed. Each new line signals a new world and I have to create a linked list of these worlds. I have a rough idea of doing this, but I can't make it work. I have a method called tWorld *createWorld() which is implemented as such:
tWorld *createWorld() {
tWorld *world;
world = (*tWorld)malloc((sizeof(tWorld)));
return world;
}
I have to use this method inside my loadWorlds(char *file). Plus I have to serialize them into the linked list with this:
if (*lastWorld == NULL){
*lastWorld = nextWorld;
}else{
(*actualWorld)->next = nextWorld;
}
*actualWorld = nextWorld;
But I don't know when to use it. This is my rough sketch of loadWorlds(char *file):
/* Unfinished sketch: opens file and reads characters up to the first
 * newline; the parsing body is still empty. */
void loadWorlds(char *file)
{
FILE *f;
char text[30]; /* not used yet */
/* NOTE(review): fgetc() returns int; a char cannot reliably hold EOF */
char letter;
tWorld *lastWorld = NULL, *actualWorld = NULL, *world; /* not used yet */
//f = fopen(file, "r");
if(!(f = fopen(file, "r")))
{
printf("File does not exist! \n");
/* busy-waits until kbhit() returns non-zero; kbhit is not declared in this sketch */
while(!kbhit());
}
else
{
/* NOTE(review): feof() only becomes true after a read has failed, and this loop stops at the first newline */
while(!(feof(f)) && (letter = fgetc(f))!= '\n')
{
/* NOTE(review): znak is not declared here (presumably letter was meant), and this second fgetc consumes another character */
if((znak = fgetc(f)) != ';')
{
}
}
}
}
I would be grateful for any ideas to make this work.
The question "How do I parse this file?... (Plus I have to serialize them into the linked list)" is a non-trivial undertaking when considered in total. Your "How do I parse this file?" is a question in its own right. The second part, regarding the linked list, is a whole separate issue that is not at all explained sufficiently, though it appears you are referring to a singly-linked list. There are as many different ways to approach this as there are labels of wine. I'll attempt to provide an example of one approach to help you along.
In the example below, rather than creating a single static character array worldName within a tWorld struct where all other strings are dynamically allocated, I've changed worldName to a character pointer as well. If you must use a static array of chars, that can be changed easily, but as long as you are allocating the remainder of the strings, it makes sense to allocate for worldName as well.
As to the parsing part of the question, you can use any number of library functions identified in the comments, or you can simply use a couple of pointers and step through each line parsing each string as required. Either approach is fine. The only benefit to using simple pointers, (aside from the learning aspect), is avoidance of repetitive function calls which in some cases can be a bit more efficient. One note when parsing data from a line that has been dynamically allocated is to make sure you preserve the starting address for the buffer to ensure the allocated memory can be properly tracked and freed. Some of the library functions clobber the original buffer (i.e. strtok, etc.) which can cause interesting errors if you pass the buffer itself without, in some way, preserving the original start address.
The function read_list_csv below parses each line read from the csv file (actually semi-colon separated values) into each of the members of the tWorld struct using a pair of character pointers to parse the input line. read_list_csv then calls ins_node_end to insert each filled and allocated tWorld node into a singly linked circular list. The parsing is commented to help explain the logic, but in summary it simply sets a starting pointer p to the beginning, then using an ending pointer ep checks each character in the line until a semi-colon ; is found, temporarily sets the ; to \0 (nul) and reads the string pointed to by p. The temporary \0 is then replaced with the original ; and the process repeats beginning with the following character, until the line has been completely parsed.
The linked-list part of your question is somewhat more involved. It is complicated by many linked-list examples being only partially explained and usually equivalently correct. Further, a linked-list is of little use unless you can add to it, read from it, remove from it, and get rid of it without leaking memory like a sieve. When you look at examples, note there are two primary forms linked-lists take. Either HEAD/TAIL lists or circular lists. Both can be either singly or doubly linked. HEAD/TAIL lists generally use separate pointers for the list start or HEAD and the list end or TAIL node (generally set to NULL). Circular lists simply have the end node next pointer point back to the beginning of the list. Both have their uses. The primary benefit to the circular list is that you can traverse the list from any node to any other node, regardless where you start in the list. (since there is no end-node, you can iterate through all nodes starting from any node).
The example below is a singly linked circular list. It provides functions for creating nodes, inserting them into the list, counting the nodes, printing the entire list, removing nodes from the list, and deleting the list. Importantly, it frees all memory allocated to the list.
Go through both the parsing part of the example and the linked-list part of the example and let me know if you have questions. While the list implementation should be fairly solid, there may be some undiscovered issues. The datafile used for testing as well as the sample output is shown following the code. The code expects the datafile as the first argument and an optional (zero based) node to delete as a second argument (default: node 2):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXL 256
// #define wName 30
/* One world record read from the input file; doubles as a node of the
 * circular linked list built by ins_node_end. */
typedef struct world
{
// worldName used to be a fixed array (char worldName[wName]); it is now a pointer
char *worldName; /* name string; released with free() in free_node */
int worldId; /* numeric id taken from the first field of a row */
char *message; /* message string; released with free() in free_node */
char **constellationArray; /* array of strings; readers walk it until a NULL entry */
struct world *next; /* following node; the last node points back to the first */
} tWorld;
/* allocate a node and store wid/wnm/msg/ca in it (the pointers are stored,
 * not copied); returns NULL when the allocation fails */
tWorld *create_node (int wid, char *wnm, char *msg, char **ca);
/* create a node from the arguments and append it at the end of the circular
 * list *list; returns *list, or NULL when the node could not be created */
tWorld *ins_node_end (tWorld **list, int wid, char *wnm, char *msg, char **ca);
/* read the rows of file fname (the first line is skipped as a header) and
 * append one node per row to *list; returns *list, or NULL when the file
 * cannot be opened or the constellation array cannot be allocated */
tWorld *read_list_csv (tWorld **list, char *fname);
/* return the number of nodes in list; prints a notice and returns 0 when
 * list is NULL */
size_t getszlist (tWorld *list);
/* print every node of list together with its constellation strings */
void print_list (tWorld *list);
/* free a node together with its name, message and constellation strings */
void free_node (tWorld *node);
/* (zero-based) unlink and free the nth node; *list is moved to the next node
 * when node 0 is removed and set to NULL when the only node is removed */
void delete_node (tWorld **list, int nth);
/* free every node of the list and the memory the nodes own */
void delete_list (tWorld *list);
/*
 * Usage: prog <filename> [del_row]
 *
 * Reads the world list from <filename>, prints it, deletes the (zero-based)
 * node del_row (default 2), prints the list again and releases it.
 * Returns 1 when no filename is given, 0 otherwise.
 */
int main (int argc, char **argv)
{
    if (argc < 2) {
        fprintf (stderr, "error: insufficient input. Usage: %s <filename> [del_row]\n", argv[0]);
        return 1;
    }

    char *fname = argv[1];
    tWorld *myworld = NULL;               /* list handle; NULL means empty list */

    read_list_csv (&myworld, fname);      /* read fname and fill linked list */

    /* getszlist returns size_t, whose conversion specifier is %zu (not %zd) */
    printf ("\n Read '%zu' records from file: %s\n\n", getszlist (myworld), fname);
    print_list (myworld);

    int nth = (argc > 2) ? atoi (argv[2]) : 2;
    printf ("\n Deleting node: %d\n\n", nth);
    delete_node (&myworld, nth);          /* may leave myworld NULL */
    print_list (myworld);

    delete_list (myworld);                /* free memory allocated to list */
    return 0;
}
/* allocate & populate node */
tWorld *create_node (int wid, char *wnm, char *msg, char **ca)
{
tWorld *node = NULL;
node = malloc (sizeof *node);
if (!node) return NULL;
node-> worldId = wid;
node-> worldName = wnm;
node-> message = msg;
node-> constellationArray = ca;
return node;
}
/*
 * ins_node_end - build a node from the arguments and append it to the
 * circular list whose first node is *list.
 *
 * An empty list (*list == NULL) becomes a single node that points to
 * itself. Otherwise the list is walked to the node whose next is the first
 * node, and the new node is linked in behind it.
 *
 * Returns *list after the insertion, or NULL when create_node fails.
 */
tWorld *ins_node_end (tWorld **list, int wid, char *wnm, char *msg, char **ca)
{
    tWorld *fresh = create_node (wid, wnm, msg, ca);
    if (fresh == NULL)
        return NULL;

    tWorld *head = *list;
    if (head == NULL) {
        /* first node: it is its own successor */
        fresh->next = fresh;
        *list = fresh;
        return *list;
    }

    /* find the current last node; for a one-node list that is head itself */
    tWorld *tail = head;
    while (tail->next != head)
        tail = tail->next;

    tail->next = fresh;
    fresh->next = head;   /* close the circle again */
    return *list;
}
/* Release the fields of a record that could not be added to the list. */
static void discard_record (char *wnm, char *msg, char **ca)
{
    if (ca) {
        for (size_t i = 0; ca[i]; i++)
            free (ca[i]);
    }
    free (ca);
    free (msg);
    free (wnm);
}

/*
 * read_list_csv - read semicolon-separated rows from fname and append one
 * node per row to *list.
 *
 * Row layout: worldId;worldName;message[;constellation...]
 * The first line of the file is skipped as a header. Empty lines are
 * ignored; rows without the worldId and worldName separators are reported
 * on stderr and skipped instead of being scanned past the end of the
 * buffer. Constellation fields between two separators are kept even when
 * empty; an empty field after the final separator is dropped.
 *
 * Lines longer than MAXL-1 characters are split by fgets and are not
 * reassembled.
 *
 * list:  address of the list handle; nodes are appended via ins_node_end.
 * fname: path of the input file.
 *
 * Returns *list on success. Returns NULL when the file cannot be opened or
 * an allocation fails; rows appended before the failure stay in *list.
 */
tWorld *read_list_csv (tWorld **list, char *fname)
{
    FILE *fp = fopen (fname, "r");
    if (!fp) {
        fprintf (stderr, "%s() error: file open failed for '%s'\n", __func__, fname);
        return NULL;
    }

    char line[MAXL];
    int lcnt = 0;
    int failed = 0;

    while (!failed && fgets (line, MAXL, fp))   /* for each line in file */
    {
        if (lcnt++ == 0) continue;              /* skip header row */

        line[strcspn (line, "\r\n")] = 0;       /* strip line ending */
        if (!*line) continue;                   /* ignore empty lines */

        /* locate the separators that end worldId and worldName */
        char *name = strchr (line, ';');
        char *text = name ? strchr (name + 1, ';') : NULL;
        if (!text) {
            fprintf (stderr, "%s() warning: skipping malformed line %d\n", __func__, lcnt);
            continue;
        }
        *name++ = 0;                            /* line -> worldId   */
        *text++ = 0;                            /* name -> worldName */
        char *rest = strchr (text, ';');        /* text -> message   */
        if (rest) *rest++ = 0;                  /* rest -> constellations, if any */

        int wid = (int) strtol (line, NULL, 10);
        char *wnm = strdup (name);
        char *msg = strdup (text);
        char **ca = calloc (MAXL, sizeof *ca);  /* zeroed, so always NULL-terminated */
        size_t idx = 0;
        failed = !wnm || !msg || !ca;

        /* split the remainder; the last slot of ca stays NULL as terminator */
        for (char *p = rest; !failed && p && *p && idx < (size_t) MAXL - 1; ) {
            char *sep = strchr (p, ';');
            if (sep) *sep = 0;
            if ((ca[idx] = strdup (p)))
                idx++;
            else
                failed = 1;
            p = sep ? sep + 1 : NULL;
        }

        /* on success the node takes ownership of wnm, msg and ca */
        if (!failed && !ins_node_end (list, wid, wnm, msg, ca))
            failed = 1;

        if (failed) {
            fprintf (stderr, "%s() error: allocation failed on line %d\n", __func__, lcnt);
            discard_record (wnm, msg, ca);
        }
    }

    fclose (fp);
    return failed ? NULL : *list;
}
/*
 * getszlist - count the nodes reachable from list.
 *
 * Counting stops when the walk would return to the first node, or when a
 * NULL next pointer is met. For a NULL list a notice is written to stdout
 * and 0 is returned.
 */
size_t getszlist (tWorld *list) {
    if (list == NULL) {
        fprintf (stdout,"%s(), The list is empty\n",__func__);
        return 0;
    }

    int total = 0;
    const tWorld *cur = list;
    do {
        total++;
        cur = cur->next;
    } while (cur != NULL && cur != list);

    return total;
}
/*
 * print_list - write every node of list to stdout.
 *
 * Each node is printed as one line with id, name and message, followed by
 * one line per constellation string up to the first NULL entry. The walk
 * ends when it would return to the first node or meets a NULL next pointer.
 * For a NULL list only a notice is printed.
 */
void print_list (tWorld *list) {
    if (list == NULL) {
        fprintf (stdout,"%s(), The list is empty\n",__func__);
        return;
    }

    char *pad = " ";   /* printed right-aligned in a 38-column field to indent the entries */
    const tWorld *cur = list;
    do {
        printf (" %2d %-20s %-20s\n", cur->worldId, cur->worldName, cur->message);
        for (int k = 0; cur->constellationArray[k] != NULL; k++)
            printf ("%38s %s\n", pad, cur->constellationArray[k]);
        cur = cur->next;
    } while (cur != NULL && cur != list);
}
/*
 * free_node - release a node and everything it owns.
 *
 * Frees worldName, message, each string in constellationArray up to the
 * first NULL entry, the array itself and finally the node. A NULL node is
 * ignored, and so is a NULL constellationArray.
 */
void free_node (tWorld *node)
{
    if (!node) return;

    free (node->worldName);   /* free(NULL) is a no-op, no guard needed */
    free (node->message);

    /* check the array pointer before walking it, not afterwards */
    if (node->constellationArray) {
        for (size_t i = 0; node->constellationArray[i]; i++)
            free (node->constellationArray[i]);
        free (node->constellationArray);
    }

    free (node);
}
/*
 * delete_node - unlink and free the nth (zero-based) node of *list.
 *
 * list: address of the list handle. It is moved to the second node when
 *       node 0 is removed, and set to NULL when the only node is removed.
 * nth:  index of the node to remove; must satisfy 0 <= nth < list size.
 *
 * An empty list produces a notice on stdout, an out-of-range index a
 * message on stderr; in both cases nothing is changed.
 */
void delete_node (tWorld **list, int nth)
{
    if (*list == NULL) {
        fprintf (stdout,"%s(), The list is empty\n",__func__);
        return;
    }

    int count = getszlist (*list);
    if (nth < 0 || nth >= count) {
        fprintf (stderr, "%s(), error: delete out of range (%d). allowed: (0 <= nth <= %d)\n",
                 __func__, nth, count-1);
        return;
    }

    tWorld *head = *list;
    tWorld *target = head;   /* node to remove */
    tWorld *before = head;   /* node whose next is target */

    if (nth != 0) {
        /* step forward nth times, remembering the predecessor */
        for (int step = nth; step > 0 && target->next != head; step--) {
            before = target;
            target = target->next;
        }
    } else {
        /* removing the first node: its predecessor is the last node */
        while (before->next != head)
            before = before->next;
    }

    if (target == target->next) {
        /* the only node points to itself: the list becomes empty */
        *list = NULL;
    } else {
        before->next = target->next;
        if (target == head)
            *list = target->next;
    }

    free_node (target);
}
/*
 * delete_list - free every node of the circular list starting at list.
 *
 * Each node's successor is read before the node is freed, and the first
 * node is freed last so it can serve as the end marker of the walk without
 * being touched after its release. A NULL list is ignored.
 */
void delete_list (tWorld *list)
{
    if (!list) return;

    tWorld *iter = list->next;
    while (iter != list) {
        tWorld *following = iter->next;   /* save the link before the node goes away */
        free_node (iter);
        iter = following;
    }

    free_node (list);
}
input test data file:
$ cat dat/struct.csv
worldId;worldName;message;constellationArray
1;K'tau;Planeta pod ochranou Freyra;Aquarius;Crater;Orion;Sagittarius;Cetus;Gemini;Earth
2;Martin's homeworld;Znicena;Aries;Sagittarius;Monoceros;Serpens;Caput;Scutum;Hydra;Earth
3;Martin's homeworld2;Znicena2;Aries2;Sagittarius2;Monoceros2;Serpens2;Caput2;Scutum2;Hydra2;Earth2
4;Martin's homeworld3;Znicena3;Aries3;Sagittarius3;Monoceros3;Serpens3;Caput3;Scutum3;Hydra3;Earth3
output:
$ ./bin/struct_ll_csv dat/struct.csv 1
Read '4' records from file: dat/struct.csv
1 K'tau Planeta pod ochranou Freyra
Aquarius
Crater
Orion
Sagittarius
Cetus
Gemini
Earth
2 Martin's homeworld Znicena
Aries
Sagittarius
Monoceros
Serpens
Caput
Scutum
Hydra
Earth
3 Martin's homeworld2 Znicena2
Aries2
Sagittarius2
Monoceros2
Serpens2
Caput2
Scutum2
Hydra2
Earth2
4 Martin's homeworld3 Znicena3
Aries3
Sagittarius3
Monoceros3
Serpens3
Caput3
Scutum3
Hydra3
Earth3
Deleting node: 1
1 K'tau Planeta pod ochranou Freyra
Aquarius
Crater
Orion
Sagittarius
Cetus
Gemini
Earth
3 Martin's homeworld2 Znicena2
Aries2
Sagittarius2
Monoceros2
Serpens2
Caput2
Scutum2
Hydra2
Earth2
4 Martin's homeworld3 Znicena3
Aries3
Sagittarius3
Monoceros3
Serpens3
Caput3
Scutum3
Hydra3
Earth3

Resources