I have some code to create a simple tree based on pointers to nodes, and would like to write this tree (with its data) to a file and than read it back from file into memory. How can I do this ? Here is my code to create a simple tree:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct Node
{
void *data;
struct Node *left;
struct Node *right;
};
int main(void)
{
unsigned int_size = sizeof(int);
int data;
/* Tree root node */
struct Node *start = (struct Node*)malloc(sizeof(struct Node));
data = 5;
start->data = malloc(int_size);
memcpy(start->data, &data, int_size);
/* Left node of root */
start->left = (struct Node*)malloc(sizeof(struct Node));
data = 4;
start->left->data = malloc(int_size);
memcpy(start->left->data, &data, int_size);
start->left->left = NULL;
start->left->right = NULL;
/* Right node of root */
start->right = (struct Node*)malloc(sizeof(struct Node));
data = 3;
start->right->data = malloc(int_size);
memcpy(start->right->data, &data, int_size);
start->right->left = NULL;
start->right->right = NULL;
/* Print data */
printf("%d\n",*(int*)(start->data));
printf("%d\n",*(int*)(start->left->data));
printf("%d\n",*(int*)(start->right->data));
return 0;
}
Here a proposal, I use preprocessor macro TYPE to be able to change type and separated functions to read/write an element of that type, I also use function mk to easily create node rather than to duplicate the code as you did for each node.
For the serialization I use a very simple way
'e' indicates an empty node
'n' indicate a non empty node, then I write the value then the left then the right node
I also added pr to easily print a tree as a debug function.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TYPE int
struct Node
{
TYPE data;
struct Node *left;
struct Node *right;
};
/* make a new Node */
struct Node * mk(TYPE v, struct Node * l, struct Node * r)
{
struct Node * n = malloc(sizeof(struct Node));
if (n == NULL) {
fprintf(stderr, "not enough memory\n");
exit(-1);
}
n->data = v;
n->left = l;
n->right = r;
return n;
}
/* write/read data */
void wrData(TYPE v, FILE * fp)
{
fprintf(fp, "%d", v); /* dedicated to int */
}
TYPE rdData(FILE * fp)
{
TYPE v;
fscanf(fp, "%d", &v); /* dedicated to int */
return v;
}
/* serialize a tree */
void wr(struct Node * n, FILE * fp)
{
if (n == NULL)
fputc('e', fp);
else {
fputc('n', fp);
wrData(n->data, fp);
wr(n->left, fp);
wr(n->right, fp);
}
}
/* unserialize a tree */
struct Node * rd(FILE * fp)
{
switch (fgetc(fp)) {
case 'e':
return NULL;
case 'n':
{
TYPE v = rdData(fp);
struct Node * l = rd(fp);
return mk(v, l, rd(fp));
}
default:
fprintf(stderr, "invalid file");
exit(-1);
}
}
/* for debug, show tree */
void pr(struct Node * t)
{
if (t == NULL)
printf("()");
else {
putchar('(');
pr(t->left);
putchar(' ');
wrData(t->data, stdout);
putchar(' ');
pr(t->right);
putchar(')');
}
}
/* free a tree */
void del(struct Node * t)
{
if (t != NULL) {
del(t->left);
del(t->right);
free(t);
}
}
int main()
{
/* Tree root node */
struct Node *start = mk(5, mk(4, NULL, NULL), mk(3, NULL, NULL));
/* show it */
pr(start);
putchar('\n');
/* serialize */
FILE * fp;
if ((fp = fopen("/tmp/t", "w")) == 0) {
fprintf(stderr, " cannot open /tmp/t to write");
exit(-1);
}
wr(start, fp);
fclose(fp);
/* free tree */
del(start);
/* unserialize */
if ((fp = fopen("/tmp/t", "r")) == 0) {
fprintf(stderr, " cannot open /tmp/t to read");
exit(-1);
}
start = rd(fp);
fclose(fp);
/* show it */
pr(start);
putchar('\n');
/* free it */
del(start);
return 0;
}
Compilation and execution :
/tmp % gcc -pedantic -Wextra -Wall t.c
/tmp % ./a.out
((() 4 ()) 5 (() 3 ()))
((() 4 ()) 5 (() 3 ()))
/tmp % cat t ; echo
n5n4een3ee
Execution under valgrind :
/tmp % valgrind ./a.out
==19907== Memcheck, a memory error detector
==19907== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==19907== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==19907== Command: ./a.out
==19907==
((() 4 ()) 5 (() 3 ()))
((() 4 ()) 5 (() 3 ()))
==19907==
==19907== HEAP SUMMARY:
==19907== in use at exit: 0 bytes in 0 blocks
==19907== total heap usage: 8 allocs, 8 frees, 1,280 bytes allocated
==19907==
==19907== All heap blocks were freed -- no leaks are possible
==19907==
==19907== For counts of detected and suppressed errors, rerun with: -v
==19907== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 6)
There are two common ways for serializing objects having pointers on other objects. First one is to replace the pointer with an offset in the file, second one is to use a simple index. In fact, the offset way is mainly used when the data is used directly from the file (database files for example) while the index way is more used for simple storage.
Here a simple way would be to store the nodes into a file recursively as left_node - right_node - main_node. That way when you store a node you already know the index in the file of its left and right children. A possible implementation is then:
static size_t do_save(FILE *fd, struct Node *node, unsigned data_size, int curr) {
size_t left=0, right=0;
if (node->left != NULL) { // first left sub_hierarchy
left = do_save(fd, node->left, data_size, curr);
curr = left;
}
if (node->left != NULL) { // then right one
right = do_save(fd, node->right, data_size, curr);
curr = right;
}
fwrite(&left, sizeof(left), 1, fd); // store index of left child
fwrite(&right, sizeof(right), 1, fd); // then index of right child
fwrite(node->data, data_size, 1, fd); // then the data
return curr + 1; // and return current index
}
size_t save(FILE *fd, struct Node *root, unsigned data_size) {
size_t nb = do_save(fd, root, data_size, 0);
return nb;
}
Here, an index of 0 means a null pointer, and a non null index is a one based index in the file
To deserialize, as I assume that you want each node to be individually allocated, I would use a temporary array of pointers to keep the actual addresses of the allocated nodes:
struct Node* load(FILE *fd, unsigned data_size) {
size_t nb_elts, left, right;
fseek(fd, 0, SEEK_END); // computer number of nodes in the file
nb_elts = ftell(fd) / (data_size + 2*sizeof(size_t));
struct Node** ptx = malloc(nb_elts * sizeof(*ptx)); // allocate array of pointers
fseek(fd, 0, SEEK_SET);
for(size_t i=0; i<nb_elts; i++) { // loop reading nodes
ptx[i] = malloc(sizeof(struct Node)); // allocate node and data
ptx[i]->data = malloc(data_size);
fread(&left, sizeof(size_t), 1, fd); // extract child indices
fread(&right, sizeof(size_t), 1, fd);
fread(ptx[i]->data, data_size, 1, fd); // read data
ptx[i]->left = (left == 0) ? NULL : ptx[left - 1]; // convert indices to pointers
ptx[i]->right = (right == 0) ? NULL : ptx[right - 1];
}
struct Node *last = ptx[nb_elts - 1];
free(ptx); // free the temporary array
return last; // and return the root node
}
Related
I defined a display function to display the content of a queue:
void display(queue_t* s) {
queue_t* c = s;
int i = c->size;
while (i > 0) {
const char* elem = dequeue(c).value;
printf("Item n°%d : %s\n",i,elem);
i--;
};
};
where queue_t is defined as follow:
typedef struct queue {
node_t* head;
node_t* tail;
int size;
} queue_t;
and dequeue is a function that removes a node from the queue and frees it. This function works as intended.
The function display should display the content of the queue without deleting its content but when testing it, the queue is empty if I call display before removing each element one by one by hand by calling dequeue. I thought that queue_t* c = s; would copy the element without any link between c and s.
How can I copy the content of s into c without any link between the two variables ?
EDIT - MWE
Header file - queue.h
#ifndef QUEUE_H
#define QUEUE_H
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
typedef struct element {
bool type;
const char* value;
} element_t;
typedef struct node {
element_t content;
struct node* previous;
struct node* next;
} node_t;
typedef struct queue {
node_t* head;
node_t* tail;
int size;
} queue_t;
queue_t* init_queue(void);
node_t* init_node(element_t e);
void queue(queue_t* s, element_t e);
element_t dequeue(queue_t* s);
void display(queue_t* s);
#endif
Source code - queue.c
#include "queue.h"
queue_t* init_queue(void) {
queue_t* new = (queue_t*)malloc(sizeof(queue_t));
new->head = NULL;
new->tail = NULL;
new->size = 0;
return new;
};
node_t* init_node(element_t e) {
node_t* new = (node_t*)malloc(sizeof(node_t));
new->content = e;
new->next = NULL;
new->previous = NULL;
return new;
};
void queue(queue_t* s, element_t e) {
node_t* n = init_node(e);
if (s->size == 0) {
s->head = n;
s->tail = n;
s->size = 1;
} else {
n->previous = s->tail;
s->tail = n;
s->size++;
};
};
element_t dequeue(queue_t* s) {
if (s->size == 0) {
element_t empty;
empty.type = true;
empty.value = "0";
return empty;
} if (s->size == 1) {
element_t c = s->head->content;
node_t* old = s->head;
s->head = NULL;
s->size = 0;
s->tail = NULL;
free(old);
return c;
} else {
element_t c = s->tail->content;
node_t* old = s->tail;
s->tail = s->tail->previous;
s->tail->next = NULL;
s->size--;
free(old);
return c;
};
};
void display(queue_t* s) {
queue_t* c = s;
int i = c->size;
while (i > 0) {
const char* elem = dequeue(c).value;
printf("Item n°%d : %s\n",i,elem);
i--;
};
};
Test file - test.c
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include "queue.h"
int main(void) {
element_t e1 = {.type = false, .value = "1"};
element_t e2 = {.type = false, .value = "5"};
element_t e3 = {.type = false, .value = "10"};
queue_t* test = init_queue();
queue(test,e1);
queue(test,e2);
queue(test,e3);
display(test);
element_t e4 = dequeue(test);
printf("%s\n",e4.value);
element_t e5 = dequeue(test);
printf("%s\n",e5.value);
element_t e6 = dequeue(test);
printf("%s\n",e6.value);
element_t e7 = dequeue(test);
printf("%s\n",e7.value);
return 0;
}
Run the test file using
gcc -g -std=c99 -Wall -o test.o -c test.c
gcc -g -std=c99 -Wall -o queue.o -c queue.c
gcc -g -std=c99 -Wall -o test queue.o test.o
A few issues ...
You're leaking memory.
Passing structs by value doesn't scale.
queue does not set next for s->tail so forward list traversal doesn't work.
queue is much more complicated than it needs to be
dequeue should be split into two functions: dequeue and destroy
display should just display a list by traversal.
init_node should do a deep copy of content.value
You're leaking memory.
dequeue doesn't free old->value.
It returns a copy that has a valid value.
But, in display, you do:
const char *elem = dequeue(c).value;
You never free elem
Here is the valgrind output:
==2168790== Memcheck, a memory error detector
==2168790== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2168790== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==2168790== Command: ./fix1
==2168790==
Item n°3 : 10
Item n°2 : 5
Item n°1 : 1
0
0
0
0
==2168790==
==2168790== HEAP SUMMARY:
==2168790== in use at exit: 24 bytes in 1 blocks
==2168790== total heap usage: 5 allocs, 4 frees, 4,216 bytes allocated
==2168790==
==2168790== LEAK SUMMARY:
==2168790== definitely lost: 24 bytes in 1 blocks
==2168790== indirectly lost: 0 bytes in 0 blocks
==2168790== possibly lost: 0 bytes in 0 blocks
==2168790== still reachable: 0 bytes in 0 blocks
==2168790== suppressed: 0 bytes in 0 blocks
==2168790== Rerun with --leak-check=full to see details of leaked memory
==2168790==
==2168790== For lists of detected and suppressed errors, rerun with: -s
==2168790== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
dequeue returns "by value" -- this doesn't scale
Add int data[10000000]; to element_t and watch the stack blow up ;-)
In general, passing around a struct by value has similar issues in other places.
dequeue should just dequeue the element.
Do one thing well. dequeue does a dequeue and a [partial/incomplete] destroy.
We should have a separate function destroy that fully frees the node_t/element_t
And, dequeue [as written] should be renamed as pop or dequeue_back because it works from tail to head.
Also, we'd probably like a dequeue_front that works from head to tail
And, it's much more useful if the functions return pointers to the dequeued elements and let the caller use destroy on them when the caller is finished with them.
display should just display the queue and not alter/destroy it.
It could be split [after fixup] into two functions (e.g.) display_reverse and display_forward
Here is the refactored code. It is annotated:
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#ifndef DEEPCOPY
#define DEEPCOPY 1
#endif
typedef struct element {
bool type;
#if DEEPCOPY
char *value;
#else
const char *value;
#endif
} element_t;
typedef struct node {
element_t content;
struct node *previous;
struct node *next;
} node_t;
typedef struct queue {
node_t *head;
node_t *tail;
int size;
} queue_t;
queue_t *init_queue(void);
node_t *init_node(const element_t *e);
void queue(queue_t *s, const element_t *e);
node_t *dequeue_back(queue_t *s);
void display_forward(const queue_t *s);
void display_reverse(const queue_t *s);
// prevent/reduce unintential "use after free"
#define FREE(_ptr) \
do { \
free(_ptr); \
_ptr = NULL; \
} while (0)
queue_t *
init_queue(void)
{
queue_t *new = malloc(sizeof(queue_t));
new->head = NULL;
new->tail = NULL;
new->size = 0;
return new;
}
node_t *
init_node(const element_t *e)
{
node_t *new = malloc(sizeof(node_t));
new->content = *e;
// NOTE/FIX: we should deep copy the element
#if DEEPCOPY
new->content.value = strdup(e->value);
#endif
new->next = NULL;
new->previous = NULL;
return new;
}
void
queue(queue_t *s, const element_t *e)
{
node_t *n = init_node(e);
if (s->size == 0) {
s->head = n;
s->tail = n;
s->size = 1;
}
else {
n->previous = s->tail;
// NOTE/BUG: without this forward traversal doesn't work
#if 1
s->tail->next = n;
#endif
s->tail = n;
s->size++;
}
}
void
destroy_element(element_t *elem)
{
#if DEEPCOPY
FREE(elem->value);
#endif
}
node_t *
destroy_node(node_t *node)
{
destroy_element(&node->content);
FREE(node);
// convenience to caller
return node;
}
node_t *
dequeue_back(queue_t *s)
{
node_t *ret = s->tail;
if (ret != NULL) {
s->tail = ret->previous;
if (s->head == ret)
s->head = ret->next;
s->size -= 1;
}
return ret;
}
void
display_forward(const queue_t *s)
{
int i = 0;
const node_t *node = s->head;
for (; node != NULL; node = node->next) {
printf("Item n°%d : %s\n", i, node->content.value);
i++;
}
}
void
display_reverse(const queue_t *s)
{
int i = s->size;
const node_t *node = s->tail;
for (; node != NULL; node = node->previous) {
printf("Item n°%d : %s\n", i, node->content.value);
i--;
}
}
int
main(void)
{
element_t e1 = {.type = false,.value = "1" };
element_t e2 = {.type = false,.value = "5" };
element_t e3 = {.type = false,.value = "10" };
queue_t *test = init_queue();
queue(test, &e1);
queue(test, &e2);
queue(test, &e3);
printf("display_reverse:\n");
display_reverse(test);
printf("display_forward:\n");
display_forward(test);
for (int i = 4; i <= 7; ++i) {
node_t *node = dequeue_back(test);
if (node == NULL)
break;
printf("main: %s (dequeue_back)\n", node->content.value);
destroy_node(node);
}
#if 1
FREE(test);
#endif
return 0;
}
In the above code, I've used cpp conditionals to denote old vs. new code:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
Note: this can be cleaned up by running the file through unifdef -k
Here is the program output:
display_reverse:
Item n°3 : 10
Item n°2 : 5
Item n°1 : 1
display_forward:
Item n°0 : 1
Item n°1 : 5
Item n°2 : 10
main: 10 (dequeue_back)
main: 5 (dequeue_back)
main: 1 (dequeue_back)
I thought that queue_t* c = s; would copy the element without any link between c and s.
No. It only makes a copy of the pointer s that you passed in the function.
Copying would require you to define "copy semantics", in other words:
What happens if you copy a queue, will the "elements" in your queue be copied too? (This is trivial for primitive types, but not so easy for objects).
If your elements are objects, what copy semantic do they have?
(in the case of strings, you'll need something like strncpy or memcpy to copy all characters into a new buffer)
Also, making a "true" (truly independent) copy you will need to make a complete copy of the entire queue.
This means setting up a completely new queue with all needed element data copied.
I'm phrasing it this way, because you certainly cannot re-use the head, tail, next and previous pointers from your existing queue, otherwise it would not be independent.
And... it's a bad design choice to have a method that is supposedly to be read-only* to modify your queue (data).
To display the contents of your queue you don't need to modify it.
You need to be aware, especially in C, when, how, by whom and for how long your data structures are being accessed. Just a single dangling pointer makes your whole program unpredictable.
*displaying data is considered a read-only operation by most (if not all) programmers.
I am trying to add strings that I am reading from a text file to a linked list.
Since I don't know how long the file or the string is , I want to do this dynamically.
But somewhere along the line I get a segmentation fault.
I have tried everything but I think I overlooked something crucial.
Could someone tell me what I am doing wrong?
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
char *name;
int age;
struct node* next;
}node;
node *add(node *head, char* n_m){
node *new_node;
new_node = (node*)malloc(sizeof(node));
if(new_node == NULL)
printf("Fehler bei Speicher reservierung...");
new_node->name = (char*)malloc(100*sizeof(char));
if(new_node->name == NULL)
printf("Fehler bei Speicher reservierung...");
strcpy(new_node->name, n_m);
if(head == NULL){
head = new_node;
head->next = NULL;
return head;
}
node *current;
current = head;
while(current->next != NULL){
current = current->next;
}
current->next = new_node;
new_node->next = NULL;
return head;
}
void print(node *head){
node *current;
current = head;
while(current != NULL){
printf("%s\n", current->name);
current = current->next;
}
}
int main(){
node *head = NULL;
char character;
FILE *fp;
fp = fopen("test.txt", "r");
while ((character = fgetc(fp)) != EOF) {
char *n_m;
n_m = (char*)malloc(100 * sizeof(char));
if(n_m == NULL)
printf("Fehler bei Speicher reservierung...");
int i = 0;
while (character != ' ') {
n_m[i++] = character;
character = fgetc(fp);
}
n_m[++i] = '\0'; // NULL-terminate
head = add(head, n_m);
free(n_m);
}
print(head);
return 0;
}
Your biggest issue is your read of characters does not trap EOF and will continue reading after EOF is encountered, causing i to exceed your array bounds invoking Undefined Behavior leading to your SegFault. The code in question is:
while (character != ' ') {
n_m[i++] = character;
character = fgetc(fp);
}
Since POSIX files do not end with a ' ' (space), but instead a '\n', your read of the last word in the file does not stop at EOF. You further have problems with multiple spaces together repeatedly writing nodes containing the empty-string to your list.
You also fail to handle any other whitespace other than space, meaning you are including '\t', '\n', vertical tab, etc.. within the words you store. Instead of checking space with ' ', use the isspace() macro included in ctype.h.
n_m[++i] = '\0'; should be n_m[i] = '\0';. You increment i with n_m[i++] = character;. You do NOT want to increment i again with the pre-increment operator before nul-terminating your string. That results in the last character in the string being indeterminate (again invoking undefined behavior when a read of the string is attempted)
Fixing those issues (and using c instead of character, ndx instead of i and buf instead of n_m), your read and add() to your list would resemble:
while ((c = fgetc(fp)) != EOF) { /* read each char in file */
if (isspace(c) || ndx == MAXC - 1) { /* is space or buf full? */
if (in) { /* were we within word? */
buf[ndx] = 0; /* nul-terminate */
head = add (head, buf); /* add node to list */
}
if (ndx < MAXC - 1) /* buffer not full */
in = 0; /* set in flag zero */
ndx = 0; /* reset index zero */
}
else { /* otherwise */
buf[ndx++] = c; /* add char to buf */
in = 1; /* set in flag 1 */
}
}
(note: using the variable in as an in/out flag to keep track of whether you are within a word reading characters, or between words reading whitespace solves the problem you would have with multiple whitespace characters in sequences, e.g. "hello world")
Optional, but helpful, when allocating nodes that also contain members that need to be allocated is to write a createnode() function that fully Allocates and Initializes all node members rather than putting that code in add(). It keeps things clean and ensures every node you allocate is fully initialized before its use in add(). For example:
/** create new node, allocate and initialize all member values,
* return pointer to node on success, NULL otherwise.
*/
node *createnode (char *s)
{
node *new_node;
new_node = malloc (sizeof *new_node); /* allocate/validate node */
if (new_node == NULL) {
perror ("malloc-Fehler bei Speicher reservierung...");
return NULL;
}
new_node->name = malloc (strlen (s) + 1); /* allocate/validate name */
if (new_node->name == NULL) {
perror ("malloc-Fehler bei Speicher reservierung...");
return NULL;;
}
strcpy (new_node->name, s); /* initialize all node values */
new_node->age = 0;
new_node->next = NULL;
return new_node; /* return newly allocated/initialized node */
}
Then your add() function reduces to:
/** add node containing allocated string 'n_m' to list, return pointer
* to 1st node in list on success, exit with failure otherwise.
*/
node *add (node *head, char *n_m)
{
node *new_node = createnode (n_m); /* allocate/initialize new node */
node *current = head; /* pointer to current head */
if (new_node == NULL) /* validate allocation */
exit (EXIT_FAILURE);
if (!head) /* handle 1st node */
return new_node;
while (current->next != NULL) /* iterate to end of list */
current = current->next;
current->next = new_node; /* set next node to new_node */
return head; /* return pointer to head */
}
Putting it altogether and adding a del_list() function to free all allocated memory for the list, you could do the following:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXC 1024
typedef struct node {
char *name;
int age;
struct node *next;
} node;
/** create new node, allocate and initialize all member values,
* return pointer to node on success, NULL otherwise.
*/
node *createnode (char *s)
{
node *new_node;
new_node = malloc (sizeof *new_node); /* allocate/validate node */
if (new_node == NULL) {
perror ("malloc-Fehler bei Speicher reservierung...");
return NULL;
}
new_node->name = malloc (strlen (s) + 1); /* allocate/validate name */
if (new_node->name == NULL) {
perror ("malloc-Fehler bei Speicher reservierung...");
return NULL;;
}
strcpy (new_node->name, s); /* initialize all node values */
new_node->age = 0;
new_node->next = NULL;
return new_node; /* return newly allocated/initialized node */
}
/** add node containing allocated string 'n_m' to list, return pointer
* to 1st node in list on success, exit with failure otherwise.
*/
node *add (node *head, char *n_m)
{
node *new_node = createnode (n_m); /* allocate/initialize new node */
node *current = head; /* pointer to current head */
if (new_node == NULL) /* validate allocation */
exit (EXIT_FAILURE);
if (!head) /* handle 1st node */
return new_node;
while (current->next != NULL) /* iterate to end of list */
current = current->next;
current->next = new_node; /* set next node to new_node */
return head; /* return pointer to head */
}
void print (node * head)
{
node *current;
current = head;
while (current != NULL) {
printf ("%s\n", current->name);
current = current->next;
}
}
/** delete all nodes in list */
void del_list (node *head)
{
node *pn = head; /* pointer to iterate */
while (pn) { /* iterate over each node */
node *victim = pn; /* set victim to current */
pn = pn->next; /* advance pointer to next */
free (victim->name); /* free current string */
free (victim); /* free current node */
}
}
int main (int argc, char **argv) {
char buf[MAXC];
int c = 0, in = 0, ndx = 0;
node *head = NULL;
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
while ((c = fgetc(fp)) != EOF) { /* read each char in file */
if (isspace(c) || ndx == MAXC - 1) { /* is space or buf full? */
if (in) { /* were we within word? */
buf[ndx] = 0; /* nul-terminate */
head = add (head, buf); /* add node to list */
}
if (ndx < MAXC - 1) /* buffer not full */
in = 0; /* set in flag zero */
ndx = 0; /* reset index zero */
}
else { /* otherwise */
buf[ndx++] = c; /* add char to buf */
in = 1; /* set in flag 1 */
}
}
if (fp != stdin) /* close file if not stdin */
fclose (fp);
print (head); /* print list */
del_list (head); /* free all allocated memory */
}
Example Input File
$ cat dat/captnjack.txt
This is a tale
Of Captain Jack Sparrow
A Pirate So Brave
On the Seven Seas.
Example Use/Output
$ ./bin/ll_name_age dat/captnjack.txt
This
is
a
tale
Of
Captain
Jack
Sparrow
A
Pirate
So
Brave
On
the
Seven
Seas.
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to ensure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/ll_name_age dat/captnjack.txt
==18265== Memcheck, a memory error detector
==18265== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==18265== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==18265== Command: ./bin/ll_name_age dat/captnjack.txt
==18265==
This
is
a
tale
Of
Captain
Jack
Sparrow
A
Pirate
So
Brave
On
the
Seven
Seas.
==18265==
==18265== HEAP SUMMARY:
==18265== in use at exit: 0 bytes in 0 blocks
==18265== total heap usage: 35 allocs, 35 frees, 6,132 bytes allocated
==18265==
==18265== All heap blocks were freed -- no leaks are possible
==18265==
==18265== For counts of detected and suppressed errors, rerun with: -v
==18265== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have further questions.
You need to guard against reading beyond the end of file and against going beyond the memory you've allocated. It may also be possible that your file had a "long line" right at 100 characters by both postfixing and prefixing i you went passed your end of buffer.
while ((character = fgetc(fp)) != EOF) {
char *n_m;
n_m = (char*)malloc(100 * sizeof(char));
if(n_m == NULL)
printf("Fehler bei Speicher reservierung...");
int i = 0;
while ((character != ' ')
&& (character != EOF)
&& (i < 100)) {
n_m[i++] = character;
character = fgetc(fp);
}
// don't postfix add about then prefix add below
if (i >= 100) {
… there is a problem …
}
n_m[i] = '\0'; // NULL-terminate
head = add(head, n_m);
free(n_m);
}
You might consider something more like this
#define BUFF_SIZE 100
char buff[BUFF_SIZE];
buff[0] = '\0';
int i = 0;
while (((character = fgetc(fp)) != EOF)
&& (i < BUFF_SIZE)) {
buff[i++] = character;
character = fgetc(fp);
if (character = ' ') {
buff[i] = '\0'; // NULL-terminate
head = add(head, buff);
i = 0;
buff[0] = '\0';
}
}
if (i >= BUFF_SIZE) { … there is a problem … }
This does a couple of useful things. One is your buffer is statically allocated and it's size is controlled by a single #define. Second, it reduces the number of loops involved which can improve readability.
I need to write function which loads dictionary in hash table.I'm confused about error message: c:37:20 runtime error: load of null pointer of type 'const char', which runs in segmentation fault.
I've tried to change load function, but still didn`t help. And also tried to allocate memory for hashtable, as I thought problem might be in memory leaks.
` // Represents number of buckets in a hash table
#define N 26
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node *next;
}
node;
// Represents a hash table
node *hashtable[N];
// Hashes word to a number between 0 and 25, inclusive, based on its first letter
unsigned int hash(const char *word)
{
// Allocates memory for hashtable
int *ht = malloc(26*sizeof(int));
if(!ht)
{
unload();
return false;
}
return tolower(word[0]) - 'a'; // this is error line 37:20
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Initialize hash table
for (int i = 0; i < N; i++)
{
hashtable[i] = NULL;
}
// Open dictionary
FILE *file = fopen(dictionary, "r");
if (file == NULL)
{
unload();
return false;
}
// Buffer for a word
char word[LENGTH + 1];
// Insert words into hash table
while (fscanf(file, "%s", word) != EOF)
{
for (int i = 0; i < N; i++ )
{
// Allocate memory for node for each new word
node *new_node = malloc(sizeof(node));
if (!new_node)
{
unload();
return false;
}
// Copies word into node
strcpy(new_node->word, word);
new_node->next = NULL;
// Hashes word
hash(new_node->word);
// Inserts word into linked list
if(hashtable[i] == 0)
{
hashtable[i] = new_node;
}
else if(hashtable[i] == new_node)
{
new_node->next = hashtable[i];
hashtable[i] = new_node;
}
}
}
// Close dictionary
fclose(file);
// Indicate success
return true;
}
The function load should retun true when dictionary is loaded. But I get segmentation fault. Does it mean I didn't get right output from load function?
In
new_node->next = NULL;
hash(new_node->word);
// Inserts word into linked list
if(hashtable[i] == 0)
{
hashtable[i] = new_node;
}
else if(hashtable[i] == new_node)
{
new_node->next = hashtable[i];
hashtable[i] = new_node;
}
you do not use the result of hash() and you use i rather than the hash result as index in hashtable, if N greater than 26 you read/write out of hashtable, in the other case you do not put the word in the right entry because the first at index 0, the next at index 1 etc whatever their first letter
Note else if(hashtable[i] == new_node) is never true and in fact never reach because if(hashtable[i] == 0) is always true because you limit the number of word to read
Must be something like that doing minimal changes
int h = hash(new_node->word);
// Inserts word into linked list
if(hashtable[h] == 0)
{
hashtable[h] = new_node;
new_node->next = NULL;
}
else
{
new_node->next = hashtable[h];
hashtable[h] = new_node;
}
but in fact can be simplified to be :
int h = hash(new_node->word);
new_node->next = hashtable[h];
hashtable[h] = new_node;
Note I suppose you do not read several times the same word (it is a dictionary)
To do
while (fscanf(file, "%s", word) != EOF)
is dangerous because there is no protection if the read word is longer than LENGTH
supposing LENGTH is 32 do ( the word can store 32 characters more the final null character) :
while (fscanf(file, "%32s", word) == 1)
There is no reason to have the loop :
for (int i = 0; i < N; i++ )
{
...
}
remove it (but not its body of course), so :
while (fscanf(file, "%32s", word) == 1)
{
// Allocate memory for node for each new word
node *new_node = malloc(sizeof(node));
if (!new_node)
{
unload();
return false;
}
// Copies word into node
strcpy(new_node->word, word);
int h = hash(new_node->word);
new_node->next = hashtable[h];
hashtable[h] = new_node;
}
tte part
// Initialize hash table
for (int i = 0; i < N; i++)
{
hashtable[i] = NULL;
}
is useless because hashtable being global is initialized with 0
If you want to reload the dictionary you need to free the linked list before to reset to NULL
memory leaks
the malloc in hash is useless and only create memory leaks, remove it :
// Hashes word to a number between 0 and 25, inclusive, based on its first letter
unsigned int hash(const char *word)
{
return tolower(word[0]) - 'a';
}
Warning if the first letter is not a-z or A-Z the return index is not a valid index for hashtable
For readability reason replace #define N 26 by #define N ('z' - 'a' + 1)
A proposal adding missing definitions :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define bool int
#define true 1
#define false 0
// Represents number of buckets in a hash table
#define N ('z' - 'a' + 1)
// Represent max word length
#define LENGTH 32
// Represents a node in a hash table
typedef struct node
{
char word[LENGTH + 1];
struct node * next;
}
node;
// Represents a hash table
node * hashtable[N];
// Hashes word to a number between 0 and 25, inclusive, based on its first letter
unsigned int hash(const char *word)
{
return tolower(word[0]) - 'a';
}
// probable goal : empty hashtable
void unload()
{
for (size_t i = 0; i != N; ++i) {
while (hashtable[i] != NULL) {
node * next = hashtable[i]->next;
free(hashtable[i]);
hashtable[i] = next;
}
}
}
// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
// Open dictionary
FILE * file = fopen(dictionary, "r");
if (file == NULL)
return false;
// Buffer for a word
char word[LENGTH + 1];
// Insert words into hash table
while (fscanf(file, "%32s", word) == 1)
{
if (isalpha(word[0])) {
// Allocate memory for node for each new word
node * new_node = malloc(sizeof(node));
if (!new_node)
{
unload();
return false;
}
// Copies word into node
strcpy(new_node->word, word);
int h = hash(new_node->word);
new_node->next = hashtable[h];
hashtable[h] = new_node;
}
}
// Close dictionary
fclose(file);
// Indicate success
return true;
}
int main(int argc, char ** argv)
{
if (argc != 2)
printf("Usage : %s <dictionary>\n", *argv);
else if (!load(argv[1]))
fprintf(stderr, "Error when loading '%s'\n", argv[1]);
else {
puts("dictionary content");
for (size_t i = 0; i != N; ++i) {
node * n = hashtable[i];
if (n != NULL) {
printf("%c :", i + 'a');
do {
printf(" %s", n->word);
n = n->next;
} while (n != NULL);
putchar('\n');
}
}
unload();
}
}
Compilation and execution :
pi#raspberrypi:/tmp $ gcc -pedantic -Wextra -Wall d.c
pi#raspberrypi:/tmp $ cat d
alternate
bellow and
Below
dictionary
Hash main zombie
test
Zorro
pi#raspberrypi:/tmp $ ./a.out
Usage : ./a.out <dictionary>
pi#raspberrypi:/tmp $ ./a.out d
dictionary content
a : and alternate
b : Below bellow
d : dictionary
h : Hash
m : main
t : test
z : Zorro zombie
Execution under valgrind :
pi#raspberrypi:/tmp $ valgrind ./a.out d
==2370== Memcheck, a memory error detector
==2370== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2370== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2370== Command: ./a.out d
==2370==
dictionary content
a : and alternate
b : Below bellow
d : dictionary
h : Hash
m : main
t : test
z : Zorro zombie
==2370==
==2370== HEAP SUMMARY:
==2370== in use at exit: 0 bytes in 0 blocks
==2370== total heap usage: 13 allocs, 13 frees, 5,872 bytes allocated
==2370==
==2370== All heap blocks were freed -- no leaks are possible
==2370==
==2370== For counts of detected and suppressed errors, rerun with: -v
==2370== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 3)
i had to write my own free & malloc function.
So far I have not had a problem with that, but now I am supposed to merge neighboring free memory blocks in the halde_free() function. I would be really grateful if you could help me there.
#include "halde.h"
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
/// Magic value for occupied memory chunks.
#define MAGIC ((void*)0xbaadf00d)
/// Size of the heap (in bytes).
#define SIZE (1024*1024*1)
/// Memory-chunk structure.
struct mblock {
struct mblock *next;
size_t size;
char memory[];
};
/// Heap-memory area.
char memory[SIZE];
/// Pointer to the first element of the free-memory list.
static struct mblock *head;
/// Helper function to visualise the current state of the free-memory list.
void halde_print(void) {
struct mblock* lauf = head;
// Empty list
if ( head == NULL ) {
fprintf(stderr, "(empty)\n");
return;
}
// Print each element in the list
while ( lauf ) {
fprintf(stderr, "(addr: 0x%08zx, off: %7zu, ", (uintptr_t) lauf, (uintptr_t)lauf - (uintptr_t)memory);
fflush(stderr);
fprintf(stderr, "size: %7zu)", lauf->size);
fflush(stderr);
if ( lauf->next != NULL ) {
fprintf(stderr, " --> ");
fflush(stderr);
}
lauf = lauf->next;
}
fprintf(stderr, "\n");
fflush(stderr);
}
void *halde_malloc (size_t size) {
static int initialized = 0;
if(initialized == 0){
head = (struct mblock *) memory;
head->size = sizeof(memory) - sizeof (struct mblock);
head->next = NULL;
initialized = 1;
}
if(size == 0){
return NULL;
}
struct mblock *lauf = head;
struct mblock **prev_next = &head;
while (lauf != NULL && lauf->size < size){
prev_next = &(lauf->next);
lauf = *prev_next;
}
if(lauf == NULL){
errno = ENOMEM;
return NULL;
}
if((lauf->size -size) <= sizeof(struct mblock)){
*prev_next = lauf->next;
} else {
//mblock anlegen und init.
struct mblock* neu = (struct mblock*) (lauf->memory + size);
neu->size = lauf->size - sizeof(struct mblock) - size;
neu->next = lauf->next;
//mblock anpassen
lauf->size = size;
//verketten wiederherstellen
*prev_next = neu;
}
lauf->next = MAGIC;
return lauf->memory;
}
void halde_free (void *ptr) {
if(ptr == NULL){
return;
}
struct mblock *mbp = (struct mblock *) ptr - 1;
if(mbp->next != MAGIC){
abort();
} else {
mbp->next = head;
head = mbp;
}
}
The code works so far but i have really no idea how to merge blocks..
The memory management runs over a simply linked lists. The variable head points to the first free memory block.
My idea is to merge the blocks directly in the else part but i don't have a good idea to do that..
I'm supposed to create a program that can read any file into a linked list. This is what I came up with so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_BUFFER_SIZE 1024
typedef struct list {
char *string;
struct list *next;
} LIST;
void print_list(LIST *head) {
LIST *current = head;
while (current != NULL) {
printf("%s", current->string);
current = current->next;
}
}
void push(LIST **head, FILE **fp) {
char line[MAX_BUFFER_SIZE];
LIST *node, *current = *head;
while(fgets(line, sizeof(line), *fp)) {
node = malloc(sizeof(LIST));
if (node == NULL) exit(1);
node->string = strdup(line);
node->next = NULL;
if(current == NULL) {
*head = node;
current = node;
} else {
current->next = node;
current = current->next;
}
}
}
int main(int argc, char *argv[]) {
FILE *fp = fopen(argv[1], "r");
LIST *head = NULL;
push(&head, &fp);
fclose(fp);
print_list(head);
return 0;
}
When comparing the contents of the linked list with the contents of the input file this comparison succeeds when using a .txt file but fails when using a file with binary data. This suggests that my program changes the contents of the binary file.
What am I doing wrong?
Random binary data can contain characters that are not printable. Or might contain zeroes, which is the string terminator and thus terminate your strings early. Just don't read and write raw binary data as strings or using string functions, it will simply not work as you expect.
If you want to read and write arbitrary data of any kind, use e.g. fread and fwrite instead, and open your files in binary mode.
Since you are using Linux, you can use POSIX.1 getline() to read lines, including lines with embedded NUL bytes; you do need to write those lines using fwrite().
For the linked list, you should include a length field for fwrite(). I'd also make the linked list data element a flexible array member:
struct node {
struct node *next;
size_t size;
char data[];
/* Note: data[size+1], data[size] == '\0'.
This is not necessary for correct operation,
but allows one to assume there is always at
least one char in data, and the data is followed
by a nul byte. It makes further use of this
structure easier. */
};
struct node *node_new(const char *data, size_t size)
{
struct node *n;
n = malloc(sizeof (struct node) + size + 1);
if (!n) {
fprintf(stderr, "node_new(): Out of memory.\n");
exit(EXIT_FAILURE);
}
n->next = NULL;
n->size = size;
if (size > 0)
memcpy(n->data, data, size);
n->data[size] = '\0';
return n;
}
When reading lines, it is easiest to prepend the lines to the list:
struct node *list = NULL;
struct node *curr;
char *line = NULL;
size_t size = 0;
ssize_t len;
while (1) {
len = getline(&line, &size, stdin);
if (len < 0)
break;
curr = node_new(line, (size_t)len);
curr->next = list;
list = curr;
}
list = list_reverse(list);
When done, you reverse the list, to get the first read line at the beginning of the list:
struct node *list_reverse(struct node *curr)
{
struct node *root = NULL;
struct node *next;
while (curr) {
next = curr->next;
curr->next = root;
root = curr;
curr = next;
}
return root;
}
To write each line to a stream, you use for example fwrite(node->data, node->size, 1, stdout).
If the output stream is not a local file, but a pipe or socket, fwrite() can return a short count. It is not an error; it only means that only part of the data could be written. To cater for those cases, you can use two helper functions: one to ensure all of the data is written, even when writing to a pipe, and another to scan through the list, using the first one to output each line:
static int fwriteall(const char *data, size_t size, FILE *out)
{
size_t n;
while (size > 0) {
n = fwrite(data, 1, size, out);
if (n > 0) {
data += n;
size -= n;
} else
return -1; /* Error */
}
return 0; /* Success */
}
int list_writeall(FILE *out, struct node *list)
{
for (; list != NULL; list = list->next)
if (list->size > 0)
if (fwriteall(list->data, list->size, out)
return -1; /* Error */
return 0; /* Success */
}
Instead of getline(), you can read chunks of some predefined size using fread():
struct node *read_all(FILE *in, const size_t size)
{
struct node *list = NULL;
struct node *curr;
size_t used;
while (1) {
curr = malloc(sizeof (struct node) + size + 1);
if (!curr) {
fprintf(stderr, "read_all(): Out of memory.\n");
exit(EXIT_FAILURE);
}
size = fread(curr->data, 1, size, in);
if (used > 0) {
/* Optional: Optimize memory use. */
if (used != size) {
void *temp;
temp = realloc(curr, sizeof (struct node) + used + 1);
/* Reallocation failure is not fatal. */
if (temp) {
curr = temp;
curr->size = used;
}
}
}
curr->data[used] = '\0';
curr->next = list;
list = curr;
}
return list_reverse(list);
}
The function returns the reversed list (i.e., with first line first in list). After calling the function, you should check using ferror(in) whether the entire input stream was read, or if there was an error.