I have a function that reads a text file filled with a word on every line. Here is an example of a text file I'm using
and
but
five
follows
four
has
is
like
line
lines
littlest
not
once
one
only
other
six
the
three
twice
two
word
words
Code:
typedef struct node node_t;
struct node {
char data[MAX_WORD];
int term;
node_t *next;
};
node_t *head;
int
int_struct(int lines){
FILE *fp;
char ch;
int n = 0, i, switch_num=1, test_first=0, test_first_2=0;
node_t *node, *curr_add;
fp = fopen("text.txt", "r");
node = (node_t*)malloc(sizeof(node_t));
for (i=1; i<=lines; i++){
switch_num = 1;
n=0;
if (test_first != 0){
if (test_first_2){
node = (node_t*)malloc(1000000);
}
test_first_2=1;
while ((ch = getc(fp)) != '\n'){
node -> term = i;
node -> data[n] = ch;
n++;
}
curr_add -> next = node;
curr_add = node;
}
else{
test_first = 1;
head = curr_add = node;
}
}
curr_add -> next = NULL;
fclose(fp);
return num;
}
What I want to do is to read each word and add it to a linked list.
However I am having trouble with malloc (at the moment I just add in a lot of bytes) and need advice on how to properly use it inside the function I have. I've done a general search and tried my best to try and do what most examples do. But I still can't seem to get my function working. For example, every time I execute the program it will read and add all the words into the linked list. However, the program crashes on the last word, and returns NULL. If anyone is able to point me in the right direction, I'd be very grateful.
Issues
There are no checks for return values. Particularly, fopen and malloc
may return NULL. If they do, you'll catch a segmentation fault error on the
first attempt to access the returned value.
Overcomplicated logic. You don't need these switch_num, test_first and test_first_2
variables (see sample code below).
No need in getc when you're reading a text file line-by-line - use
fgets instead.
Too many memory allocations. You don't need more than sizeof(node_t) + length of the line bytes per line.
The allocated memory is not freed. The dynamic memory should be freed as
soon as it is not needed.
Example using linked list
The following reads a text file into a linked list. Memory is allocated for
each list item, and for each line in the file resulting in n * 2 memory
allocations, where n is the number of lines in the file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strerror, strdup */
#include <errno.h>
typedef struct _node {
unsigned line;
char *data;
struct _node *next;
} node_t;
static void
destroy_list(node_t *list)
{
node_t *node;
for (node = list; node; node = node->next) {
if (node->data != NULL)
free(node->data);
free(node);
}
}
static node_t *
create_list_item(const char *data, unsigned line)
{
node_t *node = calloc(1, sizeof(node_t));
if (node == NULL) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
} else {
node->line = line;
node->data = strdup(data);
if (node->data == NULL) {
fprintf(stderr, "strdup: %s\n", strerror(errno));
free(node);
node = NULL;
}
}
return node;
}
/* Returns pointer to new linked list */
static node_t *
read_file(FILE *fp, char *buf, size_t buf_len)
{
node_t *list = NULL;
node_t *prev = NULL;
node_t *node;
unsigned i;
for (i = 0; fgets(buf, buf_len, fp); prev = node) {
if ((node = create_list_item(buf, ++i)) == NULL) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
break;
}
if (list == NULL)
list = node;
if (prev != NULL)
prev->next = node;
}
return list;
}
static void
print_list(const node_t *list)
{
const node_t *node;
for (node = list; node; node = node->next)
printf("%d: %s", node->line, node->data);
}
int main(int argc, char const* argv[])
{
const char *filename = "text.txt";
char buf[1024] = {0};
FILE *fp = NULL;
node_t *list = NULL;
if (NULL == (fp = fopen(filename, "r"))) {
fprintf(stderr, "failed to open file %s: %s\n",
filename, strerror(errno));
return 1;
}
list = read_file(fp, buf, sizeof(buf));
fclose(fp);
if (list) {
print_list(list);
destroy_list(list);
}
return 0;
}
Example using dynamic array
It is inefficient to allocate memory for each line (twice) in the file,
not only because the system calls (malloc, realloc, etc.) are costly,
but also because the items are placed non-contiguously. Accessing contiguous
region of memory is usually faster.
In the following code, the linked list is replaced with dynamic array. We
initialize memory for 10 lines at once. The size is increased as necessary.
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strerror, strdup */
#include <errno.h>
typedef struct _node {
size_t line;
char *data;
} node_t;
static void
destroy_array(node_t *array, size_t size)
{
size_t i;
node_t *item;
for (i = 0; i < size; i++) {
item = &array[i];
if (item->data)
free(item->data);
}
free(array);
}
static void
print_array(node_t *array, size_t size)
{
size_t i;
node_t *item;
for (i = 0; i < size; i++) {
item = &array[i];
if (item->data) {
printf("%ld: %s", item->line, item->data);
}
}
}
static node_t *
read_file(FILE *fp, char *buf, size_t buf_len,
const size_t array_step, size_t *array_size)
{
node_t *item;
node_t *array = calloc(array_step, sizeof(node_t));
size_t size = 0;
if (array == NULL) {
fprintf(stderr, "calloc:%s\n", strerror(errno));
return array;
}
while (fgets(buf, buf_len, fp)) {
if (size && size % array_step == 0) {
array = realloc(array, sizeof(node_t) * (array_step + size));
if (array == NULL) {
fprintf(stderr, "realloc:%s\n", strerror(errno));
break;
}
}
item = &array[size++];
item->line = size;
item->data = strdup(buf);
if (item->data == NULL) {
fprintf(stderr, "strdup: %s\n", strerror(errno));
break;
}
}
*array_size = size;
return array;
}
int main(int argc, char const* argv[])
{
node_t *array;
const size_t array_step = 10;
size_t array_size;
const char *filename = "text.txt";
char buf[1024] = {0};
FILE *fp = NULL;
if (NULL == (fp = fopen(filename, "r"))) {
fprintf(stderr, "failed to open file %s: %s\n",
filename, strerror(errno));
return 1;
}
array = read_file(fp, buf, sizeof(buf), array_step, &array_size);
fclose(fp);
if (array) {
print_array(array, array_size);
destroy_array(array, array_size);
}
return 0;
}
Note the changes in node_t structure.
Related
I am writing a program that takes strings from lines of a file, and passes them into a linked list of char arrays/C-style strings.
Here is the struct/typedef I am using:
struct linked_list {
char *string;
struct linked_list *next;
};
typedef struct linked_list Node;
Now, the function where I am processing the file is the following snippet:
void function(char *filename, Node **header) {
FILE *fp;
fp = fopen(filename, "r");
if (fp != NULL) {
int line_count = 0;
Node *curr = NULL;
char* line = NULL;
size_t len = 0;
ssize_t read;
while ((read = getline(&line, &len, fp)) != -1) {
printf("File got: %s\n", line);
curr = (Node*)malloc(sizeof(*curr);
curr->name = malloc(line);
strcpy(curr->name, line);
printf("Curr added: %s\n", curr->name);
if (line_count == 0) {
*header = curr;
printf("Header assigned curr\n");
curr = curr->next;
line_count++;
}
header = &curr;
}
The prints confirm that I am successfully assigning the variables. Next, the function to print is as follows:
void print(Node *header) {
while (header != NULL) {
printf("%s\n", header->name);
header = header->next;
}
}
Which obviously doesn't work because the header doesn't properly point to the list.
How would I go about fixing this? I have tried some things like trying to point the header to the linked list "curr" before adding any values, but that didn't seem to work.
I am new to C, so some syntax may be rough/inefficient (especially the malloc() functions, but I will get to that later.
Here is the main function:
int main(int argc, char* argv[]) {
Node* header = NULL;
function(argv[i], &header);
print(header);
return 0;
}
I've written this C code. In the beginning, I used file handing to read a text file and insert every line as a string in a linked list. I need to free all cases of memory allocation in the program in a separate void function. How do I do that? I only included the parts of the code that are relevant because it's a pretty long program.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdbool.h>
#include <ctype.h>
/*Node of linked list*/
typedef struct node {
char *data;
struct node *next;
} node;
node *start = NULL;
node *current;
typedef enum {
not_tested, found, missed
} state;
/*Appending nodes to linked list*/
void add(char *line) {
node *temp = (node *)malloc(sizeof(node));
temp->data = strdup(line);
temp->next = NULL;
current = start;
if (start == NULL) {
start = temp;
}
else {
while (current->next != NULL) {
current = current->next;
}
current->next = temp;
}
}
/*read text file*/
void readfile(char *filename) {
FILE *file = fopen(filename, "r");
if (file == NULL) {
exit(1);
}
char buffer[512];
while (fgets(buffer, sizeof(buffer), file) != NULL) {
add(buffer);
}
fclose(file);
}
This is not exactly what you're asking for, but I show you how to build a little class that allocates chunks of memory that can bee freed in a single call. This is especially useful when you have lots of small pieces of memory to allocate and therefore to free after usage.
It could seem too many codes for your usage, but note that such a class can be saved in a independent file and reused each time it's needed :
struct Allocator {
void * buffer;
size_t capacity;
size_t usedSize;
};
struct Allocator * newAllocator(size_t initialSize) {
struct Allocator * allocator = malloc(sizeof(*allocator));
if (! allocator) return NULL;
allocator->buffer = malloc(initialSize);
if (! allocator->buffer) { free(allocator); return NULL; }
allocator->capacity = initialSize;
allocator->usedSize = 0;
return allocator;
}
void freeAllocator(struct Allocator * allocator) {
if (!allocator) return;
if (allocator->buffer) free(allocator->buffer);
free(allocator);
}
void * allocate(struct Allocator * allocator, size_t size) {
if (size + allocator->usedSize > allocator->capacity) {
while (size + allocator->usedSize > allocator->capacity) allocator->capacity *= 2;
allocator->buffer = realloc(allocator->buffer, allocator->capacity);
}
void * ptr = allocator->buffer + allocator->usedSize;
allocator->usedSize += size;
return ptr;
}
//-------- END ALLOCATOR
struct node {
//...
};
// How to replace a call to malloc to allocate a node :
void add(struct Allocator *allocator, char *line) {
struct node *temp = allocate(allocator, sizeof(*temp));
//...
}
int main()
{
FILE *file = fopen("myfileName", "r");
if (file == NULL) exit(1);
// Allocates the buffer and as many nodes as needed
struct Allocator *allocator = newAllocator(1024);
char * buffer = allocate(allocator, 512);
while (fgets(buffer, 512, file) != NULL) {
add(allocator, buffer);
}
// Free all allocated memory in a single call
freeAllocator(allocator);
return 0;
}
I'm supposed to create a program that can read any file into a linked list. This is what I came up with so far:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_BUFFER_SIZE 1024
typedef struct list {
char *string;
struct list *next;
} LIST;
void print_list(LIST *head) {
LIST *current = head;
while (current != NULL) {
printf("%s", current->string);
current = current->next;
}
}
void push(LIST **head, FILE **fp) {
char line[MAX_BUFFER_SIZE];
LIST *node, *current = *head;
while(fgets(line, sizeof(line), *fp)) {
node = malloc(sizeof(LIST));
if (node == NULL) exit(1);
node->string = strdup(line);
node->next = NULL;
if(current == NULL) {
*head = node;
current = node;
} else {
current->next = node;
current = current->next;
}
}
}
int main(int argc, char *argv[]) {
FILE *fp = fopen(argv[1], "r");
LIST *head = NULL;
push(&head, &fp);
fclose(fp);
print_list(head);
return 0;
}
When comparing the contents of the linked list with the contents of the input file this comparison succeeds when using a .txt file but fails when using a file with binary data. This suggests that my program changes the contents of the binary file.
What am I doing wrong?
Random binary data can contain characters that are not printable. Or might contain zeroes, which is the string terminator and thus terminate your strings early. Just don't read and write raw binary data as strings or using string functions, it will simply not work as you expect.
If you want to read and write arbitrary data of any kind, use e.g. fread and fwrite instead, and open your files in binary mode.
Since you are using Linux, you can use POSIX.1 getline() to read lines, including lines with embedded NUL bytes; you do need to write those lines using fwrite().
For the linked list, you should include a length field for fwrite(). I'd also make the linked list data element a flexible array member:
struct node {
struct node *next;
size_t size;
char data[];
/* Note: data[size+1], data[size] == '\0'.
This is not necessary for correct operation,
but allows one to assume there is always at
least one char in data, and the data is followed
by a nul byte. It makes further use of this
structure easier. */
};
struct node *node_new(const char *data, size_t size)
{
struct node *n;
n = malloc(sizeof (struct node) + size + 1);
if (!n) {
fprintf(stderr, "node_new(): Out of memory.\n");
exit(EXIT_FAILURE);
}
n->next = NULL;
n->size = size;
if (size > 0)
memcpy(n->data, data, size);
n->data[size] = '\0';
return n;
}
When reading lines, it is easiest to prepend the lines to the list:
struct node *list = NULL;
struct node *curr;
char *line = NULL;
size_t size = 0;
ssize_t len;
while (1) {
len = getline(&line, &size, stdin);
if (len < 0)
break;
curr = node_new(line, (size_t)len);
curr->next = list;
list = curr;
}
list = list_reverse(list);
When done, you reverse the list, to get the first read line at the beginning of the list:
struct node *list_reverse(struct node *curr)
{
struct node *root = NULL;
struct node *next;
while (curr) {
next = curr->next;
curr->next = root;
root = curr;
curr = next;
}
return root;
}
To write each line to a stream, you use for example fwrite(node->data, node->size, 1, stdout).
If the output stream is not a local file, but a pipe or socket, fwrite() can return a short count. It is not an error; it only means that only part of the data could be written. To cater for those cases, you can use two helper functions: one to ensure all of the data is written, even when writing to a pipe, and another to scan through the list, using the first one to output each line:
static int fwriteall(const char *data, size_t size, FILE *out)
{
size_t n;
while (size > 0) {
n = fwrite(data, 1, size, out);
if (n > 0) {
data += n;
size -= n;
} else
return -1; /* Error */
}
return 0; /* Success */
}
int list_writeall(FILE *out, struct node *list)
{
for (; list != NULL; list = list->next)
if (list->size > 0)
if (fwriteall(list->data, list->size, out)
return -1; /* Error */
return 0; /* Success */
}
Instead of getline(), you can read chunks of some predefined size using fread():
struct node *read_all(FILE *in, const size_t size)
{
struct node *list = NULL;
struct node *curr;
size_t used;
while (1) {
curr = malloc(sizeof (struct node) + size + 1);
if (!curr) {
fprintf(stderr, "read_all(): Out of memory.\n");
exit(EXIT_FAILURE);
}
size = fread(curr->data, 1, size, in);
if (used > 0) {
/* Optional: Optimize memory use. */
if (used != size) {
void *temp;
temp = realloc(curr, sizeof (struct node) + used + 1);
/* Reallocation failure is not fatal. */
if (temp) {
curr = temp;
curr->size = used;
}
}
}
curr->data[used] = '\0';
curr->next = list;
list = curr;
}
return list_reverse(list);
}
The function returns the reversed list (i.e., with first line first in list). After calling the function, you should check using ferror(in) whether the entire input stream was read, or if there was an error.
If I have a file stream with content
123 1234
1223 124235
21432 325
In my program I read line by line of the file and store the first target of each line into my list. These line with same location and when I run the program it will keep pointing to the most recent data and place it in to list. Which means If I have a function called printL() in while loop. It will print
123/
1223/1223/
21432/21432/21432/
instead of
123/
123/1223/
123/1223/21432
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
typedef struct n{
char *value;
struct n *next;
} Node;
void printList(Node *head){
Node *cur = head;
while(cur!=NULL){
printf("%s/", cur->value);
cur = cur->next;
}
printf("\n");
}
void insertIntoList(Node **head, char *data){
Node *newNode = malloc(sizeof(Node));
if (newNode == NULL){
perror("Failed to allocate a new node for the linked list");
exit(1);
}
newNode->value = data;
newNode->next = NULL;
Node *currentList = *head;
if(*head == NULL){ //if the linked list head is null, then add the target into linked list
*head = newNode;
}
else{
while(currentList->next!=NULL){
currentList = currentList->next;
}
currentList->next = newNode;
}
}
int main(int argc, char**argv){
FILE *fileStream;
size_t len = 0;
char *line = NULL;
Node *head = NULL;
int j;
for(j=1; j<argc-2;j++){
fileStream = fopen(argv[j], "r");
if(fileStream == NULL){
fprintf(stderr, "could not open");
continue;
}
insertIntoList(&head,"a"); /////////////Line 95
insertIntoList(&head,"b");
insertIntoList(&head,"c");
insertIntoList(&head,"d");
printf("here is a try\n");
printList(head);
while(getline(&line, &len, fileStream)!=EOF){ /////////////Line 101
char *targetNum = strtok(line, " \t\r\n");
printf("*****%s\n", targetNum);
insertIntoList(&head, targetNum);
printf("######print head here is##########\n");
printList(head);
printf("######print head here is##########->\n");
}
//printList(head);
}
return 0;
}
In order to keep the content of each loaded field returned from strtok(), just add a strdup() before calling insertIntoList() after checking if not a null-pointer.
In your code, if you compare the value of both line and targetNum
are the same. If fact, the strtok() function returns a pointer to
the input string and keep the pointer for the next argument.
Replace the following code:
char *targetNum = strtok(line, " \t\r\n");
printf("*****%s\n", targetNum);
insertIntoList(&head, targetNum);
By that one:
char *targetNum = strtok(line, " \t\r\n");
if (targetNum != NULL) {
printf("*****%s\n", targetNum);
insertIntoList(&head, strdup(targetNum));
}
You don't store the contents of the string in your list nodes; you store a pointer to the buffer used for the contents of the string.
Consider changing your list node structure to
typedef struct node Node;
struct node {
Node *next;
char data[];
};
where the contents of the string are stored in the C99 flexible array member.
Your node constructor is then something like
Node *new_node(const char *data)
{
const size_t datalen = (data) ? strlen(data) : 0;
Node *result;
result = malloc(sizeof (Node) + datalen + 1);
if (!result) {
fprintf(stderr, "Out of memory!\n");
exit(EXIT_FAILURE);
}
if (datalen > 0)
memcpy(result->data, data, datalen);
result->next = NULL;
result->data[datalen] = '\0';
return result;
}
See how the function allocates memory for the copy of the data?
Personally, I prefer something like
typedef struct node Node;
struct node {
Node *next;
size_t hash;
size_t size;
char data[];
};
where the size member is basically strlen(data) (except that you can also use the nodes to hold binary data that includes nul bytes \0), and hash is a simple hash computed from data. hash is useful if you intend to compare the entire contents of nodes; if two nodes' lengths or hashes differ, then it is certain their contents differ; if they are the same, then you compare them character by character (memcmp(node1->data, node2->data, node1->length) == 0 if they are the same).
The constructor for the above is something like (using DJB2 hash):
Node *new_node(Node *next, const void *data, const size_t size)
{
Node *result;
result = malloc(sizeof (Node) + size + 1);
if (!result) {
fprintf(stderr, "new_node(): Out of memory (%zu bytes)\n", size);
exit(EXIT_FAILURE);
}
/* Copy and hash data using DJB2 hash (not that good, but fast) */
{
unsigned char *src = (unsigned char *)data;
unsigned char *const end = (unsigned char *)data + size;
unsigned char *dst = result->data;
size_t hash = 5381;
while (src < end) {
hash = hash * 33 + (size_t)(*src);
*(dst++) = *(src++);
}
/* Add terminator */
*dst = '\0';
}
result->next = next;
result->hash = hash;
result->size = size;
return result;
}
These Nodes can also be used in e.g. hash tables, which makes the type quite versatile.
I would like to read from a file, line by line. Each line has 3 arguments guaranteed. First 2 are first and last name and third is age.
I want to make a linked list, in which, each node represents a person (line) in the file.
I don't know the size of the names so I made it dynamic. I also don't know the number of lines in the file, so I would like that to be dynamic too.
My approach was to use fscanf, but then I wouldn't know how much memory needs to be allocated prior to reading it.
The function convertToList is supposed to receive a file path of the file we wanna read, convert it to a linked list, then return the head node. (Open to improvements)
Check out my code and see where I got stuck:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef enum
{
FALSE,
TRUE
}bool;
struct Node{
char firstName[50];
char lastName[50];
int age;
struct Node *next;
};
typedef struct {
struct Node *head;
}LinkedList;
struct Node * convertToList(char *inputFilePath);
int main(int argc, char* argv[]) {
if(argc != 4) {
printf("Invalid arguments.\n");
exit(0);
}
if (strlen(argv[3])!=1) {
printf("Invalid sorting type.\n");
exit(0);
}
char *inputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[1]) +1);
memcpy(inputFilePath, argv[1], strlen(argv[1]));
char *outputFilePath = (char*) malloc(sizeof(char*) * strlen(argv[2]) +1);
memcpy(outputFilePath, argv[2], strlen(argv[2]) +1);
char *sortType = argv[3];
//LinkedList* inputList = (LinkedList*)malloc(sizeof(struct Node));
struct Node* head = malloc(sizeof(struct Node));
head = convertToList(inputFilePath);
printf("\n%s %s %d\n", head->firstName, head->lastName, head->age);
// printf("\nsaaap\n");
getchar();
}
struct Node * convertToList(char *inputFilePath) {
FILE* ifp;
ifp = fopen(inputFilePath, "r");
if (!ifp) { perror("fopen"); exit(0); }
struct Node *head = NULL;
struct Node *prev = NULL;
bool isHead = TRUE;
while(!feof(ifp)) {
struct Node *tmp = (struct Node*)malloc(sizeof(struct Node));
if (prev != NULL)
prev->next = tmp;
if (head==NULL)
head = tmp;
fscanf(ifp, "%s %s %d\n", tmp->firstName, tmp->lastName, &tmp->age);
prev = tmp;
//Need to link to next node as well
}
fclose(ifp);
return head;
}
I know that the fscanf is wrong, but I'm not sure how to fix it.
Also, how do I return the root? Is my approach gonna work?
And lastly, how do can I set the next node in the list? I don't see it happening with the current while loop.
Thanks.
If you need to link the nodes this is how you can do it and use dynamic storage, here you go, I didn't think this very much but it is Ok.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
struct Node
{
char *firstName;
char *lastName;
int age;
struct Node *next;
};
struct Node *convertToList(const char *const inputFilePath);
void freeList(struct Node *);
int main(int argc, char* argv[])
{
struct Node *head;
if (argc != 2)
{
printf("Invalid arguments.\n");
return 1;
}
head = convertToList(argv[1]);
if (head != NULL)
{
struct Node *current;
current = head;
while (current != NULL)
{
fprintf(stderr, "%s %s %d\n", current->firstName, current->lastName, current->age);
current = current->next;
}
/* do manupulations with the list, example above, print the values */
freeList(head);
}
return 0;
}
void freeList(struct Node *node)
{
struct Node *current;
current = node;
while (current != NULL)
{
struct Node *next;
next = current->next;
if (current->firstName != NULL)
free(current->firstName);
if (current->lastName != NULL)
free(current->lastName);
free(current);
current = next;
}
}
size_t appendChar(char **buffer, char character, size_t length)
{
char *temporary;
if (buffer == NULL)
return length;
temporary = realloc(*buffer, 1 + length);
if (temporary == NULL)
return length;
temporary[length] = character;
*buffer = temporary;
return 1 + length;
}
struct Node *parseFileLine(char *line)
{
char *word;
struct Node *node;
char *endptr;
if (line == NULL)
return NULL;
node = malloc(sizeof(struct Node));
if (node == NULL)
return NULL;
node->firstName = NULL;
node->lastName = NULL;
node->age = -1; // an invalid value;
node->next = NULL;
word = strtok(line, " ");
if (word == NULL)
return node;
node->firstName = strdup(word);
word = strtok(NULL, " ");
if (word == NULL)
return node;
node->lastName = strdup(word);
word = strtok(NULL, " ");
if (word == NULL)
return node;
node->age = strtol(word, &endptr, 10);
if (*endptr != '\0')
node->age = -1;
return node;
}
struct Node *getNode(FILE *file)
{
char *line;
int character;
size_t length;
line = NULL;
length = 0;
while ((character = fgetc(file)) != EOF)
{
if (((char)character == '\n') && (line != NULL))
{
struct Node *node;
length = appendChar(&line, '\0', length);
node = parseFileLine(line);
free(line);
return node;
}
length = appendChar(&line, (char)character, length);
}
if (line != NULL)
free(line);
return NULL;
}
struct Node *convertToList(const char *const inputFilePath)
{
FILE *ifp;
struct Node *head;
struct Node *current;
struct Node *last;
ifp = fopen(inputFilePath, "r");
if (ifp == NULL)
{
perror("fopen");
return NULL;
}
head = NULL;
last = NULL;
while ((current = getNode(ifp)) != NULL)
{
if (current == NULL)
return head;
if (head == NULL)
head = current;
if (last != NULL)
last->next = current;
last = current;
}
fclose(ifp);
return head;
}
Here you can also print the nodes to see that the data is correctly there.
I think you don't understand what malloc is for and you don't know much about pointers too, in your fscanf you are storing data in firstName and lastName without allocating memory for it, they are not even initialized so you would get a segmentation fault.
A somewhat different approach.
argv copying
First off, as mentioned, you do not need to copy argv values. Main reason for doing do is if you manipulate the values. There are also cases where one want to erase argv values as they can be read by ps and other tools, read from /proc/ etc. For example some programs take passwords as argument, to prevent password to be readable by anyone having access to the system one typically copy the argument then overwrite the argv value.
It is however usually good practice to use variables for the arguments. It usually makes the code clearer, but also makes it easier to maintain if one do changes. E.g. implement flag arguments like -f <filename>.
exit() and return from main()
You also exit() with zero on error. You would want to exit with zero on success, and other value on error or other. This is the norm. 0 == success. Some applications implement numeric exit codes that can mean different things. E.g. 0 is normal exit, 1 is not an error but some special case, 2 likewise 3 might be an error etc. For example grep:
EXIT STATUS
The exit status is 0 if selected lines are found, and 1 if not found. If an
error occurred the exit status is 2. (Note: POSIX error handling code should
check for '2' or greater.)
scanf
When you use scanf to read strings there are some tricks that can be used to make it better. First off always use the size parameter.
char name[16]
sscanf(buf, "%15s", name);
Do also check items read:
if (sscanf(buf, "%15s %d", name, &age) != 2)
... error ...
Third you can also save number of bytes read by %n:
sscanf(buf, "%n%15s%n %n%d%n", &of1, name, &of2, &age, &of3)
Usage
A very simple, but also quick and user-friendly thing, is to add a usage function.
Typically:
int usage(const char *self, const char *err_str)
{
fprintf(stderr,
"Usage: %s <in-file> <out-file> <sort-type>\n"
" Sort types:\n"
" f Sort by First Name\n"
" l Sort by Last Name\n"
" a Sort by Age\n"
,
self
);
if (err_str) {
fprintf(stderr,
"\nError: %s\n",
err_str
);
}
return ERR_ARG;
}
Then in main() you can quickly and clean add something like:
if (argc < 4)
return usage(argv[0], "Missing arguments.");
A note on you validation of the sort argument. Instead of using strlen() you can check if byte 2 is 0.
if (argv[3][1] != '\0')
... error ...
Finally main could be something like:
int main(int argc, char *argv[])
{
char *in_file, *out_file, sort;
struct Node *head = NULL;
int err = 0;
if (argc < 4)
return usage(argv[0], "Missing arguments.");
if (argc > 4)
return usage(argv[0], "Unknown arguments.");
if (argv[3][1] != '\0')
return usage(argv[0], "Invalid sorting type.");
in_file = argv[1];
out_file = argv[2];
sort = argv[3][0];
if (sort != 'f' && sort != 'l' && sort != 'a')
return usage(argv[0], "Invalid sorting type.");
if ((err = file_to_llist(in_file, &head)) != 0)
return err;
prnt_llist(stdout, head);
free_ll(head);
return err;
}
malloc helpers
When dealing with a lot of mallocing and similar it can be useful to add some helper functions. If you get a memory error you normally would exit right away.
void *alloc(size_t size)
{
void *buf;
if ((buf = malloc(size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
void *re_alloc(void *old, size_t size)
{
void *buf;
if ((buf = realloc(old, size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
Parsing of the file
As you want to have everything dynamically allocated and no limits (beyond system memory) one solution is to implement some sort of tokenizer. It can be helpful to use a struct to hold it together. Something like:
struct file_toker {
FILE *fh; /* File handle */
char *buf; /* Dynamic Read buffer */
size_t size; /* Size of buffer */
size_t len; /* Length of actual data in buffer. */
};
One point here is to keep length of tokens read. By this one do not need to keep using strlen etc.
If you can afford it it would usually be better to read whole file in one go, then parse the buffer. Optionally one can read file in chunks of say 4096*16 bytes, but then one get some complexity when it comes to overlapping lines between reads etc.
Anyhow in this example one byte is read at a time.
Start code
Finally a starting ground could be something like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* memcpy/strncpy */
#include <errno.h> /* errno for fopen() */
#include <ctype.h> /* isspace() */
#define ERR_ARG 1
#define ERR_FILE_FMT 2
#define ERR_MEM 3
struct Node {
char *name_first;
char *name_last;
int age;
struct Node *next;
};
struct file_toker {
FILE *fh;
char *buf;
size_t size;
size_t len;
};
/* ===============----- GEN HELPERS ------=================== */
int usage(const char *self, const char *err_str)
{
fprintf(stderr,
"Usage: %s <in-file> <out-file> <sort-type>\n"
" Sort types:\n"
" f Sort by First Name\n"
" l Sort by Last Name\n"
" a Sort by Age\n"
,
self
);
if (err_str) {
fprintf(stderr,
"\nError: %s\n",
err_str
);
}
return ERR_ARG;
}
void *alloc(size_t size)
{
void *buf;
if ((buf = malloc(size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
void *re_alloc(void *old, size_t size)
{
void *buf;
if ((buf = realloc(old, size)) == NULL) {
fprintf(stderr, "Memory error.\n");
exit(ERR_MEM);
}
return buf;
}
/* ===============----- LINKED LIST ------=================== */
void free_node(struct Node *n)
{
if (!n)
return;
if (n->name_first)
free(n->name_first);
if (n->name_last)
free(n->name_last);
free(n);
}
void free_ll(struct Node *n)
{
struct Node *p;
if (!n)
return;
for ( ; n ; ) {
p = n;
n = n->next;
free_node(p);
}
}
void prnt_llist(FILE *fd, struct Node *n)
{
int i = 0;
fprintf(fd, "NODELIST:\n");
for ( ; n != NULL ; n = n->next) {
fprintf(fd,
"Entry %d {\n"
" Name: %s, %s\n"
" Age : %d\n"
"}\n",
++i,
n->name_last,
n->name_first,
n->age
);
}
}
/* ================--------- FILE TOKER ------------==================== */
/* Free / close reader. */
void free_ft(struct file_toker *ft)
{
if (!ft)
return;
if (ft->fh)
fclose(ft->fh);
free(ft->buf);
ft->fh = NULL;
ft->buf = NULL;
}
/* Initiate reader. */
int ft_init(struct file_toker *ft, const char *fn, size_t buf_sz)
{
ft->size = buf_sz;
ft->len = 0;
ft->buf = alloc(ft->size);
ft->fh = fopen(fn, "r");
if (!ft->fh) {
perror("Unable to open file");
return errno;
}
return 0;
}
/* Increase buffer size. */
size_t ft_increase(struct file_toker *ft)
{
if (ft->size < 1)
ft->size = 1;
ft->size *= 2;
ft->buf = re_alloc(ft->buf, ft->size);
return ft->size;
}
/* Read and skip spaces (\n, \r, ' ', \t etc.). Return first non-space. */
char ft_skip_space(struct file_toker *ft)
{
int c;
while ((c = fgetc(ft->fh)) != EOF && isspace(c))
;
return c == EOF ? 0 : (char)c;
}
/* Read next token */
size_t file_tok(struct file_toker *ft)
{
size_t i = 1;
size_t max;
int c;
if (ft->size < 2)
ft_increase(ft);
ft->len = 0;
max = ft->size - 1;
/* Skip any leading spaces. Function return first non-space. */
if ((ft->buf[0] = ft_skip_space(ft)) == 0)
return 0;
while ((c = fgetc(ft->fh)) != EOF) {
/* If space, break. */
if (isspace(c))
break;
/* Save char to buffer. */
ft->buf[i++] = (char)c;
/* If entire buffer used, increase it's size. */
if (i > max)
max = ft_increase(ft) - 1;
}
/* Null terminate. */
ft->buf[i] = 0x00;
/* Length without terminating null */
ft->len = i;
return i;
}
/* Read next space separated token and save it as new allocated string. */
int file_tok_str(struct file_toker *ft, char **out)
{
if (file_tok(ft) == 0)
return 1;
*out = alloc(ft->len + 1);
memcpy(*out, ft->buf, ft->len + 1);
return 0;
}
/* Read next space separated token and scan it as int. */
int file_tok_int(struct file_toker *ft, int *out)
{
if (file_tok(ft) == 0)
return 1;
if ((sscanf(ft->buf, "%d", out)) != 1)
return 1;
return 0;
}
/* ===============----- FILE PARSER ------=================== */
int file_to_llist(const char *fn, struct Node **head)
{
struct Node *node = NULL, *cur = *head;
struct file_toker ft;
/* Initiate new file token reader, initial buffer size 4096 bytes. */
if (ft_init(&ft, fn, 4096))
return 1;
while (1) {
/* Allocate next node */
node = alloc(sizeof(struct Node));
node->name_first = NULL;
node->name_last = NULL;
/* Read and copy first name. */
if (file_tok_str(&ft, &node->name_first))
break;
/* Read and copy last name. */
if (file_tok_str(&ft, &node->name_last))
break;
/* Read and copy age. */
if (file_tok_int(&ft, &node->age))
break;
/* Link and save current for next iteration. */
node->next = NULL;
if (cur) {
cur->next = node;
}
cur = node;
if (*head == NULL)
*head = node;
}
/* Free last unused node. */
free_node(node);
free_ft(&ft);
return 0;
}
/* ===============----- MAIN ROUTINE ------=================== */
int main(int argc, char *argv[])
{
char *in_file, *out_file, sort;
struct Node *head = NULL;
int err = 0;
if (argc < 4)
return usage(argv[0], "Missing arguments.");
if (argc > 4)
return usage(argv[0], "Unknown arguments.");
if (argv[3][1] != '\0')
return usage(argv[0], "Invalid sorting type.");
in_file = argv[1];
out_file = argv[2];
sort = argv[3][0];
if (sort != 'f' && sort != 'l' && sort != 'a')
return usage(argv[0], "Invalid sorting type.");
if ((err = file_to_llist(in_file, &head)) != 0)
return err;
prnt_llist(stdout, head);
free_ll(head);
return err;
}