context:https://stackoverflow.com/a/50655730/15603477
#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include<ctype.h>
#include<limits.h>
#include<uchar.h>
#include<assert.h>
#define MAXW 128
#define MAXC 112
int main(int argc,char **argv)
{
int readdef = 0; /* flag for reading definition */
size_t offset = 0 /* offset for each part of definition */
,len = 0; /* length of each line */
char buf[MAXC] = ""; /* read (line) buffer */
char word[MAXW] = ""; /* buffer storing word */
char defn[MAXC] = ""; /* buffer storing definition */
/* open filename given as 1st argument, (or read stdin by default) */
FILE *fp = argc > 1 ? fopen(argv[1],"r") : stdin;
if(!fp){ /* validate file open for reading */
fprintf(stderr,"error: file '%s' open failed\n",argv[1]);
exit(EXIT_FAILURE);
}
while(fgets(buf,MAXC,fp))
{
char *p = buf; /* pointer to parse word & 1st part of defn */
if(*buf == '\n') {
defn[offset-1] = 0;
printf("defn:%s\n\n",defn);
readdef = 0;
offset= 0;
}
else if(readdef == 0)
{
while(*p && *p != '.') p++;
if(p-buf+1 > MAXW){
fprintf(stderr,"error: word exceeds %d chars.\n",MAXW-1);
exit(EXIT_FAILURE);
}
snprintf(word,p-buf+1,"%s",buf); /* copy to word */
printf("word=%s|\n",word);
while(ispunct(*p) || isspace(*p))
p++;
len = strlen(p);
if(len && p[len-1] == '\n')
p[len-1] = ' ';
strcpy(defn,p);
offset +=len;
readdef = 1;
}
else{
len = strlen(buf); /*get the line lenfth */
if(len && buf[len-1] == '\n') /* chk \n, overwite w/' ' */
buf[len-1] = ' ';
if(offset + len + 1 > MAXC){
fprintf(stderr,"error: definition exceed %d chars\n",MAXC-1);
// free(buf);
exit(EXIT_FAILURE);
}
snprintf(defn+offset,len+1,"%s",buf); /* append to defn */
offset += len; /*update offset*/
}
}
if(fp != stdin) fclose(fp);
defn[offset - 1] = 0;
printf("defn: %s\n\n",defn);
exit(EXIT_SUCCESS);
}
valgrind info.
error: definition exceed 111 chars
==28017==
==28017== HEAP SUMMARY:
==28017== in use at exit: 472 bytes in 1 blocks
==28017== total heap usage: 3 allocs, 2 frees, 5,592 bytes allocated
==28017==
==28017== 472 bytes in 1 blocks are still reachable in loss record 1 of 1
==28017== at 0x4848899: malloc (in /usr/libexec/valgrind/vgpreload_memcheck-amd64-linux.so)
==28017== by 0x48ED6CD: __fopen_internal (iofopen.c:65)
==28017== by 0x48ED6CD: fopen##GLIBC_2.2.5 (iofopen.c:86)
==28017== by 0x109285: main (in /home/jian/helloc/a.out)
==28017==
==28017== LEAK SUMMARY:
==28017== definitely lost: 0 bytes in 0 blocks
==28017== indirectly lost: 0 bytes in 0 blocks
==28017== possibly lost: 0 bytes in 0 blocks
==28017== still reachable: 472 bytes in 1 blocks
==28017== suppressed: 0 bytes in 0 blocks
==28017==
==28017== For lists of detected and suppressed errors, rerun with: -s
==28017== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
if MAXC is larger enough, then no LEAK SUMMARY. however if it's smaller, then there is memory leak error.
How can I fix the memory leak error when MAXC is not large enough to hold the string.
I also wonder even if dat/definitions.txt first line is empty new line, then defn[offset-1] = 0; would be defn[-1] = 0; but gcc still no error. should i expect error or warning like above array bounds of array char[112]?
The Valgrind report shows that the memory that remains allocated at program exit (but is still reachable, so is not leaked per se), was allocated by fopen(). Presumably, this will be released if you fclose() the stream that was opened, which is pointed to by fp.
Alternatively, just don't worry about it. The file will be closed and the memory released at program termination. Nothing has actually been leaked, as all the allocated memory remains accessible to the program until its termination.
Note: you may fclose(fp) even if fp is a copy of stdin, as long as the program does not afterward attempt to read anything from its standard input.
buf does not need to be freed here.
if(offset + len + 1 > MAXC){
fprintf(stderr,"error: definition exceed %d chars\n",MAXC-1);
// free(buf);//If you are calling this...
exit(EXIT_FAILURE);
}
...it would be a mistake as buf is shown to be created on the stack as an array: char buf[MAXC] = "";, not as a pointer to memory on the heap. (eg: char *buf = malloc(MAXC);)
Calling fclose(fp) when you are finished with the file pointer will free the memory cited in valgrind.
Also consider the case where the first line of your file may contain just a newline. This code section
if(*buf == '\n') {
defn[offset-1] = 0;
results in negative array index.
Finally, a simple way to handle a newline after calling fgets() is to just eliminate it:
while(fgets(buf,MAXC,fp))
{
buf[strcspn(buf, "\n")] = 0;//deletes newline, preserves NULL termination
char *p = buf; /* pointer to parse word & 1st part of defn */
//if(*buf == '\n') {//no longer needed
Related
I am reading a file word by word using fscanf and writing them to a char** array.
If I want to print the current index it works fine but after full writing finishes, printing the array causes wrong output.
char **stop_words = (char**)malloc(1000*sizeof(char*));
FILE *fp;
fp = fopen("englishstopwords.txt", "r");
int i = 0;
while(!feof(fp)) {
fscanf(fp,"%s\n", &stop_words[i]);
// printf("%s\n", &stop_words[i]); //this works fine
i++;
}
// for (int i = 0; i < 1000; i++) { //this works buggy
// printf("%s\n", &stop_words[i]);
// }
fclose(fp);
Broken print looks like this:
immediatimportanimportanindex
working print look like this:
immediately
importance
important
index
What is the difference between them?
The problem
You did memory allocation fundamentally wrong.
char **stop_words = (char**)malloc(1000*sizeof(char*)); only allocates a block of memory that is capable to store 1000 pointers.
The content of stop_words[0] to stop_words[999] is undefined, they are all garbage value after the malloc() returns.
Sometimes it looks fine to write to stop_words[i], but it is just a lucky part that the garbage is a pointer to mapped memory (still bad though, you probably have a memory corruption because of that).
The fix for this is simply to allocate another block of memory to contain the data from your file.
Wrong target buffer
This part
fscanf(fp,"%s\n", &stop_words[i]);
writes to array of pointer that you have allocated with malloc(). The type of expression &stop_words[i] itself doesn't match with %s, you should really activate warn flags and a good compiler should warn you about that by default.
Potential buffer overflow
Your method to read a line is dangerous, because fscanf with %s doesn't care about how big your buffer is, and your program is vulnerable to buffer overflow because of that.
Fix for this is that you can use fgets and specify the size of your buffer.
You can then realloc() if a line has more than allocated memory for the buffer. To detect this, you can see the last character returned. If it is a line feed, then it is the end of line, otherwise it may be end of file or a line that has characters more than buffer size (so you can decide to realloc).
Fix for this
englishstopwords.txt (sample file for testing)
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
test.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#define MAX_WORDS (1000u)
#define INIT_ALLOC (128u)
int main(void)
{
size_t i, total_words;
FILE *fp;
char **stop_words = malloc(MAX_WORDS * sizeof(*stop_words));
/* TODO: Handle `stop_words == NULL` */
fp = fopen("englishstopwords.txt", "r");
/* TODO: Handle `fp == NULL` */
i = 0;
while (true) {
size_t len = 0;
char *ret, *buf = malloc(INIT_ALLOC * sizeof(*buf));
/* TODO: Handle `buf == NULL` */
ret = buf;
re_fgets:
ret = fgets(ret, INIT_ALLOC, fp);
if (ret == NULL) {
/* We've reached the end of file */
if (len == 0) {
/*
* Throw away the buffer, this is unused
*/
free(buf);
} else {
/* Last line buffer. */
stop_words[i++] = buf;
}
break;
}
len = strlen(buf);
if (buf[len - 1] != '\n') {
/*
*
* We don't see an LF, this means this line
* has more than `INIT_ALLOC` characters or
* it may be the EOF.
*
*/
ret = realloc(buf, (len + 1 + INIT_ALLOC) * sizeof(*buf));
/* TODO: Handle `ret == NULL` */
buf = ret;
/*
* Shift the pointer to the right (end of string).
* Because this line has not been fully read.
*
* We put the next `fgets` buffer to the end of this
* string.
*/
ret += len;
goto re_fgets;
}
/* TODO: Trim CR on Windows platform */
/* Trim the LF */
buf[len - 1] = '\0';
stop_words[i++] = buf;
if (i >= MAX_WORDS) {
/*
* TODO: You can do realloc(stop_words, ...) if you
* want to.
*/
break;
}
}
fclose(fp);
total_words = i;
for (i = 0; i < total_words; i++)
printf("%s\n", stop_words[i]);
for (i = 0; i < total_words; i++)
free(stop_words[i]);
free(stop_words);
return 0;
}
Compile and Run
ammarfaizi2#integral:/tmp$ cat englishstopwords.txt
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
ammarfaizi2#integral:/tmp$ gcc -ggdb3 -Wall -Wextra -pedantic-errors test.c -o test
ammarfaizi2#integral:/tmp$ valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --track-fds=yes --error-exitcode=99 -s ./test
==503906== Memcheck, a memory error detector
==503906== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==503906== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
==503906== Command: ./test
==503906==
i
me
my
myself
we
our
test_long_line_123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123
ours
ourselves
==503906==
==503906== FILE DESCRIPTORS: 3 open (3 std) at exit.
==503906==
==503906== HEAP SUMMARY:
==503906== in use at exit: 0 bytes in 0 blocks
==503906== total heap usage: 22 allocs, 22 frees, 20,476 bytes allocated
==503906==
==503906== All heap blocks were freed -- no leaks are possible
==503906==
==503906== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
ammarfaizi2#integral:/tmp$
[EDIT]: I added full code.
I have to create a simple version of "grep" command on unix systems in C. Everything is working fine, only Valgrind says Conditional jump or move depends on uninitialised value(s).
I think, it might be connected to the file, that I am trying to open. Please see my code bellow.
Please note, that I can't use <string.h> in my code.
I compile the code with clang on Ubuntu:
cc -pedantic -Wall -Werror -g -std=c99 grep.c -o program
This is what Valgrind says:
lukas#lukas-VirtualBox:~/Desktop/shared/Lab04/prg-hw04$ valgrind --track-origins=yes ./program Mem /proc/meminfo
==2588== Memcheck, a memory error detector
==2588== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2588== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2588== Command: ./program Mem /proc/meminfo
==2588==
==2588== Conditional jump or move depends on uninitialised value(s)
==2588== at 0x4C32D08: strlen (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2588== by 0x4EBC9D1: puts (ioputs.c:35)
==2588== by 0x108970: check (grep.c:14)
==2588== by 0x108AA9: read (grep.c:50)
==2588== by 0x108B66: main (grep.c:71)
==2588== Uninitialised value was created by a heap allocation
==2588== at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==2588== by 0x108A04: read (grep.c:33)
==2588== by 0x108B66: main (grep.c:71)
==2588==
MemTotal: 10461696 kB
MemFree: 7701488 kB
MemAvailable: 8480772 kB
==2588==
==2588== HEAP SUMMARY:
==2588== in use at exit: 0 bytes in 0 blocks
==2588== total heap usage: 4 allocs, 4 frees, 2,700 bytes allocated
==2588==
==2588== All heap blocks were freed -- no leaks are possible
==2588==
==2588== For counts of detected and suppressed errors, rerun with: -v
==2588== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
Could you help me with locating the problem?
This is my grep.c file.
#include <stdio.h>
#include <stdlib.h>
#define SIZE 100
int printed = 1; // return value -> 0 for patter found, 1 for pattern not found
char *pattern;
char *dest;
void check(char *line, int length, int size) {
for (int i = 0; i < length; i++) {
if (line[i] == pattern[0]) {
for (int j = 1; j < size && (i+j) < length; j++) {
if (line[i+j] == pattern[j]) {
if (j==size-1) {
printf("%s\n", line); // print line
printed = 0; // pattern found
goto END;
}
} else {
break;
}
}
}
}
END: ;
}
void read(void) { // read lines, then check individual lines
int c;
int lengthPat = 0;
while(pattern[++lengthPat] != '\0'); // check length of pattern - I can't use string.h library
FILE *file = fopen(dest, "r");
size_t size =100;
char *line = (char*)malloc(size * sizeof(char));
if (line == NULL) //succesfully created malloc?
exit(102);
int last = 0;
if (file != NULL) { // file succesfully opened
while ((c = getc(file)) != EOF) {
if (c != '\n') { // read line until \n
if(last ==size) {
char *p_line = realloc(line, 2*size*sizeof(char));
if (p_line == NULL)
free(line);
line = p_line;
size *= 2;
}
line[last++] = (char)c;
}
else { // end of line, check for pattern
check(line, last, lengthPat);
last = 0;
for (int i = 0; i < size; i++) {
line[i] = '\0';
}
}
}
fclose(file);
free(line);
}
else {
fprintf(stderr, "Error: Could not open file!\n");
}
}
/* The main program */
int main(int argc, char *argv[])
{
if (argc == 3) {
pattern = argv[1];
dest = argv[2];
read();
}
return printed;
}
The problem was missing null terminator \0 at the end of the string line.
Thanks all for help.
"connected to the file" is the key.
Your input file has lines longer than 100 characters. Replace stack array with dynamically growing heap array.
size_t size =100;
char c_line = malloc(size);
...
if(last ==size)
line = c_line = realloc(c_line, size<<=1);
On fixing that, the mistake is on this line:
printf("%s\n", line); // print line
line is not null terminated so using printf is an advanced topic. We do this instead:
for (int k = 0; k < length; k++)
putc(line[k], stdout);
putc(line[k], stdout);
I'm making a function char** read_from_file(char* fname, int * size) that reads all the words from a file fname and returns them as char**. My file only has 5 words, there is only one word per line. I then have another function print_strings(char** words, int num_words) that prints the strings.
I'm having 3 problems:
When I am comparing the index to < *size I get "comparison between pointer and integer"
I can't store the words in the **words
I'm not sure how to return all the words.
This is my code:
void test_sort(char* fname){
int i;
int num_words;
char** words = read_from_file(fname, &num_words);
printf("\n ORIGINAL data:\n");
print_strings(words, num_words);
}
In Main:
int main(){
// test sorting array of string by string length
test_sort("data.txt");
}
Reading Function
char** read_from_file(char* fname, int * size) {
char** words = (char **)malloc(N_MAX);
FILE *ifp;
ifp = fopen(fname, "r");
if(ifp == NULL){
fprintf(stderr, "Can't open file\n");
exit(1);
}
int index;
while (!feof(ifp)){
for(index = 0; index < size; index++)
{
fscanf(ifp,"%s", words[index]);
}
}
fclose(ifp);
return words;
}
When you allocate an array of pointer-to-pointer-to-char (e.g. char **words;), you must allocate memory for the array of pointers:
char **words = malloc (N_MAX * sizeof *words);
as well as each array of characters (or string) pointed to by each pointer:
words[index] = malloc ((N_MAX + 1) * sizeof **words);
or simply:
words[index] = malloc (N_MAX + 1);
or when allocating memory for a null-terminated string, a shortcut with strdup that both allocates sufficient memory to hold the string and copies the string (including the null-terminating character) to the new block of memory, returning a pointer to the new block:
words[index] = strdup (buf);
A short example of your intended functions could be as follows (note, index is passed as a pointer below, so it must be deferenced to obtain its value *index):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N_MAX 10
#define C_MAX 64
void test_sort(char* fname);
char** read_from_file(char* fname, size_t *index);
int main (void) {
test_sort ("data.txt");
return 0;
}
void test_sort (char* fname)
{
size_t i = 0;
size_t num_words = 0;
char **words = read_from_file (fname, &num_words);
printf("\n ORIGINAL data:\n\n");
// print_strings(words, num_words);
for (i = 0; i < num_words; i++)
printf (" words[%2zu] : %s\n", i, words[i]);
putchar ('\n');
/* free allocated memory */
for (i = 0; i < num_words; i++)
free (words[i]);
free (words);
}
char** read_from_file (char* fname, size_t *index)
{
char **words = malloc (N_MAX * sizeof *words);
char buf[C_MAX] = {0};
FILE *ifp = fopen (fname, "r");
if (ifp == NULL){
fprintf (stderr, "Can't open file\n");
exit(1);
}
*index = 0;
while (fgets (buf, C_MAX, ifp))
{
char *p = buf; /* strip trailing newline/carriage return */
size_t len = strlen (p);
while (len && (p[len-1] == '\r' || p[len-1] == '\n'))
p[--len] = 0;
/* strdup allocates and copies buf */
words[(*index)++] = strdup (buf);
if (*index == N_MAX) {
fprintf (stderr, "warning: N_MAX words read.\n");
break;
}
}
fclose(ifp);
return words;
}
Input
$ cat data.txt
A quick
brown fox
jumps over
the lazy
dog.
Output
$ ./bin/read5str
ORIGINAL data:
words[ 0] : A quick
words[ 1] : brown fox
words[ 2] : jumps over
words[ 3] : the lazy
words[ 4] : dog.
Memory Error Check
In any code your write that dynamically allocates memory, you have 2 responsibilites regarding any block of memory allocated: (1) always preserves a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed. It is imperative that you use a memory error checking program to insure you haven't written beyond/outside your allocated block of memory and to confirm that you have freed all the memory you have allocated. For Linux valgrind is the normal choice. There are so many subtle ways to misuse a block of memory that can cause real problems, there is no excuse not to do it. There are similar memory checkers for every platform. They are all simple to use. Just run your program through it.
$ valgrind ./bin/read5str
==5507== Memcheck, a memory error detector
==5507== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==5507== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==5507== Command: ./bin/read5str
==5507==
ORIGINAL data:
words[ 0] : A quick
words[ 1] : brown fox
words[ 2] : jumps over
words[ 3] : the lazy
words[ 4] : dog.
==5507==
==5507== HEAP SUMMARY:
==5507== in use at exit: 0 bytes in 0 blocks
==5507== total heap usage: 7 allocs, 7 frees, 691 bytes allocated
==5507==
==5507== All heap blocks were freed -- no leaks are possible
==5507==
==5507== For counts of detected and suppressed errors, rerun with: -v
==5507== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
The salient parts above are that there were 7 allocs, 7 frees and All heap blocks were freed. Further ERROR SUMMARY: 0 errors from 0 contexts. You should receive similar output every time. Let me know if you have additional questions.
I have a text file which may contain one or up to 400 numbers. Each number is separated by a comma and a semicolon is used to indicate end of numbers stream.
At the moment I am reading the text file line by line using the fgets. For this reason I am using a fixed array of 1024 elements (the maximum characters per line for a text file).
This is not the ideal way how to implement this since if only one number is inputted in the text file, an array of 1024 elements will we pointless.
Is there a way to use fgets with the malloc function (or any other method) to increase memory efficiency?
If you are looking into using this in a production code then I would request you to follow the suggestions put in the comments section.
But if you requirement is more for learning or school, then here is a complex approach.
Pseudo code
1. Find the size of the file in bytes, you can use "stat" for this.
2. Since the file format is known, from the file size, calculate the number of items.
3. Use the number of items to malloc.
Voila! :p
How to find file size
You can use stat as shown below:
#include <sys/stat.h>
#include <stdio.h>
int main(void)
{
struct stat st;
if (stat("file", &st) == 0) {
printf("fileSize: %d No. of Items: %d\n", (st.st_size), (st.st_size/2));
return st.st_size;
}
printf("failed!\n");
return 0;
}
This file when run will return the file size:
$> cat file
1;
$> ./a.out
fileSize: 3 No. of Items: 1
$> cat file
1,2,3;
$> ./a.out
fileSize: 7 No. of Items: 3
Disclaimer: Is this approach to minimize the pre-allocated memory an optimal approach? No ways in heaven! :)
Dynamically allocating space for you data is a fundamental tool for working in C. You might as well pay the price to learn. The primary thing to remember is,
"if you allocate memory, you have the responsibility to track its use
and preserve a pointer to the starting address for the block of
memory so you can free it when you are done with it. Otherwise your
code with leak memory like a sieve."
Dynamic allocation is straight forward. You allocate some initial block of memory and keep track of what you add to it. You must test that each allocation succeeds. You must test how much of the block of memory you use and reallocate or stop writing data when full to prevent writing beyond the end of your block of memory. If you fail to test either, you will corrupt the memory associated with your code.
When you reallocate, always reallocate using a temporary pointer because with a reallocation failure, the original block of memory is freed. (causing loss of all previous data in that block). Using a temporary pointer allows you to handle failure in a manner to preserve that block if needed.
Taking that into consideration, below we initially allocate space for 64 long values (you can easily change to code to handle any type, e.g. int, float, double...). The code then reads each line of data (using getline to dynamically allocate the buffer for each line). strtol is used to parse the buffer assigning values to the array. idx is used as an index to keep track of how many values have been read, and when idx reaches the current nmax, array is reallocated twice as large as it previously was and nmax is updated to reflect the change. The reading, parsing, checking and reallocating continues for every line of data in the file. When done, the values are printed to stdout, showing the 400 random values read from the test file formatted as 353,394,257,...293,58,135;
To keep the read loop logic clean, I've put the error checking for the strtol conversion into a function xstrtol, but you are free to include that code in main() if you like. The same applies to the realloc_long function. To see when the reallocation takes place, you can compile the code with the -DDEBUG definition. E.g:
gcc -Wall -Wextra -DDEBUG -o progname yoursourcefile.c
The program expects your data filename as the first argument and you can provide an optional conversion base as the second argument (default is 10). E.g.:
./progname datafile.txt [base (default: 10)]
Look over it, test it, and let me know if you have any questions.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#define NMAX 64
long xstrtol (char *p, char **ep, int base);
long *realloc_long (long *lp, unsigned long *n);
int main (int argc, char **argv)
{
char *ln = NULL; /* NULL forces getline to allocate */
size_t n = 0; /* max chars to read (0 - no limit) */
ssize_t nchr = 0; /* number of chars actually read */
size_t idx = 0; /* array index counter */
long *array = NULL; /* pointer to long */
unsigned long nmax = NMAX; /* initial reallocation counter */
FILE *fp = NULL; /* input file pointer */
int base = argc > 2 ? atoi (argv[2]) : 10; /* base (default: 10) */
/* open / validate file */
if (!(fp = fopen (argv[1], "r"))) {
fprintf (stderr, "error: file open failed '%s'.", argv[1]);
return 1;
}
/* allocate array of NMAX long using calloc to initialize to 0 */
if (!(array = calloc (NMAX, sizeof *array))) {
fprintf (stderr, "error: memory allocation failed.");
return 1;
}
/* read each line from file - separate into array */
while ((nchr = getline (&ln, &n, fp)) != -1)
{
char *p = ln; /* pointer to ln read by getline */
char *ep = NULL; /* endpointer for strtol */
while (errno == 0)
{ /* parse/convert each number in line into array */
array[idx++] = xstrtol (p, &ep, base);
if (idx == nmax) /* check NMAX / realloc */
array = realloc_long (array, &nmax);
/* skip delimiters/move pointer to next digit */
while (*ep && *ep != '-' && (*ep < '0' || *ep > '9')) ep++;
if (*ep)
p = ep;
else
break;
}
}
if (ln) free (ln); /* free memory allocated by getline */
if (fp) fclose (fp); /* close open file descriptor */
int i = 0;
for (i = 0; i < idx; i++)
printf (" array[%d] : %ld\n", i, array[i]);
free (array);
return 0;
}
/* reallocate long pointer memory */
long *realloc_long (long *lp, unsigned long *n)
{
long *tmp = realloc (lp, 2 * *n * sizeof *lp);
#ifdef DEBUG
printf ("\n reallocating %lu to %lu\n", *n, *n * 2);
#endif
if (!tmp) {
fprintf (stderr, "%s() error: reallocation failed.\n", __func__);
// return NULL;
exit (EXIT_FAILURE);
}
lp = tmp;
memset (lp + *n, 0, *n * sizeof *lp); /* memset new ptrs 0 */
*n *= 2;
return lp;
}
long xstrtol (char *p, char **ep, int base)
{
errno = 0;
long tmp = strtol (p, ep, base);
/* Check for various possible errors */
if ((errno == ERANGE && (tmp == LONG_MIN || tmp == LONG_MAX)) ||
(errno != 0 && tmp == 0)) {
perror ("strtol");
exit (EXIT_FAILURE);
}
if (*ep == p) {
fprintf (stderr, "No digits were found\n");
exit (EXIT_FAILURE);
}
return tmp;
}
Sample Output (with -DDEBUG to show reallocation)
$ ./bin/read_long_csv dat/randlong.txt
reallocating 64 to 128
reallocating 128 to 256
reallocating 256 to 512
array[0] : 353
array[1] : 394
array[2] : 257
array[3] : 173
array[4] : 389
array[5] : 332
array[6] : 338
array[7] : 293
array[8] : 58
array[9] : 135
<snip>
array[395] : 146
array[396] : 324
array[397] : 424
array[398] : 365
array[399] : 205
Memory Error Check
$ valgrind ./bin/read_long_csv dat/randlong.txt
==26142== Memcheck, a memory error detector
==26142== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==26142== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==26142== Command: ./bin/read_long_csv dat/randlong.txt
==26142==
reallocating 64 to 128
reallocating 128 to 256
reallocating 256 to 512
array[0] : 353
array[1] : 394
array[2] : 257
array[3] : 173
array[4] : 389
array[5] : 332
array[6] : 338
array[7] : 293
array[8] : 58
array[9] : 135
<snip>
array[395] : 146
array[396] : 324
array[397] : 424
array[398] : 365
array[399] : 205
==26142==
==26142== HEAP SUMMARY:
==26142== in use at exit: 0 bytes in 0 blocks
==26142== total heap usage: 7 allocs, 7 frees, 9,886 bytes allocated
==26142==
==26142== All heap blocks were freed -- no leaks are possible
==26142==
==26142== For counts of detected and suppressed errors, rerun with: -v
==26142== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
I want to print the data from .csv file line by line which is separated by comma delimeter.
This code prints the garbage value .
enum gender{ M, F };
struct student{
int stud_no;
enum gender stud_gen;
char stud_name[100];
int stud_marks;
};
void main()
{
struct student s[60];
int i=0,j,roll_no,marks,k,select;
FILE *input;
FILE *output;
struct student temp;
input=fopen("Internal test 1 Marks MCA SEM 1 oct 2014 - CS 101.csv","r");
output=fopen("out.txt","a");
if (input == NULL) {
printf("Error opening file...!!!");
}
while(fscanf(input,"%d,%c,%100[^,],%d", &s[i].stud_no,&s[i].stud_gen,&s[i].stud_name,&s[i].stud_marks)!=EOF)
{
printf("\n%d,%c,%s,%d", s[i].stud_no,s[i].stud_gen,s[i].stud_name,s[i].stud_marks);
i++;
}
}
I also tried the code from: Read .CSV file in C But it prints only the nth field. I want to display all fields line by line.
Here is my sample input.
1401,F,FERNANDES SUZANNA ,13
1402,M,PARSEKAR VIPUL VILAS,14
1403,M,SEQUEIRA CLAYTON DIOGO,8
1404,M,FERNANDES GLENN ,17
1405,F,CHANDRAVARKAR TANUSHREE ROHIT,15
While there are a number of ways to parse any line into components, one way that can really increase understanding is to use a start and end pointer to work down each line identifying the commas, replacing them with null-terminators (i.e. '\0' or just 0), reading the field, restoring the comma and moving to the next field. This is just a manual application of strtok. The following example does that so you can see what is going on. You can, of course, replace use of the start and end pointers (sp & p, respectively) with strtok.
Read through the code and let me know if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* maximum number of student to initially allocate */
#define MAXS 256
enum gender { M, F };
typedef struct { /* create typedef to struct */
int stud_no;
enum gender stud_gen;
char *stud_name;
int stud_marks;
} student;
int main (int argc, char *argv[]) {
if (argc < 2) {
printf ("filename.csv please...\n");
return 1;
}
char *line = NULL; /* pointer to use with getline () */
ssize_t read = 0; /* characters read by getline () */
size_t n = 0; /* number of bytes to allocate */
student **students = NULL; /* ptr to array of stuct student */
char *sp = NULL; /* start pointer for parsing line */
char *p = NULL; /* end pointer to use parsing line */
int field = 0; /* counter for field in line */
int cnt = 0; /* counter for number allocated */
int it = 0; /* simple iterator variable */
FILE *fp;
fp = fopen (argv[1], "r"); /* open file , read only */
if (!fp) {
fprintf (stderr, "failed to open file for reading\n");
return 1;
}
students = calloc (MAXS, sizeof (*students)); /* allocate 256 ptrs set to NULL */
/* read each line in input file preserving 1 pointer as sentinel NULL */
while (cnt < MAXS-1 && (read = getline (&line, &n, fp)) != -1) {
sp = p = line; /* set start ptr and ptr to beginning of line */
field = 0; /* set/reset field to 0 */
students[cnt] = malloc (sizeof (**students)); /* alloc each stuct with malloc */
while (*p) /* for each character in line */
{
if (*p == ',') /* if ',' end of field found */
{
*p = 0; /* set as null-term char (temp) */
if (field == 0) students[cnt]->stud_no = atoi (sp);
if (field == 1) {
if (*sp == 'M') {
students[cnt]->stud_gen = 0;
} else {
students[cnt]->stud_gen = 1;
}
}
if (field == 2) students[cnt]->stud_name = strdup (sp); /* strdup allocates for you */
*p = ','; /* replace with original ',' */
sp = p + 1; /* set new start ptr start pos */
field++; /* update field count */
}
p++; /* increment pointer p */
}
students[cnt]->stud_marks = atoi (sp); /* read stud_marks (sp alread set to begin) */
cnt++; /* increment students count */
}
fclose (fp); /* close file stream */
if (line) /* free memory allocated by getline */
free (line);
/* iterate over all students and print */
printf ("\nThe students in the class are:\n\n");
while (students[it])
{
printf (" %d %c %-30s %d\n",
students[it]->stud_no, (students[it]->stud_gen) ? 'F' : 'M', students[it]->stud_name, students[it]->stud_marks);
it++;
}
printf ("\n");
/* free memory allocated to struct */
it = 0;
while (students[it])
{
if (students[it]->stud_name)
free (students[it]->stud_name);
free (students[it]);
it++;
}
if (students)
free (students);
return 0;
}
(note: added condition on loop that cnt < MAXS-1 to preserve at least one pointer in students NULL as a sentinel allowing iteration.)
input:
$ cat dat/people.dat
1401,F,FERNANDES SUZANNA ,13
1402,M,PARSEKAR VIPUL VILAS,14
1403,M,SEQUEIRA CLAYTON DIOGO,8
1404,M,FERNANDES GLENN ,17
1405,F,CHANDRAVARKAR TANUSHREE ROHIT,15
output:
$./bin/stud_struct dat/people.dat
The students in the class are:
1401 F FERNANDES SUZANNA 13
1402 M PARSEKAR VIPUL VILAS 14
1403 M SEQUEIRA CLAYTON DIOGO 8
1404 M FERNANDES GLENN 17
1405 F CHANDRAVARKAR TANUSHREE ROHIT 15
valgrind memcheck:
I have updated the code slightly to insure all allocated memory was freed to prevent against any memory leaks. Simple things like the automatic allocation of memory for line by getline or failing to close a file stream can result in small memory leaks. Below is the valgrind memcheck confirmation.
valgrind ./bin/stud_struct dat/people.dat
==11780== Memcheck, a memory error detector
==11780== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==11780== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==11780== Command: ./bin/stud_struct dat/people.dat
==11780==
The students in the class are:
1401 F FERNANDES SUZANNA 13
1402 M PARSEKAR VIPUL VILAS 14
1403 M SEQUEIRA CLAYTON DIOGO 8
1404 M FERNANDES GLENN 17
1405 F CHANDRAVARKAR TANUSHREE ROHIT 15
==11780==
==11780== HEAP SUMMARY:
==11780== in use at exit: 0 bytes in 0 blocks
==11780== total heap usage: 13 allocs, 13 frees, 2,966 bytes allocated
==11780==
==11780== All heap blocks were freed -- no leaks are possible
==11780==
==11780== For counts of detected and suppressed errors, rerun with: -v
==11780== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)