Read unknown number of lines from stdin, C - c

i have a problem with reading stdin of unknown size. In fact its a table in .txt file, which i get to stdin by calling parameter '<'table.txt. My code should look like this:
#include <stdio.h>
#include <string.h>
int main(int argc,char *argv[])
{
char words[10][1024];
int i=0;
while(feof(stdin)==0)
{
fgets(words[i],100,stdin);
printf("%s", words[i]);
i++;
}
return 0;
}
but there is the problem i dont know the nuber of lines, which in this case is 10(we know the number of characters in line - 1024).
It would be great if someone know the solution. Thanks in advance.

You have hit on one of the issues that plagues all new C-programmers. How do I dynamically allocate all memory I need to free myself from static limits while still keeping track of my collection of 'stuff' in memory. This problem usually presents itself when you need to read an unknown number of 'things' from an input. The initial options are (1) declare some limit big enough to work (defeating the purpose), or (2) dynamically allocate a pointers as needed.
Obviously, the goal is (2). However, you then run into the problem of "How do I keep track of what I've allocated?" This in itself is an issue that dogs beginners. The problem being, If I dynamically allocate using a bunch of pointers, **How do I iterate over the list to get my 'stuff' back out? Also, you have to initialize some initial number of pointers (unless using an advanced data structure like a linked-list), so the next question is "what do I do when I run out?"
The usual solution is to allocate an initial set of pointers, then when the limit is reached, reallocate to twice as many as original, and keep going. (as Grayson indicated in his answer).
However, there is one more trick to iterate over the list to get your 'stuff' back out that is worth understanding. Yes, you can allocate with malloc and keep track of the number of pointers used, but you can free yourself from tying a counter to your list of pointers by initially allocating with calloc. That not only allocates space, but also sets the allocated pointers to NULL (or 0). This allows you to iterate over your list with a simple while (pointer != NULL). This provides many benefits when it comes to passing your collection of pointers to functions, etc.. The downside (a minimal one) is that you get to write a reallocation scheme that uses calloc to allocate new space when needed. (bummer, I get to get smarter -- but I have to work to do it...)
You can evaluate whether to use malloc/realloc off-the-shelf, or whether to reallocate using calloc and a custom reallocate function depending on what your requirements are. Regardless, understanding both, just adds more tools to your programming toolbox.
OK, enough jabber, where is the example in all this blather?
Both of the following examples simply read all lines from any text file and print the lines (with pointer index numbers) back to stdout. Both expect that you will provide the filename to read as the first argument on the command line. The only difference between the two is the second has the reallocation with calloc done is a custom reallocation function. They both allocate 255 pointers initially and double the number of pointers each time the limit is hit. (for fun, you can set MAXLINES to something small like 10 and force repeated reallocations to test).
first example with reallocation in main()
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
void free_buffer (char **buffer)
{
register int i = 0;
while (buffer[i])
{
free (buffer[i]);
i++;
}
free (buffer);
}
int main (int argc, char **argv) {
if (argc < 2) {
fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
return 1;
}
char *line = NULL; /* forces getline to allocate space for buf */
ssize_t read = 0; /* number of characters read by getline */
size_t n = 0; /* limit number of chars to 'n', 0 no limit */
char **filebuf = NULL;
char **rtmp = NULL;
int linecnt = 0;
size_t limit = MAXLINES;
size_t newlim = 0;
FILE *ifp = fopen(argv[1],"r");
if (!ifp)
{
fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
return 1;
}
filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
while ((read = getline (&line, &n, ifp)) != -1) /* read each line in file with getline */
{
if (line[read - 1] == 0xa) { line[read - 1] = 0; read--; } /* strip newline */
if (linecnt >= (limit - 1)) /* test if linecnt at limit, reallocate */
{
newlim = limit * 2; /* set new number of pointers to 2X old */
if ((rtmp = calloc (newlim, sizeof (*filebuf)))) /* calloc to set to NULL */
{
/* copy original filebuf to newly allocated rtmp */
if (memcpy (rtmp, filebuf, linecnt * sizeof (*filebuf)) == rtmp)
{
free (filebuf); /* free original filebuf */
filebuf = rtmp; /* set filebuf equal to new rtmp */
}
else
{
fprintf (stderr, "error: memcpy failed, exiting\n");
return 1;
}
}
else
{
fprintf (stderr, "error: rtmp allocation failed, exiting\n");
return 1;
}
limit = newlim; /* update limit to new limit */
}
filebuf[linecnt] = strdup (line); /* copy line (strdup allocates) */
linecnt++; /* increment linecnt */
}
fclose(ifp);
if (line) free (line); /* free memory allocated to line */
linecnt = 0; /* reset linecnt to iterate filebuf */
printf ("\nLines read in filebuf buffer:\n\n"); /* output all lines read */
while (filebuf[linecnt])
{
printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
linecnt++;
}
printf ("\n");
free_buffer (filebuf); /* free memory allocated to filebuf */
return 0;
}
second example with reallocation in custom function
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
/* function to free allocated memory */
void free_buffer (char **buffer)
{
register int i = 0;
while (buffer[i])
{
free (buffer[i]);
i++;
}
free (buffer);
}
/* custom realloc using calloc/memcpy */
char **recalloc (size_t *lim, char **buf)
{
int newlim = *lim * 2;
char **tmp = NULL;
if ((tmp = calloc (newlim, sizeof (*buf))))
{
if (memcpy (tmp, buf, *lim * sizeof (*buf)) == tmp)
{
free (buf);
buf = tmp;
}
else
{
fprintf (stderr, "%s(): error, memcpy failed, exiting\n", __func__);
return NULL;
}
}
else
{
fprintf (stderr, "%s(): error, tmp allocation failed, exiting\n", __func__);
return NULL;
}
*lim = newlim;
return tmp;
}
int main (int argc, char **argv) {
if (argc < 2) {
fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
return 1;
}
char *line = NULL; /* forces getline to allocate space for buf */
ssize_t read = 0; /* number of characters read by getline */
size_t n = 0; /* limit number of chars to 'n', 0 no limit */
char **filebuf = NULL;
int linecnt = 0;
size_t limit = MAXLINES;
FILE *ifp = fopen(argv[1],"r");
if (!ifp)
{
fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
return 1;
}
filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
while ((read = getline (&line, &n, ifp)) != -1) /* read each line in file with getline */
{
if (line[read - 1] == 0xa) { line[read - 1] = 0; read--; } /* strip newline */
if (linecnt >= (limit - 1)) /* test if linecnt at limit, reallocate */
{
filebuf = recalloc (&limit, filebuf); /* reallocate filebuf to 2X size */
if (!filebuf)
{
fprintf (stderr, "error: recalloc failed, exiting.\n");
return 1;
}
}
filebuf[linecnt] = strdup (line); /* copy line (strdup allocates) */
linecnt++; /* increment linecnt */
}
fclose(ifp);
if (line) free (line); /* free memory allocated to line */
linecnt = 0; /* reset linecnt to iterate filebuf */
printf ("\nLines read in filebuf buffer:\n\n"); /* output all lines read */
while (filebuf[linecnt])
{
printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
linecnt++;
}
printf ("\n");
free_buffer (filebuf); /* free memory allocated to filebuf */
return 0;
}
Take a look at both examples. Know that there are many, many ways to do this. These examples just give one approach that provide example of using a few extra tricks than you will normally find. Give them a try. Drop a comment if you need more help.

I suggest that you use malloc and realloc to manage your memory. Keep track of how big your array is or how many entries it has, and call realloc to double its size whenever the array is not big enough.

Op appears to need to store the data somewhere
#define N 100000u
char BABuffer[N];
int main(int argc, char *argv[]) {
size_t lcount = 0;
size_t ccount = 0;
char words[1024 + 2];
while(fgets(words, sizeof words, stdin) != NULL) {
size_t len = strlen(words);
if (ccount + len >= N - 1) {
fputs("Too much!\n", stderr);
break;
}
memcpy(&BABuffer[ccount], words, len);
ccount += len;
lcount++;
}
BABuffer[ccount] = '\0';
printf("Read %zu lines.\n", lcount);
printf("Read %zu char.\n", ccount);
fputs(BABuffer, stdout);
return 0;
}
Note: ccount includes the end-of-line character(s).

Related

C Program to Convert a Text File into a CSV File

The question is to convert a text file into a CSV file using C programming. The input text file is formatted as the following:
JACK Maria Stephan Nora
20 34 45 28
London NewYork Toronto Berlin
The output CSV file should look like:
Jack,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
The following code is what I tried so far:
void load_and_convert(const char* filename){
FILE *fp1, *fp2;
char ch;
fp1=fopen(filename,"r");
fp2=fopen("output.csv","w");
for(int i=0;i<1000;i++){
ch=fgetc(fp1);
fprintf(fp2,"%c",ch);
if(ch==' '|| ch=='\n')
fprintf(fp2,"%c,\n",ch);
}
fclose(fp1);
fclose(fp2);
}
The output from my code looks like:
Jack,
Maria,
Stephan,
Nora,
20,
34,
45,
28,
London,
NewYork,
Toronto,
Berlin,
How should I modify my code to make it work correctly?
What's the idea to treat this question?
Since I have some times, here is a working solution for you (tried my best to make the solution as elegant as I can):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_STRING_LENGTH 50
#define MAX_NUMBER_OF_PEOPLE 50
typedef struct
{
char name[MAX_STRING_LENGTH];
int age;
char city[MAX_STRING_LENGTH];
} Person;
void getName(char *src, char *delim, Person *people) {
char *ptr = strtok(src, delim);
int i = 0;
while(ptr != NULL)
{
strncpy(people[i].name, ptr, MAX_STRING_LENGTH);
ptr = strtok(NULL, delim);
i++;
}
}
void getAge(char *src, char *delim, Person *people) {
char *ptr = strtok(src, delim);
int i = 0;
while(ptr != NULL)
{
people[i].age = atoi(ptr);
i++;
ptr = strtok(NULL, delim);
}
}
void getCity(char *src, char *delim, Person *people) {
char *ptr = strtok(src, delim);
int i = 0;
while(ptr != NULL)
{
strncpy(people[i].city, ptr, MAX_STRING_LENGTH);
i++;
ptr = strtok(NULL, delim);
}
}
int main(void)
{
Person somebody[MAX_NUMBER_OF_PEOPLE];
FILE *fp;
char *line = NULL;
size_t len = 0;
ssize_t read;
int ln = 0;
fp = fopen("./test.txt", "r");
if (fp == NULL)
return -1;
// Read every line, support first line is name, second line is age...
while ((read = getline(&line, &len, fp)) != -1) {
// remote trailing newline character
line = strtok(line, "\n");
if (ln == 0) {
getName(line, " ", somebody);
} else if (ln == 1) {
getAge(line, " ", somebody);
} else {
getCity(line, " ", somebody);
}
ln++;
}
for (int j = 0; j < MAX_NUMBER_OF_PEOPLE; j++) {
if (somebody[j].age == 0)
break;
printf("%s, %d, %s\n", somebody[j].name, somebody[j].age, somebody[j].city);
}
fclose(fp);
if (line)
free(line);
return 0;
}
What you are needing to do is non-trivial if you want to approach the problem holding all values in memory as you transform the 3-rows with 4-fields in each row, to a format of 4-rows with 3-fields per-row. So when you have your datafile containing:
Example Input File
$ cat dat/col2csv3x4.txt
JACK Maria Stephan Nora
20 34 45 28
London NewYork Toronto Berlin
You want to read each of the three lines and then transpose the columns into rows for .csv output. Meaning you will then end up with 4-rows of 3-csv fields each, e.g.
Expected Program Output
$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
There is nothing difficult in doing it, but it takes meticulous attention to handling the memory storage of object and allocating/reallocating to handle the transformation between 3-rows with 4-pieces of data to 4-rows with 3-pieces of data.
One approach is to read all original lines into a typical pointer-to-pointer to char setup. Then transform/transpose the columns into rows. Since conceivably there could be 100-rows with 500-fields next time, you will want to approach the transformation using indexes and counters to track your allocation and reallocation requirement to make your finished code able to handle transposing a generic number of lines and fields into fields-number of lines with as many vales per row as you had original lines.
You can design your code to provide the transformation in two basic functions. The first to read and store the lines (saygetlines`) and the second to then transpose those lines into a new pointer-to-pointer to char so it can be output as comma separated values
One way to approach these two functions would be similar to the following that takes the filename to read as the first-arguments (or will read from stdin by default if no argument is given). The code isn't trivial, but it isn't difficult either. Just keep track of all your allocations, preserving a pointer to the beginning of each, so the memory may be freed when no longer needed, e.g.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define NPTR 2
#define NWRD 128
#define MAXC 1024
/** getlines allocates all storage required to read all lines from file.
* the pointers are doubled each time reallocation is needed and then
* realloc'ed a final time to exactly size to the number of lines. all
* lines are stored with the exact memory required.
*/
char **getlines (size_t *n, FILE *fp)
{
size_t nptr = NPTR; /* tracks number of allocated pointers */
char buf[MAXC]; /* tmp buffer sufficient to hold each line */
char **lines = calloc (nptr, sizeof *lines);
if (!lines) { /* validate EVERY allocaiton */
perror ("calloc-lines");
return NULL;
}
*n = 0; /* pointer tracks no. of lines read */
rewind (fp); /* clears stream error state if set */
while (fgets (buf, MAXC, fp)) { /* read each line o finput */
size_t len;
if (*n == nptr) { /* check/realloc ptrs if required */
void *tmp = realloc (lines, 2 * nptr * sizeof *lines);
if (!tmp) { /* validate reallocation */
perror ("realloc-tmp");
break;
}
lines = tmp; /* assign new block, (opt, zero new mem below) */
memset (lines + nptr, 0, nptr * sizeof *lines);
nptr *= 2; /* increment allocated pointer count */
}
buf[(len = strcspn(buf, "\r\n"))] = 0; /* get line, remove '\n' */
lines[*n] = malloc (len + 1); /* allocate for line */
if (!lines[*n]) { /* validate */
perror ("malloc-lines[*n]");
break;
}
memcpy (lines[(*n)++], buf, len + 1); /* copy to line[*n] */
}
if (!*n) { /* if no lines read */
free (lines); /* free pointers */
return NULL;
}
/* optional final realloc to free unused pointers */
void *tmp = realloc (lines, *n * sizeof *lines);
if (!tmp) {
perror ("final-realloc");
return lines;
}
return (lines = tmp); /* return ptr to exact no. of required ptrs */
}
/** free all pointers and n alocated arrays */
void freep2p (void *p2p, size_t n)
{
for (size_t i = 0; i < n; i++)
free (((char **)p2p)[i]);
free (p2p);
}
/** transpose a file of n rows and a varying number of fields to an
* allocated pointer-to-pointer t0 char structure with a fields number
* of rows and n csv values per row.
*/
char **transpose2csv (size_t *n, FILE *fp)
{
char **l = NULL, **t = NULL;
size_t csvl = 0, /* csv line count */
ncsv = 0, /* number of csv lines allocated */
nchr = MAXC, /* initial chars alloc for csv line */
*offset, /* array tracking read offsets in lines */
*used; /* array tracking write offset to csv lines */
if (!(l = getlines (n, fp))) { /* read all lines to l */
fputs ("error: getlines failed.\n", stderr);
return NULL;
}
ncsv = *n;
#ifdef DEBUG
for (size_t i = 0; i < *n; i++)
puts (l[i]);
#endif
if (!(t = malloc (ncsv * sizeof *t))) { /* alloc ncsv ptrs for csv */
perror ("malloc-t");
freep2p (l, *n); /* free everything else on failure */
return NULL;
}
for (size_t i = 0; i < ncsv; i++) /* alloc MAXC chars to csv ptrs */
if (!(t[i] = malloc (nchr * sizeof *t[i]))) {
perror ("malloc-t[i]");
while (i--) /* free everything else on failure */
free (t[i]);
free (t);
freep2p (l, *n);
return NULL;
}
if (!(offset = calloc (*n, sizeof *offset))) { /* alloc offsets array */
perror ("calloc-offsets");
free (t);
freep2p (l, *n);
return NULL;
}
if (!(used = calloc (ncsv, sizeof *used))) { /* alloc used array */
perror ("calloc-used");
free (t);
free (offset);
freep2p (l, *n);
return NULL;
}
for (;;) { /* loop continually transposing cols to csv rows */
for (size_t i = 0; i < *n; i++) { /* read next word from each line */
char word[NWRD]; /* tmp buffer for word */
int off; /* number of characters consumed in read */
if (sscanf (l[i] + offset[i], "%s%n", word, &off) != 1)
goto readdone; /* break nested loops on read failure */
size_t len = strlen (word); /* get word length */
offset[i] += off; /* increment read offset */
if (csvl == ncsv) { /* check/realloc new csv row as required */
size_t newsz = ncsv + 1; /* allocate +1 row over *n */
void *tmp = realloc (t, newsz * sizeof *t); /* realloc ptrs */
if (!tmp) {
perror ("realloc-t");
freep2p (t, ncsv);
goto readdone;
}
t = tmp;
t[ncsv] = NULL; /* set new pointer NULL */
/* allocate nchr chars to new pointer */
if (!(t[ncsv] = malloc (nchr * sizeof *t[ncsv]))) {
perror ("malloc-t[i]");
while (ncsv--) /* free everything else on failure */
free (t[ncsv]);
goto readdone;
}
tmp = realloc (used, newsz * sizeof *used); /* realloc used */
if (!tmp) {
perror ("realloc-used");
freep2p (t, ncsv);
goto readdone;
}
used = tmp;
used[ncsv] = 0;
ncsv++;
}
if (nchr - used[csvl] - 2 < len) { /* check word fits in line */
/* realloc t[i] if required (left for you) */
fputs ("realloc t[i] required.\n", stderr);
}
/* write word to csv line at end */
sprintf (t[csvl] + used[csvl], used[csvl] ? ",%s" : "%s", word);
t[csvl][used[csvl] ? used[csvl] + len + 1 : len] = 0;
used[csvl] += used[csvl] ? len + 1 : len;
}
csvl++;
}
readdone:;
freep2p (l, *n);
free (offset);
free (used);
*n = csvl;
return t;
}
int main (int argc, char **argv) {
char **t;
size_t n = 0;
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!(t = transpose2csv (&n, fp))) {
fputs ("error: transpose2csv failed.\n", stderr);
return 1;
}
if (fp != stdin) fclose (fp); /* close file if not stdin */
for (size_t i = 0; i < n; i++)
if (t[i])
puts (t[i]);
freep2p (t, n);
return 0;
}
Example Use/Output
$ ./bin/transpose2csv < dat/col2csv3x4.txt
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
Memory Use/Error Check
In any code you write that dynamically allocates memory, you have 2 responsibilities regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed.
It is imperative that you use a memory error checking program to insure you do not attempt to access memory or write beyond/outside the bounds of your allocated block, attempt to read or base a conditional jump on an uninitialized value, and finally, to confirm that you free all the memory you have allocated.
For Linux valgrind is the normal choice. There are similar memory checkers for every platform. They are all simple to use, just run your program through it.
$ valgrind ./bin/transpose2csv < dat/col2csv3x4.txt
==18604== Memcheck, a memory error detector
==18604== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==18604== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==18604== Command: ./bin/transpose2csv
==18604==
JACK,20,London
Maria,34,NewYork
Stephan,45,Toronto
Nora,28,Berlin
==18604==
==18604== HEAP SUMMARY:
==18604== in use at exit: 0 bytes in 0 blocks
==18604== total heap usage: 15 allocs, 15 frees, 4,371 bytes allocated
==18604==
==18604== All heap blocks were freed -- no leaks are possible
==18604==
==18604== For counts of detected and suppressed errors, rerun with: -v
==18604== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Always confirm that you have freed all memory you have allocated and that there are no memory errors.
Look things over and let me know if you have further questions.

Text line character count dynamically in c

char *line = NULL;
int count=0;
line = (char*)malloc(sizeof(char));
while(fgets(line,sizeof(line),file)){
line = realloc(line,sizeof(char*)); // dynamically increase allocate memory
count++;
}
printf("count number :%d\n",count);
free(line);
I am trying to count character in every line reading text , but for now trying to one line . Every time count number is 4 even i give more character string . I am confused . Please help me !!
Some issues :
First, you want a line :
line = (char*)malloc(sizeof(char));
This is equivalent to allocate one byte - sizeof(char) - and store its address to line. Maybe you want to get a larger buffer to get some characters from a file.
One of the way to do that is to define a constant size :
#define BUFFER_SIZE 256
line = (char *)malloc(sizeof(char) * BUFFER_SIZE);
After, you run the counter while.
while(fgets(line,sizeof(line),file))
is also wrong, because you want to read at most sizeof(line) bytes, which is equivalent to sizeof(char *). It's 32 or 64 bits depending on your system architecture.
You want to read as most the size of your buffer, which means you want to read at most BUFFER_SIZE characters. So it's better to do :
while(fgets(line,sizeof(char) * BUFFER_SIZE, file))
{
/* do stuff */
}
It's a warning : the use of fgets is dangerous. If you want to get bytes from file and also to count them you can use fread like :
size_t tmp;
while(tmp = fread(line, sizeof(char), BUFFER_SIZE, file))
{
count += tmp;
/* do stuff on line */
}
But if you only want to get the size of your file, go check this other post.
One way to do this without tying yourself in knots over memory allocation, etc, is:
FILE *f;
int n;
char c;
int line_number = 1;
int line_length = 0;
f = fopen("whatever", "r");
while (n = fread(&c, 1, 1, f))
{
if (c != '\n')
line_length += 1;
else
{
printf("Length of line %d = %d\n", line_number , line_length);
line_number += 1;
line_length = 0;
}
}
fclose(f);
i.e. read the file one character at a time, counting characters as you go. Let the OS and the runtime library worry about buffering - that's what they're there for. Perhaps not the most efficient, but sometimes simplicity is beneficial.
Best of luck.
Here is a function mfgets that reads a line into a dynamically allocated buffer. It should be reasonably bomb-proof.
Like fgets it returns NULL if no characters were read. However, it can also return NULL if the initial buffer allocation failed before any characters were read.
It sets errno to ENOMEM if a buffer allocation or reallocation failed at any point, but if any characters have been read, then a buffer is still returned.
As a bonus, the second parameter can be used to obtain the length of the string in the buffer.
The returned buffer can be freed by calling the free function.
mfgets.h:
#ifndef MFGETS_H__INCLUDED__
#define MFGETS_H__INCLUDED__
#include <stdio.h>
char *mfgets(FILE *stream, size_t *stringlen);
#endif
mfgets.c:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include "mfgets.h"
/**
* Read a line into allocated memory.
*
* Reads a line from a stream into memory allocated by \b malloc() or
* \b realloc() until an \b EOF or a newline is read. If a newline is
* read, it is stored into the memory. A terminating null byte is
* stored after the last character in the memory. The memory can be
* freed with \b free().
*
* \param stream The stream pointer.
* \param[out] stringlen If non-null, set to length of string read.
*
* \return A pointer to the memory if at least one character was read,
* otherwise \c NULL.
*
* \remark \c errno is set to \c ENOMEM on failure to allocate memory
* of sufficient size to store the whole line. If the line has been
* partially read, memory is still returned even if \c errno is set to
* \c ENOMEM.
*/
char *mfgets(FILE *stream, size_t *stringlen)
{
size_t buflen = 256; /* initial allocation size */
size_t slen = 0; /* string length */
int err = 0; /* new error */
int olderr = errno; /* old error propagation */
char *buf; /* allocated buffer */
char *newbuf; /* reallocated buffer */
/* allocate initial buffer */
buf = malloc(buflen);
if (!buf) {
err = ENOMEM;
} else {
/* read remainder of line into new part of buffer */
while (fgets(buf + slen, buflen - slen, stream)) {
/* update string length */
slen += strlen(buf + slen);
if (slen < buflen - 1 || buf[slen - 1] == '\n') {
/* fgets() did not run out of space */
break;
}
/* need to increase buffer size */
if (buflen == SIZE_MAX) {
/* cannot increase buffer size */
err = ENOMEM;
break;
}
if (SIZE_MAX - buflen >= buflen && buflen <= INT_MAX) {
/* double buffer size */
buflen *= 2;
} else if (SIZE_MAX - buflen > INT_MAX) {
/* increase buffer size by maximum amount
* that can be passed to fgets() */
buflen += INT_MAX;
} else {
/* increase buffer size to maximum amount */
buflen = SIZE_MAX;
}
/* reallocate buffer with new size */
newbuf = realloc(buf, buflen);
if (!newbuf) {
err = ENOMEM;
break;
}
buf = newbuf;
}
/* finished reading line (or reached EOF or stream error) */
if (slen) {
/* reallocate buffer to actual string size */
newbuf = realloc(buf, slen + 1);
if (newbuf) {
buf = newbuf;
}
} else {
/* no characters read, so do not return a buffer */
free(buf);
buf = NULL;
}
}
if (stringlen) {
/* caller wants actual length of string */
*stringlen = slen;
}
/* set new error or propagate old error */
errno = err ? err : olderr;
/* return buffer or NULL */
return buf;
}
Test program:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "mfgets.h"
int main(void)
{
size_t slen;
char *line;
errno = 0;
while ((line = mfgets(stdin, &slen)) != NULL) {
printf("(%zu) %s", slen, line);
free(line);
errno = 0;
}
if (errno) {
perror("");
}
return 0;
}

How to Delete Duplicate Elements from Dynamically Allocated String Array in C

I have created a program in C that reads in a word file and counts how many words are in that file, along with how many times each word occurs.
When I run it through Valgrind I either get too many bytes lost or a Segmentation Fault.
How can I remove a duplicate element from a dynamically allocated array and free the memory as well?
Gist: wordcount.c
int tokenize(Dictionary **dictionary, char *words, int total_words)
{
char *delim = " .,?!:;/\"\'\n\t";
char **temp = malloc(sizeof(char) * strlen(words) + 1);
char *token = strtok(words, delim);
*dictionary = (Dictionary*)malloc(sizeof(Dictionary) * total_words);
int count = 1, index = 0;
while (token != NULL)
{
temp[index] = (char*)malloc(sizeof(char) * strlen(token) + 1);
strcpy(temp[index], token);
token = strtok(NULL, delim);
index++;
}
for (int i = 0; i < total_words; ++i)
{
for (int j = i + 1; j < total_words; ++j)
{
if (strcmp(temp[i], temp[j]) == 0) // <------ segmentation fault occurs here
{
count++;
for (int k = j; k < total_words; ++k) // <----- loop to remove duplicates
temp[k] = temp[k+1];
total_words--;
j--;
}
}
int length = strlen(temp[i]) + 1;
(*dictionary)[i].word = (char*)malloc(sizeof(char) * length);
strcpy((*dictionary)[i].word, temp[i]);
(*dictionary)[i].count = count;
count = 1;
}
free(temp);
return 0;
}
Thanks in advance.
Without A Minimal, Complete, and Verifiable example, there is no guarantee that additional problems do not originate elsewhere in your code, but the following need careful attention:
char **temp = malloc(sizeof(char) * strlen(words) + 1);
Above you are allocating pointers not words, your allocation is too small by a factor of sizeof (char*) - sizeof (char). To prevent such problems, if you use the sizeof *thepointer, you will always have the correct size, e.g.
char **temp = malloc (sizeof *temp * strlen(words) + 1);
(unless you plan on providing a sentinel NULL as the final pointer, then + 1 is unnecessary. You must also validate the return (see below))
Next:
*dictionary = (Dictionary*)malloc(sizeof(Dictionary) * total_words);
There is no need to cast the return of malloc, it is unnecessary. See: Do I cast the result of malloc?. Further, if *dictionary was previously allocated elsewhere, the allocation above creates a memory leak because you lose the reference to the original pointer. If it has been previously allocated, you need realloc, not malloc. And if wasn't allocate, a better way of writing it would be:
*dictionary = malloc (sizeof **dictionary * total_words);
You must also validation the allocation succeeds before attempting to use the block of memory, e.g.
if (! *dictionary) {
perror ("malloc - *dictionary");
exit (EXIT_FAILURE);
}
In:
temp[index] = (char*)malloc(sizeof(char) * strlen(token) + 1);
sizeof(char) is always 1 and can be omitted. Better written as:
temp[index] = malloc (strlen(token) + 1);
or better, allocate and validate in a single block:
if (!(temp[index] = malloc (strlen(token) + 1))) {
perror ("malloc - temp[index]");
exit (EXIT_FAILURE);
}
then
strcpy(temp[index++], token);
Next, while total_words may be equal to the words in temp, you have only validated that you have index number of words. That combined with your original allocation times sizeof (char) instead of sizeof (char *), makes it no wonder there can be segfaults where you attempt to iterate over your list of pointers in temp. Better:
for (int i = 0; i < index; ++i)
{
for (int j = i + 1; j < index; ++j)
(the same applies to your k loop as well. Additionally, since you have allocated each temp[index], when you shuffle pointers with temp[k] = temp[k+1]; you overwrite the pointer address in temp[k] causing a memory leak with every pointer you overwrite. Each temp[k] that is overwritten should be freed before the assignment is made.
While you are updating total_words--, there still to this point has never been a validation that index == total_words, and in the event they are not, you can have no confidence in total_words or that you won't segfault attempting to iterate over uninitialized pointers as the result.
The rest appears workable, but after changes are made above, you should insure that the are no additional changes needed. Look things over and let me know if you need additional help. (and with a MCVE, I'm happy to help further)
Additional Problems
I apologize for the delay, real-world called -- and this took a lot longer than anticipated, because what you have is an awkward slow-motion logical train-wreck. First and foremost, while there is nothing wrong with reading an entire text-file file into a buffer with fread -- the buffer is NOT nul-terminated and therefore cannot be used with any functions expecting a string. Yes, strtok, strcpy or any string function will read past the end of word_data looking for the nul-terminating character (well out into memory you don't own) resulting in a SegFault.
Your various scattered +1 tacked onto your malloc allocations now make a little more sense, as it appears you were looking for where you needed to add an additional character to make sure you could nul-terminate word_data, but couldn't quite figure out where it went. (don't worry, I straightened that out for you, but it is a big hint that you are probably going about this in the wrong way -- reading with POSIX getline or fgets is probably a better approach than the file-at-once for this type of text processing)
That is literally, just the tip of the iceberg in the problems encountered in your code. As hinted at earlier, in tokenize, you failed to validate that index equals total_words. This ends up being important given your choice of delim which includes the ASCII apostrophe (or single-quote). This causes your index to exceed the word_count any time a plural-possessive or contraction is encountered in the buffer (e.g. "can't" is split is "can" and "t", "Peter's" is split into "Peter" and "s", etc.... You will have to decide how you want to resolve this, I have simply removed the single quote for now.
Your logic in both tokenize and count_words was difficult to follows, and just wrong in some aspects, and your return type (void) for read_file provided absolutely no way to indicate a success (or failure) within. Always choose a return type that provides meaningful information from which you can determine is a critical function has succeeded or failed (reading your data qualifies as critical).
If it provides a return -- use it. This applies to all functions that can fail (including functions like fseek)
Returning 0 from tokenize misses the return of the number of words (allocated struts) in dictionary leaving you unable to properly free the information and leaving you to guess at some number to display (e.g. for (int i = 0; i < 333; ++i) in main()). You need to track the number of dictionary structs and member word that are allocated in tokenize (keep an index, say dindex). Then returning dindex to main() (assigned to hello in your code) provides the information you need to iterate over the structs in main() to output your information, as well as to free each allocated word before freeing the pointers.
If you don't have an accurate count of the number of allocated dictionary structs back in main(), you have failed in the two responsibilities you have regarding any block of memory allocated: (1) always preserve a pointer to the starting address for the block of memory so, (2) it can be freed when it is no longer needed. If you don't know how many blocks there are, then you haven't done (1) and can't do (2).
This is a nit about style, and while not an error, the standard coding style for C avoids the use of Initialcaps, camelCase or MixedCase variable names in favor of all lower-case while reserving upper-case names for use with macros and constants. It is a matter of style -- so it is completely up to you, but failing to follow it can lead to the wrong first impression in some circles.
Rather than carry on for another handful of paragraphs, I've reworked your example for you and added a few comments inline. Go though it, I haven't punishingly tested it for all corner-cases, but it should be a sound base to build from. You will note in going though it, your count_words and tokenize have been simplified. Try and understand why what was done, was done, and ask if you have any questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
typedef struct{
char *word;
int count;
} dictionary_t;
char *read_file (FILE *file, char **words, size_t *length)
{
size_t size = *length = 0;
if (fseek (file, 0, SEEK_END) == -1) {
perror ("fseek SEEK_END");
return NULL;
}
size = (size_t)ftell (file);
if (fseek (file, 0, SEEK_SET) == -1) {
perror ("fseek SEEK_SET");
return NULL;
}
/* +1 needed to nul-terminate buffer to pass to strtok */
if (!(*words = malloc (size + 1))) {
perror ("malloc - size");
return NULL;
}
if (fread (*words, 1, size, file) != size) {
perror ("fread words");
free (*words);
return NULL;
}
*length = size;
(*words)[*length] = 0; /* nul-terminate buffer - critical */
return *words;
}
int tokenize (dictionary_t **dictionary, char *words, int total_words)
{
// char *delim = " .,?!:;/\"\'\n\t"; /* don't split on apostrophies */
char *delim = " .,?!:;/\"\n\t";
char **temp = malloc (sizeof *temp * total_words);
char *token = strtok(words, delim);
int index = 0, dindex = 0;
if (!temp) {
perror ("malloc temp");
return -1;
}
if (!(*dictionary = malloc (sizeof **dictionary * total_words))) {
perror ("malloc - dictionary");
return -1;
}
while (token != NULL)
{
if (!(temp[index] = malloc (strlen (token) + 1))) {
perror ("malloc - temp[index]");
exit (EXIT_FAILURE);
}
strcpy(temp[index++], token);
token = strtok (NULL, delim);
}
if (total_words != index) { /* validate total_words = index */
fprintf (stderr, "error: total_words != index (%d != %d)\n",
total_words, index);
/* handle error */
}
for (int i = 0; i < total_words; i++) {
int found = 0, j = 0;
for (; j < dindex; j++)
if (strcmp((*dictionary)[j].word, temp[i]) == 0) {
found = 1;
break;
}
if (!found) {
if (!((*dictionary)[dindex].word = malloc (strlen (temp[i]) + 1))) {
perror ("malloc (*dictionay)[dindex].word");
exit (EXIT_FAILURE);
}
strcpy ((*dictionary)[dindex].word, temp[i]);
(*dictionary)[dindex++].count = 1;
}
else
(*dictionary)[j].count++;
}
for (int i = 0; i < total_words; i++)
free (temp[i]); /* you must free storage for words */
free (temp); /* before freeing pointers */
return dindex;
}
int count_words (char *words, size_t length)
{
int count = 0;
char previous_char = ' ';
while (length--) {
if (isspace (previous_char) && !isspace (*words))
count++;
previous_char = *words++;
}
return count;
}
int main (int argc, char **argv)
{
char *word_data = NULL;
int word_count, hello;
size_t length = 0;
dictionary_t *dictionary = NULL;
FILE *input = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!input) { /* validate file open for reading */
fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
return 1;
}
if (!read_file (input, &word_data, &length)) {
fprintf (stderr, "error: file_read failed.\n");
return 1;
}
if (input != stdin) fclose (input); /* close file if not stdin */
word_count = count_words (word_data, length);
printf ("wordct: %d\n", word_count);
/* number of dictionary words returned in hello */
if ((hello = tokenize (&dictionary, word_data, word_count)) <= 0) {
fprintf (stderr, "error: no words or tokenize failed.\n");
return 1;
}
for (int i = 0; i < hello; ++i) {
printf("%-16s : %d\n", dictionary[i].word, dictionary[i].count);
free (dictionary[i].word); /* you must free word storage */
}
free (dictionary); /* free pointers */
free (word_data); /* free buffer */
return 0;
}
Let me know if you have further questions.
There are a few things that you need to do to make your code work:
Fix the memory allocation of temp by replacing sizeof(char) with sizeof(char *) like so:
char **temp = malloc(sizeof(char *) * strlen(words) + 1);
Fix the memory allocation of dictionary by replacing sizeof(Dictionary) with sizeof(Dictionary *):
*dictionary = (Dictionary*)malloc(sizeof(Dictionary *) * (*total_words));
Pass the address of address of word_count when calling tokenize:
int hello = tokenize(&dictionary, word_data, &word_count);
Replace all occurrences of total_words in tokenize function with (*total_words). In the tokenize function signature, you can replace int total_words with int *total_words.
You should also replace the hard-coded value of 333 in your for loop in the main function with word_count.
After you make these changes, your code should work as expected. I was able to run it successfully with these changes.

reading large lists through stdin in C

If my program is going to have large lists of numbers passed in through stdin, what would be the most efficient way of reading this in?
The input I'm going to be passing into the program is going to be of the following format:
3,5;6,7;8,9;11,4;;
I need to process the input so that I can use the numbers between the colons (i.e I want to be able to use 3 and 5, 6 and 7 etc etc). The ;; indicates that it is the end of the line.
I was thinking of using a buffered reader to read entire lines and then using parseInt.
Would this be the most efficient way of doing it?
This is a working solution
One way to do this is to use strtok() and store the values in an array. Ideally, dynamically allocated.
int main(int argc, char *argv[])
{
int lst_size=100;
int line_size=255;
int lst[lst_size];
int count=0;
char buff[line_size];
char * token=NULL;
fgets (buff, line_size, stdin); //Get input
Using strtok by passing ',' and ';' as deleminator.
token=strtok(buff, ";,");
lst[count++]=atoi(token);
while(token=strtok(NULL, ";,")){
lst[count++]=atoi(token);
}
Finally you have to account for the double ";;" by reducing the count by 1, because atoi(token) will return 0 for that case and store it in the nth index. Which you don't want.
count--;
}
One other fairly elegant way to handle this is to allow strtol to parse the input by advancing the string to be read to endptr as returned by strtol. Combined with an array allocated/reallocated as needed, you should be able to handle lines of any length (up to memory exhaustion). The example below uses a single array for the data. If you want to store multiple lines, each as a separate array, you can use the same approach, but start with a pointer to array of pointers to int. (i.e. int **numbers and allocate the pointers and then each array). Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
int main () {
char *ln = NULL; /* NULL forces getline to allocate */
size_t n = 0; /* max chars to read (0 - no limit) */
ssize_t nchr = 0; /* number of chars actually read */
int *numbers = NULL; /* array to hold numbers */
size_t nmax = NMAX; /* check for reallocation */
size_t idx = 0; /* numbers array index */
if (!(numbers = calloc (NMAX, sizeof *numbers))) {
fprintf (stderr, "error: memory allocation failed.");
return 1;
}
/* read each line from stdin - dynamicallly allocated */
while ((nchr = getline (&ln, &n, stdin)) != -1)
{
char *p = ln; /* pointer for use with strtol */
char *ep = NULL;
errno = 0;
while (errno == 0)
{
/* parse/convert each number on stdin */
numbers[idx] = strtol (p, &ep, 10);
/* note: overflow/underflow checks omitted */
/* if valid conversion to number */
if (errno == 0 && p != ep)
{
idx++; /* increment index */
if (!ep) break; /* check for end of str */
}
/* skip delimiters/move pointer to next digit */
while (*ep && (*ep <= '0' || *ep >= '9')) ep++;
if (*ep)
p = ep;
else
break;
/* reallocate numbers if idx = nmax */
if (idx == nmax)
{
int *tmp = realloc (numbers, 2 * nmax * sizeof *numbers);
if (!tmp) {
fprintf (stderr, "Error: struct reallocation failure.\n");
exit (EXIT_FAILURE);
}
numbers = tmp;
memset (numbers + nmax, 0, nmax * sizeof *numbers);
nmax *= 2;
}
}
}
/* free mem allocated by getline */
if (ln) free (ln);
/* show values stored in array */
size_t i = 0;
for (i = 0; i < idx; i++)
printf (" numbers[%2zu] %d\n", i, numbers[i]);
/* free mem allocate to numbers */
if (numbers) free (numbers);
return 0;
}
Output
$ echo "3,5;6,7;8,9;11,4;;" | ./bin/prsistdin
numbers[ 0] 3
numbers[ 1] 5
numbers[ 2] 6
numbers[ 3] 7
numbers[ 4] 8
numbers[ 5] 11
numbers[ 6] 4
Also works where the string is stored in a file as:
$ cat dat/numsemic.csv | ./bin/prsistdin
or
$ ./bin/prsistdin < dat/numsemic.csv
Using fgets and without size_t
It took a little reworking to come up with a revision I was happy with that eliminated getline and substituted fgets. getline is far more flexible, handling the allocation of space for you, with fgets it is up to you. (not to mention getline returning the actual number of chars read without having to call strlen).
My goal here was to preserve the ability to read any length line to meet your requirement. That either meant initially allocating some huge line buffer (wasteful) or coming up with a scheme that would reallocate the input line buffer as needed in the event it was longer than the space initially allocate to ln. (this is what getline does so well). I'm reasonably happy with the results. Note: I put the reallocation code in functions to keep main reasonably clean. footnote 2
Take a look at the following code. Note, I have left the DEBUG preprocessor directives in the code allowing you to compile with the -DDEBUG flag if you want to have it spit out each time it allocates. [footnote 1] You can compile the code with:
gcc -Wall -Wextra -o yourexename yourfilename.c
or if you want the debugging output (e.g. set LMAX to 2 or something less than the line length), use the following:
gcc -Wall -Wextra -o yourexename yourfilename.c -DDEBUG
Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
#define LMAX 1024
char *realloc_char (char *sp, unsigned int *n); /* reallocate char array */
int *realloc_int (int *sp, unsigned int *n); /* reallocate int array */
char *fixshortread (FILE *fp, char **s, unsigned int *n); /* read all stdin */
int main () {
char *ln = NULL; /* dynamically allocated for fgets */
int *numbers = NULL; /* array to hold numbers */
unsigned int nmax = NMAX; /* numbers check for reallocation */
unsigned int lmax = LMAX; /* ln check for reallocation */
unsigned int idx = 0; /* numbers array index */
unsigned int i = 0; /* simple counter variable */
char *nl = NULL;
/* initial allocation for numbers */
if (!(numbers = calloc (NMAX, sizeof *numbers))) {
fprintf (stderr, "error: memory allocation failed (numbers).");
return 1;
}
/* initial allocation for ln */
if (!(ln = calloc (LMAX, sizeof *ln))) {
fprintf (stderr, "error: memory allocation failed (ln).");
return 1;
}
/* read each line from stdin - dynamicallly allocated */
while (fgets (ln, lmax, stdin) != NULL)
{
/* provide a fallback to read remainder of line
if the line length exceeds lmax */
if (!(nl = strchr (ln, '\n')))
fixshortread (stdin, &ln, &lmax);
else
*nl = 0;
char *p = ln; /* pointer for use with strtol */
char *ep = NULL;
errno = 0;
while (errno == 0)
{
/* parse/convert each number on stdin */
numbers[idx] = strtol (p, &ep, 10);
/* note: overflow/underflow checks omitted */
/* if valid conversion to number */
if (errno == 0 && p != ep)
{
idx++; /* increment index */
if (!ep) break; /* check for end of str */
}
/* skip delimiters/move pointer to next digit */
while (*ep && (*ep <= '0' || *ep >= '9')) ep++;
if (*ep)
p = ep;
else
break;
/* reallocate numbers if idx = nmax */
if (idx == nmax)
realloc_int (numbers, &nmax);
}
}
/* free mem allocated by getline */
if (ln) free (ln);
/* show values stored in array */
for (i = 0; i < idx; i++)
printf (" numbers[%2u] %d\n", (unsigned int)i, numbers[i]);
/* free mem allocate to numbers */
if (numbers) free (numbers);
return 0;
}
/* reallocate character pointer memory */
char *realloc_char (char *sp, unsigned int *n)
{
char *tmp = realloc (sp, 2 * *n * sizeof *sp);
#ifdef DEBUG
printf ("\n reallocating %u to %u\n", *n, *n * 2);
#endif
if (!tmp) {
fprintf (stderr, "Error: char pointer reallocation failure.\n");
exit (EXIT_FAILURE);
}
sp = tmp;
memset (sp + *n, 0, *n * sizeof *sp); /* memset new ptrs 0 */
*n *= 2;
return sp;
}
/* reallocate integer pointer memory */
int *realloc_int (int *sp, unsigned int *n)
{
int *tmp = realloc (sp, 2 * *n * sizeof *sp);
#ifdef DEBUG
printf ("\n reallocating %u to %u\n", *n, *n * 2);
#endif
if (!tmp) {
fprintf (stderr, "Error: int pointer reallocation failure.\n");
exit (EXIT_FAILURE);
}
sp = tmp;
memset (sp + *n, 0, *n * sizeof *sp); /* memset new ptrs 0 */
*n *= 2;
return sp;
}
/* if fgets fails to read entire line, fix short read */
char *fixshortread (FILE *fp, char **s, unsigned int *n)
{
unsigned int i = 0;
int c = 0;
i = *n - 1;
realloc_char (*s, n);
do
{
c = fgetc (fp);
(*s)[i] = c;
i++;
if (i == *n)
realloc_char (*s, n);
} while (c != '\n' && c != EOF);
(*s)[i-1] = 0;
return *s;
}
footnote 1
nothing special about the choice of the word DEBUG (it could have been DOG, etc..), the point to take away is if you want to conditionally include/exclude code, you can simply use preprocessor flags to do that. You just add -Dflagname to pass flagname to the compiler.
footnote 2
you can combine the reallocation functions into a single void* function that accepts a void pointer as its argument along with the size of the type to be reallocated and returns a void pointer to the reallocated space -- but we will leave that for a later date.
What you could do is read in from stdin using fgets or fgetc. You could also use getline() since you're reading in from stdin.
Once you read in the line you can use strtok() with the delimiter for ";" to split the string into pieces at the semicolons. You can loop through until strok() is null, or in this case, ';'. Also in C you should use atoi() to convert strings to integers.
For Example:
int length = 256;
char* str = (char*)malloc(length);
int err = getline(&str, &length, stdin);
I would read in the command args, then parse using the strtok() library method
http://man7.org/linux/man-pages/man3/strtok.3.html
(The web page referenced by the URL above even has a code sample of how to use it.)
I'm a little rusty at C, but could this work for you?
char[1000] remainder;
int first, second;
fp = fopen("C:\\file.txt", "r"); // Error check this, probably.
while (fgets(&remainder, 1000, fp) != null) { // Get a line.
while (sscanf(remainder, "%d,%d;%s", first, second, remainder) != null) {
// place first and second into a struct or something
}
}
getchar_unlocked() is what you are looking for.
Here is the code:
#include <stdio.h>
inline int fastRead_int(int * x)
{
register int c = getchar_unlocked();
*x = 0;
// clean stuff in front of + look for EOF
for(; ((c<48 || c>57) && c != EOF); c = getchar_unlocked());
if(c == EOF)
return 0;
// build int
for(; c>47 && c<58 ; c = getchar_unlocked()) {
*x = (*x<<1) + (*x<<3) + c - 48;
}
return 1;
}
int main()
{
int x;
while(fastRead_int(&x))
printf("%d ",x);
return 0;
}
For input 1;2;2;;3;;4;;;;;54;;;; the code above produces 1 2 2 3 4 54.
I guarantee, this solution is a lot faster than others presented in this topic. It is not only using getchar_unlocked(), but also uses register, inline as well as multiplying by 10 tricky way: (*x<<1) + (*x<<3).
I wish you good luck in finding better solution.

Read text file, save all digits into character string

I am trying to read a text file containing the string "a3rm5t?7!z*&gzt9v" and put all the numeric characters into a character string to later convert into an integer.
I am currently trying to do this by using sscanf on the buffer after reading the file, and then using sprintf to save all characters found using %u in a character string called str.
However, the integer that is returning when I call printf on str is different each time I run the program. What am I doing right and what am I doing wrong?
This code works when the text file contains a string like "23dog" and returns 23 but not when the string is something like 23dog2.
EDIT: I now realize that i should be putting the numeric characters in a character ARRAY rather than just one string.
int main(int argc, const char **argv)
{
int in;
char buffer[128];
char *str;
FILE *input;
in = open(argv[1], O_RDONLY);
read(in, buffer, 128);
unsigned x;
sscanf(buffer, "%u", &x);
sprintf(str,"%u\n", x);
printf("%s\n",str);
close (in);
exit(0);
}
If you simply want to filter out any non-digits from your input, you need not use scanf, sprintf and the like. Simply loop over the buffer and copy the characters that are digits.
The following program only works for a single line of input read from standard input and only if it is less than 512 characters long but it should give you the correct idea.
#include <stdio.h>
#define BUFFER_SIZE 512
int
main()
{
char buffer[BUFFER_SIZE]; /* Here we read into. */
char digits[BUFFER_SIZE]; /* Here we insert the digits. */
char * pos;
size_t i = 0;
/* Read one line of input (max BUFFER_SIZE - 1 characters). */
if (!fgets(buffer, BUFFER_SIZE, stdin))
{
perror("fgets");
return 1;
}
/* Loop over the (NUL terminated) buffer. */
for (pos = buffer; *pos; ++pos)
{
if (*pos >= '0' && *pos <= '9')
{
/* It's a digit: copy it over. */
digits[i++] = *pos;
}
}
digits[i] = '\0'; /* NUL terminate the string. */
printf("%s\n", digits);
return 0;
}
A good approach to any problem like this is to read the entire line into a buffer and then assign a pointer to the buffer. You can then use the pointer to step through the buffer reading each character and acting on it appropriately. The following is one example of this approach. getline is used to read the line from the file (it has the advantage of allocating space for buffer and returning the number of characters read). You then allocate space for the character string based on the size of buffer as returned by getline. Remember, when done, you are responsible for freeing the memory allocated by getline.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main (int argc, const char **argv)
{
char *buffer = NULL; /* forces getline to allocate required space */
ssize_t read = 0; /* number of characters read by getline */
size_t n = 0; /* limit of characters to read, (0 no limit) */
char *str = NULL; /* string to hold digits read from file */
char *p = NULL; /* ptr to use with buffer (could use buffer) */
int idx = 0; /* index for adding digits to str */
int number = 0; /* int to hold number parsed from file */
FILE *input;
/* validate input */
if (argc < 2) { printf ("Error: insufficient input. Usage: %s filename\n", argv[0]); return 1; }
/* open and validate file */
input = fopen(argv[1], "r");
if (!input) { printf ("Error: failed to open file '%s\n", argv[1]); return 1; }
/* read line from file with getline */
if ((read = getline (&buffer, &n, input)) != -1)
{
str = malloc (sizeof (char) * read); /* allocate memory for str */
p = buffer; /* set pointer to buffer */
while (*p) /* read each char in buffer */
{
if (*p > 0x2f && *p < 0x3a) /* if char is digit 0-9 */
{
str[idx] = *p; /* copy to str at idx */
idx++; /* increment idx */
}
p++; /* increment pointer */
}
str[idx] = 0; /* null-terminate str */
number = atoi (str); /* convert str to int */
printf ("\n string : %s number : %d\n\n", buffer, number);
} else {
printf ("Error: nothing read from file '%s\n", argv[1]);
return 1;
}
if (input) fclose (input); /* close input file stream */
if (buffer) free (buffer); /* free memory allocated by getline */
if (str) free (str); /* free memory allocated to str */
return 0;
}
datafile:
$ cat dat/fwintstr.dat
a3rm5t?7!z*&gzt9v
output:
$ ./bin/prsint dat/fwintstr.dat
string : a3rm5t?7!z*&gzt9v
number : 3579

Resources