/* Question snippet: reads from `file` with fgets() and counts loop iterations.
 * NOTE(review): `file` is not declared in this snippet; presumably a FILE *
 * opened by the surrounding code. */
char *line = NULL;
int count=0;
/* Allocates sizeof(char) bytes, i.e. a single byte. */
line = (char*)malloc(sizeof(char));
/* sizeof(line) is the size of the pointer variable, not of the allocation,
 * so fgets() is given a pointer-sized limit for a one-byte buffer. */
while(fgets(line,sizeof(line),file)){
line = realloc(line,sizeof(char*)); // resizes to sizeof(char*) bytes on every pass; the size never grows
/* count is incremented once per successful fgets() call, not once per character. */
count++;
}
printf("count number :%d\n",count);
free(line);
I am trying to count the characters in every line of a text file, but for now I am testing with just one line. The count is always 4, even when I give it a longer string. I am confused. Please help!
Some issues :
First, you want a line :
line = (char*)malloc(sizeof(char));
This is equivalent to allocate one byte - sizeof(char) - and store its address to line. Maybe you want to get a larger buffer to get some characters from a file.
One of the way to do that is to define a constant size :
#define BUFFER_SIZE 256
line = (char *)malloc(sizeof(char) * BUFFER_SIZE);
After, you run the counter while.
while(fgets(line,sizeof(line),file))
is also wrong, because it reads at most sizeof(line) bytes, and sizeof(line) is sizeof(char *): the size of a pointer, which is 4 or 8 bytes (32 or 64 bits) depending on your system architecture.
You want to read at most the size of your buffer, which means you want to read at most BUFFER_SIZE characters. So it's better to do :
while(fgets(line,sizeof(char) * BUFFER_SIZE, file))
{
/* do stuff */
}
It's a warning : the use of fgets is dangerous. If you want to get bytes from file and also to count them you can use fread like :
size_t tmp;
while(tmp = fread(line, sizeof(char), BUFFER_SIZE, file))
{
count += tmp;
/* do stuff on line */
}
But if you only want to get the size of your file, go check this other post.
One way to do this without tying yourself in knots over memory allocation, etc, is:
FILE *f;
int n;
char c;
int line_number = 1;
int line_length = 0;
f = fopen("whatever", "r");
while (n = fread(&c, 1, 1, f))
{
if (c != '\n')
line_length += 1;
else
{
printf("Length of line %d = %d\n", line_number , line_length);
line_number += 1;
line_length = 0;
}
}
fclose(f);
i.e. read the file one character at a time, counting characters as you go. Let the OS and the runtime library worry about buffering - that's what they're there for. Perhaps not the most efficient, but sometimes simplicity is beneficial.
Best of luck.
Here is a function mfgets that reads a line into a dynamically allocated buffer. It should be reasonably bomb-proof.
Like fgets it returns NULL if no characters were read. However, it can also return NULL if the initial buffer allocation failed before any characters were read.
It sets errno to ENOMEM if a buffer allocation or reallocation failed at any point, but if any characters have been read, then a buffer is still returned.
As a bonus, the second parameter can be used to obtain the length of the string in the buffer.
The returned buffer can be freed by calling the free function.
mfgets.h:
/* mfgets.h - declaration of mfgets(), which reads one line of any length
 * from a stream into memory obtained from malloc()/realloc(). */
#ifndef MFGETS_H__INCLUDED__
#define MFGETS_H__INCLUDED__
#include <stdio.h>
/* Returns the allocated line (release it with free()), or NULL when no
 * character was read. If stringlen is non-NULL the string length is stored
 * through it. errno is set to ENOMEM when an allocation fails. */
char *mfgets(FILE *stream, size_t *stringlen);
#endif
mfgets.c:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include "mfgets.h"
/**
 * Read a line into allocated memory.
 *
 * Reads a line from a stream into memory allocated by \b malloc() or
 * \b realloc() until an \b EOF or a newline is read. If a newline is
 * read, it is stored into the memory. A terminating null byte is
 * stored after the last character in the memory. The memory can be
 * freed with \b free().
 *
 * \param stream The stream pointer.
 * \param[out] stringlen If non-null, set to length of string read
 * (0 when \c NULL is returned).
 *
 * \return A pointer to the memory if at least one character was read,
 * otherwise \c NULL.
 *
 * \remark \c errno is set to \c ENOMEM on failure to allocate memory
 * of sufficient size to store the whole line. If the line has been
 * partially read, memory is still returned even if \c errno is set to
 * \c ENOMEM. When no allocation fails, \c errno is restored to the
 * value it had on entry.
 *
 * \remark The size argument of \b fgets() is an \c int, so the buffer
 * is grown in steps that keep the unread space (buflen - slen) at or
 * below \c INT_MAX.
 */
char *mfgets(FILE *stream, size_t *stringlen)
{
    size_t buflen = 256;    /* initial allocation size */
    size_t slen = 0;        /* string length */
    int err = 0;            /* new error */
    int olderr = errno;     /* old error propagation */
    char *buf;              /* allocated buffer */
    char *newbuf;           /* reallocated buffer */

    /* allocate initial buffer */
    buf = malloc(buflen);
    if (!buf) {
        err = ENOMEM;
    } else {
        /* read remainder of line into new part of buffer;
         * the growth policy below guarantees the cast cannot overflow */
        while (fgets(buf + slen, (int)(buflen - slen), stream)) {
            /* update string length */
            slen += strlen(buf + slen);
            if (slen < buflen - 1 || buf[slen - 1] == '\n') {
                /* fgets() did not run out of space */
                break;
            }
            /* need to increase buffer size */
            if (buflen == SIZE_MAX) {
                /* cannot increase buffer size */
                err = ENOMEM;
                break;
            }
            /* Here slen == buflen - 1, so after growing the space handed
             * to fgets() is (new buflen - old buflen + 1). */
            if (buflen < (size_t)INT_MAX && SIZE_MAX - buflen >= buflen) {
                /* double buffer size: free space is old buflen + 1 */
                buflen *= 2;
            } else if (SIZE_MAX - buflen >= (size_t)INT_MAX) {
                /* increase buffer size by the maximum amount that still
                 * leaves exactly INT_MAX bytes for fgets() */
                buflen += (size_t)INT_MAX - 1;
            } else {
                /* increase buffer size to maximum amount; fewer than
                 * INT_MAX bytes remain, so free space is <= INT_MAX */
                buflen = SIZE_MAX;
            }
            /* reallocate buffer with new size */
            newbuf = realloc(buf, buflen);
            if (!newbuf) {
                /* keep the partial line that is already in buf */
                err = ENOMEM;
                break;
            }
            buf = newbuf;
        }
        /* finished reading line (or reached EOF or stream error) */
        if (slen) {
            /* reallocate buffer to actual string size; a failed shrink
             * is harmless because the larger buffer is still valid */
            newbuf = realloc(buf, slen + 1);
            if (newbuf) {
                buf = newbuf;
            }
        } else {
            /* no characters read, so do not return a buffer */
            free(buf);
            buf = NULL;
        }
    }
    if (stringlen) {
        /* caller wants actual length of string */
        *stringlen = slen;
    }
    /* set new error or propagate old error */
    errno = err ? err : olderr;
    /* return buffer or NULL */
    return buf;
}
Test program:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include "mfgets.h"
/* Echo every line of standard input prefixed by its length, then report
 * any error left in errno by the final mfgets() call. */
int main(void)
{
    for (;;) {
        size_t length;
        char *text;

        /* Clear errno so a non-zero value after the loop comes from mfgets(). */
        errno = 0;
        text = mfgets(stdin, &length);
        if (text == NULL) {
            break;
        }
        printf("(%zu) %s", length, text);
        free(text);
    }
    if (errno != 0) {
        perror("");
    }
    return 0;
}
Related
I am trying to read a file and store every word into a dynamically allocated 2D array. The size of the input file is unknown.
I am totally lost and don't know how I could "fix/finish" the program.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Prompts for a file name, reads it from standard input and hands it to
 * fileConverter(). */
int main(void) {
char filename[25];
printf("Input the filename");
/* NOTE(review): "%s" carries no field width, so input longer than 24
 * characters would write past the end of filename. */
scanf("%s", filename);
/* NOTE(review): fileConverter() is defined below main and no prototype is
 * visible before this call. */
fileConverter(filename);
}
/* Question code (unfinished): opens the named file, reads it line by line
 * with fgets(), prints each stored line and returns how many were read.
 * NOTE(review): num_elements, num_elements_sub, i (first loop) and words are
 * used but not declared anywhere in this snippet, and the function's closing
 * brace is missing. */
int fileConverter(char filename[25]) {
//int maxLines = 50000;
//int maxWordSize = 128;
//char words[maxLines][maxWordSize];
//char **words;
/* Allocates an array of row pointers, then one zero-filled row per pointer. */
char **arr = (char**) calloc(num_elements, sizeof(char*));
for ( i = 0; i < num_elements; i++ ) {
arr[i] = (char*) calloc(num_elements_sub, sizeof(char));
}
FILE *file = NULL;
int amountOfWords = 0;
file = fopen(filename, "r");
if(file == NULL) {
/* Exits with status 0 when the file cannot be opened. */
exit(0);
}
/* NOTE(review): reads into words[], not into the arr rows allocated above. */
while(fgets(words[amountOfWords], 10000, file)) {
/* NOTE(review): the right-hand side is the string literal "\0" (a pointer),
 * not the character '\0'. */
words[amountOfWords][strlen(words[amountOfWords]) - 1] = "\0";
amountOfWords++;
}
for(int i = 0; i < amountOfWords; i++) {
printf("a[%d] = ", i);
printf("%s\n", words[i]);
}
printf("The file contains %d words and the same amount of lines.\n", amountOfWords);
return amountOfWords;
The main challenges for this kind of problem are
reallocating the array of strings as the program reads new words, and
handling words that are larger than the buffer used by fgets.
The general approach for these kind of parsing problems, is to design a state machine. The state machine here has two states:
The current character is whitespace. Action: Continue reading whitespace until we reach the end of the buffer, or until we land on a non-whitespace character, in which case we switch to state 2.
The current character is non-whitespace (i.e. a word). Action: Continue reading non-whitespace until we reach the end of the buffer, or until we land on a whitespace character, in which case we copy the word we just read to the array of strings and switch to state 1.
Particularly difficult is the case in which we are in state 2 and reach the end of the buffer. This means that this word spans multiple buffers. To accommodate for this, we deviate slightly from a direct state machine implementation. State 2 is slightly different, depending on if we are reading a new word or continuing one that was started in a previous buffer.
We now keep track of wordSize. If we start reading from the start of a buffer, but wordSize is not 0, then we know we are continuing a previous word and we know what size it was for the realloc we need.
Below is one possible implementation. All the work is done in the wordArrayRead function. Walking through it from the top of the function:
First we declare the variables that we need across lineBuffer reads: an index for the word itself and the length of the word we are currently reading, followed by the declaration of the buffer itself. The outside loop repeatedly reads using fgets until we have exhausted the input.
We start reading at index 0 and stop at the null-terminator. The first if-statement checks if we should be in state 2: either the current character is the start of a word or we were already reading a word.
State 2
The index wordStartIdx stays at the first character of the word (segment) and we walk the wordEndIdx to the end of the word (segment) or to the end of the buffer.
We then check if we need to increase the size of the array of strings. Here we increase it to 2 times + 1 the previous size to avoid frequent reallocations.
We set a boolean value indicating whether we have reached the end of a word. If we have, we need to allocate for and write the null-terminator at the end of the string.
If wordLength == 0 it means we are reading a new word and have to allocate memory for it for the first time. If wordLength != 0, we have to reallocate to append to an existing word.
We copy the word (segment) currently in the lineBuffer to the array of strings.
Now, we do some bookkeeping. If we reached the end of a word, we write the null-terminator, increment the index to point to the next word location and reset wordLength. If this wasn't the case, we only increment the wordLength with the length of the segment we just read. Finally, we update wordStartIdx, which still points to the start of the word, to point to the end of the word, so we can continue iterating over the buffer.
State 1
Having finished the State 2 processing, we go into State 1, which has only two lines. It simply advances the index until we land on non-whitespace. Note that the null-terminator of the lineBuffer ('\0') does not count as whitespace, so this loop will not continue past the end of the buffer.
After all input has been processed, we shrink the array of strings to the actual size of its data. This "corrects" the allocation policy of increasing the size by 2n+1 each time it wasn't large enough.
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// BUFFER_SIZE must be >1U
#define BUFFER_SIZE 1024U
/* A growable array of heap-allocated strings. */
struct WordArray
{
char **words;          /* array of string pointers, resized with realloc() */
size_t numberOfWords;  /* number of slots currently allocated in words */
};
/* Returns an empty array (words == NULL, numberOfWords == 0). */
static struct WordArray wordArrayConstruct(void);
/* Resizes words to newSize slots; exits the program on allocation failure. */
static void wordArrayResize(struct WordArray *wordArray, size_t const newSize);
/* Frees every string and the words array itself. */
static void wordArrayDestruct(struct WordArray *wordArray);
/* Reads whitespace-separated words from stream into wordArray. */
static void wordArrayRead(FILE *restrict stream, struct WordArray *wordArray);
/* realloc() wrapper that exits the program on failure. */
static char *reallocStringWrapper(char *restrict str, size_t const newSize);
/* Prints one "index: word" line per slot. */
static void wordArrayPrint(struct WordArray const *wordArray);
int main(void)
{
struct WordArray wordArray = wordArrayConstruct();
wordArrayRead(stdin, &wordArray);
wordArrayPrint(&wordArray);
wordArrayDestruct(&wordArray);
}
/*
 * Reads stream to end-of-file and stores every whitespace-separated word as
 * a separate NUL-terminated heap string in wordArray->words.
 *
 * Input is consumed in BUFFER_SIZE chunks with fgets(); a word that spans
 * several chunks is assembled piecewise (wordLength holds the number of
 * characters already copied for the word in progress). On return the words
 * array has been shrunk to exactly the number of words found.
 *
 * Allocation failures terminate the program (see wordArrayResize and
 * reallocStringWrapper).
 */
static void wordArrayRead(FILE *restrict stream, struct WordArray *wordArray)
{
    size_t wordArrayIdx = 0U;   /* slot of the word being built / next free slot */
    size_t wordLength = 0U;     /* characters already stored for an unfinished word */
    char lineBuffer[BUFFER_SIZE];

    while (fgets(lineBuffer, sizeof lineBuffer, stream) != NULL)
    {
        size_t wordStartIdx = 0U;
        while (lineBuffer[wordStartIdx] != '\0')
        {
            /* <ctype.h> functions require a value representable as
             * unsigned char; a negative plain char would be undefined. */
            if (!isspace((unsigned char) lineBuffer[wordStartIdx]) || wordLength != 0U)
            {
                /* Walk to the end of the word segment. Stop at the
                 * terminator as well as at whitespace: the bytes after the
                 * '\0' are stale when fgets() delivered a short chunk (for
                 * example a last line without a trailing newline). */
                size_t wordEndIdx = wordStartIdx;
                while (lineBuffer[wordEndIdx] != '\0'
                       && !isspace((unsigned char) lineBuffer[wordEndIdx]))
                    ++wordEndIdx;

                if (wordArrayIdx >= wordArray->numberOfWords)
                    wordArrayResize(wordArray, wordArray->numberOfWords * 2U + 1U);

                size_t wordSegmentLength = wordEndIdx - wordStartIdx;
                /* Index BUFFER_SIZE - 1 is only reached when the chunk is
                 * completely full, in which case the word may continue in
                 * the next chunk. Anything earlier is a real word end. */
                size_t foundWordEnd = wordEndIdx != BUFFER_SIZE - 1U; // 0 or 1 bool

                // Allocate for a new word, or reallocate for an existing word
                // If a word end was found, add 1 to the size for the '\0' character
                char *dest = wordLength == 0U ? NULL : wordArray->words[wordArrayIdx];
                size_t allocSize = wordLength + wordSegmentLength + foundWordEnd;
                wordArray->words[wordArrayIdx] = reallocStringWrapper(dest, allocSize);
                memcpy(&(wordArray->words[wordArrayIdx][wordLength]),
                       &lineBuffer[wordStartIdx], wordSegmentLength);

                if (foundWordEnd)
                {
                    wordArray->words[wordArrayIdx][wordLength + wordSegmentLength] = '\0';
                    ++wordArrayIdx;
                    wordLength = 0U;
                }
                else
                {
                    wordLength += wordSegmentLength;
                }
                wordStartIdx = wordEndIdx;
            }
            /* '\0' is not whitespace, so this never runs past the chunk. */
            while (isspace((unsigned char) lineBuffer[wordStartIdx]))
                ++wordStartIdx;
        }
    }

    /* The input may end exactly at a chunk boundary in the middle of a word;
     * that word is still unterminated and uncounted, so finish it here. */
    if (wordLength != 0U)
    {
        wordArray->words[wordArrayIdx] =
            reallocStringWrapper(wordArray->words[wordArrayIdx], wordLength + 1U);
        wordArray->words[wordArrayIdx][wordLength] = '\0';
        ++wordArrayIdx;
    }

    // All done. Shrink the words array to the size of the actual data
    if (wordArray->numberOfWords != 0U)
        wordArrayResize(wordArray, wordArrayIdx);
}
static struct WordArray wordArrayConstruct(void)
{
return (struct WordArray) {.words = NULL, .numberOfWords = 0U};
}
/*
 * Resizes wordArray->words to hold newSize string pointers (newSize > 0).
 *
 * Slots added by growing are set to NULL, so wordArrayDestruct() can safely
 * free() every slot even if some were never filled. When the request cannot
 * be satisfied the array is destroyed, an error is printed to stderr and the
 * program exits with EXIT_FAILURE.
 */
static void wordArrayResize(struct WordArray *wordArray, size_t const newSize)
{
    assert(newSize > 0U);

    size_t const oldSize = wordArray->numberOfWords;
    char **tmp = NULL;

    /* Refuse sizes whose byte count would wrap around size_t. */
    if (newSize <= (size_t) -1 / sizeof *tmp)
        tmp = realloc(wordArray->words, newSize * sizeof *tmp);

    if (tmp == NULL)
    {
        /* realloc() left the old block intact, so it can still be freed. */
        wordArrayDestruct(wordArray);
        fprintf(stderr, "WordArray allocation error\n");
        exit(EXIT_FAILURE);
    }

    /* realloc() does not initialise the newly added tail. */
    for (size_t slot = oldSize; slot < newSize; ++slot)
        tmp[slot] = NULL;

    wordArray->words = tmp;
    wordArray->numberOfWords = newSize;
}
/* Releases every string held by wordArray, clearing each slot as it goes,
 * and then releases the pointer array itself. */
static void wordArrayDestruct(struct WordArray *wordArray)
{
    size_t remaining = wordArray->numberOfWords;
    char **slot = wordArray->words;

    while (remaining-- > 0U)
    {
        free(*slot);
        *slot = NULL;
        ++slot;
    }
    free(wordArray->words);
}
/*
 * realloc() with "succeed or die" semantics.
 *
 * Returns the block resized to newSize bytes. If realloc() returns NULL the
 * original block is freed, a message is written to stderr and the program
 * exits with EXIT_FAILURE.
 */
static char *reallocStringWrapper(char *restrict str, size_t const newSize)
{
    char *resized = realloc(str, newSize);

    if (resized != NULL)
        return resized;

    free(str);
    fprintf(stderr, "Realloc string allocation error\n");
    exit(EXIT_FAILURE);
}
/* Writes one "index: word" line to standard output for every slot. */
static void wordArrayPrint(struct WordArray const *wordArray)
{
    size_t const total = wordArray->numberOfWords;
    size_t position = 0U;

    while (position < total)
    {
        printf("%zu: %s\n", position, wordArray->words[position]);
        ++position;
    }
}
Note: This program reads input from stdin, as Unix/Linux utilities typically do. Use input redirection to read from a file, or pass a different FILE * stream to the wordArrayRead function.
to allocate dynamic 2D array you need:
/*
 * Allocates a contiguous rows x columns array of char.
 *
 * rows     number of rows
 * columns  number of chars per row (also the bound of the pointed-to row type)
 * array    out parameter; receives the allocation, or NULL when columns is 0,
 *          when rows * columns would overflow size_t, or when malloc() fails.
 *          Release the result with free().
 */
void allocChar2Darray(size_t rows, size_t columns, char (**array)[columns])
{
    if (array == NULL)
        return;

    /* sizeof **array equals columns; reject sizes that would wrap around
     * and silently produce a too-small block. */
    if (columns == 0 || rows > (size_t) -1 / columns) {
        *array = NULL;
        return;
    }

    *array = malloc(rows * sizeof **array);
}
I have a file in which few lines are being dumped. I want to post process the file and insert a header at the beginning of this file. The header is basically the number of lines in the file before inserting the header. I need to write this in C preferable irrespective of the platform. I was thinking of using system command in C code as follows to calculate the number of lines in the file:
int header = system("wc -l myfile");
save header in the temp file;
append myfile in temp file;
replace or move tempfile with myfile
Looking for a better way.
Use diff ( https://unix.stackexchange.com/questions/252927/what-do-the-numbers-in-the-line-mean-in-output-of-diff ) then you know which lines were dumped
To write text to the begin of a file use shell commands like sed -i '1s/^/task goes here\n/' todo.txt (https://superuser.com/questions/246837/how-do-i-add-text-to-the-beginning-of-a-file-in-bash), in C you can execute shell commands using sh or system depends if the shell command is built-in.
If you want this platform independent you have to count the lines in C like in https://www.geeksforgeeks.org/c-program-count-number-lines-file/
The straightforward option is to do the postprocessing and line counting in one pass (counting the number of lines written to a temporary file), then creating an another temporary file but this time in the same directory as the target file, writing the header to the latter temporary file, copying the contents of the first temporary file to the latter temporary file, removing the first temporary file, and finally renaming the latter temporary file as the final file.
This would be very simple to do using POSIX C getline() (which is included in standard C libraries in Linux, BSDs, and Mac OS), but it is not available on Windows. We'd need to first write a wrapper function, that on non-Windows systems uses POSIX getline() to read unlimited-length lines; and uses fgetc() or some other function to implement same/similar function Windows.
Another approach would be to read the file using fread() into a dynamically allocated buffer, and call a callback/filtering function for each line read. This approach has the benefit of being able to support any newline encoding (NUL, LF, CR, LF CR, or CR LF) and minimizing buffer copies, but the downside of being unfamiliar approach to many programmers.
Consider the following example implementation of for_each_line():
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* Line terminator conventions distinguished by for_each_line(). */
typedef enum {
NEWLINE_INVALID = -1, /* Invalid NEWLINE_ enum value */
NEWLINE_NONE = 0, /* No newline at end of line; ended at end of stream */
NEWLINE_CR = 1, /* "\r" */
NEWLINE_LF = 2, /* "\n" */
NEWLINE_CRLF = 3, /* "\r\n" */
NEWLINE_LFCR = 4, /* "\n\r" */
NEWLINE_NUL = 5 /* "", or '\0', */
} newline_type;
/*
 * Reads 'source' with fread() into a growable heap buffer and calls
 * 'filter(line, len, context)' once per line. Lines end at CR, LF, CR LF,
 * LF CR or an embedded NUL; the terminator is overwritten with '\0' before
 * the call and is not included in 'len'.
 *
 * Returns:
 *   -1  source or filter is NULL
 *   -2  source was already in an error state, or a read error occurred
 *   -3  the buffer could not be (re)allocated
 *   the first nonzero value returned by filter (processing stops there)
 *   otherwise filter's result for a final unterminated line, or 0
 *
 * The buffer is freed before every return that follows its allocation.
 */
int for_each_line(FILE *source,
int (*filter)(char *line, size_t len, void *context),
void *context)
{
char *data = NULL; /* source data buffer */
size_t size = 0; /* size of source data buffer in chars */
size_t head = 0; /* start of next line to pass to filter */
size_t tail = 0; /* next char to read into buffer */
int newline = NEWLINE_INVALID;
/* Neither source nor filter can be NULL. */
if (!source || !filter)
return -1; /* -1: Invalid parameters */
/* source stream should not be in error state. */
if (ferror(source))
return -2; /* -2: Error reading source. */
while (1) {
size_t next = head;
size_t n;
int retval;
/* Check if we have a complete line to supply to filter. */
if (tail > head) {
/* To be able to safely use strcspn(), we need a terminating nul at end of buffer. */
/* fread() below is limited to size - tail - 1 chars, so data[tail] is in bounds. */
data[tail] = '\0';
/* Find the first occurrence of any newline character. */
n = strcspn(data + head, "\r\n");
if (n < tail - head) {
/* Found, and it wasn't the nul we added. */
if (data[head + n] == '\r') {
/* CR or CR LF */
/* If the CR is the last char read, data[head + n + 1] is the nul added above. */
if (data[head + n + 1] == '\n') {
newline = NEWLINE_CRLF;
next = head + n + 2;
} else {
newline = NEWLINE_CR;
next = head + n + 1;
}
} else
if (data[head + n] == '\n') {
/* LF or LF CR */
if (data[head + n + 1] == '\r') {
newline = NEWLINE_LFCR;
next = head + n + 2;
} else {
newline = NEWLINE_LF;
next = head + n + 1;
}
} else {
/* Must have been NUL */
newline = NEWLINE_NUL;
next = head + n + 1;
}
/* Note: A two-character newline might have been split at the end of a buffer,
so at this point, 'newline' is *tentative*, not exact. */
if (newline != NEWLINE_INVALID) {
/* We replace the newline with end-of-string NUL mark. */
data[head + n] = '\0';
retval = filter(data + head, n, context);
if (retval) {
free(data);
return retval;
}
head = next;
/* Check for next line, before trying to refill the buffer. */
continue;
}
}
}
/* No complete line is buffered at this point: compact, grow and refill. */
/* If the buffer is at least half full, move the data to beginning of buffer. */
if (head >= tail) {
/* Buffer is completely empty. */
head = 0;
tail = 0;
} else
if (head > 0 && tail >= size / 2) {
/* Buffer is at least half full, and not aligned at start of buffer. Move. */
tail -= head;
memmove(data, data + head, tail);
head = 0;
}
/* Need to grow the buffer? */
if (tail + 2 >= size) {
/* TODO: Improve on this linear growth policy. */
const size_t new_size = (tail | 4095) + 4097 - 32;
char *new_data;
new_data = realloc(data, new_size);
if (!new_data) {
free(data);
return -3; /* Not enough memory. */
}
data = new_data;
size = new_size;
}
/* Read some data into the buffer. */
/* One char is kept in reserve for the nul written at data[tail]. */
n = fread(data + tail, 1, size - tail - 1, source);
if (n > 0) {
tail += n;
/* Consume partial newline, since now we know we can. */
if (newline == NEWLINE_CR && data[head] == '\n') {
newline = NEWLINE_CRLF;
head++;
} else
if (newline == NEWLINE_LF && data[head] == '\r') {
newline = NEWLINE_LFCR;
head++;
}
} else
if (ferror(source)) {
free(data);
return -2; /* Error reading source. */
} else {
/* End of input. */
if (tail > head) {
/* Final line without a terminator: pass what is left to filter. */
data[tail] = '\0';
retval = filter(data + head, tail - head, context);
free(data);
return retval;
} else {
free(data); /* Note: free(NULL) is safe. */
return 0;
}
}
}
/* Execution never reaches here. */
}
/* State shared between copy_count_lines() and its per-line callback. */
struct context {
FILE *out; /* stream each line is written to */
const char *newline; /* string written after every line */
unsigned long long lines; /* number of lines written so far */
};
/*
 * for_each_line() callback: writes the line followed by the configured
 * newline string to the output stream and bumps the line counter.
 * Returns the stream's error indicator, so a write error stops iteration.
 */
static int do_copy_count_lines(char *line, size_t len, void *ctxptr)
{
    struct context *const state = ctxptr;
    FILE *const sink = state->out;

    (void)len; /* The line is NUL-terminated, so its length is not needed. */

    fputs(line, sink);
    fputs(state->newline, sink);
    state->lines += 1;

    return ferror(sink);
}
/*
 * Copies source to target line by line, ending every line with "\n".
 *
 * If linecount is non-NULL it receives the number of lines written.
 * Returns -1 when source or target is NULL, otherwise whatever
 * for_each_line() returned.
 */
int copy_count_lines(FILE *source, FILE *target, unsigned long long *linecount)
{
    if (source == NULL || target == NULL)
        return -1; /* Invalid parameters */

    struct context ctx;
    ctx.lines = 0;
    ctx.newline = "\n";
    ctx.out = target;

    const int status = for_each_line(source, do_copy_count_lines, &ctx);

    if (linecount != NULL)
        *linecount = ctx.lines;

    return status;
}
/* Copy standard input to standard output and report the line count (or the
 * failure code) on standard error. */
int main(void)
{
    unsigned long long copied = 0;
    const int status = copy_count_lines(stdin, stdout, &copied);

    /* Flush the copied data before anything is written to stderr. */
    fflush(stdout);

    if (status != 0) {
        fprintf(stderr, "Failed: for_each_line() returned %d.\n", status);
        return EXIT_FAILURE;
    }

    fprintf(stderr, "Copied %llu lines from standard input to standard output.\n", copied);
    return EXIT_SUCCESS;
}
The above example program simply reads from standard input, writing the lines (replacing any newline convention with the default newline convention for standard output), and the number of lines copied to standard error.
Because this uses fread(), it is very fast, but not suitable for interactive input (from terminals or terminal emulators), because it reads the input in large-ish blocks.
A variant that uses fgetc() is easy to implement, well suited for interactive input, but because of the large number of function calls, is somewhat slower.
Here is an example variant for use with wide character strings. (Do note that Windows' wide character support may be broken; the following is strictly C standard compliant code.)
#include <stdlib.h>
#include <locale.h>
#include <stdio.h>
#include <wchar.h>
#include <errno.h>
/* Standard C guarantees only EDOM, EILSEQ and ERANGE, so each status that
   wgetline() reports through errno falls back to one of those when the
   preferred constant is unavailable. Every value can be overridden by
   defining the macro before this point. */

/* wgetline(): end of stream */
#ifndef WGETLINE_EOF
#define WGETLINE_EOF 0
#endif

/* wgetline(): invalid parameters */
#ifndef WGETLINE_EINVAL
#ifdef EINVAL
#define WGETLINE_EINVAL EINVAL
#else
#define WGETLINE_EINVAL EILSEQ
#endif
#endif

/* wgetline(): not enough memory */
#ifndef WGETLINE_ENOMEM
#ifdef ENOMEM
#define WGETLINE_ENOMEM ENOMEM
#else
#define WGETLINE_ENOMEM ERANGE
#endif
#endif

/* wgetline(): read error */
#ifndef WGETLINE_EIO
#ifdef EIO
#define WGETLINE_EIO EIO
#else
#define WGETLINE_EIO EDOM
#endif
#endif

/*
 * Reads one line of wide characters from handle into *lineptr, growing the
 * buffer with realloc() as needed. *sizeptr holds the buffer capacity in
 * wchar_t units; a capacity of 0 means "no buffer yet".
 *
 * A trailing L'\n' is kept in the line. Returns the number of wide
 * characters stored (excluding the terminating L'\0'), or 0 with errno set
 * to one of the WGETLINE_* values when the arguments are invalid, the stream
 * is in an error or end-of-file state, or memory runs out.
 */
size_t wgetline(wchar_t **lineptr, size_t *sizeptr, FILE *handle)
{
    if (lineptr == NULL || sizeptr == NULL || handle == NULL) {
        errno = WGETLINE_EINVAL;
        return 0;
    }
    if (ferror(handle)) {
        errno = WGETLINE_EIO;
        return 0;
    }
    if (feof(handle)) {
        errno = WGETLINE_EOF;
        return 0;
    }

    size_t capacity = *sizeptr;
    wchar_t *buffer = capacity ? *lineptr : NULL;
    size_t length = 0;

    for (;;) {
        /* Keep room for at least three more wide characters. */
        if (length + 3 > capacity) {
            /* TODO: Better buffer growth policy? capacity is in wchar_t's. */
            capacity = (length | 1023) + 1025 - 16;
            buffer = realloc(buffer, capacity * sizeof buffer[0]); /* realloc(NULL, ..) is safe. */
            if (buffer == NULL) {
                errno = WGETLINE_ENOMEM;
                return 0;
            }
            *lineptr = buffer;
            *sizeptr = capacity;
        }

        const wint_t wc = getwc(handle);

        if (wc == WEOF) {
            buffer[length] = L'\0';
            if (length == 0)
                errno = WGETLINE_EOF;
            return length;
        }

        buffer[length++] = wc;
        if (wc == L'\n') {
            buffer[length] = L'\0';
            return length;
        }
    }
    /* Never reached. */
}
int main(void)
{
wchar_t *line = NULL;
size_t size = 0;
size_t len;
unsigned long long lines = 0uLL, wchars = 0uLL;
setlocale(LC_ALL, "");
if (fwide(stdin, 1) != 1)
fprintf(stderr, "Warning: Your C library or locale does not support wide character standard input.\n");
if (fwide(stdout, 1) != 1)
fprintf(stderr, "Warning: Your C library or locale does not support wide character standard output.\n");
while (1) {
len = wgetline(&line, &size, stdin);
if (!len)
break;
lines++;
wchars += len;
fputws(line, stdout);
}
if (ferror(stdin) || !feof(stdin)) {
fprintf(stderr, "Error reading from standard input.\n");
return EXIT_FAILURE;
}
fflush(stdout);
if (ferror(stdout)) {
fprintf(stderr, "Error writing to standard output.\n");
return EXIT_FAILURE;
}
fprintf(stderr, "Copied %llu lines (%llu wide characters).\n", lines, wchars);
return EXIT_SUCCESS;
}
To give you an idea on how performant these are, time wc -l /usr/share/dict/american-english takes 5ms real time on my system; time ./ex1 < /usr/share/dict/american-english > /dev/null takes 12ms; and time ./ex2 < /usr/share/dict/american-english > /dev/null takes 85ms. All three agree it has 102,305 lines. It has 971,304 wide characters, using UTF-8 character set (with file size 971,578 bytes).
If my program is going to have large lists of numbers passed in through stdin, what would be the most efficient way of reading this in?
The input I'm going to be passing into the program is going to be of the following format:
3,5;6,7;8,9;11,4;;
I need to process the input so that I can use the numbers between the semicolons (i.e. I want to be able to use 3 and 5, 6 and 7, etc.). The ;; indicates that it is the end of the line.
I was thinking of using a buffered reader to read entire lines and then using parseInt.
Would this be the most efficient way of doing it?
This is a working solution
One way to do this is to use strtok() and store the values in an array. Ideally, dynamically allocated.
int main(int argc, char *argv[])
{
int lst_size=100;
int line_size=255;
int lst[lst_size];
int count=0;
char buff[line_size];
char * token=NULL;
fgets (buff, line_size, stdin); //Get input
Call strtok, passing ',' and ';' as the delimiters.
token=strtok(buff, ";,");
lst[count++]=atoi(token);
while(token=strtok(NULL, ";,")){
lst[count++]=atoi(token);
}
Finally, you have to reduce the count by 1. strtok skips the empty field between the two semicolons of ";;", but the newline that fgets keeps after them comes back as one last token; atoi(token) returns 0 for it and that 0 is stored at the last index, which you don't want.
count--;
}
One other fairly elegant way to handle this is to allow strtol to parse the input by advancing the string to be read to endptr as returned by strtol. Combined with an array allocated/reallocated as needed, you should be able to handle lines of any length (up to memory exhaustion). The example below uses a single array for the data. If you want to store multiple lines, each as a separate array, you can use the same approach, but start with a pointer to array of pointers to int. (i.e. int **numbers and allocate the pointers and then each array). Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
/*
 * Reads every line from stdin with getline(), converts each run of decimal
 * digits with strtol() and stores the values in a dynamically grown array,
 * which is printed once all input has been consumed.
 *
 * Any character other than '0'..'9' between numbers is treated as a
 * delimiter. Parsing of a line stops when strtol() reports an error through
 * errno (for example ERANGE). Exits with EXIT_FAILURE if the array cannot
 * be grown.
 *
 * NOTE(review): getline() is POSIX; depending on the compiler flags a
 * feature-test macro such as _POSIX_C_SOURCE may be required.
 */
int main () {

    char *ln = NULL;                /* NULL forces getline to allocate  */
    size_t n = 0;                   /* capacity of ln, kept by getline  */
    int *numbers = NULL;            /* array to hold numbers            */
    size_t nmax = NMAX;             /* allocated slots in numbers       */
    size_t idx = 0;                 /* number of slots in use           */

    if (!(numbers = calloc (NMAX, sizeof *numbers))) {
        fprintf (stderr, "error: memory allocation failed.");
        return 1;
    }

    /* read each line from stdin - dynamically allocated */
    while (getline (&ln, &n, stdin) != -1)
    {
        char *p = ln;       /* pointer for use with strtol */
        char *ep = NULL;

        errno = 0;
        while (errno == 0)
        {
            /* parse/convert the number starting at p */
            long value = strtol (p, &ep, 10);

            /* note: overflow/underflow checks omitted */
            /* if valid conversion to number */
            if (errno == 0 && p != ep)
            {
                /* Grow BEFORE storing. Checking only at the bottom of the
                 * loop is skipped by the end-of-line break, which would let
                 * the next line write one element past the array. */
                if (idx == nmax)
                {
                    int *tmp = realloc (numbers, 2 * nmax * sizeof *numbers);
                    if (!tmp) {
                        fprintf (stderr, "Error: struct reallocation failure.\n");
                        exit (EXIT_FAILURE);
                    }
                    numbers = tmp;
                    memset (numbers + nmax, 0, nmax * sizeof *numbers);
                    nmax *= 2;
                }
                numbers[idx++] = (int) value;
            }

            /* skip delimiters/move pointer to next digit; '0' and '9' are
             * digits themselves, hence the strict comparisons */
            while (*ep && (*ep < '0' || *ep > '9')) ep++;
            if (!*ep)
                break;      /* end of line */
            p = ep;
        }
    }

    /* free mem allocated by getline (free(NULL) is a no-op) */
    free (ln);

    /* show values stored in array */
    for (size_t i = 0; i < idx; i++)
        printf (" numbers[%2zu] %d\n", i, numbers[i]);

    /* free mem allocate to numbers */
    free (numbers);

    return 0;
}
Output
$ echo "3,5;6,7;8,9;11,4;;" | ./bin/prsistdin
numbers[ 0] 3
numbers[ 1] 5
numbers[ 2] 6
numbers[ 3] 7
numbers[ 4] 8
numbers[ 5] 11
numbers[ 6] 4
Also works where the string is stored in a file as:
$ cat dat/numsemic.csv | ./bin/prsistdin
or
$ ./bin/prsistdin < dat/numsemic.csv
Using fgets and without size_t
It took a little reworking to come up with a revision I was happy with that eliminated getline and substituted fgets. getline is far more flexible, handling the allocation of space for you, with fgets it is up to you. (not to mention getline returning the actual number of chars read without having to call strlen).
My goal here was to preserve the ability to read any length line to meet your requirement. That either meant initially allocating some huge line buffer (wasteful) or coming up with a scheme that would reallocate the input line buffer as needed in the event it was longer than the space initially allocate to ln. (this is what getline does so well). I'm reasonably happy with the results. Note: I put the reallocation code in functions to keep main reasonably clean. footnote 2
Take a look at the following code. Note, I have left the DEBUG preprocessor directives in the code allowing you to compile with the -DDEBUG flag if you want to have it spit out each time it allocates. [footnote 1] You can compile the code with:
gcc -Wall -Wextra -o yourexename yourfilename.c
or if you want the debugging output (e.g. set LMAX to 2 or something less than the line length), use the following:
gcc -Wall -Wextra -o yourexename yourfilename.c -DDEBUG
Let me know if you have questions:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define NMAX 256
#define LMAX 1024
char *realloc_char (char *sp, unsigned int *n); /* reallocate char array */
int *realloc_int (int *sp, unsigned int *n); /* reallocate int array */
char *fixshortread (FILE *fp, char **s, unsigned int *n); /* read all stdin */
/*
 * fgets() variant: reads every line from stdin into a buffer that is
 * enlarged by fixshortread() when a line does not fit, converts each run of
 * decimal digits with strtol() and stores the values in an array grown by
 * realloc_int(). The collected values are printed at the end.
 *
 * Any character other than '0'..'9' between numbers is treated as a
 * delimiter. Parsing of a line stops when strtol() reports an error through
 * errno.
 */
int main () {

    char *ln = NULL;                /* dynamically allocated for fgets  */
    int *numbers = NULL;            /* array to hold numbers            */
    unsigned int nmax = NMAX;       /* numbers check for reallocation   */
    unsigned int lmax = LMAX;       /* ln check for reallocation        */
    unsigned int idx = 0;           /* numbers array index              */
    unsigned int i = 0;             /* simple counter variable          */
    char *nl = NULL;

    /* initial allocation for numbers */
    if (!(numbers = calloc (NMAX, sizeof *numbers))) {
        fprintf (stderr, "error: memory allocation failed (numbers).");
        return 1;
    }

    /* initial allocation for ln */
    if (!(ln = calloc (LMAX, sizeof *ln))) {
        fprintf (stderr, "error: memory allocation failed (ln).");
        free (numbers);
        return 1;
    }

    /* read each line from stdin - dynamically allocated */
    while (fgets (ln, lmax, stdin) != NULL)
    {
        /* provide a fallback to read remainder of line
           if the line length exceeds lmax */
        if (!(nl = strchr (ln, '\n')))
            fixshortread (stdin, &ln, &lmax);
        else
            *nl = 0;

        char *p = ln;       /* pointer for use with strtol */
        char *ep = NULL;

        errno = 0;
        while (errno == 0)
        {
            /* parse/convert the number starting at p */
            long value = strtol (p, &ep, 10);

            /* note: overflow/underflow checks omitted */
            /* if valid conversion to number */
            if (errno == 0 && p != ep)
            {
                /* Grow before storing, and keep the pointer realloc_int()
                 * returns: the block may have moved. */
                if (idx == nmax)
                    numbers = realloc_int (numbers, &nmax);
                numbers[idx++] = (int) value;
            }

            /* skip delimiters/move pointer to next digit; '0' and '9' are
             * digits themselves, hence the strict comparisons */
            while (*ep && (*ep < '0' || *ep > '9')) ep++;
            if (!*ep)
                break;      /* end of line */
            p = ep;
        }
    }

    /* free the line buffer (free(NULL) is a no-op) */
    free (ln);

    /* show values stored in array */
    for (i = 0; i < idx; i++)
        printf (" numbers[%2u] %d\n", (unsigned int)i, numbers[i]);

    /* free mem allocate to numbers */
    free (numbers);

    return 0;
}
/*
 * Doubles a char buffer.
 *
 * sp points to a buffer of *n chars. The buffer is reallocated to 2 * *n
 * chars, the newly added half is zero-filled and *n is updated to the new
 * count. Returns the (possibly moved) buffer; callers must use the returned
 * pointer. Exits with EXIT_FAILURE when realloc() fails.
 */
char *realloc_char (char *sp, unsigned int *n)
{
    const unsigned int old_count = *n;
    char *grown = realloc (sp, 2 * old_count * sizeof *sp);
#ifdef DEBUG
    printf ("\n  reallocating %u to %u\n", old_count, old_count * 2);
#endif
    if (grown == NULL) {
        fprintf (stderr, "Error: char pointer reallocation failure.\n");
        exit (EXIT_FAILURE);
    }

    /* realloc() leaves the added upper half uninitialised. */
    memset (grown + old_count, 0, old_count * sizeof *grown);
    *n = old_count * 2;

    return grown;
}
/*
 * Double the size of an int array.
 *
 *   sp  array previously obtained from malloc/calloc/realloc
 *   n   in: current element count; out: doubled element count
 *
 * Existing elements are preserved and the added elements are set to 0.
 * Returns the (possibly moved) array, which the caller must store.
 * Exits the program with EXIT_FAILURE if realloc fails.
 */
int *realloc_int (int *sp, unsigned int *n)
{
    int *bigger;

    bigger = realloc (sp, 2 * *n * sizeof *sp);

#ifdef DEBUG
    printf ("\n reallocating %u to %u\n", *n, *n * 2);
#endif

    if (bigger)
    {
        /* clear the freshly added tail [*n, 2 * *n) */
        memset (&bigger[*n], 0, *n * sizeof *bigger);
        *n += *n;
        return bigger;
    }

    fprintf (stderr, "Error: int pointer reallocation failure.\n");
    exit (EXIT_FAILURE);
}
/*
 * If fgets filled the buffer without reaching the end of the line, read the
 * remainder of that line character by character, growing the buffer as
 * needed.
 *
 *   fp  stream the short read came from
 *   s   address of the caller's buffer pointer; updated whenever the
 *       buffer is moved by a reallocation
 *   n   in/out: capacity of *s in chars
 *
 * On return *s holds the line, NUL-terminated, without the trailing
 * newline. Returns *s. (realloc_char exits the program on failure.)
 */
char *fixshortread (FILE *fp, char **s, unsigned int *n)
{
    unsigned int i = *n - 1;    /* last slot of the buffer: where fgets put its NUL when it filled the buffer */
    int c = 0;

    /* realloc may move the block, so the result must be stored back
       through s; discarding it leaves the caller with a dangling pointer */
    *s = realloc_char (*s, n);

    do
    {
        c = fgetc (fp);
        (*s)[i] = (char) c;
        i++;
        if (i == *n)
            *s = realloc_char (*s, n);
    } while (c != '\n' && c != EOF);

    /* overwrite the stored newline (or EOF marker) with the terminator */
    (*s)[i-1] = 0;

    return *s;
}
footnote 1
nothing special about the choice of the word DEBUG (it could have been DOG, etc..), the point to take away is if you want to conditionally include/exclude code, you can simply use preprocessor flags to do that. You just add -Dflagname to pass flagname to the compiler.
footnote 2
you can combine the reallocation functions into a single void* function that accepts a void pointer as its argument along with the size of the type to be reallocated and returns a void pointer to the reallocated space -- but we will leave that for a later date.
What you could do is read in from stdin using fgets or fgetc. You could also use getline() since you're reading in from stdin.
Once you have read in the line, you can use strtok() with ";" as the delimiter to split the string into pieces at the semicolons. Keep calling strtok() in a loop until it returns NULL, which means no tokens remain. Also, in C you can use atoi() to convert strings to integers (or strtol() if you need to detect conversion errors).
For Example:
int length = 256;
char* str = (char*)malloc(length);
int err = getline(&str, &length, stdin);
I would read in the command args, then parse using the strtok() library method
http://man7.org/linux/man-pages/man3/strtok.3.html
(The web page referenced by the URL above even has a code sample of how to use it.)
I'm a little rusty at C, but could this work for you?
char[1000] remainder;
int first, second;
fp = fopen("C:\\file.txt", "r"); // Error check this, probably.
while (fgets(&remainder, 1000, fp) != null) { // Get a line.
while (sscanf(remainder, "%d,%d;%s", first, second, remainder) != null) {
// place first and second into a struct or something
}
}
getchar_unlocked() is what you are looking for.
Here is the code:
#include <stdio.h>
/*
 * Read the next run of decimal digits from stdin into *x.
 *
 * Every non-digit character in front of the run is skipped, so signs are
 * not interpreted. Returns 1 when a number was stored in *x, 0 when EOF was
 * reached before any digit. No overflow check is performed on the
 * accumulated value.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so the call would fail to link whenever the
 * compiler chooses not to inline it.
 */
static inline int fastRead_int(int * x)
{
    register int c = getchar_unlocked();
    *x = 0;

    // clean stuff in front of + look for EOF
    while ((c < 48 || c > 57) && c != EOF) {
        c = getchar_unlocked();
    }
    if (c == EOF)
        return 0;

    // build int: (*x<<1) + (*x<<3) is *x * 10, 48 is '0'
    for (; c > 47 && c < 58; c = getchar_unlocked()) {
        *x = (*x<<1) + (*x<<3) + c - 48;
    }
    return 1;
}
/* Print every integer fastRead_int can extract from stdin, space separated. */
int main()
{
    int value;

    for (;;) {
        if (!fastRead_int(&value))
            break;              /* no more numbers on stdin */
        printf("%d ", value);
    }
    return 0;
}
For input 1;2;2;;3;;4;;;;;54;;;; the code above produces 1 2 2 3 4 54.
I guarantee, this solution is a lot faster than others presented in this topic. It is not only using getchar_unlocked(), but also uses register, inline as well as multiplying by 10 tricky way: (*x<<1) + (*x<<3).
I wish you good luck in finding better solution.
i have a problem with reading stdin of unknown size. In fact its a table in .txt file, which i get to stdin by calling parameter '<'table.txt. My code should look like this:
#include <stdio.h>
#include <string.h>
/*
 * Read up to 10 lines from stdin into a fixed table and echo each one.
 *
 * The loop is driven by the result of fgets rather than by feof(): feof()
 * only becomes true after a read has already failed, so testing it first
 * processes a stale row once more at end of input. The row index is also
 * bounded so that an input longer than the table cannot write past it.
 */
int main(int argc,char *argv[])
{
    char words[10][1024];
    int i=0;

    (void)argc;
    (void)argv;

    while (i < 10 && fgets(words[i], sizeof words[i], stdin) != NULL)
    {
        printf("%s", words[i]);
        i++;
    }
    return 0;
}
but the problem is that I don't know the number of lines, which in this case is 10 (we do know the maximum number of characters in a line: 1024).
It would be great if someone know the solution. Thanks in advance.
You have hit on one of the issues that plagues all new C programmers: how do I dynamically allocate all the memory I need, freeing myself from static limits, while still keeping track of my collection of 'stuff' in memory? This problem usually presents itself when you need to read an unknown number of 'things' from an input. The initial options are (1) declare some limit big enough to work (defeating the purpose), or (2) dynamically allocate pointers as needed.
Obviously, the goal is (2). However, you then run into the problem of "How do I keep track of what I've allocated?" This in itself is an issue that dogs beginners. The problem being: if I dynamically allocate using a bunch of pointers, how do I iterate over the list to get my 'stuff' back out? Also, you have to initialize some initial number of pointers (unless using an advanced data structure like a linked list), so the next question is "what do I do when I run out?"
The usual solution is to allocate an initial set of pointers, then when the limit is reached, reallocate to twice as many as original, and keep going. (as Grayson indicated in his answer).
However, there is one more trick to iterate over the list to get your 'stuff' back out that is worth understanding. Yes, you can allocate with malloc and keep track of the number of pointers used, but you can free yourself from tying a counter to your list of pointers by initially allocating with calloc. That not only allocates space, but also sets the allocated pointers to NULL (or 0). This allows you to iterate over your list with a simple while (pointer != NULL). This provides many benefits when it comes to passing your collection of pointers to functions, etc.. The downside (a minimal one) is that you get to write a reallocation scheme that uses calloc to allocate new space when needed. (bummer, I get to get smarter -- but I have to work to do it...)
You can evaluate whether to use malloc/realloc off-the-shelf, or whether to reallocate using calloc and a custom reallocate function depending on what your requirements are. Regardless, understanding both, just adds more tools to your programming toolbox.
OK, enough jabber, where is the example in all this blather?
Both of the following examples simply read all lines from any text file and print the lines (with pointer index numbers) back to stdout. Both expect that you will provide the filename to read as the first argument on the command line. The only difference between the two is that the second has the reallocation with calloc done in a custom reallocation function. They both allocate 255 pointers initially and double the number of pointers each time the limit is hit. (For fun, you can set MAXLINES to something small like 10 and force repeated reallocations to test.)
first example with reallocation in main()
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
/*
 * Free every string in a NULL-terminated array of heap-allocated strings,
 * then free the array itself. The array must end with a NULL entry.
 */
void free_buffer (char **buffer)
{
    for (char **entry = buffer; *entry != NULL; entry++)
        free (*entry);

    free (buffer);
}
/*
 * Read every line of the file named by argv[1] into a dynamically grown,
 * NULL-terminated array of strings, print the lines with their index, and
 * release everything.
 *
 * The pointer array is grown with calloc + memcpy rather than realloc so
 * that the unused slots are always NULL; the print loop and free_buffer
 * depend on that terminator.
 *
 * Returns 0 on success, 1 on a usage, open or allocation error.
 */
int main (int argc, char **argv) {

    if (argc < 2) {
        fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
        return 1;
    }

    char *line = NULL;      /* forces getline to allocate space for buf */
    ssize_t read = 0;       /* number of characters read by getline */
    size_t n = 0;           /* size of the buffer getline allocated */
    char **filebuf = NULL;
    char **rtmp = NULL;
    int linecnt = 0;
    size_t limit = MAXLINES;
    size_t newlim = 0;

    FILE *ifp = fopen(argv[1],"r");
    if (!ifp)
    {
        fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
        return 1;
    }

    filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
    if (!filebuf)
    {
        fprintf (stderr, "error: filebuf allocation failed, exiting\n");
        fclose (ifp);
        return 1;
    }

    while ((read = getline (&line, &n, ifp)) != -1) /* read each line in file with getline */
    {
        if (read > 0 && line[read - 1] == '\n')     /* strip newline */
            line[--read] = 0;

        /* grow one slot early so the last pointer always stays NULL */
        if ((size_t) linecnt >= (limit - 1))
        {
            newlim = limit * 2;                     /* 2X the old number of pointers */
            rtmp = calloc (newlim, sizeof (*filebuf));  /* calloc to set to NULL */
            if (!rtmp)
            {
                fprintf (stderr, "error: rtmp allocation failed, exiting\n");
                goto fail;
            }
            /* memcpy cannot fail (it always returns its destination), so
               there is nothing to check here */
            memcpy (rtmp, filebuf, linecnt * sizeof (*filebuf));
            free (filebuf);
            filebuf = rtmp;
            limit = newlim;
        }

        filebuf[linecnt] = strdup (line);           /* copy line (strdup allocates) */
        if (!filebuf[linecnt])
        {
            fprintf (stderr, "error: strdup failed, exiting\n");
            goto fail;
        }
        linecnt++;
    }

    fclose(ifp);
    free (line);                                    /* free memory allocated by getline */

    linecnt = 0;                                    /* reset linecnt to iterate filebuf */
    printf ("\nLines read in filebuf buffer:\n\n"); /* output all lines read */
    while (filebuf[linecnt])
    {
        printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
        linecnt++;
    }
    printf ("\n");

    free_buffer (filebuf);                          /* free memory allocated to filebuf */
    return 0;

fail:
    /* filebuf is still NULL-terminated here, so free_buffer is safe */
    free (line);
    fclose (ifp);
    free_buffer (filebuf);
    return 1;
}
second example with reallocation in custom function
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#define MAXLINES 255
/*
 * Release a NULL-terminated array of heap strings: each string first,
 * then the pointer array. Iteration stops at the first NULL slot.
 */
void free_buffer (char **buffer)
{
    size_t slot;

    for (slot = 0; buffer[slot] != NULL; slot++)
        free (buffer[slot]);

    free (buffer);
}
/*
 * Custom realloc using calloc/memcpy: double a pointer array while keeping
 * every unused slot NULL.
 *
 *   lim  in: current number of pointers in buf; out: the doubled count
 *        (unchanged on failure)
 *   buf  current array; freed on success
 *
 * Returns the new array, or NULL if the doubled count overflows or the
 * allocation fails. On failure buf is left untouched and still owned by
 * the caller.
 */
char **recalloc (size_t *lim, char **buf)
{
    size_t newlim = *lim * 2;   /* size_t, not int: avoids truncating the count */
    char **tmp = NULL;

    /* newlim < *lim means the doubling wrapped around */
    if (newlim < *lim || !(tmp = calloc (newlim, sizeof (*buf))))
    {
        fprintf (stderr, "%s(): error, tmp allocation failed, exiting\n", __func__);
        return NULL;
    }

    /* memcpy always returns its destination, so it needs no failure check */
    memcpy (tmp, buf, *lim * sizeof (*buf));
    free (buf);

    *lim = newlim;
    return tmp;
}
/*
 * Read every line of the file named by argv[1] into a NULL-terminated
 * array of strings that is grown by recalloc, print the lines with their
 * index, and release everything.
 *
 * Returns 0 on success, 1 on a usage, open or allocation error.
 */
int main (int argc, char **argv) {

    if (argc < 2) {
        fprintf (stderr, "Error: insufficient input. Usage: %s input_file\n", argv[0]);
        return 1;
    }

    char *line = NULL;      /* forces getline to allocate space for buf */
    ssize_t read = 0;       /* number of characters read by getline */
    size_t n = 0;           /* size of the buffer getline allocated */
    char **filebuf = NULL;
    int linecnt = 0;
    size_t limit = MAXLINES;

    FILE *ifp = fopen(argv[1],"r");
    if (!ifp)
    {
        fprintf(stderr, "\nerror: failed to open file: '%s'\n\n", argv[1]);
        return 1;
    }

    filebuf = calloc (MAXLINES, sizeof (*filebuf)); /* allocate MAXLINES pointers */
    if (!filebuf)
    {
        fprintf (stderr, "error: filebuf allocation failed, exiting\n");
        fclose (ifp);
        return 1;
    }

    while ((read = getline (&line, &n, ifp)) != -1) /* read each line in file with getline */
    {
        if (read > 0 && line[read - 1] == '\n')     /* strip newline */
            line[--read] = 0;

        /* grow one slot early so the last pointer always stays NULL */
        if ((size_t) linecnt >= (limit - 1))
        {
            /* go through a temporary: on failure the old array must stay
               reachable so that it can still be freed */
            char **grown = recalloc (&limit, filebuf);
            if (!grown)
            {
                fprintf (stderr, "error: recalloc failed, exiting.\n");
                goto fail;
            }
            filebuf = grown;
        }

        filebuf[linecnt] = strdup (line);           /* copy line (strdup allocates) */
        if (!filebuf[linecnt])
        {
            fprintf (stderr, "error: strdup failed, exiting\n");
            goto fail;
        }
        linecnt++;
    }

    fclose(ifp);
    free (line);                                    /* free memory allocated by getline */

    linecnt = 0;                                    /* reset linecnt to iterate filebuf */
    printf ("\nLines read in filebuf buffer:\n\n"); /* output all lines read */
    while (filebuf[linecnt])
    {
        printf (" line[%d]: %s\n", linecnt, filebuf[linecnt]);
        linecnt++;
    }
    printf ("\n");

    free_buffer (filebuf);                          /* free memory allocated to filebuf */
    return 0;

fail:
    /* filebuf is still NULL-terminated here, so free_buffer is safe */
    free (line);
    fclose (ifp);
    free_buffer (filebuf);
    return 1;
}
Take a look at both examples. Know that there are many, many ways to do this. These examples just give one approach that provide example of using a few extra tricks than you will normally find. Give them a try. Drop a comment if you need more help.
I suggest that you use malloc and realloc to manage your memory. Keep track of how big your array is or how many entries it has, and call realloc to double its size whenever the array is not big enough.
Op appears to need to store the data somewhere
#define N 100000u
char BABuffer[N];
/*
 * Append stdin, line by line, to the global BABuffer until input ends or
 * the next line would no longer fit, then report the line and character
 * totals and echo the stored text.
 */
int main(int argc, char *argv[]) {
    char chunk[1024 + 2];
    size_t lines_stored = 0;
    size_t bytes_stored = 0;

    for (;;) {
        if (fgets(chunk, sizeof chunk, stdin) == NULL)
            break;

        const size_t chunk_len = strlen(chunk);

        /* refuse the line if it would not leave room for the terminator */
        if (bytes_stored + chunk_len >= N - 1) {
            fputs("Too much!\n", stderr);
            break;
        }

        memcpy(BABuffer + bytes_stored, chunk, chunk_len);
        bytes_stored += chunk_len;
        lines_stored += 1;
    }

    BABuffer[bytes_stored] = '\0';

    printf("Read %zu lines.\n", lines_stored);
    printf("Read %zu char.\n", bytes_stored);
    fputs(BABuffer, stdout);
    return 0;
}
Note: ccount includes the end-of-line character(s).
I am trying to read a text file containing the string "a3rm5t?7!z*&gzt9v" and put all the numeric characters into a character string to later convert into an integer.
I am currently trying to do this by using sscanf on the buffer after reading the file, and then using sprintf to save all characters found using %u in a character string called str.
However, the integer that is returning when I call printf on str is different each time I run the program. What am I doing right and what am I doing wrong?
This code works when the text file contains a string like "23dog" and returns 23 but not when the string is something like 23dog2.
EDIT: I now realize that i should be putting the numeric characters in a character ARRAY rather than just one string.
/*
 * Read up to 127 bytes from the file named by argv[1], parse a leading
 * unsigned decimal number with sscanf and print it.
 *
 * Exits with status 0 on success and 1 if the argument is missing, the
 * file cannot be opened or read, or the data does not start with a number.
 */
int main(int argc, const char **argv)
{
    int in;
    char buffer[128];
    char str[32];           /* real storage: sprintf through an uninitialized pointer is undefined */
    unsigned x;
    ssize_t got;

    if (argc < 2) {
        fprintf(stderr, "usage: %s filename\n", argv[0]);
        exit(1);
    }

    in = open(argv[1], O_RDONLY);
    if (in < 0) {
        perror("open");
        exit(1);
    }

    /* leave one byte free: read() does not NUL-terminate, sscanf needs it */
    got = read(in, buffer, sizeof buffer - 1);
    close (in);
    if (got < 0) {
        perror("read");
        exit(1);
    }
    buffer[got] = '\0';

    /* sscanf leaves x untouched when the text does not start with a number */
    if (sscanf(buffer, "%u", &x) != 1) {
        fprintf(stderr, "no leading number found\n");
        exit(1);
    }

    snprintf(str, sizeof str, "%u\n", x);
    printf("%s\n",str);
    exit(0);
}
If you simply want to filter out any non-digits from your input, you need not use scanf, sprintf and the like. Simply loop over the buffer and copy the characters that are digits.
The following program only works for a single line of input read from standard input and only if it is less than 512 characters long but it should give you the correct idea.
#include <stdio.h>
#define BUFFER_SIZE 512
/*
 * Read one line from stdin (at most BUFFER_SIZE - 1 characters) and print
 * only the decimal digits it contains, followed by a newline.
 * Returns 1 if no line could be read, 0 otherwise.
 */
int
main()
{
    char buffer[BUFFER_SIZE];   /* raw input line */
    char digits[BUFFER_SIZE];   /* digits copied out of buffer */
    char *out = digits;         /* next free position in digits */
    size_t k;

    if (fgets(buffer, BUFFER_SIZE, stdin) == NULL)
    {
        perror("fgets");
        return 1;
    }

    /* Walk the NUL-terminated input and keep only '0'..'9'. */
    for (k = 0; buffer[k] != '\0'; k++)
    {
        const char ch = buffer[k];

        if (ch < '0' || ch > '9')
            continue;
        *out++ = ch;
    }
    *out = '\0';                /* NUL terminate the result */

    printf("%s\n", digits);
    return 0;
}
A good approach to any problem like this is to read the entire line into a buffer and then assign a pointer to the buffer. You can then use the pointer to step through the buffer reading each character and acting on it appropriately. The following is one example of this approach. getline is used to read the line from the file (it has the advantage of allocating space for buffer and returning the number of characters read). You then allocate space for the character string based on the size of buffer as returned by getline. Remember, when done, you are responsible for freeing the memory allocated by getline.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read the first line of the file named by argv[1], collect its decimal
 * digits into a string, convert that string with atoi and print both the
 * line and the number.
 *
 * Returns 0 on success, 1 on a usage, open, read or allocation error.
 * Note: atoi performs no overflow detection on long digit strings.
 */
int main (int argc, const char **argv)
{
    char *buffer = NULL;    /* forces getline to allocate required space */
    ssize_t read = 0;       /* number of characters read by getline */
    size_t n = 0;           /* size of the buffer getline allocated */
    char *str = NULL;       /* string to hold digits read from file */
    char *p = NULL;         /* ptr to step through buffer */
    int idx = 0;            /* index for adding digits to str */
    int number = 0;         /* int to hold number parsed from file */
    FILE *input;

    /* validate input */
    if (argc < 2) { printf ("Error: insufficient input. Usage: %s filename\n", argv[0]); return 1; }

    /* open and validate file */
    input = fopen(argv[1], "r");
    if (!input) { printf ("Error: failed to open file '%s\n", argv[1]); return 1; }

    /* read line from file with getline */
    if ((read = getline (&buffer, &n, input)) == -1)
    {
        printf ("Error: nothing read from file '%s\n", argv[1]);
        free (buffer);      /* getline may have allocated even on failure */
        fclose (input);
        return 1;
    }

    /* +1 for the terminator: a line made only of digits and lacking a
       trailing newline fills all `read` characters */
    str = malloc ((size_t) read + 1);
    if (!str)
    {
        printf ("Error: memory allocation failed\n");
        free (buffer);
        fclose (input);
        return 1;
    }

    for (p = buffer; *p; p++)           /* read each char in buffer */
    {
        if (*p >= '0' && *p <= '9')     /* if char is digit 0-9 */
            str[idx++] = *p;            /* copy to str at idx */
    }
    str[idx] = 0;                       /* null-terminate str */

    number = atoi (str);                /* convert str to int */
    printf ("\n string : %s number : %d\n\n", buffer, number);

    fclose (input);     /* close input file stream */
    free (buffer);      /* free memory allocated by getline */
    free (str);         /* free memory allocated to str */
    return 0;
}
datafile:
$ cat dat/fwintstr.dat
a3rm5t?7!z*&gzt9v
output:
$ ./bin/prsint dat/fwintstr.dat
string : a3rm5t?7!z*&gzt9v
number : 3579