Reading each line of file into array - c

I'm reading a file and want to put each line into a string in an array. The length of the file is arbitrary and the length of each line is arbitrary (albeit assume it will be less than 100 characters).
Here's what I've got and it's not compiling. Essentially this is an array to an array of characters, right? So shouldn't it be char** words = (**char)malloc(sizeof(*char));?
#include <stdio.h>
#include <stdlib.h>
int main(){
int BUFSIZE = 32767;//max number of lines to read
char** words = (**char)malloc(sizeof(*char));//gives error: expected expression before 'char'
FILE *fp = fopen("coll.txt", "r");
if (fp == 0){
fprintf(stderr, "Error opening file");
exit(1);
}
int i = 0;
words[i] = malloc(BUFSIZE);
while(fscanf(fp, "%100s", words[i]) == 1)//no line will be longer than 100
{
i++;
words[i] = realloc(words, sizeof(char*)*i);
}
int j;
for(j = 0; j < i; j++)
printf("%s\n", words);
return 0;
}
Note: I've read "Reading from a file and storing in array" but it doesn't answer my question.

There are a few issues with your program. The realloc() statement is not used correctly. I also prefer fgets() for getting a line. Here is my solution. This also uses realloc() to increase the allocation of the buffer lines so that you neither have to know the number of lines in advance nor do you have to read the file in two passes (faster that way). This is a common technique to use when you don't know how much memory you'll have to allocate in advance.
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
int lines_allocated = 128;
int max_line_len = 100;
/* Allocate lines of text */
char **words = (char **)malloc(sizeof(char*)*lines_allocated);
if (words==NULL)
{
fprintf(stderr,"Out of memory (1).\n");
exit(1);
}
FILE *fp = fopen("coll.txt", "r");
if (fp == NULL)
{
fprintf(stderr,"Error opening file.\n");
exit(2);
}
int i;
for (i=0;1;i++)
{
int j;
/* Have we gone over our line allocation? */
if (i >= lines_allocated)
{
int new_size;
/* Double our allocation and re-allocate */
new_size = lines_allocated*2;
words = (char **)realloc(words,sizeof(char*)*new_size);
if (words==NULL)
{
fprintf(stderr,"Out of memory.\n");
exit(3);
}
lines_allocated = new_size;
}
/* Allocate space for the next line */
words[i] = malloc(max_line_len);
if (words[i]==NULL)
{
fprintf(stderr,"Out of memory (3).\n");
exit(4);
}
if (fgets(words[i],max_line_len-1,fp)==NULL)
break;
/* Get rid of CR or LF at end of line */
for (j=strlen(words[i])-1;j>=0 && (words[i][j]=='\n' || words[i][j]=='\r');j--)
;
words[i][j+1]='\0';
}
/* Close file */
fclose(fp);
int j;
for(j = 0; j < i; j++)
printf("%s\n", words[j]);
/* Good practice to free memory */
for (;i>=0;i--)
free(words[i]);
free(words);
return 0;
}

You should change the line:
char** words = (**char)malloc(sizeof(*char));
into this:
char** words=(char **)malloc(sizeof(char *)*Max_Lines);

Related

How to return 2d char array (char double pointer) in C?

I am reading a file that contains several lines of strings(max length 50 characters). To store those strings I created a char double-pointer using calloc. The way my code works is as it finds a line in the file it adds one new row (char *) and 50 columns (char) and then stores the value.
My understanding is that I can call this method and get this pointer with values in return. However, I was not getting the values so I check where I am losing it and I found that the memory is not persisting after while loop. I am able to print strings using print 1 statement but print 2 gives me null.
Please let me know what I am doing wrong here.
char **read_file(char *file)
{
FILE *fp = fopen(file, "r");
char line[50] = {0};
char **values = NULL;
int index = 0;
if (fp == NULL)
{
perror("Unable to open file!");
exit(1);
}
// read both sequence
while (fgets(line, 50, fp))
{
values = (char **)calloc(index + 1, sizeof(char *));
values[index] = (char *)calloc(50, sizeof(char));
values[index] = line;
printf("%s",values[index]); // print 1
index++;
}
fclose(fp);
printf("%s", values[0]); // print 2
return values;
}
line content is overwritten on each loop iteration (by fgets()).
values is overwritten (data loss) and leaks memory on each iteration index > 1.
value[index] is allocated memory on each iteration which leaks as you overwrite it with the address of line on the following line.
line is a local variable so you cannot return it to caller where it will be out of scope.
caller has no way to tell how many entries values contain.
Here is a working implementation with a few changes. On error it closes the file and frees up memory allocated and return NULL instead of exiting. Moved printf() to caller:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define BUF_LEN 50
char **read_file(char *file) {
FILE *fp = fopen(file, "r");
if(!fp) {
perror("Unable to open file!");
return NULL;
}
char **values = NULL;
char line[BUF_LEN];
unsigned index;
for(index = 0;; index++) {
char **values2 = realloc(values, (index + 1) * sizeof(char *));
if(!values2) {
perror("realloc failed");
goto err;
}
values = values2;
if(!fgets(line, BUF_LEN, fp)) break;
values[index] = strdup(line);
}
fclose(fp);
values[index] = NULL;
return values;
err:
fclose(fp);
for(unsigned i = 0; i < index; i++) {
free(values[i]);
}
free(values);
return NULL;
}
int main() {
char **values = read_file("test.txt");
for(unsigned i = 0; values[i]; i++) {
printf("%s", values[i]);
free(values[i]);
}
free(values);
return 0;
}
fgets() returns line ending in '\n' or at most BUF_LEN - 1 of data. This means a given value[i] may or may not be ending with a \n. You may want this behavior, or you want value[i] to be consistent and not contain any trailing \n irregardless of the input.
strdup() is _POSIX_C_SOURCE >= 200809L and not standard c,
so if you build with --std=c11 the symbol would not be defined.

string of undetermined length c

Hi I was trying to create an array of string of an undetermined length in c.
This is my code :
int main()
{
int lineCount=linesCount();
char text[lineCount][10];
printf("%d",lineCount);
FILE * fpointer = fopen("test.txt","r");
fgets(text,10,fpointer);
fclose(fpointer);
printf("%s",text);
return 0;
}
I would like to replace 10 in
char text[lineCount][10];
My code reads out a file I already made the amount of lines dynamic.
Since the line length is unpredictable I would like to replace 10 by a something dynamic.
Thanks in advance.
To do this cleanly, we want a char * array rather than an 2D char array:
char *text[lineCount];
And, we need to use memory from the heap to store the individual lines.
Also, don't "hardwire" so called "magic" numbers like 10. Use an enum or #define (e.g) #define MAXWID 10. Note that with the solution below, we obviate the need for using the magic number at all.
Also, note the use of sizeof(buf) below instead of a magic number.
And, we want [separate] loops when reading and printing.
Anyway, here's the refactored code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
linesCount(void)
{
return 23;
}
int
main(void)
{
int lineCount = linesCount();
char *text[lineCount];
char buf[10000];
printf("%d", lineCount);
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
// store line -- we must allocate this
text[i++] = strdup(buf);
}
fclose(fpointer);
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
return 0;
}
UPDATE:
The above code is derived from your original code. But, it assumes that the linesCount function can predict the number of lines. And, it doesn't check against overflow of the fixed length text array.
Here is a more generalized version that will allow an arbitrary number of lines with varying line lengths:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
main(void)
{
int lineCount = 0;
char **text = NULL;
char buf[10000];
// open file and _check_ the return
const char *file = "test.txt";
FILE *fpointer = fopen(file, "r");
if (fpointer == NULL) {
perror(file);
exit(1);
}
int i = 0;
while (fgets(buf, sizeof(buf), fpointer) != NULL) {
// strip newline
buf[strcspn(buf,"\n")] = 0;
++lineCount;
// increase number of lines in array
text = realloc(text,sizeof(*text) * lineCount);
if (text == NULL) {
perror("realloc");
exit(1);
}
// store line -- we must allocate this
text[lineCount - 1] = strdup(buf);
}
fclose(fpointer);
// print the lines
for (i = 0; i < lineCount; ++i)
printf("%s\n", text[i]);
// more processing ...
// free the lines
for (i = 0; i < lineCount; ++i)
free(text[i]);
// free the list of lines
free(text);
return 0;
}

How do I compare string pointers?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void sort(char *array[1000][3], int size){
char *temp;
temp = malloc(30);
int j, i;
for(i = 0; i < size-1; i ++){
for(j = j+1; j < size; j ++){
if(strcmp(array[i][0],array[j][0]) > 0){
strcpy(temp, array[i][0]);
strcpy(array[i][0], array[j][0]);
strcpy(array[j][0], temp);
}
}
}
}
int main(){
FILE * myfile;
myfile = fopen("/public/lab4/hurricanes.csv", "r");
char line[100];
char *token;
char *array[1000][3];
int counter = 0;
if(myfile == NULL){
perror("Could not open file");
return 1;
}
while(fgets(line, 100,myfile) != NULL){
token = (char*) malloc((strlen(line)+1) * sizeof(char));
strcpy(token,line);
token = strtok(token, ",");
for(int i = 0; token != NULL; i ++){
array[counter][i] = token;
token = strtok(NULL, ",");
}
counter ++;
}
printf("%s", array[0][0]);
sort(array, counter);
printf("%s", array[0][0]);
return 0;
}
The file gives info on hurricanes with each line looking similar to this
Easy,Category 4 hurricane,5-Sep,1950
Having trouble being able to compare some of the string pointers to sort them alphabetically. Not sure if I need to use malloc to allocate some memory or what I need to do. Right now the array is staying the exact same.
Well, your program has only a few drawbacks, all of them commented in the corrected version in comments. I have edited on top of your source code and tried to conserver as much of your original code as possible, so it is coded as you did in the first place. Please, read the code and don't hesitate to make any comment you want:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* better use constants, so in case you have to change them, you
* only change them here. */
#define MAX 1000
#define NFIELDS 3
/* this is for pretty formatting error messages. */
#define F(_fmt) __FILE__":%d:%s: "_fmt, __LINE__, __func__
/* if you pass the field number used to sort, then you
* have a more flexible way to sort (any field can be used)
*/
void sort(char *array[MAX][NFIELDS], int size, int field)
{
/* space for NFIELDS pointers, no need to malloc it */
char *temp[NFIELDS];
int j, i;
for(i = 0; i < size - 1; i++) {
/* ---v--- oops!!! (there was a j here) :) */
for(j = i+1; j < size; j++) {
if(strcmp(array[i][field], array[j][field]) > 0){
/* exchange the arrays of NFIELD pointers */
memcpy(temp, array[i], sizeof temp);
memcpy(array[i], array[j], sizeof array[i]);
memcpy(array[j], temp, sizeof array[j]);
}
}
}
}
void print_array(char *array[MAX][NFIELDS], int size)
{
int i;
for (i = 0; i < size; i++) {
int j;
printf("#%d:", i);
for (j = 0; j < NFIELDS; j++) {
printf(" %s", array[i][j]);
}
printf("\n");
}
}
int main()
{
char *filename = "/public/lab4/hurricanes.csv";
/* better do the declaration and the initialization at the same time */
FILE *myfile = fopen(filename, "r");
char line[100];
char *array[MAX][NFIELDS];
int counter = 0;
if(myfile == NULL){
fprintf(stderr,
F("FOPEN: %s: %s(errno = %d)\n"),
filename, strerror(errno), errno);
return 1;
}
/* don't hesitate to use the sizeof operator below, it will save you a
* lot of nightmares */
while(fgets(line, sizeof line, myfile) != NULL) {
/* don't do anything if we don't have enough space */
if (counter >= MAX) {
fprintf(stderr, F("MAX NUMBER OF LINES EXHAUSTED (%d)\n)"), MAX);
exit(1);
}
/* NEVER, NEVER, NEVER... cast the result of malloc. See text
* below */
/* by the way, why don't use strdup(3) instead? (it does the
* allocation and the copy in one shot) */
char *token = strdup(line);
if (!token) {
fprintf(stderr, F("Not enough memory: %s (errno = %d)\n"),
strerror(errno), errno);
exit(1);
}
/* don't use strtok, because it considers a sequence of commas as
* a single delimiter, use strsep(3), that will consider ",," as
* three empty strings. By the way, be careful because in your
* sample data you have included commas in the dates. */
int i;
char *s;
/* Add also \n to the separator string, so you don't get the
* last \n included in the last field.
/* Here: ---vv--- */
for (i = 0;
(i < NFIELDS) && (s = strsep(&token, ",\n")) != NULL;
i++)
{
array[counter][i] = s;
}
counter++;
}
print_array(array, counter);
sort(array, counter, 2);
printf("\n");
print_array(array, counter);
return 0;
}
Note: Never cast the result of malloc(3) this is a legacy from ancient times, when there was no void type to allow for automatic pointer conversion. Casting malloc makes the compiler to silently comply if you forget to #include <stdlib.h> and this can make an error if pointer types are not the same size as integer types (you get an undefined behaviour on the compiler assuming by mistake that malloc() returns an int, but as you have stated so in the source, there will be no message from the compiler) Casting malloc(3) is a very bad habit, and makes you more difficult to search for errors.

How to populate Dynamic array with Strings in C

I am doing a project where I have to read in text from a file and then extract every word that is 4 characters long and allocate it into dynamic array.My approach is to create int function that will get number of 4 letter words and return that number , then create another function that will grab that number and create dynamic array consisting of that many elements. The problem with this approach is how to populate that array with words that meet the requirement.
int func1(FILE *pFile){
int counter = 0;
int words = 0;
char inputWords[length];
while(fscanf(pFile,"%s",inputWords) != EOF){
if(strlen(inputWords)==4){
#counting 4 letter words
counter++;
}
}
}
return counter;
}
int main(){
#creating pointer to a textFile
FILE *pFile = fopen("smallDictionary.txt","r");
int line = 0;
#sending pointer into a function
func1(pFile);
fclose(pFile);
return 0;
}
I would suggest reading lines of input with fgets(), and breaking each line into tokens with strtok(). As each token is found, the length can be checked, and if the token is four characters long it can be saved to an array using strdup().
In the code below, storage is allocated for pointers to char which will store the addresses of four-letter words. num_words holds the number of four-letter words found, and max_words holds the maximum number of words that can currently be stored. When a new word needs to be added, num_words is incremented, and if there is not enough storage, more space is allocated. Then strdup() is used to duplicate the token, and the address is assigned to the next pointer in words.
Note that strdup() is not in the C Standard Library, but that it is POSIX. The feature test macro in the first line of the program may be needed to enable this function. Also note that strdup() allocates memory for the duplicated string which must be freed by the caller.
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
Update
OP has mentioned that nonstandard functions are not permitted in solving this problem. Though strdup() is POSIX, and both common and standard in this sense, it is not always available. In such circumstances it is common to simply implement strdup(), as it is straightforward to do so. Here is the above code, modified so that now the function my_strdup() is used in place of strdup(). The code is unchanged, except that the feature test macro has been removed, the call to strdup() has been changed to my_strdup(), and of course now there is a function prototype and a definition for my_strdup():
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
char * my_strdup(const char *);
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = my_strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
char * my_strdup(const char *str)
{
size_t sz = strlen(str) + 1;
char *dup = malloc(sizeof *dup * sz);
if (dup) {
strcpy(dup, str);
}
return dup;
}
Final Update
OP had not posted code in the question when the above solution was written. The posted code does not compile as is. In addition to missing #includes and various syntax errors (extra braces, incorrect comment syntax) there are a couple of more significant issues. In func1(), the length variable is used uninitialized. This should be large enough so that inputWords[] can hold any expected word. Also, width specifiers should be used with %s in scanf() format strings to avoid buffer overflow. And, OP code should be checking whether the file opened successfully. Finally, func1() returns a value, but the calling function does not even assign this value to a variable.
To complete the task, the value returned from func1() should be used to declare a 2d array to store the four-letter words. The file can be rewound, but this time as fscanf() retrieves words in a loop, if a word has length 4, strcpy() is used to copy the word into the array.
Here is a modified version of OP's code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD 100
int func1(FILE *pFile){
int counter = 0;
char inputWords[MAX_WORD];
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
counter++;
}
}
return counter;
}
int main(void)
{
FILE *pFile = fopen("filename.txt","r");
if (pFile == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char inputWords[MAX_WORD];
int num_4words = func1(pFile);
char words[num_4words][MAX_WORD];
int counter = 0;
rewind(pFile);
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
strcpy(words[counter], inputWords);
counter++;
}
}
if (fclose(pFile) != 0) {
perror("Unable to close file");
}
for (int i = 0; i < num_4words; i++) {
puts(words[i]);
}
return 0;
}

Segmentation fault when writing contents of dictionary to file

The code I am working on reads in a dictionary of 45430 words and then prints to the file all the other words in the dictionary contained within each word. I am just working on getting the file MyDictionary txt file read into the char array word[45430][30] and then printing this to the words-in-words txt file. I run into a seg fault at 44946 word when I do so, but in the same while loop I am also printing to the console and all words print out properly. Why is it I am getting this seg fault for writing to the file? And why is there no seg fault writing to the console?
Code:
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <string.h>
//char ***alloc_array(int,int);
int main(void){
FILE *fr; //declare file read file pointer
FILE *fp; //declare file printed file pointer
//char array to read in up to 30 chars
char line[31];
long numwords=45430; //number of words in dictionary
int maxlength=31; // the longest string in dictionary (30 chars)
long i; //counts up to 45430
//allocate space for 45430 words at a max length of 30 chars (1 extra char for "\0")
char ***word = calloc(numwords, sizeof(char **));
for(i = 0; i != numwords; i++) {
word[i] = calloc(maxlength, sizeof(char *));
}
//Open MyDictionary.txt and determine if there is an error or not
fr = fopen ("MyDictionary.txt", "r"); // open the file for reading
if(fr==NULL){
printf("\nError! opening input file");
exit(1); //Program exits if file pointer returns NULL.
}
//Open words-within-words.txt and determine if there is an error or not
fp = fopen ("words-within-words.txt", "w"); // open the file for reading
if(fp==NULL){
printf("\nError! opening output file");
exit(1); //Program exits if file pointer returns NULL.
}
int j=0; //counts to 30 for max length
i=0;
while(fgets(line, 40, fr) != NULL){ //get a line, up to 40 chars from fr and put first . done if NULL
for(j=0;j<30;){
word[i][j]=&line[j];
j++;
}
j=0;
printf("\n%s",word[i][j]); //print out each word of dictionary to console on its own line
/*
if((i>4 && i<8)||(i>45428)){
fprintf(fp,"\nanalyze:word[i][0]=%s\tword[i][2]=%s\ti=%li",word[i][0],word[i][2],i+1);
}
*/
fprintf(fp,"%s",word[i][j]); //print out each word of dictionary to words-in-words on its own line
i++;
}
fclose(fr); //close the files prior to exiting
fclose(fp);
return 0;
} //main
char ***word = calloc(numwords, sizeof(char **));
for(i = 0; i != numwords; i++) {
word[i] = calloc(maxlength, sizeof(char *));
}
You've got one too many levels of indirection. You are storing a list of words. A word is a char *, so a list of words would be char **.
char **word = calloc(numwords, sizeof(char *));
for (i = 0; i != numwords; i++) {
word[i] = calloc(maxlength, sizeof(char));
}
This will then necessitate changes to the rest of your code. You can get rid of j entirely. This:
for(j=0;j<30;){
word[i][j]=&line[j];
j++;
}
Becomes:
strcpy(word[i], line);
And this:
j=0;
printf("\n%s",word[i][j]);
fprintf(fp,"%s",word[i][j]);
i++;
Becomes:
printf("%s\n", word[i]);
fprintf(fp, "%s\n", word[i]);
'word' should be an array of pointers, so the right type is char **, not char ***.
Each entry in the array is a pointer to a buffer of characters:
char **word = (char **)calloc(numwords, sizeof(char *));
if (!word)
// exit with error
for (i = 0; i != numwords; i++) {
word[i] = (char *)calloc(maxlength, sizeof(char)); // just allocate 31 bytes
if (!word[i])
// exit with error
}
Then a read from file can be done like this:
for (i = 0; fgets(line, 40, fr); i++) {
strncpy(word[i], line, maxlength);
printf("word %d: %s\n", i, word[i]);
}
To have one chunk of memory do allocate like this:
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
int result = EXIT_SUCCESS;
size_t n = 45430;
size_t l = 30;
char (* words)[n][l + 1] = calloc(n, l + 1);
if (NULL == words)
{
result = EXIT_FAILURE;
perror("calloc() failed");
goto lblExit;
}
for (size_t i = 0; i < n; ++i)
{
strncpy((*words)[i], "test", l);
}
for (size_t i = 0; i < n; ++i)
{
printf("%zu: '%s'\n", i, (*words)[i]);
}
free(words);
lblExit:
return result;
}

Resources