DISCLAIMER: I'm new to C.
What is the best way to convert every line in a .txt file (can be other file types too) to a dinamic calloc() array, and the other way around?
In my file I have to following:
1 Hello
2 18
3 World
4 15
etc...
I want something like this in the array:
[0] Hello
[1] 18
[2] World
[3] 15
etc...
The code i have now:
FILE *file;
file = fopen("test.txt", "r");
int i = 0;
//make dynamic calloc array
//while line!= EOF
//put line[i] into array
//i++
//realloc array, size +1
fclose(file);
Is this way of doing this a good one or is there a better one?
If someone could help me a little bit with the code I would be really thankful.
You are close to the right solution, but here you are reallocing the dynamic array every time there is a new line, what you could do, is to allocate N byte in advance in the array and realloc each time with this size, this will avoid frequent memory movement of the array and sys call:
FILE *file;
file = fopen("test.txt", "r");
int i = 0;
int max = 256;
int resize = 512;
char **arr = malloc(max * sizeof(*arr));
//make dynamic calloc array
//while line!= EOF
//put line[i] into array
//i++
if(i == max)
realloc array, size + reisze;
fclose(file);
i would get the number of lines first and then allocate memory for an array to avoid realloc() call (possibly allocates a new memory block and copies memory area).
but i'm not sure that this way is more efficient.
here's my example code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
static int get_nlines(FILE* fp)
{
int nlines = 0;
int ch;
while (!feof(fp)) {
ch = fgetc(fp);
if (ch == '\n') {
nlines++;
}
}
fseek(fp, 0, SEEK_SET);
return nlines;
}
static char* get_value(char* s)
{
char* pch;
pch = strtok(s, " ");
if (pch != NULL) {
pch = strtok(NULL, " ");
if (pch != NULL) {
return strdup(pch); // need to free() after use
} else {
return NULL;
}
} else {
return NULL;
}
}
int main()
{
FILE* fp = fopen("test.txt", "r");
if (fp != NULL) {
int nlines = get_nlines(fp);
printf("nlines: %d\n", nlines);
// make values..
char** values = calloc(nlines + 1, sizeof(char*)); // +1 for a sentinel
if (values != NULL) {
char line[1024];
int idx = 0;
while (fgets(line, sizeof(line), fp) != NULL) {
values[idx] = get_value(line);
idx++;
}
}
// use values..
char** p = &values[0];
while (*p != NULL) {
printf("%s\n", *p);
p++;
}
// clean values..
p = &values[0];
while (*p != NULL) {
free(*p);
p++;
}
fclose(fp);
} else {
perror("test.txt");
}
}
the result:
$ ./a.out
nlines: 4
Hello
18
World
15
Related
So I'm trying to create a function that takes in a text file, which contains a bunch of words separated by the newline character, and reads the text file into a char** array.
When I run this code in netbeans on windows, it works fine but if I run it in Linux, I get a segmentation fault error.
// globals
FILE *words_file;
char **dic;
int num_words = 0;
void read_to_array() {
words_file = fopen("words.txt", "r");
char *line = NULL;
int i = 0;
size_t len = 0;
dic = (char **)malloc(99999 * sizeof(char *));
// read dic to array
while (getline(&line, &len, words_file) != -1) {
dic[i] = (char*)malloc(len);
strcpy(dic[i], line);
// get rid of \n after word
if (dic[i][strlen(dic[i]) - 1] == '\n') {
dic[i][strlen(dic[i]) - 1] = '\0';
}
++i;
num_words++;
}
//printf("%s", dic[i][strlen(dic[i]) - 1]); //testing
fclose(words_file);
dic[i] = NULL;
}
What am I missing here?
There are some problems in your program that may cause the undefined behavior that you observe:
You do not test if the file was open successfully, causing undefined behavior if the file is not where you expect it or has a different name.
You do not limit the number of lines read into the array, causing undefined behavior if the file contains more than 99998 lines, which may be be the case in linux as /usr/share/dict/words has 139716 lines on my system, for example.
Your memory allocation scheme is suboptimal but correct: you should compute the length of the word and strip the newline before allocating the copy. As coded, you allocate too much memory. Yet you should free line before returning from read_to_array and you should avoid using global variables.
Here is a modified version:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char **read_to_array(const char *filename, int *countp) {
FILE *words_file;
char *line = NULL;
size_t line_size = 0;
char **dic = NULL;
int dic_size = 0;
int i = 0;
words_file = fopen(filename, "r");
if (words_file == NULL) {
fprintf(stderr, "cannot open dictionary file %s\n", filename);
return NULL;
}
dic_size = 99999;
dic = malloc(dic_size * sizeof(char *));
if (dic == NULL) {
fprintf(stderr, "cannot allocate dictionary array\n");
fclose(words_file);
return NULL;
}
// read dic to array
while (getline(&line, &line_size, words_file) != -1) {
size_t len = strlen(line);
/* strip the newline if any */
if (len > 0 && line[len - 1] == '\n') {
line[--len] = '\0';
}
if (i >= dic_size - 1) {
/* too many lines: should reallocate the dictionary */
fprintf(stderr, "too many lines\n");
break;
}
dic[i] = malloc(len + 1);
if (dic[i] == NULL) {
/* out of memory: report the error */
fprintf(stderr, "cannot allocate memory for line %d\n", i);
break;
}
strcpy(dic[i], line);
i++;
}
dic[i] = NULL;
*countp = i;
fclose(words_file);
free(line);
return dic;
}
int main(int argc, char **argv) {
const char *filename = (argc > 1) ? argv[1] : "words.txt";
int num_words;
char **dic = read_to_array(filename, &num_words);
if (dic != NULL) {
printf("dictionary loaded: %d lines\n", num_words);
while (num_words > 0)
free(dic[--num_words]);
free(dic);
}
return 0;
}
Output:
chqrlie> readdic /usr/share/dict/words
too many lines
dictionary loaded: 99998 lines
again. I'm new to C. Still thinking in Python terms (readlines, append them to variable) so I'm having difficulties translating that to C. This is what I want to do: open a text file for reading, store each line in an array row by row, print it out to make sure it's stored.
This is how far I've got:
int main(){
FILE * fp = fopen("sometext.txt", "r");
char text[100][100];
if(fp == NULL){
printf("File not found!");
}
else{
char aLine[20];
int row = 0;
while(fgets(aLine, 20, fp) != NULL){
printf("%s", aLine);
//strcpy(text[row], aLine); Trying to append a line (as row)
return 0;
}
Please don't start with "invest in some more time and look somewhere else because it's easy and has been answered". I'm bad at this, and I'm trying.
You can try this. Basically you need an array of arrays to store each line. You find the length of the longest line in the file and you allocate space for it. Then rewind the pointer to the start of the file and use fgets to get each line from the file and strdup to allocate space and copy the line to the respective position. Hope this helps.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char *argv[]) {
FILE * fp = fopen("sometext.txt", "r");
int maxLineSize = 0, count = 0;
char c;
while ((c = fgetc(fp)) != EOF) {
if (c == '\n' && count > maxLineSize) maxLineSize = count;
if (c == '\n') count = 0;
count++;
}
rewind(fp);
char ** lines = NULL;
char * line = calloc(maxLineSize, sizeof(char));
for (int i = 0 ; fgets(line, maxLineSize + 1, fp) != NULL ; i++) { // +1 for \0
lines = realloc(lines, (i + 1) * sizeof(char *));
line[strcspn(line, "\n")] = 0; // optional if you want to cut \n from the end of the line
lines[i] = strdup(line);
printf("%s\n", lines[i]);
memset(line, maxLineSize, '\0');
}
fclose(fp);
}
You could solve it without copy
The follow code could work:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <math.h>
int main()
{
FILE * fp = fopen("sometext.txt", "r");
if(fp == NULL){
printf("File not found!");
return -1;
}
char text[100][20];
int row = 0;
while(row < 100 && fgets(text[row], sizeof(text[0]), fp) != NULL)
++row;
for (int i= 0; i != row; ++i)
fputs(text[i], stdout);
return 0;
}
I am doing a project where I have to read in text from a file and then extract every word that is 4 characters long and allocate it into dynamic array.My approach is to create int function that will get number of 4 letter words and return that number , then create another function that will grab that number and create dynamic array consisting of that many elements. The problem with this approach is how to populate that array with words that meet the requirement.
int func1(FILE *pFile){
int counter = 0;
int words = 0;
char inputWords[length];
while(fscanf(pFile,"%s",inputWords) != EOF){
if(strlen(inputWords)==4){
#counting 4 letter words
counter++;
}
}
}
return counter;
}
int main(){
#creating pointer to a textFile
FILE *pFile = fopen("smallDictionary.txt","r");
int line = 0;
#sending pointer into a function
func1(pFile);
fclose(pFile);
return 0;
}
I would suggest reading lines of input with fgets(), and breaking each line into tokens with strtok(). As each token is found, the length can be checked, and if the token is four characters long it can be saved to an array using strdup().
In the code below, storage is allocated for pointers to char which will store the addresses of four-letter words. num_words holds the number of four-letter words found, and max_words holds the maximum number of words that can currently be stored. When a new word needs to be added, num_words is incremented, and if there is not enough storage, more space is allocated. Then strdup() is used to duplicate the token, and the address is assigned to the next pointer in words.
Note that strdup() is not in the C Standard Library, but that it is POSIX. The feature test macro in the first line of the program may be needed to enable this function. Also note that strdup() allocates memory for the duplicated string which must be freed by the caller.
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
Update
OP has mentioned that nonstandard functions are not permitted in solving this problem. Though strdup() is POSIX, and both common and standard in this sense, it is not always available. In such circumstances it is common to simply implement strdup(), as it is straightforward to do so. Here is the above code, modified so that now the function my_strdup() is used in place of strdup(). The code is unchanged, except that the feature test macro has been removed, the call to strdup() has been changed to my_strdup(), and of course now there is a function prototype and a definition for my_strdup():
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
char * my_strdup(const char *);
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = my_strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
char * my_strdup(const char *str)
{
size_t sz = strlen(str) + 1;
char *dup = malloc(sizeof *dup * sz);
if (dup) {
strcpy(dup, str);
}
return dup;
}
Final Update
OP had not posted code in the question when the above solution was written. The posted code does not compile as is. In addition to missing #includes and various syntax errors (extra braces, incorrect comment syntax) there are a couple of more significant issues. In func1(), the length variable is used uninitialized. This should be large enough so that inputWords[] can hold any expected word. Also, width specifiers should be used with %s in scanf() format strings to avoid buffer overflow. And, OP code should be checking whether the file opened successfully. Finally, func1() returns a value, but the calling function does not even assign this value to a variable.
To complete the task, the value returned from func1() should be used to declare a 2d array to store the four-letter words. The file can be rewound, but this time as fscanf() retrieves words in a loop, if a word has length 4, strcpy() is used to copy the word into the array.
Here is a modified version of OP's code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD 100
int func1(FILE *pFile){
int counter = 0;
char inputWords[MAX_WORD];
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
counter++;
}
}
return counter;
}
int main(void)
{
FILE *pFile = fopen("filename.txt","r");
if (pFile == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char inputWords[MAX_WORD];
int num_4words = func1(pFile);
char words[num_4words][MAX_WORD];
int counter = 0;
rewind(pFile);
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
strcpy(words[counter], inputWords);
counter++;
}
}
if (fclose(pFile) != 0) {
perror("Unable to close file");
}
for (int i = 0; i < num_4words; i++) {
puts(words[i]);
}
return 0;
}
Struggling to move tokens to a 2D array .
The idea is that I am reading a file with multiple lines , get the number of lines and then based on that create a 2D array to use memory wisely(I dont want to create a 100 x 3 array for no reason).
I think I got the 2D array initialized in a separate funtion but when I try to enter data read from strtok() , I am getting error :
error: 'arr' undeclared (first use in this function)
strcpy(&arr[s2][c2],token);
Here is my code :
#include <stdio.h>
#include <string.h>
int ch, lines;
int no_of_lines(char* fp)
{
while(!feof(fp)) {
ch = fgetc(fp);
if(ch == '\n') {
lines++;
}
}
lines++;
return lines;
}
void declare_space_array(int size)
{
char* arr = (char*)malloc(size * 3 * sizeof(char));
return;
}
int main(void)
{
int c2 = 0;
int s2 = 0;
int len;
// char data[10][4];
static const char filename[] = "C:\\Users\\PC\\Documents\\Assignments\\stringops\\test.txt";
FILE* file = fopen(filename, "r");
no_of_lines(file);
printf("No of lines in file = %d", lines);
printf("\n");
// Closing file because it was read once till the end of file
fclose(file);
// Opening file again to read for parsing
file = fopen(filename, "r");
declare_space_array(lines);
char* token;
if(file != NULL) {
char line[128];
while(fgets(line, sizeof line, file) != NULL)
{
len = strlen(line);
printf("%d %s", len - 1, line);
const char s = ",";
token = strtok(line, ",");
while(token != NULL) {
strcpy(arr[s2][c2], token);
// printf( "%s\n", token );
token = strtok(NULL, ",");
c2++;
}
s2++;
}
fclose(file);
} else {
perror(filename); /* why didn't the file open? */
}
for(r1 = 0; r1 < lines; r1++) {
for(c1 = 0; c1 < 3; c1++) {
printf("%s", &arr[r1][c1]);
}
}
return 0;
}
file is something like this:
A1,B1,C1
A2,B2,C2
A3,B3,C3
EXPECTED OUTPUT TO SOMETHIGN LIKE THIS:
A1
B1
C1
A2
B2
C2
A3
B3
C3
After discussion in chat, etc, you could end up with code like this. This uses a global variable arr that's a pointer to an array of arrays of 3 char * values.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int lines = 0;
static char *(*arr)[3] = 0; // global definition.
static int no_of_lines(FILE *fp)
{
lines = 0;
int ch;
while ((ch = fgetc(fp)) != EOF)
{
if (ch == '\n')
lines++;
}
return ++lines; // Allow for last line possibly not having a newline
}
static void declare_space_array(int size)
{
arr = calloc(size, 3 * sizeof(char *)); // zeroed memory allocation
if (arr == 0)
{
fprintf(stderr, "Failed to allocate memory\n");
exit(1);
}
}
int main(void)
{
int c2 = 0;
int s2 = 0;
int len;
// char data[10][4];
// static const char filename[] = "C:\\Users\\PC\\Documents\\Assignments\\stringops\\test.txt";
const char *filename = "data";
FILE *file = fopen(filename, "r");
if (file == 0)
{
fprintf(stderr, "Failed to open file '%s' for reading\n", filename);
exit(1);
}
no_of_lines(file);
printf("No of lines in file = %d\n", lines);
rewind(file);
declare_space_array(lines);
const char delims[] = ",\n";
char line[128];
while (fgets(line, sizeof line, file) != NULL)
{
char *token;
c2 = 0;
len = strlen(line);
printf("%d [%.*s]\n", len - 1, len - 1, line);
token = strtok(line, delims);
while (token != NULL)
{
arr[s2][c2] = strdup(token); // copy token (from strtok) into newly allocated string.
token = strtok(NULL, delims);
c2++;
}
s2++;
}
fclose(file);
for (int r1 = 0; r1 < lines; r1++)
{
if (arr[r1][0] != 0)
{
for (int c1 = 0; c1 < 3; c1++)
printf(" %-10s", arr[r1][c1]);
putchar('\n');
}
}
return 0;
}
It doesn't release the memory that's allocated — I got lazy.
Sample data (note that the names are longer than 2 characters and are of variable length):
server1,Phoenix,Windows
server2,Dallas,Linux
server-99,London,z/OS
Sample output:
No of lines in file = 4
23 [server1,Phoenix,Windows]
20 [server2,Dallas,Linux]
21 [server-99,London,z/OS]
server1 Phoenix Windows
server2 Dallas Linux
server-99 London z/OS
The 'number of lines in file = 4' allows for the possibility that there isn't a newline at the end of the last line. The code in the printing loop allows for the possibility that there was a newline at the end and therefore the count is an over-estimate. It would spot a memory allocation from strdup() as long as the failure was on the first field of a line. It might crash if it was the second or third field that was not successfully copied.
I have a FILE pointer that contains input from popen(). I want to put all of the input into a char *str, but I don't know how to do this (new to C-programming).
void save_cmd(int fd) {
char buf[100];
char *str;
FILE *ls;
if (NULL == (ls = popen("ls", "r"))) {
perror("popen");
exit(EXIT_FAILURE);
}
while (fgets(buf, sizeof(buf), ls) != NULL) {
//Don't know what to do here....
}
pclose(ls);
}
I guess I somehow have to concatenate inside the while loop, but how is this possible when I don't know the total size in advance (I want to save the whole result in char *str). If anyone have som pointers on how to do this I would be very grateful.
so in your code you have captured a line into the buf.
now you want to have it all in the *str variable correct.
you need to allocate memory for it and then copy. here is an example:
void save_cmd(int fd) {
char buf[100];
char *str = NULL;
char *temp = NULL;
unsigned int size = 1; // start with size of 1 to make room for null terminator
unsigned int strlength;
FILE *ls;
if (NULL == (ls = popen("ls", "r"))) {
perror("popen");
exit(EXIT_FAILURE);
}
while (fgets(buf, sizeof(buf), ls) != NULL) {
strlength = strlen(buf);
temp = realloc(str, size + strlength); // allocate room for the buf that gets appended
if (temp == NULL) {
// allocation error
} else {
str = temp;
}
strcpy(str + size - 1, buf); // append buffer to str
size += strlength;
}
pclose(ls);
}