I need to allocate memory using malloc or calloc, for a large file that looks like this:
2357 VKLYKK
7947 1WTFWZ
3102 F2IXK3
2963 EXMW55
2865 50CJES
2510 8PC1AI
There are around 10K of lines in that .txt file. How can I allocate the required memory?
What is the program supposed to do? The program has to read the whole .txt file. Sort it by the first number and send output to out.txt. But since the the input of the file is huge it won't let me.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma warning(disable : 4996)
typedef struct {
int number;
char order[10];
} Data;
int sorting(const void *a, const void *b)
{
Data *dataA = (Data *)a;
Data *dataB = (Data *)b;
// return (dataA->number - dataB->number); // Ascending order
return (dataB->number - dataA->number); // Descending order
}
int main()
{
FILE *fp;
FILE *f = fopen("out.txt", "w");
Data data[20];
char *line[150]
int i = 0;
char file_name[10] = "";
printf("enter file name: ");
scanf("%s", &file_name);
fp = fopen(file_name, "r");
if (fp == NULL)
{
printf("\n%s\" File not found!", file_name);
exit(1);
}
while (1)
{
if (fgets(line, 150, fp) == NULL)
break;
char *pch;
pch = strtok(line, " ");
data[i].number = atoi(pch);
pch = strtok(NULL, " ");
strcpy(data[i].order, pch);
i++;
}
printf("#################\n");
printf("number\torder\n");
for (int k = 0; k < 10; k++)
{
printf("%d\t%s", data[k].number, data[k].order);
}
qsort(data, 10, sizeof(Data), sorting);
printf("\n#################\n");
printf("number\torder\n");
for (int k = 0; k < 10; k++)
{
printf("%d\t%s", data[k].number, data[k].order);
fprintf(f, "%d\t%s", data[k].number, data[k].order);
}
fclose(fp);
fclose(f);
return 0;
}
If your file contains 10,000 lines or so, your while loop will quickly overrun your data array (which you declared with only 20 elements). If the number of lines is not known in advance, the best way to do this is with a growing array. Start by initialing data (and new dataSize and dataCount variables) as follows:
int dataSize = 0;
int dataCount = 0;
Data *data = NULL;
Then as you use up the space in the array, when it reaches dataSize entries you will have to grow your array. Something like this:
while (1) {
if (dataCount >= dataSize) {
Data *new;
dataSize += 1000;
new = realloc(data,dataSize * sizeof *data);
if (new == NULL) {
perror("realloc");
free(data);
return 2;
}
data = new;
}
int cnt = fscanf(fp,"%d %9s", &data[dataCount].number, data[dataCount].order);
if (cnt == EOF)
break;
if (cnt != 2) {
printf("Error reading data\n");
return 1;
}
dataCount++;
}
When the while loop finishes (if there were no errors), the data array will contain all of the data, and dataCount will be the total number of data items found.
Note that I used fscanf instead of fgets, as this eliminates the need for intermediate step like calls to atoi and strcpy. I also put in some simple error checking. I chose 1000 as the growth increment, though you can change that. But too small and it fragments the heap more rapidly, and too big requires larger amounts of memory too quickly.
this line
char* line[150];
creates an array of 150 char pointers, this is not what you want if you are reading one line like this
if (fgets(line, 150, fp) == NULL) break;
I suspect you wanted one line of 150 chars
so do
char line[150];
You can use qsort to sort the array of lines, but that may not be the best approach. It may be more effective to insert the lines into a data structure that can be easily traversed in order. Although this simple minded solution is very much less than ideal, here's a simple-minded example of inserting into a tree. This sorts the lines lexicographically; modifying it to sort numerically based on the line is a good exercise.
/* Build an (unbalanced) binary search tree of lines in input. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static void * xrealloc(void *buf, size_t num, size_t siz, void *end);
FILE * xfopen(const char *path, const char *mode);
struct entry {
const char *line;
struct entry *node[2];
};
static struct entry *
new_node(const char *line)
{
struct entry *e = calloc(1, sizeof *e);
if( e == NULL ){
perror("calloc");
exit(EXIT_FAILURE);
}
e->line = line;
return e;
}
/*
* Note that this tree needs to be rebalanced. In a real
* project, we would use existing libraries.
*/
static struct entry *
lookup(struct entry **lines, const char *line)
{
struct entry *t = *lines;
if( t ){
int cmp = strcmp(line, t->line);
return lookup(&t->node[cmp > 0], line);
} else {
return *lines = new_node(line);
}
}
/* In-order descent of the tree, printing one line per entry */
static void
print_table(const struct entry *t)
{
if( t ){
print_table(t->node[0]);
printf("%s", t->line);
print_table(t->node[1]);
}
}
static void *
xrealloc(void *buf, size_t num, size_t siz, void *endvp)
{
char **endp = endvp;
ptrdiff_t offset = endp && *endp ? *endp - (char *)buf : 0;
buf = realloc(buf, num * siz);
if( buf == NULL ){
perror("realloc");
exit(EXIT_FAILURE);
}
if( endp != NULL ){
*endp = buf + offset;
}
return buf;
}
int
main(int argc, char **argv)
{
FILE *ifp = argc > 1 ? xfopen(argv[1], "r") : stdin;
struct entry *lines = NULL;
char *line = NULL;
size_t cap = 0;
while( getline(&line, &cap, ifp) > 0 ){
(void) lookup(&lines, line);
line = NULL;
}
print_table(lines);
}
FILE *
xfopen(const char *path, const char *mode)
{
FILE *fp = path[0] != '-' || path[1] != '\0' ? fopen(path, mode) :
*mode == 'r' ? stdin : stdout;
if( fp == NULL ){
perror(path);
exit(EXIT_FAILURE);
}
return fp;
}
Related
I have a .csv file. Let's say the data is like this:
Location 1,Location 2,Price,Rooms,Bathrooms,CarParks,Type,Area,Furnish
Upper-East-Side,New-York,310000,3,2,0,Built-up,1000,Partly
West-Village,New-York,278000,2,2,0,Built-up,1000,Partly
Theater-District,New-York,688000,3,2,0,Built-up,1000,Partly
Expected output (alphabetized):
Theater-District
Upper-East-Side
West-Village
How can I only show and alphabetize the first column (Location 1) of the file while also skipping the header?
This is currently my code but it's still in a "read and display" form.
#include <stdio.h>
int main()
{
FILE *fh;
fh = fopen("file.csv", "r");
if (fh != NULL)
{
int line_number = 0;
char c;
while ( (c = fgetc(fh)) != EOF )
{
if(line_number > 0 || c == '\n'){
putchar(c);
}
if(c == '\n'){
line_number++;
}
}
fclose(fh);
} else printf("Error opening file.\n");
return 0;
}
csv is not a well defined format so I suggest you use an existing csv library instead of parsing the data yourself. For instance, this will not work if the first field has any embedded commas. It relies on scanf() to allocate the line, and resizes the lines array as needed. This means there are no arbitrary limits.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int strcmp2(const void *a, const void *b) {
return strcmp((const char *) a, (const char *) b);
}
int main() {
FILE *f = fopen("unsorted.csv", "r");
if(!f) return 1;
char **lines = NULL;
size_t n = 0;
for(;; n++) {
char *location1;
int rv = fscanf(f, "%m[^,]%*[^\n]\n", &location1);
if(rv != 1) break;
char **tmp = realloc(lines, (n + 1) * sizeof *tmp);
if(!tmp) return 1;
lines = tmp;
tmp[n] = location1;
}
fclose(f);
free(lines[0]); // header
qsort(&lines[1], n - 1, sizeof *lines, strcmp2);
for(size_t i = 1; i < n; i++) {
printf("%s\n", lines[i]);
free(lines[i]);
}
free(lines);
}
It produces the expected output:
Theater-District
Upper-East-Side
West-Village
So, assuming some hard limits on line length and CSV file record count, we can just use arrays.
To read a record, just use fgets(). Add each line of text to the array using the usual method.
We use a simple string search and truncate to isolate the first field. (Assuming no fancy stuff like double-quoted fields. I assume you are doing homework.)
To sort everything except the CSV header record, use qsort() with a little additional mathematics.
#include <iso646.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define unused(x) (void)(x)
#define MAX_LINE_LENGTH 100
#define MAX_RECORD_COUNT 100
int main( int argc, char ** argv )
{
unused( argc );
char records[MAX_RECORD_COUNT][MAX_LINE_LENGTH];
size_t record_count = 0;
const char * filename = argv[1];
if (!filename) return 1;
// Read our records from file
FILE * f = fopen( filename, "r" );
if (!f) return 1;
while ((record_count < MAX_RECORD_COUNT)
and fgets( records[record_count], MAX_LINE_LENGTH, f ))
record_count += 1;
fclose( f );
// Truncate the strings to just the first field
for (size_t n = 0; n < record_count; n++)
{
char * p = strchr( records[n], ',' );
if (p) *p = '\0';
}
// Sort everything but the header
if (record_count > 2) // must exist at least two records + header
qsort( records+1, record_count-1, MAX_LINE_LENGTH,
(int (*)( const void *, const void * ))strcmp );
// Print everything but the header
for (size_t n = 1; n < record_count; n++)
printf( "%s\n", records[n] );
return 0;
}
I am doing a project where I have to read in text from a file and then extract every word that is 4 characters long and allocate it into dynamic array.My approach is to create int function that will get number of 4 letter words and return that number , then create another function that will grab that number and create dynamic array consisting of that many elements. The problem with this approach is how to populate that array with words that meet the requirement.
int func1(FILE *pFile){
int counter = 0;
int words = 0;
char inputWords[length];
while(fscanf(pFile,"%s",inputWords) != EOF){
if(strlen(inputWords)==4){
#counting 4 letter words
counter++;
}
}
}
return counter;
}
int main(){
#creating pointer to a textFile
FILE *pFile = fopen("smallDictionary.txt","r");
int line = 0;
#sending pointer into a function
func1(pFile);
fclose(pFile);
return 0;
}
I would suggest reading lines of input with fgets(), and breaking each line into tokens with strtok(). As each token is found, the length can be checked, and if the token is four characters long it can be saved to an array using strdup().
In the code below, storage is allocated for pointers to char which will store the addresses of four-letter words. num_words holds the number of four-letter words found, and max_words holds the maximum number of words that can currently be stored. When a new word needs to be added, num_words is incremented, and if there is not enough storage, more space is allocated. Then strdup() is used to duplicate the token, and the address is assigned to the next pointer in words.
Note that strdup() is not in the C Standard Library, but that it is POSIX. The feature test macro in the first line of the program may be needed to enable this function. Also note that strdup() allocates memory for the duplicated string which must be freed by the caller.
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
Update
OP has mentioned that nonstandard functions are not permitted in solving this problem. Though strdup() is POSIX, and both common and standard in this sense, it is not always available. In such circumstances it is common to simply implement strdup(), as it is straightforward to do so. Here is the above code, modified so that now the function my_strdup() is used in place of strdup(). The code is unchanged, except that the feature test macro has been removed, the call to strdup() has been changed to my_strdup(), and of course now there is a function prototype and a definition for my_strdup():
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BUF_SZ 1000
#define ALLOC_INC 100
char * my_strdup(const char *);
int main(void)
{
FILE *fp = fopen("filename.txt", "r");
if (fp == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char buffer[BUF_SZ];
char **words = NULL;
size_t num_words = 0;
size_t max_words = 0;
char *token;
char *delims = " \t\r\n";
while (fgets(buffer, sizeof buffer, fp) != NULL) {
token = strtok(buffer, delims);
while (token != NULL) {
if (strlen(token) == 4) {
++num_words;
if (num_words > max_words) {
max_words += ALLOC_INC;
char **temp = realloc(words, sizeof *temp * max_words);
if (temp == NULL) {
perror("Unable to allocate memory");
exit(EXIT_FAILURE);
}
words = temp;
}
words[num_words-1] = my_strdup(token);
}
token = strtok(NULL, delims);
}
}
if (fclose(fp) != 0) {
perror("Unable to close file");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < num_words; i++) {
puts(words[i]);
}
/* Free allocated memory */
for (size_t i = 0; i < num_words; i++) {
free(words[i]);
}
free(words);
return 0;
}
char * my_strdup(const char *str)
{
size_t sz = strlen(str) + 1;
char *dup = malloc(sizeof *dup * sz);
if (dup) {
strcpy(dup, str);
}
return dup;
}
Final Update
OP had not posted code in the question when the above solution was written. The posted code does not compile as is. In addition to missing #includes and various syntax errors (extra braces, incorrect comment syntax) there are a couple of more significant issues. In func1(), the length variable is used uninitialized. This should be large enough so that inputWords[] can hold any expected word. Also, width specifiers should be used with %s in scanf() format strings to avoid buffer overflow. And, OP code should be checking whether the file opened successfully. Finally, func1() returns a value, but the calling function does not even assign this value to a variable.
To complete the task, the value returned from func1() should be used to declare a 2d array to store the four-letter words. The file can be rewound, but this time as fscanf() retrieves words in a loop, if a word has length 4, strcpy() is used to copy the word into the array.
Here is a modified version of OP's code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD 100
int func1(FILE *pFile){
int counter = 0;
char inputWords[MAX_WORD];
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
counter++;
}
}
return counter;
}
int main(void)
{
FILE *pFile = fopen("filename.txt","r");
if (pFile == NULL) {
perror("Unable to open file");
exit(EXIT_FAILURE);
}
char inputWords[MAX_WORD];
int num_4words = func1(pFile);
char words[num_4words][MAX_WORD];
int counter = 0;
rewind(pFile);
while(fscanf(pFile,"%99s",inputWords) != EOF) {
if(strlen(inputWords) == 4) {
strcpy(words[counter], inputWords);
counter++;
}
}
if (fclose(pFile) != 0) {
perror("Unable to close file");
}
for (int i = 0; i < num_4words; i++) {
puts(words[i]);
}
return 0;
}
Sample Text file:
234765 PETER
867574 SMITH
I'm trying to take the id and string from the text file and save it into a struct. The id is saving fine but the string isn't.
typedef struct student
{
int id[DATA_SIZE];
char *student[DATA_SIZE];
}studentinfo;
studentinfo list;
struct student *create_space(int size)
{
struct student *tmp = (struct student*)malloc(size*sizeof(struct student));
return(tmp);
}
struct student * readData(struct student*pointer,studentinfo v)
{
int count =0;
int tmpid;
char str[256];
FILE* in_file;
in_file = fopen("studentlist.txt","r");
while(fscanf(in_file,"%d",&tmpid)!= EOF && count<DATA_SIZE)
{
fscanf(in_file,"%s",v.student[count]);
//printf("%s\n",str );
v.id[count]=tmpid;
count++;
}
pointer =&v;
return pointer;
}
int main()
{
struct student *data;
struct student *sdata;
data = create_space(1);
sdata = readData(data,list);
//printf("%s\n",sdata->student[2] );
}
Their are a couple of issues:
fscanf() reads formatted input, and returns the number of items read.
This line:
while(fscanf(in_file,"%d",&tmpid)!= EOF && count<DATA_SIZE)
Could be this:
while (count < DATA_SIZE && fscanf(in_file, "%d %255s", &list.id[count], str) == 2) {
Which verifies that 2 values are being read on each line successfully.
You are not checking if in_file returns NULL. It's safe to do this. This goes the same for malloc().
You need to correctly create space for char *students[DATA_SIZE], as this is an array of char * pointers. Once you allocate space for this via malloc() or strdup(), then you can copy the contents into students.
Here is an example of doing such a thing:
while (count < DATA_SIZE && fscanf(in_file, "%d %255s", &list.id[count], str) == 2) {
/* allocate space for one student */
list.student[count] = malloc(strlen(str)+1);
if (!list.student[count]) {
printf("Cannot allocate string\n");
exit(EXIT_FAILURE);
}
/* copy it into array */
strcpy(list.student[count], str);
count++;
}
Here is an example that you can use to help achieve your desired result:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DATA_SIZE 256
typedef struct {
int id[DATA_SIZE];
char *student[DATA_SIZE];
} studentinfo_t;
int main(void) {
FILE *in_file;
studentinfo_t list;
char str[DATA_SIZE];
size_t count = 0;
in_file = fopen("studentlist.txt", "r");
if (!in_file) {
fprintf(stderr, "%s\n", "Error reading file");
exit(EXIT_FAILURE);
}
while (count < DATA_SIZE && fscanf(in_file, "%d %255s", &list.id[count], str) == 2) {
list.student[count] = malloc(strlen(str)+1);
if (!list.student[count]) {
printf("Cannot allocate string\n");
exit(EXIT_FAILURE);
}
strcpy(list.student[count], str);
count++;
}
for (size_t i = 0; i < count; i++) {
printf("%d %s\n", list.id[i], list.student[i]);
}
return 0;
}
I am supoosed write a function that reads two text files line by line, compare them, delete the duplicates and but them into a third file in alphabetical order...I have been working on this for over a month and I am still stuck I have tried several ways to do this and come up with nothing...I was in formed that i have to use strcmp to do this and I cant use any other predefined sorting function...I have also looked around on this site and cannot find much that helps with this...any help would be greatly appreciated..Here is what I have so far:
#include<stdio.h>
#include<string.h>
main (void)
{
char str [200];
char str2 [200];
char new [100];
char temp [100];
int row = 10;
FILE *fa = fopen ("book1.dat", "r");
FILE *fb = fopen ("book2.dat", "r");
FILE *fc = fopen ("fixed.txt", "w");
int i;
int j;
int k;
while (fgets (str, 200, fa) !=NULL && fgets (str2, 200, fb) !=NULL)
{
puts(str);
puts(str2);
if (strcmp( str, str2) ==0 )
{
strcpy (str , new);
} else {
strcpy (new, str);
strcpy (new, str2);
}
}
for ( i = 0; i < row; i++)
{
for (j = i+1; j< row; j++)
{
if(strcmp(new[i], new [j]) > 0)
{
strcpy (temp, new);
strcpy(new, new);
strcpy(new, temp);
}
}
}
for (i = 0; i < length; i ++)
{
fputs(new, fc);
}
}
Your use of strcpy() is peculiar. Recall its signature:
char *strcpy(char *dest, const char *src)
Here's a usage that doesn't make immediate sense to me:
strcpy (new, str); // new now has str
strcpy (new, str2); // new now has str2
You've effectively overwritten something there. I would start from there, and see what else may not be working as you intend. Furthermore, if you can use gcc, look into using gdb as well to debug your code. (You would need to compile with the -g flag.)
First off, can you assume the duplicates from book1 and book2 line up nicely?
Think about how you would detect if the first entry in book1 is identical to the last entry in book2.
Secondly, you have to sort your output alphabetically. Sorting algorithms is kind of one of those common things that students are forced to do all the time. It builds character. For bonus kudos, implement quick sort.
sample a way.
error handling is omitted.
since we are using the sort function of the library sqort, implement your own.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LINE_MAX_SIZE 256
typedef struct filePos {
FILE *fp;
long pos;
} FilePos;
typedef struct myfile {
int lines;
int capacity;
FILE *fp;
FilePos *filePoss;
} MyFile;
MyFile *myfopen(const char *filepath){
char buff[LINE_MAX_SIZE];
MyFile *mfp;
mfp = (MyFile*)malloc(sizeof(MyFile));
mfp->lines = 0;
mfp->capacity=16;
mfp->filePoss=NULL;
mfp->filePoss=(FilePos*)realloc(mfp->filePoss, sizeof(FilePos)*(mfp->capacity *= 2));
mfp->fp = fopen(filepath, "r");
do{
mfp->filePoss[mfp->lines].fp = mfp->fp;
mfp->filePoss[mfp->lines].pos = ftell(mfp->fp);
if(++mfp->lines == mfp->capacity){
mfp->filePoss=(FilePos*)realloc(mfp->filePoss, sizeof(FilePos)*(mfp->capacity *= 2));
}
}while(NULL!=fgets(buff, LINE_MAX_SIZE, mfp->fp));
--mfp->lines;
return mfp;
}
void myfclose(MyFile *mfp){
free(mfp->filePoss);
fclose(mfp->fp);
free(mfp);
}
char *myfgets(FilePos *p, char *buff){
fseek(p->fp, p->pos, SEEK_SET);
return fgets(buff, LINE_MAX_SIZE, p->fp);
}
int myfcomp(const void *a, const void *b){
char buff_a[LINE_MAX_SIZE];
char buff_b[LINE_MAX_SIZE];
FilePos *fpa,*fpb;
fpa=(FilePos*)a;
fpb=(FilePos*)b;
myfgets(fpa, buff_a);
myfgets(fpb, buff_b);
return strcmp(buff_a, buff_b);
}
void myfsort(MyFile *mfp){
qsort(mfp->filePoss, mfp->lines, sizeof(FilePos), myfcomp);
}
void myfprint(MyFile *mfp){
char buff[LINE_MAX_SIZE];
int i;
for(i=0;i<mfp->lines ;++i)
printf("%s", myfgets(mfp->filePoss + i, buff));
}
void merge(const char *inpfile1, const char *inpfile2, const char *outfile){
FILE *fo;
MyFile *fi1, *fi2;
char buff_f1[LINE_MAX_SIZE];
char buff_f2[LINE_MAX_SIZE];
char buff_fo[LINE_MAX_SIZE];
char *outbuff=NULL;
int fi1_line, fi2_line;
int eof1, eof2;
fo=fopen(outfile, "w");
fi1=myfopen(inpfile1);
fi2=myfopen(inpfile2);
myfsort(fi1);
myfsort(fi2);
fi1_line=fi2_line=0;
eof1=eof2=0;
*buff_fo='\0';
while(1){
if(!eof1 && outbuff != buff_f2){
myfgets(&(fi1->filePoss[fi1_line]), buff_f1);
}
if(!eof2 && outbuff != buff_f1){
myfgets(&(fi2->filePoss[fi2_line]), buff_f2);
}
if(!eof1 && !eof2){
if(strcmp(buff_f1, buff_f2) <= 0){
outbuff=buff_f1;
++fi1_line;
} else {
outbuff=buff_f2;
++fi2_line;
}
} else if(!eof1 && eof2){
outbuff=buff_f1;
++fi1_line;
} else if(eof1 && !eof2){
outbuff=buff_f2;
++fi2_line;
} else {
break;
}
if(strcmp(outbuff, buff_fo) != 0){//duplicate check
strcpy(buff_fo, outbuff);
fputs(buff_fo, fo);
}
if(fi1->lines == fi1_line)
eof1 = !0;
if(fi2->lines == fi2_line)
eof2 = !0;
}
myfclose(fi2);
myfclose(fi1);
fclose(fo);
}
int main(){
merge("book1.txt", "book2.txt", "fixed.txt");
return 0;
}
I'm new to C; please try to help me as much as you can.
I'm getting as arguments to main() pointers to files,
so in a for loop I fopen() them and want to send them to a function that will
read the text info inside them and put it in char variables.
Here is an example file:
#station name
Station Name : A1
#octan of fuel 6.54 full service price 6.40 self service
Octan95,6.54,6.40
Octan98,8.30,8.15
#carNum,Octan,numOfLiters,Kind of service
22-334-55,95,31.3,FullService
22-334-55,95,31.3,SelfService
11-444-77,95,12,FullService
11-444-77,95,44.1,FullService
11-444-77,95,11.22,SelfService
The text has fields separated with commas, and I need the information between those commas to be added to vars.
What will be the best way or function to read these text files?
Also should I expect '\n' after each line or will it stream as one big char[] without the new line character?
read file line by line
use strtok function to get everything in between commas
read file line by line and use sscanf with return-value to get everything in between commas
Some 200 lines of code later...and using a slightly modified version of your data file (note that the second header line in the original is missing all the commas):
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
** Example data:
**
** #station name
** Station Name : A1
** #octan of fuel,full service price,self service price
** Octan95,6.54,6.40
** Octan98,8.30,8.15
** #carNum,Octan,numOfLiters,Kind of service
** 22-334-55,95,31.3,FullService
** 22-334-55,95,31.3,SelfService
** 11-444-77,95,12,FullService
** 11-444-77,95,44.1,FullService
** 11-444-77,95,11.22,SelfService
**
** - Header lines are followed by one or more data lines
** - Number of fields in header matches number of fields in each data line
** - Commas separate fields and do not appear within fields (not full CSV)
*/
/* A Line structure holds the fields for one line */
typedef struct Line
{
size_t num_fields;
char **fields;
} Line;
/* A Section structure holds the header line and the set of data lines */
typedef struct Section
{
size_t num_rows;
size_t num_cols;
Line header;
Line *lines; /* Array of lines - num_rows entries in array */
} Section;
/* An Info structure holds all the sections for a single file */
typedef struct Info
{
size_t num_sections;
Section *sections;
} Info;
static void err_exit(const char *format, ...)
{
va_list args;
va_start(args, format);
vfprintf(stderr, format, args);
va_end(args);
exit(1);
}
static void *xrealloc(void *old_data, size_t nbytes)
{
void *new_data = realloc(old_data, nbytes);
if (new_data == 0)
err_exit("Out of memory!\n");
return new_data;
}
static void *xmalloc(size_t nbytes)
{
void *new_data = malloc(nbytes);
if (new_data == 0)
err_exit("Out of memory!\n");
return new_data;
}
/* Duplicate a string of given length (excluding NUL) */
static char *xstrndup(const char *str, size_t len)
{
char *new_data = xmalloc(len+1);
memmove(new_data, str, len);
new_data[len] = '\0';
return new_data;
}
static void dump_line(FILE *fp, const Line * const line)
{
size_t i;
const char *pad = "";
for (i = 0; i < line->num_fields; i++)
{
fprintf(fp, "%s%*s", pad, 1, line->fields[i]);
pad = " ";
}
fputc('\n', fp);
}
static void dump_section(FILE *fp, const char *tag, const Section * const section)
{
if (tag != 0)
fprintf(fp, "Dump Section: %s\n", tag);
fprintf(fp, "Number of columns: %zd\n", section->num_cols);
fprintf(fp, "Number of lines: %zd\n", section->num_rows);
dump_line(fp, §ion->header);
for (size_t i = 0; i < section->num_rows; i++)
dump_line(fp, §ion->lines[i]);
}
static void dump_info(FILE *fp, const char *tag, const Info * const info)
{
size_t i;
fprintf(fp, "Dump Information: %s\n", tag);
fprintf(fp, "Number of sections: %zd\n", info->num_sections);
for (i = 0; i < info->num_sections; i++)
{
char title[20];
snprintf(title, sizeof(title), "%d", i+1);
dump_section(fp, title, &info->sections[i]);
}
fprintf(fp, "End of Information Dump\n");
}
static int num_fields(const char *buffer)
{
size_t posn = 0;
size_t next;
int count = 0;
while ((next = strcspn(buffer + posn, ",\n")) > 0)
{
count++;
if (buffer[posn+next] == '\n')
break;
posn += next + 1;
}
return count;
}
static void set_line(Line *line, int nfields, const char *buffer)
{
size_t posn = 0;
line->num_fields = nfields;
line->fields = xmalloc(nfields * sizeof(*line->fields));
for (int i = 0; i < nfields; i++)
{
size_t next = strcspn(buffer+posn, ",\n");
line->fields[i] = xstrndup(buffer+posn, next);
if (buffer[posn+next] == '\n')
{
if (i != nfields - 1)
err_exit("Internal error: field count mismatch\n");
break;
}
posn += next + 1;
}
}
static int add_section(Info *info, char *buffer)
{
int nfields = num_fields(buffer);
int nsections = info->num_sections + 1;
info->sections = xrealloc(info->sections, nsections * sizeof(*info->sections));
info->num_sections = nsections;
Section *new_section = &info->sections[nsections-1];
new_section->num_cols = nfields;
new_section->num_rows = 0;
set_line(&new_section->header, nfields, buffer);
new_section->lines = 0;
return nfields;
}
/* Beware - very compact code! */
static void add_line_to_section(Section *section, const char *buffer, int nfields)
{
section->lines = xrealloc(section->lines, (section->num_rows + 1) * sizeof(*section->lines));
set_line(§ion->lines[section->num_rows++], nfields, buffer);
}
static int peek(FILE *fp)
{
int c;
if ((c = getc(fp)) != EOF)
ungetc(c, fp);
return c;
}
static void read_info(FILE *fp, Info *info)
{
char buffer[1024];
while (fgets(buffer, sizeof(buffer), fp) != 0)
{
if (*buffer != '#')
err_exit("Format error: expected line beginning '#' (got '%.*s')\n",
10, buffer);
int nfields = add_section(info, buffer+1);
int c;
Section *cursect = &info->sections[info->num_sections-1];
while ((c = peek(fp)) != EOF && c != '#')
{
if (fgets(buffer, sizeof(buffer), fp) != 0)
{
int lfields = num_fields(buffer);
if (lfields != nfields)
err_exit("Mismatch in number of fields (got %d, wanted %) at '%*s'\n",
lfields, nfields, 20, buffer);
add_line_to_section(cursect, buffer, nfields);
}
}
}
}
int main(int argc, char **argv)
{
int i;
Info info = { 0, 0 };
for (i = 1; i < argc; i++)
{
FILE *fp;
if ((fp = fopen(argv[i], "r")) != 0)
{
read_info(fp, &info);
dump_info(stdout, "After loop", &info);
}
else
fprintf(stderr, "Failed to open file %s (%s)\n", argv[i], strerror(errno));
}
dump_info(stdout, "End of main loop", &info);
return 0;
}
The code is not optimal in most senses - it allocates far too many small bits of memory. I also got lazy and didn't write the code to free the memory. I don't think it would be a good idea to hand this in as your code, though.