Related
I have a hard time understanding how you process ascii files in c. I have no problem opening files and closing them or reading files with one value on each line. However, when the data is separated with characters, I really don't understand what the code is doing at a lower level.
Example: I have a file containing names separated by commas that looks like this:
"MARY","PATRICIA","LINDA","BARBARA","ELIZABETH","JENNIFER"
I have created an array to store them:
char names[6000][20];
And now, my code to process it is while (fscanf(data, "\"%s\",", names[index]) != EOF) { index++; }
The code executes for the 1st iteration and names[0] contains the whole file.
How can I separate all the names?
Here is the full code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reads names from ./022names.txt into a fixed-size table with fscanf(),
 * then prints the number of successful reads followed by every stored entry.
 */
int main() {
char names[6000][20]; // an array to store 6k names of max length 19
// NOTE(review): the fopen() result is used below without a NULL check.
FILE * data = fopen("./022names.txt", "r");
int index = 0;
int nbNames;
// NOTE(review): %s consumes every non-whitespace character, including the
// quotes and commas, so for a file with no whitespace the first call stores
// the whole file in names[0]. %s also has no field width, so a 20-byte row
// can be overrun. A scanset such as "%19[^\"]" stops at the closing quote.
while (fscanf(data, "\"%s\",", names[index]) != EOF) {
index++;
}
nbNames = index;
fclose(data);
printf("%d\n", index);
for (index=0; index<nbNames; index++) {
printf("%s \n", names[index]);
}
printf("\n");
return 0;
}
PS: I am thinking this might also be because of the data structure of my array.
If you want a simple solution, you can read the file character by character using fgetc. Since there are no newlines in the file, just ignore quotation marks and move to the next index when you find a comma.
char names[6000][20]; // an array to store 6k names of max length 19
FILE * data = fopen("./022names.txt", "r");
int name_count = 0, current_name_ind = 0;
int c;
while ((c = fgetc(data)) != EOF) {
if (c == ',') {
names[name_count][current_name_ind] = '\0';
current_name_ind = 0;
++name_count;
} else if (c != '"') {
names[name_count][current_name_ind] = c;
++current_name_ind;
}
}
names[name_count][current_name_ind] = '\0';
fclose(data);
"The code executes for the 1st iteration and names[0] contains the whole file...., How can I separate all the names?"
Regarding the first few statements:
char names[6000][20]; // an array to store 6k names of max length 19
FILE * data = fopen("./022names.txt", "r");
What if there are 6001 names? Or what if one of the names has more than 19 characters (each 20-byte row must also hold the terminating '\0')?
Or what if there are way less than 6000 names?
The point is that with some effort to enumerate the tasks you have listed, and some time mapping out what information is needed to create the code that matches your criteria, you can create a better product: The following is derived from your post:
Process ascii files in c
Read file content that is separated by characters
input is a comma separated file, with other delimiters as well
Choose a method best suited to parse a file of variable size
As mentioned in the comments under your question, there are ways to write your algorithms in such a way as to flexibly allow for extra-long names, or for a variable number of names. This can be done using a few C standard functions commonly used in parsing files. (Although fscanf() has its place, it is not the best option for parsing file contents into array elements.)
The following approach performs the following steps to accomplish the user needs enumerated above
Read file to determine number of, and longest element
Create array sized to contain exact contents of file using count of elements and longest element using variable length array (VLA)
Create function to parse file contents into array. (using this technique of passing VLA as function argument.)
Following is a complete example of how to implement each of these, while breaking the tasks into functions when appropriate...
Note, code below was tested using the following input file:
names.txt
"MARY","PATRICIA","LINDA","BARBARA","ELIZABETH","JENNIFER",
"Joseph","Bart","Daniel","Stephan","Karen","Beth","Marcia",
"Calmazzothoulumus"
.
//Prototypes
int count_names(const char *filename, size_t *count);
size_t filesize(const char *fn);
void populateNames(const char *fn, int longest, char arr[][longest]);
char *filename = ".\\names.txt";

/*
 * Two-pass driver: the first pass measures the file (number of names and the
 * longest name), the second pass fills a VLA sized exactly to fit.
 * Returns 0 on success, 1 when the file is missing or contains no names.
 */
int main(void)
{
    size_t count = 0;
    int longest = count_names(filename, &count);

    // A VLA with a zero dimension is undefined behaviour, so stop here
    // when nothing was found instead of declaring names[0][1].
    if (count == 0 || longest <= 0)
        return 1;

    char names[count][longest+1];//VLA - See linked info
                                 // +1 is room for null termination
    memset(names, 0, sizeof names);
    populateNames(filename, longest+1, names);
    return 0;
}
/*
 * Populate a VLA with the names found in a file.
 *
 * fn      - path of the file to read
 * longest - width of one row of names[], INCLUDING the terminating '\0'
 * names   - destination table; the caller must provide at least as many
 *           rows as the file has names (this function cannot check that)
 *
 * A name is a maximal run of characters that are not one of the delimiters
 * (double quote, comma, newline, space).  The file is scanned one character
 * at a time, so a name is never cut in two by a line buffer, however long
 * the line is.  Names wider than a row are truncated to longest-1 characters.
 * If the file cannot be opened the table is left untouched.
 */
void populateNames(const char *fn, int longest, char names[][longest])
{
    static const char delim[] = "\",\n ";
    FILE *fp;
    int row = 0;
    int col = 0;
    int c;

    if (longest <= 0)
        return;

    fp = fopen(fn, "r");
    if (fp == NULL)
        return;

    while ((c = fgetc(fp)) != EOF)
    {
        if (strchr(delim, c) != NULL)
        {
            // a delimiter closes the name in progress, if there is one
            if (col > 0)
            {
                names[row][col] = '\0';
                row++;
                col = 0;
            }
        }
        else if (col < longest - 1)
        {
            names[row][col++] = (char)c;
        }
    }

    // the last name may end at EOF without a trailing delimiter
    if (col > 0)
        names[row][col] = '\0';

    fclose(fp);
}
/*
 * Passes back the count of tokens in the file and returns the length of the
 * longest token.
 *
 * filename - path of the file to scan
 * count    - receives the number of tokens; left untouched if the file
 *            cannot be opened
 *
 * Returns the length of the longest token, or 0 if the file cannot be opened
 * or holds no tokens.  A token is a maximal run of characters that are not
 * one of the delimiters (double quote, comma, newline, space).
 *
 * The file is streamed one character at a time, so no file-sized buffer has
 * to be allocated and an empty file needs no special handling.
 */
int count_names(const char *filename, size_t *count)
{
    static const char delim[] = "\",\n ";
    int len = 0, lenKeep = 0;
    size_t cnt = 0;
    int c;
    FILE *fp = fopen(filename, "r");

    if (fp == NULL)
        return 0;

    while ((c = fgetc(fp)) != EOF)
    {
        if (strchr(delim, c) != NULL)
        {
            // a delimiter closes the token in progress, if there is one
            if (len > 0)
            {
                cnt++;
                if (lenKeep < len) lenKeep = len;
                len = 0;
            }
        }
        else
        {
            len++;
        }
    }

    // the last token may end at EOF without a trailing delimiter
    if (len > 0)
    {
        cnt++;
        if (lenKeep < len) lenKeep = len;
    }

    *count = cnt;
    fclose(fp);
    return lenKeep;
}
/*
 * Return the size of a file in bytes (binary read).
 *
 * fn - path of the file to measure
 *
 * Returns 0 when the file cannot be opened, cannot be seeked, or its position
 * cannot be reported.  ftell() signals failure with -1L; that value must not
 * be converted to size_t, where it would turn into a huge bogus size.
 */
size_t filesize(const char *fn)
{
    size_t size = 0;
    FILE *fp = fopen(fn, "rb");

    if (fp == NULL)
        return 0;

    if (fseek(fp, 0, SEEK_END) == 0)
    {
        long pos = ftell(fp);
        if (pos > 0)
            size = (size_t)pos;
    }
    fclose(fp);
    return size;
}
You can use the in-built strtok() function which is easy to use.
I have used the tok+1 instead of tok to omit the first " and strlen(tok) - 2 to omit the last ".
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Read ./022names.txt (quoted names separated by commas), split it with
 * strtok() and print the number of names followed by each name.
 * Returns 0 on success, 1 if the file cannot be opened or memory runs out.
 */
int main() {
    char names[6000][20]; // an array to store 6k names of max length 19
    const int maxNames = (int)(sizeof names / sizeof names[0]);
    const size_t maxLen = sizeof names[0] - 1;
    int index = 0;
    int nbNames;

    FILE *data = fopen("./022names.txt", "r");
    if (data == NULL) {
        perror("./022names.txt");
        return 1;
    }

    char *str = malloc(120000 * sizeof(char));
    if (str == NULL) {
        perror("malloc");
        fclose(data);
        return 1;
    }

    // the field width keeps fscanf from writing past the 120000-byte buffer
    while (index < maxNames && fscanf(data, "%119999s", str) == 1) {
        char *tok = strtok(str, ",");
        while (tok != NULL && index < maxNames) {
            // tok+1 omits the first " and strlen(tok)-2 omits the last ";
            // guard the subtraction so a token shorter than two characters
            // cannot wrap around to a huge length
            size_t len = strlen(tok);
            len = (len >= 2) ? len - 2 : 0;
            if (len > maxLen) {
                len = maxLen;
            }
            memcpy(names[index], tok + 1, len);
            names[index][len] = '\0'; // names[] is uninitialised: terminate explicitly
            index++;
            tok = strtok(NULL, ",");
        }
    }
    nbNames = index;
    fclose(data);
    free(str); // just to free the memory occupied by the str variable in the heap.

    printf("%d\n", index);
    for (index = 0; index < nbNames; index++) {
        printf("%s \n", names[index]);
    }
    printf("\n");
    return 0;
}
Also, the parameter 120000 is just the maximum number of characters that can be in the file. It is just 6000 * 20 as you mentioned.
I want to learn how to load multiple structures (many students: name, surname, index, address...) from a text file looking like:
Achilles, 9999
Hector, 9998
Menelaos, 9997
... and so on
Struct can be like:
/* One student record: a pointer to the name text and a numeric index. */
// NOTE(review): the closing brace is not followed by ';', and the attempt
// below refers to a type called STUDENT, which this declaration does not define.
struct student_t {
char *name;
int index;
}
My attempt (doesn't work; I'm not even sure if fgets+sscanf is a considerable option here):
/*
 * Attempt to load numStudents "name, index" lines from file.txt into a
 * malloc'ed array of students and print them.
 */
int numStudents=3; //to simplify... I'd need a function to count num of lines, I imagine
int x, y=1000, err_code=1;
FILE *pfile = fopen("file.txt", "r");
if(pfile==0) {return 2;}
STUDENT* students = malloc(numStudents * sizeof *students);
char buffer[1024];
// NOTE(review): ptr holds numStudents (3) uninitialised pointers; the loop
// below dereferences them (*ptr[x]) and also indexes ptr[y] with y starting
// at 1000, which is outside the array.
char *ptr[numStudents];
for (x = 0; x < numStudents; x++){ //loop for each student
students[x].name=malloc(100); //allocation of each *name field
fgets(buffer, 100, pfile); //reads 1 line containing data of 1 student, to buffer
// NOTE(review): "*ptr[x] = strtok(...)" stores a pointer into a char; the
// token pointer itself is never saved.  For x > 0, strtok(NULL, ...) continues
// the previous scan instead of starting on the line just read into buffer.
if(x==0) *ptr[x] = strtok(buffer, ",");//cuts buffer into tokens: ptr[x] for *name
else *ptr[x] = strtok(NULL, ","); //cuts next part of buffer
sscanf(ptr[x], "%19s", students[x].name); //loads the token to struct field
*ptr[y] = strtok(NULL, ","); //cuts next part of the buffer
// NOTE(review): students[y] is indexed with y >= 1000 although only
// numStudents elements were allocated.
students[y].index = (int)strtol(ptr[y], NULL, 10); //loads int token to struct field
*buffer='\0';//resets buffer to the beginning for the next line from x++ fgets...
y++;//the idea with y=1000 is that I need another pointer to each struct field right?
}
for (x = 0; x < numStudents; x++)
printf("first name: %s, index: %d\n",students[x].name, students[x].index);
return students;
Then printf it to see what was loaded. (to simplify my real structure that has 6 fields). I know a nice method to load 1 student from user input...(How to scanf commas, but with commas not assigned to a structure? C) however to load multiple, I have this idea but I'm not sure if it's too clumsy to work or just terrybly written.
Later I'd try to sort students by name , and perhaps even try to do a realloc buffer that increases it's size along with new students being loaded to buffer... and then to sort what'd been loaded... but I imagine that first I need to load it from the file to buffer and from buffer to fill structure, to be able to sort it then?...
Thanks A LOT for all the help!
C is a little harsh. I use GNU getline below, which may be not portable, which you might end up implementing yourself. I use stdin for input FILE * just for simplicity.
The program reads the students list into the students array. Then I sort the students by comparing indexes, then by name, each time with printing out.
Your code is a bit of a mishmash - try to write a separate function for loading a single student. You don't need char *ptr[numStudents], just a single char *ptr for the strtok function. strtok is a little tricky to use, so I prefer calling strchr multiple times. I used memcpy to copy just the name out of the line; remember to null-terminate it.
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
/* A student record: heap-allocated, NUL-terminated name plus a numeric index. */
struct student_s {
    char *name;
    int index;
};

/*
 * qsort() comparator: orders students by name using strcmp().
 * a and b point to struct student_s elements.
 */
static int students_name_cmp(const void *a, const void *b)
{
    const struct student_s *s1 = a;
    const struct student_s *s2 = b;
    return strcmp(s1->name, s2->name);
}

/*
 * qsort() comparator: orders students by ascending index.
 * a and b point to struct student_s elements.
 *
 * The result is built from two comparisons instead of a subtraction:
 * s1->index - s2->index overflows (undefined behaviour) when the operands
 * are far apart, e.g. INT_MIN and INT_MAX, and then reports the wrong order.
 */
static int students_index_cmp(const void *a, const void *b)
{
    const struct student_s *s1 = a;
    const struct student_s *s2 = b;
    return (s1->index > s2->index) - (s1->index < s2->index);
}
/*
 * Read "name, index" lines from stdin into a growing array of students,
 * then print the list sorted by index and again sorted by name.
 * Any allocation or format error is reported on stderr and ends the program
 * with exit(-1).
 */
int main()
{
    struct student_s *students = NULL;
    size_t students_cnt = 0;
    FILE *fp = stdin;
    char *line = NULL;
    size_t len = 0;

    // for each line; getline() returns -1 at end of input or on error
    while (getline(&line, &len, fp) != -1) {
        // grow the array by one; keep the old pointer until realloc succeeds
        struct student_s *grown = realloc(students, (students_cnt + 1) * sizeof(*students));
        if (grown == NULL) {
            fprintf(stderr, "ERROR allocating students!\n");
            exit(-1);
        }
        students = grown;

        // find the comma in the line
        const char * const commapos = strchr(line, ',');
        if (commapos == NULL) {
            fprintf(stderr, "ERROR file is badly formatted!\n");
            exit(-1);
        }

        // the name runs from the start of the line up to the comma;
        // namelen counts those characters plus the terminating '\0'
        const size_t namelen = (size_t)(commapos - line) + 1;

        // alloc memory for the name, copy it and terminate it
        students[students_cnt].name = malloc(namelen * sizeof(char));
        if (students[students_cnt].name == NULL) {
            fprintf(stderr, "ERROR allocating students name!\n");
            exit(-1);
        }
        memcpy(students[students_cnt].name, line, namelen - 1);
        // the last valid slot is namelen - 1; writing at [namelen] would be
        // one past the allocation and leave the string unterminated
        students[students_cnt].name[namelen - 1] = '\0';

        // convert the text after the comma to a number; strtol skips the
        // leading whitespace, so we can start right after the comma
        errno = 0;
        char *endptr;
        const long int tmp = strtol(&line[namelen], &endptr, 10);
        // handle strtol errors, including "no digits at all"
        if (errno || endptr == &line[namelen]) {
            fprintf(stderr, "ERROR converting student index into number\n");
            exit(-1);
        }
        // index is an int, so reject only what does not fit in one
        if (tmp < INT_MIN || INT_MAX < tmp) {
            fprintf(stderr, "ERROR index number is out of allowed range\n");
            exit(-1);
        }
        students[students_cnt].index = (int)tmp;

        // handle the case when the line has more than a name and a number
        if (*endptr != '\n' && *endptr != '\0') {
            fprintf(stderr, "ERROR there are some rabbish characters after the index!");
            exit(-1);
        }

        // finally, increment students count
        students_cnt++;
    }
    free(line); // free(NULL) is a no-op, no guard needed

    // sort by index
    qsort(students, students_cnt, sizeof(*students), students_index_cmp);

    // print students out sorted by index
    printf("Students sorted by index:\n");
    for (size_t i = 0; i < students_cnt; ++i) {
        printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
    }

    // now sort the same array by name
    qsort(students, students_cnt, sizeof(*students), students_name_cmp);

    // print students out sorted by name
    printf("Students sorted by name:\n");
    for (size_t i = 0; i < students_cnt; ++i) {
        printf("student[%zu] = '%s', %d\n", i, students[i].name, students[i].index);
    }

    // free every name, then the array itself
    for (size_t i = 0; i < students_cnt; ++i) {
        free(students[i].name);
    }
    free(students);

    return 0;
}
For the following input on stdin:
Achilles, 9999
Hector, 9998
Menelaos, 9997
the program outputs:
Students sorted by index:
student[0] = 'Menelaos', 9997
student[1] = 'Hector', 9998
student[2] = 'Achilles', 9999
Students sorted by name:
student[0] = 'Achilles', 9999
student[1] = 'Hector', 9998
student[2] = 'Menelaos', 9997
A test version available here on onlinegdb.
Currently working on how to split a .csv file with ",". Then creating a
2-D array to store the Alphabet and the number together. As it stands, the code below outputs: "a,,,,,,,,,,,,,,,,,,,". Also, what is the appropriate data type to declare the 2-D array since the values would be Char and int? Furthermore, I know this is a duplicate question because I've not found previous questions helpful. A simple explanation would be great and appreciated, explanation on how to split the file with this piece code would be perfect "%*[^,]" if possible. Thanks in advance.
Sample contents of the .csv file below.
A,1
B,2
C,3
.....
The program:
/* Print one character, skip ahead with fscanf(), repeat until EOF. */
// NOTE(review): fgetc() returns int; storing it in a char makes the EOF
// comparison unreliable.
char single;
/* char array[26][2]; I was thinking the 2-d array would be declared like that. */
while ((single = fgetc(fpointer)) != EOF)
{
// "%*[^,]" discards characters up to, but not including, the next comma, so
// the following fgetc() returns that comma; once no comma is left the scan
// runs to end of file.
fscanf(fpointer,"%*[^,]");
printf("%c",single);
}
fclose(fpointer);
............................................................
edit code: With strtok() and fgetc()
............................................................
/* Read the file line by line and print the comma-separated tokens. */
//char single;
char s[26] = ",";
char *token;
char str[100];
while (fgets(str,100,fpointer))
{
// NOTE(review): strtok() is only ever called with NULL here; nothing passes
// str as the first argument, so there is no string for it to continue from.
while((token = strtok(NULL, s)) != NULL)
{
printf(" %s\n", token);
}
}
fclose(fpointer);
typedef struct
{
char charVal;
int intVal;
}SplitValue;
SplitValue result[50];
int count = 0;
FILE *myFile = NULL;
fopen_s(&myFile, "mycsvfile.csv", "r");
char single[100];
if (myFile != NULL)
{
while (fgets(single, 100, myFile) != NULL)
{
// store the first char value
result[count].charVal = single[0];
// store the int value as string
char intval[25];
int i = 0;
for (i = 2; single[i] != '\n'; ++i)
{
intval[i - 2] = single[i];
}
intval[i-2] = 0;
// convert the string to int, either using atoi or sscanf
result[count].intVal = atoi(intval);
// get ready for the next item
count++;
}
fclose(myFile);
}
if (count)
{
for (int i = 0; i < count; ++i)
{
printf("Char value: %c and int value: %d\n", result[i].charVal, result[i].intVal);
}
}
Hope this helps!
Try the following solution, considering comments from DYZ and RoadRunner. Hope it helps somehow.
#include <stdio.h>
#include <stdlib.h>
/* One row of the CSV file: a letter and the integer that follows the comma. */
typedef struct charIntPair {
    char alpha;
    int value;
} charIntPair_t;

#define MAX_ALPHABET_LENGTH 26

/* Parsed rows and the number of valid entries in myAlphabet. */
charIntPair_t myAlphabet[MAX_ALPHABET_LENGTH];
int alphabetLength = 0;

/*
 * Read up to MAX_ALPHABET_LENGTH "<char>,<int>" lines from mycsvfile.csv and
 * print them.  Reading stops at the first line that does not yield both a
 * character and an integer.
 * Returns 0 on success, 1 if the file could not be opened.
 */
int main() {
    FILE *fp = fopen("mycsvfile.csv","r");
    if (!fp)
        return 1; // File could not be opened.

    char line[100];
    for (alphabetLength=0; alphabetLength < MAX_ALPHABET_LENGTH && fgets(line,sizeof line,fp); alphabetLength++) {
        int elementsRead = sscanf (line,"%c,%d",
                                   &myAlphabet[alphabetLength].alpha,
                                   &myAlphabet[alphabetLength].value);
        if (elementsRead < 2)  // not a valid char/int-combination?
            break;
    }
    fclose(fp); // the stream is no longer needed once the rows are parsed

    for (int i=0; i<alphabetLength; i++) {
        printf("element %d is (%c,%d)\n", i, myAlphabet[i].alpha, myAlphabet[i].value);
    }
    return 0;
}
It expects that the character is the first element in a line and that it is immediately followed by a ,. The number may have spaces upfront. The following input yields the following output:
A,1
B,2
C, 3
D,15
E,17
=>
element 0 is (A,1)
element 1 is (B,2)
element 2 is (C,3)
element 3 is (D,15)
element 4 is (E,17)
I have been working on the following bit of code and am having trouble with my file handling operations.
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Counts the lines of chota.txt, then re-reads the file, skipping the first
 * line, and splits every following line into serial number, two coordinates
 * and a city name.
 */
int main(void)
{
int lineCount=0;
char line[200];
// NOTE(review): lineCount is 0 here, so this declares a zero-length array;
// an array also cannot be assigned to later (see the malloc line below).
char *lineArray[lineCount];
char *CityName[20];
double longitudinal[10];
double latitudinal[10];
int serialno[10];
char *token;
// NOTE(review): strtok() takes a delimiter string (const char *), not a char.
const char j=' ';
int x=0,p=0;
FILE *file;
file=fopen("chota.txt","r");
if(file==NULL)
{
printf("file is not opened properly\n");
return -1;
}
//below one to give total number of lines.
while ((fgets(line,sizeof(line),file)) != NULL)
{
lineCount++;
}
// NOTE(review): lineArray is declared as an array, which cannot be the target
// of an assignment; the cast is also (char *) rather than (char **).
lineArray = (char *)malloc(sizeof(char *)*lineCount);
rewind(file);
printf("The total number of cities in the file is: %d\n",(lineCount-1));
fgets(line,sizeof(line),file);//moves file pointer to beg of 2nd line
while ((fgets(line,sizeof(line),file)) != NULL)
{
// NOTE(review): strlen(line) bytes leave no room for the '\0' that strcpy()
// writes; the same applies to CityName[p] below.
lineArray[p]=malloc(strlen(line));//1st bunch of memory allocated
strcpy(lineArray[p],line);
printf("%s\n",lineArray[p]);
token = strtok(lineArray[p],j);
//printf("%s\n",token);
serialno[p]=atoi(token);
printf("%d\n",serialno[p]);
x=1;
/* walk through other tokens */
// NOTE(review): on the first pass token still points at the serial-number
// token, so with x==1 that token is what gets stored in longitudinal[p].
while( token != NULL )
{
//printf( " %s\n", token );
if((x%4)==1)
{
//longitudinal[p] =malloc(strlen(token));
longitudinal[p] =atof(token);
}
else if((x%4)==2)
{
//latitudinal[p]=malloc(strlen(token));
latitudinal[p]=atof(token);
}
else if((x%4)==3)
{
CityName[p] = malloc(strlen(token));
strcpy(CityName[p],token);
printf("%s\n",CityName[p]);
}
token = strtok(NULL, j);
x++;
} //end of inner while
p++;
}//end of outer while
// NOTE(review): file is never closed, and the allocations are never freed.
}//end of main
The file that I am using is:
City_No Latitude Longitude City_Name
1 12.58 77.38 Bangalore
2 14.18 74.55 JogFalls
3 15.09 76.55 Bellary
4 26.48 84.33 Bettiah
5 25.37 85.13 Patna
6 19.18 84.51 Berahampur
7 20.15 85.51 Bhuvneshwar
8 25.30 90.30 Shillong
The problem is that I have been trying this for the past few days and I keep getting errors. I am not getting anywhere with debugging and cannot figure out where I am going wrong.
The array is better declared and allocated like this:
char **lineArray;.
lineArray = (char **)malloc(sizeof(char *) * (lineCount-1));
strtok take a string as second argument.
Replace const char j=' '; by const char *j=" ";
Get rid of the first strtok:
// Copy the current line into its own buffer. strcpy() also writes the
// terminating '\0', so the allocation needs strlen(line) + 1 bytes.
lineArray[p]=malloc(strlen(line)+1);//1st bunch of memory allocated
strcpy(lineArray[p],line);
printf("%s\n",lineArray[p]);
// NOTE(review): with the first strtok() call commented out, make sure the
// token loop still seeds token from lineArray[p] before calling strtok(NULL, j).
//token = strtok(lineArray[p],j);
//printf("%s\n",token);
//serialno[p]=atoi(token);
//printf("%d\n",serialno[p]);
x=1;
Given a text file:
I Angelina Jolie 1 7728323
I Mel Gibson 3 7809606 7733889 7724609
I Robert Redford 2 7721170 7731959
I Jennifer Aniston 4 2188989 2189898 2181020 2183456
I Jami Gertz 4 7734404 7774012 7773023 7921492
I Brad Pitt 2 7774017 7878485
R Sylvester Stallone 0
I Victoria Principal 3 7933045 7771234 7820987
R Jennifer Aniston 0
R Sean Penn 0
I Kevin Costner 1 7874014
Q
I need to read each line, separate the values by spaces, and create structs of each one. My current code is:
/*
 * Attempt to read hw6data.txt and fill directory[] with the first and last
 * name of every contact, then print all 10 directory slots.
 */
int main(){
int y;
FILE *data;
char action;
char line[100];
int counter = 0;
int index = 0;
/* One phone number, linked to the next number of the same contact. */
struct number{
int phoneNumber;
struct number *next;
};
/* One contact: first name, last name and the head of its number list. */
struct contact{
char fName[10];
char lName[10];
struct number *start;
};
struct number numbers[50];
// NOTE(review): directory is never initialised, and all 10 slots are printed
// below whether or not anything was stored in them.
struct contact directory[10];
if((data=fopen("hw6data.txt", "r")) != NULL){
// NOTE(review): "%s" reads one whitespace-delimited word, not a whole line.
while(fscanf(data, "%s", line) != EOF){
char s[2] = " ";
char *token;
token = strtok(line, s);
while(token != NULL){
if(counter==0){
// NOTE(review): s == "I" compares two addresses, not string contents, and s
// is the delimiter string rather than the token.
if(s == "I") {
// NOTE(review): these tests sit inside "counter==0", and counter is never
// changed, so neither branch can run.
if(counter==1){
strcpy(directory[index].fName, s);
}
if(counter==2){
strcpy(directory[index].lName, s);
}
}
}
token = strtok(NULL, s);
}
}
}
for(y = 0; y < 10; y++){
printf("%s ", directory[y].fName);
printf("%s\n", directory[y].lName);
}
// NOTE(review): data is NULL here when fopen() failed.
fclose(data);
return 1;
}
I'm trying to create a struct for each phone contact. The I or R indicates whether I should insert the contact or remove it. The directory is an array that contains up to 10 contacts. I can hold a total of 50 numbers. Each contact struct holds a pointer that should point to the first number in the numbers array of number structs. I'm creating an array-based linked list. I thought this code should create the contact structs. It compiles, but when I run it I get:
��f
�
ɷ�
�E
�����
�
��
.N=�
|�X�|���^�
�
Segmentation fault
Help?
An example that parse the "I" lines and print what's was read :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Parse the "I" (insert) lines of hw6data.txt and print the first and last
 * name found on each of them.
 *
 * Only lines whose first token is "I" are used; the two tokens that follow
 * are the first and last name.  At most 10 contacts are stored and names
 * longer than 9 characters are truncated to fit the 10-byte fields.
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main(){
    FILE *data;
    char line[100];
    int index = 0;
    struct contact{
        char fName[10];
        char lName[10];
    };
    struct contact directory[10];
    const int maxContacts = (int)(sizeof directory / sizeof directory[0]);

    data = fopen("hw6data.txt", "r");
    if (data == NULL) {
        perror("hw6data.txt");
        return 1; // nothing to close: fclose(NULL) is undefined behaviour
    }

    while (index < maxContacts && fgets(line, sizeof(line), data)) {
        // fgets keeps the newline, so treat it as a separator as well
        const char s[] = " \n";
        char *token = strtok(line, s);

        // the action letter is the first token of the line
        if (token == NULL || strcmp(token, "I") != 0) {
            continue;
        }

        char *first = strtok(NULL, s);
        char *last = strtok(NULL, s);
        if (first == NULL || last == NULL) {
            continue; // incomplete record, skip it
        }

        // snprintf always terminates and never writes past the field
        snprintf(directory[index].fName, sizeof directory[index].fName, "%s", first);
        snprintf(directory[index].lName, sizeof directory[index].lName, "%s", last);
        index++;
    }
    fclose(data);

    for (int y = 0; y < index; y++) {
        printf("%s ", directory[y].fName);
        printf("%s\n", directory[y].lName);
    }
    return 0;
}
If it helps...
A few problems I can see just at a glance (not necessarily a complete list):
The line while (fscanf(data, "%s", line) != EOF) does not read in an entire line at a time (which appears to be your intent, since you named your variable line); %s stops at the first whitespace character. You probably want to do while (fgets(line, sizeof line, data) != NULL) instead.
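You can't do string comparison in C as if (s == "I"); that compares two addresses, not the characters (and s here is the delimiter string, not the token you just read). Use strcmp(token, "I") == 0 or, if you're just checking the first character, if (token[0] == 'I') (note that single quote marks ('') are used here to denote a character literal, versus the double quote marks ("") used to denote string literals).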
You have if (counter == 1) and if (counter == 2) nested inside if (counter == 0), so those conditions will never be true, unless you modify counter at some point after the if (counter == 0) and before the if (counter == 1).
counter and index are never being incremented, so your entire while loop is having no effect whatsoever on the directory array. This is why you get garbage when you try to print out its values.