How to read dataset from text file to a 2D matrix - c

I have a dataset of form
0.547,0.797,2.860,1.398,Sharp-Right-Turn
0.541,0.786,2.373,1.919,Sharp-Right-Turn
0.549,0.784,2.370,1.930,Sharp-Right-Turn
0.983,0.780,2.373,1.701,Move-Forward
0.984,0.780,2.372,1.700,Move-Forward
0.983,0.780,2.378,1.602,Move-Forward
0.983,0.780,2.381,1.701,Move-Forward
.
.
ROWS=5456, COL 5
It's easy in MATLAB to load the text file into a data matrix, but I am struggling to do the same in C.
I tried this code
/* Reads "sensor.txt" into a ROW x COL array of records and prints every element.
 * ROW and COL are not defined anywhere in this snippet. */
int main()
{
/* One record: four float attributes followed by a text label of up to 29 characters plus NUL. */
struct node {
float at1;
float at2;
float at3;
float at4;
char at5[30];
} record[ROW][COL]; /* ROW*COL complete records, held in automatic storage */
FILE *file;
int i, j;
memset(record, 0, sizeof(record));
file = fopen("sensor.txt", "r");
if (file == NULL) {
printf("File does not exist!");
} else {
for (i = 0; i < ROW; ++i) {
for (j = 0; j < COL; ++j) {
/* Each call tries to consume one whole line (4 floats + label), so ROW*COL lines are requested in total.
 * The return value of fscanf is not checked.
 * NOTE(review): &record[i][j].at5 is a pointer to the whole char[30] array; %s expects a char *. */
fscanf(file, "%f,%f,%f,%f,%s", &record[i][j].at1, &record[i][j].at2, &record[i][j].at3, &record[i][j].at4, &record[i][j].at5);
}
}
}
/* NOTE(review): this is reached with file == NULL when fopen failed above. */
fclose(file);
/* Prints all ROW*COL elements, whether or not they were filled by the read loop. */
for (i = 0; i < ROW; ++i)
for (j = 0; j < COL; ++j) {
printf("%f\t%f\t%f\t%f\t%s\n", record[i][j].at1, record[i][j].at2, record[i][j].at3, record[i][j].at4, record[i][j].at5);
}
return 0;
}
I am getting infinite rows and 4 cols of 0.000000 only.
I want to save the first four columns in one matrix and last column as another column matrix. Could I do that?
I have to build a classifier which I easily did in MATLAB without using predefined functions but reading data in C is hampering my code.
I know this might be a repeated question, but I tried solutions in other threads, they are not working on my dataset.

First of all, you have defined a record that holds all the fields which together form one row. This means that each read gives you all the values of one row, so you need one structure per row: `record` should be a one-dimensional array of structures with as many elements as the maximum number of rows, not a two-dimensional one.
You also cannot allocate such a huge array on the stack, because it will overflow; it is better to allocate it in dynamic memory:
/* One record per input line: four numeric attributes followed by a text label. */
struct node {
    float at1;
    float at2;
    float at3;
    float at4;
    char at5[30];
};  /* type only: declaring an object named `record` here would clash with the pointer below */

/* One-dimensional array of MAXRECORDS records in dynamic memory.
 * The result must be checked against NULL before use and released with free(). */
struct node *record = malloc(sizeof *record * MAXRECORDS);
Another error is in the fscanf call: the last field of the structure, at5, is a char array that already decays to a pointer to char when it is passed as an argument, so you must not apply the address-of operator (&) to it.
This is a working code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXRECORDS 10
/*
 * Loads up to MAXRECORDS comma-separated records from "sensor.txt" and prints
 * them, one per line, separated by tabs.
 *
 * Returns 0 on success, 1 when the buffer cannot be allocated or the file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
    struct node {
        float at1;
        float at2;
        float at3;
        float at4;
        char at5[30];
    };

    (void)argc;
    (void)argv;

    /* calloc zero-fills the whole array; memset(record, 0, sizeof(record))
     * would only clear sizeof(pointer) bytes because record is a pointer. */
    struct node *record = calloc(MAXRECORDS, sizeof *record);
    if (record == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }

    FILE *file = fopen("sensor.txt", "r");
    if (file == NULL)
    {
        printf("File does not exist!");
        free(record);
        return 1;   /* do not fall through to fclose(NULL) */
    }

    int nRecords = 0;
    /* The capacity test comes first so fscanf never writes past the array.
     * Requiring all 5 conversions stops the loop on a malformed line instead
     * of spinning forever, and %29s keeps the label inside at5[30]. */
    while (nRecords < MAXRECORDS &&
           fscanf(file, "%f,%f,%f,%f,%29s",
                  &record[nRecords].at1, &record[nRecords].at2,
                  &record[nRecords].at3, &record[nRecords].at4,
                  record[nRecords].at5) == 5)
    {
        nRecords++;
    }
    fclose(file);

    for (int i = 0; i < nRecords; ++i)
    {
        printf("%f\t%f\t%f\t%f\t%s\n",
               record[i].at1, record[i].at2,
               record[i].at3, record[i].at4, record[i].at5);
    }

    free(record);
    return 0;
}
In a 'real' application you would size the array to some reasonably large initial value and, when you reach the end of the allocated space, reallocate it to make room for more data. This lets you read a file with any number of entries without knowing that number before reading.
P.S. I added a check for the maximum number of records to read. This remains a sample, though: real code needs more thorough error checking, for example of the value returned by malloc.

Related

I want to get integer values from three text files using multiple threads and check if these are prime number if so store them in new file

The function used here works, but because I send every thread to do this job, the work is repeated once per thread (numberthreads times), due to the for loops used when creating the threads. I am out of ideas here: what can I do so that, after successfully loading and checking the 1159999 values from the multiple text files, the primes are stored in a separate text file?
`
#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
pthread_mutex_t lck;
// Inclusive [start, end] range handed to one worker thread.
typedef struct{
int start;
int end;
}Innerstruct;`
// Shared argument passed to every thread: the input files, the per-thread
// ranges, how many threads there are and how many numbers were counted.
struct data{
FILE **fptr;
Innerstruct *inner;
int numberthreads;
int totalno;
};
int primecount=0;//to store prime numbers count
int nofiles=3;
// Rewinds each of the nofiles streams in fptr and reads it line by line
// (lines longer than 255 characters are read in several pieces).
// *count is reset to 0 first.
// NOTE(review): the "(*count)++;" statement sits inside the trailing // comment of
// the if line below, so it is never compiled and *count stays 0.
void countlines(int *count,FILE **fptr,int nofiles){ //counts the no of lines of cordinates from the text file
int i;
*count = 0;
for(i=0;i<nofiles;i++){
fseek(fptr[i], 0, SEEK_SET);
char line[256]; // buffer to store each line
while(fgets(line, sizeof(line), fptr[i])){
if(strlen(line) > 1){ // only increment if line is not empty so that this doesn't count blank spaces (*count)++;
}
}
}
}
// Thread entry point; p is used as a struct data *.
// For every range in d1->inner[0 .. numberthreads-1], every offset j in that range
// and every input file, it seeks to offset j, reads one integer and, if checkprime()
// accepts it, writes it to PrimeNumber.txt and increments primecount under lck.
// NOTE(review): the k loop walks all ranges, not just one, so each thread that runs
// this function repeats the whole job.
void *getstorecountprime(void *p){
FILE *fptr4;
// "w" mode truncates PrimeNumber.txt each time this function is entered
fptr4 = fopen("PrimeNumber.txt","w");
if(fptr4==NULL){
printf("Error creating file");
// NOTE(review): execution continues with fptr4 == NULL after the message
}
struct data *d1 = p;
int i,j,k,num,start,end;
for(k=0;k<d1->numberthreads;k++){
start= d1->inner[k].start;
end = d1->inner[k].end;
for(j=start;j<=end;j++){
for(i=0;i<nofiles;i++){
// j is passed to fseek as a byte offset from the start of the file
fseek(d1->fptr[i], j, SEEK_SET);
// the fscanf result is not checked, so num may be left unchanged on a failed read
fscanf(d1->fptr[i],"%d",&num);
// checkprime is defined further down the file; no prototype is visible at this point
if(checkprime(num)){
pthread_mutex_lock(&lck);
fprintf(fptr4,"%d\n",num);
primecount++;
pthread_mutex_unlock(&lck);
}
}
}
}
// NOTE(review): fptr4 is never closed and no value is returned from this void * function
}
// Program entry point: opens the three input files, counts their lines, builds one
// start/end range per requested thread and runs getstorecountprime in each thread.
// NOTE(review): the standard return type of main is int.
void main(){
//checking for any error on opening the files
FILE *fptr1,*fptr2,*fptr3;
// opening text files
fptr1 = fopen("PrimeData1.txt","r");
fptr2 = fopen("PrimeData2.txt","r");
fptr3 = fopen("PrimeData3.txt","r");
if(fptr1==NULL || fptr2==NULL || fptr3==NULL){
printf("Could not open all files");
exit(1);
}
pthread_mutex_init(&lck,NULL);
FILE *fptr[] = {fptr1,fptr2,fptr3}; //array of file pointer
int num_threads,sliceperthread,*numarray;
int i;
int totalcount = 0;//to store total numbers
pthread_t *id;
struct data d1;
countlines(&totalcount,fptr,nofiles);//counts the no of lines from txt files
// numarray is allocated here but not used anywhere else in this function
numarray = malloc(totalcount*sizeof(int));//dma to store all datas in array
printf("Enter how many threads do you want:");
scanf("%d",&num_threads);
id = malloc(num_threads*sizeof(pthread_t));//dma thread equal to the number of thread
// NOTE(review): the comment on the next line wrapped onto a line of its own ("start and end"), which is not valid C
d1.inner = malloc(num_threads*sizeof(Innerstruct));//dma the inner structure containing
start and end
// NOTE(review): this allocation is overwritten two statements later by d1.fptr=fptr, so it is lost
d1.fptr = malloc(nofiles*sizeof(FILE*));//dma the fptr as array of nofiles size
sliceperthread = totalcount/num_threads;
d1.fptr=fptr;//assigned the fptr of structure to array of file pointer created above
d1.numberthreads=num_threads;\
d1.totalno = totalcount;
d1.inner[0].start = 0;
d1.inner[0].end = sliceperthread;
// NOTE(review): i runs up to and including num_threads, but d1.inner has only num_threads elements
for(i=1;i<=num_threads;i++){
d1.inner[i].start = d1.inner[i-1].end + 1;
d1.inner[i].end = d1.inner[i-1].end + sliceperthread;
}
// Each thread is joined right after it is created, and every thread receives the same &d1
for(i=0;i<num_threads;i++){
pthread_create(&id[i],NULL,getstorecountprime,&d1);
pthread_join(id[i],NULL);
}
printf("%d",primecount);
}
// Meant to report whether n is prime (non-zero) or not (0).
// NOTE(review): both branches of the if/else return, so the loop body runs at most
// once and only divisibility by 2 is ever tested; when the loop body never runs
// (n/2 < 2) the function ends without returning a value.
int checkprime(int n){
int i;
for(i=2;i<=n/2;i++){
if(n%i == 0){
return 0;
}else{
return 1;
}
}
}``
As you can see from the code that i tried using a nested structure for this process. I created a structure which will store file pointers array and another inner structure array having start and end so that this could be used for threads start and end point. So i used a for loop to create threads and assign them function sending the structure as parameter. I was expecting to get prime numbers in text file but because of the iteration the program just keeps on adding values to the file.
At least these problems:
No increment
(*count)++; is lost in a comment, so the counter is never incremented (credit: @M Oehm).
if(strlen(line) > 1){ // only increment if line is not empty so that this doesn't count blank spaces (*count)++;
}
Wrong checkprime(int n)
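Hopefully, with uniform formatting, it is easy to see that the loop returns during its first iteration, so it iterates at most once and only ever tests divisibility by 2, and that the function fails to return a value when the loop body never runs (n < 4).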
/* The asker's function, reformatted to expose the problem; it is shown as-is, not fixed. */
int checkprime(int n) {
int i;
for (i = 2; i <= n / 2; i++) {
if (n % i == 0) {
return 0;
} else {
return 1; /* returns on the first iteration, so i never advances past 2 */
}
}
/* control reaches here without a return value when the loop body never runs */
}
Save time, auto format code and enable all compiler warnings to rapidly identify various code problems.

how to dynamically allocate memory for a struct type array in C

so I have a program where I need to read a file and store some words from it in an array, but I want to do it so that in each array in which I store the respective words to have custom size depending on how many words I have
/* One keyword entry. keyword is only a pointer: it owns no storage until it is
 * made to point at some. */
typedef struct {
char* keyword;
int keywordCount;
int stdev;
} keywordData;
/* File-scope state filled in by fetchKeywords(). */
int keywordNumber;   /* number of entries announced by the file */
keywordData* keyword; /* dynamically allocated array of keywordNumber entries */
/* Reads the keyword count from the start of `filename`, allocates the global
 * `keyword` array with that many entries and reads one word into each entry.
 * The results of fopen, fscanf and malloc are not checked, and the file is
 * not closed. */
void fetchKeywords(const char* filename)
{
FILE* keywords = fopen(filename, "r");
// first number in the file is the number of keywords in the file, so i dont need to count them
fscanf(keywords,"%d", &keywordNumber);
keyword = (keywordData *) malloc(keywordNumber * sizeof(keywordData));
for(int i = 0; i < keywordNumber; i++)
{
/* NOTE(review): keyword[i].keyword is an uninitialised char * at this point
 * (malloc does not initialise the array), so fscanf writes through an
 * indeterminate pointer. */
fscanf(keywords,"%s", keyword[i].keyword);
//printf("%s\n", keyword[i].keyword);
}
}
Your code is invalid because you never allocate memory for the keyword member of each element, yet you scan a string into it. This invokes undefined behaviour (UB).
Try not to use global variables. Use function return values and if needed pointer parameters.
#define MAXKEYWORDLENGTH 64
typedef struct {
char keyword[MAXKEYWORDLENGTH];
int keywordCount;
int stdev;
} keywordData;
/* Reads the keyword count from `filename` into *keywordNumber, allocates that
 * many keywordData entries and reads one word into each.
 * Returns the array (the caller releases it with free()), or NULL when malloc
 * fails. */
keywordData *fetchKeywords(const char* filename, int *keywordNumber)
{
FILE* keywords = fopen(filename, "r");
keywordData *kd;
// first number in the file is the number of keywords in the file, so i dont need to count them
if(keywords)
if(fscanf(keywords,"%d", keywordNumber) != 1) { /* error handling*/}
/* NOTE(review): this allocation and the loop below are not guarded by if(keywords),
 * so they also run when fopen failed. */
kd = malloc(*keywordNumber * sizeof(*kd));
if(kd)
/* NOTE(review): keywordNumber is an int *, so this compares an int with a pointer;
 * the count itself is *keywordNumber. */
for(int i = 0; i < keywordNumber; i++)
{
fscanf(keywords,"%s", kd[i].keyword);
//printf("%s\n", keyword[i].keyword);
}
if(keywords) fclose(keywords);
return kd;
}
I don't have enough rep to comment, but the warning you're getting with the other answer's code is because keywordNumber is an int * whereas i is just an int, so you need to dereference keywordNumber first in the for loop (so for (int i = 0; i < *keywordNumber; i++)); this code should fix it:
#define MAXKEYWORDLENGTH 64

/* One keyword entry; the text is stored inline, so no per-keyword allocation is needed. */
typedef struct {
    char keyword[MAXKEYWORDLENGTH];
    int keywordCount;
    int stdev;
} keywordData;

/*
 * Loads the keyword list stored in `filename`.
 *
 * The file starts with the number of keywords, followed by that many
 * whitespace-separated words. Words longer than MAXKEYWORDLENGTH - 1
 * characters are split rather than overflowing the buffer.
 *
 * filename       path of the keyword file
 * keywordNumber  out: number of keywords actually loaded (0 on any failure)
 *
 * Returns a zero-initialised array the caller must free(), or NULL if the
 * file cannot be opened, has no valid non-negative count, or memory runs out.
 */
keywordData *fetchKeywords(const char* filename, int *keywordNumber)
{
    *keywordNumber = 0;

    FILE *keywords = fopen(filename, "r");
    if (keywords == NULL)
        return NULL;    /* never touch the stream or the count after a failed open */

    keywordData *kd = NULL;
    int declared = 0;
    // first number in the file is the number of keywords in the file
    if (fscanf(keywords, "%d", &declared) == 1 && declared >= 0) {
        /* calloc zero-fills keywordCount/stdev; ask for at least one element so
         * an empty list is not mistaken for an allocation failure */
        kd = calloc(declared > 0 ? (size_t)declared : 1, sizeof *kd);
    }

    if (kd != NULL) {
        /* Build "%<width>s" from the buffer size so the bound cannot drift. */
        char wordFormat[16];
        snprintf(wordFormat, sizeof wordFormat, "%%%ds", MAXKEYWORDLENGTH - 1);

        int loaded = 0;
        while (loaded < declared &&
               fscanf(keywords, wordFormat, kd[loaded].keyword) == 1)
            loaded++;
        /* report what was really read, which may be fewer than declared */
        *keywordNumber = loaded;
    }

    fclose(keywords);
    return kd;
}

Why is realloc giving me inconsistent behaviour?

I am currently taking a procedural programming course at my school. We are using C with C99 standard. I discussed this with my instructor and I cannot understand why realloc() is working for his machine, but it is not working for mine.
The goal of this program is to parse a text file students.txt that has students' name and their GPA formatted like this:
Mary 4.0
Jack 2.45
John 3.9
Jane 3.8
Mike 3.125
I have a function that resizes my dynamically allocated array, and when I use realloc the debugger in my CLion IDE, it gave me SIGABRT.
I tried using an online compiler and I get realloc(): invalid next size.
I have been trying to debug this all weekend and I can't find the answer and I need help.
My code is currently looking like this
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INITIAL_SIZE 4
#define BUFFER_SIZE 512
#define GRADE_CUTOFF 3.9
// ERROR CODES
#define FILE_OPEN_ERROR 1
#define MEMORY_ALLOCATION_ERROR 2
/* One student record; name is set from strdup() and freed in printStudentAndGPA(). */
struct student {
double gpa;
char *name;
};
/* Doubles *currentSize and reallocates listOfStudents when studentCount exceeds
 * *currentSize; otherwise returns listOfStudents unchanged.
 * Returns the (possibly moved) array. Exits with MEMORY_ALLOCATION_ERROR if
 * realloc fails. */
struct student *resizeAllocationIfNeeded(struct student *listOfStudents,
unsigned int studentCount, size_t *currentSize) {
/* NOTE(review): when studentCount == *currentSize the array is already full, but
 * this test still returns without growing it. */
if (studentCount <= *currentSize) {
return listOfStudents;
}
*currentSize *= 2;
struct student *resizedList = (struct student *) realloc(listOfStudents, *currentSize * sizeof(struct student));
if (resizedList == NULL) {
perror("Failed to allocate memory");
exit(MEMORY_ALLOCATION_ERROR);
}
return resizedList;
}
/* Reads "name gpa" pairs from file into listOfStudents, growing the array through
 * resizeAllocationIfNeeded(), and returns how many students were read.
 * NOTE(review): listOfStudents is a by-value parameter, so a pointer returned by
 * realloc is only visible inside this function; the caller keeps its old pointer. */
size_t getNamesAndGrades(FILE *file, struct student *listOfStudents, size_t size) {
unsigned int studentCount = 0;
char buffer[BUFFER_SIZE];
/* "> 0" also accepts a line where only the name was converted; %s has no width limit */
while(fscanf(file, "%s %lf", buffer, &listOfStudents[studentCount].gpa) > 0) {
listOfStudents[studentCount].name = strdup(buffer);
studentCount++;
listOfStudents = resizeAllocationIfNeeded(listOfStudents, studentCount, &size);
}
return studentCount;
}
void swapStudents(struct student *listOfStudents, int x, int y) {
struct student temp = listOfStudents[x];
listOfStudents[x] = listOfStudents[y];
listOfStudents[y] = temp;
}
/* Bubble-sorts the first studentCount records into descending GPA order. */
void sortStudentsByGPA(struct student *listOfStudents, unsigned int studentCount) {
    for (unsigned int pass = 0; pass < studentCount; pass++) {
        /* after each pass the smallest remaining GPA has sunk to the end */
        unsigned int unsortedEnd = studentCount - pass - 1;

        for (unsigned int k = 0; k < unsortedEnd; k++) {
            if (listOfStudents[k + 1].gpa > listOfStudents[k].gpa) {
                swapStudents(listOfStudents, k, k + 1);
            }
        }
    }
}
/* Prints every student whose GPA is above GRADE_CUTOFF and releases each
 * student's name, whether or not it was printed. */
void printStudentAndGPA(struct student *listOfStudents, unsigned int studentCount) {
    unsigned int index = 0;

    while (index < studentCount) {
        struct student *entry = &listOfStudents[index];

        if (entry->gpa > GRADE_CUTOFF) {
            printf("%s %lf\n", entry->name, entry->gpa);
        }
        free(entry->name);
        index++;
    }
}
/* Opens fileName, loads the students, sorts them by GPA and prints those above
 * the cutoff. Exits with FILE_OPEN_ERROR or MEMORY_ALLOCATION_ERROR on failure.
 * The file is not closed in this function. */
void topStudents(char *fileName) {
FILE *file = fopen(fileName, "r");
if (!file) {
perror("Could not open file for reading");
exit(FILE_OPEN_ERROR);
}
struct student *listOfStudents = (struct student *) malloc(INITIAL_SIZE * sizeof(struct student));
if (listOfStudents == NULL) {
perror("Failed to allocate memory");
exit(MEMORY_ALLOCATION_ERROR);
}
/* NOTE(review): getNamesAndGrades may realloc the array internally; listOfStudents
 * here still holds the pointer from the malloc above. */
unsigned int studentCount = getNamesAndGrades(file, listOfStudents, INITIAL_SIZE);
sortStudentsByGPA(listOfStudents, studentCount);
printStudentAndGPA(listOfStudents, studentCount);
free(listOfStudents);
}
/* Program entry point: reports the top students listed in "students.txt". */
int main() {
    char inputFile[] = "students.txt";

    topStudents(inputFile);
    return 0;
}
You have a fencepost error when checking whether you need to resize the array.
Your initial allocation size is 4, which means that the highest valid index is 3.
In the loop in getNamesAndGrades(), after you read into listOfStudents[3] you increment studentCount to 4. Then you call resizeAllocationIfNeeded(listOfStudents, studentCount, &size);
Inside resizeAllocationIfNeeded(), studentCount == 4 and *currentSize == 4. So the test
if (studentCount <= *currentSize) {
return listOfStudents;
}
succeeds and you return without calling realloc().
Then the next iteration of the loop assigns to listOfStudents[4], which causes a buffer overflow.
You need to change that condition to studentCount < *currentSize.
There are two errors in your code: one is just a typo, the other is a more serious logical error.
First, you are reallocating too late, because of the condition in resizeAllocationIfNeeded(). When studentCount == currentSize, this doesn't resize (even though it should), which makes you overflow the array of students and causes problems.
You can change the condition to fix this:
if (studentCount < *currentSize) {
return listOfStudents;
}
Apart from the above, your main error is in getNamesAndGrades(), where you are reallocating memory and assigning the new pointers to a local variable. You then use that variable in topStudents() as if it was updated. This will of course not work, as the initial pointer passed by topStudents() becomes invalid after the first realloc() and memory is irrevocably lost when getNamesAndGrades() returns.
You should either pass a pointer to the student array, or better just make the function create the array for you.
Here's a solution, renaming getNamesAndGrades to getStudents:
/*
 * Reads "name gpa" pairs from file into a freshly allocated array.
 *
 * file          open stream positioned at the first record
 * studentCount  out: number of records read
 *
 * Returns the array; the caller owns it and every name stored in it.
 * Exits with MEMORY_ALLOCATION_ERROR when memory cannot be obtained.
 */
struct student *getStudents(FILE *file, unsigned int *studentCount) {
    char buffer[BUFFER_SIZE];
    size_t size = INITIAL_SIZE;

    *studentCount = 0;

    struct student *listOfStudents = malloc(size * sizeof *listOfStudents);
    if (listOfStudents == NULL) {
        perror("Failed to allocate memory");
        exit(MEMORY_ALLOCATION_ERROR);
    }

    /* == 2: accept a record only when both the name and the GPA were converted */
    while (fscanf(file, "%511s %lf", buffer, &listOfStudents[*studentCount].gpa) == 2) {
        char *nameCopy = strdup(buffer);
        if (nameCopy == NULL) {
            /* a NULL name would otherwise reach printf("%s") later on */
            perror("Failed to allocate memory");
            exit(MEMORY_ALLOCATION_ERROR);
        }
        listOfStudents[*studentCount].name = nameCopy;
        (*studentCount)++;
        /* keep the (possibly moved) array so the next record has a valid slot */
        listOfStudents = resizeAllocationIfNeeded(listOfStudents, *studentCount, &size);
    }
    return listOfStudents;
}
// ...
/*
 * Loads the students stored in fileName, sorts them by GPA and prints those
 * above the cutoff. Exits with FILE_OPEN_ERROR when the file cannot be opened.
 */
void topStudents(char *fileName) {
    FILE *file = fopen(fileName, "r");
    if (!file) {
        perror("Could not open file for reading");
        exit(FILE_OPEN_ERROR);
    }

    unsigned int studentCount;
    struct student *listOfStudents = getStudents(file, &studentCount);
    fclose(file);   /* every record is in memory now; release the stream */

    sortStudentsByGPA(listOfStudents, studentCount);
    printStudentAndGPA(listOfStudents, studentCount);   /* also frees each name */
    free(listOfStudents);
}
/* Program entry point: reports the top students listed in "students.txt". */
int main() {
    char inputFile[] = "students.txt";

    topStudents(inputFile);
    return 0;
}
Additional notes:
When scanning on a fixed size buffer (in this case 512 bytes), use %511s, not just %s, that's a buffer overflow waiting to happen.
You are scanning two fields, so check if fscanf's return value is == 2, not > 0, you don't want for example one field initialized and one not.
Don't cast the result of malloc() or realloc()
For the future, if you are on Linux, compiling with gcc -g -fsanitize=address will give you detailed error reports when something goes bad in the heap, telling you exactly where memory was allocated, freed and used.

Sorting structures from files

I recently got an assignment to sort the members in a struct array by last name and, if the last names are the same, by first name. What I have so far only reads their names and ages from the file, and I am not properly grasping how I would sort them. So far I have gathered the data from the file, but I'm at a loss from there. I followed some code I saw, but I didn't properly grasp the process, so I reverted to step one.
/* One member record. Note that bio is declared as a single struct Members object,
 * not as an array. */
struct Members{
int id;
char fname[50];
char lname[50];
int age;
}bio;
/* Reads "id fname lname age" records from "Members Bio.txt" and echoes each one. */
int main(){
int i=0;
FILE *fptr;
/* NOTE(review): the declared stream variable is fptr; `file` is not declared in this snippet */
file = fopen("Members Bio.txt", "r");
/* NOTE(review): bio is indexed as an array here, but it is declared as a single struct */
while ( fscanf(file, "%d%s%s%d", &bio[i].id,bio[i].fname,bio[i].lname,&bio[i].age) != EOF)
{
/* NOTE(review): the format has five conversions but only four arguments follow */
printf("%d %s %s %d %d\n", bio[i].id,bio[i].fname, bio[i].lname, bio[i].age);
i++;
}
fclose(fptr);
}
Can anyone help me out on this one?
Code goes something like this for your case.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One member record as stored in the input file. */
struct Members{
    int id;
    char fname[50];
    char lname[50];
    int age;
};

/* Ordering predicate for sort(): non-zero when the first element must come
 * before the second, zero otherwise. */
typedef int (*compare_func)(void*, void*);

/*
 * Orders members by last name and, when the last names are equal, by first name.
 * Returns 1 when *s1 sorts strictly before *s2, 0 otherwise.
 */
int struct_cmp(void* s1, void* s2)
{
    const struct Members *left = s1;
    const struct Members *right = s2;

    int by_last = strcmp(left->lname, right->lname);
    if (by_last != 0)
        return by_last < 0;
    return strcmp(left->fname, right->fname) < 0;
}

/* Swaps two ele_size-byte elements in place; a and b may be the same element. */
static void swap_elements(unsigned char *a, unsigned char *b, long ele_size)
{
    for (long k = 0; k < ele_size; k++) {
        unsigned char held = a[k];
        a[k] = b[k];
        b[k] = held;
    }
}

/*
 * Generic recursive quick sort (last element as pivot).
 *
 * arr       first element of the array
 * ele_size  size of one element in bytes
 * start,end inclusive index range to sort
 * compare   predicate, non-zero when its first argument must precede the second
 *
 * Elements are swapped byte by byte, so no scratch buffer is allocated: there
 * is nothing to leak, no allocation that can fail, and no memcpy between
 * identical addresses when an element is swapped with itself.
 */
void sort(void* arr,long ele_size,long start,long end,compare_func compare)
{
    if (arr == NULL || compare == NULL || ele_size <= 0 || start >= end)
        return;

    /* byte pointer: arithmetic on void * is not standard C */
    unsigned char *base = arr;
    unsigned char *pivot = base + end * ele_size;

    /* index of the last element known to sort before the pivot */
    long boundary = start - 1;
    for (long j = start; j < end; j++) {
        if (compare(base + j * ele_size, pivot)) {
            boundary++;
            swap_elements(base + boundary * ele_size, base + j * ele_size, ele_size);
        }
    }

    /* move the pivot to its final position */
    boundary++;
    swap_elements(base + boundary * ele_size, pivot, ele_size);

    sort(arr, ele_size, start, boundary - 1, compare);
    sort(arr, ele_size, boundary + 1, end, compare);
}
/* Test driver: fills three sample members, sorts them with sort()/struct_cmp and
 * prints the result. */
int main()
{
    FILE* fp;   /* placeholder for the real input file; unused by the sample data */
    int bio_max = 3;
    struct Members bio[bio_max]; // Define bio to be large enough.
    /* Open FILE and setup bio matrix */

    /* For testing: id and age equal the index, first names are empty */
    const char *sample_lnames[] = { "Apple", "Cat", "Bat" };
    for (int k = 0; k < bio_max; k++) {
        bio[k].id = k;
        strcpy(bio[k].fname, "");
        strcpy(bio[k].lname, sample_lnames[k]);
        bio[k].age = k;
    }

    /* Sort the structure */
    sort(bio, sizeof(struct Members), 0, bio_max - 1, struct_cmp);

    /* Print the sorted structure */
    int row = 0;
    while (row < bio_max) {
        printf("%d %s %s %d\n", bio[row].id, bio[row].fname,
               bio[row].lname, bio[row].age);
        row++;
    }
}
Output
0 Apple 0
2 Bat 2
1 Cat 1
If the strings are not sorting in the way you want, you can redefine the struct_cmp function. Code is self explanatory, the base logic in the code is pass an array and swap elements using memcpy functions. You cant use simple assignment operator if you want to be generic, so that is why the element size is explicitly passed.
Edit
The code was not handling the case where the lname values are the same. I missed it; thanks to @4386427 for pointing this out.
I think you should define bio to be an array. And google sort algorithms please. Also recommend you google how to use libc function qsort.

array of linked list for hash table

I want to know about bring the text file with 10 names and read it. 10 names are sorting by descending and forming a hash table with division method. I need to construct linked list of them. The hash table's index is number of 7.
I've tried on match pointer variable and made a hash table, but I can't do that. I'm in trouble with making hash table, inserting data, printing hash table and searching data(A function to find when I type a name.). I need to add more function..how do i made it?
#define SIZE 7
/* Linked-list node. As declared, every node carries a full 100 x 20 char table
 * rather than a single name. */
struct node {
char data[100][20];
struct node* next;
};
/* One hash-table slot: the head of its chain and an element count. */
struct index {
struct node* head;
int count;
};
/* NOTE(review): this is not valid C as written. "struct sum (data)" is neither a
 * function definition with a return type nor a struct declaration, and `data`
 * has no type. Presumably the intent is to add up the characters of a name so
 * the total can be passed to hashFunction(); confirm with the author. */
struct sum (data){
struct node* ptr; /* never initialised before it is dereferenced below */
int sum,i;
for (i=0; i<20; i++) {
ptr -> data[i] = ptr;
strcpy(sum,ptr); /* sum is an int and ptr a struct node *; strcpy expects char pointers */
}
return sum;
};
/*
 * Division-method hash: maps sum to a bucket index in [0, SIZE - 1].
 *
 * The % operator keeps the sign of its left operand, so a negative sum would
 * yield a negative index; such results are shifted back into range.
 */
int hashFunction (int sum) {
    int bucket = sum % SIZE;

    if (bucket < 0) {
        bucket += SIZE;
    }
    return bucket;
}
/* Meant to put the data in descending order.
 * NOTE(review): `data` has no declared type, and i and j are never declared. */
void descend (data) {
int temp;
for(i=0;i<100;i++) {
/* j=20 is an assignment, not a comparison: its value is always non-zero, so this loop never ends */
for(j=0;j=20;j++) {
/* without braces only the first statement belongs to the if; the two assignments
 * after it run on every iteration */
if (data[i][j+1]>data[i][j])
temp=data[i][j];
data[i][j]=data[i][j+1];
data[i][j+1]=temp;
}
}
}
/* Reads words from "data.txt", then allocates the hash table and calls descend(). */
int main (void) {
char data[100][20];
FILE *fp;
fp = fopen("data.txt","r"); /* result is not checked */
/* NOTE(review): the for statement has no opening brace, so only the fscanf call is
 * its body and the closing brace two lines below ends main() early. */
for (int i=0; i<20; i++)
fscanf (fp,"%s",&data); /* every iteration writes to the start of data, not to row i */
printf("%s\n",data);
}
fclose(fp);
/* hashTable is not declared anywhere in this snippet */
hashTable = (struct index*)malloc(SIZE*sizeof(struct index));
descend(data);
return 0;
}
There are lot of bugs in the code, I'm just putting my possible observation. Firstly this
fscanf (fp,"%s",&data);
should be
fscanf (fp,"%s",data[i]); /* data[i] already decays to the char * that %s expects; &data[i] would be a char (*)[20] */
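Secondly, in the inner loop of the descend() function the condition is j=20, which is an assignment that is always true, so the loop runs forever. This is where a macro comes in handy: had the comparison been written with the constant on the left as ROW=j, where ROW is a macro defined as 20, the compiler would have rejected the accidental assignment with a meaningful error instead of silently accepting j=20. This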
void descend (data) { /* what is the type of data? you should state the data type */
int temp;
for(i=0;i<100;i++) { /* there are only 20 lines, not 100, i.e. it should be i<20 */
for(j=0;j=20;j++) { /* the condition is wrong: you intended j<20, but that is wrong too,
as there are supposed to be at most 100 chars in a line;
it should be j<100 */
if (data[i][j+1]>data[i][j]) /* condition is not correct */
temp=data[i][j];
data[i][j]=data[i][j+1];
data[i][j+1]=temp;
}
}
}
Correct version descend function can be
/* Walks ROW rows of data and, within each row, compares neighbouring characters
 * for the first col positions.
 * NOTE(review): i and j are not declared in this snippet.
 * NOTE(review): the parameter type makes each row ROW chars wide, while the inner
 * loop runs to col and reads data[i][j+1]; confirm which dimension is which. */
void descend (char (*data)[ROW], int col) { /* define ROW as macro with value 20 and pass the col i.e 100 */
int temp;
for(i=0;i < ROW; i++) {
for(j=0;j < col; j++) {
/* NOTE(review): without braces only the assignment to temp is conditional; the
 * two assignments that follow run on every iteration */
if (data[i][j] > data[i][j+1])
temp = data[i][j];
data[i][j] = data[i][j+1];
data[i][j+1] = temp;
}
}
}
Also check the return value of fopen() to see whether it succeeded or failed, and handle the failure properly. For example:
fp = fopen("data.txt","r");
if(fp == NULL) {
/* TODO: error handling */
fprintf(stderr,"file doesn't exist");
return 0;
}
To begin with, it looks like you should declare char data[20][100] instead of char data[100][20].
Then, inside the loop of 20 iterations, you should refer to data[i] instead of data:
for (int i=0; i<20; i++) {
    /* %99s leaves room for the terminating NUL in a 100-char row */
    if (fscanf(fp,"%99s",data[i]) != 1)
        break;  /* stop at end of file or on a read error */
    printf("%s\n",data[i]);
}
Keep in mind you're assuming that each line in your input file is at most 99-character long.
This doesn't answer the actual question, I suppose, but you should at the very least get all of the above fixed.

Resources