Reading a CSV file in C and creating a matrix - arrays

I have tried to write code that reads my CSV file and puts the data in a 2D array, but when I run this code I see that all elements of the array are zero. Could you please help me understand what I should change?
// Opens "data.csv", walks it line by line while skipping the header line,
// and finally prints five columns of data_new for every line that was read.
// NOTE(review): no statement below ever assigns to data_new, so the table
// still holds its zero initializer when it is printed.
int main(void){
int rowID = 0; //declare row index
int columnID = 0; //declare column index
int data_new[10000][5] = {{0, 0}}; //create the matrix that we want to put the data
char line[50000]; //size of array line
// NOTE(review): token is a single char initialised from NULL and is never used.
char token = NULL;
FILE *data_csv = fopen("data.csv", "r");
if (data_csv == NULL){
perror("unable to open the file");
exit(1);
}
// One iteration per line of the file; rowID counts every line, header included.
while (fgets(line, 50000, data_csv))
{
columnID = 0;
rowID++;
if (rowID == 1)
{
continue;
} //to remove the headers
// Only the first two tokens of the line are fetched. Neither is converted
// to a number nor copied into data_new, and columnID is incremented once
// before being reset on the next iteration.
char *each_line = strtok(line, ", "); //this split the data by ,
each_line = strtok(NULL, ", ");
columnID++;
}
fclose(data_csv); // Close the file
// NOTE(review): rowID is not limited to the 10000 rows that data_new has.
for (int i = 0; i < rowID; ++i)
{
for (int j = 0; j < 5; ++j)
// NOTE(review): "%lf" expects a double argument, but data_new[i][j] is an int.
printf("%10.4lf", data_new[i][j]);
putchar('\n');
}
}

Related

My matrix vector multiplication is returning incorrect values

After digging deep into the internet I managed to read all the numbers in my csv to a matrix vector and also for the other single dimension vector from its related csv. The matrix csv file contains a matrix in the following format
91,86,94
12,54,88
79,58,66
The other input vector file contains the members of the one dimension vector as follows
14
20
22
So I expect the output Vector as a result of this multiplication to be for the first row as
91*14+86*20+94*22=5062
Instead of the above my C code is giving me an insane -1469150284 as the member of the first row of the resultant Vector, I suspected the initialization of the two dimension matrix at first but then even after using memset() to set all elements in the array to 0, I still get the same incorrect values.
The complete code on how I read the csvs and how I load each number into the arrays and how I multiply is provided below, help me trace the bug that is causing the multiplication error
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Reads a 3x3 matrix and a 3-element vector from two CSV files, multiplies
// them and prints the three resulting integers.
// The NOTE(review) comments mark the statements that make the printed
// values differ from the expected matrix-vector product.
int main() {
//define the three files for reading
FILE *matFile = fopen("test1_input_mat.csv", "r");
FILE *vecFile = fopen("test1_input_vec.csv", "r");
//we are writing to the below file
// NOTE(review): outFile is opened but never written to or closed in this function.
FILE *outFile = fopen("test1_out_vec.csv", "w");
//make sure the file exists
// NOTE(review): only matFile is checked; vecFile is read later without a NULL check.
if (matFile == NULL) {
printf("%s","File does not exist");
//break and return an exit code to the operating system
return 99;
}
//define the dimensions of the matrix
int x = 3;
int y = 3;
//allocate memory to the matrix dynamically
int (*matrix_array)[x] = malloc(sizeof(int[x][y]));
//initialize all the members to zero
// NOTE(review): matrix_array is a pointer, so sizeof(matrix_array) is the size
// of that pointer and not of the x*y ints allocated above.
memset(matrix_array, 0, sizeof(matrix_array));
//read from the matFile and assign to the vector
// NOTE(review): r and l are not used anywhere below.
char *r, l;
//create a buffer variable for the read file process
char buffer[255];
char line[255] = "";
char *replaced = NULL;
// Append every line of the matrix file to `line`.
// NOTE(review): the third argument of strncat limits the characters appended
// per call; it does not take the current length of `line` into account.
while (fgets(buffer, sizeof(buffer), matFile)) {
strncat(line, buffer, 255);
}
// printf("%s",line);
// NOTE(review): in this listing replaceWord is defined after main and no
// prototype precedes this call.
replaced = replaceWord(line, "\n", ",");
//printf("%s", replaced);
//now that we have the elements of the file in a line
//separated by commas
char delim[] = ",";
char *token;
//get the first token
token = strtok(replaced, delim);
//walk through other tokens
while (token != NULL) {
//parse this and add it to the array
int sub = atoi(token);
//assign the number to th array
// NOTE(review): both loops run for every token, so each token overwrites all
// nine cells and the matrix ends up filled with the value of the last token.
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
matrix_array[i][j] = sub;
}
}
token = strtok(NULL, delim);
}
//allocate memory to one dimension array
int *vec = (int *)malloc(3 * sizeof(int));
//section below handles the parsing of numbers from the vector file
// NOTE(review): vline is not used anywhere below.
char vline[255] = "";
char vbuffer[255];
// NOTE(review): concatenated has no initializer, yet strncat below appends to
// it and therefore needs it to already hold a terminated string.
char concatenated[255];
char *replacing;
//read the vector file
while (fgets(vbuffer, 255, vecFile)) {
strncat(concatenated, vbuffer, sizeof(vbuffer));
}
//replace the new line characters with commas
replacing = replaceWord(concatenated, "\n", ",");
//now parse that into the one dimension vector
char *vtoken;
//get the first token
vtoken = strtok(replacing, delim);
//get the rest of the tokens
while (vtoken != NULL) {
int no = atoi(vtoken);
//append the numbers to the one dimension vector
// NOTE(review): every token is written to all three elements, so vec ends up
// holding three copies of the last token.
for (int i = 0; i < 3; i++) {
vec[i] = no;
}
vtoken = strtok(NULL, delim);
}
//this is the section where we do the multiplication of the two
// NOTE(review): malloc does not zero the memory, but the loop below
// accumulates into out_vec with +=.
int *out_vec = (int *)malloc(3 * sizeof(int));
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
//assign the product of the multiplication to the right index in the vector
// NOTE(review): the sum is accumulated per column index j; the expected
// result described in the question (one sum per matrix row) would use index i.
out_vec[j] += matrix_array[i][j] * vec[j];
}
}
//write the integers in the second to the out file
// NOTE(review): str and fin are not used; the results go to stdout only.
char str[255];
char fin[255];
for (int i = 0; i < 3; i++) {
printf("%d\n", out_vec[i]);
}
//close the matFile
// NOTE(review): vecFile, outFile and the malloc'd buffers are not released.
fclose(matFile);
}
//method to replace the newline characters with commas
This code helped me replace the new line characters with commas in the single line comprised of all lines in the file
//this method replaces a string in the target string with another string
/*
 * Returns a newly allocated copy of `s` in which every non-overlapping
 * occurrence of `oldW` (found left to right) is replaced by `newW`.
 *
 * s     NUL-terminated input string; it is not modified.
 * oldW  substring to search for. An empty string matches nothing, so the
 *       result is then a plain copy of `s`.
 * newW  replacement text; may be empty, shorter or longer than `oldW`.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * allocation fails.
 */
char *replaceWord(const char *s, const char *oldW,
                  const char *newW)
{
    const size_t oldWlen = strlen(oldW);
    const size_t newWlen = strlen(newW);
    const size_t slen = strlen(s);

    /* An empty search word would match at every position without ever
       advancing, so treat it as "nothing to replace". */
    if (oldWlen == 0) {
        char *copy = malloc(slen + 1);
        if (copy != NULL)
            memcpy(copy, s, slen + 1);
        return copy;
    }

    /* First pass: count the occurrences so the result can be sized exactly. */
    size_t cnt = 0;
    for (const char *scan = strstr(s, oldW); scan != NULL;
         scan = strstr(scan + oldWlen, oldW)) {
        cnt++;
    }

    char *result = malloc(slen - cnt * oldWlen + cnt * newWlen + 1);
    if (result == NULL)
        return NULL;

    /* Second pass: copy the text between matches and splice in newW. */
    char *out = result;
    const char *rest = s;
    const char *hit;
    while ((hit = strstr(rest, oldW)) != NULL) {
        const size_t gap = (size_t)(hit - rest);
        memcpy(out, rest, gap);
        out += gap;
        memcpy(out, newW, newWlen);
        out += newWlen;
        rest = hit + oldWlen;
    }
    strcpy(out, rest); /* tail after the last match, including the NUL */
    return result;
}
I have tried to simplify and clean up your code as much as possible.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ROWS 3
#define COLS 3
/*
 * Reads a ROWS x COLS integer matrix from "test1_input_mat.csv" (one
 * comma-separated row per line) and a COLS-element vector from
 * "test1_input_vec.csv" (one value per line), then prints the ROWS elements
 * of the matrix-vector product, one per line.
 *
 * Returns 0 on success and 99 on any failure (missing file, short or
 * malformed input, out of memory). All resources are released on every path.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int status = 99;
    char buffer[255];
    FILE *matFile = NULL;
    FILE *vecFile = NULL;
    /* The inner dimension of a row pointer is the number of columns. */
    int (*matrix_array)[COLS] = NULL;
    int *vec = NULL;
    int *out_vec = NULL;

    matFile = fopen("test1_input_mat.csv", "r");
    if (!matFile) {
        puts("File does not exist");
        goto cleanup;
    }
    matrix_array = malloc(sizeof(int[ROWS][COLS]));
    if (!matrix_array)
        goto cleanup;

    for (int row = 0; row < ROWS; row++) {
        /* A missing line would leave the row uninitialised, so it is an error. */
        if (!fgets(buffer, sizeof(buffer), matFile))
            goto cleanup;
        char *p = strtok(buffer, ",");
        for (int col = 0; col < COLS; col++) {
            if (!p)
                goto cleanup;
            matrix_array[row][col] = atoi(p);
            p = strtok(NULL, ",");
        }
    }

    vecFile = fopen("test1_input_vec.csv", "r");
    if (!vecFile) {
        puts("File does not exist");
        goto cleanup;
    }
    /* The vector is indexed by column below, so it needs COLS elements. */
    vec = malloc(COLS * sizeof *vec);
    if (!vec)
        goto cleanup;
    for (int col = 0; col < COLS; col++) {
        if (fscanf(vecFile, "%d", &vec[col]) != 1)
            goto cleanup;
    }

    /* calloc zeroes the accumulators that the += below relies on. */
    out_vec = calloc(ROWS, sizeof *out_vec);
    if (!out_vec)
        goto cleanup;
    for (int row = 0; row < ROWS; row++) {
        for (int col = 0; col < COLS; col++) {
            out_vec[row] += matrix_array[row][col] * vec[col];
        }
    }
    for (int row = 0; row < ROWS; row++) {
        printf("%d\n", out_vec[row]);
    }
    status = 0;

cleanup:
    free(out_vec);
    free(vec);
    free(matrix_array);
    if (vecFile)
        fclose(vecFile);
    if (matFile)
        fclose(matFile);
    return status;
}
One thing still to do is check the return values from malloc and calloc. Though it is unlikely in a small program like this, they can return NULL
Also, since you have hard-coded the size to be 3x3, there is no need for malloc/calloc.
#define ROWS 3
#define COLS 3
...
int matrix_array[ROWS][COLS];
int vec[COLS];            /* one element per matrix column */
int out_vec[ROWS] = {0};  /* zeroed because the product is accumulated with += */

How to assign subsequent columns in csv to array using strtok in C

I am trying to write a simple program that will allow the user to extract data from a csv file after specifying what rows and columns they would like to extract. The program accepts inputs for the start row (can be any number, not necessarily 1) and the final row desired. The same principle applies for the columns. The output is then assigned to a multidimensional array that will later be passed to functions for calculations.
I am having issues with using strtok to extract a column that is not the first as from my understanding, strtok has to read a file sequentially?
I have been using this project to teach myself C, so I have had many iterations of this principle to develop my knowledge. I have successfully read CSV files, so I understand the basic principles of strtok in CSV files.
If i was using the first column to read from, then i can just use strtok as i have been;
var[i][0] = atof(strtok(buf, ","));
var[i][j] = atof(strtok(NULL, ","));
However, to read from column 2 onwards, the above code still has to be used because strtok is sequential, just without assigning column 0 to the variable. The only rough method I have working is to introduce a temporary variable, place all columns into that, and extract the columns specified by the user, but this is undesirable.
#include<stdio.h>
#include<string.h>
#include<stdint.h>
#include<stdlib.h>
// Reads "PV_Data.csv" line by line and stores comma-separated values into
// var, starting with file line number start_row (counted from 1).
//   r, c       dimensions of var (and of the scratch array temp)
//   start_row  first file line to store; it goes to var[0]
//   start_col  when >= 2 (and c != 1), selects the branch that copies
//              columns out of temp
//   var        destination array, filled from row index 0 upwards
void CSV_Col_read(int r, int c, int start_row, int start_col, float var[r][c])
{
char buf[1024];
// Scratch copy of every parsed column; only used by the start_col >= 2 branch.
float temp[r][c];
FILE *fp = fopen("PV_Data.csv", "r");
if(!fp)
{
printf("Could Not Open File\n");
// NOTE(review): execution continues after the message, so fgets and fclose
// below are still called with the NULL fp.
}
int i = 0;
int index_row = 0;
while(fgets(buf, sizeof buf, fp))
{
index_row++;
// Lines before start_row are read and discarded.
if (index_row >= start_row){
// Stop once r - start_row + 1 rows have been stored.
if(i >= r - start_row + 1){
break;
}
// NOTE(review): in all three branches the result of strtok is passed straight
// to atof without a NULL check.
if (c == 1){
// Single-column case: keep only the first field of the line.
var[i][0] = atof(strtok(buf, ","));
}
else if(start_col >= 2){
// Parse all c fields into temp first ...
temp[i][0] = atof(strtok(buf, ","));
for (int j = 1; j <= c-1; j++)
{
temp[i][j] = atof(strtok(NULL, ","));
}
// ... then copy temp[i][start_col-1 .. start_col+2] to var[i][0..3].
// NOTE(review): the bounds -1..2 are fixed, so four values are always copied
// regardless of c and start_col (with c = 5 and start_col = 3 the last
// read is temp[i][5]).
for (int I = -1; I <= 2; I++){
var[i][I+1] = temp[i][start_col + I];
}
}
else{
// start_col < 2: store all c fields directly.
var[i][0] = atof(strtok(buf, ","));
for (int j = 1; j <= c-1; j++)
{
var[i][j] = atof(strtok(NULL, ","));
}
}
i++;
}
}
fclose(fp);
}
/*
 * Prints rows of var to stdout using "%f".
 *
 * With c == 1 only column 0 is printed, for row indices 1 .. r - start_row.
 * Otherwise row indices 0 .. r - start_row are printed, each as column 0
 * followed by columns 1 .. c - start_col, separated by tabs.
 */
void printData(int r, int c, int start_row, int start_col, float var[r][c])
{
    const int last_row = r - start_row;

    if (c == 1) {
        int row = 1;
        while (row <= last_row) {
            printf("%f\n", var[row][0]);
            row++;
        }
        return;
    }

    const int last_col = c - start_col;
    int row = 0;
    while (row <= last_row) {
        printf("%f", var[row][0]);
        int col = 1;
        while (col <= last_col) {
            printf("\t");
            printf("%f", var[row][col]);
            col++;
        }
        printf("\n");
        row++;
    }
}
/*
 * Driver: loads a 720 x 5 table via CSV_Col_read starting at file line 705
 * and column 3, then prints it with printData.
 */
int main()
{
    enum { TOTAL_ROWS = 720, TOTAL_COLS = 5, FIRST_ROW = 705, FIRST_COL = 3 };
    float var[TOTAL_ROWS][TOTAL_COLS];

    CSV_Col_read(TOTAL_ROWS, TOTAL_COLS, FIRST_ROW, FIRST_COL, var);
    printData(TOTAL_ROWS, TOTAL_COLS, FIRST_ROW, FIRST_COL, var);
}
The code above is not "insert correct programming word for idiot proof", however, as I know I have only specified the else if() statement for one scenario. Is this method of using a temporary variable the way to go, or is there a much cleaner way to approach this? I am not asking for code to fix this and am perfectly happy with an answer that directs me to a learning resource.
Thanks
For something simple where I know the input file will be well-formed and without "empty" values, then I might be doing something like this:
float values[rows][columns] = { 0 }; // Initialize all to zero
char line[512]; // Hopefully large enough
unsigned r; // Row index
// Loop to get all rows, taking care to not go out of bounds
for (r = 0; r < rows && fgets(line, sizeof line, fp) != NULL; ++r)
{
unsigned c = 0; // Column index
// Get the first column value
char *current = strtok(line, ",");
// Loop to get all columns
while (c < columns && current != NULL)
{
values[r][c] = atof(current);
++c; // Increase to next column
current = strtok(NULL, ","); // Get next (if any) column value
}
}
// Here the whole file will have been read and parsed, and the values
// from the file will be in the array `values`.

Parsing .csv file into 2D array in C

I have a .csv file that reads like:
SKU,Plant,Qty
40000,ca56,1245
40000,ca81,12553.3
40000,ca82,125.3
45000,ca62,0
45000,ca71,3
45000,ca78,54.9
Note: This is my example but in reality this has about 500,000 rows and 3 columns.
I am trying to convert these entries into a 2D array so that I can then manipulate the data. You'll notice that in my example I just set a small 10x10 matrix A to try and get this example to work before moving on to the real thing.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Returns the num-th comma-separated field of line; defined after main.
const char *getfield(char *line, int num);
// Attempts to load "input/input.csv" into the 10x10 matrix A and print it.
// The NOTE(review) comments mark why the matrix is never filled and why the
// printing does not match the element type.
int main() {
// NOTE(review): the result of fopen is not checked before fgets uses it.
FILE *stream = fopen("input/input.csv", "r");
char line[1000000];
// NOTE(review): A holds int, while getfield returns const char * and the
// print loop below uses "%s".
int A[10][10];
int i, j = 0;
//Zero matrix
for (i = 0; i < 10; i++) {
for (j = 0; j < 10; j++) {
A[i][j] = 0;
}
}
// NOTE(review): i is not limited to the 10 rows of A while lines are read.
for (i = 0; fgets(line, 1000000, stream); i++) {
// NOTE(review): j still holds 10 from the zeroing loop above and is never
// reset, so this while body does not execute.
while (j < 10) {
// getfield tokenises its argument with strtok, hence the fresh copy per field.
char *tmp = strdup(line);
// NOTE(review): getfield returns on its num-th token counting from 1
// ("if (!--num)"), whereas j starts at 0; and the returned pointer refers
// into tmp, which is freed on the next line.
A[i][j] = getfield(tmp, j);
free(tmp);
j++;
}
}
//print matrix
for (i = 0; i < 10; i++) {
for (j = 0; j < 10; j++) {
printf("%s\t", A[i][j]);
}
printf("\n");
}
}
/*
 * Returns the num-th field of line, counting from 1, or NULL when the line
 * has fewer fields (or num is less than 1).
 *
 * The first field is split on ',' only; later fields are split on ',' and
 * '\n'. line is modified in place by strtok, and the returned pointer
 * refers into line.
 */
const char *getfield(char *line, int num) {
    const char *field = strtok(line, ",");

    while (field != NULL && *field != '\0') {
        num -= 1;
        if (num == 0) {
            return field;
        }
        field = strtok(NULL, ",\n");
    }
    return NULL;
}
It prints only "null" errors, and it is my belief that I am making a mistake related to pointers on this line: A[i][j] = getfield(tmp, j). I'm just not really sure how to fix that.
This is work that is based almost entirely on this question: Read .CSV file in C . Any help in adapting this would be very much appreciated as it's been a couple years since I last touched C or external files.
It looks like commenters have already helped you find a few errors in your code. However, the problems are pretty entrenched. One of the biggest issues is that you're using strings. Strings are, of course, char arrays; that means that there's already a dimension in use.
It would probably be better to just use a struct like this:
/* One CSV row; each column is kept as text of at most 9 characters plus the
   terminating NUL. */
struct csvTable
{
char sku[10];   /* first column  */
char plant[10]; /* second column */
char qty[10];   /* third column  */
};
That will also allow you to set your columns to the right data types (it looks like SKU could be an int, but I don't know the context).
Here's an example of that implementation. I apologize for the mess, it's adapted on the fly from something I was already working on.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Based on your estimate
// You could make this adaptive or dynamic
// rowNum is the capacity of the global table below (number of rows).
#define rowNum 500000
// One CSV row; each column is kept as text of at most 9 characters plus the
// terminating NUL.
struct csvTable
{
char sku[10];   // first column
char plant[10]; // second column
char qty[10];   // third column
};
// Declare table
// Defined at file scope, so it has static storage duration and starts out
// zero-initialised.
struct csvTable table[rowNum];
int main()
{
// Load file
FILE* fp = fopen("demo.csv", "r");
if (fp == NULL)
{
printf("Couldn't open file\n");
return 0;
}
for (int counter = 0; counter < rowNum; counter++)
{
char entry[100];
fgets(entry, 100, fp);
char *sku = strtok(entry, ",");
char *plant = strtok(NULL, ",");
char *qty = strtok(NULL, ",");
if (sku != NULL && plant != NULL && qty != NULL)
{
strcpy(table[counter].sku, sku);
strcpy(table[counter].plant, plant);
strcpy(table[counter].qty, qty);
}
else
{
strcpy(table[counter].sku, "\0");
strcpy(table[counter].plant, "\0");
strcpy(table[counter].qty, "\0");
}
}
// Prove that the process worked
for (int printCounter = 0; printCounter < rowNum; printCounter++)
{
printf("Row %d: column 1 = %s, column 2 = %s, column 3 = %s\n",
printCounter + 1, table[printCounter].sku,
table[printCounter].plant, table[printCounter].qty);
}
// Wait for keypress to exit
getchar();
}
There are multiple problems in your code:
In the second loop, you do not stop reading the file after 10 lines, so you would try and store elements beyond the end of the A array.
You do not reset j to 0 at the start of the while (j < 10) loop. j happens to have the value 10 at the end of the initialization loop, so you effectively do not store anything into the matrix.
The matrix A should be a 2D array of char *, not int, or potentially an array of structures.
Here is a simpler version with an allocated array of structures:
#include <stdio.h>
#include <stdlib.h>
/* One CSV record; every column is stored as text of at most 19 characters
   plus the terminating NUL. The typedef needs the trailing name so that
   `item_t` can be used as a type. */
typedef struct item_t {
    char SKU[20];
    char Plant[20];
    char Qty[20];
} item_t;
int main(void) {
FILE *stream = fopen("input/input.csv", "r");
char line[200];
int size = 0, len = 0, i, c;
item_t *A = NULL;
if (stream) {
while (fgets(line, sizeof(line), stream)) {
if (len == size) {
size = size ? size * 2 : 1000;
A = realloc(A, sizeof(*A) * size);
if (A == NULL) {
fprintf(stderr, "out of memory for %d items\n", size);
return 1;
}
}
if (sscanf(line, "%19[^,\n],%19[^,\n],%19[^,\n]%c",
A[len].SKU, A[len].Plant, A[len].Qty, &c) != 4
|| c != '\n') {
fprintf(stderr, "invalid format: %s\n, line);
} else {
len++;
}
}
fclose(stream);
//print matrix
for (i = 0; i < len; i++) {
printf("%s,%s,%s\n", A[i].SKU, A[i].Plant, A[i].Qty);
}
free(A);
}
return 0;
}

Multidimensional array with unknown items

#include <stdio.h>
#include <stdlib.h>
// Work-in-progress attempt to read a "<width>x<height>" header followed by
// "row,column,character,count" records and draw them on the console.
// NOTE(review): strtok is used, but of the includes shown above this
// function only <stdio.h> and <stdlib.h> are present (strtok is declared in
// <string.h>).
int main() {
// NOTE(review): width and height are int pointers, but below they receive
// the char * that strtok returns; the text is never converted to a number.
int *width;
int *height;
// NOTE(review): row, column, character and count are ints, yet each is
// assigned a char * token below.
int row;
int column;
int character;
int count;
// NOTE(review): pictureit is a single int but is indexed like a 2D array below.
int pictureit;
double i = 0;
FILE *fp;
char file[50];
char line[25]; // assume each line has max 25 characters
printf("What file should we pull from: ");
// NOTE(review): "%s" has no field width, so input longer than 49 characters
// overruns file.
scanf("%s", file);
//read file using File pointer
// NOTE(review): the result of fopen is not checked before it is used.
fp = fopen(file, "r");
// read the first line in the file
fgets(line, sizeof(line), fp);
width = strtok(line,"x");
// NOTE(review): "/0" is a two-character delimiter set ('/' and '0'), not the
// NUL character.
height = strtok(NULL, "/0");
// read all the future lines in the file excluding the first
while (fgets(line, sizeof(line), fp)) {
row = strtok(line, ",");
column = strtok(NULL, ",");
character = strtok(NULL, ",");
count = strtok(NULL, "/0");
if(i < count) {
// NOTE(review): the leading and trailing "**" are the markers the question
// text refers to ("The ** ** is where I am working"); they are not C syntax.
**printf("%s", pictureit[row][column] = character);**
i++;
}
}
fclose(fp);
return 0;
}
I'm pulling in a file with this kind of setup
75x53
0,36,.,1
0,37,M,1
1,32,.,1
1,33,:,1
1,34,A,1
1,35,M,2
1,37,O,1
1,38,:,1
2,23,.,1
2,24,:,1
2,25,A,1
2,26,M,5
I've been brainstorming for a while. How would I go about displaying this on the console? It obviously needs to go into a 2d array. The program needs to know the height and width of the array to display a space or character in that spot.
PS: This program will display a picture in the console when finished. The "** **" is where I am working.
You could dynamically allocate a 2d array with the right dimensions (according to your first line), then fill it up with the data from your file and finally print it with two nested for loops.
EDIT: Basically, you would do:
//...
//Create the dynamic array: one pointer per row, then `width` chars per row
//(the outer allocation holds pointers, so it is sized with sizeof *array)
char ** array = malloc(sizeof *array * height);
int i;
for(i = 0; i < height; i++)
    array[i] = malloc(sizeof **array * width);
// Fill your array here (put different chars in it) ...
//Print it
int x,y;
for(y = 0; y < height; y++)
{
    for(x = 0; x < width; x++)
        printf("%c ", array[y][x]);
    printf("\n");
}
//Free the array: every row first, then the table of row pointers
for(i = 0; i < height; i++)
    free(array[i]);
free(array);
I voluntarily omitted the part where you check the return value of the malloc (whether it's NULL or not), but you should definitely add it.
Normally I wouldn't do this, but I felt the need to do a scanning exercise:
/*
 * Asks for a file name, reads a "<width>x<height>" header followed by
 * "row,column,character,count" records, and prints the resulting picture.
 *
 * Each record places `character` at (row, column). `count` is treated as a
 * run length along the row, which is what the sample data suggests
 * ("1,35,M,2" is followed by "1,37,O,1"). Cells that no record touches are
 * printed as spaces. Records are read until end of file, because the input
 * is sparse and does not contain width * height records.
 *
 * Exits with EXIT_FAILURE on any I/O, format, range or allocation error.
 */
int main(void)
{
    char fn[100];

    fprintf(stdout, "Enter file name:");
    fflush(stdout);

    int result = fscanf(stdin, " %99s", fn);
    if (1 != result)
    {
        fprintf(stderr, "Reading the file's name failed.\n");
        exit(EXIT_FAILURE);
    }

    FILE * pf = fopen(fn, "r");
    if (NULL == pf)
    {
        fprintf(stderr, "Opening file '%s' failed.\n", fn);
        exit(EXIT_FAILURE);
    }

    size_t width = 0;
    size_t height = 0;
    result = fscanf(pf, " %zux%zu", &width, &height);
    /* A zero dimension would make the variably modified type below invalid. */
    if (2 != result || 0 == width || 0 == height)
    {
        fprintf(stderr, "Reading width and/or heigth from file '%s' failed.\n", fn);
        fclose(pf);
        exit(EXIT_FAILURE);
    }

    /* height rows of width characters, indexed [row][column]; calloc checks
       the height * width multiplication and zeroes every cell. */
    char (*pa)[height][width] = calloc(height, sizeof **pa);
    if (NULL == pa)
    {
        perror("calloc() failed");
        fclose(pf);
        exit(EXIT_FAILURE);
    }

    for (size_t record = 0; ; ++record)
    {
        size_t row, column;
        char c;
        int count;

        result = fscanf(pf, " %zu,%zu,%c,%d", &row, &column, &c, &count);
        if (EOF == result)
        {
            break; /* clean end of the data */
        }
        if (4 != result || count < 0)
        {
            fprintf(stderr, "Reading data (#%zu) row from '%s' failed.\n", record, fn);
            free(pa);
            fclose(pf);
            exit(EXIT_FAILURE);
        }
        /* Reject anything that would be written outside the picture. */
        if (row >= height || column >= width || (size_t) count > width - column)
        {
            fprintf(stderr, "Data row #%zu in '%s' lies outside the %zux%zu picture.\n",
                    record, fn, width, height);
            free(pa);
            fclose(pf);
            exit(EXIT_FAILURE);
        }
        for (size_t k = 0; k < (size_t) count; ++k)
        {
            (*pa)[row][column + k] = c;
        }
    }
    fclose(pf);

    for (size_t row = 0; row < height; ++row)
    {
        for (size_t column = 0; column < width; ++column)
        {
            char cell = (*pa)[row][column];
            /* Untouched cells are still 0 from calloc; show them as blanks. */
            fprintf(stdout, "%c", cell ? cell : ' ');
        }
        fprintf(stdout, "\n");
    }

    free(pa);
    return EXIT_SUCCESS;
}
Also I am curious about the picture to be printed ... ;-)

Reading in data from a file into an array

If I have an options file along the lines of this:
size = 4
data = 1100010100110010
And I have a 2d size * size array that I want to populate the values in data into, what's the best way of doing it?
To clarify, for the example I have I'd want an array like this:
int[4][4] array = {{1,1,0,0}, {0,1,0,1}, {0,0,1,1}, {0,0,1,0}}. (Not real code but you get the idea).
Size can be really be any number though.
I'm thinking I'd have to read in the size, malloc an array, and then maybe read in a string full of data, loop through each char in the data, convert it to an int and stick it in the appropriate index? But I really have no idea how to go about it, and I have been searching for a while with no luck.
Any help would be cool! :)
// Reads an options file of the form
//     size = N
//     data = <N*N decimal digits>
// and stores the digits, row by row, in a newly allocated N x N array.
//
// array      receives the new[]-allocated rows on success. It is taken by
//            reference so the caller actually sees the allocation; existing
//            calls that pass an `int **` variable are unchanged. The caller
//            releases it with delete [] on every row and then on the array.
// file_name  path of the options file.
//
// Returns 0 on success, -1 if the file cannot be opened and -2 if the
// content is malformed (wrong keys, non-positive size, data that is not
// exactly size * size digits). Nothing is allocated unless 0 is returned.
// Both lines are read through a 1024-byte buffer, so the data line can hold
// at most 1016 digits.
int process_file(int **&array, char const *file_name)
{
    FILE *file = fopen(file_name, "rt");
    if(file == NULL)
        return -1;//can't open file

    char line[1024]; //1024 just for example

    if(fgets(line, sizeof line, file) == NULL
        || strncmp(line, "size = ", 7) != 0)
    {
        fclose(file);
        return -2; //incorrect format
    }
    int const size = atoi(line + 7);

    if(fgets(line, sizeof line, file) == NULL
        || strncmp(line, "data = ", 7) != 0)
    {
        fclose(file);
        return -2; //incorrect format
    }
    fclose(file); //everything needed is in `line` now

    //the data has to fit in `line`, which also keeps size * size small
    if(size <= 0 || size > (int)sizeof line)
        return -2;

    //validate before allocating so no cleanup is needed on error
    char const *data = line + 7;
    size_t const cells = (size_t)size * (size_t)size;
    size_t count = 0;
    while(data[count] != '\0' && data[count] != '\n' && data[count] != '\r')
    {
        if(data[count] < '0' || data[count] > '9')
            return -2; //not a digit
        ++count;
    }
    if(count != cells)
        return -2; //too few or too many values

    array = new int * [size];
    for(int r = 0; r < size; ++r)
    {
        array[r] = new int [size];
        for(int c = 0; c < size; ++c)
            array[r][c] = data[(size_t)r * size + c] - '0';
    }
    return 0;
}
Don't forget to delete the array before the program ends.
Loops.
FILE *fp = fopen("waaa.txt", "r");
if(fp == null) { /* bleh */ return; }
int j = 0;
while(char ch = fgetc(fp)) {
for(int i = 0; i < 4; ++i) {
array[j][i] = ch;
}
++j;
}
I am not sure about the fgetc() syntax, so check it. It reads one character at a time.

Resources