Debugging csv read in c - c

I'm having some trouble with a project I'm working on for my master thesis. I got some help from a user here a while back, but the code doesn't quite seem to be working. My application crashes and saves a dump-file, but my debugging skills are quite limited so I'm not sure what is causing the crash. The code that I have that I believe is causing the crash looks like this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdbool.h>
#include "flames.h"
#include "fmodel.h"
typedef struct row_tag
{
int index;
double *data;
} row_t;
size_t get_col_count(FILE *is)
{
size_t col_count = 1;
int ch;
while ((ch = fgetc(is)) != EOF && ch != '\n')
if (ch == ',')
++col_count;
rewind(is);
return col_count;
}
row_t* csv_read(FILE *is, size_t *cols, size_t *rows)
{
*cols = get_col_count(is);
*rows = 0;
char const *origin_format = "%*[^ ,]%c";
char const *row_header_format = "%d%c";
char const *format = "%lf%c";
row_t *csv = NULL;
bool valid = true;
for (size_t current_row = 0; valid; ++current_row) {
csv = (row_t*)realloc(csv, (current_row + 1)* sizeof(row_t));
csv[current_row].data = (double*)calloc((size_t)cols - 1, sizeof(double));
for (size_t current_col = 0; valid && current_col < (size_t)cols; ++current_col) {
char delim;
if (!current_col && !current_row) {
if (fscanf(is, origin_format, &delim) != 1 || delim != ',') {
valid = false;
//continue;
}
csv[0].index = -1;
}
else if (!current_col) {
int result = -1;
if ((result = fscanf(is, row_header_format, &csv[current_row].index, &delim)) != 2 || delim != ',') {
valid = false;
continue;
}
}
else {
if (fscanf(is, format, &csv[current_row].data[current_col - 1], &delim) != 2 || delim != ',' && delim != '\n')
valid = false;
}
}
if (!valid)
free(csv[current_row].data);
else *rows = current_row + 1;
}
return csv;
}
void csv_free(row_t *csv, size_t rows)
{
for (size_t row = 0; row < rows; ++row)
free(csv[row].data);
free(csv);
}
double csv_get_value(row_t *csv, int col_index, size_t cols, int row_index, size_t rows)
{
size_t col;
for (col = 1; csv[0].data[col] != col_index && col < cols; ++col);
if (col >= cols || csv[0].data[col] != col_index)
return 0.;
size_t row;
for (row = 1; csv[row].index != row_index && row < rows; ++row);
if (row >= rows || csv[row].index != row_index)
return 0.;
return csv[row].data[col];
}
And then in my main function: (note that FMHPrint is just a printing function in the application I'm working in called FLAMES, therefore the includes aswell)
char const *filename = "Dampening.csv";
FILE *is = fopen(filename, "r");
if (!is) {
FMHPrint(0, 0, "Couldnt open \%s\ for reading!\n\n", filename);
return (FFAILURE);
}
size_t cols;
size_t rows;
row_t *csv = csv_read(is, &cols, &rows);
FMHPrint(0, 0, "Cols: %zu\n", cols);
FMHPrint(0, 0, "Rows: %zu\n", rows);
fclose(is);
for (size_t y = 0; y < rows; ++y) {
printf("%2d: ", csv[y].index);
for (size_t x = 0; x < cols - 1; ++x)
printf("%f ", csv[y].data[x]);
putchar('\n');
}
double value = csv_get_value(csv, 550, cols, 7, rows);
FMHPrint(0, 0, "Dampening value is: %f", value);
csv_free(csv, rows);
I have no idea why FLAMES keeps crashing, and the DMP-file isn't very helpful for me. Can anyone explain what is wrong? :)
Regards, Anders

You pass cols and rows as pointer to the reading function, so that you can update the variables in the calling function. When you allocate memory for each row:
csv[current_row].data = (double*)calloc((size_t)cols - 1, sizeof(double));
you use cols, which is a pointer. You probably got a compiler warning, so you decided to cast the value to size_t, but that doesn't solve the problem, it only makes the warning go away.
The value you want is where the pointer points to, so you must dereference it:
csv[current_row].data = calloc(*cols - 1, sizeof(double));

Related

How to print empty value for element without an integer value in C

So far I have the program does what it needs to do. My issue now is that I have two arrays and when I print them I get 0s for empty elements. I want the empty elements to print nothing.
Example:
Array 1:
1 1
Array 2:
3 3 3 3
Output:
1 3 1 3 0 3 0 3
My goal is:
1 3 1 3 3 3
which is to remove the 0s if I didnt input 0 in array
My code:
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
int main(void) {
char * line = NULL;
size_t len = 0;
char * line2 = NULL;
size_t len2 = 0;
char ch;
int counter = 0;
char ch2;
int counter2 = 0;
int mSize;
int mergedArray[20];
int i = 0; // for loop
int j = 0;
int * myPtr;
myPtr = (int * ) malloc(counter * sizeof(int));
int * myPtr2;
myPtr2 = (int * ) malloc(counter2 * sizeof(int));
while (getline( & line, & len, stdin) != EOF) {
//===============
//value 1 for line 1
ch = * line;
printf("line 1: Test: %s\n", line);
char * start = line;
char * eon;
long value;
//===============
//value 2 line 2
getline( & line2, & len2, stdin);
ch2 = * line2;
printf("line 2: Test: %s\n", line2);
char * start2 = line2;
char * eon2;
long value2;
//==============
errno = 0;
//============loop for line 1 =================
printf("=============\n");
printf("Line 1\n");
while ((value = strtol(start, & eon, 0)),
eon != start &&
!((errno == EINVAL && value == 0) ||
(errno == ERANGE && (value == LONG_MIN || value == LONG_MAX))))
{
//getting the size of the line
counter++;
start = eon;
errno = 0;
myPtr[counter] = value;
// printf("Array #1 [%d] %d\n",counter , myPtr[counter]);
} //end of while
printf("Size: %d\n", counter);
printf("=============\n");
//============loop for line 2 =================
printf("Line 2\n");
while ((value2 = strtol(start2, & eon2, 0)),
eon2 != start2 &&
!((errno == EINVAL && value2 == 0) ||
(errno == ERANGE && (value2 == LONG_MIN || value2 == LONG_MAX))))
{
//getting the size of the line
counter2++;
start2 = eon2;
errno = 0;
myPtr2[counter2] = value2;
//printf("Array #2 [%d] %d\n",counter2 , myPtr2[counter2]);
} //end of while
printf("Size: %d\n", counter2);
printf("=============\n");
for (i = 0; i < counter; i++) {
mergedArray[i] = myPtr[i + 1]; // not used
}
mSize = counter + counter2;
for (i = 0, j = counter; j < mSize && i < counter2; i++, j++) {
mergedArray[j] = myPtr2[i + 1]; // not used
//here I print out both arrays
printf("%d %d ", myPtr[i + 1], myPtr2[i + 1]);
}
} // end of main while
return 0;
}
Your program is extremelly complicated. It is a good practice to separate login into functions. In the solution below you need to provide large enouth array for the destination array and both arrays to be merged.
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define SA(a) (sizeof(a)/sizeof((a)[0]))
size_t mergeArrays(int *dest, const int *src1, const int *src2, size_t size_1, size_t size_2)
{
size_t pos = 0;
for(size_t index = 0; index < MAX(size_1, size_2); index++)
{
if(index < size_1) dest[pos++] = src1[index];
if(index < size_2) dest[pos++] = src2[index];
}
return pos;
}
int main(void)
{
int arr1[] = {1,1};
int arr2[] = {3,3,3,3,3};
int dest[SA(arr1) + SA(arr2)];
size_t destsize = mergeArrays(dest, arr1, arr2, SA(arr1), SA(arr2));
for(size_t index = 0; index < destsize; index++)
{
printf("%d ", dest[index]);
}
printf("\n");
}
https://godbolt.org/z/WG4v6T
Here you have the version reading from user and using dynamic memory allocation:
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define SA(a) (sizeof(a)/sizeof((a)[0]))
size_t mergeArrays(int *dest, const int *src1, const int *src2, size_t size_1, size_t size_2)
{
size_t pos = 0;
for(size_t index = 0; index < MAX(size_1, size_2); index++)
{
if(index < size_1) dest[pos++] = src1[index];
if(index < size_2) dest[pos++] = src2[index];
}
return pos;
}
int *readArray(size_t *size)
{
int *arr = NULL;
printf("Enter size:");
if(scanf(" %zu", size) != 1) goto func_return;
arr = malloc(*size * sizeof(*arr));
if(!arr) { free(arr); arr = NULL; goto func_return;}
for(size_t index = 0; index < *size; index++)
{
if(scanf(" %d", &arr[index]) != 1) {free(arr); arr = NULL; goto func_return;}
}
func_return:
return arr;
}
int main(void)
{
size_t size1, size2;
int *arr1 = readArray(&size1);
int *arr2 = readArray(&size2);
int *dest = NULL;
if(arr1 && arr2) dest = malloc(size1 + size2);
if(dest)
{
size_t destsize = mergeArrays(dest, arr1, arr2, size1, size2);
for(size_t index = 0; index < destsize; index++)
{
printf("%d ", dest[index]);
}
}
printf("\n");
free(arr1);
free(arr2);
free(dest);
}
https://godbolt.org/z/5sMbYj

How modify a multidimensional pointer inside a function?

I have a function that manipulates a char*** using malloc and memcpy this way
// Convert a buffer full line to separated variables
int parseBufferToVariables(char ***variableContainer, char *bufferToParse, int maxVarSize) {
int i = 0;
// Get number of rows of the string
int numberOfRows = 0;
for (i = 0; bufferToParse[i] != '\0'; i++) {
if (bufferToParse[i] == '\n')
++numberOfRows;
}
// Get number of columns of the string
int numberOfColumns = 1;
for (i = 0; bufferToParse[i] != '\n'; i++) {
if (bufferToParse[i] == '\t')
++numberOfColumns;
}
// Allocate separated variable array
size_t dim0 = numberOfColumns, dim1 = numberOfRows, dim2 = maxVarSize;
variableContainer = malloc(sizeof *variableContainer * dim0);
if (variableContainer) {
size_t i;
for (i = 0; i < dim0; i++) {
variableContainer[i] = malloc(sizeof *variableContainer[i] * dim1);
if (variableContainer[i]) {
size_t j;
for (j = 0; j < dim1; j++) {
variableContainer[i][j] = malloc(sizeof *variableContainer[i][j] * dim2);
}
}
}
}
// Start parsing string to 3D array
int init = 0;
int numberOfVars = 0;
int numberOfLines = 0;
int sizeOfVar = 0;
int position = 0;
char emptyArray[MAXVARSIZE] = {0};
// Loop trought all lines
i = 0;
while (numberOfLines < numberOfRows) {
// Every delimiter
if (bufferToParse[i] == '\t' || bufferToParse[i] == '\n') {
// Size of the new sring
sizeOfVar = i - init;
// Set last \0 character in order to recognize as a proper string
memcpy(&variableContainer[numberOfVars][numberOfLines], emptyArray, maxVarSize);
// Copy the string to array
memcpy(&variableContainer[numberOfVars][numberOfLines], &bufferToParse[position], sizeOfVar);
// Handle pointers poisition
init = i + 1;
position += sizeOfVar + 1;
// Handle when end of line is reached
if (bufferToParse[i] == '\n') {
numberOfVars = 0;
numberOfLines++;
}
}
i++;
}
return numberOfRows;
}
And Im trying to call it in different ways:
char*** container= {0};
parseBufferToVariables (&container, inputString, MAXVARSIZE);
char*** container= {0};
parseBufferToVariables (container, inputString, MAXVARSIZE);
Even I try calling a char**** in the function:
int parseBufferToVariables(char**** variableContainer, char* bufferToParse, int maxVarSize)
But I always have a seg-fault calling the char*** outside the parseBufferToVariables function.
Any ideas?
OP is shooting for a 4 * parameter, yet other approaches are better.
The high degree of *s mask a key failing is that code needs to convey the column (# of tabs) width somehow.
Further, I see no certain null character termination in forming the _strings_as
the 2nd memcpy() is unbounded in size - may even overwrite allocation boundaries.
The idea below is that each level of allocation ends with a null.
csv = parse_file_string(const char *file_string);
Upon return, when csv[line] == NULL, there are no more lines
When csv[line][tab] == NULL, there are no more strings.
This approach also allows for a different number of strings per line.
Adjusted algorithm, pseudo C code
// return NULL on error
char ***parse_file_string(const char *file_string) {
number_lines = find_line_count(file_string);
char ***csv = calloc(number_lines + 1, sizeof *csv);
if (csv == NULL) return NULL;
for (line=0; line < number_lines; line++) {
tab_count = find_tab_count(file_string);
csv[line] = calloc(tab_count + 2, sizeof *(csv[line]));
// add NULL check
for (tab=0; tab < tab_count; tab++) {
char *end = strchr(file_string, '\t');
csv[line][tab] = malloc_string(file_string, end);
// add NULL check
file_string = end + 1;
}
char *end = strchr(file_string, '\n');
csv[line][tab++] = malloc_str(file_string, end);
// add NULL check
file_string = end + 1;
csv[line][tab] = NULL;
}
csv[line] = NULL;
return csv;
}
Usage
char ***container = parse_file_string(file_string);
for (line=0; container[line]; line++)
for (tab=0; container[line][tab]; tab++)
puts(container[line][tab]);
//free
for (line=0; container[line]; line++)
for (tab=0; container[line][tab]; tab++)
free(container[line][tab]);
free(container[line]);
free (container)
A pointer to a variable length array could be used if supported.
First get the dimensions of the contents of the buffer. This assumes that each line will have the same number of tabs.
Declare the pointer and allocate the memory.
Then parse the buffer into the allocated memory.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void getdimension ( char *buffer, int *rows, int *cols, int *size) {
int maxsize = 0;
*rows = 0;
*cols = 0;
*size = 0;
while ( *buffer) {//not the terminating zero
if ( '\n' == *buffer) {
if ( ! *rows) {//no rows counted yet
++*cols;//add a column
}
++*rows;
if ( maxsize > *size) {
*size = maxsize;
}
maxsize = 0;
}
if ( '\t' == *buffer) {
if ( ! *rows) {//no rows counted yet
++*cols;
}
if ( maxsize > *size) {
*size = maxsize;
}
maxsize = 0;
}
++maxsize;
++buffer;
}
if ( '\n' != *(buffer - 1)) {//last character is not a newline
++*rows;
if ( maxsize > *size) {
*size = maxsize;
}
}
}
void createptr ( int rows, int columns, int size, char (**ptr)[columns][size]) {
if ( NULL == ( *ptr = malloc ( sizeof **ptr * rows))) {
fprintf ( stderr, "malloc problem\n");
exit ( EXIT_FAILURE);
}
for ( int line = 0; line < rows; ++line) {
for ( int tab = 0; tab < columns; ++tab) {
(*ptr)[line][tab][0] = 0;
}
}
}
void parsebuffer ( char *buffer, int rows, int columns, int size, char (*ptr)[columns][size]) {
int eachrow = 0;
int eachcol = 0;
int eachsize = 0;
while ( *buffer) {
if ( '\n' == *buffer) {
++eachrow;
eachcol = 0;
eachsize = 0;
}
else if ( '\t' == *buffer) {
++eachcol;
eachsize = 0;
}
else {
ptr[eachrow][eachcol][eachsize] = *buffer;
++eachsize;
ptr[eachrow][eachcol][eachsize] = 0;
}
++buffer;
}
}
int main ( void) {
char line[] = "12\t34\t56\t78\t!##\n"
"abc\tdef\tghi\tjkl\t$%^\n"
"mno\tpqr\tstu\tvwx\tyz\n"
"ABC\tDEF\tGHI\tJKL\tMNOPQ\n";
int rows = 0;
int columns = 0;
int size = 0;
getdimension ( line, &rows, &columns, &size);
printf ( "rows %d cols %d size %d\n", rows, columns, size);
char (*ptr)[columns][size] = NULL;//pointer to variable length array
createptr ( rows, columns, size, &ptr);
parsebuffer ( line, rows, columns, size, ptr);
for ( int row = 0; row < rows; ++row) {
for ( int col = 0; col < columns; ++col) {
printf ( "ptr[%d][%d] %s\n", row, col, ptr[row][col]);
}
}
free ( ptr);
return 0;
}

How can I get the proper dimensions of unknown matrixes for a matrix multiplier C program?

So my attempt was that creating a program that automatically gets two matrixes' size from a .txt file and multiplies them. I could make the program with given sizes so in itself I only have problem with counting the cols and rows.
The input something like (MxN matrix):
1 2 3 4
1 2 3 4
1 2 3 4
To be specific, here is my program so far (the beginning of the code is relevant I think):
#include <stdio.h>
#include <stdlib.h>
struct mat1
{
int cols;
int rows;
};
struct mat2
{
int cols;
int rows;
};
struct mat1 dim1(const char* file)
{
struct mat1 m1;
int rows = 0;
int cols = 0;
char c;
FILE *f = fopen(file, "r+");
while((c = fgetc(f) != EOF))
{
if(c != '\n' && rows == 0)
{
cols++;
}
else if(c == '\n')
rows++;
}
rows++;
return m1;
}
struct mat2 dim2(const char* file)
{
struct mat2 m2;
int rows = 0;
int cols = 0;
char c;
FILE *f = fopen(file, "r+");
while((c = fgetc(f) != EOF))
{
if(c != '\n' && rows == 0)
{
cols++;
}
else if(c == '\n')
rows++;
}
rows++;
return m2;
}
double* alloc_matrix(int cols, int rows) {
double* m = (double*)malloc(cols * rows * sizeof(double));
if (m == 0) {
printf("Memory allocation error.\n");
exit(-1);
}
return m;
}
void read_matrix(FILE* f, double* m, int cols, int rows) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
fscanf(f, "%lf", &m[i * cols + j]);
}
}
}
void multiplication(double* m1, double* m2, double* m3, int cols, int rows) {
for(int i = 0; i < rows; i++) {
for(int j = 0; j < cols; j++) {
m3[i * cols +j]=0;
for(int k = 0; k < cols; k++) {
m3[i * cols +j]+=m1[i * cols +k]*m2[k * cols +j];
}
}
}
}
void write_matrix(double* m, int cols, int rows) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f ", m[i * cols + j]);
}
printf("\n");
}
}
int main(int argc, char* argv[])
{
char* matrix1 = argv[1];
char* matrix2 = argv[2];
if (argc < 3) {
printf("Not enough arguments.\n");
exit(-1);
}
struct mat1 m1 = dim1(matrix1);
struct mat2 m2 = dim2(matrix2);
printf(" %d %d \n", m1.cols, m1.rows);
printf(" %d %d \n", m2.cols, m2.rows);
int c1 = m1.cols;
int r1 = m1.rows;
int c2 = m2.cols;
int r2 = m2.rows;
if (r1!=c2)
{
printf("Matrixes are not suitable for multiplication. \n");
exit(-1);
}
double* mtx1 = alloc_matrix(c1, r1);
double* mtx2 = alloc_matrix(c2, r2);
FILE* f1 = fopen(matrix1, "r");
if (f1 == 0)
{
printf("Cannot open file %s.", argv[1]);
exit(-1);
}
FILE* f2 = fopen(matrix2, "r");
if (f1 == 0)
{
printf("Cannot open file %s.", argv[1]);
exit(-1);
}
read_matrix(f1, mtx1, c1, r1);
read_matrix(f2, mtx2, c2, r2);
double* mtx3 = alloc_matrix(c1, r2);
multiplication(mtx1, mtx2, mtx3, c1, r2);
write_matrix(mtx3, c1, r2);
free(mtx1);
free(mtx2);
free(mtx3);
fclose(f1);
fclose(f2);
return 0;
}
When I tried it out with 2 3x3 matrixes, The outpot:
6422164 4199040 (from 2 printf()s that I set to check the dimensions).
6422164 4199040
Matrixes are not suitable for multiplication. (it's irrelevant)
So basically it doesn't use 3x3.
I cannot figure out what the problem is.
This is prefaced by my top comments.
I had to refactor dim to handle an arbitrarily large matrix, so I had to scan the first line of the file char-by-char, counting whitespace strings (which yields the number of columns - 1). It handles/strips leading/trailing whitespace [malformed]
I had dim then rewind the file and use fscanf and realloc to create the matrix dynamically.
Here's the working code [please pardon the gratuitous style cleanup]:
#include <stdio.h>
#include <stdlib.h>
struct mat {
int cols;
int rows;
double *m;
};
// size and read in matrix
struct mat
dim(const char *file)
{
struct mat m;
int rows = 0;
int cols = 0;
int maxcnt;
int curcnt;
int ret;
int c;
int c2;
FILE *f = fopen(file, "r+");
// strip leading whitespace [if any] off first line
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n')
break;
if (c != ' ')
break;
}
// scan first line and count columns (number of space separaters)
while (1) {
c2 = ' ';
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n') {
if (c2 != ' ')
++cols;
break;
}
if (c == ' ') {
if (c != c2)
++cols;
break;
}
c2 = c;
}
if (c == '\n')
break;
}
// convert number of whitespace separaters into number of columns
if (cols > 0)
++cols;
rewind(f);
m.rows = 0;
m.cols = cols;
m.m = NULL;
curcnt = 0;
maxcnt = 0;
while (1) {
if (curcnt >= maxcnt) {
maxcnt += m.cols * 100;
double *tmp = realloc(m.m,sizeof(double) * maxcnt);
if (tmp == NULL) {
printf("dim: realloc failure\n");
exit(1);
}
m.m = tmp;
}
ret = 0;
for (int idx = 0; idx < cols; ++idx, ++curcnt) {
ret = fscanf(f, "%lf", &m.m[curcnt]);
if (ret != 1)
break;
}
if (ret != 1)
break;
rows += 1;
}
fclose(f);
m.rows = rows;
// trim matrix to actual size;
m.m = realloc(m.m,sizeof(double) * rows * cols);
return m;
}
double *
alloc_matrix(int cols, int rows)
{
double *m = (double *) malloc(cols * rows * sizeof(double));
if (m == 0) {
printf("Memory allocation error.\n");
exit(-1);
}
return m;
}
void
multiplication(double *m1, double *m2, double *m3, int cols, int rows)
{
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
m3[i * cols + j] = 0;
for (int k = 0; k < cols; k++) {
m3[i * cols + j] += m1[i * cols + k] * m2[k * cols + j];
}
}
}
}
void
write_matrix(double *m, int cols, int rows)
{
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f ", m[i * cols + j]);
}
printf("\n");
}
}
int
main(int argc, char *argv[])
{
if (argc < 3) {
printf("Not enough arguments.\n");
exit(1);
}
struct mat m1 = dim(argv[1]);
struct mat m2 = dim(argv[2]);
printf(" %d %d \n", m1.cols, m1.rows);
printf(" %d %d \n", m2.cols, m2.rows);
int c1 = m1.cols;
int r1 = m1.rows;
int c2 = m2.cols;
int r2 = m2.rows;
if (r1 != c2) {
printf("Matrixes are not suitable for multiplication.\n");
exit(-1);
}
double *mtx3 = alloc_matrix(c1, r2);
multiplication(m1.m, m2.m, mtx3, c1, r2);
write_matrix(mtx3, c1, r2);
free(m1.m);
free(m2.m);
free(mtx3);
return 0;
}
Here are two test files I used. Note that although you can't see it, the first line has trailing whitespace [as a test]:
This is m1.txt:
1 2 3 4
5 6 7 8
9 10 11 12
Here is the second file:
1 2 3
4 5 6
7 8 9
10 11 12
Here is the program output:
4 3
3 4
38.000000 44.000000 202.000000 232.000000
98.000000 116.000000 438.000000 504.000000
158.000000 188.000000 674.000000 776.000000
9.000000 10.000000 87.000000 100.000000
UPDATE:
Here's an alterate dim function that replaces the [somewhat fragile] char-by-char scan of the first line with a scan for newline [to get line length], followed by malloc of a buffer, fgets, and then loop on strtok to count the non-space strings in the lines (i.e. the number of columns):
// size and read in matrix
struct mat
dim(const char *file)
{
struct mat m;
int rows = 0;
int cols = 0;
int maxcnt;
int curcnt;
int ret;
char *buf;
char *bp;
char *tok;
int c;
int c2;
FILE *f = fopen(file, "r+");
// count number of chars in first line of the file
curcnt = 0;
while (1) {
c = fgetc(f);
if (c == EOF)
break;
++curcnt;
if (c == '\n')
break;
}
++curcnt;
buf = malloc(curcnt);
rewind(f);
fgets(buf,curcnt,f);
cols = 0;
bp = buf;
while (1) {
tok = strtok(bp," \n");
if (tok == NULL)
break;
++cols;
bp = NULL;
}
free(buf);
rewind(f);
m.rows = 0;
m.cols = cols;
m.m = NULL;
curcnt = 0;
maxcnt = 0;
while (1) {
if (curcnt >= maxcnt) {
maxcnt += m.cols * 100;
double *tmp = realloc(m.m,sizeof(double) * maxcnt);
if (tmp == NULL) {
printf("dim: realloc failure\n");
exit(1);
}
m.m = tmp;
}
ret = 0;
for (int idx = 0; idx < cols; ++idx, ++curcnt) {
ret = fscanf(f, "%lf", &m.m[curcnt]);
if (ret != 1)
break;
}
if (ret != 1)
break;
rows += 1;
}
fclose(f);
m.rows = rows;
// trim matrix to actual size;
m.m = realloc(m.m,sizeof(double) * rows * cols);
return m;
}
UPDATE #2:
I didn't like either solution for getting the number of columns, so here is a cleaner one that is as fast as the first one but is simpler and less cumbersome:
// scan first line and count columns
int
colcalc(FILE *f)
{
int c;
int noncur;
int nonprev = 0;
int cols = 0;
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n')
break;
// only count non-whitespace chars
switch (c) {
case ' ':
case '\t':
noncur = 0;
break;
default:
noncur = 1;
break;
}
// column starts on _first_ char in word
if (noncur)
cols += (noncur != nonprev);
nonprev = noncur;
}
rewind(f);
return cols;
}
UPDATE #3:
I tried out the previous 2 methods by you and it works so smoothly! Thank you once again! and your comments about making my program simpler with less variables and stuff!
You're welcome!
My coding style/methodology comes from a [very] old book: "The Elements of Programming Style" by Kernighan and Plauger.
The examples from that book are written in Fortran, but the maxims are on par with "Code Complete" by Steve McConnell.
From Chapter 7 [Efficiency and Instrumentation]:
Make it right before you make it faster.
Keep it right when you make it faster.
Make it clear before you make it faster.
Don't sacrifice clarity for small gains in "efficiency".
Don't strain to re-use code; reorganize instead.
Make sure special cases are truly special.
Keep it simple to make it faster.
Don't diddle code to make it faster -- find a better algorithm.
Instrument your programs. Measure before making "efficiency" changes.

Uninitialised values in dynamic array in C

I've been given a task that requires a dynamic 2D array in C, but we haven't even covered pointers yet, so I'm kind of at a loss here. I have to read some text input and store it in a 2D array, without limiting its size.
Unfortunately, Valgrind keeps throwing me an error saying that there's an uninitialised value, when the puts() function executes and sometimes it prints out some random signs. I understand that I must have omitted some indexes, but I just can't find where the issue stems from. Additionally, all advices regarding the quality of my code are very much appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
#define MULT 3
#define DIV 2
char **read(int *row, int *col) {
char **input = NULL;
int row_size = 0;
int col_size = 0;
int i = 0;
int c;
while ((c = getchar()) != EOF) {
if (c != '\n') { // skip empty lines
assert(i < INT_MAX);
if (i == row_size) { // if not enough row memory, allocate more
row_size = 1 + row_size * MULT / DIV;
input = realloc(input, row_size * sizeof *input);
assert(input != NULL);
}
char *line = NULL;
int j = 0;
// I need all the rows to be of the same size (see last loop)
line = malloc(col_size * sizeof *line);
// do while, so as to not skip the first character
do {
assert(j < INT_MAX-1);
if (j == col_size) {
col_size = 1 + col_size * MULT / DIV;
line = realloc(line, col_size * sizeof *line);
assert(line != NULL);
}
line[j++] = c;
} while(((c = getchar()) != '\n') && (c != EOF));
// zero-terminate the string
if (j == col_size) {
++col_size;
line = realloc(line, col_size * sizeof *line);
line[j] = '\0';
}
input[i++] = line;
}
}
// Here I give all the lines the same length
for (int j = 0; j < i; ++j)
input[j] = realloc(input[j], col_size * sizeof *(input+j));
*row = i;
*col = col_size;
return input;
}
int main(void) {
int row_size, col_size, i, j;
char **board = read(&row_size, &col_size);
// Initialize the remaining elements of each array
for (i = 0; i < row_size; ++i) {
j = 0;
while (board[i][j] != '\0')
++j;
while (j < col_size-1)
board[i][++j] = ' ';
}
for (i = 0; i < row_size; ++i) {
puts(board[i]);
}
for (i = 0; i < row_size; ++i)
free(board[i]);
free(board);
return 0;
}

read integer values from a CSV file, how to get only the last two values of the record?

The sample record will be like this,
14/11/2014,Sh2345,423,10
12/12/2014,AV2345,242,20
From the above record I need only
423,10
242,20
The below code will give me all the row and Column count.
rowIndex = 0;
columnIndex = 0;
while(fgets(part,1024,fp) != NULL){
token = NULL;
while((token = strtok((token == NULL)?part:NULL,",")) != NULL){
if(rowIndex == 0){
columnIndex++;
}
for(idx = 0;idx<strlen(token);idx++){
if(token[idx] == '\n'){
rowIndex++;
break;
}
}
}
}
If you want to use strtok which I believe is the right way to do this kind of things, since fscanf will be very problematic in case of invalid input, then I think this is the way:
rowIndex = 0;
while (fgets(part, sizeof part, fp) != NULL)
{
char *token;
size_t partLength;
char *saveptr; // for strtok_r to store it's current state
partLength = strlen(part);
/* check if this is a complete line */
if (part[partLength - 1] == '\n')
rowIndex++;
columnIndex = 0;
token = strtok_r(part, ",", &saveptr);
while ((token = strtok_r(NULL, ",", &saveptr)) != NULL)
{
char *endptr;
/* if columnIndex >= 1 then we are in the right columns */
if (columnIndex >= 1)
values[columnIndex - 1] = strtol(token, &endptr, 10);
/* in case the conversion rejected some characters */
if ((*endptr != '\0') && (*endptr != '\n'))
values[columnIndex - 1] = -1; /* some invalid value (if it's possible) */
columnIndex++;
}
/* if we have columnIndex == 3, then we've read the two values */
if (columnIndex == 3)
printf("(%d, %d)\n", values[0], values[1]);
/* the last column will not be counted in the while loop */
columnIndex++;
}
In case of very long lines, for which sizeof part is small enough to leave some , in between, you are going to need some different approach, but as long as the lines fit part you are ok.
To read the values into an array, maybe this could work:
int **fileToMatrix(const char *const filename, int *readRowCount, int *readColumnCount, int skipColumns)
{
char part[256];
FILE *file;
int rowIndex;
int columnIndex;
int index;
int **values;
file = fopen(filename, "r");
if (file == NULL)
return NULL;
values = NULL; /* calling realloc, it behaves like malloc if ptr argument is NULL */
rowIndex = 0;
while (fgets(part, sizeof part, file) != NULL)
{
char *token;
int **pointer;
char *saveptr; // for strtok_r to store it's current state
/* check if this is a complete line */
pointer = realloc(values, (1 + rowIndex) * sizeof(int *));
if (pointer == NULL)
goto abort;
values = pointer;
values[rowIndex] = NULL;
columnIndex = 0;
token = strtok_r(part, ",", &saveptr);
while ((token = strtok_r(NULL, ",", &saveptr)) != NULL)
{
columnIndex += 1;
/* if columnIndex > skipColumns - 1 then we are in the right columns */
if (columnIndex > (skipColumns - 1))
{
int value;
char *endptr;
int *currentRow;
int columnCount;
endptr = NULL;
value = strtol(token, &endptr, 10);
/* in case the conversion rejected some characters */
if ((endptr != NULL) && (*endptr != '\0') && (*endptr != '\n'))
value = -1;
/* ^ some invalid value (if it's possible) */
columnCount = columnIndex - skipColumns + 1;
currentRow = realloc(values[rowIndex], columnCount * sizeof(int));
if (currentRow == NULL)
goto abort;
currentRow[columnIndex - skipColumns] = value;
values[rowIndex] = currentRow;
}
}
/* the last column will not be counted in the while loop */
columnIndex++;
rowIndex++;
}
fprintf(stderr, "%d rows and %d columns parsed\n", rowIndex, columnIndex - skipColumns);
fclose(file);
*readRowCount = rowIndex;
*readColumnCount = columnIndex - skipColumns;
return values;
abort:
*readRowCount = -1;
*readColumnCount = -1;
for (index = rowIndex - 1 ; index >= 0 ; index--)
free(values[index]);
free(values);
fclose(file);
return NULL;
}
void freeMatrix(int **matrix, int rows, int columns)
{
int row;
for (row = 0 ; row < rows ; row++)
free(matrix[row]);
free(matrix);
}
void printMatrix(int **matrix, int rows, int columns)
{
int row;
int column;
for (row = 0 ; row < rows ; row++)
{
int *currentRow;
currentRow = matrix[row];
for (column = 0 ; column < columns ; column++)
printf("%8d", currentRow[column]);
printf("\n");
}
}
int main()
{
int **matrix;
int rows;
int columns;
matrix = fileToMatrix("data.dat", &rows, &columns, 2);
if (matrix != NULL)
{
printMatrix(matrix, rows, columns);
freeMatrix(matrix, rows, columns);
}
return 0;
}
You should also note, that sometimes fields in a CSV file contain " or ' quotes, you might want to remove them from the tokens returned by strtok_r to avoid the failure of strtol.
int v1, v2;
while(fgets(part,1024,fp) != NULL){
sscanf(part, "%*[^,],%*[^,],%d,%d", &v1, &v2);//skip 2 field
//do stuff .. printf("%d,%d\n", v1, v2);
}
int CheckMatrix(int Matrix, int Checkrow, int Checkvalue) /* to check whether the area code existing in the matrix*/
{
int i,j;
for(i=0;i<=Checkrow;i++)
{
if( Matrix[i][0]== Checkvalue)
{
return i;
}
else
{
return -1;
}
}
}
int **fileToMatrix(const char *const filename, int *readRowCount)
{
char part[256];
FILE *file;
int rowIndex;
int index;
int **values;
size_t partLength;
int v1,v2;
int CheckValue;
file = fopen(filename, "r");
if (file == NULL)
return NULL;
values = NULL; /* calling realloc, it behaves like malloc if ptr argument is NULL */
rowIndex = 0;
while (fgets(part, sizeof part, file) != NULL)
{
int **pointer;
/* check if this is a complete line */
pointer = realloc(values, (1 + rowIndex) * sizeof(int *));
if (pointer == NULL)
goto abort;
partLength = strlen(part);
/* check if this is a complete line */
if (part[partLength - 1] == '\n')
rowIndex++;
sscanf(part, "%*[^,],%*[^,],%d,%d", &v1, &v2);//skip 2 field to get the Area code and Distance
CheckValue = CheckMatrix(Values,rowIndex,V1); //Call the function to check whether the area code existing or not in the array
If (CheckValue!=-1) // If existing the current distace will add to the existing and increase the count of areacode.
{
Values[CheckValue][1]=Values[CheckValue][1]+V2;
Values[CheckValue][2]=Values[CheckValue][2];
}
else // If not existing will add to the matrix as new entry.
{
Values[CheckValue][0]=V1;
Values[CheckValue][1]=V2;
Values[CheckValue][2]=1;
}
}
return values;
abort:
*readRowCount = -1;
*readColumnCount = -1;
for (index = rowIndex - 1 ; index >= 0 ; index--)
free(values[index]);
free(values);
fclose(file);
return NULL;
}
void freeMatrix(int **matrix, int rows)
{
int row;
for (row = 0 ; row < rows ; row++)
free(matrix[row]);
free(matrix);
}
void printMatrix(int **matrix, int rows)
{
int row;
for (row = 0 ; row < rows ; row++)
{
for (column = 0 ; column < 4 ; column++)
{
if (column==3)
{
double Mean = double(matrix[row][1])/double(matrix[row][2]); /* To get the mean */
printf("%f",matrix[row][column];
}
printf("%d |", matrix[row][column]);
}
printf("\n");
}
}
# include <stdio.h>
int main()
{
int **matrix;
int rows;
matrix = fileToMatrix("data.dat", &rows);
if (matrix != NULL)
{
printf("|AreaCode|Total Distace|Area Count|Mean");
printf("------------------------------------------");
printMatrix(matrix, rows);
freeMatrix(matrix, rows);
}
return 0;
}

Resources