I'm trying to multiply two matrices stored in a file thus formatted:
1 2
2 3
*
-4 1
1 0
I do not know initially what the dimension of each matrix is. But I let the user define it or otherwise a default value of 100 is taken.
int maxc = argc > 2 ? atoi(argv[2]) * atoi(argv[2]) : 100;
I can already perform the calculation correctly, but I've noticed that if I enter the dimension argv[2] = "2" so that maxc = 8, (that should be enough for this example), errors are produced in reading or printing the file. But if I enter argv[2] = "3" everything works out fine for this example. Since maxc is used to allocate memory here: matrix = malloc(maxc * sizeof *matrix), I suspect the problem could be located on that line. Should I allocate memory also for size_t row; size_t col;?
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <string.h>
#define MAXNOP 50 /*Max number of operations allowed */
#define MAXNMATR 20 /*Max number of matrices */
struct m {
size_t row;
size_t col;
double *data;
};
struct m multiply(struct m *A, struct m *B);
void f(double x);
void print_matrix(struct m *A);
void read_file(int maxc, FILE *fp);
void scalar_product(double scalar, struct m *B);
void calculate(struct m *matrix, int nop, int id, char *op);
int main(int argc, char *argv[]) {
FILE *file = argc > 1 ? fopen(argv[1], "rb") : stdin;
/* define max dimension of a matrix */
int maxc = argc > 2 ? atoi(argv[2]) * atoi(argv[2]) : 100;
read_file(maxc, file);
return 0;
}
void read_file(int maxc, FILE *fp) {
struct m *matrix;
int id = 0; /* id of a matrix */
size_t ncol, nrow; /* No of columns of a matrix*/
ncol = nrow = 0;
int nop = 0; /*No of operators*/
int off = 0;
int i;
int n;
double *d;
char buf[2 * maxc]; /*to store each lines of file */
char *p = buf;
char op[MAXNOP];
for (i = 0; i < MAXNOP; i++)
op[i] = '?';
if (!(matrix = malloc(maxc * sizeof *matrix))) {
perror("malloc-matrix");
exit(1);
}
/* Read file line by line */
while (fgets(buf, maxc, fp)) {
if (nrow == 0) {
/* allocate/validate max no. of matrix */
d = matrix[id].data = malloc(sizeof(double) * MAXNMATR);
}
/* check if line contains operator */
if ((!isdigit(*buf) && buf[1] =='\n')) {
op[nop++] = *buf;
matrix[id].col = ncol;
matrix[id].row = nrow;
nrow = ncol = 0;
id++;
continue;
} else {
/* read integers in a line into d */
while (sscanf(p + off, "%lf%n", d, &n) == 1) {
d++;
if (nrow == 0)
ncol++;
off += n;
}
nrow++;
off = 0;
}
} /*end of while fgets cycle */
/* Assign last matrix No of columns and rows */
matrix[id].col = ncol;
matrix[id].row = nrow;
/* Printing the matrices and operations */
for (i = 0; i <= id; i++) {
if (op[i] == '*' || op[i] == '-' || op[i] =='+') {
print_matrix(&matrix[i]);
if (op[i-1] != 'i')
printf("%c\n", op[i]);
else
continue;
} else
if (op[i] == '?') {
print_matrix(&matrix[i]);
}
}
calculate(matrix, nop, id, op);
}
void calculate(struct m *matrix, int nop, int id, char *op) {
int i;
for (i = 0; i <= nop; i += 2) {
if (op[i] == '*' && op[i+1] == '?') {
if (matrix[i].row == 1 && matrix[i].col == 1)
scalar_product(matrix[i].data[0], &matrix[i + 1]); //Multiplication of Scalar per matrix
else {
matrix[i + 1] = multiply(&matrix[i], &matrix[i + 1]);
matrix[i + 2] = multiply(&matrix[i + 1], &matrix[i + 2]);
}
break;
}
}
printf("=\n");
print_matrix(&matrix[id]); /* Print the result */
free(matrix);
}
struct m multiply(struct m *A, struct m *B) {
size_t i, j, k;
struct m C;
C.data = malloc(sizeof(double) * A->row * B->col);
C.row = A->row;
C.col = B->col;
for (i = 0; i < C.row; i++)
for (j= 0 ; j < C.col; j++)
C.data[i * C.col + j] = 0;
// Multiplying matrix A and B and storing in C.
for (i = 0; i < A->row; ++i)
for (j = 0; j < B->col; ++j)
for (k = 0; k < A->col; ++k)
C.data[i * C.col + j] += A->data[i * A->col + k] * B->data[k * B->col + j];
return C;
}
void f(double x) {
double i, f = modf(x, &i);
if (f < .00001)
printf("%.f ", i);
else
printf("%f ", x);
}
/* printing a Matrix */
void print_matrix(struct m *A) {
size_t i, j;
double *tmp = A->data;
for (i = 0; i < A->row; i++) {
for (j = 0; j < A->col; j++) {
f(*(tmp++));
}
putchar('\n');
}
}
void scalar_product(double scalar, struct m *B) {
size_t i, j;
for (i = 0; i < B->row; i++)
for (j = 0; j < B->col; j++)
B->data[i * B->col + j] = scalar * B->data[i * B->col + j];
}
The expected result is this: https://ideone.com/Z7UtiR
here argv[2] is not read so there is enough memory to store all data.
Your read buffer only has room for maxc (ie. 4) characters :
char buf[maxc]; /*to store each lines of file */
You then attempt to get a line from the file into that buffer :
while (fgets (buf, maxc, fp)){
But that buffer is only large enough for 2 characters, followed by a newline, and then a '\0' terminator.
Looking at your sample file, the longest line has 4 characters : "-4 1". So, your buffer needs to at least be able to hold 6 (including the newline and '\0' terminator).
It's probably better to make your buffer quite a bit larger.
The problem is entirely in reading the arrays.
The maxc = 4 and the buffer char buf[maxc]; has place only for 3 characters and terminating character.
So fgets (buf, maxc, fp):
on the first will read buf = "1 2" (3 characters and zero byte)
on the second will read buf = "\n" (1 newline character, fgets terminates)
then reads buf = "2 3"
then reads buf = "\n"
buf = "*\n"
buf = "-4 "
and so on
Because of the empty line, inside this code snipped:
else /* read integers in a line into d */
{
while (sscanf (p + off, "%lf%n", d, &n) == 1) {
d++;
if(nrow == 0)
ncol++;
off += n;
}
nrow++;
off = 0;
}
The variable nrow will be incremented 4 times (2 times for rows, and 2 times for empty lines with only newlines read), which will be 2 times too many. The second matrix will have 1 column, because you will read only -4 from the line, so your while(sscanf loop will scan only one number, so ncol will be only 1.
Your fix you posted in the comment is invalid, because you only increased buffer size, but didn't increase the size argument you pass to fgets. If you did char buf[2*maxc]; you also should fgets (buf, 2 * maxc, fp), which will "fix" the current problem. I would rather re-write the whole thing or rather write fgets(buf, sizeof(buf)/sizeof(buf[0]), fp) to accommodate future changes.
Don't use VLAs ex. char buf[maxc];. For simplicity you can use arbitrary long buffer for the line, ex. #define LINE_MAX 1024 and char buf[LINE_MAX] and then fgets(buf, sizeof(buf)/sizeof(buf[0]), file). Or use or rewrite function that will dynamically resize memory and read line, like GNUs getline.
Related
I have to do an assignment where I have to read a file that contains an adjacency matrix and later do some stuff.
I have all working but my code is very slow, at least for the benchmarking system.
I'm reading all the file rows in this code snippet below:
while (fgets(buf, sizeof(buf), stdin) != NULL) {
parse(buf);
i++;
}
and then I initialize my 2d array with all the values using strtok and atoi:
void parse(char *str, int count, char *sep) {
//char *aux = malloc(count * sizeof(char*));
char *aux;
aux = strtok(str, sep);
int j = 0;
while (aux) {
array[(i*DIM) + j] = atoi(aux);
j++;
aux = strtok(NULL, sep);
}
//free(aux);
}
Arrays are DIM*DIM size and each INT is separated by a comma.
Sample input for a 3*3 matrix:
1,20,1
0,111,3
4,7,10
How can I improve this for better performances?
EDIT:
array definition:
array = malloc(DIM*DIM*sizeof(int));
The malloc part makes no sense since you just need one single character pointer for strtok. Similarly, functions like strtol or atoi already parse the data, so you don't even need strtok - it just takes up extra time in this case. Furthermore, atoi doesn't have any error handling so it should never be used.
So you can just call strtol in a loop and it will do what you want. By checking the endptr argument you can see if each read was successful or not (man strtol). And then next lap in the loop, start over from endptr + 1.
If combining this with your 2D int array requirement, the function might look like this:
#include <stdio.h>
#include <stdlib.h>
void csv_to_int (size_t col, size_t row, int dst[col][row], const char* str)
{
const char* ptr = str;
char* end;
for(size_t c=0; c<col; c++)
{
for(size_t r=0; r<row; r++)
{
int val=strtol(ptr,&end,10);
if(ptr==end)
{
return ;
}
dst[c][r]=val;
ptr = end+1;
}
}
}
int main (void)
{
const char* input = "1,20,1\n0,111,3\n4,7,10\n";
int arr[3][3];
csv_to_int(3, 3, arr, input);
for(size_t i=0; i<3; i++)
{
for(size_t j=0; j<3; j++)
{
printf("%3d ", arr[i][j]);
}
puts("");
}
}
Output:
1 20 1
0 111 3
4 7 10
This is of course assuming that the input suits the 3x3 format - this code has almost no error handling.
How can I improve this for better performances?
So do not use these functions, if you think they are slow. Limit your requirements - do not handle locale specific digits. So read and convert it yourself. Blatantly disregard error checking. Something along:
#define _GNU_SOURCE 1
#include <stdio.h>
int main() {
char data[] ="1,20,1\n0,111,3\n4,7,10\n";
FILE *f = fmemopen(data, sizeof(data), "r");
int i = 0, j = 0;
#define DIM 3
int array[20];
// this reading part
int buf = 0;
for (int c; (c = fgetc(f)) != EOF; ) {
if (c == '\n') {
array[i * DIM + j] = buf;
buf = 0;
++i;
j = 0;
} else if (c == ',') {
array[i * DIM + j] = buf;
buf = 0;
++j;
} else {
buf *= 10;
buf += (c - '0');
}
}
array[i * DIM + j] = buf;
++j;
// checking
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
printf("%d %d = %d\n", i, j, array[i * DIM + j]);
}
}
}
Even further, ignore portability, and use system calls (on unix - read(STDIN_FILENO)) instead of C API.
So my attempt was that creating a program that automatically gets two matrixes' size from a .txt file and multiplies them. I could make the program with given sizes so in itself I only have problem with counting the cols and rows.
The input something like (MxN matrix):
1 2 3 4
1 2 3 4
1 2 3 4
To be specific, here is my program so far (the beginning of the code is relevant I think):
#include <stdio.h>
#include <stdlib.h>
struct mat1
{
int cols;
int rows;
};
struct mat2
{
int cols;
int rows;
};
struct mat1 dim1(const char* file)
{
struct mat1 m1;
int rows = 0;
int cols = 0;
char c;
FILE *f = fopen(file, "r+");
while((c = fgetc(f) != EOF))
{
if(c != '\n' && rows == 0)
{
cols++;
}
else if(c == '\n')
rows++;
}
rows++;
return m1;
}
struct mat2 dim2(const char* file)
{
struct mat2 m2;
int rows = 0;
int cols = 0;
char c;
FILE *f = fopen(file, "r+");
while((c = fgetc(f) != EOF))
{
if(c != '\n' && rows == 0)
{
cols++;
}
else if(c == '\n')
rows++;
}
rows++;
return m2;
}
double* alloc_matrix(int cols, int rows) {
double* m = (double*)malloc(cols * rows * sizeof(double));
if (m == 0) {
printf("Memory allocation error.\n");
exit(-1);
}
return m;
}
void read_matrix(FILE* f, double* m, int cols, int rows) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
fscanf(f, "%lf", &m[i * cols + j]);
}
}
}
void multiplication(double* m1, double* m2, double* m3, int cols, int rows) {
for(int i = 0; i < rows; i++) {
for(int j = 0; j < cols; j++) {
m3[i * cols +j]=0;
for(int k = 0; k < cols; k++) {
m3[i * cols +j]+=m1[i * cols +k]*m2[k * cols +j];
}
}
}
}
void write_matrix(double* m, int cols, int rows) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f ", m[i * cols + j]);
}
printf("\n");
}
}
int main(int argc, char* argv[])
{
char* matrix1 = argv[1];
char* matrix2 = argv[2];
if (argc < 3) {
printf("Not enough arguments.\n");
exit(-1);
}
struct mat1 m1 = dim1(matrix1);
struct mat2 m2 = dim2(matrix2);
printf(" %d %d \n", m1.cols, m1.rows);
printf(" %d %d \n", m2.cols, m2.rows);
int c1 = m1.cols;
int r1 = m1.rows;
int c2 = m2.cols;
int r2 = m2.rows;
if (r1!=c2)
{
printf("Matrixes are not suitable for multiplication. \n");
exit(-1);
}
double* mtx1 = alloc_matrix(c1, r1);
double* mtx2 = alloc_matrix(c2, r2);
FILE* f1 = fopen(matrix1, "r");
if (f1 == 0)
{
printf("Cannot open file %s.", argv[1]);
exit(-1);
}
FILE* f2 = fopen(matrix2, "r");
if (f1 == 0)
{
printf("Cannot open file %s.", argv[1]);
exit(-1);
}
read_matrix(f1, mtx1, c1, r1);
read_matrix(f2, mtx2, c2, r2);
double* mtx3 = alloc_matrix(c1, r2);
multiplication(mtx1, mtx2, mtx3, c1, r2);
write_matrix(mtx3, c1, r2);
free(mtx1);
free(mtx2);
free(mtx3);
fclose(f1);
fclose(f2);
return 0;
}
When I tried it out with 2 3x3 matrixes, The outpot:
6422164 4199040 (from 2 printf()s that I set to check the dimensions).
6422164 4199040
Matrixes are not suitable for multiplication. (it's irrelevant)
So basically it doesn't use 3x3.
I cannot figure out what the problem is.
This is prefaced by my top comments.
I had to refactor dim to handle an arbitrarily large matrix, so I had to scan the first line of the file char-by-char, counting whitespace strings (which yields the number of columns - 1). It handles/strips leading/trailing whitespace [malformed]
I had dim then rewind the file and use fscanf and realloc to create the matrix dynamically.
Here's the working code [please pardon the gratuitous style cleanup]:
#include <stdio.h>
#include <stdlib.h>
struct mat {
int cols;
int rows;
double *m;
};
// size and read in matrix
struct mat
dim(const char *file)
{
struct mat m;
int rows = 0;
int cols = 0;
int maxcnt;
int curcnt;
int ret;
int c;
int c2;
FILE *f = fopen(file, "r+");
// strip leading whitespace [if any] off first line
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n')
break;
if (c != ' ')
break;
}
// scan first line and count columns (number of space separaters)
while (1) {
c2 = ' ';
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n') {
if (c2 != ' ')
++cols;
break;
}
if (c == ' ') {
if (c != c2)
++cols;
break;
}
c2 = c;
}
if (c == '\n')
break;
}
// convert number of whitespace separaters into number of columns
if (cols > 0)
++cols;
rewind(f);
m.rows = 0;
m.cols = cols;
m.m = NULL;
curcnt = 0;
maxcnt = 0;
while (1) {
if (curcnt >= maxcnt) {
maxcnt += m.cols * 100;
double *tmp = realloc(m.m,sizeof(double) * maxcnt);
if (tmp == NULL) {
printf("dim: realloc failure\n");
exit(1);
}
m.m = tmp;
}
ret = 0;
for (int idx = 0; idx < cols; ++idx, ++curcnt) {
ret = fscanf(f, "%lf", &m.m[curcnt]);
if (ret != 1)
break;
}
if (ret != 1)
break;
rows += 1;
}
fclose(f);
m.rows = rows;
// trim matrix to actual size;
m.m = realloc(m.m,sizeof(double) * rows * cols);
return m;
}
double *
alloc_matrix(int cols, int rows)
{
double *m = (double *) malloc(cols * rows * sizeof(double));
if (m == 0) {
printf("Memory allocation error.\n");
exit(-1);
}
return m;
}
void
multiplication(double *m1, double *m2, double *m3, int cols, int rows)
{
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
m3[i * cols + j] = 0;
for (int k = 0; k < cols; k++) {
m3[i * cols + j] += m1[i * cols + k] * m2[k * cols + j];
}
}
}
}
void
write_matrix(double *m, int cols, int rows)
{
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f ", m[i * cols + j]);
}
printf("\n");
}
}
int
main(int argc, char *argv[])
{
if (argc < 3) {
printf("Not enough arguments.\n");
exit(1);
}
struct mat m1 = dim(argv[1]);
struct mat m2 = dim(argv[2]);
printf(" %d %d \n", m1.cols, m1.rows);
printf(" %d %d \n", m2.cols, m2.rows);
int c1 = m1.cols;
int r1 = m1.rows;
int c2 = m2.cols;
int r2 = m2.rows;
if (r1 != c2) {
printf("Matrixes are not suitable for multiplication.\n");
exit(-1);
}
double *mtx3 = alloc_matrix(c1, r2);
multiplication(m1.m, m2.m, mtx3, c1, r2);
write_matrix(mtx3, c1, r2);
free(m1.m);
free(m2.m);
free(mtx3);
return 0;
}
Here are two test files I used. Note that although you can't see it, the first line has trailing whitespace [as a test]:
This is m1.txt:
1 2 3 4
5 6 7 8
9 10 11 12
Here is the second file:
1 2 3
4 5 6
7 8 9
10 11 12
Here is the program output:
4 3
3 4
38.000000 44.000000 202.000000 232.000000
98.000000 116.000000 438.000000 504.000000
158.000000 188.000000 674.000000 776.000000
9.000000 10.000000 87.000000 100.000000
UPDATE:
Here's an alterate dim function that replaces the [somewhat fragile] char-by-char scan of the first line with a scan for newline [to get line length], followed by malloc of a buffer, fgets, and then loop on strtok to count the non-space strings in the lines (i.e. the number of columns):
// size and read in matrix
struct mat
dim(const char *file)
{
struct mat m;
int rows = 0;
int cols = 0;
int maxcnt;
int curcnt;
int ret;
char *buf;
char *bp;
char *tok;
int c;
int c2;
FILE *f = fopen(file, "r+");
// count number of chars in first line of the file
curcnt = 0;
while (1) {
c = fgetc(f);
if (c == EOF)
break;
++curcnt;
if (c == '\n')
break;
}
++curcnt;
buf = malloc(curcnt);
rewind(f);
fgets(buf,curcnt,f);
cols = 0;
bp = buf;
while (1) {
tok = strtok(bp," \n");
if (tok == NULL)
break;
++cols;
bp = NULL;
}
free(buf);
rewind(f);
m.rows = 0;
m.cols = cols;
m.m = NULL;
curcnt = 0;
maxcnt = 0;
while (1) {
if (curcnt >= maxcnt) {
maxcnt += m.cols * 100;
double *tmp = realloc(m.m,sizeof(double) * maxcnt);
if (tmp == NULL) {
printf("dim: realloc failure\n");
exit(1);
}
m.m = tmp;
}
ret = 0;
for (int idx = 0; idx < cols; ++idx, ++curcnt) {
ret = fscanf(f, "%lf", &m.m[curcnt]);
if (ret != 1)
break;
}
if (ret != 1)
break;
rows += 1;
}
fclose(f);
m.rows = rows;
// trim matrix to actual size;
m.m = realloc(m.m,sizeof(double) * rows * cols);
return m;
}
UPDATE #2:
I didn't like either solution for getting the number of columns, so here is a cleaner one that is as fast as the first one but is simpler and less cumbersome:
// scan first line and count columns
int
colcalc(FILE *f)
{
int c;
int noncur;
int nonprev = 0;
int cols = 0;
while (1) {
c = fgetc(f);
if (c == EOF)
break;
if (c == '\n')
break;
// only count non-whitespace chars
switch (c) {
case ' ':
case '\t':
noncur = 0;
break;
default:
noncur = 1;
break;
}
// column starts on _first_ char in word
if (noncur)
cols += (noncur != nonprev);
nonprev = noncur;
}
rewind(f);
return cols;
}
UPDATE #3:
I tried out the previous 2 methods by you and it works so smoothly! Thank you once again! and your comments about making my program simpler with less variables and stuff!
You're welcome!
My coding style/methodology comes from a [very] old book: "The Elements of Programming Style" by Kernighan and Plauger.
The examples from that book are written in Fortran, but the maxims are on par with "Code Complete" by Steve McConnell.
From Chapter 7 [Efficiency and Instrumentation]:
Make it right before you make it faster.
Keep it right when you make it faster.
Make it clear before you make it faster.
Don't sacrifice clarity for small gains in "efficiency".
Don't strain to re-use code; reorganize instead.
Make sure special cases are truly special.
Keep it simple to make it faster.
Don't diddle code to make it faster -- find a better algorithm.
Instrument your programs. Measure before making "efficiency" changes.
I've been given a task that requires a dynamic 2D array in C, but we haven't even covered pointers yet, so I'm kind of at a loss here. I have to read some text input and store it in a 2D array, without limiting its size.
Unfortunately, Valgrind keeps throwing me an error saying that there's an uninitialised value, when the puts() function executes and sometimes it prints out some random signs. I understand that I must have omitted some indexes, but I just can't find where the issue stems from. Additionally, all advices regarding the quality of my code are very much appreciated.
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <assert.h>
#define MULT 3
#define DIV 2
char **read(int *row, int *col) {
char **input = NULL;
int row_size = 0;
int col_size = 0;
int i = 0;
int c;
while ((c = getchar()) != EOF) {
if (c != '\n') { // skip empty lines
assert(i < INT_MAX);
if (i == row_size) { // if not enough row memory, allocate more
row_size = 1 + row_size * MULT / DIV;
input = realloc(input, row_size * sizeof *input);
assert(input != NULL);
}
char *line = NULL;
int j = 0;
// I need all the rows to be of the same size (see last loop)
line = malloc(col_size * sizeof *line);
// do while, so as to not skip the first character
do {
assert(j < INT_MAX-1);
if (j == col_size) {
col_size = 1 + col_size * MULT / DIV;
line = realloc(line, col_size * sizeof *line);
assert(line != NULL);
}
line[j++] = c;
} while(((c = getchar()) != '\n') && (c != EOF));
// zero-terminate the string
if (j == col_size) {
++col_size;
line = realloc(line, col_size * sizeof *line);
line[j] = '\0';
}
input[i++] = line;
}
}
// Here I give all the lines the same length
for (int j = 0; j < i; ++j)
input[j] = realloc(input[j], col_size * sizeof *(input+j));
*row = i;
*col = col_size;
return input;
}
int main(void) {
int row_size, col_size, i, j;
char **board = read(&row_size, &col_size);
// Initialize the remaining elements of each array
for (i = 0; i < row_size; ++i) {
j = 0;
while (board[i][j] != '\0')
++j;
while (j < col_size-1)
board[i][++j] = ' ';
}
for (i = 0; i < row_size; ++i) {
puts(board[i]);
}
for (i = 0; i < row_size; ++i)
free(board[i]);
free(board);
return 0;
}
I'm trying to build a program that takes two strings and fills in the edit distance matrix for them. The thing that is tripping me up is, for the second string input, it is skipping over the second input. I've tried clearing the buffer with getch(), but it didn't work. I've also tried switching over to scanf(), but that resulted in some crashes as well. Help please!
Code:
#include <stdio.h>
#include <stdlib.h>
int min(int a, int b, int c){
if(a > b && a > c)
return a;
else if(b > a && b > c)
return b;
else
return c;
}
int main(){
// allocate size for strings
int i, j;
char *input1 = (char*)malloc(sizeof(char)*100);
char *input2 = (char*)malloc(sizeof(char)*100);
// ask for input
printf("Enter the first string: ");
fgets(input1, sizeof(input1), stdin);
printf("\nEnter the second string: ");
fgets(input2, sizeof(input2), stdin);
// make matrix
int len1 = sizeof(input1), len2 = sizeof(input2);
int c[len1 + 1][len2 + 1];
// set up input 2 length
for(i = 0; i < len2 + 1; i++){
c[0][i] = i;
}
// set up input 1 length
for(i = 0; i < len1 + 1; i++){
c[i][0] = i;
}
// fill in the rest of the matrix
for(i = 1; i < len1; i++){
for(j = 1; j < len2; j++){
if(input1[i] == input2[j]) // if the first letters are equal make the diagonal equal to the last
c[i][j] = c[i - 1][j - 1];
else
c[i][j] = 1 + min(c[i - 1][j - 1], c[i - 1][j], c[i][j - 1]);
}
}
// print the matrix
printf("\n");
for(j = 0; j < len2; j++){
for(i = 0; i < len1; i++){
printf("| %d", c[i][j]);
}
printf("\n");
}
return 1;
}
Stick with fgets.
As others have pointed out, use char input1[100] instead of char *input1 = malloc(...)
But, even with that change, which makes the sizeof inside of the fgets correct, using sizeof when setting up len1 and len2 is wrong. You'll be processing an entire buffer of 100, even if their are only 10 valid characters in it (i.e. the remaining ones are undefined/random).
What you [probably] want is strlen [and a newline strip] to get the actual useful lengths.
Here's the modified code [please pardon the gratuitous style cleanup]:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int
min(int a, int b, int c)
{
if (a > b && a > c)
return a;
if (b > a && b > c)
return b;
return c;
}
int
main(void)
{
// allocate size for strings
int i;
int j;
char input1[100];
char input2[100];
// ask for input
printf("Enter the first string: ");
fgets(input1, sizeof(input1), stdin);
int len1 = strlen(input1);
if (input1[len1 - 1] == '\n') {
input1[len1 - 1] = 0;
--len1;
}
printf("\nEnter the second string: ");
fgets(input2, sizeof(input2), stdin);
int len2 = strlen(input2);
if (input2[len2 - 1] == '\n') {
input2[len2 - 1] = 0;
--len2;
}
// make matrix
int c[len1 + 1][len2 + 1];
// set up input 2 length
for (i = 0; i < len2 + 1; i++) {
c[0][i] = i;
}
// set up input 1 length
for (i = 0; i < len1 + 1; i++) {
c[i][0] = i;
}
// fill in the rest of the matrix
for (i = 1; i < len1; i++) {
for (j = 1; j < len2; j++) {
// if the 1st letters are equal make the diagonal equal to the last
if (input1[i] == input2[j])
c[i][j] = c[i - 1][j - 1];
else
c[i][j] = 1 + min(c[i - 1][j - 1], c[i - 1][j], c[i][j - 1]);
}
}
// print the matrix
printf("\n");
for (j = 0; j < len2; j++) {
for (i = 0; i < len1; i++) {
printf("| %d", c[i][j]);
}
printf("\n");
}
return 1;
}
UPDATE:
Okay sweet I see what you mean! The reason I was trying to use malloc though was to avoid making the matrix that I had to print a size of 100x100 blank spaces.
With either the fixed size input1 or the malloced one, fgets will only fill it to the input size entered [clipped to the second argument, if necessary]. But, it does not pad/fill the remainder of the buffer with anything (e.g. spaces on the right). What it does do is add an EOS [end-of-string] character [which is a binary 0x00] after the last char read from input [which is usually the newline].
Thus, if the input string is: abcdef\n, the length [obtainable from strlen] is 7, input[7] will be 0x00, and input1[8] through input1[99] will have undefined/random/unpredictable values and not spaces.
Since a newline char isn't terribly useful, it is often stripped out before further processing. For example, it isn't terribly relevant when computing edit distance for a small phrase.
Does using strlen() only count the number of chars inside the array, or does it include all the blank spaces too?
As I mentioned above, fgets does not pad the string at the end, so, not to worry. It will do what you want/expect.
strlen only counts chars up to [but not including the EOS terminator character (i.e.) zero]. If some of these chars happen to be spaces, they will be counted by strlen--which is what we want.
Consider computing the edit distance between any two of the following phrases:
quick brown fox jumped over the lazy dogs
the quick brown fox jumped over lazy dogs
quick brown fox jumps over the lazy dog
In each case, we want strlen to include the [internal/embedded] spaces in the length calculation. That's because it is perfectly valid to compute the edit distance of phrases.
There is a valid usage for malloc: when the amount of data is too big to fit on the stack. Most systems have a default limit (e.g. under linux, it's 8 MB).
Suppose we were computing the edit distance for two book chapters [read from files], we'd have (e.g.):
char input1[50000];
char input2[50000];
The above would fit, but the c matrix would cause a stack overflow:
int c[50000][50000];
because the size of this would be 50000 * 50000 * 4 which is approx 9.3 GB.
So, to fit all this data, we'd need to allocate it on the heap. While it is possible to do a malloc for c and maintain the 2D matrix access, we'd have to create a function and pass off the pointer to c to it.
So, here's a modified version that takes input of arbitrarily large sizes:
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#define sysfault(_fmt...) \
do { \
fprintf(stderr,_fmt); \
exit(1); \
} while (0)
#define C(y,x) c[((y) * (len2 + 1)) + (x)]
long
min(long a, long b, long c)
{
if (a > b && a > c)
return a;
if (b > a && b > c)
return b;
return c;
}
char *
input(const char *prompt,long *lenp,const char *file)
{
FILE *fp;
char *lhs;
int chr;
long siz;
long len;
if (file != NULL)
fp = fopen(file,"r");
else {
fp = stdin;
printf("Enter %s string: ",prompt);
fflush(stdout);
}
lhs = NULL;
siz = 0;
len = 0;
while (1) {
chr = fgetc(fp);
if (chr == EOF)
break;
if ((chr == '\n') && (file == NULL))
break;
// grow the character array
if ((len + 1) >= siz) {
siz += 100;
lhs = realloc(lhs,siz);
if (lhs == NULL)
sysfault("input: realloc failure -- %s\n",strerror(errno));
}
lhs[len] = chr;
len += 1;
}
if (file != NULL)
fclose(fp);
if (lhs == NULL)
sysfault("input: premature EOF\n");
// add the EOS
lhs[len] = 0;
// return the length to the caller
*lenp = len;
return lhs;
}
int
main(int argc,char **argv)
{
long i;
long j;
char *input1;
long len1;
char *input2;
long len2;
long *c;
--argc;
++argv;
switch (argc) {
case 2:
input1 = input("first",&len1,argv[0]);
input2 = input("second",&len2,argv[1]);
break;
default:
input1 = input("first",&len1,NULL);
input2 = input("second",&len2,NULL);
break;
}
// make matrix
c = malloc(sizeof(*c) * (len1 + 1) * (len2 + 1));
if (c == NULL)
sysfault("main: malloc failure -- %s\n",strerror(errno));
// set up input 2 length
for (i = 0; i < len2 + 1; i++) {
C(0,i) = i;
}
// set up input 1 length
for (i = 0; i < len1 + 1; i++) {
C(i,0) = i;
}
// fill in the rest of the matrix
for (i = 1; i < len1; i++) {
for (j = 1; j < len2; j++) {
// if the 1st letters are equal make the diagonal equal to the last
if (input1[i] == input2[j])
C(i,j) = C(i - 1,j - 1);
else
C(i,j) = 1 + min(C(i - 1,j - 1), C(i - 1,j), C(i,j - 1));
}
}
// print the matrix
printf("\n");
for (j = 0; j < len2; j++) {
for (i = 0; i < len1; i++) {
printf("| %ld", C(i,j));
}
printf("\n");
}
return 1;
}
I tried to store strings in an array. But there is a mistake. My code is here:
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <malloc.h>
const long long max_size = 2000; // max length of strings
const long long N = 40; // number of closest words that will be shown
const long long max_w = 50; // max length of vocabulary entries
int main(int argc, char **argv) {
FILE *f;
char st1[max_size];
char kelimeler[max_size];
char *kelimelerim[max_size]; //string array initialization here
char *bestw[N];
char file_name[max_size], st[100][max_size];
float dist, len, bestd[N], vec[max_size];
long long words, size, a, b, c, d, cn, bi[100];
char ch;
float *M;
char *vocab;
strcpy(file_name, argv[1]);
f = fopen(file_name, "rb");
if (f == NULL) {
printf("Input file not found\n");
return -1;
}
fscanf(f, "%lld", &words);
fscanf(f, "%lld", &size);
vocab = (char *)malloc((long long)words * max_w * sizeof(char));
for (a = 0; a < N; a++) bestw[a] = (char *)malloc(max_size * sizeof(char));
M = (float *)malloc((long long)words * (long long)size * sizeof(float));
if (M == NULL) {
printf("Cannot allocate memory");
return -1;
}
for (b = 0; b < words; b++) {
a = 0;
int sayac=0;
while (1) {
sayac++;
vocab[b * max_w + a] = fgetc(f);
if (feof(f) || (vocab[b * max_w + a] == ' ')) {
strcpy(kelimeler,&vocab[b * max_w + a-sayac+2]); //gets the string here
kelimelerim[b] = kelimeler; //and store it into string array here
printf("%s %lld\n",kelimelerim[b],b);
sayac=0;
break;
}
if ((a < max_w) && (vocab[b * max_w + a] != '\n'))
a++;
}
vocab[b * max_w + a] = 0;
for (a = 0; a < size; a++)
fread(&M[a + b * size], sizeof(float), 1, f);
len = 0;
for (a = 0; a < size; a++)
len += M[a + b * size] * M[a + b * size];
len = sqrt(len);
for (a = 0; a < size; a++)
M[a + b * size] /= len;
}
fclose(f);
int index;
for (index = 0; index < words; index ++){
printf("%s %d \n",kelimelerim[index ], index );
}
// here, the loop prints last string stored into array, for all indexes.
I deleted the unimportant rows. When I run the above code and print the kelimelerim array, the last string is printed for all indexes of the array. Where is my mistake? Could you help me, please.
You never initialize vocab, so the following has undefined behaviour:
vocab[b * max_w + a] = fgetc(f);
From that point on, all bets are off.
This
kelimelerim[b] = kelimeler;
does not copy any data, but only stores the address of kelimeler to kelimelerim[b]. If then looping over kelimelerim[b]'s elements, only references to kelimeler are found and as kelimeler gets re-used for each iteration, it contains the string read last, which then in turn is printed for each of element of kelimelerim[b].
Update:
To fix this either replace kelimelerim[b] by an array of "string" not just pointers to strings and do
strcpy(kelimelerim[b], kelimeler);
or dynamically create a real copy of kelimeler by doing:
kelimelerim[b] = strdup(kelimeler);
Be aware that for this latter case each call to strdup() allocates memory from the heap, which you shall free if not used anymore, by calling free() on each elelment of kelimelerim.
Also strdup() isn't Standard C but a POSIX extension. You might need to #define something to have it avaliable. See your implementation's documentaion on strdup() for details.
If strdup() is not available you might like to use this:
#include <stdlib.h> /* for malloc() */
#include <string.h> /* for strcpy() */
#include <errno.h> /* for errno */
char * strdup(const char * s)
{
char * p = NULL;
if (NULL == s)
{
errno = EINVAL;
}
else
{
p = malloc(strlen(s) + 1);
if (NULL == p)
{
errno = ENOMEM;
}
else
{
strcpy(p, s);
}
}
return p;
}
You are using char *vocab; as uninitialized pointer. This results in undefined behavior. You have to initialize this pointer before using it with a valid memory ( e.g. using malloc).