Pseudoinverse code results in C inaccurate compared to MATLAB results - c
I am trying to figure out why my pseudoinverse C code results differ from MATLAB results.
This is the code for pseudo-inverse: http://www.mymathlib.com/c_source/matrices/linearsystems/singular_value_decomposition.c
#include <string.h> // required for memcpy()
#include <float.h> // required for DBL_EPSILON
#include <math.h> // required for fabs(), sqrt();
#define MAX_ITERATION_COUNT 30 // Maximum number of iterations
// Internally Defined Routines
static void Householders_Reduction_to_Bidiagonal_Form(double* A, int nrows,
int ncols, double* U, double* V, double* diagonal, double* superdiagonal );
static int Givens_Reduction_to_Diagonal_Form( int nrows, int ncols,
double* U, double* V, double* diagonal, double* superdiagonal );
static void Sort_by_Decreasing_Singular_Values(int nrows, int ncols,
double* singular_value, double* U, double* V);
////////////////////////////////////////////////////////////////////////////////
// int Singular_Value_Decomposition(double* A, int nrows, int ncols, //
// double* U, double* singular_values, double* V, double* dummy_array) //
// //
// Description: //
// This routine decomposes an m x n matrix A, with m >= n, into a product //
// of the three matrices U, D, and V', i.e. A = UDV', where U is an m x n //
// matrix whose columns are orthogonal, D is a n x n diagonal matrix, and //
// V is an n x n orthogonal matrix. V' denotes the transpose of V. If //
// m < n, then the procedure may be used for the matrix A'. The singular //
// values of A are the diagonal elements of the diagonal matrix D and //
// correspond to the positive square roots of the eigenvalues of the //
// matrix A'A. //
//
// Driver for the singular value decomposition: runs the three stages in
// order on the caller-supplied arrays.
//
// Returns 0 on success, or -1 when the diagonalization stage reports a
// negative status (in which case the sorting stage is skipped).
int Singular_Value_Decomposition(double* A, int nrows, int ncols, double* U,
double* singular_values, double* V, double* dummy_array)
{
    int status;

    // Stage 1: reduce A to bidiagonal form. singular_values receives the
    // diagonal and dummy_array is passed as the superdiagonal work array.
    Householders_Reduction_to_Bidiagonal_Form(A, nrows, ncols, U, V,
                                              singular_values, dummy_array);

    // Stage 2: iterate the bidiagonal form down to a diagonal one.
    status = Givens_Reduction_to_Diagonal_Form(nrows, ncols, U, V,
                                               singular_values, dummy_array);
    if (status < 0) {
        return -1;
    }

    // Stage 3: order the singular values from largest to smallest, permuting
    // the columns of U and V to match.
    Sort_by_Decreasing_Singular_Values(nrows, ncols, singular_values, U, V);

    return 0;
}
////////////////////////////////////////////////////////////////////////////////
// static void Householders_Reduction_to_Bidiagonal_Form(double* A, int nrows,//
// int ncols, double* U, double* V, double* diagonal, double* superdiagonal )//
// //
// Description: //
// This routine decomposes an m x n matrix A, with m >= n, into a product //
// of the three matrices U, B, and V', i.e. A = UBV', where U is an m x n //
// matrix whose columns are orthogonal, B is a n x n bidiagonal matrix, //
// and V is an n x n orthogonal matrix. V' denotes the transpose of V. //
// If m < n, then the procedure may be used for the matrix A'. //
// //
//
////////////////////////////////////////////////////////////////////////////////
// //
// Reduce the nrows x ncols matrix A to bidiagonal form with Householder
// transforms, then build the accumulated transformation matrices in V and U.
//
// Arguments:
//   A              Input matrix. It is only read here (copied into U).
//   nrows, ncols   Dimensions of A and U. Elements of one row are adjacent in
//                  memory; the code moves to the next row by adding ncols.
//   U              Receives a copy of A and is then transformed in place; the
//                  "Update U" pass at the end overwrites it column by column.
//   V              Addressed as an ncols x ncols array and filled by the
//                  "Update V" pass.
//   diagonal       ncols entries; diagonal[i] is set to s * scale from the
//                  i-th column transform.
//   superdiagonal  ncols entries; superdiagonal[0] is set to 0 and
//                  superdiagonal[i] (i > 0) to scale * s from the row
//                  transform of step i - 1. Entries ip1..ncols-1 also serve
//                  as scratch space inside each row transform.
//
// NOTE(review): nothing in this routine checks the relation between nrows and
// ncols, or that ncols >= 1 — confirm the caller guarantees it.
static void Householders_Reduction_to_Bidiagonal_Form(double* A, int nrows,
int ncols, double* U, double* V, double* diagonal, double* superdiagonal )
{
int i,j,k,ip1;
double s, s2, si, scale;
// NOTE(review): dum is declared but never used in this function.
double dum;
double *pu, *pui, *pv, *pvi;
double half_norm_squared;
// Copy A to U
memcpy(U,A, sizeof(double) * nrows * ncols);
//
diagonal[0] = 0.0;
s = 0.0;
scale = 0.0;
// pui points at the start of row i of U; ip1 is always i + 1.
for ( i = 0, pui = U, ip1 = 1; i < ncols; pui += ncols, i++, ip1++ ) {
// s and scale still hold the values left by the previous iteration's row
// transform (both 0.0 on the first pass), so this stores that result.
superdiagonal[i] = scale * s;
//
// Perform Householder transform on columns.
//
// Calculate the normed squared of the i-th column vector starting at
// row i.
//
// scale = sum of absolute values of column i from row i downwards.
for (j = i, pu = pui, scale = 0.0; j < nrows; j++, pu += ncols)
scale += fabs( *(pu + i) );
if (scale > 0.0) {
// Divide the column by scale before squaring; s2 is the sum of squares of
// the scaled entries.
for (j = i, pu = pui, s2 = 0.0; j < nrows; j++, pu += ncols) {
*(pu + i) /= scale;
s2 += *(pu + i) * *(pu + i);
}
//
//
// Choose sign of s which maximizes the norm
// (s gets the sign opposite to the leading entry of the column).
//
s = ( *(pui + i) < 0.0 ) ? sqrt(s2) : -sqrt(s2);
//
// Calculate -2/u'u
// (the value stored is the divisor used below, not its reciprocal).
//
half_norm_squared = *(pui + i) * s - s2;
//
// Transform remaining columns by the Householder transform.
//
// Turn the leading entry into the leading entry of the Householder vector.
*(pui + i) -= s;
for (j = ip1; j < ncols; j++) {
// si = (column i . column j) over rows i..nrows-1, divided by
// half_norm_squared; then column j += si * column i.
for (k = i, si = 0.0, pu = pui; k < nrows; k++, pu += ncols)
si += *(pu + i) * *(pu + j);
si /= half_norm_squared;
for (k = i, pu = pui; k < nrows; k++, pu += ncols) {
*(pu + j) += si * *(pu + i);
}
}
}
// Undo the scaling of column i. This runs even when scale == 0.0, and
// diagonal[i] uses whatever s currently holds (0.0 in that case, because s
// was reset at the end of the previous pass or before the loop).
for (j = i, pu = pui; j < nrows; j++, pu += ncols) *(pu + i) *= scale;
diagonal[i] = s * scale;
//
// Perform Householder transform on rows.
//
// Calculate the normed squared of the i-th row vector starting at
// column i.
//
s = 0.0;
scale = 0.0;
// No row transform for the last column, or once i has run past the last row.
if (i >= nrows || i == (ncols - 1) ) continue;
// scale = sum of absolute values of row i from column ip1 onwards.
for (j = ip1; j < ncols; j++) scale += fabs ( *(pui + j) );
if ( scale > 0.0 ) {
for (j = ip1, s2 = 0.0; j < ncols; j++) {
*(pui + j) /= scale;
s2 += *(pui + j) * *(pui + j);
}
// Same sign convention as for the column transform.
s = ( *(pui + ip1) < 0.0 ) ? sqrt(s2) : -sqrt(s2);
//
// Calculate -2/u'u
//
half_norm_squared = *(pui + ip1) * s - s2;
//
// Transform the rows by the Householder transform.
//
*(pui + ip1) -= s;
// superdiagonal[ip1..ncols-1] is used as scratch here: it holds the
// Householder vector divided by half_norm_squared.
for (k = ip1; k < ncols; k++)
superdiagonal[k] = *(pui + k) / half_norm_squared;
if ( i < (nrows - 1) ) {
// For every row below row i: si = (row i . row j) over columns
// ip1..ncols-1, then row j += si * scratch vector.
for (j = ip1, pu = pui + ncols; j < nrows; j++, pu += ncols) {
for (k = ip1, si = 0.0; k < ncols; k++)
si += *(pui + k) * *(pu + k);
for (k = ip1; k < ncols; k++) {
*(pu + k) += si * superdiagonal[k];
}
}
}
// Undo the scaling of row i.
for (k = ip1; k < ncols; k++) *(pui + k) *= scale;
}
}
// Update V
// Works backwards from i = ncols - 2 down to 0, reading the row Householder
// vectors that were left in row i of U. pvi points at row i of V.
// NOTE(review): when ncols == 1 the next line forms the address U - 1; the
// loop below is not entered in that case, so the pointer is never
// dereferenced, but the pointer arithmetic itself is out of range.
pui = U + ncols * (ncols - 2);
pvi = V + ncols * (ncols - 1);
*(pvi + ncols - 1) = 1.0;
s = superdiagonal[ncols - 1];
pvi -= ncols;
for (i = ncols - 2, ip1 = ncols - 1; i >= 0; i--, pui -= ncols,
pvi -= ncols, ip1-- ) {
// On entry s holds superdiagonal[ip1].
if ( s != 0.0 ) {
pv = pvi + ncols;
// Fill column i of V for rows ip1..ncols-1.
// Presumably written as two divisions rather than one division by a
// product to avoid under/overflow — TODO confirm.
for (j = ip1; j < ncols; j++, pv += ncols)
*(pv + i) = ( *(pui + j) / *(pui + ip1) ) / s;
for (j = ip1; j < ncols; j++) {
si = 0.0;
for (k = ip1, pv = pvi + ncols; k < ncols; k++, pv += ncols)
si += *(pui + k) * *(pv + j);
for (k = ip1, pv = pvi + ncols; k < ncols; k++, pv += ncols)
*(pv + j) += si * *(pv + i);
}
}
// Clear row i and column i of V beyond the diagonal, then set V[i][i] = 1.
pv = pvi + ncols;
for ( j = ip1; j < ncols; j++, pv += ncols ) {
*(pvi + j) = 0.0;
*(pv + i) = 0.0;
}
*(pvi + i) = 1.0;
s = superdiagonal[i];
}
// Update U
// Works backwards from i = ncols - 1 down to 0; inside the loop ip1 is i + 1
// and pui points at row i of U.
pui = U + ncols * (ncols - 1);
for (i = ncols - 1, ip1 = ncols; i >= 0; ip1 = i, i--, pui -= ncols ) {
s = diagonal[i];
// Clear row i to the right of the diagonal.
for ( j = ip1; j < ncols; j++) *(pui + j) = 0.0;
if ( s != 0.0 ) {
for (j = ip1; j < ncols; j++) {
si = 0.0;
pu = pui + ncols;
// Dot product of columns i and j over rows ip1..nrows-1.
for (k = ip1; k < nrows; k++, pu += ncols)
si += *(pu + i) * *(pu + j);
si = (si / *(pui + i) ) / s;
for (k = i, pu = pui; k < nrows; k++, pu += ncols)
*(pu + j) += si * *(pu + i);
}
for (j = i, pu = pui; j < nrows; j++, pu += ncols){
*(pu + i) /= s;
}
}
else
// Zero diagonal entry: column i (rows i..nrows-1) is cleared instead.
for (j = i, pu = pui; j < nrows; j++, pu += ncols) *(pu + i) = 0.0;
*(pui + i) += 1.0;
}
}
////////////////////////////////////////////////////////////////////////////////
// static int Givens_Reduction_to_Diagonal_Form( int nrows, int ncols, //
// double* U, double* V, double* diagonal, double* superdiagonal ) //
// //
// Description: //
// This routine decomposes a bidiagonal matrix given by the arrays //
// diagonal and superdiagonal into a product of three matrices U1, D and //
// V1', the matrix U1 premultiplies U and is returned in U, the matrix //
// V1 premultiplies V and is returned in V. The matrix D is a diagonal //
// matrix and replaces the array diagonal. //
// //
// The method used to annihilate the offdiagonal elements is a variant //
// of the QR transformation. The method consists of applying Givens //
// rotations to the right and the left of the current matrix until //
// the new off-diagonal elements are chased out of the matrix. //
// //
// The process is an iterative process which due to roundoff errors may //
// not converge within a predefined number of iterations. (This should //
// be unusual.) //
// //
// Arguments: //
// int nrows //
// The number of rows of the matrix U. //
// int ncols //
// The number of columns of the matrix U. //
// double* U //
// On input, a pointer to a matrix already initialized to a matrix //
// with mutually orthogonal columns. On output, the matrix with //
// mutually orthogonal columns. //
// double* V //
// On input, a pointer to a square matrix with the same number of rows //
// and columns as the columns of the matrix U, i.e. V[ncols][ncols]. //
// The matrix V is assumed to be initialized to an orthogonal matrix. //
// On output, V is an orthogonal matrix. //
// double* diagonal //
// On input, a pointer to an array of dimension ncols which initially //
// contains the diagonal of the bidiagonal matrix. On output, it //
// contains the diagonal of the diagonal matrix. //
// double* superdiagonal //
// On input, a pointer to an array of dimension ncols whose first //
// component is zero and whose successive components form the //
// superdiagonal of the bidiagonal matrix. //
// //
// Return Values: //
// 0 Success //
// -1 Failure - The procedure failed to terminate within //
// MAX_ITERATION_COUNT iterations. //
// //
// Example: //
// #define M //
// #define N //
// double U[M][N]; //
// double V[N][N]; //
// double diagonal[N]; //
// double superdiagonal[N]; //
// int err; //
// //
// (your code to initialize the matrices U, V, diagonal, and ) //
// ( superdiagonal. - Note this routine is not accessible from outside) //
// ( i.e. it is declared static.) //
// //
// err = Givens_Reduction_to_Diagonal_Form( M,N,(double*)U,(double*)V, //
// diagonal, superdiagonal ); //
// if ( err < 0 ) printf("Failed to converge\n"); //
// else { ... } //
// ... //
////////////////////////////////////////////////////////////////////////////////
// //
// Iteratively reduce the bidiagonal matrix held in diagonal/superdiagonal to
// diagonal form, applying every rotation to the columns of U or V as well.
//
// Arguments:
//   nrows, ncols   U is addressed as nrows rows of ncols entries, V as ncols
//                  rows of ncols entries.
//   U, V           Updated in place by the rotations.
//   diagonal       ncols entries, overwritten; each converged entry is made
//                  non-negative.
//   superdiagonal  ncols entries, overwritten as work space.
//
// Returns 0 on success, or -1 if some entry needs more than
// MAX_ITERATION_COUNT sweeps.
static int Givens_Reduction_to_Diagonal_Form( int nrows, int ncols,
double* U, double* V, double* diagonal, double* superdiagonal )
{
double epsilon;
double c, s;
double f,g,h;
double x,y,z;
double *pu, *pv;
int i,j,k,m;
int rotation_test;
int iteration_count;
// x = max over i of |diagonal[i]| + |superdiagonal[i]|; entries whose
// magnitude is at most epsilon = x * DBL_EPSILON are treated as negligible.
for (i = 0, x = 0.0; i < ncols; i++) {
y = fabs(diagonal[i]) + fabs(superdiagonal[i]);
if ( x < y ) x = y;
}
epsilon = x * DBL_EPSILON;
// Process the diagonal entries from the last one back to the first.
for (k = ncols - 1; k >= 0; k--) {
iteration_count = 0;
while(1) {
rotation_test = 1;
// Search downwards from k for a split point m: either superdiagonal[m] is
// negligible (no cancellation needed, rotation_test = 0), or diagonal[m-1]
// is negligible (rotation_test stays 1).
// NOTE(review): at m == 0 this relies on |superdiagonal[0]| <= epsilon to
// break before diagonal[m-1] is read with m == 0 — confirm callers always
// pass superdiagonal[0] == 0.
for (m = k; m >= 0; m--) {
if (fabs(superdiagonal[m]) <= epsilon) {rotation_test = 0; break;}
if (fabs(diagonal[m-1]) <= epsilon) break;
}
if (rotation_test) {
// diagonal[m-1] is negligible: rotate superdiagonal[m..k] away, applying
// each rotation to columns m-1 and i of U.
c = 0.0;
s = 1.0;
for (i = m; i <= k; i++) {
f = s * superdiagonal[i];
superdiagonal[i] *= c;
if (fabs(f) <= epsilon) break;
g = diagonal[i];
h = sqrt(f*f + g*g);
diagonal[i] = h;
c = g / h;
s = -f / h;
for (j = 0, pu = U; j < nrows; j++, pu += ncols) {
y = *(pu + m - 1);
z = *(pu + i);
*(pu + m - 1 ) = y * c + z * s;
*(pu + i) = -y * s + z * c;
}
}
}
z = diagonal[k];
if (m == k ) {
// Entry k has converged. Make it non-negative, flipping the sign of
// column k of V to compensate, and move on to the next k.
if ( z < 0.0 ) {
diagonal[k] = -z;
for ( j = 0, pv = V; j < ncols; j++, pv += ncols)
*(pv + k) = - *(pv + k);
}
break;
}
else {
// Not converged yet: give up after MAX_ITERATION_COUNT sweeps.
if ( iteration_count >= MAX_ITERATION_COUNT ) return -1;
iteration_count++;
// Compute the starting value f for this sweep from diagonal[m],
// diagonal[k-1], diagonal[k], superdiagonal[k-1] and superdiagonal[k].
// NOTE(review): the divisions by (2.0 * h * y) and by x below have no
// guard against a zero divisor.
x = diagonal[m];
y = diagonal[k-1];
g = superdiagonal[k-1];
h = superdiagonal[k];
f = ( (y - z) * ( y + z ) + (g - h) * (g + h) )/(2.0 * h * y);
g = sqrt( f * f + 1.0 );
if ( f < 0.0 ) g = -g;
f = ( (x - z) * (x + z) + h * (y / (f + g) - h) ) / x;
// Next QR Transformation
c = 1.0;
s = 1.0;
for (i = m + 1; i <= k; i++) {
g = superdiagonal[i];
y = diagonal[i];
h = s * g;
g *= c;
z = sqrt( f * f + h * h );
superdiagonal[i-1] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = -x * s + g * c;
h = y * s;
y *= c;
// Apply this rotation to columns i-1 and i of V.
for (j = 0, pv = V; j < ncols; j++, pv += ncols) {
x = *(pv + i - 1);
z = *(pv + i);
*(pv + i - 1) = x * c + z * s;
*(pv + i) = -x * s + z * c;
}
z = sqrt( f * f + h * h );
diagonal[i - 1] = z;
// When z is zero, c and s keep the values from the rotation above.
if (z != 0.0) {
c = f / z;
s = h / z;
}
f = c * g + s * y;
x = -s * g + c * y;
// Apply the second rotation to columns i-1 and i of U.
for (j = 0, pu = U; j < nrows; j++, pu += ncols) {
y = *(pu + i - 1);
z = *(pu + i);
*(pu + i - 1) = c * y + s * z;
*(pu + i) = -s * y + c * z;
}
}
superdiagonal[m] = 0.0;
superdiagonal[k] = f;
diagonal[k] = x;
}
}
}
return 0;
}
////////////////////////////////////////////////////////////////////////////////
// static void Sort_by_Decreasing_Singular_Values(int nrows, int ncols, //
// double* singular_values, double* U, double* V) //
// //
// Description: //
// This routine sorts the singular values from largest to smallest //
// singular value and interchanges the columns of U and the columns of V //
// whenever a swap is made. I.e. if the i-th singular value is swapped //
// with the j-th singular value, then the i-th and j-th columns of U are //
// interchanged and the i-th and j-th columns of V are interchanged. //
// //
// Arguments: //
// int nrows //
// The number of rows of the matrix U. //
// int ncols //
// The number of columns of the matrix U. //
// double* singular_values //
// On input, a pointer to the array of singular values. On output, the//
// sorted array of singular values. //
// double* U //
// On input, a pointer to a matrix already initialized to a matrix //
// with mutually orthogonal columns. On output, the matrix with //
// mutually orthogonal possibly permuted columns. //
// double* V //
// On input, a pointer to a square matrix with the same number of rows //
// and columns as the columns of the matrix U, i.e. V[ncols][ncols]. //
// The matrix V is assumed to be initialized to an orthogonal matrix. //
// On output, V is an orthogonal matrix with possibly permuted columns.//
// //
// Return Values: //
// The function is of type void. //
// //
// Example: //
// #define M //
// #define N //
// double U[M][N]; //
// double V[N][N]; //
// double diagonal[N]; //
// //
// (your code to initialize the matrices U, V, and diagonal. ) //
// ( - Note this routine is not accessible from outside) //
// ( i.e. it is declared static.) //
// //
// Sort_by_Decreasing_Singular_Values(nrows, ncols, singular_values, //
// (double*) U, (double*) V); //
// ... //
////////////////////////////////////////////////////////////////////////////////
// //
// Selection sort of singular_values into decreasing order. Whenever two
// singular values trade places, the matching columns of U (nrows rows) and
// of V (ncols rows) are exchanged as well. Both matrices are stored with
// ncols entries per row.
static void Sort_by_Decreasing_Singular_Values(int nrows, int ncols,
double* singular_values, double* U, double* V)
{
    int slot, probe, largest, row;
    double held;
    double *line;

    for (slot = 0; slot < ncols - 1; slot++) {
        // Locate the first occurrence of the largest remaining value.
        largest = slot;
        for (probe = slot + 1; probe < ncols; probe++) {
            if (singular_values[probe] > singular_values[largest]) {
                largest = probe;
            }
        }

        if (largest != slot) {
            held = singular_values[slot];
            singular_values[slot] = singular_values[largest];
            singular_values[largest] = held;

            // Exchange columns `slot` and `largest` of U, one row at a time.
            for (row = 0, line = U; row < nrows; row++, line += ncols) {
                held = line[largest];
                line[largest] = line[slot];
                line[slot] = held;
            }

            // Same exchange for the columns of the square matrix V.
            for (row = 0, line = V; row < ncols; row++, line += ncols) {
                held = line[largest];
                line[largest] = line[slot];
                line[slot] = held;
            }
        }
    }
}
////////////////////////////////////////////////////////////////////////////////
// void Singular_Value_Decomposition_Inverse(double* U, double* D, double* V,//
// double tolerance, int nrows, int ncols, double *Astar) //
// //
// Description: //
// This routine calculates the pseudo-inverse of the matrix A = UDV'. //
// where U, D, V constitute the singular value decomposition of A. //
// Let Astar be the pseudo-inverse then Astar = V(1/D)U', where 1/D is //
// the pseudo-inverse of D, i.e. if D[i] > 0 then (1/D)[i] = 1/D[i] and //
// if D[i] = 0, then (1/D)[i] = 0. Because the singular values are //
// subject to round-off error. A tolerance is given so that if //
// D[i] < tolerance, D[i] is treated as if it were 0. //
// The default tolerance is D[0] * DBL_EPSILON * ncols, assuming that the //
// diagonal matrix of singular values is sorted from largest to smallest, //
// if the user specified tolerance is less than the default tolerance, //
// then the default tolerance is used. //
// //
// Arguments: //
// double* U //
// A matrix with mutually orthonormal columns. //
// double* D //
// A diagonal matrix with decreasing non-negative diagonal elements. //
// i.e. D[i] > D[j] if i < j and D[i] >= 0 for all i. //
// double* V //
// An orthogonal matrix. //
// double tolerance //
// A lower bound for non-zero singular values (provided tolerance > //
// ncols * DBL_EPSILON * D[0]). //
// int nrows //
// The number of rows of the matrix U and B. //
// int ncols //
// The number of columns of the matrix U. Also the number of rows and //
// columns of the matrices D and V. //
// double* Astar //
// On input, a pointer to the first element of an ncols x nrows matrix.//
// On output, the pseudo-inverse of UDV'. //
// //
// Return Values: //
// The function is of type void. //
// //
// Example: //
// #define M //
// #define N //
// double U[M][N]; //
// double V[N][N]; //
// double D[N]; //
// double Astar[N][M]; //
// double tolerance; //
// //
// (your code to initialize the matrices U,D,V) //
// //
// Singular_Value_Decomposition_Inverse((double*) U, D, (double*) V, //
// tolerance, M, N, (double*) Astar); //
// //
// printf(" The pseudo-inverse of A = UDV' is \n"); //
// ... //
////////////////////////////////////////////////////////////////////////////////
// //
// Build the pseudo-inverse Astar (ncols rows of nrows entries) from the
// factors U (nrows rows of ncols entries), D (ncols singular values) and
// V (ncols rows of ncols entries):
//
//     Astar[i][j] = sum over k of V[i][k] * U[j][k] / D[k]
//
// Only terms with D[k] strictly greater than the cutoff contribute. The
// cutoff is the larger of the caller's tolerance and
// DBL_EPSILON * D[0] * ncols.
void Singular_Value_Decomposition_Inverse(double* U, double* D, double* V,
double tolerance, int nrows, int ncols, double *Astar)
{
    int out_row, out_col, term;
    double floor_tolerance = DBL_EPSILON * D[0] * (double) ncols;
    double cutoff = (tolerance < floor_tolerance) ? floor_tolerance : tolerance;

    for (out_row = 0; out_row < ncols; out_row++) {
        const double *v_line = V + out_row * ncols;
        double *result_line = Astar + out_row * nrows;

        for (out_col = 0; out_col < nrows; out_col++) {
            const double *u_line = U + out_col * ncols;

            // Accumulate directly in the output cell, term by term in
            // increasing k, so the rounding sequence is unchanged.
            result_line[out_col] = 0.0;
            for (term = 0; term < ncols; term++) {
                if (D[term] > cutoff) {
                    result_line[out_col] += v_line[term] * u_line[term] / D[term];
                }
            }
        }
    }
}
I have set my tolerance to 1e-16.
Input Matrix:
MatA[4][4] = {
{1e-15,2e-15,3e-15,4e-15},
{5e-15,10e-15,7e-15,8e-15},
{9e-15, 18e-15, 11e-15,12e-15},
{13e-15,26e-15,15e-15,16e-15}
};
C code results:
-7.3177e+13 -3.6957e+13 -7.3773e+11 3.5482e+13
-1.4635e+14 -7.3915e+13 -1.4755e+12 7.0964e+13
1.0264e+14 5.7015e+13 1.1387e+13 -3.4240e+13
1.9055e+14 1.0400e+14 1.7450e+13 -6.9101e+13
Matlab results:
1.0e+14 *
-0.7348 -0.3712 -0.0076 0.3561
-1.4697 -0.7424 -0.0152 0.7121
1.0227 0.5682 0.1136 -0.3409
1.9015 1.0379 0.1742 -0.6894
I am not sure where I am losing accuracy. The only places where I can see accuracy coming into play are DBL_EPSILON and the tolerance. I have also tried setting DBL_EPSILON to 4.94065645841247E-32. I am not sure how to get values closer to the MATLAB output.
It is likely your choice of tolerance. MATLAB sets the tolerance by default to max(size(A)) * eps(norm(A)) (according to the docs). For your matrix, this is 2.5244e-29.
If I compute A*pinv(A)*A - A I see
1.0e-28 *
-0.0039 -0.0079 0 0
-0.0237 -0.0473 -0.0158 0
-0.0473 -0.0947 -0.0316 0
-0.0789 -0.1578 -0.0316 -0.0316
If I compute the same but using your result instead of pinv(A), I see
1.0e-16 *
-0.0430 -0.0860 0.0582 0.1088
-0.1356 -0.2712 0.1862 0.3472
-0.2282 -0.4565 0.3143 0.5855
-0.3209 -0.6417 0.4423 0.8239
Looking at these magnitudes, this really points to the two choices for tolerance.
Related
Comparing two triangles in C (3, 3, 4 and 4, 3, 3 for example)
I need to find all possible triangles in a set of integers. I successfully got the result, but I have many duplicates, for example: A(3, 3, 4) == B(4, 3, 3) I don't need to find similar triangles, I need to know when they are equal. I tried to save my triangles as structs and wanted to compare them: struct triangle { int a; int b; // I stored all found triangles this way int c; }; int getNumberOfDuplicates(struct triangle savedTriangles[], int size) { int count = 0; for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { int a,b,c,x,y,z; if (i != j) { a = savedTriangles[i].a; b = savedTriangles[i].b; c = savedTriangles[i].c; x = savedTriangles[j].a; y = savedTriangles[j].b; z = savedTriangles[j].c; if (areTheSame) { // Here I don't know how to compare them count++; } } } } return count; Is there any mathematical way to compare them? Or any programming way?
The simplest solution would be to sort the three numbers. void swap(int *a, int *b) { int tmp = *a; *a = *b; *b = tmp; } void bubble_sort(int *a, int *b, int *c) { if (a > b) swap(&a, &b); if (b > c) swap(&b, &c); if (a > b) swap(&a, &b); } At which point it should be trivial to compare if they are equal.
You have to do two modifications to your code: Just compare a triangle to the followers only, you are now counting it twice, which will give you double the desired result. Sort the lengths in ascending order by using min and max, then compare them. So your code should look like this: for (int i = 0; i < size - 1; i++) { for (int j = i + 1; j < size; j++) { //Start from i + 1 not 0 int a, b, c, x, y, z, a1, b1, c1, x1, y1, z1; a = savedTriangles[i].a; b = savedTriangles[i].b; c = savedTriangles[i].c; x = savedTriangles[j].a; y = savedTriangles[j].b; z = savedTriangles[j].c; a1 = min(a, min(b, c)); c1 = max(a, max(b, c)); b1 = a + b + c - a1 - c1; x1 = min(x, min(y, z)); z1 = max(x, max(y, z)); y1 = x + y + z - x1 - z1; if (a1 == x1 && b1 == y1 && c1 == z1) count++; } } int min (int a, int b) { return a < b ? a : b; } int max (int a, int b) { return a > b ? a : b; } There is a mathematical way by getting their sum, product and sum of their squares without the need to sort them, any triplet will give a unique set of results regardless of their order according to this, so the alternative code should look like this: for (int i = 0; i < size - 1; i++) { for (int j = i + 1; j < size; j++) { int a, b, c, x, y, z, s1, s2, ss1, ss2, p1, p2; a = savedTriangles[i].a; b = savedTriangles[i].b; c = savedTriangles[i].c; x = savedTriangles[j].a; y = savedTriangles[j].b; z = savedTriangles[j].c; s1 = a + b + c; ss1 = a * a + b * b + c * c; p1 = a * b * c; s2 = x + y + c; ss2 = x * x + y * y + z * z; p2 = x * y * z; if (s1 == s2 && ss1 == ss2 && p1 == p2) count++; } }
Assuming the list is complete, every triangle will be repeated for every permutation of its sides a, b, c. Therefore each triangle is a duplicate of one with a <= b <= c, so it is enough to only check those, and automatically count any triangle with b < a or c < b as a duplicate. int getNumberOfDuplicates(struct triangle savedTriangles[], int size){ int count = 0; for (int i = 0; i < size; i++){ int a = savedTriangles[i].a, b = savedTriangles[i].b, c = savedTriangles[i].c; if(b < a || c < b){ count++; continue; } for (int j = i + 1; j < size; j++){ int x = savedTriangles[j].a, y = savedTriangles[j].b, z = savedTriangles[j].c; if(a == x && b == y && c == z){ count++; break; } } } return count; }
Exception thrown, Dynamic memory allocation in C
Hi I'm trying to do dynamic memory allocation of a large matrix in C but I'm running into the following error: Exception thrown at 0x00007FF63A248571 in cdempd.exe: 0xC0000005: Access violation writing location 0x0000000000000000. occurred sometimes it's Access violation writing location 0xFFFFFFFFB412E2A0. double ndivx, ndivy, ndivz, nt, r, box, dx, totnode; int main() { ndivx = 19.0; ndivy = 19.0; ndivz = 19.0; int totnode = ndivx * ndivy * ndivz; r = 0.005; //diameter of sphere dx = 0.0025 / ndivx; double dx = r / ndivx; // distance between points int cols = 3; int** coords; coords = malloc(totnode * sizeof(int*)); for (int i = 0; i < totnode; i++) { coords[i] = malloc(cols * sizeof(int)); } //int* coord = (int*)malloc(totnode * cols * sizeof(int)); // int offset = i * cols + j; // now mat[offset] corresponds to m(i, j) //create a cube of equidistant points int numm = 0; for (int i = 1; i <= ndivx; i++) { for (int j = 1; j <= ndivy; j++) { for (int k = 1; k <= ndivz; k++) { coords[numm][0] = -1.0 / 2.0 * (r)+(dx / 2.0) + (i - 1.0) * dx; coords[numm][1] = -1.0 / 2.0 * (r)+(dx / 2.0) + (j - 1.0) * dx; coords[numm][2] = -1.0 / 2.0 * (r)+(dx / 2.0) + (k - 1.0) * dx; numm = numm + 1; } } } } pd.r is a double 0.005, dx is a double about 0.00026315, totnode is 6859. I've tried two methods, the one that is there and the one commented out with //. Both give me the same error. I'm using visual studio 2019. I'm not so familiar with c and visual studio so forgive me if the question is silly. Any help would be appreciated thank you.
Aside from some of the other errors [after correction], all values of coords are set to zero. This is because coords is a pointer to int and not (e.g.) double and your equation uses -1.0 / ... which will always produce a fraction. Also, as David pointed out, you're indexing from 1 [vs. 0] in the for loops. This could cause access violations/segfaults. I've changed the for loops to start from 0. And, I've adjusted the equation accordingly (using a macro). You were defining some things like index variables or size variables as double instead of int (e.g.) ndivx Also, I introduced a typedef for the coordinate values. Here's some cleaned up code that may help get you further: #include <stdio.h> #include <stdlib.h> #if 0 double ndivx, ndivy, ndivz, nt, r, box, dx, totnode; #endif #if 0 typedef int coord_t; #else typedef double coord_t; #endif #define SETCOORD(_xidx,_var) \ do { \ coords[numm][_xidx] = -1.0 / 2.0 * r + (dx / 2.0) + (_var * dx); \ printf("coords[%d][%d]=%g\n",numm,_xidx,(double) coords[numm][_xidx]); \ } while (0) int main(void) { #if 1 int ndivx; int ndivy; int ndivz; double r; double dx; #endif ndivx = 19; ndivy = 19; ndivz = 19; int totnode = ndivx * ndivy * ndivz; r = 0.005; // diameter of sphere dx = 0.0025 / ndivx; #if 0 double dx = r / ndivx; // distance between points #else dx = r / ndivx; // distance between points #endif int cols = 3; #if 0 int **coords; #else coord_t **coords; #endif coords = malloc(totnode * sizeof(coord_t *)); for (int i = 0; i < totnode; i++) { coords[i] = malloc(cols * sizeof(coord_t)); } // int* coord = (int*)malloc(totnode * cols * sizeof(int)); // int offset = i * cols + j; // now mat[offset] corresponds to m(i, j) // create a cube of equidistant points int numm = 0; for (int i = 0; i < ndivx; i++) { for (int j = 0; j < ndivy; j++) { for (int k = 0; k < ndivz; k++) { SETCOORD(0,i); SETCOORD(1,j); SETCOORD(2,k); numm = numm + 1; } } } return 0; }
C program runs in the repl.it compiler but not with gcc (segfault); can't find the cause
Apologies for the horrible indentation. For whatever reason it wont paste with it. As in the title Im getting a segmentation error pointing to both main and ReadData funtion. However it doesnt say in which line. Ive tried multiple changes and it end sup in the same. edit: gdb gives me:at vfscanf.c:1898 1898 vfscanf.c: No such file or directory. As input Im using in.txt with: -1 1 5 14 3 1 -6 -2 -4 2 4 -4 2 4 -1 3 2 2 0 -2 -4 -2 -6 6 4 4 -2 4 2 -2 -4 6 C CODE: #include<stdio.h> #include<stdlib.h> #include<math.h> /* point structure to store x & y co-ordinates */ typedef struct { int x, y; } point; /* Prototype declarations*/ point * ReadData(point * center, int * radius, int * nPoints); point * FilterData(point * data, point center, int radius, int * nPoints); void Merge(point * data, int p, int q, int r); void MergeSort(point * data, int p, int r); void BinarySearch(point * data, int nPoints, point p); void SearchPhase(point * data, int nPoints); int main() { point * data, * filter; point center; FILE * fp = fopen("out.txt", "w"); // output file out.txt int radius, nPoints, i; data = ReadData( & center, & radius, & nPoints); //call of ReadData() filter = FilterData(data, center, radius, & nPoints); // call of FilterData() MergeSort(filter, 0, nPoints - 1); // call of MersgeSort() for (i = 0; i < nPoints; i++) fprintf(fp, "%d\t%d\n", filter[i].x, filter[i].y); // writing to output file fclose(fp); printf("Filtered and sorted data written to out.txt"); SearchPhase(filter, nPoints); // call of SearchPhase() return 0; } point * ReadData(point * center, int * radius, int * nPoints) { FILE * fp = fopen("in.txt", "r"); // input file in.txt point * ptr; int i, n; /* reading of center radius and no. 
of poins from file*/ fscanf(fp, "%d%d%d%d", & center - > x, & center - > y, radius, nPoints); n = * nPoints; ptr = (point * ) malloc(sizeof(point) * n); // dynamic memory allocation for (i = 0; i < n; i++) fscanf(fp, "%d%d", & ptr[i].x, & ptr[i].y); // reading of x and y from file fclose(fp); return ptr; } point * FilterData(point * data, point center, int radius, int * nPoints) { point * filter; int n = * nPoints, i, j, pos, x, y; double dist; int * a = (int * ) calloc(sizeof(int), n); // dynamic memory allocation for (i = 0, j = 0; i < n; i++) { x = data[i].x; y = data[i].y; dist = sqrt(pow(center.x - x, 2) + pow(center.y - y, 2)); // distance between center and point if (dist <= (double) radius) // if distance <= radius then point is within circle { a[j] = i; j++; } } * nPoints = j; filter = (point * ) malloc(sizeof(point) * j); // dynamic memory allocation for (i = 0; i < j; i++) { pos = a[i]; filter[i] = data[pos]; } return filter; } /* Merge()*/ void Merge(point * data, int p, int q, int r) { int n1 = q - p + 1; int n2 = r - q; point L[n1 + 1], R[n2 + 1]; int i, j, k; for (i = 0; i < n1; i++) L[i] = data[p + i]; for (j = 0; j < n2; j++) R[j] = data[q + 1 + j]; point sentinel; sentinel.x = 999; sentinel.y = 999; L[n1] = sentinel; R[n2] = sentinel; i = 0, j = 0; for (k = p; k <= r; k++) { if (L[i].x < R[j].x || (L[i].x == R[j].x && L[i].y < R[j].y)) { data[k] = L[i]; i++; } else { data[k] = R[j]; j++; } } } /* MergeSort()*/ void MergeSort(point * data, int p, int r) { int q, i; if (p < r) { q = (p + r) / 2; MergeSort(data, p, q); MergeSort(data, q + 1, r); Merge(data, p, q, r); } } /* BinarySearch()*/ void BinarySearch(point * data, int nPoints, point p) { int l = 0, u = nPoints - 1, m, flag = 0; while (l <= u) { m = (l + u) / 2; if (data[m].x == p.x && data[m].y == p.y) { flag = 1; break; } else if (p.x < data[m].x || (p.x == data[m].x && p.y < data[m].y)) u = m - 1; else l = m + 1; } if (flag) printf("\nOutput: Found at record %d", m + 1); else 
printf("\nOutput: Not Found"); } /*SearchPhase()*/ void SearchPhase(point * data, int nPoints) { point p; while (1) { printf("\nSearch input ( x y): "); scanf("%d%d", & p.x, & p.y); if (p.x == -999 || p.y == -999) { printf("\nOutput: Exit\n"); break; } BinarySearch(data, nPoints, p); } }
Matrix determinant in C using Gaussian elimination: "core dumped" error
I'm trying to make a simple console application in C which will calculate the determinant of a Matrix using the Gauss elimination. after a lot of tests I found out that my program is not working because of the core dumped error.After 2 days of editing and undoing, i could not find the problem. Any help is more than welcomed. #include <stdio.h> #include <stdlib.h> #include <string.h> int recherche_pivot(int k, int n, float *A) { int i, j; if (A[((k - 1) * n + k) - 1] != 0) { return k; } else { //parcours du reste de la colonne for (i = k + 1; i <= n; i++) { if (A[((k - 1) * n + i) - 1] != 0) { return i; } } return -1; } } void fois(int n, float p, int i, float * A, float *b, float * x) { int a; for (a = 1; a <= n; a++) { x[a - 1] = A[((i - 1) * n + a) - 1] * p; } x[n] = b[i - 1] * p; } void afficher_system(int n, float * X, float *b) { int i, j; for (i = 1; i <= n; i++) { for (j = 1; j <= n; j++) printf("%f ", X[((i - 1) * n + j) - 1]); printf(" | %f", b[i - 1]); printf("nn"); } printf("nnnn"); } void saisirmatrice(int n, float *A) { int i, j; for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) scanf("%f", &A[((i - 1) * n + j) - 1]); } void affichermatrice(int n, float *A) { int i, j; for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) printf("A[%d][%d] = %fn", i, j, A[((i - 1) * n + j) - 1]); } void elemination(int n, int k, float *b, float *A) { int i, l, j; float * L, piv; L = (float *) malloc((n) * sizeof(float)); for (i = k + 1; i <= n; i++) { piv = -1 * (A[((i - 1) * n + k) - 1] / A[((k - 1) * n + k) - 1]); fois(n, piv, k, A, b, L); //afficher_vecteur(n,L); for (j = 1; j <= n; j++) { A[((i - 1) * n + j) - 1] = A[((i - 1) * n + j) - 1] + L[j - 1]; } b[i - 1] = b[i - 1] + L[n]; afficher_system(n, A, b); } } void permutter(int n, float * A, int i, int j, float * b) { int a; float t[n + 1]; for (a = 1; a <= n; a++) { t[a - 1] = A[((i - 1) * n + a) - 1]; A[((i - 1) * n + a) - 1] = A[((j - 1) * n + a) - 1]; A[((j - 1) * n + a) - 1] = t[a - 1]; } t[n] = b[i - 1]; b[i - 
1] = b[j - 1]; b[j - 1] = t[n]; } void main() { float * A, det, *L, *R, *b, s; int i, j, i0, n, k, stop = 0; printf("Veuillez donner la taille de la matrice"); scanf("%d", &n); A = (float *) malloc(sizeof(float) * (n * n)); L = (float*) malloc(n * sizeof(float)); R = (float*) malloc(n * sizeof(float)); b = (float*) malloc(n * sizeof(float)); printf("Veuillez remplir la matrice"); saisirmatrice(n, A); det = 1; stop = 0; k = 1; do { do { i0 = recherche_pivot(k, n, A); if (i0 == k) { //Elémination elemination(n, k, b, A); k++; } else if (i0 == -1) { stop = 1; } else { //cas ou ligne pivot=i0 != k //permutation det = -det; permutter(n, A, k, i0, b); //elemination elemination(n, k, b, A); //afficher_matrice(n,A); k++; } } while ((k <= n) && (stop == 0)); } while (stop == 1 || k == n); for (i = 1; i < n; i++) { det = det * A[((i - 1) * n + i) - 1]; } printf("Le determinant est :%f", det); free(A); free(L); free(R); free(b); }
There are many problems in the above code. Since arrays are zero-indexed in C, you should count the rows and columns of your matrices starting from zero, instead of counting from 1 and then attempting to convert when array-indexing. There is no need to cast the result of malloc(), and it is better to use an identifier rather than an explicit type as the argument for the sizeof operator: A = malloc(sizeof(*A) * n * n)); You allocate space for L and R in main(), and then never use these pointers until the end of the program when they are freed. Then you allocate for L within the elemination() function; but you never free this memory, so you have a memory leak. You also allocate space for b in main(), but you don't store any values in b before passing it to the elemination() function. This is bound to cause problems. There is no need for dynamic allocation here in the first place; I suggest using a variable length array to store the elements of the matrix. These have been available since C99, and will allow you to avoid all of the allocation issues. There is a problem in the recherche_pivot() function, where you compare: if(A[((k - 1) * n + i) - 1] != 0) {} This is a problem because the array element is a floating point value which is the result of arithmetic operations; this value should not be directly compared with 0. I suggest selecting an appropriate DELTA value to represent a zero range, and instead comparing: #define DELTA 0.000001 ... if (fabs(A[((k - 1) * n + i) - 1]) < DELTA) {} In the permutter() function you use an array, float t[n];, to hold temporary values. But an array is unnecessary here since you don't need to save these temporary values after the swap; instead just use float t;. Further, when you interchange the values in b[], you use t[n] to store the temporary value, but this is out of bounds. 
The elemination() function should probably iterate over all of the rows (excepting the kth row), rather that starting from the kth row, or it should start at the k+1th row. As it is, the kth row is used to eliminate itself. Finally, the actual algorithm that you use to perform the Gaussian elimination in main() is broken. Among other things, the call permutter(n, A, k, i0, b); swaps the kth row with the i0th row, but i0 is the pivot column of the kth row. This makes no sense. It actually looks like you want to do more than just calculate determinants with this code, since you have b, which is the constant vector of a linear system. This is not needed for the task alluded to in the title of your question. Also, it appears that your code gives a result of 1 for any 1X1 determinant. This is incorrect; it should be the value of the single number in this case. The Gaussian elimination method for calculating the determinant requires that you keep track of how many row-interchanges are performed, and that you keep a running product of any factors by which individual rows are multiplied. Adding a multiple of one row to another row to replace that row does not change the value of the determinant, and this is the operation used in the reduce() function below. The final result is the product of the diagonal entries in the reduced matrix, multiplied by -1 once for every row-interchange operation, divided by the product of all of the factors used to scale individual rows. In this case, there are no such factors, so the result is simply the product of the diagonal elements of the reduced matrix, with the sign correction. This is the method used by the code posted in the original question. There were so many issues here that I just wrote a fresh program that implements this algorithm. I think that it is close, at least in spirit, to what you were trying to accomplish. 
I did add some input validation for the size of the matrix, checking to be sure that the user inputs a positive number, and prompting for re-entry if the input is bad. The input loop that fills the matrix would benefit from similar input validation. Also note that the input size is stored in a signed int, to allow checks for negative input, and a successful input is cast and stored in a variable of type size_t, which is an unsigned integer type guaranteed to hold any array index. This is the correct type to use when indexing arrays, and you will note that size_t is used throughout the program. #include <stdio.h> #include <math.h> #include <stdbool.h> #define DELTA 0.000001 void show_matrix(size_t mx_sz, double mx[mx_sz][mx_sz]); void interchange(size_t r1, size_t r2, size_t mx_sz, double mx[mx_sz][mx_sz]); void reduce(double factor, size_t r1, size_t r2, size_t mx_sz, double mx[mx_sz][mx_sz]); size_t get_pivot(size_t row, size_t mx_sz, double mx[mx_sz][mx_sz]); double find_det(size_t mx_sz, double mx[mx_sz][mx_sz]); int main(void) { size_t n; int read_val, c; printf("Enter size of matrix: "); while (scanf("%d", &read_val) != 1 || read_val < 1) { while ((c = getchar()) != '\n' && c != EOF) { continue; // discard extra characters } printf("Enter size of matrix: "); } n = (size_t) read_val; double matrix[n][n]; printf("Enter matrix elements:\n"); for (size_t i = 0; i < n; i++) { for (size_t j = 0; j < n; j++) { scanf("%lf", &matrix[i][j]); } } printf("You entered:\n"); show_matrix(n, matrix); putchar('\n'); double result = find_det(n, matrix); show_matrix(n, matrix); putchar('\n'); printf("Determinant: %f\n", result); return 0; } void show_matrix(size_t n, double mx[n][n]) { for (size_t i = 0; i < n; i++) { for (size_t j = 0; j < n; j++) { printf("%7.2f", mx[i][j]); } putchar('\n'); } } /* interchange rows r1 and r2 */ void interchange(size_t r1, size_t r2, size_t mx_sz, double mx[mx_sz][mx_sz]) { double temp; for (size_t j = 0; j < mx_sz; j++) { temp = mx[r1][j]; 
mx[r1][j] = mx[r2][j]; mx[r2][j] = temp; } } /* add factor * row r1 to row r2 to replace row r2 */ void reduce(double factor, size_t r1, size_t r2, size_t mx_sz, double mx[mx_sz][mx_sz]) { for (size_t j = 0; j < mx_sz; j++) { mx[r2][j] += (factor * mx[r1][j]); } } /* returns pivot column, or mx_sz if there is no pivot */ size_t get_pivot(size_t row, size_t mx_sz, double mx[mx_sz][mx_sz]) { size_t j = 0; while (j < mx_sz && fabs(mx[row][j]) < DELTA) { ++j; } return j; } double find_det(size_t mx_sz, double mx[mx_sz][mx_sz]) { size_t pivot1, pivot2; size_t row; double factor; bool finished = false; double result = 1.0; while (!finished) { finished = true; row = 1; while (row < mx_sz) { // determinant is zero if there is a zero row if ((pivot1 = get_pivot(row - 1, mx_sz, mx)) == mx_sz || (pivot2 = get_pivot(row, mx_sz, mx)) == mx_sz) { return 0.0; } if (pivot1 == pivot2) { factor = -mx[row][pivot1] / mx[row - 1][pivot1]; reduce(factor, row - 1, row, mx_sz, mx); finished = false; } else if (pivot2 < pivot1) { interchange(row - 1, row, mx_sz, mx); result = -result; finished = false; } ++row; } } for (size_t j = 0; j < mx_sz; j++) { result *= mx[j][j]; } return result; } Sample session: Enter size of matrix: oops Enter size of matrix: 0 Enter size of matrix: -1 Enter size of matrix: 3 Enter matrix elements: 0 1 3 1 2 0 0 3 4 You entered: 0.00 1.00 3.00 1.00 2.00 0.00 0.00 3.00 4.00 1.00 2.00 0.00 -0.00 -3.00 -9.00 0.00 0.00 -5.00 Determinant: 5.000000
How can I improve locality of reads and writes in the following code?
I'm working on the following image convolution code: typedef struct fmatrix{ int rows; int cols; float** array; } fmatrix; typedef struct image{ unsigned char* data; int w; int h; int c; } image; typedef struct kernel{ fmatrix* psf; int divisor; } kernel; void convolve_sq(image* src, image* dst, kernel* psf, int pixel){ int size = psf->psf->rows * psf->psf->cols; float tmp[size]; int n, m; //for psf int x, y, x0, y0, cur; //for image y0 = pixel / (src->w * src->c); x0 = (pixel / src->c) % src->w; for (n = 0; n < psf->psf->rows; ++n){ for (m = 0; m < psf->psf->cols; ++m){ y = n - (psf->psf->rows / 2); x = m - (psf->psf->cols / 2); if ((y + y0) < 0 || (y + y0) >= src->h || (x + x0) < 0 || (x + x0) >= src->w){ tmp[n*psf->psf->rows+m] = 255 * psf->psf->array[n][m]; } else{ cur = (pixel + y * src->w * src->c + x * src->c); tmp[n*psf->psf->rows+m] = src->data[cur] * psf->psf->array[n][m]; //misses on read } } } m = 0; for (n = 0; n < size; ++n){ m += (int) tmp[n]; } m /= psf->divisor; if (m < 0) m = 0; if (m > 255) m = 255; dst->data[pixel] = m; //misses on write } void convolve_image(image* src, image* dst, kernel* psf){ int i, j, k; for (i = 0; i < src->h; ++i){ for (j = 0; j < src->w; ++j){ for (k = 0; k < src->c; ++k){ convolve_sq(src, dst, psf, (i * src->w * src->c + j * src->c + k) ); } } } } Running cachegrind, I've determined two places where there are a substantial number of cache misses, which I've annotated in the code above. For the line marked "misses on read", there were 97,205 D1mr and 97,201 DLmr. For the line marked "misses on write", there were 97,201 D1mw and DLmw. These lines read and write directly to/from the image respectively. How can I make this code more efficient, in terms of avoiding cache misses?