lapack dgels_ segmentation fault 11 - c

I am trying to use LAPACK's dgels_ in C to solve a linear least squares problem. I have to read the matrix A (assumed to have full rank and m >= n) and a vector b from two text files. The code compiles fine, but when I try to run it I get a "segmentation fault 11", and I can't see why. It is my first time using LAPACK, so I don't know if I am using the dgels_ function wrong. As I understand it, the solution x is written back into the vector b:
lssolve.c:
#include <stdlib.h>
#include <stdio.h>
#include "linalg.h"
/* C prototype for LAPACK routine DGELS */
void dgels_(const char * trans, const int * m, const int * n, const int *
nrhs, double * A, const int * lda, double * B, const int * ldb, double * work,
int * lwork,int * info);
int main(int argc, char * argv[]) {
vector_t * b_t = NULL;
matrix_t * A_t = NULL;
char trans = 'N';
int m, n, nrhs, mb, lda, ldb, info, lwork;
double optwork;
double * work;
// we read the matrix A and the vector b:
b_t = read_vector("b.txt");
A_t = read_matrix("A.txt");
m = A_t-> m; //number of rows in A
n = A_t-> n; //number of columns in A
nrhs = 1; //number of columns in B (will always be 1, since we read b_t with read_vector)
mb = b_t -> n; //number of rows in B
if (mb != m ) { //end program if A and B don't have the same number of rows
free(A_t);
free(b_t);
fprintf(stderr, "Sorry, but the matrix A and the vector b have incompatible dimensions. Good Bye!\n");
exit(EXIT_FAILURE);
}
//We make A and B into the wanted input form for the dgels_-function:
double * B = b_t -> v;
double ** A = A_t ->A;
lda = m;
ldb = mb;
//we calculate the optimal size of the work array:
lwork = -1;
dgels_(&trans, &n, &m, &nrhs, *A, &lda, B, &ldb, &optwork, &lwork, &info);
lwork = (int)optwork;
//we allocate space for the work array:
work = (double*)malloc( lwork*sizeof(double));
//solving the least squares problem:
dgels_(&trans, &n, &m, &nrhs, *A, &lda, B, &ldb, work, &lwork, &info);
//Check whether there was an successful exit:
if (info > 0){
fprintf(stderr, "Sorry, but illegal arguments were used, and therefore a least square solution cannot be computes. Good Bye!\n");
exit(EXIT_FAILURE);
} else if(info < 0){
fprintf(stderr, "Sorry, but A doesn't have full rank, and therefore a least square solution cannot be computed. Good Bye!\n");
exit(EXIT_FAILURE);
}
//Saving the least square problem as a vector_t:
vector_t * x = NULL;
x->n = mb;
x->v = B;
print_vector(x);
//Free memory
free_vector(b_t);
free_matrix(A_t);
free_vector(x);
return(EXIT_SUCCESS);
}
I am using the functions read_matrix, read_vector, print_vector, print_matrix and free_vector, which is why I use the structs vector_t and matrix_t:
typedef struct vector {
unsigned long n; /* length of vector */
double * v; /* pointer to array of length n */
} vector_t;
typedef struct matrix {
unsigned long m; /* number of rows */
unsigned long n; /* number of columns */
double ** A; /* pointer to two-dimensional array */
} matrix_t;
I don't think anything is wrong with read_vector and read_matrix, because I can read the files and use print_vector or print_matrix without problems before doing any of the other operations.

You dereference a NULL pointer here, causing the segfault:
//Saving the least square problem as a vector_t:
vector_t * x = NULL;
x->n = mb;
x->v = B;
Maybe you should create an actual vector_t (or allocate one) instead of just a pointer to a vector_t?
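A minimal sketch of that idea (mirroring your original assignments, just without the NULL pointer; since x.v would alias b_t->v, only free b_t afterwards):
//Saving the least squares solution as a vector_t:
vector_t x;   // an actual object, so its members exist
x.n = mb;
x.v = B;      // B still points into b_t, so don't free it twice
print_vector(&x);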

Related

C malloc giving pointer to already used memory

I have a struct in my code that gets its value changed even though I don't reference it at all. The code is:
layer l = linear(2, 3);
neural_network network = create_network();
add_layer(&network, &l);
printf("Before: %llu\n", network.layers[0].weights.column_size);
matrix i = create_matrix(2, 1);
printf("After: %llu\n", network.layers[0].weights.column_size);
And the output to this code is:
Before: 2
After: 0
This doesn't make sense to me as create_matrix is defined as:
matrix create_matrix(uint64_t row_size, uint64_t column_size) {
matrix mat;
mat.row_size = row_size;
mat.column_size = column_size;
mat.array = malloc(sizeof(double) * mat.row_size * mat.column_size);
return mat;
}
I guessed that malloc was messing up for some reason so I printed the addresses of each one and got this:
Address of network.layers[0].weights.column_size: 0x600000b4c010
Before: 2
Address of mat.array: 0x600000b4c010
After: 0
So C is somehow allocating heap memory that should already be used. I'm not really sure why this is going on as I never freed any of the memory. The relevant structs are defined as:
typedef struct layer {
uint64_t neurons;
matrix weights;
matrix biases;
matrix (*compute_activations)(struct layer *l, matrix *activations);
} layer;
typedef struct {
uint16_t number_of_layers;
layer *layers;
} neural_network;
typedef struct {
uint64_t row_size;
uint64_t column_size;
double *array;
} matrix;
Minimal Reproducible Example:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cblas.h>
#include <string.h>
typedef struct {
uint64_t row_size;
uint64_t column_size;
double *array;
} matrix;
typedef struct layer{
uint64_t neurons;
matrix weights;
matrix biases;
matrix (*compute_activations)(struct layer *l, matrix *activations);
} layer;
typedef struct {
uint16_t number_of_layers;
layer *layers;
} neural_network;
matrix create_matrix(uint64_t row_size, uint64_t column_size) {
matrix mat;
mat.row_size = row_size;
mat.column_size = column_size;
mat.array = malloc(sizeof(double) * mat.row_size * mat.column_size);
return mat;
}
matrix matrix_m_multiply(matrix *A, matrix *B, matrix *C, double alpha, double beta) {
matrix C_copy = create_matrix(C->row_size, C->column_size);
memcpy(C_copy.array, C->array, C->row_size * C->column_size * sizeof(double));
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
A->row_size, B->column_size, A->column_size, alpha,
A->array, A->column_size, B->array, B->column_size, beta,
C_copy.array, C->column_size);
return C_copy;
}
matrix matrix_v_multiply(matrix *A, matrix *B, matrix *C, double alpha, double beta) {
matrix C_copy = create_matrix(C->row_size, C->column_size);
memcpy(C_copy.array, C->array, C->row_size * C->column_size * sizeof(double));
cblas_dgemv(CblasRowMajor, CblasNoTrans,
A->row_size, A->column_size, alpha,
A->array, A->column_size, B->array, 1, beta,
C_copy.array, 1);
return C_copy;
}
neural_network create_network() {
neural_network network = { .number_of_layers = 0 };
network.layers = malloc(sizeof(layer) * network.number_of_layers);
return network;
}
void add_layer(neural_network *network, layer *l) {
network->layers = realloc(network->layers, ++network->number_of_layers);
network->layers[network->number_of_layers - 1] = *l;
}
matrix forward_pass(neural_network *network, matrix *inputs) {
matrix activations = *inputs;
for (int i = 0; i < network->number_of_layers; i++) {
activations = network->layers[i].compute_activations(&network->layers[i], &activations);
}
return activations;
}
matrix linear_function(layer *linear_layer, matrix *activations) {
return matrix_v_multiply(&linear_layer->weights, activations, &linear_layer->biases, 1.0, 1.0);
}
layer linear(uint64_t in, uint64_t out) {
layer linear_layer;
linear_layer.neurons = out;
linear_layer.weights = create_matrix(out, in); // Don't have to transpose matrix when doing vector product with inputs
linear_layer.biases = create_matrix(out, 1);
linear_layer.compute_activations = linear_function;
return linear_layer;
}
int main(int argc, char *argv[]) {
srand(0);
layer l = linear(2, 3);
neural_network network = create_network();
add_layer(&network, &l);
printf("Before: %llu\n", network.layers[0].weights.column_size);
matrix i = create_matrix(2, 1);
printf("After: %llu\n", network.layers[0].weights.column_size);
matrix output = forward_pass(&network, &i);
return 0;
}
There are at least two problems in your code:
In create_network(), you allocate 0 bytes with malloc(), which has implementation-defined behavior. You might instead initialize the layers member to a null pointer:
neural_network create_network(void) {
neural_network network = { .number_of_layers = 0, .layers = NULL };
return network;
}
The size reallocated by add_layer() is incorrect: you forgot to multiply the new number of elements by the element size. This problem is the likely explanation for your observations. You should write:
void add_layer(neural_network *network, layer *l) {
network->layers = realloc(network->layers,
sizeof(*network->layers) *
(network->number_of_layers + 1));
network->layers[network->number_of_layers++] = *l;
}
You do not check for memory allocation failure anywhere in your code. I would recommend using malloc, calloc, strdup and realloc wrappers to test for unlikely yet possible allocation failure and exit with an informative message.
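As a sketch, such wrappers might look like this (the names xmalloc and xrealloc are illustrative, not from any library):
#include <stdio.h>
#include <stdlib.h>
/* Allocate memory or exit with a message. */
static void *xmalloc(size_t size) {
    void *p = malloc(size);
    if (p == NULL && size != 0) {
        fprintf(stderr, "fatal: cannot allocate %zu bytes\n", size);
        exit(EXIT_FAILURE);
    }
    return p;
}
/* Grow a buffer or exit with a message; on failure we simply exit. */
static void *xrealloc(void *ptr, size_t size) {
    void *p = realloc(ptr, size);
    if (p == NULL && size != 0) {
        fprintf(stderr, "fatal: cannot allocate %zu bytes\n", size);
        exit(EXIT_FAILURE);
    }
    return p;
}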

Matrix multiplication with MKL

I have the CSR coordinates of a matrix.
/* alloc space for COO matrix */
int *coo_rows = (int*) malloc(K.n_rows * sizeof(int));
int *coo_cols = (int*) malloc(K.n_rows * sizeof(int));
float *coo_vals = (float*) malloc(K.n_rows * sizeof(float));
/*Load coo values*/
int *rowptrs = (int*) malloc((N_unique+1)*sizeof(int));
int *colinds = (int*) malloc(K.n_rows *sizeof(int));
double *vals = (double*) malloc(K.n_rows *sizeof(double));
/* take csr values */
int job[] = {
2, // job(1)=2 (coo->csr with sorting)
0, // job(2)=1 (one-based indexing for csr matrix)
0, // job(3)=1 (one-based indexing for coo matrix)
0, // empty
n1, // job(5)=nnz (sets nnz for csr matrix)
0 // job(6)=0 (all output arrays filled)
};
int info;
mkl_scsrcoo(job, &n, vals, colinds, rowptrs, &n1, coo_vals, coo_rows, coo_cols, &info);
assert(info == 0 && "Converted COO->CSR");
Now I want to apply the mkl_dcsrmm function to compute C := alpha*A*B + beta*C with beta = 0;
/* function declaration */
void mkl_dcsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);
So far I have:
int A_rows = ..., A_cols = ..., C_cols = ...
double alpha = 1.0;
mkl_dcsrmm ((char*)"N", &A_rows, &C_cols, &A_cols, &alpha, char *matdescra, vals, coo_cols, rowptrs, colinds , double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);
I found some difficulties filling in the inputs. Could you please help me fill in the rest?
A specific input I want to go into in more detail is matdescra. I borrowed the following code from the cspblas_ccsr example:
char matdescra[6];
matdescra[0] = 'g';
matdescra[1] = 'l';
matdescra[2] = 'n';
matdescra[3] = 'c';
but I have some questions about it. The matrix A I am working with is not triangular, yet the initialization of this char array seems to require such a declaration. How should I configure the parameters of the matdescra array?
Here is what I use, and what works for me.
char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
G: General. D: Diagonal
L/U Lower/Upper triangular (ignored with G)
N: non-unit diagonal (ignored with G)
C: zero-based indexing.
*/
Complete Example
Here is a complete example. I first create a random matrix by filling a dense matrix with a specified density of Non-Zero elements. Then I convert it to a sparse matrix in CSR-format. Finally, I do the multiplication using mkl_dcsrmm. As a possible check (check not done), I do the same multiplication using the cblas_dgemm function with the dense matrix.
#include "mkl.h"
#include "mkl_spblas.h"
#include <stddef.h> // For NULL
#include <stdlib.h> // for rand()
#include <assert.h>
#include <stdio.h>
#include <limits.h>
// Compute C = A * B; where A is sparse and B is dense.
int main() {
MKL_INT m=10, n=5, k=11;
const double sparsity = 0.9; ///< #param sparsity Values below which are set to zero (sampled from uniform(0,1)-distribution).
double *A_dense;
double *B;
double *C;
double alpha = 1.0;
double beta = 0.0;
const int allignment = 64;
// Seed the RNG to always be the same
srand(42);
// Allocate memory to matrices
A_dense = (double *)mkl_malloc( m*k*sizeof( double ), allignment);
B = (double *)mkl_malloc( k*n*sizeof( double ), allignment);
C = (double *)mkl_malloc( m*n*sizeof( double ), allignment);
if (A_dense == NULL || B == NULL || C == NULL) {
printf("ERROR: Can't allocate memory for matrices. Aborting... \n\n");
mkl_free(A_dense);
mkl_free(B);
mkl_free(C);
return 1;
}
// Initializing matrix data
int i;
int nzmax = 0;
for (i = 0; i < (m*k); i++) {
double val = rand() / (double)RAND_MAX;
if ( val < sparsity ) {
A_dense[i] = 0.0;
} else {
A_dense[i] = val;
nzmax++;
}
}
for (i = 0; i < (k*n); i++) {
B[i] = rand();
}
for (i = 0; i < (m*n); i++) {
C[i] = 0.0;
}
// Convert A to a sparse matrix in CSR format.
// INFO: https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8.htm#GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8
MKL_INT job[6];
job[0] = 0; // convert TO CSR.
job[1] = 0; // Zero-based indexing for input.
job[2] = 0; // Zero-based indexing for output.
job[3] = 2; // adns is a whole matrix A.
job[4] = nzmax; // Maximum number of non-zero elements allowed.
job[5] = 3; // all 3 arrays are generated for output.
/* JOB: conversion parameters
* m: number of rows of A.
* k: number of columns of A.
* adns: (input/output). Array containing non-zero elements of the matrix A.
* lda: specifies the leading dimension of adns. must be at least max(1, m).
* acsr: (input/output) array containing non-zero elements of the matrix A.
* ja: array containing the column indices.
* ia length m+1, rowIndex.
* OUTPUT:
* info: 0 if successful. i if interrupted at i-th row because of lack of space.
*/
int info = -1;
printf("nzmax:\t %d\n", nzmax);
double *A_sparse = mkl_malloc(nzmax * sizeof(double), allignment);
if (A_sparse == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse.\n");
return 1;
}
MKL_INT *A_sparse_cols = mkl_malloc(nzmax * sizeof(MKL_INT), allignment);
if (A_sparse_cols == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse_cols.\n");
return 1;
}
MKL_INT *A_sparse_rowInd = mkl_malloc((m+1) * sizeof(MKL_INT), allignment);
if (A_sparse_rowInd == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse_rowInd.\n");
return 1;
}
mkl_ddnscsr(job, &m, &k, A_dense, &k, A_sparse, A_sparse_cols, A_sparse_rowInd, &info);
if(info != 0) {
printf("WARNING: info=%d, expected 0.\n", info);
}
assert(info == 0);
char transa = 'n';
MKL_INT ldb = n, ldc=n;
char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
G: General. D: Diagonal
L/U Lower/Upper triangular (ignored with G)
N: non-unit diagonal (ignored with G)
C: zero-based indexing.
*/
mkl_dcsrmm(&transa, &m, &n, &m, &alpha, matdescra, A_sparse, A_sparse_cols,
A_sparse_rowInd, &(A_sparse_rowInd[1]), B, &ldb, &beta, C, &ldc);
// The same computation in dense format
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, A_dense, k, B, n, beta, C, n);
mkl_free(A_dense);
mkl_free(A_sparse);
mkl_free(A_sparse_cols);
mkl_free(A_sparse_rowInd);
mkl_free(B);
mkl_free(C);
return 0;
}

In C, initializing an array using a variable led to stack overflow error or caused R to crash when code is called in R

Okay. My original question turned out to be caused by not initializing some arrays. The original issue had to do with code crashing R. When I was trying to debug it by commenting things out, I mistakenly commented out the lines that initialized the arrays, so I thought my problem had to do with passing pointers.
The actual problem is this. As I said before, I want to use outer_pos to calculate outer differences and pass both a pointer to the results and the total number of positive differences back to a function that calls outer_pos.
#include <R.h>
#include <Rmath.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
void outer_pos(double *x, double *y, int *n, double *output){
int i, j, l=0;
for(i=0; i<*n; i++){
for(j=0; j<*n; j++){
if((x[j]-x[i])>0){
output[l+1]=(y[j]-y[i])/(x[j]-x[i]);
output[0]=(double)(++l);
}
}
}
Rprintf("%d\n", (int)output[0]);
}
void foo1(double *x, double *y, int *nsamp){
int i, j, k, oper=2, l;
double* v1v2=malloc(sizeof(double)*((*nsamp)*(*nsamp-1)/2 + 1));
outer_pos(x, y, nsamp, &v1v2[0]);
double v1v2b[1999000]; // <--------------HERE
for(i=1; i<= (int)v1v2[0]; i++){
v1v2b[i-1]=1;
}
}
Suppose foo1 is the function that calls outer_pos. Here I specified the size of the array v1v2b using an actual number 1999000. This value corresponds to the number of positive differences. Calling foo1 from R causes no problem. It's all fine.
In the scenario above, I know the number of positive differences, so I can use the actual value to set the array size. But I would like to accommodate situations where I don't necessarily know the value. foo2 below is intended to do that. As you can see, v1v2b is sized using the first value of the array v1v2; recall that the first slot of the output of outer_pos stores the number of positive differences, so basically I use this value to set v1v2b's size. However, calling this function in R either produces a stack overflow error or crashes R.
void foo2(double *x, double *y, int *nsamp){
int i, j, k, oper=2, l;
double* v1v2=malloc(sizeof(double)*((*nsamp)*(*nsamp-1)/2 + 1));
outer_pos(x, y, nsamp, &v1v2[0]);
double v1v2b[(int)v1v2[0]]; //<--------HERE
for(i=1; i<= (int)v1v2[0]; i++){
v1v2b[i-1]=1;
}
}
So I thought maybe it has to do with indexing. Maybe the actual size of v1v2b is too small, or something, so the loop iterates out of bounds. So I created foo2b, in which I commented out the loop and used Rprintf to print the first slot of v1v2 to see if the value stored in it is correct. But the value of v1v2[0] seems to be correct, namely 1999000. So I don't know what is happening here.
Sorry for the confusion with my previous question!!
void foo2b(double *x, double *y, int *nsamp){
int i, j, k, oper=2, l;
double* v1v2=malloc(sizeof(double)*((*nsamp)*(*nsamp-1)/2 + 1));
outer_pos(x, y, nsamp, &v1v2[0]);
double v1v2b[(int)v1v2[0]]; //<----Array size declared by a variable
Rprintf("%d", (int)v1v2[0]);
//for(i=1; i<= (int)v1v2[0]; i++){
//v1v2b[i-1]=v1v2[i];
//}
}
R code to run the code above:
x=rnorm(2000)
y=rnorm(2000)
.C("foo1", x=as.double(x), y=as.double(y), nsamp=as.integer(2000))
.C("foo2", x=as.double(x), y=as.double(y), nsamp=as.integer(2000))
.C("foo2b", x=as.double(x), y=as.double(y), nsamp=as.integer(2000))
** FOLLOW UP **
I modified my code based on Martin's suggestion to check if the stack overflow issue can be resolved:
void foo2b(double *x, double *y, int *nsamp) {
int n = *nsamp, i;
double *v1v2, *v1v2b;
v1v2 = (double *) R_alloc(n * (n - 1) / 2 + 1, sizeof(double));
/* outer_pos(x, y, nsamp, v1v2); */
v1v2b = (double *) R_alloc((size_t) v1v2[0], sizeof(int));
for(i=0; i< (int)v1v2[0]; i++){
v1v2b[i]=1;
}
//qsort(v1v2b, (size_t) v1v2[0], sizeof(double), mycompare);
/* ... */
}
After compiling it, I ran the code:
x=rnorm(1000)
y=rnorm(1000)
.C("foo2b", x=as.double(x), y=as.double(y), nsamp=as.integer(length(x)))
And got an error message:
Error: cannot allocate memory block of size 34359738368.0 Gb
** FOLLOW UP 2 **
It seems that the error message shows up every other run of the function. At least it did not crash R... So basically the function alternates between running with no problem and showing an error message.
(I included both headers in my script file).
As before, you're allocating on the stack, but you should be allocating from the heap. Correct this using malloc / free as you did in your previous question (actually, I think the recommended approach is Calloc / Free, or, if your code returns to R, simply R_alloc; R_alloc automatically recovers the memory when returning to R, even in the case of an error that R catches).
qsort is mentioned in a comment. It takes as its final argument a user-supplied function that defines how its first argument is to be sorted. The signature of qsort (from man qsort) is
void qsort(void *base, size_t nmemb, size_t size,
int(*compar)(const void *, const void *));
with the final argument being 'a pointer to a function that takes two constant void pointers and returns an int'. A function satisfying this signature and sorting pointers to two doubles according to the specification on the man page is
int mycompare(const void *p1, const void *p2)
{
const double d1 = *(const double *) p1,
d2 = *(const double *) p2;
return d1 < d2 ? -1 : (d2 > d1 ? 1 : 0);
}
So
#include <Rdefines.h>
#include <stdlib.h>
int mycompare(const void *p1, const void *p2)
{
const double d1 = *(const double *) p1,
d2 = *(const double *) p2;
return d1 < d2 ? -1 : (d2 > d1 ? 1 : 0);
}
void outer_pos(double *x, double *y, int *n, double *output){
int i, j, l = 0;
for (i = 0; i < *n; i++) {
for (j = 0; j < *n; j++) {
if ((x[j] - x[i]) > 0) {
output[l + 1] = (y[j] - y[i]) / (x[j] - x[i]);
output[0] = (double)(++l);
}
}
}
}
void foo2b(double *x, double *y, int *nsamp) {
int n = *nsamp;
double *v1v2, *v1v2b;
v1v2 = (double *) R_alloc(n * (n - 1) / 2 + 1, sizeof(double));
outer_pos(x, y, nsamp, v1v2);
v1v2b = (double *) R_alloc((size_t) v1v2[0], sizeof(double));
qsort(v1v2b, (size_t) v1v2[0], sizeof(double), mycompare);
/* ... */
}
When foo2b calls outer_pos, it is passing two allocated but uninitialized arrays as x and y. You can't depend on their contents, thus you have different results from different invocations.
Edit
You're dangerously close to your stack size with 1999000 doubles, which take just over 15.25MB, and that's because you're on Mac OS. On most other platforms, threads don't get anywhere near 16M of stack.
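(For reference: 1,999,000 doubles × 8 bytes = 15,992,000 bytes, which is just over 15.25 MiB.)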
You don't start out with a clean (empty) stack when you call this function -- you're deep into R functions, each creating frames that take space on the stack.
Edit 2
Below, you are using an uninitialized value v1v2[0] as an argument to R_alloc. That you get an error sometimes (and not always) is not a surprise.
v1v2 = (double *) R_alloc(n * (n - 1) / 2 + 1, sizeof(double));
/* outer_pos(x, y, nsamp, v1v2); */
v1v2b = (double *) R_alloc((size_t) v1v2[0], sizeof(int));

error C2102: '&' requires l-value

The code line gsl_blas_daxpy(-a,&gsl_matrix_column(D, q).vector,y); causes the error
error C2102: '&' requires l-value
The problem is that I have no control over the GSL functions, so I don't know how to work around this (removing the "&" didn't work). Afterwards I get
error C2198: 'gsl_blas_daxpy' : too few arguments for call
I'm using Visual Studio 2010.
GSL_EXPORT int gsl_blas_daxpy (double alpha,
const gsl_vector * X,
gsl_vector * Y);
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_blas.h>
#define M (10) // Number of columns in dictionary */
#define N ((int)(M/2)) // Number of rows in dictionary */
int K = 0.07*M; //Number of non-zero elements in signal - the sparsity
int P=1; //number of signals
double epsilon = 1.0e-7; // Residual error
int numOfIterations = N; /* Max num of iterations - same as num of elements in signal */
double sign(double x){return (x>=0) - (x<0);} // Sign function
int main(int argc, char** argv)
{
int n, m, k, iter, q;
double normi, normf, tmp , norm=sqrt(N), htime;
gsl_matrix *D; // A random dictionary used for encoding the sparse signal NxM
gsl_vector *x; // Sparse info signal (encoder input) MxP
gsl_vector *z; // Evaluated Sparse info signal (decoder output) MxP
gsl_vector *r; // Residual error vector MxP
gsl_vector *y; // Sparse representation of signal (encoder output) NxP
gsl_vector_view v;
clock_t start; //for measuring performance
printf("\nDictionary is:NxM=%dx%d,and the signal sparsity is K=%d", N, M, K);
srand(time(NULL)); //Initialize srand
start =clock(); //Initialize clock
/* Initiallize D as a Bernoulli random dictionary */
D = gsl_matrix_alloc (N, M);
for(m=0; m<M; m++)
{
for(n=0; n<N; n++)
{
tmp=sign(2.0*rand()/(double)RAND_MAX-1.0)/norm;
gsl_matrix_set (D, n, m, tmp); //D[n,m]=tmp
}
}
/* Create a random K-sparse info signal */
x = gsl_vector_alloc(M);
for(k=0; k<K; k++)
{
gsl_vector_set(x, rand()%M, 2.0*rand()/(float)RAND_MAX - 1.0); //put random values at k random positions
}
/* Allocate memory for solution (evaluated signal) */
z = gsl_vector_calloc(M);
/* Allocate memory for residual vector */
r = gsl_vector_calloc(M);
/* Allocate memory for the encoded signal vector (its representation) */
y = gsl_vector_alloc(N);
htime=((double)clock()-start)/CLOCKS_PER_SEC;
printf("\nTime data allocation: %f", htime);
/* Encoding the signal (x to y) */
start = clock();
gsl_blas_dgemv(CblasNoTrans, 1, D, x, 0, y); // y = Dx
htime=((double)clock()-start)/CLOCKS_PER_SEC;
printf("\nTime for encoding: %f", htime);
/* Decoding the signal */
start = clock();
normi = gsl_blas_dnrm2(y); // ||y|| (L2 norm)
epsilon = sqrt(epsilon * normi);
normf = normi;
iter = 0;
/*iterate till the computational error is small enough*/
while(normf > epsilon && iter < numOfIterations)
{
gsl_blas_dgemv(CblasTrans, 1, D, y, 0, r); // r=D'*y
q = gsl_blas_idamax(r); //index of max element in residual vector
tmp = gsl_vector_get(r, q); //the max element in r
gsl_vector_set(z, q, gsl_vector_get(z, q)+tmp); // z[q]=z[q]+ tmp
v=gsl_matrix_column(D, q); // choose the dictrionary's atom (coloum) with the index of largest element in r
gsl_blas_daxpy(-tmp,&v.vector,y); // y = y-tmp*v
normf = gsl_blas_dnrm2(y); // ||y|| (L2 norm)
iter++;
}
htime = ((double)clock()-start)/CLOCKS_PER_SEC;
printf("\nTime for decoding: %f", htime);
tmp = 100.0*(normf*normf)/(normi*normi); // the error at end of algorithm
printf("\nComputation residual error: %f",tmp);
/* Check the solution (evaluated signal) against the original signal */
printf("\nSolution (first column),Reference (second column):");
getchar(); // wait for pressing a key
for(m=0; m<M; m++)
{
printf("\n%.3f\t%.3f", gsl_vector_get(x, m),gsl_vector_get(z, m));
}
normi = gsl_blas_dnrm2(x);
gsl_blas_daxpy(-1.0, x, z); // z = z-x
normf = gsl_blas_dnrm2(z); // ||z|| (L2 norm)
tmp = 100.0*(normf*normf)/(normi*normi); //final error
printf("\nSolution residual error: %f\n",tmp);
/* Memory clean up and shutdown*/
gsl_vector_free(y); gsl_vector_free(r);
gsl_vector_free(z); gsl_vector_free(x);
gsl_matrix_free(D);
getchar();
return EXIT_SUCCESS;
}
gsl_matrix_column(D, q).vector is an R-value. You can't take its address. You need an L-value, so assign it to a named variable first, then pass the address of that variable to the function.
If you make a more permanent home for the return value of gsl_matrix_column, (this particular) problem will go away.
Here is some simplified code that illustrates how one might capture a return value in an addressable slot:
struct _foo {
int i;
};
struct _foo bar () {
struct _foo result = { 5 };
return result;
}
/* won't compile; 'lvalue required as unary & operand */
void qux () {
int *j = &bar().i;
}
/* compiles OK */
void qal () {
struct _foo result = bar();
int* j = &result.i;
}
gsl_vector_view c=gsl_matrix_column(D, q);
gsl_blas_daxpy(-a,&c.vector,y);
Introducing a temporary variable lets you pass a pointer to it to the function.
EDIT: Well, trying to understand the problem, I wanted to know what the function expects:
int gsl_blas_daxpy (double alpha, const gsl_vector * x, gsl_vector * y)
and
gsl_vector_view gsl_matrix_column (gsl_matrix * m, size_t j)
with some explanation:
A vector view can be passed to any subroutine which takes a vector
argument just as a directly allocated vector would be, using
&view.vector.
and an example:
for (j = 0; j < 10; j++)
{
gsl_vector_view column = gsl_matrix_column (m, j);
double d;
d = gsl_blas_dnrm2 (&column.vector);
printf ("matrix column %d, norm = %g\n", j, d);
}
Now we have another problem.
Here is another issue:
Are you aware that int K = 0.07*M gives K = 0? With M = 10, 0.07*M evaluates to 0.7, which truncates to 0 when assigned to an int.
#define M (10) // Number of columns in dictionary */
int K = 0.07*M; //Number of non-zero elements in signal - the sparsity
gsl_vector_alloc does not initialize the vector x; x will contain garbage values, not 0. Did you mean x = gsl_vector_calloc(M), with a c? That sets x to 0.
/* Create a random K-sparse info signal */
x = gsl_vector_alloc(M);
for(k=0; k<K; k++) // K=0, for get skiped and x not modified.
{
gsl_vector_set(x, rand()%M, 2.0*rand()/(float)RAND_MAX - 1.0); //put random values at k random positions
}
(And here you will set at most K random values, but possibly fewer, since rand()%M can hit the same position more than once.)
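If you want exactly K distinct positions, one possible sketch (not from the original code; it assumes k <= m and that x was created with gsl_vector_calloc so untouched entries are exactly 0):
/* Fill exactly k distinct random positions of x (length m) with values in (-1, 1). */
static void set_k_sparse(gsl_vector *x, int m, int k)
{
    int placed = 0;
    while (placed < k) {
        int pos = rand() % m;
        if (gsl_vector_get(x, pos) == 0.0) { /* slot not used yet */
            gsl_vector_set(x, pos, 2.0*rand()/(double)RAND_MAX - 1.0);
            placed++;
        }
    }
}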

Computing the reciprocal condition number with lapack (i.e. rcond(x))

I wish to do exactly what rcond does in MATLAB/Octave using LAPACK from C.
The MATLAB manual tells me dgecon is used, and that it uses the 1-norm.
I wrote a simple test program for an extremely simple case: [1,1; 1,0]
For this input MATLAB and Octave give me 0.25 using rcond and 1/cond(x,1), but this LAPACK sample program prints 0.0. For other cases, such as the identity matrix, it prints the correct value.
Since MATLAB is supposedly using this routine successfully, what am I doing wrong?
I'm trying to decipher what Octave does, with little success, as it's wrapped in
#include <stdio.h>
extern void dgecon_(const char *norm, const int *n, const double *a,
const int *lda, const double *anorm, double *rcond, double *work,
int *iwork, int *info, int len_norm);
int main()
{
int i, info, n, lda;
double anorm, rcond;
double w[8] = { 0,0,0,0,0,0,0,0 };
int iw[2] = { 0,0 };
double x[4] = { 1, 1, 1, 0 };
anorm = 2.0; /* maximum column sum, computed manually */
n = 2;
lda = 2;
dgecon_("1", &n, x, &lda, &anorm, &rcond, w, iw, &info, 1);
if (info != 0) fprintf(stderr, "failure with error %d\n", info);
printf("%.5e\n", rcond);
return 0;
}
Compiled with cc testdgecon.c -o testdgecon -llapack; ./testdgecon
I found the answer to my own question.
The matrix must be LU-decomposed before it is sent to dgecon. This seems very logical, since one often wants to solve the system after checking the condition, in which case there is no need to decompose the matrix twice. The same goes for the norm, which is computed separately.
The following code contains all the parts necessary to compute the reciprocal condition number with LAPACK.
#include "stdio.h"
extern int dgecon_(const char *norm, const int *n, double *a, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info, int len);
extern int dgetrf_(const int *m, const int *n, double *a, const int *lda, int *lpiv, int *info);
extern double dlange_(const char *norm, const int *m, const int *n, const double *a, const int *lda, double *work, const int norm_len);
int main()
{
int i, info, n, lda;
double anorm, rcond;
int iw[2];
double w[8];
double x[4] = {7,3,-9,2 };
n = 2;
lda = 2;
/* Computes the norm of x */
anorm = dlange_("1", &n, &n, x, &lda, w, 1);
/* Modifies x in place with a LU decomposition */
dgetrf_(&n, &n, x, &lda, iw, &info);
if (info != 0) fprintf(stderr, "failure with error %d\n", info);
/* Computes the reciprocal norm */
dgecon_("1", &n, x, &lda, &anorm, &rcond, w, iw, &info, 1);
if (info != 0) fprintf(stderr, "failure with error %d\n", info);
printf("%.5e\n", rcond);
return 0;
}
