#include<stdio.h>
#include<fftw3.h>
#include<math.h>
#include <stdlib.h>
/*
 * Samples cos(2*pi*fx*x + 2*pi*fz*z) on an N x N grid, runs FFTW's 2-D
 * real-to-complex transform on it and writes the magnitude of every output
 * coefficient to "2DDFT_spacetime.plt" as a Tecplot ordered zone.
 *
 * Returns 0 on success and EXIT_FAILURE when the output file cannot be
 * opened or written, an allocation fails, or FFTW cannot create the plan.
 */
int main(){
    /* Compile-time constants, so x[] and z[] are ordinary arrays, not VLAs.
     * The r2c transform stores only N/2+1 complex values along the last
     * dimension. */
    enum { N = 2000, NZ_HALF = N / 2 + 1 };

    int status = EXIT_FAILURE;
    int i, j;
    FILE *filepointer = NULL;
    double *in = NULL;
    fftw_complex *out = NULL;
    fftw_plan p = NULL;
    const double fx = 13.0;
    const double fz = 9.0;
    const double xstart = 0.0;
    const double xend = 5.0 / fx;
    const double zstart = 0.0;
    const double zend = 5.0 / fz;
    const double dx = (xend - xstart) / (N - 1);
    const double dz = (zend - zstart) / (N - 1);
    double x[N];
    double z[N];

    filepointer = fopen("2DDFT_spacetime.plt", "w");
    if (filepointer == NULL) {
        perror("2DDFT_spacetime.plt");
        goto cleanup;
    }

    /* Use FFTW's allocators for both arrays so they get the alignment FFTW
     * prefers; both must then be released with fftw_free(). */
    in = fftw_alloc_real((size_t)N * N);
    out = fftw_alloc_complex((size_t)N * NZ_HALF);
    if (in == NULL || out == NULL) {
        fprintf(stderr, "memory allocation failed\n");
        goto cleanup;
    }

    /* FFTW_MEASURE may overwrite the arrays while planning, so the plan is
     * created before the input is filled in. */
    p = fftw_plan_dft_r2c_2d(N, N, in, out, FFTW_MEASURE);
    if (p == NULL) {
        fprintf(stderr, "plan creation failed\n");
        goto cleanup;
    }

    x[0] = xstart;
    z[0] = zstart;
    for (i = 1; i < N; i++) {
        x[i] = x[i - 1] + dx;
        z[i] = z[i - 1] + dz;
    }

    /* Row-major input: element (i, j) lives at in[i*N + j]. */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            in[(size_t)i * N + j] = cos(2*M_PI*fx*x[i] + 2*M_PI*fz*z[j]);
        }
    }

    fftw_execute(p);

    fprintf(filepointer,"TITLE =\"FFTW\"\nVARIABLES=\"Wavenumber-x\"\n\"Wavenumber-z\"\n\"Amplitude\"\nZONE T=\"Amplitude\"\n I=%d, J=%d, K=1, ZONETYPE=Ordered\n DATAPACKING=POINT\n DT=(SINGLE SINGLE SINGLE)\n",N,NZ_HALF);

    /* Output is row-major with a row length of NZ_HALF: element (i, j) is
     * out[i*NZ_HALF + j], with [0] the real and [1] the imaginary part.
     * i runs fastest in the file to match the I/J ordering of the zone. */
    for (j = 0; j < NZ_HALF; j++) {
        for (i = 0; i < N; i++) {
            const double re = out[(size_t)i * NZ_HALF + j][0];
            const double im = out[(size_t)i * NZ_HALF + j][1];
            fprintf(filepointer," %.9E %.9E %.9E\n",
                    i/(xend-xstart), j/(zend-zstart), sqrt(re*re + im*im));
        }
    }

    if (ferror(filepointer)) {
        fprintf(stderr, "error while writing 2DDFT_spacetime.plt\n");
        goto cleanup;
    }
    status = 0;

cleanup:
    if (p != NULL) {
        fftw_destroy_plan(p);
    }
    if (in != NULL) {
        fftw_free(in);
    }
    if (out != NULL) {
        fftw_free(out);
    }
    /* fclose flushes buffered data, so a failure here is also a write error. */
    if (filepointer != NULL && fclose(filepointer) != 0) {
        status = EXIT_FAILURE;
    }
    return status;
}
I begin by allocating memory for an N×N array of doubles and an N×(N/2+1) array of fftw_complex (the size of the real-to-complex output), where fftw_complex is defined as typedef double fftw_complex[2] in the FFTW library. I assign real numbers to the array of doubles, perform the real-to-complex FFT, and get the output in the array of fftw_complex.
Should I access the real and imaginary parts of the output complex number as out[i*((int)floor(N/2)+1)+j][0] and out[i*((int)floor(N/2)+1)+j][1] respectively?
The cleanest approach is to include the header for the standard native complex type, #include <complex.h>, before <fftw3.h>, as described in the FFTW documentation.
In particular, if you #include <complex.h> before <fftw3.h>, then fftw_complex is defined to be the native complex type and you can manipulate it with ordinary arithmetic (e.g. x = y * (3+4*I), where x and y are fftw_complex and I is the standard symbol for the imaginary unit);
The following example is compiled using gcc main.c -o main -Wall -lfftw3 -lm
#include<complex.h>
#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<fftw3.h>
/*
 * Runs a 10-point forward complex DFT on 30 + 12*sin(2*pi*i/N) and prints the
 * real and imaginary part of every output bin.
 *
 * Because <complex.h> is included before <fftw3.h>, fftw_complex is the
 * native C complex type, so creal()/cimag() and ordinary assignment work
 * on it directly.
 *
 * Returns 0 on success; exits with status 1 if an allocation or the plan
 * creation fails.
 */
int main(void){
    /* fftw_plan_dft_1d takes the transform size as an int. */
    const int N = 10;
    fftw_plan p;
    int i;

    fftw_complex *in = fftw_malloc(sizeof *in * (size_t)N);
    if (in == NULL) {
        fprintf(stderr,"malloc failed\n");
        exit(1);
    }
    fftw_complex *out = fftw_malloc(sizeof *out * (size_t)N);
    if (out == NULL) {
        fprintf(stderr,"malloc failed\n");
        fftw_free(in);
        exit(1);
    }

    /* sizeof yields a size_t, which must be printed with %zu. */
    printf("sizeof fftw complex %zu\n", sizeof(fftw_complex));

    p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
    if (p == NULL) {
        fprintf(stderr,"plan creation failed\n");
        fftw_free(in);
        fftw_free(out);
        exit(1);
    }

    for (i = 0; i < N; i++) {
        in[i] = 30. + 12.*sin(2*3.1415926535*i/((double)N));
    }

    fftw_execute(p);

    for (i = 0; i < N; i++) {
        printf("result: %d || %g %gI |\n", i, creal(out[i]), cimag(out[i]));
    }

    fftw_destroy_plan(p);
    fftw_free(in);
    fftw_free(out);
    return(0);
}
It shows how to retrieve the real and imaginary parts of a complex number.
Now any function operating on complex number can be used, as listed in https://en.cppreference.com/w/c/numeric/complex .
I have been trying for hours and it is driving me crazy. The last error I get is:
demo_cblas.c:(.text+0x83): undefined reference to `clapack_sgetrf'
demo_cblas.c:(.text+0xa3): undefined reference to `clapack_sgetri'
I am compiling the code using
/usr/bin/gcc -o demo_cblas demo_cblas.c -L /usr/lib64 -l :libgfortran.so.3 -L /usr/lib64 \
-llapack -L /usr/lib64 -lblas
I try with and without libgfortran, with different compilers gcc-33, gcc-47, gcc-48. The test code is not from me but comes from this forum ...
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "clapack.h"
#include "cblas.h"
/*
 * Inverts the height x height matrix `a` in place using ATLAS' C LAPACK
 * interface: LU factorisation (clapack_sgetrf) followed by inversion from
 * the factors (clapack_sgetri). `a` is passed as CblasColMajor with leading
 * dimension `height`.
 *
 * The function returns nothing, so a non-zero status from either routine is
 * reported on stderr; the inversion step is skipped when the factorisation
 * fails. Nothing is done when height is 0.
 */
void invertMatrix(float *a, unsigned int height){
    if (height == 0) {
        return;                 /* also avoids a zero-length VLA below */
    }

    int ipiv[height];           /* pivot indices filled in by sgetrf */
    int info = clapack_sgetrf(CblasColMajor, height, height, a, height, ipiv);
    if (info != 0) {
        fprintf(stderr, "invertMatrix: clapack_sgetrf failed (info = %d)\n", info);
        return;
    }

    info = clapack_sgetri(CblasColMajor, height, a, height, ipiv);
    if (info != 0) {
        fprintf(stderr, "invertMatrix: clapack_sgetri failed (info = %d)\n", info);
    }
}
/*
 * Prints a height x width matrix to stdout, one matrix row per output line,
 * followed by an empty line. Element (row, col) is read from
 * a[height*col + row], i.e. the columns are stored one after another.
 */
void displayMatrix(float *a, unsigned int height, unsigned int width)
{
    int row;
    int col;

    for (row = 0; row < height; ++row) {
        for (col = 0; col < width; ++col) {
            float value = a[height*col + row];
            printf("%1.3f ", value);
        }
        printf("\n");
    }
    printf("\n");
}
/*
 * Fills a 3x3 matrix with pseudo-random values in [0, 1] and prints it.
 * The command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    float a[9];
    int i;

    /* srand takes an unsigned int; make the narrowing from time_t explicit. */
    srand((unsigned int)time(NULL));
    for (i = 0; i < 9; i++) {
        a[i] = 1.0f*rand()/RAND_MAX;
    }

    displayMatrix(a, 3, 3);
    return 0;
}
I am on Suse 12.3 64bits. In /usr/lib64 I have liblapack.a liblapack.so, ... and libblas.a libblas.so, ... and libgfortran.so.3
The same code without the function "invertMatrix" (the one using the library) compiles fine.
Any idea or suggestion ?
Thank you all for your help.
Vava
I'm quite positive that you also need to link to libcblas, which is the c wrapper library for libblas. Note that libblas is a FORTRAN library which therefore does not contain the function clapack_* you're calling.
I've just got this working on FreeBSD with:
gcc -o test test.c \
-llapack -lblas -lalapack -lcblas
I'd installed math/atlas (from ports) and the lapack and blas packages.
See my question here
So I'm writing a very basic CUDA code (vector addition) to teach myself the basics of CUDA programming. I've got it working when I write one .cu file, but now I am trying to make it work with a .c and .cu file linked together. My main.c file is as follows:
#include "Test.h"
#include <stdlib.h>
/*
 * Host driver: builds two n-element input vectors with h_a[i] = h_b[i] = i,
 * passes them and an output buffer to addVec(), then releases the buffers.
 *
 * Returns 0 on success and EXIT_FAILURE if a host allocation fails.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    const int n = 1000;
    const size_t size = (size_t)n * sizeof(float);
    int status = EXIT_FAILURE;
    int i;

    float *h_a = malloc(size);
    float *h_b = malloc(size);
    float *h_c = malloc(size);
    if (h_a == NULL || h_b == NULL || h_c == NULL) {
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        h_a[i] = h_b[i] = (float)i;
    }

    addVec(h_a, h_b, h_c, n);
    status = 0;

cleanup:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(h_a);
    free(h_b);
    free(h_c);
    return status;
}
Here, Test.h simply says:
#ifndef TEST_H
#define TEST_H

/*
 * This header is shared by main.c (compiled as C) and vecAdd.cu (host code
 * compiled as C++ by nvcc). The extern "C" guard makes the C++ side emit an
 * unmangled symbol, so the reference from the C object file resolves at
 * link time.
 */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Vector-addition entry point implemented in vecAdd.cu.
 * a, b: input vectors of n floats; c: output vector of n floats.
 */
void addVec(float *a, float *b, float *c, int n);

#ifdef __cplusplus
}
#endif

#endif /* TEST_H */
My vecAdd.cu file says:
#include "Test.h"
// Element-wise addition kernel: each thread computes its own index from its
// block and thread coordinates and, if that index is below n, writes
// a[idx] + b[idx] into c[idx].
__global__ void vecAdd(float *a, float *b, float *c, int n) {
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= n) {
        return;    // surplus thread: nothing to do
    }
    c[idx] = a[idx] + b[idx];
}
// Host-side entry point called from main.c with three n-element float
// arrays. The part shown here allocates three device buffers of
// n * sizeof(float) bytes each; the remainder of the body is elided ("...")
// in this listing.
// NOTE(review): the cudaMalloc return values are not checked in the visible
// code.
void addVec(float *a, float *b, float *c, int n) {
float *d_a, *d_b, *d_c;
size_t size = n * sizeof(float);  // bytes per vector
cudaMalloc(&d_a, size);
cudaMalloc(&d_b, size);
cudaMalloc(&d_c, size);
...
}
I then run the commands:
gcc -c -Wall -O3 main.c -o ../obj/main.o
nvcc -c -O3 vecAdd.cu -o ../obj/vecAdd.o
gcc -L/usr/local/cuda/lib64 -lcudart ../obj/main.o ../obj/vecAdd.o -o ../bin/nvTest
The first two work fine. The last one, when I try to link the two object files, tells me that I have an undefined reference to addVec, though it is defined in vecAdd.cu... what am I doing wrong?
You have a C/C++ linkage problem that is basically identical to that described here. This is because nvcc is using a c++ compiler for host code (creating c++ style linkage references i.e. "mangling") and gcc is interpreting main.c as a c (not c++) file and therefore creating c style linkage references.
There are at least 2 ways to fix it:
convert your main.c into a main.cpp and use g++ where you are using gcc now (for your first and 3rd compile and link steps). Then everything will be consistently c++ style references.
Declare within your C++ module (vecAdd.cu) that the external reference should be C style, i.e. give addVec extern "C" linkage, as described here.
I would like to be able to compute the inverse of a general NxN matrix in C/C++ using lapack.
My understanding is that the way to do an inversion in lapack is by using the dgetri function, however, I can't figure out what all of its arguments are supposed to be.
Here is the code I have:
/* Prototype of the Fortran LAPACK routine dgetri; every argument is passed
 * by address. */
void dgetri_(int* N, double* A, int* lda, int* IPIV, double* WORK, int* lwork, int* INFO);

/* Stub from the question: defines the 3x3 matrix to be inverted as nine
 * consecutive doubles and does nothing with it yet. */
int main(){
    double M [9] = { 1,2,3,  4,5,6,  7,8,9 };
    (void)M;
    return 0;
}
How would you complete it to obtain the inverse of the 3x3 matrix M using dgetri_?
Here is the working code for computing the inverse of a matrix using lapack in C/C++:
#include <cstdio>
// Fortran LAPACK routines. They are declared with C linkage so the C++
// compiler does not mangle the names, and every argument is passed by
// pointer.
extern "C" {
// LU decomposition of a general matrix
// M, N: dimensions of A; lda: leading dimension of A; IPIV: receives the
// pivot indices; INFO: receives the status (0 on success).
void dgetrf_(int* M, int *N, double* A, int* lda, int* IPIV, int* INFO);
// generate inverse of a matrix given its LU decomposition
// A and IPIV are the outputs of dgetrf_; WORK is a workspace of lwork
// doubles; INFO receives the status (0 on success).
void dgetri_(int* N, double* A, int* lda, int* IPIV, double* WORK, int* lwork, int* INFO);
}
// Inverts the N x N matrix A in place using LAPACK: dgetrf_ computes the LU
// factorisation and dgetri_ turns the factors into the inverse.
//
// A: N*N doubles, overwritten with the result.
// N: order of the matrix; nothing is done when A is null or N <= 0.
//
// The function returns nothing, so a non-zero INFO from LAPACK is reported
// on stderr. dgetri_ is only called when the factorisation succeeded.
void inverse(double* A, int N)
{
    if (!A || N <= 0) {
        return;
    }

    int *IPIV = new int[N];            // pivot indices produced by dgetrf_
    int LWORK = N*N;
    double *WORK = new double[LWORK];  // scratch space for dgetri_
    int INFO = 0;

    dgetrf_(&N,&N,A,&N,IPIV,&INFO);
    if (INFO == 0) {
        dgetri_(&N,A,&N,IPIV,WORK,&LWORK,&INFO);
    }
    if (INFO != 0) {
        std::fprintf(stderr, "inverse: LAPACK reported INFO = %d, matrix not inverted\n", INFO);
    }

    delete[] IPIV;
    delete[] WORK;
}
// Inverts a fixed 2x2 matrix in place and prints the four resulting values,
// two per line.
int main(){
    double A [2*2] = { 1,2, 3,4 };

    inverse(A, 2);

    for (int line = 0; line < 2; ++line) {
        printf("%f %f\n", A[2*line], A[2*line + 1]);
    }
    return 0;
}
First, M has to be a two-dimensional array, like double M[3][3]. Your array is, mathematically speaking, a 1x9 vector, which is not invertible.
N is a pointer to an int for the
order of the matrix - in this case,
N=3.
A is a pointer to the LU
factorization of the matrix, which
you can get by running the LAPACK
routine dgetrf.
LDA is an integer for the "leading
dimension" of the matrix, which lets
you pick out a subset of a bigger
matrix if you want to just invert a
little piece. If you want to invert
the whole matrix, LDA should just be
equal to N.
IPIV is the pivot indices of the
matrix, in other words, it's a list
of instructions of what rows to swap
in order to invert the matrix. IPIV
should be generated by the LAPACK
routine dgetrf.
LWORK and WORK are the "workspaces"
used by LAPACK. If you are inverting
the whole matrix, LWORK should be an
int equal to N^2, and WORK should be
a double array with LWORK elements.
INFO is just a status variable to
tell you whether the operation
completed successfully. Since not all
matrices are invertible, I would
recommend that you send this to some
sort of error-checking system. INFO=0 for successful operation, INFO=-i if the i'th argument had an incorrect input value, and INFO > 0 if the matrix is not invertible.
So, for your code, I would do something like this:
/*
 * Inverts a 3x3 matrix in place with the Fortran LAPACK routines dgetrf_
 * (LU factorisation) and dgetri_ (inverse from the LU factors).
 *
 * Returns 0 on success and 1 if either routine reports a non-zero INFO.
 * The matrix used here (1..9) is singular, so a positive INFO is the
 * expected outcome for this particular input.
 */
int main(){
    /* Fortran entry points: every argument, including scalars, is passed by
     * address. */
    extern void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);
    extern void dgetri_(int *n, double *a, int *lda, int *ipiv,
                        double *work, int *lwork, int *info);

    int n = 3;                          /* order of the matrix */
    int lwork = 9;                      /* number of elements in the workspace */
    double M[3][3] = { {1 , 2 , 3},
                       {4 , 5 , 6},
                       {7 , 8 , 9} };
    int pivotArray[3];                  /* pivot indices are integers, one per row */
    int errorHandler;
    double lapackWorkspace[9];

    /* dgetrf_(M, N, A, LDA, IPIV, INFO) LU-factorises the M-by-N matrix A,
     * whose leading dimension is LDA, in place; the pivot indices go to IPIV
     * and the status to INFO. The matrix is passed as a pointer to its first
     * element. */
    dgetrf_(&n, &n, &M[0][0], &n, pivotArray, &errorHandler);
    if (errorHandler != 0) {
        return 1;                       /* bad argument (< 0) or singular matrix (> 0) */
    }

    /* dgetri_(N, A, LDA, IPIV, WORK, LWORK, INFO): note that the workspace
     * pointer comes before its length. */
    dgetri_(&n, &M[0][0], &n, pivotArray, lapackWorkspace, &lwork, &errorHandler);
    if (errorHandler != 0) {
        return 1;
    }

    return 0;
}
Here is a working version of the above using OpenBlas interface to LAPACKE.
Link with openblas library (LAPACKE is already contained)
#include <stdio.h>
#include "cblas.h"
#include "lapacke.h"
// inplace inverse n x n matrix A.
// matrix A is Column Major (i.e. firts line, second line ... *not* C[][] order)
// returns:
// ret = 0 on success
// ret < 0 illegal argument value
// ret > 0 singular matrix
lapack_int matInv(double *A, unsigned n)
{
int ipiv[n+1];
lapack_int ret;
ret = LAPACKE_dgetrf(LAPACK_COL_MAJOR,
n,
n,
A,
n,
ipiv);
if (ret !=0)
return ret;
ret = LAPACKE_dgetri(LAPACK_COL_MAJOR,
n,
A,
n,
ipiv);
return ret;
}
int main()
{
double A[] = {
0.378589, 0.971711, 0.016087, 0.037668, 0.312398,
0.756377, 0.345708, 0.922947, 0.846671, 0.856103,
0.732510, 0.108942, 0.476969, 0.398254, 0.507045,
0.162608, 0.227770, 0.533074, 0.807075, 0.180335,
0.517006, 0.315992, 0.914848, 0.460825, 0.731980
};
for (int i=0; i<25; i++) {
if ((i%5) == 0) putchar('\n');
printf("%+12.8f ",A[i]);
}
putchar('\n');
matInv(A,5);
for (int i=0; i<25; i++) {
if ((i%5) == 0) putchar('\n');
printf("%+12.8f ",A[i]);
}
putchar('\n');
}
Example:
% g++ -I [OpenBlas path]/include/ example.cpp [OpenBlas path]/lib/libopenblas.a
% a.out
+0.37858900 +0.97171100 +0.01608700 +0.03766800 +0.31239800
+0.75637700 +0.34570800 +0.92294700 +0.84667100 +0.85610300
+0.73251000 +0.10894200 +0.47696900 +0.39825400 +0.50704500
+0.16260800 +0.22777000 +0.53307400 +0.80707500 +0.18033500
+0.51700600 +0.31599200 +0.91484800 +0.46082500 +0.73198000
+0.24335255 -2.67946180 +3.57538817 +0.83711880 +0.34704217
+1.02790497 -1.05086895 -0.07468137 +0.71041070 +0.66708313
-0.21087237 -4.47765165 +1.73958308 +1.73999641 +3.69324020
-0.14100897 +2.34977565 -0.93725915 +0.47383541 -2.15554470
-0.26329660 +6.46315378 -4.07721533 -3.37094863 -2.42580445
Here is a working version of Spencer Nelson's example above. One mystery about it is that the input matrix is in row-major order, even though it appears to call the underlying fortran routine dgetri. I am led to believe that all the underlying fortran routines require column-major order, but I am no expert on LAPACK, in fact, I'm using this example to help me learn it. But, that one mystery aside:
The input matrix in the example is singular. LAPACK tries to tell you that by returning a 3 in the errorHandler. I changed the 9 in that matrix to a 19, getting an errorHandler of 0 signalling success, and compared the result to that from Mathematica. The comparison was also successful and confirmed that the matrix in the example should be in row-major order, as presented.
Here is the working code:
#include <stdio.h>
#include <stddef.h>
#include <lapacke.h>
/*
 * Inverts a 3x3 matrix in place with dgetrf_ (LU factorisation) and dgetri_
 * (inverse from the LU factors) and prints the result, one row per line.
 *
 * The original example matrix (last element 9) is singular and makes dgetrf_
 * report INFO = 3; the last element is 19 here so that both calls succeed.
 *
 * Returns 0 on success and 1 if either LAPACK call reports a non-zero INFO.
 */
int main() {
  int N = 3;
  int NN = 9;
  double M[3][3] = { {1 , 2 , 3},
                     {4 , 5 , 6},
                     {7 , 8 , 19} };
  int pivotArray[3]; //since our matrix has three rows
  int errorHandler;
  double lapackWorkspace[9];

  // dgetrf_(M,N,A,LDA,IPIV,INFO) LU-factorises the M-by-N matrix A, whose
  // leading dimension is LDA, in place, sending the pivot indices to IPIV
  // and the status to INFO. Every argument is passed by address; the matrix
  // is passed as a pointer to its first element.
  dgetrf_(&N, &N, M[0], &N, pivotArray, &errorHandler);
  printf ("dgetrf eh, %d, should be zero\n", errorHandler);
  if (errorHandler != 0) {
    return 1;   // nothing valid to invert
  }

  dgetri_(&N, M[0], &N, pivotArray, lapackWorkspace, &NN, &errorHandler);
  printf ("dgetri eh, %d, should be zero\n", errorHandler);
  if (errorHandler != 0) {
    return 1;
  }

  // int indices match the type of N and avoid signed/unsigned comparisons.
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
      printf ("%g", M[row][col]);
      if (N-1 != col) {
        printf (", ");
      }
    }
    if (N-1 != row) {
      printf ("\n");
    }
  }
  return 0;
}
I built and ran it as follows on a Mac:
gcc main.c -llapacke -llapack
./a.out
I did an nm on the LAPACKE library and found the following:
liblapacke.a(lapacke_dgetri.o):
U _LAPACKE_dge_nancheck
0000000000000000 T _LAPACKE_dgetri
U _LAPACKE_dgetri_work
U _LAPACKE_xerbla
U _free
U _malloc
liblapacke.a(lapacke_dgetri_work.o):
U _LAPACKE_dge_trans
0000000000000000 T _LAPACKE_dgetri_work
U _LAPACKE_xerbla
U _dgetri_
U _free
U _malloc
and it looks like there is a LAPACKE [sic] wrapper that would presumably relieve us of having to take addresses everywhere for fortran's convenience, but I am probably not going to get around to trying it because I have a way forward.
EDIT
Here is a working version that bypasses LAPACKE [sic], using LAPACK fortran routines directly. I do not understand why a row-major input produces correct results, but I confirmed it again in Mathematica.
#include <stdio.h>
#include <stddef.h>
/*
 * Inverts a fixed 3x3 matrix in place by calling the Fortran LAPACK routines
 * directly (no LAPACKE), prints the INFO value of each call, then prints the
 * matrix one row per line with ", " between values.
 */
int main() {
  /* Fortran signature: SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
   *   INTEGER INFO, LDA, M, N;  INTEGER IPIV( * );
   *   DOUBLE PRECISION A( LDA, * )
   * Every argument is passed by address from C. */
  extern void dgetrf_ (int * m, int * n, double * A, int * LDA, int * IPIV,
                       int * INFO);

  /* Fortran signature: SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
   *   INTEGER INFO, LDA, LWORK, N;  INTEGER IPIV( * );
   *   DOUBLE PRECISION A( LDA, * ), WORK( * ) */
  extern void dgetri_ (int * n, double * A, int * LDA, int * IPIV,
                       double * WORK, int * LWORK, int * INFO);

  int order = 3;
  int workLen = 9;
  double M[3][3] = { {1 , 2 , 3},
                     {4 , 5 , 6},
                     {7 , 8 , 19} };
  int pivots[3];      /* one pivot index per row */
  int info;
  double work[9];

  /* Step 1: LU factorisation of M in place; pivots and status are returned
   * through the last two arguments. */
  dgetrf_(&order, &order, M[0], &order, pivots, &info);
  printf ("dgetrf eh, %d, should be zero\n", info);

  /* Step 2: turn the LU factors into the inverse, again in place. */
  dgetri_(&order, M[0], &order, pivots, work, &workLen, &info);
  printf ("dgetri eh, %d, should be zero\n", info);

  /* A separator is written before every value except the first of a row,
   * and a newline after every row except the last. */
  for (size_t row = 0; row < order; ++row) {
    for (size_t col = 0; col < order; ++col) {
      if (col != 0) {
        printf (", ");
      }
      printf ("%g", M[row][col]);
    }
    if (order-1 != row) {
      printf ("\n");
    }
  }
  return 0;
}
built and run like this:
$ gcc foo.c -llapack
$ ./a.out
dgetrf eh, 0, should be zero
dgetri eh, 0, should be zero
-1.56667, 0.466667, 0.1
1.13333, 0.0666667, -0.2
0.1, -0.2, 0.1
EDIT
The mystery no longer appears to be a mystery. I think the computations are being done in column-major order, as they must, but I am both inputting and printing the matrices as if they were in row-major order. I have two bugs that cancel each other out so things look row-ish even though they're column-ish.