I want to use matrix algebra and optimization. I have tested different C and C++ libraries for matrix algebra, but the problem with them is that they cannot handle garbage data as well as GNU Octave does. Garbage data in C and C++ only goes down to about e-8, but in GNU Octave it is pushed way down, as low as e-17. That's very useful if you are planning to use garbage data from, e.g., measurements in calculations: it has no effect on your results.
GNU Octave has a C++ API, but I don't really understand how to use it. I want to use C and call GNU Octave functions from C.
Is it possible to create a struct that contains a 2D array and its dimensions, send it to GNU Octave, and get back a struct that holds the result and its dimensions, e.g. the solution?
There is a c mex interface. However the octave interpreter must be embedded and initialized before any mex function can be called. As of Octave 4.4 octave_main as suggested by the linked answer has been deprecated and some other changes also are needed for it to be useful for mex programs. So I have prepared a c++ source file calloctave.cc containing the functions mexCallOctave and free_arg_list and its header calloctave.h.
calloctave.cc
// calloctave.cc
#include "interpreter.h"
#include "mxarray.h"
#include "parse.h"
extern "C"
int
mexCallOctave (int nargout, mxArray *argout[], int nargin,
mxArray *argin[], const char *fname)
{
static octave::interpreter embedded_interpreter;
if (!embedded_interpreter.initialized())
embedded_interpreter.execute ();
octave_value_list args;
args.resize (nargin);
for (int i = 0; i < nargin; i++)
args(i) = mxArray::as_octave_value (argin[i]);
bool execution_error = false;
octave_value_list retval;
retval = octave::feval (fname, args, nargout);
int num_to_copy = retval.length ();
if (nargout < retval.length ())
num_to_copy = nargout;
for (int i = 0; i < num_to_copy; i++)
{
argout[i] = new mxArray (retval(i));
}
while (num_to_copy < nargout)
argout[num_to_copy++] = nullptr;
return execution_error ? 1 : 0;
}
// Release the mxArray objects stored in the first `nargs` slots of
// `arglist`.  Null slots are harmless because deleting a null pointer is a
// no-op.
extern "C"
void
free_arg_list (int nargs, mxArray* arglist[])
{
  int slot = 0;
  while (slot < nargs)
    {
      delete arglist[slot];
      ++slot;
    }
}
calloctave.h
// calloctave.h
#pragma once
#include "mex.h"
/* Give the declarations C linkage so the same header can be included from
   both C and C++ translation units. */
#if defined (__cplusplus)
extern "C" {
#endif
/* Call the Octave function `fname` with `nargin` inputs taken from `argin`
   and store up to `nargout` results in `argout`.
   Returns 0 on success and 1 if the implementation flagged an execution
   error. */
int
mexCallOctave (int nargout, mxArray *argout[], int nargin,
mxArray *argin[], const char *fname);
/* Delete the `nargs` mxArray objects referenced by `arglist`
   (for example the outputs produced by mexCallOctave). */
void
free_arg_list (int nargs, mxArray* arglist[]);
#if defined (__cplusplus)
}
#endif
Here is a basic introduction into mex files. You can compile an example hello world program with the --verbose option, as in mkoctfile --mex --verbose hello.c, to get the list of compiler options that you need for compiling your actual programs. Note that because calloctave.cc is a C++ source file, it must be compiled with a C++ compiler such as g++.
In the following example an m-function "myfunction" is called. It takes one input and produces one output. mexCallOctave is used to call the Octave function, and it has the same signature as mexCallMATLAB.
myfunction.m
% myfunction.m
% OUT = MYFUNCTION(A) returns sum(A).
% The example C program calls it with a 10x1 column vector and reads back
% the single resulting value.
function out= myfunction( a )
out = sum(a);
endfunction
main.c
//main.c
#include <stdio.h>
#include "calloctave.h"
/* Example driver: builds a 10x1 double column vector, passes it to the
   Octave function "myfunction" through mexCallOctave and prints the single
   value it returns.  Returns 0 on success and 1 on any failure. */
int main()
{
    /* The buffer holds 11 values, but (as in the original example) only the
       first input_count of them are handed to Octave. */
    double input_data[] = {0,1,2,3,4,5,6,7,8,9,10};
    /* Compile-time constants so the arrays below are not VLAs. */
    enum { input_count = 10, nargin = 1, nargout = 1 };
    /* Null-initialised so the cleanup code is safe on every path. */
    mxArray* rhs[nargin] = { NULL };
    mxArray* lhs[nargout] = { NULL };
    int status = 0;

    // allocate mex array
    rhs[0] = mxCreateDoubleMatrix( input_count, 1, mxREAL);
    if (rhs[0] == NULL)
    {
        fprintf (stderr, "could not allocate the input matrix\n");
        return 1;
    }
    double* rhs_ptr = mxGetPr( rhs[0] );

    // copy data from input buffer to mex array
    for (int i = 0 ; i < input_count; i++)
        rhs_ptr[i] = input_data[i];

    // call octave function; a non-zero return code or a missing output
    // means there is no result to read
    if (mexCallOctave(nargout, lhs, nargin, rhs, "myfunction") != 0
        || lhs[0] == NULL)
    {
        fprintf (stderr, "call to myfunction failed\n");
        status = 1;
    }
    else
    {
        double* lhs_ptr = mxGetPr( lhs[0] );
        double output_data = *lhs_ptr;

        // show the result
        printf ("result = %f", output_data);
    }

    // free memory
    mxDestroyArray(rhs[0]);
    free_arg_list(nargout, lhs);
    return status;
}
Related
I try to import some C-function that generates an array in SystemVerilog.
Here is code:
#include "svdpi.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* DPI-C routine: fills the two SystemVerilog open arrays with packetSize
 * generated samples (fpSig[i] = i, err[i] = 1.1*i) and echoes each pair to
 * stdout.
 *
 *   packetSize - number of elements to generate; the caller's arrays must
 *                hold at least this many elements
 *   fpSig      - handle to an `int` output array
 *   err        - handle to a `real` (C double) output array
 *
 * The original copied only packetSize BYTES into each array via memcpy, so
 * most elements were never written.  The values are now stored element by
 * element directly into the array storage, which also removes the temporary
 * heap buffers and their unchecked allocations.
 */
void getPacket(int packetSize, svOpenArrayHandle fpSig, svOpenArrayHandle err)
{
    int*    sigOut = (int*)    svGetArrayPtr(fpSig);
    double* errOut = (double*) svGetArrayPtr(err);
    int i;

    /* Without a usable pointer to the array storage nothing can be written. */
    if (sigOut == NULL || errOut == NULL)
    {
        fprintf(stderr, "getPacket: array storage is not accessible\n");
        return;
    }

    for (i = 0; i < packetSize; ++i)
    {
        sigOut[i] = i;
        errOut[i] = 1.1*i;
        printf("%d %f\n", sigOut[i], errOut[i]);
    }
    printf("----------");
}
// Import of the C routine getPacket: takes the element count by value and
// fills two open (unsized) output arrays, one of int and one of real.
import "DPI-C" function void getPacket(input int packetSize,
output int fpSig[], output real err[]);
// Test module: calls getPacket once for a 4-element packet and prints every
// element of both returned arrays.
module top();
initial begin
// Number of elements requested from the C side; also sizes both arrays.
parameter int packetSize = 4;
int fpSig[packetSize];
real err[packetSize];
getPacket(packetSize,fpSig,err);
// Display each int/real pair that came back.
for(int i = 0; i < packetSize; ++i) begin
$display("fpSig: %d\nerr : %f",fpSig[i],err[i]);
end
end
endmodule
But when I compile the c-code manually, an error is generated at the linking stage: undefined reference to 'svGetArrayPtr'.
I have not previously worked with svOpenArrayHandle and it was enough to connect the header file "svdpi.h". I tried to look for some svdpi.dll lib in the questa install folder, but didn't find it.
If I compile c-file by vlog it's working fine, but I want to compile it manually because I plan to include matlab libs and compiling via vlog will become uncomfortable.
In Questasim simulator, the library containing the svGetArrayPtr symbol is mtipli.dll
I have a (64-bit) DLL written in C to call from CTypes in Windows. It's compiled and linked with GCC in Cygwin, but when I call it from CTypes I get "[WinError 126] The specified module could not be found."
The path to the DLL is correct (I even checked it with "if os.path.exists(path_to_dll)"). Research says that this error is most commonly found when the dll depends on other dlls, so I compiled it with the -M flag in GCC and it reports no dependencies.
I have called DLLs written in assembler using the same ctypes strings, but this C dll does not load even though there are no dependencies.
Here is the C code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "WL_02.h"
/*
 * For every element of X, walks the integer range [start, stop) where
 * stop is the element (truncated to an integer) and start is max(0-based
 * lower bound, stop - 10) exactly as in the original code, and appends
 * values derived from the odd members of that range to a growing buffer.
 *
 *   X     - input values; must hold len_X elements
 *   len_X - number of elements in X
 *
 * Returns the heap buffer holding the collected values (the caller owns it
 * and must free() it), or NULL if memory could not be allocated or X is
 * missing.  The number of collected values is not reported through this
 * interface.
 *
 * Fixes relative to the original:
 *   - malloc/realloc were given an element count instead of a byte count;
 *   - len_X was overwritten with the grown capacity, so the outer loop ran
 *     past the end of X after the first reallocation;
 *   - a failed realloc leaked the old buffer and was then dereferenced;
 *   - the collected buffer was leaked and NULL was always returned.
 */
double* main(double* X, int64_t len_X)
{
    double *collect;
    double *grown;
    int64_t capacity;            /* allocated elements in collect */
    int64_t collect_counter = 0; /* used elements in collect */
    int64_t while_counter;
    int64_t value;
    int64_t start;
    int64_t stop;
    int64_t x;

    if (X == NULL && len_X > 0)
        return NULL;

    /* Initial memory allocation; never request zero elements. */
    capacity = (len_X > 0) ? len_X : 1;
    collect = (double *) malloc((size_t) capacity * sizeof(double));
    if (collect == NULL)
        return NULL;

    for (while_counter = 0; while_counter < len_X; while_counter++)
    {
        value = (int64_t) X[while_counter];
        stop = value;
        start = (value < 10) ? 0 : value - 10;

        for (x = start; x < stop; x++)
        {
            if ((x % 2) == 0)
                continue;

            /* NOTE(review): the loop variable itself is squared, which
               skips ahead in the range.  This is kept as in the original;
               confirm whether that is intended. */
            x *= x;

            /* Grow before writing so the store below is always in bounds. */
            if (collect_counter >= capacity)
            {
                grown = (double *) realloc(collect,
                                           (size_t) capacity * 2 * sizeof(double));
                if (grown == NULL)
                {
                    free(collect);
                    return NULL;
                }
                collect = grown;
                capacity *= 2;
            }

            collect[collect_counter] = (double) x;
            collect_counter++;
        }
    }
    return collect;
}
Here is the .h file (WL_02.h):
/* Public interface of WL_02.dll. */
#ifndef WL_02_H
#define WL_02_H
/* int64_t is used in the prototype below, so the header must bring it in
   itself instead of relying on whatever the including file pulled in. */
#include <stdint.h>
/* Marks a symbol as exported from the DLL. */
#define EXPORT_DLL __declspec(dllexport)
/* Exported entry point: takes an array of len_X doubles and returns a
   pointer to double. */
EXPORT_DLL double* main (double* X, int64_t len_X);
#endif
Here are the GCC compile and link strings:
gcc -c WL_02.c -o WL_02.obj
gcc -shared -o WL_02.dll WL_02.obj
Here is the CTypes code:
# Load the DLL and look up its exported entry point `main`.
hDLL = ctypes.WinDLL("C:/cygwin64/our_files/WL_02/WL_02.dll")
CallName = hDLL.main
# Arguments: pointer to an array of doubles and its length as a 64-bit int.
CallName.argtypes = [ctypes.POINTER(ctypes.c_double),ctypes.c_int64]
# The C prototype returns double*, so the result must be declared as a
# pointer to c_double (it was declared as a pointer to c_int64, which would
# reinterpret the returned doubles as integers).
CallName.restype = ctypes.POINTER(ctypes.c_double)
ret_ptr = CallName(CA_X,length_array_out)
The error occurs on the first line (hDLL = ...) where I load the DLL.
The only external dependency I can think of is msvcrt.dll which contains malloc and realloc, but I believe all I need to do is include stdlib.h, which I did.
Thanks in advance for any help.
On a 64-bit architecture pc, the next program should return the result 1.350948.
But it is not thread safe and every time I run it gives (obviously) a different result.
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <pthread.h>
const unsigned int ndiv = 1000;
double res = 0;
struct xval{
double x;
};
// Integrate exp(x^2 + y^2) over the unit circle on the
// first quadrant.
void* sum_function(void*);
void* sum_function(void* args){
unsigned int j;
double y = 0;
double localres = 0;
double x = ((struct xval*)args)->x;
for(j = 0; (x*x)+(y*y) < 1; y = (++j)*(1/(double)ndiv)){
localres += exp((x*x)+(y*y));
}
// Globla variable:
res += (localres/(double)(ndiv*ndiv));
// This is not thread safe!
// mutex? futex? lock? semaphore? other?
}
/*
 * Starts one worker thread per x subdivision (x = i/ndiv for i in
 * [0, ndiv)), waits for all of them and prints the accumulated integral.
 * Returns 0 on success and EXIT_FAILURE if memory or a thread could not be
 * obtained.
 */
int main(void){
    unsigned int i;
    unsigned int created = 0;   /* threads actually started */
    pthread_t thr[ndiv];
    struct xval* xvarray;
    if((xvarray = calloc(ndiv, sizeof(struct xval))) == NULL){
        exit(EXIT_FAILURE);
    }
    /* The loop is bounded by the integer index rather than by the
       floating-point test x < 1, so it can never run past thr[] or
       xvarray[] because of rounding.  The x values are unchanged. */
    for(i = 0; i < ndiv; i++){
        xvarray[i].x = i*(1/(double)ndiv);
        if(pthread_create(&thr[i], NULL, &sum_function, &xvarray[i]) != 0){
            fprintf(stderr, "pthread_create failed for thread %u\n", i);
            break;
        }
        created++;
    }
    /* Join only the threads that were really created. */
    for(i = 0; i < created; i++){
        pthread_join(thr[i], NULL);
    }
    /* Every worker has finished, so nothing references xvarray any more. */
    free(xvarray);
    if(created != ndiv){
        return EXIT_FAILURE;
    }
    printf("The integral of exp(x^2 + y^2) over the unit circle on\n"
           "the first quadrant is: %f\n", res);
    return 0;
}
How can it be thread safe?
NOTE: I know that 1000 threads is not a good way to solve this problem, but I really really want to know how to write thread-safe c programs.
Compile the above program with
gcc ./integral0.c -lpthread -lm -o integral
pthread_mutex_lock(&my_mutex);
// code to make thread safe
pthread_mutex_unlock(&my_mutex);
Declare my_mutex either as a global variable with a static initializer, e.g. pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER;, or initialize it in code using pthread_mutex_t my_mutex; pthread_mutex_init(&my_mutex, NULL);. Also don't forget to add #include <pthread.h> and to link your program with -lpthread when compiling.
The question (in a comment in the code):
// mutex? futex? lock? semaphore? other?
Answer: mutex.
See pthread_mutex_init, pthread_mutex_lock, and pthread_mutex_unlock.
The Problem
I have prepared one sample CUDA code using the constant memory. I can run this in cuda 4.2 successfully but I get "invalid device symbol" when I compile using the CUDA 5.
I have attached the sample code here.
The Code
#include <iostream>
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda.h>
// Parameter record shared between host and device.
struct CParameter
{
int A;    // multiplier applied to every array element by the kernel
float B;  // set by the host; not read by the code in this file
float C;  // set by the host; not read by the code in this file
float D;  // set by the host; not read by the code in this file
};
// Constant-memory symbol holding a POINTER to a CParameter record (the
// record itself is allocated separately by the host code).
__constant__ CParameter * CONSTANT_PARAMETER;
// The name of the symbol above, spelled as a character string.
#define PARAMETER "CONSTANT_PARAMETER"
// Report a CUDA runtime status.
// Returns true when Status is cudaSuccess; otherwise prints the matching
// error text to stdout and returns false.
bool ERROR_CHECK(cudaError_t Status)
{
    if(Status != cudaSuccess)
    {
        // Print through "%s" so the message is never interpreted as a
        // format string.
        printf("%s", cudaGetErrorString(Status));
        return false;
    }
    return true;
}
// Device kernel: one thread per element.  Each of the first N elements of
// `a` is multiplied in place by the integer field A of the record that
// CONSTANT_PARAMETER points to.
__global__ void square_array(float *a, int N)
{
    const int element = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the end of the array have nothing to do.
    if (element >= N)
    {
        return;
    }

    const float current = a[element];
    a[element] = CONSTANT_PARAMETER->A * current;
}
////Main Function/////
// Scales a 10-element array on the device by CParameter::A and prints the
// result.  The parameter record lives in device global memory; a pointer to
// it is published through the __constant__ symbol CONSTANT_PARAMETER.
int main(void)
{
    /////Variable Definition
    const int N = 10;
    size_t size = N * sizeof(float);
    cudaError_t Status = cudaSuccess;
    CParameter * m_dParameter = NULL;
    CParameter * m_hParameter = NULL;
    float * m_D = NULL;
    float * m_H = NULL;

    //Memory Allocation Host
    m_hParameter = new CParameter;
    m_H = new float[N];

    //Memory Allocation Device
    Status = cudaMalloc((void **) &m_D, size);
    ERROR_CHECK(Status);
    Status = cudaMalloc((void**)&m_dParameter,sizeof(CParameter));
    ERROR_CHECK(Status);

    ////Data Initialization
    for (int i=0; i<N; i++)
        m_H[i] = (float)i;
    m_hParameter->A = 5;
    m_hParameter->B = 3;
    m_hParameter->C = 98;
    m_hParameter->D = 100;

    //Memory Copy from Host To Device
    Status = cudaMemcpy(m_D, m_H, size, cudaMemcpyHostToDevice);
    ERROR_CHECK(Status);
    Status = cudaMemcpy(m_dParameter,m_hParameter,sizeof(CParameter),cudaMemcpyHostToDevice);
    ERROR_CHECK(Status);
    // The symbol is passed directly: naming it with a character string is
    // no longer accepted by the runtime and yields "invalid device symbol".
    Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
    ERROR_CHECK(Status);

    // Do calculation on device:
    int block_size = 4;
    int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
    square_array <<<n_blocks, block_size>>>(m_D,N);
    // A kernel launch returns no status itself; ask the runtime for it.
    Status = cudaGetLastError();
    ERROR_CHECK(Status);

    // Retrieve result from device and store it in host array
    Status = cudaMemcpy(m_H, m_D, sizeof(float)*N, cudaMemcpyDeviceToHost);
    ERROR_CHECK(Status);

    // Print results
    for (int i=0; i<N; i++)
        printf("%d %f\n", i, m_H[i]);

    // Cleanup: memory obtained with new[]/new must be released with
    // delete[]/delete, not free().
    delete[] m_H;
    delete m_hParameter;
    cudaFree(m_dParameter);
    cudaFree(m_D);
    return 0;
}
I have tried WINDOWS: CUDA 5.0 Production Release and the Graphics card is GTX 590.
Any help will be appreciated.
In an effort to avoid being "Stringly Typed", the use of character strings to refer to device symbols was deprecated in CUDA runtime API functions in CUDA 4.1, and removed in CUDA 5.0.
The CUDA 5 release notes read:
** The use of a character string to indicate a device symbol, which was possible
with certain API functions, is no longer supported. Instead, the symbol should be
used directly.
If you change your code to the following, it should work.
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
ERROR_CHECK(Status);
From the CUDA 5.0 Release Notes:
** The use of a character string to indicate a device symbol, which was possible with certain API functions, is no longer supported. Instead, the symbol should be used directly.
These API functions still exist, but they accept the target symbol argument only as a bare identifier now, not as either a bare identifier or a string literal naming an ident. E.g.
__device__ __constant__ type ident;
main() { cudaMemcpyToSymbol("ident", ...); } // no longer valid, returns cudaErrorInvalidSymbol
main() { cudaMemcpyToSymbol(ident, ...); } // valid
So get rid of this:
#define PARAMETER "CONSTANT_PARAMETER"
And change this:
Status = cudaMemcpyToSymbol(PARAMETER, &m_dParameter, sizeof(m_dParameter));
To this:
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
And I think it will work.
This is related to calling C functions (built into dynamic libraries) from SAS. There are 4 files. The first 2 (one C file and one SAS file) are a positive control using doubles. The remaining two files are the problematic ones.
C-FILE-1
#ifdef BUILD_DLL
#define EXPORT __declspec(dllexport)
#else
#define EXPORT __declspec(dllimport)
#endif
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
/* Exported routine: writes twice the value of each of the first n elements
   of inarray into the corresponding element of outarray. */
EXPORT void test (double *inarray, double *outarray, int n)
{
    const double *src = inarray;
    double *dst = outarray;
    int remaining;

    for (remaining = n; remaining > 0; remaining--)
    {
        *dst = *src * 2;
        src++;
        dst++;
    }
}
//gcc -c -DBUILD_DLL pointersVoid.c
//gcc -shared -o pointersVoid.dll pointersVoid.o
SAS-FILE-1
/* Point the SASCBTBL fileref at a catalog entry that will hold the
   attribute table describing the external routine. */
filename sascbtbl catalog 'work.api.MYFILE';
/* Copy the in-stream lines between CARDS4 and the four semicolons verbatim
   into the SASCBTBL entry.  They declare routine "test" in module
   pointersVoid with exactly three arguments. */
data _null_;
file sascbtbl;
input;
put _infile_;
cards4;
routine test
module=pointersVoid
minarg=3
maxarg=3;
arg 1 input num byvalue format=IB4.;
arg 2 input num byvalue format=IB4.;
arg 3 input num byvalue format=PIB4.;
;;;;
run;
/* Call the routine through MODULEN, passing the addresses of the first
   elements of the input array arr and the result array ret, plus the
   element count 5. */
data test;
array arr(5) _temporary_ (7.56 2.356 63.54 5.14 8.2);
array ret(5);
rc=modulen ("*e","test",addr(arr(1)), addr(ret(1)), 5);
run;
This works fine and ret array now contains the *2 of the original values.
But when we use strings we get errors:
C-FILE-2
#include "ctype.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
/*
 * Remove trailing alphabetic characters from the string p, in place.
 *
 *   p - NUL-terminated, writable string (may be NULL)
 *
 * Every trailing character for which isalpha() is true is overwritten with
 * '\0'; trimming stops at the first non-alphabetic character from the end.
 * Returns p itself.  A NULL p is returned unchanged.
 */
char *strtrim_right(char *p)
{
    size_t len;

    if (p == NULL)
        return p;

    /* Measure once and walk backwards instead of calling strlen() again
       after every removed character. */
    len = strlen(p);

    /* isalpha() requires a value representable as unsigned char; passing a
       negative plain char is undefined behaviour. */
    while (len > 0 && isalpha((unsigned char) p[len - 1]))
    {
        len--;
        p[len] = '\0';
    }
    return p;
}
/*
 * Exported routine: for each of the first n strings in x, strips trailing
 * alphabetic characters in place (see strtrim_right) and stores the
 * resulting pointer in the matching slot of y.
 *
 *   x - array of n writable, NUL-terminated strings
 *   y - array with room for n string pointers
 *   n - number of strings to process
 *
 * Returns y.  The original had no return statement although the function is
 * declared to return char **, so any caller that used the result read an
 * indeterminate value.
 */
EXPORT char **test (char **x, char **y, int n)
{
    int i;
    for (i = 0; i < n; i++)
    {
        y[i] = strtrim_right(x[i]);
    }
    return y;
}
/*
gcc -c -DBUILD_DLL pointers-array-string-void.c
gcc -shared -o pointers-array-string-void.dll pointers-array-string-void.o
*/
SAS-FILE-2
/* Point the SASCBTBL fileref at a catalog entry that will hold the
   attribute table describing the external routine. */
filename sascbtbl catalog 'work.api.MYFILE';
/* Copy the in-stream lines between CARDS4 and the four semicolons verbatim
   into the SASCBTBL entry.  They declare routine "test" with two character
   arguments and one numeric argument.
   NOTE(review): the module name below contains hyphens, which looks like
   the trigger for the reported "Unrecognized option - in ROUTINE statement"
   message; confirm, and consider a DLL name without hyphens. */
data _null_;
file sascbtbl;
input;
put _infile_;
cards4;
routine test
module=pointers-array-string-void
minarg=3
maxarg=3;
arg 1 input char byvalue format=$CSTR200. ;
arg 2 input char byvalue format=$CSTR200. ;
arg 3 input num byvalue format=PIB4. ;
;;;;
run;
/* Call the routine through CALL MODULE, passing the addresses of the first
   elements of the character arrays arr and ret, plus the element count 5. */
data test;
array arr(5) $ _temporary_ ('PM23RO' '85AB12RE' 'RE147AMF' 'TAGH14MMF' 'LCA2Q');
array ret(5) $;
call module ("*e","test",addr(arr(1)), addr(ret(1)), 5);
run;
This doesn't work and gives errors:
Unrecognized option - in ROUTINE statement
NOTE: Invalid argument to function MODULE
ret1= ret2= ret3= ret4= ret5= rc=. _ERROR_=1 _N_=1
I know that C-FILE-2 works well because the DLL has been tested from another application, so the source of the error is very likely the SAS code in SAS-FILE-2. Any suggestions to make it work?
In 64-bit SAS you will want to use addrlong and update the module parameter declarations to have format=$ptr. datalen=8.
If your .dll is 32 bit you should still be able to invoke its routines by adding the routine declaration option dlltype=32. ("When I'm 64-bit: How to Still Use 32-bit DLLs in Microsoft Windows" Rick Langston, SAS Global Forum 2015.)