Getting value from a dynamic allocated 2d array by pointers - c

I have filled a dynamic allocated float multi array in a function.
A second function has to get the values of the array exploiting the pointer to the first element of the array defined in the former function.
The second function do not access to the correct memory location so it doesn't work but it does if the multy array is defined in a static way.
Does somebody know why?
eval_cell should get values defined in div_int
float f_imp(float x, float y){
return pow(x,2)+pow(y,2)-1;
}
int eval_cell(float* p){
int s[4];
s[0] = f_imp(*p, *(p+1)) <= 0;
printf("%f %f\n",*p, *(p+1));
s[1] = f_imp(*(p+3), *(p+4)) <= 0;
printf("%f %f\n",*(p+3), *(p+4));
s[2] = f_imp(*(p+9), *(p+10)) <= 0;
printf("%f %f\n",*(p+9), *(p+10));
s[3] = f_imp(*(p+6), *(p+7)) <= 0;
printf("%f %f\n",*(p+6), *(p+7));
printf("%d%d%d%d\n",s[0],s[1],s[2],s[3]);
return s[0];
}
void div_int(float* x1, float* y1,float* x2,float* y2,
float* f0, float* f2,float* f6,float* f8){
int i,j,m;
float* p;
float** a_cell; // array 9x3 contente coordinate vertici e valore funzione
*a_cell = (float**) malloc(9*sizeof(float*));
for (i=0;i<9;i++){
a_cell[i] = (float*) malloc(3*sizeof(float));
}
a_cell[0][0] = *x1;
a_cell[0][1] = *y1;
a_cell[0][2] = *f0;
a_cell[2][0] = *x2;
a_cell[2][1] = *y1;
a_cell[2][2] = *f2;
a_cell[6][0] = *x1;
a_cell[6][1] = *y2;
a_cell[6][2] = *f6;
a_cell[8][0] = *x2;
a_cell[8][1] = *y2;
a_cell[8][2] = *f8;
/*** calcolo dei valori incogniti di a_cell ***/
a_cell[1][0] = (*x1+*x2)/2;
a_cell[1][1] = *y1;
a_cell[1][2] = f_imp(a_cell[1][0], a_cell[1][1]);
a_cell[3][0] = *x1;
a_cell[3][1] = (*y1+*y2)/2;
a_cell[3][2] = f_imp(a_cell[3][0], a_cell[3][1]);;
a_cell[4][0] = (*x2+*x1)/2;
a_cell[4][1] = (*y2+*y1)/2;
a_cell[4][2] = f_imp(a_cell[4][0], a_cell[4][1]);
a_cell[5][0] = *x2;
a_cell[5][1] = (*y2+*y1)/2;
a_cell[5][2] = f_imp(a_cell[5][0], a_cell[5][1]);
a_cell[7][0] = (*x1+*x2)/2;
a_cell[7][1] = *y2;
a_cell[7][2] = f_imp(a_cell[7][0], a_cell[7][1]);
for (j=0;j<2;j++){
m = j*3;
for(i=0;i<2;i++){
m += i;
eval_cell(&a_cell[m][0]);
}
}
p = *a_cell;
for (i=0;i<9;i++){
for (j=0;j<3;j++){
printf("%f \n",*(p+3*i+j));
printf("%f \n",a_cell[i][j]);
printf("\n");
}
}
free(a_cell);
return;
}

It's because you using pointer in incorrect way:
See a_cell is pointer to dynamic array of 9 pointers to dynamic array of 3 floats.
So when you do eval_cell(&a_cell[m][0]) (or just eval_cell(a_cell[m]) this is actually the same) you actually get pointer to array of 3 floats. And after that you do:
int eval_cell(float* p){
...
s[2] = f_imp(*(p+9), *(p+10)) <= 0;
*(p+9) will get 9th element in array of 3 floats, so this is incorrect.
It works in static way, because static multi dimension array in memory is just one dimension array for which you was given multi indexing (by compiler). That's why in static you will probably address valid memory area.
See picture for more explanation:

If you want a completely dynamic matrix (2d array), you have to make your own element access function:
double *
make_array (unsigned int rows, unsigned int cols)
{
return malloc (rows * cols * sizeof (double));
}
double *
array_element (double *a, unsigned int cols, unsigned int i, unsigned int j)
{
return a + i * cols + j;
}
#define A(i,j) (*array_element ((a), (cols), (i), (j)))
double *a;
unsigned int rows, cols;
a = make_array (rows, cols);
A(3,4) = 3.14;
printf ("%f\n:" A(3,4));
EDIT:
In your program
*a_cell = (float**) malloc(9*sizeof(float*));
should be
a_cell = (float**) malloc(9*sizeof(float*));
And likewise for
p = *a_cell;

Related

Python C Extension

I am having issues returning a 2D array from a C extension back to Python. When I allocate memory using malloc the returned data is rubbish. When I just initialise an array like sol_matrix[nt][nvar] the returned data is as expected.
#include <Python.h>
#include <numpy/arrayobject.h>
#include <math.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
// function to be solved by Euler solver
double func (double xt, double y){
double y_temp = pow(xt, 2);
y = y_temp;
return y;
}
static PyObject* C_Euler(double h, double xn)
{
double y_temp, dydx; //temps required for solver
double y_sav = 0; //temp required for solver
double xt = 0; //starting value for xt
int nvar = 2; //number of variables (including time)
int nt = xn/h; //timesteps
double y = 0; //y starting value
//double sol_matrix[nt][nvar]; //works fine
double **sol_matrix = malloc(nt * sizeof(double*)); //doesn't work
for (int i=0; i<nt; ++i){
sol_matrix[i] = malloc (nvar * sizeof(double));
}
int i=0;
//solution loop - Euler method.
while (i < nt){
sol_matrix[i][0]=xt;
sol_matrix[i][1]=y_sav;
dydx = func(xt, y);
y_temp = y_sav + h*dydx;
xt = xt+h;
y_sav=y_temp;
i=i+1;
}
npy_intp dims[2];
dims[0] = nt;
dims[1] = 2;
//Create Python object to copy solution array into, get pointer to
//beginning of array, memcpy the data from the C colution matrix
//to the Python object.
PyObject *newarray = PyArray_SimpleNew(2, dims, NPY_DOUBLE);
double *p = (double *) PyArray_DATA(newarray);
memcpy(p, sol_matrix, sizeof(double)*(nt*nvar));
// return array to Python
return newarray;
}
static PyObject* Euler(PyObject* self, PyObject* args)
{
double h, xn;
if (!PyArg_ParseTuple(args, "dd", &h, &xn)){
return NULL;
}
return Py_BuildValue("O", C_Euler(h,xn));
}
Could you provide any guidance on where I am going wrong?
Thank you.
The data in sol_matrix is not in contiguous memory, it's in nt separately allocated arrays. Therefore the line
memcpy(p, sol_matrix, sizeof(double)*(nt*nvar));
is not going to work.
I'm not a big fan of pointer-to-pointer arrays so believe your best option is to allocate sol_matrix as one big chunk:
double *sol_matrix = malloc(nt*nvar * sizeof(double));
This does mean you can't do 2D indexing so will need to do
// OLD: sol_matrix[i][0]=xt;
sol_matrix[i*nvar + 0] = xt;
In contrast
double sol_matrix[nt][nvar]; //works fine
is a single big chunk of memory so the copy works fine.

How to allocate array starting negative index

I am trying to allocate a 3D array u[-nx/2:nx/2-1][-nx/2:nx/2-1][-nx/2:nx/2-1]
int nx = 512;
double *** u = (double ***)malloc(nx * sizeof(double**));
for (int i = -nx/2; i < nx/2; i++) {
u[i] = (double **)malloc(nx * sizeof(double *));
for (int j = -nx/2; j < nx/2; j++) {
u[i][j] = (double *)malloc(nx * sizeof(double));
}
}
Is this a correct way to do it? If it's not, how should I change it?
No, that’s not correct. You can get it to work by placing every pointer in the middle of the dimension it represents:
int nx = 512;
double*** u = (double***)malloc(nx * sizeof(double**)) + nx/2;
for (int i = -nx/2; i < nx/2; i++) {
u[i] = (double**)malloc(nx * sizeof(double*)) + nx/2;
for (int j = -nx/2; j < nx/2; j++) {
u[i][j] = (double*)malloc(nx * sizeof(double)) + nx/2;
}
}
but that’s unusual and confusing, does a lot of separate allocations, and has to be undone for the deallocation step.
Consider one block with accessors instead:
#define NX 512
/* or just double* if nx is dynamic, and calculate the index manually */
double[NX][NX][NX]* u = malloc(sizeof(*u));
double array_get(double[NX][NX][NX] const* u, int i, int j, int k) {
return u[i + NX/2][j + NX/2][k + NX/2];
}
void array_set(double[NX][NX][NX]* u, int i, int j, int k, double value) {
u[i + NX/2][j + NX/2][k + NX/2] = value;
}
No.
Array in C is actually just plain/flat memory block, which is always 0 based and always in 1d (one demension).
Suppose you need a 3d array in arbitrary boundary,
say u[lb_1d, ub_1d][lb_2d, ub_2d][lb_3d, ub_3d],
you will need to do some mapping -- address space from 3d to 1d and vice versa --.
Sample implementation like this:
typedef struct
{
double* _arr;
int _lb_1d;
int _ub_1d;
int _lb_2d;
int _ub_2d;
int _lb_3d;
int _ub_3d;
}DoubleArr3D;
DoubleArr3D* create_3d_arr(int lb_1d, int ub_1d, int lb_2d, int ub_2d, int lb_3d, int ub_3d)
{
int array_size = (ub_1d - lb_1d +1) * (ub_2d - lb_2d +1) * (ub_3d - lb_3d +1);
DoubleArr3D * arr = (DoubleArr3D *)malloc( sizeof( DoubleArr3D) );
if (!arr)
{
return NULL;
}
arr->_lb_1d = lb_1d;
arr->_ub_1d = ub_1d;
arr->_lb_2d = lb_2d;
arr->_ub_2d = ub_2d;
arr->_lb_3d = lb_3d;
arr->_ub_3d = ub_3d;
arr->_arr = (double*) malloc(sizeof(double) * (size_t) array_size);
if (!arr)
{
free(arr);
return NULL;
}
return arr;
}
// arr[i1d, i2d, i3d] ==> arr_get_at(arr, i1d, i2d, i3d)
double arr_get_at(DoubleArr3D* arr, int i1d, int i2d, int i3d )
{
if (!arr || !arr->_arr)
{
// just demo of 'validation check'. in real code we should have meanful error report
return 0;
}
return arr->_arr [
i3d - arr->_lb_3d
+ (i2d - arr->_lb_2d ) * (arr->_ub_3d - arr->_lb_3d +1)
+ (i1d - arr->_lb_1d ) * (arr->_ub_2d - arr->_lb_2d +1) * (arr->_ub_3d - arr->_lb_3d +1)
];
}
First off, all C arrays have index values ranging from 0 to ELEMENT_COUNT-1. Always.
As you are using malloc, I am presuming that the value of nx is only defined at runtime. This rules out static array sizes and thus rules out using the cute arr[x][y][z] syntax as in:
#define NX 512
double arr[NX][NX][NX];
void foo(void)
{
...
arr[z1][y1][x1] += 2 * arr[z2][y2][x2];
...
}
That in turn means that to have the functionality of a 3D array with nx different values for each of its three dimensions dimension, you will need to allocate a linear memory area of size nx_cubed = nx * nx * nx. To calculate that value nx_cubed properly, you will need to check for integer overflows.
Also, you need to properly convert from signed int coordinate values to unsigned size_t values used in the 0 based index ranges.
if (nx < 0) {
fprintf(stderr, "negative value of nx\n");
exit(EXIT_FAILURE);
}
const size_t unx = nx;
const size_t nx_cubed = unx * unx * unx;
/* TODO: Complete check for overflows */
if (nx_cubed < unx) {
fprintf(stderr, "nx_cubed overflow\n");
exit(EXIT_FAILURE);
}
Then you can allocate a memory buffer of the appropriate size, and then check that the malloc call has actually worked.
double *buf = malloc(nx_cubed);
if (!buf) {
fprintf(stderr, "Error allocating memory for nx_cubed elements\n");
exit(EXIT_FAILURE);
}
Now there is the question of calculcating the array index from your x, y, and z values each ranging from -nx/2 to nx/2-1. I recommend writing a function for that which maps that range to the 0 to nx-1 range, and then calculates the proper linear index from the three 0-based values. Again, proper integer overflow checks should be performed.
size_t array3index(const size_t nx, const int x, const int y, const int z) {
const size_t half_nx = nx/2;
/* zero based 3D coordinates,
* this probably triggers some signedness warnings */
const size_t x0 = half_nx + x;
const size_t y0 = half_nx + y;
const size_t z0 = half_nx + z;
if ((x0 >= nx) || (y0 >= nx) || (z0 >= nx)) {
fprintf(stderr, "Signed coordinate(s) out of range: (%d, %d, %d)\n",
x, y, z);
exit(EXIT_FAILURE);
}
const size_t idx = nx * (nx * z0 + y0) + x0;
/* Assuming that we have already checked that nx*nx*nx does not
* overflow, and given that we have checked for x0, y0, z0 to be
* in the range of 0 to (nx-1), the idx calculation should not
* have overflown here. */
return idx;
}
Then you can do your accesses to the 3D array like
const i1 = array3index(nx, x1, y1, z1);
const i2 = array3index(nx, x2, y2, z2);
buf[i1] += 2*buf[i2];
Considering the amount of calculations needed inside array3index, I would examine whether it makes more sense to do the array iteration in the 0 to nx-1 domain directly, and only convert that to -nx/2 to nx/2-1 range values if you actually need that value within a calculation.

Calling C in RStudio causes crash

I am constantly facing a fatal error when calling a C function in R and I suspect it may be because of the way I have used "realloc" routine for variable n_k in the gCRSF_gibbs function. Can somebody tell me if the reallocation of memory to n_k is correct or not?
void gCRSF_gibbs(double *z, double **n_k, double *SampleDex,
double *r, double *a, double *p,
int *Ksize, int *WordNum) {
int i, j, k;
double mass;
double *prob_cumsum;
double cum_sum, probrnd;
prob_cumsum = (double *) calloc(Ksize[0],sizeof(double));
mass = r[0]*pow(p[0],-a[0]);
for (i=0;i<WordNum[0];i++){
j = (int) SampleDex[i] -1;
k = (int) z[j] -1;
if(z[j]>0){
(*n_k)[k]--;
}
for (cum_sum=0, k=0; k<Ksize[0]; k++) {
cum_sum += (*n_k)[k]-a[0];
prob_cumsum[k] = cum_sum;
}
if ( ((double) rand() / RAND_MAX * (cum_sum + mass) < cum_sum)){
probrnd = (double)rand()/(double)RAND_MAX*cum_sum;
k = BinarySearch(probrnd, prob_cumsum, Ksize[0]);
}
else{
for (k=0; k<Ksize[0]; k++){
if ((int) (*n_k)[k]==0){
break;
}
}
if (k==Ksize[0]){
Ksize[0]++;
realloc(*n_k,sizeof(**n_k)*Ksize[0]);
(*n_k)[Ksize[0]-1]=0;
prob_cumsum = realloc(prob_cumsum,sizeof(*prob_cumsum)*Ksize[0]);
}
}
z[j] = k+1;
(*n_k)[k]++;
}
free(prob_cumsum);}
And this is how it is called in R:
gCRSF_gibbs <- function(z, n_k, sampleDex, r, a, p){
out <- .C("gCRSF_gibbs", z=as.double(z), n_k=as.double(n_k),
SampleDex=as.double(sampleDex), r=as.double(r), a=as.double(a),
p=as.double(p), Ksize=as.integer(length(n_k)),
WordNum=as.integer(length(sampleDex)))
out}
You're using realloc wrong. It should be:
*n_k = realloc(*n_k,sizeof(**n_k)*Ksize[0]);
You always want to use realloc like p = realloc(p, size). Otherwise, if the buffer gets moved by realloc, *n_k will be pointing to a freed pointer.

Matrix multiplication with MKL

I have the CSR coordinates of a matrix.
/* alloc space for COO matrix */
int *coo_rows = (int*) malloc(K.n_rows * sizeof(int));
int *coo_cols = (int*) malloc(K.n_rows * sizeof(int));
float *coo_vals = (float*) malloc(K.n_rows * sizeof(float));
/*Load coo values*/
int *rowptrs = (int*) malloc((N_unique+1)*sizeof(int));
int *colinds = (int*) malloc(K.n_rows *sizeof(int));
double *vals = (double*) malloc(K.n_rows *sizeof(double));
/* take csr values */
int job[] = {
2, // job(1)=2 (coo->csr with sorting)
0, // job(2)=1 (one-based indexing for csr matrix)
0, // job(3)=1 (one-based indexing for coo matrix)
0, // empty
n1, // job(5)=nnz (sets nnz for csr matrix)
0 // job(6)=0 (all output arrays filled)
};
int info;
mkl_scsrcoo(job, &n, vals, colinds, rowptrs, &n1, coo_vals, coo_rows, coo_cols, &info);
assert(info == 0 && "Converted COO->CSR");
Now I want to apply the mkl_dcsrmm function to compute C := alpha*A*B + beta*C with beta = 0;
/* function declaration */
void mkl_dcsrmm (char *transa, MKL_INT *m, MKL_INT *n, MKL_INT *k, double *alpha, char *matdescra, double *val, MKL_INT *indx, MKL_INT *pntrb, MKL_INT *pntre, double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);
Since now I have.
int A_rows = ..., A_cols = ..., C_cols = ...
double alpha = 1.0;
mkl_dcsrmm ((char*)"N", &A_rows, &C_cols, &A_cols, &alpha, char *matdescra, vals, coo_cols, rowptrs, colinds , double *b, MKL_INT *ldb, double *beta, double *c, MKL_INT *ldc);
I found some difficulties on filling the inputs. Could you please help me to fill the rest of the inputs?
A specific input for which I want to go in more details is the matdescra. I borrowed the following code from cspblas_ccsr example
char matdescra[6];
matdescra[0] = 'g';
matdescra[1] = 'l';
matdescra[2] = 'n';
matdescra[3] = 'c';
but I have some questions about that. The matrix A I am working is not triangular and the initialization of this char array engage you to make such a declaration, how should I configure the parameters of the matdescra array?
Here is what I use, and what works for me.
char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
G: General. D: Diagonal
L/U Lower/Upper triangular (ignored with G)
N: non-unit diagonal (ignored with G)
C: zero-based indexing.
*/
Complete Example
Here is a complete example. I first create a random matrix by filling a dense matrix with a specified density of Non-Zero elements. Then I convert it to a sparse matrix in CSR-format. Finally, I do the multiplication using mkl_dcsrmm. As a possible check (check not done), I do the same multiplication using the cblas_dgemm function with the dense matrix.
#include "mkl.h"
#include "mkl_spblas.h"
#include <stddef.h> // For NULL
#include <stdlib.h> // for rand()
#include <assert.h>
#include <stdio.h>
#include <limits.h>
// Compute C = A * B; where A is sparse and B is dense.
int main() {
MKL_INT m=10, n=5, k=11;
const double sparsity = 0.9; ///< #param sparsity Values below which are set to zero (sampled from uniform(0,1)-distribution).
double *A_dense;
double *B;
double *C;
double alpha = 1.0;
double beta = 0.0;
const int allignment = 64;
// Seed the RNG to always be the same
srand(42);
// Allocate memory to matrices
A_dense = (double *)mkl_malloc( m*k*sizeof( double ), allignment);
B = (double *)mkl_malloc( k*n*sizeof( double ), allignment);
C = (double *)mkl_malloc( m*n*sizeof( double ), allignment);
if (A_dense == NULL || B == NULL || C == NULL) {
printf("ERROR: Can't allocate memory for matrices. Aborting... \n\n");
mkl_free(A_dense);
mkl_free(B);
mkl_free(C);
return 1;
}
// Initializing matrix data
int i;
int nzmax = 0;
for (i = 0; i < (m*k); i++) {
double val = rand() / (double)RAND_MAX;
if ( val < sparsity ) {
A_dense[i] = 0.0;
} else {
A_dense[i] = val;
nzmax++;
}
}
for (i = 0; i < (k*n); i++) {
B[i] = rand();
}
for (i = 0; i < (m*n); i++) {
C[i] = 0.0;
}
// Convert A to a sparse matrix in CSR format.
// INFO: https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8.htm#GUID-AD67DD8D-4C22-4232-8D3F-AF97DC2ABBC8
MKL_INT job[6];
job[0] = 0; // convert TO CSR.
job[1] = 0; // Zero-based indexing for input.
job[2] = 0; // Zero-based indexing for output.
job[3] = 2; // adns is a whole matrix A.
job[4] = nzmax; // Maximum number of non-zero elements allowed.
job[5] = 3; // all 3 arays are generated for output.
/* JOB: conversion parameters
* m: number of rows of A.
* k: number of columns of A.
* adns: (input/output). Array containing non-zero elements of the matrix A.
* lda: specifies the leading dimension of adns. must be at least max(1, m).
* acsr: (input/output) array containing non-zero elements of the matrix A.
* ja: array containing the column indices.
* ia length m+1, rowIndex.
* OUTPUT:
* info: 0 if successful. i if interrupted at i-th row because of lack of space.
*/
int info = -1;
printf("nzmax:\t %d\n", nzmax);
double *A_sparse = mkl_malloc(nzmax * sizeof(double), allignment);
if (A_sparse == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse.\n");
return 1;
}
MKL_INT *A_sparse_cols = mkl_malloc(nzmax * sizeof(MKL_INT), allignment);
if (A_sparse_cols == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse_cols.\n");
return 1;
}
MKL_INT *A_sparse_rowInd = mkl_malloc((m+1) * sizeof(MKL_INT), allignment);
if (A_sparse_rowInd == NULL) {
printf("ERROR: Could not allocate enough space to A_sparse_rowInd.\n");
return 1;
}
mkl_ddnscsr(job, &m, &k, A_dense, &k, A_sparse, A_sparse_cols, A_sparse_rowInd, &info);
if(info != 0) {
printf("WARNING: info=%d, expected 0.\n", info);
}
assert(info == 0);
char transa = 'n';
MKL_INT ldb = n, ldc=n;
char matdescra[6] = {'g', 'l', 'n', 'c', 'x', 'x'};
/* https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-34C8DB79-0139-46E0-8B53-99F3BEE7B2D4.htm#TBL2-6
G: General. D: Diagonal
L/U Lower/Upper triangular (ignored with G)
N: non-unit diagonal (ignored with G)
C: zero-based indexing.
*/
mkl_dcsrmm(&transa, &m, &n, &m, &alpha, matdescra, A_sparse, A_sparse_cols,
A_sparse_rowInd, &(A_sparse_rowInd[1]), B, &ldb, &beta, C, &ldc);
// The same computation in dense format
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, k, alpha, A_dense, k, B, n, beta, C, n);
mkl_free(A_dense);
mkl_free(A_sparse);
mkl_free(A_sparse_cols);
mkl_free(A_sparse_rowInd);
mkl_free(B);
mkl_free(C);
return 0;
}

sending struct array to cuda kernel

I'm working on a project and I have to sent a struct array to cuda kernel. The struct also contains an array. To test it I have written a simple program.
struct Point {
short x;
short *y;
};
my kernel code:
__global__ void addKernel(Point *a, Point *b, Point *c)
{
int i = threadIdx.x;
c[i].x = a[i].x + b[i].x;
for (int j = 0; j<4; j++){
c[i].y[j] = a[i].y[j] + a[i].y[j];
}
}
my main code:
int main()
{
const int arraySize = 4;
const int arraySize2 = 4;
short *ya, *yb, *yc;
short *dev_ya, *dev_yb, *dev_yc;
Point *a;
Point *b;
Point *c;
Point *dev_a;
Point *dev_b;
Point *dev_c;
size_t sizeInside = sizeof(short) * arraySize2;
ya = (short *)malloc(sizeof(short) * arraySize2);
yb = (short *)malloc(sizeof(short) * arraySize2);
yc = (short *)malloc(sizeof(short) * arraySize2);
ya[0] = 1; ya[1] =2; ya[2]=3; ya[3]=4;
yb[0] = 2; yb[1] =3; yb[2]=4; yb[3]=5;
size_t sizeGeneral = (sizeInside+sizeof(short)) * arraySize;
a = (Point *)malloc( sizeGeneral );
b = (Point *)malloc( sizeGeneral );
c = (Point *)malloc( sizeGeneral );
a[0].x = 2; a[0].y = ya;
a[1].x = 2; a[1].y = ya;
a[2].x = 2; a[2].y = ya;
a[3].x = 2; a[3].y = ya;
b[0].x = 4; b[0].y = yb;
b[1].x = 4; b[1].y = yb;
b[2].x = 4; b[2].y = yb;
b[3].x = 4; b[3].y = yb;
cudaMalloc((void**)&dev_a, sizeGeneral);
cudaMalloc((void**)&dev_b, sizeGeneral);
cudaMalloc((void**)&dev_c, sizeGeneral);
cudaMemcpy(dev_a, a, sizeGeneral, cudaMemcpyHostToDevice);
cudaMemcpy(dev_b, b, sizeGeneral, cudaMemcpyHostToDevice);
addKernel<<<1, 4>>>(dev_a, dev_b, dev_c);
cudaError_t err = cudaMemcpy(c, dev_c, sizeGeneral, cudaMemcpyDeviceToHost);
printf("{%d-->%d,%d,%d,%d} \n err= %d",c[0].x,c[0].y[0],c[1].y[1],c[1].y[2],c[2].y[3], err);
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
return 0;
}
It seems cuda kernel is not working. Actually I can access structs 'x' variable but I cannot access 'y' array. What can I do to access the 'y' array? Thanks in advance.
When you are sending this struct to kernel you send short and pointer to short in host memory not device. This is crucial. For simple type - as short this works, because kernel has its local copy in memory designated to accept parameters. So when you call this kernel you have moved x and y to device, but not the area pointed by y. This you have to do manually by allocating space for it and updating pointer y to point to device memory.
You are not passin the array to the device. You can either make the array a part of the struct, by defining it like this:
struct {
short normalVal;
short inStructArr[4];
}
Or pass the array into the device memory and update the pointer in the struct.

Resources