First we are reading from the files and forming arrays, then we are calculating hanning window(hanning fn) for each array sample , finally we are multiplying(mul fn) the array and the hanning window to form the array final.
#include <stdio.h>
#include<math.h>
#include <stdlib.h>
#include </usr/local/src/libsndfile-1.0.25/libsndfile-1.0.25/src/sndfile.h>
#include </usr/local/src/libsndfile-1.0.25/libsndfile- 1.0.25/src/sndfile.h.in>
void hanning(int);
float w[256];
float mul(float*,int*,float*,float*,int);
int main()
{
SNDFILE *sf1,*sf2,*sf3,*sf4,*sf5,*sout;
SF_INFO info1,info2,info3,info4,info5,infout;
int num_channels;
int num_items1,num_items2,num_items3,num_items4,num_items5;
int num1,num2,num3,num4,num5;
int *buf1,*buf2,*buf3,*buf4,*buf5;
int f1,f2,f3,f4,f5;
int sr1,sr2,sr3,sr4,sr5;
int c1,c2,c3,c4,c5,d;
int i,j=0,N=128,k=0;
float t[128],w1[64],w2[64];
// FILE *out;
hanning(N);
/* Open the WAV file. */
info1.format = 0;
info2.format=0;
info3.format=0;
info3.format=0;
info4.format=0;
info5.format=0;
sf1 = sf_open("/mnt/usb2/voice/a.wav",SFM_READ,&info1);
sf2 = sf_open("/mnt/usb2/voice/na1.wav",SFM_READ,&info2);
sf3 = sf_open("/mnt/usb2/voice/ma.wav",SFM_READ,&info3);
sf4 = sf_open("/mnt/usb2/voice/ra__.wav",SFM_READ,&info4);
sf5 = sf_open("/mnt/usb2/voice/ttha.wav",SFM_READ,&info5);
if (sf1 == NULL)
{
printf("Failed to open the file.\n");
exit(-1);
}
/* Print some of the info, and figure out how much data to read. */
c1 = info1.channels;
c2 = info2.channels;
c3 = info3.channels;
c4 = info4.channels;
c5 = info5.channels;
f1 = info1.frames;
f2 = info2.frames;
f3 = info3.frames;
f4 = info4.frames;
f5 = info5.frames;
sr1 = info2.samplerate;
sr2 = info2.samplerate;
sr3 = info3.samplerate;
sr4 = info4.samplerate;
sr5 = info5.samplerate;
// printf("frames=%d\n",f);
// printf("samplerate=%d\n",sr);
//printf("channels=%d\n",c);
num_items1 = f1*c1;
num_items2 = f2*c2;
num_items3 = f3*c3;
num_items4 = f4*c4;
num_items5 = f5*c5;
//printf("num_items=%d\n",num_items);
/* Allocate space for the data to be read, then read it. */
buf1 = (int *) malloc(num_items1*sizeof(int));
buf2 = (int *) malloc(num_items2*sizeof(int));
buf3 = (int *) malloc(num_items3*sizeof(int));
buf4 = (int *) malloc(num_items4*sizeof(int));
buf5 = (int *) malloc(num_items5*sizeof(int));
num1 = sf_read_int(sf1,buf1,num_items1);
num2 = sf_read_int(sf2,buf2,num_items2);
num3 = sf_read_int(sf3,buf3,num_items3);
num4 = sf_read_int(sf4,buf4,num_items4);
num5 = sf_read_int(sf5,buf5,num_items5);
for(i=0;i<128;i++)
{
if(i<64){
w1[j]=t[i];
j++;}
else{
w2[k]=t[i];
k++;}
}
x1 = (float *) malloc(num_items1*sizeof(float));
x2 = (float *) malloc(num_items2*sizeof(float));
x3 = (float *) malloc(num_items3*sizeof(float));
x4 = (float *) malloc(num_items4*sizeof(float));
x5 = (float *) malloc(num_items5*sizeof(float));
mul(x1,buf1,w1,w2,num_items1);
mul(x2,buf2,w1,w2,num_items2);
mul(x3,buf3,w1,w2,num_items3);
mul(x4,buf4,w1,w2,num_items4);
mul(x5,buf5,w1,w2,num_items5);
//printf("num=%d\n",num);
sf_close(sf1);
sf_close(sf2);
sf_close(sf3);
sf_close(sf4);
sf_close(sf5);
d=num_items1+num_items2+num_items3+num_items4+num_items5;
final = (float *) malloc(d*sizeof(float));
for(j=0;j<num_items1;j++){
final[i]=x1[j];
i++;}
for(j=0;j<num_items1;j++){
final[i]=x1[j];
i++;}
for(j=0;j<num_items1;j++){
final[i]=x1[j];
i++;}
for(j=0;j<num_items3;j++){
final[i]=x3[j];
i++;}
for(j=0;j<num_items4;j++){
final[i]=x4[j];
i++;}
for(j=0;j<num_items1;i++){
final[i]=x1[j];
i++;}
for(j=0;j<num_items5;j++){
final[i]=x5[j];
i++;}
//sout=sf_open(final,SFM_READ,&infout);
// printf("Read %d items\n",num);
/* Write the data to filedata.out. */
/* out = fopen("filedata.txt","w");
if(out==NULL)
{ printf("Error!");
exit(1); }
printf("a");
for (i = 0; i < 100; i++)
{
fprintf(out,"%d ",final[i]);
fprintf(out,"/n");
}
fclose(out);*/
return 0;
}
void hanning(int N)
{
int half, i, idx, n,j=0,k=0;
float PI=3.1428;
// w = (float*) calloc(N, sizeof(float));
// memset(w, 0, N*sizeof(float));
n = N;
if(n%2==0)
{
half = n/2;
for(i=0; i<half; i++)//Calculates Hanning window samples.
{w[i] = 0.5 * (1 - cos(2*PI*(i+1) / (n+1)));
printf("%f\n",w[i]);}
idx = half-1;
for(i=half; i<n; i++) {
w[i] = w[idx];
printf("%f\n",w[i]);
idx--;
}
}
else
{
half = (n+1)/2;
for(i=0; i<half; i++) //Calculates Hanning window for samples
w[i] = 0.5 * (1 - cos(2*PI*(i+1) / (n+1)));
printf("%f\n",w[i]);
}
}
float mul(float *x,int *buf,float *w1,float *w2,int k)/*multiplication of hanning window and array*/
{
float final_1[k],final_2[k];
int i;
for(i=0;i<k;i++){
if(i<64)
final_1[i]=w1[i];
else
final_1[i]=1;}
for(i=0;i<k;i++){
if(i<k-64)
final_2[i]=1;
else
final_2[i]=w2[i];
}
for(i=0;i<k;i++){
x[i]=final_1[i]*final_2[i]*buf[i];
printf("%f\n",x[i]);
}
}
You should initialize your loop variables closer to the actual loops:
There is a missing i=0; after final = (float *) malloc(d*sizeof(float));
This is definitely a bug, but there are other ones: the size you allocate for the final array is probably too small for the copies you make into it: 4 copies of x1, one copy of x3, x4 and x5, but none of x2 (probably a copy/paste bug here).
Also consider using double instead of float for better accuracy in these computations and use M_PI from <math.h> instead of hard coding a very inaccurate value for pi.
Also try and indent your code more consistently, use K&R style for improved readability.
Related
So I am currently writing a function that can output data to an array ready for export to text. Function works fine when collecting variables but an getting the "Thread 1: EXC_BAD_ACCESS (code=1, address=0x0)" error within Xcode and don't know how to debug this. I have tried using calloc to assign memory to the array and using address locations but am still getting similar error messages and using address locations just don't work.
Has anyone got any suggestions for how I can solve this? The error code is showing itself on the first line of the for loop and this function is running as part of a larger function.
void LamVelProf(double dP, double L, double d, double mu)
{
double *r = malloc(sizeof(r)); //Point radius from centreline
double *R = malloc(sizeof(R)); //Absolute pipe radius
double *vx = malloc(sizeof(vx));
double *gvx = malloc(sizeof(gvx));
double *offset = malloc(sizeof(offset));
double **profile[7500][4];
**profile = calloc((7500*4), sizeof(double));
//double **profile = calloc((7500*4), sizeof(double));
int *i = malloc(sizeof(i));
*R = d/2; //Setting the boundary condition
*offset = 0.001;
*i = 0;
for(*r = 0; *r < (*R + (*offset/2)); *r = (*r)+(*offset))
{
**profile[*i][0] = *r;
LamVelProCalc(dP, L, d, mu, *r, vx);
**profile[*i][1] = *vx;
LamGenProCalc(*r, d, gvx);
**profile[*i][2] = *gvx;//Results from general profile
**profile[*i][3] = *i+1;
++*i; //Increasing count by 1
}
printf("%i rows generated\n", *i);
free(r);
free(R);
free(offset);
int *row = malloc(sizeof(row));
int *col = malloc(sizeof(col));
for(*row = 0; *row < *i + 1; *row = *row + 1)
{
for(*col = 0; *col < 4; *col = *col + 1)
{
printf("%f", **profile[*row][*col]);
if(*col == 3)
{
printf("\n");
}else{
printf("\t");
}
}
}
}
I've had to aggressively de-pointerize this code to bring it back into the realm of understandability, and the end result is this:
void LamVelProf(double dP, double L, double d, double mu)
{
double R = d/2;
double offset = 0.001;
double profile[7500][4];
int i = 0;
for (double r = 0; r < (R + (offset/2)); r += offset) {
double vx = 0.0; // Initialize appropriately
double gvx = 0.0;
profile[i][0] = r;
// No idea what this does, or why the return value is ignored
LamVelProCalc(dP, L, d, mu, r, vx);
profile[i][1] = vx;
// No idea what this does, or why the return value is ignored
LamGenProCalc(r, d, gvx);
profile[i][2] = gvx;//Results from general profile
profile[i][3] = i+1;
++i; //Increasing count by 1
}
printf("%i rows generated\n", i);
for(int row = 0; row < i + 1; ++row)
{
for(int col = 0; col < 4; ++col)
{
printf("%f", profile[row][col]);
if (col == 3) {
printf("\n");
} else {
printf("\t");
}
}
}
}
As you can see there's two function calls buried in there that should probably have pointer arguments, my guess is vx and gvx are intended to be manipulated by that function. In C it is common to use use pointers to manipulate external variables, so a pointer argument almost always means "array" or "mutable argument" depending on context.
In other words I'd expect to see:
LamVelProCalc(dP, L, d, mu, r, &vx);
Or even better:
double vx = LamVelProCalc(dP, L, d, mu, r);
Where that value is explicitly returned instead.
This should compile and run without crashing now, though note the above mentioned issues.
When it comes to compiler suggestions to fix a problem, remember to take them all under advisement. At the end of the day you're the programmer, not the compiler, and not every educated guess it makes will be a valid interpretation of the problem at hand. If you unwaveringly follow the compiler's advice it may lead you down really, really strange paths, as perhaps has happened here.
As a note, having variables r and R is borderline programmer abuse. Please don't do this.
Another thing to keep in mind is your rather arbitrary use of 7500 here. Is that just a wild guess as to how many entries you'll need? It's almost always better to compute that, you know how the for loop will run in advance so you can do the math, and allocate accordingly.
If it is a limit you've arrived at through some other method it's worth using a #define to indicate as such, like:
#define MAX_PROFILE_ENTRIES 7500
Where it's now clear what the meaning behind that number is.
So after a lot of head scratching and realising I didn't need malloc because my array would definitely never reach the 125000 elements. Thanks to #tadman for helping with that. Here's the final program which is designed to be called from a menu function:
//
// 02g1LamVelPro .c
// Process Model (MacOS Version)
//
// Created by --- on 30/06/2020.
// Copyright © 2020 ---. All rights reserved.
//
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define maxstrlen 128
//Declaring global variables and allocating memory
//Function Output
double profile; //Array of doubles
//Calculation Variables
double dP;
double L;
double d;
double mu;
double r;
//Miscellaneous Variables
void LamVelProVar(double *dP, double *L, double *d, double *mu)
{
//Declaring input variables
char pres[maxstrlen];
char len[maxstrlen];
char dia[maxstrlen];
char visc[maxstrlen];
printf("Fluid pressure loss (Pa) = ");
*dP = atof(fgets(pres, sizeof(pres), stdin));
printf("Pipe length (m) = ");
*L = atof(fgets(len, sizeof(len), stdin));
printf("Pipe diameter (mm) = ");
*d = atof(fgets(dia, sizeof(dia), stdin));
*d = (*d)*0.001;
printf("Fluid viscosity (cP) = ");
*mu = atof(fgets(visc, sizeof(visc), stdin));
*mu = (*mu)*0.001;
fflush(stdout);
}
double LamVelCalc(double dP, double L, double d, double mu, double r, double *v_x)
{
//Calculation of the theoretical velocity profile with the flow possessing laminar characteristics
double frac1;
double frac2;
double frac3;
frac1 = (dP/L);
frac2 = pow(d,2);
frac2 = (frac2)/(16*mu);
frac3 = 2*r;
frac3 = (frac3)/d;
frac3 = pow(frac3, 2);
frac3 = 1 - (frac3);
*v_x = frac1 * frac2;
*v_x = (*v_x) * frac3;
return *v_x;
}
double LamGenCalc(double r, double d, double *func)
{
//Calculation of the general velocity profile with the flow possessing laminar characteristics
*func = 2*r;
*func = (*func)/d;
*func = pow(*func, 2);
*func = 1 - (*func);
return *func; //Returns v/v_max
}
double **LamVelProfCalc(double dP, double L, double d, double mu)
{
char display[maxstrlen];
double v_x = 0;
double offset = 0.0001;
//Calculating number of rows for the profile results matrix
double prad = d/2;
int whildisp = 1;
int rows = ((prad)/ (offset)) + 1;
printf("%i rows required\n", rows);
double profile[rows][3];
int i = 0;
for(double r = 0.0; r < (prad + (offset/2)); r += offset)
{
profile[i][0] = r; //Displaying point radius
profile[i][1] = LamVelCalc(dP, L, d, mu, r, &v_x); //Calculating point velocity
profile[i][2] = LamGenCalc(r, d, &v_x); //Calculating
//profile[i][3] = i + 1;
++i;
}
printf("%i rows successfully generated\n\n", i);
while(whildisp == 1)
{
printf("Do you want to display the generated data? ");
fgets(display, sizeof(display), stdin);
switch(display[0])
{
case '1':
case 'Y':
case 'y':
printf("Displaying data\n");
printf("Inputted variables:\n");
printf("dP =\t%.3f\tPa\n", dP);
printf("L =\t%.3f\tm\n", L);
printf("d =\t%.1f\tmm\n", d*1000);
printf("mu =\t%.3f\tPa.s\n", mu);
printf("v_max =\t%.3f\tm/s\n\n", LamVelCalc(dP, L, d, mu, 0, &v_x));
printf("r (m)\tv_x (m/s)\tv/v_max\n");
int row = 0;
int col = 0;
for(row = 0; row < i; ++row)
{
for(col = 0; col < 3; ++col)
{
printf("%.5f", profile[row][col]);
if(col == 2)
{
printf("\n");
}else{
printf("\t");
}
}
}
whildisp = 0;
break;
case '0':
case 'N':
case 'n':
whildisp = 0;
default:
printf("Input not recognised.\n");
break;
}
}
return profile;
}
void LamVelPro()
{
//Main Function
char ContCond[maxstrlen];
int whilmain = 1;
printf("Laminar flow velocity profile\n");
while(whilmain == 1)
{
//Variable declaration
double dP;
double L;
double d;
double mu;
double r;
//Data collection
LamVelProVar(&dP, &L, &d, &mu);
//Data manipulation
LamVelProfCalc(dP, L, d, mu);
//Ask for file write (Remember while loop)
//...
//Continue function
int whilcont = 1;
while(whilcont == 1)
{
printf("Do you want to continue? ");
fgets(ContCond, sizeof(ContCond), stdin);
switch(ContCond[0])
{
case '1':
case 'T':
case 'Y':
case 't':
case 'y':
whilcont = 0;
break;
case '0':
case 'F':
case 'N':
case 'f':
case 'n':
whilcont = 0;
whilmain = 0;
break;
default:
printf("Input not recognised\n");
break;
}
}
}
fflush(stdout);
}
hi guys i m stuck on this error while running my program.
I created a float matrix of values with this function
void calcolaSubpixel(IplImage *GT, costi ** totalCostMatrix, float ** subpixelM, int h, int w){
int modulo;
int subpixel;
int i,j;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
//TODO i valori ottneuti vanno 0-1, io li voglio da -0.5,0.5
int modulo = (CV_IMAGE_ELEM(GT, ushort, i, j))%256;
float subpixel01 = modulo/256.0f;
subpixelM[i][j] = subpixel01;
}
}
}
and then i put them in an array of float with this function
void fillLabels( IplImage *GT, float** subpixelM, float * labels){
int h = GT->height, w = GT->width;
int i,j,L = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
//se il pixel in esame della ground truth non ha intensita 0 lo considero,
//altrimenti lo inserisco nella label:
if (CV_IMAGE_ELEM(GT, ushort, i, j) != 0) {
labels[L] = subpixelM[i][j];
L++;
}
}
}
}
and use this array labels for a Mat labelsMat(nRighe, 1, CV_32FC1, labels); where nRighe is a value calculated before to teach a SVM
CvSVM SVM;
SVM.train(trainingDatasMat, labelsMat, Mat(), Mat(), params);
trainingDatasMat doesn t give me any trouble, but labelsMat, built like that' return
OpenCV Error: Bad argument (response #0 is not integral) in cvPreprocessCategoricalResponses, file /home/vision/opencv-2.4.11/modules/ml/src/inner_functions.cpp, line 715
terminate called after throwing an instance of 'cv::Exception' what(): /home/vision/opencv-2.4.11/modules/ml/src/inner_functions.cpp:715: error: (-5) response #0 is not integral in function cvPreprocessCategoricalResponses
any suggestion to overcome this problem ? thank you
this is the main
#include "cost.h"
#include "disparity.h"
#include "fixed_window.h"
#include "confidence.h"
#include "utils.h"
#include "stereoPipeline.h"
#include "interpolation.h"
#include "SGM.h"
#include "learning.h"
#include<iostream>
#include<iomanip>
#include<cv.h>
using namespace cv;
using namespace std;
int main(int argc, char *argv[]) {
int dMax=15;
if (argc != 4)
{
printf("Usage: %s <image_id> <gaps> < <algo>\n", argv[0]);
return -1;
}
int id=atoi(argv[1]);
//sistema l'id per evitare problemi con il numero di cifre
char q[10];
if(id < 10){sprintf(q, "00%d", id); }
if(id >= 10 && id <= 99){sprintf(q, "0%d", id); }
if(id >= 100){sprintf(q, "%d", id); }
int gaps=atoi(argv[2]);
int algo=atoi(argv[3]);
IplImage *L, *R, *GT;
// ***********************************************************
// ***********************************************************
// ****** STEREO ALGORITHM ******
// ***********************************************************
// ***********************************************************
char filenameL[100];
char filenameR[100];
char filenameGT[100];
//carico il nome delle immagini
sprintf(filenameL,"/KITTI/image_0/000%s_10.png",q);
sprintf(filenameR,"/KITTI/image_1/000%s_10.png",q);
sprintf(filenameGT,"/KITTI/disp_occ/000%s_10.png",q);
// load grayscale images
L = cvLoadImage(filenameL, CV_LOAD_IMAGE_GRAYSCALE);
R = cvLoadImage(filenameR, CV_LOAD_IMAGE_GRAYSCALE);
GT= cvLoadImage(filenameGT, CV_LOAD_IMAGE_UNCHANGED );
dMax=kitti_dMax(GT);
int h = L->height, w = L->width;
IplImage* DisparityL = cvCreateImage(cvGetSize(L),8,1);
IplImage* DisparityR = cvCreateImage(cvGetSize(R),8,1);
t_DSI* DSI=create_DSI(w,h,dMax);
t_DSI *boxFilteredDSI=create_DSI(w,h,dMax);
t_DSI* outDSI=create_DSI(w,h,dMax);
// point-wise cost (Absolute Difference or Hamming distance on Census transforms)
if (algo == 0)
AbsoluteDifferenceCost(L, R, dMax, DSI);
else
HammingDistanceCost(L, R, dMax, 5, DSI);
// TAD aggregation
BoxFiltering(DSI, boxFilteredDSI, 5, 100);
disparity_map(boxFilteredDSI,DisparityL, true);
disparity_map_R(boxFilteredDSI,DisparityR, true);
// show results
/*cvShowImage("Left",L);
cvShowImage("FW Left",DisparityL);
cvShowImage("FW Right",DisparityR);
cvWaitKey(0);*/
//matrice disparità
int **disparityMatrix;
disparityMatrix= (int **) calloc(h,sizeof(int *));
int z=0;
for (z;z<h;z++){
disparityMatrix[z]=(int *) calloc(w,sizeof (int ));
}
//matrice dei costi
costi ** totalCostMatrix;
totalCostMatrix= (costi **) calloc(h,sizeof(costi *));
for (z=0;z<h;z++){
totalCostMatrix[z]=(costi *) calloc(w,sizeof (costi ));
}
SGM(boxFilteredDSI, outDSI, dMax, 30, 300, 255);
disparity_map(outDSI,DisparityL, true);
disparity_map_R(outDSI,DisparityR, true);
//findtotalcost riempie la matrice ---->> test con outdSI usata per SGM
findTotalCost(outDSI,totalCostMatrix);
int nRighe = linesLength(GT);
printf ("\n\n%d\n", nRighe);
float * labels;
labels = (float *) calloc(nRighe, sizeof(float));
float ** trainingDatas;
trainingDatas = (float **) calloc(nRighe, sizeof(float *));
for (z=0;z<nRighe;z++){
trainingDatas[z] = (float *) calloc(3, sizeof(float));
}
//matricisubpixels
float ** subpixelM;
subpixelM= (float **) calloc(h,sizeof(float *));
for (z=0;z<h;z++){
subpixelM[z]=(float *) calloc(w,sizeof (float ));
}
//riempio i vettori labels e traingDatas
//labels -> valori della GT
//traingDatas -> i costi
calcolaSubpixel(GT, totalCostMatrix, subpixelM, h, w);
fillLabels(GT, subpixelM, labels);
fillTrainingdatas(GT, totalCostMatrix, trainingDatas);
//test
labels[5] = 1.0;
//trainingDatas[0][0] = 1;
//trainingDatas[0][1] = 1;
//trainingDatas[0][2] = 1;
//imposto le label e i dati di training per effettuare il learning
Mat labelsMat(nRighe, 1, CV_32FC1, labels);
Mat trainingDatasMat(nRighe, 3, CV_32FC1, trainingDatas);
// Set up SVM's parameters
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
params.kernel_type = CvSVM::LINEAR;
params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 1e-6);
// Train the SVM
CvSVM SVM;
SVM.train(trainingDatasMat, labelsMat, Mat(), Mat(), params);
With these settings for SVM, all the values in labelsMat must be integral. A possible solution would be to remove the following line from calcolaSubpixel.
float subpixel01 = modulo/256.0f;
However, there are possibly other problems with your code as well. One in particular is that if you want to initialize a cv::Mat with a data pointer, then that pointer has to be a pointer to contiguous memory for the desired underlying type (respecting the dimensions and step size). In your code, the data pointer is set to an array of float* pointers (trainingDatas is a float**). A correct way to do this would be to pass a float* (pointing to nRighe*3 floats) to the constructor for trainingDatasMat.
That being said an easier way would be to allow cv::Mat to do the memory management for you. Something along the lines of the following:
Mat trainingDatasMat(nRighe, 3, CV_32FC1);
fillTrainingdatas(GT, totalCostMatrix, (float*) trainingDatasMat.data);
I'm considering using CUDA C for a particular problem involving sparse matrix addition.
The docs seem to discuss only operations between a sparse and a dense object.
This leads me to think either: sparse-sparse addition is so trivial it may just be a case of using '+' or similar; or sparse-sparse addition is not implemented. Which is correct, and where can I find the docs?
CUSPARSE has some routines that can operate on two operands that are both sparse matrices, for addition and multiplication.
You can do sparse matrix - sparse matrix addition with CUSPARSE using the cusparse<t>csrgeam function:
This function performs following matrix-matrix operation
C=α∗A+β∗B
where A, B, and C are m×n sparse matrices (defined in CSR storage format ...
Although dense matrix addition is fairly trivial (could be about 3 lines of code, whether in serial or parallel), I personally would not put sparse addition of two CSR matrices at the same level of triviality, especially if the goal is to perform it in parallel. You could try writing your own routine; I wouldn't.
Sparse-sparse addition is surprisingly tricky unless the matrices are the same sparsity pattern. (If they are, just add the elements of the data vectors and call it a day). You'll probably note that even calling the csrgeam method takes a couple of steps - one to calculate the size of the resulting matrix, and then another to do the operation. The reason is that the resulting matrix contains the union of the two nonzero patterns.
If this wasn't tricky enough, let's talk the parallel case, which you're obviously interested in since you're talking about CUDA. If you're in the CSR format, you could parallelize by rows (something like 1 CUDA thread per matrix row as a first pass). You would want to do a first pass, possibly single-threaded to compute the row pointers and column indices, and then a parallel pass to actually run the computation.
Following Robert Crovella's answer, here is a fully worked example on how summing up two sparse matrices in CUDA:
#include <stdio.h>
#include <assert.h>
#include <cusparse.h>
/*******************/
/* iDivUp FUNCTION */
/*******************/
int iDivUp(int a, int b){ return ((a % b) != 0) ? (a / b + 1) : (a / b); }
/********************/
/* CUDA ERROR CHECK */
/********************/
// --- Credit to http://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
if (code != cudaSuccess)
{
fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) { exit(code); }
}
}
void gpuErrchk(cudaError_t ans) { gpuAssert((ans), __FILE__, __LINE__); }
/***************************/
/* CUSPARSE ERROR CHECKING */
/***************************/
static const char *_cusparseGetErrorEnum(cusparseStatus_t error)
{
switch (error)
{
case CUSPARSE_STATUS_SUCCESS:
return "CUSPARSE_STATUS_SUCCESS";
case CUSPARSE_STATUS_NOT_INITIALIZED:
return "CUSPARSE_STATUS_NOT_INITIALIZED";
case CUSPARSE_STATUS_ALLOC_FAILED:
return "CUSPARSE_STATUS_ALLOC_FAILED";
case CUSPARSE_STATUS_INVALID_VALUE:
return "CUSPARSE_STATUS_INVALID_VALUE";
case CUSPARSE_STATUS_ARCH_MISMATCH:
return "CUSPARSE_STATUS_ARCH_MISMATCH";
case CUSPARSE_STATUS_MAPPING_ERROR:
return "CUSPARSE_STATUS_MAPPING_ERROR";
case CUSPARSE_STATUS_EXECUTION_FAILED:
return "CUSPARSE_STATUS_EXECUTION_FAILED";
case CUSPARSE_STATUS_INTERNAL_ERROR:
return "CUSPARSE_STATUS_INTERNAL_ERROR";
case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
case CUSPARSE_STATUS_ZERO_PIVOT:
return "CUSPARSE_STATUS_ZERO_PIVOT";
}
return "<unknown>";
}
inline void __cusparseSafeCall(cusparseStatus_t err, const char *file, const int line)
{
if (CUSPARSE_STATUS_SUCCESS != err) {
fprintf(stderr, "CUSPARSE error in file '%s', line %d, error %s\nterminating!\n", __FILE__, __LINE__, \
_cusparseGetErrorEnum(err)); \
assert(0); \
}
}
extern "C" void cusparseSafeCall(cusparseStatus_t err) { __cusparseSafeCall(err, __FILE__, __LINE__); }
/********/
/* MAIN */
/********/
int main() {
// --- Initialize cuSPARSE
cusparseHandle_t handle; cusparseSafeCall(cusparseCreate(&handle));
// --- Initialize matrix descriptors
cusparseMatDescr_t descrA, descrB, descrC;
cusparseSafeCall(cusparseCreateMatDescr(&descrA));
cusparseSafeCall(cusparseCreateMatDescr(&descrB));
cusparseSafeCall(cusparseCreateMatDescr(&descrC));
const int M = 5; // --- Number of rows
const int N = 6; // --- Number of columns
const int nnz1 = 10; // --- Number of non-zero blocks for matrix A
const int nnz2 = 8; // --- Number of non-zero blocks for matrix A
// --- Host vectors defining the first block-sparse matrix
float *h_csrValA = (float *)malloc(nnz1 * sizeof(float));
int *h_csrRowPtrA = (int *)malloc((M + 1) * sizeof(int));
int *h_csrColIndA = (int *)malloc(nnz1 * sizeof(int));
// --- Host vectors defining the second block-sparse matrix
float *h_csrValB = (float *)malloc(nnz1 * sizeof(float));
int *h_csrRowPtrB = (int *)malloc((M + 1) * sizeof(int));
int *h_csrColIndB = (int *)malloc(nnz1 * sizeof(int));
h_csrValA[0] = 1.f;
h_csrValA[1] = 7.f;
h_csrValA[2] = 1.f;
h_csrValA[3] = 3.f;
h_csrValA[4] = -1.f;
h_csrValA[5] = 10.f;
h_csrValA[6] = 1.f;
h_csrValA[7] = -4.f;
h_csrValA[8] = 1.f;
h_csrValA[9] = 3.f;
h_csrRowPtrA[0] = 0;
h_csrRowPtrA[1] = 3;
h_csrRowPtrA[2] = 5;
h_csrRowPtrA[3] = 6;
h_csrRowPtrA[4] = 8;
h_csrRowPtrA[5] = 10;
h_csrColIndA[0] = 0;
h_csrColIndA[1] = 3;
h_csrColIndA[2] = 5;
h_csrColIndA[3] = 2;
h_csrColIndA[4] = 4;
h_csrColIndA[5] = 1;
h_csrColIndA[6] = 0;
h_csrColIndA[7] = 3;
h_csrColIndA[8] = 3;
h_csrColIndA[9] = 5;
h_csrValB[0] = 3.f;
h_csrValB[1] = 1.f;
h_csrValB[2] = -1.f;
h_csrValB[3] = 1.f;
h_csrValB[4] = -4.f;
h_csrValB[5] = -3.f;
h_csrValB[6] = -2.f;
h_csrValB[7] = 10.f;
h_csrRowPtrB[0] = 0;
h_csrRowPtrB[1] = 2;
h_csrRowPtrB[2] = 4;
h_csrRowPtrB[3] = 5;
h_csrRowPtrB[4] = 7;
h_csrRowPtrB[5] = 8;
h_csrColIndB[0] = 0;
h_csrColIndB[1] = 4;
h_csrColIndB[2] = 0;
h_csrColIndB[3] = 1;
h_csrColIndB[4] = 3;
h_csrColIndB[5] = 0;
h_csrColIndB[6] = 1;
h_csrColIndB[7] = 3;
// --- Device vectors defining the block-sparse matrices
float *d_csrValA; gpuErrchk(cudaMalloc(&d_csrValA, nnz1 * sizeof(float)));
int *d_csrRowPtrA; gpuErrchk(cudaMalloc(&d_csrRowPtrA, (M + 1) * sizeof(int)));
int *d_csrColIndA; gpuErrchk(cudaMalloc(&d_csrColIndA, nnz1 * sizeof(int)));
float *d_csrValB; gpuErrchk(cudaMalloc(&d_csrValB, nnz2 * sizeof(float)));
int *d_csrRowPtrB; gpuErrchk(cudaMalloc(&d_csrRowPtrB, (M + 1) * sizeof(int)));
int *d_csrColIndB; gpuErrchk(cudaMalloc(&d_csrColIndB, nnz2 * sizeof(int)));
gpuErrchk(cudaMemcpy(d_csrValA, h_csrValA, nnz1 * sizeof(float), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_csrRowPtrA, h_csrRowPtrA, (M + 1) * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_csrColIndA, h_csrColIndA, nnz1 * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_csrValB, h_csrValB, nnz2 * sizeof(float), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_csrRowPtrB, h_csrRowPtrB, (M + 1) * sizeof(int), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(d_csrColIndB, h_csrColIndB, nnz2 * sizeof(int), cudaMemcpyHostToDevice));
// --- Summing the two matrices
int baseC, nnz3;
// --- nnzTotalDevHostPtr points to host memory
int *nnzTotalDevHostPtr = &nnz3;
cusparseSafeCall(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
int *d_csrRowPtrC; gpuErrchk(cudaMalloc(&d_csrRowPtrC, (M + 1) * sizeof(int)));
cusparseSafeCall(cusparseXcsrgeamNnz(handle, M, N, descrA, nnz1, d_csrRowPtrA, d_csrColIndA, descrB, nnz2, d_csrRowPtrB, d_csrColIndB, descrC, d_csrRowPtrC, nnzTotalDevHostPtr));
if (NULL != nnzTotalDevHostPtr) {
nnz3 = *nnzTotalDevHostPtr;
}
else{
gpuErrchk(cudaMemcpy(&nnz3, d_csrRowPtrC + M, sizeof(int), cudaMemcpyDeviceToHost));
gpuErrchk(cudaMemcpy(&baseC, d_csrRowPtrC, sizeof(int), cudaMemcpyDeviceToHost));
nnz3 -= baseC;
}
int *d_csrColIndC; gpuErrchk(cudaMalloc(&d_csrColIndC, nnz3 * sizeof(int)));
float *d_csrValC; gpuErrchk(cudaMalloc(&d_csrValC, nnz3 * sizeof(float)));
float alpha = 1.f, beta = 1.f;
cusparseSafeCall(cusparseScsrgeam(handle, M, N, &alpha, descrA, nnz1, d_csrValA, d_csrRowPtrA, d_csrColIndA, &beta, descrB, nnz2, d_csrValB, d_csrRowPtrB, d_csrColIndB, descrC, d_csrValC, d_csrRowPtrC, d_csrColIndC));
// --- Transforming csr to dense format
float *d_C; gpuErrchk(cudaMalloc(&d_C, M * N * sizeof(float)));
cusparseSafeCall(cusparseScsr2dense(handle, M, N, descrC, d_csrValC, d_csrRowPtrC, d_csrColIndC, d_C, M));
float *h_C = (float *)malloc(M * N * sizeof(float));
gpuErrchk(cudaMemcpy(h_C, d_C, M * N * sizeof(float), cudaMemcpyDeviceToHost));
// --- m is row index, n column index
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
printf("%f ", h_C[m + n * M]);
}
printf("\n");
}
return 0;
}
I have filled a dynamic allocated float multi array in a function.
A second function has to get the values of the array exploiting the pointer to the first element of the array defined in the former function.
The second function do not access to the correct memory location so it doesn't work but it does if the multy array is defined in a static way.
Does somebody know why?
eval_cell should get values defined in div_int
float f_imp(float x, float y){
return pow(x,2)+pow(y,2)-1;
}
int eval_cell(float* p){
int s[4];
s[0] = f_imp(*p, *(p+1)) <= 0;
printf("%f %f\n",*p, *(p+1));
s[1] = f_imp(*(p+3), *(p+4)) <= 0;
printf("%f %f\n",*(p+3), *(p+4));
s[2] = f_imp(*(p+9), *(p+10)) <= 0;
printf("%f %f\n",*(p+9), *(p+10));
s[3] = f_imp(*(p+6), *(p+7)) <= 0;
printf("%f %f\n",*(p+6), *(p+7));
printf("%d%d%d%d\n",s[0],s[1],s[2],s[3]);
return s[0];
}
void div_int(float* x1, float* y1,float* x2,float* y2,
float* f0, float* f2,float* f6,float* f8){
int i,j,m;
float* p;
float** a_cell; // array 9x3 contente coordinate vertici e valore funzione
*a_cell = (float**) malloc(9*sizeof(float*));
for (i=0;i<9;i++){
a_cell[i] = (float*) malloc(3*sizeof(float));
}
a_cell[0][0] = *x1;
a_cell[0][1] = *y1;
a_cell[0][2] = *f0;
a_cell[2][0] = *x2;
a_cell[2][1] = *y1;
a_cell[2][2] = *f2;
a_cell[6][0] = *x1;
a_cell[6][1] = *y2;
a_cell[6][2] = *f6;
a_cell[8][0] = *x2;
a_cell[8][1] = *y2;
a_cell[8][2] = *f8;
/*** calcolo dei valori incogniti di a_cell ***/
a_cell[1][0] = (*x1+*x2)/2;
a_cell[1][1] = *y1;
a_cell[1][2] = f_imp(a_cell[1][0], a_cell[1][1]);
a_cell[3][0] = *x1;
a_cell[3][1] = (*y1+*y2)/2;
a_cell[3][2] = f_imp(a_cell[3][0], a_cell[3][1]);;
a_cell[4][0] = (*x2+*x1)/2;
a_cell[4][1] = (*y2+*y1)/2;
a_cell[4][2] = f_imp(a_cell[4][0], a_cell[4][1]);
a_cell[5][0] = *x2;
a_cell[5][1] = (*y2+*y1)/2;
a_cell[5][2] = f_imp(a_cell[5][0], a_cell[5][1]);
a_cell[7][0] = (*x1+*x2)/2;
a_cell[7][1] = *y2;
a_cell[7][2] = f_imp(a_cell[7][0], a_cell[7][1]);
for (j=0;j<2;j++){
m = j*3;
for(i=0;i<2;i++){
m += i;
eval_cell(&a_cell[m][0]);
}
}
p = *a_cell;
for (i=0;i<9;i++){
for (j=0;j<3;j++){
printf("%f \n",*(p+3*i+j));
printf("%f \n",a_cell[i][j]);
printf("\n");
}
}
free(a_cell);
return;
}
It's because you using pointer in incorrect way:
See a_cell is pointer to dynamic array of 9 pointers to dynamic array of 3 floats.
So when you do eval_cell(&a_cell[m][0]) (or just eval_cell(a_cell[m]) this is actually the same) you actually get pointer to array of 3 floats. And after that you do:
int eval_cell(float* p){
...
s[2] = f_imp(*(p+9), *(p+10)) <= 0;
*(p+9) will get 9th element in array of 3 floats, so this is incorrect.
It works in static way, because static multi dimension array in memory is just one dimension array for which you was given multi indexing (by compiler). That's why in static you will probably address valid memory area.
See picture for more explanation:
If you want a completely dynamic matrix (2d array), you have to make your own element access function:
double *
make_array (unsigned int rows, unsigned int cols)
{
return malloc (rows * cols * sizeof (double));
}
double *
array_element (double *a, unsigned int cols, unsigned int i, unsigned int j)
{
return a + i * cols + j;
}
#define A(i,j) (*array_element ((a), (cols), (i), (j)))
double *a;
unsigned int rows, cols;
a = make_array (rows, cols);
A(3,4) = 3.14;
printf ("%f\n:" A(3,4));
EDIT:
In your program
*a_cell = (float**) malloc(9*sizeof(float*));
should be
a_cell = (float**) malloc(9*sizeof(float*));
And likewise for
p = *a_cell;
Could someone please advise me on how to resolve this problem.
I have a function which performs a simple regression analysis on a sets of point contained in an array.
I have one array (pval) which contains all the data I want to perform regression analysis on.
This is how I want to implement this.
I get an average value for the first 7 elements of the array. This is what I call a 'ref_avg' in the programme.
I want to perform a regression analysis for every five elements of the array taking the first element of this array as the 'ref_avg'. That is in every step of the regression analysis I will have 6 points in the array.
e.g
For the 1st step the ref_avg as calculated below is 70.78. So the 1st step in the simple regression will contain these points
1st = {70.78,76.26,69.17,68.68,71.49,73.08},
The second step will contain the ref_avg as the 1st element and other elements starting from the second element in the original array
2nd = {70.78,69.17,68.68,71.49,73.08,72.99},
3rd = {70.78,68.68,71.49,73.08,72.99,70.36},
4th = {70.78,71.49,73.08,72.99,70.36,57.82} and so on until the end.
The regression function is also shown below.
I don't understand why the first 3 elements of the 'calcul' array have value 0.00 on the first step of the regression, 2 elements on the 2nd step,1 elements on the 3rd.
Also the last step of the regression function is printed 3 times.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main()
{
float pval[]={76.26,69.17,68.68,71.49,73.08,72.99,70.36,57.82,58.98,69.71,70.43,77.53,80.77,70.30,70.5,70.79,75.58,76.88,80.20,77.69,80.80,70.5,85.27,75.25};
int count,Nhour;
const int MAX_HOUR = 24;
float *calcul=NULL;
float *tab_time =NULL;
float ref_avg;
int size_hour=7;
float sum=0;
int length = Nhour+1;
float m;
float b;
calcul=(float*)calloc(MAX_HOUR,sizeof(calcul));
if (calcul==NULL)
{
printf(" error in buffer\n");
exit(EXIT_FAILURE);
}
tab_time= calloc(MAX_HOUR,sizeof(float));
/* Get the average of the first seven elements */
int i;
for (i=0;i<size_hour;i++)
{
sum += pval[i];
}
ref_avg = sum / size_hour;
count=0;
/* perform the regression analysis on 5 hours increment */
while(count<=MAX_HOUR)
{
++count;
Nhour=5;
int pass = -(Nhour-1);
int i=0;
for(i=0;i<Nhour+1;i++)
{
if(count<MAX_HOUR)
{
calcul[0]=ref_avg;
calcul[i] =pval[count+pass];
pass++;
}
printf("calc=%.2f\n",calcul[i]); // For debug only
tab_time[i]=i+1;
if(i==Nhour)
{
linear_regression(tab_time, calcul, length, &m, &b);
printf("Slope= %.2f\n", m);
}
}
}
free(calcul);
calcul=NULL;
free(tab_time);
tab_time=NULL;
return 0;
}
/* end of the main function */
/* This function is used to calculate the linear
regression as it was called above in the main function.
It compiles and runs very well, was just included for the
compilation and execution of the main function above where I have a problem. */
int linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
float sumx = 0,
sumy = 0,
sumx2 = 0,
sumxy = 0;
int i;
if (n <= 1) {
*beta1 = 0;
*beta0= 0;
printf("Not enough data for regression \n");
}
else
{
float variance;
for (i = 0; i < n; i++)
{
sumx += x[i];
sumy += y[i];
sumx2 += (x[i] * x[i]);
sumxy += (x[i] * y[i]);
}
variance = (sumx2 - ((sumx * sumx) / n));
if ( variance != 0) {
*beta1 = (sumxy - ((sumx * sumy) / n)) / variance;
*beta0 = (sumy - ((*beta1) * sumx)) / n;
}
else
{
*beta1 = 0;
*beta0 = 0;
}
}
return 0;
}
I think this code produces sane answers. The reference average quoted in the question seems to be wrong. The memory allocation is not needed. The value of MAX_HOUR was 24 but there were only 23 data values in the array. The indexing in building up the array to be regressed was bogus, referencing negative indexes in the pval array (and hence leading to erroneous results). The variable Nhour was referenced before it was initialized; the variable length was not correctly set. There wasn't good diagnostic printing.
The body of main() here is substantially rewritten; the editing on linear_regression() is much more nearly minimal. The code is more consistently laid out and white space has been used to make it easier to read. This version terminates the regression when there is no longer enough data left to fill the array with 5 values - it is not clear what the intended termination condition was.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void linear_regression(const float *x, const float *y, const int n,
float *beta1, float *beta0);
int main(void)
{
float pval[]={
76.26, 68.68, 71.49, 73.08, 72.99, 70.36, 57.82, 58.98,
69.71, 70.43, 77.53, 80.77, 70.30, 70.50, 70.79, 75.58,
76.88, 80.20, 77.69, 80.80, 70.50, 85.27, 75.25,
};
const int Nhour = 5;
const int MAX_HOUR = sizeof(pval)/sizeof(pval[0]);
const int size_hour = 7;
float ref_avg;
float sum = 0.0;
float m;
float b;
float calc_y[6];
float calc_x[6];
/* Get the average of the first seven elements */
for (int i = 0; i < size_hour; i++)
sum += pval[i];
ref_avg = sum / size_hour;
printf("ref avg = %5.2f\n", ref_avg); // JL
/* perform the regression analysis on 5 hours increment */
for (int pass = 0; pass <= MAX_HOUR - Nhour; pass++) // JL
{
calc_y[0] = ref_avg;
calc_x[0] = pass + 1;
printf("pass %d\ncalc_y[0] = %5.2f, calc_x[0] = %5.2f\n",
pass, calc_y[0], calc_x[0]);
for (int i = 1; i <= Nhour; i++)
{
int n = pass + i - 1;
calc_y[i] = pval[n];
calc_x[i] = pass + i + 1;
printf("calc_y[%d] = %5.2f, calc_x[%d] = %5.2f, n = %2d\n",
i, calc_y[i], i, calc_x[i], n);
}
linear_regression(calc_x, calc_y, Nhour+1, &m, &b);
printf("Slope= %5.2f, intercept = %5.2f\n", m, b);
}
return 0;
}
void linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
float sumx1 = 0.0;
float sumy1 = 0.0;
float sumx2 = 0.0;
float sumxy = 0.0;
assert(n > 1);
for (int i = 0; i < n; i++)
{
sumx1 += x[i];
sumy1 += y[i];
sumx2 += (x[i] * x[i]);
sumxy += (x[i] * y[i]);
}
float variance = (sumx2 - ((sumx1 * sumx1) / n));
if (variance != 0.0)
{
*beta1 = (sumxy - ((sumx1 * sumy1) / n)) / variance;
*beta0 = (sumy1 - ((*beta1) * sumx1)) / n;
}
else
{
*beta1 = 0.0;
*beta0 = 0.0;
}
}