How to plot continuously (like animation) using PLplot in C

I have written code that continuously calculates and draws its output:
#include "plplot/plplot.h"
#include <math.h>
#include <windows.h>
#define NSIZE 101
int
main( int argc, char *argv[] )
{
PLFLT x[NSIZE], y[NSIZE];
PLFLT xmin = 0., xmax = 1., ymin = 0., ymax = 100.;
int i;
// Prepare data to be plotted.
for ( i = 0; i < NSIZE; i++ )
{
x[i] = (PLFLT) ( i ) / (PLFLT) ( NSIZE - 1 );
y[i] = ymax * x[i] * x[i];
}
// Parse and process command line arguments
plparseopts( &argc, argv, PL_PARSE_FULL );
// Initialize plplot
plinit();
// Create a labelled box to hold the plot.
plenv( xmin, xmax, ymin, ymax, 0, 0 );
pllab( "x", "y=100 x#u2#d", "Simple PLplot demo of a 2D line plot" );
// Plot the data that was prepared above.
for(int i=0;i<20;i++)
{
plline( NSIZE, x, y );
y[40+i]=0;
Sleep(400);
c_plclear();
plflush();
}
// Close PLplot library
plend();
exit( 0 );
}
But the output only updates a few times, and after that the window goes into "not responding".
I'm using Windows 10 64-bit + MinGW-w64 (MSYS2) + Eclipse.
Is there a correct way to do this, or is this a PLplot limitation?

Related

'an illegal memory access' when trying to write to a 2D array allocated using cudaMalloc3D

I am trying to allocate and copy memory of a flattened 2D array onto the device using cudaMalloc3D, to test the performance of cudaMalloc3D. But when I try to write to the array from the kernel, it fails with 'an illegal memory access was encountered'. The program runs fine if I am just reading from the array, but when I try to write to it there is an error. Any help on this will be greatly appreciated. Below is my code and the syntax for compiling the code.
Compile using
nvcc -O2 -arch sm_20 test.cu
Code: test.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define PI 3.14159265
#define NX 8192 /* includes boundary points on both ends */
#define NY 4096 /* includes boundary points on both ends */
#define NZ 1 /* needed for cudaMalloc3D */
#define N_THREADS_X 16
#define N_THREADS_Y 16
#define N_BLOCKS_X NX/N_THREADS_X
#define N_BLOCKS_Y NY/N_THREADS_Y
#define LX 4.0 /* length of the domain in x-direction */
#define LY 2.0 /* length of the domain in y-direction */
#define dx (REAL) ( LX/( (REAL) (NX) ) )
#define cSqrd 5.0
#define dt (REAL) ( 0.4 * dx / sqrt(cSqrd) )
#define FACTOR ( cSqrd * (dt*dt)/(dx*dx) )
#define IC (i + j*NX) /* (i,j) */
#define IM1 (i + j*NX - 1) /* (i-1,j) */
#define IP1 (i + j*NX + 1) /* (i+1,j) */
#define JM1 (i + (j-1)*NX) /* (i,j-1) */
#define JP1 (i + (j+1)*NX) /* (i,j+1) */
// Macro for checking CUDA errors following a CUDA launch or API call
#define cudaCheckError() {\
cudaError_t e = cudaGetLastError();\
if( e != cudaSuccess ) {\
printf("\nCuda failure %s:%d: '%s'\n",__FILE__,__LINE__,cudaGetErrorString(e));\
exit(EXIT_FAILURE);\
}\
}
typedef double REAL;
typedef int INT;
void meshGrid ( REAL *x, REAL *y )
{
INT i,j;
REAL a;
for (j=0; j<NY; j++) {
a = dx * ( (REAL) j );
for (i=0; i<NX; i++) {
x[IC] = dx * ( (REAL) i );
y[IC] = a;
}
}
}
void initWave ( REAL *u, REAL *uold, REAL *x, REAL *y )
{
INT i,j;
for (j=1; j<NY-1; j++) {
for (i=1; i<NX-1; i++) {
u[IC] = 0.1 * (4.0*x[IC]-x[IC]*x[IC]) * ( 2.0*y[IC] - y[IC]*y[IC] );
}
}
for (j=1; j<NY-1; j++) {
for (i=1; i<NX-1; i++) {
uold[IC] = u[IC] + 0.5*FACTOR*( u[IP1] + u[IM1] + u[JP1] + u[JM1] - 4.0*u[IC] );
}
}
}
__global__ void solveWaveGPU ( cudaPitchedPtr uold, cudaPitchedPtr u, cudaPitchedPtr unew )
{
INT i,j;
i = blockIdx.x*blockDim.x + threadIdx.x;
j = blockIdx.y*blockDim.y + threadIdx.y;
if (i>0 && i < (NX-1) && j>0 && j < (NY-1) ) {
char *unewPtr = (char *) unew.ptr;
REAL *unew_row = (REAL *) (unewPtr + i * unew.pitch);
REAL tmp = unew_row[j]; // no error on this line
unew_row[j] = 1.2; // this is where I get the error
}
}
INT main(INT argc, char *argv[])
{
INT nTimeSteps = 10;
// pointers for the host side
REAL *unew, *u, *uold, *uFinal, *x, *y;
// allocate memory on the host
unew = (REAL *)calloc(NX*NY,sizeof(REAL));
u = (REAL *)calloc(NX*NY,sizeof(REAL));
uold = (REAL *)calloc(NX*NY,sizeof(REAL));
uFinal = (REAL *)calloc(NX*NY,sizeof(REAL));
x = (REAL *)calloc(NX*NY,sizeof(REAL));
y = (REAL *)calloc(NX*NY,sizeof(REAL));
// pointer for the device side
size_t pitch = NX * sizeof(REAL);
cudaPitchedPtr d_u, d_uold, d_unew, d_tmp;
cudaExtent myExtent = make_cudaExtent(pitch, NY, NZ);
// allocate 3D memory on the device
cudaMalloc3D( &d_u, myExtent ); cudaCheckError();
cudaMalloc3D( &d_uold, myExtent ); cudaCheckError();
cudaMalloc3D( &d_unew, myExtent ); cudaCheckError();
// initialize grid and wave
meshGrid( x, y );
initWave( u, uold, x, y );
// copy host memory to 3D device memory
cudaMemcpy3DParms cpy3D = { 0 };
cpy3D.kind = cudaMemcpyHostToDevice;
// copying u to d_u
cpy3D.srcPtr = make_cudaPitchedPtr(u, pitch, NX, NY);
cpy3D.dstPtr = d_u;
cpy3D.extent = myExtent;
cudaMemcpy3D( &cpy3D ); cudaCheckError();
// copying uold to d_uold
cpy3D.srcPtr = make_cudaPitchedPtr(uold, pitch, NX, NY);
cpy3D.dstPtr = d_uold;
cpy3D.extent = myExtent;
cudaMemcpy3D( &cpy3D ); cudaCheckError();
// set up the GPU grid/block model
dim3 dimGrid ( N_BLOCKS_X , N_BLOCKS_Y );
dim3 dimBlock ( N_THREADS_X, N_THREADS_Y );
for ( INT n = 1; n < nTimeSteps + 1; n++ ) {
solveWaveGPU <<< dimGrid, dimBlock >>> ( d_uold, d_u, d_unew );
cudaThreadSynchronize();
cudaCheckError();
d_tmp = d_uold;
d_uold = d_u;
d_u = d_unew;
d_unew = d_tmp;
}
// copy the memory back to host
cpy3D.kind = cudaMemcpyDeviceToHost;
// copying d_unew to uFinal
cpy3D.srcPtr = d_unew;
cpy3D.dstPtr = make_cudaPitchedPtr(uFinal, pitch, NX, NY);
cpy3D.extent = myExtent;
cudaMemcpy3D( &cpy3D ); cudaCheckError();
free(u); cudaFree(d_u.ptr);
free(unew); cudaFree(d_unew.ptr);
free(uold); cudaFree(d_uold.ptr);
free(uFinal); free(x); free(y);
return EXIT_SUCCESS;
}
The reason the error doesn't occur on this line:
REAL tmp = unew_row[j]; // no error on this line
is because the compiler is optimizing that line out. It doesn't do anything useful, and so the compiler completely eliminates it. The compiler warning:
xxx.cu(87): warning: variable "tmp" was declared but never referenced
is a hint to that effect.
Your code is very nearly correct. The issue is here:
REAL *unew_row = (REAL *) (unewPtr + i * unew.pitch);
It should be:
REAL *unew_row = (REAL *) (unewPtr + j * unew.pitch);
The i variable in your kernel is the width (i.e. X) dimension.
The j variable is the height (i.e. Y) dimension.
The height is the one that refers to which row you are on, therefore the row pitch should be multiplied by the height parameter, i.e. j, not i.
Similarly, although it's not the source of the specific failure for your particular dimensions, this code may not be what you intended either:
REAL tmp = unew_row[j]; // no error on this line
unew_row[j] = 1.2; // this is where I get the error
If, for example, you were intending to compute the offset to the row and then index into the row (perhaps to set every element in the allocation), then I think you would want to use i, not j, as your final index:
REAL tmp = unew_row[i]; // no error on this line
unew_row[i] = 1.2; // this is where I get the error
However, for this particular example, this is not the actual source of the illegal memory access.
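For reference, here is a minimal sketch of the usual pitched addressing pattern with the fix applied; the kernel name is illustrative, while REAL, NX and NY are taken from the code above:
__global__ void writePitched( cudaPitchedPtr p )
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;   // column index (X / width)
    int j = blockIdx.y * blockDim.y + threadIdx.y;   // row index   (Y / height)
    if ( i < NX && j < NY ) {
        // step down j rows of p.pitch bytes, then index within the row by i
        REAL *row = (REAL *)( (char *)p.ptr + j * p.pitch );
        row[i] = 1.2;
    }
}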

Reverse the fish-eye distortion (I've used OpenCV with VC++)

I've made a simulation of fish-eye distortion.
I want to develop a reverse program that can convert the distorted image back to a normal image.
I've tried to use the undistortPoints() function but couldn't understand the input (dist_coeffs).
cv.UndistortPoints(distorted, undistorted, intrinsics, dist_coeffs)
My code for fish eye distortion:
#include "stdio.h"
#include <cv.h>
#include <highgui.h>
#include <math.h>
#include <iostream>
void sampleImage(const IplImage* arr, float idx0, float idx1, CvScalar& res)
{
if(idx0<0 || idx1<0 || idx0>(cvGetSize(arr).height-1) || idx1>(cvGetSize(arr).width-1))
{
res.val[0]=0;
res.val[1]=0;
res.val[2]=0;
res.val[3]=0;
return;
}
float idx0_fl=floor(idx0);
float idx0_cl=ceil(idx0);
float idx1_fl=floor(idx1);
float idx1_cl=ceil(idx1);
CvScalar s1=cvGet2D(arr,(int)idx0_fl,(int)idx1_fl);
CvScalar s2=cvGet2D(arr,(int)idx0_fl,(int)idx1_cl);
CvScalar s3=cvGet2D(arr,(int)idx0_cl,(int)idx1_cl);
CvScalar s4=cvGet2D(arr,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
res.val[0]= s1.val[0]*(1-x)*(1-y) + s2.val[0]*(1-x)*y + s3.val[0]*x*y + s4.val[0]*x*(1-y);
res.val[1]= s1.val[1]*(1-x)*(1-y) + s2.val[1]*(1-x)*y + s3.val[1]*x*y + s4.val[1]*x*(1-y);
res.val[2]= s1.val[2]*(1-x)*(1-y) + s2.val[2]*(1-x)*y + s3.val[2]*x*y + s4.val[2]*x*(1-y);
res.val[3]= s1.val[3]*(1-x)*(1-y) + s2.val[3]*(1-x)*y + s3.val[3]*x*y + s4.val[3]*x*(1-y);
}
float xscale;
float yscale;
float xshift;
float yshift;
float getRadialX(float x,float y,float cx,float cy,float k)
{
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k)
{
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float thresh = 1;
float calc_shift(float x1,float x2,float cx,float k)
{
float x3 = x1+(x2-x1)*0.5;
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
// std::cerr<<"x1: "<<x1<<" - "<<res1<<" x3: "<<x3<<" - "<<res3<<std::endl;
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0)
{
return calc_shift(x3,x2,cx,k);
}
else
{
return calc_shift(x1,x3,cx,k);
}
}
int main(int argc, char** argv)
{
IplImage* src = cvLoadImage( "D:\\2012 Projects\\FishEye\\Debug\\images\\grid1.bmp", 1 );
IplImage* dst = cvCreateImage(cvGetSize(src),src->depth,src->nChannels);
IplImage* dst2 = cvCreateImage(cvGetSize(src),src->depth,src->nChannels);
float K=0.002;
float centerX=(float)(src->width/2);
float centerY=(float)(src->height/2);
int width = cvGetSize(src).width;
int height = cvGetSize(src).height;
xshift = calc_shift(0,centerX-1,centerX,K);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,K);
yshift = calc_shift(0,centerY-1,centerY,K);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,K);
// scale = (centerX-xshift)/centerX;
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
std::cerr<<xshift<<" "<<yshift<<" "<<xscale<<" "<<yscale<<std::endl;
std::cerr<<cvGetSize(src).height<<std::endl;
std::cerr<<cvGetSize(src).width<<std::endl;
for(int j=0;j<cvGetSize(dst).height;j++)
{
for(int i=0;i<cvGetSize(dst).width;i++)
{
CvScalar s;
float x = getRadialX((float)i,(float)j,centerX,centerY,K);
float y = getRadialY((float)i,(float)j,centerX,centerY,K);
sampleImage(src,y,x,s);
cvSet2D(dst,j,i,s);
}
}
#if 0
cvNamedWindow( "Source1", 1 );
cvShowImage( "Source1", dst);
cvWaitKey(0);
#endif
cvSaveImage("D:\\2012 Projects\\FishEye\\Debug\\images\\grid3.bmp",dst,0);
cvNamedWindow( "Source1", 1 );
cvShowImage( "Source1", src);
cvWaitKey(0);
cvNamedWindow( "Distortion", 2 );
cvShowImage( "Distortion", dst);
cvWaitKey(0);
#if 0
for(int j=0;j<cvGetSize(src).height;j++)
{
for(int i=0;i<cvGetSize(src).width;i++)
{
CvScalar s;
sampleImage(src,j+0.25,i+0.25,s);
cvSet2D(dst,j,i,s);
}
}
cvNamedWindow( "Source1", 1 );
cvShowImage( "Source1", src);
cvWaitKey(0);
#endif
}
Actually, my original answer was about the undistortion algorithm for individual points. If you want to undistort a complete image, there is a much simpler technique, as explained in this other thread:
Understanding of openCV undistortion
The outline of the algorithm (which is the one used in the OpenCV function undistort()) is as follows. For each pixel of the destination lens-corrected image, do:
Convert the pixel coordinates (u_dst, v_dst) to normalized coordinates (x', y') using the inverse of the calibration matrix K,
Apply your lens-distortion model, to obtain the distorted normalized coordinates (x'', y''),
Convert (x'', y'') to distorted pixel coordinates (u_src, v_src) using the calibration matrix K,
Use the interpolation method of your choice to find the intensity/depth associated with the pixel coordinates (u_src, v_src) in the source image, and assign this intensity/depth to the current destination pixel (u_dst, v_dst).
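If you only need the corrected image (rather than individual points), a minimal sketch using OpenCV's C++ API could look like the following; here src is assumed to be the distorted cv::Mat, and K and distCoeffs stand in for your calibration matrix and distortion coefficients:
// needs <opencv2/imgproc/imgproc.hpp>
cv::Mat map1, map2, undistorted;
cv::initUndistortRectifyMap( K, distCoeffs, cv::Mat(), K, src.size(), CV_32FC1, map1, map2 );
cv::remap( src, undistorted, map1, map2, cv::INTER_LINEAR );
initUndistortRectifyMap() builds exactly the (u_src, v_src) lookup described in the steps above, and remap() performs the interpolation.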
Original answer:
Here is the undistortion algorithm extracted from OpenCV function undistortPoints():
void dist2norm(const cv::Point2d &pt_dist, cv::Point2d &pt_norm) const {
    pt_norm.x = (pt_dist.x-Kcx)/Kfx;
    pt_norm.y = (pt_dist.y-Kcy)/Kfy;
    int niters=(Dk1!=0.?5:0);
    double x0=pt_norm.x, y0=pt_norm.y;
    for(int i=0; i<niters; ++i) {
        double x2=pt_norm.x*pt_norm.x,
               y2=pt_norm.y*pt_norm.y,
               xy=pt_norm.x*pt_norm.y,
               r2=x2+y2;
        double icdist = 1./(1 + ((Dk3*r2 + Dk2)*r2 + Dk1)*r2);
        double deltaX = 2*Dp1*xy + Dp2*(r2 + 2*x2);
        double deltaY = Dp1*(r2 + 2*y2) + 2*Dp2*xy;
        pt_norm.x = (x0-deltaX)*icdist;
        pt_norm.y = (y0-deltaY)*icdist;
    }
}
If you provide the coordinates of a point in the distorted image in argument pt_dist, it will calculate the normalized coordinates of the associated point and return them in pt_norm. Then, you can obtain the coordinates of the associated point in the undistorted image as
pt_undist = K . [pt_norm.x; pt_norm.y; 1]
where K is the camera matrix.
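Written out with the same Kfx/Kfy/Kcx/Kcy names used in the snippet above (and assuming the usual pinhole form of K with zero skew), that last step is simply:
double u_undist = Kfx * pt_norm.x + Kcx;
double v_undist = Kfy * pt_norm.y + Kcy;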
The standard lens distortion model used by OpenCV is explained at the beginning of this page; the distortion coefficients are (k1, k2, p1, p2, k3, k4, k5, k6) (most often we use k4 = k5 = k6 = 0).
I don't know what your model for fish-eye distortion is, but you can surely adapt the above algorithm to your case. Otherwise, you may use a non-linear optimization algorithm (e.g. Levenberg-Marquardt or any other) to recover the undistorted coordinates from the distorted ones.

C/CUDA - Help needed to write a program to store images in a buffer

I am new to CUDA programming and I need help writing a program to store images in a memory buffer. I tried modifying the code in the CUDA-OpenGL interop example, given in the CUDA by Example book, to store 2 images one after another in a buffer. I tried to avoid infinite loops, but I am not sure if I succeeded; how should I write the program? Any help in writing a correct program would be very much appreciated!
#include "book.h"
#include "cpu_bitmap.h"
#include "cuda.h"
#include <cuda_gl_interop.h>
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) system ("pause");
}
}
#define DIM 512
#define IMAGESIZE_MAX (DIM*DIM)
GLuint bufferObj;
cudaGraphicsResource *resource;
// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png
__global__ void kernel( uchar4 *ptr1)
{
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x ;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr1[offset].x = 0;
ptr1[offset].y = green;
ptr1[offset].z = 0;
ptr1[offset].w = 255;
}
__global__ void kernel2( uchar4 *ptr2)
{
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x ;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * tan( abs(fx*100) - abs(fy*100) );
unsigned char orange = 1000;
// accessing uchar4 vs unsigned char*
ptr2[offset].x = orange;
ptr2[offset].y = green;
ptr2[offset].z = 0;
ptr2[offset].w = 255;
}
__global__ void copy ( uchar4 *pBuffer, uchar4 *Ptr )
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int idx = x + y * blockDim.x * gridDim.x ;
while ( idx != DIM*DIM)
{
pBuffer[idx] = Ptr[idx] ;
__syncthreads();
}
}
__global__ void copy2 ( uchar4 *pBuffer, uchar4 *Ptr2 )
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int idx = x + y * blockDim.x * gridDim.x ;
int bdx = idx;
while ( (idx < DIM*DIM) && (bdx < DIM*DIM) )
{
uchar4 temp = Ptr2[bdx];
__syncthreads();
pBuffer[idx+4] = temp;
__syncthreads();
if ((idx==DIM*DIM) && (bdx==DIM*DIM))
{
break;
}
}
}
void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
// clean up OpenGL and CUDA
( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
}
}
void draw_func( void ) {
// we pass zero as the last parameter, because out bufferObj is now
// the source, and the field switches from being a pointer to a
// bitmap to now mean an offset into a bitmap object
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
int main( int argc, char **argv ) {
cudaDeviceProp prop;
int dev;
(memset( &prop, 0, sizeof( cudaDeviceProp ) ));
prop.major = 1;
prop.minor = 0;
HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );
// tell CUDA which dev we will be using for graphic interop
// from the programming guide: Interoperability with OpenGL
// requires that the CUDA device be specified by
// cudaGLSetGLDevice() before any other runtime calls.
HANDLE_ERROR( cudaGLSetGLDevice( dev ) );
// these GLUT calls need to be made before the other OpenGL
// calls, else we get a seg fault
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "bitmap" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
// the first three are standard OpenGL, the 4th is the CUDA reg
// of the bitmap these calls exist starting in OpenGL 1.5
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4 ,
NULL, GL_DYNAMIC_DRAW_ARB );
// REGISTER THE GL BufferObj and CUDA Resource
HANDLE_ERROR(( cudaGraphicsGLRegisterBuffer( &resource,
bufferObj,
cudaGraphicsMapFlagsNone ) ));
// do work with the memory dst being on the GPU, gotten via mapping
HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
uchar4* devPtr;
size_t size = DIM*DIM;
size_t sizet = 2*DIM*DIM;
gpuErrchk(cudaMalloc ( (uchar4 **)&devPtr, size));
uchar4 *devPtr2;
gpuErrchk(cudaMalloc ( (uchar4 **)&devPtr2, size));
uchar4 *pBuffer;
gpuErrchk(cudaMalloc ( (uchar4 **)&pBuffer, size));
uchar4 *pBufferCurrent;
gpuErrchk(cudaMalloc ( (uchar4 **)&pBuffer, size));
uchar4 *pBufferImage;
gpuErrchk(cudaMalloc ( (uchar4 **)&pBufferImage, sizet));
// REGISTER THE C BUFFER and CUDA Resource
HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&pBufferImage,
&size,
resource) );
dim3 grids(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grids,threads>>>( devPtr );
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
kernel2<<<grids,threads>>>(devPtr2);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
int a = 1;
do
{
if (a==1)
{
copy<<< 512, 512>>>(pBufferImage, devPtr);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
if(a==2)
{
copy2<<< 512, 512>>>(pBufferImage, devPtr2);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
}
a++;
} while (a<=2);
HANDLE_ERROR ( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
Here's some code I wrote that is a modification of the CUDA by Example code contained here, which I believe is effectively what you started with. I used two kernels, just as you have, to generate either a green or an orange image. It will initially start with the green image displayed, but you can toggle between the green and orange images using the space bar. The ESC key will exit the app.
#include "book.h"
#include "cpu_bitmap.h"
//#include "cuda.h"
#include <cuda_gl_interop.h>
int which_image;
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
dim3 mgrids(DIM/16,DIM/16);
dim3 mthreads(16,16);
// based on ripple code, but uses uchar4 which is the type of data
// graphic inter op uses. see screenshot - basic2.png
__global__ void kernel_gr( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 *
sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
__global__ void kernel_or( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char orange = 128 + 127 *
sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = orange;
ptr[offset].y = orange/2;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
// we pass zero as the last parameter, because out bufferObj is now
// the source, and the field switches from being a pointer to a
// bitmap to now mean an offset into a bitmap object
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 32:
// do work with the memory dst being on the GPU, gotten via mapping
HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
uchar4* devPtr;
size_t size;
HANDLE_ERROR(
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,
&size,
resource) );
if (which_image == 1){
kernel_or<<<mgrids,mthreads>>>( devPtr );
HANDLE_ERROR(cudaPeekAtLastError());
HANDLE_ERROR(cudaDeviceSynchronize());
printf("orange\n");
which_image = 2;
}
else {
kernel_gr<<<mgrids,mthreads>>>( devPtr );
HANDLE_ERROR(cudaPeekAtLastError());
HANDLE_ERROR(cudaDeviceSynchronize());
printf("green\n");
which_image = 1;
}
HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
draw_func();
break;
case 27:
// clean up OpenGL and CUDA
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
}
}
int main( int argc, char **argv ) {
cudaDeviceProp prop;
int dev;
memset( &prop, 0, sizeof( cudaDeviceProp ) );
prop.major = 1;
prop.minor = 0;
HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );
// tell CUDA which dev we will be using for graphic interop
// from the programming guide: Interoperability with OpenGL
// requires that the CUDA device be specified by
// cudaGLSetGLDevice() before any other runtime calls.
HANDLE_ERROR( cudaGLSetGLDevice( dev ) );
// these GLUT calls need to be made before the other OpenGL
// calls, else we get a seg fault
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "bitmap" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
// the first three are standard OpenGL, the 4th is the CUDA reg
// of the bitmap these calls exist starting in OpenGL 1.5
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4,
NULL, GL_DYNAMIC_DRAW_ARB );
HANDLE_ERROR(
cudaGraphicsGLRegisterBuffer( &resource,
bufferObj,
cudaGraphicsMapFlagsNone ) );
// do work with the memory dst being on the GPU, gotten via mapping
HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
uchar4* devPtr;
size_t size;
HANDLE_ERROR(
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,
&size,
resource) );
dim3 grids(DIM/16,DIM/16);
dim3 threads(16,16);
kernel_gr<<<grids,threads>>>( devPtr );
HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );
which_image = 1;
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
I'm not sure if it will be useful; I still don't entirely understand what you want to accomplish. I don't really know what this means:
I just want to store both those images in a buffer and then render the buffer containing those two images in OpenGL.
You want to be able to see one image at a time, and switch images? Or you want to be able to see both images at the same time? If the latter, please explain. Do you want one at the top of the window and one at the bottom of the window? Both of them blended together?
EDIT: It seems to me that you may be wanting some sort of 3D visualization of multiple images, since the back-and-forth with you about what you want hasn't been productive (at least I still can't get a handle on what you want to see VISUALLY, ignoring what goes on under the hood). You haven't tagged this question with OpenGL, so no OpenGL experts are looking at it. Furthermore, you've made statements like: "I will use OpenGL functions to rotate and translate the buffer." If what you're trying to do is create a 3D visualization of a set of images that a user can interact with, this is not the sample code you want to start with. This is basic 2D image display code. Trying to expand the buffer to hold multiple images is the least of your difficulties in creating some sort of 3D visualization in OpenGL, and you will not get to some kind of 3D multi-image display using this sample code.
I suspect that the CUDA-OpenGL interop portion of what you're trying to do is not difficult. I've shown with the example program how you can get 2 different images, generated by 2 different kernels, displayed under user control. So the problem of how to take an image from CUDA and display it, or get it into a buffer that can be displayed, I think is pretty well illustrated.
My suggestion is this: Leave the CUDA-OpenGL interop portion aside. Write an OpenGL program that does what you want, with arbitrary images (generate them however you like, no need to use CUDA.) If you need help with that, pose questions on SO, and tag them with OpenGL so that people who will know how to do it can help you. Then, when you have a prototype of what you want to display visually, you can inject the CUDA portion. And I suspect that part will be pretty simple at that point.

Converting Iplimage to a matrix or an array in Matlab

I am using OpenCV via Matlab to detect faces in a video and then do some processing using Matlab. At the moment I do face detection on the IplImage-structured frames (queried by cvQueryFrame) of the video. I save each of the queried frames as a jpg and then use the face coordinates to get the ROI for the required processing. See the portion of code outlining this below.
% After reading in frame from video..
for i=1:size
img = calllib('highgui210','cvQueryFrame',cvCapture);
calllib('cxcore210','cvFlip',img,img,1);
calllib('highgui210', 'cvSaveImage', 'ThisFrame.jpg', img, ptr);
% Rest of the processing comes here..
This being the case, I feel that there should be an easier and less crude way to convert an IplImage image to a matrix or array in Matlab. Is this a possibility? If yes, how is this done?
Some pointers in this direction would be much appreciated!
Try mexing the following code:
/*
* Usage:
* img = IplImage2mxArray( cvImgPtr, releaseFlag );
*/
void mexFunction( int nout, mxArray* pout[], int nin, const mxArray* pin[]) {
if ( nin != 2 )
mexErrMsgTxt("wrong number of inputs");
if ( nout != 1 )
mexErrMsgTxt("wrong number of outputs");
IplImage* cvImg = (IplImage*)mxGetData(pin[0]); // get the pointer
// allocate the output
mwSize imgDims[3] = {cvImg->height, cvImg->width, cvImg->nChannels};
pout[0] = mxCreateNumericArray( 3, imgDims, mxDOUBLE_CLASS, mxREAL );
if ( pout[0] == NULL )
mexErrMsgTxt("out of memeory");
double* imgP = mxGetPr(pout[0]);
double divVal = pow(2.0, cvImg->depth) - 1;
double releaseFlag = mxGetScalar( pin[1] );
for ( int x = 0 ; x < cvImg->width; x++ ) {
for ( int y = 0 ; y < cvImg->height; y++ ) {
CvScalar s = cvGet2D(cvImg, y, x);
for (int c = 0; c < cvImg->nChannels; c++) {
imgP[ y + x * cvImg->height + c * cvImg->height * cvImg->width ] = s.val[c] / divVal;
}
}
}
if ( releaseFlag != 0 ) {
cvReleaseImage( &cvImg );
}
}
You'll end up with a mex function IplImage2mxArray; use it in Matlab:
>> cvImg = calllib('highgui210','cvQueryFrame',cvCapture);
>> img = IplImage2mxArray( cvImg, false ); % no cvReleaseImage after cvQueryFrame
Due to internal OpenCV representations, the channels of img might be permuted (BGR instead of RGB). Also note that img might contain four channels (an additional alpha channel).
-Shai

Triangle doesn't move and rotate around a point - C BGI graphics

Greetings,
I have this graphics homework in BGI graphics. We must use DevCPP and BGI, and matrices.
I wrote this code, and I think the transformations are good. But my triangle doesn't move and rotate around the circle, and I don't understand why it doesn't move around the circle...
I don't know where and what I have to rewrite.
#include <math.h>
#include "graphics.h"
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#define PI 3.14159265
typedef float Matrix3x3[3][3];
Matrix3x3 theMatrix;
int Round( double n ){
return (int)( n + 0.5 );
}
void matrix3x3SetIdentity(Matrix3x3 m)
{
int i, j;
for(i=0; i<3; i++)
for(j=0; j<3;j++)
m[i][j]=(i==j);
}
/* Multiplies matrix, result in b matrix */
void matrix3x3PreMultiply(Matrix3x3 a, Matrix3x3 b)
{
int r, c;
Matrix3x3 tmp;
for(r=0; r<3;r++)
for(c=0; c<3;c++)
tmp[r][c]=
a[r][0]*b[0][c]+a[r][1]*b[1][c]+a[r][2]*b[2][c];
for(r=0; r<3;r++)
for(c=0; c<3; c++)
b[r][c]-tmp[r][c];
}
void translate2(int tx, int ty)
{
Matrix3x3 m;
matrix3x3SetIdentity (m);
m[0][2] = tx;
m[1][2] = ty;
matrix3x3PreMultiply(m, theMatrix);
}
void scale2 (float sx, float sy, pont2d refpt)
{
Matrix3x3 m;
matrix3x3SetIdentity(m);
m[0][0]=sx;
m[0][2]=(1-sx)*refpt.x;
m[1][1]=sy;
m[1][2]=(1-sy)*refpt.y;
matrix3x3PreMultiply(m, theMatrix);
}
void rotate2 (float a, pont2d refpt)
{
Matrix3x3 m;
matrix3x3SetIdentity(m);
a=a/PI;
m[0][0] = cosf(a);
m[0][1] = -sinf(a);
m[0][2] = refpt.x * (1-cosf(a)) + refpt.y * sinf(a);
m[1][0] = sinf (a);
m[1][1] = cosf (a);
m[1][2] = refpt.y * (1-cosf(a)) - refpt.x * sinf(a);
matrix3x3PreMultiply(m, theMatrix);
}
void transformPoints2 (int npts, pont2d *pts)
{
int k;
float tmp;
for (k = 0; k < npts; k++) {
tmp = theMatrix[0][0] * pts[k].x + theMatrix[0][1] *
pts[k].y + theMatrix[0][2];
pts[k].y = theMatrix[1][0] * pts[k].x + theMatrix[1][1] *
pts[k].y + theMatrix[1][2];
pts[k].x = tmp;
}
}
int main()
{
int gd, gm, i, page=0;
gd=VGA;gm=VGAHI;
initgraph(&gd,&gm,"");
int ap;
while(!kbhit())
{
setactivepage(page);
cleardevice();
pont2d P[3] = { 50.0, 50.0, 150.0, 50.0, 100.0, 150.0};
pont2d refPt = {200.0, 250.0};
// Drawing the Triangle
moveto( Round( P[ 0 ].x ), Round( P[ 0 ].y ) );
for( i = 1; i < 3; i++ )
lineto( Round( P[ i ].x ), Round( P[ i ].y ) );
lineto( Round( P[ 0 ].x ), Round( P[ 0 ].y ) );
// Drawing the Circle
fillellipse(200, 250, 5,5);
setcolor (BLUE);
matrix3x3SetIdentity (theMatrix);
scale2 (0.5, 0.5, refPt);
//scale2 (20, 20, refPt);
rotate2 (90.0, refPt);
translate2 (0, 150);
transformPoints2 (3, P);
setvisualpage(page);
page = 1-page;
}
getch();
closegraph();
return 0;
}
If you want to see the object "spin", then rotations should be performed about the local origin. Rotation about the global origin will cause the object to "orbit" the global origin. Thus, to spin the object:
Translate the object to global origin
Apply the rotation
Translate the object back to its original position
Look at the discussion regarding transformation order here for an illustration. Specifically, look for the section entitled "Demonstration of the importance of transformation order".
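In terms of the helper functions already in the question, that sequence might look roughly like the sketch below; triCx/triCy are illustrative names for the triangle's own centre, and the angle is whatever rotate2() expects:
pont2d origin = { 0.0f, 0.0f };
matrix3x3SetIdentity( theMatrix );
translate2( -triCx, -triCy );   /* 1. move the triangle's centre to the global origin */
rotate2( angle, origin );       /* 2. rotate about the global origin                  */
translate2( triCx, triCy );     /* 3. move the triangle back to where it was          */
transformPoints2( 3, P );       /* apply the combined transform to the vertices       */
Because matrix3x3PreMultiply() premultiplies each new matrix onto theMatrix, calling the helpers in this order composes translate-rotate-translate as required.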
To rotate a triangle, get the three points and use the formula:
x' = x + r cos (theta)
y' = y - r sin (theta)
The above formula can be applied in a loop where theta goes from 0 to 360. You can get a simple animation by calling delay(200) (200 milliseconds) inside the loop.
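A minimal sketch of that loop in BGI terms; cx, cy are the circle's centre and r the orbit radius (illustrative names), delay() comes from dos.h, and Round()/PI are defined in the question's code:
for ( int theta = 0; theta < 360; theta += 5 )
{
    double rad = theta * PI / 180.0;
    int px = Round( cx + r * cos( rad ) );
    int py = Round( cy - r * sin( rad ) );   /* minus: screen y grows downwards */
    cleardevice();
    fillellipse( cx, cy, 5, 5 );             /* the fixed circle   */
    fillellipse( px, py, 3, 3 );             /* the orbiting point */
    delay( 200 );                            /* 200 ms per step    */
}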
