I want to declare my texture once and use it in all my kernels and files. Therefore, I declare it as extern in a header and include that header in all other files (following the SO question "How do I use extern to share variables between source files?").
I have a header cudaHeader.cuh file containing my texture:
extern texture<uchar4, 2, cudaReadModeElementType> texImage;
In my file1.cu, I allocate my CUDA array and bind it to the texture:
// Allocate a 2D CUDA array of uchar4 texels, upload the host image into it,
// configure the texture reference and bind the array to it. Every step
// returns the failing cudaError_t to the caller after logging to stderr.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc< uchar4 >( );
cudaStatus=cudaMallocArray( &cu_array_image, &channelDesc, width, height );
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMallocArray failed! cu_array_image couldn't be created.\n");
return cudaStatus;
}
// NOTE(review): size_image is passed as the byte count of the copy -
// presumably width * height * sizeof(uchar4); confirm against the caller.
cudaStatus=cudaMemcpyToArray( cu_array_image, 0, 0, image, size_image, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMemcpyToArray failed! Copy from the host memory to the device texture memory failed.\n");
return cudaStatus;
}
// set texture parameters
// NOTE(review): wrap addressing is documented as only supported with
// normalized coordinates, which are disabled below - confirm the intent.
texImage.addressMode[0] = cudaAddressModeWrap;
texImage.addressMode[1] = cudaAddressModeWrap;
texImage.filterMode = cudaFilterModePoint;
texImage.normalized = false; // false: fetch with unnormalized (texel index) coordinates
// Bind the array to the texture
cudaStatus=cudaBindTextureToArray( texImage, cu_array_image, channelDesc);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaBindTextureToArray failed! cu_array couldn't be bind to texImage.\n");
return cudaStatus;
}
In file2.cu, I use the texture in the kernel function as follows:
/**
 * Copies the x/y/z channels of the 2D texture `texImage` into a packed
 * buffer holding 3 bytes per pixel.
 *
 * Launch layout: 2D grid of 2D blocks; thread (x, y) handles pixel (x, y).
 * The grid may overshoot the image in either direction.
 *
 * @param width     image width in pixels
 * @param height    image height in pixels
 * @param dev_image output buffer, at least width * height * 3 bytes
 */
__global__ void kernel(int width, int height, unsigned char *dev_image) {
    int x = blockIdx.x*blockDim.x + threadIdx.x;
    int y = blockIdx.y*blockDim.y + threadIdx.y;

    // Both axes must be guarded: the original only checked y, so threads
    // with x >= width wrote past the end of their row (and of the buffer).
    if (x >= width || y >= height) {
        return;
    }

    uchar4 tempcolor = tex2D(texImage, x, y);

    // Widen before multiplying so large images cannot overflow int.
    size_t base = ((size_t)y * width + x) * 3;
    dev_image[base]     = tempcolor.x;
    dev_image[base + 1] = tempcolor.y;
    dev_image[base + 2] = tempcolor.z;
}
The problem is that my texture contains nothing or corrupt values when I use it in my file2.cu. Even if I use the function kernel directly in file1.cu, the data are not correct.
If I add: texture<uchar4, 2, cudaReadModeElementType> texImage; in file1.cu and file2.cu, the compiler says that there is a redefinition.
EDIT:
I tried the same thing with CUDA version 5.0 but the same problem appears. If I print the address of texImage in file1.cu and file2.cu, I get two different addresses. There must be a problem with the declaration of the variable texImage.
This is a very old question and answers were provided in the comments by talonmies and Tom. In the pre-CUDA 5.0 scenario, extern textures were not feasible due to the lack of a true linker leading to extern linkage possibilities. As a consequence, and as mentioned by Tom,
you can have different compilation units, but they cannot reference each other
In the post-CUDA 5.0 scenario, extern textures are possible and I want to provide a simple example below, showing this in the hope that it could be useful to other users.
kernel.cu compilation unit
#include <stdio.h>
texture<int, 1, cudaReadModeElementType> texture_test;
/********************/
/* CUDA ERROR CHECK */
/********************/
// Wraps a CUDA runtime call and reports the call site on failure.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Prints the CUDA error string together with file/line to stderr when
// `code` is not cudaSuccess; terminates the process with `code` as exit
// status unless `abort` is false.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
        return;

    const char *message = cudaGetErrorString(code);
    fprintf(stderr,"GPUassert: %s %s %d\n", message, file, line);

    if (abort)
        exit(code);
}
/*************************/
/* LOCAL KERNEL FUNCTION */
/*************************/
// Each thread prints its index and the texel of texture_test at that index.
__global__ void kernel1() {
    const unsigned int tid = threadIdx.x;
    const int texel = tex1Dfetch(texture_test, tid);
    printf("ThreadID = %i; Texture value = %i\n", tid, texel);
}
__global__ void kernel2();
/********/
/* MAIN */
/********/
// Fills a 16-element device buffer with 0..15, binds it to texture_test and
// launches kernel1 (this compilation unit) and kernel2 (another compilation
// unit) to show that both see the same texture. All resources are released
// before returning.
int main() {
    const int N = 16;

    // --- Host data allocation and initialization
    int *h_data = (int*)malloc(N * sizeof(int));
    if (h_data == NULL) {
        fprintf(stderr, "Host allocation of %d ints failed\n", N);
        return 1;
    }
    for (int i=0; i<N; i++) h_data[i] = i;

    // --- Device data allocation and host->device memory transfer
    int *d_data = NULL;
    gpuErrchk(cudaMalloc((void**)&d_data, N * sizeof(int)));
    gpuErrchk(cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice));

    gpuErrchk(cudaBindTexture(NULL, texture_test, d_data, N * sizeof(int)));

    // One thread per element; each launch is checked for launch and
    // execution errors before moving on.
    kernel1<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    kernel2<<<1, N>>>();
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaUnbindTexture(texture_test));

    // --- Cleanup (missing in the original, which leaked both buffers)
    gpuErrchk(cudaFree(d_data));
    free(h_data);
    return 0;
}
kernel2.cu compilation unit
#include <stdio.h>
extern texture<int, 1, cudaReadModeElementType> texture_test;
/**********************************************/
/* DIFFERENT COMPILATION UNIT KERNEL FUNCTION */
/**********************************************/
// Each thread prints the texel of the externally defined texture_test at
// its own thread index.
__global__ void kernel2() {
    const int texel = tex1Dfetch(texture_test, threadIdx.x);
    printf("Texture value = %i\n", texel);
}
Remember to compile with relocatable device code enabled (pass -rdc=true to nvcc) so that external linkage works across compilation units.
Related
I have pslib installed and in the latest version on an ubuntu system.
the library is installed at: "/usr/include/libps/pslib.h"
when I try compiling, the postscript PS objects are not recognized.
...
/usr/bin/ld: draw.c:(.text+0x1868): undefined reference to `PS_stroke'
...
and so on. I don't see any thing on the pslib webpage, about needing to include the library in the gcc build command.
what do I need to do to build C code with pslib? I am on Ubuntu Linux
#include <stdlib.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <locale.h>
#include <libps/pslib-mp.h>
/* Allocation callback handed to PS_new2: forwards to malloc().
 * The document handle and the caller name are not used. */
void * my_malloc(PSDoc *p, size_t size, const char *caller) {
	(void) p;
	(void) caller;
	return malloc(size);
}
/* Reallocation callback handed to PS_new2: forwards to realloc().
 * The document handle and the caller name are not used. */
void * my_realloc(PSDoc *p, void *mem, size_t size, const char *caller) {
	void *resized;

	(void) p;
	(void) caller;
	resized = realloc(mem, size);
	return resized;
}
/* Deallocation callback handed to PS_new2: forwards to free().
 * The document handle is not used. */
void my_free(PSDoc *p, void *mem) {
	(void) p;
	free(mem);
}
/* Writes "polish.ps": one A4-sized page with two lines of text set in the
 * "plr10" font, using ISO-8859-2 as the input encoding.
 *
 * Returns 0 on success and EXIT_FAILURE when the document cannot be
 * created, the output file cannot be opened or the font cannot be loaded.
 * (The original ignored all three results and carried on with an invalid
 * handle / font id.) Unused layout variables were removed. */
int main() {
	PSDoc *psdoc;
	int antiqua;
	const float leftmargin = 100;
	const float fontsize = 10.0;

	PS_boot();

	psdoc = PS_new2(NULL, my_malloc, my_realloc, my_free, NULL);
	if (psdoc == NULL) {
		fprintf(stderr, "Could not create PostScript document\n");
		PS_shutdown();
		return EXIT_FAILURE;
	}

	/* A negative return value signals that the file could not be opened. */
	if (PS_open_file(psdoc, "polish.ps") < 0) {
		fprintf(stderr, "Could not open polish.ps for writing\n");
		PS_delete(psdoc);
		PS_shutdown();
		return EXIT_FAILURE;
	}

	PS_set_info(psdoc, "Creator", __FILE__);
	PS_set_info(psdoc, "Author", "Uwe Steinmann");
	PS_set_info(psdoc, "Title", "Polish letters");
	PS_set_info(psdoc, "Keywords", "polish, latin2, iso-8859-1");
	PS_set_info(psdoc, "BoundingBox", "0 0 596 842");
	PS_set_parameter(psdoc, "inputencoding", "ISO-8859-2");
	PS_set_parameter(psdoc, "warning", "true");

	/* Font ids are positive; zero means the font could not be loaded. */
	antiqua = PS_findfont(psdoc, "plr10", "", 1);
	if (antiqua == 0) {
		fprintf(stderr, "Could not load font plr10\n");
		PS_close(psdoc);
		PS_delete(psdoc);
		PS_shutdown();
		return EXIT_FAILURE;
	}

	PS_begin_page(psdoc, 596, 842);
	PS_setfont(psdoc, antiqua, fontsize);
	PS_set_value(psdoc, "leading", 15.0);
	PS_show_xy(psdoc, "±æê³ñ󶼿 ¡ÆÊ£ÑÓ¦¬¯", leftmargin, 100);
	PS_show_xy(psdoc, "><=!abc~_-", leftmargin, 200);
	PS_end_page(psdoc);

	PS_deletefont(psdoc, antiqua);
	PS_close(psdoc);
	PS_delete(psdoc);
	PS_shutdown();
	return 0;
}
You need to use -lps when linking (or maybe -lps-mp).
This is specified in the documentation:
Programs which want to use pslib will have to include the header file libps/pslib.h and link against libps
The general rule is that -lXXX is used to link the library names libXXX.
I know that it is possible to write Level-2 MATLAB S-Functions with variable-sized signals.
Is it also somehow possible to do that in C MEX S-Functions?
My data has a different size at each time step. This requirement originates from a compressing block which gets a fixed size signal (2D) as its input. However the output signal (1D / Vector) changes its size at every mdlOutput().
The comments of the question already answered it:
Yes it is possible!
Here my example:
// Required S-Function header
#define S_FUNCTION_LEVEL 2
#define S_FUNCTION_NAME sfunc_varsignal
#include "simstruc.h"
// Port indices. The trailing enumerator of each enum evaluates to the number
// of ports and is passed to ssSetNumInputPorts / ssSetNumOutputPorts.
enum {INPUT_PORT = 0, NUM_INPUT_PORTS};
enum {OUTPUT_PORT = 0, NUM_OUPUT_PORTS};
/**
 * "Specify the number of inputs, outputs, states, parameters, and other
 * characteristics of the C MEX S-function"
 *
 * Registers zero dialog parameters, one input port and one output port.
 * Both ports get dynamic dimension info, one dimension and the
 * INHERIT_DIMS_MODE dimensions mode; the input has direct feedthrough and
 * must be contiguous. One sample time is registered.
 */
static void mdlInitializeSizes(SimStruct* S)
{
boolean_T boolret;
int_T intret;
// Parameter
ssSetNumSFcnParams(S, 0);
if (ssGetNumSFcnParams(S) != ssGetSFcnParamsCount(S))
{
return; // Parameter mismatch will be reported by Simulink
}
// Input port
boolret = ssSetNumInputPorts(S, NUM_INPUT_PORTS);
if (boolret == 0)
{
return;
}
// mdlOutputs reads the input signal directly, hence direct feedthrough.
ssSetInputPortDirectFeedThrough(S, INPUT_PORT, 1);
intret = ssSetInputPortDimensionInfo(S, INPUT_PORT, DYNAMIC_DIMENSION);
if (intret == 0)
{
ssWarning(S, "Input dimensions could not be set.");
}
_ssSetInputPortNumDimensions(S, INPUT_PORT, 1);
// Output port
boolret = ssSetNumOutputPorts(S, NUM_OUPUT_PORTS);
if (boolret == 0)
{
return;
}
intret = ssSetOutputPortDimensionInfo(S, OUTPUT_PORT, DYNAMIC_DIMENSION);
if (intret == 0)
{
ssWarning(S, "Output dimensions could not be set.");
}
_ssSetOutputPortNumDimensions(S, OUTPUT_PORT, 1);
// Sample Times
ssSetNumSampleTimes(S, 1);
// Dimension Modes of the Ports
ssSetInputPortDimensionsMode(S, INPUT_PORT, INHERIT_DIMS_MODE);
ssSetOutputPortDimensionsMode(S, OUTPUT_PORT, INHERIT_DIMS_MODE);
// This is required for any kind of variable size signal:
ssSetInputPortRequiredContiguous(S, INPUT_PORT, true);
ssSetOptions(S, 0);
// Note: The documentation of ssSetOutputPortWidth states:
// "If the width is dynamically sized, the S-function must provide
// mdlSetOutputPortDimensionInfo and mdlSetDefaultPortDimensionInfo
// methods to enable the signal dimensions to be set correctly
// during signal propagation."
// However, in the example sfun_varsize_concat1D these methods are
// not present. The function _ssSetOutputPortNumDimensions() may be sufficient.
// The usage of this function is copied from the example sfcndemo_varsize.
}
#if defined(MATLAB_MEX_FILE)
/**
 * "Set the width of an input port that accepts 1-D (vector) signals"
 *
 * Accepts the width proposed for the input port and, once the input width
 * is no longer dynamically sized, applies the same width to the output port.
 */
#define MDL_SET_INPUT_PORT_WIDTH
static void mdlSetInputPortWidth(SimStruct* S, int_T port, int_T width)
{
    // Take over the proposed width (e.g. the output width of the
    // block driving this port).
    ssSetInputPortWidth(S, port, width);

    // Only propagate to the output when the input width is now resolved.
    const bool inputStillDynamic =
        (ssGetInputPortWidth(S, INPUT_PORT) == DYNAMICALLY_SIZED);
    if (inputStillDynamic)
    {
        return;
    }
    ssSetOutputPortWidth(S, OUTPUT_PORT, width);
}
/**
 * "Set the width of an output port that outputs 1-D (vector) signals"
 *
 * Intentionally a no-op: all three arguments are ignored. The output width
 * is assigned in mdlSetInputPortWidth() instead.
 */
#define MDL_SET_OUTPUT_PORT_WIDTH
static void mdlSetOutputPortWidth(SimStruct* S, int_T port, int_T width)
{
// Nothing to do here, but the method is required because the output port
// is declared as dynamically sized. Its size is set in mdlSetInputPortWidth().
UNUSED_ARG(S);
UNUSED_ARG(port);
UNUSED_ARG(width);
return;
}
#endif //defined(MATLAB_MEX_FILE)
/**
 * "Specify the sample rates at which this C MEX S-function operates"
 *
 * Configures the single sample time as inherited with zero offset and
 * requests the default model-reference sample-time inheritance rule.
 */
static void mdlInitializeSampleTimes(SimStruct* S)
{
    const int_T sampleTimeIdx = 0;  // the only sample time of this block

    ssSetSampleTime(S, sampleTimeIdx, INHERITED_SAMPLE_TIME);
    ssSetOffsetTime(S, sampleTimeIdx, 0.0);

    ssSetModelReferenceSampleTimeDefaultInheritance(S);
}
/**
 * "Compute the signals that this block emits."
 *
 * Reads the requested output length from the first input sample, makes it
 * the current size of output dimension 0 and fills the output with
 * 1, 2, ..., width.
 *
 * A requested length that is negative, NaN or below 1 yields an empty
 * output. The original cast the raw sample straight to int, so a negative
 * or NaN sample reached ssSetCurrentOutputPortDimensions unchecked (and the
 * NaN-to-int conversion itself is undefined).
 */
static void mdlOutputs(SimStruct* S, int_T tid)
{
    UNUSED_ARG(tid);
    const real_T* insignal = ssGetInputPortRealSignal(S, INPUT_PORT);

    // The comparison is false for NaN, so NaN also maps to width 0.
    const real_T requested = insignal[0];
    const int_T width = (requested >= 1.0) ? static_cast<int_T>(requested) : 0;
    // NOTE(review): no upper bound is enforced here. Presumably the width
    // must not exceed the maximum size propagated for the output port -
    // confirm and clamp if so.

    // This function does the trick:
    ssSetCurrentOutputPortDimensions(S, OUTPUT_PORT, 0, width);

    // Read the size back instead of trusting `width`; it should be equal.
    const int_T newWidth = ssGetCurrentOutputPortDimensions(S, OUTPUT_PORT, 0 /* dimension ID */);

    real_T* outsignal = ssGetOutputPortRealSignal(S, OUTPUT_PORT);
    for (int_T i = 0; i < newWidth; i++)
    {
        outsignal[i] = i + 1;
    }
}
/**
 * "Perform any actions required at termination of the simulation"
 *
 * Nothing to release: the block only marks its argument as unused.
 */
static void mdlTerminate(SimStruct* S)
{
UNUSED_ARG(S);
}
// Required S-function trailer
#ifdef MATLAB_MEX_FILE /* Is this file being compiled as a MEX-file? */
#include "simulink.c" /* MEX-file interface mechanism */
#else
#include "cg_sfun.h" /* Code generation registration function */
#endif
I have a program which (for now) calculates values of two functions in random points on GPU , sends these values back to host, and then visualizes them. This is what I get, some nice semi-random points:
Now, if I modify my kernel code and add the local array initialization code at the very end,
// Question excerpt: evaluates ZDT_f1 / ZDT_f2 at points produced by
// generateX and stores the two values in consecutive slots of testPoints.
// Each thread starts at slot pair ind*2 and advances by step*2 slots.
__global__ void optymalize(curandState * state, float* testPoints)
{
int ind=blockDim.x*blockIdx.x+threadIdx.x;
int step=blockDim.x*gridDim.x;
for(int i=ind*2;i<NOF*TEST_POINTS;i+=step*2)
{
// x is whatever pointer generateX returns; it is read by both calls below.
float* x=generateX(state);
testPoints[i]=ZDT_f1(x);
testPoints[i+1]=ZDT_f2(x);
}
// The declaration below is never read; it is the change that alters the
// plotted output according to the question text.
//works fine with 'new'
//float* test_array=new float[2];
float test_array[2]={1.0f,2.0f};
}
I get something like this everytime:
Does anyone know the cause of this behavior? All the drawn points are computed BEFORE test_array is initialized, yet they are affected by it. It doesn't happen when I initialize test_array before the 'for' loop.
Host/device code:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "curand_kernel.h"
#include "device_functions.h"
#include <random>
#include <iostream>
#include <time.h>
#include <fstream>
using namespace std;
#define XSIZE 5
#define TEST_POINTS 100
#define NOF 2
#define BLOCK_COUNT 64
#define THR_COUNT 128
#define POINTS_PER_THREAD (NOF*TEST_POINTS+THR_COUNT*BLOCK_COUNT-1)/(THR_COUNT*BLOCK_COUNT)
// Wraps a CUDA runtime call and reports the call site on failure.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Prints the CUDA error string plus file/line to stderr when `code` is not
// cudaSuccess. Exits with `code` only when `abort` is true (default: keep
// running).
//
// `file` is const-qualified because the macro passes __FILE__, a string
// literal; binding a literal to a plain `char*` is ill-formed in C++11 and
// later.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=false)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
// Returns 1 + 9 * (x[1] * x[2] * ... * x[XSIZE-1]) / (XSIZE - 1).
// x must hold at least XSIZE elements; x[0] is not read.
__device__ float g(float* x)
{
    float product = 1;
    int k = 1;
    while (k < XSIZE)
    {
        product *= x[k];
        ++k;
    }
    const float scaled = product/(XSIZE-1);
    return 1+9*scaled;
}
// First objective: simply the first coordinate of x.
__device__ float ZDT_f1(float* x)
{
    const float first = *x;
    return first;
}
// Second objective: g(x) * (1 - sqrt(x[0] / g(x))).
__device__ float ZDT_f2(float* x)
{
    const float gValue = g(x);
    const float ratio = x[0]/gValue;
    return gValue*(1-sqrtf(ratio));
}
// Returns false as soon as some x1[i] >= x2[i] for i in [0, XSIZE);
// returns true when no such component exists.
__device__ bool oneDominatesTwo(float* x1, float* x2)
{
    bool dominates = true;
    // Negating ">=" (rather than testing "<") keeps the unordered/NaN
    // behaviour of the comparison unchanged.
    for (int k = 0; dominates && k < XSIZE; ++k)
        dominates = !(x1[k] >= x2[k]);
    return dominates;
}
// Fills the caller-provided buffer x (at least XSIZE floats) with uniform
// random numbers drawn from this thread's RNG state and returns x.
//
// The previous version returned the address of a function-local array,
// which is undefined behaviour: the storage is gone once the function
// returns, so unrelated changes to the caller altered the results. The
// buffer is now owned by the caller.
//
// The state is selected with the global thread index, matching the index
// used by setup_kernel, so threads of different blocks no longer share (and
// race on) one state. globalState must hold one entry per launched thread.
__device__ float* generateX(curandState* globalState, float* x)
{
    const int ind = blockDim.x*blockIdx.x + threadIdx.x;
    for (int i = 0; i < XSIZE; i++)
        x[i] = curand_uniform(&globalState[ind]);
    return x;
}

// Initialises one curand state per launched thread: thread `id` seeds
// state[id] with sequence number `id`.
// Precondition: state holds at least gridDim.x * blockDim.x entries.
__global__ void setup_kernel ( curandState * state, unsigned long seed )
{
    int id = blockDim.x*blockIdx.x+threadIdx.x;
    curand_init ( seed, id, 0, &state[id] );
}

// Evaluates ZDT_f1 / ZDT_f2 at random points and stores the two values in
// consecutive slots of testPoints (NOF * TEST_POINTS floats in total).
// Each thread starts at slot pair ind*2 and advances by step*2 slots, so any
// 1D launch configuration covers the whole buffer.
// Precondition: state holds one entry per launched thread.
__global__ void optymalize(curandState * state, float* testPoints)
{
    int ind=blockDim.x*blockIdx.x+threadIdx.x;
    int step=blockDim.x*gridDim.x;

    // Per-thread scratch point, owned here and filled by generateX.
    float x[XSIZE];

    for(int i=ind*2;i<NOF*TEST_POINTS;i+=step*2)
    {
        generateX(state, x);
        testPoints[i]=ZDT_f1(x);
        testPoints[i+1]=ZDT_f2(x);
    }
}
// Writes the results to "result.txt", one line per point: the values at
// result[i] and result[i+1] separated by a space, for i stepping by NOF
// over NOF*TEST_POINTS entries.
void saveResultToFile(float* result)
{
    ofstream out("result.txt");
    const unsigned int total = NOF*TEST_POINTS;
    unsigned int k = 0;
    while (k < total)
    {
        out << result[k] << " " << result[k+1] << "\n";
        k += NOF;
    }
    out.close();
}
// Seeds one RNG state per launched thread, runs optymalize, copies the
// NOF * TEST_POINTS results back, writes them to result.txt and starts the
// plot script.
//
// Fixes relative to the original:
//  * devStates held THR_COUNT states although setup_kernel initialises one
//    per launched thread (BLOCK_COUNT * THR_COUNT) - an out-of-bounds write.
//  * cudaThreadSetLimit is deprecated; cudaDeviceSetLimit replaces it.
//  * Launch errors are queried right after each launch, before the sync.
//  * cudaEventElapsedTime / cudaFree results are checked; events and the
//    host buffer are released.
int main()
{
    const size_t pointCount = NOF * TEST_POINTS;
    const size_t stateCount = (size_t)BLOCK_COUNT * THR_COUNT;

    float* dev_fPoints = NULL;
    float* fPoints = new float[pointCount];
    gpuErrchk(cudaMalloc((void**)&dev_fPoints, pointCount * sizeof(float)));

    curandState* devStates = NULL;
    gpuErrchk(cudaMalloc((void**)&devStates, stateCount * sizeof(curandState)));

    cudaEvent_t start;
    gpuErrchk(cudaEventCreate(&start));
    cudaEvent_t stop;
    gpuErrchk(cudaEventCreate(&stop));

    gpuErrchk(cudaDeviceSetLimit(cudaLimitMallocHeapSize, 128*1024*1024));
    gpuErrchk(cudaEventRecord(start, NULL));

    setup_kernel<<<BLOCK_COUNT, THR_COUNT>>>(devStates,unsigned(time(NULL)));
    gpuErrchk(cudaGetLastError());        // launch-configuration errors
    gpuErrchk(cudaDeviceSynchronize());   // errors raised while running

    optymalize<<<BLOCK_COUNT,THR_COUNT>>>(devStates, dev_fPoints);
    gpuErrchk(cudaGetLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaMemcpy(fPoints, dev_fPoints, pointCount * sizeof(float), cudaMemcpyDeviceToHost));

    gpuErrchk(cudaEventRecord(stop, NULL));
    gpuErrchk(cudaEventSynchronize(stop));
    float msecTotal = 0.0f;
    gpuErrchk(cudaEventElapsedTime(&msecTotal, start, stop));
    cout<<"Kernel execution time: "<<msecTotal<< "ms"<<endl;

    saveResultToFile(fPoints);
    system("start pythonw plot_data.py result.txt");

    gpuErrchk(cudaEventDestroy(start));
    gpuErrchk(cudaEventDestroy(stop));
    gpuErrchk(cudaFree(dev_fPoints));
    gpuErrchk(cudaFree(devStates));
    delete[] fPoints;

    system("pause");
    return 0;
}
Plot script code:
# Plots the whitespace-separated "x y" pairs stored in the file named on the
# command line as red dots.
import sys

import matplotlib.pyplot as plt

if len(sys.argv) < 2:
    print("Usage: python PlotScript <filename>")
    # A usage error is a failure, so report a non-zero exit status.
    sys.exit(1)

path = sys.argv[1]
x = []
y = []
with open(path, "r") as f:
    for line in f:
        vals = line.split()
        if len(vals) < 2:
            # Skip blank or incomplete lines instead of raising IndexError.
            continue
        # Convert to numbers: plotting the raw strings makes matplotlib
        # treat every value as a category label instead of a coordinate.
        x.append(float(vals[0]))
        y.append(float(vals[1]))

plt.plot(x, y, 'ro')
plt.show()
The basic problem was in code you originally didn't show in your question, specifically this:
// Quoted from the question as the faulty code: fills a function-local array
// with XSIZE uniform random numbers and returns its address.
__device__ float* generateX(curandState* globalState)
{
int ind = threadIdx.x;
float x[XSIZE]; // local to this call
for(int i=0;i<XSIZE;i++)
x[i]=curand_uniform(&globalState[ind]);
return x; // BUG: returns the address of the local array declared above
}
Returning an address or reference to a local scope variable from a function results in undefined behaviour. It is only valid to use x by reference or value within generateX while it is in scope. There should be no surprise that adding or moving other local scope variables around within the kernel changes the kernel behaviour.
Fix this function so it populates an array passed by reference, rather than returning the address of a local scope array. And pay attention to compiler warnings - there will have been one for this which should have immediately set off alarm bells that there was something wrong.
The Problem
I have prepared one sample CUDA code using the constant memory. I can run this in cuda 4.2 successfully but I get "invalid device symbol" when I compile using the CUDA 5.
I have attached the sample code here.
The Code
#include <iostream>
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda.h>
// Parameter record: one integer followed by three floats, in this order.
struct CParameter
{
    int   A;
    float B, C, D;
};
__constant__ CParameter * CONSTANT_PARAMETER;
#define PARAMETER "CONSTANT_PARAMETER"
// Returns true when Status is cudaSuccess; otherwise prints the CUDA error
// string and returns false.
//
// The message is printed through an explicit "%s" format: passing the error
// string itself as the format (as before) would misinterpret any '%' in it.
// A newline is appended so consecutive messages do not run together.
bool ERROR_CHECK(cudaError_t Status)
{
    if(Status != cudaSuccess)
    {
        printf("%s\n", cudaGetErrorString(Status));
        return false;
    }
    return true;
}
// Device kernel: multiplies each of the first N elements of `a` by the A
// field of the structure CONSTANT_PARAMETER points to. One thread per
// element; surplus threads do nothing.
__global__ void square_array(float *a, int N)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N)
    {
        return;
    }
    a[idx] = CONSTANT_PARAMETER->A * a[idx];
}
////Main Function/////
// Uploads ten floats and a CParameter record to the device, stores the
// record's device address in the constant symbol CONSTANT_PARAMETER, runs
// square_array and prints the results.
//
// Returns 0 when every CUDA call succeeded, 1 otherwise.
//
// Fixes relative to the original:
//  * cudaMemcpyToSymbol receives the symbol itself; naming the symbol with a
//    string is no longer supported from CUDA 5.0 on and yields
//    "invalid device symbol".
//  * memory obtained with new / new[] is released with delete / delete[]
//    instead of free().
//  * cudaMalloc, the kernel launch and the final copy are error-checked.
int main(void)
{
    /////Variable Definition
    const int N = 10;
    size_t size = N * sizeof(float);
    bool ok = true;

    CParameter * m_dParameter = NULL;
    CParameter * m_hParameter;
    float * m_D = NULL;
    float * m_H;

    //Memory Allocation Host
    m_hParameter = new CParameter;
    m_H = new float[N];

    //Memory Allocation Device
    ok = ERROR_CHECK(cudaMalloc((void **) &m_D, size)) && ok;
    ok = ERROR_CHECK(cudaMalloc((void**)&m_dParameter,sizeof(CParameter))) && ok;

    ////Data Initialization
    for (int i=0; i<N; i++)
        m_H[i] = (float)i;

    m_hParameter->A = 5;
    m_hParameter->B = 3;
    m_hParameter->C = 98;
    m_hParameter->D = 100;

    //Memory Copy from Host To Device
    ok = ERROR_CHECK(cudaMemcpy(m_D, m_H, size, cudaMemcpyHostToDevice)) && ok;
    ok = ERROR_CHECK(cudaMemcpy(m_dParameter,m_hParameter,sizeof(CParameter),cudaMemcpyHostToDevice)) && ok;

    // The constant symbol is a pointer, so what gets copied is the device
    // address held in m_dParameter (sizeof a pointer), not the record.
    ok = ERROR_CHECK(cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter))) && ok;

    // Do calculation on device:
    int block_size = 4;
    int n_blocks = (N + block_size - 1) / block_size;   // ceil-div
    square_array <<<n_blocks, block_size>>>(m_D,N);
    ok = ERROR_CHECK(cudaGetLastError()) && ok;

    // Retrieve result from device and store it in host array
    // (the blocking copy also waits for the kernel to finish)
    ok = ERROR_CHECK(cudaMemcpy(m_H, m_D, sizeof(float)*N, cudaMemcpyDeviceToHost)) && ok;

    // Print results
    for (int i=0; i<N; i++)
        printf("%d %f\n", i, m_H[i]);

    // Cleanup
    delete[] m_H;
    delete m_hParameter;
    ok = ERROR_CHECK(cudaFree(m_dParameter)) && ok;
    ok = ERROR_CHECK(cudaFree(m_D)) && ok;

    return ok ? 0 : 1;
}
I have tried WINDOWS: CUDA 5.0 Production Release and the Graphics card is GTX 590.
Any help will be appreciated.
In an effort to avoid being "Stringly Typed", the use of character strings to refer to device symbols was deprecated in CUDA runtime API functions in CUDA 4.1, and removed in CUDA 5.0.
The CUDA 5 release notes read:
** The use of a character string to indicate a device symbol, which was possible
with certain API functions, is no longer supported. Instead, the symbol should be
used directly.
If you change your code to the following, it should work.
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
ERROR_CHECK(Status);
From the CUDA 5.0 Release Notes:
"The use of a character string to indicate a device symbol, which was possible with certain API functions, is no longer supported. Instead, the symbol should be used directly."
These API functions still exist, but they accept the target symbol argument only as a bare identifier now, not as either a bare identifier or a string literal naming an ident. E.g.
__device__ __constant__ type ident;
main() { cudaMemcpyToSymbol("ident", ...); } // no longer valid, returns cudaErrorInvalidSymbol
main() { cudaMemcpyToSymbol(ident, ...); } // valid
So get rid of this:
#define PARAMETER "CONSTANT_PARAMETER"
And change this:
Status = cudaMemcpyToSymbol(PARAMETER, &m_dParameter, sizeof(m_dParameter));
To this:
Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
And I think it will work.
I am new to libpng and the documentation is really confusing for me.
Below is my code which is not working and I do not see the reason why.
Can someone point me to right direction? or suggest different ( "easier" ) library?
how I understand libpng:
open the file with fopen in rb mode
create png_structp with png_create_read_struct
create png_infop with png_create_info_struct
allocate space
read data
#include <stdio.h>
#include <stdlib.h>
#include <png.h>
int main( int argc, char **argv )
{
int x, y;
int height, width;
png_structp png_ptr;
png_infop info_ptr;
png_bytep *row_pointers;
FILE *fp = fopen( "test.png", "rb");
{
if (!fp)
printf("File could not be opened for reading");
png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
info_ptr = png_create_info_struct(png_ptr);
png_read_info(png_ptr, info_ptr);
width = png_get_image_width(png_ptr, info_ptr);
height = png_get_image_height(png_ptr, info_ptr);
row_pointers = (png_bytep*) malloc(sizeof(png_bytep) * height);
for (y=0; y<height; y++)
row_pointers[y] = (png_byte*)malloc(png_get_rowbytes(png_ptr,info_ptr));
png_read_image(png_ptr, row_pointers);
fclose(fp);
}
for (y=0; y<height; y++)
{
png_byte *row = row_pointers[y];
for (x=0; x<width; x++)
{
png_byte* ptr = &(row[x*4]);
printf("Pixel at position [ %d - %d ] has RGBA values: %d - %d - %d - %d\n", x, y, ptr[0], ptr[1], ptr[2], ptr[3]);
}
}
}
I similarly had failure in png_create_read_struct. It is difficult to debug without further info (in my case, stderr goes nowhere), but fortunately you can provide your own error and warning functions:
void user_error_fn(png_structp png_ptr, png_const_charp error_msg);
void user_warning_fn(png_structp png_ptr, png_const_charp warning_msg);
Using this, libpng printed helpful errors stating that the application was using a different version of the header than the libpng found at runtime. Mystery solved.
So while individual issues may differ, using libpng's error reporting should provide insight.
Obviously, there is no information passed to libpng about where/how to read image chunk. Use:
...
png_init_io(png_ptr, fp);
png_read_info..