I currently have some data that I would like to pass to my GPU and multiply by 2.
I have created a struct which can be seen here:
struct GPUPatternData
{
cl_int nodeInput, nodeOutput, patternCount, offest;
cl_float* patterns;
};
This struct should contain an array of floats. I will not know the size of the float array until run time, as it is specified by the user.
The kernel code:
typedef struct GPUPatternDataContatiner
{
int nodeInput,nodeOutput,patternCount, offest;
float* patterns;
} GPUPatternData;
__kernel void patternDataAddition(__global GPUPatternData* gpd,__global GPUPatternData* output)
{
int index = get_global_id(0);
if(index < gpd->patternCount)
{
output->patterns[index] = gpd->patterns[index]*2;
}
}
Here is the Host code:
GPUPattern::GPUPatternData gpd;
gpd.nodeInput = ptSet->getInputCount();
gpd.nodeOutput = ptSet->getOutputCount();
gpd.offest = gpd.nodeInput+gpd.nodeOutput;
gpd.patternCount = ptSet->getCount();
gpd.patterns = new cl_float [gpd.patternCount*gpd.offest];
GPUPattern::GPUPatternData gridC;
gridC.nodeInput = ptSet->getInputCount();
gridC.nodeOutput = ptSet->getOutputCount();
gridC.offest = gpd.nodeInput+gpd.nodeOutput;
gridC.patternCount = ptSet->getCount();
gridC.patterns = new cl_float [gpd.patternCount*gpd.offest];
All the data is allocated, then initialized with values, and then passed to the GPU:
int elements = gpd.patternCount;
size_t ofsdf = sizeof(gridC);
size_t dataSize = sizeof(GPUPattern::GPUPatternData)+ (sizeof(cl_float)*elements);
cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
//Copy the buffer to the device
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)&gpd,0,NULL,NULL);
//This buffer is being written to only
cl_mem bufferC = clCreateBuffer(gpu.context,CL_MEM_WRITE_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
err = clEnqueueWriteBuffer(queue,bufferC,CL_TRUE,0,dataSize,(void*)&gridC,0,NULL,NULL);
Everything builds, which I verify by watching the error code, which stays at 0:
cl_program program = clCreateProgramWithSource(gpu.context,1, (const char**) &kernelSource,NULL,&err);
// Build program
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
char build[2048];
clGetProgramBuildInfo(program, gpu.device, CL_PROGRAM_BUILD_LOG, 2048, build, NULL);
// Create kernel
cl_kernel kernel = clCreateKernel(program, "patternDataAddition",&err);
// Set kernel arguments
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferC);
It is then kicked off
size_t globalWorkSize = 1024;
size_t localWorkSize = 512;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
clFinish(queue);
It's at this point that it all goes wrong:
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize, &gridC, 0, NULL, NULL);
clFinish(queue);
The error in this case is -5 (CL_OUT_OF_RESOURCES).
Also if I change the line:
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize, &gridC, 0, NULL, NULL);
to:
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize*1000, &gridC, 0, NULL, NULL);
I get the error -30 (CL_INVALID_VALUE).
So my question is: why am I getting these errors when reading back the buffer? I am also not sure whether using a pointer to my float array could be giving me the wrong sizeof() for dataSize, and therefore the wrong buffer size.
You cannot pass a struct that contains pointers into an OpenCL kernel; see Section 6.9 of the OpenCL 1.2 specification:
http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf (Section 6.9)
You can either fix it as Eric Bainville pointed out or, if you are not very restricted on memory, you can do something like:
struct GPUPatternData
{
cl_int nInput,nOutput,patternCount, offest;
cl_float patterns[MAX_SIZE];
};
EDIT: OK, if memory is an issue: since you only use patterns and patternCount, you can copy the patterns out of the struct and pass them to the kernel separately.
struct GPUPatternData
{
cl_int nInput,nOutput,patternCount, offest;
cl_float* patterns;
};
Copy patterns to the GPU from gpd, and allocate space for patterns in gridC on the GPU. Then you can pass the buffers separately:
__kernel void patternDataAddition(int gpd_patternCount,
__global const float * gpd_Patterns,
__global float * gridC_Patterns) {
int index = get_global_id(0);
if(index < gpd_patternCount)
{
gridC_Patterns[index] = gpd_Patterns[index]*2;
}
}
When you come back from the kernel, copy the data back into gridC.patterns directly.
One more thing:
You don't have to change your CPU-side struct; it stays the same. However, this part:
size_t dataSize = sizeof(GPUPattern::GPUPatternData)+ (sizeof(cl_float)*elements);
cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
//Copy the buffer to the device
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)&gpd,0,NULL,NULL);
should be changed to something like
size_t dataSize = (sizeof(cl_float)*elements); // HERE
float* gpd_dataPointer = gpd.patterns; // HERE
cl_mem bufferA = clCreateBuffer(gpu.context,CL_MEM_READ_ONLY,dataSize,NULL,&err);
openCLErrorCheck(&err);
// Now use the gpd_dataPointer
err = clEnqueueWriteBuffer(queue,bufferA,CL_TRUE,0,dataSize,(void*)gpd_dataPointer,0,NULL,NULL);
The same goes for gridC. And when you copy back, copy it into gridC.patterns directly.
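For illustration, a minimal sketch of that readback, reusing the queue, bufferC, and elements names from the question and assuming bufferC was created to hold only the float payload:
size_t dataSize = sizeof(cl_float) * elements;
// Read the raw floats straight back into the array the struct already points to.
err = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, dataSize, (void*)gridC.patterns, 0, NULL, NULL);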
And then continue using the struct as if nothing happened.
The problem is probably with the pointer inside your struct.
In this case, I would suggest to pass nInput,nOutput,patternCount,offset as kernel args, and the patterns as a buffer of float:
__kernel void patternDataAddition(int nInput,int nOutput,
int patternCount,int offset,
__global const float * inPatterns,
__global float * outPatterns)
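On the host side, the scalar arguments are then set by value rather than through buffers. A minimal sketch of the argument setup for this kernel, reusing the kernel, gpd, bufferA, and bufferC names from the question (assumptions, not code from the answer):
// Hypothetical host-side setup: scalars passed by value, pattern arrays as plain float buffers.
err  = clSetKernelArg(kernel, 0, sizeof(cl_int), &gpd.nodeInput);    // nInput
err |= clSetKernelArg(kernel, 1, sizeof(cl_int), &gpd.nodeOutput);   // nOutput
err |= clSetKernelArg(kernel, 2, sizeof(cl_int), &gpd.patternCount); // patternCount
err |= clSetKernelArg(kernel, 3, sizeof(cl_int), &gpd.offest);       // offset
err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &bufferA);          // inPatterns (floats only)
err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &bufferC);          // outPatterns (floats only)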
I know that this question is old now, but I got past this problem in another way:
Your code for allocating memory for the struct and its data stays the same, but the struct should be changed to:
typedef struct GPUPatternDataContatiner
{
int nodeInput, nodeOutput, patternCount, offest;
float patterns[0];
} GPUPatternData;
Using this "feature" I have created vectors for OpenCL.
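For what it's worth, the zero-length array is the pre-C99 spelling of a flexible array member (C99 writes it as float patterns[];). The point is that a single allocation holds the header fields and the float payload contiguously, so the struct can be copied to the device as one block. A minimal sketch of the allocation, assuming the member names above:
// One contiguous block: header fields followed immediately by the floats.
int n = patternCount * offest; // hypothetical element count
GPUPatternData* gpd = (GPUPatternData*)malloc(sizeof(GPUPatternData) + n * sizeof(float));
gpd->patternCount = patternCount;
// gpd->patterns[i] now indexes the trailing payload, and the whole
// sizeof(GPUPatternData) + n*sizeof(float) bytes can be sent to the
// device with a single clEnqueueWriteBuffer.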
Related
I have some OpenCL code that does a calculation.
Can OpenCL pass an array and an int to a kernel at the same time?
I want to pass 'myint' to the kernel, not via a buffer.
int myint = 100;
cl_mem memObjects[3] = {0, 0, 0};
memObjects[0] = clCreateBuffer(opencl_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(int) * ARR_SIZE, data, NULL);
memObjects[1] = clCreateBuffer(opencl_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
sizeof(int), new int[]{myint}, NULL);
memObjects[2] = clCreateBuffer(opencl_context, CL_MEM_READ_WRITE,
sizeof(float) * ARR_SIZE, NULL, NULL);
opencl_errNum = clSetKernelArg(opencl_kernel, 0, sizeof(cl_mem), &memObjects[0]);
opencl_errNum |= clSetKernelArg(opencl_kernel, 1, sizeof(cl_mem), &memObjects[1]);
opencl_errNum |= clSetKernelArg(opencl_kernel, 2, sizeof(cl_mem), &memObjects[2]);
//Kernel code
const char *opencl_code = "__kernel void opencl_code(__global const int *src,\n"
" __global const int *value,\n"
" __global float *result) {\n"
" int gid = get_global_id(0);\n"
" result[gid] = src[gid] - value[0];\n"
"\n"
"}";
I want to change it to be like this:
const char *opencl_code = "__kernel void opencl_code(__global const int *src,\n"
" __global const int value,\n"
" __global float *result) {\n"
" int gid = get_global_id(0);\n"
" result[gid] = src[gid] - value;\n"
"\n"
"}";
Yes, you can pass either 1-dimensional arrays or constants as kernel arguments. The syntax is as follows:
int myint = 100;
cl_mem memObjects[2] = {0, 0};
memObjects[0] = clCreateBuffer(opencl_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * ARR_SIZE, data, NULL);
memObjects[1] = clCreateBuffer(opencl_context, CL_MEM_READ_WRITE, sizeof(float) * ARR_SIZE, NULL, NULL);
opencl_errNum = clSetKernelArg(opencl_kernel, 0, sizeof(cl_mem), &memObjects[0]); // pass an array as kernel parameter
opencl_errNum |= clSetKernelArg(opencl_kernel, 1, sizeof(int), (void*)&myint); // pass a constant as kernel parameter
opencl_errNum |= clSetKernelArg(opencl_kernel, 2, sizeof(cl_mem), &memObjects[1]); // pass an array as kernel parameter
// note: the second parameter is just "const int value", without the "__global" keyword or "*" pointer
const char *opencl_code = "__kernel void opencl_code(__global const int *src,\n"
" const int value,\n"
" __global float *result) {\n"
" int gid = get_global_id(0);\n"
" result[gid] = src[gid] - value;\n" // note: result is type float, and you write an int to it here
"}\n";
To make this all a lot easier, I created this OpenCL-Wrapper. With that, you can directly pass arrays (Memory objects) or numbers as kernel parameters, and the C++ code would look like this:
int main() {
Device device(select_device_with_most_flops()); // compile OpenCL C code for the fastest available device
int myint = 100;
Memory<int> src(device, ARR_SIZE); // allocate memory on both host and device
Memory<float> result(device, ARR_SIZE);
Kernel opencl_kernel(device, ARR_SIZE, "opencl_code", src, myint, result); // kernel that runs on the device
// initialize src, for example with "src[5] = 3;"
src.write_to_device(); // copy data from host memory to device memory
opencl_kernel.run(); // run kernel
result.read_from_device(); // copy data from device memory to host memory
// read result with for example "float test = result[5];"
// ...
return 0;
}
I am trying to launch a kernel function using the runtime API. For some reason, I am not able to call cudaLaunchKernel directly. Instead, I have to call a function that calls cudaLaunchKernel inside it. Here is an example, which simply prints a message from the device:
#include<stdio.h>
#include<cuda.h>
#include<cuda_runtime.h>
__global__
void hello()
{
printf("hello from kernel. \n");
}
template<typename T>
int launchKernel (T kernel , const size_t grid[3] , const size_t block[3])
{
cudaError_t res;
dim3 grid3d = {(unsigned int)grid[0] , (unsigned int)grid[1] , (unsigned int)grid[2]};
dim3 block3d = {(unsigned int)block[0] , (unsigned int)block[1] , (unsigned int)block[2]};
res = cudaLaunchKernel ((void*)kernel , grid3d , block3d, NULL, 0, NULL);
if (res != CUDA_SUCCESS)
{
char msg[256];
printf ("error during kernel launch \n");
return -1;
}
return 0;
}
int main(void)
{
float *hx, *dx;
hx = (float*)malloc(32 * sizeof(float));
cudaMalloc(&dx, 32 * sizeof(float));
unsigned int threads = 32;
unsigned int blocks = 1;
///////////// option 1: directly call runtime api: cudaLaunchKernel //////////////
//cudaLaunchKernel((void*)hello, dim3(blocks), dim3(threads), NULL, 0, NULL);
//////////////////////////////////////////////////////////////////////////////////
///////// option 2: call a function which further calls cudaLaunchKernel /////////
const size_t grid3d[3] = {blocks, 0, 0};
const size_t block3d[3] = {threads, 0, 0};
launchKernel (hello , grid3d , block3d);
//////////////////////////////////////////////////////////////////////////////////
cudaMemcpy(hx, dx, 32 * sizeof(float), cudaMemcpyDeviceToHost);
cudaFree(dx);
free(hx);
return 0;
}
Option 1, which directly calls the cudaLaunchKernel, works. However, option 2, which indirectly invokes the cudaLaunchKernel, does not work. Using option 2, no message was printed from the device, and the return value is not equal to CUDA_SUCCESS.
I was wondering if anyone has any insights into this problem.
Thank you in advance for your help and time.
grid and block dimensions cannot be zero:
const size_t grid3d[3] = {blocks, 0, 0};
const size_t block3d[3] = {threads, 0, 0};
The reason your two launches behave differently is that you are creating the grid and block dimension variables differently: dim3(blocks) in option 1 default-initializes the unused y and z dimensions to 1, while your arrays in option 2 explicitly set them to 0.
If you change to:
const size_t grid3d[3] = {blocks, 1, 1};
const size_t block3d[3] = {threads, 1, 1};
it will work for either case.
By the way, you're not doing yourself any favors with this sort of error trapping:
if (res != CUDA_SUCCESS)
{
char msg[256];
printf ("error during kernel launch \n");
return -1;
}
This would be a lot more instructive:
if (res != cudaSuccess)
{
printf ("error during kernel launch: %s \n", cudaGetErrorString(res));
return -1;
}
I am having an issue with my project, where I am attempting to copy a structure from memory, copy the structure into a char array, and write the bytes in the char array to a registry key of type REG_BINARY.
Here is the code so far; I am using RtlCopyMemory to copy the bytes from memory:
VOID ReadVirtualMem(ULONG32 address, PVOID buffer, SIZE_T size)
{
PEPROCESS Process;
if (NT_SUCCESS(PsLookupProcessByProcessId((HANDLE)ProcessID, &Process)))
{
KAPC_STATE apc_state;
KeStackAttachProcess(Process, &apc_state);
if (MmIsAddressValid((PVOID)address) && MmIsAddressValid((PVOID)(address + size)))
{
RtlCopyMemory(buffer, (PVOID)address, size);
}
KeUnstackDetachProcess(&apc_state);
}
}
The above code takes a few parameters: the address of the location I need to read from, the output buffer the bytes should be stored in, and the number of bytes I want copied.
So here is how I am obtaining the bytes from memory:
struct PlayerInformation {
float Position;
int Points;
float Speed;
};
struct PlayerInformation PlayerInfo;
ReadVirtualMem(ReadAddress, &PlayerInfo, size);
I then cast this information to a PVOID data type so I can write this to the registry key like so:
PVOID Buffer;
memcpy(&Buffer, &PlayerInfo, sizeof(PlayerInfo));
status = WriteToKey(L"\\Registry\\Machine\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run", L"Bytes", REG_BINARY, Buffer, size);
if (!NT_SUCCESS(status))
return STATUS_UNSUCCESSFUL;
The above code works just fine for 8 bytes only. I believe this is because PVOID is only 8 bytes in size.
Here is my code for writing to the registry key:
NTSTATUS WriteToKey(PWSTR registry_path, PWSTR value_name, ULONG type, PVOID data, ULONG length)
{
UNICODE_STRING valname;
UNICODE_STRING keyname;
OBJECT_ATTRIBUTES attribs;
HANDLE handle;
NTSTATUS rc;
ULONG result;
RtlInitUnicodeString(&valname, registry_path);
RtlInitUnicodeString(&keyname, value_name);
InitializeObjectAttributes(&attribs, &valname, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL);
rc = ZwCreateKey(&handle, KEY_ALL_ACCESS, &attribs, 0, NULL, REG_OPTION_VOLATILE, &result);
if (!NT_SUCCESS(rc))
return STATUS_UNSUCCESSFUL;
rc = ZwSetValueKey(handle, &keyname, 0, type, &data, length);
if (!NT_SUCCESS(rc))
return STATUS_UNSUCCESSFUL;
return STATUS_SUCCESS;
}
My question is: how would I take the data copied by RtlCopyMemory into a structure, turn that structure into a char array, and write this data to the registry key path?
In C++ it's as easy as this:
unsigned char b[sizeof(PlayerInfo)];
std::cout << "Size of structure: " << sizeof(PlayerInfo) << "\n\n";
memcpy(b, &PlayerInfo, sizeof(PlayerInfo));
HKEY handle;
if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run", 0, KEY_WRITE, &handle) == ERROR_SUCCESS)
{
RegSetValueEx(handle, L"Bytes", 0, REG_BINARY, (BYTE*)&b, sizeof(b));
}
But C has no BYTE* data type, so I am a bit confused about how to take this C++ function, which works perfectly, and implement it in C so it works the exact same way.
If you need any other details, I will gladly provide them.
Thanks in advance!
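For reference, BYTE in the Windows headers is just a typedef for unsigned char, so the packing step itself translates to C directly; a minimal sketch, assuming the PlayerInformation struct above:
unsigned char b[sizeof(struct PlayerInformation)];
// Copy the struct into the byte array, exactly as in the C++ version.
memcpy(b, &PlayerInfo, sizeof(struct PlayerInformation));
// b decays to unsigned char*, which can be handed to ZwSetValueKey as
// the data argument, with sizeof(b) as the length.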
EDIT: So I edited my function to accept the PlayerInfo struct as an argument and now all the data is read correctly.
Here is the updated code:
NTSTATUS WriteToBytes(PWSTR registry_path, PWSTR value_name, ULONG type, struct PlayerInformation data, ULONG length)
{
UNICODE_STRING valname;
UNICODE_STRING keyname;
OBJECT_ATTRIBUTES attribs;
HANDLE handle;
NTSTATUS rc;
ULONG result;
RtlInitUnicodeString(&valname, registry_path);
RtlInitUnicodeString(&keyname, value_name);
InitializeObjectAttributes(&attribs, &valname, OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL);
rc = ZwCreateKey(&handle, KEY_ALL_ACCESS, &attribs, 0, NULL, REG_OPTION_VOLATILE, &result);
if (!NT_SUCCESS(rc))
return STATUS_UNSUCCESSFUL;
rc = ZwSetValueKey(handle, &keyname, 0, type, &data, length);
if (!NT_SUCCESS(rc))
return STATUS_UNSUCCESSFUL;
return STATUS_SUCCESS;
}
This works well, with no errors that I can see. Does anyone know why this works, but passing a void* to the original WriteToKey function does not?
I'm implementing an iterator to go over the records from a Berkeley DB. However, it seems I need to set the DB_DBT_USERMEM flag before the call to cursor->get with DB_NEXT.
Doing it that way would make my iterator less cohesive, and I would have to implement multiple iterators, one for each data type I want to retrieve.
Is there a way to have a generic iterator that can traverse structures without pointers, as well as basic data types? Here's what I'm trying to achieve.
#include <stdio.h>
#include <string.h>
#include <db.h>
// let this function handle integers and use DB_DBT_USERMEM for memory alignment
void integer_items(DBT key, DBT data) {
int number = 0;
data.data = &number;
data.flags = DB_DBT_USERMEM;
data.ulen = sizeof(int);
printf("key is: %s, data is: %d\n", (char *) key.data,number);
}
// let this function handle pointer structs. No need for DB_DBT_USERMEM
void ptr_struct_items(DBT key, DBT data) {
// MY_STRUCT user;
// marshall struct...
// buffsize = sizeof(int) +(strlen(user.familiar_name) + strlen(user.surname) + 2);
// databuff = malloc(buffsize);
// memset(databuff, 0, buffsize);
// ...
// printf("key is: %s, data is: %d\n", (char *) key.data,number);
}
int iterator(DB *database, void(*function)(DBT key, DBT data)) {
DBT key, data;
DBC *cursor;
memset(&key, 0, sizeof(DBT));
memset(&data, 0, sizeof(DBT));
database->cursor(database, NULL, &cursor, 0);
while(cursor->c_get(cursor, &key, &data, DB_NEXT) == 0){
(*function)(key, data);
}
cursor->c_close(cursor);
return 0;
}
int main() {
DB_ENV *myEnv;
DB *dbp;
DBT key, data;
int r, v = 10;
char *k = "Test";
db_env_create(&myEnv, 0);
myEnv->open(myEnv, "./", DB_CREATE | DB_INIT_MPOOL, 0);
db_create(&dbp, myEnv, 0);
dbp->open(dbp, NULL, "test.db", NULL, DB_HASH, DB_CREATE, 0664);
memset(&key, 0, sizeof(key));
memset(&data, 0, sizeof(data));
key.data = k;
key.size = strlen(k) +1;
data.data = &v;
data.size = sizeof(int);
if((r=dbp->put(dbp, NULL, &key, &data, 0)!=0))
fprintf(stderr, "%s\n", db_strerror(r));
iterator(dbp, integer_items);
iterator(dbp, ptr_struct_items);
return 0;
}
You almost always want to use DB_DBT_USERMEM, if only to avoid the malloc() inside BDB for DB_DBT_MALLOC/REALLOC. When you use it, you must pass in your own memory large enough to hold the largest item in your database. This holds for the key DBT too, as you may want to use it there.
In your example, as the key and data are so small, I'd just put character arrays on the stack in your "iterator" function, and then initialize key and data after the call to memset(). What you've got above is wrong because you're setting USERMEM after the call to c_get().
Here's a reworked example that gives BDB 256 bytes to work with for key and data.
void integer_items(DBT key, DBT data) {
int number = 0;
if (data.size == sizeof number) {
number = *(int *)data.data;
printf("key is: %s, data is: %d\n", (char *) key.data, number);
}
}
int iterator(DB *database, void(*function)(DBT key, DBT data)) {
DBT key, data;
DBC *cursor;
char kmem[256];
char dmem[256];
memset(&key, 0, sizeof(DBT));
memset(&data, 0, sizeof(DBT));
key.flags = DB_DBT_USERMEM;
key.data = kmem;
key.ulen = sizeof kmem;
data.flags = DB_DBT_USERMEM;
data.data = dmem;
data.ulen = sizeof dmem;
database->cursor(database, NULL, &cursor, 0);
while(cursor->c_get(cursor, &key, &data, DB_NEXT) == 0){
(*function)(key, data);
}
cursor->c_close(cursor);
return 0;
}
To handle different structures inside your iterator, include the data type as part of the key somehow. For example, instead of a bare integer for the key, use a struct, and have the first character define which kind of type it is. Then, inside your iterator function, you can switch on that.
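A minimal sketch of that idea (the tag values and key layout here are assumptions, not part of the original answer):
// Hypothetical tagged key: the first byte says what the data DBT holds.
struct tagged_key {
    char type;      // e.g. 'i' for an int, 's' for a marshalled struct
    char name[64];
};

void generic_items(DBT key, DBT data) {
    struct tagged_key *k = (struct tagged_key *) key.data;
    switch (k->type) {
    case 'i':
        if (data.size == sizeof(int))
            printf("key is: %s, data is: %d\n", k->name, *(int *) data.data);
        break;
    case 's':
        // unmarshal a pointer-free struct from data.data here
        break;
    }
}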
I am trying to learn graphics programming, and I have written a simple OpenGL program that draws a triangle and should shade it red. However, when I call glShaderSource for the fragment shader, it causes a segfault.
I don't know why it segfaults: the spec page doesn't say anything about the function causing a segfault, and the shaders being loaded into memory incorrectly can't be it either, as the vertex shader is loaded the same way, and when I call glGetShaderInfoLog and print the log, it says the vertex shader compiles fine.
Anyway, here's my code that loads the shaders and links the shading program:
int LoadShader(char* Filename, GLchar* ShaderSource) // don't call this function by itself, as it doesn't free its own memory
{
FILE* z;
z = fopen(Filename, "rb");
if(z == NULL) {printf("Error: file \"%s\" does not exist...\n", Filename); return -1;}
unsigned long len = 0;
//get file length
fseek(z, 0, SEEK_END);
len = ftell(z);
rewind(z);
if(len == 0) {printf("Error reading file \"%s\"\n", Filename); return -1;}
ShaderSource = (char*)malloc((sizeof(char)) * len + 1); //allocate enough bytes for the file
if(ShaderSource == NULL) {puts("Memory Error"); return -1;}
size_t result = fread(ShaderSource, 1, len, z);
if( result != len)
{
puts("Reading Error");
free(ShaderSource);
ShaderSource = NULL;
return -1;
}
ShaderSource[len] = 0; //make it null terminated
puts(ShaderSource); //debbugging
fclose(z);
return 1;
}
//----------------------------------------------------------------------
GLuint MakeProgram(char* VSpath, char* FSpath){
GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
GLchar* VSsource;
GLchar* FSsource;
if(!LoadShader(VSpath, VSsource))
return -1;
if(!LoadShader(FSpath, FSsource))
return -1;
GLint Result = GL_FALSE;
int InfoLogLength;
//compile shaders
const char* VS = VSsource; // glShaderSource needs a const char*
glShaderSource(VertexShaderID, 1, &VS, NULL); //we use NULL for length becuase the source is null-terminated
glCompileShader(VertexShaderID);
//check
glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
char* VSerr;
VSerr = (char*)malloc(sizeof(char) * InfoLogLength);
glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VSerr[0]);
printf("%s\n", VSerr);
free(VSerr);
VSerr = NULL;
//fragment shader
const char* FS = FSsource;
glShaderSource(FragmentShaderID, 1, &FS, NULL);
glCompileShader(FragmentShaderID);
//check
glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
char* FSerr;
FSerr = (char*)malloc(sizeof(char) * InfoLogLength);
glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FSerr[0]);
printf("%s\n", FSerr);
free(FSerr);
FSerr = NULL;
//link program
GLuint ProgramID = glCreateProgram();
glAttachShader(ProgramID, VertexShaderID);
glAttachShader(ProgramID, FragmentShaderID);
glLinkProgram(ProgramID);
//check program
glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
char* err;
err = (char*)malloc(sizeof(char) * InfoLogLength);
glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &err[0]);
printf("%s\n", err);
free(err);
//free the shaders
free(VSsource);
VSsource = NULL;
free(FSsource);
FSsource = NULL;
glDeleteShader(VertexShaderID);
glDeleteShader(FragmentShaderID);
return ProgramID;
}
Take a closer look at your actual declarations of VSsource (uninitialized), FSsource (uninitialized), and the implementation of LoadShader (...). Because this is C and you do not pass things by reference, any changes made to the ShaderSource pointer inside the LoadShader (...) function, as you originally wrote it, will not propagate outside the function.
In short, you implemented LoadShader (...) incorrectly. You need to actually change the address stored in the pointer you pass it (since you are allocating this memory inside the function), but you cannot do that since you currently pass it a GLchar*.
As for why GL accepts an uninitialized pointer for your first call to glShaderSource (...) I cannot say. Perhaps you are just extremely lucky? Regardless, you can correct your issue by altering LoadShader to take a GLchar** instead. I will illustrate the necessary changes below:
/* Originally, you made a copy of an uninitialized pointer and then proceeded to
re-assign this copy a value when you called malloc (...) - you actually need
to pass a pointer to your pointer so you can update the address outside of
this function!
*/
int LoadShader(char* Filename, GLchar** pShaderSource) // don't call this function by itself, as it doesn't free its own memory
{
[...]
*pShaderSource = (GLchar *)malloc((sizeof(GLchar)) * len + 1); //allocate enough bytes for the file
GLchar* ShaderSource = *pShaderSource;
[...]
}
GLuint MakeProgram(char* VSpath, char* FSpath){
[...]
GLchar* VSsource; /* Uninitialized */
GLchar* FSsource; /* Uninitialized */
if(!LoadShader(VSpath, &VSsource)) /* Pass the address of your pointer */
return -1;
if(!LoadShader(FSpath, &FSsource)) /* Pass the address of your pointer */
return -1;
/*
* Now, since you did not pass copies of your pointers, you actually have
* *VALID* initialized memory addresses !
*/
[...]
}
Alternatively, you could simply modify your function to return the address of the string you allocated. Instead of returning -1 on failure like you do now, you could return NULL. Your function interface would be as simple as this if you chose to go that route: GLchar* LoadShader (char* Filename).
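A minimal sketch of that alternative, with the same behavior as the original loader but returning the buffer (NULL on failure) instead of writing through a parameter:
GLchar* LoadShader(char* Filename) // caller must free() the result
{
    FILE* z = fopen(Filename, "rb");
    if (z == NULL) { printf("Error: file \"%s\" does not exist...\n", Filename); return NULL; }
    fseek(z, 0, SEEK_END);
    long len = ftell(z);
    rewind(z);
    if (len <= 0) { fclose(z); return NULL; }
    GLchar* ShaderSource = (GLchar*)malloc(len + 1);
    if (ShaderSource == NULL) { puts("Memory Error"); fclose(z); return NULL; }
    if (fread(ShaderSource, 1, len, z) != (size_t)len)
    {
        puts("Reading Error");
        free(ShaderSource);
        fclose(z);
        return NULL;
    }
    ShaderSource[len] = 0; // make it null terminated
    fclose(z);
    return ShaderSource;
}
// usage:
GLchar* VSsource = LoadShader(VSpath);
if (VSsource == NULL)
    return -1;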