I'm following a tutorial on using NVIDIA/CUDA/etc. here: http://www.nvidia.com/content/gtc-2010/pdfs/2131_gtc2010.pdf
I'm trying to add two vectors in parallel, but I am having trouble with these memory access violations mentioned in the title of my post.
The error is occurring at my printf line (I will post my code below), but if I comment it out I get taken to a file named "dbgheap.c" and I just get the same error message on line 1696 of that file (the file has 3268 lines)
The line is:
if (*pb++ != bCheck)
and the function that is in is:
extern "C" static int __cdecl CheckBytes(
unsigned char * pb,
unsigned char bCheck,
size_t nSize
)
{
while (nSize--)
{
if (*pb++ != bCheck) //this is the line with the error
{
return FALSE;
}
}
return TRUE;
}
And the memory address location it says it can't access, I believe, are the locations of my "a", "b", and "c" variables (will post my code below).
So without further adieu, here is my code (sorry there are no comments):
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#define N 10
__global__ void kernel() {
}
__global__ void add(int *a, int *b, int *c) {
c[blockIdx.x] = a[blockIdx.x] + b[blockIdx.x];
}
void random_ints(int* a,int num) {
for (int i = 0; i<num; i++)
a[i] = rand();
}
int main () {
int *a,*b,*c;
int *dev_a, *dev_b, *dev_c;
int size = N*sizeof(int);
cudaMalloc((void**)&dev_a,size);
cudaMalloc((void**)&dev_b,size);
cudaMalloc((void**)&dev_c,size);
a = (int*)malloc(size);
b = (int*)malloc(size);
c = (int*)malloc(size);
random_ints(a,N);
random_ints(b,N);
cudaMemcpy(dev_a,&a,size,cudaMemcpyHostToDevice);
cudaMemcpy(dev_b,&b,size,cudaMemcpyHostToDevice);
add<<<N,1>>>(dev_a,dev_b,dev_c);
cudaMemcpy(&c,dev_c,size,cudaMemcpyDeviceToHost);
for (int i = 0; i<N; i++)
printf("%d + %d = %d\n",a[i],b[i],c[i]);
free(a); free(b); free(c);
cudaFree(dev_a);
cudaFree(dev_b);
cudaFree(dev_c);
return 0;
}
If you need any clarifications just ask.
Thanks!
The cudaMemcpy parameters are the actual pointers so no need to take their address before passing (unlike cudaMalloc). Remove the &.
cudaMemcpy(dev_a,a,size,cudaMemcpyHostToDevice);
cudaMemcpy(dev_b,b,size,cudaMemcpyHostToDevice);
add<<<N,1>>>(dev_a,dev_b,dev_c);
cudaMemcpy(c,dev_c,size,cudaMemcpyDeviceToHost);
I believe this is what is causing your memory corruption problems.
As a matter of form, I'd pass the length in and check it against blockIdx.x.
__global__ void add(int *a, int *b, int *c, int N) {
if (blockIdx.x < N) {
c[blockIdx.x] = a[blockIdx.x] + b[blockIdx.x];
}
}
I thought this was actually required but I'm guessing it isn't if the range fits in a single warp. For N > 16, it will be necessary and the check will be a bit more complex (using threadIdx.x and blockDim.x).
Related
I have the following code to create a sorted suffix array but there is simply no output. I run the program and it halts for 1-2secs and then exits.
The code is based off the c++ answer on the following website: https://www.geeksforgeeks.org/suffix-array-set-1-introduction/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
struct suffix
{
int index;
char *suff;
};
int cmp(const void *a, const void *b)
{
const struct suffix *a1 = a;
const struct suffix *b1 = b;
return strcmp(a1->suff, b1->suff) < 0? 1 : 0;
}
int *buildSuffixArray(char *txt, int n)
{
struct suffix suffixes[n];
for (int i = 0; i < n; i++)
{
suffixes[i].index = i;
suffixes[i].suff = (txt+i);
}
qsort(suffixes, n, sizeof(int), cmp);
int *suffixArr = (int*)malloc(n * sizeof(int));
for (int i = 0; i < n; i++)
{
suffixArr[i] = suffixes[i].index;
}
return suffixArr;
}
void printArr(int arr[], int n)
{
for (int i = 0; i < n; i++)
{
printf("%d", arr[i]);
}
printf("\n");
}
int main()
{
char txt[] = "banana";
int n = strlen(txt);
int *suffixArr = buildSuffixArray(txt, n);
printf("following is suffix array for %s\n", txt);
printArr(suffixArr, n);
return 0;
}
Since there is no output, I assume that the problem is within the 'buildSuffixArray' function, specifically with qsort. I have tried fixing it but had no luck. Any help would be appreciated.
Summarizing the comments in the question from Weather Vane and Jonathan Leffler:
OP is passing the wrong element size to qsort(). qsort(suffixes, n, sizeof(int), cmp); should be qsort(suffixes, n, sizeof suffixes[0], cmp);. (Weather Vane).
The cmp function must return a negative value if the first argument sorts before the second argument, zero if they sort equal, or a positive value if the first argument sorts after the second argument. OP's cmp function returns 1 if the first argument is less than the second argument, otherwise 0. This will screw up any sorting by qsort(). (Jonathan Leffler)
It appears that the cmp function written by OP was based on the C++ code from Suffix Array | Set 1 (Introduction). In particular, OP's return strcmp(a1->suff, b1->suff) < 0? 1 : 0; is based on the similar return strcmp(a.suff, b.suff) < 0? 1 : 0; in the C++ code. The problem is that the C++ code uses std::sort(), not qsort() and the rules for the return value of the comparison functions are different.
As Jonathan Leffler points out, it would be sufficient for OP's cmp function to return the value from strcmp() directly:
return strcmp(a->suff, b->suff);
OP's printArr() function prints the integers in the array with no separation between each number. The correction is trivial.
So I have a program in C structured in 3 files: main.c, alloc.h and alloc.c. In the main.c function, I have the declaration of a pointer to another pointer to which I intend to alloc an n * m array:
#include <stdio.h>
#include <stdlib.h>
#include "alloc.h"
int main() {
int **mat, n, m;
alloc_matrix(&mat, int &n, int &m);
return 0;
}
In alloc.c I have the following declarations:
#ifndef ALLOC_H_INCLUDED
#define ALLOC_H_INCLUDED
#include <stdio.h>
#include <stdlib.h>
void alloc_matrix(int***, int*, int*);
#endif
In alloc.c I have the function:
void alloc_matrix(int ***mat, int *n, int *m) {
printf("\nn = "); scanf("%d", n);
printf("\nm = "); scanf("%d", m);
*mat = (int**)calloc(*n, sizeof(int*));
int i;
for (i = 0; i < *n; i++)
*(mat + i) = (int*)calloc(*m, sizeof(int));
}
But the program doesn't work. It enters some kind of loop and doesn't end.
If I allocate it in main it would work but I have no idea what I am doing wrong in the alloc function.
Here is the correct code. Your error was that in the definition of alloc_matrix, you used *(mat+i) in the allocation loop, which should be *(*mat+i) as, mat is a int*** so the base address for the 2D array would be in *mat. Then you need to move by offset i and then de-reference that memory location for the 1D array.
Main:
#include <stdio.h>
#include <stdlib.h>
#include "alloc.h"
int main()
{
int **mat,n,m;
alloc_matrix(&mat,&n,&m);
return 0;
}
alloc.h
#ifndef ALLOC_H_INCLUDED
#define ALLOC_H_INCLUDED
#include <stdio.h>
#include <stdlib.h>
void alloc_matrix(int***,int*,int*);
#endif
alloc.c :
void alloc_matrix(int ***mat,int *n,int *m)
{
printf("\nn = "); scanf("%d", n);
printf("\nm = "); scanf("%d", m);
*mat = (int**)calloc(*n,sizeof(int*));
int i;
for(i = 0; i < *n; i++)
*(*mat+i) = (int*)calloc(*m,sizeof(int));
}
The code for the read function :
void read_matrix(int ***mat,int n,int m)
{
int i,j;
for(i = 0; i < n; i++)
for(j = 0; j < m; j++)
{
printf("mat[%d][%d] = ", i, j);
scanf("%d", (*(*mat+i))+j);
}
}
The problem with it is that it only reads the first row and the it freezes.
void alloc_matrix(int ***mat,int *n,int *m)
There are two problems in this line. Neither is fatal but both are worth fixing.
First problem: A matrix in this program is represented as an int**. Why does alloc_matrix accept an int***? All standard functions that allocate something (malloc and friends) return a pointer to that something. This is an idiomatic way of doing things in C. It reduces your star count (being a three-star C programmer is not an achievement to be proud of) and simplifies the code. The function should be changed to
int** alloc_matrix( // but what's inside the () ?
The second problem is, why should a function called alloc_matrix prompt the user and read values? These things are not related to allocation. A function should do one thing and do it well. Does malloc prompts you to enter the size? Does fopen prompt you to enter the filename? These things would be regarded as nonsense of the first degree, and rightly so. It is advised to read the sizes elsewhere and pass them to alloc_matrix as input arguments. Hence,
int** alloc_matrix(int n, int m) { // but what's inside the {}?
What remains of alloc_matrix is simple:
int** alloc_matrix(int n, int m) {
int** mat; // that's what we will return
int i;
mat = (int**)calloc(n, sizeof(int*));
for(i = 0; i < n; i++)
// here comes the important part.
Since we have simplified alloc_matrixand reduced the star count in mat, what should we do with the old body of the loop? It was:
*(mat+i) = (int*)calloc(...);
but if we remove a star, it becomes
(mat+i) = (int*)calloc(...);
which is an obvious nonsense. Perhaps the old line was a problem. The fact that it provoked a compiler warning certainly doesn't speak for its correctness. So how to correct it? There aren't too many options. It turns out that in order to restore sanity, we must leave the old left-hand side (written for the three-star mat) intact. Or, better still, use an equivalent but more idiomatic notation:
mat[i] = (int*)calloc(m, sizeof(int));
So the entire function now becomes
int** alloc_matrix(int n, int m) {
int **mat;
int i;
mat = (int**)calloc(n, sizeof(int*));
for(i = 0; i < n; i++)
mat[i] = (int*)calloc(m, sizeof(int));
return mat;
}
and it should be called like
mat = alloc_matrix(n, m);
It is often said that one should not cast the result of calloc and friends. But in this case the cast has enabled a warning which helped to find a bug. I'm leaving the casts in place for now.
There is another idiom for the allocation that does not require the cast, but also avoids the problem of types not matching.
Instead of using the type for the sizeof, you can use the dereferenced pointer as the type information is available in the variable:
mat = (int**)calloc(n, sizeof(int*));
can be changed to
mat = calloc(n, sizeof *mat); //sizeof is an operator not a function
My aim is to allocate a 2d array with only using 1 line for efficiency. Since my prof is expecting it to be efficient.
the code gives me an error saying that it can't convert from void* to int.
#include<stdio.h>
#include<stdlib.h>
#define NUMOFCOL 4
int **addtwoarr(int (*A)[NUMOFCOL], int (*B)[NUMOFCOL]);
int main(void){
int firstarr[4][4]={{1,1,1,1},
{1,1,1,1},
{1,1,1,1},
{1,1,1,1}},
secondarr[4][4]={{1,1,1,1},
{1,1,1,1},
{1,1,1,1},
{1,1,1,1}}, **receiver;
receiver = addtwoarr(firstarr, secondarr);
printf("%d", receiver[3][3]);
}
int **addtwoarr(int (*A)[NUMOFCOL], int (*B)[NUMOFCOL]){
int col, row, **arr;
(*arr)[NUMOFCOL] = malloc(NUMOFCOL * sizeof(*arr)); /*this line in particular gives the error */
for(row=0; row<NUMOFCOL; row++){
for(col=0;col<NUMOFCOL; arr[row][col]=A[row][col]+B[row][col], col++){}
}
return arr;
}
The allocation happens in the addtwoarr function which is where the error occurs.
I seriously don't recommend this, as there are a ton of assumptions in your code about top-end sizing. But if you really want to do it, the rather cryptic syntax for returning pointers to fixed length arrays in C looks something like this:
#include <stdio.h>
#include <stdlib.h>
#define NUMOFCOL 4
int (*addtwoarr(int A[][NUMOFCOL], int B[][NUMOFCOL]))[NUMOFCOL];
int main(void)
{
int firstarr[][NUMOFCOL] = {
{1,1,1,1},
{1,1,1,1},
{1,1,1,1},
{1,1,1,1}},
secondarr[][NUMOFCOL] = {
{1,1,1,1},
{1,1,1,1},
{1,1,1,1},
{1,1,1,1}};
int (*receiver)[NUMOFCOL] = addtwoarr(firstarr, secondarr);
printf("%d\n", receiver[3][3]);
free(receiver);
}
int (*addtwoarr(int A[][NUMOFCOL], int B[][NUMOFCOL]))[NUMOFCOL]
{
int col, row;
int (*arr)[NUMOFCOL] = malloc(NUMOFCOL * sizeof(*arr));
for(row=0; row<NUMOFCOL; row++){
for(col=0;col<NUMOFCOL; arr[row][col]=A[row][col]+B[row][col], col++);
}
return arr;
}
Output
2
Best of luck.
I am trying to create a program in C that removes duplicate values in an integer array. My strategy is to first sort the array via a selectionsort function, and then call a function removedup that removes any consecutive, duplicate values in the array.
My code:
#include <stdio.h>
#include "simpio.h"
#define n 10
void GetArray(int a[]);
void SelectionSort(int a[]);
int FindMax(int a[], int high);
void swap(int a[], int p1, int p2);
int removedup(int a[]);
void printArray(int a[]);
main()
{
int a[n];
GetArray(a);
SelectionSort(a);
printf("The original, sorted array is\n");
printArray(a);
printf("The array with removed duplicates \n");
printArray(removedup(a));
getchar();
}
void GetArray(int a[])
{
int i;
for(i=0;i<n;i++)
{
printf("Enter integer# %d", i+1);
a[i]=GetInteger();
}
}
void SelectionSort(int a[])
{
int i, max;
for(i=0;i<n;i++)
{
max=FindMax(a,n-i-1);
swap(a,max,n-i-1);
}
}
int FindMax(int a[], int high)
{
int i, index;
index=high;
for(i=0;i<high;i++)
{
if(a[i]>a[index])
index=i;
}
return index;
}
void swap(int a[], int p1, int p2)
{
int temp;
temp=a[p2];
a[p2]=a[p1];
a[p1]=temp;
}
int removedup(int a[])
{
int i, count, OutArray[count], j;
count=0;
for(i=0;i<n-1;i++)
{
if(a[i]==a[i+1])
{
a[i+1]=a[i+2];
count++;
}
}
count++;
for(j=0;j<count;j++)
{
OutArray[i]=a[i];
}
return OutArray;
}
I have two questions:
1) How do I fix the error the compiler in giving me in the main body when calling removedup inside the printarray function, saying "invalid conversion from int to int*"? (line 22)
2) How do I accurately define the size of OutArray[] in the removedup function? Currently I have it defined as the size variable, but the value of this variable isn't accurately defined until after the declaration of OutArray.
Notice your prototypes ...
int removedup(int a[]);
void printArray(int a[]);
And also notice you're calling printArray() with the result of removedup().
printArray(removedup(a));
The result of removedup() is an int; printarray() requires a int[].
int and int[] are not compatible.
I suggest you remove duplicates and print array in two distinct statements.
You should be able to fix the compiling problems after reading comp.lang-c FAQ on arrays and pointers.
After you get your array sorted, you can use the following function to remove the duplicates:
int dedup(int arr[], int size) {
int curr = 0, next = 0;
while (next < size) {
while (next < size && arr[next] == arr[curr])
next++;
if (next < size)
arr[++curr] = arr[next++];
}
return size ? curr+1 : 0;
}
It takes two arguments, the array and its size. The duplicates are removed in-place, which means that the array is modified, without allocating a new array to store the unique elements.
Remember that the dedup function expects the elements to be sorted! I've noticed you are using your own implementation of selection sort, which makes me think this is homework. In that case, I feel a little reluctant on giving you a complete solution, although understanding it should be a good exercise anyway.
EDIT: I should've explained the last line of code.
return size ? curr+1 : 0; is equivalent to:
if (size)
return curr+1;
else
return 0;
Just a shorter way of saying the same thing.
Consider this code:
#include <stdio.h>
#define N 5
void printMatrix(int (*matrix)[N],int n)
{
int i,j;
for(i=0;i<n;i++){
for(j=0;j<n;j++)
printf("%d",matrix[i][j]);
printf("\n");
}
}
int main()
{
int R[N][N]={{1,2,3},{4,5,6},{7,8,9}};
printMatrix(R,3);
}
This works fine as expected.
Now, I thought to write the functions handling 2D-matrices in a separate source file and link them wherever required.
But then I ran into a problem as in the function printMatrix, the size of array of int to which matrix points (i.e N) is required at compile-time. So, my functions would not work in other cases when the size is different.
So,How can I handle this?
Dynamic Arrays are a solution but i want to know if it can be done with static arrays.
You can't use the built-in 2D array type if both sizes are not known at compile time. A built-in 2D array must have at least one of the two sizes known at compile time.
If both sizes are run-time values, then you have no other choice but to use a "manual" implementation of 2D array, like an array of pointers to arrays, for example. In that case the function declaration might look as follows (two alternative equivalent forms)
void printMatrix(int *const *matrix, int n, int m);
void printMatrix(int *const matrix[], int n, int m);
To access to the array elements you can still use the "traditional" syntax
matrix[i][j]
The array itself would be created as follows (a simple example)
int row0[] = { 1, 2, 3 };
int row1[] = { 4, 5, 6 };
int *matrix[2];
matrix[0] = row0;
matrix[1] = row1;
printMatrix(matrix, 2, 3);
But if you already have a matrix implemented as a built-in 2d array
int matrix[2][3] = { ... };
then just to be able to pass it to the above function you can "convert" it into the above form by using an additional temporary "row pointer" array
int *rows[2];
rows[0] = matrix[0];
rows[1] = matrix[1];
printMatrix(rows, 2, 3);
Write yourself a macro:
#define MAT(i,j) matrix[i*n + j];
and declare "matrix" as a simple pointer to an "int".
Calculate the array index yourself. This will handle an arbitrary two dimensional array, for example:
void printMatrix(int *matrix,int n, int m)
{
int i,j;
for(i=0;i<n;i++){
for(j=0;j<m;j++)
printf("%d",matrix[m * i + j]);
printf("\n");
}
}
Don't try to pass it as a 2-D array; pass a pointer to the first element, then compute offsets manually:
void printMatrix(int *a, size_t m, size_t n)
{
size_t i,j;
for (i = 0; i < m; i++)
{
for (j = 0; j < n; j++)
{
printf("a[%lu][%lu] = %d\n",
(unsigned long) i,
(unsigned long) j,
a[i*n+j]); // treat a as 1-d array, compute offset manually
}
}
}
int main(void)
{
int arr[5][4];
...
printMatrix(&arr[0][0], 5, 4);
...
}
Granted, this will only work for contiguously allocated arrays.
Although the syntax is not exactly the same, but this also happens to work a bit:
#include <stdio.h>
#define N 5
void printMatrix(int* row,int n,int sz)
{
int i,j;
int *currRow;
for(i=0;i<n;i++){
currRow = row+i*sz;
for(j=0;j<n;j++)
printf("%d",currRow[j]);
printf("\n");
}
}
int main()
{
int R[N][N]={{1,2,3},{4,5,6},{7,8,9}};
printMatrix(R[0],3,sizeof(R[0])/sizeof(int));
}