cudaMemcpy not copying the host matrix to device (gives segmentation fault) - c

Below is the code where i get Segmentation fault when i am trying to print the matrix d_A which is being copied from host matrix h_A.when i am trying to print matrix h_A just before cudamalloc it gets printed but after cudamemcpy trying to print d_A(Device matrix) gives me error.
I am using the following:- nvcc -arch=sm_20 Trial.cu -o out to compile
#include <stdio.h>
#include <sstream>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <unistd.h>
#include <sys/time.h>
#include <stdint.h>
#include <cuda.h>
#include <time.h>
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
void LUdecomposition(float *h_A,float *A_,int dim,unsigned int size_A,int row_A)
{
float *d_A;int k;
gpuErrchk(cudaMalloc(&d_A, size_A*sizeof(float)));
gpuErrchk(cudaMemcpy(d_A, h_A, size_A*sizeof(float), cudaMemcpyHostToDevice));
printf("\n D_A");
gpuErrchk(cudaMemcpy(A_,d_A,size_A*sizeof(float), cudaMemcpyDeviceToHost));
for(int i=0; i<size_A; i++)
{
if (i % row_A == 0) printf("\n");
printf("%f ", A_[i]);
}
printf("\n D_A");
}
void input_matrix_generation_A(float *Matrix, unsigned int row, unsigned int column, unsigned int size)
{
for (int i=0; i<size; i++)
{
Matrix[i] = rand()%5+1;
if (i % column == 0) printf("\n");
}
}
int main(int argc, char *argv[])
{
int m=4;int dim=2;
int size_A=m*m;
float *A, *A_;
A = (float*)malloc(sizeof(float)*size_A);
input_matrix_generation_A(A,m,m,size_A);
A_ = (float*)malloc(sizeof(float)*size_A);
LUdecomposition(A,A_,dim,size_A,m);
for(int i=0; i<size_A; i++)
{
if (i % row_A == 0) printf("\n");
printf("%f ", A_[i]);
}
return 0;
}

You are trying to access (de-reference) a device pointer from the host, which is resulting in undefined behavior and causing segmentation fault. So the following line of code is invalid:
printf("%f ", d_A[i]);
Also, you are copying back extra amount of memory:
cudaMemcpy(A_,d_A,size_A*sizeof(double), cudaMemcpyDeviceToHost);
It should be
cudaMemcpy(A_,d_A,size_A*sizeof(float), cudaMemcpyDeviceToHost);

In your code at about line 23, you write:
for(int i=0; i<size_A; i++)
{
if (i % row_A == 0) printf("\n");
printf("%f ", d_A[i]);
}
and this is the part that triggers the segment fault.
Please notice that the device pointer d_A is in the memory space of global memory on GPU, and shall be never de-referenced directly on CPU side.

Related

Determinant of a Matrix in C, Troubleshooting

main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "matrix.h"
int main(){
//Prompt the user for the size of matrix to be calculated.
printf("Welcome to the matrix determinant calculator!\n\n");
printf("Please select the matrix size you would like to input: \n");
printf("\t (A): 2x2 matrix\n");
printf("\t (B): 3x3 matrix\n\n");
char selection; //Stores matrix size selection
scanf(" %c", &selection);
int size; //Size of matrix
//Uses selection from user to determine value to assign to 'size'
if (selection == 'A' || selection == 'a'){
size = 2;
}
else if (selection == 'B' || selection == 'b'){
size = 3;
}
else{
printf("Your selection is invalid. Please start over.\n");
return 0;
}
printf("\nYou have selected a %dx%d matrix.\n\n", size, size);
//Initialize pointer array
int* matrix_ptr = (int*) malloc(size * sizeof(int*));
int** matrix = &matrix_ptr;
for (int i = 0; i < size; i++){
matrix[i] = (int*)malloc(size * sizeof(int));
}
readMatrix(matrix, size); //Sets up matrix by taking input from user
int calc = determinant(matrix, size); //Calculates determinant
printf("The %dx%d matrix is: \n\n", size, size);
//Displays the matrix on the console
for (int row = 0; row < size; row++){
for (int col = 0; col < size; col++){
printf("%d\t", matrix[row][col]);
}
printf("\n");
}
//Deletes stored data
for (int i = 0; i < size; i++){
free(matrix[i]);
}
free(matrix);
printf("\nThe determinant of the matrix is: %d\n", calc);
return 0;
}
determinant.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "matrix.h"
#include "determinant.h"
int determinant(int** matrix, int size){
int detm_calc; //Determinant calculation variable
//Determine which formula to use - 2x2 or 3x3 matrix.
if (size == 2){ //2x2 case
int a = matrix[0][0];
int b = matrix[0][1];
int c = matrix[1][0];
int d = matrix[1][1];
detm_calc = (a*d) - (b*c);
}
else{ //3x3 case
int a = matrix[0][0];
int b = matrix[0][1];
int c = matrix[0][2];
int d = matrix[1][0];
int e = matrix[1][1];
int f = matrix[1][2];
int g = matrix[2][0];
int h = matrix[2][1];
int i = matrix[2][2];
detm_calc = a*(e*i - f*h) - b*(d*i - f*g) + c*(d*h - e*g);
}
return detm_calc;
}
determinant.h
#ifndef DETERMINANT_H
#define DETERMINANT_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "matrix.h"
int determinant(int**, int);
#endif
matrix.c
#include <stdio.h>
#include <stdlib.h>
#include "matrix.h"
#include "determinant.h"
void readMatrix(int** matrix, int size){
for (int i = 0; i < size; i++){
for (int j = 0; j < size; j++){
printf("Please enter the integer for row %d column %d:\t", i+1, j+1);
scanf("%d", &matrix[i][j]);
}
printf("\n");
}
}
matrix.h
#ifndef MATRIX_H
#define MATRIX_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "determinant.h"
void readMatrix(int**, int);
#endif
Makefile
determinant: main.o determinant.o matrix.o
gcc main.o determinant.o matrix.o -o determinant.out
main.o: main.c
gcc -c main.c
determinant.o: determinant.c determinant.h
gcc -c determinant.c
matrix.o: matrix.c matrix.h
gcc -c matrix.c
The code shown above is supposed to create a determinant of a matrix however, it is not processing correctly as it infinitely loops. I assume something is wrong with the Makefile or the translation from C++ to C. Are there any notable errors that I am unable to spot? Thanks!
You are using
int* matrix_ptr = (int*) malloc(size * sizeof(int*));
int** matrix = &matrix_ptr;
Problems with this:
You are initializing matrix_ptr as a pointer to int, but allocating the memory for size*sizeof(int*), as though it would be an array of int*. So, allocated memory will depend on your hardware, and could be a mismatch with int memory size resulting in the wrong size of memory allocation.
int** matrix = &matrix_ptr; causes matrix to point to matrix_ptr, so when you are trying to write into matrix[1] in a loop - you are trying to write into an unallocated memory block, which will result in a segmentation fault.
int** matrix = (int**)malloc(size*sizeof(int*)); instead of these two lines will solve your problem
Note: I do not understand where it loops (I got a segfault with your code) and how the makefile can result in a runtime problem in your situation.

Segmentation Fault while printing

#include <stdlib.h>
#include <stdio.h>
#include "string.h"
//This function tries to calculate result of the floor function for floats <= 9999.
int main(int argc, char* argv[]) {
int i, j;
float k;
int x[10000];
for(i = 0; i < 10000; ++i){
x[i] = i;
}
printf("Enter a float in 0..9999: ");
scanf("%f", &k);
tester(x, k);
}
int tester(int* c, int k) {
printf("x[%d] = %d\n", k, c[k]);
}
When I run the program it gives me segmentation fault in here:
printf("x[%d] = %d\n", k, c[k]);
Can anyone see the what problem really is?
You can see the screenshots:
segmentation fault in printf
There are two major problems in your code.
You get input from the user (scanf) as float, but actually use it as int to pass it into the function and as index for the array x.
You should ALWAYS check user input from the terminal (or wherever). In this case the check should be at least, if the actual input is between 0 and 9999.
Improved version:
#include <stdlib.h>
#include <stdio.h>
#include "string.h"
void tester(int* c, int k) {
printf("x[%d] = %d\n", k, c[k]);
}
//This function tries to calculate result of the floor function for floats <= 9999.
int main(int argc, char* argv[]) {
int i;
int k;
int x[10000];
for(i = 0; i < 10000; ++i){
x[i] = i;
}
printf("Enter a float in 0..9999: ");
scanf("%d", &k);
if (k >= 0 && k < 10000) {
tester(x, k);
} else
{
printf("Sorry, your input %d was invalid.", k);
}
}
Probably that will fix your Segmentation Fault problem.

Solving segmentation fault

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void main() {
int i,j;
int *u = malloc(10000 * 10000 * sizeof(int));
for (i=0; i<10000; i++)
{
for(j=0;j<10000;j++)
{
u[i][j]=i+j;
}
}
free(u);
}
I edited my program. when compiling this program, I get an error "subscripted value is neither array nor pointer nor vector".
how can i allocate memory?
You have allocated memory for a single dimensional array and you are trying to use it as a two-dimensional array. There is a slight alteration which you need to do to your code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int main() {
int i,j;
int *u = malloc(10 * 10 * sizeof(int));
for (i=0; i<10; i++)
{
for(j=0;j<10;j++)
{
u[10*i +j]=i+j; // this is how you can use it
}
}
for (i=0; i<10; i++)
{
for(j=0;j<10;j++)
{
printf("%d ",u[10 *i +j]);
}
printf("\n");
}
free(u);
return 0;
}
Note that I have used size 10*10, you can do the same for whatever size you need.
Check-here
You can't allocate large arrays on heap directly in the declaration.
You can allocate large arrays using malloc as follows
#include <stdlib.h>
int *matrix = malloc(ROW * COLUMNs * sizeof(int));
Always use column major order to search for elements.
Explanation for column major order can be found here Accessing elements in a matrix
Here size = 10000
Always after you complete your task,free the memory
free(matrix);

Hackerrank circular array rotation segmentation faults

My following code sucessfully runs in sample input but gives segmentation faults in 13 test cases.
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
int main(){
int n;
int k;
int q;
int index[q];
scanf("%d %d %d",&n,&k,&q);
int *a = (malloc(sizeof(int) * n));
for(int a_i = 0; a_i < n; a_i++){
scanf("%d",&a[a_i]);
}
for(int a0 = 0; a0 < q; a0++){
int m;
scanf("%d",&m);
index[a0] = m;
}
for(int i=0; i<k; i++){
int ap = a[n-2];
for(int p=1; p<n-1; p++){
a[p] = a[p-1];
}
a[0] = a[n-1];
a[n-1] = ap;
}
for(int j=0; j<q;j++){
printf("%d\n", a[index[j]]);
}
return 0;
}
I am unable to find where the segmentation fault is. Also check out this:where I asked about declaring a as a pointer using malloc
There might have been chances that using malloc() to declare a would have lead to segmentation faults since it does not check for allocation error, but even when i defined a as an array the problem still remained.
q is not initialized, but is used as the argument to the declaration of an array.
You should use malloc to allocate the index array, after reading the value of q.

Can't convert code from C to ASM (Undefined symbol)

I'm trying to convert the code below from C to ASM because I'm having a tiny difficulty writing a procedure in ASM.
in DDOSBox the command I use: tcc -v -S -r- main.c
I'm getting an error: Undefined symbol _initarr in module main.c
Code: [main.c]
#include <stdio.h>
#include <stdlib.h>
extern int initarr(int** arr, int n, int (*initfunc)(int));
int getNum(int idx);
int getNum(int idx) {
return (4*idx);
}
void main() {
int *arr, i, n, success;
printf("\nPlease enter the array size\n");
scanf("%d",&n);
success = initarr(&arr, n, getNum);
if(!success) {
printf("Memory Allocation Failed\n");
return;
}
printf("\nThe Numbers in the allocated array are:\n");
for(i=0; i<n; i++)
printf("%d ", arr[i]);
return;
}

Resources