Parallel BFS OpenMP C

Parallel BFS OpenMP C - c

I'm trying to develop the code to make a parallel version of the BFS (topDown) algorithm on a graph. This is the code I have developed so far:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdbool.h>
#include<time.h>
#include<omp.h>
#define MAX 10000
int n;
int v;
long dim = (long) MAX*MAX;
long* adj;
void create_graph();
void BF_Traversal();
void BFS(int v);
int main()
{
create_graph();
BFS(v);
return 0;
}
bool isEmpty(int* array){
for (int i=0;i<n;i++){
if (array[i] != -1)
return false;
}
return true;
}
void topDownStep(int* frontier, int* next, int* parents){
int index = 0;
#pragma omp parallel for num_threads(4)
for (int i=0;i<n;i++){
if (frontier[i]!=-1){
for (int j=0;j<n;j++){
long offset= frontier[i]*MAX + j;
if (adj[offset]==1 && j!=v){
if (parents[j] ==-1){
parents[j]=frontier[i];
next[index] = j;
index++;
}
}
}
}
}
}
void BFS(int startVertex){
int* frontier = malloc(n * sizeof(int));
int* next = malloc(n * sizeof(int));
int* parents = malloc(n * sizeof(int));
memset( frontier, -1, n*sizeof(int) );
memset( next, -1, n*sizeof(int) );
memset( parents, -1, n*sizeof(int) );
frontier[0] = startVertex;
while (!isEmpty(frontier))
{
topDownStep(frontier,next,parents);
printf("Visited frontier:\n");
for (int i=0;i<n;i++){
if(frontier[i]!=-1)
printf("%d\t",frontier[i]);
}
printf("\n");
memcpy(frontier,next,n*sizeof(int));
memset( next, -1, n*sizeof(int) );
}
}
void create_graph(){
adj = malloc(dim* sizeof(long));
int count,max_edge,origin,destin;
srand(time(NULL));
// FILE *in_file = fopen("graph.txt", "r");
// fscanf(in_file, "%d %d", &n , &v);
// printf("%d - %d",n,v);
// max_edge = n*(n-1);
// while ( fscanf(in_file, "%d %d", &origin,&destin) != EOF ) {
// if((origin == -1) && (destin == -1))
// break;
// if(origin>=n || destin>=n || origin<0 || destin<0)
// printf("Invalid edge!\n");
// else
// adj[origin][destin] = 1;
// adj[destin][origin] = 1;
// }
n = MAX;
v= 0;
max_edge = n*(n-1);
for(int i =0;i<n;i++){
for(int j=0;j<2;j++){
int val = rand()%MAX;
if(i!=val){
long offset = i*MAX+val;
adj[offset]=1;
}
}
}
}
I'm not sure if this approach is good or not for parallelizing such algorithm (see topDownStep).
The code seems to work correctly as the frontiers are visited in the right order, but I have a feeling that the code is not optimized.
Also I am afraid that race conditions may occur even if from the tests I have done it seems that this does not happen.
I look forward to your advice and I always thank you for your availability.

Related

How can I find why my merge sorting algorithm crash when sorting an array of 1 million element?

I'm a French student and trying to calculate the execution time of the Merge Sort algorithm for different size of array.
I also want to write the different execution time in a .csv file. But when my program tries to sort an array with 1 million elements the process returns -1073741571 (0xC00000FD) in Code::Blocks. So if you could point me to a way to find a solution I would be very grateful!
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
void genTab(int *tab, int n) {
int i;
for (i = 0; i < n; i++) {
tab[i] = rand() % 100;
}
}
void fusion(int *tab, int deb, int mid, int fin) {
int i = deb;
int j = mid + 1;
int k = deb;
int temp[fin + 1];
while ((i <= mid) && (j <= fin)) {
if (tab[i] <= tab[j]) {
temp[k] = tab[i];
i++;
} else {
temp[k] = tab[j];
j++;
}
k++;
}
while (i <= mid) {
temp[k] = tab[i];
i++;
k++;
}
while (j <= fin) {
temp[k] = tab[j];
k++;
j++;
}
for (i = deb; i <= fin; i++) {
tab[i] = temp[i];
}
}
void triFusion(int *tab, int i, int j) {
if (i < j) {
triFusion(tab, i, (int)((i + j) / 2));
triFusion(tab, (int)((i + j) / 2 + 1), j);
fusion(tab, i, (int)((i + j) / 2), j);
}
}
void reset(int *tab1, int *tab2, int n) {
for (int i = 0; i < n; i++) {
tab2[i] = tab1[i];
}
}
int main() {
srand(time(NULL));
clock_t start, end;
int nbrTest[15] = {
1000, 5000, 10000, 50000, 80000, 100000, 120000, 140000,
150000, 180000, 200000, 250000, 300000, 450000, 1000000
};
FILE *fp;
char *tpsExecution = "exeTime.csv";
fp = fopen(tpsExecution, "w");
fprintf(fp, "Array Size; Merge Time");
for (int i = 0; i < 15; i++) {
int n = nbrTest[i];
printf("Calculating time for an array of %d \n", n);
int *tab = malloc(sizeof(int) * n);
genTab(tab, n);
int *copie = malloc(sizeof(int) * n);
reset(tab, copie, n);
start = clock();
triFusion(tab, 0, n - 1);
end = clock();
float tpsFusion = (float)(end - start) / CLOCKS_PER_SEC;
reset(tab, copie, n);
printf("writing in the file\n");
fprintf(fp, "\n%d;%f", n, tpsFusion);
free(tab);
free(copie);
}
fclose(fp);
return 0;
}

int temp[fin+1]; may exceed the space limit for the stack. You should allocate it with malloc instead, and free it with free.
If you want to exclude malloc and free from the timed code, the allocation could be performed outside the timed code and passed in as work space.

(Note: posted after the answer from #Eric Postpischil).
The function
void fusion(int * tab, int deb, int mid, int fin)
Has the line
int temp[fin+1];
and the value of fin comes through another function from the number of elements n to be sorted
triFusion(tab, 0, n-1);
and as an automatic variable, breaks the stack when n is large.
I suggest replacing the line with
int *temp = malloc((fin+1) * sizeof *temp);
if(temp == NULL) {
puts("malloc");
exit(1);
}
// ...
free(temp);

fusion() is always allocating the full size of the array for temp, even when only a small fraction of temp is being used. You could change this to:
int k = 0;
...
int temp[fin+1-deb];
...
tab[i]=temp[i-deb];
still this will exceed stack space if n is large. So as suggested in the other answers:
int k = 0;
...
int *temp = malloc((fin+1-deb)*sizeof(int));
...
tab[i]=temp[i-deb];
...
free(temp)
or better still, do a one time allocation of a second array in main or in a "helper" function, the include a pointer to the second array in the merge sort functions.

How to overcome double free or corruption (out) Aborted (core dumped) in c

I am trying to make a zoom_image function which zooms gray image using discrete Fourier transform,. the code i am include work if the image size is less than or equal 4*4 but if size increase. it gives 'double free or corruption (out) Aborted (core dumped)' error
I have tried fft_d and ifft_2d function of my code it works if input size is small if input size is big it gives the above error
#include<math.h>
#include<stdio.h>
#include<stdlib.h>
struct complex{
float real;
float im;
};
typedef struct complex complex;
complex w(int i, int n) {
complex result;
result.real = cos(2*M_PI*i/n);
result.im = -sin(2*M_PI*i/n);
return result;
}
complex wp(int i, int n) {
complex result;
result.real = cos(2*M_PI*i/n);
result.im = sin(2*M_PI*i/n);
return result;
}
complex mul(complex a, complex b) {
complex result;
result.real = a.real*b.real - a.im*b.im;
result.im = a.real*b.im + b.real*a.im;
return result;
}
complex divi(complex a, complex b) {
complex result;
result.real = (a.real*b.real + a.im*b.im)/(b.real*b.real + b.im*b.im);
result.im = (-a.real*b.im + b.real*a.im)/(b.real*b.real + b.im*b.im);
return result;
}
complex add(complex a, complex b) {
complex result;
result.real = a.real+b.real ;
result.im = a.im+b.im;
return result;
}
complex sub(complex a, complex b) {
complex result;
result.real = a.real - b.real;
result.im = a.im - b.im;
return result;
}
void printComplex(complex var) {
printf("%f i%f\n",var.real,var.im);
}
void printComplexS(complex var) {
printf("%f i%f",var.real,var.im);
}
#include<stdio.h>
#include<stdlib.h>
#include"complex.h"
static int gb=0;
complex* _fft(complex *arr, int size, int step, int index) {
if(size == 2) {
complex *result;
result = (complex*)malloc(size*sizeof(complex));
result[0] = add(arr[index], arr[index+step]);
result[1] = sub(arr[index], arr[index+step]);
return result;
}
else {
int i;
complex *even, *odd, *result, *mull;
even = _fft(arr, size/2, step*2, index);
odd = _fft(arr, size/2, step*2, index+step);
result = (complex*)malloc(size*sizeof(complex));
mull = (complex*)malloc(size/2*sizeof(complex));
for(i=0;i<size/2;i++) {
mull[i] = mul(odd[i], w(i,size));
}
for(i=0;i<size/2;i++)
result[i] = add(even[i], mull[i]);
for(;i<size;i++)
result[i] = sub(even[i - size/2], mull[i - size/2]);
free(even);
free(odd);
free(mull);
return result;
}
}
complex* fft(complex *arr, int size) {
return (complex*)_fft(arr, size, 1, 0);
}
complex* _ifft(complex *arr, int size, int step, int index) {
if(size == 2) {
complex *result;
result = (complex*)malloc(size*sizeof(complex));
result[0] = add(arr[index], arr[index+step]);
result[1] = sub(arr[index], arr[index+step]);
return result;
}
else {
int i;
complex *even, *odd, *result, *mull;
even = _ifft(arr, size/2, step*2, index);
odd = _ifft(arr, size/2, step*2, index+step);
result = (complex*)malloc(size*sizeof(complex));
mull = (complex*)malloc(size/2*sizeof(complex));
for(i=0;i<size/2;i++)
mull[i] = mul(odd[i], wp(i,size));
for(i=0;i<size/2;i++)
result[i] = add(even[i], mull[i]);
for(;i<size;i++)
result[i] = sub(even[i - size/2], mull[i - size/2]);
free(even);
free(odd);
free(mull);
return result;
}
}
complex* ifft(complex *arr, int size) {
complex *re = _ifft(arr, size, 1, 0);
for(int i=0;i<size;i++){
re[i].real=re[i].real/size;
re[i].im=re[i].im/size;
}
return re;
}
complex** transpose(complex **src, int n, int m) {
complex **result, ***re;
result=(complex**)malloc(sizeof(complex*)*m);
for(int i=0;i<m;i++) {
result[i]=(complex*)malloc(sizeof(complex)*n);
for(int j=0;j<n;j++)
result[i][j]=src[j][i];
}
re=(complex***)&result;
return (complex**)*re;
}
complex** fft_2d(complex** arr, int n, int m) {
complex **arrR, ***r, *temp;
int i, j;
arrR = (complex**)malloc(sizeof(complex*));
for(i=0;i<n;i++) {
arrR[i]=(complex*)fft(arr[i],m);
printf("%d ",i);
}
arrR=(complex**)transpose(arrR,n,m);
malloc(0);
for(i=0;i<m;i++)
arrR[i]=(complex*)fft(arrR[i],n);
arrR=transpose(arrR,n,m);
r=(complex***)&arrR;
return (complex**)*r;
}
complex** ifft_2d(complex** arr, int n, int m) {
complex **arrR, ***r, *temp;
int i, j;
arrR = (complex**)malloc(sizeof(complex*));
for(i=0;i<n;i++)
arrR[i]=(complex*)ifft(arr[i],m);
arrR=(complex**)transpose(arrR,n,m);
malloc(0);
for(i=0;i<m;i++)
arrR[i]=(complex*)ifft(arrR[i],n);
arrR=transpose(arrR,n,m);
r=(complex***)&arrR;
return (complex**)*r;
}
unsigned int** zoom_img(unsigned int **img, int col, int row, int r_col, int r_row) {
int i, j;
complex **mat=(complex**)malloc(sizeof(complex*)*col), **re;
complex **new_re=(complex**)malloc(sizeof(complex*)*(col+2*r_col)), **u;
unsigned int **result=(unsigned int**)malloc(sizeof(unsigned int*)*(col + 2*r_col)), ***z;
for(i=0;i<col;i++)
mat[i]=(complex*)malloc(sizeof(complex)*row);
for (i=0;i<col;i++) {
for (j=0;j<row;j++) {
mat[i][j].real = (float)pow(-1, i+j)*(float)img[i][j];
mat[i][j].im=0;
}
}
re = (complex**)fft_2d(mat, col, row);
for(i=0;i<(col+2*r_col);i++)
new_re[i]=(complex*)malloc(sizeof(complex)*(row+2*r_row));
for(i=0;i<(col+2*r_col);i++) {
for(j=0;j<(row+2*r_row);j++) {
if(i<r_col || i>r_col+col-1 || j<r_row || j>r_row+row-1) {
new_re[i][j].real = 0;
new_re[i][j].im = 0;
}
else
new_re[i][j]=re[i-r_col][j-r_row];
}
}
u = (complex**)ifft_2d(new_re, col+2*r_col, row + 2*r_row);
for(i=0;i<(col+2*r_col);i++) {
result[i]=(unsigned int*)malloc(sizeof(unsigned int)*(row+2*r_row));
for(j=0;j<(row+2*r_row);j++) {
result[i][j] = (unsigned int)u[i][j].real;
}
}
z=&result;
return *z;
}
int main() {
unsigned int i, j, **arr=(unsigned int**)malloc(sizeof(unsigned int*)*2), **result;
for(i=0;i<2;i++) {
arr[i]=(unsigned int*)malloc(sizeof(unsigned int)*2);
for(j=0;j<2;j++) {
arr[i][j] = i+j +1;
}
}
result = zoom_img(arr,2,2,2,2);
return 0;
}/*
int main() {
complex **arr, **result, **re;
arr=(complex**)malloc(sizeof(complex*)*4);
for (int i = 0; i < 4; i++) {
arr[i]=(complex*)malloc(sizeof(complex)*4);
for (int j = 0; j < 4; j++) {
arr[i][j].real = i*j+1.2;
arr[i][j].im=0;
}
}
result = (complex**)fft_2d(arr,4,4);
//malloc(0);
re = (complex**)ifft_2d(result,4,4);
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
printf("(%2f) ", arr[i][j].real);
printComplexS(result[i][j]);
printf(" (%2f) ",re[i][j].real);
}
printf("\n");
}
return 0;
}*/

The code has several issues.
complex** ifft_2d(complex** arr, int n, int m) {
complex **arrR, ***r;
int i;
And then you are allocating too little:
arrR = (complex**)malloc(sizeof(complex*));
The above line should be malloc(sizeof(complex*) * n). Without the *n the next line causes undefined behavior (buffer overflow):
for(i=0;i<n;i++)
arrR[i]=(complex*)ifft(arr[i],m);
Then there is this strange line, why is it for?
malloc(0);
Then the ending of the function is weird (Why not simply return arrR?):
r=(complex***)&arrR;
return (complex**)*r;
Next, the recursive code (_ifft and _fft) assumes that size is a power of two, otherwise it gets into infinite recursion. You should validate the input. At least assert that:
assert(n >= 2);
assert(((n-1) & n ) == 0); // for positive n, assert that it is a power of 2
With this line you can see that zoom_img violates this assumption in the line:
u = (complex**)ifft_2d(new_re, col+2*r_col, row + 2*r_row);
Note that in the uncommented main, col==2, row==2, r_col==2, r_row==2, which ends up with size == 6, which is not a power of 2.
A smaller issue is performance. The code overuses malloc instead of reusing the same block of memory, and peek into different areas of it. This is what the classical FFT does. Classical FFT also does not use recursion like that, but uses iterations instead.

Troubles with implementation of cooley FFT

I am doing a convolution of two integer signals with the help of FFT, but somehow I can't get it right. I am not sure if my implementation of FFT is correct. Especially the math part.
big edit:
I posted all the code now. My apologies for not starting with it. I was sure the error was only in FFT part, but there might be more problems I overlooked. I know the code is messy and not clean. Everything is a bit fragmented and can be programmed in a simpler and cleaner way, but I was testing bit by bit. As for input it reads two signals from the command line. build up as a number indicating how big the signal is and the signal presented as an integer array e.q 2: [1,-1] and 10: [0,0,0,1,1,1,1,0,0,0]. It should then do a convolution on the signals by performing a FFT on both of them then do bit wise multiplication. With a inverse FFT on the resulting signal. Printing it again with the length and then the array consiting of integers. The printing itself is correct, but the values in the resulting array is not correct. I hope it is all a bit clearer now again my apologies and thank you for your help so far.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <complex.h>
double PI;
int *readSignal(int *len) { //reads the signal
int *x;
char c;
scanf("%d:", len);
x = calloc(*len, sizeof(int));
do c = getchar(); while (c != '[');
if (len > 0) {
scanf("%d", &x[0]);
for (int i=1; i < *len; i++) scanf(",%d", &x[i]);
}
do c = getchar(); while (c != ']');
return x;
}
void printSignal(int len, int *x) { //prints the signal
printf("%d: [", len);
if (len > 0) {
printf("%d", x[0]);
for (int i=1; i < len; i++)
printf(",%d", x[i]);
}
printf("]\n");
}
void *padSignal(int len, int lenSig, int *x) { //ensures that the signal is of size 2^n by padding it with 0's
int *padded;
padded = calloc(len, sizeof(int));
for (int i=0; i < lenSig; i++) {
padded[i] = x[i];
}
return padded;
}
void fft(double complex signal[], int length, int power) {
if (length == 1) {
return;
}
double complex *signalODD = calloc((length/2+1), sizeof(double complex));
double complex *signalEVEN = calloc((length/2+1), sizeof(double complex));
int index1 = 0;
int index2 = 0;
for(int i = 0; i < length; i++) {
if(i % 2 ==0) {
signalEVEN[index1] = signal[i];
index1++;
}
else {
signalODD[index2] = signal[i];
index2++;
}
}
fft(signalEVEN,length/2, power+1);
fft(signalODD,length/2, power+1);
for(int i = 0; i<length/2-1; i++) {
signal[i] = signalEVEN[i] + cexp((I*2*PI*i)/length)*signalODD[i];
signal[i+length/2] = signalEVEN[i]-cexp((I*2*PI*i)/length)*signalODD[i];
}
free(signalODD);
free(signalEVEN);
}
void ifft(double complex signal[], int length, int power) {
if (length == 1) {
return;
}
double complex *signalODD = calloc((length/2+1), sizeof(double complex));
double complex *signalEVEN = calloc((length/2+1), sizeof(double complex));
int index1 = 0;
int index2 = 0;
for(int i = 0; i < length; i++) {
if(i % 2 ==0) {
signalEVEN[index1] = signal[i];
index1++;
}
else {
signalODD[index2] = signal[i];
index2++;
}
}
fft(signalEVEN,length/2, power+1);
ifft(signalODD,length/2, power+1);
for(int i = 0; i<length/2-1; i++) {
signal[i] = signalEVEN[i] + cexp((I*-2*PI*i)/length)*signalODD[i];
signal[i+length/2] = signalEVEN[i]-cexp((I*-2*PI*i)/length)*signalODD[i];
}
free(signalODD);
free(signalEVEN);
}
int checkPowerofTwo(double len) { //checks for the closed power of 2
double x = 1;
while(len > pow(2,x)) {
x++;
}
return pow(2,x);
}
int main(int argc, char *argv[]) {
int lenH, *H;
int lenX, *X;
int *paddedX;
int *paddedH;
double length;
H=readSignal(&lenH); //reads in the signal H
X=readSignal(&lenX); //reads in signal X
length = lenH+lenX-1;
paddedH=padSignal((length),lenH,H); //pads the signal to the length
paddedX=padSignal((length),lenX,X); // pads the signal to the length
double complex *signalX = calloc(length, sizeof(double complex)); //creats a complex signal X and fills it with paddedX
for (int i = 0; i<length; i++) {
signalX[i] = paddedX[i];
}
double complex *signalH = calloc(length, sizeof(double complex)); // same for H
for (int i = 0; i<length; i++) {
signalH[i] = paddedH[i];
}
fft(signalX, length, 1); //performs the fast fourier transform on X
fft(signalH,length, 1); // performs the fast fourier transfom on H
double complex *signalY = calloc(length, sizeof(double complex)); //makes complex signal Y
for (int i = 0; i<length; i++) { //performs the convolution
signalY[i] = signalX[i]*signalH[i];
}
ifft(signalY, length,1);
int *output = calloc(length, sizeof(int)); //creates the final output signal
for (int i = 0; i<length; i++) {
output[i] = creal(signalY[i]);
}
printSignal(length,output);
free(signalX);
free(signalH);
free(signalY);
free(H);
free(X);
free(paddedH);
free(paddedX);
free(output);
return 0;
}

In:
if(i % 2 ==0 && i != 0)
Why do you exclude i == 0? Change that to if(i % 2 ==0) in both fft and ifft.
In both fft and ifft, the line:
for(int i = 0; i<lenght/2-1; i++) {
should be:
for(int i = 0; i<lenght/2; i++) {
In ifft, the recursion accidentally uses fft:
fft(signalEVEN,lenght/2, power+1);
fft(signalODD,lenght/2, power+1);
Change those to ifft.
The calloc calls do not need this much space:
calloc((lenght/2 + 1), sizeof(double complex));
That can be:
calloc((lenght/2), sizeof(double complex));
Also, the proper spelling of is “length”.
With those fixed, the fft and ifft routines appear to work for some superficial cases.

Dynamic memory allocation in arrays in C

I have spent last few hours trying to debug my code but I failed to do so. I think the problem lies in me not fully understanding dynamic memory allocation, however I could've made some other mistakes aswell. The question here is a bit more of a personal problem and I'm sorry if someone finds this question not fulfilling "Make it relevant to others".
I've been given the next assigment:
Create an array A[] out of n random elements from interval 0-100. Create a function which splits two arrays such as: array B[] contains elements > 50 while C[] contains rest of the elements. Create arrays A and B using dynamic memory allocation. Arguments of the function have to be all three arrays and their respective lengths.
#include <stdio.h>
#include <stdlib.h>
void Array(int *A, int *nA, int *B, int *nB, int *C, int*nC){
int i;
int nB1 = 0;
int nC1 = 0;
int *tmpB;
int *tmpC;
B = malloc((nB1+1)*sizeof(int));
C = malloc((nC1+1)*sizeof(int));
printf("\n");
for(i = 0 ; i < nA ; i++){
if(A[i] <= 50){
C[i] = A[i];
nC1++;
// The idea here is to have a new array with basically
// no length so that each time one element passes to either
// B or A array that array gets increased at the same
// time as nC or nB
tmpC = realloc(C, sizeof(int) * nC1);
if(tmpC == NULL){
printf("ERROR: realloc failed.\n");
}
C = tmpC;
// C = realloc(C, nC1 + 1);
}
else{
B[i] = A[i];
nB1++;
tmpB = realloc(B, sizeof(int) * nB1);
if(tmpB == NULL){
printf("ERROR: realloc failed.\n");
}
B = tmpB;
// B = realloc(B, nB1 + 1);
}
}
printf("\n");
printf("Array B: ");
nB = nB1;
for(i = 0 ; i < nB ; i++){
printf("%d ", B[i]);
}
printf("\n");
printf("Number of elements in array B: %d\n", nB);
printf("\n");
printf("Array C: ");
nC = nC1;
for(i = 0 ; i < nC ; i++){
printf("%d ", C[i]);
}
printf("\n");
printf("Number of elements in array C: %d\n", nC);
}
void main(){
int *A;
int *B;
int *C;
int nA, nB, nC, i, r, j;
nB = 0;
nC = 0;
printf("Enter the length of array A: ");
scanf("%d", &nA);
printf("\n");
A = malloc(nA * sizeof(int));
if (A == NULL){
printf("ERROR: malloc failed.\n");
return 1;
}
time_t t;
srand((unsigned)time(&t));
printf("Array A: ");
for(i = 0 ; i < nA ; i++){
r = rand() % 101;
A[i] = r;
printf("%d ", r);
}
printf("\n");
Array(A, nA, B, nB, C, nC);
}
So far my code is breaking when:
User input from nA is higher than 6.
Code is working fine while array A has all elements which can be put in one single array such as B or C. But if elements can be split, last element of the array A is not shown properly on the screen while being in B or C array.
EDIT: Updated code so it's easier to keep track of my mistakes.
#include <stdio.h>
#include <stdlib.h>
void Array(int *A, int nA, int *B, int nB, int *C, int nC){
int i;
int nB1 = 0;
int nC1 = 0;
int *tmpB;
int *tmpC;
B = malloc(1*sizeof(int));
if(B == NULL){
printf("ERROR: malloc B failed.\n");
return 1;
}
C = malloc(1*sizeof(int));
if(C == NULL){
printf("ERROR: malloc C failed.\n");
return 1;
}
printf("\n");
for(i = 0 ; i < nA ; i++){
if(A[i] <= 50){
// C[nC1] = A[i];
// nC1++;
// if( nC1 > 1){
// tmpC = realloc(C, sizeof(int) * nC1);
// if(tmpC == NULL){
// printf("ERROR: realloc C failed.\n");
// return 1;
// }
// C = tmpC;
// }
tmpC = realloc(C, sizeof(int) * nC1);
if(tmpC == NULL){
printf("ERROR: realloc C failed.\n");
return 1;
}
C = tmpC;
C[nC1++] = A[i];
// nC1++;
}
else{
// B[nB1] = A[i];
// nB1++;
// if(nB1 > 1){
// tmpB = realloc(B, sizeof(int) * nB1);
// if(tmpB == NULL){
// printf("ERROR: realloc B failed.\n");
// return 1;
// }
// B = tmpB;
// }
tmpB = realloc(B, sizeof(int) * nB1);
if(tmpB == NULL){
printf("ERROR: realloc B failed.\n");
return 1;
}
B = tmpB;
B[nB1++] = A[i];
// nB1++;
}
}
printf("\n");
printf("Array B: ");
nB = nB1;
for(i = 0 ; i < nB ; i++){
printf("%d ", B[i]);
}
printf("\n");
printf("Number of elements in array B: %d\n", nB);
printf("\n");
printf("Array C: ");
nC = nC1;
for(i = 0 ; i < nC ; i++){
printf("%d ", C[i]);
}
printf("\n");
printf("Number of elements in array C: %d\n", nC);
}
int main(){
int *A;
int *B;
int *C;
int nA, nB, nC, i, r;
printf("Enter the length of array A: ");
scanf("%d", &nA);
printf("\n");
A = malloc(nA * sizeof(int));
if (A == NULL){
printf("ERROR: malloc A failed.\n");
return 1;
}
time_t t;
srand((unsigned)time(&t));
printf("Array A: ");
for(i = 0 ; i < nA ; i++){
r = rand() % 101;
A[i] = r;
printf("%d ", r);
}
printf("\n");
Array(A, nA, B, nB, C, nC);
return 0;
}

I'll try to take a crack at this I guess. I found some notable issues with your code that would be resolved with some structural changes towards your implementation. Some of the lesser ones:
void main() {
// Code above here left out.
A = malloc(nA * sizeof(int));
if (A == NULL){
printf("ERROR: malloc failed.\n");
return 1; // Might be a problem here.
}
You return an integer in main when there is not enough memory to allocate for A. This could cause issues on some compilers - or if you hate warnings in general a simple fix from void main to int main will clear this up.
2nd, in your function Array you have
void Array(int *A, int *nA, int *B, int *nB, int *C, int *nC)
however, you begin to use nA nB and nC as just int instead of int * which is a mismatch of types. You could change to
void Array(int *A, int nA, int *B, int nB, int *C, int nC)
Lastly and most importantly, you run into heap errors by using uninitialized ints to create memory for the arrays passed in:
int nB1;
int nC1;
B = malloc((nB1 + 1)*sizeof(int));
C = malloc((nC1 + 1)*sizeof(int));
It would be better to instead add just one int to the arrays without using nB1 and nC1.
B = malloc(1*sizeof(int));
C = malloc(1*sizeof(int));
You should first make checks as to whether B and C are empty arrays when they are passed in, then add memory to them as you see fit.
if (A[i] <= 50){
C[i] = A[i];
nC1++;
// The idea here is to have a new array with basically
// no length so that each time one element passes to either
// B or A array that array gets increased at the same
// time as nC or nB
tmpC = realloc(C, sizeof(int) * nC1);
if (tmpC == NULL){
printf("ERROR: realloc failed.\n");
}
C = tmpC;
}
tmpC is realloc'd but C is never given more space thus when assigning the value C[i] = A[i] it crashes. You need to expand on maintaining the arrays C and B.
EDIT
Disregard the comment about reallocation, the realloc operation you have is fine.
You iterate through each element in the array A, if the value is less than or equal to 50 you insert into array C otherwise B. However, you might have a few elements that satisfy the first condition but then the next condition could be satisfied. You then assign like so B[i] = A[i] but as stated you are at element 3 in A but you insert at element 1 in B, so using i to iterate for both is not correct. Follow SGM1's advice for reading into each array using nB1 and nC1, and also ryyker for the comment on reallocation.
I was able to compile and run this every time, however now you have to deal with actually assigning the values now:
for (i = 0; i < nA; i++){
if (A[i] <= 50){
C[nC1] = A[i];
nC1++;
tmpC = (int *)realloc(C, sizeof(int) * nC1);
if (tmpC == NULL){
printf("ERROR: realloc failed.\n");
}
C = tmpC;
// C = realloc(C, nC1 + 1);
}

Logic error B[i] = A[i]; in your condition should be B[nB1] = A[i];. Same thing with C array, should be C[nC1] = A[i] (both of this in the loop).
Also, from KillaBytes bytes analysis. Yes, you should definitely realloc BEFORE adding the new value. Probably is the reason your program crashes eventually:
else {
nB1++; // since you started at size 0
tmpB = realloc(B, sizeof(int) * nB1); //realloc to 0 = (sizeof(int) * 0) is bad
if(tmpB == NULL){
printf("ERROR: realloc failed.\n");
}
B = tmpB;
B[nB1 - 1] = A[i];///*****KEY POINT, This happens at the very end***
}
Still, there are many other problems, but not with the core logic. Especially with the the understanding of pointers. I'll try to list a few you need to think about:
Nothing comes out of the Array function (besides the printed output), the original B and C remain the same.
A, B (in the function), C (in the function) is not freed at the end. The idea is, for every malloc/calloc (realloc would free when successful), you would have to call free on the pointer to avoid any memory leakage :) just a trivial point, but very good habit.
Error conditions shouldn't continue logic, simplest solution would be to kill the program
if(tmpC == NULL){
printf("ERROR: realloc failed.\n");
}
should be
if(tmpC == NULL){
free(C);//unnecessary because the program is going to exit
// I would argue to get used to this, to force the habit
// to alway free all mallocs
printf("ERROR: realloc failed.\n");
exit(-1);// -1 is a generic value, error code that is checked
// if the caller cares to look (you can make it any int)
}
Now, if you wanted to "return" the value outside of for B and C Array() there are a few approaches, I'll demo one way.
void Array(int *A, int nA, int **B, int *nB, int **C, int *nC){ // funciton signature
Any time you use B, nB, C, nC in this function, use *B, *nB, *C, *nC. Eg:
*B = malloc((*nB1+1)*sizeof(int));
*C = malloc((*nC1+1)*sizeof(int));
And the call to the function from main should be:
printf("\n");
Array(A, nA, &B, &nB, &C, &nC);

There were a few bugs. There were a lot of errors/warnings flagged by the compiler (i.e. always compile with -Wall)
In the Array prototype, nA was defined as *nA [a pointer] but you used nA throughout the body.
*nB and *nC were correct, but near the bottom you needed:
*nB = nB1;
*nC = nC1;
There was a lot of replication code for all arrays. Each array had a custom loop to print it. To simplify, create a "print array" function.
The reallocation of B and C are similar, so, once again, create a common function.
Other bugs aside, while Array created B and C, it had no way to pass the updated values back to main. So, in the prototype, we need int **Bp and int **Cp.
In all, Array had two input values and four return values.
Here's the corrected code [please pardon the gratuitous style cleanup]:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// prtarray -- print an array
void
prtarray(int *arr,int count,const char *sym)
{
printf("\n");
printf("Array %s:",sym);
for (int i = 0; i < count; i++)
printf(" %d ", arr[i]);
printf("\n");
printf("Number of elements in array %s: %d\n",sym,count);
}
// growarray -- grow array
int *
growarray(int *arr,int *pcount,int val)
{
int off;
off = *pcount;
arr = realloc(arr,sizeof(int) * (off + 1));
arr[off] = val;
*pcount = off + 1;
return arr;
}
// BUGFIX: we need "int nA" and _not_ "int *nA"
#if 0
void
Array(int *A, int *nA, int *B, int *nB, int *C, int *nC)
#else
void
Array(int *A, int nA, int **Bp, int *nB, int **Cp, int *nC)
#endif
{
#if 1
int *B = NULL;
int *C = NULL;
#endif
int i;
printf("\n");
for (i = 0; i < nA; i++) {
if (A[i] <= 50)
C = growarray(C,nC,A[i]);
else
B = growarray(B,nB,A[i]);
}
*Bp = B;
*Cp = C;
}
int
main()
{
int *A;
int *B;
int *C;
int nA;
int nB;
int nC;
nB = 0;
nC = 0;
printf("Enter the length of array A: ");
scanf("%d", &nA);
printf("\n");
A = malloc(nA * sizeof(int));
if (A == NULL) {
printf("ERROR: malloc failed.\n");
return 1;
}
time_t t;
srand((unsigned) time(&t));
for (int i = 0; i < nA; i++)
A[i] = rand() % 101;
prtarray(A,nA,"A");
#if 0
Array(A, nA, B, nB, C, nC);
#else
Array(A, nA, &B, &nB, &C, &nC);
#endif
prtarray(B,nB,"B");
prtarray(C,nC,"C");
return 0;
}
But, dynamic arrays "cry out" for using a struct to control things. The code becomes even simpler:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// array control
struct array {
int *arr_data; // data
int arr_count; // array count
const char *arr_sym; // array name
};
// arrinit -- initialize array
void
arrinit(struct array *arr,const char *sym)
{
arr->arr_data = NULL;
arr->arr_count = 0;
arr->arr_sym = sym;
}
// arrprint -- print an array
void
arrprint(struct array *arr)
{
printf("\n");
printf("Array %s:",arr->arr_sym);
for (int i = 0; i < arr->arr_count; i++)
printf(" %d ", arr->arr_data[i]);
printf("\n");
printf("Number of elements in array %s: %d\n",arr->arr_sym,arr->arr_count);
}
// arrgrow -- grow array
void
arrgrow(struct array *arr,int val)
{
int off;
off = arr->arr_count;
arr->arr_data = realloc(arr->arr_data,sizeof(int) * (off + 1));
arr->arr_data[off] = val;
arr->arr_count = off + 1;
}
// BUGFIX: we need "int nA" and _not_ "int *nA"
void
Array(struct array *A, struct array *B, struct array *C)
{
int i;
int val;
printf("\n");
for (i = 0; i < A->arr_count; i++) {
val = A->arr_data[i];
if (val <= 50)
arrgrow(C,val);
else
arrgrow(B,val);
}
}
int
main()
{
struct array A;
struct array B;
struct array C;
int nA;
arrinit(&A,"A");
arrinit(&B,"B");
arrinit(&C,"C");
printf("Enter the length of array A: ");
scanf("%d", &nA);
printf("\n");
time_t t;
srand((unsigned) time(&t));
for (int i = 0; i < nA; i++)
arrgrow(&A,rand() % 101);
arrprint(&A);
Array(&A, &B, &C);
arrprint(&B);
arrprint(&C);
return 0;
}
UPDATE:
Here's a clean wrapper for realloc to use in the above.
// qrealloc -- reallocate with null check
void *
qrealloc(void *ptr,size_t len)
{
#if 1
// what is normally sufficient
ptr = realloc(ptr,len);
if (ptr == NULL) {
fprintf(stderr,"qrealloc: realloc failure -- len=%lu\n",len);
exit(1);
}
#else
// what valgrind needs
void *tmp = realloc(ptr,len);
if (tmp == NULL) {
free(ptr);
fprintf(stderr,"qrealloc: realloc failure -- ptr=%p len=%lu\n",ptr,len);
exit(1);
}
ptr = tmp;
#endif
return ptr;
}

CUDA BFS Giant Graph (seg.fault)

I'm making a test on a BFS algorithm on CUDA ( wich I know that has some syncronization problems, but it's part of my work to test it anyway ) but i'm having problems on using (or creating?) 1M+ size graphs.
Here's the code I use to create them:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define GRAPHSIZE 1000000
struct Node
{
int begin; // comeco da sub-string de vizinhos
int num; // tamanho da sub-string de vizinhos
};
int getSize()
{
int size,arcs;
printf("Size of the graph: \nNodes:\n>>");
scanf ("%d", &size);
return size;
}
void createEdges(int graphSize, int* Edges)
{
int j,value, aux, rndIdx;
int edgesSize = 2*GRAPHSIZE;
srand(time(NULL));
printf ("\nGS : %d\n", graphSize);
j = 1;
for (int i=0; i < edgesSize; i++) //first it creates an ordered array of edges
{
if (j < GRAPHSIZE)
{
Edges [i] = j;
j++;
}
else
{
j=1;
Edges [i] = j;
j++;
}
}
for (int i=0; i < edgesSize; i++) //now, it randomly swaps the edges array
{
rndIdx = rand()%graphSize;
aux = Edges[rndIdx];
Edges[rndIdx] = Edges [i];
Edges [i] = aux;
}
}
int main ()
{
int size,graphAtts[2];
int edgesSize = 2*GRAPHSIZE;
int Edges[edgesSize];
struct Node node[GRAPHSIZE];
FILE *file;
printf("____________________________\nRandom graph generator in compact format, optmized for CUDA algorithms by Ianuarivs Severvs.\nFor details about this format read the README. \n");
//size = getSize(graphAtts);
//printf ("%d,%d",size,arcs);
createEdges(GRAPHSIZE,Edges); // or size?
/*
for (int i = 0; i < edgesSize ; i ++)
{
printf ("-- %d --", Edges[i]);
}
*/
printf("\nEdges:\n");
for (int i=0; i < edgesSize; i++)
printf("%d,",Edges[i]);
for (int i=0,j=0 ; i < GRAPHSIZE; i++,j+=2) // now, completes the graph
{
node[i].begin=j;
node[i].num=2;
printf ("\n node %d : begin = %d, num = 2",i,j);
}
printf("\n");
//writes file:
file = fopen ("graph1M.g","wb");
fwrite (&Edges, edgesSize * sizeof(int),1,file);
fwrite (&node, GRAPHSIZE * sizeof(struct Node),1,file);
fclose(file);
for (int i = 0; i < edgesSize ; i ++)
{
printf ("-- %d --", Edges[i]);
}
for (int i = 0; i < GRAPHSIZE ; i ++)
{
printf ("* %d *", i);
}
}
And here's my BFS code (on CUDA):
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda.h>
#include <cutil.h>
#define GRAPHSIZE 1000000
struct Node
{
int begin; // begining of the substring
int num; // size of the sub-string
};
__global__ void BFS (Node *Va, int *Ea, bool *Fa, bool *Xa, int *Ca, bool *parada) // memory races on both Xa and Ca
{
int tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid > GRAPHSIZE)
*parada=true;
if (Fa[tid] == true && Xa[tid] == false)
{
Fa[tid] = false;
Xa[tid] = true;
//__syncthreads(); // this solves the memrace problem as long as the threads are all on the same block
for (int i = Va[tid].begin; i < (Va[tid].begin + Va[tid].num); i++) // Va begin is where it's edges' subarray begins, Va is it's number of elements
{
int nid = Ea[i];
if (Xa[nid] == false)
{
Ca[nid] = Ca[tid] + 1;
Fa[nid] = true;
*parada = true;
}
}
}
}
// The BFS frontier corresponds to all the nodes being processed at the current level.
int main()
{
// for the time couting:
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
FILE * file;
printf("\nLoading graph file...\n");
struct Node node[GRAPHSIZE];
int edgesSize = 2*GRAPHSIZE;
int edges[edgesSize];
file = fopen ("graph1M.g","rb");
printf("abriu");
fread (&edges, edgesSize * sizeof(int),1,file);
fread (&node, GRAPHSIZE * sizeof(struct Node),1,file);
fclose(file);
//For file read test propouses only:
/*
for (int i = 0; i < edgesSize ; i ++)
{
printf ("-- %d --", edges[i]);
}
for (int i = 0; i < GRAPHSIZE ; i ++)
{
printf ("* %d *", i);
}
*/
bool frontier[GRAPHSIZE]={false};
bool visited[GRAPHSIZE]={false};
int custo[GRAPHSIZE]={0};
int source=0;
frontier[source]=true;
Node* Va;
cudaMalloc((void**)&Va,sizeof(Node)*GRAPHSIZE);
cudaMemcpy(Va,node,sizeof(Node)*GRAPHSIZE,cudaMemcpyHostToDevice);
int* Ea;
cudaMalloc((void**)&Ea,sizeof(Node)*GRAPHSIZE);
cudaMemcpy(Ea,edges,sizeof(Node)*GRAPHSIZE,cudaMemcpyHostToDevice);
bool* Fa;
cudaMalloc((void**)&Fa,sizeof(bool)*GRAPHSIZE);
cudaMemcpy(Fa,frontier,sizeof(bool)*GRAPHSIZE,cudaMemcpyHostToDevice);
bool* Xa;
cudaMalloc((void**)&Xa,sizeof(bool)*GRAPHSIZE);
cudaMemcpy(Xa,visited,sizeof(bool)*GRAPHSIZE,cudaMemcpyHostToDevice);
int* Ca;
cudaMalloc((void**)&Ca,sizeof(int)*GRAPHSIZE);
cudaMemcpy(Ca,custo,sizeof(int)*GRAPHSIZE,cudaMemcpyHostToDevice);
dim3 grid(100,100,1); //blocks per grid
dim3 threads(100,1,1); // threads per block
bool para;
bool* parada;
cudaMalloc((void**)&parada,sizeof(bool));
printf("_____________________________________________\n");
int count=0;
cudaEventRecord(start, 0);
do{
count ++;
para=false;
cudaMemcpy(parada,&para,sizeof(bool),cudaMemcpyHostToDevice);
BFS <<<grid,threads,0>>>(Va,Ea,Fa,Xa,Ca,parada);
CUT_CHECK_ERROR("kernel1 execution failed");
cudaMemcpy(&para,parada,sizeof(bool),cudaMemcpyDeviceToHost);
}while(para);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
//printf("\nFinal:\n");
cudaMemcpy(custo,Ca,sizeof(int)*GRAPHSIZE,cudaMemcpyDeviceToHost);
/*
printf("\n_____________________________________________\n");
for(int i=0;i<GRAPHSIZE;i++)
printf("%d ",custo[i]);
printf("\n");
printf("_____________________________________________\n");
*/
cudaEventElapsedTime(&time, start, stop);
printf ("\nTime for the kernel: %lf s \n", time/1000);
printf ("Number of kernel calls : %d \n", count);
file = fopen ("graph125MPar","w");
for(int i=0;i<GRAPHSIZE;i++)
fprintf(file,"%d ",custo[i]);
fprintf(file,"\n");
fclose(file);
}
I'm having a segmantation fault while trying to run it for 1M+ graphs (please note that I used the changed the stack size of the sistem with the command ' ulimit -s 16384 ' on linux)
Can someone help?

Don't use statically allocated host arrays for the graph, use dynamic memory allocation instead. Your ulimit command is setting the stacksize to 16384 kb, but you require something like 5*sizeof(int) + 2*sizeof(bool) per graph entry which is probably 22 bytes per entry. It is pretty easy to see where you will run out of stack space with 1 million entries.