I need to create a program that gets a dynamic matrix and changes it to one dimension, for example 4x4 matrix will give 16 arrays length, where each index has a odd or even number, matching the index itself. The threads needs to go over the matrix at the same time and copy the odd and even numbers to the correct places in the array. The main thread needs to wait for the rest of them to finish before printing the array and every value with its respective thread. It should come out like this
We managed to fix the segmentation fault that kept happening, but now we need to set it so that each thread runs right after the other but instead each thread runs 4 times and then it switches to a different one. How can I change it so it'll run as asked?
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <pthread.h>
#include <math.h>
#define CORE 4
int N;
int odd = 1;
int even = 0;
typedef struct my_thread {
int** matrix;
int* resArray;
int threadId;
int strart_raw;
int strart_cal;
int end_raw;
int end_cal;
int counter;
} my_thread;
void* createArray(struct my_thread* thread);
void main() {
pthread_t th[CORE];
int s_r = 0, s_c, e_r, e_c;
int i, j, lines, columns, * intMatrix;
printf("Type the N for the N*N matrix:\t");
scanf("%d", &N);
int size = N * N;
int result_Array[N * N];
int retcode;
int interval = size / CORE;
int matrix_build_counter = 1;
intMatrix = (int*)malloc(N * N * sizeof(int));
for (i = 0; i < N; ++i)
{
for (j = 0; j < N; ++j)
{
intMatrix[i * N + j] = matrix_build_counter;
matrix_build_counter++;
}
}
printf("The matrix:\n");
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%d ", intMatrix[i * N + j]);
}
printf("\n");
}
struct my_thread thred_obj_array[CORE];
for (int i = 0; i < CORE; i++) {
thred_obj_array[i].matrix = &intMatrix;
thred_obj_array[i].resArray = result_Array;
thred_obj_array[i].threadId = i;
thred_obj_array[i].strart_raw = (int)((i * N) / CORE);
thred_obj_array[i].end_raw = (int)(((interval * (i + 1)) / N));
thred_obj_array[i].strart_cal = ((interval * i)) % N;
thred_obj_array[i].end_cal = ((interval) * (i + 1));
thred_obj_array[i].counter = (int)floor((interval)*i);
}
for (int i = 0; i < CORE; i++) {
retcode = pthread_create(&th[i], NULL, createArray, &thred_obj_array[i]);
if (retcode != 0) {
printf("Create thread failed with error %d\n", retcode);
}
}
printf("done");
for (int i = 0; i < CORE; i++) {
pthread_join(th[i], NULL);
}
printf("the result array is: ");
for (int i = 0; i < N * N; i++) {
printf("%d ", result_Array[i]);
}
}
void* createArray(struct my_thread* thread) {
int j;
for (int i = thread->strart_raw; i < N; i = i * sizeof(int) * N) {
for (j = thread->strart_cal; j < N; j++) {
printf("I am thread: %d And my value is: %d , (%d,%d)\n", thread->threadId, (*thread->matrix + i * N)[j], i, j);
if (((*thread->matrix + i * N)[j]) % 2 == 0) {
thread->resArray[even] = (*thread->matrix + i * N)[j];
even += 2;
printf("-----%d ---even---\n", even);
}
else {
thread->resArray[odd] = (*thread->matrix + i * N)[j];
odd += 2;
printf("---%d ---odd--\n", odd);
}
(thread->counter)++;
if (thread->counter == thread->end_cal) {
return;
}
}
thread->strart_cal = 0;
}
}
This program takes an integer from input and populates a double array with random values and creates threads to sort each half of the array, one thread to sort the entire array, one to sort the first half, and one to sort the second half but segfaults at pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half). I added multiple print statements as shown below and also used gdb to confirm where the seg fault is but I'm completely lost as to why it doesn't work for array sizes greater than 4.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
typedef struct merge
{
double *FirstHalf;
double *SecondHalf;
double *myVal;
} MergeArray;
// Global Vars
int ArraySize;
int ArrayHalfSize;
void *mergeThread(void *args)
{
int i, j;
MergeArray *myMerge = (struct merge *)args;
for(i = 0; i < ArrayHalfSize; i++)
{
myMerge->myVal[i] = myMerge->FirstHalf[i];
}
ArraySize = ArrayHalfSize + ArrayHalfSize;
for(i = 0, j = ArrayHalfSize; j < ArraySize && i < ArrayHalfSize; i++, j++)
{
myMerge->myVal[j] = myMerge->SecondHalf[i];
}
return NULL;
}
void *selectionSortThreadB(void *args)
{
double *arr;
double *ptrArr;
arr = (double*)args;
ptrArr = (double*)malloc(ArraySize * sizeof(double));
int i;
int j;
double temp;
for(i = 1; i < ArraySize; i++)
{
temp = arr[i];
j = i - 1;
while(j >= 0 && arr[j] > temp)
{
arr[j + 1] = arr[j];
j = j - 1;
}
arr[j + 1] = temp;
}
/*for(i = 0; i < ArraySize; i++)
{
printf("SSTB: %d, %.2lf\n", i, arr[i]);
}*/
ptrArr = arr;
pthread_exit((void*)ptrArr);
}
void *selectionSortSecondHalf(void *args)
{
double *myarr, *myptrretSecondHalf;
myarr = (double *)args;
myptrretSecondHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretSecondHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortSecondtHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("This is the second half of the array when passed to SSSH: A_Second_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = ArrayHalfSize; i < ArraySize; i++)
{
min = i;
for(j = i + 1; j < ArraySize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretSecondHalf = myarr;
pthread_exit(myptrretSecondHalf);
}
void *selectionSortFirstHalf(void *args)
{
//printf("hello from 104\n");
double *myarr, *myptrretFirstHalf;
myarr = (double *)args;
myptrretFirstHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretFirstHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortFirstHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = 0; i < ArrayHalfSize; i++)
{
printf("This is the first half of the array when passed to SSFH: A_First_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = 0; i < ArrayHalfSize; i++)
{
min = i;
for(j = i + 1; j < ArrayHalfSize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretFirstHalf = myarr;
pthread_exit(myptrretFirstHalf);
}
int main(int argc, char *argv[])
{
if(argc != 2)
{
fprintf(stderr, "ERROR: Please provide the correct number of arguments (file, size of array)\n");
exit(-1);
}
else
{
ArraySize = atoi(argv[1]);
ArrayHalfSize = (ArraySize / 2);
clock_t start, end, start2, end2;
double RandomNum;
double *ThreadBlock;
double *ThreadArrayHalf1;
double *ThreadArrayHalf2;
pthread_t tid, tid1, tid2, tid3;
double A[ArraySize];
double B[ArraySize];
//double C[ArraySize];
double *A_First_Half/*[ArrayHalfSize]*/;
double *A_Second_Half/*[ArrayHalfSize]*/;
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize);
A_Second_Half = (double*)malloc(sizeof(A_Second_Half)*ArrayHalfSize);
int i;
srand(time(NULL)); // generate seed for rand nums based on time
for(i = 0; i < ArraySize; i++)
{
RandomNum = ((double) rand()*(1000.0+1.0)/(double)RAND_MAX+1.0);
printf("%.2lf\n", RandomNum);
A[i] = RandomNum;
}
for(i = 0; i < ArraySize; i++)
{
B[i] = A[i];
}
start = clock();
pthread_create(&tid, NULL, selectionSortThreadB, (void*)B);
pthread_join(tid, (void**)&ThreadBlock);
end = clock() - start;
printf("Sorting is done in %.2fms when one thread is used\n", end * 1000.0 / CLOCKS_PER_SEC);
//*******Two-Threaded Option**************//
// prints A[]
for(i = 0; i < ArraySize; i++)
{
printf("A[%d] = %.2lf\n", i, A[i]);
}
// populates first half of array A with half of A
for(i = 0; i < ArrayHalfSize; i++)
{
A_First_Half[i] = A[i];
printf("A_First_Half[%d] = %.2lf\n", i, A_First_Half[i]);
}
// populates second half of array A with second half of B
for(i = ArrayHalfSize; i < ArraySize; i++)
{
A_Second_Half[i] = A[i];
printf("A_Second_Half[%d] = %.2lf\n", i, A_Second_Half[i]);
}
printf("hello from 199\n");
start2 = clock();
printf("hello from 201\n");
pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half);
printf("hello from 203\n");
pthread_create(&tid2, NULL, selectionSortSecondHalf, A_Second_Half);
printf("hello from 205\n");
pthread_join(tid1, (void**)&ThreadArrayHalf1);
pthread_join(tid2, (void**)&ThreadArrayHalf2);
MergeArray threadMerge;
threadMerge.myVal = (double*)malloc(ArraySize * sizeof(double));
for(i = 0; i < ArrayHalfSize; i++)
{
printf("SSFH: %d, %.2lf\n", i, ThreadArrayHalf1[i]);
}
for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("SSSH: %d, %.2lf\n", i, ThreadArrayHalf2[i]);
}
threadMerge.FirstHalf = ThreadArrayHalf1;
threadMerge.SecondHalf = ThreadArrayHalf2;
pthread_create(&tid3, NULL, mergeThread, (void*)&threadMerge);
pthread_join(tid3, NULL);
end2 = clock() - start2;
printf("Sorting is done in %.2fms when two threads are used\n", end2 * 1000.0 / CLOCKS_PER_SEC);
//free(A_First_Half);
//free(A_Second_Half);
//free(threadMerge.myVal);
}
return 0;
}
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize); allocates space based on sizeof(A_First_Half), which uses the size of the pointer A_First_Half, not the size of the object it points to, *A_First_Half.
And you do not need the parentheses for sizeof with an expression or the cast of malloc. So use A_First_Half = malloc(sizeof *A_First_Half * ArrayHalfSize);.
In several places, a “second half” array is used with indices running from ArrayHalfSize to ArraySize-1. However, they point to storage for which space for ArrayHalfSize elements has been allocated. Indices in that space run from 0 to ArrayHalfSize-1. All the code should be changed to use only indices from 0 to ArrayHalfSize-1 with these “second halves.”
Along those lines, there is no need for selectionSortFirstHalf and selectionSortSecondHalf to be separate routines. Once selectionSortSecondHalf is fixed, per above, it will do the same thing as selectionSortFirstHalf: Sort an array with ArrayHalfSize elements.
Once you have fixed those and gotten the program working, eliminate the global variables and pass all the necessary information to the threads via their argument pointer (by pointing to a structure that contains the information the thread needs).
Can't increase rows in 2d array, but columns is ok.
#include <stdio.h>
#include <stdlib.h>
it is working:
void increasecolumn(int ** mas, int* n, int m){
for (int i = 0; i < m; i++){
int* tmp = realloc(mas[i], sizeof (*mas[i]) * ((*n) + 1));
if (tmp){
mas[i] = tmp;
}
}
(*n) = (*n) + 1;
}
but increasing rows failed
void increaserow(int ** mas, int n, int* m){
int ** tmp = realloc(mas, sizeof(*mas) * ((*m) + 1));
if (tmp){
mas = tmp;
for (int i = 0; i < 1; i++){
mas[(*m) + i] = malloc(sizeof(*mas[(*m) + i]) * n);
}
}
(*m) = (*m) + 1;
}
int main(int argc, char * argv[]) {
int n = 3; // columns
int m = 2; // rows
int** mas = malloc(m*sizeof(*mas));
for(int i = 0; i < m; i++){
mas[i] = malloc(n*sizeof(*(mas[i])));
}
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
mas[i][j] = 0;
printf("%d ", mas[i][j]);
}
printf("\n");
}
printf("\n");
increasecolumn(mas, &n, m);
for (int i = 0; i < m; i++){
mas[i][n-1] = 1;
}
increaserow(mas, n, &m); // problem is here
for (int j = 0; j < n; j++){
mas[m-1][j] = 0;
}
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
printf("%d ", mas[i][j]);
}
printf("\n");
}
system("pause");
return 0;
}
I use this answer Resizing 2D Arrays in C like an example, something wrong.
The GNU Project Debugger on Windows:
warning: FTH: (9152): * Fault tolerant heap shim applied to current process. This is usually due to previous crashes. *
0 0 0
0 0 0
Program received signal SIGSEGV, Segmentation fault.
0x0000000000401821 in main (argc=1, argv=0x7f1990) at D:\III Курс! II СЕМЕСТР\МатМодДослОп\stud\Untitled2.c:47
47: mas[m-1][j] = 0;
#include <stdio.h>
#include <pthread.h>
int arr[1000][1000];
int brr[1000][1000];
int h;
int f;
void *BMM(void *arg)
{
int* neo = (int*) arg;
int ne = *neo;
int sum = 0;
for(int i = 0; i < n; ++i)
{
sum += arr[x][i]*brr[x][f];
++f;
}
printf("%d\n", sum);
crr[x][h] = sum;
pthread_exit(NULL);
}
int main()
{
pthread_t* ar = malloc(3*sizeof(*ar));
printf("Enter the value of m and n\n");
scanf("%d %d",&m,&n);
for(int i = 0; i < m; ++i)
{
for(int j = 0; j < n; ++j)
{
scanf("%d",&arr[i][j]);
}
}
printf("Enter the value of p and q\n");
scanf("%d %d",&p,&q);
if(p != n)
{
printf("The matrix multiplication is not possible\n");
return 0;
}
int* id;
id = (int *)malloc(4*sizeof(int));
for(int i = 0; i < p; ++i)
{
for(int j = 0; j < q; ++j)
{
scanf("%d",&brr[i][j]);
}
}
for(x = 0; x < m; ++x)
{
for(z = 0; z < q; z+=4)
{
f = z;
h = z;
for(int k = 0; k < 3; ++k)
{
pthread_create(&ar[k],NULL,BMM,NULL);
}
for(int k = 0; k < 3; ++k)
{
pthread_join(ar[k],NULL);
}
}
}
for (int i = 0; i < m; ++i)
{
for(int j = 0; j < q; ++j)
{
printf("%d ",crr[i][j]);
}
printf("\n");
}
}
The above program is supposed to multiply two matrix by multiplying row one of matrix by all the columns of other matrix using 3 threads and then row two by all the other columns and so on and then store the respective values int another matrix but it is giving segmentation fault. Where am I going wrong?
I think your problem is here:
pthread_create(&ar[k],NULL,BMM,NULL);
^^^^
void *arg is NULL
and then:
void *BMM(void *arg)
{
int* neo = (int*) arg;
int ne = *neo; // Dereference NULL --> segmentation fault
Further this looks strange:
void *BMM(void *arg)
{
int* neo = (int*) arg;
int ne = *neo; // ne is never used !!
int sum = 0;
for(int i = 0; i < n; ++i) // Where does n come from ?
Perhaps it should be n instead of ne?
If n, x, f and h are global variables you are into trouble as all threads will work on the same variables. That would be real bad. Each thread needs it own variables.
BTW:
Always check the value returned by scanf - something like:
if (scanf("%d %d",&m,&n) != 2)
{
// Add error handling here
}
and
if (scanf("%d",&arr[i][j]) != 1)
{
// Add error handling here
}
I have a written a program that performs Gaussian elimination in C and returns the L2 norm of a matrix. The program is called like ./exec n k where n is the size of a n by n matrix and k is the number of threads that will be used to do the program (max 4). I allocate space for a n by n+1 matrix because having an augmented matrix is part of the gaussian elimination.
It works perfectly in OpenMP. As seen in the code below, I only have 1 parallel for. My goal now is to make that parallel for loop run concurrently using Pthreads instead of OpenMP. I made the for loop that be parallelized into a separate function and create pthreads to deal with it. My guess is that the pthreads are not each doing a different part of the loop (basically a different iteration of j), but instead the 4 Pthreads are just running the entire loops. I run the program like ./gauss 30 4 and it sometimes work and sometimes segfaults, although when it does work the L2 norm is not 0 (L2 will return 0 if program worked perfectly), so something is obviously up with the threading section. When I run it through GDB it segfaults at a loop for some reason but this same loop runs perfectly in OpenMP...can someone help me out
GDB
http://i.stack.imgur.com/V99yt.png
OpenMP Code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y,z;
int bvectcount = 0;
struct timeval start, end;
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double sum_squares = (double) 0;
for (i=0; i < matrixSize; i++) {
sum_squares += res[i] * res[i];
}
return sqrt(sum_squares);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
int chunk = matrixSize/threadNum;
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++){
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
//calculating triangular matrix
//threading needs to be done HERE
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
double temp2;
#pragma omp parallel default(none) num_threads(threadNum) shared(a,i,matrixSize,vect) private(j,z,ratio,temp2)
{
#pragma omp for schedule(static)
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
}
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
Pthreads code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y,z;
int bvectcount = 0;
int threadcount;
pthread_t workerThreads[4];
typedef struct threader {
int counter;
int matrixl;
} threader;
struct timeval start, end;
void *retval;
int checkargs(int argc, char* argv[]);
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double squaresum = (double) 0;
for (i=0; i < matrixSize; i++) {
squaresum += res[i] * res[i];
}
return sqrt(squaresum);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
void *parallelstuff(void *args){
threader temp = *((threader *)args);
int i, matrixSize;
i = temp.counter;
matrixSize = temp.matrixl;
double temp2;
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++)
{
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
threader stuff;
stuff.counter = i;
stuff.matrixl = matrixSize;
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
while(threadcount != 0){
if(pthread_join (workerThreads[threadcount-1], &retval ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
threadcount--;
}
//create matrix of n size
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
}
note that j , z should be declared as local (private) variables in each thread.
in OpenMP Code , you closed the brace of for loop in line 100 :
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
.......
......
.....
} //line 100
but in pthreads code, you closed it in line 149, so the full code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y; //z?
int bvectcount = 0;
int threadcount;
pthread_t workerThreads[4];
typedef struct threader {
int counter;
int matrixl;
} threader;
struct timeval start, end;
void *retval;
int checkargs(int argc, char* argv[]);
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double squaresum = (double) 0;
for (i=0; i < matrixSize; i++) {
squaresum += res[i] * res[i];
}
return sqrt(squaresum);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
void *parallelstuff(void *args){
threader temp = *((threader *)args);
int i, matrixSize;
i = temp.counter;
matrixSize = temp.matrixl;
//printf("matrixSize=%d counter=%d\n" , matrixSize ,temp.counter );
double temp2;
int j , z; //houssam
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++)
{
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
threader stuff;
stuff.counter = i;
stuff.matrixl = matrixSize;
//printf("i=%d\n" , i);
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
while(threadcount != 0){
if(pthread_join (workerThreads[threadcount-1], &retval ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
threadcount--;
}
}
//create matrix of n size
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
The two codes as written are not equivalent. Observe the OpenMP code:
#pragma omp for schedule(static)
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
The combined parallel for construct in OpenMP is a worksharing construct, i.e. it distributes the iterations on the following loop among the threads in the team. Given the schedule(static) clause, the iteration space is split into #threads blocks and each block is assigned to a different thread.
Your Pthreads code does not share the work:
i = temp.counter;
matrixSize = temp.matrixl;
...
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
Given that the same stuff object is passed to all threads, they all receive the same value of i and matrixSize and loop over the whole iteration space, therefore the wrong results.
What you have to do is simulate what #pragma omp for schedule(static) does, namely make each thread do only some of the matrixSize - (i+1) + 1 iterations. You should pass each thread a unique data object that contains the starting and the ending iteration:
typedef struct threader {
int start;
int end;
int i;
int matrixSize;
} threader;
...
void *parallelstuff(void *args){
threader *temp = (threader *)args;
int start, end, i, matrixSize;
start = temp->start;
end = temp->end;
i = temp->i;
matrixSize = temp->matrixSize;
double temp2;
int j , z; //houssam
for(j = start + 1; j<end; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
...
threader stuff[threadNum];
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
stuff[threadcount].start = i + threadcount*(matrixSize / threadNum);
stuff[threadcount].end = i + (threadcount+1)*(matrixSize / threadNum);
stuff[threadcount].i = i;
stuff[threadcount].matrixSize = matrixSize;
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
In theory, you could also let each thread know how many threads are out there and let it compute the iteration range itself, but that is complicated by the fact that Pthreads API lacks the equivalent of omp_get_thread_num(). There is an advanced trick that employs aligned memory allocation and the numeric thread ID encoded in the last bits of the pointer passed.