I am trying to run a program I have written for multiplying to square NxN matrices. However, I am getting a segmentation fault error. I have working code for the program without threading. But I have been unsuccessful in adapting my code for multiple threads.
I am attempting to run the code on a raspberry pi 4. The debugger states that the following line is where I receive the error signal SIGSEGV:
args.A[i][j] = rand() % 100;
I have tried putting printf statements around the sections of code where I allocate memory, but they were never run, so I am assuming that seg faults happen before any of the code is actually ran. I did some research on the internet in regards to solving seg faults, and that is when I tried using the debugger, but I do not understand why it is having a problem with setting the matrix elements. Especially since my previous unthreaded program has the same line of code and runs without any errors.
Feedback would be greatly appreciated.
The following is my code:
/* Program must be passed exactly one integer that satisfies the following condition:
* N % n = 0, where N is the square matrices' dimensions and n is the number of threads.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#define N 2000
typedef struct __myarg_t
{
FILE *Aptr, *Bptr, *Cptr; // Files containing the matrices
int **A, **B, **C, **T; // Matrices A , B, resultant and transpose of B
int rows; // Number of rows each thread computes
int cur; // Current thread number
} myarg_t;
void *mythread(void *arg)
{
myarg_t *m = (myarg_t *) arg;
int start = m->cur++ * m->rows;
int end = start + m->rows;
// Matrix Multiplication for rows start:(end - 1)
for (int i = start; i < end; i++)
{
for (int j = start; j < end; j++)
{
int num = 0;
for (int k = 0; k < N; k++)
{
num += m->A[i][k] * m->T[j][k];
}
m->C[i][j] = num;
}
}
return NULL;
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
fprintf(stderr, "usage: main-first <#ofthreads>\n");
exit(1);
}
pthread_t *thread;
clock_t tic, toc;
myarg_t args;
int rc, n;
args.cur = 0;
args.rows = N/n;
n = atoi(argv[1]);
args.Aptr = fopen("A_multi.txt", "w");
args.Bptr = fopen("B_multi.txt", "w");
args.Cptr = fopen("C_multi.txt", "w");
args.A = (int**)malloc(N * sizeof(int*));
args.B = (int**)malloc(N * sizeof(int*));
args.C = (int**)malloc(N * sizeof(int*));
args.T = (int**)malloc(N * sizeof(int*));
thread = (pthread_t *)malloc(n * sizeof(pthread_t));
// Dynamically allocate memory for 2D Array
for (int i = 0; i < N; i++)
{
args.A[i] = (int*)malloc(N * sizeof(int*));
args.B[i] = (int*)malloc(N * sizeof(int*));
args.C[i] = (int*)malloc(N * sizeof(int*));
args.T[i] = (int*)malloc(N * sizeof(int*));
}
// Assign values to the elements of the Matrices
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; i++)
{
args.A[i][j] = rand() % 100;
args.B[i][j] = rand() % 100;
args.T[j][i] = args.B[i][j];
}
}
tic = clock();
// Create threads
for (int i = 0; i < n; i++)
{
rc = pthread_create(&thread[i], NULL, mythread, &args);
if (rc != 0)
{
printf("pthread_create failed with thread %d.\n", i);
exit(1);
}
}
// Wait for threads to complete
for (int i = 0; i < n; i++)
{
rc = pthread_join(thread[i], NULL);
if (rc != 0)
{
printf("ptphread_join failed with thread %d.\n", i);
exit(1);
}
}
toc = clock();
printf("Elapsed: %f seconds\n", (double)(toc - tic) / CLOCKS_PER_SEC);
// Write matrices to their output files
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N; j++)
{
fprintf(args.Aptr, "%d ", args.A[i][j]);
fprintf(args.Bptr, "%d ", args.B[i][j]);
fprintf(args.Cptr, "%d ", args.C[i][j]);
}
fprintf(args.Aptr, "\n");
fprintf(args.Bptr, "\n");
fprintf(args.Cptr, "\n");
}
// Deallocate memory
for (int i = 0; i < N; i++)
{
free(args.A[i]);
free(args.B[i]);
free(args.C[i]);
free(args.T[i]);
}
free(args.A);
free(args.B);
free(args.C);
free(args.T);
fclose(args.Aptr);
fclose(args.Bptr);
fclose(args.Cptr);
return 0;
}
Change:
int rc, n;
...
args.rows = N/n;
n = atoi(argv[1]);
to:
int rc;
...
int n = atoi(argv[1]);
if(!n) {
// atoi() will return for "0" or error
}
args.rows = N/n;
The 2nd loop after the "Assign values" comment probably increments the wrong variable i but should be j. Otherwise i will be 2 * (N-1) which will overflow the arrays A, B and T which are of has N elements. This will cause your segfault.
Related
So, I was trying to write a program to do matrix multiplication using multiple threads and then plot a graph between the time taken and the number of threads used.
I used the following approach:
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
pthread_mutex_t lock;
#define M 200
#define N 300
#define P 400
#define X 2 // Number of Threads
#define RED "\x1b[31m"
#define GREEN "\x1b[32m"
int A[M][N], B[N][P], C[M][P], D[M][P];
int row = 0;
void *matrixMulti(void *arg)
{
pthread_mutex_lock(&lock);
int i = row++;
for (int j = 0; j < P; j++)
{
C[i][j] = 0;
for (int k = 0; k < N; k++)
{
C[i][j] += A[i][k] * B[k][j];
}
}
pthread_exit(NULL);
pthread_mutex_unlock(&lock);
}
void matrixMultiplicationWithoutThreading();
void matrixMultiplicationWithThreading();
void verifyIfBothMatrixAreSame();
int main()
{
int m, n, p;
// A: m*n Matrix, B: n*p Matrix
for (int i = 0; i < M; i++)
for (int j = 0; j < N; j++)
A[i][j] = rand() % 10;
// scanf("%d", &A[i][j]);
for (int i = 0; i < N; i++)
for (int j = 0; j < P; j++)
B[i][j] = rand() % 10;
// scanf("%d", &B[i][j]);
struct timeval start, end;
gettimeofday(&start, NULL);
matrixMultiplicationWithoutThreading();
gettimeofday(&end, NULL);
double time = (end.tv_sec - start.tv_sec) * 1e6;
time = (time + end.tv_usec - start.tv_usec) * 1e-6;
printf("The time taken by simple matrix calculation without threding is %0.6f\n", time);
struct timeval start_th, end_th;
gettimeofday(&start_th, NULL);
matrixMultiplicationWithThreading();
gettimeofday(&end_th, NULL);
time = (end_th.tv_sec - start_th.tv_sec) * 1e6;
time = (time + end_th.tv_usec - start_th.tv_usec) * 1e-6;
printf("The time taken by using the Threading Method with %d threads is %0.6f\n", X, time);
verifyIfBothMatrixAreSame();
}
void matrixMultiplicationWithThreading()
{
pthread_t threads[X];
for (int i = 0; i < X; i++)
{
threads[i] = (pthread_t)-1;
}
// Computation Started:
for (int i = 0; i < M; i++)
{
// At any moment only X threads at max are working
if (threads[i] == (pthread_t)-1)
pthread_create(&threads[i % X], NULL, matrixMulti, NULL);
else
{
pthread_join(threads[i % X], NULL);
pthread_create(&threads[i % X], NULL, matrixMulti, NULL);
}
}
for (int i = 0; i < X; i++)
pthread_join(threads[i], NULL);
// Computation Done:
}
void matrixMultiplicationWithoutThreading()
{
// Computation Started:
for (int i = 0; i < M; i++)
for (int j = 0; j < P; j++)
{
D[i][j] = 0;
for (int k = 0; k < N; k++)
D[i][j] += A[i][k] * B[k][j];
}
// Computation Done:
}
void verifyIfBothMatrixAreSame()
{
for (int i = 0; i < M; i++)
for (int j = 0; j < P; j++)
{
if (C[i][j] != D[i][j])
{
printf(RED "\nMatrix's are not equal something wrong with the computation\n");
return;
}
}
printf(GREEN "\nBoth Matrixes are equal thus verifying the computation\n");
}
Now, this code works sometimes, and sometimes it doesn't, like the result does not match the actual result. Similarly, this code gives a segmentation fault in one of the Linux virtual machines. Also, even when it works correctly, it doesn't give the asymptotically decreasing graph. Instead, the time is almost constant with arbitrary variations with the thread number.
Can someone help with this, like why this is happening? I found multiple solutions to this problem on the internet; some of them don't work (rarely but it happens), but I haven't seen my approach yet; it might be an issue I think. So, can anyone comment on using pthread_create(&threads[i % X], NULL, matrixMulti, NULL), like why this is not a good idea?
EDITED:
I have tried taking the suggestion and optimising the code, I have not done the Matrix multiplication efficient method, as we were asked to do the O(n^3) method, but I have tried doing the threading correctly. Is this correct?
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <math.h>
#define M 2
#define N 2
#define P 2
#define X 40 // Number of Threads
#define RED "\x1b[31m"
#define GREEN "\x1b[32m"
int t = 0; // Computation done by the first usedXFullthreads
int usedXFull = 0;
int A[M][N], B[N][P], C[M][P], D[M][P];
int row = 0;
void *matrixMulti(void *arg)
{
int* l = (int *)arg;
int n = *l;
int i = 0, j = 0, k = 0, comp = 0;
if (n <= usedXFull)
{
i = n * t / (N * P);
j = (n * t - N * P * i) / N;
k = n * t - N * P * i - N * j;
if (n == usedXFull)
comp = M * N * P - usedXFull * t;
else
comp = t;
}
while (comp)
{
if (i == M)
printf(RED "Some fault in the code\n\n");
C[i][j] += A[i][k] * B[k][j];
comp--;
k++;
if (k == N)
{
j++;
if (j == P)
{
i++;
j = 0;
}
k = 0;
}
}
return NULL;
}
void matrixMultiplicationWithoutThreading();
void matrixMultiplicationWithThreading();
void verifyIfBothMatrixAreSame();
int main()
{
int m, n, p;
// A: m*n Matrix, B: n*p Matrix
for (int i = 0; i < M; i++)
for (int j = 0; j < N; j++)
A[i][j] = rand() % 10;
// scanf("%d", &A[i][j]);
for (int i = 0; i < N; i++)
for (int j = 0; j < P; j++)
B[i][j] = rand() % 10;
// scanf("%d", &B[i][j]);
for (int i = 0; i < M; i++)
for (int j = 0; j < P; j++)
C[i][j] = 0;
struct timeval start, end;
gettimeofday(&start, NULL);
matrixMultiplicationWithoutThreading();
gettimeofday(&end, NULL);
double time = (end.tv_sec - start.tv_sec) * 1e6;
time = (time + end.tv_usec - start.tv_usec) * 1e-6;
printf("The time taken by simple matrix calculation without threding is %0.6f\n", time);
struct timeval start_th, end_th;
gettimeofday(&start_th, NULL);
matrixMultiplicationWithThreading();
gettimeofday(&end_th, NULL);
time = (end_th.tv_sec - start_th.tv_sec) * 1e6;
time = (time + end_th.tv_usec - start_th.tv_usec) * 1e-6;
printf("The time taken by using the Threading Method with %d threads is %0.6f\n", X, time);
verifyIfBothMatrixAreSame();
}
void matrixMultiplicationWithThreading()
{
int totalComp = M * N * P; // Total Computation
t = ceil((double)totalComp / (double)X);
usedXFull = totalComp / t;
int computationByLastUsedThread = totalComp - t * usedXFull;
int computationIndex[X];
pthread_t threads[X];
// Computation Started:
for (int i = 0; i < X; i++)
{
computationIndex[i] = i;
int rc = pthread_create(&threads[i], NULL, matrixMulti, (void *)&computationIndex[i]);
if (rc)
{
printf(RED "ERROR; return code from pthread_create() is %d\n", rc);
exit(-1);
}
}
for (int i = 0; i < X; i++)
pthread_join(threads[i], NULL);
// Computation Done:
}
void matrixMultiplicationWithoutThreading()
{
// Computation Started:
for (int i = 0; i < M; i++)
for (int j = 0; j < P; j++)
{
D[i][j] = 0;
for (int k = 0; k < N; k++)
D[i][j] += A[i][k] * B[k][j];
}
// Computation Done:
}
void verifyIfBothMatrixAreSame()
{
for (int i = 0; i < M; i++)
for (int j = 0; j < P; j++)
{
if (C[i][j] != D[i][j])
{
printf(RED "\nMatrix's are not equal something wrong with the computation\n");
return;
}
}
printf(GREEN "\nBoth Matrixes are equal thus verifying the computation\n");
}
There are many issues in the code. Here are some points:
lock is not initialized with pthread_mutex_init which is required (nor freed).
There is no need for locks in a matrix multiplication: work sharing should be preferred instead (especially since the current lock make your code run fully serially).
Using pthread_exit is generally rather a bad idea, at least it is here. Consider just returning NULL. Besides, returning something is mandatory in matrixMulti. Please enable compiler warnings so to detect such a thing.
There is an out of bound of threads[i] in the 0..M based loop.
There is no need to create M threads. You can create 2 threads and divide the work in 2 even parts along the M-based dimension. Creating M threads while allowing only 2 threads to run simultaneously just add more overhead for no reason (it takes time for thread to be created and scheduled by the OS).
It is generally better to dynamically allocate large arrays than using static global C arrays.
It is better to avoid global variables and use the arg parameter so to get thread-specific data.
To design a fast matrix multiplication, please consider reading this article. For example, the ijk loop nest is very inefficient and should really not be used for sake of performance (not efficient in cache). Besides, note you can use a BLAS library for that (they are highly optimized and easy to use) though I guess this is a homework. Additionally, note that you can use OpenMP instead of pthread so to make the code shorter and easy to read.
I need to create a program that gets a dynamic matrix and changes it to one dimension, for example 4x4 matrix will give 16 arrays length, where each index has a odd or even number, matching the index itself. The threads needs to go over the matrix at the same time and copy the odd and even numbers to the correct places in the array. The main thread needs to wait for the rest of them to finish before printing the array and every value with its respective thread. It should come out like this
We managed to fix the segmentation fault that kept happening, but now we need to set it so that each thread runs right after the other but instead each thread runs 4 times and then it switches to a different one. How can I change it so it'll run as asked?
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <pthread.h>
#include <math.h>
#define CORE 4
int N;
int odd = 1;
int even = 0;
typedef struct my_thread {
int** matrix;
int* resArray;
int threadId;
int strart_raw;
int strart_cal;
int end_raw;
int end_cal;
int counter;
} my_thread;
void* createArray(struct my_thread* thread);
void main() {
pthread_t th[CORE];
int s_r = 0, s_c, e_r, e_c;
int i, j, lines, columns, * intMatrix;
printf("Type the N for the N*N matrix:\t");
scanf("%d", &N);
int size = N * N;
int result_Array[N * N];
int retcode;
int interval = size / CORE;
int matrix_build_counter = 1;
intMatrix = (int*)malloc(N * N * sizeof(int));
for (i = 0; i < N; ++i)
{
for (j = 0; j < N; ++j)
{
intMatrix[i * N + j] = matrix_build_counter;
matrix_build_counter++;
}
}
printf("The matrix:\n");
for (i = 0; i < N; i++) {
for (j = 0; j < N; j++) {
printf("%d ", intMatrix[i * N + j]);
}
printf("\n");
}
struct my_thread thred_obj_array[CORE];
for (int i = 0; i < CORE; i++) {
thred_obj_array[i].matrix = &intMatrix;
thred_obj_array[i].resArray = result_Array;
thred_obj_array[i].threadId = i;
thred_obj_array[i].strart_raw = (int)((i * N) / CORE);
thred_obj_array[i].end_raw = (int)(((interval * (i + 1)) / N));
thred_obj_array[i].strart_cal = ((interval * i)) % N;
thred_obj_array[i].end_cal = ((interval) * (i + 1));
thred_obj_array[i].counter = (int)floor((interval)*i);
}
for (int i = 0; i < CORE; i++) {
retcode = pthread_create(&th[i], NULL, createArray, &thred_obj_array[i]);
if (retcode != 0) {
printf("Create thread failed with error %d\n", retcode);
}
}
printf("done");
for (int i = 0; i < CORE; i++) {
pthread_join(th[i], NULL);
}
printf("the result array is: ");
for (int i = 0; i < N * N; i++) {
printf("%d ", result_Array[i]);
}
}
void* createArray(struct my_thread* thread) {
int j;
for (int i = thread->strart_raw; i < N; i = i * sizeof(int) * N) {
for (j = thread->strart_cal; j < N; j++) {
printf("I am thread: %d And my value is: %d , (%d,%d)\n", thread->threadId, (*thread->matrix + i * N)[j], i, j);
if (((*thread->matrix + i * N)[j]) % 2 == 0) {
thread->resArray[even] = (*thread->matrix + i * N)[j];
even += 2;
printf("-----%d ---even---\n", even);
}
else {
thread->resArray[odd] = (*thread->matrix + i * N)[j];
odd += 2;
printf("---%d ---odd--\n", odd);
}
(thread->counter)++;
if (thread->counter == thread->end_cal) {
return;
}
}
thread->strart_cal = 0;
}
}
This program takes an integer from input and populates a double array with random values and creates threads to sort each half of the array, one thread to sort the entire array, one to sort the first half, and one to sort the second half but segfaults at pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half). I added multiple print statements as shown below and also used gdb to confirm where the seg fault is but I'm completely lost as to why it doesn't work for array sizes greater than 4.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
typedef struct merge
{
double *FirstHalf;
double *SecondHalf;
double *myVal;
} MergeArray;
// Global Vars
int ArraySize;
int ArrayHalfSize;
void *mergeThread(void *args)
{
int i, j;
MergeArray *myMerge = (struct merge *)args;
for(i = 0; i < ArrayHalfSize; i++)
{
myMerge->myVal[i] = myMerge->FirstHalf[i];
}
ArraySize = ArrayHalfSize + ArrayHalfSize;
for(i = 0, j = ArrayHalfSize; j < ArraySize && i < ArrayHalfSize; i++, j++)
{
myMerge->myVal[j] = myMerge->SecondHalf[i];
}
return NULL;
}
void *selectionSortThreadB(void *args)
{
double *arr;
double *ptrArr;
arr = (double*)args;
ptrArr = (double*)malloc(ArraySize * sizeof(double));
int i;
int j;
double temp;
for(i = 1; i < ArraySize; i++)
{
temp = arr[i];
j = i - 1;
while(j >= 0 && arr[j] > temp)
{
arr[j + 1] = arr[j];
j = j - 1;
}
arr[j + 1] = temp;
}
/*for(i = 0; i < ArraySize; i++)
{
printf("SSTB: %d, %.2lf\n", i, arr[i]);
}*/
ptrArr = arr;
pthread_exit((void*)ptrArr);
}
void *selectionSortSecondHalf(void *args)
{
double *myarr, *myptrretSecondHalf;
myarr = (double *)args;
myptrretSecondHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretSecondHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortSecondtHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("This is the second half of the array when passed to SSSH: A_Second_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = ArrayHalfSize; i < ArraySize; i++)
{
min = i;
for(j = i + 1; j < ArraySize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretSecondHalf = myarr;
pthread_exit(myptrretSecondHalf);
}
void *selectionSortFirstHalf(void *args)
{
//printf("hello from 104\n");
double *myarr, *myptrretFirstHalf;
myarr = (double *)args;
myptrretFirstHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretFirstHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortFirstHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = 0; i < ArrayHalfSize; i++)
{
printf("This is the first half of the array when passed to SSFH: A_First_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = 0; i < ArrayHalfSize; i++)
{
min = i;
for(j = i + 1; j < ArrayHalfSize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretFirstHalf = myarr;
pthread_exit(myptrretFirstHalf);
}
int main(int argc, char *argv[])
{
if(argc != 2)
{
fprintf(stderr, "ERROR: Please provide the correct number of arguments (file, size of array)\n");
exit(-1);
}
else
{
ArraySize = atoi(argv[1]);
ArrayHalfSize = (ArraySize / 2);
clock_t start, end, start2, end2;
double RandomNum;
double *ThreadBlock;
double *ThreadArrayHalf1;
double *ThreadArrayHalf2;
pthread_t tid, tid1, tid2, tid3;
double A[ArraySize];
double B[ArraySize];
//double C[ArraySize];
double *A_First_Half/*[ArrayHalfSize]*/;
double *A_Second_Half/*[ArrayHalfSize]*/;
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize);
A_Second_Half = (double*)malloc(sizeof(A_Second_Half)*ArrayHalfSize);
int i;
srand(time(NULL)); // generate seed for rand nums based on time
for(i = 0; i < ArraySize; i++)
{
RandomNum = ((double) rand()*(1000.0+1.0)/(double)RAND_MAX+1.0);
printf("%.2lf\n", RandomNum);
A[i] = RandomNum;
}
for(i = 0; i < ArraySize; i++)
{
B[i] = A[i];
}
start = clock();
pthread_create(&tid, NULL, selectionSortThreadB, (void*)B);
pthread_join(tid, (void**)&ThreadBlock);
end = clock() - start;
printf("Sorting is done in %.2fms when one thread is used\n", end * 1000.0 / CLOCKS_PER_SEC);
//*******Two-Threaded Option**************//
// prints A[]
for(i = 0; i < ArraySize; i++)
{
printf("A[%d] = %.2lf\n", i, A[i]);
}
// populates first half of array A with half of A
for(i = 0; i < ArrayHalfSize; i++)
{
A_First_Half[i] = A[i];
printf("A_First_Half[%d] = %.2lf\n", i, A_First_Half[i]);
}
// populates second half of array A with second half of B
for(i = ArrayHalfSize; i < ArraySize; i++)
{
A_Second_Half[i] = A[i];
printf("A_Second_Half[%d] = %.2lf\n", i, A_Second_Half[i]);
}
printf("hello from 199\n");
start2 = clock();
printf("hello from 201\n");
pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half);
printf("hello from 203\n");
pthread_create(&tid2, NULL, selectionSortSecondHalf, A_Second_Half);
printf("hello from 205\n");
pthread_join(tid1, (void**)&ThreadArrayHalf1);
pthread_join(tid2, (void**)&ThreadArrayHalf2);
MergeArray threadMerge;
threadMerge.myVal = (double*)malloc(ArraySize * sizeof(double));
for(i = 0; i < ArrayHalfSize; i++)
{
printf("SSFH: %d, %.2lf\n", i, ThreadArrayHalf1[i]);
}
for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("SSSH: %d, %.2lf\n", i, ThreadArrayHalf2[i]);
}
threadMerge.FirstHalf = ThreadArrayHalf1;
threadMerge.SecondHalf = ThreadArrayHalf2;
pthread_create(&tid3, NULL, mergeThread, (void*)&threadMerge);
pthread_join(tid3, NULL);
end2 = clock() - start2;
printf("Sorting is done in %.2fms when two threads are used\n", end2 * 1000.0 / CLOCKS_PER_SEC);
//free(A_First_Half);
//free(A_Second_Half);
//free(threadMerge.myVal);
}
return 0;
}
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize); allocates space based on sizeof(A_First_Half), which uses the size of the pointer A_First_Half, not the size of the object it points to, *A_First_Half.
And you do not need the parentheses for sizeof with an expression or the cast of malloc. So use A_First_Half = malloc(sizeof *A_First_Half * ArrayHalfSize);.
In several places, a “second half” array is used with indices running from ArrayHalfSize to ArraySize-1. However, they point to storage for which space for ArrayHalfSize elements has been allocated. Indices in that space run from 0 to ArrayHalfSize-1. All the code should be changed to use only indices from 0 to ArrayHalfSize-1 with these “second halves.”
Along those lines, there is no need for selectionSortFirstHalf and selectionSortSecondHalf to be separate routines. Once selectionSortSecondHalf is fixed, per above, it will do the same thing as selectionSortFirstHalf: Sort an array with ArrayHalfSize elements.
Once you have fixed those and gotten the program working, eliminate the global variables and pass all the necessary information to the threads via their argument pointer (by pointing to a structure that contains the information the thread needs).
So, this is my program that calculates matrix determinant using system calls, not good at all, but, the trouble is that when i put in a number bigger than 8 for dimension of matrix, it crashes somehow and i can't figure why it keeps happening. Please, give me some ideas.
The task was to calculate determinant using multithreading. Maybe, the problem is that I exceed max threads? valgrind says that
Use --max-threads=INT to specify a larger number of threads
and rerun valgrind
valgrind: the 'impossible' happened:
Max number of threads is too low
compile it with gcc -g -pthread
#include <stdlib.h>
#include <pthread.h>
#include <math.h>
#include <time.h>
#include <malloc.h>
pthread_mutex_t mutex;
typedef struct {
int **matrix;
int size;
} T_MS;
void* determinant(void *npt) {
T_MS* tmp = (T_MS*) npt;
int i,j;
double det = 0;
pthread_t *array = malloc(sizeof(pthread_t) * tmp->size);
T_MS *mtarr = malloc(sizeof(T_MS) * tmp->size);
if (tmp->size == 1) {
det = tmp->matrix[0][0];
} else if (tmp->size == 2) {
det = tmp->matrix[0][0] * tmp->matrix[1][1] - tmp->matrix[0][1] * tmp->matrix[1][0];
} else {
for (i = 0; i < tmp->size; ++i) {
mtarr[i].matrix = (int **)malloc(sizeof(int *) * tmp->size);
mtarr[i].size = tmp->size - 1;
for (j = 0; j < tmp->size - 1; ++j) {
if (j < i)
mtarr[i].matrix[j] = tmp->matrix[j];
else
mtarr[i].matrix[j] = tmp->matrix[j + 1];
}
pthread_create(&array[i], NULL, determinant, mtarr + i);
}
for (i = 0; i < tmp->size; ++i) {
void *res;
for (j = 0; j < tmp->size - 1; ++j) {
}
pthread_join(array[i], &res);
double x = *(double *)&res;
det += (-1 + 2 * !(i % 2)) * x * tmp->matrix[i][tmp->size - 1];
double answer = *(double*)&det;
free(mtarr[i].matrix);
}
}
free(mtarr);
free(array);
void* ans = *(void **)&det;
return ans;
}
int main(int argc, char const *argv[]) {
srand(time(NULL));
int **matrix;
int n = 0;
int a;
pthread_t tid;
pthread_attr_t attr;
pthread_attr_init(&attr);
printf("Insert the demention of matrix:\n");
scanf("%d", &n);
matrix = (int**)malloc(n * sizeof(int*));
for (int i=0; i<n; ++i)
matrix[i] = (int*)malloc(n * sizeof(int));
printf("Insert matrix:\n");
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
matrix[i][j]=rand()%15;
//matrix[i][j] = i;
}
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
T_MS* npt = (T_MS*)malloc(sizeof(T_MS));
npt->matrix = matrix;
npt->size = n;
void *det;
pthread_mutex_init(&mutex, NULL);
pthread_create(&tid, NULL, determinant, npt);
pthread_join(tid, &det);
double answer = *(double*)&det;
printf("Det is: %f\n", answer);
for (int i = 0; i < n; ++i)
free(matrix[i]);
free(matrix);
free(npt);
return 0;
} ```
This question already has answers here:
Segmentation fault on large array sizes
(7 answers)
Closed 5 years ago.
I'm trying to write a super simple C program of the vector multiply-add "axpy" algorithm for integer data types. The program output the execution time to measure the performance of a machine. The matrices are filled by random numbers.
int benchmark(void) {
int N; /* The matrix size, controlled by user input */
int r, c; /* Row and Column number */
int random; /* Random number to fill the matix */
int a = rand() % 20; /* Scale number to multiply x matrix */
printf("Enter the size(N*N) of the matrices(Maximum 1,000,000)\n");
scanf("%d", &N);
if (N > 1000000) {
fprintf(stderr, "Size of matrix is too large!\n");
return 0;
}
/* Initialize and fill the matrix x and y */
int xMatrix[N][N], yMatrix[N][N], resultMatrix[N][N];
/* Compute time */
clock_t t;
t = clock();
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
random = rand() % 100;
xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
}
}
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
int random = rand() % 100;
yMatrix[r][c] = random;
}
}
/* Add two matrix together */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
}
}
t = clock() - t;
double timeTaken = ((double)t) / CLOCKS_PER_SEC;
printf("\n -> Total time : %f seconds\n", timeTaken);
printf("\n -> Vector length : %d", N * N);
}
User controls the size of the matrix.
The program works fine when the value of N is less than 800.
The size of the objects allocated with automatic storage (on the stack) is too large, you get undefined behavior, more specifically a Stack overflow.
You should instead allocate the objects from the heap:
/* Initialize and fill the matix x and y */
int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));
And verify that none of the pointers returned by malloc() are NULL.
Here is the modified code:
int benchmark(void) {
int N; /* The matrix size, controlled by user input */
int r, c; /* Row and Column number */
int random; /* Random number to fill the matix */
int a = rand() % 20; /* Scale number to multiply x matrix */
printf("Enter the size(N*N) of the matrices (Maximum 1,000,000)\n");
if (scanf("%d", &N) != 1) {
fprintf(stderr, "Input error!\n");
return 0;
}
if (N > 1000000) {
fprintf(stderr, "Matrix size is too large!\n");
return 0;
}
/* Initialize and fill the matrix x and y */
int (*xMatrix)[N] = malloc(N * sizeof(*xMatrix));
int (*yMatrix)[N] = malloc(N * sizeof(*yMatrix));
int (*resultMatrix)[N] = malloc(N * sizeof(*resultMatrix));
if (xMatrix == NULL || yMatrix == NULL || resultMatrix == NULL) {
fprintf(stderr, "Memory allocation failed!\n");
free(xMatrix);
free(yMatrix);
free(resultMatrix);
return 0;
}
/* Compute time */
clock_t t = clock();
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
random = rand() % 100;
xMatrix[r][c] = a * random; /* Multiply matrix x with random value a */
}
}
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
random = rand() % 100;
yMatrix[r][c] = random;
}
}
/* Add two matrix together */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
}
}
t = clock() - t;
double timeTaken = ((double)t) / CLOCKS_PER_SEC;
printf("\n -> Total time : %f seconds\n", timeTaken);
printf("\n -> Vector length : %lld", (long long)N * N);
free(xMatrix);
free(yMatrix);
free(resultMatrix);
return 0;
}
Note however that your computation is very simple, most of the time is likely spent in the rand() function.
You are trying to allocate memmory dynamically, I would recommend using malloc from stdlib.h as shown bellow.
Also, check out these SO posts: memory allocation in Stack and Heap, and What and where are the stack and heap?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int benchmark(void) {
int N; /* The matrix size, controlled by user input */
int r, c; /* Row and Column number */
int random; /* Random number to fill the matix */
int a = rand() % 20; /* Scale number to multiply x matrix */
printf("Enter the size(N*N) of the matrixs(Maximum 1,000,000)\n");
scanf("%d", &N);
if(N > 1000000) {
fprintf(stderr, "Size of matrix is too large!\n");
return 0;
}
/* Initialize and fill the matix x and y */
int** xMatrix = NULL;
int** yMatrix = NULL;
int** resultMatrix = NULL;
/* Using the heap memory allocation instead of the stack */
xMatrix = (int **) malloc(N * sizeof(int *));
yMatrix = (int **) malloc(N * sizeof(int *));
resultMatrix = (int **) malloc(N * sizeof(int *));
for (r = 0; r < N; r++) {
xMatrix[r] = (int *) malloc(N * sizeof(int));
yMatrix[r] = (int *) malloc(N * sizeof(int));
resultMatrix[r] = (int *) malloc(N * sizeof(int));
}
/* Compute time */
clock_t t;
t = clock();
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
random = rand() % 100;
xMatrix[r][c] = a * random; /* Multiply matix x with random value a */
}
}
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
int random = rand() % 100;
yMatrix[r][c] = random;
}
}
/* Add two matrix together */
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++) {
resultMatrix[r][c] = xMatrix[r][c] + yMatrix[r][c];
}
}
t = clock() - t;
double timeTaken = ((double)t)/CLOCKS_PER_SEC;
printf("\n -> Total time : %f seconds\n", timeTaken);
printf("\n -> Vector length : %d", N*N);
/* Always remember to free your allocated memory */
for (r = 0; r < N; r++) {
free(xMatrix[r]);
free(yMatrix[r]);
free(resultMatrix[r]);
}
free(xMatrix);
free(yMatrix);
free(resultMatrix);
}
int main() {
benchmark();
return 0;
}