I have written a program in C that performs Gaussian elimination and returns the L2 norm of a matrix. The program is invoked as ./exec n k, where n is the size of an n-by-n matrix and k is the number of threads used to run the program (max 4). I allocate space for an n-by-(n+1) matrix because Gaussian elimination works on the augmented matrix.
It works perfectly in OpenMP. As seen in the code below, I only have one parallel for. My goal now is to make that parallel for loop run concurrently using Pthreads instead of OpenMP. I moved the for loop that should be parallelized into a separate function and create pthreads to run it. My guess is that the pthreads are not each doing a different part of the loop (basically a different iteration of j), but instead all 4 pthreads are running the entire loop. I run the program like ./gauss 30 4 and it sometimes works and sometimes segfaults, although when it does work the L2 norm is not 0 (L2 returns 0 if the program worked perfectly), so something is obviously wrong in the threading section. When I run it through GDB it segfaults at a loop for some reason, but this same loop runs perfectly in OpenMP. Can someone help me out?
GDB
http://i.stack.imgur.com/V99yt.png
OpenMP Code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y,z;
int bvectcount = 0;
struct timeval start, end;
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double sum_squares = (double) 0;
for (i=0; i < matrixSize; i++) {
sum_squares += res[i] * res[i];
}
return sqrt(sum_squares);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
int chunk = matrixSize/threadNum;
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++){
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
//calculating triangular matrix
//threading needs to be done HERE
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
double temp2;
#pragma omp parallel default(none) num_threads(threadNum) shared(a,i,matrixSize,vect) private(j,z,ratio,temp2)
{
#pragma omp for schedule(static)
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
}
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
Pthreads code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y,z;
int bvectcount = 0;
int threadcount;
pthread_t workerThreads[4];
typedef struct threader {
int counter;
int matrixl;
} threader;
struct timeval start, end;
void *retval;
int checkargs(int argc, char* argv[]);
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double squaresum = (double) 0;
for (i=0; i < matrixSize; i++) {
squaresum += res[i] * res[i];
}
return sqrt(squaresum);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
void *parallelstuff(void *args){
threader temp = *((threader *)args);
int i, matrixSize;
i = temp.counter;
matrixSize = temp.matrixl;
double temp2;
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++)
{
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
threader stuff;
stuff.counter = i;
stuff.matrixl = matrixSize;
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
while(threadcount != 0){
if(pthread_join (workerThreads[threadcount-1], &retval ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
threadcount--;
}
//create matrix of n size
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
}
Note that j and z should be declared as local (private) variables in each thread.
In the OpenMP code, you closed the brace of the for loop at line 100:
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
.......
......
.....
} //line 100
But in the Pthreads code you closed it at line 149, so the back-substitution and the printing run inside the loop. The full code:
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
//globals
double **a, *vect, *bvect, scalar, ratio, sum, delta, *temp;
int i,j,k,ptr, z;
int y; //z?
int bvectcount = 0;
int threadcount;
pthread_t workerThreads[4];
typedef struct threader {
int counter;
int matrixl;
} threader;
struct timeval start, end;
void *retval;
int checkargs(int argc, char* argv[]);
// a is matrix, b is vector, x is the solution vector, and n is the size
double L2(double **a, double *bvect, double *vect, int matrixSize) {
double sum;
double res[matrixSize];
int i, j;
for (i=0; i < matrixSize; i++) {
sum = (double) 0;
for (j=0; j < matrixSize; j++) {
sum += a[i][j] * vect[j];
}
res[i] = sum;
}
for (i=0; i < matrixSize; i++) {
res[i] -= vect[i];
}
double squaresum = (double) 0;
for (i=0; i < matrixSize; i++) {
squaresum += res[i] * res[i];
}
return sqrt(squaresum);
}
int checkargs(int argc, char* argv[]){
if(argc != 3){
fprintf(stderr, "Error: Usage is size threadNum\n" );
exit(1);
}
}
void *parallelstuff(void *args){
threader temp = *((threader *)args);
int i, matrixSize;
i = temp.counter;
matrixSize = temp.matrixl;
//printf("matrixSize=%d counter=%d\n" , matrixSize ,temp.counter );
double temp2;
int j , z; //houssam
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
int main(int argc, char* argv[]){
//check for args
checkargs(argc, argv);
int matrixSize = atoi(argv[1]);
int threadNum = atoi(argv[2]);
//memory allocation
a = (double**)malloc(matrixSize*sizeof(double*));
for(i = 0; i < matrixSize ; i++)
a[i] = (double*)malloc(matrixSize*sizeof(double) * matrixSize);
vect = (double*)malloc(matrixSize*sizeof(double));
bvect = (double*)malloc(matrixSize*sizeof(double));
temp = (double*)malloc(matrixSize*sizeof(double));
for(i = 0; i < matrixSize; ++i){
for(j = 0; j < matrixSize + 1; ++j){
a[i][j] = drand48();
}
}
j = 0;
j += matrixSize;
for(i = 0; i < matrixSize; ++i){
bvect[i] = a[i][j];
}
//generation of scalar matrix (diagonal vector)
gettimeofday(&start, NULL);
for(i=0; i<matrixSize; i++){
scalar = a[i][i];
//initialization of p to travel throughout matrix
ptr = i;
//find largest number in column and row number of it
for(k = i+1; k < matrixSize; k++){
if(fabs(scalar) < fabs(a[k][i])){
//k is row of scalar, while
scalar = a[k][i];
ptr = k;
}
}
//swaping the elements of diagonal row and row containing largest no
for(j = 0; j <= matrixSize; j++)
{
temp[0] = a[i][j];
a[i][j]= a[ptr][j];
a[ptr][j] = temp[0];
}
ratio = a[i][i];
for(k = 0; k < matrixSize + 1; k++){
a[i][k] = a[i][k] / ratio;
}
threader stuff;
stuff.counter = i;
stuff.matrixl = matrixSize;
//printf("i=%d\n" , i);
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
while(threadcount != 0){
if(pthread_join (workerThreads[threadcount-1], &retval ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
threadcount--;
}
}
//create matrix of n size
//backward substitution method
for(i=matrixSize-1; i >=0; i--){
for(k = i; k > 0; k--){
a[k-1][matrixSize] -= a[k-1][i] * a[i][matrixSize];
a[k-1][i] -= a[k-1][i] * a[i][i];
}
}
for(i = 0; i < matrixSize; ++i){
vect[i] = a[i][matrixSize];
}
double l2Norm;
l2Norm = L2(a, bvect, vect, matrixSize);
printf("THIS IS L2 NORM: %f\n", l2Norm);
gettimeofday(&end, NULL);
delta = ((end.tv_sec - start.tv_sec) * 1000000u +
end.tv_usec - start.tv_usec) / 1.e6;
printf("end time: %f\n", delta);
}
The two codes as written are not equivalent. Observe the OpenMP code:
#pragma omp for schedule(static)
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
The combined parallel for construct in OpenMP is a worksharing construct, i.e. it distributes the iterations on the following loop among the threads in the team. Given the schedule(static) clause, the iteration space is split into #threads blocks and each block is assigned to a different thread.
Your Pthreads code does not share the work:
i = temp.counter;
matrixSize = temp.matrixl;
...
for(j = i + 1; j<matrixSize; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
Given that the same stuff object is passed to all threads, they all receive the same value of i and matrixSize and loop over the whole iteration space, therefore the wrong results.
What you have to do is simulate what #pragma omp for schedule(static) does, namely make each thread do only some of the matrixSize - (i+1) iterations (j runs from i+1 to matrixSize-1). You should pass each thread a unique data object that contains the starting and the ending iteration:
// Per-thread work description handed to parallelstuff().
typedef struct threader {
int start; // parallelstuff() begins its row loop at start + 1
int end; // parallelstuff() stops before row end
int i; // index used as pivot row and column (a[i][i], a[i][z])
int matrixSize; // the inner loop covers columns 0..matrixSize
} threader;
...
void *parallelstuff(void *args){
threader *temp = (threader *)args;
int start, end, i, matrixSize;
start = temp->start;
end = temp->end;
i = temp->i;
matrixSize = temp->matrixSize;
double temp2;
int j , z; //houssam
for(j = start + 1; j<end; j++){
temp2 = a[j][i]/a[i][i];
for(z = 0; z<matrixSize + 1; z++){
a[j][z] = a[j][z] - temp2 * a[i][z];
}
}
}
...
threader stuff[threadNum];
//MAKE EACH THREAD DO SOMETHING DIFF
// parallelstuff(int i, int matrixSize, double **a){
for(threadcount = 0; threadcount < threadNum; threadcount++){
stuff[threadcount].start = i + threadcount*(matrixSize / threadNum);
stuff[threadcount].end = i + (threadcount+1)*(matrixSize / threadNum);
stuff[threadcount].i = i;
stuff[threadcount].matrixSize = matrixSize;
if(pthread_create (&workerThreads[threadcount], NULL, parallelstuff, (void *) &stuff ) != 0){
fprintf(stderr, "Error: consumer create problem\n");
exit(1);
}
}
In theory, you could also let each thread know how many threads are out there and let it compute the iteration range itself, but that is complicated by the fact that Pthreads API lacks the equivalent of omp_get_thread_num(). There is an advanced trick that employs aligned memory allocation and the numeric thread ID encoded in the last bits of the pointer passed.
Related
I'm trying to solve linear systems of the form Ax = b where A is an (nxn) matrix of real numbers and b a (1xn) vector of real numbers, using the A = LU algorithm. This is my implementation:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
int LUPDecompose(double A[N][N], double Tol, int P[N])
{
int i, j, k, imax;
double maxA, ptr[N], absA;
for (i = 0; i <= N; i++)
P[i] = i; //Unit permutation matrix, P[N] initialized with N
for (i = 0; i < N; i++) {
maxA = 0.0;
imax = i;
for (k = i; k < N; k++)
if ((absA = abs(A[k][i])) > maxA) {
maxA = absA;
imax = k;
}
if (maxA < Tol) return 0; //failure, matrix is degenerate
if (imax != i) {
//pivoting P
j = P[i];
P[i] = P[imax];
P[imax] = j;
//pivoting rows of A
for (int ii = 0; ii < N; ii++)
{
ptr[ii] = A[i][ii];
A[i][ii] = A[imax][ii];
A[imax][ii] = ptr[ii];
}
//counting pivots starting from N (for determinant)
P[N]++;
}
for (j = i + 1; j < N; j++) {
A[j][i] /= A[i][i];
for (k = i + 1; k < N; k++)
A[j][k] -= A[j][i] * A[i][k];
}
}
return 1; //decomposition done
}
/* INPUT: A,P filled in LUPDecompose; b - rhs vector; N - dimension
* OUTPUT: x - solution vector of A*x=b
*/
void LUPSolve(double A[N][N], int P[N], double b[N], double x[N])
{
for (int i = 0; i < N; i++) {
x[i] = b[P[i]];
for (int k = 0; k < i; k++)
x[i] -= A[i][k] * x[k];
}
for (int i = N - 1; i >= 0; i--) {
for (int k = i + 1; k < N; k++)
x[i] -= A[i][k] * x[k];
x[i] /= A[i][i];
}
}
int main()
{
double Am[N][N] = {{0.6289, 0, 0.0128, 0.3184, 0.7151},
{0, 1, 0, 0, 0},
{0.0128, 0, 0.0021, 0.0045, 0.0380},
{0.3184, 0, 0.0045, 0.6618, 0.3371},
{0.7151, 0, 0.0380, 0.3371, 1.1381}};
double bm[N] = {1.6752, 0, 0.0574, 1.3217, 2.2283};
int Pm[N] = {0};
double X[N] = {0};
LUPDecompose( Am, 0.0001, Pm);
LUPSolve(Am, Pm, bm, X);
printf("%f %f %f %f %f",X[0],X[1],X[2],X[3],X[4]);
}
However, I am getting indeterminate (NaN) values such as:
-1.#IND00 -1.#IND00 3.166387 0.849298 0.670689
I wonder if it is a code issue or algorithm. Any help to solve this issue?
"I wonder if it is a code issue or algorithm. Any help to solve this issue?"
I believe there are code and algorithm issues. The following is your code with corrections to address only compile errors, and warnings (see in-line comments). It is not debugged beyond C syntax to achieve a clean compile, and run w/o error. (i.e. runs with no divide by zero, or inf errors.)
#define N 5 //required to be 5 by hard-coded array definitions in main()
int LUPDecompose(double A[N][N], double Tol, int P[N])
{
int i, j, k, imax, ii;//added ii here to increase scope below
double maxA, ptr[N], absA;
//for (i = 0; i <= N; i++)
for (i = 0; i < N; i++)
P[i] = i; //Unit permutation matrix, P[N] initialized with N (actually init with i)
for (i = 0; i < N; i++) {
maxA = 0.0;
imax = i;
for (k = i; k < N; k++)
if ((absA = fabs(A[k][i])) > maxA) {// using fabs, not abs to avoid conversion of double to int.
maxA = absA;
imax = k;
}
if (maxA < Tol) return 0; //failure, matrix is degenerate
if (imax != i) {
//pivoting P
j = P[i];
P[i] = P[imax];
P[imax] = j;
//pivoting rows of A
//for (int ii = 0; ii < N; ii++)
for ( ii = 0; ii < N; ii++)
{
ptr[ii] = A[i][ii];
A[i][ii] = A[imax][ii];
A[imax][ii] = ptr[ii];
}
//counting pivots starting from N (for determinant)
//P[N]++;//N will always overflow for array with only N elements
P[ii-1]++;//use index here instead
}
for (j = i + 1; j < N; j++) {
A[j][i] /= A[i][i];
for (k = i + 1; k < N; k++) {//extra brackets added for readability
A[j][k] -= A[j][i] * A[i][k];
}
}
}
return 1; //decomposition done
}
/* INPUT: A,P filled in LUPDecompose; b - rhs vector; N - dimension
* OUTPUT: x - solution vector of A*x=b
*/
void LUPSolve(double A[N][N], int P[N], double b[N], double x[N])
{
for (int i = 0; i < N; i++) {
x[i] = b[P[i]];
for (int k = 0; k < i; k++) {//extra brackets added for readability
x[i] -= A[i][k] * x[k];
}
}
for (int i = N - 1; i >= 0; i--) {
for (int k = i + 1; k < N; k++) {//additional brackets added for readability
x[i] -= A[i][k] * x[k];
}
x[i] /= A[i][i];
}
}
//int main()
int main(void)//minimum signature for main includes void
{
//Note hardcoded arrays in this code require N == 5 (#define at top)
double Am[N][N] = {{0.6289, 0, 0.0128, 0.3184, 0.7151},
{0, 1, 0, 0, 0},
{0.0128, 0, 0.0021, 0.0045, 0.0380},
{0.3184, 0, 0.0045, 0.6618, 0.3371},
{0.7151, 0, 0.0380, 0.3371, 1.1381}};
double bm[N] = {1.6752, 0, 0.0574, 1.3217, 2.2283};
int Pm[N] = {0};
double X[N] = {0};
LUPDecompose( Am, 0.0001, Pm);
LUPSolve(Am, Pm, bm, X);
printf("%f %f %f %f %f",X[0],X[1],X[2],X[3],X[4]);
return 0; //int main(void){...} requires return statement.
}
Based on this calculator, with these inputs:
the correct solution is:
-0.590174531351002
0
-19.76923076923077
1.0517711171662125
2.6772727272727272
But the actual output from code above is:
Algorithm related debugging is left for you to perform.
This program takes an integer from the command line, populates a double array with that many random values, and creates threads to sort it: one thread sorts the entire array, one sorts the first half, and one sorts the second half. It segfaults at pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half). I added multiple print statements as shown below and also used gdb to confirm where the segfault is, but I'm completely lost as to why it doesn't work for array sizes greater than 4.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
typedef struct merge
{
double *FirstHalf;
double *SecondHalf;
double *myVal;
} MergeArray;
// Global Vars
int ArraySize;
int ArrayHalfSize;
void *mergeThread(void *args)
{
int i, j;
MergeArray *myMerge = (struct merge *)args;
for(i = 0; i < ArrayHalfSize; i++)
{
myMerge->myVal[i] = myMerge->FirstHalf[i];
}
ArraySize = ArrayHalfSize + ArrayHalfSize;
for(i = 0, j = ArrayHalfSize; j < ArraySize && i < ArrayHalfSize; i++, j++)
{
myMerge->myVal[j] = myMerge->SecondHalf[i];
}
return NULL;
}
void *selectionSortThreadB(void *args)
{
double *arr;
double *ptrArr;
arr = (double*)args;
ptrArr = (double*)malloc(ArraySize * sizeof(double));
int i;
int j;
double temp;
for(i = 1; i < ArraySize; i++)
{
temp = arr[i];
j = i - 1;
while(j >= 0 && arr[j] > temp)
{
arr[j + 1] = arr[j];
j = j - 1;
}
arr[j + 1] = temp;
}
/*for(i = 0; i < ArraySize; i++)
{
printf("SSTB: %d, %.2lf\n", i, arr[i]);
}*/
ptrArr = arr;
pthread_exit((void*)ptrArr);
}
void *selectionSortSecondHalf(void *args)
{
double *myarr, *myptrretSecondHalf;
myarr = (double *)args;
myptrretSecondHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretSecondHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortSecondtHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("This is the second half of the array when passed to SSSH: A_Second_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = ArrayHalfSize; i < ArraySize; i++)
{
min = i;
for(j = i + 1; j < ArraySize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretSecondHalf = myarr;
pthread_exit(myptrretSecondHalf);
}
void *selectionSortFirstHalf(void *args)
{
//printf("hello from 104\n");
double *myarr, *myptrretFirstHalf;
myarr = (double *)args;
myptrretFirstHalf = (double *)malloc(ArrayHalfSize * sizeof(double));
if(myptrretFirstHalf == NULL)
{
fprintf(stderr, "Could not allocate memory in selectionSortFirstHalf\n");
exit(-1);
}
int i, j;
int min;
double temp;
/*for(i = 0; i < ArrayHalfSize; i++)
{
printf("This is the first half of the array when passed to SSFH: A_First_Half[%d] = %.2lf\n", i, myarr[i]);
}*/
for (i = 0; i < ArrayHalfSize; i++)
{
min = i;
for(j = i + 1; j < ArrayHalfSize; j++)
{
if(myarr[j] < myarr[min])
min = j;
}
temp = myarr[i];
myarr[i] = myarr[min];
myarr[min] = temp;
}
myptrretFirstHalf = myarr;
pthread_exit(myptrretFirstHalf);
}
int main(int argc, char *argv[])
{
if(argc != 2)
{
fprintf(stderr, "ERROR: Please provide the correct number of arguments (file, size of array)\n");
exit(-1);
}
else
{
ArraySize = atoi(argv[1]);
ArrayHalfSize = (ArraySize / 2);
clock_t start, end, start2, end2;
double RandomNum;
double *ThreadBlock;
double *ThreadArrayHalf1;
double *ThreadArrayHalf2;
pthread_t tid, tid1, tid2, tid3;
double A[ArraySize];
double B[ArraySize];
//double C[ArraySize];
double *A_First_Half/*[ArrayHalfSize]*/;
double *A_Second_Half/*[ArrayHalfSize]*/;
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize);
A_Second_Half = (double*)malloc(sizeof(A_Second_Half)*ArrayHalfSize);
int i;
srand(time(NULL)); // generate seed for rand nums based on time
for(i = 0; i < ArraySize; i++)
{
RandomNum = ((double) rand()*(1000.0+1.0)/(double)RAND_MAX+1.0);
printf("%.2lf\n", RandomNum);
A[i] = RandomNum;
}
for(i = 0; i < ArraySize; i++)
{
B[i] = A[i];
}
start = clock();
pthread_create(&tid, NULL, selectionSortThreadB, (void*)B);
pthread_join(tid, (void**)&ThreadBlock);
end = clock() - start;
printf("Sorting is done in %.2fms when one thread is used\n", end * 1000.0 / CLOCKS_PER_SEC);
//*******Two-Threaded Option**************//
// prints A[]
for(i = 0; i < ArraySize; i++)
{
printf("A[%d] = %.2lf\n", i, A[i]);
}
// populates first half of array A with half of A
for(i = 0; i < ArrayHalfSize; i++)
{
A_First_Half[i] = A[i];
printf("A_First_Half[%d] = %.2lf\n", i, A_First_Half[i]);
}
// populates second half of array A with second half of B
for(i = ArrayHalfSize; i < ArraySize; i++)
{
A_Second_Half[i] = A[i];
printf("A_Second_Half[%d] = %.2lf\n", i, A_Second_Half[i]);
}
printf("hello from 199\n");
start2 = clock();
printf("hello from 201\n");
pthread_create(&tid1, NULL, selectionSortFirstHalf, A_First_Half);
printf("hello from 203\n");
pthread_create(&tid2, NULL, selectionSortSecondHalf, A_Second_Half);
printf("hello from 205\n");
pthread_join(tid1, (void**)&ThreadArrayHalf1);
pthread_join(tid2, (void**)&ThreadArrayHalf2);
MergeArray threadMerge;
threadMerge.myVal = (double*)malloc(ArraySize * sizeof(double));
for(i = 0; i < ArrayHalfSize; i++)
{
printf("SSFH: %d, %.2lf\n", i, ThreadArrayHalf1[i]);
}
for(i = ArrayHalfSize; i < ArraySize; i++)
{
printf("SSSH: %d, %.2lf\n", i, ThreadArrayHalf2[i]);
}
threadMerge.FirstHalf = ThreadArrayHalf1;
threadMerge.SecondHalf = ThreadArrayHalf2;
pthread_create(&tid3, NULL, mergeThread, (void*)&threadMerge);
pthread_join(tid3, NULL);
end2 = clock() - start2;
printf("Sorting is done in %.2fms when two threads are used\n", end2 * 1000.0 / CLOCKS_PER_SEC);
//free(A_First_Half);
//free(A_Second_Half);
//free(threadMerge.myVal);
}
return 0;
}
A_First_Half = (double*)malloc(sizeof(A_First_Half)*ArrayHalfSize); allocates space based on sizeof(A_First_Half), which uses the size of the pointer A_First_Half, not the size of the object it points to, *A_First_Half.
And you do not need the parentheses for sizeof with an expression or the cast of malloc. So use A_First_Half = malloc(sizeof *A_First_Half * ArrayHalfSize);.
In several places, a “second half” array is used with indices running from ArrayHalfSize to ArraySize-1. However, they point to storage for which space for ArrayHalfSize elements has been allocated. Indices in that space run from 0 to ArrayHalfSize-1. All the code should be changed to use only indices from 0 to ArrayHalfSize-1 with these “second halves.”
Along those lines, there is no need for selectionSortFirstHalf and selectionSortSecondHalf to be separate routines. Once selectionSortSecondHalf is fixed, per above, it will do the same thing as selectionSortFirstHalf: Sort an array with ArrayHalfSize elements.
Once you have fixed those and gotten the program working, eliminate the global variables and pass all the necessary information to the threads via their argument pointer (by pointing to a structure that contains the information the thread needs).
This is my program, which calculates a matrix determinant using system calls. It is not good at all, but the real trouble is that when I enter a number bigger than 8 for the dimension of the matrix, it crashes and I can't figure out why. Please give me some ideas.
The task was to calculate the determinant using multithreading. Maybe the problem is that I exceed the maximum number of threads? valgrind says:
Use --max-threads=INT to specify a larger number of threads
and rerun valgrind
valgrind: the 'impossible' happened:
Max number of threads is too low
compile it with gcc -g -pthread
#include <malloc.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
pthread_mutex_t mutex;
typedef struct {
int **matrix;
int size;
} T_MS;
void* determinant(void *npt) {
T_MS* tmp = (T_MS*) npt;
int i,j;
double det = 0;
pthread_t *array = malloc(sizeof(pthread_t) * tmp->size);
T_MS *mtarr = malloc(sizeof(T_MS) * tmp->size);
if (tmp->size == 1) {
det = tmp->matrix[0][0];
} else if (tmp->size == 2) {
det = tmp->matrix[0][0] * tmp->matrix[1][1] - tmp->matrix[0][1] * tmp->matrix[1][0];
} else {
for (i = 0; i < tmp->size; ++i) {
mtarr[i].matrix = (int **)malloc(sizeof(int *) * tmp->size);
mtarr[i].size = tmp->size - 1;
for (j = 0; j < tmp->size - 1; ++j) {
if (j < i)
mtarr[i].matrix[j] = tmp->matrix[j];
else
mtarr[i].matrix[j] = tmp->matrix[j + 1];
}
pthread_create(&array[i], NULL, determinant, mtarr + i);
}
for (i = 0; i < tmp->size; ++i) {
void *res;
for (j = 0; j < tmp->size - 1; ++j) {
}
pthread_join(array[i], &res);
double x = *(double *)&res;
det += (-1 + 2 * !(i % 2)) * x * tmp->matrix[i][tmp->size - 1];
double answer = *(double*)&det;
free(mtarr[i].matrix);
}
}
free(mtarr);
free(array);
void* ans = *(void **)&det;
return ans;
}
int main(int argc, char const *argv[]) {
srand(time(NULL));
int **matrix;
int n = 0;
int a;
pthread_t tid;
pthread_attr_t attr;
pthread_attr_init(&attr);
printf("Insert the demention of matrix:\n");
scanf("%d", &n);
matrix = (int**)malloc(n * sizeof(int*));
for (int i=0; i<n; ++i)
matrix[i] = (int*)malloc(n * sizeof(int));
printf("Insert matrix:\n");
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
matrix[i][j]=rand()%15;
//matrix[i][j] = i;
}
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
printf("%d ", matrix[i][j]);
}
printf("\n");
}
T_MS* npt = (T_MS*)malloc(sizeof(T_MS));
npt->matrix = matrix;
npt->size = n;
void *det;
pthread_mutex_init(&mutex, NULL);
pthread_create(&tid, NULL, determinant, npt);
pthread_join(tid, &det);
double answer = *(double*)&det;
printf("Det is: %f\n", answer);
for (int i = 0; i < n; ++i)
free(matrix[i]);
free(matrix);
free(npt);
return 0;
} ```
I have this code and I want to partition the array inp[2560] into 4 parts of 640 elements each, and for each part I want to calculate this:
MI = calcul__min(inp,640);
MA = calcul__max(inp,640);
MOY = calcul__moy(inp,640);
ectt = calcul__ect(inp,640);
I don't know how to use the for-loop for this.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define ORDER 65
#define NP 2560
float inp[2560];
float ED1, ED2, ED3, ED4, Ap4;
float d1, d2, d3, d4, a4, total;
int i;
double calcul__max(float tab[], int N)
{
double max;
int i;
for (i = 0; i < N; i++)
{
if(tab[i]>max)
max=tab[i];
}
return max;
}
double calcul__min(float tab[], int N)
{
double min;
int i;
for (i = 0; i < N; i++)
{
if(tab[i]<min)
min=tab[i];
}
return min;
}
double calcul__moy(float tab[],int N)
{
double moyenne,somme;
int i;
for (i = 0; i < N; i++)
{
somme = somme + tab[i];
moyenne = somme /640;
}
return moyenne;
}
float calcul__ect(float tab[], int N)
{
double moyenne, TM, som, ec, ect;
moyenne = calcul__moy(inp,640);
int i;
for (i = 0; i < N; i++)
{
TM = tab[i] - moyenne;
TM *= TM;
som += TM;
}
ec = som / 639;
ect = sqrt(ec);
return ect;
}
// Record of statistics values; not referenced by the code visible here.
struct Calculstat
{
float Ea; // NOTE(review): meaning not shown in this file - confirm
float amplitudemin; // presumably the minimum amplitude - confirm
float ecarttype; // presumably the standard deviation ("ecart type") - confirm
float Ed2; // NOTE(review): meaning not shown in this file - confirm
float amplitudemax; // presumably the maximum amplitude - confirm
};
filter(int ord, float *a, float *b, int np, float *x, float *y) {
int i, j;
y[0] = b[0] * x[0];
for (i = 1; i < ord + 1; i++) {
y[i] = 0.0;
for (j = 0; j < i + 1; j++)
y[i] = y[i] + b[j] * x[i-j];
for (j = 0; j < i; j++)
y[i] = y[i] - a[j+1] * y[i-j-1];
}
for (i = ord + 1; i < np + 1; i++) {
y[i] = 0.0;
for (j = 0; j < ord + 1; j++)
y[i] = y[i] + b[j] * x[i-j];
for (j = 0; j < ord; j++)
y[i] = y[i] - a[j+1] * y[i-j-1];
}
}
// Filters the signal x forwards and then backwards with filter(), copies the
// result into the global inp, and prints min, max, mean and standard
// deviation of the first 640 samples of inp.
main()
{
// input samples; the initialiser is elided in this listing
float x[NP]= { -84.786,...};
float y[NP], a[ORDER+1], b[ORDER+1];
int i, j;
// filter coefficients; the assignments for indices 1..64 are elided
b[0] = -0.005574892;
// [...]
b[65] = -0.005574892;
a[0] = 0;
// [...]
a[65] = 0;
// first pass over the signal
filter(ORDER, a, b, NP, x, y);
// reverse the filtered signal back into x
for (i = 0; i < NP; i++)
{
x[i]=y[NP-i-1];
}
// second pass, now over the reversed signal
filter(ORDER,a,b,NP,x,y);
// reverse again so the samples are back in their original order
for (i=0;i<NP;i++)
{
x[i] = y[NP-i-1];
}
for (i = 0; i < NP; i++)
{
y[i] = x[i];
}
// publish the result in the global array used by the statistics functions
for (i = 0; i < NP; i++)
{
//printf("%f\n",y[i]);
inp[i]=y[i];
}
double MA,MI,MOY;
float ectt;
// statistics of the first 640 samples only (the first quarter of inp)
MI = calcul__min(inp,640);
MA = calcul__max(inp,640);
MOY = calcul__moy(inp,640);
ectt = calcul__ect(inp,640);
printf("Le min de tableau est ""%f\n",MI);
printf("Le max de tableau est ""%f\n",MA);
printf ("la moyenne est de ""%g\n", MOY);
printf ("ecart type est ""%g\n", ectt);
}
As we know, an array passed to a function in C decays to a pointer to its first element.
We also know that pointer arithmetic can be applied to pointers in C (except void *).
Here is the example
#include <stdio.h>
void foo(float *f)
{
// Some stuff
}
int main()
{
float inp[2560];
foo(inp);
foo(inp+(640));
foo(inp+(2*640));
foo(inp+(3*640));
}
inp+X skips X floats in the array. Don't multiply X by sizeof(float) yourself: pointer arithmetic already scales by the element size, so that would be wrong.
i have this code and i want to partion a table inp[2560] into 4 part
and for each part i want to calculate this :
Here is an inspiration:
int i = 0;
while (i < 4)
{
whatever = calcul__min(inp+(i * 640),640);
}
I can't add rows to a 2D array, but adding columns works.
#include <stdio.h>
#include <stdlib.h>
This works:
void increasecolumn(int ** mas, int* n, int m){
for (int i = 0; i < m; i++){
int* tmp = realloc(mas[i], sizeof (*mas[i]) * ((*n) + 1));
if (tmp){
mas[i] = tmp;
}
}
(*n) = (*n) + 1;
}
But adding a row fails:
void increaserow(int ** mas, int n, int* m){
int ** tmp = realloc(mas, sizeof(*mas) * ((*m) + 1));
if (tmp){
mas = tmp;
for (int i = 0; i < 1; i++){
mas[(*m) + i] = malloc(sizeof(*mas[(*m) + i]) * n);
}
}
(*m) = (*m) + 1;
}
int main(int argc, char * argv[]) {
int n = 3; // columns
int m = 2; // rows
int** mas = malloc(m*sizeof(*mas));
for(int i = 0; i < m; i++){
mas[i] = malloc(n*sizeof(*(mas[i])));
}
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
mas[i][j] = 0;
printf("%d ", mas[i][j]);
}
printf("\n");
}
printf("\n");
increasecolumn(mas, &n, m);
for (int i = 0; i < m; i++){
mas[i][n-1] = 1;
}
increaserow(mas, n, &m); // problem is here
for (int j = 0; j < n; j++){
mas[m-1][j] = 0;
}
for(int i = 0; i < m; i++){
for(int j = 0; j < n; j++){
printf("%d ", mas[i][j]);
}
printf("\n");
}
system("pause");
return 0;
}
I used the answer to "Resizing 2D Arrays in C" as an example, but something is wrong.
The GNU Project Debugger on Windows:
warning: FTH: (9152): * Fault tolerant heap shim applied to current process. This is usually due to previous crashes. *
0 0 0
0 0 0
Program received signal SIGSEGV, Segmentation fault.
0x0000000000401821 in main (argc=1, argv=0x7f1990) at D:\III Курс! II СЕМЕСТР\МатМодДослОп\stud\Untitled2.c:47
47: mas[m-1][j] = 0;