Im currently trying to finish an assignment in which you use pthreads to speed up the matrix multiplication. The idea is to have the pthreads each work on there chunk of rows
For example: 1000 rows and 4 threads would be thread 0 : rows 0-249, thread 1: 250-499, thread 2: 500-749 and thread 3: 750-999
blocksOfWork = (BLOCK *) malloc(numberOfThreads*sizeof(BLOCK));
for(i=0; i < numberOfThreads; i++){
blocksOfWork[i].threadId = i;
blocksOfWork[i].start_row = i * rows/numberOfThreads;
if (i == numberOfThreads -1){
blocksOfWork[i].end_row = rows - 1;
}
else{
blocksOfWork[i].end_row = (i+1)*rows/numberOfThreads -1;
}
blocksOfWork[i].start_col = 0;
blocksOfWork[i].end_col = columns -1;
}
I'm just having a hard time understanding on how to perform the matrix multiplication iteration, I've put a print statement for debugging purpose but I still can't figure it out, ThreadMMulti Function. Any helpful tips in the right direction would be appreciated. Here is the full code
/*Program to generate two square 2D arrays of random doubles and
time their multiplication.
Program utlizies pthreads to efficiently perform matrix Multiplication
Compile by: gcc -o mmult -O3 mmultHW6.c -lpthread
Run by: ./mmult 1000 1000 2
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
#define TRUE 1
#define FALSE 0
#define BOOL int
typedef struct {
int threadId;
int start_row;
int end_row;
int start_col;
int end_col;
} BLOCK;
// function prototypes
double ** allocate2DArray(int rows, int columns);
void print2DArray(int rows, int columns, double ** array2D);
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray);
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance);
void matrixMultiplication(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void * threadMMult(void * rank);
int numberOfThreads;
double ** A;
double ** B;
double ** C;
double ** C_alt;
int rows, columns;
int main(int argc, char ** argv) {
long i, startTime, endTime,seqTime, paralellTime;
BLOCK * blocksOfWork;
int errorCode;
double tolerence;
pthread_t * threadHandles;
if (argc !=3) {
printf("Usage: %s <# of rows><# of columns><# of Threads>\n", argv[0]);
exit(-1);
} // end if
sscanf(argv[1], "%d", &rows);
sscanf(argv[1], "%d", &numberOfThreads);
columns = rows;
// seed the random number generator
srand( time(NULL) );
A = allocate2DArray(rows, columns);
B = allocate2DArray(rows, columns);
C = allocate2DArray(rows, columns);
C_alt = allocate2DArray(rows, columns);
generateRandom2DArray(rows, columns, -1.0, +1.0, A);
generateRandom2DArray(rows, columns, -1.0, +1.0, B);
printf("after initializing matrices\n");
time(&startTime);
matrixMultiplicationAlt(rows, columns, A, rows, columns, B, C_alt);
time(&endTime);
seqTime = endTime-startTime;
printf("Matrix Multiplication Alt. time = %ld\n",seqTime);
time(&startTime);
threadHandles = (pthread_t *)
malloc(numberOfThreads*sizeof(pthread_t));
blocksOfWork = (BLOCK *) malloc(numberOfThreads*sizeof(BLOCK));
for(i=0; i < numberOfThreads; i++){
blocksOfWork[i].threadId = i;
blocksOfWork[i].start_row = i * rows/numberOfThreads;
if (i == numberOfThreads -1){
blocksOfWork[i].end_row = rows - 1;
}
else{
blocksOfWork[i].end_row = (i+1)*rows/numberOfThreads -1;
}
blocksOfWork[i].start_col = 0;
blocksOfWork[i].end_col = columns -1;
}
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_create(&threadHandles[i], NULL, threadMMult,
&blocksOfWork[i]) != 0) {
printf("pthread %d failed to be created with error code %d\n", i, errorCode);
} // end if
} // end for
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_join(threadHandles[i], (void **) NULL) != 0) {
printf("pthread %d failed to be joined with error code %d\n", i, errorCode);
} // end if
} // end for
time(&endTime);
paralellTime = endTime-startTime;
printf("Parallel Matrix Multiplication time = %ld\n",paralellTime);
if (equal2DArrays(rows, columns, C, C_alt, 0.000001)) {
printf("Arrays match with tolerance of %.000001f\n", 0.000001);
} else {
printf("Arrays DON'T match with tolerance of %.000001f\n", 0.000001);
} // end if
return 0;
} // end main
void * threadMMult(void * arg){
BLOCK * block = (BLOCK *) arg;
int threadId = block->threadId;
int startRow = block->start_row;
int endRow = block->end_row;
int startCol = block->start_col;
int endCol = block->end_col;
int i, j, k, sum;
for (int i=startRow; i<=endRow;i++){
for (int j = startCol; j<=endCol; j++){
//C[i][j] = 0;
for(int k=0; k<= columns-1; k++){
//C[i][j] += A[i][k]*B[k][j];
printf("%lu - C[%d][%d] += A[%d][%d]*B[%d][%d]\n", pthread_self()
, i,j,i,k,k,j);
}
}
}
}
/*******************************************************************
* Function matrixMultiplicationAlt passed two matrices and returns
* their product.
********************************************************************/
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product) {
int i, j, k;
double ** array2_transpose;
if (columns1 != rows2) {
printf("Matrices cannot be multiplied -- incompatible dimensions!\n");
exit(-1);
} // end if
// Transposes array2
array2_transpose = allocate2DArray(columns2, rows2);
for (i=0; i < rows2; i++) {
for (j=0; j < columns2; j++) {
array2_transpose[j][i] = array2[i][j];
} /* end for (j */
} /* end for (i */
// Matrix Multiplication uses array1 and array2_transpose
for (i=0; i < rows1; i++) {
for (j=0; j < columns2; j++) {
product[i][j] = 0.0;
for (k=0; k < columns1; k++) {
product[i][j] += array1[i][k]*array2_transpose[j][k];
} /* end for (k */
} /* end for (j */
} /* end for (i */
} // end matrixMultiplicationAlt
/*******************************************************************
* Function allocate2DArray dynamically allocates a 2D array of
* size rows x columns, and returns it.
********************************************************************/
double ** allocate2DArray(int rows, int columns) {
double ** local2DArray;
int r;
local2DArray = (double **) malloc(sizeof(double *)*rows);
for (r=0; r < rows; r++) {
local2DArray[r] = (double *) malloc(sizeof(double)*columns);
} // end for
return local2DArray;
} // end allocate2DArray
/*******************************************************************
* Function generateRandom2DArray is passed the # rows, the # columns,
* min. value, max. value, and returns random2DArray containing
* randomly generated doubles.
********************************************************************/
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray) {
int r, c;
double range, div;
for (r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
range = max - min;
div = RAND_MAX / range;
random2DArray[r][c] = min + (rand() / div);
} // end for (c...
} // end for (r...
} // end generateRandom2DArray
/*******************************************************************
* Function print2DArray is passed the # rows, # columns, and the
* array2D. It prints the 2D array to the screen.
********************************************************************/
void print2DArray(int rows, int columns, double ** array2D) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
printf("%10.5lf", array2D[r][c]);
} // end for (c...
printf("\n");
} // end for(r...
} // end print2DArray
/*******************************************************************
* Function equal2DArrays is passed the # rows, # columns, two
* array2Ds, and tolerance. It returns TRUE if corresponding array
* elements are equal within the specified tolerance; otherwise it
* returns FALSE.
********************************************************************/
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
if (fabs(array1[r][c] - array2[r][c]) > tolerance) {
return FALSE;
} // end if
} // end for (c...
} // end for(r...
return TRUE;
} // end equal2DArray
Related
I am creating 2 programs to test the differences in run time of serial matrix multiply vs that of parallel matrix multiply. The parallel code that I have written is actually running slower than serial code, and running the program with additional cores enabled provides no speedup at all... using more cores actually seems to slow down the parallel program.
What is going on here? This is my parallel code: to use this pass in matrix size and thread number (see my useage below)
#include <stdio.h>
#include <stdlib.h> // rand(), srand()
#include <unistd.h>
#include <time.h>
#include <pthread.h>
// Time struct + prototypes
struct timespec time1, time2, diffTime;
struct timespec timespecDifference(struct timespec start, struct timespec end); // For timing
double** reserveMatrix(int nRows, int nCols);
void printMat(double** mat1, int rows, int cols);
void* matMult(void* arg);
// Argstruct
typedef struct {
double** result;
int tid;
int size;
int s;
int e;
} argStr;
// global variables for use by all threads
int size; // Size of a row and column.
int numThreads; // Number of pThreads to do work
double** mat1;
double** mat2;
double** mat3;
// Main function
int main(int argc, char *argv[]) {
size = atoi(argv[1]);
numThreads = atoi(argv[2]);
mat1 = reserveMatrix(size, size);
mat2 = reserveMatrix(size, size);
mat3 = reserveMatrix(size, size);
if (size == 0) {
//printf("Matrix cannot be size 0\n");
return -1;
}
//Start timer
clock_gettime(CLOCK_MONOTONIC, &time1);
// *********** Begin main operation *********** //
// //
// declare necessary local variables
pthread_t theThreads[numThreads];
argStr data[numThreads]; // Create numThreads # of argStr objects
for (int i = 0; i < numThreads; i++) {
data[i].result = reserveMatrix(size, size);
data[i].tid = i; // Self-assigned threadID
data[i].size = size; // Size of a block
data[i].s = size * i / numThreads;
data[i].e = size * (i + 1) / numThreads - 1;
//printf("I handle operations from %d to %d\n", data[i].s, data[i].e);
}
// Start the threads
for (int i = 0; i < numThreads; i++) {
pthread_create(&theThreads[i], NULL, matMult, (void*) (&data[i]));
}
// await all threads being done.
for (int i = 0; i < numThreads; i++) {
pthread_join(theThreads[i], NULL);
}
// rejoin received data
//printMat(data[1].result, size, size);
// //
// *********** End main operation *********** //
// Stop timer and find time taken
clock_gettime(CLOCK_MONOTONIC, &time2);
diffTime = timespecDifference(time1, time2);
double cpuTimeUsed = ((double)diffTime.tv_sec + (double)diffTime.tv_nsec / 1000000000.0);
//Print Time
printf("Pthread Matrix Multiply, %d, %d, %lf\n", size, numThreads, cpuTimeUsed);
}
// Struct Timer
struct timespec timespecDifference(struct timespec start, struct timespec end)
{
struct timespec temp;
if ((end.tv_nsec - start.tv_nsec) < 0) {
temp.tv_sec = end.tv_sec - start.tv_sec - 1;
temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
}
else {
temp.tv_sec = end.tv_sec - start.tv_sec;
temp.tv_nsec = end.tv_nsec - start.tv_nsec;
}
return temp;
}
// Reserve matrix function
double** reserveMatrix(int nRows, int nCols) {
double** matrix1 = (double**)malloc(nRows * sizeof(double*));
matrix1[0] = (double*)malloc(nRows * nCols * sizeof(double));
// Assign row pointers to "segment" out the data
for (int r = 1; r < nRows; ++r) {
matrix1[r] = &(matrix1[0][r * nCols]);
}
// Give values to the array
for(int i = 0; i < nRows * nCols; i++) {
matrix1[0][i] = i;
}
return matrix1;
}
// Print matrix function
void printMat(double** mat1, int rows, int cols) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f, ", mat1[i][j]);
}
printf("\n");
}
printf("End of array print\n");
}
void* matMult(void* arg) {
//printf("Begin an operation\n");
argStr* args = (argStr*)arg;
double** result = args->result;
int tid = args->tid;
int size = args->size; // Size of the matrix
long s = args->s; // Start
long e = args->e; // End
// Print message to confirm data is getting stored
//printf("Hello from operation %d! \n", tid);
//printf("I am working from number %ld to %ld\n", s, e);
for(int r = s; r <= e; r++) { // May need to declare out of loop
for(int c = 0; c < size; c++) {
result[r][c] = 0.0;
for(int i = 0; i < size; i++) {
result[r][c] += mat1[r][i] * mat2[i][c];
}
}
}
// Print multipled matrix values
//printMat(mat3, size, size);
return NULL;
}
This is my serial code: To use this pass in the same sized row and column (see my useage below)
#include <stdio.h>
#include <stdlib.h> // rand(), srand()
#include <unistd.h>
#include <time.h>
// Matrix multiply code
// **** Time struct **** //
struct timespec time1, time2, diffTime;
// Prototypes
struct timespec timespecDifference(struct timespec start, struct timespec end); // For timing
double** matrixMultiply(double** matrix1, double** matrix2, double** result, int nRows, int nCols);
double** transMatrixMultiply(double** matrix1, double** matrix2, double** result, int nRows, int nCols);
double** reserveMatrix(int nRows, int nCols);
double matrixProduct(double** mat1, double** mat2, int nRows, int nCols);
void printMat(double** mat1, int rows, int cols);
// Begin main
int main(int argc, char *argv[])
{
int rows = atoi(argv[1]);
int cols = atoi(argv[2]);
// Declare the ARRAYS and populate them
double** arr1 = reserveMatrix(rows, cols);
double** arr2 = reserveMatrix(rows, cols);
double** arr3 = reserveMatrix(rows, cols);
double** arr4 = reserveMatrix(rows, cols);
double prod1 = matrixProduct(arr1, arr2, rows, cols);
//Start Clock
clock_gettime(CLOCK_MONOTONIC, &time1);
arr3 = matrixMultiply(arr1, arr2, arr3, rows, cols);
// Stop timer and find time taken
clock_gettime(CLOCK_MONOTONIC, &time2);
diffTime = timespecDifference(time1, time2);
double cpuTimeUsed = ((double)diffTime.tv_sec + (double)diffTime.tv_nsec / 1000000000.0);
//Print Time
printf("Matrix Multiply, %d, %lf\n", rows, cpuTimeUsed);
// Print input matrix values. Used to test that matrix multiply works - it does
// Perform a transposition of matrix 2
for (int r = 0; r < rows; ++r) {
for (int c = r + 1; c < cols; ++c) {
double val = arr2[r][c];
arr2[r][c] = arr2[c][r];
arr2[c][r] = val;
}
}
// Run matrix multiply again on the newly transposed data.
//Start Clock
clock_gettime(CLOCK_MONOTONIC, &time1);
arr4 = transMatrixMultiply(arr1, arr2, arr4, rows, cols);
// Stop timer and find time taken
clock_gettime(CLOCK_MONOTONIC, &time2);
diffTime = timespecDifference(time1, time2);
cpuTimeUsed = ((double)diffTime.tv_sec + (double)diffTime.tv_nsec / 1000000000.0);
//Print Time
printf("Trans Matrix Multiply, %d, %lf\n", rows, cpuTimeUsed);
//double prod2 = matrixProduct(arr3, arr4, rows, cols);
//printf("The matrix product of m3 and m4 is: %f\n", prod2);
//printMat(mat3, rows, cols);
return 0;
}
// Struct Timer
struct timespec timespecDifference(struct timespec start, struct timespec end)
{
struct timespec temp;
if ((end.tv_nsec - start.tv_nsec) < 0) {
temp.tv_sec = end.tv_sec - start.tv_sec - 1;
temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
}
else {
temp.tv_sec = end.tv_sec - start.tv_sec;
temp.tv_nsec = end.tv_nsec - start.tv_nsec;
}
return temp;
}
// standard matrix multiply
double** matrixMultiply(double** matrix1, double** matrix2, double** result, int nRows, int nCols) {
for (int r = 0; r < nRows; ++r) {
for (int c = 0; c < nCols; ++c) {
result[r][c] = 0.0;
for (int i = 0; i < nRows; ++i) {
result[r][c] += matrix1[r][i] * matrix2[i][c];
}
}
}
return result;
}
// Transpose matrix multiply
double** transMatrixMultiply(double** matrix1, double** matrix2, double** result, int nRows, int nCols) {
for (int c = 0; c < nCols; ++c) {
for (int r = 0; r < nRows; ++r) {
result[c][r] = 0.0;
for (int i = 0; i < nCols; ++i) {
result[c][r] += matrix1[c][i] * matrix2[r][i];
}
}
}
return result;
}
// Reserve data function. Reserves and populates array data
double** reserveMatrix(int nRows, int nCols) {
double** matrix1 = (double**)malloc(nRows * sizeof(double*));
matrix1[0] = (double*)malloc(nRows * nCols * sizeof(double));
// Assign row pointers to "segment" out the data
for (int r = 1; r < nRows; ++r) {
matrix1[r] = &(matrix1[0][r * nCols]);
}
// Give values to the array
for(int i = 0; i < nRows * nCols; i++) {
matrix1[0][i] = i;
}
return matrix1;
}
// Check that matrix1 and matrix2 are the same
double matrixProduct(double** mat1, double** mat2, int nRows, int nCols) {
double sum = 0.0;
for(int i = 0; i < nRows * nCols; i++) {
sum += (mat1[0][i] - mat2[0][i]) * (mat1[0][i] - mat2[0][i]);
//printf("matrix product pos: %i, sum: %f\n", i, sum);
}
return sum;
}
// Print matrix function
void printMat(double** mat1, int rows, int cols) {
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
printf("%f, ", mat1[i][j]);
}
printf("\n");
}
printf("End of array print\n");
}
Here is the linux output of me compiling and running this code. At matrix size 1200 x 1200 the run time differences are not that pronounced, but the serial code ends up being significantly faster than the parallel at sizes above 1500 x 1500:
MYPC:~/Projects/matrixMultiply/phase3$ gcc matrixMult.c -o MM
MYPC:~/Projects/matrixMultiply/phase3$ gcc pMatMult.c -lpthread -o PMM
MYPC:~/Projects/matrixMultiply/phase3$ ./MM 1200 1200
Matrix Multiply, 1200, 25.487388
Trans Matrix Multiply, 1200, 16.452777
MYPC:~/Projects/matrixMultiply/phase3$ ./PMM 1200 2
Pthread Matrix Multiply, 1200, 2, 22.495115
MYPC:~/Projects/matrixMultiply/phase3$ ./PMM 1200 4
Pthread Matrix Multiply, 1200, 4, 22.181686
The sections in bold contain the meaningful output. It reads
name of the process
matrix size
number of threads spawned (in pThread program only)
run time
Any help would be appreciated. I will be instantly replying to questions for the next 2 hours.
The solution was to terminate extra processes that were running on my ubuntu machine. The code worked perfectly fine as a few users pointed out. Killing all other processes on the machine, then running my parallel code provided the expected speedups.
I am not sure of the precise technical reason this is going on other than the machine wasn't prioritizing my program when it had others running, resulting in slower times.
I have been given a school assignment in C to create a program that multiplies matrices. I will list assignment constraints below so people don't respond with questions as to why I am doing things this way.
Constraints from instructor:
Cannot use square brackets anywhere in code (use pointer notation instead)
Matrices A, B, C must be single integer pointer variables (int *A, *B, *C)
Can only use main function and those specified by header
Must compile with "gcc -ansi -Wall -o p2 p2.c"
I have not implemented the matrix multiplication function yet, as the issues I am having relate to either file reading or memory allocation.
The specific problem I am having is when I allocate space to the pointer matrix with either malloc OR calloc (tried both), the program inserts 33 in some places in the output instead of 0. I've tried everything at this point and am convinced my knowledge of pointers is fundamentally flawed.
p2.h (given by instructor)
#include <stdio.h>
#include <stdlib.h>
/* This function reads m, n, and p from the datafile.
It then allocates the correct amount of memory required for matrices
A, B, and C.
Then matrices A and B are filled from the datafile.
The values for m, n, and p are passed by reference, and are
thus filled in by this function
PARAMETERS in order are:
int ** matrix A
int ** matrix B
int ** matrix C
int * m The number of rows in matrix A
int * n The number of columns in matrix A and
The number of rows in matrix B
int * p The number of columns in matrix B
char * The name of the datafile, from the command line
*/
void read_matrices(int **, int **, int **, int *, int *, int *, char *);
/* This function prints a matrix. Rows and columns should be preserved.
PARAMETERS in order are:
int * The matrix to print
int The number of rows in the matrix
int The number of columns in the matrix
*/
void print_matrix(int *, int, int);
/* The two matrices A and B are multiplied, and matrix C contains the
result.
PARAMETERS in order are:
int * Matrix A
int * Matrix B
int * Matrix C
int m
int n
int p
*/
void mult_matrices(int *, int *, int *, int, int, int);
p2.c (sorry for the mess a lot of debugging went on)
#include <stdio.h>
#include <stdlib.h>
#include "./p2.h"
/* constants for testing */
#define cM 3
#define cN 2
#define cP 5
int main(int argc, char **argv) {
if (argc < 2) {
printf("Must include an argument.\n");
exit(1);
}
char *path = *(argv + 1);
int *m = (int *) malloc(sizeof(int));
int *n = (int *) malloc(sizeof(int));
int *p = (int *) malloc(sizeof(int));
*m = cM; *n = cN; *p = cP;
int i,j; /* loop counters */
/* allocate space for 2d pointer arrays */
int **A = NULL;
A = (int **) malloc(*m * sizeof(int *));
for (i = 0; i < *m; i++) {
*(A+i) = (int *) malloc(*n * sizeof(int));
}
int **B = NULL;
B = (int **) malloc(*n * sizeof(int *));
for (i = 0; i < *n; i++) {
*(B+i) = (int *) malloc(*p * sizeof(int));
}
int **C = NULL;
C = (int **) malloc(*m * sizeof(int *));
for (i = 0; i < *m; i++) {
*(C+i) = (int *) malloc(*p * sizeof(int));
}
/* write data to A */
for (i = 0; i < *m; i++) {
for (j = 0; j < *n; j++) {
*(*(A+i)+j) = 0;
}
}
/* testing a */
for (i = 0; i < *m; i++) {
for (j = 0; j < *n; j++) {
if (*(*(A+i)+j) != 0) {
printf("[x]");
} else {
printf("[0]");
}
}
}
printf("\n");
/* write data to B */
for (i = 0; i < *n; i++) {
for (j = 0; j < *p; j++) {
*(*(B+i)+j) = 0;
}
}
/* testing b */
for (i = 0; i < *n; i++) {
for (j = 0; j < *p; j++) {
if (*(*(B+i)+j) != 0) {
printf("[x]");
} else {
printf("[0]");
}
}
}
printf("\n");
/* write data to C */
for (i = 0; i < *m; i++) {
for (j = 0; j < *p; j++) {
*(*(C+i)+j) = 0;
}
}
/* testing c */
for (i = 0; i < *m; i++) {
for (j = 0; j < *p; j++) {
if (*(*(C+i)+j) != 0) {
printf("[x]");
} else {
printf("[0]");
}
}
}
printf("\n");
printf("Matrix A: \n");
print_matrix(*A, *m, *n);
printf("Matrix B: \n");
print_matrix(*B, *n, *p);
printf("Matrix C: \n");
print_matrix(*C, *m, *p);
return 0;
}
void read_matrices(int **A, int **B, int **C, int *m, int *n, int *p, char *path) {
FILE *fptr;
fptr = fopen(path, "r");
if (fptr == NULL) {
printf("Cannot open file: ./p2 [filename].txt\n");
exit(1);
}
/* get first 3 numbers from file, set m,n,p */
*m = fgetc(fptr);
fgetc(fptr);
*n = fgetc(fptr);
fgetc(fptr);
*p = fgetc(fptr);
fgetc(fptr);
/* read first matrix */
/* 1) calculate matrix size m x n
* 2) loop through malloc'ed matrix
* 3) each loop, insert char in loc
* 4) if next char NOT 10/32, add nextchar*10 to value in loc
*/
char cur;
while ( (cur = fgetc(fptr)) != EOF ) {
if (cur == 10 || cur == 32) {
/* do nothing :) */
} else {
*m = cur;
*n = cur;
*p = cur;
break;
}
}
printf("m: %c\n", *m);
printf("n: %c\n", *n);
printf("p: %c\n", *p);
printf("next: %c\n", fgetc(fptr));
fclose(fptr);
}
void print_matrix(int *X, int rows, int cols) {
int r, c;
int k = 0;
for (r = 0; r < rows; r++) {
for (c = 0; c < cols; c++) {
printf("\t%d", *(X+k));
k++;
}
printf("\n");
}
}
void mult_matrices(int *A, int *B, int *C, int m, int n, int p) {
}
d2.txt (data file)
3
2
4
1 2
3 4
5 6
7 8 9 10
11 12 13 14
Output: ./p2 d2.txt
[0][0][0][0][0][0]
[0][0][0][0][0][0][0][0][0][0]
[0][0][0][0][0][0][0][0][0][0][0][0][0][0][0]
Matrix A:
0 0
0 0
0 0
Matrix B:
0 0 0 0 0
0 33 0 0 0
Matrix C:
0 0 0 0 0
0 33 0 0 0
0 0 0 0 33
If you notice, I have some debug code that checks whether or not the current item in the array is 0. It seems to indicate that they are all 0, making me think it is a printing problem, but I am even more lost on what would be causing that. The ascii code for 33 is an exclamation point, but I am not sure what relevance it has.
Based on the function signatures you're supposed to use, you need to implement your 2D arrays as 1D with the correct index math. This will result in all memory being laid out contiguously, which is not at all guaranteed with the way you're allocating memory now (two calls to malloc for each matrix). For example:
#include <stdio.h>
#include <stdlib.h>
void print_matrix(int* A, int rows, int cols)
{
for (int r=0; r<rows; r++)
{
for (int c=0; c<cols; c++)
{
// If you want to treat A as a 2D matrix, this is where we have to do a bit of
// fancy index math to give you what double bracket notation [][] does for you
// r * cols gives you the index of the right row
// + c give you the column offset in that row
// add that offset to A then dereference
printf("%d\t", *(A + (r * cols + c)));
}
printf("\n");
}
}
int main(void)
{
// matrix A is supposed to be m by n
int* A;
// read these from file, or where ever they're supposed to come from
int m = 2;
int n = 10;
// Allocate the memory in one chunk. This makes the memory all contiguous, just the
// same as if you had done A[m][n]. However, the double call malloc for each int**
// matrix probably will not give you contiguous memory for the entire matrix. Each
// call to malloc is independent.
A = malloc(m * n * sizeof(int)); // or sizeof(*A) would be even better
if (A == NULL)
{
// handle error
}
// We can initialize values for A at this point, still not needing to care about
// rows or columns
for (int i=0; i<m*n; i++)
{
*(A + i) = i; // using i for a better visual when we print
}
print_matrix(A, m, n);
free(A);
return 0;
}
Demo
You are ovecomplicating simple things. Use pointers to arrays and allocate 2D array.
Use the correct type of your size variables.
Try to avoid side effects. Use parameters and function return values.
//this function is for the test purposes only
int writefile(const char *fn)
{
FILE *fo = fopen(fn, "w");
fprintf(fo,
"3\n"
"2\n"
"4\n"
"1 2\n"
"3 4\n"
"5 6\n"
"7 8 9 10\n"
"11 12 13 14\n");
fclose(fo);
}
void *allocIntMatrix(size_t rows, size_t cols)
{
int (*m)[cols] = malloc(rows * sizeof(*m));
return m;
}
void printIntMatrix(size_t rows, size_t cols, int (*m)[cols])
{
for(size_t row = 0; row < rows; row++)
{
for(size_t col = 0; col < cols; col++)
{
printf("[%5d] ", m[row][col]);
}
printf("\n");
}
}
int readData(FILE *fi, size_t rows, size_t cols, int (*m)[cols])
{
for(size_t row = 0; row < rows; row++)
{
for(size_t col = 0; col < cols; col++)
{
fscanf(fi, "%d", &m[row][col]);
}
}
return 0;
}
int main(int argc, char **argv)
{
size_t n,m,p;
writefile("a.aaa");
FILE *fi = fopen("a.aaa", "r");
fscanf(fi, "%zu", &m);
fscanf(fi, "%zu", &n);
fscanf(fi, "%zu", &p);
printf("n = %zu, m = %zu, p = %zu\n", n, m, p);
int (*A)[n] = allocIntMatrix(m, n);
int (*B)[p] = allocIntMatrix(n, p);
readData(fi, m, n, A);
readData(fi, n, p, B);
fclose(fi);
printIntMatrix(m, n, A);
printf("\n");
printIntMatrix(n, p, B);
return 0;
}
https://godbolt.org/z/adoEx1r4f
You need to check for errors (file, memory etc). I skipped it for the sake of simplicity of the example.
So I created a program that calculates matrix multiplication sequentially then records the time, then calculates matrix multiplication using any number of pthreads entered in the command line numberOfThreads. But regardless of how many threads I enter it still giving me the same time each time. I'm currently on i7 Macbook so I'm not sure if thats why adding more threads doesn't optimize the calculations or If I just don't have the correct program.
Heres the code:
/*Program to generate two square 2D arrays of random doubles and
time their multiplication.
Program utlizies pthreads to efficiently perform matrix Multiplication
Compile by: gcc -o mmult -O3 mmultHW6.c -lpthread
Run by: ./mmult 1000 2
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
#define TRUE 1
#define FALSE 0
#define BOOL int
typedef struct {
int threadId;
int start_row;
int end_row;
int start_col;
int end_col;
} BLOCK;
// function prototypes
double ** allocate2DArray(int rows, int columns);
void print2DArray(int rows, int columns, double ** array2D);
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray);
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance);
void matrixMultiplication(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void * threadMMult(void * rank);
int numberOfThreads;
double ** A;
double ** B;
double ** C;
double ** C_alt;
int rows, columns;
int main(int argc, char ** argv) {
long i, startTime, endTime,seqTime, paralellTime;
BLOCK * blocksOfWork;
int errorCode;
double tolerence;
pthread_t * threadHandles;
if (argc !=3) {
printf("Usage: %s <# of rows><# of Threads>\n", argv[0]);
exit(-1);
} // end if
sscanf(argv[1], "%d", &rows);
sscanf(argv[1], "%d", &numberOfThreads);
columns = rows;
// seed the random number generator
srand( time(NULL) );
A = allocate2DArray(rows, columns);
B = allocate2DArray(rows, columns);
C = allocate2DArray(rows, columns);
C_alt = allocate2DArray(rows, columns);
generateRandom2DArray(rows, columns, -1.0, +1.0, A);
generateRandom2DArray(rows, columns, -1.0, +1.0, B);
printf("after initializing matrices\n");
time(&startTime);
matrixMultiplicationAlt(rows, columns, A, rows, columns, B, C_alt);
time(&endTime);
seqTime = endTime-startTime;
printf("Matrix Multiplication Alt. time = %ld\n",seqTime);
time(&startTime);
threadHandles = (pthread_t *) malloc(numberOfThreads*sizeof(pthread_t));
blocksOfWork = (BLOCK *) malloc(numberOfThreads*sizeof(BLOCK));
for(i=0; i < numberOfThreads; i++){
blocksOfWork[i].threadId = i;
blocksOfWork[i].start_row = i * rows/numberOfThreads;
if (i == numberOfThreads -1){
blocksOfWork[i].end_row = rows - 1;
}
else{
blocksOfWork[i].end_row = (i+1)*rows/numberOfThreads -1;
}
}
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_create(&threadHandles[i], NULL, threadMMult,
&blocksOfWork[i]) != 0) {
printf("pthread %d failed to be created with error code %d\n", i, errorCode);
} // end if
} // end for
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_join(threadHandles[i], (void **) NULL) != 0) {
printf("pthread %d failed to be joined with error code %d\n", i, errorCode);
} // end if
} // end for
time(&endTime);
paralellTime = endTime-startTime;
printf("Parallel Matrix Multiplication time = %ld\n",paralellTime);
if (equal2DArrays(rows, columns, C, C_alt, 0.000001)) {
printf("Arrays match with tolerance of %.000001f\n", 0.000001);
} else {
printf("Arrays DON'T match with tolerance of %.000001f\n", 0.000001);
} // end if
return 0;
} // end main
void * threadMMult(void * arg){
BLOCK * block = (BLOCK *) arg;
int threadId = block->threadId;
int startRow = block->start_row;
int endRow = block->end_row;
int i, j, k, sum;
for(i=startRow; i<=endRow;i++){
for(j = 0; j<rows;j++){
C[i][j] = 0;
for(k=0; k<rows ; k++){
C[i][j] += A[i][k]*B[k][j];
//printf("%lu - C[%d][%d] += A[%d][%d] * B[%d][%d]\n",
//pthread_self(), i,j,i,k,k,j);
}
}
return 0;
}
}
//C[i][j] += A[i][k] * B_transpose[j][k];
/*******************************************************************
* Function matrixMultiplicationAlt passed two matrices and returns
* their product.
********************************************************************/
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product) {
int i, j, k;
double ** array2_transpose;
if (columns1 != rows2) {
printf("Matrices cannot be multiplied -- incompatible dimensions!\n");
exit(-1);
} // end if
// Transposes array2
array2_transpose = allocate2DArray(columns2, rows2);
for (i=0; i < rows2; i++) {
for (j=0; j < columns2; j++) {
array2_transpose[j][i] = array2[i][j];
} /* end for (j */
} /* end for (i */
// Matrix Multiplication uses array1 and array2_transpose
for (i=0; i < rows1; i++) {
for (j=0; j < columns2; j++) {
C_alt[i][j] = 0.0;
for (k=0; k < columns1; k++) {
C_alt[i][j] += array1[i][k]*array2_transpose[j][k];
} /* end for (k */
} /* end for (j */
} /* end for (i */
} // end matrixMultiplicationAlt
/*******************************************************************
* Function allocate2DArray dynamically allocates a 2D array of
* size rows x columns, and returns it.
********************************************************************/
double ** allocate2DArray(int rows, int columns) {
double ** local2DArray;
int r;
local2DArray = (double **) malloc(sizeof(double *)*rows);
for (r=0; r < rows; r++) {
local2DArray[r] = (double *) malloc(sizeof(double)*columns);
} // end for
return local2DArray;
} // end allocate2DArray
/*******************************************************************
* Function generateRandom2DArray is passed the # rows, the # columns,
* min. value, max. value, and returns random2DArray containing
* randomly generated doubles.
********************************************************************/
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray) {
int r, c;
double range, div;
for (r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
range = max - min;
div = RAND_MAX / range;
random2DArray[r][c] = min + (rand() / div);
} // end for (c...
} // end for (r...
} // end generateRandom2DArray
/*******************************************************************
* Function print2DArray is passed the # rows, # columns, and the
* array2D. It prints the 2D array to the screen.
********************************************************************/
void print2DArray(int rows, int columns, double ** array2D) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
printf("%10.5lf", array2D[r][c]);
} // end for (c...
printf("\n");
} // end for(r...
} // end print2DArray
/*******************************************************************
* Function equal2DArrays is passed the # rows, # columns, two
* array2Ds, and tolerance. It returns TRUE if corresponding array
* elements are equal within the specified tolerance; otherwise it
* returns FALSE.
********************************************************************/
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
if (fabs(array1[r][c] - array2[r][c]) > tolerance) {
return FALSE;
} // end if
} // end for (c...
} // end for(r...
return TRUE;
} // end equal2DArray
This is a bit suspicious:
sscanf(argv[1], "%d", &rows);
sscanf(argv[1], "%d", &numberOfThreads);
I am not sure where you are with the learning C thing, but “man 3 getopt” should show a much better way to pass runtime parameters into your program than accidentally re-using argv[1]....
The second problem that you have created is using two different mechanisms for performing matrix multiplication; one for the sequential version, and a separate one for the parallel one. Logically, you should be able to have one, fully parameterized function to perform the operation, then the fact that M parallel threads are invoking it with different data is transparent to the function itself. Since you didn’t, you left a question mark: is the lack of scale because one of your matrix multiply functions doesn’t work correctly?
Looking at your code, I have little faith that it does; you have utilized a barrage of mechanisms to implement this. The core is:
Mult(double *A, int Ar, int Ac, double *B, int Br, int Bc, double C, int Cr, int Cc) {
/ multiply C = A * B */
}
and whether it is N threads converging on a solution, or one thread trudging through the solution, they should be able to execute the same code.
I'm having an issue with compling the code.
I keep getting these errors.
"C: 194 warning passing argument 3 of 'matrix_column_subtract' makes pointer from integer without a cast"
"C: 12 note: expected 'double**' but argument is type 'int'
"C: 194 error too few arguments to function 'matrix_column_subtract'
I think I know what is going on I'm calling matrix_column_multiply is a void and I need to be calling it a pointer pointer I think and I don't know how to chage that. If anyone has some idea on how I can get this to compile that would be highly appreciated!!
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define DEBUG 0
#define MAX_ITER 10000
double *eigen (int n, double **A);
void qr_decomp (int n, double **A, double **Q, double **R);
void matrix_copy_column(double **msrc, int col1, double **mdst,int col2, int rows);
void matrix_column_subtract(double **m1, int c1, double **m2, int c2, int rows);
void matrix_column_multiply(double **m, int c, double k, int rows);
int main() {
int i, n = 126;
double *eig, **Am;
FILE *BinInp, *TxtOut;
/* Input Code: Reads bv, Am in binary form from CGrad.bin */
if ( (BinInp = fopen("CGrad.bin","r")) == NULL ) {
fprintf(stderr, "Cannot open matrix binary file INPUT... exiting\n");
exit(-1);
}
Am = (double**)malloc (n*sizeof(double*));
Am[0] = (double*)malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
Am[i] = Am[0] + i*n;
}
for (i = 0; i < n; i++) {
if (i==0) { /* Read one extra line that is discarded (bv is still in bin file) */
if (!fread(Am[i], sizeof(double), n, BinInp)) {
fprintf(stderr, "Cannot read row Am[%03d] of matrix... exiting\n", i+1);
exit(1);
}
}
if (!fread(Am[i], sizeof(double), n, BinInp)) {
fprintf(stderr, "Cannot read row Am[%03d] of matrix... exiting\n", i+1);
exit(1);
}
}
if (fclose(BinInp) == EOF) {
fprintf(stderr, "Cannot close matrix binary file INPUT... exiting\n");
exit(-1);
}
/* COMPUTE EIGENVALUES HERE USING FUNCTIONS. RETURN EIGENVALUES (AS 1D VECTOR) TO eig. */
if (DEBUG) printf ("Calling eigen\n");
eig = eigen (n, Am);
/* Output Code: Writes eig in text form to Eigs.txt */
if ( (TxtOut = fopen("Eigs.txt", "w")) == NULL ) {
fprintf(stderr, "Cannot open matrix text file OUTPUT... exiting\n");
exit(-1);
}
for (i = 0; i < n; i++) {
fprintf (TxtOut, "%18.14e ", eig[i]);
}
if (fclose(TxtOut) == EOF) {
fprintf(stderr, "Cannot close matrix text file INPUT... exiting\n");
exit(-1);
}
return 0;
}
double* eigen (int n, double **Acur)
{
double err = 1, eps = 1e-2;
double ndenom, nnumer, temp;
double *eig, **Anex, **Qsub, **Rsub;
int i, j, k, iters = 1;
/* Malloc memory for the three matricies */
Anex = malloc (n*sizeof(double*));
Anex[0] = malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
Anex[i] = Anex[0] + i*n;
}
Qsub = malloc (n*sizeof(double*));
Qsub[0] = malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
Qsub[i] = Qsub[0] + i*n;
}
Rsub = malloc (n*sizeof(double*));
Rsub[0] = malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
Rsub[i] = Rsub[0] + i*n;
}
/* Malloc memory for the return eig vector */
eig = malloc (n*sizeof(double));
for (i = 0; i < n; i++) {
eig[i] = 0;
}
/* Enter main iteration loop for eigenvalues */
while (err > eps && iters < MAX_ITER) {
/* QR Decompose Acur then find next iterate value in Anex */
qr_decomp (n, Acur, Qsub, Rsub);
// FIND NEXT ITERATE VALUE, PUT IN Anex.
/* Determine relative error change, reset "old" iterate value. */
ndenom = 0;
nnumer = 0;
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
temp = Anex[i][j]-Acur[i][j];
ndenom += temp*temp;
nnumer += Anex[i][j]*Anex[i][j];
Acur[i][j] = Anex[i][j];
}
}
err = sqrt(ndenom)/sqrt(nnumer);
/* Increment the iteration count and report error */
if (iters % 25 == 0) {
printf ("Error at end of iteration %05d = %14.10f %%\n", iters, 100*err);
}
++iters;
}
printf ("Error at end of iteration %05d = %14.10f\nCONVERGED.\n", iters-1, err);
if (iters == MAX_ITER) {
printf ("WARNING: MAX_ITER iterations reached!...\n");
}
/* Copy diagonal entries of Acur into eig for return to main */
for (i=0; i<n; i++) {
eig[i] = Acur[i][i];
}
return eig;
}
void qr_decomp (int n, double **Adec, double **myQ, double **myR)
{
int i, j, k; /* Loop Variables: this is all you should need! */
double **T, **S;
double r;
T = malloc (n*sizeof(double*));
T[0] = malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
T[i] = T[0] + i*n;
}
S = malloc (n*sizeof(double*));
S[0] = malloc (n*n*sizeof(double));
for (i = 1; i < n; ++i) {
S[i] = S[0] + i*n;
}
/* Main loop for decomposition */
for (i=0; i<n; i++) {
/* Column i of Q is initially column i of A */
matrix_copy_column(Adec,i,myQ,i,n);
/* For j < i-1, Perform the dot product between the j row of Q and the
i row of A to determine the R(j,i) value, then insure the Q i column
is orthogonal to all other Q columns by subtracting existing Q columns
from it. */
for (j = 0; j < i; j++) {
//r[j,i] = Qj^T * Ui
matrix_copy_column(myQ,j,T,0,n);
matrix_copy_column(Adec,i,S,0,n);
for (k=0; k<n; k++) {
r += T[k][0] * S[k][0];
}
/* Determine the R diagonal as the magnitude of the Q column, then
normalize the Q column (make it a unit vector). */
//Qi = Ui
myR[j][i] = r;
// Something wrong here.
// There is one parameter missing, as matrix_column_subtract needs 5 parameters and
// only 4 are given.
// Also, matrix_column_multiply is defined as returning a void, whereas the 3rd parameter
// of matrix_column_subtract should be a double **
matrix_column_subtract(myQ,i,matrix_column_multiply(T,0,r,n),j);
}
}
}
/* Copies a matrix column from msrc at column col1 to mdst at column col2 */
void matrix_copy_column(double **msrc, int col1, double **mdst,int col2, int rows) {
int i = 0;
for (i=0; i<rows; i++) {
mdst[i][col2] = msrc[i][col1];
}
}
/* Subtracts m2's column c2 from m1's column c1 */
void matrix_column_subtract(double **m1, int c1, double **m2, int c2, int rows) {
int i = 0;
for (i=0; i<rows; i++) {
m1[i][c1] -= m2[i][c2];
}
/*return m1;*/
}
void matrix_column_multiply(double **m, int c, double k, int rows) {
int i = 0;
for (i=0; i<rows; i++) {
m[i][c] *= k;
}
/*return m;*/
}
A problem is here.
matrix_column_subtract(myQ,i,matrix_column_multiply(T,0,r,n),j);
The relevant function signatures are:
void matrix_column_subtract(double **m1, int c1, double **m2, int c2, int rows);
void matrix_column_multiply(double **m, int c, double k, int rows);
Note that matrix_column_multiply returns void, but you're passing it in as if it were double **. I'm guessing the compiler is desperately trying to make it work and returned some sort of garbage integer.
These functions return void because they do their work directly on the matrix in question. matrix_column_multiply alters m. matrix_column_subtract alters m1.
To make it work, call matrix_column_multiply on T then pass T (now modified by the multiplication) into matrix_column_subtract.
matrix_column_multiply(T, 0, r, n);
matrix_column_subtract(myQ, i, T, j, ...rows...);
You're still missing the fifth argument, rows. I'm going to guess that's n, same as has been used for the rows in every other matrix_column_blah call.
I'm making a program which dynamically creating 2d array.but it's showing the error which I mentioned on the title. I'm using Visual Studio 2015.
// last.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <time.h>
#include "stdlib.h"
double selectionSort(int * number, int number_count);
void print2d(int ** array, int rows, int cols);
void twodarray();
void main(int argc, char* argv[])
{
int num_count = 10000;
int num[10000];
for (int i = 0; i < num_count; i++)
{
num[i] = rand();
}
double sortTime = selectionSort(num, num_count);
printf("Total Runtime is: %.0f milliseconds. \n", sortTime * 1000);
twodarray();
getchar();
}
double selectionSort(int * number, int number_count)
{
clock_t start, end;
double duration;
int min;
start = clock();
for (int i = 0; i < number_count - 1; i++)
{
min = i;
for (int j = i + 1; j < number_count; j++)
{
if (number[min] > number[j])
{
min = j;
}
}
if (min != i)
{
int temp = number[min];
number[min] = number[i];
number[i] = temp;
}
}
end = clock();
return duration = (double)(end - start) / CLOCKS_PER_SEC;
}
void print2d(int ** array, int rows, int cols)
{
int i, j;
for (i = 0; i < rows; i++)
{
for (j = 0, j < cols; j++;)
{
printf("%10d ", array[i][j]);
}
puts("");
}
}
void twodarray()
{
int **twod;
int rows = 10;
twod = malloc(rows * sizeof(int));
int i,cols = 10;
for (i = 0; i < rows; i++)
{
twod[i] = malloc(cols*sizeof(int));
print2d(twod, rows, cols);
}
for (i = 0; rows; i++)
{
free(twod[i]);
free(twod);
}
}
In c++ you need to cast when assigining a void * pointer to another type of pointer. But in c++ you should not use malloc(), instead use
int **twod = new int *[rows];
If you didn't mean to write a c++ program, rename the file. Change the extension from .cpp to .c.
Your allocation is wrong too, as pointed out by #KeineLust here.
This is wrong:
int **twod;
int rows = 10;
twod = malloc(rows * sizeof(int));
You need to reserve space for n pointers to int, not for n ints, change to
twod = malloc(rows * sizeof(int *));
And here:
for (j = 0, j < cols; j++;)
^ ^
Use a semicolon instead of a comma and also remove the last semicolon.
Another problem:
for (i = 0; rows; i++)
{
free(twod[i]);
free(twod); /* Don't free twod in the loop, one malloc -> one free */
}
And as pointed out by Nicat and Iharob, it seems that you are mixing C and C++, use the proper extension (.c)