parallel and sequential dotproduct programs different result - c

I've written these two versions of code for computing the dot product of two arrays, each of length 256. Here is the very simple sequential code:
#include <stdlib.h>
#include <stdio.h>
/* Sequential version: fills a[] and b[] and prints the sum of a[i]*b[i]. */
int main(int argc, char* argv[]){
double sum;
double a[256], b[256];
int n = 256, i;
/* Initialise indices 0..n-1 of both arrays. */
for (i=0; i<n; i++){
a[i] = i * 0.5;
b[i] = i * 2.0;
}
sum = 0;
/* NOTE(review): this loop visits i = 1..n, so its last iteration reads
   a[256] and b[256], one element past the end of each array. */
for (i=1; i<=n; i++){
sum = sum + a[i]*b[i];
}
printf ("sum = %f\n", sum);
}//main
The answer is 5559680,
but the parallel code gives a different result:
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define NUMTHRDS 4
/* State shared by main() and the dotprod() worker threads. */
double sum; /* global result; dotprod() adds to it while holding mutexsum */
double a[256], b[256]; /* input vectors, filled in by main() */
int status; /* exit value of a joined worker, set in main() */
int n=256; /* number of elements in a[] and b[] */
pthread_t thds[NUMTHRDS]; /* one handle per worker thread */
pthread_mutex_t mutexsum; /* guards updates to sum */
/*
 * Thread entry point: sums a[i]*b[i] over this thread's slice and adds the
 * partial result to the global sum.
 * arg carries the thread index, cast to a pointer by main().
 */
void* dotprod(void *arg){
int myid, i, my_first, my_last;
double sum_local;
myid = (int)arg;
/* Slice boundaries: thread k starts at k*n/NUMTHRDS. */
my_first = myid * n/NUMTHRDS;
my_last = (myid + 1) * n/NUMTHRDS;
sum_local = 0;
/* NOTE(review): the upper bound is inclusive, so index my_last is summed
   here and again as my_first of the next thread; for the last thread
   my_last == n, which is one past the end of a[] and b[]. */
for (i=my_first; i<=my_last; i++){
sum_local = sum_local + a[i]*b[i];
}
/* Only the final accumulation touches shared state, so only it is locked. */
pthread_mutex_lock(&mutexsum);
sum = sum + sum_local;
pthread_mutex_unlock(&mutexsum);
pthread_exit((void*)0);
}//dotprod
/*
 * Threaded dot product driver: fills a[] and b[], starts NUMTHRDS workers
 * running dotprod(), waits for all of them and prints the global sum.
 * Returns 0 on success, EXIT_FAILURE if a thread cannot be created or joined.
 */
int main(int argc, char* argv[]){
    int i;
    pthread_attr_t attr;

    (void)argc;
    (void)argv;

    for (i = 0; i < n; i++) {
        a[i] = i * 0.5;
        b[i] = i * 2.0;
    }

    pthread_mutex_init(&mutexsum, NULL);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    for (i = 0; i < NUMTHRDS; i++) {
        /* The thread index travels inside the pointer value itself; going
           through intptr_t keeps the int -> pointer conversion well sized. */
        if (pthread_create(&thds[i], &attr, dotprod, (void *)(intptr_t)i) != 0) {
            fprintf(stderr, "pthread_create failed for thread %d\n", i);
            return EXIT_FAILURE;
        }
    }
    pthread_attr_destroy(&attr);

    for (i = 0; i < NUMTHRDS; i++) {
        /* pthread_join stores a void *; writing it straight into the int
           `status` would overrun it where pointers are wider than int. */
        void *thread_ret = NULL;
        if (pthread_join(thds[i], &thread_ret) != 0) {
            fprintf(stderr, "pthread_join failed for thread %d\n", i);
            return EXIT_FAILURE;
        }
        status = (int)(intptr_t)thread_ret;
    }

    printf("sum = %f \n", sum);
    pthread_mutex_destroy(&mutexsum);
    /* Every worker has been joined, so a plain return ends the program. */
    return 0;
}//main
The answer is 5617024.
I'm totally confused: why are the two results different?

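This is an off-by-one error. In each pair of lines below, the first line is your loop and the second is the corrected version: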
for (i=1; i<=n; i++){
for (i=0; i<n; i++) {
and
for (i=my_first; i<=my_last; i++){
for (i=my_first; i<my_last; i++){
In the first program, you are adding in a[256]*b[256], which is off the end of both arrays.
Most likely those values happened to be 0, so you got the right answer by luck.
In the second program, you are counting some elements of the array twice (indices 64, 128, and 192), and
still adding in index 256.
Always check the boundary conditions of your loops, especially with array accesses.

Related

Why do I get different values in debug mode?

My program is supposed to calculate the matrix product (multiplication) of 2 given matrices using multithreading - each thread calculates one column.
The problem is that when I run it from the terminal, I get a few all-zero columns. I used a debugger to see where the problem is, but under the debugger it works fine every time! I can't figure out where the problem is.
How is it possible that I'm getting different values when debugging vs. regular execution?
I tried some "simple solutions" found by searching online:
restarting the Linux virtual machine
restarting VSCode
I'm not getting any errors or memory leaks, but I have no idea what to do next.
my code:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <errno.h>
#include <sys/stat.h>
#include <pthread.h>
int MATRIX_DIM=3; //default dimension; main() overwrites it with the value read from the user
int **RESULT_MAT; //product matrix, allocated in main()
//arguments handed to a colMatCalc thread
struct calcCol{
int **matA; //left operand
int **matB; //right operand
int **matC; //result matrix the thread writes into
int dim; //number of rows/columns of the square matrices
int i; //index of the column this thread computes
};
//forward declarations of the helpers defined below main()
int ijMult(int **matA, int **matB,int dim, int i, int j); //one element of the product
void* colMatCalc(void* s); //thread entry point: one column of the product
void printMatrix(int **mat);
void getMatrix(int **mat);
void freeMatrices(int **matA, int **matB, int **matC);
//thread entry point: prints the int that arg points to, then ends the thread
void* printThreadId(void* arg){
    const int *id = arg;
    printf("Thread id: %d \n", *id);
    pthread_exit(NULL);
}
//Reads a dimension and two square matrices, computes their product with one
//thread per result column, then prints the product and frees all matrices.
int main(){
//NOTE(review): these two arrays are sized from MATRIX_DIM before the user's
//value is read (i.e. with the default of 3); a larger entered dimension makes
//the loops below index past their end.
pthread_t tid[MATRIX_DIM];
int args[MATRIX_DIM];
pthread_attr_t attr;
pthread_attr_init(&attr);
printf("Enter enter MATRIX_DIM (number between 1 and 10) \n");
scanf("%d",&MATRIX_DIM);
if(MATRIX_DIM>10 || MATRIX_DIM<1)
MATRIX_DIM=3; //out-of-range input falls back to the default value
//memory allocation for matrices: an array of row pointers, then each row
int **matA = (int **)malloc(MATRIX_DIM * sizeof(int*));
for(int i = 0; i < MATRIX_DIM; i++) matA[i] = (int *)malloc(MATRIX_DIM * sizeof(int));
int **matB = (int **)malloc(MATRIX_DIM * sizeof(int*));
for(int i = 0; i < MATRIX_DIM; i++) matB[i] = (int *)malloc(MATRIX_DIM * sizeof(int));
RESULT_MAT = (int **)malloc(MATRIX_DIM * sizeof(int*));
for(int i = 0; i < MATRIX_DIM; i++) RESULT_MAT[i] = (int *)malloc(MATRIX_DIM * sizeof(int));
//single argument block that is passed to every thread
struct calcCol s;
s.matA = matA;
s.matB = matB;
s.matC=RESULT_MAT;
s.dim=MATRIX_DIM;
s.i=0;
printf("Enter elements of first matrix \n");
getMatrix(matA);
printf("Enter elements of second matrix \n");
getMatrix(matB);
//NOTE(review): every thread receives the address of the same `s`, and s.i is
//overwritten on the next iteration; a thread that copies *arg late sees a
//later column index than the one it was started for.
for(int i=0 ; i<MATRIX_DIM ; i++)
{
args[i]=i;
s.i=i;
pthread_create(&tid[i], &attr, colMatCalc, &s);
}
//wait for every column thread before reading the result
for(int i=0 ; i<MATRIX_DIM ; i++)
{
if(pthread_join(tid[i], NULL)!=0)
{
perror("pthread_join faild.");
exit(EXIT_FAILURE);
}
printf("Thread %d is terminated.\n", *(int*)(&args[i]));
}
printf("All threads are terminated!\n");
printf("Product of the matrices: \n");
printMatrix(RESULT_MAT);
freeMatrices(matA, matB, RESULT_MAT);
return 0;
}
//calculating (i,j) of the result matrix: the dot product of row i of matA
//and column j of matB
//matA, matB: dim x dim matrices stored as arrays of row pointers
//dim: number of rows/columns; the loop bound comes from this parameter so
//     the function no longer depends on the global MATRIX_DIM
//returns the sum over k of matA[i][k]*matB[k][j] (0 when dim <= 0)
int ijMult(int **matA, int **matB, int dim, int i, int j)
{
    int sum = 0;
    for(int k = 0 ; k < dim ; k++)
    {
        sum += matA[i][k] * matB[k][j];
    }
    return sum;
}
//calculating the 'i' column of the result matrix
void* colMatCalc(void* arg)
{
struct calcCol s = *(struct calcCol*)arg;
for(int k = 0 ; k < s.dim ; k++)
s.matC[k][s.i] = ijMult(s.matA, s.matB,s.dim, k, s.i);
return 0;
}
//prints the MATRIX_DIM x MATRIX_DIM matrix, one row per output line
void printMatrix(int **mat)
{
    for(int row = 0 ; row < MATRIX_DIM ; row++)
    {
        const int *cells = mat[row];
        int col = 0;
        while(col < MATRIX_DIM)
        {
            printf("%d \t", cells[col]);
            col++;
        }
        printf("\n");
    }
}
//reads MATRIX_DIM * MATRIX_DIM integers from stdin into mat, row by row
void getMatrix(int **mat)
{
    for(int row = 0 ; row < MATRIX_DIM ; row++)
    {
        int *cells = mat[row];
        int col = 0;
        while(col < MATRIX_DIM)
        {
            scanf("%d", cells + col);
            col++;
        }
    }
}
//releases the rows and then the row-pointer array of each of the three
//matrices; every matrix is expected to have MATRIX_DIM rows
void freeMatrices(int **matA, int **matB, int **matC)
{
    for(int i=0 ; i < MATRIX_DIM ; i++)
    {
        free(matA[i]);
        free(matB[i]);
        //free the matrix that was passed in rather than the global RESULT_MAT
        free(matC[i]);
    }
    free(matA);
    free(matB);
    free(matC);
}
I know I don't need to paste the entire code, but I really don't know where the tricky part is, because, again, when I'm in debug mode it works fine.
Input: 3 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9
Expected output: 30 36 42 66 81 96 102 126 150
Actual output: the same, but with one or more columns of zeros instead (different columns each time).
Thank you.
The problem is the variable s, which is shared across all the threads; specifically, this assignment:
s.i=i;
When you update s in the loop, every thread that has not yet copied it sees the latest contents of s.
What you can do is give each thread its own instance, as below.
for(int i=0 ; i<MATRIX_DIM ; i++)
{
    //each thread gets its own heap-allocated argument block, so a later
    //iteration cannot change what an earlier thread reads
    struct calcCol *s = malloc(sizeof(*s));
    if(s == NULL)
    {
        perror("malloc failed.");
        exit(EXIT_FAILURE);
    }
    s->matA = matA;
    s->matB = matB;
    s->matC = RESULT_MAT;
    s->dim = MATRIX_DIM;
    s->i = i; //the column this thread computes
    args[i] = i;
    pthread_create(&tid[i], &attr, colMatCalc, s);
}
With that, updating the contents of one *s won't affect any other thread's execution.
Make sure you free each instance once its thread has finished executing.

Using pthreads I get "Segmentation fault (core dumped)" only in big values

This is a program that computes the (square) sum of an array using threads. It works fine if the array has up to about 2,000,000 elements, but beyond that I get a "Segmentation fault (core dumped)" error. Could it be because I am using Ubuntu in a virtual machine to which I have allocated 4GB of RAM?
Thank you for your time!
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
int part = 0; // index of the next slice; square_sum() reads and increments it
int local_elements = 0; // elements per thread, set in main()
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // held by square_sum() while it works
void* square_sum(void* arg);
/* Fills a fixed-size array with 1s, splits it between the requested number of
   threads and prints each thread's partial square sum and the total. */
int main()
{
int threads, total_elements;
int i;
void *loc_sum = NULL;
long long total_sum = 0;
printf("Give the number of threads: ");
scanf("%d", &threads);
/*-----Fixed big size array-----*/
total_elements = 2000000; // <--- If this value gets big I get that error
// NOTE(review): integer division; a thread count of 0 divides by zero and any
// remainder elements are not assigned to a thread.
local_elements = total_elements/threads;
// NOTE(review): variable-length array with automatic storage, so all
// total_elements ints live in main()'s stack frame.
int element_array[total_elements];
for (int i=0; i<total_elements; i++) {
//Filling all the positions with 1s
element_array[i] = 1;
}
//Creating the threads
pthread_t newthread[threads];
for (int i=0; i<threads; i++) {
//The thread function gets the element array
pthread_create(&newthread[i], NULL, square_sum, (void *)element_array);
}
//Waiting for each thread to finish and creating the total_sum
for (int i=0; i<threads; i++) {
// the thread's return value is an integer packed into the void * itself
pthread_join(newthread[i], (void*) &loc_sum);
printf("Thread %d returned the local_sum: %d \n", i, (int)loc_sum);
total_sum += (int)loc_sum;
}
printf("\nThe total square sum of the array is: %lld\n", total_sum);
return 0;
}
/* Thread entry point: claims the next slice of the int array passed in arg and
   returns the sum of the squares of its elements, packed into the pointer. */
void* square_sum(void* arg) {
    const int *values = (const int *) arg;
    intptr_t acc = 0;

    //--- Start of critical section ---
    pthread_mutex_lock(&mutex);
    // part is shared, so the slice index is taken while holding the lock
    const int slice = part++;
    const int stop = (slice + 1) * local_elements;
    int idx = slice * local_elements;
    while (idx < stop) {
        acc += values[idx] * values[idx];
        idx++;
    }
    pthread_mutex_unlock(&mutex);
    //--- End of critical section ---

    return (void *) acc;
}
Kiran Biradar is correct. I get correct results with ulimit -s 80000 (I ran as root to set a high ulimit) for this hacked version of your program. Alternatively, allocating the array on the heap or declaring it static should also avoid the stack-size problem, as mentioned by another commenter.
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
int part = 0; // index of the next slice to hand out
int local_elements = 0; // slice length, set by main()
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // taken by square_sum() around its work
/*
 * Thread entry point: claims the next slice of the int array passed in arg
 * and returns a heap-allocated int holding the sum of the squares of that
 * slice. The caller owns the returned pointer and must free() it.
 * Returns NULL if the result cannot be allocated.
 */
void* square_sum(void* arg) {
    /* calloc so the accumulator starts at 0; malloc leaves it indeterminate */
    int *local_sum = calloc(1, sizeof *local_sum);
    const int *element_array = arg;

    if (local_sum == NULL) {
        return NULL;
    }

    //--- Start of critical section ---
    pthread_mutex_lock(&mutex);
    //Each thread computes its part
    int thread_part = part++;
    for (int i = thread_part*local_elements; i < (thread_part+1)*local_elements; i++) {
        *local_sum += element_array[i] * element_array[i];
    }
    pthread_mutex_unlock(&mutex);
    //--- End of critical section ---
    return local_sum;
}
/*
 * Fills a fixed-size array with 1s, splits it between the requested number of
 * threads and prints each thread's partial square sum and the grand total.
 * Returns 0 on success, 1 if the thread count is not a positive integer.
 */
int main()
{
    int threads, total_elements;
    long long total_sum = 0;

    printf("Give the number of threads: ");
    /* A non-numeric or non-positive count would make the division below invalid */
    if (scanf("%d", &threads) != 1 || threads < 1) {
        fprintf(stderr, "The number of threads must be a positive integer\n");
        return 1;
    }

    /*-----Fixed big size array-----*/
    total_elements = 2000000; // <--- If this value gets big I get that error
    local_elements = total_elements/threads;
    /* Still an automatic array, so its size is bounded by the stack limit */
    int element_array[total_elements];
    for (int i=0; i<total_elements; i++) {
        //Filling all the positions with 1s
        element_array[i] = 1;
    }

    //Creating the threads
    pthread_t newthread[threads];
    for (int i=0; i<threads; i++) {
        //The thread function gets the element array
        pthread_create(&newthread[i], NULL, square_sum, element_array);
    }

    //Waiting for each thread to finish and creating the total_sum
    for (int i=0; i<threads; i++) {
        void *ret = NULL;
        pthread_join(newthread[i], &ret);
        int *loc_sum = ret;
        if (loc_sum == NULL) {
            fprintf(stderr, "Thread %d returned no result\n", i);
            continue;
        }
        printf("Thread %d returned the local_sum: %d \n", i, *loc_sum);
        total_sum += *loc_sum;
        /* Release the result only after its last use: reading *loc_sum
           after free() is undefined behaviour. */
        free(loc_sum);
    }
    printf("\nThe total square sum of the array is: %lld\n", total_sum);
    return 0;
}
Your problem is that
int element_array[total_elements];
is too big to be allocated on the stack. 2,000,000 elements require about 8 MB of memory, and Linux's default stack size is 8 MB, as can be seen by running ulimit -s. To fix it, you can use malloc to allocate the array on the heap, or use the static keyword, which places the array in static storage (the .data/.bss segment) instead of on the stack; that segment usually has a 4 GB limit on amd64 machines.

Finding the square sum of an array with pthreads in C

What I need to do is calculate the square sum of an array. The array and the number of threads must be given by the user, and we assume the number of elements is evenly divisible by the number of threads.
My code doesn't even compile. I know I have issues with the pointers, and I should add mutexes, but I don't know exactly where. Can you point me in the right direction? I would appreciate it a lot.
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
int part = 0; // slice counter read and incremented by square_sum()
int local_elements = 0; // elements per thread, assigned in main()
/* Thread entry point: meant to sum the squares of this thread's slice of the
   array passed in arg. */
void* square_sum(void* arg)
{
int local_sum = 0;
/* NOTE(review): an array declarator cannot be initialised from a pointer;
   the indexing below treats element_array as a plain int pointer. */
int *element_array[] = (int *)arg;
//Each thread computes its part
/* NOTE(review): part is shared by all threads and is incremented here
   without any lock. */
int thread_part = part++;
/* NOTE(review): loc_elements is not declared in the visible code; the
   global defined above is named local_elements. */
for (int i = thread_part * loc_elements; i < (thread_part + 1)*loc_elements; i++) {
local_sum += element_array[i] * element_array[i];
}
/* NOTE(review): this returns the address of a local variable, which no
   longer exists once the function has returned. */
return (void*)&local_sum;
}
/* Reads the thread count and the array from the user, starts one square_sum
   thread per slice and adds up the results.
   NOTE(review): printf/scanf are used but <stdio.h> is not among the includes
   shown, and main has no declared return type. */
main()
{
int threads, total_elements;
int i;
int total_sum = 0;
printf("Give the number of threads: ");
scanf("%d", &threads);
printf("Give the number of the elements you want: ");
/* NOTE(review): the semicolon is inside the parentheses, which is a syntax error. */
scanf("%d", &total_elements;)
int element_array[total_elements];
//How many elements each thread gets
local_elements = total_elements / threads;
printf("Give the %d elements \n", total_elements);
for (i = 0; i < total_elements; i++) {
printf("No. %d:", i);
scanf("%d", &element_array[i]);
}
pthread_t newthread[threads];
//Creating the threads
for (int i = 0; i < threads; i++) {
//The start routine gets the whole element_array
pthread_create(&newthread[i], NULL, square_sum, &element_array);
}
int loc_sum;
//Waiting for each thread to finish and creating the total_sum
for (int i = 0; i < threads; i++) {
/* NOTE(review): pthread_join expects a void ** here, not the address of an int. */
pthread_join(newthread[i], &loc_sum);
total_sum += loc_sum;
}
/* NOTE(review): %d is given the address of total_sum rather than its value. */
printf("The total sum is: %d", &total_sum);
}

C function and arrays

I'm learning C programming and passing arrays to multiple functions. I'm not sure when and why my array is overwritten by something; please help me debug and spot the mistake in my code:
#include <stdio.h>
/* Prompts for howManny integers, stores them in numbers[0..howManny-1],
   echoes them back and returns the first element. */
int insertNumbers(int *numbers, int howManny){
    int slot = 0;
    while (slot < howManny) {
        printf("Insert number:");
        scanf("%d", numbers + slot);
        slot++;
    }

    printf("\nNumbers :(insertNumbers function)\n");
    int shown = 0;
    while (shown < howManny) {
        printf("%d: %d\n", shown, numbers[shown]);
        shown++;
    }
    return numbers[0];
}
/* Returns the sum of the first howManny elements of numbers (0 if none). */
int add(int *numbers, int howManny){
    int total = 0;
    int idx = 0;
    while (idx < howManny) {
        total += numbers[idx];
        idx++;
    }
    return total;
}
/* Prints a header followed by "index: value" for the first howManny
   elements of numbers. */
void printArray(int *numbers, int howManny){
    printf("\nNumbers:(print array function)\n");
    int idx = 0;
    while (idx < howManny) {
        printf("%d: %d\n", idx, numbers[idx]);
        idx++;
    }
}
/* Asks how many numbers to read, then reads, sums and prints them. */
int main(){
/* NOTE(review): numbers and numbersArray are single ints, not arrays. */
int numbers, howManny, sum = 0, numbersArray;
printf("How manny numbers do you want?");
scanf("%d", &howManny);
/* NOTE(review): the callee indexes numbers[0..howManny-1] through this
   address, so any howManny > 1 writes past the single int. */
numbersArray = insertNumbers(&numbers, howManny);
sum = add(&numbers, howManny);
printf("Total sum is: %d",sum);
/* NOTE(review): numbersArray holds only the first value returned above;
   printArray reads howManny ints starting at its address. */
printArray( &numbersArray, howManny);
return 0;
}
result is
How manny numbers do you want?3
3
Insert number:10
10
Insert number:20
20
Insert number:30
30
Numbers :(insertNumbers function)
0: 10
1: 20
2: 30
Total sum is: 60
Numbers:(print array function)
0: 10
1: 3
2: 10
It looks like my array is overwritten somewhere, but I'm not sure when and why.
Even when I try to use
printArray( &numbers, howManny);
it still doesn't work; I get the values 10, 10, 50 instead.
Your array is not created at compile time: numbers is declared as a single int, not as an array. Also, the array size is only given at run time. Therefore the array cannot have a fixed, static size, so it has to be allocated dynamically and freed at the end in order to work properly.
That being said, here is a simple solution for what you're trying to achieve.
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
/* Prompts for howManny integers, stores them in numbers[0..howManny-1] and
   then echoes the stored values. */
void insertNumbers(int *numbers, int howManny){
    for (int slot = 0; slot < howManny; ++slot) {
        printf("Insert number:");
        scanf("%d", numbers + slot);
    }

    printf("\nNumbers :(insertNumbers function)\n");
    int shown = 0;
    while (shown < howManny) {
        printf("%d: %d\n", shown, numbers[shown]);
        ++shown;
    }
}
/* Returns the sum of the first howManny elements of numbers (0 if none). */
int add(int *numbers, int howManny){
    int total = 0;
    /* walk the array front to back, counting down the elements left */
    for (int remaining = howManny; remaining > 0; --remaining) {
        total += *numbers;
        ++numbers;
    }
    return total;
}
/* Prints a header followed by "index: value" for the first howManny
   elements of numbers; the array is only read. */
void printArray(const int *numbers, int howManny){
    printf("\nNumbers:(print array function)\n");
    int idx = 0;
    while (idx < howManny) {
        printf("%d: %d\n", idx, numbers[idx]);
        ++idx;
    }
}
/*
 * Asks how many numbers to read, allocates an array of that size, then reads,
 * sums and prints the numbers.
 * Returns 0 on success, EXIT_FAILURE on invalid input or allocation failure.
 */
int main(){
    int howManny;
    printf("How manny numbers do you want?");
    /* reject non-numeric input and sizes that cannot be allocated */
    if (scanf("%d", &howManny) != 1 || howManny <= 0) {
        fprintf(stderr, "Please enter a positive number of elements\n");
        return EXIT_FAILURE;
    }
    // allocate memory (dynamic allocation); calloc also checks the size multiplication
    int *numbers = calloc((size_t)howManny, sizeof *numbers);
    // validate memory allocation with a real check: assert() is compiled out under NDEBUG
    if (numbers == NULL) {
        perror("calloc");
        return EXIT_FAILURE;
    }
    // numbers is filled in place through the pointer
    insertNumbers(numbers, howManny);
    printf("Total sum is: %d", add(numbers, howManny));
    printArray(numbers, howManny);
    // free dynamic allocation
    free(numbers);
    return 0;
}

Passing Array as argument to a new thread in C

I am attempting to pass an array as an argument to a function in a new thread using pthread_create; is this possible? I have an array of integers and a calculate-average function that is run by the new thread, but I cannot seem to pass my array into it correctly. Here is my code:
int nums[]; /* NOTE(review): declared without a size; main() stores argc - 1 values in it */
int average; /* result written by calcAvg() and printed by main() */
int size = 0; /* number of values stored in nums, counted in main() */
void *calcAvg(int *nums[]); /* NOTE(review): pthread_create expects a start routine taking void * */
/* Converts the command-line arguments to ints, has a thread average them and
   prints the result. */
int main(int argc, char *argv[]){
/* initialize an array of the integers to be passed */
/* NOTE(review): this is an expression statement that only reads one element;
   it does not give nums a size. */
nums[argc - 1];
for(int i = 0; i < argc - 1; i++){
nums[i] = atoi(argv[i + 1]);
size++;
}
/* Thread Identifier */
pthread_t avgThread;
pthread_create(&avgThread, NULL, calcAvg, nums);
/* wait for the thread so that average is set before it is printed */
pthread_join(avgThread, NULL);
printf("average= %d", average);
}
/* Thread routine: meant to store the average of the first `size` values in
   the global `average`.
   NOTE(review): the parameter is declared as an array of int pointers, so
   nums[i] below is a pointer, not an int. */
void *calcAvg(int *nums[]){
/* NOTE(review): sum is never initialised before being added to. */
int sum;
for(int i = 0; i < size; i++){
sum += nums[i];
}
/* NOTE(review): divides by zero when size is 0 (no arguments given). */
average = sum / (size);
pthread_exit(0);
}
There are lots of problems in your code; I fixed enough of them for it to compile.
I hope it helps.
Compile: gcc -o main main.c -lpthread
Execute: ./main 2 5
Output: average = 3
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
int average; /* written by calcAvg(), printed by main() */
int size = 0; /* number of values in the array handed to calcAvg() */
void *calcAvg(void *arg);
/*
 * Converts the command-line arguments to ints, has a thread compute their
 * integer average and prints it.
 * Returns 0 on success, EXIT_FAILURE when no numbers are given or when the
 * array or the thread cannot be created.
 */
int main(int argc, char *argv[]){
    /* Without at least one number there is nothing to average, and
       calcAvg() would divide by a size of zero. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s number [number ...]\n",
                argc > 0 ? argv[0] : "main");
        return EXIT_FAILURE;
    }

    /* initialize an array of the integers to be passed */
    int *nums = malloc((size_t)(argc - 1) * sizeof *nums);
    if (nums == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }
    for (int i = 1; i < argc; i++) {
        nums[i-1] = atoi(argv[i]);
        size++;
    }

    /* Thread Identifier */
    pthread_t avgThread;
    if (pthread_create(&avgThread, NULL, calcAvg, nums) != 0) {
        fprintf(stderr, "pthread_create failed\n");
        free(nums);
        return EXIT_FAILURE;
    }
    /* average is only valid once the thread has finished */
    pthread_join(avgThread, NULL);
    printf("average = %d \n",average);
    free(nums);
    return 0;
}
/* Thread entry point: averages the `size` ints that arg points to and stores
   the integer result in the global `average`. */
void *calcAvg(void *arg){
    const int *val_p = arg;
    int sum = 0;
    for (int i = 0; i < size; i++) {
        sum += val_p[i];
    }
    /* leave average untouched when there is nothing to divide by */
    if (size > 0) {
        average = sum / size;
    }
    pthread_exit(0);
}
Change the following
/* Version from the question. NOTE(review): the parameter is an array of int
   pointers rather than the void * a thread routine receives, and sum is read
   before it is ever assigned. */
void *calcAvg(int *nums[]){
int sum;
for(int i = 0; i < size; i++){
sum += nums[i];
}
average = sum / (size);
pthread_exit(0);
}
to
/* Thread entry point: averages the `size` ints that arg points to and stores
   the integer result in the global `average`. */
void *calcAvg(void *arg){
    int *val_p = (int *) arg;
    int sum = 0; /* must start at zero; an uninitialised local is indeterminate */
    for(int i = 0; i < size; i++){
        sum += val_p[i];
    }
    /* leave average untouched when there is nothing to divide by */
    if (size > 0) {
        average = sum / (size);
    }
    pthread_exit(0);
}
The main issue is that 'pthread_create()' takes a void pointer as its last argument and hands that same void pointer to the thread function. Your thread function is declared to take an array of pointers to integers instead. Run "man pthread_create" in a terminal to see the argument types you should be using.
What you really want to do is just pass the array of integers to the thread. In C, array indexing is just notation for pointer arithmetic: writing nums[i] is equivalent to *(nums + i), and &nums[i] is equivalent to &nums[0] + i or just nums + i. The last form works because the name of an array in C can be used as a pointer to the first element of the array.
Change void *calcAvg(int *nums[]) to void *calcAvg(void* thread_args). Then in 'calcAvg' write int *nums = (int*)thread_args. Now you can use nums in that function just as if you had called calcAvg(nums), which in essence you have done.

Resources