program spinning on pthread lock - c

After banging my head against a wall for a few hours during this exercise, I am stuck at that wall.
First off, this is a program designed to find and print all prime numbers between 1 and ceiling, where ceiling is some user input. The design is to implement POSIX threads.
In my program, it runs successfully until one of the later iterations of the thread's method. When it gets to that iteration, it reaches the line pthread_mutex_lock(lock); and spins, forcing me to stop it with Ctrl+Z. The two inputs I've been using are 1 for the number of threads and 10 for the ceiling. The flaw is reproducible: it happens every time I've tried it. Note: although this code should eventually support multiple threads, I'd like to get it working correctly with 1 child thread before adding more.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
/* numbermarker[i] is the sieve flag for the number i+1 (main prints i+1 for
   every entry that is still 0). */
int* numbermarker = NULL;
/* Values queued by placevalue() and read back by threadmethod(). */
int* buffer = NULL;
/* Per-index flags that threadmethod() sums to decide when to stop. */
int* checked = NULL;
/* Index of the next buffer entry threadmethod() will read. */
int pullposition = 0;
/* Index of the next buffer entry placevalue() will write. */
int placeposition = 0;
/* Heap-allocated mutex taken around the pullposition update. */
pthread_mutex_t* lock;
/* Upper bound entered by the user; also the element count of the arrays. */
int ceiling;
/* Stores value in the next free slot of the shared buffer and moves the
   write cursor forward by one. Neither the cursor range nor any lock is
   checked here; both are left to the caller. */
void placevalue(int value){
    buffer[placeposition++] = value;
}
/* Worker thread body. Each pass takes the next value l from buffer (under the
   lock), scans every k in (l, ceiling], then sums checked[] and returns once
   the sum equals ceiling. */
void* threadmethod(){
int i;
int k;
int l;
while(1){
/* debug trace: pullposition and buffer are read here before the lock is taken */
printf("pull %d number %d \n",pullposition, buffer[pullposition]);
pthread_mutex_lock(lock);
printf("FLAG\n");
/* l is used as a divisor below; main zero-fills buffer, so an entry that was
   never written by placevalue() would make k%l divide by zero */
l = buffer[pullposition];
pullposition++;
printf("pullTWO %d number %d \n",pullposition, buffer[pullposition-1]);
pthread_mutex_unlock(lock);
/* k runs up to and including ceiling, while checked/numbermarker/buffer are
   allocated with ceiling elements in main, so checked[ceiling] is one past
   the end */
for(k=l+1;k<=ceiling;k++){
if(k%l){
if(k%2){
checked[k]=1;
/* called on every pass of the outer loop, without the lock and without
   any bound on placeposition */
placevalue(k);
}
}
else{
/* k is a multiple of l: flag the slot that represents k */
numbermarker[k-1] = 1;
}
}
int sum=0;
for(i=0; i<ceiling; i++){
if(numbermarker[i]){
checked[i] = numbermarker[i];
}
/* debug dump of the first ten entries, whatever the value of ceiling */
printf("checked|%d|%d|%d|%d|%d|%d|%d|%d|%d|%d|\n",
checked[0], checked[1], checked[2], checked[3], checked[4], checked[5], checked[6], checked[7], checked[8], checked[9]);
sum += checked[i];
printf("sum %d ceiling %d\n",sum,ceiling);
}
printf("number |%d|%d|%d|%d|%d|%d|%d|%d|%d|%d|\n",
numbermarker[0], numbermarker[1], numbermarker[2], numbermarker[3], numbermarker[4], numbermarker[5], numbermarker[6], numbermarker[7], numbermarker[8], numbermarker[9]);
/* the only exit: every one of the ceiling entries of checked is 1 */
if(sum == ceiling){
return NULL;
}
}
}
/* Entry point: reads the thread count and the ceiling, allocates and
   initialises the shared arrays and the mutex, runs the worker threads to
   completion, prints every i+1 whose numbermarker entry is still 0, and
   releases everything that was allocated. Exits with -1 on bad input,
   allocation failure or a pthread error. */
int main()
{
    int numthreads;
    int i;
    int shown;
    pthread_t *tid;

    printf("Enter number of threads: \n");
    if(scanf("%d", &numthreads) != 1 || numthreads < 1){
        printf("Invalid number of threads \n");
        exit(-1);
    }
    printf("Enter the highest value to check \n");
    if(scanf("%d", &ceiling) != 1 || ceiling < 1){
        printf("Invalid highest value \n");
        exit(-1);
    }
    /* numbermarker holds 1's and 0's.
       1 = number has been checked or is
           confirmed not to be a prime
       0 = number is a possible prime
       The idea behind these values is that the next
       prime can always be identified by the 0 with
       the lowest index
    */
    numbermarker = malloc(sizeof(int)*ceiling);
    checked = malloc(sizeof(int)*ceiling);
    /*This will hold the primes as they are found*/
    buffer = malloc(sizeof(int)*ceiling);
    /*allocate space for the lock*/
    lock = malloc(sizeof(pthread_mutex_t));
    if(numbermarker == NULL || checked == NULL || buffer == NULL || lock == NULL){
        printf("Out of memory \n");
        exit(-1);
    }
    if(pthread_mutex_init(lock,NULL)){
        printf("Could not initialise lock \n");
        exit(-1);
    }
    for(i=0; i<ceiling; i++){
        /* index 0 stands for the number 1, which is never a prime */
        numbermarker[i] = (i<1) ? 1 : 0;
        checked[i]=0;
        buffer[i]=0;
        printf("%d \n",numbermarker[i]);
    }
    checked[0]=1;
    placevalue(2);
    /* debug dump of at most the first ten flags; bounded by ceiling so a
       small ceiling no longer reads past the end of checked */
    shown = ceiling < 10 ? ceiling : 10;
    printf("checked|");
    for(i=0;i<shown;i++){
        printf("%d|", checked[i]);
    }
    printf("\n");
    tid = malloc(sizeof(pthread_t) * numthreads);
    if(tid == NULL){
        printf("Out of memory \n");
        exit(-1);
    }
    for(i=0;i<numthreads;i++){
        if(pthread_create(&tid[i], NULL, threadmethod, NULL)){
            printf("Could not create thread \n");
            exit(-1);
        }
    }
    for(i=0;i<numthreads;i++){
        if(pthread_join(tid[i], NULL)){
            printf("Error Joining with thread \n");
            exit(-1);
        }
    }
    free(tid);
    for(i=0;i<ceiling;i++){
        if(numbermarker[i] == 0){
            printf("%d sdfsddd \n", numbermarker[i]);
            printf("%d \n", i+1);
        }
    }
    /* all workers have been joined, so nothing can still hold the lock */
    pthread_mutex_destroy(lock);
    free(lock);
    free(buffer);
    free(numbermarker);
    free(checked);
    lock=NULL;
    buffer=NULL;
    numbermarker=NULL;
    checked=NULL;
    return(0);
}

I've tried your code and in
void placevalue(int value)
{
/* placeposition is used as an index without being compared to the size of buffer */
buffer[placeposition] = value;
placeposition++;
}
placeposition goes beyond the size of buffer. This results in undefined behaviour, a very plausible outcome of which is the trashing of the mutex (which is malloc()ed right after buffer).
On top of that, there's a race condition in placevalue(). However, if you're using a single worker thread, you are not (yet) running into it.

Related

Use mutex cause deadlock in c

I'm trying to make a mini threads game. There should be a manager thread that creates as many threads as the user wants, and for every round (the user chooses the number of rounds) each thread needs to generate a random number. After all the threads have finished generating their numbers, the ManagerGame thread should print which thread generated the greatest number and add a point to that thread.
At the end of the game the ManagerGame thread should print who is the winner.
Now i wrote the next code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
/* State of one player. */
struct Player{
/* 1 once the player has drawn a number for the current round, 0 otherwise */
int isFinished;
/* index of this player in game_manager.Players */
int id;
/* number drawn for the round (1..100 in PlayerFunc) */
int number;
};
/* Shared game state owned by the manager thread. */
struct GameManager{
/* array with one entry per player, allocated in ManagerGameFunc */
struct Player *Players;
/* per-player counter incremented in SetWinnerToRound for each round won */
int *WinnersTable;
/* number of rounds to play */
int Rounds;
};
/* Settings read from the user in main and handed to ManagerGameFunc. */
struct GameDetails{
/* number of player threads to create */
int PlayersCount;
/* number of rounds to play */
int RoundsCount;
};
/* The single game instance used by the manager and the helper functions. */
struct GameManager game_manager;
/* Mutex and condition variable that both thread functions lock and wait on. */
pthread_mutex_t lock;
pthread_cond_t cond;
/* Loop flag read by PlayerFunc: set to 1 when the game starts, 0 when it ends. */
int IsGameActive;
/* Thread entry points. */
void* ManagerGameFunc(void* gameDetails);
void* PlayerFunc(void* playerDetails);
/* Round bookkeeping helpers operating on game_manager. */
void SetWinnerToRound(int roundNumber, int playersCount);
void StartNewRound(int playersCount);
void printWinnerToGame(int playersCount);
int IsRoundEnded(int playersCount);
/* Entry point: asks for the number of players and rounds, runs the manager
   thread to completion and then destroys the mutex and condition variable.
   Returns 0 on success and 1 on invalid input or if the manager thread
   cannot be started. */
int main(){
    srand(time(NULL));
    pthread_t ManagerThread;
    struct GameDetails gameDetails;
    printf("----GAME----\n");
    printf("Enter amount of players\n");
    /* The format is "%d" with no trailing "\n": white space in a scanf format
       keeps reading until the next non-blank character, which makes the
       program look stuck after the number is typed. The fflush(stdin) calls
       were dropped because flushing an input stream is undefined. */
    if(scanf("%d", &gameDetails.PlayersCount) != 1 || gameDetails.PlayersCount < 1){
        printf("Invalid amount of players\n");
        return 1;
    }
    printf("Enter amount of rounds\n");
    if(scanf("%d", &gameDetails.RoundsCount) != 1 || gameDetails.RoundsCount < 0){
        printf("Invalid amount of rounds\n");
        return 1;
    }
    pthread_mutex_init(&lock, NULL);
    pthread_cond_init(&cond, NULL);
    printf("----GAME STARTED----\n");
    if(pthread_create(&ManagerThread, NULL, ManagerGameFunc, &gameDetails)){
        printf("Could not start the manager thread\n");
        pthread_mutex_destroy(&lock);
        pthread_cond_destroy(&cond);
        return 1;
    }
    /* gameDetails lives on this stack frame, so wait before leaving it */
    pthread_join(ManagerThread, NULL);
    printf("Game finished\n");
    pthread_mutex_destroy(&lock);
    pthread_cond_destroy(&cond);
    return 0;
}
void* ManagerGameFunc(void* gameDetails){
struct GameDetails game_details = *((struct GameDetails *)gameDetails);
pthread_t *threads = (pthread_t *)malloc(game_details.PlayersCount * sizeof(pthread_t));
game_manager.Players = (struct Player *)malloc(game_details.PlayersCount * sizeof(struct Player));
game_manager.WinnersTable = (int *)malloc(game_details.PlayersCount * sizeof(int));
game_manager.Rounds = game_details.RoundsCount;
IsGameActive = 1;
for(int i = 0; i < game_details.PlayersCount; i++){
game_manager.Players[i].isFinished = 0;
game_manager.Players[i].id = i;
pthread_create(&threads[i], NULL, PlayerFunc, &game_manager.Players[i]);
}
for(int i = 0; i < game_manager.Rounds; i++){
pthread_mutex_lock(&lock);
while(!IsRoundEnded(game_details.PlayersCount)){
pthread_cond_wait(&cond, &lock);
}
SetWinnerToRound(i + 1, game_details.PlayersCount);
StartNewRound(game_details.PlayersCount);
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
}
IsGameActive = 0;
printf("\nGame end ! The winner of the game is ...\n");
printWinnerToGame(game_details.PlayersCount);
}
void* PlayerFunc(void* playerDetails){
struct Player player = *((struct Player *)playerDetails);
while(IsGameActive){
pthread_mutex_lock(&lock);
while(player.isFinished){
pthread_cond_wait(&cond, &lock);
}
player.number = (rand() % (100 - 1 + 1)) + 1;
printf("%d", player.number);
player.isFinished = 1;
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
}
}
/* Finds the player with the largest number (the lowest index wins a tie),
   prints it as the winner of round roundNumber using 1-based numbering and
   adds one to that player's entry in WinnersTable. */
void SetWinnerToRound(int roundNumber, int playersCount){
    int best = 0;
    for(int p = 1; p < playersCount; p++){
        if(game_manager.Players[p].number > game_manager.Players[best].number){
            best = p;
        }
    }
    printf("--Round %d: winner is player number %d--\n", roundNumber, best + 1);
    game_manager.WinnersTable[best]++;
}
/* Clears the isFinished flag of every player so a new round can begin.
   The loop starts at index 0; starting at 1 left the first player marked
   as finished for the rest of the game. */
void StartNewRound(int playersCount){
    for(int i = 0; i < playersCount; i++){
        game_manager.Players[i].isFinished = 0;
    }
}
/* Prints the overall winner: the player with the most round wins recorded
   in WinnersTable (the lowest index wins a tie), using 1-based numbering.
   The comparison uses the accumulated scores rather than Players[].number,
   which only holds the value drawn in the most recent round. */
void printWinnerToGame(int playersCount){
    int id = 0;
    for(int i = 1; i < playersCount; i++){
        if(game_manager.WinnersTable[i] > game_manager.WinnersTable[id]){
            id = i;
        }
    }
    printf("Player number %d is the winner of the game !!\n", id + 1);
}
/* Returns 1 when every one of the first playersCount players has its
   isFinished flag set, 0 as soon as one of them does not. */
int IsRoundEnded(int playersCount){
    int idx = 0;
    while(idx < playersCount && game_manager.Players[idx].isFinished){
        idx++;
    }
    /* the scan only reaches the end when no unfinished player was found */
    return idx >= playersCount;
}
But the problem is that when i run it the output is
the image is in the link
and the terminal is stuck like it show in the image.
I think that maybe the threads is in deadlock mode, isnt it?
Thank you for your help!
UPDATE
So I listened to @n.m. and @pilcrow: in PlayerFunc I changed the Player object to a pointer to the argument, because before the change the function made a copy of the argument, so every change I made to the object was not visible outside the player thread. In addition, I changed pthread_cond_signal to pthread_cond_broadcast, and now it works. Thank you all!
void* PlayerFunc(void* playerDetails){
struct Player player = *((struct Player *)playerDetails);
....
PlayerFunc creates a copy of its argument representing that thread/player's state. ManagerGameFunc will never see modifications to that copy, in particular whether it isFinished or not.
All your threads follow the same pattern:
pthread_mutex_lock(&lock);
while(!predicate(....)){
pthread_cond_wait(&cond, &lock);
}
....
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
First time through the loop all predicates are false. All threads therefore peacefully sleep in pthread_cond_wait. It is never signalled, because neither thread has a chance to progress and signal it. So they wait forever.

Incompatible pointer type with pthread_create (C)

I'm working on a personal project that uses multi threading to split an array, search for a target, and return the number of matches. I've got one bug and a few errors with my early code.
Errors...
main.c:117:10: warning: passing argument 1 of 'pthread_create' from incompatible pointer type [enabled by default]
In file included from main.c:5:0: /usr/include/pthread.h:225:12: note: expected 'pthread_t * restrict' but argument is of type 'pthread_t **'
I'm very new to POSIX and have no clue what's wrong here.
Bug...
My big loop that is supposed to loop as long as index < totalElems only loops once. It goes into the small loop and drops out of both instead of looping more. I'm not sure why this is.
Header file...
#ifndef COUNT_ARRAY_H
#define COUNT_ARRAY_H
// structure declarations
/* Per-thread work description handed to ThreadFunc. */
typedef struct
{
int threadNum;   /* sequence number of the thread */
int *array;      /* array the thread searches */
int first;       /* start of the thread's range; main.c stores an array value here */
int last;        /* end of the thread's range; main.c stores an array value here */
int target;      /* value to look for */
int numFound;    /* presumably the number of matches found; not set in main.c - TODO confirm */
} ThreadInfo;
// function prototypes
void* ThreadFunc(void *vptr);
#endif // COUNT_ARRAY_H
.
.
Main.c file....
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include "count_array.h"
/* Entry point (work in progress): reads the element count, the maximum random
   value, the segment count and the target, fills an array with random values
   and starts a search thread. Joining the threads and reporting the results
   are still only outlined as comments at the end. */
int main(void)
{
auto int numSegs;
auto int numSegElems;
auto int maxRand;
auto int target;
auto int totalElems;
auto int totalFound = 0;
auto ThreadInfo *infoPtr;
/* declared as a pointer but never pointed at any storage in this function */
auto pthread_t *threadHandles;
auto int index = 0;
auto int first;
auto int last;
auto int threadNum = 0;
//get primary info from user...
printf(" Please enter the total number of elements? ");
scanf("%d", &totalElems);
printf(" Please enter the maximum random value: ");
scanf("%d", &maxRand);
printf(" Please enter the number of segments (1 to 15857): ");
scanf("%d", &numSegs);
if(numSegs > 15857)
{
puts(" Too many segments for machine!");
exit(EXIT_FAILURE);
}
/* only the upper bound of numSegs was checked; zero is not rejected before
   this division */
numSegElems = totalElems/numSegs;
// configure the array to work with
// declare array here...
auto int myArray[totalElems];
//and fill array here
for(; index < totalElems; index++)
{
// % rand() and maxRand to get good range and
//not go beyond users max number
myArray[index] = (rand() % maxRand);
//test printf...ignore if still here at 5/18/17 or later
printf(" %d \n", myArray[index]);
}
// get the target value to look for
printf(" Please enter the target value: ");
scanf("%d",&target);
// display initial information
printf("*** Begin search: target = %d, # elements = %d, # segments = %d, "
"# segment elements = %d\n"
, target
, totalElems
, numSegs
, numSegElems);
// initialize the array first/last indexes into the integer array
// >>>50 elems total/5 = 10 threads total and 5 elems in each thread<<<
for(index = 0; index < totalElems; index++)
{
/* first receives the value stored in myArray[0], not an index */
first = myArray[0];
/* true for a single value of index only, so the body below runs at most
   once over the whole loop */
if(index == numSegElems)
{
puts(" in if ");
/* last also receives an array value, not an index */
last = myArray[index];
printf(" %d \n", index);
// allocate an array to store the thread handles
/* arraySeg is local to this block and is never filled in; its address is
   stored in infoPtr->array below and handed to the new thread */
auto int arraySeg[numSegElems];
// loop and create threads (# of segments)
// allocate a thread info structure from the heap
//using malloc
infoPtr = malloc(sizeof(ThreadInfo));
if(NULL == infoPtr)
{
fprintf(stderr, "Unable to allocate ThreadInfo struct for "
"thread #%d\n", threadNum);
continue;
}
// store the information in the allocated structure
infoPtr->target = target;
infoPtr->threadNum = threadNum;
infoPtr->first = first;
infoPtr->last = last;
infoPtr->array = arraySeg;
// create the secondary thread, passing the thread info
/* &threadHandles has type pthread_t ** (the source of the compiler warning),
   and &infoPtr is the address of the local pointer variable rather than of
   the ThreadInfo it points to */
if(pthread_create(&threadHandles, NULL, ThreadFunc, &infoPtr))
{
fprintf(stderr, "Error: failed to create thread #%d\n",
threadNum);
continue;
}
// update the first/last array indexes for the next thread
//set to zero again??
}//end small loop to make individual threads
//increment thread #
/* executed on every pass of the outer loop, whether or not a thread was
   created */
++threadNum;
}//end big loop
// loop and join the threads to fetch each thread's results
// join with the next thread
// get the total number of matches from the thread's infoPtr
// and display a message
// release the infoPtr structure back to the heap
// display the final results
// release heap memory
/* returns without joining the thread and without freeing infoPtr */
return 0;
} // end of "main"
I've done a smaller project like this before, but I'm not seeing what's wrong here. I need to pass infoPtr and use the members in ThreadFunc. I'm doing this like my other programs exactly, but it's not working. I've tried searching the site and google, but maybe I can't figure it out because it's too specific? Also, removing the & doesn't help. Any help would be appreciated!
I'm not seeing what's wrong here.
There are many things wrong here.
Let's consider a very reduced test case:
pthread_t *threadHandles;
ThreadInfo *info;
for(index = 0; index < totalElems; index++) {
info = malloc(sizeof(ThreadInfo));
pthread_create(&threadHandles, NULL, ThreadFunc, &info);
}
return 0;
Problems:
You create N threads, then return without waiting for any of them. As soon as main returns, all threads evaporate with it (the entire program terminates).
On each iteration of the loop, pthread_create assigns a new value to the same threadHandles variable. You lose the previous value, and thus the ability to wait for that thread (this is also producing the compiler warning that is the subject of your question).
You pass the same info value into each of the threads, thus leaking memory you allocated for them, and creating a data race between them.
Here is a fix for above problems (there may be other problems that I didn't spot immediately):
pthread_t *threadHandles;
ThreadInfo *info;
/* One handle and one ThreadInfo per thread, so no handle is overwritten and
   no two threads share the same argument block. */
threadHandles = calloc(totalElems, sizeof(pthread_t));
info = calloc(totalElems, sizeof(ThreadInfo));
for(index = 0; index < totalElems; index++) {
info[index].threadNum = threadNum;
// Initialize other elements of info here.
pthread_create(&threadHandles[index], NULL, ThreadFunc, &info[index]);
}
// Wait for threads to finish
for(index = 0; index < totalElems; index++) {
pthread_join(threadHandles[index], NULL);
}
// Free memory
free(threadHandles);
free(info);
return 0;

Compute the summation of a given interval using multiple threads

For my homework, I need to compute the squares of integers in the interval (0,N) (e.g. (0,50) in a way that the load is distributed equally among threads (e.g. 5 threads). I have been advised to use small chunks from the interval and assign it to the thread. For that, I am using a queue. Here's my code:
#include <stdio.h>
#include <pthread.h>
#define QUEUE_SIZE 50
/* Fixed-capacity circular FIFO of ints.
   q     - element storage
   first - index of the oldest element (the next one dequeued)
   last  - index of the most recently enqueued element
   count - number of elements currently stored */
typedef struct {
    int q[QUEUE_SIZE];
    int first,last;
    int count;
} queue;

/* Puts q into the empty state. last starts one slot before first so that the
   first enqueue wraps round to index 0. */
void init_queue(queue *q)
{
    q->first = 0;
    q->last = QUEUE_SIZE - 1;
    q->count = 0;
}

/* Appends x at the tail of q. A queue that already holds QUEUE_SIZE elements
   is left unchanged and x is dropped, instead of overwriting the oldest
   element and pushing count past the capacity. */
void enqueue(queue *q,int x)
{
    if (q->count >= QUEUE_SIZE) {
        return;
    }
    q->last = (q->last + 1) % QUEUE_SIZE;
    q->q[ q->last ] = x;
    q->count = q->count + 1;
}

/* Removes and returns the oldest element of q. An empty queue is left
   unchanged and 0 is returned, instead of handing back a stale slot and
   driving count negative. */
int dequeue(queue *q)
{
    if (q->count <= 0) {
        return 0;
    }
    int x = q->q[ q->first ];
    q->first = (q->first + 1) % QUEUE_SIZE;
    q->count = q->count - 1;
    return x;
}
queue q; /* the one queue instance every thread draws from */
/* Thread body: keeps taking values from q while its count is non-zero, adds
   the square of each to a private sum, then prints the sum and a greeting.
   data carries the creation index packed into the pointer value.
   q is read and modified here without any locking. */
void* threadFunc(void* data)
{
    int my_data = (int)data; /* creation index passed by main */
    int sum = 0;
    for (;;)
    {
        if (!q.count)
        {
            break;
        }
        int value = dequeue(&q);
        sum += value * value;
        usleep(1);
    }
    printf("SUM = %d\n", sum);
    printf("Hello from new thread %u - I was created in iteration %d\n",pthread_self(), my_data);
    pthread_exit(NULL); /* terminate the thread */
}
/* Entry point: fills the shared queue with the integers 0..49, starts five
   worker threads, waits for all of them and releases the handle array.
   Returns 0 on success and -1 if the handle array cannot be allocated or a
   thread cannot be created. */
int main(int argc, char* argv[])
{
    enum { NUM_VALUES = 50, NUM_THREADS = 5 };
    int i;
    int rc; //return value
    (void)argc;
    (void)argv;
    init_queue(&q);
    for (i=0; i<NUM_VALUES; i++)
    {
        enqueue(&q, i);
    }
    pthread_t *tid = malloc(NUM_THREADS * sizeof *tid);
    if (tid == NULL)
    {
        printf("\n ERROR: could not allocate thread handles \n");
        return(-1);
    }
    for(i=0; i<NUM_THREADS; i++)
    {
        /* the loop index travels to the thread packed into the pointer value */
        rc = pthread_create(&tid[i], NULL, threadFunc, (void*)i);
        if(rc) /* could not create thread */
        {
            /* rc is an int, so it is printed with %d */
            printf("\n ERROR: return code from pthread_create is %d \n", rc);
            return(-1);
        }
    }
    for(i=0; i<NUM_THREADS; i++)
    {
        pthread_join(tid[i], NULL);
    }
    free(tid);
    return 0;
}
The output is not always correct. Most of the time it is correct, 40425, but sometimes the value is bigger. Is it because the threads are running in parallel and accessing the queue at the same time (the processor on my laptop is an Intel i7)? I would appreciate feedback on my concerns.
I think contrary to what some of the other people here suggested, you don't need any synchronization primitives like semaphores or mutexes at all. Something like this:
Given some array like
int values[50];
I'd create a couple of threads (say: 5), each of which getting a pointer to a struct with the offset into the values array and a number of squares to compute, like
/* Work description for one thread. */
typedef struct ThreadArgs {
/* first element of the slice this thread squares in place */
int *values;
/* number of consecutive elements in the slice */
size_t numSquares;
} ThreadArgs;
You can then start your threads, each of which being told to process 10 numbers:
for ( i = 0; i < 5; ++i ) {
ThreadArgs *args = malloc( sizeof( ThreadArgs ) );
args->values = values + 10 * i;
args->numSquares = 10;
pthread_create( ...., threadFunc, args );
}
Each thread then simply computes the squares it was assigned, like:
/* Thread body: squares, in place, the numSquares ints starting at
   args->values, then frees the ThreadArgs block it was given.
   Always returns NULL; the results are left in the values array. */
void *threadFunc( void *data )
{
    ThreadArgs *args = data;
    size_t i; /* same type as numSquares, so the comparison is not mixed-sign */
    for ( i = 0; i < args->numSquares; ++i ) {
        args->values[i] = args->values[i] * args->values[i];
    }
    free( args );
    /* a thread start routine has to return a value */
    return NULL;
}
At the end, you'd just use a pthread_join to wait for all threads to finish, after which you have your squares in the values array.
All your threads read from the same queue. This leads to a race condition. For instance, if the number 10 is read simultaneously by two threads, your result will be off by 100. You should protect your queue with a mutex. Put the following print in the dequeue function to see which numbers are repeated:
printf("Dequeing %d in thread %d\n", x, pthread_self());
Your code doesn't show where the results are accumulated to a single variable. You should protect that variable with a mutex as well.
Alternatively, you can pass the start number as the input parameter to each thread from the loop so that each thread can work on its set of numbers. First thread will work on 1-10, the second one on 11-20 and so on. In this approach, you have to use mutex only the part where the threads update the global sum variable at the end of their execution.
First you need to define what it means to be "distributed equally among threads." If you mean that each thread does the same amount of work as the other threads, then I would create a single queue, put all the numbers in the queue, and start all threads (which are the same code.) Each thread tries to get a value from the queue which must be protected by a mutex unless it is thread safe, calculates the partial answer from the value taken from the thread, and adds the result to the total which must also be protected by a mutex. If you mean that each thread will execute an equal amount of times as each of the other threads, then you need to make a priority queue and put all the numbers in the queue along with the thread number that should compute on it. Each thread then tries to get a value from the queue that matches its thread number. From the thread point of view, it should try to get a value from the queue, do the work, then try to get another value. If there are no more values to get, then the thread should exit. The main program does a join on all threads and the program exits when all threads have exited.

Producer and consumer with semaphores, multiple pthreads, 2 buffers

OK. So I have 2 buffers, implemented as heap. It has an init, insert, and delete function. The priority in this case does not matter though.
For each buffer, there are 3 producer threads producing an item (letter) and inserting it into a buffer. Three producers for a buffer holding uppercase letters, and three for a buffer holding lowercase letters. Obviously only 1 producer may append at a time to a buffer.
There are 10 consumer threads. These threads can access either buffer, but only up to 3 at a time per buffer, and not when a producer is producing. I am trying to instruct the threads to go first to the buffer with more items in it, then the other.
My problem is with deadlocks I believe. I am getting segmentation faults, so I assume that my semaphores are not working correctly (I have not implemented them correctly).
I first worked on this problem using a single buffer, with multiple consumers/producers and was able to get it to work. I have basically segmented the 1 buffer solution into multiple parts.
I'm using 3 semaphores for each buffer. A mutex, so only 1 item can take or append something per buffer. These are named mutex and mutex2. emptycount and fillcount are the counting sems used that start at the max_buffer_size and 0 respectively.
The consumers must take a letter from each buffer before consuming. The order in which they do this is not important, so I'm trying to tell each one to take something from the buffer with more in it first, then the other buffer. I've put a semaphore starting at 3 outside of the consumer code for each buffer. These are sems upper and lower, and are used to make sure only 3 consumers can enter any one buffer at a time.
Variables countUpper and countLower are supposed to count how many items are in each buffer at the time.
I'm sure my sems are placed incorrectly but i cannot find out where, I'm very new to threading. The queues are abstracted and in another file, but they work correctly. Also, I'm only producing lowercase letters atm, but that shouldn't matter.
Any help is appreciated.
#include "pq.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <semaphore.h>
#include <pthread.h>
#define MAX_CPRODUCER 3
#define MAX_LPRODUCER 3
#define MAX_CONSUMER 10
#define MAX_INT 2147483647
// Structure for the items produced by the producer threads
typedef struct product product_t;
struct product {
char item;                   // the letter carried by this product
int PrdPID;                  // id passed to produce() by the producer
unsigned int sequence_num;   // producer's running count when the item was made
};
// Bundles the two buffers so that one pointer can be handed to each consumer
typedef struct pq_struct pq_struct_t;
struct pq_struct
{
pq_t lower;   // buffer filled by ProducerLower
pq_t upper;   // buffer filled by ProducerCapital
};
// Running item counts for the upper and lower buffers
int countUpper, countLower;
// Upper buffer: mutex is initialised to 1, fillCount to 0 and emptyCount to
// MAX_INT in main
sem_t mutex, fillCount, emptyCount;
// The same three semaphores for the lower buffer
sem_t mutex2, fillCount2, emptyCount2;
// Both initialised to 3 in main; each is held around one buffer's consume step
sem_t lower, upper;
//Produce a letter to put in.
//pid and count are stored in the new product as PrdPID and sequence_num.
//Returns a heap-allocated product that the caller owns. The callers do not
//check the result, so an allocation failure ends the program here instead of
//being written through as a NULL pointer.
product_t *produce(int pid, unsigned int count)
{
    product_t *aProduct = malloc( sizeof(*aProduct) );
    if (aProduct == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    aProduct->item = (char)('a' + rand() % 26); //random lowercase letter
    aProduct->PrdPID = pid;
    aProduct->sequence_num = count;
    return aProduct;
}
//Insert a product into the buffer that pq points to.
//Every item is inserted with the same key, 1.
void append(void *pq, product_t *aProduct)
{
    pq_t *buffer = (pq_t*)pq;
    void *payload = (void*)aProduct;
    pq_insert(buffer, 1, payload);
}
//Remove one product from the buffer that pq points to and return it
product_t *take(void *pq)
{
    return (product_t*)pq_delete((pq_t*)pq);
}
//Consume a pair of products: report aProduct, then release both.
//Either pointer may be NULL; a NULL aProduct is simply not reported.
//id is accepted for the caller's convenience and is not used.
void consume(product_t *aProduct, product_t *theProduct, int id)
{
    (void)id;
    if (aProduct != NULL)
    {
        //sequence_num is unsigned, so it is printed with %u
        printf("Removing: ID = %d Count = %u Letter = %c\n", aProduct->PrdPID, aProduct->sequence_num, aProduct->item);
    }
    free(aProduct); //Takes place of consume
    free(theProduct);
}
//Producer thread for the upper buffer. pq points to that buffer.
//Loops forever: makes an item, waits for a free slot and for the buffer
//mutex, inserts the item, bumps countUpper and signals one filled slot.
void *ProducerCapital (void *pq)
{
    pq_t *target = pq;
    unsigned int produced = 0;
    while (1)
    {
        product_t *fresh = produce(1, ++produced);
        sem_wait(&emptyCount);
        sem_wait(&mutex);
        append(target, fresh);
        ++countUpper;
        sem_post(&mutex);
        sem_post(&fillCount);
    }
    pthread_exit (NULL);
}
//Producer thread for the lower buffer. pq points to that buffer.
//Loops forever: makes an item, waits for a free slot and for the buffer
//mutex, inserts the item, bumps countLower and signals one filled slot.
void *ProducerLower (void *pq)
{
    pq_t *target = pq;
    unsigned int produced = 0;
    while (1)
    {
        product_t *fresh = produce(1, ++produced);
        sem_wait(&emptyCount2);
        sem_wait(&mutex2);
        append(target, fresh);
        ++countLower;
        sem_post(&mutex2);
        sem_post(&fillCount2);
    }
    pthread_exit (NULL);
}
void ConsumeUpper(pq_t *pqUpper, product_t *capProd)
{
sem_wait(&fillCount);
sem_wait(&mutex);
//Take Item
capProd = (product_t*)take((void*)pqUpper);
//
sem_post(&mutex);
sem_post(&emptyCount);
}
void ConsumeLower(pq_t *pqLower, product_t *lowProd)
{
sem_wait(&fillCount2);
sem_wait(&mutex2);
//Take Item
lowProd = (product_t*)take((void*)pqLower);
//
sem_post(&mutex2);
sem_post(&emptyCount2);
}
void *Consumer (void *pq)
{
pq_struct_t *sharePqs = (pq_struct_t*)pq;
product_t *capitalProduct = NULL;
product_t *lowerProduct = NULL;
for (;;)
{
if(countUpper < countLower)
{
sem_wait(&lower);
--countLower;
ConsumeLower(&sharePqs->lower, capitalProduct);
sem_post(&lower);
sem_wait(&upper);
--countUpper;
ConsumeUpper(&sharePqs->upper, capitalProduct);
sem_post(&upper);
}
else
{
sem_wait(&upper);
--countUpper;
ConsumeUpper(&sharePqs->upper, capitalProduct);
sem_post(&upper);
sem_wait(&lower);
--countLower;
ConsumeLower(&sharePqs->lower, capitalProduct);
sem_post(&lower);
}
consume(capitalProduct, lowerProduct , 2);
}
pthread_exit (NULL);
}
//Start count threads running fn(arg) and store their handles in ids.
//role is the word printed in the progress message. Exits the program if a
//thread cannot be created.
static void spawn_threads(pthread_t *ids, int count, const char *role,
                          void *(*fn)(void *), void *arg)
{
    for (int i = 0; i < count; ++i)
    {
        printf ("In main: creating %s thread %d\n", role, i);
        int code = pthread_create(&ids[i], NULL, fn, arg);
        if (code)
        {
            printf ("Error: pthread_create: %d\n", code);
            exit (-1);
        }
    }
}

//Entry point: initialises both buffers and all semaphores, starts the
//consumer and producer threads, then ends the main thread with pthread_exit
//so that the other threads keep running.
int main ()
{
    //static storage: the worker threads keep using the buffers after main
    //has called pthread_exit, when main's automatic variables may no longer
    //be accessed
    static pq_struct_t my_pqs;
    pthread_t cProducers[MAX_CPRODUCER];
    pthread_t lProducers[MAX_LPRODUCER];
    pthread_t consumers[MAX_CONSUMER];

    //Initialize the queues
    pq_init(&my_pqs.upper);
    pq_init(&my_pqs.lower);
    srand(time(NULL)); // randomize random function call
    countUpper = 0;
    countLower = 0;
    sem_init(&mutex, 0, 1);
    sem_init(&fillCount, 0, 0);
    sem_init(&emptyCount, 0, MAX_INT);
    sem_init(&mutex2, 0, 1);
    sem_init(&fillCount2, 0, 0);
    sem_init(&emptyCount2, 0, MAX_INT);
    sem_init(&lower, 0, 3);
    sem_init(&upper, 0, 3);

    spawn_threads(consumers, MAX_CONSUMER, "consumer", Consumer, (void *)&my_pqs);
    spawn_threads(cProducers, MAX_CPRODUCER, "producer", ProducerCapital, (void *)&my_pqs.upper);
    spawn_threads(lProducers, MAX_LPRODUCER, "producer", ProducerLower, (void *)&my_pqs.lower);
    pthread_exit (NULL);
}

C: pthread performance woes. How can I make this code perform as expected?

I have created this little program to calculate pi using probability and ratios. In order to make it run faster I decided to give multithreading with pthreads a shot. Unfortunately, even after doing much searching around I was unable to solve the problem I have in that when I run the threadFunc function, with one thread, whether that be with a pthread, or just normally called from the calculate_pi_mt function, the performance is much better (at least twice or if not 3 times better) than when I try running it with two threads on my dual core machine. I have tried disabling optimizations to no avail. As far as I can see, when the thread is running it is using local variables apart from at the end when I have used a mutex lock to create the sum of hits...
Firstly are there any tips for creating code that will run better here? (ie style) because I'm just learning by trying this stuff.
And secondly would there be any reason for these obvious performance problems?
When running with number of threads set to 1, one of my cpus maxes out at 100%. When set to two, the second cpu rises to roughly 80%-90%, but all this extra work it is apparently doing is to no avail! Could it be the use of the rand() function?
/* Argument block shared by every worker thread. */
struct arguments {
/* number of threads created so far; incremented by calculate_pi_mt */
int n_threads;
/* number of samples each thread draws */
int rays;
/* running total of hits, updated under mutex */
int hits_in;
/* protects hits_in */
pthread_mutex_t *mutex;
};
/* Worker: draws shared->rays random points with rand(), counts those whose
   distance from the origin is below 1.0, and adds that count to
   shared->hits_in while holding the mutex. Always returns NULL. */
void *threadFunc(void *arg)
{
    struct arguments *shared = (struct arguments*)arg;
    int inside = 0;
    for (int shot = 0; shot < shared->rays; shot++)
    {
        double x = ((double)rand())/((double)RAND_MAX);
        double y = ((double)rand())/((double)RAND_MAX);
        double r = (double)sqrt(pow(x, 2) + pow(y, 2));
        if (r < 1.0){
            inside++;
        }
    }
    /* only this final merge touches shared state */
    pthread_mutex_lock(shared->mutex);
    shared->hits_in += inside;
    pthread_mutex_unlock(shared->mutex);
    return NULL;
}
/* Estimates pi by sampling random points on `threads` worker threads.
   rays    - total number of samples requested; each thread draws
             rays/threads of them, so a remainder is not sampled
   threads - number of worker threads, from 1 to MAXTHREADS
   Returns the estimate, or 0.0 when threads is out of range, memory or the
   mutex cannot be set up, or no thread could be started. */
double calculate_pi_mt(int rays, int threads){
    double answer;
    int c;
    int created = 0;
    int traced;
    /* thread_ary has MAXTHREADS slots, and threads is used as a divisor */
    if (threads < 1 || threads > MAXTHREADS){
        printf("Error: number of threads must be between 1 and %d\n", MAXTHREADS);
        return 0.0;
    }
    unsigned int iseed = (unsigned int)time(NULL);
    srand(iseed);
    if (rays % threads){
        printf("Error: number of rays is not evenly divisible by threads\n");
    }
    /* argument initialization */
    struct arguments* args = malloc(sizeof *args);
    if (args == NULL){
        printf("Error allocating arguments!\n");
        return 0.0;
    }
    args->hits_in = 0;
    args->rays = rays/threads;
    args->n_threads = 0;
    args->mutex = malloc(sizeof(pthread_mutex_t));
    if (args->mutex == NULL || pthread_mutex_init(args->mutex, NULL)){
        printf("Error creating mutex!\n");
        free(args->mutex);
        free(args);
        return 0.0;
    }
    pthread_t thread_ary[MAXTHREADS];
    c=0;
    while (c < threads){
        if (pthread_create(&(thread_ary[c]),NULL,threadFunc, args)){
            printf("Error when creating thread\n");
            /* stop here so that only started threads are joined below */
            break;
        }
        args->n_threads += 1;
        created += 1;
        printf("Created Thread: %d\n", args->n_threads);
        c+=1;
    }
    c=0;
    while (c < created){
        printf("main waiting for thread %d to terminate...\n", c+1);
        if (pthread_join(thread_ary[c],NULL)){
            printf("Error while waiting for thread to join\n");
        }
        printf("Destroyed Thread: %d\n", c+1);
        c+=1;
    }
    /* number of samples that were actually drawn */
    traced = created * args->rays;
    printf("Hits in %d\n", args->hits_in);
    printf("Rays: %d\n", traced);
    answer = traced > 0 ? 4.0 * (double)(args->hits_in)/(double)(traced) : 0.0;
    //freeing everything!
    pthread_mutex_destroy(args->mutex);
    free(args->mutex);
    free(args);
    return answer;
}
There's a couple of problems I can see:
rand() is not thread-safe. Use drand48_r() (which generates a double in the range [0.0, 1.0) natively, which is what you want)
You only create one struct arguments structure, then try to use that for multiple threads. You need to create a separate one for each thread (just use an array).
Here's how I'd clean up your approach. Note how we don't need to use any mutexes - each thread just stashes its own return value in a separate location, and the main thread adds them up after the other threads have finished:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <pthread.h>
/* Per-thread record: inputs for the worker plus the slot for its result. */
struct thread_info {
/* zero-based index of the thread; also mixed into its random seed */
int thread_n;
/* handle filled in by pthread_create */
pthread_t thread_id;
/* number of samples this thread draws */
int rays;
/* result: number of samples that fell inside the unit circle */
int hits_in;
};
/* Seeds the caller's private drand48 state in buffer from the current time
   of day, mixing in thread_n. */
void seed_rand(int thread_n, struct drand48_data *buffer)
{
    struct timeval now;
    gettimeofday(&now, NULL);
    long int seed = now.tv_sec * thread_n + now.tv_usec;
    srand48_r(seed, buffer);
}
void *threadFunc(void *arg)
{
struct thread_info *thread_info = arg;
struct drand48_data drand_buffer;
int n = 0;
const int rays = thread_info->rays;
int hits_in = 0;
double x;
double y;
double r;
seed_rand(thread_info->thread_n, &drand_buffer);
for (n = 0; n < rays; n++)
{
drand48_r(&drand_buffer, &x);
drand48_r(&drand_buffer, &y);
r = x * x + y * y;
if (r < 1.0){
hits_in++;
}
}
thread_info->hits_in = hits_in;
return NULL;
}
/* Estimates pi by sampling random points on `threads` worker threads.
   rays    - total number of samples requested; rounded down to a multiple
             of threads
   threads - number of worker threads, at least 1
   Each thread writes its own hit count into its thread_info slot and the
   counts are summed here after the joins, so no mutex is needed.
   Returns the estimate, or 0.0 when threads is below 1, the thread table
   cannot be allocated, or no thread could be started. */
double calculate_pi_mt(int rays, int threads)
{
    int c;
    int created = 0;
    int hits_in = 0;
    int per_thread;
    /* threads is used as a divisor below */
    if (threads < 1) {
        printf("Error: number of threads must be at least 1\n");
        return 0.0;
    }
    if (rays % threads) {
        printf("Error: number of rays is not evenly divisible by threads\n");
        rays = (rays / threads) * threads;
    }
    per_thread = rays / threads;
    /* argument initialization */
    struct thread_info *thr = malloc(threads * sizeof thr[0]);
    if (thr == NULL) {
        printf("Error allocating thread table\n");
        return 0.0;
    }
    for (c = 0; c < threads; c++) {
        thr[c].thread_n = c;
        thr[c].rays = per_thread;
        thr[c].hits_in = 0;
        if (pthread_create(&thr[c].thread_id, NULL, threadFunc, &thr[c])) {
            printf("Error when creating thread\n");
            /* stop here so that only started threads are joined below */
            break;
        }
        created++;
        printf("Created Thread: %d\n", thr[c].thread_n);
    }
    for (c = 0; c < created; c++) {
        printf("main waiting for thread %d to terminate...\n", c);
        if (pthread_join(thr[c].thread_id, NULL)) {
            printf("Error while waiting for thread to join\n");
        }
        hits_in += thr[c].hits_in;
        printf("Destroyed Thread: %d\n", c+1);
    }
    /* count only the samples that were actually drawn */
    rays = created * per_thread;
    printf("Hits in %d\n", hits_in);
    printf("Rays: %d\n", rays);
    double answer = rays > 0 ? (4.0 * hits_in) / rays : 0.0;
    free(thr);
    return answer;
}
You're using far too many synchronization primitives. You should sum the local_hits at the end in the main thread, and not use a mutex to update it in an asynchronous fashion. Or, at least, you could use an atomic operation (it's just an int) to do it instead of lock an entire mutex to update one int.
Threading has a cost. It may be that, as your useful computing code looks very simple, the cost of thread management (cost paid when changing thread and synchronisation cost) is much higher than the benefit.

Resources