How to synchronize manager/worker pthreads without a join?

How to synchronize manager/worker pthreads without a join? - c

I'm familiar with multithreading and I've developed many multithreaded programs in Java and Objective-C successfully. But I couldn't achieve the following in C using pthreads without using a join from the main thread:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define NUM_OF_THREADS 2
struct thread_data {
int start;
int end;
int *arr;
};
void print(int *ints, int n);
void *processArray(void *args);
int main(int argc, const char * argv[])
{
int numOfInts = 10;
int *ints = malloc(numOfInts * sizeof(int));
for (int i = 0; i < numOfInts; i++) {
ints[i] = i;
}
print(ints, numOfInts); // prints [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
pthread_t threads[NUM_OF_THREADS];
struct thread_data thread_data[NUM_OF_THREADS];
// these vars are used to calculate the index ranges for each thread
int remainingWork = numOfInts, amountOfWork;
int startRange, endRange = -1;
for (int i = 0; i < NUM_OF_THREADS; i++) {
amountOfWork = remainingWork / (NUM_OF_THREADS - i);
startRange = endRange + 1;
endRange = startRange + amountOfWork - 1;
thread_data[i].arr = ints;
thread_data[i].start = startRange;
thread_data[i].end = endRange;
pthread_create(&threads[i], NULL, processArray, (void *)&thread_data[i]);
remainingWork -= amountOfWork;
}
// 1. Signal to the threads to start working
// 2. Wait for them to finish
print(ints, numOfInts); // should print [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
free(ints);
return 0;
}
void *processArray(void *args)
{
struct thread_data *data = (struct thread_data *)args;
int *arr = data->arr;
int start = data->start;
int end = data->end;
// 1. Wait for a signal to start from the main thread
for (int i = start; i <= end; i++) {
arr[i] = arr[i] + 1;
}
// 2. Signal to the main thread that you're done
pthread_exit(NULL);
}
void print(int *ints, int n)
{
printf("[");
for (int i = 0; i < n; i++) {
printf("%d", ints[i]);
if (i+1 != n)
printf(", ");
}
printf("]\n");
}
I would like to achieve the following in the above code:
In main():
Signal to the threads to start working.
Wait for the background threads to finish.
In processArray():
Wait for a signal to start from the main thread
Signal to the main thread that you're done
I don't want to use a join in the main thread because in the real application, the main thread will create the threads once, and then it will signal to the background threads to work many times, and I can't let the main thread proceed unless all the background threads have finished working. In the processArray function, I will put an infinite loop as following:
void *processArray(void *args)
{
struct thread_data *data = (struct thread_data *)args;
while (1)
{
// 1. Wait for a signal to start from the main thread
int *arr = data->arr;
int start = data->start;
int end = data->end;
// Process
for (int i = start; i <= end; i++) {
arr[i] = arr[i] + 1;
}
// 2. Signal to the main thread that you're done
}
pthread_exit(NULL);
}
Note that I'm new to C and the posix API, so excuse me if I'm missing something obvious. But I really tried many things, starting from using a mutex, and an array of semaphores, and a mixture of both, but without success. I think a condition variable may help, but I couldn't understand how it could be used.
Thanks for your time.
Problem Solved:
Thank you guys so much! I was finally able to get this to work safely and without using a join by following your tips. Although the solution is somewhat ugly, it gets the job done and the performance gains is worth it (as you'll see below). For anyone interested, this is a simulation of the real application I'm working on, in which the main thread keeps giving work continuously to the background threads:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#define NUM_OF_THREADS 5
struct thread_data {
int id;
int start;
int end;
int *arr;
};
pthread_mutex_t currentlyIdleMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t currentlyIdleCond = PTHREAD_COND_INITIALIZER;
int currentlyIdle;
pthread_mutex_t workReadyMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t workReadyCond = PTHREAD_COND_INITIALIZER;
int workReady;
pthread_cond_t currentlyWorkingCond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t currentlyWorkingMutex= PTHREAD_MUTEX_INITIALIZER;
int currentlyWorking;
pthread_mutex_t canFinishMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t canFinishCond = PTHREAD_COND_INITIALIZER;
int canFinish;
void print(int *ints, int n);
void *processArray(void *args);
int validateResult(int *ints, int num, int start);
int main(int argc, const char * argv[])
{
int numOfInts = 10;
int *ints = malloc(numOfInts * sizeof(int));
for (int i = 0; i < numOfInts; i++) {
ints[i] = i;
}
// print(ints, numOfInts);
pthread_t threads[NUM_OF_THREADS];
struct thread_data thread_data[NUM_OF_THREADS];
workReady = 0;
canFinish = 0;
currentlyIdle = 0;
currentlyWorking = 0;
// these vars are used to calculate the index ranges for each thread
int remainingWork = numOfInts, amountOfWork;
int startRange, endRange = -1;
// Create the threads and give each one its data struct.
for (int i = 0; i < NUM_OF_THREADS; i++) {
amountOfWork = remainingWork / (NUM_OF_THREADS - i);
startRange = endRange + 1;
endRange = startRange + amountOfWork - 1;
thread_data[i].id = i;
thread_data[i].arr = ints;
thread_data[i].start = startRange;
thread_data[i].end = endRange;
pthread_create(&threads[i], NULL, processArray, (void *)&thread_data[i]);
remainingWork -= amountOfWork;
}
int loops = 1111111;
int expectedStartingValue = ints[0] + loops; // used to validate the results
// The elements in ints[] should be incremented by 1 in each loop
while (loops-- != 0) {
// Make sure all of them are ready
pthread_mutex_lock(&currentlyIdleMutex);
while (currentlyIdle != NUM_OF_THREADS) {
pthread_cond_wait(&currentlyIdleCond, &currentlyIdleMutex);
}
pthread_mutex_unlock(&currentlyIdleMutex);
// All threads are now blocked; it's safe to not lock the mutex.
// Prevent them from finishing before authorized.
canFinish = 0;
// Reset the number of currentlyWorking threads
currentlyWorking = NUM_OF_THREADS;
// Signal to the threads to start
pthread_mutex_lock(&workReadyMutex);
workReady = 1;
pthread_cond_broadcast(&workReadyCond );
pthread_mutex_unlock(&workReadyMutex);
// Wait for them to finish
pthread_mutex_lock(&currentlyWorkingMutex);
while (currentlyWorking != 0) {
pthread_cond_wait(&currentlyWorkingCond, &currentlyWorkingMutex);
}
pthread_mutex_unlock(&currentlyWorkingMutex);
// The threads are now waiting for permission to finish
// Prevent them from starting again
workReady = 0;
currentlyIdle = 0;
// Allow them to finish
pthread_mutex_lock(&canFinishMutex);
canFinish = 1;
pthread_cond_broadcast(&canFinishCond);
pthread_mutex_unlock(&canFinishMutex);
}
// print(ints, numOfInts);
if (validateResult(ints, numOfInts, expectedStartingValue)) {
printf("Result correct.\n");
}
else {
printf("Result invalid.\n");
}
// clean up
for (int i = 0; i < NUM_OF_THREADS; i++) {
pthread_cancel(threads[i]);
}
free(ints);
return 0;
}
void *processArray(void *args)
{
struct thread_data *data = (struct thread_data *)args;
int *arr = data->arr;
int start = data->start;
int end = data->end;
while (1) {
// Set yourself as idle and signal to the main thread, when all threads are idle main will start
pthread_mutex_lock(&currentlyIdleMutex);
currentlyIdle++;
pthread_cond_signal(&currentlyIdleCond);
pthread_mutex_unlock(&currentlyIdleMutex);
// wait for work from main
pthread_mutex_lock(&workReadyMutex);
while (!workReady) {
pthread_cond_wait(&workReadyCond , &workReadyMutex);
}
pthread_mutex_unlock(&workReadyMutex);
// Do the work
for (int i = start; i <= end; i++) {
arr[i] = arr[i] + 1;
}
// mark yourself as finished and signal to main
pthread_mutex_lock(&currentlyWorkingMutex);
currentlyWorking--;
pthread_cond_signal(&currentlyWorkingCond);
pthread_mutex_unlock(&currentlyWorkingMutex);
// Wait for permission to finish
pthread_mutex_lock(&canFinishMutex);
while (!canFinish) {
pthread_cond_wait(&canFinishCond , &canFinishMutex);
}
pthread_mutex_unlock(&canFinishMutex);
}
pthread_exit(NULL);
}
int validateResult(int *ints, int n, int start)
{
int tmp = start;
for (int i = 0; i < n; i++, tmp++) {
if (ints[i] != tmp) {
return 0;
}
}
return 1;
}
void print(int *ints, int n)
{
printf("[");
for (int i = 0; i < n; i++) {
printf("%d", ints[i]);
if (i+1 != n)
printf(", ");
}
printf("]\n");
}
I'm not sure though if pthread_cancel is enough for clean up! As for the barrier, it would've been of a great help if it wasn't limited to some OSs as mentioned by #Jeremy.
Benchmarks:
I wanted to make sure that these many conditions aren't actually slowing down the algorithm, so I've setup this benchmark to compare the two solutions:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
#define NUM_OF_THREADS 5
struct thread_data {
int start;
int end;
int *arr;
};
pthread_mutex_t currentlyIdleMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t currentlyIdleCond = PTHREAD_COND_INITIALIZER;
int currentlyIdle;
pthread_mutex_t workReadyMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t workReadyCond = PTHREAD_COND_INITIALIZER;
int workReady;
pthread_cond_t currentlyWorkingCond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t currentlyWorkingMutex= PTHREAD_MUTEX_INITIALIZER;
int currentlyWorking;
pthread_mutex_t canFinishMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t canFinishCond = PTHREAD_COND_INITIALIZER;
int canFinish;
void *processArrayMutex(void *args);
void *processArrayJoin(void *args);
double doItWithMutex(pthread_t *threads, struct thread_data *data, int loops);
double doItWithJoin(pthread_t *threads, struct thread_data *data, int loops);
int main(int argc, const char * argv[])
{
int numOfInts = 10;
int *join_ints = malloc(numOfInts * sizeof(int));
int *mutex_ints = malloc(numOfInts * sizeof(int));
for (int i = 0; i < numOfInts; i++) {
join_ints[i] = i;
mutex_ints[i] = i;
}
pthread_t join_threads[NUM_OF_THREADS];
pthread_t mutex_threads[NUM_OF_THREADS];
struct thread_data join_thread_data[NUM_OF_THREADS];
struct thread_data mutex_thread_data[NUM_OF_THREADS];
workReady = 0;
canFinish = 0;
currentlyIdle = 0;
currentlyWorking = 0;
int remainingWork = numOfInts, amountOfWork;
int startRange, endRange = -1;
for (int i = 0; i < NUM_OF_THREADS; i++) {
amountOfWork = remainingWork / (NUM_OF_THREADS - i);
startRange = endRange + 1;
endRange = startRange + amountOfWork - 1;
join_thread_data[i].arr = join_ints;
join_thread_data[i].start = startRange;
join_thread_data[i].end = endRange;
mutex_thread_data[i].arr = mutex_ints;
mutex_thread_data[i].start = startRange;
mutex_thread_data[i].end = endRange;
pthread_create(&mutex_threads[i], NULL, processArrayMutex, (void *)&mutex_thread_data[i]);
remainingWork -= amountOfWork;
}
int numOfBenchmarkTests = 100;
int numberOfLoopsPerTest= 1000;
double join_sum = 0.0, mutex_sum = 0.0;
for (int i = 0; i < numOfBenchmarkTests; i++)
{
double joinTime = doItWithJoin(join_threads, join_thread_data, numberOfLoopsPerTest);
double mutexTime= doItWithMutex(mutex_threads, mutex_thread_data, numberOfLoopsPerTest);
join_sum += joinTime;
mutex_sum+= mutexTime;
}
double join_avg = join_sum / numOfBenchmarkTests;
double mutex_avg= mutex_sum / numOfBenchmarkTests;
printf("Join average : %f\n", join_avg);
printf("Mutex average: %f\n", mutex_avg);
double diff = join_avg - mutex_avg;
if (diff > 0.0)
printf("Mutex is %.0f%% faster.\n", 100 * diff / join_avg);
else if (diff < 0.0)
printf("Join is %.0f%% faster.\n", 100 * diff / mutex_avg);
else
printf("Both have the same performance.");
free(join_ints);
free(mutex_ints);
return 0;
}
// From https://stackoverflow.com/a/2349941/408286
double get_time()
{
struct timeval t;
struct timezone tzp;
gettimeofday(&t, &tzp);
return t.tv_sec + t.tv_usec*1e-6;
}
double doItWithMutex(pthread_t *threads, struct thread_data *data, int num_loops)
{
double start = get_time();
int loops = num_loops;
while (loops-- != 0) {
// Make sure all of them are ready
pthread_mutex_lock(&currentlyIdleMutex);
while (currentlyIdle != NUM_OF_THREADS) {
pthread_cond_wait(&currentlyIdleCond, &currentlyIdleMutex);
}
pthread_mutex_unlock(&currentlyIdleMutex);
// All threads are now blocked; it's safe to not lock the mutex.
// Prevent them from finishing before authorized.
canFinish = 0;
// Reset the number of currentlyWorking threads
currentlyWorking = NUM_OF_THREADS;
// Signal to the threads to start
pthread_mutex_lock(&workReadyMutex);
workReady = 1;
pthread_cond_broadcast(&workReadyCond );
pthread_mutex_unlock(&workReadyMutex);
// Wait for them to finish
pthread_mutex_lock(&currentlyWorkingMutex);
while (currentlyWorking != 0) {
pthread_cond_wait(&currentlyWorkingCond, &currentlyWorkingMutex);
}
pthread_mutex_unlock(&currentlyWorkingMutex);
// The threads are now waiting for permission to finish
// Prevent them from starting again
workReady = 0;
currentlyIdle = 0;
// Allow them to finish
pthread_mutex_lock(&canFinishMutex);
canFinish = 1;
pthread_cond_broadcast(&canFinishCond);
pthread_mutex_unlock(&canFinishMutex);
}
return get_time() - start;
}
double doItWithJoin(pthread_t *threads, struct thread_data *data, int num_loops)
{
double start = get_time();
int loops = num_loops;
while (loops-- != 0) {
// create them
for (int i = 0; i < NUM_OF_THREADS; i++) {
pthread_create(&threads[i], NULL, processArrayJoin, (void *)&data[i]);
}
// wait
for (int i = 0; i < NUM_OF_THREADS; i++) {
pthread_join(threads[i], NULL);
}
}
return get_time() - start;
}
void *processArrayMutex(void *args)
{
struct thread_data *data = (struct thread_data *)args;
int *arr = data->arr;
int start = data->start;
int end = data->end;
while (1) {
// Set yourself as idle and signal to the main thread, when all threads are idle main will start
pthread_mutex_lock(&currentlyIdleMutex);
currentlyIdle++;
pthread_cond_signal(&currentlyIdleCond);
pthread_mutex_unlock(&currentlyIdleMutex);
// wait for work from main
pthread_mutex_lock(&workReadyMutex);
while (!workReady) {
pthread_cond_wait(&workReadyCond , &workReadyMutex);
}
pthread_mutex_unlock(&workReadyMutex);
// Do the work
for (int i = start; i <= end; i++) {
arr[i] = arr[i] + 1;
}
// mark yourself as finished and signal to main
pthread_mutex_lock(&currentlyWorkingMutex);
currentlyWorking--;
pthread_cond_signal(&currentlyWorkingCond);
pthread_mutex_unlock(&currentlyWorkingMutex);
// Wait for permission to finish
pthread_mutex_lock(&canFinishMutex);
while (!canFinish) {
pthread_cond_wait(&canFinishCond , &canFinishMutex);
}
pthread_mutex_unlock(&canFinishMutex);
}
pthread_exit(NULL);
}
void *processArrayJoin(void *args)
{
struct thread_data *data = (struct thread_data *)args;
int *arr = data->arr;
int start = data->start;
int end = data->end;
// Do the work
for (int i = start; i <= end; i++) {
arr[i] = arr[i] + 1;
}
pthread_exit(NULL);
}
And the output is:
Join average : 0.153074
Mutex average: 0.071588
Mutex is 53% faster.
Thank you again. I really appreciate your help!

There are several synchronization mechanisms you can use (condition variables, for example). I think the simplest would be to use a pthread_barrier to synchronize the the start of the threads.
Assuming that you want all of the threads to 'sync up' on each loop iteration, you can just reuse the barrier. If you need something more flexible, a condition variable might be more appropriate.
When you decide it's time for the thread to wrap up (you haven't indicated how the threads will know to break out of the infinite loop - a simple shared variable might be used for that; the shared variable could be an atomic type or protected with a mutex), the main() thread should use pthread_join() to wait for all the threads to complete.

You need to use a different synchronization technique than join, that's clear.
Unfortunately you have a lot of options. One is a "synchronization barrier", which basically is a thing where each thread that reaches it blocks until they've all reached it (you specify the number of threads in advance). Look at pthread_barrier.
Another is to use a condition-variable/mutex pair (pthread_cond_*). When each thread finishes it takes the mutex, increments a count, signals the condvar. The main thread waits on the condvar until the count reaches the value it expects. The code looks like this:
// thread has finished
mutex_lock
++global_count
// optional optimization: only execute the next line when global_count >= N
cond_signal
mutex_unlock
// main is waiting for N threads to finish
mutex_lock
while (global_count < N) {
cond_wait
}
mutex_unlock
Another is to use a semaphore per thread -- when the thread finishes it posts its own semaphore, and the main thread waits on each semaphore in turn instead of joining each thread in turn.
You also need synchronization to re-start the threads for the next job -- this could be a second synchronization object of the same type as the first, with details changed for the fact that you have 1 poster and N waiters rather than the other way around. Or you could (with care) re-use the same object for both purposes.
If you've tried these things and your code didn't work, maybe ask a new specific question about the code you tried. All of them are adequate to the task.

You are working at the wrong level of abstraction. This problem has been solved already. You are reimplementing a work queue + thread pool.
OpenMP seems like a good fit for your problem. It converts #pragma annotations into threaded code. I believe it would let you express what you're trying to do pretty directly.
Using libdispatch, what you're trying to do would be expressed as a dispatch_apply targeting a concurrent queue. This implicitly waits for all child tasks to complete. Under OS X, it's implemented using a non-portable pthread workqueue interface; under FreeBSD, I believe it manages a group of pthreads directly.
If it is portability concerns driving you to use raw pthreads, don't use pthread barriers. Barriers are an additional extension over and above basic POSIX threads. OS X for example does not support it. For more, see POSIX.
Blocking the main thread till all child threads have completed can be done using a count protected by a condition variable or, even more simply, using a pipe and a blocking read where the number of bytes to read matches the number of threads. Each thread writes one byte on work completion, then sleeps till it gets new work from the main thread. The main thread unblocks once each thread has written its "I'm done!" byte.
Passing work to the child threads can be done using a mutex protecting the work-descriptor and a condition to signal new work. You could use a single array of work descriptors that all threads draw from. On signal, each one tries to grab the mutex. On grabbing the mutex, it would dequeue some work, signal anew if the queue is nonempty, and then process its work, after which it would signal completion to the master thread.
You could reuse this "work queue" to unblock the main thread by enqueueing the results, with the main thread waiting till the result queue length matches the number of threads; the pipe approach is just using a blocking read to do this count for you.

To tell all the threads to start working, it can be as simple as a global integer variable which is initialized to zero, and the threads simply wait until it's non-zero. This way you don't need the while (1) loop in the thread function.
For waiting until they are all done, pthread_join is simplest as it will actually block until the thread it's joining is done. It's also needed to clean up system stuff after the thread (like otherwise the return value from the thread will be stored for the remainder of the program). As you have an array of all pthread_t for the threads, just loop over them one by one. As that part of your program doesn't do anything else, and has to wait until all threads are done, just waiting for them in order is okay.

Related

Use mutex cause deadlock in c

Im trying to make a mini threads game, should be a manager thread that create amount of thread as the user want, and for every round (the user choose the amount of rounds) the threads need to random number, and after all the threads finished to random number the managerGame thread should print the thread that random the greatest number and add point to this thread.
At the end of the game the ManagerGame thread should print who is the winner.
Now i wrote the next code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
struct Player{
int isFinished;
int id;
int number;
};
struct GameManager{
struct Player *Players;
int *WinnersTable;
int Rounds;
};
struct GameDetails{
int PlayersCount;
int RoundsCount;
};
struct GameManager game_manager;
pthread_mutex_t lock;
pthread_cond_t cond;
int IsGameActive;
void* ManagerGameFunc(void* gameDetails);
void* PlayerFunc(void* playerDetails);
void SetWinnerToRound(int roundNumber, int playersCount);
void StartNewRound(int playersCount);
void printWinnerToGame(int playersCount);
int IsRoundEnded(int playersCount);
int main(){
srand(time(NULL));
pthread_t ManagerThread;
pthread_mutex_init(&lock, NULL);
pthread_cond_init(&cond, NULL);
struct GameDetails gameDetails;
printf("----GAME----\n");
printf("Enter amount of players\n");
fflush(stdin);
scanf("%d\n", &gameDetails.PlayersCount);
printf("Enter amount of rounds\n");
fflush(stdin);
scanf("%d\n", &gameDetails.RoundsCount);
printf("----GAME STARTED----\n");
pthread_create(&ManagerThread, NULL, ManagerGameFunc, &gameDetails);
pthread_join(ManagerThread, NULL);
printf("Game finished\n");
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&cond);
}
void* ManagerGameFunc(void* gameDetails){
struct GameDetails game_details = *((struct GameDetails *)gameDetails);
pthread_t *threads = (pthread_t *)malloc(game_details.PlayersCount * sizeof(pthread_t));
game_manager.Players = (struct Player *)malloc(game_details.PlayersCount * sizeof(struct Player));
game_manager.WinnersTable = (int *)malloc(game_details.PlayersCount * sizeof(int));
game_manager.Rounds = game_details.RoundsCount;
IsGameActive = 1;
for(int i = 0; i < game_details.PlayersCount; i++){
game_manager.Players[i].isFinished = 0;
game_manager.Players[i].id = i;
pthread_create(&threads[i], NULL, PlayerFunc, &game_manager.Players[i]);
}
for(int i = 0; i < game_manager.Rounds; i++){
pthread_mutex_lock(&lock);
while(!IsRoundEnded(game_details.PlayersCount)){
pthread_cond_wait(&cond, &lock);
}
SetWinnerToRound(i + 1, game_details.PlayersCount);
StartNewRound(game_details.PlayersCount);
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
}
IsGameActive = 0;
printf("\nGame end ! The winner of the game is ...\n");
printWinnerToGame(game_details.PlayersCount);
}
void* PlayerFunc(void* playerDetails){
struct Player player = *((struct Player *)playerDetails);
while(IsGameActive){
pthread_mutex_lock(&lock);
while(player.isFinished){
pthread_cond_wait(&cond, &lock);
}
player.number = (rand() % (100 - 1 + 1)) + 1;
printf("%d", player.number);
player.isFinished = 1;
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
}
}
void SetWinnerToRound(int roundNumber, int playersCount){
int max = game_manager.Players[0].number;
int id = 0;
for(int i = 1; i < playersCount; i++){
if(game_manager.Players[i].number > max){
max = game_manager.Players[i].number;
id = i;
}
}
printf("--Round %d: winner is player number %d--\n", roundNumber, id + 1);
game_manager.WinnersTable[id]++;
}
void StartNewRound(int playersCount){
for(int i = 1; i < playersCount; i++){
game_manager.Players[i].isFinished = 0;
}
}
void printWinnerToGame(int playersCount){
int max = game_manager.Players[0].number;
int id = 0;
for(int i = 1; i < playersCount; i++){
if(game_manager.Players[i].number > max){
max = game_manager.Players[i].number;
id = i;
}
}
printf("Player number %d is the winner of the game !!\n", id + 1);
}
int IsRoundEnded(int playersCount){
for(int i = 0; i < playersCount; i++)
if(!game_manager.Players[i].isFinished)
return 0;
return 1;
}
But the problem is that when i run it the output is
the image is in the link
and the terminal is stuck like it show in the image.
I think that maybe the threads is in deadlock mode, isnt it?
Thank you for your help!
UPDATE
So I heard to #n.m. and to #pilcrow, and in PlayerFunc i changed the Player object to pointer to the argument cause before the change the function made a copy of the argument and every time i tried to change something in the object it didnt change in the PlayerThread. In addition i changed the pthread_cond_signal to pthread_cond_broadcast and now it work. Thank you all!

void* PlayerFunc(void* playerDetails){
struct Player player = *((struct Player *)playerDetails);
....
PlayerFunc creates a copy of its argument representing that thread/player's state. ManagerGameFunc will never see modifications to that copy, in particular whether it isFinished or not.

All your threads follow the same pattern:
pthread_mutex_lock(&lock);
while(!predicate(....)){
pthread_cond_wait(&cond, &lock);
}
....
pthread_cond_signal(&cond);
pthread_mutex_unlock(&lock);
First time through the loop all predicates are false. All threads therefore peacefully sleep in pthread_cond_wait. It is never signalled, because neither thread has a chance to progress and signal it. So they wait forever.

Multithreaded program not producing desired output

I am writing a code that creates 10 threads and executes those threads with even thread ids first and then executes all those with odd thread ids next. I'm using the POSIX threads library. Here is the code I wrote:
#include "stdlib.h"
#include "pthread.h"
#include "stdio.h"
#define TRUE 1
#define FALSE 0
int EVEN_DONE = FALSE;
int evenThreads, oddThreads = 0;
int currentThread = 0;
//the mutex for thread synchronization
static pthread_mutex_t mymutex = PTHREAD_MUTEX_INITIALIZER;
//the condition variable;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
void * printEven(unsigned long id)
{
pthread_mutex_lock(&mymutex);
evenThreads++;
printf("TID: %lu, Hello from even\n", id);
// this condition checks whether even threads have finished executing
if(evenThreads + oddThreads >= 10) {
EVEN_DONE = TRUE;
pthread_cond_broadcast(&cond);
}
pthread_mutex_unlock(&mymutex);
return NULL;
}
void * printOdd(unsigned long id)
{
pthread_mutex_lock(&mymutex);
while (!EVEN_DONE) {
oddThreads++;
pthread_cond_wait(&cond, &mymutex);
printf("TID: %lu, Hello from odd\n", id);
}
pthread_mutex_unlock(&mymutex);
return NULL;
}
void * threadFunc(void *arg)
{
unsigned long id = (unsigned long)pthread_self();
if (id % 2 == 0)
{
printEven(id);
}
else
{
printOdd(id);
}
return NULL;
}
int main()
{
pthread_t* threads;
int num_threads = 10;
int i, j;
threads = malloc(num_threads * sizeof(threads));
for ( i = 0; i < 10; i++) {
pthread_create(&threads[i], NULL, threadFunc, NULL);
}
for ( j = 0; j < 10; j++) {
pthread_join(threads[j], NULL);
}
printf("Finished executing all threads\n");
return 0;
}
However, when I run the code it doesn't produce the desired output. The output I'm getting is this:
Apparently, it seems that all the thread IDs are even numbers. However, I do think there is a problem with my code. What am I doing wrong? How can I achieve the desired output?
(Note: I'm at beginner level when it comes to POSIX threads and multithreading in general)
Thanks in advance.

There is no guarantee in POSIX that the pthread_t type returned by pthread_self() is a numeric type that can be cast to an unsigned long - it is allowed to be a structure type, for example.
If you want to write your code in a POSIX-conforming way, you will need to allocate numeric thread IDs yourself. For example, you could have:
unsigned long allocate_id(void)
{
static unsigned long next_id = 0;
static pthread_mutex_t id_lock = PTHREAD_MUTEX_INITIALIZER;
unsigned long id;
pthread_mutex_lock(&id_lock);
id = next_id++;
pthread_mutex_unlock(&id_lock);
return id;
}
Then in your threads use:
unsigned long id = allocate_id();
Controlling the allocation of IDs yourself also allows you to control the sequence - for example in this case you can ensure that IDs are sequentially allocated so that you will have both odd and even IDs.

Compute the summation of a given interval using multiple threads

For my homework, I need to compute the squares of integers in the interval (0,N) (e.g. (0,50) in a way that the load is distributed equally among threads (e.g. 5 threads). I have been advised to use small chunks from the interval and assign it to the thread. For that, I am using a queue. Here's my code:
#include <stdio.h>
#include <pthread.h>
#define QUEUE_SIZE 50
typedef struct {
int q[QUEUE_SIZE];
int first,last;
int count;
} queue;
void init_queue(queue *q)
{
q->first = 0;
q->last = QUEUE_SIZE - 1;
q->count = 0;
}
void enqueue(queue *q,int x)
{
q->last = (q->last + 1) % QUEUE_SIZE;
q->q[ q->last ] = x;
q->count = q->count + 1;
}
int dequeue(queue *q)
{
int x = q->q[ q->first ];
q->first = (q->first + 1) % QUEUE_SIZE;
q->count = q->count - 1;
return x;
}
queue q; //declare the queue data structure
void* threadFunc(void* data)
{
int my_data = (int)data; /* data received by thread */
int sum=0, tmp;
while (q.count)
{
tmp = dequeue(&q);
sum = sum + tmp*tmp;
usleep(1);
}
printf("SUM = %d\n", sum);
printf("Hello from new thread %u - I was created in iteration %d\n",pthread_self(), my_data);
pthread_exit(NULL); /* terminate the thread */
}
int main(int argc, char* argv[])
{
init_queue(&q);
int i;
for (i=0; i<50; i++)
{
enqueue(&q, i);
}
pthread_t *tid = malloc(5 * sizeof(pthread_t) );
int rc; //return value
for(i=0; i<5; i++)
{
rc = pthread_create(&tid[i], NULL, threadFunc, (void*)i);
if(rc) /* could not create thread */
{
printf("\n ERROR: return code from pthread_create is %u \n", rc);
return(-1);
}
}
for(i=0; i<5; i++)
{
pthread_join(tid[i], NULL);
}
}
The output is not always correct. Most of the time it is correct, 40425, but sometimes, the value is bigger. Is it because of the threads are running in parallel and accessing the queue at the same time (the processor on my laptop is is intel i7)? I would appreciate the feedback on my concerns.

I think contrary to what some of the other people here suggested, you don't need any synchronization primitives like semaphores or mutexes at all. Something like this:
Given some array like
int values[50];
I'd create a couple of threads (say: 5), each of which getting a pointer to a struct with the offset into the values array and a number of squares to compute, like
typedef struct ThreadArgs {
int *values;
size_t numSquares;
} ThreadArgs;
You can then start your threads, each of which being told to process 10 numbers:
for ( i = 0; i < 5; ++i ) {
ThreadArgs *args = malloc( sizeof( ThreadArgs ) );
args->values = values + 10 * i;
args->numSquares = 10;
pthread_create( ...., threadFunc, args );
}
Each thread then simply computes the squares it was assigned, like:
void *threadFunc( void *data )
{
ThreadArgs *args = data;
int i;
for ( i = 0; i < args->numSquares; ++i ) {
args->values[i] = args->values[i] * args->values[i];
}
free( args );
}
At the end, you'd just use a pthread_join to wait for all threads to finish, after which you have your squares in the values array.

All your threads read from the same queue. This leads to a race condition. For instance, if the number 10 was read simultaneously by two threads, your result will be offset by 100. You should protect your queue with a mutex. Put the following print in deque function to know which numbers are repeated:
printf("Dequeing %d in thread %d\n", x, pthread_self());
Your code doesn't show where the results are accumulated to a single variable. You should protect that variable with a mutex as well.
Alternatively, you can pass the start number as the input parameter to each thread from the loop so that each thread can work on its set of numbers. First thread will work on 1-10, the second one on 11-20 and so on. In this approach, you have to use mutex only the part where the threads update the global sum variable at the end of their execution.

First you need to define what it means to be "distributed equally among threads." If you mean that each thread does the same amount of work as the other threads, then I would create a single queue, put all the numbers in the queue, and start all threads (which are the same code.) Each thread tries to get a value from the queue which must be protected by a mutex unless it is thread safe, calculates the partial answer from the value taken from the thread, and adds the result to the total which must also be protected by a mutex. If you mean that each thread will execute an equal amount of times as each of the other threads, then you need to make a priority queue and put all the numbers in the queue along with the thread number that should compute on it. Each thread then tries to get a value from the queue that matches its thread number. From the thread point of view, it should try to get a value from the queue, do the work, then try to get another value. If there are no more values to get, then the thread should exit. The main program does a join on all threads and the program exits when all threads have exited.

Mutex Locks Across Threads with Different Functions

The problem:
Similar to one of my other Questions Other Question
I am trying to create a program in C that allows me to Search through 10 text files with a variable amount of threads to find the largest Prime. It should also have a Manager thread that is allowed to read the Largest Prime number of a worker thread (and not modify it). The Manager thread also Posts the largest Prime number found by all of the worker threads so the worker threads can read it and use it. The worker threads must post their local Largest Prime to a global array (privateLargestPrime) and before they do this they must lock it so that the Manager Thread doesn't read it until the worker thread updates it.
The weird Part:
As I step through my program when the worker thread wants to call a lock it switches threads to the manager which calls for a lock and is granted a lock then it keeps looping starving the Worker thread. I am Not sure what is going on with that. If I could get any insight on this problem it will be greatly appreciated.
/*
* The Reason for Worker Initialization + Manager Initialization is that we need both types of threads to exist at the same time
* so I just combined them into one loop, although I believe that they could have been created seperatly.
* Basically just call pthread_Join at the end
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
#include <time.h>
#include <string.h>
#include <fileTest.h>
clock_t Start, End;
double elapsed = 0;
pthread_cond_t managerVar;
pthread_mutex_t mutex;
unsigned int globalLargestPrime = 0;
int numThreads = 1;//Number of Threads
int LINES_PER_THREAD;
pthread_cond_t *WorkerConditionaVar;
pthread_cond_t *ManagerConditionaVar;
unsigned int *privateLocalLargest;//will need to be changed
int *statusArray;
FILE *fileOut;
typedef enum{
FREE,
IN_USE
}lrgstPrm;
lrgstPrm monLargestPrime;//create enum
lrgstPrm workerLargestPrime;//create enum
typedef enum{
Finished,
Not_Finished
}Status;
Status is_Finished;
typedef struct threadFields{
int id;
int StartPos;//gets seek for worker thread
int EndPos;
}tField;
int ChkPrim(unsigned int n){
unsigned int i;
unsigned int root = sqrt(n);
for(i=2; i<root; i++){
if(n % i == 0)
return 0;
}
//printf("%d \n", isPrime);
return 1;
}
void *Worker(void *threadStruct){//Create Threads
struct threadFields *info = threadStruct;
int index;
int id = info->id;
unsigned int currentNum = 0;
int Seek = info->StartPos;
unsigned int localLargestPrime = 0;
char *buffer = malloc(50);
int isPrime = 0;
while(Seek<info->EndPos){
for(index = 0; index < 1000; index++){//Loop 1000 times
fseek(fileOut,Seek*sizeof(char)*20, SEEK_SET);
fgets(buffer,20,fileOut);
Seek++;
currentNum = atoi(buffer);
if(currentNum>localLargestPrime && currentNum > 0){
isPrime = ChkPrim(currentNum);
if( isPrime == 1)
localLargestPrime = currentNum;
}
}
//while(monLargestPrime == IN_USE)
//pthread_cond_wait(&monitor[id], &mutex);//wait untill mutex is unlocked
//monLargestPrime = IN_USE;
//Critical Zone
//printf("Entering Critical Zone My ID: %d\n",id);
/*Should Lock the Private Largest Prime from any other thread using it*/
if(pthread_mutex_lock(&mutex) != 0)//Lock
printf("Failed To Lock");
while(workerLargestPrime == IN_USE)//Wait untill Workers largest prime is free
pthread_cond_wait(ManagerConditionaVar, &mutex);
workerLargestPrime = IN_USE;//Local Largest is in use
privateLocalLargest[id] = localLargestPrime;//Assign Local Largest to each workers Shared Variable
workerLargestPrime = FREE;
pthread_cond_signal(ManagerConditionaVar);//Signal to any waiting thread that wants to touch(read) this workers privateLocalLargest
pthread_mutex_unlock(&mutex);
/*
pthread_mutex_lock(&mutex);
while(workerLargestPrime == FREE){
workerLargestPrime = IN_USE;
//pthread_cond_wait(&managerVar,&mutex);
*/
if(localLargestPrime < globalLargestPrime)
localLargestPrime = globalLargestPrime;
/*
workerLargestPrime = FREE;
pthread_mutex_unlock(&mutex);
// for(index = 0; index < numThreads; index++)
// if(index != id)
// pthread_cond_signal(&monitor[id]);//signal all threads that mutex is unlocked
//monLargestPrime = FREE;
//printf("Exiting Critical Zone My ID: %d\n",id);
*/
//pthread_mutex_unlock(&mutex);
}//End of While
statusArray[id] = 1;
void *i = 0;
return i;
}
void *manager(){
int index, MlocalLargestPrime;
while(is_Finished==Not_Finished){
/*Should Lock the Private Largest Prime from any other thread using it*/
if(pthread_mutex_lock(&mutex) != 0)//Lock
printf("Failed To Lock");
while(workerLargestPrime == IN_USE)//Wait untill Workers largest prime is free
pthread_cond_wait(ManagerConditionaVar, &mutex);
workerLargestPrime = IN_USE;//Local Largest is in use
//Critical Zone
for(index =0; index < numThreads; index++)
if(privateLocalLargest[index] > MlocalLargestPrime)
MlocalLargestPrime = privateLocalLargest[index];
//Critical Zone
workerLargestPrime = FREE;
pthread_cond_signal(ManagerConditionaVar);//Signal to any waiting thread that wants to touch(read) this workers privateLocalLargest
pthread_mutex_unlock(&mutex);
/*
pthread_mutex_lock(&mutex);
while(workerLargestPrime == FREE){
workerLargestPrime = IN_USE;
globalLargestPrime = MlocalLargestPrime;
workerLargestPrime = FREE;
pthread_cond_signal(&managerVar);
}
pthread_mutex_unlock(&mutex);
*/
/*check if workers have finished*/
for(index = 0; index < numThreads; index++)
if(statusArray[index] == 0)
is_Finished = Not_Finished;
}
void *i = 0;
return i;
}
int main(){
//setFile();
LINES_PER_THREAD = (getLineNum()/numThreads);
fileOut = fopen("TextFiles/dataBin.txt", "rb");
Start = clock();
//pthread_t managerThread;
pthread_t threads[numThreads];
pthread_cond_t monitor[numThreads];
pthread_cond_t managerCon;
WorkerConditionaVar = monitor;//Global Pointer points to the array created in main
ManagerConditionaVar = &managerCon;
unsigned int WorkerSharedVar[numThreads];
privateLocalLargest = WorkerSharedVar;
pthread_mutex_init(&mutex, NULL);
int finishedArr[numThreads];
statusArray = finishedArr;
is_Finished = Not_Finished;
int index;
/*Worker Initialization + Manager Initialization*/
pthread_cond_init(&managerCon,NULL);
/*Worker Thread Struct Initalization*/
tField *threadFields[numThreads];//sets number of thread structs
rewind(fileOut);
for(index = 0; index < numThreads; index++){//run through threads; inizilize the Struct for workers
pthread_cond_init(&monitor[index], NULL);//Initialize all the conditional variables
threadFields[index] = malloc(sizeof(tField));
threadFields[index]->id = index;
threadFields[index]->StartPos = index*LINES_PER_THREAD;// Get Position for start of block
threadFields[index]->EndPos = (index+1)*LINES_PER_THREAD-1;// Get Position for end of block
}
/*Worker Thread Struct Initalization*/
for(index = 0; index<numThreads+1; index++)
if(index == numThreads)//Last Thread is Manager Thread
pthread_create(&threads[index],NULL,manager,NULL);//Create Manager
else//Worker Threads
pthread_create(&threads[index],NULL,Worker,(void *)threadFields[index]);//Pass struct to each worker
for(index = 0; index<numThreads+1; index++)
pthread_join(threads[index], NULL);
/*Worker Initialization + Manager Initialization*/
/*Destroy the mutexes & conditional signals*/
for(index = 0; index < numThreads; index++){
pthread_cond_destroy(&WorkerConditionaVar[index]);
}
pthread_cond_destroy(&managerCon);
pthread_mutex_destroy(&mutex);
End = clock();
elapsed = ((double) (End - Start)) / CLOCKS_PER_SEC;
printf("This is the Time %f\n", elapsed);
printf("This is the Largest Prime Number: %u", globalLargestPrime);
return 0;
}
[1]: https://stackoverflow.com/questions/13672456/slightly-complicated-thread-synchronization
There is another C source which I only use 1 method and it is to give me the number of lines from the 10 text files, I will also post it (but not neccessary):
/*
* fileTest.c
*
* Created on: Dec 8, 2012
* Author: kevin
*
* count number of lines
* divide by number of threads
* get the positions to hand to each thread
* to get positions, one needs to get the number of lines per thread,
* add number of lines to each: Seek*sizeof(char)*10, SEEK_SET.
* and hand out these positions to each thread
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
#include <time.h>
#include <string.h>
FILE *filesIn[10], *fileOut;
int Seek;
void createText(){
FILE *fOUT = fopen("data9.txt", "w");
int i;
srand(time(NULL));
for(i=0; i<10000; i++)
fprintf(fOUT, "%d\n",rand()%9000);
fclose(fOUT);
}
void setFile(){
int index;
Seek = 0;
char *buffer = malloc(50);
filesIn[0] = fopen("TextFiles/primes1.txt", "r");//read Text
filesIn[1] = fopen("TextFiles/primes2.txt", "r");//read Text
filesIn[2] = fopen("TextFiles/primes3.txt", "r");//read Text
filesIn[3] = fopen("TextFiles/primes4.txt", "r");//read Text
filesIn[4] = fopen("TextFiles/primes5.txt", "r");//read Text
filesIn[5] = fopen("TextFiles/primes6.txt", "r");//read Text
filesIn[6] = fopen("TextFiles/primes7.txt", "r");//read Text
filesIn[7] = fopen("TextFiles/primes8.txt", "r");//read Text
filesIn[8] = fopen("TextFiles/primes9.txt", "r");//read Text
filesIn[9] = fopen("TextFiles/primes10.txt", "r");//read Text
fileOut = fopen("TextFiles/dataBin.txt", "wb");//write in bin
for(index = 0; index < 10; index++)//Run through 10 files
while(!feof(filesIn[index])){
fscanf(filesIn[index],"%s", buffer);//take line from input
fseek(fileOut,Seek*sizeof(char)*20, SEEK_SET);
fputs(buffer,fileOut);//Print line to output file
Seek++;
}
fclose(filesIn[0]);
fclose(filesIn[1]);
fclose(filesIn[2]);
fclose(filesIn[3]);
fclose(filesIn[4]);
fclose(filesIn[5]);
fclose(filesIn[6]);
fclose(filesIn[7]);
fclose(filesIn[8]);
fclose(filesIn[9]);
fclose(fileOut);
}
void getFile(){
int Seek = 0;
int currentSeek = 0;
int currentNum = 0;
int localLargestPrime = 0;
char *buffer = malloc(50);
fileOut = fopen("TextFiles/dataBin.txt", "rb");
rewind(fileOut);
while(!feof(fileOut)){
fseek(fileOut,Seek*sizeof(char)*20, SEEK_SET);
fgets(buffer,10,fileOut);
Seek++;
currentNum = atoi(buffer);
if(currentNum>localLargestPrime)
if(ChkPrim(currentNum) == 1){
localLargestPrime = currentNum;
currentSeek = Seek*sizeof(char)*20;
printf("the current seek is: %d\n", currentSeek);
}
}
printf("This is the largest Prime: %d\n", localLargestPrime);
}
int getLineNum(){
Seek = 0;
int index;
char c;
filesIn[0] = fopen("TextFiles/primes1.txt", "r");//read Text
filesIn[1] = fopen("TextFiles/primes2.txt", "r");//read Text
filesIn[2] = fopen("TextFiles/primes3.txt", "r");//read Text
filesIn[3] = fopen("TextFiles/primes4.txt", "r");//read Text
filesIn[4] = fopen("TextFiles/primes5.txt", "r");//read Text
filesIn[5] = fopen("TextFiles/primes6.txt", "r");//read Text
filesIn[6] = fopen("TextFiles/primes7.txt", "r");//read Text
filesIn[7] = fopen("TextFiles/primes8.txt", "r");//read Text
filesIn[8] = fopen("TextFiles/primes9.txt", "r");//read Text
filesIn[9] = fopen("TextFiles/primes10.txt", "r");//read Text
for(index = 0; index < 10; index++)
while((c = fgetc(filesIn[index])) != EOF)
if(c == '\n')
Seek++;
return Seek;
}
enter link description here

You seem to be overdoing the synchronization of the access to globalLargestPrime. But instead of trying to fix that there might be a better way to communicate each thread's value to the manager - just have the thread function return the value it finds as an unsigned int cast to a void*. Then the manager can collect those values by just waiting on a pthread_join() for each thread to finish.
Something like the following pseudo code:
void *Worker(void *threadStruct)
{
unsigned int largest_prime;
// do whatever you need to do to find the largest prime in the set of numbers
// this thread has to deal with
//
// Note that nothing here should require synchronization, since the data should be
// completely independent of other threads
return (void*) largest_prime;
}
void *manager()
{
unsigned int largest_prime = 0;
// do whatever to spin up the threads and keep track of them in a
// pthread_t[] array...
// now wait for the threads to finish up and keep deal with the value
// each thread has found:
for each (pthread* p in the pthread_t[]) { // remember - pseudo code
void* result = 0;
// get the result that thread found
pthread_join( p, &result);
unsigned int thread_prime = (unsigned int) result;
if (largest_prime < thread_prime) {
largest_prime = thread_prime;
}
}
printf("largest prime: %u\n", largest_prime);
}
Now all of your synchronization hassles are dealt with by pthread_join().

Going by your problem, I think you can and should do without locks.
Use the global array to update the Manager thread from the worker threads. Since worker each thread will write to separate array index, there is only one writer per array index. Main thread can keep on reading from the same array.
Use one global variable for Largest prime number found so far (shared across all threads). For this variable, the main thread is the only writer and the worker threads are all readers.
Consistency will not be an issue since its only one variable. You need to worry about taking locks if there are more variables that need to be updated together.
Hope this helps.

Ok So I was doing something Funky with my conditional variables (I had way too many!) so Here I shall post my answer:
/*
* The Reason for Worker Initialization + Manager Initialization is that we need both types of threads to exist at the same time
* so I just combined them into one loop, although I believe that they could have been created seperatly.
* Basically just call pthread_Join at the end
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
#include <time.h>
#include <string.h>
#include "fileTest.h"
clock_t Start, End;
double elapsed = 0;
pthread_cond_t managerVar;
pthread_mutex_t mutex;
unsigned int globalLargestPrime = 0;
int *numThreads;//Number of Threads ptr
int LINES_PER_THREAD;
pthread_cond_t *WorkerConditionaVar;
pthread_cond_t *ManagerConditionaVar;
unsigned int *privateLocalLargest;//will need to be changed
int *statusArray;
FILE *fileOut;
typedef enum{
FREE,
IN_USE
}lrgstPrm;
lrgstPrm managerLargestPrime;//create enum
lrgstPrm workerLargestPrime;//create enum
typedef enum{
Finished,
Not_Finished
}Status;
Status is_Finished;
typedef struct threadFields{
int id;
int StartPos;//gets seek for worker thread
int EndPos;
}tField;
int ChkPrim(unsigned int n){
unsigned int i;
unsigned int root = sqrt(n);
for(i=2; i<root; i++){
if(n % i == 0)
return 0;
}
//printf("%d \n", isPrime);
return 1;
}
void *Worker(void *threadStruct){//Create Threads
struct threadFields *info = threadStruct;
int index;
int id = info->id;
unsigned int currentNum = 0;
int Seek = info->StartPos;
unsigned int localLargestPrime = 0;
char *buffer = malloc(50);
int isPrime = 0;
while(Seek<info->EndPos){
for(index = 0; index < 1000; index++){//Loop 1000 times
fseek(fileOut,Seek*sizeof(char)*20, SEEK_SET);
fgets(buffer,20,fileOut);
Seek++;
currentNum = atoi(buffer);
if(currentNum>localLargestPrime && currentNum > 0){
isPrime = ChkPrim(currentNum);
if( isPrime == 1)
localLargestPrime = currentNum;
}
}
/*Here is where I block the manager thread read while I Write*/
pthread_mutex_lock(&mutex);
while(workerLargestPrime == IN_USE)
pthread_cond_wait(WorkerConditionaVar,&mutex);
//Critical Zone
privateLocalLargest[id] = localLargestPrime;
//Critical Zone
pthread_cond_signal(WorkerConditionaVar);
pthread_mutex_unlock(&mutex);
/*Here is where I block the manager thread read while I Write*/
/*I need to wait here until it is free to read the Managers Shared variable (GlobaLargestPrime)*/
pthread_mutex_lock(&mutex);
while(managerLargestPrime == IN_USE)
pthread_cond_wait(ManagerConditionaVar,&mutex);
//Critical Zone
if(localLargestPrime < globalLargestPrime)
localLargestPrime = globalLargestPrime;
//Critical Zone
pthread_cond_signal(ManagerConditionaVar);
pthread_mutex_unlock(&mutex);
/*I need to wait here until it is free to read the Managers Shared variable (GlobaLargestPrime)*/
}//End of While
statusArray[id] = 1;
return NULL;
}
void *manager(){
int index;
int ManagerLocalLargest = 0;
while(is_Finished==Not_Finished){
/*I need to wait here until it is free to read the workers Shared variable (PrivateLocalLargest)*/
pthread_mutex_lock(&mutex);
while(workerLargestPrime == IN_USE)
pthread_cond_wait(WorkerConditionaVar,&mutex);
//Critical Zone
for(index = 0; index < *numThreads; index++)
if(privateLocalLargest[index] > ManagerLocalLargest)
ManagerLocalLargest = privateLocalLargest[index];
//Critical Zone
pthread_cond_signal(WorkerConditionaVar);
pthread_mutex_unlock(&mutex);
/*Here is where I block the worker thread read while I Write*/
pthread_mutex_lock(&mutex);
while(managerLargestPrime == IN_USE)
pthread_cond_wait(ManagerConditionaVar,&mutex);
//Critical Zone
for(index = 0; index < *numThreads; index++)
if(privateLocalLargest[index] > globalLargestPrime)
globalLargestPrime = privateLocalLargest[index];
//Critical Zone
pthread_cond_signal(ManagerConditionaVar);
pthread_mutex_unlock(&mutex);
/*Here is where I block the worker thread read while I Write*/
/*check if workers have finished*/
for(index = 0; index < *numThreads; index++){
is_Finished = Finished;
if(statusArray[index] != 1){
is_Finished = Not_Finished;
}
}
}
return NULL;
}
int main(int argc, char *argv[]){
//setFile();
int argument;
switch(argc){
case 1:
printf("You didn't Type the number of threads you wanted... \n");
printf("argument format: [# of Threads]\n");
return -1;
break;
case 2:
if(strcmp(argv[1],"--help") == 0){
printf("argument format: [# of Threads]\n");
return 0;
}
else
argument = atoi(argv[1]);
break;
}
printf("The number of threads is %d\n", argument);
numThreads = &argument;
LINES_PER_THREAD = (getLineNum()/(*numThreads));
fileOut = fopen("TextFiles/dataBin.txt", "rb");
//pthread_t managerThread;
pthread_t threads[*numThreads];
pthread_cond_t monitor[*numThreads];
pthread_cond_t managerCon;
WorkerConditionaVar = monitor;//Global Pointer points to the array created in main
ManagerConditionaVar = &managerCon;
unsigned int WorkerSharedVar[*numThreads];
privateLocalLargest = WorkerSharedVar;
pthread_mutex_init(&mutex, NULL);
int finishedArr[*numThreads];
statusArray = finishedArr;
is_Finished = Not_Finished;
int index;
/*Worker Initialization + Manager Initialization*/
pthread_cond_init(&managerCon,NULL);
/*Worker Thread Struct Initalization*/
tField *threadFields[*numThreads];//sets number of thread structs
rewind(fileOut);
for(index = 0; index < *numThreads; index++){//run through threads; inizilize the Struct for workers
privateLocalLargest[index] = 0;
pthread_cond_init(&monitor[index], NULL);//Initialize all the conditional variables
threadFields[index] = malloc(sizeof(tField));
threadFields[index]->id = index;
threadFields[index]->StartPos = index*LINES_PER_THREAD;// Get Position for start of block
threadFields[index]->EndPos = (index+1)*LINES_PER_THREAD-1;// Get Position for end of block
}
/*Worker Thread Struct Initalization*/
Start = clock();
for(index = 0; index<*numThreads+1; index++)
if(index == *numThreads)//Last Thread is Manager Thread
pthread_create(&threads[index],NULL,manager,NULL);//Create Manager
else//Worker Threads
pthread_create(&threads[index],NULL,Worker,(void *)threadFields[index]);//Pass struct to each worker
for(index = 0; index<*numThreads+1; index++)
pthread_join(threads[index], NULL);
/*Worker Initialization + Manager Initialization*/
/*Destroy the mutexes & conditional signals*/
for(index = 0; index < *numThreads; index++){
pthread_cond_destroy(&WorkerConditionaVar[index]);
}
pthread_cond_destroy(&managerCon);
pthread_mutex_destroy(&mutex);
End = clock();
elapsed = ((double) (End - Start)) / CLOCKS_PER_SEC;
printf("This is the Time %f\n", elapsed);
printf("This is the Largest Prime Number: %u\n", globalLargestPrime);
return 0;
}
Also I solved my problem with the number of threads and Conditional variables needing to be hard-coded in, Now It can just be entered in as a parameter. Thanks everyone for all the support.
PS.
I have noticed that having 2 threads does not speed up the process (I assumed it would) and my pc is a dual core. it could be because of the Mutex locks and all of the blocking. I also Noticed that the more threads the longer it takes for them to process the data... hrmm if anyone sees this and can give me some insight please pm me or write a comment. Thanks (the other c file stayed the same).

C: pthread performance woes. How can I make this code perform as expected?

I have created this little program to calculate pi using probability and ratios. In order to make it run faster I decided to give multithreading with pthreads a shot. Unfortunately, even after doing much searching around I was unable to solve the problem I have in that when I run the threadFunc function, with one thread, whether that be with a pthread, or just normally called from the calculate_pi_mt function, the performance is much better (at least twice or if not 3 times better) than when I try running it with two threads on my dual core machine. I have tried disabling optimizations to no avail. As far as I can see, when the thread is running it is using local variables apart from at the end when I have used a mutex lock to create the sum of hits...
Firstly are there any tips for creating code that will run better here? (ie style) because I'm just learning by trying this stuff.
And secondly would there be any reason for these obvious performance problems?
When running with number of threads set to 1, one of my cpus maxes out at 100%. When set to two, the second cpu rises to roughly 80%-90%, but all this extra work it is apparently doing is to no avail! Could it be the use of the rand() function?
struct arguments {
int n_threads;
int rays;
int hits_in;
pthread_mutex_t *mutex;
};
void *threadFunc(void *arg)
{
struct arguments* args=(struct arguments*)arg;
int n = 0;
int local_hits_in = 0;
double x;
double y;
double r;
while (n < args->rays)
{
n++;
x = ((double)rand())/((double)RAND_MAX);
y = ((double)rand())/((double)RAND_MAX);
r = (double)sqrt(pow(x, 2) + pow(y, 2));
if (r < 1.0){
local_hits_in++;
}
}
pthread_mutex_lock(args->mutex);
args->hits_in += local_hits_in;
pthread_mutex_unlock(args->mutex);
return NULL;
}
double calculate_pi_mt(int rays, int threads){
double answer;
int c;
unsigned int iseed = (unsigned int)time(NULL);
srand(iseed);
if ( (float)(rays/threads) != ((float)rays)/((float)threads) ){
printf("Error: number of rays is not evenly divisible by threads\n");
}
/* argument initialization */
struct arguments* args = malloc(sizeof(struct arguments));
args->hits_in = 0;
args->rays = rays/threads;
args->n_threads = 0;
args->mutex = malloc(sizeof(pthread_mutex_t));
if (pthread_mutex_init(args->mutex, NULL)){
printf("Error creating mutex!\n");
}
pthread_t thread_ary[MAXTHREADS];
c=0;
while (c < threads){
args->n_threads += 1;
if (pthread_create(&(thread_ary[c]),NULL,threadFunc, args)){
printf("Error when creating thread\n");
}
printf("Created Thread: %d\n", args->n_threads);
c+=1;
}
c=0;
while (c < threads){
printf("main waiting for thread %d to terminate...\n", c+1);
if (pthread_join(thread_ary[c],NULL)){
printf("Error while waiting for thread to join\n");
}
printf("Destroyed Thread: %d\n", c+1);
c+=1;
}
printf("Hits in %d\n", args->hits_in);
printf("Rays: %d\n", rays);
answer = 4.0 * (double)(args->hits_in)/(double)(rays);
//freeing everything!
pthread_mutex_destroy(args->mutex);
free(args->mutex);
free(args);
return answer;
}

There's a couple of problems I can see:
rand() is not thread-safe. Use drand48_r() (which generates a double in the range [0.0, 1.0) natively, which is what you want)
You only create one struct arguments structure, then try to use that for multiple threads. You need to create a seperate one for each thread (just use an array).
Here's how I'd clean up your approach. Note how we don't need to use any mutexes - each thread just stashes its own return value in a seperate location, and the main thread adds them up after the other threads have finished:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <pthread.h>
struct thread_info {
int thread_n;
pthread_t thread_id;
int rays;
int hits_in;
};
void seed_rand(int thread_n, struct drand48_data *buffer)
{
struct timeval tv;
gettimeofday(&tv, NULL);
srand48_r(tv.tv_sec * thread_n + tv.tv_usec, buffer);
}
void *threadFunc(void *arg)
{
struct thread_info *thread_info = arg;
struct drand48_data drand_buffer;
int n = 0;
const int rays = thread_info->rays;
int hits_in = 0;
double x;
double y;
double r;
seed_rand(thread_info->thread_n, &drand_buffer);
for (n = 0; n < rays; n++)
{
drand48_r(&drand_buffer, &x);
drand48_r(&drand_buffer, &y);
r = x * x + y * y;
if (r < 1.0){
hits_in++;
}
}
thread_info->hits_in = hits_in;
return NULL;
}
double calculate_pi_mt(int rays, int threads)
{
int c;
int hits_in = 0;
if (rays % threads) {
printf("Error: number of rays is not evenly divisible by threads\n");
rays = (rays / threads) * threads;
}
/* argument initialization */
struct thread_info *thr = malloc(threads * sizeof thr[0]);
for (c = 0; c < threads; c++) {
thr[c].thread_n = c;
thr[c].rays = rays / threads;
thr[c].hits_in = 0;
if (pthread_create(&thr[c].thread_id, NULL, threadFunc, &thr[c])) {
printf("Error when creating thread\n");
}
printf("Created Thread: %d\n", thr[c].thread_n);
}
for (c = 0; c < threads; c++) {
printf("main waiting for thread %d to terminate...\n", c);
if (pthread_join(thr[c].thread_id, NULL)) {
printf("Error while waiting for thread to join\n");
}
hits_in += thr[c].hits_in;
printf("Destroyed Thread: %d\n", c+1);
}
printf("Hits in %d\n", hits_in);
printf("Rays: %d\n", rays);
double answer = (4.0 * hits_in) / rays;
free(thr);
return answer;
}

You're using far too many synchronization primitives. You should sum the local_hits at the end in the main thread, and not use a mutex to update it in an asynchronous fashion. Or, at least, you could use an atomic operation (it's just an int) to do it instead of lock an entire mutex to update one int.

Threading has a cost. It may be that, as your useful computing code looks very simple, the cost of thread management (cost paid when changing thread and synchronisation cost) is much higher than the benefit.