I have a program that measures the time it takes to send or receive a given amount of data. On the receiving side, the clock reports only about half of the time the transfer actually takes.
Output from the terminal that receives the data:
Amount of data recived: 60296112/300000000
Time: 4
Start time: 3269
End time: 4849790
Clocks per sec: 1000000
And the output from the terminal that sends the data:
Sent 300000000 bytes of data
Time to send was 10.793425
The sending terminal sends a stop signal after it has sent all the other data. Watching the receiving terminal, I can see that it starts counting when the other terminal starts sending, and it keeps printing the output of clock() for far longer than the reported time suggests.
My code for the receive part:
/*
 * Receives datagrams on socket_fd until a message starting with "die"
 * arrives, then prints a summary.
 *
 * Every datagram starting with "HELLO!" adds its length to the running byte
 * total; the first such datagram also records the starting clock() value.
 * The reported time is the clock() difference divided by CLOCKS_PER_SEC
 * using integer division.
 *
 * NOTE: clock() reports processor time used by the process, not elapsed
 * wall-clock time, so the figure does not include time spent waiting.
 */
static void recive_sock(int socket_fd,char *buffert, size_t buffert_size, struct sockaddr *other, socklen_t *other_size){
    clock_t first_tick, last_tick;
    long int seconds = 0;
    int payload_bytes = 0;
    int timing_started = 0;

    printf("Listning for data\n" );
    fflush(stdout);

    for (;;) {
        int got = recvfrom(socket_fd, buffert, buffert_size, 0, other, other_size );
        if (got < 0) {
            die("Recvfrom failed");
        }

        if (strncmp(buffert, "HELLO!", 6) == 0) {
            payload_bytes += got;
            if (!timing_started) {
                timing_started = 1;
                first_tick = clock();
                printf("Start: %ld\n",first_tick );
            }
            printf("%ld\n",clock() );
            continue;
        }

        if (strncmp(buffert, "die", 3) != 0) {
            continue;   /* neither payload nor stop signal: ignore */
        }

        /* Stop signal: take the end stamp and report. */
        last_tick = clock();
        printf("End %ld\n",last_tick );
        seconds = (last_tick - first_tick) / CLOCKS_PER_SEC;
        printf("Amount of data recived: %d/%d\nTime: %ld\nStart time: %ld\nEnd time: %ld\n,Clocks per sec: %ld", payload_bytes, AMOUNT, seconds, first_tick, last_tick, CLOCKS_PER_SEC);
        break;
    }
}
The function clock() returns the CPU time used by the process, which is probably not what you are looking for. Instead, use something like gettimeofday() or, on systems that support it, clock_gettime(), and compare the time before and after to get the elapsed time. For future readers, here is how to do it with clock_gettime() if your system supports it:
#include <stdio.h>
#include <time.h> // for clock_gettime()
/*
 * Times a busy loop and prints the elapsed wall-clock time in milliseconds.
 *
 * CLOCK_MONOTONIC is used because it advances with real elapsed time.
 * CLOCK_PROCESS_CPUTIME_ID only counts CPU time consumed by the process,
 * which is the same limitation clock() has.
 *
 * Returns 0 on success, 1 if the clock could not be read.
 */
int main(void) {
    int i;
    int j;
    int sum = 1;
    struct timespec t1, t2;
    double elapsedTime;

    // start timer
    if (clock_gettime(CLOCK_MONOTONIC, &t1) != 0) {
        perror("clock_gettime");
        return 1;
    }

    // do something here
    for (i = 0; i < 10000; i++) {
        for (j = 0; j < 10000; j++) {
            sum *= i+j;
        }
    }

    // stop timer
    if (clock_gettime(CLOCK_MONOTONIC, &t2) != 0) {
        perror("clock_gettime");
        return 1;
    }

    // compute and print the elapsed time in millisec
    elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;       // whole seconds -> ms
    elapsedTime += (t2.tv_nsec - t1.tv_nsec) / 1000000.0; // nanoseconds -> ms
    printf("%.3f ms elapsed\n", elapsedTime);
    return 0;
}
Here is a simple example on how to measure time with gettimeofday (less accurate for high-precision timing):
#include <stdio.h>
#include <sys/time.h> // for gettimeofday()
#include <time.h>
/*
 * Times a busy loop with gettimeofday() and prints the elapsed time in
 * milliseconds. Always returns 0.
 */
int main(void) {
    struct timeval begin, finish;
    int sum = 1;

    // start timer
    gettimeofday(&begin, NULL);

    // do something here
    for (int outer = 0; outer < 10000; outer++) {
        for (int inner = 0; inner < 10000; inner++) {
            sum *= outer+inner;
        }
    }

    // stop timer
    gettimeofday(&finish, NULL);

    // compute and print the elapsed time in millisec
    double from_seconds = (finish.tv_sec - begin.tv_sec) * 1000.0;
    double from_micros = (finish.tv_usec - begin.tv_usec) / 1000.0;
    double elapsedTime = from_seconds + from_micros;
    printf("%.3f ms elapsed\n", elapsedTime);
    return 0;
}
Related
I'm trying to have a loop execute for an exact time specified by executionTime. I am using ctime to get this timing, and I need it to be within a few milliseconds of accuracy (which I believe it is). Once that loop runs for the execution time specified, it should break.
My problem is that for an execution time of 0.5, the result printed is 1. Upon further testing, it appears that my program rounds the execution time up to the nearest integer: for an executionTime of 4.5, the execution time printed is 5.000000. My code is below. I am not sure where this error is coming from.
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
/*
 * Spins until at least executionTime seconds have passed according to
 * time()/difftime(), or one billion iterations have run, then prints the
 * last measured difference.
 *
 * NOTE: time() has one-second resolution, so the measured difference only
 * ever takes whole-second values.
 */
int main()
{
    const float executionTime = 4.5;
    float timeDifference = -1;
    const time_t t1 = time(0);
    int i = 0;

    while (i < 1000000000)
    {
        time_t now = time(0);
        timeDifference = difftime(now, t1);
        if (timeDifference >= executionTime)
        {
            break;
        }
        i++;
    }

    printf("time difference is: %f\n", timeDifference);
    return 0;
}
NEW VERSION, trying to use clock_gettime. For some reason this version never breaks out of the loop when the execution time is reached.
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#define BILLION 1E9
/*
 * Spins reading CLOCK_REALTIME until the computed elapsed time reaches
 * executionTime or one billion iterations have run, then prints the last
 * computed value.
 *
 * NOTE: as written, the whole-second difference is added to the nanosecond
 * difference BEFORE dividing by BILLION, so the seconds are scaled down by
 * 1e9 along with the nanoseconds.
 */
int main()
{
    const float executionTime = 4.5;
    float elapsedTime = -1;
    struct timespec start;
    struct timespec stop;

    //Get starting time
    clock_gettime(CLOCK_REALTIME, &start);

    int i = 0;
    while (i < 1000000000)
    {
        //Get current time
        clock_gettime(CLOCK_REALTIME, &stop);
        time_t sec_delta = stop.tv_sec - start.tv_sec;
        long nsec_delta = stop.tv_nsec - start.tv_nsec;
        elapsedTime = (sec_delta + nsec_delta) / BILLION;
        if (elapsedTime >= executionTime)
        {
            break;
        }
        i++;
    }

    printf("time difference is: %f\n", elapsedTime);
    return 0;
}
clock_gettime can be used to get greater accuracy.
Call delay with a value greater than 0 to set the delay time, then with -1 to check the elapsed time.
Calling clock_gettime in main will give the elapsed time in main.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#define BILLION 1E9
/*
 * Two-mode software timer based on CLOCK_REALTIME.
 *
 * seconds >= 0: arm the timer with that duration, remember the current
 *               time, and return 0.
 * seconds <  0: return 1 while less than the armed duration has elapsed
 *               since arming, 0 once it has been reached.
 *
 * The armed duration and start time live in function-local statics, so a
 * single timer is shared by all callers.
 */
int delay ( double seconds)
{
    static double armed_seconds = 0.0;
    static struct timespec armed_at;

    if ( seconds >= 0.0) {
        armed_seconds = seconds;
        clock_gettime ( CLOCK_REALTIME, &armed_at);
        return 0;
    }

    struct timespec now;
    clock_gettime ( CLOCK_REALTIME, &now);

    double waited = difftime ( now.tv_sec, armed_at.tv_sec)
                  + ( now.tv_nsec - armed_at.tv_nsec) / BILLION;

    return ( waited < armed_seconds) ? 1 : 0;
}
/*
 * Arms delay() for executionTime seconds, polls it until it reports that
 * the time is up (or one billion polls have run), and prints the elapsed
 * time measured independently with CLOCK_REALTIME.
 */
int main() {
    const double executionTime = 4.5;
    struct timespec begin;
    struct timespec end;

    clock_gettime ( CLOCK_REALTIME, &begin);
    delay( executionTime);//call to set time

    int polls = 0;
    while ( polls < 1000000000 && delay( -1)) {//call to check elapsed time
        polls++;
    }

    clock_gettime ( CLOCK_REALTIME, &end);

    double timeDifference = difftime ( end.tv_sec, begin.tv_sec)
                          + ( end.tv_nsec - begin.tv_nsec) / BILLION;
    printf("time difference is: %f\n", timeDifference);
    return 0;
}
time() returns the calendar time with one-second resolution, and difftime() returns the difference between two such values, so this code effectively computes the difference of two integers. For a finer-grained estimate you can use clock().
The purpose of time_t is to represent calendar times.
/*
 * Spins until clock() reports that at least executionTime seconds of
 * processor time have passed (or one billion iterations have run), then
 * prints the last measured value.
 */
int main()
{
    const float executionTime = 1.3;
    double timeDifference = 1.0;
    const clock_t t1 = clock();
    int i = 0;

    while (i < 1000000000)
    {
        clock_t ticks = clock() - t1;
        timeDifference = (double)ticks / CLOCKS_PER_SEC;
        if (timeDifference >= executionTime)
        {
            break;
        }
        i++;
    }

    printf("time difference is: %.9g\n", timeDifference);
    return 0;
}
I currently have a multi-threaded C program coded using Pthreads which uses 2 threads. I want to increase the no. of threads and measure speed up upon doing so. I would like to run my code in an automated manner where the no. of threads used keeps getting incremented and I want to graphically display running times of my code. I would love it if I could get a clue in on how to do so especially on how to automate the entire process and plotting it graphically. Here is my code:
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define NUM_THREADS 2
#define VECTOR_SIZE 40
/* Shared state for the threaded dot-product computation. */
struct DOTdata
{
/* data */
long X[VECTOR_SIZE]; /* first input vector */
long Y[VECTOR_SIZE]; /* second input vector */
long sum; /* running total; worker threads add their partial sums to it */
long compute_length; /* number of elements assigned to each thread */
};
struct DOTdata dotstr;
pthread_mutex_t mutex_sum;
void *calcDOT(void *);
/*
 * Fills the two input vectors, splits the dot product across NUM_THREADS
 * joinable threads running calcDOT, waits for all of them, and prints the
 * accumulated result.
 */
int main(int argc, char *argv[])
{
    pthread_t call_thread[NUM_THREADS];
    pthread_attr_t attr;
    void *status;
    long n;

    /* X = 1..VECTOR_SIZE, Y = 2..VECTOR_SIZE+1 */
    for (n = 0; n < VECTOR_SIZE; ++n) {
        dotstr.X[n] = n + 1;
        dotstr.Y[n] = n + 2;
    }
    dotstr.sum = 0;
    dotstr.compute_length = VECTOR_SIZE/NUM_THREADS;

    pthread_mutex_init(&mutex_sum, NULL);
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    /* The thread index travels inside the pointer argument. */
    for (n = 0; n < NUM_THREADS; ++n) {
        pthread_create(&call_thread[n], &attr, calcDOT, (void *)n);
    }
    pthread_attr_destroy(&attr);

    n = 0;
    while (n < NUM_THREADS) {
        pthread_join(call_thread[n], &status);
        ++n;
    }

    printf("Resultant X*Y is %ld\n", dotstr.sum);
    pthread_mutex_destroy(&mutex_sum);
    pthread_exit(NULL);
}
/*
 * Thread entry point: computes the partial dot product of dotstr.X and
 * dotstr.Y over this thread's slice and adds it to dotstr.sum while
 * holding mutex_sum.
 *
 * thread_id carries the zero-based thread index in the pointer value.
 * The slice is the half-open range
 *     [index * compute_length, (index + 1) * compute_length).
 *
 * The thread exits with thread_id as its exit value.
 */
void *calcDOT(void *thread_id)
{
    long offset = (long)thread_id;
    long length = dotstr.compute_length;
    long start_index = offset * length;
    /* Exclusive upper bound. The earlier "(start + length) - 1" combined
       with "<" skipped the last element of every slice. */
    long end_index = start_index + length;
    long sum = 0;
    long vec_index;

    for (vec_index = start_index; vec_index < end_index; vec_index++) {
        sum += (dotstr.X[vec_index] * dotstr.Y[vec_index]);
    }

    /* Only the final accumulation touches shared state. */
    pthread_mutex_lock(&mutex_sum);
    dotstr.sum += sum;
    pthread_mutex_unlock(&mutex_sum);

    pthread_exit((void *)thread_id);
}
I would like to increment my NUM_THREADS parameter and run it after each increment, record the execution time after each increment and plot a graph of execution time vs number of threads.
I tried a naive approach: increase the number of threads, time each run with time.h, and plot the results with gnuplot. On each iteration we double the number of threads and print the time for that iteration. We then use gnuplot to display a graph with the number of threads on the x-axis and the execution time on the y-axis.
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define NUM_THREADS 2
#define VECTOR_SIZE 40
/* Shared state for the threaded dot-product computation. */
struct DOTdata {
/* data */
long X[VECTOR_SIZE]; /* first input vector */
long Y[VECTOR_SIZE]; /* second input vector */
long sum; /* running total; worker threads add their partial sums to it */
long compute_length; /* number of elements assigned to each thread */
};
struct DOTdata dotstr;
pthread_mutex_t mutex_sum;
void *calcDOT(void *);
/*
 * Runs the threaded dot product with 2, 4, 8, ... threads (doubling while
 * the count stays below VECTOR_SIZE / NUM_THREADS), times each run with
 * clock(), and pipes the (thread count, time) pairs to gnuplot.
 *
 * Only the samples actually recorded are plotted; the pipe is checked when
 * opened and closed when done.
 *
 * NOTE: clock() measures processor time for the whole process, so the
 * figures are CPU time rather than elapsed wall-clock time.
 *
 * Returns 0 on success, EXIT_FAILURE if gnuplot could not be started.
 */
int main(int argc, char *argv[]) {
    (void) argc;
    (void) argv;

    double xvals[VECTOR_SIZE / NUM_THREADS];
    double yvals[VECTOR_SIZE / NUM_THREADS];
    int index = 0;  /* number of samples recorded so far */

    for (int count = NUM_THREADS; count < VECTOR_SIZE / NUM_THREADS; count = count * 2) {
        clock_t begin = clock();

        long vec_index;
        for (vec_index = 0; vec_index < VECTOR_SIZE; vec_index++) {
            dotstr.X[vec_index] = vec_index + 1;
            dotstr.Y[vec_index] = vec_index + 2;
        }
        dotstr.sum = 0;
        dotstr.compute_length = VECTOR_SIZE / count;

        pthread_t call_thread[count];
        pthread_attr_t attr;
        void *status;

        pthread_mutex_init(&mutex_sum, NULL);
        pthread_attr_init(&attr);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

        long i;
        for (i = 0; i < count; i++) {
            pthread_create(&call_thread[i], &attr, calcDOT, (void *) i);
        }
        pthread_attr_destroy(&attr);

        for (i = 0; i < count; i++) {
            pthread_join(call_thread[i], &status);
        }

        printf("Resultant X*Y is %ld\n", dotstr.sum);
        pthread_mutex_destroy(&mutex_sum);

        clock_t end = clock();
        double time_spent = (double) (end - begin) / CLOCKS_PER_SEC;
        printf("time spent: %f NUM_THREADS: %d\n", time_spent, count);

        xvals[index] = count;
        yvals[index] = time_spent;
        index++;
    }

    FILE *gnuplotPipe = popen("gnuplot -persistent", "w");
    if (gnuplotPipe == NULL) {
        perror("popen");
        return EXIT_FAILURE;
    }

    fprintf(gnuplotPipe, "plot '-' \n");
    /* Stop at `index`: the remaining array slots were never written. */
    for (int i = 0; i < index; i++) {
        fprintf(gnuplotPipe, "%lf %lf\n", xvals[i], yvals[i]);
    }
    fprintf(gnuplotPipe, "e\n");
    pclose(gnuplotPipe);

    return 0;
}
/*
 * Thread entry point: computes the partial dot product of dotstr.X and
 * dotstr.Y over this thread's slice and adds it to dotstr.sum while
 * holding mutex_sum.
 *
 * thread_id carries the zero-based thread index in the pointer value.
 * The slice is the half-open range
 *     [index * compute_length, (index + 1) * compute_length).
 *
 * The thread exits with thread_id as its exit value.
 */
void *calcDOT(void *thread_id) {
    long offset = (long) thread_id;
    long length = dotstr.compute_length;
    long start_index = offset * length;
    /* Exclusive upper bound. The earlier "(start + length) - 1" combined
       with "<" skipped the last element of every slice. */
    long end_index = start_index + length;
    long sum = 0;
    long vec_index;

    for (vec_index = start_index; vec_index < end_index; vec_index++) {
        sum += (dotstr.X[vec_index] * dotstr.Y[vec_index]);
    }

    /* Only the final accumulation touches shared state. */
    pthread_mutex_lock(&mutex_sum);
    dotstr.sum += sum;
    pthread_mutex_unlock(&mutex_sum);

    pthread_exit((void *) thread_id);
}
Output
Resultant X*Y is 20900
time spent: 0.000155 NUM_THREADS: 2
Resultant X*Y is 19860
time spent: 0.000406 NUM_THREADS: 4
Resultant X*Y is 17680
time spent: 0.000112 NUM_THREADS: 8
Resultant X*Y is 5712
time spent: 0.000587 NUM_THREADS: 16
I am trying to implement the multithreaded version of the Monte-Carlo algorithm. Here is my code:
#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include <math.h>
#include <semaphore.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#define MAX_THREADS 12
#define MAX_DOTS 10000000
double sum = 0.0;
sem_t sem;
/* Clears the global accumulator `sum` so the next run starts from zero. */
void reset() {
sum = 0.0;
}
/*
 * Worker for the basic variant: samples *_iterations random points and,
 * for every point lying on or under sin(x), adds x * y to the global `sum`
 * while holding the semaphore `sem`.
 *
 * _iterations points to an int holding the number of points to sample.
 * Always returns NULL.
 */
void* check_dot(void* _iterations) {
    const int* limit = (int*)_iterations;
    int sampled = 0;

    while(sampled < *limit) {
        double x = (double)(rand() % 314) / 100;
        double y = (double)(rand() % 100) / 100;
        if(y <= sin(x)) {
            /* Shared accumulator: serialize the update. */
            sem_wait(&sem);
            sum += x * y;
            sem_post(&sem);
        }
        ++sampled;
    }
    return NULL;
}
void* check_dots_advanced(void* _iterations) {
int* iterations = (int*)_iterations;
double* res = (double*)malloc(sizeof(double));
for(int i = 0; i < *iterations; ++i) {
double x = (double)(rand() % 314) / 100;
double y = (double)(rand() % 100) / 100;
if(y <= sin(x)) *res += x * y;
}
pthread_exit((void*)res);
}
/*
 * Runs the Monte-Carlo sampling on threads_num threads and returns the
 * time, in seconds, between the moment all threads have been created and
 * the moment all of them have been joined (CLOCK_REALTIME).
 *
 * advanced selects check_dots_advanced (per-thread results, summed here
 * into the global `sum`) instead of check_dot (semaphore-protected global).
 * The process exits if the clock cannot be read.
 */
double run(int threads_num, bool advanced) {
    struct timespec begin, end;
    pthread_t threads[threads_num];
    int iters = MAX_DOTS / threads_num;
    void* (*worker)(void*) = advanced ? &check_dots_advanced : &check_dot;

    if(!advanced) sem_init(&sem, 0, 1);

    for(int t = 0; t < threads_num; ++t) {
        pthread_create(&threads[t], NULL, worker, (void*)&iters);
    }

    if(clock_gettime(CLOCK_REALTIME, &begin) == -1) {
        perror("Unable to get time");
        exit(-1);
    }

    for(int t = 0; t < threads_num; ++t) {
        if(advanced) {
            /* Each advanced worker hands back a heap-allocated partial sum. */
            void* partial;
            pthread_join(threads[t], &partial);
            sum += *((double*)partial);
            free(partial);
        }
        else {
            pthread_join(threads[t], NULL);
        }
    }

    if(clock_gettime(CLOCK_REALTIME, &end) == -1) {
        perror("Unable to get time");
        exit(-1);
    }

    if(!advanced) sem_destroy(&sem);

    return (end.tv_sec - begin.tv_sec)
         + (end.tv_nsec - begin.tv_nsec) / 1000000000.0;
}
/*
 * Parses the command line ("-o FILE" is required; "-a" / "--advanced"
 * selects the advanced worker), runs the computation on one thread to get
 * the result and a baseline time, then on 2..MAX_THREADS threads, and
 * writes "<threads>:<time / baseline time>" lines to the output file.
 */
int main(int argc, char** argv) {
    bool advanced = false;
    char* filename = NULL;

    int arg = 1;
    while(arg < argc) {
        if(strcmp(argv[arg], "-o") == 0 && argc > arg + 1) {
            filename = argv[arg + 1];
            arg += 2;   /* skip the option and its value */
            continue;
        }
        if(strcmp(argv[arg], "-a") == 0 || strcmp(argv[arg], "--advanced") == 0) {
            advanced = true;
        }
        ++arg;
    }

    if(filename == NULL) {
        fprintf(stderr, "You should provide the name of the output file.\n");
        exit(-1);
    }

    FILE* fd = fopen(filename, "w");
    if(fd == NULL) {
        perror("Unable to open file");
        exit(-1);
    }

    srand(time(NULL));

    /* Single-threaded run: provides both the result and the baseline time. */
    double worst_time = run(1, advanced);
    double result = (3.14 / MAX_DOTS) * sum;
    reset();
    fprintf(fd, "Result: %f\n", result);

    for(int threads = 2; threads <= MAX_THREADS; ++threads) {
        double elapsed = run(threads, advanced);
        double accel = elapsed / worst_time;
        fprintf(fd, "%d:%f\n", threads, accel);
        reset();
    }

    fclose(fd);
    return 0;
}
However, I can't see any real acceleration when increasing the number of threads (and it does not matter which check_dot() function I use). I have tried running this code on my laptop with an Intel Core i7-3517U (lscpu says that it has 4 independent CPUs), and it looks like the number of threads does not really influence the execution time of my program:
Number of threads: 1, working time: 0.847277 s
Number of threads: 2, working time: 3.133838 s
Number of threads: 3, working time: 2.331216 s
Number of threads: 4, working time: 3.011819 s
Number of threads: 5, working time: 3.086003 s
Number of threads: 6, working time: 3.118296 s
Number of threads: 7, working time: 3.058180 s
Number of threads: 8, working time: 3.114867 s
Number of threads: 9, working time: 3.179515 s
Number of threads: 10, working time: 3.025266 s
Number of threads: 11, working time: 3.142141 s
Number of threads: 12, working time: 3.064318 s
I expected some kind of linear dependence between the execution time and the number of working threads, at least for the first four values (the more threads are working, the shorter the execution time), but here the time values are pretty much equal. Is this a real problem in my code, or am I expecting too much?
The problem you are experiencing is that the internal state of rand() is a shared resource between all threads, so the threads are going to serialise on access to rand().
You need to use a pseudo-random number generator with per-thread state - the rand_r() function (although marked obsolete in the latest version of POSIX) can be used as such. For serious work you would be best off importing the implementation of some specific PRNG algorithm such as Mersenne Twister.
I was able to collect the timing / scaling measurements that you would desire with two changes to your code.
First, rand() is not thread safe. Replacing the calls with calls to rand_r(seed) in the advanced check_dots showed continual scaling as threads increased. I think rand might have an internal lock that is serializing execution and preventing any speedup. This change alone shows some scaling, from 1.23s -> 0.55 sec (5 threads).
Second, I introduced barriers around the core execution region so that the cost of serially creating/joining threads and the malloc calls is not included. The core execution region shows good scaling, from 1.23sec -> 0.18sec (8 threads).
Code was compiled with gcc -O3 -pthread mcp.c -std=c11 -lm, run on Intel E3-1240 v5 (4 cores, HT), Linux 3.19.0-68-generic. Single measurements reported.
pthread_barrier_t bar;
/*
 * Worker for the "advanced" variant using a per-thread random generator.
 *
 * Draws a private seed from rand() under the semaphore `sem`, waits on the
 * barrier `bar`, samples *_iterations points with rand_r(), accumulates
 * x * y for every point on or under sin(x) into a private heap-allocated
 * double, waits on the barrier again, and exits the thread with a pointer
 * to the result.
 *
 * The caller obtains the result through pthread_join() and must free() it.
 * If the allocation fails, the process is terminated.
 */
void* check_dots_advanced(void* _iterations) {
    int* iterations = (int*)_iterations;
    double* res = malloc(sizeof *res);
    if(res == NULL) {
        perror("Unable to allocate result");
        exit(-1);
    }
    /* malloc() does not zero memory; start the accumulator explicitly. */
    *res = 0.0;

    /* rand() keeps shared state, so the seed draw is serialized. */
    sem_wait(&sem);
    unsigned int seed = rand();
    sem_post(&sem);

    pthread_barrier_wait(&bar);
    for(int i = 0; i < *iterations; ++i) {
        double x = (double)(rand_r(&seed) % 314) / 100;
        double y = (double)(rand_r(&seed) % 100) / 100;
        if(y <= sin(x)) *res += x * y;
    }
    pthread_barrier_wait(&bar);
    pthread_exit((void*)res);
}
// Barrier-based variant of run(): the two barrier waits in this function
// bracket the timed region, so the two clock readings are taken between them.
// NOTE(review): this snippet ends without a closing brace or return
// statement in the visible source.
// NOTE(review): both barrier waits need threads_num + 1 participants; the
// visible check_dot does not wait on `bar` - confirm the non-advanced path.
double run(int threads_num, bool advanced) {
sem_init(&sem, 0, 1);
struct timespec begin, end;
double elapsed;
pthread_t threads[threads_num];
int iters = MAX_DOTS / threads_num;
pthread_barrier_init(&bar, NULL, threads_num + 1); // barrier init
for(int i = 0; i < threads_num; ++i) {
if(!advanced) pthread_create(&threads[i], NULL, &check_dot, (void*)&iters);
else pthread_create(&threads[i], NULL, &check_dots_advanced, (void*)&iters);
}
pthread_barrier_wait(&bar); // wait until threads are ready
if(clock_gettime(CLOCK_REALTIME, &begin) == -1) { // begin time
perror("Unable to get time");
exit(-1);
}
pthread_barrier_wait(&bar); // wait until threads finish
if(clock_gettime(CLOCK_REALTIME, &end) == -1) { // end time
perror("Unable to get time");
exit(-1);
}
// Collect the per-thread partial sums (advanced mode) and release them.
for(int i = 0; i < threads_num; ++i) {
if(!advanced) pthread_join(threads[i], NULL);
else {
void* tmp;
pthread_join(threads[i], &tmp);
sum += *((double*)tmp);
free(tmp);
}
}
pthread_barrier_destroy(&bar);
I am having trouble using pthreads to count the number of 3's in a list. The serial version of my code works fine, but the versions that use pthread_create are giving me trouble. Currently the problem is that count3s_thread_2(int id) does not give me the same value as the serial version.
What do I need to change?
P.S., sorry for the mess. I am new to programming in C.
// Declares some global variables we will use throughout the
// program with all versions.
#define NUM_THREADS 4
int Length = 1000;
int array[1000];
int count;
long i;
pthread_mutex_t m;
pthread_t threads[NUM_THREADS];
// Seeds the generator from the current time and fills the first Length
// entries of `array` with random values in the range 1..10.
// Uses the file-level counter `i`, which is left equal to Length.
void create_list(int *array)
{
    srand(time(NULL));

    i = 0;
    while (i < Length)
    {
        array[i] = (rand() % 10) + 1;
        i++;
    }
}
// Worker for the first threaded version: scans this thread's slice of the
// global array and increments the global `count` for every 3 found.
// threadid carries the zero-based thread index in the pointer value.
// The increment of `count` is not protected by any lock.
void* count3s(void* threadid)
{
    long tid = (long)threadid;
    int length_per_thread = Length / NUM_THREADS;
    long start = tid * (long)length_per_thread;
    long stop = start + length_per_thread;
    int pos;

    for (pos = start; pos < stop; pos++)
    {
        if (array[pos] != 3)
        {
            continue;
        }
        count++;
    }
    pthread_exit(NULL);
}
void* count3s_v2(void* threadid)
{
// This is the function that counts the number of threes for
// the second threaded version.
//int serial = count3s_serial();
//printf("Number of threes: %d\n", serial);
int i = (intptr_t)threadid;
long tid = (long)threadid;
int length_per_thread = Length / NUM_THREADS;
long start = tid * (long)length_per_thread;
for (i = start; i < start + length_per_thread; i++)
{
if (array[i] == 3)
{
pthread_mutex_lock(&m);
count++;
pthread_mutex_unlock(&m);
}
}
pthread_exit(NULL);
}
// Serial reference version: resets the global `count`, scans the whole
// array on the calling thread, and returns the number of 3s found.
// Uses the file-level counter `i`, which is left equal to Length.
int count3s_serial()
{
    count = 0;
    i = 0;
    while (i < Length)
    {
        if (array[i] == 3)
        {
            count++;
        }
        i++;
    }
    return count;
}
// Starts NUM_THREADS workers running count3s and returns the current value
// of the global `count`.
// NOTE: the threads are not joined here, so `count` is read while the
// workers may still be running. The measured time is computed but unused.
int count3s_thread(int id)
{
    clock_t begin = clock();

    i = 0;
    while (i < NUM_THREADS)
    {
        pthread_create(&threads[i], NULL, count3s, (void *)i);
        i++;
    }

    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    (void)time_spent;
    return count;
}
// Starts NUM_THREADS workers running count3s_v2 and returns the current
// value of the global `count`.
// NOTE: the threads are not joined here, so `count` is read while the
// workers may still be running. The measured time is computed but unused.
// NOTE(review): `something` is not declared in the visible code - confirm
// where the attribute object comes from.
int count3s_thread_2(int id)
{
    clock_t begin = clock();

    pthread_attr_init(&something);
    i = 0;
    while (i < NUM_THREADS)
    {
        pthread_create(&threads[i], NULL, count3s_v2, (void *)i);
        i++;
    }
    pthread_attr_destroy(&something);

    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    (void)time_spent;
    return count;
}
// Builds the random list, prints it, then runs the serial version and the
// two threaded versions in turn, printing the count reported by each.
// Only the serial run is timed here; the same time_spent value is printed
// for all three versions.
int main()
{
    clock_t begin, end;
    double time_spent;

    create_list(array);

    i = 0;
    while (i < Length)
    {
        printf("%d\n", array[i]);
        i++;
    }

    // Serial version, timed with clock().
    begin = clock();
    int serial = count3s_serial();
    end = clock();
    time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("Serial Version: Number of threes = %d\nSerial Version: Time Spent = %f\n", serial, time_spent);

    // First threaded version (unsynchronized counter).
    count = 0;
    int the_thing = count3s_thread(i);
    printf("Thread Version 1: Number of threes = %d\nThread Version 1: Time Spent = %f\n", the_thing, time_spent);

    // Second threaded version (mutex-protected counter).
    count = 0;
    int the_other_thing = count3s_thread_2(i);
    printf("Thread Version 2: Number of threes = %d\nThread Version 2: Time Spent = %f\n", the_other_thing, time_spent);

    pthread_exit(NULL);
}
The problem is that you spawn the threads but don't wait for them to finish before printing the result. Both thread versions have the same problem. Use pthread_join to wait for the threads to exit or implement some other synchronisation for parent to know when the threads have completed their work.
For example, add the following block of code to the end of both count3s_thread and count3s_thread_2. It will wait for the threads to complete before printing the result. NOTE: You must add it to both functions (even though you are ok for the first one to have the wrong count). Otherwise when you run the second threading version the first set of threads are likely to still be executing and will mess up the global count.
for (i = 0; i < NUM_THREADS; i++) {
pthread_join(threads[i], NULL);
}
Okay, mucked around with your code and got it to compile. You aren’t waiting for your threads to return before you announce the result. You need to pthread_join() each of your threads in count3s_thread() before it returns.
The time measurement in the thread_work function is not working.
The code is a little bit messy, but I just want you to look at the thread_work function
and explain why the print_time function keeps printing an interval of 0.
(I have included the whole code just in case; I'm sorry for your eyes, really.)
#include <stdio.h>
#include <pthread.h>
#include <time.h>
#include <stdlib.h>
#include <semaphore.h>
#include <unistd.h>
#define num_thread 20
char str[11];
void *thread_work(void *tid);
void generate_str(int n);
void str_sort(int n);
void check_sort(void);
void print_time(struct timespec *myclock);
void print_time_start(struct timespec *myclock);
void print_time_end(struct timespec *myclock);
sem_t my_sem;
/*
 * Starts num_thread workers running thread_work (pausing 1 microsecond
 * after each creation), waits for all of them, and prints the start time,
 * end time and total duration measured with CLOCK_REALTIME.
 */
int main(void)
{
    pthread_t workers[num_thread];
    struct timespec t1[2];      /* [0] = start stamp, [1] = end stamp */
    int rc;
    int t = 0;

    srand(time(NULL));
    rc = sem_init(&my_sem, 0, 1);

    clock_gettime(CLOCK_REALTIME, &t1[0]);
    print_time_start(t1);

    while(t < num_thread)
    {
        rc = pthread_create(&workers[t], NULL, thread_work, (void *)t);
        usleep(1);
        t++;
    }

    for(t=0; t<num_thread; t++)
    {
        rc = pthread_join(workers[t], NULL);
    }

    clock_gettime(CLOCK_REALTIME, &t1[1]);
    print_time_end(t1);

    sem_destroy(&my_sem);
    return 0;
}
/*
 * Thread entry point. Announces itself, sleeps one second, then - while
 * holding my_sem - generates a string, sorts it, checks the ordering and
 * prints how long those three steps took (CLOCK_REALTIME).
 *
 * t carries the thread number in the pointer value.
 * Returns NULL.
 */
void *thread_work(void *t)
{
    /* Go through long so the pointer is not narrowed to int in one step. */
    int n = (int)(long)t;
    struct timespec t2[2];      /* [0] = start stamp, [1] = end stamp */

    printf("########## Thread #%2d starting ########## \n",n);
    sleep(1);

    sem_wait(&my_sem);                          //Entry Section
    clock_gettime(CLOCK_REALTIME, &t2[0]);      //Critical Section Start
    generate_str(n);
    str_sort(n);
    check_sort();
    clock_gettime(CLOCK_REALTIME, &t2[1]);
    print_time(t2);                             //Critical Section End
    sem_post(&my_sem);                          //Exit Section

    /* A function returning void * must return a value on every path. */
    return NULL;
}
/*
 * Bubble-sorts the first 10 characters of the global `str` into ascending
 * order, then prints "[n] " followed by the sorted characters.
 */
void str_sort(int n)
{
    int pass, k;

    for(pass=0; pass<9; pass++)
    {
        int unsorted = 9-pass;   /* the tail beyond this index is already in place */
        for(k=0; k<unsorted; k++)
        {
            if(str[k]<=str[k+1])
                continue;
            int held=str[k];
            str[k]=str[k+1];
            str[k+1]=held;
        }
    }

    printf("[%2d] ",n);
    k = 0;
    while(k<10)
    {
        printf("%2c", str[k]);
        k++;
    }
}
/*
 * Reseeds rand() with n and fills the global `str` with 10 characters in
 * the code range 97..122, followed by a terminating NUL.
 */
void generate_str(int n)
{
    int pos = 0;

    srand(n); //differentiate the string of each threads
    while(pos<10)
    {
        str[pos] = (97+rand()%26);
        pos++;
    }
    str[10]='\0';
}
/*
 * Prints " [O]TRUE " if the first 10 characters of the global `str` are in
 * non-decreasing order, " [X]FALSE " otherwise.
 */
void check_sort(void)
{
    int out_of_order = 0;
    int k;

    /* One descending pair is enough to decide. */
    for(k=0; k<9 && !out_of_order; k++)
    {
        out_of_order = (str[k]>str[k+1]);
    }

    printf(out_of_order ? " [X]FALSE " : " [O]TRUE ");
}
/*
 * Prints the two time stamps in myclock[0] and myclock[1] as
 * "<seconds mod 60>.<nanoseconds>" and the interval between them in
 * nanoseconds.
 *
 * The nanosecond field is zero-padded to nine digits so it reads as a
 * decimal fraction (5 s + 123 ns is "5.000000123", not "5.123"). The
 * interval is computed in long long so the seconds-to-nanoseconds scaling
 * cannot overflow where long is 32 bits wide.
 */
void print_time(struct timespec *myclock)
{
    long sec;
    long long delay;

    sec = myclock[0].tv_sec % 60;
    printf(" %ld.%09ld -> ", sec, (long)myclock[0].tv_nsec);
    sec = myclock[1].tv_sec % 60;
    printf("%ld.%09ld", sec, (long)myclock[1].tv_nsec);

    /* A negative nanosecond difference is absorbed by the seconds term,
       so no separate borrow branch is needed. */
    delay = (long long)(myclock[1].tv_sec - myclock[0].tv_sec) * 1000000000LL
          + (myclock[1].tv_nsec - myclock[0].tv_nsec);

    printf(", Interval : %lld ns\n", delay);
}
/*
 * Prints the start stamp myclock[0] as "<seconds mod 60>.<nanoseconds>".
 * The nanosecond field is zero-padded to nine digits so it reads as a
 * decimal fraction.
 */
void print_time_start(struct timespec *myclock)
{
    long sec = myclock[0].tv_sec % 60;

    printf("########## Thread: Start Time -> %ld.%09ld\n", sec, (long)myclock[0].tv_nsec);
}
/*
 * Prints the end stamp myclock[1] as "<seconds mod 60>.<nanoseconds>" and
 * the time between myclock[0] and myclock[1] in microseconds.
 *
 * The nanosecond field is zero-padded to nine digits so it reads as a
 * decimal fraction. The interval is computed in long long so the
 * seconds-to-nanoseconds scaling cannot overflow where long is 32 bits wide.
 */
void print_time_end(struct timespec *myclock)
{
    long sec;
    long long delay;

    sec = myclock[1].tv_sec % 60;
    printf("########## Thread: End Time -> %ld.%09ld ", sec, (long)myclock[1].tv_nsec);

    /* The unit of delay is nano second */
    delay = (long long)(myclock[1].tv_sec - myclock[0].tv_sec) * 1000000000LL
          + (myclock[1].tv_nsec - myclock[0].tv_nsec);

    delay = delay / 1000; //The unit of delay is now micro second
    printf("(Thread Execution Time -> %lld micro second)\n", delay);
}
// Excerpt from thread_work: the region timed between the two clock readings.
clock_gettime(CLOCK_REALTIME, &t2[0]); //Critical Section Start
generate_str(n);
str_sort(n);
check_sort();
clock_gettime(CLOCK_REALTIME, &t2[1]);
It could be that the three functions execute so fast that the system clock does not advance between the two readings. You could try to get a higher resolution by changing CLOCK_REALTIME to CLOCK_THREAD_CPUTIME_ID or CLOCK_PROCESS_CPUTIME_ID.