The time measuring in thread_work function is not working.
Code is a little bit nasty but I just want you to look at the thread_work function
and teach me why the print_time function keeps generating 0 value.
(I write the whole code just in case, I'm sorry for your eyes, really)
#include <stdio.h>
#include <pthread.h>
#include <time.h>
#include <stdlib.h>
#include <semaphore.h>
#include <unistd.h>
#define num_thread 20
char str[11];
void *thread_work(void *tid);
void generate_str(int n);
void str_sort(int n);
void check_sort(void);
void print_time(struct timespec *myclock);
void print_time_start(struct timespec *myclock);
void print_time_end(struct timespec *myclock);
sem_t my_sem;
int main(void)
{
pthread_t tid[num_thread];
int ret;
int t;
struct timespec t1[2];
srand(time(NULL));
ret = sem_init(&my_sem, 0, 1);
clock_gettime(CLOCK_REALTIME, &t1[0]);
print_time_start(t1);
for(t=0; t<num_thread; t++)
{
ret = pthread_create(&tid[t], NULL, thread_work, (void *)t);
usleep(1);
}
for(t=0; t<num_thread; t++)
ret = pthread_join(tid[t], NULL);
clock_gettime(CLOCK_REALTIME, &t1[1]);
print_time_end(t1);
sem_destroy(&my_sem);
return 0;
}
void *thread_work(void *t)
{
int n = (int )t;
struct timespec t2[2];
printf("########## Thread #%2d starting ########## \n",n);
sleep(1);
sem_wait(&my_sem); //Entry Section
clock_gettime(CLOCK_REALTIME, &t2[0]); //Critical Section Start
generate_str(n);
str_sort(n);
check_sort();
clock_gettime(CLOCK_REALTIME, &t2[1]);
print_time(t2); //Critical Section End
sem_post(&my_sem); //Exit Section
}
void str_sort(int n)
{
int temp;
int i, j;
for(i=0; i<9; i++)
for(j=0; j<9-i; j++)
{
if(str[j]>str[j+1])
{
temp=str[j];
str[j]=str[j+1];
str[j+1]=temp;
}
}
printf("[%2d] ",n);
for(i=0; i<10; i++)
printf("%2c", str[i]);
}
void generate_str(int n)
{
int i;
int num;
srand(n); //differentiate the string of each threads
for(i=0; i<10; i++)
{
num = (97+rand()%26);
str[i]=num;
}
str[10]='\0';
}
void check_sort(void)
{
int i;
int count=0;
for(i=0; i<9; i++)
{
if(str[i]>str[i+1])
count++;
}
if(count != 0)
printf(" [X]FALSE ");
else
printf(" [O]TRUE ");
}
void print_time(struct timespec *myclock)
{
long delay, temp, temp_n, sec;
sec = myclock[0].tv_sec % 60;
printf(" %ld.%ld -> ", sec, myclock[0].tv_nsec);
sec = myclock[1].tv_sec % 60;
printf("%ld.%ld", sec, myclock[1].tv_nsec);
if(myclock[1].tv_nsec >= myclock[0].tv_nsec)
{
temp = myclock[1].tv_sec - myclock[0].tv_sec;
temp_n = myclock[1].tv_nsec - myclock[0].tv_nsec;
delay = 1000000000 * temp + temp_n;
}
else
{
temp = myclock[1].tv_sec - myclock[0].tv_sec - 1;
temp_n = 1000000000 + myclock[1].tv_nsec - myclock[0].tv_nsec;
delay = 1000000000 * temp + temp_n;
}
printf(", Interval : %ld ns\n", delay);
}
void print_time_start(struct timespec *myclock)
{
long sec;
sec = myclock[0].tv_sec % 60;
printf("########## Thread: Start Time -> %ld.%ld\n", sec, myclock[0].tv_nsec);
}
void print_time_end(struct timespec *myclock)
{
long delay, temp, temp_n, sec;
sec = myclock[1].tv_sec % 60;
printf("########## Thread: End Time -> %ld.%ld ", sec, myclock[1].tv_nsec);
if (myclock[1].tv_nsec >= myclock[0].tv_nsec)
{
temp = myclock[1].tv_sec - myclock[0].tv_sec;
temp_n = myclock[1].tv_nsec - myclock[0].tv_nsec;
delay = 1000000000 * temp + temp_n; //The unit of delay is nano second
}
else
{
temp = myclock[1].tv_sec - myclock[0].tv_sec - 1;
temp_n = 1000000000 + myclock[1].tv_nsec - myclock[0].tv_nsec;
delay = 1000000000 * temp + temp_n; //The unit of delay is nano second
}
delay = delay / 1000; //The unit of delay is now micro second
printf("(Thread Execution Time -> %ld micro second)\n", delay);
}
clock_gettime(CLOCK_REALTIME, &t2[0]); //Critical Section Start
generate_str(n);
str_sort(n);
check_sort();
clock_gettime(CLOCK_REALTIME, &t2[1]);
Could be that the three methods execute so fast that the system clock doesn't progress. You could try and get a higher solution by changing CLOCK_REALTIME to CLOCK_THREAD_CPUTIME_ID or CLOCK_PROCESS_CPUTIME_ID.
Related
I'm trying to learn about multithreaded algorithms so I've implemented a simple find max number function of an array.
I've made a baseline program (findMax1.c) which loads from a file about 263 million int numbers into memory.
Then I simply use a for loop to find the max number. Then I've made another program (findMax2.c) which uses 4 threads.
I chose 4 threads because the CPU (intel i5 4460) I'm using has 4 cores and 1 thread per core. So my guess is that
if I assign each core a chunk of the array to process it would be more efficient because that way I'll have fewer cache
misses. Now, each thread finds the max number from each chunk, then I join all threads to finally find the max number
from all those chunks. The baseline program findMax1.c takes about 660ms to complete the task, so my initial thought was
that findMax2.c (which uses 4 threads) would take about 165ms (660ms / 4) to complete since now I have 4 threads running
all in parallel to do the same task, but findMax2.c takes about 610ms. Only 50ms less than findMax1.c.
What am I missing? is there something wrong with the implementation of the threaded program?
findMax1.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
int main(void)
{
int i, *array, max = 0, position;
size_t array_size_in_bytes = 1024*1024*1024, elements_read, array_size;
FILE *f;
clock_t t;
double time;
array = (int*) malloc(array_size_in_bytes);
assert(array != NULL); // assert if condition is falsa
printf("Loading array...");
t = clock();
f = fopen("numbers.bin", "rb");
assert(f != NULL);
elements_read = fread(array, array_size_in_bytes, 1, f);
t = clock() - t;
time = ((double) t) / CLOCKS_PER_SEC;
assert(elements_read == 1);
printf("done!\n");
printf("File load time: %f [s]\n", time);
fclose(f);
array_size = array_size_in_bytes / sizeof(int);
printf("Finding max...");
t = clock();
for(i = 0; i < array_size; i++)
if(array[i] > max)
{
max = array[i];
position = i;
}
t = clock() - t;
time = ((double) t) / CLOCKS_PER_SEC;
printf("done!\n");
printf("----------- Program results -------------\nMax number: %d position %d\n", max, position);
printf("Time %f [s]\n", time);
return 0;
}
findMax2.c:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#define NUM_THREADS 4
int max_chunk[NUM_THREADS], pos_chunk[NUM_THREADS];
int *array;
pthread_t tid[NUM_THREADS];
void *thread(void *arg)
{
size_t array_size_in_bytes = 1024*1024*1024;
int i, rc, offset, chunk_size, array_size, *core_id = (int*) arg, num_cores = sysconf(_SC_NPROCESSORS_ONLN);
pthread_t id = pthread_self();
cpu_set_t cpuset;
if (*core_id < 0 || *core_id >= num_cores)
return NULL;
CPU_ZERO(&cpuset);
CPU_SET(*core_id, &cpuset);
rc = pthread_setaffinity_np(id, sizeof(cpu_set_t), &cpuset);
if(rc != 0)
{
printf("pthread_setaffinity_np() failed! - rc %d\n", rc);
return NULL;
}
printf("Thread running on CPU %d\n", sched_getcpu());
array_size = (int) (array_size_in_bytes / sizeof(int));
chunk_size = (int) (array_size / NUM_THREADS);
offset = chunk_size * (*core_id);
// Find max number in the array chunk
for(i = offset; i < (offset + chunk_size); i++)
{
if(array[i] > max_chunk[*core_id])
{
max_chunk[*core_id] = array[i];
pos_chunk[*core_id] = i;
}
}
return NULL;
}
void load_array(void)
{
FILE *f;
size_t array_size_in_bytes = 1024*1024*1024, elements_read;
array = (int*) malloc(array_size_in_bytes);
assert(array != NULL); // assert if condition is false
printf("Loading array...");
f = fopen("numbers.bin", "rb");
assert(f != NULL);
elements_read = fread(array, array_size_in_bytes, 1, f);
assert(elements_read == 1);
printf("done!\n");
fclose(f);
}
int main(void)
{
int i, max = 0, position, id[NUM_THREADS], rc;
clock_t t;
double time;
load_array();
printf("Finding max...");
t = clock();
// Create threads
for(i = 0; i < NUM_THREADS; i++)
{
id[i] = i; // uso id para pasarle un puntero distinto a cada thread
rc = pthread_create(&(tid[i]), NULL, &thread, (void*)(id + i));
if (rc != 0)
printf("Can't create thread! rc = %d\n", rc);
else
printf("Thread %lu created\n", tid[i]);
}
// Join threads
for(i = 0; i < NUM_THREADS; i++)
pthread_join(tid[i], NULL);
// Find max number from all chunks
for(i = 0; i < NUM_THREADS; i++)
if(max_chunk[i] > max)
{
max = max_chunk[i];
position = pos_chunk[i];
}
t = clock() - t;
time = ((double) t) / CLOCKS_PER_SEC;
printf("done!\n");
free(array);
printf("----------- Program results -------------\nMax number: %d position %d\n", max, position);
printf("Time %f [s]\n", time);
pthread_exit(NULL);
return 0;
}
First of all, you're measuring your time wrong.
clock() measures process CPU time, i.e., time used by all threads. The real elapsed time will be fraction of that. clock_gettime(CLOCK_MONOTONIC,...) should yield better measurements.
Second, your core loops aren't at all comparable.
In the multithreaded program you're writing in each loop iteration to global variables that are very close to each other and that is horrible for cache contention.
You could space that global memory apart (make each array item a cache-aligned struct (_Alignas(64))) and that'll help the time, but a better and fairer approach would be to use local variables (which should go into registers), copying the approach of the first loop, and then write out the chunk result to memory at the end of the loop:
int l_max_chunk=0, l_pos_chunk=0, *a;
for(i = 0,a=array+offset; i < chunk_size; i++)
if(a[i] > l_max_chunk) l_max_chunk=a[i], l_pos_chunk=i;
max_chunk[*core_id] = l_max_chunk;
pos_chunk[*core_id] = l_pos_chunk;
Here's your modified test program with expected speedups (I'm getting approx. a 2x speedup on my two-core processor).
(I've also taken the liberty of replacing the file load with in-memory initialization, to make it simpler to test.)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <stdint.h>
struct timespec ts0,ts1;
uint64_t sc_timespec_diff(struct timespec Ts1, struct timespec Ts0) { return (Ts1.tv_sec - Ts0.tv_sec)*1000000000+(Ts1.tv_nsec - Ts0.tv_nsec); }
#define NUM_THREADS 4
int max_chunk[NUM_THREADS], pos_chunk[NUM_THREADS];
int *array;
pthread_t tid[NUM_THREADS];
void *thread(void *arg)
{
size_t array_size_in_bytes = 1024*1024*1024;
int i, rc, offset, chunk_size, array_size, *core_id = (int*) arg, num_cores = sysconf(_SC_NPROCESSORS_ONLN);
#if 1 //shouldn't make much difference
pthread_t id = pthread_self();
cpu_set_t cpuset;
if (*core_id < 0 || *core_id >= num_cores)
return NULL;
CPU_ZERO(&cpuset);
CPU_SET(*core_id, &cpuset);
rc = pthread_setaffinity_np(id, sizeof(cpu_set_t), &cpuset);
if(rc != 0)
{
printf("pthread_setaffinity_np() failed! - rc %d\n", rc);
return NULL;
}
printf("Thread running on CPU %d\n", sched_getcpu());
#endif
array_size = (int) (array_size_in_bytes / sizeof(int));
chunk_size = (int) (array_size / NUM_THREADS);
offset = chunk_size * (*core_id);
// Find max number in the array chunk
#if 0 //horrible for caches
for(i = offset; i < (offset + chunk_size); i++)
{
if(array[i] > max_chunk[*core_id])
{
max_chunk[*core_id] = array[i];
pos_chunk[*core_id] = i;
}
}
#else
int l_max_chunk=0, l_pos_chunk=0, *a;
for(i = 0,a=array+offset; i < chunk_size; i++)
if(a[i] > l_max_chunk) l_max_chunk=a[i], l_pos_chunk=i;
max_chunk[*core_id] = l_max_chunk;
pos_chunk[*core_id] = l_pos_chunk;
#endif
return NULL;
}
void load_array(void)
{
FILE *f;
size_t array_size_in_bytes = 1024*1024*1024, array_size=array_size_in_bytes/sizeof(int);
array = (int*) malloc(array_size_in_bytes);
if(array == NULL) abort(); // assert if condition is false
for(size_t i=0; i<array_size; i++) array[i]=i;
}
int main(void)
{
int i, max = 0, position, id[NUM_THREADS], rc;
clock_t t;
double time;
load_array();
printf("Finding max...");
t = clock();
clock_gettime(CLOCK_MONOTONIC,&ts0);
// Create threads
for(i = 0; i < NUM_THREADS; i++)
{
id[i] = i; // uso id para pasarle un puntero distinto a cada thread
rc = pthread_create(&(tid[i]), NULL, &thread, (void*)(id + i));
if (rc != 0)
printf("Can't create thread! rc = %d\n", rc);
else
printf("Thread %lu created\n", tid[i]);
}
// Join threads
for(i = 0; i < NUM_THREADS; i++)
pthread_join(tid[i], NULL);
// Find max number from all chunks
for(i = 0; i < NUM_THREADS; i++)
if(max_chunk[i] > max)
{
max = max_chunk[i];
position = pos_chunk[i];
}
clock_gettime(CLOCK_MONOTONIC,&ts1);
printf("Time2 %.6LF\n", sc_timespec_diff(ts1,ts0)/1E9L);
t = clock() - t;
time = ((double) t) / CLOCKS_PER_SEC;
printf("done!\n");
free(array);
printf("----------- Program results -------------\nMax number: %d position %d\n", max, position);
printf("Time %f [s]\n", time);
pthread_exit(NULL);
return 0;
}
My timings:
0.188917 for the signle threaded version
2.511590 for the original multithreaded version (measured with clock_gettime(CLOCK_MONOTONIC,...)
0.099802 with the modified threaded version (measured with clock_gettime(CLOCK_MONOTONIC,...)
ran on a Linux machine with Intel(R) Core(TM) i7-2620M CPU # 2.70GHz.
I wrote a multi-thread today. The task of the thread is to write data to a large array. A single thread takes about 0.7s, but it takes more than 20 seconds to write independently and concurrently with two threads. The same operation is under Windows or Multi-process seconds under Linux all are about 0.7s.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/time.h>
#include <sys/types.h>
#define SIZE_IN_MB 256
#define NUM_BYTE (SIZE_IN_MB*1024*1024)
#define NUM_LONG (NUM_BYTE/sizeof(long))
#define CHILD_COUNT 2
#define STEP_SIZE 1 //use to avoid cache,when set to 8
unsigned long Time[CHILD_COUNT];
struct Arg {
unsigned long *data;
int index;
};
unsigned long diffTime(struct timeval *end, struct timeval *start) {
return labs((end->tv_sec - start->tv_sec) * 1000 + (end->tv_usec - start->tv_usec) / 1000);
}
void getTime(struct timeval *t) {
gettimeofday(t, NULL);
}
unsigned long writeData() {
struct timeval start, end;
getTime(&start);
unsigned long *data = (unsigned long *) malloc(NUM_LONG * sizeof(long));
for (int i = 0; i < STEP_SIZE; ++i) {
for (size_t k = i; k < NUM_LONG; k+=STEP_SIZE)
data[k] = 0x5a5a5a5a5a5a5a5a + rand();
}
getTime(&end);
free(data);
return diffTime(&end, &start);
}
void *child(void *arg) {
Time[((struct Arg *) arg)->index] = writeData();
}
void waitAll(pthread_t threads[]) {
for (int i = 0; i < CHILD_COUNT; i++) {
pthread_join(threads[i], NULL);
}
}
void printAverTime(int count) {
unsigned long time = 0;
for (int i = 0; i < count; ++i) {
time += Time[i];
}
printf("Thread: %ld\n", time / count);
}
void thread_test() {
pthread_t threads[CHILD_COUNT];
struct Arg arg[CHILD_COUNT] = {};
for (int i = 0; i < CHILD_COUNT; i++) {
arg[i].index = i;
pthread_create(&threads[i], NULL, child, (void *) &arg[i]);
}
waitAll(threads);
printAverTime(CHILD_COUNT);
}
void process_test() {
int p[CHILD_COUNT][2];
for (int i = 0; i < CHILD_COUNT; ++i) {
pipe(p[i]);
}
for (int i = 0; i < CHILD_COUNT; i++) {
if (fork() == 0) {
unsigned long t = writeData();
write(p[i][1], &t, sizeof(t));
exit(0);
}
}
unsigned long t = 0,tmp= 0;
for (int i = 0; i < CHILD_COUNT; ++i) {
read(p[i][0], &tmp, sizeof(tmp));
t += tmp;
}
printf("Process: %ld\n", t / CHILD_COUNT);
}
int main() {
thread_test();
process_test();
}
The penalty you are paying when using multiple threads is not for writing to memory but for the fact that you are calling rand(), which involves locking, many times in the following nested loops in writeData():
for (int i = 0; i < STEP_SIZE; ++i) {
for (size_t k = i; k < NUM_LONG; k+=STEP_SIZE)
data[k] = 0x5a5a5a5a5a5a5a5a + rand();
}
So you are incurring a huge penalty because for each call to rand() only one thread can get in at a time and all the other threads have to wait and there is overhead to this waiting.
You can fix your code to avoid collisions in the inner loop by using a reentrant form of rand(), such as rand_r() (which is documented at https://man7.org/linux/man-pages/man3/rand.3.html)
unsigned int seed = rand();
for (int i = 0; i < STEP_SIZE; ++i) {
for (size_t k = i; k < NUM_LONG; k+=STEP_SIZE)
data[k] = 0x5a5a5a5a5a5a5a5a + rand_r(&seed);
}
I currently have a multi-threaded C program coded using Pthreads which uses 2 threads. I want to increase the no. of threads and measure speed up upon doing so. I would like to run my code in an automated manner where the no. of threads used keeps getting incremented and I want to graphically display running times of my code. I would love it if I could get a clue in on how to do so especially on how to automate the entire process and plotting it graphically. Here is my code:
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define NUM_THREADS 2
#define VECTOR_SIZE 40
struct DOTdata
{
/* data */
long X[VECTOR_SIZE];
long Y[VECTOR_SIZE];
long sum;
long compute_length;
};
struct DOTdata dotstr;
pthread_mutex_t mutex_sum;
void *calcDOT(void *);
int main(int argc, char *argv[])
{
long vec_index;
for(vec_index = 0 ; vec_index < VECTOR_SIZE ; vec_index++){
dotstr.X[vec_index] = vec_index + 1;
dotstr.Y[vec_index] = vec_index + 2;
}
dotstr.sum = 0;
dotstr.compute_length = VECTOR_SIZE/NUM_THREADS;
pthread_t call_thread[NUM_THREADS];
pthread_attr_t attr;
void *status;
pthread_mutex_init(&mutex_sum, NULL);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
long i;
for(i = 0 ; i < NUM_THREADS ; i++){
pthread_create(&call_thread[i], &attr, calcDOT, (void *)i);
}
pthread_attr_destroy(&attr);
for (i = 0 ; i < NUM_THREADS ; i++){
pthread_join(call_thread[i], &status);
}
printf("Resultant X*Y is %ld\n", dotstr.sum);
pthread_mutex_destroy(&mutex_sum);
pthread_exit(NULL);
}
void *calcDOT(void *thread_id)
{
long vec_index;
long start_index;
long end_index;
long length;
long offset;
long sum = 0;
offset = (long)thread_id;
length = dotstr.compute_length;
start_index = offset * length;
end_index = (start_index + length) - 1;
for(vec_index = start_index ; vec_index < end_index ; vec_index++){
sum += (dotstr.X[vec_index] * dotstr.Y[vec_index]);
}
pthread_mutex_lock(&mutex_sum);
dotstr.sum += sum;
pthread_mutex_unlock(&mutex_sum);
pthread_exit((void *)thread_id);
}
I would like to increment my NUM_THREADS parameter and run it after each increment, record the execution time after each increment and plot a graph of execution time vs number of threads.
I tried a naive approach by increasing the number of threads, timing it with time.h and plotting it with gnuplot. Each iteration we double the number of threads and we print the time for an iteration. We use gnuplot to display a graph with number of threads on the x-axis and execution time on the y-axis
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define NUM_THREADS 2
#define VECTOR_SIZE 40
struct DOTdata {
/* data */
long X[VECTOR_SIZE];
long Y[VECTOR_SIZE];
long sum;
long compute_length;
};
struct DOTdata dotstr;
pthread_mutex_t mutex_sum;
void *calcDOT(void *);
int main(int argc, char *argv[]) {
double xvals[VECTOR_SIZE / NUM_THREADS];
double yvals[VECTOR_SIZE / NUM_THREADS];
int index = 0;
for (int count = NUM_THREADS; count < VECTOR_SIZE / NUM_THREADS; count = count * 2) {
clock_t begin = clock();
long vec_index;
for (vec_index = 0; vec_index < VECTOR_SIZE; vec_index++) {
dotstr.X[vec_index] = vec_index + 1;
dotstr.Y[vec_index] = vec_index + 2;
}
dotstr.sum = 0;
dotstr.compute_length = VECTOR_SIZE / count;
pthread_t call_thread[count];
pthread_attr_t attr;
void *status;
pthread_mutex_init(&mutex_sum, NULL);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
long i;
for (i = 0; i < count; i++) {
pthread_create(&call_thread[i], &attr, calcDOT, (void *) i);
}
pthread_attr_destroy(&attr);
for (i = 0; i < count; i++) {
pthread_join(call_thread[i], &status);
}
printf("Resultant X*Y is %ld\n", dotstr.sum);
pthread_mutex_destroy(&mutex_sum);
clock_t end = clock();
double time_spent = (double) (end - begin) / CLOCKS_PER_SEC;
printf("time spent: %f NUM_THREADS: %d\n", time_spent, count);
xvals[index] = count;
yvals[index] = time_spent;
index++;
}
FILE * gnuplotPipe = popen ("gnuplot -persistent", "w");
fprintf(gnuplotPipe, "plot '-' \n");
for (int i = 0; i < VECTOR_SIZE / NUM_THREADS; i++)
{
fprintf(gnuplotPipe, "%lf %lf\n", xvals[i], yvals[i]);
}
fprintf(gnuplotPipe, "e");
pthread_exit(NULL);
}
void *calcDOT(void *thread_id) {
long vec_index;
long start_index;
long end_index;
long length;
long offset;
long sum = 0;
offset = (long) thread_id;
length = dotstr.compute_length;
start_index = offset * length;
end_index = (start_index + length) - 1;
for (vec_index = start_index; vec_index < end_index; vec_index++) {
sum += (dotstr.X[vec_index] * dotstr.Y[vec_index]);
}
pthread_mutex_lock(&mutex_sum);
dotstr.sum += sum;
pthread_mutex_unlock(&mutex_sum);
pthread_exit((void *) thread_id);
}
Output
Resultant X*Y is 20900
time spent: 0.000155 NUM_THREADS: 2
Resultant X*Y is 19860
time spent: 0.000406 NUM_THREADS: 4
Resultant X*Y is 17680
time spent: 0.000112 NUM_THREADS: 8
Resultant X*Y is 5712
time spent: 0.000587 NUM_THREADS: 16
I have a program that counts the time to send or receive a amount of data. When I receive the data the clocks says it only takes about half of the time it actually takes.
Output from the terminal that recives the data:
Amount of data recived: 60296112/300000000
Time: 4
Start time: 3269
End time: 4849790
Clocks per sec: 1000000
And the output from the terminal that sends the data:
Sent 300000000 bytes of data
Time to send was 10.793425
The terminal that sends the data will send a stop signal after it has sent all other data. When I watch the terminal that receives the data I can see that it starts counting when the other terminal starts sending data and I can see it print out output from clock() way longer that the output says it does.
My code for the receive part:
static void recive_sock(int socket_fd,char *buffert, size_t buffert_size, struct sockaddr *other, socklen_t *other_size){
clock_t start_t, end_t;
long int total_time = 0;
printf("Listning for data\n" );
fflush(stdout);
int run = 1;
char start = 1;
int amount = 0;
int recive_length;
while(run){
recive_length = recvfrom(socket_fd, buffert, buffert_size, 0, other, other_size );
if(recive_length < 0){
die("Recvfrom failed");
}
if(strncmp(buffert, "HELLO!", 6) == 0){
amount += recive_length;
if(start == 1){
start = 0;
start_t = clock();
printf("Start: %ld\n",start_t );
}
printf("%ld\n",clock() );
}
else if (strncmp(buffert, "die", 3) == 0) {
run = 0;
end_t = clock();
printf("End %ld\n",end_t );
total_time = (end_t - start_t) / CLOCKS_PER_SEC;
printf("Amount of data recived: %d/%d\nTime: %ld\nStart time: %ld\nEnd time: %ld\n,Clocks per sec: %ld", amount, AMOUNT, total_time, start_t, end_t, CLOCKS_PER_SEC);
}
}
}
The function clock will return the CPU time which is probably not what you are looking for, instead you want to use something like gettimeofday or clock_gettime for system that support it. Then you can compare the time before and after to get the time elapsed. For future readers, here is how to do it with clock_gettime if your system supports it:
#include <stdio.h>
#include <time.h> // for clock_gettime()
int main(void) {
int i;
int j;
int sum = 1;
struct timespec t1, t2;
double elapsedTime;
// start timer
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t1);
// do something here
for (i = 0; i < 10000; i++) {
for (j = 0; j < 10000; j++) {
sum *= i+j;
}
}
// stop timer
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t2);
// compute and print the elapsed time in millisec
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;
elapsedTime += (t2.tv_nsec - t1.tv_nsec) / 1000000.0;
printf("%.3f ms elapsed\n", elapsedTime);
return 0;
}
Here is a simple example on how to measure time with gettimeofday (less accurate for high-precision timing):
#include <stdio.h>
#include <time.h> // for gettimeofday()
int main(void) {
int i;
int j;
int sum = 1;
struct timeval t1, t2;
double elapsedTime;
// start timer
gettimeofday(&t1, NULL);
// do something here
for (i = 0; i < 10000; i++) {
for (j = 0; j < 10000; j++) {
sum *= i+j;
}
}
// stop timer
gettimeofday(&t2, NULL);
// compute and print the elapsed time in millisec
elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;
elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0;
printf("%.3f ms elapsed\n", elapsedTime);
return 0;
}
I was doing Histogram using pthreads and after long struggle on it.. finally it said:
Segmentation Fault (Core Dumped)
unfortunately I had this line
p=(struct1 *)malloc(sizeof(struct1));
after getting the values to the struct variables from command line.. So that was cleared off.. Thanks for #DNT for letting me know that..
Now when I try to execute the following program.. It sometimes displays the output and sometimes it is going out to the which_bin function and prints the following
output type 1(which is not the correct output):
Data = 0.000000 doesn't belong to a bin!
Quitting
output type 2(almost the correct output of histo with time taken by threads):
10.000-28.000:
28.000-46.000:
46.000-64.000:
64.000-82.000:
82.000-100.000: XXXXXXXXXX
The code to be timed took 0.000415 seconds
My ques is why the same prog when ran shows different outputs.. I am confused of what it is exactly looking for..
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include "timer.h"
void Usage(char prog_name[]);
void Gen_data(void *p);
void Gen_bins(void *p);
int Which_bin(void *p);
void Print_histo(void *p);
void func(void *p);
struct test
{
int bin_count, i, bin;
float min_meas, max_meas;
float* bin_maxes;
int* bin_counts;
int data_count;
float* data;
};
typedef struct test struct1;
int main(int argc, char* argv[])
{
double start, finish, elapsed;
GET_TIME(start);
struct1 *p;
pthread_t th1, th2, th3;
p=(struct1 *)malloc(sizeof(struct1));
if (argc != 5)
Usage(argv[0]);
p->bin_count = strtol(argv[1], NULL, 10);
p->min_meas = strtof(argv[2], NULL);
p->max_meas = strtof(argv[3], NULL);
p->data_count = strtol(argv[4], NULL, 10);
p->bin_maxes = malloc(p->bin_count*sizeof(float));
p->bin_counts = malloc(p->bin_count*sizeof(int));
p->data = malloc(p->data_count*sizeof(float));
pthread_create(&th1,NULL,(void*) Gen_data,(void*) p);
pthread_create(&th2,NULL,(void*) Gen_bins,(void*) p);
pthread_create(&th3,NULL,(void*) func,(void*) p);
printf("Hi\n");
pthread_join(th1,NULL);
pthread_join(th2,NULL);
pthread_join(th3,NULL);
Print_histo(p);
free(p->data);
free(p->bin_maxes);
free(p->bin_counts);
GET_TIME(finish);
elapsed = finish - start;
printf("The code to be timed took %f seconds\n", elapsed);
return 0;
} /* main */
void func(void *p)
{
int i;
struct1 *args;
args=(struct1*)p;
for (i = 0; i < args->data_count; i++)
{
args->bin = Which_bin(args);
args->bin_counts[args->bin]++;
}
# ifdef DEBUG
printf("bin_counts = ");
for (i = 0; i < args->bin_count; i++)
printf("%d ", args->bin_counts[i]);
printf("\n");
# endif
}
/*---------------------------------------------------------------------
* Function: Usage
* Purpose: Print a message showing how to run program and quit
* In arg: prog_name: the name of the program from the command line
*/
void Usage(char prog_name[] /* in */)
{
fprintf(stderr, "usage: %s ", prog_name);
fprintf(stderr, "<bin_count> <min_meas> <max_meas> <data_count>\n");
exit(0);
} /* Usage */
void Gen_data(void *p)
{
struct1 *args;
args=(struct1*)p;
int i;
srandom(0);
for (i = 0; i < args->data_count; i++)
args->data[i] = args->min_meas + (args->max_meas - args->min_meas)*random()/((double) RAND_MAX);
#ifdef DEBUG
printf("data = ");
for (i = 0; i < args->data_count; i++)
printf("%4.3f ", args->data[i]);
printf("\n");
#endif
} /* Gen_data */
void Gen_bins(void* p)
{
struct1 *args;
args=(struct1*)p;
float bin_width;
int i;
bin_width = (args->max_meas - args->min_meas)/args->bin_count;
for (i = 0; i < args->bin_count; i++)
{
args->bin_maxes[i] = args->min_meas + (i+1)*bin_width;
args->bin_counts[i] = 0;
}
# ifdef DEBUG
printf("bin_maxes = ");
for (i = 0; i < args->bin_count; i++)
printf("%4.3f ", args->bin_maxes[i]);
printf("\n");
# endif
}
int Which_bin(void* p)
{
struct1 *args;
args=(struct1*)p;
int bottom = 0, top = args->bin_count-1;
int mid;
float bin_max, bin_min;
while (bottom <= top)
{
mid = (bottom + top)/2;
bin_max = args->bin_maxes[mid];
bin_min = (mid == 0) ? args->min_meas: args->bin_maxes[mid-1];
if (*(args->data) >= bin_max)
bottom = mid+1;
else if (*(args->data) < bin_min)
top = mid-1;
else
return mid;
}
fprintf(stderr, "Data = %f doesn't belong to a bin!\n", args->data);
fprintf(stderr, "Quitting\n");
exit(-1);
}
void Print_histo(void *p)
{
struct1 *args;
args=(struct1*)p;
int i, j;
float bin_max, bin_min;
for (i = 0; i < args->bin_count; i++)
{
bin_max = args->bin_maxes[i];
bin_min = (i == 0) ? args->min_meas: args->bin_maxes[i-1];
printf("%.3f-%.3f:\t", bin_min, bin_max);
for (j = 0; j < args->bin_counts[i]; j++)
printf("X");
printf("\n");
}
}
/* Print_histo */ #include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include "timer.h"
void Usage(char prog_name[]);
void Gen_data(void *p);
void Gen_bins(void *p);
int Which_bin(void *p);
void Print_histo(void *p);
void func(void *p);
pthread_mutex_t lock;
struct test
{
int bin_count, i, bin;
float min_meas, max_meas;
float* bin_maxes;
int* bin_counts;
int data_count;
float* data;
};
typedef struct test struct1;
int main(int argc, char* argv[])
{
if (pthread_mutex_init(&lock, NULL) != 0)
{
printf("\n mutex init failed\n");
return 1;
}
double start, finish, elapsed;
GET_TIME(start);
struct1 *p;
pthread_t th1, th2, th3;
p=(struct1 *)malloc(sizeof(struct1));
if (argc != 5)
Usage(argv[0]);
p->bin_count = strtol(argv[1], NULL, 10);
p->min_meas = strtof(argv[2], NULL);
p->max_meas = strtof(argv[3], NULL);
p->data_count = strtol(argv[4], NULL, 10);
p->bin_maxes = malloc(p->bin_count*sizeof(float));
p->bin_counts = malloc(p->bin_count*sizeof(int));
p->data = malloc(p->data_count*sizeof(float));
pthread_create(&th1,NULL,(void*) Gen_data,(void*) p);
pthread_create(&th2,NULL,(void*) Gen_bins,(void*) p);
pthread_create(&th3,NULL,(void*) func,(void*) p);
printf("Hi\n");
pthread_join(th1,NULL);
pthread_join(th2,NULL);
pthread_join(th3,NULL);
Print_histo(p);
free(p->data);
free(p->bin_maxes);
free(p->bin_counts);
GET_TIME(finish);
elapsed = finish - start;
printf("The code to be timed took %f seconds\n", elapsed);
return 0;
} /* main */
void func(void *p)
{
pthread_mutex_lock(&lock);
printf("th3 from Gen_func\n");
int i;
struct1 *args;
args=(struct1*)p;
for (i = 0; i < args->data_count; i++)
{
args->bin = Which_bin(args);
args->bin_counts[args->bin]++;
}
# ifdef DEBUG
printf("bin_counts = ");
for (i = 0; i < args->bin_count; i++)
printf("%d ", args->bin_counts[i]);
printf("\n");
# endif
pthread_mutex_unlock(&lock);
}
/*---------------------------------------------------------------------
* Function: Usage
* Purpose: Print a message showing how to run program and quit
* In arg: prog_name: the name of the program from the command line
*/
void Usage(char prog_name[] /* in */)
{
fprintf(stderr, "usage: %s ", prog_name);
fprintf(stderr, "<bin_count> <min_meas> <max_meas> <data_count>\n");
exit(0);
} /* Usage */
void Gen_data(void *p)
{
pthread_mutex_lock(&lock);
printf("th1 from Gen_data\n");
struct1 *args;
args=(struct1*)p;
int i;
srandom(0);
for (i = 0; i < args->data_count; i++)
args->data[i] = args->min_meas + (args->max_meas - args->min_meas)*random()/((double) RAND_MAX);
#ifdef DEBUG
printf("data = ");
for (i = 0; i < args->data_count; i++)
printf("%4.3f ", args->data[i]);
printf("\n");
#endif
pthread_mutex_unlock(&lock);
} /* Gen_data */
void Gen_bins(void* p)
{
pthread_mutex_lock(&lock);
printf("th2 from Gen_bins\n");
struct1 *args;
args=(struct1*)p;
float bin_width;
int i;
bin_width = (args->max_meas - args->min_meas)/args->bin_count;
for (i = 0; i < args->bin_count; i++)
{
args->bin_maxes[i] = args->min_meas + (i+1)*bin_width;
args->bin_counts[i] = 0;
}
# ifdef DEBUG
printf("bin_maxes = ");
for (i = 0; i < args->bin_count; i++)
printf("%4.3f ", args->bin_maxes[i]);
printf("\n");
# endif
pthread_mutex_unlock(&lock);
}
int Which_bin(void* p)
{
struct1 *args;
args=(struct1*)p;
int bottom = 0, top = args->bin_count-1;
int mid;
float bin_max, bin_min;
while (bottom <= top)
{
mid = (bottom + top)/2;
bin_max = args->bin_maxes[mid];
bin_min = (mid == 0) ? args->min_meas: args->bin_maxes[mid-1];
if (*(args->data) >= bin_max)
bottom = mid+1;
else if (*(args->data) < bin_min)
top = mid-1;
else
return mid;
}
fprintf(stderr, "Data = %f doesn't belong to a bin!\n", args->data);
fprintf(stderr, "Quitting\n");
exit(-1);
}
void Print_histo(void *p)
{
struct1 *args;
args=(struct1*)p;
int i, j;
float bin_max, bin_min;
for (i = 0; i < args->bin_count; i++)
{
bin_max = args->bin_maxes[i];
bin_min = (i == 0) ? args->min_meas: args->bin_maxes[i-1];
printf("%.3f-%.3f:\t", bin_min, bin_max);
for (j = 0; j < args->bin_counts[i]; j++)
printf("X");
printf("\n");
}
}
/* Print_histo */
I have added the lines to see if all the threads are accessing its functions.. I observed this..
output 1:
Hi
th1 from Gen_data
th3 from Gen_func
Data = 0.000000 doesn't belong to a bin!
Quitting
In the output 1, I can see that th2 is not executed and program ended displaying error..
output 2:
th1 from Gen_data
Hi
th2 from Gen_bins
th3 from Gen_func
10.000-28.000:
28.000-46.000:
46.000-64.000:
64.000-82.000:
82.000-100.000: XXXXXXXXXX
The code to be timed took 0.000348 seconds
In output 2, all the threads are executed and so is the output..
I am confused that why the thread th2 is not being executed and how can I make sure that all threads runs in correct order..
I would like to know if the program is logically wrong? if it is wrong logically in that case why is it showing the histogram output at times.. Thanks!
Order of thread execution is not guaranteed. On a modern, multi-core processor, the threads may even execute concurrently. There is no guarantee that the Gen_bins thread completes before the func thread. Since your threads access and manipulate the same data structures, the results are unpredictable as you have noticed.
While I don't think threads are necessary for this application, make the following change to ensure the threads execute in the order listed. Change:
pthread_create(&th1,NULL,(void*) Gen_data,(void*) p);
pthread_create(&th2,NULL,(void*) Gen_bins,(void*) p);
pthread_create(&th3,NULL,(void*) func,(void*) p);
pthread_join(th1,NULL);
pthread_join(th2,NULL);
pthread_join(th3,NULL);
to:
pthread_create(&th1,NULL,(void*) Gen_data,(void*) p);
pthread_join(th1,NULL);
pthread_create(&th2,NULL,(void*) Gen_bins,(void*) p);
pthread_join(th2,NULL);
pthread_create(&th3,NULL,(void*) func,(void*) p);
pthread_join(th3,NULL);
This ensures that each thread executes and completes before the next starts. Again, since the threads aren't executing concurrently, threading isn't necessary for this program and just adds complexity.