What is the problem with this program? It is supposed to calculate the elapsed time of each function call, but to my surprise the elapsed time is always zero because the begin and end times are exactly the same. Does anyone have an explanation for this?
This is the output I get:
TIMING TEST: 10000000 calls to rand()
2113 6249 23817 12054 7060 9945 26819
13831 6820 14149 13035 30858 13924 26467
4268 11314 28400 5239 4496 27757 21452
10878 25064 9049 6508 29612 11373 29913
10234 31769 16167 24553 1875 23992 30606
2606 19539 2184 14832 27089 27474 23310
, .. , ,
End time: 1610034404
Begin time: 1610034404
Elapsed time: 0
Time for each call:,10f
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NCALLS 10000000
#define NCOLS 7
#define NLINES 7

int main(void) {
    int i, val;
    long begin, diff, end;

    begin = time(NULL);
    srand(time(NULL));
    printf("\nTIMING TEST: %d calls to rand()\n\n", NCALLS);
    for (i = 1; i <= NCALLS; ++i) {
        val = rand();
        if (i <= NCOLS * NLINES) {
            printf("%7d", val);
            if (i % NCOLS == 0)
                putchar('\n');
        } else if (i == NCOLS * NLINES + 1)
            printf("%7s\n\n", ", .. , ,");
    }
    end = time(NULL);
    diff = end - begin;
    /* note: %,10f is not a valid printf conversion, which is why the
       last line of the output above shows ",10f" */
    printf("%s%ld\n%s%ld\n%s%ld\n%s%,10f\n\n",
           "End time: ", end,
           "Begin time: ", begin,
           "Elapsed time: ", diff,
           "Time for each call:", (double)diff / NCALLS);
    return 0;
}
Instead of time(NULL), you can use clock():
clock_t t1 = clock();
// your code
clock_t t2 = clock();
printf("%f", (double)(t2 - t1) / CLOCKS_PER_SEC); // divide by CLOCKS_PER_SEC (1 000 000) to get seconds
time() measures in whole seconds, so if your program takes less than a second you won't see any difference.
The difference between the two has already been answered on Stack Overflow: time() vs clock().
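Applied to the program above, a minimal sketch of the same measurement done with clock() might look like this (the volatile sink is an addition of mine to keep the compiler from optimising the calls away):

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NCALLS 10000000

int main(void) {
    volatile int sink;                 /* prevents the loop being optimised out */
    clock_t begin = clock();           /* CPU-time stamp before the loop */
    for (int i = 0; i < NCALLS; ++i)
        sink = rand();                 /* the work being timed */
    (void)sink;
    clock_t end = clock();

    double elapsed = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("Elapsed time: %f s\n", elapsed);
    printf("Time for each call: %e s\n", elapsed / NCALLS);
    return 0;
}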
Changing your code just to spend some random amount of time inside the loop by issuing a system call a few times, using
struct stat file_stat;
for (int j = 0; j < rand() % 1000; j += 1)
    stat(".", &file_stat);
we get the following on a very old machine (for 10,000 calls rather than the 10,000,000 in your code):
toninho@DSK-2009:/mnt/c/Users/toninho/projects/um$ gcc -std=c17 -Wall tlim.c
toninho@DSK-2009:/mnt/c/Users/toninho/projects/um$ ./a.out
TIMING TEST: 10000 calls to rand()
953019096 822572575 552766679 1101222688 890440097
348966778 1483436091 1936203136 1060888701 936990601
524198868 554412390 1109472424 51262334 723194231
353522463 1808580291 673860068 818332399 350403991
442567054 1054917195 229398907 420744931 620127925
1975661852 812007818 1400791797 1471940068 1739247840
1364643097 529639947 1569398779 20035674 92849903
1060567289 1126157009 2111376669 324165122 338724259
719809477 977786583 510114270 981390269 2029486195
1551025212 1112929616 2091082251 1066603801 1722106156
, .. , ,
End time: 1610044947
Begin time: 1610044942
Elapsed time: 5
Time for each call:500.000000
Using the following program:
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>

/* note: NCALLS is deliberately left unparenthesized to match the output
   above: (double)diff / NCALLS expands to (double)diff / 10 * 1000, which
   is why "Time for each call" prints 500 rather than 0.0005. A safer
   definition would be (10 * 1000). */
#define NCALLS 10 * 1000
#define NCOLS 5
#define NLINES 10

int main(void)
{
    int i, val;
    long begin, diff, end;

    begin = time(NULL);
    srand(210701);
    printf("\nTIMING TEST: %d calls to rand()\n\n", NCALLS);
    for (i = 1; i <= NCALLS; ++i)
    {
        val = rand();
        // spend some time
        struct stat file_stat;
        for (int j = 0; j < rand() % 1000; j += 1)
            stat(".", &file_stat);
        if (i <= NCOLS * NLINES)
        {
            printf("%12d", val);
            if (i % NCOLS == 0)
                putchar('\n');
        }
        else if (i == NCOLS * NLINES + 1)
            printf("%7s\n\n", ", .. , ,");
        //printf("%7d: ", i);
        //fgetc(stdin);
    } // for
    end = time(NULL);
    diff = end - begin;
    printf("%s%ld\n%s%ld\n%s%ld\n%s%10f\n\n",
           "End time: ", end,
           "Begin time: ", begin,
           "Elapsed time: ", diff,
           "Time for each call:", (double)diff / NCALLS);
    return 0;
}
Related
I need a 60 Hz timer (triggering once every 16.6 ms).
It works well on Windows (MinGW GCC) but not on Linux (GCC).
Can anyone help me with this? Thanks.
#include <stdio.h>
#include <time.h>

#define PRE_MS (CLOCKS_PER_SEC / 1000)

int main(void)
{
    clock_t pre = clock();
    int cnt = 0;
    printf("CLOCKS_PER_SEC = %ld\n", (long)CLOCKS_PER_SEC);
    while (1)
    {
        clock_t diff = clock() - pre;
        if (diff > 16 * PRE_MS)
        {
            cnt++;
            if (cnt > 60)
            {
                printf("%d\n", (int)pre);
                cnt = 0;
            }
            pre += diff;
        }
    }
}
It prints roughly every 1 s on Windows:
CLOCKS_PER_SEC = 1000
1020
2058
3095
4132
5169
6206
7243
8280
9317
but roughly every 2 s on Linux:
CLOCKS_PER_SEC = 1000000
1875000
3781250
5687500
7593750
9500000
11406250
13312500
15218750
First, a misconception: 60 Hz does not mean one trigger every 17 ms; it means 60 triggers per second, and rounding the period to a whole number of milliseconds accumulates error.
Second, the period check reads clock() twice and discards whatever time elapses in between, such as the time for printf() to be called. AFAIK, CLOCKS_PER_SEC is larger on Linux than on Windows systems, so there is more chance that you are 'throwing away' clock ticks. Read clock() once, for example:
#include <stdio.h>
#include <time.h>

int main(void)
{
    unsigned long long tickcount = 0;
    clock_t baseticks = clock();

    while (tickcount < 180) { // for 3 seconds
        tickcount++;
        clock_t nexttick = (clock_t)(baseticks + tickcount * CLOCKS_PER_SEC / 60);
        while (clock() < nexttick) {} // busy-wait
        printf("Tick %llu\n", tickcount);
    }
    return 0;
}
The code works from the total elapsed time, so any intervals that are not an exact number of clock ticks are averaged out (instead of a cumulative rounding-off error).
At some point the value from clock() will overflow/wrap, so a real implementation that runs for any length of time will have to take care of this.
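One way to sidestep both the CLOCKS_PER_SEC portability issue and the eventual wrap of clock() is the POSIX monotonic clock with absolute deadlines; a sketch, assuming clock_nanosleep() is available (Linux, not MinGW):

#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec next;
    clock_gettime(CLOCK_MONOTONIC, &next);           /* absolute base time */
    for (unsigned tick = 1; tick <= 180; tick++) {   /* ~3 seconds at 60 Hz */
        /* advance the deadline by 1/60 s, carrying nanoseconds into seconds */
        next.tv_nsec += 1000000000L / 60;
        if (next.tv_nsec >= 1000000000L) {
            next.tv_nsec -= 1000000000L;
            next.tv_sec += 1;
        }
        /* sleep until the absolute deadline instead of busy-waiting */
        clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &next, NULL);
        printf("Tick %u\n", tick);
    }
    return 0;
}

Sleeping to an absolute deadline also means the time spent in printf() is not thrown away.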
I wrote a program with two threads doing the same thing, but I found that the throughput of each thread is lower than when I spawn only a single thread. I then wrote this simple test to see whether that's a problem in my code or it comes from the system.
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
/*
* Function: run_add
* -----------------------
* Do addition operation for iteration ^ 3 times
*
* returns: void
*/
void *run_add(void *ptr) {
    clock_t t1, t2;
    t1 = clock();
    int sum = 0;
    int i = 0, j = 0, k = 0;
    int iteration = 1000;
    long total = iteration * iteration * iteration;

    for (i = 0; i < iteration; i++) {
        for (j = 0; j < iteration; j++) {
            for (k = 0; k < iteration; k++) {
                sum++;
            }
        }
    }

    t2 = clock();
    float diff = ((float)(t2 - t1) / 1000000.0F);
    printf("thread id = %d\n", (int)(pthread_self()));
    printf("Total additions: %ld\n", total);
    printf("Total time: %f second\n", diff);
    printf("Addition per second: %f\n", total / diff);
    printf("\n");
    return NULL;
}
void run_test(int num_thread) {
    pthread_t pth_arr[num_thread];
    int i = 0;

    for (i = 0; i < num_thread; i++) {
        pthread_create(&pth_arr[i], NULL, run_add, NULL);
    }
    for (i = 0; i < num_thread; i++) {
        pthread_join(pth_arr[i], NULL);
    }
}
int main() {
    int num_thread = 5;
    int i = 0;

    for (i = 1; i < num_thread; i++) {
        printf("Running SUM with %d threads. \n\n", i);
        run_test(i);
    }
    return 0;
}
The result still shows the average speed of n threads is slower than one single thread. The more threads I have, the slower each one is.
Here's the result:
Running SUM with 1 threads.
thread id = 528384
Total additions: 1000000000
Total time: 1.441257 second
Addition per second: 693838784.000000
Running SUM with 2 threads.
thread id = 528384
Total additions: 1000000000
Total time: 2.970870 second
Addition per second: 336601728.000000
thread id = 1064960
Total additions: 1000000000
Total time: 2.972992 second
Addition per second: 336361504.000000
Running SUM with 3 threads.
thread id = 1064960
Total additions: 1000000000
Total time: 4.434701 second
Addition per second: 225494352.000000
thread id = 1601536
Total additions: 1000000000
Total time: 4.449250 second
Addition per second: 224756976.000000
thread id = 528384
Total additions: 1000000000
Total time: 4.454826 second
Addition per second: 224475664.000000
Running SUM with 4 threads.
thread id = 528384
Total additions: 1000000000
Total time: 6.261967 second
Addition per second: 159694224.000000
thread id = 1064960
Total additions: 1000000000
Total time: 6.293107 second
Addition per second: 158904016.000000
thread id = 2138112
Total additions: 1000000000
Total time: 6.295047 second
Addition per second: 158855056.000000
thread id = 1601536
Total additions: 1000000000
Total time: 6.306261 second
Addition per second: 158572560.000000
I have a 4-core CPU, and my system monitor shows that each time I run n threads, n CPU cores are 100% utilized. Is it true that n threads (up to my CPU core count) are supposed to run n times as fast as one thread? Why is that not the case here?
clock() measures CPU time, not "wall" time.
It also measures the total CPU time of all threads combined.
CPU time is the time the processor spends executing your code; wall time is real-world elapsed time (what a clock on the wall would show).
Time your program with /usr/bin/time to see what's really happening.
Or use a wall-time function such as time(), gettimeofday() or clock_gettime().
clock_gettime() can measure CPU time for this thread, for this process, or wall time; it's probably the best way to do this type of experiment.
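For example, a sketch contrasting the two (assuming POSIX clock_gettime(); the work loop is only a stand-in):

#include <stdio.h>
#include <time.h>

/* difference of two timespecs, in seconds */
static double ts_diff(struct timespec a, struct timespec b)
{
    return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9;
}

int main(void)
{
    struct timespec w0, w1, c0, c1;
    clock_gettime(CLOCK_MONOTONIC, &w0);          /* wall time */
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &c0);  /* CPU time of this thread */

    volatile unsigned long sum = 0;               /* some CPU-bound work */
    for (unsigned long i = 0; i < 100000000UL; i++)
        sum += i;

    clock_gettime(CLOCK_MONOTONIC, &w1);
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &c1);
    printf("wall time:       %f s\n", ts_diff(w0, w1));
    printf("thread CPU time: %f s\n", ts_diff(c0, c1));
    return 0;
}

With per-thread CPU time, each of the n threads would report roughly the single-thread figure instead of n times it.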
While you have your answer regarding why the multi-threaded performance seemed worse than single-threaded, there are several things you can do to clean up the logic of your program and make it work the way it appears you intended.
First, if you were keeping track of the relative wall time that passed alongside the time reported by your diff of the clock() times, you would have noticed that the reported time was approximately an (n-processor-core) multiple of the actual wall time. That was explained in the other answer.
For relative per-core performance timing, the use of clock() is fine. You are getting only an approximation of wall time, but for comparing relative additions per second, it provides a clean per-core look at performance.
While you have correctly used a divisor of 1000000 for diff, time.h provides a convenient define for you: POSIX requires that CLOCKS_PER_SEC equal 1000000 regardless of the actual clock resolution.
Next, you should also notice that your per-thread output isn't reported until all threads are joined, which makes reporting totals in run_add somewhat pointless. You can output thread_id, etc. from the individual threads for convenience, but the timing information should be computed back in the calling function after all threads have been joined. That will clean up the logic of run_add significantly. Further, if you want to be able to vary the number of iterations, you should consider passing that value through ptr, e.g.:
/*
* Function: run_add
* -----------------------
* Do addition operation for iteration ^ 3 times
*
* returns: void
*/
void *run_add (void *ptr)
{
    int i = 0, j = 0, k = 0, iteration = *(int *)ptr;
    unsigned long sum = 0;

    for (i = 0; i < iteration; i++)
        for (j = 0; j < iteration; j++)
            for (k = 0; k < iteration; k++)
                sum++;

    printf (" thread id = %lu\n", (long unsigned) (pthread_self ()));
    printf (" iterations = %lu\n\n", sum);

    return NULL;
}
run_test is relatively unchanged; the bulk of the calculation code moves from run_add into main, scaled to account for the number of cores utilized. The following rewrite of main allows the user to specify the number of cores to use as the first argument (all cores by default) and the base for your cubed number of iterations as the second argument (1000 by default):
int main (int argc, char **argv) {

    int nproc = sysconf (_SC_NPROCESSORS_ONLN),    /* number of cores available */
        num_thread = argc > 1 ? atoi (argv[1]) : nproc,
        iter = argc > 2 ? atoi (argv[2]) : 1000;
    unsigned long subtotal = (unsigned long)iter * iter * iter,  /* cast avoids int overflow */
        total = subtotal * num_thread;
    double diff = 0.0, t1 = 0.0, t2 = 0.0;

    if (num_thread > nproc) num_thread = nproc;

    printf ("\nrunning sum with %d threads.\n\n", num_thread);

    t1 = clock ();
    run_test (num_thread, &iter);
    t2 = clock ();

    diff = (double)((t2 - t1) / CLOCKS_PER_SEC / num_thread);

    printf ("----------------\nTotal time: %lf second\n", diff);
    printf ("Total additions: %lu\n", total);
    printf ("Additions per-second: %lf\n\n", total / diff);

    return 0;
}
Putting all the pieces together, you could write a working example as follows. Make sure you disable optimizations to prevent your compiler from optimizing out your loops for sum, etc...
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include <unistd.h>

/*
 * Function: run_add
 * -----------------------
 * Do addition operation for iteration ^ 3 times
 *
 * returns: void
 */
void *run_add (void *ptr)
{
    int i = 0, j = 0, k = 0, iteration = *(int *)ptr;
    unsigned long sum = 0;

    for (i = 0; i < iteration; i++)
        for (j = 0; j < iteration; j++)
            for (k = 0; k < iteration; k++)
                sum++;

    printf (" thread id = %lu\n", (long unsigned) (pthread_self ()));
    printf (" iterations = %lu\n\n", sum);

    return NULL;
}

void run_test (int num_thread, int *it)
{
    pthread_t pth_arr[num_thread];
    int i = 0;

    for (i = 0; i < num_thread; i++)
        pthread_create (&pth_arr[i], NULL, run_add, it);

    for (i = 0; i < num_thread; i++)
        pthread_join (pth_arr[i], NULL);
}

int main (int argc, char **argv) {

    int nproc = sysconf (_SC_NPROCESSORS_ONLN),
        num_thread = argc > 1 ? atoi (argv[1]) : nproc,
        iter = argc > 2 ? atoi (argv[2]) : 1000;
    unsigned long subtotal = (unsigned long)iter * iter * iter,  /* cast avoids int overflow */
        total = subtotal * num_thread;
    double diff = 0.0, t1 = 0.0, t2 = 0.0;

    if (num_thread > nproc) num_thread = nproc;

    printf ("\nrunning sum with %d threads.\n\n", num_thread);

    t1 = clock ();
    run_test (num_thread, &iter);
    t2 = clock ();

    diff = (double)((t2 - t1) / CLOCKS_PER_SEC / num_thread);

    printf ("----------------\nTotal time: %lf second\n", diff);
    printf ("Total additions: %lu\n", total);
    printf ("Additions per-second: %lf\n\n", total / diff);

    return 0;
}
Example Use/Output
Now you can measure the relative number of additions per-second performed based on the number of cores utilized -- and have it return a Total time that is roughly what wall-time would be. For example, measuring the additions per-second using a single core results in:
$ ./bin/pthread_one_per_core 1
running sum with 1 threads.
thread id = 140380000397056
iterations = 1000000000
----------------
Total time: 2.149662 second
Total additions: 1000000000
Additions per-second: 465189411.172547
Approximately 465M additions per second. Using two cores should double that rate:
$ ./bin/pthread_one_per_core 2
running sum with 2 threads.
thread id = 140437156796160
iterations = 1000000000
thread id = 140437165188864
iterations = 1000000000
----------------
Total time: 2.152436 second
Total additions: 2000000000
Additions per-second: 929179560.000957
Almost exactly twice the additions per second, at 929M/s. Using 4 cores:
$ ./bin/pthread_one_per_core 4
running sum with 4 threads.
thread id = 139867841853184
iterations = 1000000000
thread id = 139867858638592
iterations = 1000000000
thread id = 139867867031296
iterations = 1000000000
thread id = 139867850245888
iterations = 1000000000
----------------
Total time: 2.202021 second
Total additions: 4000000000
Additions per-second: 1816513309.422720
Doubled again to 1.81G/s; using 8 cores gives the expected result:
$ ./bin/pthread_one_per_core
running sum with 8 threads.
thread id = 140617712838400
iterations = 1000000000
thread id = 140617654089472
iterations = 1000000000
thread id = 140617687660288
iterations = 1000000000
thread id = 140617704445696
iterations = 1000000000
thread id = 140617662482176
iterations = 1000000000
thread id = 140617696052992
iterations = 1000000000
thread id = 140617670874880
iterations = 1000000000
thread id = 140617679267584
iterations = 1000000000
----------------
Total time: 2.250243 second
Total additions: 8000000000
Additions per-second: 3555171004.558562
3.55G/s. Look over both answers (the current ones) and let us know if you have any questions.
Note: there are a number of additional clean-ups and validations that could be applied, but for the purposes of your example, updating the types to suitably sized unsigned integers prevents strange results with thread_id and the addition counts.
I'm working on a programming assignment and I'm getting strange results.
The idea is to calculate the number of processor ticks and time taken to run the algorithm.
Usually the code runs so quickly that the time taken is 0 sec, but I noticed that the number of processor ticks was 0 at the start and at the finish, resulting in 0 processor ticks taken.
I added a delay using usleep so that the time taken was non-zero, but the processor tick count is still zero, and the difference between the tick stamps is still zero.
I've been banging my head against this for several days now and can't get past it; any suggestions are extremely welcome.
My code is below:
/* This program takes an input "n". If n is even, it divides n by 2.
 * If n is odd, it multiplies n by 3 and adds 1. Each time through the loop
 * it increments a counter.
 * It continues until n is 1.
 *
 * This program will compute the time taken to perform the above algorithm.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>   /* for usleep() */

void delay(int);

int main(void) {
    int n, i = 0;
    time_t start, finish;
    double duration;            /* difftime() returns double */
    clock_t startTicks, finishTicks, diffTicks;

    printf("Clocks per sec = %ld\n", (long)CLOCKS_PER_SEC);
    printf("Enter an integer: ");
    scanf("%d", &n);                       // read value from keyboard
    time(&start);                          // record start time
    startTicks = clock();
    printf("Start Clock = %s\n", ctime(&start));
    printf("Start Processor Ticks = %ld\n", (long)startTicks);
    while (n != 1) {                       // continues until n=1
        i++;                               // increment counter
        printf("iterations =%d\t", i);     // display counter iterations
        if (n % 2) {                       // if n is odd, n=3n+1
            printf("Input n is odd!\t\t");
            n = (n * 3) + 1;
            printf("Output n = %d\n", n);
            delay(1000000);
        } else {                           // if n is even, n=n/2
            printf("Input n is even!\t");
            n = n / 2;
            printf("Output n = %d\n", n);
            delay(1000000);
        }
    }
    printf("n=%d\n", n);
    time(&finish);                         // record finish time
    finishTicks = clock();
    printf("Stop time = %s\n", ctime(&finish));
    printf("Stop Processor Ticks = %ld\n", (long)finishTicks);
    duration = difftime(finish, start);    // compute difference in time
    diffTicks = finishTicks - startTicks;
    printf("Time elapsed = %2.4f seconds\n", duration);
    printf("Processor ticks elapsed = %ld\n", (long)diffTicks);
    return (n);
}

void delay(int us) {
    usleep(us);
}
EDIT: After researching further, I discovered that usleep() won't show up in the program's measured running time, so I wrote a delay function in asm. Now I am getting a value for processor ticks, but I am still getting zero seconds to run the algorithm.
void delay(int us) {
    for (int i = 0; i < us; i++) {
        __asm__("nop");
    }
}
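A tiny experiment (a sketch, assuming POSIX usleep()) that illustrates why the usleep() delay never showed up in clock(): sleeping accrues wall time but almost no CPU time, while a busy loop accrues both.

#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
    clock_t t0 = clock();
    usleep(500000);                       /* sleep 0.5 s: wall time passes, ~no CPU time */
    clock_t t1 = clock();

    volatile unsigned long spin = 0;
    while ((double)(clock() - t1) / CLOCKS_PER_SEC < 0.5)
        spin++;                           /* busy-wait 0.5 s: consumes CPU time */
    clock_t t2 = clock();

    printf("CPU time across usleep():  %f s\n", (double)(t1 - t0) / CLOCKS_PER_SEC);
    printf("CPU time across busy loop: %f s\n", (double)(t2 - t1) / CLOCKS_PER_SEC);
    return 0;
}

The first figure should be close to zero and the second close to 0.5, even though both intervals take half a second of wall time.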
You can calculate the elapsed time using the formula below:
double timeDiff = (double)(EndTime - StartTime) / CLOCKS_PER_SEC;
Here is the dummy code.
void CalculateTime(clock_t startTime, clock_t endTime)
{
    clock_t diffTime = endTime - startTime;
    printf("Processor time elapsed = %lf\n", (double)diffTime / CLOCKS_PER_SEC);
}
Hope this helps.
You are trying to time an implementation of the Collatz conjecture. I don't see how you can hope to get a meaningful execution time when it contains delays. Another problem is the granularity of clock() results, as shown by the value of CLOCKS_PER_SEC.
It is even more difficult with time(), which has a resolution of 1 second.
The way to do it is to compute a large number of values. The program below prints only 10 of them, to ensure the calculations are not optimised out, but without distorting the measured time too much.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define SAMPLES 100000

int main(void) {
    int i, j, n;
    double duration;
    clock_t startTicks = clock();

    for (j = 2; j < SAMPLES; j++) {
        n = j;                           // starting number
        i = 0;                           // iterations
        while (n != 1) {
            if (n % 2) {                 // if n is odd, n=3n+1
                n = n * 3 + 1;
            } else {                     // if n is even, n=n/2
                n = n / 2;
            }
            i++;
        }
        if (j % (SAMPLES / 10) == 0)     // print 10 results only
            printf("%d had %d iterations\n", j, i);
    }

    duration = ((double)clock() - startTicks) / CLOCKS_PER_SEC;
    printf("\nDuration: %f seconds\n", duration);
    return 0;
}
Program output:
10000 had 29 iterations
20000 had 30 iterations
30000 had 178 iterations
40000 had 31 iterations
50000 had 127 iterations
60000 had 179 iterations
70000 had 81 iterations
80000 had 32 iterations
90000 had 164 iterations
Duration: 0.090000 seconds
Version A:
#include <time.h>
#include <stdio.h>

int main()
{
    time_t start = time(0);             // record start time
    int i, j;
    static double dst[4096][4096];      // destination array
    static double src[4096][4096];      // source array

    for (i = 0; i < 4096; ++i) {
        for (j = 0; j < 4096; ++j) {
            dst[i][j] = src[i][j];
        }
    }

    time_t end = time(0);               // record end time
    double time = difftime(end, start); // elapsed time; note: difftime() returns seconds, despite the "ms" label below
    printf("Test One: %fms\n", time);
}
Version B:
#include <time.h>
#include <stdio.h>

int main()
{
    time_t start = time(0);             // record start time
    int i, j;
    static double dst[4096][4096];      // destination array
    static double src[4096][4096];      // source array

    for (i = 0; i < 4096; ++i) {
        for (j = 0; j < 4096; ++j) {
            dst[j][i] = src[j][i];
        }
    }

    time_t end = time(0);               // record end time
    double time = difftime(end, start); // elapsed time; note: difftime() returns seconds, despite the "ms" label below
    printf("Test One: %fms\n", time);
}
Using this program, I have determined that if you reverse the positions of i and j in the array subscripts, the program takes 1 second longer to execute.
Why is this happening?
In your code, the loop means "traverse the addresses within one row, one by one, then go to the next row". If you reverse the positions of i and j, it means "traverse the addresses within one column, one by one, then go to the next column".
In C, multi-dimensional arrays are laid out in row-major order in a linear address space, row by row, so dst[i][j] = src[i][j] in your case means *(&dst[0][0] + 4096 * i + j) = *(&src[0][0] + 4096 * i + j):
*(&dst[0][0] + 4096 * 0 + 0) = *(&src[0][0] + 4096 * 0 + 0);
*(&dst[0][0] + 4096 * 0 + 1) = *(&src[0][0] + 4096 * 0 + 1);
*(&dst[0][0] + 4096 * 0 + 2) = *(&src[0][0] + 4096 * 0 + 2);
//...
while reversed i and j means:
*(&dst[0][0] + 4096 * 0 + 0) = *(&src[0][0] + 4096 * 0 + 0);
*(&dst[0][0] + 4096 * 1 + 0) = *(&src[0][0] + 4096 * 1 + 0);
*(&dst[0][0] + 4096 * 2 + 0) = *(&src[0][0] + 4096 * 2 + 0);
//...
So the extra second in the second case is caused by accessing memory in a non-contiguous manner, which makes poor use of the CPU cache.
You don't need to do the time calculation yourself; on Linux/UNIX you can run your program under the time command:
$ time ./loop
The results on my linux box for the 2 cases:
$ time ./loop_i_j
real 0m0.244s
user 0m0.062s
sys 0m0.180s
$ time ./loop_j_i
real 0m1.072s
user 0m0.995s
sys 0m0.073s
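If you prefer to measure inside the program, both traversal orders can be timed in one run with clock(); a sketch using the same array sizes as the question:

#include <stdio.h>
#include <time.h>

#define N 4096
static double dst[N][N], src[N][N];

int main(void)
{
    clock_t t0 = clock();
    for (int i = 0; i < N; i++)        /* row-major: contiguous accesses */
        for (int j = 0; j < N; j++)
            dst[i][j] = src[i][j];
    clock_t t1 = clock();

    for (int i = 0; i < N; i++)        /* column-major: stride of N doubles */
        for (int j = 0; j < N; j++)
            dst[j][i] = src[j][i];
    clock_t t2 = clock();

    printf("row-major:    %.3f s\n", (double)(t1 - t0) / CLOCKS_PER_SEC);
    printf("column-major: %.3f s\n", (double)(t2 - t1) / CLOCKS_PER_SEC);
    return 0;
}

(With optimisation enabled the copy loops may be removed or reordered, so compile without optimisation for a fair comparison.)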
#include <time.h>
#include <stdio.h>

int main()
{
    time_t start = time(0);             // record start time
    int i, j;
    static double dst[4096][4096];
    static double src[4096][4096];

    for (j = 0; j < 4096; ++j) {
        for (i = 0; i < 4096; ++i) {
            dst[j][i] = src[j][i];
        }
    }

    time_t end = time(0);               // record end time
    double time = difftime(end, start); // elapsed time
    printf("Test One: %fms\n", time);
}
I tested this, and it gives me the output Test One: 0.000000ms in both the reversed and the normal case. I used the gcc compiler.
Maybe the issue is that you have not included stdio.h. I experienced the same behavior once when I did not include stdio.h.
Something related to memory allocation (on the stack) at compile time could be a possible reason.
I wrote a sample program to understand time measurement in C. Below is a small self-contained example. I have a function do_primes() that calculates prime numbers. In the main() function, between the timing code, I call do_primes() and also sleep for 20 milliseconds. I measure time using struct timeval (which, as I understand it, gives wall-clock time) and also CPU time using CLOCKS_PER_SEC. As I understand it, the CPU time denotes the time for which the CPU was actually working.
The output of the program is as follows.
Calculated 9592 primes.
elapsed time 2.866976 sec.
cpu time used 2.840000 secs.
As you can see, the difference between the elapsed time and the CPU time is 0.026976 seconds, i.e. 26.976 milliseconds.
1) Are my assumptions correct?
2) Is the remaining 6.976 milliseconds (26.976 ms minus the 20 ms sleep) accounted for by the scheduler switch delay?
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>   /* for usleep() */

#define MAX_PRIME 100000

void do_primes()
{
    unsigned long i, num, primes = 0;
    for (num = 1; num <= MAX_PRIME; ++num)
    {
        for (i = 2; (i <= num) && (num % i != 0); ++i)
            ;
        if (i == num)
            ++primes;
    }
    printf("Calculated %lu primes.\n", primes);
}

int main()
{
    struct timeval t1, t2;
    double elapsedTime;
    clock_t start, end;
    double cpu_time_used;

    start = clock();
    /* start timer */
    gettimeofday(&t1, NULL);

    /* do something */
    usleep(20000);
    do_primes();

    /* stop timer */
    gettimeofday(&t2, NULL);
    end = clock();

    /* compute and print the elapsed time in millisec */
    elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;      /* sec to ms */
    elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0;   /* us to ms */
    cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
    printf("elapsed time %f sec. \ncpu time used %f secs.\n", (elapsedTime / 1000), cpu_time_used);
    return 0;
}
Your understanding is correct.
The additional 6.976ms might not mean anything at all, because it's possible that the clock() function only has a resolution of 10ms.
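You can query the resolution the system actually advertises with clock_getres(); a sketch, assuming a POSIX system:

#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec res;
    /* advertised resolution of the process CPU-time clock */
    if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res) == 0)
        printf("CPU-time clock resolution:  %ld ns\n", res.tv_nsec);
    /* advertised resolution of the monotonic wall clock */
    if (clock_getres(CLOCK_MONOTONIC, &res) == 0)
        printf("Monotonic clock resolution: %ld ns\n", res.tv_nsec);
    return 0;
}

Note that clock() itself often ticks far more coarsely than the underlying clock, which is consistent with the ~10 ms granularity suggested above.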