c know how much memory was used in the execution - c

For example, in the following code, how can I find out how much memory was used, not counting the struct timeval and int microseg variables?
It is Problem 1 of Project Euler.
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
/*
 * Project Euler problem 1: adds up every integer below 1000 that is a
 * multiple of 3 or 5, prints the sum, then prints the elapsed time of the
 * computation and the peak resident set size reported by getrusage().
 *
 * Returns 0.
 */
int main(void)
{
    struct timeval t, t2;
    struct rusage uso;

    gettimeofday(&t, NULL);

    int sum = 0;
    for (int k = 2; k < 1000; k++) {
        if (k % 3 == 0 || k % 5 == 0) {
            sum += k;
        }
    }
    printf("%d \n", sum);

    gettimeofday(&t2, NULL);

    /*
     * Elapsed microseconds between the two gettimeofday() samples. Integer
     * arithmetic is used on purpose: single-precision float cannot represent
     * large microsecond counts exactly.
     */
    long microseg = (long)(t2.tv_sec - t.tv_sec) * 1000000L
                  + (long)(t2.tv_usec - t.tv_usec);
    printf("CPU time: %ld\n", microseg);

    /* NOTE(review): the "KB" label assumes ru_maxrss is in kilobytes (the
     * Linux convention) - confirm on other platforms. */
    if (getrusage(RUSAGE_SELF, &uso) == 0) {
        printf("Memory: %ld KB\n", (long)uso.ru_maxrss);
    } else {
        perror("getrusage");
    }
    return 0;
}

Related

Why is my multi-threading program in VirtualBox not faster than my single-thread program?

everybody!
I have two programs that estimate PI using the Monte Carlo technique: one single-threaded and one multi-threaded.
The single-thread one :
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define BILLION 1000000000.0
int main(int argc, char *argv[])
{
struct timespec start, end;
///////////////////////
clock_gettime(CLOCK_REALTIME, &start);
///////////////////////
if(argc != 2)
{
fprintf(stderr, "usage: a.out <integer value>\n");
return -1;
}
if(atoi(argv[1]) < 0)
{
fprintf(stderr, "%d must be >= 0\n", atoi(argv[1]));
return -1;
}
time_t t;
srand((unsigned) time(&t));
int total = atoi(argv[1]);
int inside = 0;
unsigned int seed = rand()%30000;
for(int i = 0; i < total; ++i)
{
double rand_x = (double)rand_r(&seed)/(double)RAND_MAX;
double rand_y = (double)rand_r(&seed)/(double)RAND_MAX;
double dist = rand_x*rand_x + rand_y*rand_y;
if(dist < 1.0) ++inside;
}
double pi = (double)(4 * inside)/total;
clock_gettime(CLOCK_REALTIME, &end);
double time_spent = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) / BILLION;
printf("pi = %lf\n", pi);
printf("time = %f\n", time_spent);
return 0;
}
The multi-thread one:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pthread.h>
#include <math.h>
#define N 5
#define BILLION 1000000000.0
/* Number of sampled points that fell inside the quarter circle, summed over
 * all worker threads. Updated only while `mutex` is held. */
int inside = 0;
/* Protects `inside` and serialises the workers' calls to rand(). */
pthread_mutex_t mutex;

/*
 * Thread start routine: draws a number of random points in the unit square
 * and adds the count of points with x*x + y*y < 1 to the global `inside`.
 *
 * n: the number of points to draw, passed as an integer value stored in the
 *    pointer itself (not a pointer to an integer).
 *
 * Returns NULL. `mutex` must be initialised before this function runs.
 */
void *countInside(void *n)
{
    /* Go through long so the pointer-to-integer conversion is not a direct
     * narrowing cast from a pointer to int. */
    int total = (int)(long)n;
    int hit_count = 0;

    /* rand() is not required to be thread-safe, so the per-thread seed is
     * fetched under the shared mutex. */
    pthread_mutex_lock(&mutex);
    unsigned int seed = rand() % 30000;
    pthread_mutex_unlock(&mutex);

    for (int i = 0; i < total; ++i) {
        double rand_x = (double)rand_r(&seed) / (double)RAND_MAX;
        double rand_y = (double)rand_r(&seed) / (double)RAND_MAX;
        double dist = rand_x * rand_x + rand_y * rand_y;
        if (dist < 1.0) {
            ++hit_count;
        }
    }

    /* Publish the local count with a single locked update. */
    pthread_mutex_lock(&mutex);
    inside += hit_count;
    pthread_mutex_unlock(&mutex);

    /* Returning from the start routine ends the thread with this value. */
    return NULL;
}
int main(int argc, char *argv[])
{
struct timespec start, end;
///////////////////////
clock_gettime(CLOCK_REALTIME, &start);
///////////////////////
if(argc != 2)
{
fprintf(stderr, "usage: a.out <integer value>\n");
return -1;
}
if(atoi(argv[1]) < 0)
{
fprintf(stderr, "%d must be >= 0\n", atoi(argv[1]));
return -1;
}
int total = atoi(argv[1]);
srand((unsigned) time(NULL));
//int N;
//printf("Input the number of thread you desire : ");
//scanf("%d", &N);
int n = total/N;
//pthread_t* tid = malloc(sizeof(pthread_t) * (N));
pthread_t tid[N];
pthread_mutex_init(&mutex, NULL);
for(int i = 0; i < N; ++i)
{
pthread_create(&tid[i], 0, countInside, (void*)n);
}
for(int i = 0; i < N; ++i)
{
pthread_join(tid[i], NULL);
}
double pi = 4.0 * inside / total;
clock_gettime(CLOCK_REALTIME, &end);
double time_spent = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) / BILLION;
printf("pi = %lf\n", pi);
printf("time = %lf\n", time_spent);
return 0;
}
When I execute both programs with 100000000 points, I get the following output:
Output of single-thread:
quan#quan-VirtualBox:~/Documents/lab5$ ./pi_serial 100000000
pi = 3.141583
time = 1.576207
Output of multi-thread:
quan#quan-VirtualBox:~/Documents/lab5$ ./pi_multi-thread 100000000
pi = 3.141532
time = 1.446410
Note: Sometimes the multi-threaded one is even slower than the single-threaded one.
What's the problem? I thought the multi-threaded version must have some speed-up compared to the single-threaded one. Is my multi-threaded code wrong? Please give me some advice. Thank you!

Parallel MPI Version

I have to write a parallel MPI version of my dot product (code below):
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#define SIZE 10000000
/* Input vectors of the dot product, filled with rand() values in main(). */
volatile float a[SIZE];
volatile float b[SIZE];

/*
 * Fills a[] and b[] with random values, computes their dot product in a
 * single loop and prints the time that loop took, in microseconds.
 *
 * Returns 0.
 */
int main(int argc, char **argv)
{
    long int i;
    double sum;
    struct timeval time1, time2;

    srand(time(0));
    for (i = 0; i < SIZE; i++) {
        a[i] = rand();
        b[i] = rand();
    }

    gettimeofday(&time1, 0);
    sum = 0.0;
    for (i = 0; i < SIZE; i++) {
        sum = sum + a[i] * b[i];
    }
    gettimeofday(&time2, 0);

    /* NOTE(review): this call is made without <mpi.h>, MPI_Init or
     * MPI_Finalize appearing in this program, and it passes MPI_INT for
     * float buffers - it needs to be completed before it can work. */
    MPI_Scatter(a, 1, MPI_INT, &b, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* The elapsed value is wider than int, so it is converted to long and
     * printed with the matching %ld conversion. */
    printf("Elapsed time (us) = %ld\n",
           (long)(time2.tv_sec - time1.tv_sec) * 1000000L
               + (long)(time2.tv_usec - time1.tv_usec));
    return 0;
}
My question is, what code do I need to add to the program, what MPI primitives are useful?

How to measure scanf time in a C program?

In the following program, I want to measure the input time i.e. the time taken by user to enter the variables of the array :
#include <stdio.h>
#include <time.h>
/*
 * Reads ten integers from standard input and prints the difference between
 * the clock() readings taken before and after the reads, in seconds.
 * Note that clock() reports processor time, not the time spent waiting for
 * the user.
 */
int main()
{
    int array[10];

    const clock_t input_start = clock();

    int i = 0;
    while (i < 10) {
        scanf("%d", &array[i]);
        ++i;
    }

    const clock_t input_end = clock();

    /* Convert clock ticks to seconds. */
    const double user_input_time =
        (double)(input_end - input_start) / CLOCKS_PER_SEC;

    printf("User Input Time : %f\n", user_input_time);
    return 0;
}
With the code above, what I get is the processor time consumed, not the time the user took to enter all 10 values of the array.
Can someone please help me do this?
Include: time.h
Use:
/*
 * Template for timing a section of code with one-second resolution:
 * samples time() before and after the work and prints the difference in
 * seconds.
 */
int main()
{
    time_t start = time(NULL);
    //Do your operations here

    /* difftime() is the portable way to turn two time_t values into a
     * number of seconds; it returns a double. */
    printf("%.2f\n", difftime(time(NULL), start));
    return 0;
}
Note - We can use clock_gettime for more precise results - link
Using clock_gettime
int main () {
struct timespec start, finish;
clock_gettime(CLOCK_REALTIME, &start);
// do your operations here
clock_gettime(CLOCK_REALTIME, &finish);
long seconds = finish.tv_sec - start.tv_sec;
long ns = finish.tv_nsec - start.tv_nsec;
if (start.tv_nsec > finish.tv_nsec) { // clock underflow
--seconds;
ns += 1000000000;
}
printf("seconds without ns: %ld\n", seconds);
printf("nanoseconds: %ld\n", ns);
printf("total seconds: %e\n", (double)seconds + (double)ns/(double)1000000000);
}
Which precision do you want ?
Because you can use simply time(NULL) if you only want to know the time at the second.
#include <stdio.h>
#include <time.h>
/*
 * Reads ten integers from standard input and prints how many whole seconds
 * passed between the time() samples taken before and after the reads.
 */
int main(void)
{
    int array[10];

    const time_t input_start = time(NULL);

    for (int slot = 0; slot < 10; ++slot) {
        scanf("%d", array + slot);
    }

    const time_t input_end = time(NULL);

    /* Difference of the two time() samples, printed as an int. */
    const time_t user_input_time = input_end - input_start;
    printf("User Input Time : %d second\n", (int)user_input_time);

    return 0;
}
You can use gettimeofday() from sys/time.h
#include <stdio.h>
#include <sys/time.h>
/*
 * Reads ten integers from standard input and prints the number of whole
 * seconds between the gettimeofday() samples taken before and after the
 * reads.
 */
int main(void)
{
    int i, array[10];
    struct timeval input_start, input_end;

    gettimeofday(&input_start, NULL);
    for (i = 0; i < 10; i++) {
        scanf("%d", &array[i]);
    }
    gettimeofday(&input_end, NULL);

    /* The difference of two tv_sec values is not an int, so it is converted
     * to long and printed with the matching %ld conversion. */
    printf("User Input Time : %ld second\n",
           (long)(input_end.tv_sec - input_start.tv_sec));
    return 0;
}

Pthread- limit number of threads that can access a function at a time

I have a quick question. I'm learning about semaphores and I want only four threads to be able to access someFunction() at any given time. This function needs to execute num_task times. This is what I have so far, but valgrind is throwing some errors saying that I have possible memory leaks. Please tell me where I'm going wrong and how to go about fixing this. Thank you!
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <semaphore.h>
/* Per-task payload handed to each worker thread. */
typedef struct Data {
    int index; /* task number assigned by main() */
    int j;     /* slot number assigned by main() */
} Data;

/* Counting semaphore shared by main() and the worker threads. */
sem_t s;
/*
 * Thread start routine. Waits on the semaphore `s`, prints the task's
 * index and j values, sleeps for one second, frees the task and posts the
 * semaphore again.
 *
 * arg: pointer to a heap-allocated Data; this function frees it.
 *
 * Returns a null pointer.
 */
void* someFunction(void* arg){
    Data *task = arg;

    /* Take one of the semaphore's slots before doing the work. */
    sem_wait(&s);

    printf("i%d j%d\n", task->index, task->j);
    sleep(1);
    free(task);

    /* Give the slot back. */
    sem_post(&s);
    return NULL;
}
/*
 * Runs someFunction() once per task, each call on its own thread, and
 * waits for every one of them before destroying the semaphore.
 *
 * Returns 0.
 */
int main(void){
    int num_task = 10; // i need to call someFunction() 9000 times
    int num_threads = 4;

    sem_init(&s, 0, num_threads);

    /*
     * One id per task. With only num_threads slots, ids of still-running
     * threads were overwritten and never joined, so main() could return
     * (and destroy the semaphore) while workers were still alive.
     */
    pthread_t thread_ids[num_task];
    int created = 0;

    for (int i = 0; i < num_task; i ++){ // these are our columns
        int j = i % num_threads; // j goes 0 1 2 3 0 1 2 3 0 1 2 3 ....

        sem_wait(&s);

        Data *a = malloc(sizeof *a);
        if (a == NULL){
            fprintf(stderr, "out of memory for task %d\n", i);
            sem_post(&s);
            break;
        }
        a->index = i;
        a->j = j;
        printf("MAIN j%d\n", j);

        if (pthread_create(&thread_ids[created], NULL, someFunction, a) == 0){
            created ++;
        } else {
            /* No thread took ownership of the task, so release it here. */
            fprintf(stderr, "could not create thread for task %d\n", i);
            free(a);
        }

        sem_post(&s);
    }

    /* Join every thread that was actually started. */
    for (int i = 0; i < created; i ++){
        pthread_join(thread_ids[i], NULL);
    }

    sem_destroy(&s);
    return 0;
}

A sample openmp program with speedup

Could someone provide an OpenMP program where the speedup is visible compared to without it. I'm finding it extremely difficult to achieve speedup. Even this simple program runs slower with OpenMP. My processor is Intel® Core™ i3-2370M CPU # 2.40GHz × 4 running on Linux (Ubuntu 14.10)
#include <cmath>
#include <stdio.h>
#include <time.h>
/*
 * Runs four iterations of an outer loop under an OpenMP parallel-for, each
 * spinning through an empty inner loop and printing a newline, then prints
 * the clock() ticks consumed and the same value in seconds.
 *
 * clock() reports processor time used by the program, so the printed value
 * is not the elapsed wall-clock time.
 */
int main() {
    clock_t t;
    t = clock();

    const int size = 4;

    #pragma omp parallel for num_threads(4)
    for(int n=0; n<size; ++n) {
        for(int j=0;j<100000000;j++){
        }
        printf("\n");
    }

    t = clock() - t;

    /* clock_t is not int, so the tick count is converted to long and
     * printed with the matching %ld conversion. */
    printf ("It took me %ld clicks (%f seconds).\n",(long)t,((float)t)/CLOCKS_PER_SEC);
    return 0;
}
I had a problem related to this, where I wanted to find the max value of an array. I made the same mistake as you, I used clock for measuring the elapsed time. To fix this, I used clock_gettime() instead, and now it works.
As for example code where the speedup is measurable (note that you might want to change the value of N):
#include <omp.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
/*
 * Returns the time elapsed between `start` and `end`.
 *
 * The result is NOT a normalised timespec: tv_nsec holds the whole elapsed
 * time in nanoseconds (seconds difference * 1e9 + nanoseconds difference)
 * and tv_sec is always 0. Callers obtain seconds by dividing tv_nsec by 1e9.
 *
 * NOTE(review): where long is 32 bits wide, tv_nsec cannot hold intervals
 * longer than about two seconds.
 */
struct timespec diff(struct timespec start, struct timespec end)
{
    struct timespec temp;

    /* Give tv_sec a defined value instead of returning it uninitialised. */
    temp.tv_sec = 0;

    /* One formula covers both cases: when the seconds are equal the first
     * term is simply zero. */
    temp.tv_nsec = (end.tv_sec - start.tv_sec) * 1000000000
                 + end.tv_nsec - start.tv_nsec;
    return temp;
}
/*
 * Benchmark: for array sizes from 1,000,000 up to (but not including)
 * 100,000,000 in steps of 1,000,000, fills an array with rand() values and
 * times a maximum search twice - once with an OpenMP max-reduction and once
 * with a plain loop. Each line written to out.txt holds the size, the
 * OpenMP time and the plain time in seconds, separated by tabs.
 *
 * Returns 0 on success, -1 if out.txt cannot be opened or an allocation
 * fails.
 */
int main()
{
    unsigned int N;
    struct timespec t_start, t_end;

    srand(time(NULL));

    FILE *f = fopen("out.txt", "w");
    if(f == NULL)
    {
        printf("Could not open output\n");
        return -1;
    }

    for(N = 1000000; N < 100000000; N += 1000000)
    {
        /* N is unsigned, so it is printed with %u. */
        fprintf(f, "%u\t", N);

        int *array = malloc(sizeof *array * N);
        if(array == NULL)
        {
            printf("Not enough space\n");
            /* Close the output file on this early exit as well. */
            fclose(f);
            return -1;
        }

        for(unsigned int i = 0; i<N; i++) array[i] = rand();

        /* Timed run 1: maximum search with an OpenMP reduction. */
        int max_val = 0;
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        #pragma omp parallel for reduction(max:max_val)
        for(unsigned int i=0; i<N; i++)
        {
            if(array[i] > max_val) max_val = array[i];
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);
        /* diff() returns the total elapsed nanoseconds in tv_nsec. */
        fprintf(f, "%lf\t", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        /* Timed run 2: the same search as a plain loop. */
        max_val = 0;
        clock_gettime(CLOCK_MONOTONIC, &t_start);
        for(unsigned int i = 0; i<N; i++)
        {
            if(array[i] > max_val) max_val = array[i];
        }
        clock_gettime(CLOCK_MONOTONIC, &t_end);
        fprintf(f, "%lf\n", (double)(diff(t_start, t_end).tv_nsec / 1000000000.0));

        free(array);
    }

    fclose(f);
    return 0;
}
Calculating an integral is a classic example; adjust the parts constant to change the execution time and see the difference in runtime more clearly: more parts, more execution time. It takes 21.3 seconds with OpenMP enabled and 26.7 seconds without it, on a SINGLE-core, DUAL-thread Intel Pentium 4:
#include <math.h>
#include <stdio.h>
#include <omp.h>
#define from 0.0f
#define to 2.0f
#define parts 999999999
#define step ((to - from) / parts)
#define x (from + (step / 2.0f))
/*
 * Approximates the definite integral of x^2 + 4 over [from, to] with the
 * midpoint rule, using `parts` sub-intervals of width `step`, and prints
 * the result. The sum is accumulated with an OpenMP reduction.
 *
 * Returns 0.
 */
int main()
{
    double integralSum = 0;
    int i;

    /* Promote the float-valued macros once so the per-iteration arithmetic
     * is done in double; float cannot represent every i up to `parts`. */
    const double width = (double)step;
    const double first_mid = (double)x;   /* midpoint of sub-interval 0 */

    /*
     * Sub-intervals are numbered 0 .. parts-1, so the sampled midpoints are
     * first_mid + width * i and cover [from, to]. Starting at 1 and ending
     * at parts would shift the whole range by one step.
     */
    #pragma omp parallel for reduction(+:integralSum)
    for (i = 0; i < parts; ++i)
    {
        integralSum = integralSum + (width * fabs(pow((first_mid + (width * i)),2) + 4));
    }

    printf("%f\n", integralSum);
    return 0;
}
It calculates the definite integral from 0 to 2 of x^2 + 4

Resources