Difference of performance between two simple functions - c

Supposing I need to call one of these functions millions of times, what are the differences in performance between these two ?
/* Tuple of eight double components; the functions below take and return it by value. */
typedef struct s_tuple{
double x;
double y;
double z;
double w;
double m;
double n;
double o;
double p;
} t_tuple;
// (1)
/*
 * Variant (1): scale every component of `a` by `q` and return the result.
 * `a` is received by value, so the caller's tuple is not modified; the
 * multiplication is done in place on that parameter copy, which is then
 * returned by value.
 */
t_tuple tuple_mul1(const double q, t_tuple a)
{
a.x *= q;
a.y *= q;
a.z *= q;
a.w *= q;
a.m *= q;
a.n *= q;
a.o *= q;
a.p *= q;
return a;
}
// (2)
/*
 * Variant (2): scale every component of `a` by `q` and return the result.
 * The parameter copy `a` is only read; each product is written into a
 * separate local `b`, which is returned by value.
 */
t_tuple tuple_mul2(const double q, const t_tuple a)
{
t_tuple b;
b.x = a.x * q;
b.y = a.y * q;
b.z = a.z * q;
b.w = a.w * q;
b.m = a.m * q;
b.n = a.n * q;
b.o = a.o * q;
b.p = a.p * q;
return b;
}
My thoughts at first:
resource-management:
(2) needs to allocate memory on the stack for b, so in terms of resources 2 requires 64 more bytes than (1) per exec
runtime:
(1) does not allocate memory on the stack so it gains the 'stack-allocating a t_tuple' time compared to (2).
BUT !
I ran some tests and I was completely off. Actually, (2) runs faster than (1): for 200 million calls, (2) executes in ~1s, whereas (1) executes in ~1.55s.
Edit: I compiled with cc with no options
Can someone please explain why ?
Here is my runtime-test program:
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
/* Tuple of eight double components; the functions below take and return it by value. */
typedef struct s_tuple{
double x;
double y;
double z;
double w;
double m;
double n;
double o;
double p;
} t_tuple;
// (1)
/*
 * Variant (1): scale every component of `a` by `q` and return the result.
 * `a` is received by value, so the caller's tuple is not modified; the
 * multiplication is done in place on that parameter copy, which is then
 * returned by value.
 */
t_tuple tuple_mul1(const double q, t_tuple a)
{
a.x *= q;
a.y *= q;
a.z *= q;
a.w *= q;
a.m *= q;
a.n *= q;
a.o *= q;
a.p *= q;
return a;
}
// (2)
/*
 * Variant (2): scale every component of `a` by `q` and return the result.
 * The parameter copy `a` is only read; each product is written into a
 * separate local `b`, which is returned by value.
 */
t_tuple tuple_mul2(const double q, const t_tuple a)
{
t_tuple b;
b.x = a.x * q;
b.y = a.y * q;
b.z = a.z * q;
b.w = a.w * q;
b.m = a.m * q;
b.n = a.n * q;
b.o = a.o * q;
b.p = a.p * q;
return b;
}
int main(int ac, char **av)
{
int i;
long int n;
double q;
t_tuple a;
clock_t start, end;
q = 0.7;
a.x = 1.5;
a.y = 2;
a.z = 35897.78;
a.w = 4.6698;
a.m = 5.5;
a.n = 1065;
a.o = 11.6887;
a.p = 109090808.789;
if (ac > 1)
{
n = atol(av[1]);
double execution_time;
start = clock();
for (i = 0; i < n; i++)
tuple_mul1(q, a);
// tuple_mul2(q, a);
end = clock();
execution_time = ((double)(end - start))/CLOCKS_PER_SEC;
printf("exec_time = %f\nn = %.f * 1e6\n", execution_time, n / 1e6);
}
}

What compiler did you use and with what options?
gcc with -O3 produced identical assembly for both functions: https://godbolt.org/z/YhTW3zzWq

Related

A int64_t value overflows in a structure in C

I want to create a polynomial with only one term: 6/5 and display it.
It should print this: 6/5X^0
The structure of polynomial is :
/* poly_t is a POINTER to a polynomial record; the typedef hides the indirection. */
typedef struct __poly_struct_t *poly_t;
/*
 * Polynomial record. polyPrint reads coeffs[0] through coeffs[deg] inclusive
 * and prints coeffs[i] as the coefficient of X^i.
 */
struct __poly_struct_t{
unsigned int deg;
ratio_t *coeffs;
};
Where ratio_t is a rational number (coeffs points to an array of them); its structure is:
/* Rational number stored as a 64-bit numerator / denominator pair. */
typedef struct __ratio_struct_t{
int64_t num;
int64_t den;
}ratio_t;
I used two functions to construct this polynomial. polyFromRatioArray works: it prints 6/5X^0
/*
 * Build a polynomial of the given degree on top of an existing coefficient
 * array.
 *
 * c      - coefficient array. It is NOT copied: the polynomial stores this
 *          pointer, so the array must outlive every use of the polynomial.
 * degree - stored as the polynomial's degree.
 *
 * Returns the new polynomial, or NULL if the allocation fails. The caller
 * releases the polynomial with free(); the coefficient array is not freed
 * with it and stays owned by the caller.
 */
poly_t polyFromRatioArray(ratio_t *c, unsigned int degree){
    poly_t p = malloc(sizeof *p);
    if (p == NULL)
        return NULL;    /* do not dereference a failed allocation */
    p->deg = degree;
    p->coeffs = c;
    return p;
}
The other one makes the denominator look as if it had overflowed: polyFromRatio prints 6/140218959144480X^0
/*
 * Build the constant (degree 0) polynomial whose only coefficient is c.
 *
 * c is a by-value parameter, so it stops existing when this function
 * returns. Storing its address in the polynomial (for example by passing &c
 * to polyFromRatioArray) leaves coeffs dangling, and a later read returns
 * whatever happens to occupy that stack slot. The coefficient is therefore
 * copied into heap storage that lives as long as the polynomial: the
 * polynomial record and its single coefficient share one allocation.
 *
 * Returns the new polynomial, or NULL if the allocation fails. One free() on
 * the returned pointer releases both the record and the coefficient; do not
 * free coeffs separately.
 */
poly_t polyFromRatio(ratio_t c){
    /* `poly` is the first member, so &box->poly is the address malloc
     * returned and may be handed to free(). */
    struct {
        struct __poly_struct_t poly;
        ratio_t coeff;
    } *box = malloc(sizeof *box);

    if (box == NULL)
        return NULL;
    box->coeff = c;
    box->poly.deg = 0;
    box->poly.coeffs = &box->coeff;
    return &box->poly;
}
Main function:
/*
 * Demo: build the constant polynomial 6/5 through both constructors and
 * print each one.
 */
int main(){
    ratio_t sixFifths = createRatio((int64_t)6,(int64_t)5);

    /* Built over the local itself, which stays alive until main returns. */
    poly_t overLocal = polyFromRatioArray(&sixFifths, 0);
    polyPrint(overLocal);

    /* Built from a by-value copy of the same ratio. */
    poly_t fromCopy = polyFromRatio(sixFifths);
    polyPrint(fromCopy);

    free(overLocal);
    free(fromCopy);
    return 0;
}
Other functions involved:
ratio_t createRatio(int64_t a, int64_t b){
if(b == 0){
printf("Error : a divise by 0 \n");
exit(1);
}
ratio_t r;
int64_t pgcd = gcd(a, b); // gcd(int64_t a, int64_t b) is a function that finds pgcd using Euclid.
r.num = a/pgcd;
r.den = b/pgcd;
return r;
}
/*
 * Greatest common divisor of |a| and |b|, taken from the third output of
 * ext_eucl_div; the two cofactors it also produces are discarded.
 */
int64_t gcd(int64_t a, int64_t b){
    int64_t cofactor_a, cofactor_b, divisor;
    const int64_t abs_a = llabs(a);
    const int64_t abs_b = llabs(b);

    ext_eucl_div(&cofactor_a, &cofactor_b, &divisor, abs_a, abs_b);
    return divisor;
}
/*
 * Extended Euclidean algorithm.
 *
 * For non-negative a and b, stores in *g the greatest common divisor of a
 * and b, and in *u and *v cofactors such that (*u)*a + (*v)*b == *g.
 *
 * b == 0 is handled explicitly (g = a, u = 1, v = 0); without that guard the
 * first quotient below would divide by zero.
 */
void ext_eucl_div(int64_t *u, int64_t *v, int64_t *g, int64_t a, int64_t b){
    /* Invariants: u1*a + u2*b == u3 and v1*a + v2*b == v3. */
    int64_t u1 = 1, u2 = 0, u3 = a;
    int64_t v1 = 0, v2 = 1, v3 = b;

    if (b == 0) {
        *u = 1;
        *v = 0;
        *g = a;
        return;
    }

    for (;;) {
        const int64_t q  = u3 / v3;
        const int64_t t1 = u1 - q*v1;
        const int64_t t2 = u2 - q*v2;
        const int64_t t3 = u3 % v3;

        if (t3 < 1)
            break;      /* v3 divides u3: v3 is the gcd */

        /* Shift the window: (u, v) <- (v, t). */
        u1 = v1; u2 = v2; u3 = v3;
        v1 = t1; v2 = t2; v3 = t3;
    }

    *u = v1;
    *v = v2;
    *g = v3;
}
/*
 * Print the polynomial term by term, from X^0 up to X^deg.
 * A non-zero coefficient is printed as "<ratio>X^i", followed by " + " for
 * every term except the last, which ends the line. A coefficient whose
 * numerator is zero prints "0" on a line of its own.
 */
void polyPrint(poly_t p){
    const unsigned int top = p->deg;

    for(unsigned int k = 0; k <= top; k++){
        const ratio_t term = p->coeffs[k];

        if(term.num == 0){
            printf("0\n");
            continue;
        }
        printRatio(term);
        if(k == top)
            printf("X^%u\n", k);
        else
            printf("X^%u + ", k);
    }
}
/* Write the ratio to stdout as "num/den", with no trailing newline. */
void printRatio(ratio_t a){
    const int64_t numerator = a.num;
    const int64_t denominator = a.den;

    printf("%" PRId64, numerator);
    printf("/%" PRId64, denominator);
}
This is very strange, polyFromRatioArray and polyFromRatio seem like doing the same thing but nope.

Normalize a Vector from a struct in C

/* Three-component float vector. */
typedef struct
{
float x, y, z;
} VECTOR;
Write a function that takes a pointer to a VECTOR structure and normalizes the x, y, and z components
I realize you can normalize the vector by computing its length and dividing x, y, and z by that length, but how would I do this with the code presented here?
Here is my attempt at this.
/*
 * Asker's attempt, kept as posted. It is not valid C:
 *  - `Void` is not the keyword `void`;
 *  - the parameter `*VECTOR` names a type but declares no variable;
 *  - x, y and z are not declared in this scope (they are members of the
 *    struct and have to be reached through the pointer);
 *  - the VECTOR struct has no `Length` member, so the length has to be
 *    computed from x, y and z.
 */
Void Norm(*VECTOR)
{
x = x / *VECTOR.Length;
y = y / *VECTOR.Length;
z = z / *VECTOR.Length;
}
#include <math.h>
/* Three-component float vector. */
typedef struct
{
float x, y, z;
} VECTOR;
/*
 * Scale the vector at p in place so that its Euclidean length becomes 1.
 *
 * p - vector to normalize; must not be NULL.
 *
 * A vector of length zero has no direction and is left unchanged, because
 * dividing by that length would turn every component into NaN.
 */
void normalize( VECTOR* p )
{
    /* sqrtf keeps the computation in float, matching the component type. */
    const float length = sqrtf( p->x * p->x + p->y * p->y + p->z * p->z );

    if ( length == 0.0f )
        return;

    p->x /= length;
    p->y /= length;
    p->z /= length;
}

segmentation fault (core dumped) in recursive algorithm

This is a program to calculate a definite integral using numerical quadrature method (I don't know if this is the right translation):
#include <math.h>
#include <stdio.h>
/*
 * Integrand 4 / (1 + x^2); its integral over [0, 1] is pi.
 *
 * The denominator needs the leading 1: with only x*x the function is
 * infinite at x = 0, which is the lower integration bound used by main.
 */
float f(float x){
    return 4/(1+x*x);
}
void quadra(float a, float b, float *Q, float *E, float f(float)){
float q1, q2, m, h, fa, fb;
h = b - a;
fa = f(a);
fb = f(b);
q1 = (fa+fb)*h/2.;
m = (a+b)/2.;
q2 = ( (fa+2*f(m)+fb) ) *h/4;
*Q = q2;
*E = fabs(q2-q1)/3;
}
/* Exchange the two floats that x and y point to. */
void scambia(float *x, float *y) {
    const float first = *x;

    *x = *y;
    *y = first;
}
/*
 * Sort x[0..n-1] into ascending order with bubble sort.
 * After each pass the largest remaining element is in place, so the scanned
 * range shrinks by one; a pass that performs no swap ends the sort early.
 */
void sort(float x[], int n) {
    int swapped = 1;

    for (int last = n - 1; swapped == 1 && last > 0; last--) {
        swapped = 0;
        for (int j = 0; j < last; j++) {
            if (x[j] > x[j+1]) {
                scambia(&x[j], &x[j+1]);
                swapped = 1;
            }
        }
    }
}
/*
 * Integrate FUN over [A, B]; apparently meant as an adaptive scheme that
 * bisects intervals until the error indicator reaches TOL or MAXFUN integrand
 * evaluations have been spent (TODO confirm intended algorithm).
 *
 * A, B   - integration bounds
 * TOL    - target for the accumulated error indicator *E
 * MAXFUN - limit compared against the evaluation counter *N
 * Q, E   - outputs: integral estimate and error indicator
 * N      - output: evaluation counter
 * FUN    - integrand
 *
 * Returns 1 when the stopping test passes on the first whole-interval
 * estimate, 0 otherwise.
 *
 * NOTE(review): defects visible in this body:
 *  - The recursive call passes the same A, B, TOL and MAXFUN, and every
 *    invocation starts by recomputing the same whole-interval estimate and
 *    resetting *N to 3. If the first estimate misses TOL (and MAXFUN > 3),
 *    every nested call takes the same branch, so the recursion never ends;
 *    each level also places four 100-float arrays on the stack.
 *  - The recursive call writes through the same Q, E and N, overwriting the
 *    updates made just before it.
 *  - The lists are filled at indices 1 and 2 and then sorted with n == 2,
 *    which makes sort() look at indices 0 and 1: index 0 was never written
 *    and index 2 is ignored.
 *  - The four lists are sorted independently, so entry i of one list no
 *    longer belongs to the same interval as entry i of the others.
 */
int intautri(float A, float B, float TOL, int MAXFUN, float *Q, float *E, int *N, float FUN(float)){
void sort(float [], int);
void quadra(float, float, float*, float*, float(float));
float Q1,Q2,c,d,Iold,Eold,E0,E1,E2,alist[100],blist[100],qlist[100],elist[100];
int n, flag;
/* First estimate over the whole interval. */
quadra(A, B, &Q1, &E0,FUN);
/* quadra evaluates the integrand three times. */
*N = 3;
/* The lists are used starting at index 1. */
n = 1;
alist[n] = A;
blist[n] = B;
qlist[n] = Q1;
elist[n] = E0;
*Q = Q1;
*E = E0;
if (*E<=TOL || *N>=MAXFUN){
flag = 1;
}else{
/* Take the (only) stored interval off the list and bisect it. */
c = alist[n];
d = blist[n];
Iold = qlist[n];
Eold = elist[n];
n = n-1;
quadra(c, (c+d)/2, &Q1, &E1, FUN);
quadra((c+d)/2, d, &Q2, &E2, FUN);
/* Replace the parent's contribution by the two halves. */
*Q = *Q - Iold + Q1 + Q2;
*E = *E - Eold + E1 + E2;
/* Two more quadra calls: six more integrand evaluations. */
*N = *N + 6;
/* NOTE(review): same arguments as the current call -- see the header. */
intautri(A , B, TOL, MAXFUN, Q, E, N, FUN);
alist[n+1] = c;
blist[n+1] = (c+d)/2;
qlist[n+1] = Q1;
elist[n+1] = E1;
alist[n+2] = (c+d)/2;
blist[n+2] = d;
qlist[n+2] = Q2;
elist[n+2] = E2;
n = n+2;
sort(alist, n);
sort(blist, n);
sort(qlist, n);
sort(elist, n);
flag = 0;
}
;
return flag;
}
int main(){
int intautri(float, float, float, int, float *, float *, int*, float(float));
float TOL, MAXFUN, A, B,Q,E;
int N, J;
float f(float);
A = 0;
B = 1;
TOL = 0.0001;
MAXFUN = 200;
J = intautri(A, B, TOL, MAXFUN, &Q, &E, &N, f);
printf("%d\n", J);
printf("%d\n", N);
printf("%f\n", Q);
return 0;
}
The code compiles without problems, but when I run it, "segmentation fault (core dumped)" appears in the terminal. I know that this error can be related to memory allocation, but I don't understand what is wrong. Can you help me?
You have an exit condition in intautri:
if (*E<=TOL || *N>=MAXFUN){
but in the beginning of the function you also have:
*N = 3;
This means that your second exit condition will never fire. Without attempting to understand the logic of the code, I believe that you should:
Initialize N in main().
Remove the *N = 3 assignment in intautri().

problems with compiling openMP and math library

I'm trying to compile a program with openMP:
gcc -c fopenmp -lm prog.c -o prog
prog.c includes <math.h> and <omp.h>. However, when I run ./prog, the error is:
bash: ./prog: Permission denied
I tried again without the -c flag:
gcc -o prog -fopenmp -lm prog.c
But this time it doesn't seem to see the functions in math library like cos, sqrt (undefined reference).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>
#define REAL float
#define NX (64)
#ifndef M_PI
#define M_PI (3.1415926535897932384626)
#endif
/*
 * Fill buff (nx*ny*nz cells, x varying fastest, then y, then z) with
 *
 *   0.125 * (1 - ax*cos(kx*x)) * (1 - ay*cos(ky*y)) * (1 - az*cos(kz*z))
 *
 * evaluated at the cell centres x = dx*(jx+0.5), y = dy*(jy+0.5),
 * z = dz*(jz+0.5), where ax = exp(-kappa*time*kx*kx) and likewise for
 * ay and az.
 */
void init(REAL *buff, const int nx, const int ny, const int nz,
          const REAL kx, const REAL ky, const REAL kz,
          const REAL dx, const REAL dy, const REAL dz,
          const REAL kappa, const REAL time) {
    /* Per-axis amplitude after `time`. */
    const REAL ax = exp(-kappa*time*(kx*kx));
    const REAL ay = exp(-kappa*time*(ky*ky));
    const REAL az = exp(-kappa*time*(kz*kz));

    for (int jz = 0; jz < nz; jz++) {
        const REAL z = dz*((REAL)(jz + 0.5));

        for (int jy = 0; jy < ny; jy++) {
            const REAL y = dy*((REAL)(jy + 0.5));

            for (int jx = 0; jx < nx; jx++) {
                const REAL x = dx*((REAL)(jx + 0.5));
                const int cell = jz*nx*ny + jy*nx + jx;

                buff[cell] = (REAL)0.125
                    *(1.0 - ax*cos(kx*x))
                    *(1.0 - ay*cos(ky*y))
                    *(1.0 - az*cos(kz*z));
            }
        }
    }
}
/*
 * Root-mean-square difference between the first len elements of b1 and b2:
 * sqrt( sum((b1[i] - b2[i])^2) / len ).
 */
REAL accuracy(const REAL *b1, REAL *b2, const int len) {
    REAL sum_sq = 0.0;
    int idx = 0;

    while (idx < len) {
        sum_sq += (b1[idx] - b2[idx]) * (b1[idx] - b2[idx]);
        idx++;
    }
    return (REAL)sqrt(sum_sq/len);
}
/*
 * Signature shared by the diffusion solvers below, so main can select one at
 * run time: two working buffers, grid extents, the seven stencil
 * coefficients, the time step, and three output pointers (buffer holding the
 * final field, simulated time reached, number of steps taken).
 */
typedef void (*diffusion_loop_t)(REAL *f1, REAL *f2, int nx, int ny, int nz,
REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
REAL cb, REAL cc, REAL dt,
REAL **f_ret, REAL *time_ret, int *count_ret);
/*
 * Serial seven-point stencil solver.
 *
 * Each step writes into f2 the weighted sum of every cell of f1 and its six
 * axis neighbours (weights cc, cw, ce, cs, cn, cb, ct), then swaps the two
 * buffers. On a boundary face the missing neighbour is replaced by the cell
 * itself. Steps of size dt repeat until time + dt/2 reaches 0.1.
 *
 * Outputs: *time_ret is the simulated time reached, *f_ret the buffer that
 * holds the latest field (one of the two passed in), *count_ret the number
 * of steps performed.
 */
static void
diffusion_baseline(REAL *f1, REAL *f2, int nx, int ny, int nz,
                   REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
                   REAL cb, REAL cc, REAL dt,
                   REAL **f_ret, REAL *time_ret, int *count_ret) {
    REAL elapsed = 0.0;
    int steps = 0;

    do {
        for (int z = 0; z < nz; z++) {
            for (int y = 0; y < ny; y++) {
                for (int x = 0; x < nx; x++) {
                    const int c = x + y * nx + z * nx * ny;
                    /* Neighbour indices; a cell on a face stands in for the
                     * neighbour that would fall outside the grid. */
                    const int w = (x != 0)    ? c - 1 : c;
                    const int e = (x != nx-1) ? c + 1 : c;
                    const int n = (y != 0)    ? c - nx : c;
                    const int s = (y != ny-1) ? c + nx : c;
                    const int b = (z != 0)    ? c - nx * ny : c;
                    const int t = (z != nz-1) ? c + nx * ny : c;

                    f2[c] = cc * f1[c] + cw * f1[w] + ce * f1[e]
                        + cs * f1[s] + cn * f1[n] + cb * f1[b] + ct * f1[t];
                }
            }
        }

        /* The buffer just written becomes the input of the next step. */
        REAL *previous = f1;
        f1 = f2;
        f2 = previous;

        elapsed += dt;
        steps++;
    } while (elapsed + 0.5*dt < 0.1);

    *time_ret = elapsed;
    *f_ret = f1;
    *count_ret = steps;
}
/*
 * OpenMP variant of the seven-point stencil solver; same arguments and
 * outputs as diffusion_baseline.
 *
 * The whole time loop runs inside one parallel region. Every thread keeps
 * its own time, step count and pair of buffer pointers and performs the same
 * swaps, so all threads agree on which buffer is current; only the z loop is
 * divided among the threads.
 */
static void
diffusion_openmp(REAL *f1, REAL *f2, int nx, int ny, int nz,
REAL ce, REAL cw, REAL cn, REAL cs, REAL ct,
REAL cb, REAL cc, REAL dt,
REAL **f_ret, REAL *time_ret, int *count_ret) {
#pragma omp parallel
{
/* Declared inside the parallel region: one private copy per thread. */
REAL time = 0.0;
int count = 0;
REAL *f1_t = f1;
REAL *f2_t = f2;
/* Only the master thread reports the team size. */
#pragma omp master
printf("%d threads running\n", omp_get_num_threads());
do {
int z;
/* Work-sharing loop over z planes. There is no nowait clause, so per the
 * OpenMP rules the threads synchronise at the end of the loop before any of
 * them swaps buffers and starts the next step. */
#pragma omp for
for (z = 0; z < nz; z++) {
int y;
for (y = 0; y < ny; y++) {
int x;
for (x = 0; x < nx; x++) {
int c, w, e, n, s, b, t;
c = x + y * nx + z * nx * ny;
/* On a boundary face the missing neighbour is replaced by the cell itself. */
w = (x == 0) ? c : c - 1;
e = (x == nx-1) ? c : c + 1;
n = (y == 0) ? c : c - nx;
s = (y == ny-1) ? c : c + nx;
b = (z == 0) ? c : c - nx * ny;
t = (z == nz-1) ? c : c + nx * ny;
f2_t[c] = cc * f1_t[c] + cw * f1_t[w] + ce * f1_t[e]
+ cs * f1_t[s] + cn * f1_t[n] + cb * f1_t[b] + ct * f1_t[t];
}
}
}
/* Thread-private swap: the buffer just written becomes the next input. */
REAL *t = f1_t;
f1_t = f2_t;
f2_t = t;
time += dt;
count++;
} while (time + 0.5*dt < 0.1);
/* Every thread holds the same values; the master alone writes the outputs. */
#pragma omp master
{
*f_ret = f1_t;
*time_ret = time;
*count_ret = count;
}
}
return;
}
int main(int argc, char *argv[])
{
struct timeval time_begin, time_end;
int nx = NX;
int ny = NX;
int nz = NX;
REAL *f1 = (REAL *)malloc(sizeof(REAL)*NX*NX*NX);
REAL *f2 = (REAL *)malloc(sizeof(REAL)*NX*NX*NX);
REAL time = 0.0;
int count = 0;
REAL l, dx, dy, dz, kx, ky, kz, kappa, dt;
REAL ce, cw, cn, cs, ct, cb, cc;
l = 1.0;
kappa = 0.1;
dx = dy = dz = l / nx;
kx = ky = kz = 2.0 * M_PI;
dt = 0.1*dx*dx / kappa;
init(f1, nx, ny, nz, kx, ky, kz, dx, dy, dz, kappa, time);
ce = cw = kappa*dt/(dx*dx);
cn = cs = kappa*dt/(dy*dy);
ct = cb = kappa*dt/(dz*dz);
cc = 1.0 - (ce + cw + cn + cs + ct + cb);
diffusion_loop_t diffusion_loop = diffusion_baseline;
if (argc == 2) {
if (strcmp(argv[1], "openmp") == 0) {
diffusion_loop = diffusion_openmp;
}
}
gettimeofday(&time_begin, NULL);
diffusion_loop(f1, f2, nx, ny, nz, ce, cw, cn, cs, ct, cb, cc, dt,
&f1, &time, &count);
gettimeofday(&time_end, NULL);
REAL *answer = (REAL *)malloc(sizeof(REAL) * nx*ny*nz);
init(answer, nx, ny, nz, kx, ky, kz, dx, dy, dz, kappa, time);
REAL err = accuracy(f1, answer, nx*ny*nz);
double elapsed_time = (time_end.tv_sec - time_begin.tv_sec)
+ (time_end.tv_usec - time_begin.tv_usec)*1.0e-6;
REAL mflops = (nx*ny*nz)*13.0*count/elapsed_time * 1.0e-06;
double thput = (nx * ny * nz) * sizeof(REAL) * 2.0 * count
/ elapsed_time / (1 << 30);
fprintf(stderr, "elapsed time : %.3f (s)\n", elapsed_time);
fprintf(stderr, "flops : %.3f (MFlops)\n", mflops);
fprintf(stderr, "throughput : %.3f (GB/s)\n", thput);
fprintf(stderr, "accuracy : %e\n", err);
free(answer);
free(f1);
free(f2);
return 0;
}
In your first case, the -c flag only compiles to an object file, not an actual executable. Since object files don't normally have the executable bit set (because they're not directly executable), you get the Permission denied error.
In your second case, it's because the order in which you specify -l libraries is important. You need to move -lm after your prog.c file, like this:
gcc -o prog -fopenmp prog.c -lm

Golden Section Routine Segmentation Fault

I'm trying to find minimum point of Gamma function by Golden Section method. But when I execute the program I get segmentation fault error. I think since I'm a newbie C user, the problem may be due to calling the function Min_Search_Golden_Section wrong. Here is my complete code. I can't find my mistake. Thanks in advance.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#define A 12
#define sqrt5 2.236067977499789696
static int Stopping_Rule(double x0, double x1, double tolerance);
/*
 * Approximate Gamma(z) with an A-term series.
 *
 * The series coefficients do not depend on z; they are computed on the first
 * call and kept in static storage for later calls. The series yields
 * Gamma(z+1), which is divided by z to give Gamma(z).
 */
double sp_gamma(double z)
{
    static double coeff[A];
    static int coeff_ready = 0;
    const int terms = A;

    if ( !coeff_ready ) {
        double signed_fact = 1.0; /* (k - 1)! * (-1)^k, with 0! == 1 */

        coeff[0] = sqrt(2.0*M_PI);
        for (int k = 1; k < terms; k++) {
            coeff[k] = exp(terms-k) * pow(terms-k, k-0.5) / signed_fact;
            signed_fact *= -k;
        }
        coeff_ready = 1;
    }

    double series = coeff[0];
    for (int k = 1; k < terms; k++) {
        series += coeff[k] / ( z + k );
    }
    series *= exp(-(z+terms)) * pow(z+terms, z+0.5); /* Gamma(z+1) */
    return series/z;
}
/*
 * Golden-section search for a minimum of f inside [*a, *b].
 *
 * The bracket is narrowed in place: on return *a and *b are the end points
 * of the final interval. *fa and *fb are overwritten with f at an end point
 * each time that end point moves; an end point that never moves keeps the
 * value the caller passed in.
 *
 * tolerance - stopping tolerance handed to Stopping_Rule; a value <= 0 is
 *             replaced by sqrt(DBL_EPSILON) times the initial interval width.
 */
void Min_Search_Golden_Section( double (*f)(double), double* a, double *fa,
                                double* b, double* fb, double tolerance)
{
    static const double lambda = 0.5 * (sqrt5 - 1.0);
    static const double mu = 0.5 * (3.0 - sqrt5);    // = 1 - lambda

    // Two interior points of the starting interval and f at each of them.
    double x1 = *b - lambda * (*b - *a);
    double x2 = *a + lambda * (*b - *a);
    double fx1 = f(x1);
    double fx2 = f(x2);

    // Replace an unusable tolerance by a default relative to the interval.
    if (tolerance <= 0.0) tolerance = sqrt(DBL_EPSILON) * (*b - *a);

    // Each round discards the end segment next to the interior point with
    // the larger function value, then places one new interior point.
    while ( ! Stopping_Rule( *a, *b, tolerance) ) {
        const int drop_left = (fx1 > fx2);

        if (drop_left) {
            *a = x1;
            *fa = fx1;
        } else {
            *b = x2;
            *fb = fx2;
        }

        if ( Stopping_Rule( *a, *b, tolerance) ) break;

        if (drop_left) {
            x1 = x2;
            fx1 = fx2;
            x2 = *b - mu * (*b - *a);
            fx2 = f(x2);
        } else {
            x2 = x1;
            fx2 = fx1;
            x1 = *a + mu * (*b - *a);
            fx1 = f(x1);
        }
    }
}
int main()
{
double x;
double a = 0.0, b = 4.0, fa = 0.00001, fb = 6.0;
double fx = sp_gamma(x);
Min_Search_Golden_Section( &fx, &a, &fa, &b, &fb, 0.0000001);
return 0;
}
/*
 * Convergence test for the interval [x0, x1].
 * Returns 1 when the interval width is below the tolerance, 0 otherwise.
 * The tolerance is absolute while |x0 + x1| / 2 is at most 1 and is scaled
 * by that magnitude beyond it.
 */
static int Stopping_Rule(double x0, double x1, double tolerance)
{
    const double width = fabs( x1 - x0 );
    const double magnitude = 0.5 * fabs( x1 + x0 );
    const double limit = ( magnitude <= 1.0 ) ? tolerance : tolerance * magnitude;

    return width < limit;
}
You should be getting a compiler error. The first argument to Min_Search_Golden_Section should be a function pointer, but you pass the address of a variable instead.
When you get compiler errors, fix them - don't run the program and hope. :)
I guess you just meant to write:
Min_Search_Golden_Section( &sp_gamma, &a, &fa, &b, &fb, 0.0000001);

Resources