I'm trying to implement polynomials in C but Im having an issue with arrays and setting values. Im bad at C, please explain why this is happening: I run this, it says that p.coefs[1] is 0.0 instead of 3.0 as intended.
#include <stdio.h>
#include <assert.h>
int main()
{
#define MAX_DEG 10
typedef struct Polynomial Polynomial;
struct Polynomial {
int deg;
double coefs[MAX_DEG];
};
Polynomial ply_create(int deg) {
assert(deg >= 0 && deg <= MAX_DEG);
Polynomial poly;
poly.deg = deg;
return poly;
}
void ply_set_coef(Polynomial poly, int i, double val) {
poly.coefs[i] = val;
}
Polynomial p = ply_create(1);
p.coefs[0] = 1.0;
ply_set_coef(p, 1, 3.0);
printf("p.coefs[0] is %f and p.coefs[1] is %f", p.coefs[0], p.coefs[1]);
return 0;
}
I was previously using malloc and made p.coefs a pointer to a double. In this case I did not have any problem.
Related
I need to sum two complex numbers (c1,c2) and then express the result in its polar form.
I don't really know how to access the result for c1+c2, I mean I store them in the variable "result" but when I try to access them I find myself in the ComplexPolar structure and so I can't access the result.real and result.img to calculate magnitude and angle:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
struct ComplexCartesian
{
float real;
float img;
};
struct ComplexPolar
{
float magnitude;
float angle;
};
struct ComplexPolar add_two_complex(struct ComplexCartesian c1, struct ComplexCartesian c2, struct ComplexPolar result)
{
result.real= c1.real+c2.real;
result.img=c1.img+c2.img;
result.magnitude= sqrt((result.real)^2 + (result.img)^2);
result.angle= atan2(result.img, result.real);
}
^2 is not how you square in C, you have to either multiply the number by itself or use libc pow function.
^2 is a XOR operation where you aim to toggle the second bit, but in your case you are using it on a float which violates the strict aliasing rule and cause undefined behavior (on top of not being what you seek).
See the code below with some comments:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
struct ComplexCartesian
{
float real;
float img;
};
struct ComplexPolar
{
float magnitude;
float angle;
};
struct ComplexPolar polar_from_cartesian_sum(struct ComplexCartesian c1, struct ComplexCartesian c2)
{
struct ComplexPolar complexPolar; // here you declare the variable of your ComplexPolar struct
c1.real += c2.real; // you don't need to have a result var, you can just reuse c1.
c1.img += c2.img;
complexPolar.magnitude = sqrt(c1.real * c1.real + c1.img * c1.img);
complexPolar.angle = atan2(c1.img, c1.real);
return complexPolar; // you return the value;
}
int main(void) {
struct ComplexCartesian c1 = {0.12f, 0.15f};
struct ComplexCartesian c2 = {0.42f, 1.15f};
struct ComplexPolar complexPolar = polar_from_cartesian_sum(c1, c2);
printf("%f %f\n", complexPolar.magnitude, complexPolar.angle);
return 0;
}
Compile with gcc complex.c -lm && ./a.out
Output:
1.407693 1.177098
NB: Perhaps you should explicitly tell that your angle is expressed in radians, and also rename your function as polar_from_cartesian_sum
Radius = 1.41
θ = 67.44o = 1.18 radians
I am using the montecarlo method as implemented in the gsl library. I need to compute many repetitions of this integral changing a parameter in the integrand. So I need to make my subroutine fast. It seems that the most time consuming part is the evaluation of the integrand at the random points. How could I make the evaluation faster in my specific case?
Here is a minimal example:
#include <gsl/gsl_rng.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_monte.h>
#include <gsl/gsl_monte_plain.h>
#include <gsl/gsl_monte_vegas.h>
double q=0.0;
double mu=0.001;
double eta=0.1;
double kF=1.0;
double Kcut=10;
long int Nmax=10000000;
int Nwu=1000000;
double w=1;
struct my_f_params { double y;};
double
g (double *k, size_t dim, void *p)
{
double A;
struct my_f_params * fp = (struct my_f_params *)p;
double PQ=q*q+k[1]*k[1]-2*q*k[1]*cos(k[3])+mu;
double QK=k[0]*k[0]+k[1]*k[1]-2*k[0]*k[1]* (cos(k[2])*cos(k[3])+cos(k[4])*sin(k[2])*sin(k[3]))+mu;
double KPQ=q*q+k[0]*k[0]+k[1]*k[1]+2*k[0]*cos(k[2])*(q-k[1]*cos(k[3]))+2*k[1]* (q*cos(k[3])+k[0]*cos(k[4])*sin(k[2])*sin(k[3]));
double denFreq=fp->y-0.5*(k[0]*k[0]+k[1]*k[1]+KPQ);
double vol=k[0]*k[0]*k[1]*k[1]*sin(k[2])*sin(k[3]);
if (sqrt(KPQ) < kF) {
A = vol*denFreq*(1/QK-1/PQ)/(QK*(pow(denFreq,2)+eta*eta));
}
else {
A = 0;
}
return A;
}
int
main (void)
{
double res, err;
double xl[5] = {0, kF, 0, 0, 0};
double xu[5] = {kF, Kcut, M_PI, M_PI, 2*M_PI};
const gsl_rng_type *T;
gsl_rng *r;
gsl_monte_function G;
size_t calls = Nmax;
gsl_rng_env_setup ();
struct my_f_params params;
T = gsl_rng_default;
r = gsl_rng_alloc (T);
params.y=w;
G.f=&g;
G.dim=5;
G.params=¶ms;
{
gsl_monte_vegas_state *s = gsl_monte_vegas_alloc (5);
gsl_monte_vegas_integrate (&G, xl, xu, 5, Nwu, r, s,&res, &err);
do
{
gsl_monte_vegas_integrate (&G, xl, xu, 5, calls/5, r, s,&res, &err);
}
while (fabs (gsl_monte_vegas_chisq (s) - 1.0) > 0.5);
gsl_monte_vegas_free (s);
}
printf ("%.6f %.6f %.6f\n", w,res,err);
gsl_rng_free (r);
return 0;
}
Expanding on Bob__'s comment, you can use sincos to compute the sin and cos of the same argument (k[2] and k[3]), and define a kF_sqr to be initialised in the main and use that in the g function to avoid the sqrt call. With these optimisations, a quick & dirty test on my machine showed a ~5% speed-up over your code.
im trying to calculate the fraction with structs, the compiler says that initializer list cannot be converted, whats actually the issue? here is my code
#include <stdio.h>
struct fraction{
int z = 0;
int n = 1;
};
struct fraction addition(struct fraction b1, struct fraction b2) {
struct fraction result;
result.z = b1.z*b2.n + b2.z*b1.n;
result.n = b1.n*b2.n;
return result;
}
void Print(struct fraction b) {
printf("%d/%d\n", b.z, b.n);
}
int main() {
int i;
struct fraction b1 = { 1,1 }, b2 = { 1,2 };
try {
for (i = 1; i <= 6; i++) {
Print(addition(b1, b2));
}
}
catch (int exception) {
printf("Program closed!");
}
}
the for loop by the way is harmonic series but im not done with it yet. thanks for help in advance
Remove assigned values from struct (for C++11 and below):
struct fraction{
int z;
int n;
};
Now it compiles fine, tested with g++ 5.2.1 (should get the job done for other compilers too).
When I added option -std=c++14 your code compiled just fine without any changes.
When you use C++, you don't need to use struct fraction. You can but you don't need to. You can use just fraction.
Coming to the problematic line:
struct fraction b1 = { 1,1 }, b2 = { 1,2 };
You can use:
fraction b1{ 1,1 };
fraction b2{ 1,2 };
or
fraction b1 = fraction{ 1,1 }, b2 = fraction{ 1,2 };
from http://en.cppreference.com/w/cpp/language/aggregate_initialization
I read that user defined constructors will stop you from initializing with braces and, I guess, initializing your variables in the struct will act like an user defined constructor.
no user-provided constructors (explicitly defaulted or deleted constructors are allowed) (since C++11)
so writing just :
struct fraction{
int z;
int n;
};
should solve the problem
here is my idea how you could solve the problem
#include "stdafx.h"
int euclid( int a, int b );
struct fraction {
int z;
int n;
};
typedef struct fraction Bruch;
Bruch add( Bruch b1, Bruch b2 ) {
Bruch r;
r.z = b1.z * b2.n + b2.z * b1.n;
r.n = b1.n * b2.n;
return r;
}
void print( Bruch b ) {
printf("%d / %d \n", b.z, b.n );
}
void test() {
Bruch eins = {1,1};
Bruch halb = {1,2};
printf(" ---------------- structure---------------- \n" );
Bruch sum = add( eins, halb );
print(sum);
sum.z = 0;
sum.n = 1;
Bruch summand = {1,1};
for( int n=1; n<=6; ++n ) {
summand.nenner = n;
sum = add( sum, summand );
print(sum);
}
}
I wrote a C code that I would like to parallelize using OpenMP (I am a beginner and I have just a few days to solve this task); let's start from the main: first of all I have initialized 6 vectors (Vx,Vy,Vz,thetap,phip,theta); then there is a for loop that cycles over Nmax; inside of this loop I allocate some memory for the structure I have defined at the very top of the code; the structure is called coll_CPU and increases its size every cycle; then I pick some of the values from the vectors I have mentioned before and I place them into the structure; so at this point my structure coll_CPU is filled with Ncoll elements; during this process I used some of the functions declared outside of the main (these functions are random number generators). Now comes the important part: in my serial code I use a for loop to pass every single element of the structure to a function called collisionCPU (this function just gets the inputs and multiplies them by 2); My goal is to parallelize this loop so that each of my CPUs gives its contribution to do this operation and speed up the process.
Here are the codes:
main.c
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
#include <string.h>
#include <time.h>
#include <omp.h>
#define pi2 6.283185307
#define pi 3.141592654
#define IMUL(a,b) __mul24(a,b)
typedef struct {
int seme;
} iniran;
typedef struct{
int jp1;
int jp2;
float kx;
float ky;
float kz;
float vAx;
float vAy;
float vAz;
float vBx;
float vBy;
float vBz;
float tetaAp;
float phiAp;
float tetaA;
float tetaBp;
float phiBp;
float tetaB;
float kAx;
float kAy;
float kAz;
float kBx;
float kBy;
float kBz;
int caso;
} stato_struct;
stato_struct *coll_CPU=0;
unsigned int timer;
#include "DSMC_kernel_float.c"
//=============================================================
float min(float *a, float*b){
if(*a<*b){
return *a;
}
else{
return *b;
}
}
//=============================================================
float max(float *a, float*b){
if(*a>*b){
return *a;
}
else{
return *b;
}
}
//=============================================================
float rf(int *idum){
static int iff=0;
static int inext, inextp, ma[55];
int mj, mk;
int i, k, ii;
float ret_val;
if (*idum<0 || iff==0) {
iff=1;
mj=161803398 - abs(*idum);
mj %= 1000000000;
ma[54]=mj;
mk=1;
for (i=1; i<=54; ++i){
ii=(i*21)%55;
ma[ii-1]=mk;
mk=mj-mk;
if (mk<0) {
mk += 1000000000;
}
mj= ma[ii-1];
}
for(k=1; k<=4; ++k) {
for(i=1; i<=55; ++i){
ma[i-1] -= ma[(i+30)%55];
if (ma[i-1]<0){
ma[i-1] += 1000000000;
}
}
}
inext=0;
inextp=31;
*idum=1;
}
++inext;
if (inext==56){
inext=1;
}
++inextp;
if (inextp==56){
inextp=1;
}
mj=ma[inext-1]-ma[inextp-1];
if (mj<0){
mj += 1000000000;
}
ma[inext-1]=mj;
ret_val=mj*1.0000000000000001e-9;
return ret_val;
}
//============================================================
int genk(float *kx, float *ky, float *kz, int *p2seme){
// float sqrtf(float), sinf(float), cosf(float);
extern float rf(int *);
static float phi;
*kx=rf(p2seme) * 2. -1.f;
*ky= sqrtf(1. - *kx * *kx);
phi=pi2*rf(p2seme);
*kz=*ky * sinf(phi);
*ky *= cosf(phi);
return 0;
}
//==============================================================
int main(void){
float msec_kernel;
int Np=10000, Nmax=512;
int id,jp,jcoll,Ncoll,jp1, jp2, ind;
float Vx[Np],Vy[Np],Vz[Np],teta[Np],tetap[Np],phip[Np];
float kx, ky, kz, Vrx, Vry, Vrz, scalprod, fk;
float kAx, kAy, kAz, kBx, kBy, kBz;
iniran1.seme=7593;
for(jp=1;jp<=Np;jp++){
if(jp<=Np/2){
Vx[jp-1]=2.5;
Vy[jp-1]=0;
Vz[jp-1]=0;
tetap[jp-1]=0;
phip[jp-1]=0;
teta[jp-1]=0;
}
for (Ncoll=1;Ncoll<=Nmax;Ncoll += 10){
coll_CPU=(stato_struct*) malloc(Ncoll*sizeof(stato_struct));
jcoll=0;
while (jcoll<Ncoll){
jp1=1+floorf(Np*rf(&iniran1.seme));
jp2=1+floorf(Np*rf(&iniran1.seme));
genk(&kx,&ky,&kz,&iniran1.seme);
Vrx=Vx[jp2-1]-Vx[jp1-1];
Vry=Vy[jp2-1]-Vy[jp1-1];
Vrz=Vz[jp2-1]-Vz[jp1-1];
scalprod=Vrx*kx+Vry*ky+Vrz*kz;
if (scalprod<0) {
genk(&kAx,&kAy,&kAz,&iniran1.seme);
genk(&kBx,&kBy,&kBz,&iniran1.seme);
coll_CPU[jcoll].jp1= jp1;
coll_CPU[jcoll].jp2=jp2;
coll_CPU[jcoll].kx=kx;
coll_CPU[jcoll].ky=ky;
coll_CPU[jcoll].kz=kz;
coll_CPU[jcoll].vAx=Vx[jp1-1];
coll_CPU[jcoll].vAy=Vy[jp1-1];
coll_CPU[jcoll].vAz=Vz[jp1-1];
coll_CPU[jcoll].vBx=Vx[jp2-1];
coll_CPU[jcoll].vBy=Vy[jp2-1];
coll_CPU[jcoll].vBz=Vz[jp2-1];
coll_CPU[jcoll].tetaAp=tetap[jp1-1];
coll_CPU[jcoll].phiAp=phip[jp1-1];
coll_CPU[jcoll].tetaA=teta[jp1-1];
coll_CPU[jcoll].tetaBp=tetap[jp2-1];
coll_CPU[jcoll].phiBp=phip[jp2-1];
coll_CPU[jcoll].tetaB=teta[jp2-1];
coll_CPU[jcoll].kAx=kAx;
coll_CPU[jcoll].kAy=kAy;
coll_CPU[jcoll].kAz=kAz;
coll_CPU[jcoll].kBx=kBx;
coll_CPU[jcoll].kBy=kBy;
coll_CPU[jcoll].kBz=kBz;
coll_CPU[jcoll].caso=1;
jcoll++;
}
}
clock_t t;
t = clock();
#pragma omp parallel for private(id) //HERE IS WHERE I TRIED TO DO THE PARALLELIZATION BUT WITH NO SUCCESS. WHAT DO I HAVE TO TYPE INSTEAD???
for(id=0;id<Nmax;id++){
CollisioniCPU(coll_CPU,id);
}
t = clock() - t;
msec_kernel = ((float)t*1000)/CLOCKS_PER_SEC;
printf("Tempo esecuzione kernel:%e s\n",msec_kernel*1e-03);
for (ind=0;ind<Ncoll;ind++){
if (coll_CPU[ind].caso==4)
Ncoll_eff++;
else if (coll_CPU[ind].caso==0)
Ncoll_div++;
else
Ncoll_dim++;
}
free(coll_CPU);
}
return 0;
}
DSMC_kernel_float.c
void CollisioniCPU(stato_struct *coll_CPU, int id){
float vettA[6], vettB[6];
vettA[0]=coll_CPU[id].vAx;
vettA[1]=coll_CPU[id].vAy;
vettA[2]=coll_CPU[id].vAz;
vettA[3]=coll_CPU[id].tetaAp;
vettA[4]=coll_CPU[id].phiAp;
vettA[5]=coll_CPU[id].tetaA;
vettB[0]=coll_CPU[id].vBx;
vettB[1]=coll_CPU[id].vBy;
vettB[2]=coll_CPU[id].vBz;
vettB[3]=coll_CPU[id].tetaBp;
vettB[4]=coll_CPU[id].phiBp;
vettB[5]=coll_CPU[id].tetaB;
coll_CPU[id].vAx=2*vettA[0];
coll_CPU[id].vAy=2*vettA[1];
coll_CPU[id].vAz=2*vettA[2];
coll_CPU[id].tetaAp=2*vettA[3];
coll_CPU[id].phiAp=2*vettA[4];
coll_CPU[id].tetaA=2*vettA[5];
coll_CPU[id].vBx=2*vettB[0];
coll_CPU[id].vBy=2*vettB[1];
coll_CPU[id].vBz=2*vettB[2];
coll_CPU[id].tetaBp=2*vettB[3];
coll_CPU[id].phiBp=2*vettB[4];
coll_CPU[id].tetaB=2*vettB[5];
}
In order to compile the program I type this line on the terminal: gcc -fopenmp time_analysis.c -o time_analysis -lm fallowed by export OMP_NUM_THREADS=1; however once I run the executable I get this error message:
Error in `./time_analysis': double free or corruption (!prev): 0x00000000009602c0 ***
Aborted
What does this error mean? what I have done wrong in the main function when I tried to parallelize the for loop? and most important: what should I type instead in order to make my code go on parallel? please help me out if you can because I seriously have no time to study OpenMP from scratch and I need to get this job done right away.
Changing the inner loop as follows should bring you one step further.
#pragma omp parallel for private(id)
for(id=0;id<Ncoll;id++){
CollisioniCPU(coll_CPU,id);
}
Your OpenMP line seems okay, but I doubt that it will lead to significant improvements in runtime. You should optimize the surrounding code as well. Allocating the memory once outside of your loops would be a good start.
By the way, is there any reason for this verbose coding style and not using a more compact and readable version as this one?
void CollisioniCPU(stato_struct *coll_CPU, int id) {
stato_struct *ptr = coll_CPU + id;
ptr->vAx *= 2;
ptr->vAy *= 2;
ptr->vAz *= 2;
ptr->tetaAp *= 2;
ptr->phiAp *= 2;
ptr->tetaA *= 2;
ptr->vBx *= 2;
ptr->vBy *= 2;
ptr->vBz *= 2;
ptr->tetaBp *= 2;
ptr->phiBp *= 2;
ptr->tetaB *= 2;
}
the formula is pretty complicated. the numerator is num and the denominator is den, in the formula there is a root on the denominator so i have putted den in sqrrt() but sqrrt only accepts doubles
#include<stdio.h>
#include<conio.h>
#include<math.h>
#define LEN 11
// for the following set of x and y find r by the formula ..
float sum(float arr[]);
void main(void)
{ int i;
float x[]={43.22,39.87,41.85,43.23,40.06,53.29,53.29,54.14,49.12,40.71,55.15};
float y[]={102.43,100.93,97.43,97.81,98.32,98.32,100.07,97.08,91.59,94.85,94.6};
float num,den[LEN],r[LEN],xy[LEN],x2[LEN],y2[LEN];
for(i=0;i<LEN;i++)
{
x2[i]=x[i]*x[i];
y2[i]=y[i]*y[i];
xy[i]=x[i]*y[i];
}
num=sum(xy)-sum(x)*sum(y);
for(i=0;i<LEN;i++)
{
den[i]=((LEN*sum(x2)-(sum(x))*(sum(x)))*(LEN*sum(y2))-(sum(y2))*(sum(y2)));
r[i]=num /sqrt(den); /*<----------the problem is here-----> */
}
printf("%f",r);
getch();
}
float sum(float arr[])
{
int i;
float total=0;
for(i=0;i<=LEN;i++)
{
total+=arr[i];
}
return total;
}
Out of sheer boredom I have fixed your code. It is still ugly and extremely inefficient but compiles and should work. I'll leave you or someone else to make it decent.
#include <stdio.h>
#include <math.h>
#define LEN 11
// for the following set of x and y find r by the formula ..
float sum(float arr[]);
int main(void)
{ int i;
float x[]={43.22,39.87,41.85,43.23,40.06,53.29,53.29,54.14,49.12,40.71,55.15};
float y[]={102.43,100.93,97.43,97.81,98.32,98.32,100.07,97.08,91.59,94.85,94.6};
float num,den,r[LEN],xy[LEN],x2[LEN],y2[LEN];
for(i=0;i<LEN;i++)
{
x2[i]=x[i]*x[i];
y2[i]=y[i]*y[i];
xy[i]=x[i]*y[i];
}
num=LEN*sum(xy)-sum(x)*sum(y);
den = (LEN*sum(x2)) - sum(x)*sum(x);
float alpha = sum(y)/LEN - (num/den)*sum(x)/LEN;
printf("beta = %f, alpha = %f\n", num/den, alpha);
for(i=0;i<LEN;i++)
{
float term = y[i] - alpha - (num/den)*x[i];
r[i] = (term*term);
printf("%f",r[i]);
}
}
float sum(float arr[])
{
int i;
float total=0;
for(i=0;i<=LEN;i++)
{
total+=arr[i];
}
return total;
}
To be consistent with the rest of the code, you should presumably be writing:
r[i] = num / sqrt(den[i]);
However, the calculation is not one I recognize. The body of the second loop is going to produce the same result for each value in den and therefore also in r, which is probably not what the question asked for.
You need to give the index den[i] at the denominator....instead in your code you have just passed the base address!
r[i]=num /sqrt(den[i]);
If this is what you want to achieve, which is quite unclear.