OpenMP paralleling GSL Ordinary Differential Equations calculation - c

I'm trying to parallelize my code, but i got errors. I need to calc a Cauchy problem (it's already done) but than i need to parallelize it using OpenMP lib.
I've tried to write some code with OpenMP, but it's not working.
I've created a struct to collect result.
struct Dots {
double par;
double x;
double y;
};
This is my target function with parameter.
int ode_func (double x, const double y[], double f[], void *params)
{
double mu = *(int *)params;
f[0] = x + 2 * y[0] / (1 + mu * mu);
return GSL_SUCCESS;
}
This is the main function. I currently didn't find a way how to create a array of arrays of struct, but this is not the main problem.
void calc_cauchy_problem(struct Dots ArrayOfDots[], double x_start, double x_end, double y_start,
int count) {
int dim = 1;
double x = x_start;
double y[1] = {y_start};
int mu = 5;
int param = 0;
gsl_odeiv2_system sys = {ode_func, NULL, dim, &param};
gsl_odeiv2_driver * d = gsl_odeiv2_driver_alloc_y_new (&sys,
gsl_odeiv2_step_rkf45, 1e-6, 1e-6, 0.0);
int status = 0;
#pragma omp parallel for shared(ArrayOfDots) private(sys, param, d, status)
for (int param = 1; param < mu; param++) {
gsl_odeiv2_system sys = {ode_func, NULL, dim, &param};
gsl_odeiv2_driver * d = gsl_odeiv2_driver_alloc_y_new (&sys,
gsl_odeiv2_step_rkf45, 1e-6, 1e-6, 0.0);
for (int i = 1; i <= count; i++)
{
double xi = x_start + i * (x_end - x_start) / count;
int status = gsl_odeiv2_driver_apply(d, &x, xi, y);
if (status != GSL_SUCCESS)
{
printf ("error, return value=%d\n", status);
break;
}
// ArrayOfDots[i].par = mu;
// ArrayOfDots[i].x = xi;
// ArrayOfDots[i].y = y[0];
}
gsl_odeiv2_driver_free (d);
}
}
The main
int main() {
double x_start = 0;
double x_end = 10;
double y_start = 0;
int count = 10;
struct Dots ArrayOfDots[count];
calc_cauchy_problem(ArrayOfDots, x_start, x_end, y_start, count);
return 0;
}
It's compiled successfully with this gcc main.c -o main -fopenmp -lgsl -std=gnu11 but when i launch it i got error
gsl: driver.c:354: ERROR: integration limits and/or step direction not consistent
Default GSL error handler invoked.
I think that the main problem with this #pragma omp parallel for shared(ArrayOfDots) private(sys, param, d, status) but i have no idea how to rewrite this in the other way.
Thanks for your responses.
UPD:
With Kaveh Vahedipour help my code partially start to work. It means that half of my for cycle start to work.
UPD UPD:
After another investigations i had the following code:
It's compile and run, but i got Process finished with exit code 4 and printf("Elapsed time = %f\n", omp_get_wtime() - start_time); don't print anything.
struct Dots {
double par;
double x;
double y;
};
int ode_func (double x, const double y[], double f[], void *params)
{
double mu = *(int *)params;
f[0] = (x + 2 * y[0]) / (1 + mu * mu);
return GSL_SUCCESS;
}
void calc_cauchy_problem(double x_start, double x_end, double y_start,
int count, int param1, int param2) {
int dim = 1;
double x = x_start;
double y[1] = {y_start};
int param = param1;
int j = 0;
int status = 0;
char filename[10];
#pragma omp parallel for private(param, status, x, y)
for (param = param1; param <= param2; param++) {
struct Dots ArrayOfDots[count];
gsl_odeiv2_system sys = {ode_func, NULL, dim, &param};
gsl_odeiv2_driver * d =
gsl_odeiv2_driver_alloc_y_new (&sys, gsl_odeiv2_step_rkf45, 1e-6, 1e-6, 0.0);
for (int i = 1; i <= count; i++) {
double xi = x_start + i * (x_end - x_start) / count;
int status = gsl_odeiv2_driver_apply(d, &x, xi, y);
if (status != GSL_SUCCESS)
{
printf ("error, return value=%d\n", status);
break;
}
ArrayOfDots[i].par = param;
ArrayOfDots[i].x = xi;
ArrayOfDots[i].y = y[0];
}
gsl_odeiv2_driver_free (d);
}
}
int main() {
double start_time = omp_get_wtime();
double x_start = 0;
double x_end = 10;
double y_start = 0;
const int count = 500;
int param1 = 1;
int param2 = 10;
calc_cauchy_problem(x_start, x_end, y_start, count, param1, param2);
printf("Elapsed time = %f\n", omp_get_wtime() - start_time);
return 0;
}

Add x to private loop vars: private(sys, param, d, status, x). Please get back to me, if you still experience issues.
void calc_cauchy_problem(double x_start, double x_end, double y_start,
int count, int param1, int param2) {
int dim = 1;
double x = x_start;
double y[1] = {y_start};
int param = param1;
int j = 0;
int status = 0;
char filename[10];
#pragma omp parallel for private(param, status, x, y)
for (param = param1; param <= param2; param++) {
struct Dots ArrayOfDots[count];
gsl_odeiv2_system sys = {ode_func, NULL, dim, &param};
gsl_odeiv2_driver * d =
gsl_odeiv2_driver_alloc_y_new (&sys, gsl_odeiv2_step_rkf45, 1e-6, 1e-6, 0.0);
for (int i = 1; i <= count; i++) {
double xi = x_start + i * (x_end - x_start) / count;
int status = gsl_odeiv2_driver_apply(d, &x, xi, y);
if (status != GSL_SUCCESS)
{
printf ("error, return value=%d\n", status);
break;
}
ArrayOfDots[i].par = param;
ArrayOfDots[i].x = xi;
ArrayOfDots[i].y = y[0];
}
//write_data_to_file(param, count, ArrayOfDots);
for (int i = 0; i < count; ++i) {
printf ("%d: %f, %f, %f\n", omp_get_thread_num(),
ArrayOfDots[i].par, ArrayOfDots[i].x, ArrayOfDots[i].y);
}
gsl_odeiv2_driver_free (d);
}
}

Seems like this version works fine. I think problem was with this struct Dots ArrayOfDots[count]; and when i try to push values to this struct.
ArrayOfDots[i].par = param;
ArrayOfDots[i].x = xi;
ArrayOfDots[i].y = y[0];
Here is the full code.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>
// GSL lib includes
#include <gsl/gsl_sf_bessel.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_odeiv2.h>
int ode_func (double x, const double y[], double f[], void *params)
{
double mu = *(int *)params;
f[0] = (x + 2 * y[0]) / (1 + mu * mu);
return GSL_SUCCESS;
}
void calc_cauchy_problem(double x_start, double x_end, double y_start,
int count, int param1, int param2) {
#pragma omp parallel for
for(int param = param1; param < param2; param++) {
gsl_odeiv2_system sys = {ode_func, NULL, 1, &param};
gsl_odeiv2_driver * d =
gsl_odeiv2_driver_alloc_y_new (&sys, gsl_odeiv2_step_rk8pd,
1e-6, 1e-6, 0.0);
int i;
double x = x_start, x1 = x_end;
double y[1] = { y_start };
for (i = 1; i <= count; i++)
{
double xi = i * x1 / count;
int status = gsl_odeiv2_driver_apply (d, &x, xi, y);
if (status != GSL_SUCCESS)
{
printf ("error, return value=%d\n", status);
break;
}
// printf ("%d %d %.5e %.5e\n", omp_get_thread_num(), param, x, y[0]);
}
gsl_odeiv2_driver_free (d);
}
}
int main() {
double start_time = omp_get_wtime();
double x_start = 0;
double x_end = 10;
double y_start = 0;
const int count = 100000;
int param1 = 1;
int param2 = 20;
calc_cauchy_problem(x_start, x_end, y_start, count, param1, param2);
printf("Elapsed time = %f\n", omp_get_wtime() - start_time);
return 0;
}
Really thanks to Kaveh Vahedipour.

Related

How do we import C code outputs to be used in matlab for visualisation

i have a C code that finds the solution of a function using the methode of bisection and Newton Raphson, and i want to compare the results using graphs ( i am asked to do so in matlab as it's a school project ), but i have no idea how.
here's my code :
`
#include <stdio.h>
#include <math.h>
#include<stdio.h>
#include<math.h>
float F0 (float V)
{
float J0 = 1e-15;
float n = 0.68;
float V0 = 0.025;
int E = 1;
int R = 100;
return (E-V-R*J0*(exp(n*V/V0)-1));
}
float F1 (float V)
{
float J0 = 1e-15;
float n = 0.68;
float V0 = 0.025;
int E = 1;
int R = 100;
return (-1-n*R*J0/V0*exp(n*V/V0));
}
void Dichotomie (float *V, float a, float b, int *itr)
{
*V=(a+b)/2;
++(*itr);
printf("Iteration no. %3d V = %7.5f\n", *itr, *V);
}
void Newton(int itr, int maxmitr, float h, float V0, float V1, float err)
{
for (itr=1; itr<=maxmitr; itr++)
{
h=F0(V0)/F1(V0);
V1=V0-h;
printf("Iteration no. %3d, V = %9.6f\n", itr, V1);
if (fabs(h) < 2*err)
{
printf("After %3d iterations, root = %8.6f\n", itr, V1);
return;
}
V0=V1;
}
printf(" The required solution does not converge or iterations are insufficient\n");
return;
}
int main ()
{
float J0 = 1e-15;
float n = 0.68;
float V0 = 0.025;
int E = 1;
int R = 100;
int itr = 0, maxmitr;
float V, a, b, err, V1;
float h;
a = 0;
b = 1;
err = 0.000001;
maxmitr = 100;
Newton(itr, maxmitr,h,V0,V1,err);
Dichotomie (&V, a, b, &itr);
do
{
if (F0(a)*F0(V) < 0)
b=V;
else
a=V;
Dichotomie (&V1, a, b, &itr);
if (fabs(V1-V) < 2*err)
{
printf("After %d iterations, root = %6.6f\n", itr, V1);
return 0;
}
V=V1;
}
while (itr < maxmitr);
printf("The solution does not converge or iterations are not sufficient");
return 1;
}
`
I read some documentations about this in the Matlab website, and i found that there is a function block in Simulink to be used, but i have no idea how Simulink works.

Perceptron in C fails to train

Trying to implement a perceptron in C, can't get it to train.
The output always just goes to zero and I don't know what’s wrong.
Although, I do suspect it might be either the delta function or me just incorrectly implementing the perceptron.
Thanks in advance to everyone who helps!
#include<stdio.h>
#define arrayLength(x) (sizeof(x) / sizeof((x)[0]))
typedef int bool;
enum { false, true };
int main(){
float trainInputs [2][2] = {{0.0f, 1.0f}, {0.0f, 0.0f}};
float trainOutputs [2][1] = {{1.0f}, {0.0f}};
int amontOfTrainData = 1;
float inputs [] = {0.0f, 1.1f};
float outputs [] = {0.0f};
float wights [(arrayLength(inputs) * arrayLength(outputs))] = {0.5f, 0.5f, 0.5f, 0.5f};
float learningRate = 0.01f;
float delta(float actual, float want, float wight){
float error = want - actual;
float out = error * learningRate * wight;
printf(":%.6f:\n", out);
return out;
}
// Run perceptron
void run(bool train){
int outputInc = 0;
int wightInc = 0;
while(outputInc < arrayLength(outputs)){
int inputInc = 0;
while(inputInc < arrayLength(inputs)){
if(train){
int x = 0;
while(x < amontOfTrainData){
outputs[outputInc] = trainInputs[x][inputInc] * wights[wightInc];
wights[wightInc] = delta(outputs[outputInc], trainOutputs[x][outputInc], wights[wightInc]);
x++;
}
}else{
outputs[outputInc] = inputs[inputInc] * wights[wightInc];
}
inputInc++;
wightInc++;
}
//printf("out[%i]: %.5f\n", outputInc, outputs[outputInc]);
outputInc++;
}
}
int b = 0;
while(b < 100){
run(true);
b++;
}
printf("-----------[ 100 LOOPS DONE ]-----------\n");
run(false);
return 0;
}
As error, learningRate and wight are less than 0, the expression error * learningRate * wight will tend to 0 too.
The delta should not be the new value for the weight, it is the amount of change, so instead of:
wights[wightInc] = delta(...);
Try:
wights[wightInc] += delta(...);
(Which source are you using for you perceptron formulas?)
I have applied all the changes and this is the final working code.
Thanks to everyone who helped me!
#include<stdio.h>
#include<stdbool.h>
#define arrayLength(x) (sizeof(x) / sizeof((x)[0]))
float trainInputs [2][2] = {{0.0f, 1.0f}, {0.0f, 0.0f}};
float trainOutputs [2][1] = {{1.0f}, {0.0f}};
int amontOfTrainData = 1;
float inputs [] = {1.0f, 1.0f};
float outputs [] = {0.0f};
float wights [(arrayLength(inputs) * arrayLength(outputs))] = {0.001f, 0.001f};
float learningRate = 0.1f;
float delta(float actual, float want, float wight)
{
float error = want - actual;
float out = error * learningRate * wight;
return out;
}
void run(bool train)
{
int outputInc = 0;
int wightInc = 0;
while(outputInc < arrayLength(outputs))
{
int inputInc = 0;
while(inputInc < arrayLength(inputs))
{
if(train)
{
int x = 0;
while(x < amontOfTrainData)
{
outputs[outputInc] = trainInputs[x][inputInc] * wights[wightInc];
wights[wightInc] += delta(outputs[outputInc], trainOutputs[x][outputInc], wights[wightInc]);
x++;
}
}
else
{
outputs[outputInc] = inputs[inputInc] * wights[wightInc];
}
inputInc++;
wightInc++;
}
printf("out[%i]: %.5f\n", outputInc, outputs[outputInc]);
outputInc++;
}
}
int main()
{
// Run neural network
int b = 0;
int loops = 500;
while(b < loops)
{
run(true);
b++;
}
printf("-----------[ %i LOOPS DONE ]-----------\n", loops);
run(false);
return 0;
}

Error in the basic GSL example program - simple nonlinear fitting

I just got GSL set up on my windows box and I am trying to learn how to use the nonlinear fitting functions. First thing I did was pull an example directly off their website: https://www.gnu.org/software/gsl/manual/html_node/Example-programs-for-Nonlinear-Least_002dSquares-Fitting.html
which is here:
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#define N 40
#define FIT(i) gsl_vector_get(s->x, i)
#define ERR(i) sqrt(gsl_matrix_get(covar,i,i))
struct data {
size_t n;
double * y;
double * sigma;
};
int expb_f (const gsl_vector * x, void *data, gsl_vector * f)
{
size_t n = ((struct data *)data)->n;
double *y = ((struct data *)data)->y;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
double b = gsl_vector_get (x, 2);
size_t i;
for (i = 0; i < n; i++)
{
/* Model Yi = A * exp(-lambda * i) + b */
double t = i;
double Yi = A * exp (-lambda * t) + b;
gsl_vector_set (f, i, (Yi - y[i])/sigma[i]);
}
return GSL_SUCCESS;
}
int expb_df (const gsl_vector * x, void *data, gsl_matrix * J)
{
size_t n = ((struct data *)data)->n;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
size_t i;
for (i = 0; i < n; i++)
{
/* Jacobian matrix J(i,j) = dfi / dxj, */
/* where fi = (Yi - yi)/sigma[i], */
/* Yi = A * exp(-lambda * i) + b */
/* and the xj are the parameters (A,lambda,b) */
double t = i;
double s = sigma[i];
double e = exp(-lambda * t);
gsl_matrix_set (J, i, 0, e/s);
gsl_matrix_set (J, i, 1, -t * A * e/s);
gsl_matrix_set (J, i, 2, 1/s);
}
return GSL_SUCCESS;
}
int expb_fdf (const gsl_vector * x, void *data, gsl_vector * f, gsl_matrix * J)
{
expb_f (x, data, f);
expb_df (x, data, J);
return GSL_SUCCESS;
}
void print_state (size_t iter, gsl_multifit_fdfsolver * s);
int main (void)
{
const gsl_multifit_fdfsolver_type *T;
gsl_multifit_fdfsolver *s;
int status;
unsigned int i, iter = 0;
const size_t n = N;
const size_t p = 3;
gsl_matrix *covar = gsl_matrix_alloc (p, p);
double y[N], sigma[N];
struct data d = { n, y, sigma};
gsl_multifit_function_fdf f;
double x_init[3] = { 1.0, 0.0, 0.0 };
gsl_vector_view x = gsl_vector_view_array (x_init, p);
const gsl_rng_type * type;
gsl_rng * r;
gsl_rng_env_setup();
type = gsl_rng_default;
r = gsl_rng_alloc (type);
f.f = &expb_f;
f.df = &expb_df;
f.fdf = &expb_fdf;
f.n = n;
f.p = p;
f.params = &d;
/* This is the data to be fitted */
for (i = 0; i < n; i++)
{
double t = i;
y[i] = 1.0 + 5 * exp (-0.1 * t) + gsl_ran_gaussian (r, 0.1);
sigma[i] = 0.1;
printf ("data: %u %g %g\n", i, y[i], sigma[i]);
};
T = gsl_multifit_fdfsolver_lmsder;
s = gsl_multifit_fdfsolver_alloc (T, n, p);
gsl_multifit_fdfsolver_set (s, &f, &x.vector);
print_state (iter, s);
do
{
iter++;
status = gsl_multifit_fdfsolver_iterate (s);
printf ("status = %s\n", gsl_strerror (status));
print_state (iter, s);
if (status)
break;
status = gsl_multifit_test_delta (s->dx, s->x,
1e-4, 1e-4);
}
while (status == GSL_CONTINUE && iter < 500);
gsl_multifit_covar (s->J, 0.0, covar);
{
double chi = gsl_blas_dnrm2(s->f);
double dof = n - p;
double c = GSL_MAX_DBL(1, chi / sqrt(dof));
printf("chisq/dof = %g\n", pow(chi, 2.0) / dof);
printf ("A = %.5f +/- %.5f\n", FIT(0), c*ERR(0));
printf ("lambda = %.5f +/- %.5f\n", FIT(1), c*ERR(1));
printf ("b = %.5f +/- %.5f\n", FIT(2), c*ERR(2));
}
printf ("status = %s\n", gsl_strerror (status));
gsl_multifit_fdfsolver_free (s);
gsl_matrix_free (covar);
gsl_rng_free (r);
return 0;
}
void print_state (size_t iter, gsl_multifit_fdfsolver * s)
{
printf ("iter: %3u x = % 15.8f % 15.8f % 15.8f "
"|f(x)| = %g\n",
iter,
gsl_vector_get (s->x, 0),
gsl_vector_get (s->x, 1),
gsl_vector_get (s->x, 2),
gsl_blas_dnrm2 (s->f));
}
Ideally it should simply generate a short data set that follows a decaying exponential with some white noise on top and then fit it.
To get it running in Code::Blocks in windows I followed the procedure outlined here: installing GSL on Windows XP 32bit for use with codeblocks
It compiles without warnings even with -Wall and -Wextra flags. However, it fails on the line: gsl_multifit_fdfsolver_set (s, &f, &x.vector); with the error: multifit\fdfsolver.c:132: ERROR: vector length does not match solver. Default GSL error handler invoked.
I was a little surprised to find this in what should be raw example code, but here we are. So I am hoping someone more knowledgeable than I can tell me what I am doing wrong with this simple example.
Figured it out: They were allocating their initial vector wrong. The fixed code is here:
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_multifit_nlin.h>
#define N 40
#define FIT(i) gsl_vector_get(s->x, i)
#define ERR(i) sqrt(gsl_matrix_get(covar,i,i))
struct data {
size_t n;
double * y;
double * sigma;
};
int expb_f (const gsl_vector * x, void *data, gsl_vector * f)
{
size_t n = ((struct data *)data)->n;
double *y = ((struct data *)data)->y;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
double b = gsl_vector_get (x, 2);
size_t i;
for (i = 0; i < n; i++)
{
/* Model Yi = A * exp(-lambda * i) + b */
double t = i;
double Yi = A * exp (-lambda * t) + b;
gsl_vector_set (f, i, (Yi - y[i])/sigma[i]);
}
return GSL_SUCCESS;
}
int expb_df (const gsl_vector * x, void *data, gsl_matrix * J)
{
size_t n = ((struct data *)data)->n;
double *sigma = ((struct data *) data)->sigma;
double A = gsl_vector_get (x, 0);
double lambda = gsl_vector_get (x, 1);
size_t i;
for (i = 0; i < n; i++)
{
/* Jacobian matrix J(i,j) = dfi / dxj, */
/* where fi = (Yi - yi)/sigma[i], */
/* Yi = A * exp(-lambda * i) + b */
/* and the xj are the parameters (A,lambda,b) */
double t = i;
double s = sigma[i];
double e = exp(-lambda * t);
gsl_matrix_set (J, i, 0, e/s);
gsl_matrix_set (J, i, 1, -t * A * e/s);
gsl_matrix_set (J, i, 2, 1/s);
}
return GSL_SUCCESS;
}
int expb_fdf (const gsl_vector * x, void *data, gsl_vector * f, gsl_matrix * J)
{
expb_f (x, data, f);
expb_df (x, data, J);
return GSL_SUCCESS;
}
void print_state (size_t iter, gsl_multifit_fdfsolver * s);
int main (void)
{
const gsl_multifit_fdfsolver_type *T;
gsl_multifit_fdfsolver *s;
int status;
unsigned int i, iter = 0;
const size_t n = N;
const size_t p = 3;
gsl_matrix *covar = gsl_matrix_alloc (p, p);
double y[N], sigma[N];
struct data d = { n, y, sigma};
gsl_multifit_function_fdf f;
gsl_vector *x = gsl_vector_alloc(p);
for (i=0; i<p; i++)
{
gsl_vector_set(x,i,i==0 ? 1 : 0);
}
const gsl_rng_type * type;
gsl_rng * r;
gsl_rng_env_setup();
type = gsl_rng_default;
r = gsl_rng_alloc (type);
f.f = &expb_f;
f.df = &expb_df;
f.fdf = &expb_fdf;
f.n = n;
f.p = p;
f.params = &d;
/* This is the data to be fitted */
for (i = 0; i < n; i++)
{
double t = i;
y[i] = 1.0 + 5 * exp (-0.1 * t) + gsl_ran_gaussian (r, 0.1);
sigma[i] = 0.1;
printf ("data: %u %g %g\n", i, y[i], sigma[i]);
};
T = gsl_multifit_fdfsolver_lmsder;
s = gsl_multifit_fdfsolver_alloc (T, n, p);
gsl_multifit_fdfsolver_set (s, &f, x);
print_state (iter, s);
do
{
iter++;
status = gsl_multifit_fdfsolver_iterate (s);
printf ("status = %s\n", gsl_strerror (status));
print_state (iter, s);
if (status)
break;
status = gsl_multifit_test_delta (s->dx, s->x,
1e-4, 1e-4);
}
while (status == GSL_CONTINUE && iter < 500);
gsl_multifit_covar (s->J, 0.0, covar);
{
double chi = gsl_blas_dnrm2(s->f);
double dof = n - p;
double c = GSL_MAX_DBL(1, chi / sqrt(dof));
printf("chisq/dof = %g\n", pow(chi, 2.0) / dof);
printf ("A = %.5f +/- %.5f\n", FIT(0), c*ERR(0));
printf ("lambda = %.5f +/- %.5f\n", FIT(1), c*ERR(1));
printf ("b = %.5f +/- %.5f\n", FIT(2), c*ERR(2));
}
printf ("status = %s\n", gsl_strerror (status));
gsl_multifit_fdfsolver_free (s);
gsl_matrix_free (covar);
gsl_rng_free (r);
gsl_vector_free(x);
return 0;
}
void print_state (size_t iter, gsl_multifit_fdfsolver * s)
{
printf ("iter: %3u x = % 15.8f % 15.8f % 15.8f "
"|f(x)| = %g\n",
iter,
gsl_vector_get (s->x, 0),
gsl_vector_get (s->x, 1),
gsl_vector_get (s->x, 2),
gsl_blas_dnrm2 (s->f));
}

segment fault on programming C

I am tyring to make velocity Verlet method, by using C language.
I thought I made it good. However, there pops up 'Segmentation fault(core dumped)' whenever, I increase the size of the vector or array, x and y.
For the size n equal and less than 1e3, it's fine, but at the point of n = 1e4, the program gets error.
Please anybody help me on this.
Thank you.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
double verlet(double t, double x)
{
double E = 0.252;
double B = 0.052;
double a = M_PI/2;
return -sin(x) + E*cos(t) + B*cos(2*t+a);
}
double pverlet(double(*f)(double, double), double dt, double t, double x, double y)
{
return x + dt*( y + (dt/2)*f(t, x));
}
double vverlet(double(*g)(double, double), double dt, double t, double x, double y)
{
return y + (dt/2) * g(t, x);
}
int main(void)
{
int i;
double t;
int n = 1e4;
double ti = 0, tf = 1e5, dt = (tf-ti)/n;
double *x = (double *) malloc(sizeof(double)*n);
double *y = (double *) malloc(sizeof(double)*2*n);
if (x == NULL)
{
printf("error allocating memory!\n");
return 1;
}
if (y == NULL)
{
printf("error allocating memory!\n");
return 1;
}
for (y[0] = 0, i = 1; i <2*n; i++)
{
y[i] = vverlet(verlet, dt, ti + dt*(i-1), x[i-1], y[i-1]);
}
for (x[0] = 0, i = 1; i < n; i++)
{
x[i] = pverlet(verlet, dt, ti + dt*(i-1), x[i-1], y[2*(i-1)]);
}
for (i = 0; i < n; i++)
{
t = ti + dt * i;
printf("%e %e %e\n", t, x[i], y[2*i]);
}
return 0;
free(x);
free(y);
}
for (y[0] = 0, i = 1; i <2*n; i++)
{
y[i] = vverlet(verlet, dt, ti + dt*(i-1), x[i-1], y[i-1]);
}
x is defined from 0 to n-1.

MPI_AllGather not gather properly...all elements end up the same value?

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>
#define h 1
#define XY0 0
#define MAX_XY 5
#define N 2 //particles per subdomain
#define BLOCKS 4
#define a 1
#define b 1
float velocityX(float x, float y);
float velocityY(float x, float y);
int malloc2dfloat(float ***array, int length);
int main (int argc, char **argv)
{
typedef struct {
float xcoord;
float ycoord;
float velx;
float vely;
} particle;
int points= (int) floor((MAX_XY - XY0)/h) + 1;
int procsize = 2;
int myid, nproc;
MPI_Datatype particletype, oldtypes[1];
MPI_Aint offset[1], extent;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
int startElementX, startElementY, endElementX, endElementY;
particle* sub_pars = (particle*)malloc(sizeof(particle)*N);
offset[0] = 0;
int blockcounts[1];
blockcounts[0] = 4;
oldtypes[0] = MPI_FLOAT;
MPI_Type_struct(1, blockcounts, offset, oldtypes, &particletype);
MPI_Type_commit(&particletype);
particle* particles = (particle*)malloc(sizeof(particle) * N * procsize*procsize);
if (nproc != procsize*procsize){
printf("Must use np=4 -- split into 4 blocks");
MPI_Abort(MPI_COMM_WORLD,1);
}
srand(time(NULL)+myid);
if (myid == 0)
{
float mins[4];
startElementX = 0;
startElementY = 0;
endElementX = (points/procsize)-1;
endElementY = (points/procsize) -1;
}
else if (myid == 1)
{
startElementX = 0;
startElementY = (points/procsize);
endElementX = (points/procsize) -1;
endElementY = points - 1;
}
else if (myid == 2)
{
startElementX = (points/procsize);
startElementY = 0;
endElementX = points - 1;
endElementY = (points/procsize) -1;
}
else
{
startElementX = (points/procsize);
startElementY = (points/procsize);
endElementX = points-1;
endElementY = points-1;
}
int i;
float localmin;
float mag;
for (i=0; i<N; i++)
{
sub_pars[i].xcoord = ((startElementX + rand()/(RAND_MAX / (endElementX-startElementX+1)+1)))*h + XY0;
printf("%f\n", sub_pars[i].xcoord);
sub_pars[i].ycoord = ((startElementY + rand()/(RAND_MAX / (endElementY-startElementY+1)+1)))*h + XY0;
sub_pars[i].velx = velocityX(sub_pars[i].xcoord, sub_pars[i].ycoord);
sub_pars[i].vely = velocityY(sub_pars[i].xcoord, sub_pars[i].ycoord);
mag = sqrt(sub_pars[i].velx*sub_pars[i].velx + sub_pars[i].vely*sub_pars[i].vely);
if (i==0 || localmin > mag) localmin = mag;
}
printf("localmin of %d is %.2f \n", myid, localmin);
MPI_Allgather(&sub_pars, 1, particletype, particles ,1, particletype, MPI_COMM_WORLD);
MPI_Finalize();
if(myid == 0)
{
int k;
for (k=0; k<N*4; k++)
{
printf("test %.2f \n", particles[i].xcoord);
}
}
return 0;
}
float velocityX(float x, float y)
{
float temp = (a+(b*(y*y-x*x))/((x*x+y*y)*(x*x+y*y)));
return temp;
}
float velocityY(float x, float y)
{
float temp = (-1*(2*b*x*y)/((x*x+y*y)*(x*x+y*y)));
return temp;
}
It just returns the same value for all the particles, but I know they are being calculate correctly within each thread, so something is wrong with my MPI_Allgather, can someone please explain how it should look?
You have made a very common mistake: the & (address-of) operator in the first argument that you pass to MPI_Allgather is unnecessary. sub_pars is already a pointer and calling MPI_Allgather with &sub_pars passes a pointer to the pointer (a location somewhere in the stack frame of the main() routine) instead of pointer to the actual data.

Resources