Selecting and analysing window of points in an array - c

Could someone please advise me on how to resolve this problem.
I have a function which performs a simple regression analysis on a set of points contained in an array.
I have one array (pval) which contains all the data I want to perform regression analysis on.
This is how I want to implement this.
I get an average value for the first 7 elements of the array. This is what I call a 'ref_avg' in the programme.
I want to perform a regression analysis for every five elements of the array taking the first element of this array as the 'ref_avg'. That is in every step of the regression analysis I will have 6 points in the array.
e.g
For the 1st step the ref_avg as calculated below is 70.78. So the 1st step in the simple regression will contain these points
1st = {70.78,76.26,69.17,68.68,71.49,73.08},
The second step will contain the ref_avg as the 1st element and other elements starting from the second element in the original array
2nd = {70.78,69.17,68.68,71.49,73.08,72.99},
3rd = {70.78,68.68,71.49,73.08,72.99,70.36},
4th = {70.78,71.49,73.08,72.99,70.36,57.82} and so on until the end.
The regression function is also shown below.
I don't understand why the first 3 elements of the 'calcul' array have the value 0.00 on the first step of the regression, the first 2 elements on the 2nd step, and 1 element on the 3rd.
Also the last step of the regression function is printed 3 times.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Driver from the question: averages the first size_hour entries of pval into
 * ref_avg, then loops over the data trying to build a 6-point window
 * (ref_avg followed by Nhour samples) in calcul and print the fitted slope.
 * NOTE(review): the inline notes below mark statements whose behaviour is
 * undefined or otherwise suspect as written.
 */
int main()
{
float pval[]={76.26,69.17,68.68,71.49,73.08,72.99,70.36,57.82,58.98,69.71,70.43,77.53,80.77,70.30,70.5,70.79,75.58,76.88,80.20,77.69,80.80,70.5,85.27,75.25};
int count,Nhour;
const int MAX_HOUR = 24;
float *calcul=NULL;
float *tab_time =NULL;
float ref_avg;
int size_hour=7;
float sum=0;
/* NOTE(review): Nhour is read here before its first assignment (it is only
   set inside the while loop), so length is indeterminate. */
int length = Nhour+1;
float m;
float b;
/* NOTE(review): sizeof(calcul) is the size of a float pointer, not of a float. */
calcul=(float*)calloc(MAX_HOUR,sizeof(calcul));
if (calcul==NULL)
{
printf(" error in buffer\n");
exit(EXIT_FAILURE);
}
/* NOTE(review): unlike calcul, this allocation is never checked for NULL. */
tab_time= calloc(MAX_HOUR,sizeof(float));
/* Get the average of the first seven elements */
int i;
for (i=0;i<size_hour;i++)
{
sum += pval[i];
}
ref_avg = sum / size_hour;
count=0;
/* perform the regression analysis on 5 hours increment */
/* NOTE(review): with <= and the pre-increment below, the body also runs with
   count == MAX_HOUR and MAX_HOUR+1; the guard inside skips the copy on those
   passes, but the print and the regression still run on the stale window. */
while(count<=MAX_HOUR)
{
++count;
Nhour=5;
/* pass walks -4..1 across the inner loop, so count+pass runs from count-4
   to count+1. */
int pass = -(Nhour-1);
/* This i shadows the i declared before the averaging loop. */
int i=0;
for(i=0;i<Nhour+1;i++)
{
if(count<MAX_HOUR)
{
/* calcul[0] is set to ref_avg on every pass, but on the i == 0 pass the next
   statement overwrites it before it is printed. */
calcul[0]=ref_avg;
/* NOTE(review): count+pass is negative for small count and equals 24 when
   count is 23, both outside pval's 24 elements. */
calcul[i] =pval[count+pass];
pass++;
}
printf("calc=%.2f\n",calcul[i]); // For debug only
tab_time[i]=i+1;
/* Fit once per outer pass, after the last slot of the window is handled. */
if(i==Nhour)
{
/* NOTE(review): no declaration of linear_regression is visible at this point
   in the listing; it is defined after main. */
linear_regression(tab_time, calcul, length, &m, &b);
printf("Slope= %.2f\n", m);
}
}
}
free(calcul);
calcul=NULL;
free(tab_time);
tab_time=NULL;
return 0;
}
/* end of the main function */
/* This function is used to calculate the linear
regression as it was called above in the main function.
It compiles and runs very well, was just included for the
compilation and execution of the main function above where I have a problem. */
/*
 * Ordinary least-squares fit of y = beta1 * x + beta0 over n samples.
 *
 * x, y   sample coordinates, n entries each
 * n      number of samples
 * beta1  receives the slope
 * beta0  receives the intercept
 *
 * With fewer than two samples a message is printed and both outputs are 0;
 * both outputs are also 0 when the x values have zero spread.
 * Always returns 0.
 */
int linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
    float sx = 0;
    float sy = 0;
    float sxx = 0;
    float sxy = 0;
    float spread;
    const float *px;
    const float *py;

    /* A line cannot be fitted through fewer than two points. */
    if (n <= 1) {
        *beta1 = 0;
        *beta0 = 0;
        printf("Not enough data for regression \n");
        return 0;
    }

    /* Accumulate the four running sums in sample order. */
    for (px = x, py = y; px != x + n; ++px, ++py) {
        sx += *px;
        sy += *py;
        sxx += (*px * *px);
        sxy += (*px * *py);
    }

    /* n times the variance of x; zero means every x is the same. */
    spread = (sxx - ((sx * sx) / n));
    if (spread == 0) {
        *beta1 = 0;
        *beta0 = 0;
        return 0;
    }

    *beta1 = (sxy - ((sx * sy) / n)) / spread;
    *beta0 = (sy - ((*beta1) * sx)) / n;
    return 0;
}

I think this code produces sane answers. The reference average quoted in the question seems to be wrong. The memory allocation is not needed. The value of MAX_HOUR was hard-coded as 24 instead of being derived from the array, so it could get out of step with the number of data values. The indexing in building up the array to be regressed was bogus, referencing negative indexes in the pval array (and hence leading to erroneous results). The variable Nhour was referenced before it was initialized; the variable length was not correctly set. There wasn't good diagnostic printing.
The body of main() here is substantially rewritten; the editing on linear_regression() is much more nearly minimal. The code is more consistently laid out and white space has been used to make it easier to read. This version terminates the regression when there is no longer enough data left to fill the array with 5 values - it is not clear what the intended termination condition was.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void linear_regression(const float *x, const float *y, const int n,
                       float *beta1, float *beta0);

/*
 * Sliding-window regression driver.
 *
 * Averages the first SIZE_HOUR samples into ref_avg, then for every window of
 * NHOUR consecutive samples that fits inside pval builds a WINDOW-point data
 * set (ref_avg followed by the NHOUR samples), fits a line through it and
 * prints the points, the slope and the intercept.
 */
int main(void)
{
    /* The 24 readings of the question's data set, including 69.17 as the
       second sample. */
    float pval[] = {
        76.26, 69.17, 68.68, 71.49, 73.08, 72.99, 70.36, 57.82,
        58.98, 69.71, 70.43, 77.53, 80.77, 70.30, 70.50, 70.79,
        75.58, 76.88, 80.20, 77.69, 80.80, 70.50, 85.27, 75.25,
    };
    /* Enum constants so the window arrays are ordinary fixed-size arrays. */
    enum { NHOUR = 5, WINDOW = NHOUR + 1, SIZE_HOUR = 7 };
    /* Derived from the array so it cannot get out of step with the data. */
    const int MAX_HOUR = (int)(sizeof(pval) / sizeof(pval[0]));
    float ref_avg;
    float sum = 0.0;
    float m;
    float b;
    float calc_y[WINDOW];
    float calc_x[WINDOW];

    /* Get the average of the first seven elements */
    for (int i = 0; i < SIZE_HOUR; i++) {
        sum += pval[i];
    }
    ref_avg = sum / SIZE_HOUR;
    printf("ref avg = %5.2f\n", ref_avg);

    /* Perform the regression on every 5-hour window; the last pass starts at
       MAX_HOUR - NHOUR so the highest index read is MAX_HOUR - 1. */
    for (int pass = 0; pass <= MAX_HOUR - NHOUR; pass++) {
        /* Slot 0 always carries the reference average. */
        calc_y[0] = ref_avg;
        calc_x[0] = pass + 1;
        printf("pass %d\ncalc_y[0] = %5.2f, calc_x[0] = %5.2f\n",
               pass, calc_y[0], calc_x[0]);

        for (int i = 1; i <= NHOUR; i++) {
            int n = pass + i - 1;       /* index into pval for this slot */

            calc_y[i] = pval[n];
            calc_x[i] = pass + i + 1;   /* x values stay consecutive */
            printf("calc_y[%d] = %5.2f, calc_x[%d] = %5.2f, n = %2d\n",
                   i, calc_y[i], i, calc_x[i], n);
        }

        linear_regression(calc_x, calc_y, WINDOW, &m, &b);
        printf("Slope= %5.2f, intercept = %5.2f\n", m, b);
    }
    return 0;
}
/*
 * Least-squares straight-line fit through n (x, y) samples.
 *
 * x, y   sample coordinates, n entries each
 * n      number of samples; asserted to be greater than 1
 * beta1  receives the slope
 * beta0  receives the intercept
 *
 * If the x values have zero spread, both outputs are set to 0.
 */
void linear_regression(const float *x, const float *y, const int n, float *beta1, float *beta0)
{
    float sx = 0.0;
    float sy = 0.0;
    float sxx = 0.0;
    float sxy = 0.0;
    int k = 0;

    assert(n > 1);

    /* Accumulate the four running sums in sample order. */
    while (k < n) {
        sx += x[k];
        sy += y[k];
        sxx += (x[k] * x[k]);
        sxy += (x[k] * y[k]);
        k++;
    }

    /* n times the variance of x. */
    float spread = (sxx - ((sx * sx) / n));

    if (spread == 0.0) {
        /* All x equal: report a flat line through the origin. */
        *beta1 = 0.0;
        *beta0 = 0.0;
        return;
    }

    *beta1 = (sxy - ((sx * sy) / n)) / spread;
    *beta0 = (sy - ((*beta1) * sx)) / n;
}

Related

Graph of cos(x) through MacLaurin series only getting the first result right

I'm trying to create a program that compares the efficiency of calculating a function through MacLaurin series.
The idea is: Make a graph (using gnuplot) of cos(x) between -Pi and Pi (100 intervals) calculating cos(x) using the first 4 terms of its MacLaurin series, then, the first 6 terms, and comparing the graph between them.
Cos(x) through MacLaurin.
So, to use gnuplot, I wrote the code below, which writes 2 files with the data I need; however, when I run the code only the first result is correct. For the first 4 terms my file is:
-3.141593 -9.760222e-001
-3.078126 2.367934e+264
And the rest of what would be my Y axis is just 2.367934e+264 repeated over and over. The 6 terms file is also just that number. X axis is fine.
I'm fairly new to coding and just don't know what i'm doing wrong. Any help would be appreciated.
Here's the code:
#include <stdio.h>
#include <math.h>
#define X_INI -M_PI
#define X_FIM M_PI
#define NI 100
int fatorial(int);
double serie(int ,double );

/*
 * Tabulate serie(4, x) and serie(6, x) at NI evenly spaced points from X_INI
 * to X_FIM and write them as "x y" lines to 4Termos.dat and 6Termos.dat.
 *
 * Returns 0 on success and 1 if either file cannot be opened or closed
 * cleanly.
 */
int main()
{
    /* Open both outputs up front so nothing is computed if one is unusable. */
    FILE *fp4 = fopen("4Termos.dat", "w");
    if (fp4 == NULL) {
        perror("4Termos.dat");
        return 1;
    }
    FILE *fp6 = fopen("6Termos.dat", "w");
    if (fp6 == NULL) {
        perror("6Termos.dat");
        fclose(fp4);
        return 1;
    }

    double x = X_INI;
    const double dx = (X_FIM - X_INI) / (NI - 1);

    for (int i = 0; i < NI; i++) {
        fprintf(fp4, "%lf %e\n", x, serie(4, x));
        fprintf(fp6, "%lf %e\n", x, serie(6, x));
        x = x + dx;
    }

    /* fclose flushes buffered output; a failure here means lost data. */
    int status = 0;
    if (fclose(fp4) != 0) {
        perror("4Termos.dat");
        status = 1;
    }
    if (fclose(fp6) != 0) {
        perror("6Termos.dat");
        status = 1;
    }
    return status;
}
/*
 * Return n! as an int, built by multiplying upwards from 2.
 * Any n below 2 (including 0 and negative values) yields 1.
 */
int fatorial(int n) {
    int acc = 1;

    /* Multiplying by 1 changes nothing, so the product starts at 2. */
    for (int k = 2; k <= n; ++k) {
        acc = acc * k;
    }
    return acc;
}
/*
 * Add up the terms j = 0..m of (-1)^j * z^(2j) / (2j)!, calling pow() and
 * fatorial() for every term, and return the total.
 */
double serie(int m, double z){
/* NOTE(review): s is given no initial value before the loop starts adding
   to it. */
double s;
int j;
for(j = 0; j < m+1; j++)
{
/* fatorial returns int, so the divisor is an int holding (2j)!. */
s = s + ( ( pow((-1) , j))*pow(z, (2*j)) ) / (fatorial(2*j));
}
return s;
}
Fatorial is used to calculate factorial, serie used to calculate MacLaurin...
Use of uninitialized s in serie() function (I've taken the liberty to format the code to my liking).
/*
 * Partial sum of the Maclaurin series of cos(z):
 *     sum over j = 0..m of (-1)^j * z^(2j) / (2j)!
 *
 * m  index of the last term to include; a negative m gives an empty sum (0)
 * z  point at which to evaluate the series
 *
 * The accumulator starts at 0, so the result no longer depends on whatever
 * happened to be in memory. Each term is derived from the previous one
 * (multiply by -z^2 / ((2j+1)(2j+2))), which avoids both pow() and an integer
 * factorial that would overflow int once 2j exceeds 12.
 */
double serie(int m, double z) {
    const double z2 = z * z;
    double s = 0.0;
    double term = 1.0;  /* the j = 0 term */

    for (int j = 0; j <= m; j++) {
        s += term;
        /* Step from term j to term j + 1. */
        term *= -z2 / ((2.0 * j + 1.0) * (2.0 * j + 2.0));
    }
    return s;
}

Assigning a list within a loop

I have the following function to get the distance between some points:
#include <stdio.h>
#include <math.h>
/*
 * Question's sketch of a routine that walks consecutive rows of coords and
 * computes the straight-line distance between each pair of neighbours.
 * NOTE(review): this listing does not compile as posted; see the inline notes.
 */
/* NOTE(review): the declared return type is int although the value returned
   is the float distance. */
int add_coords(size_t size, float coords[size][2])
{
float distance = 0;
/* size is a size_t, so size-1 is computed unsigned and i is compared against
   it after conversion. */
for (int i=0; i < size-1; i++) {
float this[2] = coords[i]; // not allowed
/* Same problem as the line above: an array is initialized from another array. */
float next[2] = coords[i+1];
/* NOTE(review): the parentheses here are unbalanced and each pow() call ends
   up with a single argument; the statement also assigns to distance rather
   than adding to it. */
distance = sqrt(pow(this[0] - next[0]),2) + pow(this[1] - next[1]),2));
}
return distance;
}
/*
 * Demo driver: hands a fixed table of three 2-D points to add_coords and
 * prints the value it returns.
 */
int main(void)
{
    float coords[][2] = {{1,3}, {5,7}, {-2,-3}};
    /* Row count derived from the array itself. */
    size_t rows = sizeof coords / sizeof coords[0];
    float distance = add_coords(rows, coords);

    printf("The distance is %.2f\n", distance);
    return 0;
}
What would be the best way to do the following?
float this[2] = coords[i];
Currently, what I'm doing is the following; it was a bit tricky to come up with:
// pointer to array of two
float (*this)[2], (*next)[2];
for (int i=0; i < size; i++) {
this = &coords[i];
next = &coords[i+1];
distance += sqrtf(powf((*this)[0] - (*next)[0],2) + powf((*this)[1] - *(next)[1],2));
}
I would simply use a struct, this would avoid any unwanted undefined behavior:
typedef struct coords {
float x;
float y;
} t_coords
// ...
t_coords coords = {.x= coords[i][0], .y=coords[i][1]};
Also, you should write the initializers of your coords array as float literals, with the f suffix:
// The f suffix needs a decimal point or exponent before it: 1.0f, not 1f.
{{1.0f,3.0f}, {5.0f,7.0f}, {-2.0f,-3.0f}};

Segfault with large int - not enough memory?

I am fairly new to C and how arrays and memory allocation works. I'm solving a very simple function right now, vector_average(), which computes the mean value between two successive array entries, i.e., the average between (i) and (i + 1). This average function is the following:
/*
 * Store in cc[i] the mean of nc[i] and nc[i+1] for i = 0..n-1, then print
 * how long the loop took according to Wtime().
 *
 * cc  output, n entries written
 * nc  input; entries 0..n are read
 * n   number of averages to produce
 */
void
vector_average(double *cc, double *nc, int n)
{
    const double t_start = Wtime();
    int k = 0;

    while (k < n) {
        cc[k] = (nc[k] + nc[k + 1]) * .5;
        k++;
    }

    const double t_stop = Wtime();

    printf("vector_average() took %g seconds\n", t_stop - t_start);
}
My goal is to set int n extremely high, to the point where it actually takes some time to complete this loop (hence, why I am tracking wall time in this code). I'm passing this function a random test function of x, f(x) = sin(x) + 1/3 * sin(3 x), denoted in this code as x_nc, in main() in the following form:
/*
 * Question's driver: samples sin(x) + sin(3x)/3 at N+1 evenly spaced points
 * over [0, 2*pi] into x_nc and passes it to vector_average, which fills x_cc.
 */
int
main(int argc, char **argv)
{
int N = 1.E6;
/* NOTE(review): x_nc is a variable-length array with automatic storage
   holding N+1 doubles, so its size grows directly with N. */
double x_nc[N+1];
double dx = 2. * M_PI / N;
/* <= N on purpose: the table has N+1 entries, one per interval endpoint. */
for (int i = 0; i <= N; i++) {
double x = i * dx;
x_nc[i] = sin(x) + 1./3. * sin(3.*x);
}
/* NOTE(review): a second automatic array, N doubles this time. */
double x_cc[N];
vector_average(x_cc, x_nc, N);
}
But my problem here is that if I set int N any higher than 1.E5, it segfaults. Please provide any suggestions for how I might set N much higher. Perhaps I have to do something with malloc, but, again, I am new to all of this stuff and I'm not quite sure how I would implement this.
-CJW
Automatic (stack) storage is limited; on Windows, for example, the default stack is only about 1 MB. The local array 'x_nc' is clearly bigger than that, so you should allocate x_nc (and x_cc) on the heap instead:
/*
 * Heap-based version of the driver: samples sin(x) + sin(3x)/3 at N+1 evenly
 * spaced points over [0, 2*pi], passes the table to vector_average, and
 * releases both buffers before exiting.
 *
 * Returns 0 on success, EXIT_FAILURE if either buffer cannot be allocated.
 */
int
main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    int N = 1.E6;
    /* Sized from the pointed-to type, so the element type is stated once. */
    double *x_nc = malloc(((size_t)N + 1) * sizeof *x_nc);
    double *x_cc = malloc((size_t)N * sizeof *x_cc);

    if (x_nc == NULL || x_cc == NULL) {
        fprintf(stderr, "out of memory\n");
        free(x_nc);     /* free(NULL) is a no-op, so no need to test each */
        free(x_cc);
        return EXIT_FAILURE;
    }

    double dx = 2. * M_PI / N;
    /* <= N on purpose: the table has N+1 entries, one per interval endpoint. */
    for (int i = 0; i <= N; i++) {
        double x = i * dx;
        x_nc[i] = sin(x) + 1./3. * sin(3.*x);
    }

    vector_average(x_cc, x_nc, N);

    free(x_nc);
    free(x_cc);
    return 0;
}

program.exe (C) has stopped working

I am extremely new to C and managed to compile this program, but the exe stops working upon running. I'm really not sure what's wrong.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define TINY 1.0e-20 // A small number.
/* LU decomposition and back-substitution routines, defined further down. */
void ludcmp(float a[3][3], int n, int *indx, float *d);
void lubksb(float a[3][3], int n, int *indx, float b[]) ;
/*
 * Question's driver: decomposes a fixed 3x3 matrix with ludcmp, calls lubksb
 * on the result, and prints entries of b.
 * NOTE(review): several objects are used here without ever being given a
 * value; see the inline notes.
 */
int main(){
/* NOTE(review): n is never assigned, and indx is never pointed at storage. */
int i,n,*indx;
/* NOTE(review): b is never pointed at storage either, and no right-hand-side
   values are supplied for it. */
float *b,d;
float a[3][3] = {
{ 1.0, 2.0, 5.0},
{-1.0, 2.0, 3.0},
{ 6.0, 0.0, 1.0}
};
ludcmp(a,n,indx,&d);
lubksb(a,n,indx,b);
/* NOTE(review): the loop condition i = 3 is an assignment, so it is always
   true and i is reset to 3 before every pass. */
for(i = 1; i = 3; i++) {
printf("%.2f",b[i]);
}
getchar();
return 0;
}
For those who were asking, the 2 functions ludcmp and lubksb are below. I got them from the Numerical Recipes textbook, but edited some lines to remove exclusive routines which I do not have. Specifically, they are the lines with malloc, printf, and free.
The original code came with all the loops starting with 1, which is why I also started my loop with 1. I have since changed all the loops to start from 0 instead, hopefully without introducing any new errors.
You can see the original code here:
https://github.com/saulwiggin/Numerical-Recipies-in-C/tree/master/Chapter2.Solution-of-Linear-Equations
Here is ludcmp:
/*
 * LU-decompose the leading n-by-n part of a in place, using Crout's method
 * with implicitly scaled partial pivoting and 0-based indices throughout.
 *
 * a     in: the matrix; out: both factors of a row-permuted copy of it, with
 *       the multipliers below the diagonal and the upper factor on and above
 * n     order of the system; must be 1..3 because a has three columns
 * indx  out: indx[j] is the row that was exchanged with row j at column j
 * d     out: +1 or -1 for an even or odd number of row exchanges; set to 0
 *       when the routine gives up (bad n, allocation failure, or a row of
 *       zeros), in which case a may be partly modified and indx is not
 *       filled in
 */
void ludcmp(float a[3][3], int n, int *indx, float *d)
{
    int i, imax, j, k;
    float big, dum, sum, temp;
    float *vv;  /* vv stores the implicit scaling of each row. */

    if (n < 1 || n > 3) {
        *d = 0.0f;
        return;
    }
    vv = malloc((size_t)n * sizeof *vv);
    if (vv == NULL) {
        *d = 0.0f;
        return;
    }

    *d = 1.0f;
    for (i = 0; i < n; i++) {
        big = 0.0f;
        for (j = 0; j < n; j++) {
            if ((temp = fabsf(a[i][j])) > big) {
                big = temp;
            }
        }
        if (big == 0.0f) {
            /* No nonzero largest element: stop instead of dividing by zero. */
            printf("Singular matrix in routine ludcmp");
            free(vv);
            *d = 0.0f;
            return;
        }
        vv[i] = 1.0f / big;     /* Save the scaling. */
    }

    /* This is the loop over columns of Crout's method. */
    for (j = 0; j < n; j++) {
        /* Entries above the diagonal in column j. */
        for (i = 0; i < j; i++) {
            sum = a[i][j];
            for (k = 0; k < i; k++) {
                sum -= a[i][k] * a[k][j];
            }
            a[i][j] = sum;
        }

        /* Search rows j..n-1 for the largest scaled pivot candidate. The
           bound is i < n: rows are numbered 0..n-1. */
        big = 0.0f;
        imax = j;
        for (i = j; i < n; i++) {
            sum = a[i][j];
            for (k = 0; k < j; k++) {
                sum -= a[i][k] * a[k][j];
            }
            a[i][j] = sum;
            if ((dum = vv[i] * fabsf(sum)) >= big) {
                big = dum;
                imax = i;
            }
        }

        /* Bring the pivot row into place and track the parity change. */
        if (j != imax) {
            for (k = 0; k < n; k++) {
                dum = a[imax][k];
                a[imax][k] = a[j][k];
                a[j][k] = dum;
            }
            *d = -(*d);
            vv[imax] = vv[j];
        }
        indx[j] = imax;

        /* A zero pivot is replaced by TINY so the division below is defined. */
        if (a[j][j] == 0.0f) {
            a[j][j] = TINY;
        }

        /* Divide the rest of the column by the pivot; the last column has
           nothing below the diagonal. */
        if (j != n - 1) {
            dum = 1.0f / (a[j][j]);
            for (i = j + 1; i < n; i++) {
                a[i][j] *= dum;
            }
        }
    }   /* Go back for the next column in the reduction. */

    free(vv);
}
And lubksb:
/*
 * Solve A x = b given the in-place decomposition of A, using 0-based indices.
 *
 * a     combined factors: multipliers of the unit-diagonal lower factor
 *       below the diagonal, upper factor on and above it
 * n     order of the system (a has three columns, so at most 3)
 * indx  row-exchange record: at step i, b[i] is exchanged with b[indx[i]]
 * b     in: right-hand side; out: the solution x
 */
void lubksb(float a[3][3],int n,int *indx,float b[])
{
    int i, ip, j;
    int ii = -1;    /* index of the first nonzero entry of b, -1 until seen */
    float sum;

    /* Forward substitution, undoing the row exchanges as we go. Leading
       zeros of b are skipped because they contribute nothing to the sums. */
    for (i = 0; i < n; i++) {
        ip = indx[i];
        sum = b[ip];
        b[ip] = b[i];
        if (ii >= 0) {
            for (j = ii; j < i; j++) {
                sum -= a[i][j] * b[j];
            }
        } else if (sum != 0.0f) {
            ii = i;
        }
        b[i] = sum;
    }

    /* Back substitution through the upper factor, last row first. */
    for (i = n - 1; i >= 0; i--) {
        sum = b[i];
        for (j = i + 1; j < n; j++) {
            sum -= a[i][j] * b[j];
        }
        b[i] = sum / a[i][i];
    }
}
This is a two-dimensional array, and you are looping over it as if it had only one dimension. You should do something like:
/* Visit every cell of a 3x3 grid, row by row, printing the 1-based row and
   column numbers of each cell; the matrix itself is not read here. */
for (int i = 0; i < 3; ++i) {
for(int j = 0; j < 3; ++j) {
printf("%d %d: ", i+1, j+1);
}
}
It is bad practice to hard-code the size of the array everywhere. Try to use a named constant.
And as @Marged said in the comments:
In C, array indices start at 0.
b is never assigned to anything valid when it's declared:
float *b,d;
At best, it's NULL or pointing to an invalid memory address:
I don't know what the lubksb function does:
lubksb(a,n,indx,b);
But b is clearly an invalid parameter since you never assign to it before calling this function.
And with this statement:
/* NOTE(review): the middle clause i = 3 is an assignment, not a comparison,
   so the condition is always true and i is 3 on every pass. */
for(i = 1; i = 3; i++) {
printf("%.2f",b[i]);
}
As others have pointed out, array indices start at zero. But there's no evidence that b has a length of three anyway.

C Code for numerical integration works on one computer but blows up on another [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
I have written a code for a simple pendulum with numerical integration using rk4 method. Here's an image of expected result.
It works on my laptop, running Ubuntu 14.04, 64 bit, (it gives a sine wave as the result), but doesn't work on my PC, which runs Debian 8 and is also 64 bit.
Here's an image of the wrong plot.
Any reason why this would be happening?
Here's the code:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
/* Number of state components; main uses it as the bound of its print loop. */
int N = 2;
/* Integration step; main passes it to integrator_rk4 as dt and divides the
   time span by it to get the number of steps. */
float h = 0.001;
/* Result of one integration step: a time and a pointer to the state values
   that go with it. The struct holds only the pointer, not the values. */
struct t_y_couple {
float t;
float *y;
};
/* One integration step from time t with step dt, starting from state p1. */
struct t_y_couple integrator_rk4(float dt, float t, float *p1);
/* Writes the time derivative of state y into dydt. */
void oscnetwork_opt(float t, float *y, float *dydt);
int main(void) {
/* initializations*/
struct t_y_couple t_y;
int i, iter, j;
// time span for which to run simulation
int tspan = 20;
// total number of time iterations = tspan*step_size
int tot_time = (int)ceil(tspan / h);
// Time array
float T[tot_time];
// pointer definitions
float *p, *q;
// vector to hold values for each differential variable for all time
// iterations
float Y[tot_time][2];
// N = total number of coupled differential equations to solve
// initial conditions vector for time = 0
Y[0][0] = 0;
Y[0][1] = 3.14;
// set the time array
T[0] = 0;
// This loop calls the RK4 code
for (i = 0; i < tot_time - 1; i++) {
p = &Y[i][0]; // current time
q = &Y[i + 1][0]; // next time step
// printf("\n\n");
// for (j=0;j<N;j++)
// call the RK4 integrator with current time value, and current
// values of voltage
t_y = integrator_rk4(h, T[i], p);
// Return the time output of integrator into the next iteration of time
T[i + 1] = t_y.t;
// copy the output of the integrator into the next iteration of voltage
q = memcpy(q, t_y.y, (2) * sizeof(float));
printf("%f ", T[i + 1]);
for (iter = 0; iter < N; iter++)
printf("%f ", *(p + iter));
printf("\n");
}
return 0;
}
/*
 * Advance the two-component state y from time t by one classical
 * fourth-order Runge-Kutta step of size dt, with derivatives supplied by
 * oscnetwork_opt.
 *
 * dt  step size
 * t   time at which y is valid
 * y   current state, two floats
 *
 * Returns a t_y_couple whose t is t + dt and whose y points at the new state.
 * That state lives in a static buffer inside this function: it stays valid
 * after the call returns but is overwritten by the next call, so the caller
 * must copy it out before stepping again. The function is therefore not
 * reentrant.
 */
struct t_y_couple integrator_rk4(float dt, float t, float y[2]) {
    /* Static so the pointer handed back in the result outlives this call. */
    static float yout[2];
    float y1[2], y2[2], y3[2];
    float k1[2], k2[2], k3[2], k4[2];
    float addition[2];
    struct t_y_couple ty1;
    int i;

    const float tout = t + dt;
    const float dt_half = 0.5 * dt;

    /* Stage 1: slope at the start of the interval. */
    oscnetwork_opt(t, y, k1);
    for (i = 0; i < 2; i++)
        y1[i] = y[i] + (k1[i]) * dt_half;

    /* Stage 2: slope at the midpoint, reached along k1. */
    oscnetwork_opt(t + dt_half, y1, k2);
    for (i = 0; i < 2; i++)
        y2[i] = y[i] + (k2[i]) * dt_half;

    /* Stage 3: slope at the midpoint, reached along k2. */
    oscnetwork_opt(t + dt_half, y2, k3);
    /* The last trial point is a full step along k3, because stage 4 is
       evaluated at the end of the interval (tout), not at the midpoint. */
    for (i = 0; i < 2; i++)
        y3[i] = y[i] + (k3[i]) * dt;

    /* Stage 4: slope at the end of the interval. */
    oscnetwork_opt(tout, y3, k4);

    /* Weighted average of the four slopes: (k1 + 2 k2 + 2 k3 + k4) * dt / 6. */
    for (i = 0; i < 2; i++) {
        addition[i] = ((k1[i]) + (k2[i]) * 2 + (k3[i]) * 2 + (k4[i])) * dt / 6;
    }

    /* Add the increment to the original state. */
    for (i = 0; i < 2; i++)
        yout[i] = y[i] + addition[i];

    /* Return the updated time together with a pointer to the new state. */
    ty1.t = tout;
    ty1.y = yout;
    return ty1;
}
// function to return the vector with coupled differential variables for each
// time iteration
void oscnetwork_opt(float t, float y[2], float *dydt) {
int i;
dydt[0] = y[1];
dydt[1] = -(1) * sin(y[0]);
}
You have a problem of lifetime with your variable yout in integrator_rk4(). You assign address of yout to ty1.y but you use it outside this function. This is undefined behavior.
quick fix:
/* Result of one step, with the state stored inside the struct itself as a
   two-element array, so returning the struct by value copies the state. */
struct t_y_couple {
float t;
float y[2];
};
/* Sketch of the integrator using that struct; the part of the body that
   computes tout and yout is elided below. */
struct t_y_couple integrator_rk4(float dt, float t, float y[2]) {
float y1[2], y2[2], y3[2], yout[2];
// ...
ty1.t = tout;
/* Copy the values element by element into the struct being returned. */
ty1.y[0] = yout[0];
ty1.y[1] = yout[1];
return ty1;
}
You have a lot of useless allocation and you made "spaghetti code" with your global variable. You should not cast the return of malloc.

Resources