Are pointers within structures slowing down my code? - c

I am looking for some help or hints to speed up my code.
I have implemented a routine for computing the gravitational potential at a point (r, phi, lambda) in space from a set of spherical harmonic coefficients C_{n,m} and S_{n,m}. The equation is the standard spherical-harmonic expansion of the potential,
V(r, phi, lambda) = (GM/r) * sum_{n=0}^{nmax} (R/r)^n * sum_{m=0}^{n} [ C_{n,m} cos(m*lambda) + S_{n,m} sin(m*lambda) ] * P_{n,m}(sin(phi)),
and includes the recursive computation of the latitude (phi) dependent associated Legendre polynomials P_{n,m}, starting with the first two values P_{0,0} and P_{1,1}.
At first, I had this implemented as MATLAB C-MEX code, with only the core part of my code in C. I wanted to make a pure C routine, but found that the code runs 3-5 times slower, which makes me wonder why. Could it be the way I define my structures and use pointers to pointers in the central code?
It seems to be the core computation part that takes the extra time, but that part did not change, except that before I was passing pointers directly to variables and now I am using pointers stored inside structures.
Any help is appreciated!
In the following, I will try to explain my code and show some extracts:
At the beginning of the program, I define three structures. One to hold the spherical harmonic coefficients, C_{n,m} and S_{n,m}, (ggm_struct), one to hold the computation coordinates (comp_struct) and one to hold the results (func_struct):
// Define constant variables
const double deg2rad = M_PI/180.0; // degrees to radians conversion factor
const double sfac = 1.0000E-280; // scaling factor
const double sqrt2 = 1.414213562373095; // sqrt(2)
const double sqrt3 = 1.732050807568877; // sqrt(3)
// Define structure to hold geopotential model
struct ggm_struct {
char product_type[100], modelname[100], errors[100], norm[100], tide_system[100];
double GM, R, *C, *S;
int max_degree, ncoef;
};
// Define structure to hold computation coordinates
struct comp_struct {
double *lat, *lon, *h;
double *r, *phi;
int nlat, nlon;
};
/* Define structure to hold results */
struct func_struct {
double *rval;
int npoints;
};
I then have a (sub-)function that starts by allocating space and then loads the coefficients from an ascii file as
int read_gfc(char mfile[100], int *nmax, int *mmax, struct ggm_struct *ggm)
{
// Set file identifier
FILE *fid;
// Declare variables
char str[200], var[100];
int n, m, nid, l00 = 0, l10 = 0, l11 = 0;
double c, s;
// Determine number of coefficients
ggm->ncoef = (*nmax+2)*(*nmax+1)/2;
// Allocate memory for coefficients
ggm->C = (double*) malloc(ggm->ncoef*sizeof(double));
if (ggm->C == NULL){
printf("Error: Memory for C not allocated!");
return -ENOMEM;
}
ggm->S = (double*) malloc(ggm->ncoef*sizeof(double));
if (ggm->S == NULL){
printf("Error: Memory for S not allocated!");
return -ENOMEM;
}
// Open file
fid = fopen(mfile,"r");
// Check that file was opened correctly
if (fid == NULL){
printf("Error: opening file %s!",mfile);
return -ENOMEM;
}
// Read file header
while (fgets(str,200,fid) != NULL && strncmp(str,"end_of_head",11) != 0){
// Extract model parameters
if (strncmp(str,"product_type",12) == 0){ sscanf(str,"%s %s",var,ggm->product_type); }
if (strncmp(str,"modelname",9) == 0){ sscanf(str,"%s %s",var,ggm->modelname); }
if (strncmp(str,"earth_gravity_constant",22) == 0){ sscanf(str,"%s %lf",var,&ggm->GM); }
if (strncmp(str,"radius",6) == 0){ sscanf(str,"%s %lf",var,&ggm->R); }
if (strncmp(str,"max_degree",10) == 0){ sscanf(str,"%s %d",var,&ggm->max_degree); }
if (strncmp(str,"errors",6) == 0){ sscanf(str,"%s %s",var,ggm->errors); }
if (strncmp(str,"norm",4) == 0){ sscanf(str,"%s %s",var,ggm->norm); }
if (strncmp(str,"tide_system",11) == 0){ sscanf(str,"%s %s",var,ggm->tide_system); }
}
// Read coefficients
while (fgets(str,200,fid) != NULL){
// Extract parameters
sscanf(str,"%s %d %d %lf %lf",var,&n,&m,&c,&s);
// Store parameters
if (n <= *nmax && m <= *mmax) {
// Determine index
nid = (n+1)*n/2 + m;
// Store values
*(ggm->C+nid) = c;
*(ggm->S+nid) = s;
}
}
// Close file
fclose(fid);
// Return from function
return 0;
}
Afterwards, the computation grid is defined by an array of seven components. As an example, the array [-90 90 -180 180 1 1 0] defines a grid from -90 to 90 degrees latitude at 1-degree increments and from -180 to 180 degrees longitude at 1-degree increments. The height is zero. From this array, a computation grid is generated in a (sub-)function:
int make_grid(double *grid, struct comp_struct *inp)
{
// Declare variables
int n;
/* Echo routine */
printf("Creating grid of coordinates\n");
printf(" [lat1,lat2,dlat] = [%f,%f,%f]\n", *grid, *(grid+1), *(grid+4) );
printf(" [lon1,lon2,dlon] = [%f,%f,%f]\n", *(grid+2), *(grid+3), *(grid+5) );
printf(" h = %f\n", *(grid+6));
/* Latitude ------------------------------------------------------------- */
// Determine number of increments
inp->nlat = ceil( ( *(grid+1) - *grid + *(grid+4) ) / *(grid+4) );
// Allocate memory
inp->lat = (double*) malloc(inp->nlat*sizeof(double));
if (inp->lat== NULL){
printf("Error: Memory for LATITUDE (inp.lat) points not allocated!");
return -ENOMEM;
}
// Fill in values
*(inp->lat) = *(grid+1);
for (n = 1; n < inp->nlat-1; n++) {
*(inp->lat+n) = *(inp->lat+n-1) - *(grid+4);
}
*(inp->lat+inp->nlat-1) = *grid;
/* Longitude ------------------------------------------------------------ */
// Determine number of increments
inp->nlon = ceil( ( *(grid+3) - *(grid+2) + *(grid+5) ) / *(grid+5) );
// Allocate memory
inp->lon = (double*) malloc(inp->nlon*sizeof(double));
if (inp->lon== NULL){
printf("Error: Memory for LONGITUDE (inp.lon) points not allocated!");
return -ENOMEM;
}
// Fill in values
*(inp->lon) = *(grid+2);
for (n = 1; n < inp->nlon-1; n++) {
*(inp->lon+n) = *(inp->lon+n-1) + *(grid+5);
}
*(inp->lon+inp->nlon-1) = *(grid+3);
/* Height --------------------------------------------------------------- */
// Allocate memory
inp->h = (double*) malloc(inp->nlat*sizeof(double));
if (inp->h== NULL){
printf("Error: Memory for HEIGHT (inp.h) points not allocated!");
return -ENOMEM;
}
// Fill in values
for (n = 0; n < inp->nlat; n++) {
*(inp->h+n) = *(grid+6);
}
// Return from function
return 0;
}
These geographic coordinates are then transformed to spherical coordinates for the computation using another (sub-)routine
int geo2sph(struct comp_struct *inp, int *lgrid)
{
// Declare variables
double a = 6378137.0, e2 = 6.69437999014E-3; /* WGS84 parameters */
double x, y, z, sinlat, coslat, sinlon, coslon, R_E;
int i, j, nid;
/* Allocate space ------------------------------------------------------- */
// radius
inp->r = (double*) malloc(inp->nlat*sizeof(double));
if (inp->r== NULL){
printf("Error: Memory for SPHERICAL DISTANCE (inp.r) points not allocated!");
return -ENOMEM;
}
// phi
inp->phi = (double*) malloc(inp->nlat*sizeof(double));
if (inp->phi== NULL){
printf("Error: Memory for SPHERICAL LATITUDE (inp.phi) points not allocated!");
return -ENOMEM;
}
/* Loop over latitude =================================================== */
for (i = 0; i < inp->nlat; i++) {
// Compute sine and cosine of latitude
sinlat = sin(*(inp->lat+i));
coslat = cos(*(inp->lat+i));
// Compute radius of curvature
R_E = a / sqrt( 1.0 - e2*sinlat*sinlat );
// Compute sine and cosine of longitude
sinlon = sin(*(inp->lon));
coslon = cos(*(inp->lon));
// Compute rectangular coordinates
x = ( R_E + *(inp->h+i) ) * coslat * coslon;
y = ( R_E + *(inp->h+i) ) * coslat * sinlon;
z = ( R_E*(1.0-e2) + *(inp->h+i) ) * sinlat;
// Compute sqrt( x^2 + y^2 )
R_E = sqrt( x*x + y*y );
// Derive radial distance
*(inp->r+i) = sqrt( R_E * R_E + z*z );
// Derive spherical latitude
if (R_E < 1) {
if (z > 0) { *(inp->phi+i) = M_PI/2.0; }
else { *(inp->phi+i) = -M_PI/2.0; }
}
else {
*(inp->phi+i) = asin( z / *(inp->r+i) );
}
}
// Return from function
return 0;
}
Finally, the gravitational potential is computed within its own (sub-)function. This is the core part of the code, which is more or less the same as in the MATLAB C-MEX function. The only difference seems to be that before (in the MATLAB MEX version) everything was defined as (simple) double variables; now the variables are located inside a structure that contains pointers.
int gravpot(struct comp_struct *inp, struct ggm_struct *ggm, int *nmax,
int *mmax, int *lgrid, struct func_struct *out)
{
// Declare variables
double GMr, ar, t, u, u2, arn, gnm, hnm, P, Pp1, Pp2, msum;
double Pmm[*nmax+1], CPnm[*mmax+1], SPnm[*mmax+1];
int i, j, n, m, id;
// Allocate memory
out->rval = (double*) malloc(inp->nlat*inp->nlon*sizeof(double));
if (out->rval== NULL){
printf("Error: Memory for OUTPUT (out.rval) not allocated!");
return -ENOMEM;
}
/* Compute sectorial values of associated Legendre polynomials ========== */
// Define seed values ( divided by u^m )
Pmm[0] = sfac;
Pmm[1] = sqrt3 * sfac;
// Compute sectorial values, [1] eq. 13 and 28 ( divided by u^m )
for (m = 2; m <= *nmax; m++) {
Pmm[m] = sqrt( (2.0*m+1.0) / (2.0*m) ) * Pmm[m-1];
}
/* ====================================================================== */
/* Loop over latitude =================================================== */
for (i = 0; i < inp->nlat; i++) {
// Compute ratios to be used in summation
GMr = ggm->GM / *(inp->r+i);
ar = ggm->R / *(inp->r+i);
/* ---------------------------------------------------------------------
* Compute product of Legendre values and spherical harmonic coefficients.
* Products of similar degree are summed together, resulting in mmax
* values. The degree terms are latitude dependent, such that these mmax
* sums are valid for every point with the same latitude.
* The values of the associated Legendre polynomials, Pnm, are scaled by
* sfac = 10^(-280) and divided by u^m in order to prevent underflow and
* overflow of the coefficients.
* ------------------------------------------------------------------ */
// Form coefficients for Legendre recursive algorithm
t = sin(*(inp->phi+i));
u = cos(*(inp->phi+i));
u2 = u * u;
arn = ar;
/* Degree n = 0 terms ----------------------------------------------- */
// Compute order m = 0 term (S term is zero)
CPnm[0] = Pmm[0] * *(ggm->C);
/* Degree n = 1 terms ----------------------------------------------- */
// Compute (1,1) terms, [1] eq. 3
CPnm[1] = ar * Pmm[1] * *(ggm->C+2);
SPnm[1] = ar * Pmm[1] * *(ggm->S+2);
// Compute (1,0) Legendre value, [1] eq. 18 and 27
P = t * Pmm[1];
// Add (1,0) terms to sum (S term is zero), [1] eq. 3
CPnm[0] = CPnm[0] + ar * P * *(ggm->C+1);
/* Degree n = [2,n_max] --------------------------------------------- */
for (n = 2; n <= *nmax; n++) {
// Compute power term
arn = arn * ar;
/* Compute sectorial (m=n) terms ++++++++++++++++++++++++++++++++ */
// Extract associated Legendre value
Pp1 = Pmm[n];
// Compute product terms, [1] eq. 3
if (n <= *mmax) {
id = (n+1)*n/2 + n;
CPnm[n] = arn * Pp1 * *(ggm->C+id);
SPnm[n] = arn * Pp1 * *(ggm->S+id);
}
/* Compute first non-sectorial terms (m=n-1) ++++++++++++++++++++ */
// Compute associated Legendre value, [1] eq. 18 and 27
gnm = sqrt( 2.0*n );
P = gnm * t * Pp1;
// Add terms to summation, eq. 3 in [1]
if (n-1 <= *mmax) {
id = (n+1)*n/2 + n - 1;
CPnm[n-1] = CPnm[n-1] + arn * P * *(ggm->C+id);
SPnm[n-1] = SPnm[n-1] + arn * P * *(ggm->S+id);
}
/* Compute terms of order m = [n-2,1] +++++++++++++++++++++++++++ */
for (m = n-2; m > 0; m--) {
// Set previous values
Pp2 = Pp1;
Pp1 = P;
// Compute associated Legendre value, [1] eq. 18, 19 and 27
gnm = 2.0*(m+1.0) / sqrt( (n-m)*(n+m+1.0) );
hnm = sqrt( (n+m+2.0)*(n-m-1.0)/(n-m)/(n+m+1.0) );
P = gnm * t * Pp1 - hnm * u2 * Pp2;
// Add product terms to summation, eq. 3 in [1]
if (m <= *mmax) {
id = (n+1)*n/2 + m;
CPnm[m] = CPnm[m] + arn * P * *(ggm->C+id);
SPnm[m] = SPnm[m] + arn * P * *(ggm->S+id);
}
}
/* Compute zonal terms (m=0) ++++++++++++++++++++++++++++++++++++ */
// Compute associated Legendre value, [1] eq. 18, 19 and 27
gnm = 2.0 / sqrt( n*(n+1.0) );
hnm = sqrt( (n+2.0)*(n-1.0)/n/(n+1.0) );
P = ( gnm * t * P - hnm * u2 * Pp1 ) / sqrt2;
// Add terms to summation (S term is zero), [1] eq. 3
id = (n+1)*n/2;
CPnm[0] = CPnm[0] + arn * P * *(ggm->C+id);
} /* ---------------------------------------------------------------- */
/* Loop over longitude ============================================== */
for (j = 0; j < inp->nlon; j++) {
/* -----------------------------------------------------------------
* All associated Legendre polynomials (latitude dependent) are now
* computed and multiplied by the corresponding spherical harmonic
* coefficient. These products are scaled by u^m, meaning that
* Horner's scheme is used in the following summation.
* -------------------------------------------------------------- */
// Initialise order-dependent sum
msum = 0.0;
// Derive longitude id
id = j + i * *lgrid;
// Loop over order (m > 0)
for (m = *mmax; m > 0; m--) {
// Add to order-dependent sum using Horner's scheme, [1] eq. 2, 3 and 31
msum = ( msum + cos( m * *(inp->lon+id) ) * CPnm[m]
+ sin( m * *(inp->lon+id) ) * SPnm[m] ) * u;
}
// Add zero order term to sum
msum = msum + CPnm[0];
// Rescale value into gravitational potential, [1] eq. 1
*(out->rval+i+j*inp->nlat) = GMr * msum / sfac;
} /* ================================================================ */
} /* ==================================================================== */
// Return from function
return 0;
}
Again, any help is greatly appreciated and additional information can be supplied if relevant, but this has already become a long post. I have a hard time accepting that pure C code runs slower than the MATLAB C-MEX code.

To put it simply, yes, pointers can prevent some compiler optimizations, resulting in a potential slowdown. At least, this is clearly the case with ICC and to some extent with GCC. The performance of the program is strongly impacted by pointer aliasing and vectorization.
Indeed, the compiler cannot easily know whether the provided pointers alias each other or alias the addresses of some fields of the provided data structures. As a result, compilers tend to be conservative and assume that the pointed-to values can change at any time, so they reload them often. This can prevent optimizations like the splitting of some loops in gravpot (with GCC -- see line 119 of this modified code). Moreover, indirections and aliasing tend to prevent the vectorization of the hot loops (i.e. the use of the SIMD instructions provided by the target processor). Vectorization can strongly impact the performance of a code.
To give an example, here is the initial code of geo2sph and here is a slightly modified implementation. In the first case, ICC generates a slow scalar implementation, while in the second case, ICC generates a significantly faster vectorized implementation. The only difference between the two implementations is the use of the restrict keyword. This keyword tells the compiler that, for the lifetime of the pointer, only the pointer itself or a value directly derived from it (such as pointer+1) will be used to access the object to which it points (see here for more information). Note that the restrict keyword is dangerous and one should be very careful when using it, since the compiler may generate incorrect code if the restrict hint is wrong (very hard to debug). Alternatively, you can help the compiler generate vectorized code using the OpenMP SIMD directive #pragma omp simd (see here for the result). Note that you should be sure the target loop can be safely vectorized (e.g. its iterations must be independent).
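Since the linked listings are not reproduced here, below is a minimal sketch (an assumption about what the restrict-based variant could look like, reusing the structures defined in the question) of the hot loop of geo2sph with restrict-qualified local aliases of the array members. The inp->r and inp->phi arrays are assumed to be allocated already:
/* Sketch: restrict-qualified local aliases of the structure members, so the
 * compiler may assume the arrays do not overlap and can vectorize the loop. */
void geo2sph_core(struct comp_struct *inp)
{
    const double a = 6378137.0, e2 = 6.69437999014E-3;   /* WGS84 parameters */
    const double *restrict lat = inp->lat;
    const double *restrict lon = inp->lon;
    const double *restrict h   = inp->h;
    double *restrict r   = inp->r;
    double *restrict phi = inp->phi;
    const int nlat = inp->nlat;

    for (int i = 0; i < nlat; i++) {
        double sinlat = sin(lat[i]), coslat = cos(lat[i]);
        double R_E = a / sqrt(1.0 - e2 * sinlat * sinlat);
        double sinlon = sin(lon[0]), coslon = cos(lon[0]);
        double x = (R_E + h[i]) * coslat * coslon;
        double y = (R_E + h[i]) * coslat * sinlon;
        double z = (R_E * (1.0 - e2) + h[i]) * sinlat;
        double rho = sqrt(x * x + y * y);
        r[i] = sqrt(rho * rho + z * z);
        if (rho < 1.0)
            phi[i] = (z > 0.0) ? M_PI / 2.0 : -M_PI / 2.0;
        else
            phi[i] = asin(z / r[i]);
    }
}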

Related

Logistic regression code stops working above ~43,500 generated observations

I'm having some difficulty troubleshooting code I wrote in C to perform a logistic regression. While it seems to work on smaller, semi-randomized datasets, it stops working (e.g. assigning proper probabilities of belonging to class 1) at around the point where I pass 43,500 observations (determined by tweaking the number of observations created). When creating the 150 features used in the code, I do create the first two as a function of the number of observations, so I'm not sure if maybe that's the issue here, though I am using double precision. Maybe there's an overflow somewhere in the code?
The below code should be self-contained; it generates m=50,000 observations with n=150 features. Setting m below 43,500 should return "Percent class 1: 0.250000", setting to 44,000 or above will return "Percent class 1: 0.000000", regardless of what max_iter (number of times we sample m observations) is set to.
The first feature is set to 1.0 divided by the total number of observations, if class 0 (first 75% of observations), or the index of the observation divided by the total number of observations otherwise.
The second feature is just index divided by total number of observations.
All other features are random.
The logistic regression is intended to use stochastic gradient descent, randomly selecting an observation index, computing the gradient of the loss with the predicted y using current weights, and updating weights with the gradient and learning rate (eta).
Using the same initialization with Python and NumPy, I still get the proper results, even above 50,000 observations.
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
// Compute z = w * x + b
double dlc( int n, double *X, double *coef, double intercept )
{
double y_pred = intercept;
for (int i = 0; i < n; i++)
{
y_pred += X[i] * coef[i];
}
return y_pred;
}
// Compute y_hat = 1 / (1 + e^(-z))
double sigmoid( int n, double alpha, double *X, double *coef, double beta, double intercept )
{
double y_pred;
y_pred = dlc(n, X, coef, intercept);
y_pred = 1.0 / (1.0 + exp(-y_pred));
return y_pred;
}
// Stochastic gradient descent
void sgd( int m, int n, double *X, double *y, double *coef, double *intercept, double eta, int max_iter, int fit_intercept, int random_seed )
{
double *gradient_coef, *X_i;
double y_i, y_pred, resid;
int idx;
double gradient_intercept = 0.0, alpha = 1.0, beta = 1.0;
X_i = (double *) malloc (n * sizeof(double));
gradient_coef = (double *) malloc (n * sizeof(double));
for ( int i = 0; i < n; i++ )
{
coef[i] = 0.0;
gradient_coef[i] = 0.0;
}
*intercept = 0.0;
srand(random_seed);
for ( int epoch = 0; epoch < max_iter; epoch++ )
{
for ( int run = 0; run < m; run++ )
{
// Randomly sample an observation
idx = rand() % m;
for ( int i = 0; i < n; i++ )
{
X_i[i] = X[n*idx+i];
}
y_i = y[idx];
// Compute y_hat
y_pred = sigmoid( n, alpha, X_i, coef, beta, *intercept );
resid = -(y_i - y_pred);
// Compute gradients and adjust weights
for (int i = 0; i < n; i++)
{
gradient_coef[i] = X_i[i] * resid;
coef[i] -= eta * gradient_coef[i];
}
if ( fit_intercept == 1 )
{
*intercept -= eta * resid;
}
}
}
}
int main(void)
{
double *X, *y, *coef, *y_pred;
double intercept;
double eta = 0.05;
double alpha = 1.0, beta = 1.0;
long m = 50000;
long n = 150;
int max_iter = 20;
long class_0 = (long)(3.0 / 4.0 * (double)m);
double pct_class_1 = 0.0;
clock_t test_start;
clock_t test_end;
double test_time;
printf("Constructing variables...\n");
X = (double *) malloc (m * n * sizeof(double));
y = (double *) malloc (m * sizeof(double));
y_pred = (double *) malloc (m * sizeof(double));
coef = (double *) malloc (n * sizeof(double));
// Initialize classes
for (int i = 0; i < m; i++)
{
if (i < class_0)
{
y[i] = 0.0;
}
else
{
y[i] = 1.0;
}
}
// Initialize observation features
for (int i = 0; i < m; i++)
{
if (i < class_0)
{
X[n*i] = 1.0 / (double)m;
}
else
{
X[n*i] = (double)i / (double)m;
}
X[n*i + 1] = (double)i / (double)m;
for (int j = 2; j < n; j++)
{
X[n*i + j] = (double)(rand() % 100) / 100.0;
}
}
// Fit weights
printf("Running SGD...\n");
test_start = clock();
sgd( m, n, X, y, coef, &intercept, eta, max_iter, 1, 42 );
test_end = clock();
test_time = (double)(test_end - test_start) / CLOCKS_PER_SEC;
printf("Time taken: %f\n", test_time);
// Compute y_hat and share of observations predicted as class 1
printf("Making predictions...\n");
for ( int i = 0; i < m; i++ )
{
y_pred[i] = sigmoid( n, alpha, &X[i*n], coef, beta, intercept );
}
printf("Printing results...\n");
for ( int i = 0; i < m; i++ )
{
//printf("%f\n", y_pred[i]);
if (y_pred[i] > 0.5)
{
pct_class_1 += 1.0;
}
// Troubleshooting print
if (i < 10 || i > m - 10)
{
printf("%g\n", y_pred[i]);
}
}
printf("Percent class 1: %f", pct_class_1 / (double)m);
return 0;
}
For reference, here is my (presumably) equivalent Python code, which returns the correct percent of identified classes at more than 50,000 observations:
import numpy as np
import time

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

class LogisticRegressor:
    def __init__(self, eta, init_runs, fit_intercept=True):
        self.eta = eta
        self.init_runs = init_runs
        self.fit_intercept = fit_intercept

    def fit(self, x, y):
        m, n = x.shape
        self.coef = np.zeros((n, 1))
        self.intercept = np.zeros((1, 1))
        for epoch in range(self.init_runs):
            for run in range(m):
                idx = np.random.randint(0, m)
                x_i = x[idx:idx+1, :]
                y_i = y[idx]
                y_pred_i = sigmoid(x_i.dot(self.coef) + self.intercept)
                gradient_w = -(x_i.T * (y_i - y_pred_i))
                self.coef -= self.eta * gradient_w
                if self.fit_intercept:
                    gradient_b = -(y_i - y_pred_i)
                    self.intercept -= self.eta * gradient_b

    def predict_proba(self, x):
        m, n = x.shape
        y_pred = np.ones((m, 2))
        y_pred[:,1:2] = sigmoid(x.dot(self.coef) + self.intercept)
        y_pred[:,0:1] -= y_pred[:,1:2]
        return y_pred

    def predict(self, x):
        return np.round(sigmoid(x.dot(self.coef) + self.intercept))

m = 50000
n = 150
class1 = int(3.0 / 4.0 * m)
X = np.random.rand(m, n)
y = np.zeros((m, 1))
for obs in range(m):
    if obs < class1:
        continue
    else:
        y[obs,0] = 1
for obs in range(m):
    if obs < class1:
        X[obs, 0] = 1.0 / float(m)
    else:
        X[obs, 0] = float(obs) / float(m)
    X[obs, 1] = float(obs) / float(m)

logit = LogisticRegressor(0.05, 20)
start_time = time.time()
logit.fit(X, y)
end_time = time.time()
print(round(end_time - start_time, 2))
y_pred = logit.predict(X)
print("Percent:", y_pred.sum() / len(y_pred))
The issue is here:
// Randomly sample an observation
idx = rand() % m;
... in light of the fact that the OP's RAND_MAX is 32767. This is exacerbated by the fact that all of the class 0 observations are at the end.
All samples will be drawn from the first 32768 observations, and when the total number of observations is greater than that, the proportion of class 0 observations among those that can be sampled is less than 0.25. At 43691 total observations, there are no class 0 observations among those that can be sampled.
As a secondary issue, rand() % m does not yield a wholly uniform distribution if m does not evenly divide RAND_MAX + 1, though the effect of this issue will be much more subtle.
Bottom line: you need a better random number generator.
At minimum, you could consider combining the bits from two calls to rand() to yield an integer with sufficient range, but you might want to consider getting a third-party generator. There are several available.
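For example, on implementations where RAND_MAX is 32767 (15 random bits per call), two calls can be combined into one 30-bit value. A minimal sketch, assuming RAND_MAX == 32767; the modulo bias mentioned above still applies:
/* Sketch: combine two 15-bit rand() results into one 30-bit value. */
static int rand30(void)
{
    return (rand() << 15) | rand();   /* range 0 .. 2^30 - 1 */
}
/* ... */
idx = rand30() % m;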
Note: OP reports "m=50,000 observations with n=150 features.", so perhaps this is not the issue for OP, but I'll leave this answer up for reference when OP tries larger tasks.
A potential issue:
long overflow
m * n * sizeof(double) risks overflow when long is 32-bit and m*n > LONG_MAX (or about 46,341 if m, n are the same).
OP does report "m=50,000 observations with n=150 features".
A first step is to perform the multiplication using size_t math where we gain at least 1 more bit in the calculation.
// m * n * sizeof(double)
sizeof(double) * m * n
Yet unless OP's size_t is more than 32-bit, we still have trouble.
In any case, I recommend using size_t for array sizing and indexing.
Check allocations for failure too.
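A minimal sketch of both points, reusing the names from the question (the error message is illustrative):
/* Sketch: compute the size in size_t and check the allocation result */
size_t mm = (size_t)m, nn = (size_t)n;
double *X = malloc(sizeof *X * mm * nn);
if (X == NULL) {
    fprintf(stderr, "Allocation of %zu bytes failed\n", sizeof *X * mm * nn);
    return EXIT_FAILURE;
}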
Since RAND_MAX may be too small and array indexing should be done using size_t math, consider a helper function to generate a random index over the entire size_t range.
// idx = rand() % m;
size_t idx = rand_size_t() % (size_t)m;
If stuck with the standard rand(), below is a helper function to extend its range as needed.
It uses the real nifty IMAX_BITS(m).
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
// https://stackoverflow.com/a/4589384/2410359
/* Number of bits in inttype_MAX, or in any (1<<k)-1 where 0 <= k < 2040 */
#define IMAX_BITS(m) ((m)/((m)%255+1) / 255%255*8 + 7-86/((m)%255+12))
// Test that RAND_MAX is a power of 2 minus 1
_Static_assert((RAND_MAX & 1) && ((RAND_MAX/2 + 1) & (RAND_MAX/2)) == 0, "RAND_MAX is not a Mersenne number");
#define RAND_MAX_WIDTH (IMAX_BITS(RAND_MAX))
#define SIZE_MAX_WIDTH (IMAX_BITS(SIZE_MAX))
size_t rand_size_t(void) {
size_t index = (size_t) rand();
for (unsigned i = RAND_MAX_WIDTH; i < SIZE_MAX_WIDTH; i += RAND_MAX_WIDTH) {
index <<= RAND_MAX_WIDTH;
index ^= (size_t) rand();
}
return index;
}
Further considerations can replace the rand_size_t() % (size_t)m with a more uniform distribution.
As has been determined elsewhere, the problem is due to the implementation's RAND_MAX value being too small.
Assuming 32-bit ints, a slightly better PRNG function can be implemented in the code, such as this C implementation of the minstd_rand() function from C++:
#define MINSTD_RAND_MAX 2147483646
// Code assumes `int` is at least 32 bits wide.
static unsigned int minstd_seed = 1;
static void minstd_srand(unsigned int seed)
{
seed %= 2147483647;
// zero seed is bad!
minstd_seed = seed ? seed : 1;
}
static int minstd_rand(void)
{
minstd_seed = (unsigned long long)minstd_seed * 48271 % 2147483647;
return (int)minstd_seed;
}
Another problem is that expressions of the form rand() % m produce a biased result when m does not divide (unsigned int)RAND_MAX + 1. Here is an unbiased function that returns a random integer from 0 to le inclusive, making use of the minstd_rand() function defined earlier:
static int minstd_rand_max(int le)
{
int r;
if (le < 0)
{
r = le;
}
else if (le >= MINSTD_RAND_MAX)
{
r = minstd_rand();
}
else
{
int rm = MINSTD_RAND_MAX - le + MINSTD_RAND_MAX % (le + 1);
while ((r = minstd_rand()) > rm)
{
}
r /= (rm / (le + 1) + 1);
}
return r;
}
(Actually, it does still have a very small bias because minstd_rand() will never return 0.)
For example, replace rand() % 100 with minstd_rand_max(99), and replace rand() % m with minstd_rand_max(m - 1). Also replace srand(random_seed) with minstd_srand(random_seed).
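A sketch of those substitutions in the question's code (the surrounding lines are unchanged):
/* in sgd(): */
minstd_srand(random_seed);                        /* was: srand(random_seed) */
idx = minstd_rand_max(m - 1);                     /* was: idx = rand() % m   */
/* in main(), when filling the random features: */
X[n*i + j] = (double)minstd_rand_max(99) / 100.0; /* was: (rand() % 100) / 100.0 */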

VSCode exiting C code with code=3221225477

I wrote the C program below using VSCode on my old 64 bit Windows 10 Enterprise computer. It worked perfectly when I ran it on there, and it also works perfectly when I run it on this online C compiler. However, when I try to run it on VSCode on my new 64 bit Windows 11 Home computer, it exits with code=3221225477. Does anyone know what I should do about this? Thanks.
What I've tried so far:
I've tried writing programs to test each of the functions in it and they all ran perfectly fine on VSCode on my new computer; I'm only getting problems when I run the entire program
Because my new computer has Norton installed on it, it likes to think that some of my C programs are viruses and stops them from being executed. However, I made a folder that I have told it not to interfere with and copied my program there. I still got code=3221225477 when I ran it.
/*
File name : triangulation.c
Author : kene02
Last modified : 20/12/2021
License : All rights reserved
Description:
Uses triangulation to find where a point is.
*/
#define _USE_MATH_DEFINES
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
// Function declarations
float distance_between(float *point_1, float *point_2);
float find_gradient(float *point_1, float *point_2);
char *bearing(float *point_1, float *point_2);
float *triangulate(float *a, float *b, float *c, float *d);
int main()
{
// Allocating memory for coordinates
float *point_a = (float *)malloc(2*sizeof(float));
float *point_b = (float *)malloc(2*sizeof(float));
float *point_c = (float *)malloc(2*sizeof(float));
float *point_d = (float *)malloc(2*sizeof(float));
point_a[0] = 328;
point_a[1] = -1445;
point_b[0] = 325;
point_b[1] = -1455;
point_c[0] = 286;
point_c[1] = -1567;
point_d[0] = 292;
point_d[1] = -1575;
// Print coordinates
printf("\nA(%.1f, %.1f)\t", point_a[0], point_a[1]);
printf("B(%.1f, %.1f)\n", point_b[0], point_b[1]);
printf("C(%.1f, %.1f)\t", point_c[0], point_c[1]);
printf("D(%.1f, %.1f)\n\n", point_d[0], point_d[1]);
// Evaluate point of intersection
float *point_x = triangulate(point_a, point_b, point_c, point_d);
// Evaluate bearings
char *bng_ba = bearing(point_b, point_a);
char *bng_dc = bearing(point_d, point_c);
char *bng_xa = bearing(point_x, point_a);
char *bng_xc = bearing(point_x, point_c);
if (strcmp(bng_ba, bng_xa) != 0 || strcmp(bng_dc, bng_xc) != 0) {
printf("Lines do not intersect.\n\n");
} else {
float length_xa = distance_between(point_x, point_a);
float length_xc = distance_between(point_x, point_c);
printf("Point of intersection\t\t\t: ");
printf("X(%.1f, %.1f)\n", point_x[0], point_x[1]);
printf("Distance and bearing of X from A\t: ");
printf("%.1f units, %s\n", length_xa, bng_xa);
printf("Distance and bearing of X from C\t: ");
printf("%.1f units, %s\n\n", length_xc, bng_xc);
}
return 0;
}
/**
* #brief distance_between function: finds the distance between two points.
*
* #param point_1 the (x, y) coordinates of the 1st point
* #param point_2 the (x, y) coordinates of the 2nd point
* #return the distance between the two points
*/
float distance_between(float *point_1, float *point_2)
{
float x_diff = point_2[0] - point_1[0];
float y_diff = point_2[1] - point_1[1];
float distance = sqrt(pow(x_diff, 2) + pow(y_diff, 2));
return distance;
}
/**
* #brief find_gradient function: finds the gradient of the line between two
* points.
*
* #param point_1 the (x, y) coordinates of the 1st point
* #param point_2 the (x, y) coordinates of the 2nd point
* #return the gradient of the line between the two points
*/
float find_gradient(float *point_1, float *point_2)
{
float x_diff = point_2[0] - point_1[0];
float y_diff = point_2[1] - point_1[1];
float gradient = y_diff/x_diff;
return gradient;
}
/**
* #brief traingulate function: finds the point of intersection of the line
* passing through points A & B and the line passing though points C & D.
*
* #param a the coordinates of point A
* #param b the coordinates of point B
* #param c the coordinates of point C
* #param d the coordinates of point D
* #return the point at which the lines intersect
*/
float *triangulate(float *a, float *b, float *c, float *d)
{
float *intersect = (float *)malloc(2*sizeof(float));
float g1 = find_gradient(a, b);
float g2 = find_gradient(c, d);
if (a[0] == b[0]) {
intersect[0] = a[0];
intersect[1] = c[1] + (a[0]-c[0])*g2;
} else if (c[0] == d[0]) {
intersect[0] = c[0];
intersect[1] = a[1] - (a[0]-c[0])*g1;
} else {
intersect[0] = ((a[1]-c[1]) + c[0]*g2 - a[0]*g1)/(g2-g1);
intersect[1] = (a[1]*g2 - c[1]*g1 + (c[0]-a[0])*g2*g1)/(g2-g1);
}
return intersect;
}
/**
* #brief bearing function: finds the compass bearing of point_1 from point_2
*
* #param point_1 the coordinates of the point to find the compass bearing of
* relative to point_2
* #param point_2 the coordinates of the reference point from which the compass
* bearing of point_1 will be measured
* #return the compass bearing of point_1 from point_2
*/
char *bearing(float *point_1, float *point_2)
{
const float RAD_DEG_RATIO = 180/M_PI;
float x_diff = point_1[0] - point_2[0];
float y_diff = point_1[1] - point_2[1];
float gradient = y_diff/x_diff;
char *bearing = (char *)malloc(7*sizeof(char));
if (x_diff == 0 && y_diff > 0) {
sprintf(bearing, "N");
} else if (x_diff == 0 && y_diff < 0) {
sprintf(bearing, "S");
} else if (x_diff > 0 && y_diff == 0) {
sprintf(bearing, "E");
} else if (x_diff < 0 && y_diff == 0) {
sprintf(bearing, "W");
} else if (x_diff > 0 && y_diff > 0) {
float angle = 90 - RAD_DEG_RATIO*atan(gradient);
sprintf(bearing, "N %.2f E", angle);
} else if (x_diff < 0 && y_diff > 0) {
float angle = 90 + RAD_DEG_RATIO*atan(gradient);
sprintf(bearing, "N %.2f W", angle);
} else if (x_diff < 0 && y_diff < 0) {
float angle = 90 - RAD_DEG_RATIO*atan(gradient);
sprintf(bearing, "S %.2f W", angle);
} else if (x_diff > 0 && y_diff < 0) {
float angle = 90 + RAD_DEG_RATIO*atan(gradient);
sprintf(bearing, "S %.2f E", angle);
}
return bearing;
}
3221225477 decimal is gibberish. Translated to hex you get 0xC0000005, which is Windows' error code for access violation (also known as "seg fault"). The most likely cause is array out of bounds access.
The bug is here: malloc(7*sizeof(char)); creates too little memory to contain what your sprintf might produce. This can only hold 6 characters + the mandatory null terminator. Memory is cheap, so change this for something with margins like:
char *bearing = malloc(20);
Why you keep using malloc all over the place to allocate tiny amounts of memory, I have no idea. All you achieve with that is to slow down everything and create potential for memory leaks.
It's better practice to have your function work with caller allocated memory whenever possible. It is not good design to mix up memory allocation and the actual algorithm in the same function.
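As an illustration of that caller-allocated style, here is a sketch of a bearing variant that writes into a buffer owned by the caller, under the assumption that the same quadrant conventions as the original are wanted:
/* Sketch: caller supplies the buffer, so there is no per-call malloc and no leak. */
void bearing_buf(const float *p1, const float *p2, char *out, size_t size)
{
    const float RAD_DEG_RATIO = 180.0f / (float)M_PI;
    float x_diff = p1[0] - p2[0];
    float y_diff = p1[1] - p2[1];

    if (x_diff == 0.0f && y_diff == 0.0f) {
        snprintf(out, size, "-");                       /* coincident points */
    } else if (x_diff == 0.0f) {
        snprintf(out, size, y_diff > 0.0f ? "N" : "S");
    } else if (y_diff == 0.0f) {
        snprintf(out, size, x_diff > 0.0f ? "E" : "W");
    } else {
        /* in every quadrant the original computes 90 - |atan(dy/dx)| degrees */
        float angle = 90.0f - RAD_DEG_RATIO * atanf(fabsf(y_diff / x_diff));
        snprintf(out, size, "%c %.2f %c",
                 y_diff > 0.0f ? 'N' : 'S', angle,
                 x_diff > 0.0f ? 'E' : 'W');
    }
}
/* Usage: char buf[20]; bearing_buf(point_b, point_a, buf, sizeof buf); */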

Solving a coupled differential equations system using time splitting

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#define PI 3.141592
void read_input(double *D, double *L, int *nx, double *t_F);
double main(void) {
/******************************/
/* Declarations of parameters */
/******************************/
/* Number of grid points */
int nx;
/* Length of domain */
double L;
/* Equation coefficients */
double D;
/* Length of time to run simulation. */
double t_F;
/* Read in from file; */
read_input(&D, &L, &nx, &t_F);
/* Grid spacing */
double dx = L/nx;
double invdx2 = 1.0/(dx*dx);
/* Time step */
double dt = 0.25/invdx2; // changed to 0.25/dx^2 to satisfy the stability condition
/************************************************/
/* Solution Storage at Current / Next time step */
/************************************************/
double *uc, *un, *vc, *vn;
/* Time splitting solutions */
double *uts1, *uts2, *vts1, *vts2;
/* Derivative used in finite difference */
double deriv;
/* Allocate memory according to size of nx */
uc = malloc(nx * sizeof(double));
un = malloc(nx * sizeof(double));
vc = malloc(nx * sizeof(double));
vn = malloc(nx * sizeof(double));
uts1 = malloc(nx * sizeof(double));
uts2 = malloc(nx * sizeof(double));
vts1 = malloc(nx * sizeof(double));
vts2 = malloc(nx * sizeof(double));
/* Check the allocation pointers */
if (uc==NULL||un==NULL||vc==NULL||vn==NULL||uts1==NULL||
uts2==NULL||vts1==NULL||vts2==NULL) {
printf("Memory allocation failed\n");
return 1;
}
int k;
double x;
/* Current time */
double ctime;
/* Initialise arrays */
for(k = 0; k < nx; k++) {
x = k*dx;
uc[k] = 1.0 + sin(2.0*PI*x/L);
vc[k] = 0.0;
/* Set other arrays to 0 */
uts1[k] = 0; uts2[k] = 0;
vts1[k] = 0; vts2[k] = 0;
}
/* Loop over timesteps */
while (ctime < t_F){
/* Rotation factors for time-splitting scheme. */
double cfac = cos(dt); //changed from 2*dt to dt
double sfac = sin(dt);
/* First substep for diffusion equation, A_1 */
for (k = 0; k < nx; k++) {
x = k*dx;
/* Diffusion at half time step. */
deriv = (uc[k-1] + uc[k+1] - 2*uc[k])*invdx2 ;
uts1[k] = uc[k] + (D * deriv + vc[k])* 0.5*dt; //
deriv = (vc[k-1] + vc[k+1] - 2*vc[k])*invdx2;
vts1[k] = vc[k] + (D * deriv - uc[k]) * 0.5*dt;
}
/* Second substep for decay/growth terms, A_2 */
for (k = 0; k < nx; k++) {
x = k*dx;
/* Apply rotation matrix to u and v, */
uts2[k] = cfac*uts1[k] + sfac*vts1[k];
vts2[k] = -sfac*uts1[k] + cfac*vts1[k];
}
/* Third substep for diffusion terms, A_1 */
for (k = 0; k < nx; k++) {
x = k*dx;
deriv = (uts2[k-1] + uts2[k+1] - 2*uts2[k])*invdx2;
un[k] = uts2[k] + (D * deriv + vts2[k]) * 0.5*dt;
deriv = (vts2[k-1] + vts2[k+1] - 2*vts2[k])*invdx2;
vn[k] = vts2[k] + (D * deriv - uts2[k]) * 0.5*dt;
}
/* Copy next values at timestep to u, v arrays. */
memcpy(uc,un, sizeof(double) * nx);
memcpy(vc,vn, sizeof(double) * nx);
/* Increment time. */
ctime += dt;
for (k = 0; k < nx; k++ ) {
x = k*dx;
printf("%g %g %g %g\n",ctime,x,uc[k],vc[k]);
}
}
/* Free allocated memory */
free(uc); free(un);
free(vc); free(vn);
free(uts1); free(uts2);
free(vts1); free(vts2);
return 0;
}
// The lines below don't contain any bugs! Don't modify them
void read_input(double *D, double *L, int *nx, double *t_F) {
FILE *infile;
if(!(infile=fopen("input.txt","r"))) {
printf("Error opening file\n");
exit(1);
}
if(4!=fscanf(infile,"%lf %lf %d %lf",D,L,nx,t_F)) {
printf("Error reading parameters from file\n");
exit(1);
}
fclose(infile);
}
So this is the code. It is meant to solve the following differential equations:
du/dt - Dd^2u/dx^2 - v = 0
dv/dt - Dd^2v/dx^2 + u = 0
It splits the equations into two parts: the second x-derivative part (A1) and the decay part which contains u and v (A2). It uses two half steps (0.5*dt) for A1 and one full step of dt for A2. I know how to do time splitting, but I don't know whether I have done it correctly here.
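In other words, the intended update over one time step has the Strang-splitting form u^{n+1} = A_1(dt/2) A_2(dt) A_1(dt/2) u^n, where A_1 advances the diffusion (second x-derivative) part and A_2 applies the rotation/decay coupling between u and v.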
This is for an assignment and I have fixed all the errors; I am just trying to make the code work as intended. I have never had to solve something similar to this, so I am definitely very stuck right now. The solution converges, but I think it's wrong. Any ideas why? I am not looking for someone to outright tell me what I am doing wrong, just guide me in the right direction, if you know what I mean.
PS: When I compile the code with gcc I get a warning about double main(void). Why might that be?

Implementing equations with very small numbers in C - Planck's Law generating blackbody

I have a problem that, after much head scratching, I think has to do with very small numbers in a long double.
I am trying to implement Planck's law to generate a normalised blackbody curve at 1 nm intervals between a given wavelength range and for a given temperature. Ultimately this will be a function accepting inputs; for now it is main() with the variables fixed, outputting via printf().
I see examples in matlab and python, and they are implementing the same equation as me in a similar loop with no trouble at all.
This is the equation (Planck's law for spectral radiance):
B(λ, T) = (2hc^2 / λ^5) * 1 / (exp(hc / (λkT)) - 1)
My code generates an incorrect blackbody curve:
I have tested key parts of the code independently. After trying to test the equation by breaking it into blocks in Excel, I noticed that it does result in very small numbers, and I wonder if my handling of very small numbers could be causing the issue? Does anyone have any insight into using C to implement equations like this? This is a new area for me and I have found the maths much harder to implement and debug than normal code.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
//global variables
const double H = 6.626070040e-34; //Planck's constant (Joule-seconds)
const double C = 299800000; //Speed of light in vacume (meters per second)
const double K = 1.3806488e-23; //Boltzmann's constant (Joules per Kelvin)
const double nm_to_m = 1e-6; //conversion between nm and m
const int interval = 1; //wavelength interval to caculate at (nm)
//typedef structure to hold results
typedef struct {
int *wavelength;
long double *radiance;
long double *normalised;
} results;
int main() {
int min = 100 , max = 3000; //wavelength bounds to caculate between, later to be swaped to function inputs
double temprature = 200; //temprature in kelvin, later to be swaped to function input
double new_valu, old_valu = 0;
static results SPD_data, *SPD; //setup a static results structure and a pointer to point to it
SPD = &SPD_data;
SPD->wavelength = malloc(sizeof(int) * (max - min)); //allocate memory based on wavelength bounds
SPD->radiance = malloc(sizeof(long double) * (max - min));
SPD->normalised = malloc(sizeof(long double) * (max - min));
for (int i = 0; i <= (max - min); i++) {
//Fill wavelength vector
SPD->wavelength[i] = min + (interval * i);
//Computes radiance for every wavelength of blackbody of given temprature
SPD->radiance[i] = ((2 * H * pow(C, 2)) / (pow((SPD->wavelength[i] / nm_to_m), 5))) * (1 / (exp((H * C) / ((SPD->wavelength[i] / nm_to_m) * K * temprature))-1));
//Copy SPD->radiance to SPD->normalised
SPD->normalised[i] = SPD->radiance[i];
//Find largest value
if (i <= 0) {
old_valu = SPD->normalised[0];
} else if (i > 0){
new_valu = SPD->normalised[i];
if (new_valu > old_valu) {
old_valu = new_valu;
}
}
}
//for debug perposes
printf("wavelength(nm) radiance(Watts per steradian per meter squared) normalised radiance\n");
for (int i = 0; i <= (max - min); i++) {
//Normalise SPD
SPD->normalised[i] = SPD->normalised[i] / old_valu;
//for debug perposes
printf("%d %Le %Lf\n", SPD->wavelength[i], SPD->radiance[i], SPD->normalised[i]);
}
return 0; //later to be swaped to 'return SPD';
}
/*********************UPDATE Friday 24th Mar 2017 23:42*************************/
Thank you for the suggestions so far, lots of useful pointers, especially about the way numbers are stored in C (IEEE 754), but I don't think that is the issue here as it only applies to significant digits. I implemented most of the suggestions but still no progress on the problem. I suspect Alexander in the comments is probably right: changing the units and the order of operations is likely what I need to do to make the equation work like the matlab or python examples, but my knowledge of maths is not good enough to do this. I broke the equation down into chunks to take a closer look at what it was doing.
//global variables
const double H = 6.6260700e-34; //Planck's constant (Joule-seconds) 6.626070040e-34
const double C = 299792458; //Speed of light in vacume (meters per second)
const double K = 1.3806488e-23; //Boltzmann's constant (Joules per Kelvin) 1.3806488e-23
const double nm_to_m = 1e-9; //conversion between nm and m
const int interval = 1; //wavelength interval to caculate at (nm)
const int min = 100, max = 3000; //max and min wavelengths to caculate between (nm)
const double temprature = 200; //temprature (K)
//typedef structure to hold results
typedef struct {
int *wavelength;
long double *radiance;
long double *normalised;
} results;
//main program
int main()
{
//setup a static results structure and a pointer to point to it
static results SPD_data, *SPD;
SPD = &SPD_data;
//allocate memory based on wavelength bounds
SPD->wavelength = malloc(sizeof(int) * (max - min));
SPD->radiance = malloc(sizeof(long double) * (max - min));
SPD->normalised = malloc(sizeof(long double) * (max - min));
//break equasion into visible parts for debuging
long double aa, bb, cc, dd, ee, ff, gg, hh, ii, jj, kk, ll, mm, nn, oo;
for (int i = 0; i < (max - min); i++) {
//Computes radiance at every wavelength interval for blackbody of given temprature
SPD->wavelength[i] = min + (interval * i);
aa = 2 * H;
bb = pow(C, 2);
cc = aa * bb;
dd = pow((SPD->wavelength[i] / nm_to_m), 5);
ee = cc / dd;
ff = 1;
gg = H * C;
hh = SPD->wavelength[i] / nm_to_m;
ii = K * temprature;
jj = hh * ii;
kk = gg / jj;
ll = exp(kk);
mm = ll - 1;
nn = ff / mm;
oo = ee * nn;
SPD->radiance[i] = oo;
}
//for debug perposes
printf("wavelength(nm) | radiance(Watts per steradian per meter squared)\n");
for (int i = 0; i < (max - min); i++) {
printf("%d %Le\n", SPD->wavelength[i], SPD->radiance[i]);
}
return 0;
}
Equation variable values during runtime in xcode:
I notice a couple of things that are wrong and/or suspicious about the current state of your program:
You have defined nm_to_m as 10^-9, yet you divide by it. If your wavelength is measured in nanometers, you should multiply it by 10^-9 to get it in meters. To wit, if hh is supposed to be your wavelength in meters, it is on the order of several light-hours.
The same is obviously true for dd as well.
mm, being the exponential expression minus 1, is zero, which gives you infinity in the results deriving from it. This is apparently because you don't have enough digits in a double to represent the significant part of the exponential. Instead of using exp(...) - 1 here, try using the expm1() function instead, which implements a well-defined algorithm for calculating exponentials minus 1 without cancellation errors (see the small illustration after these points).
Since interval is 1, it doesn't currently matter, but you can probably see that your results wouldn't match the meaning of the code if you set interval to something else.
Unless you plan to change something about this in the future, there shouldn't be a need for this program to "save" the values of all calculations. You could just print them out as you run them.
On the other hand, you don't seem to be in any danger of underflow or overflow. The largest and smallest numbers you use don't seem to be far from 10^(±60), which is well within what ordinary doubles can deal with, let alone long doubles. That being said, it might not hurt to use more normalized units, but at the magnitudes you currently display, I wouldn't worry about it.
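A small standalone illustration of the expm1() cancellation point above (a sketch, independent of the blackbody code):
#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 1e-30;
    /* 1 + 1e-30 rounds to exactly 1.0 in double, so the subtraction gives 0 */
    printf("exp(x) - 1 = %g\n", exp(x) - 1.0);   /* prints 0 */
    /* expm1 computes e^x - 1 directly and keeps the small result */
    printf("expm1(x)   = %g\n", expm1(x));       /* prints 1e-30 */
    return 0;
}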
Thanks for all the pointers in the comments. For anyone else running into a similar problem with implementing equations in C, I had a few silly errors in the code:
writing a 6 not a 9
dividing when I should be multiplying
an off-by-one error with the size of my array vs the iterations of the for() loop
200 when I meant 2000 in the temperature variable
As a result of the last one in particular, I was not getting the results I expected (my wavelength range was not right for plotting the temperature I was calculating). This led me to assume that something was wrong in the implementation of the equation; specifically, I was thinking about big/small numbers in C because I did not understand them. That was not the case.
In summary, I should have made sure I knew exactly what my equation should be outputting for given test conditions before implementing it in code. I will work on getting more comfortable with maths, particularly algebra and dimensional analysis.
Below is the working code, implemented as a function, feel free to use it for anything but obviously no warranty of any kind etc.
blackbody.c
//
// Computes radiance for every wavelength of blackbody of given temprature
//
// INPUTS: int min wavelength to begin calculation from (nm), int max wavelength to end calculation at (nm), int temperature (kelvin)
// OUTPUTS: pointer to structure containing:
// - spectral radiance (Watts per steradian per meter squared per wavelength at 1nm intervals)
// - normalised radiance
//
//include & define
#include "blackbody.h"
//global variables
const double H = 6.626070040e-34; //Planck's constant (Joule-seconds) 6.626070040e-34
const double C = 299792458; //Speed of light in vacuum (meters per second)
const double K = 1.3806488e-23; //Boltzmann's constant (Joules per Kelvin) 1.3806488e-23
const double nm_to_m = 1e-9; //conversion between nm and m
const int interval = 1; //wavelength interval to calculate at (nm), to change this line 45 also need to be changed
bbresults* blackbody(int min, int max, double temperature) {
double new_valu, old_valu = 0; //variables for normalising result
bbresults *SPD;
SPD = malloc(sizeof(bbresults));
//allocate memory based on wavelength bounds
SPD->wavelength = malloc(sizeof(int) * (max - min));
SPD->radiance = malloc(sizeof(long double) * (max - min));
SPD->normalised = malloc(sizeof(long double) * (max - min));
for (int i = 0; i < (max - min); i++) {
//Computes radiance for every wavelength of blackbody of given temperature
SPD->wavelength[i] = min + (interval * i);
SPD->radiance[i] = ((2 * H * pow(C, 2)) / (pow((SPD->wavelength[i] * nm_to_m), 5))) * (1 / (expm1((H * C) / ((SPD->wavelength[i] * nm_to_m) * K * temperature))));
//Copy SPD->radiance to SPD->normalised
SPD->normalised[i] = SPD->radiance[i];
//Find largest value
if (i <= 0) {
old_valu = SPD->normalised[0];
} else if (i > 0){
new_valu = SPD->normalised[i];
if (new_valu > old_valu) {
old_valu = new_valu;
}
}
}
for (int i = 0; i < (max - min); i++) {
//Normalise SPD
SPD->normalised[i] = SPD->normalised[i] / old_valu;
}
return SPD;
}
blackbody.h
#ifndef blackbody_h
#define blackbody_h
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
//typedef structure to hold results
typedef struct {
int *wavelength;
long double *radiance;
long double *normalised;
} bbresults;
//function declarations
bbresults* blackbody(int, int, double);
#endif /* blackbody_h */
main.c
#include <stdio.h>
#include "blackbody.h"
int main() {
bbresults *TEST;
int min = 100, max = 3000, temp = 5000;
TEST = blackbody(min, max, temp);
printf("wavelength | normalised radiance | radiance |\n");
printf(" (nm) | - | (W per meter squr per steradian) |\n");
for (int i = 0; i < (max - min); i++) {
printf("%4d %Lf %Le\n", TEST->wavelength[i], TEST->normalised[i], TEST->radiance[i]);
}
free(TEST->wavelength);
free(TEST->radiance);
free(TEST->normalised);
free(TEST);
return 0;
}
Plot of output:

Inverse filtering on OpenCV - accessing DFT values and multiplying DFT matrices

I am trying to perform an inverse and a pseudo-inverse filtering in the frequency domain.
However, I am having trouble accessing the DFT coefficients and multiplying the DFT matrices afterwards, since I get complex numbers and, therefore, actually two matrices...
Basically the inverse filtering performs
F = G/H,
where F is the restored image, G is the blurred image and H is the kernel that blurred the image.
The pseudo-inverse needs to access the values in H, since if the value is near 0 it should be replaced in order to avoid problems in the restoration. For this we must change the H so that:
H(u,v) = 1/H(u,v) if H(u,v) > threshold, and H(u,v) = 0 otherwise
I have a kernel1 (h_1), and the images imf (restored) and img (blurred). Here is the code:
// compute the DFTs of the kernel (DFT_B) and the blurred image (DBF_A)
cvDFT( dft_A, dft_A, CV_DXT_FORWARD, complexInput1->height );
cvDFT( dft_B, dft_B, CV_DXT_FORWARD, complexInput2->height );
// the first type is the inverse fitlering
if (type == 1) {
printf("...performing inverse filtering\n");
// dividing the transforms
cvDiv(dft_A, dft_B, dft_C, 1);
}
// the second type is the pseudo-inverse filtering
else {
printf("...prepare kernel for pseudo-inverse filtering\n");
// will try to access the real values in order to see if value is above a threshold
cvSplit( dft_B, image_Re1, image_Im1, 0, 0 );
// pointers to access the data into the real and imaginary matrices
uchar * dRe1 = (uchar *)image_Re1->imageData;
uchar * dIm1 = (uchar *)image_Im1->imageData;
int width = image_Re1->width;
int height = image_Re1->height;
int step = image_Re1->widthStep;
image_Re2 = cvCreateImage(cvGetSize(image_Re1), IPL_DEPTH_32F, 1);
image_Im2 = cvCreateImage(cvGetSize(image_Im2), IPL_DEPTH_32F, 1);
// pointers to access the data into the real and imaginary matrices
// it will be the resulting pseudo-inverse filter
uchar * dRe2 = (uchar *)image_Re2->imageData;
uchar * dIm2 = (uchar *)image_Im2->imageData;
printf("...building kernel for pseudo-inverse filtering\n");
for ( i = 0; i < height; i++ ) {
for ( j = 0; j < width; j++ ) {
// generate the 1/H(i,j) value
if (dRe1[i * step + j] > threshold) {
float realsq = dRe1[i * step + j]*dRe1[i * step + j];
float imagsq = dIm1[i * step + j]*dIm1[i * step + j];
dRe2[i * step + j] = dRe1[i * step + j] / (realsq + imagsq);
dIm2[i * step + j] = -1 * (dIm1[i * step + j] / (realsq + imagsq));
}
else {
dRe2[i * step + j] = 0;
dIm2[i * step + j] = 0;
}
}
}
printf("...merging final kernel\n");
cvMerge(image_Re2, image_Im2, 0, 0, dft_B);
printf("...performing pseudo-inverse filtering\n");
cvMulSpectrums(dft_A, dft_B, dft_C, 1);
}
printf("...performing IDFT\n");
cvDFT(dft_C, dft_H, CV_DXT_INV_SCALE, 1);
printf("...getting size\n");
cvGetSubRect(dft_H, &tmp3, cvRect(0, 0, img->width, img->height));
printf("......(%d, %d) - (%d, %d)\n", tmp3.cols, tmp3.rows, restored->width, restored->height);
cvSplit( &tmp3, image_Re1, image_Im1, 0, 0 );
cvNamedWindow("re", 0);
cvShowImage("re", image_Re2);
cvWaitKey(0);
printf("...copying final image\n");
// error is in the line below
cvCopy(image_Re1, imf, NULL);
I have an error on the last line: --- OpenCV Error: Assertion failed (src.depth() == dst.depth() && src.size() == dst.size()) in cvCopy, file /build/buildd/opencv-2.1.0/src/cxcore/cxcopy.cpp, line 466
I know it has to do with the size or depth, but I don't know how to control them. Anyway, I tried to show the image_Re1 and it is empty...
Can anyone shed some light on it?
Seems like you didn't initialize your imf picture!
cvCopy needs an initialized destination matrix, so do a:
IplImage* imf= cvCreateImage(cvGetSize(image_Re1), IPL_DEPTH_32F, 1);
first and I think it'll work.
Also, you don't free the image space in this code (cvReleaseImage(&image))
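For instance, a sketch using the image names from the question (release each image once it is no longer needed):
cvReleaseImage(&image_Re1);
cvReleaseImage(&image_Im1);
cvReleaseImage(&image_Re2);
cvReleaseImage(&image_Im2);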
