I am changing the phase of signal from 0 to 360 by each degree to get max voltage value.Because if i change phase of the signal the voltage also changes.I have the fallowing code to find max value.
void Maxphase(float *max, unsigned int *index)
{
*max = 0.0;
float value;
unsigned int i, data;
for (i=0;i<=360;i++)
{
phaseset(i);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > *max) //find max value
{
*max = value; //max voltage
*index = i;
}
}
}
from the above code I am getting Max value(voltage) after 38 sec(360*100) because for every read operation my device needs 100ms delay. This is too large, I can't change hardware thus i want to get the max value within 2 to 3 sec by optimizing software.
then I have tried with the fallowing code.
void Maxphase(float *max1, unsigned int *index1)
{
max = 0.0;
float value;
unsigned int i,j,data;
for (i=0;i<=360;i+=10)
{
phaseset(i);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > max) //find max value
{
max = value; //max voltage
index = i;
}
}
*max1=max;
*index1=index;
for (i=*index1-9;i<=*index1+9;i+=1)
{
j=i;
phaseset(j);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > *max1) //find max value
{
*max1 = value; //max voltage
*index1 = i;
}
}
}
I have reduced time from 45 sec to 7 sec. i have reduced iterations 360 to 54(54*100). I want to reduce it 7 sec to 2 sec.
Can any one help me with better algorithm that i can get max value from (0 to 360) with in 2 sec.
I have measured the voltage values using scope by changing phase. I have written below how it vary voltage with phase.
Phase (degree) voltage(max)
0 0.9mv
45 9.5mv
90 9.0mv
135 0.9mv
180 292mv
225 601mv
270 555mv
315 230mv
360 0.9mv
I am new to C programming. Can anyone provide sample code for the best algorithm.
Golden section search is probably what you are after. It is effective, but still pretty simple.
If you want something even faster and more sophisticated, you can use Brent's method.
If you can be sure that there is only a single highest point on your 360 degrees you can do a recursive divide and conquer.
You start by looking e.g. at 0, 180, 270. Let's say you find the answer is that 180 + 270 together have the highest value. Than you start by looking in at 210.... Which side is higher? And so on ...
Exploiting the various comments and suggestions here, I present this untested piece of code. I don't know whether this works at all or is an improvement over the existing source, but it was fun to try, anyway:
extern void phaseset(int);
extern void delay_ms(int);
extern float readvalue();
extern float voltage(int);
extern int mux1;
float probe(int phase)
{
float data;
phaseset(phase);
delay_ms(100);
data = readvalue(); /* data is ignored? */
return voltage(mux1); /* mux1? */
}
/* helper routine, find the max in a given range [phase1, phase2] */
void maxphase_aux(int phase1, float vol1, int phase2, float vol2, int *phaseret, float *volret)
{
float xvol1 = 0, xvol2 = 0;
int xphase1 = -1, xphase2 = -1;
/* test the voltage in the middle */
int phasem = abs(phase2 - phase1) / 2;
float volm = probe(phasem);
if (volm > vol1 && volm > vol2) {
/* middle point is the highest so far,
* search left and right for maximum */
*volret = volm;
*phaseret = phasem;
maxphase_aux(phase1, vol1, phasem, volm, &xphase1, &xvol1);
maxphase_aux(phase2, vol2, phasem, volm, &xphase2, &xvol2);
} else if (volm < vol1 && volm > vol2) {
/* vol1 is the highest so far,
* search between volm and vol1 for maximum */
maxphase_aux(phase1, vol1, phasem, volm, &xphase1, &xvol1);
} else if (volm > vol1 && volm < vol2) {
/* vol2 is the highest so far,
* search between volm and vol2 for maximum */
maxphase_aux(phase2, vol2, phasem, volm, &xphase2, &xvol2);
} else {
/* not possible? */
return;
}
if (xvol1 > volm) {
*volret = xvol1;
*phaseret = xphase1;
}
if (xvol2 > volm) {
*volret = xvol2;
*phaseret = xphase2;
}
}
void maxphase(int *phaseret, float *volret)
{
float v0 = probe(0);
float v360 = probe(360);
maxphase_aux(0, v0, 360, v360, phaseret, volret);
}
UPDATE: 2012-11-10.
#include <stdio.h>
#include <string.h>
#include <math.h>
#define FAKE_TARGET 89
unsigned fake_target = FAKE_TARGET;
float probe_one(unsigned int phase);
void Maxphase(float *max, unsigned int *index);
void Maxphase(float *max, unsigned int *index)
{
unsigned int aim, idx, victim;
struct best {
unsigned pos;
float val;
} samples[4] = {{0, 0.0}, };
for (aim = 0;aim < 360;aim += 90) {
idx=aim/90;
samples[idx].pos = aim;
samples[idx].val = probe_one(samples[idx].pos);
if (!idx || samples[idx].val < samples[victim].val ) victim = idx;
}
/* eliminate the weakist postion, and rotate the rest,
** such that:
** samples[0] := lower boundary.
** samples[1] := our best guess
** samples[2] := upper boundary
** samples[3] := scratch/probe element
*/
fprintf(stderr, "Victim=%u\n", victim );
switch(victim) {
case 0: samples[0] = samples[1]; samples[1] = samples[2]; samples[2] = samples[3]; break;
case 1: samples[1] = samples[3]; samples[3] = samples[0]; samples[0] = samples[2]; samples[2] = samples[3]; break;
case 2: samples[2] = samples[1]; samples[1] = samples[0]; samples[0] = samples[3]; break;
case 3: break;
}
/* Calculation is easier if the positions are increasing.
** (We can always perform the modulo 360 if needed)
*/
if (samples[0].pos > samples[1].pos ) samples[1].pos += 360;
if (samples[1].pos > samples[2].pos ) samples[2].pos += 360;
while( 1) {
int step;
step = samples[2].pos - samples[0].pos;
if (step < 3) break;
do {
fprintf(stderr, "\n[%u %u %u] Diff=%d\n"
, samples[0].pos , samples[1].pos , samples[2].pos , step);
if (step > 0) step++; else step--;
step /= 2;
aim = (samples[0].pos + step ) ;
/* avoid hitting the middle cell twice */
if (aim %360 != samples[1].pos %360) break;
step += 1;
aim = (samples[0].pos + step ) ;
if (aim %360 != samples[1].pos %360) break;
step -= 2;
aim = (samples[0].pos + step ) ;
break;
} while(0);
fprintf(stderr, "Step=%d Aim=%u, Idx=%u\n",step, aim,idx );
samples[3].pos = aim;
samples[3].val = probe_one( samples[3].pos );
victim= (samples[3].pos > samples[1].pos ) ? 2 : 0;
if (samples[3].val > samples[1].val) idx= 1; else idx = victim;
fprintf(stderr, "Victim=%u, TargetIdx=%u\n", victim, idx );
/* This should not happen */
if (samples[3].val < samples[victim].val) break;
if (idx != victim) samples[2-victim] = samples[idx];
samples[idx] = samples[3];
}
*max = samples[1].val;
*index = samples[1].pos % 360;
}
float probe_one(unsigned int phase)
{
float value;
#ifdef FAKE_TARGET
int dif;
dif = fake_target-phase;
if (dif < -180) dif = 360+dif;
else if (dif > 180) dif = 360-dif;
/* value = 1.0 / (1 + pow(phase-231, 2)); */
value = 1.0 / (1 + pow(dif, 2));
fprintf(stderr, "Target = %d: Probe(%d:%d) := %f\n", fake_target, phase, dif, value );
sleep (1);
#else
unsigned int data;
phase %= 360;
phaseset(phase);
delay_ms(100);
data = readvalue(); // what is this ?
value = voltage(mux1);
#endif
return value;
}
int main(int argc, char **argv)
{
float value;
unsigned int index;
if (argv[1]) sscanf (argv[1], "%u", &fake_target);
fake_target %= 360;
Maxphase(&value, &index) ;
printf("Phase=%u Max=%f\n", index, value );
return 0;
}
Related
I have made a (pretty bad) line follower.
Here is a sketch to roughly know the shape of the robot and location of the treads and sensors
[-] 0 0 [-] // 0 = color sensor
[-]------[-] // - = robot body
[-]------[-] // [-] = tank tread
[-] [-]
Here's what it does:
get Red, Green & Blue, make average of sensor 1 readings, do the same for 2
subtract to get value
this value will go through the PID part
steer with calculated steering
repeat (all of this is in a loop)
I use RGB and not reflected intensity (which is what is commonly used), because sometimes I need to detect if there's green color under the sensor (if there is, turn).
The real problem comes with the steering part. Unfortunately, it only accelerates a motor, meaning that in very tight turns we just lose the line.
Optimally, it should compensate a bit with the other motor (maybe going in the other direction?), but I am not sure how to calculate the speed of the motor, nor how to enforce this very strict line following policy.
Here is the code (I am also very grateful for any kind of tips on how to clean up the code! This is my first project in C :D ). I am not asking to read it all (it is pretty long), you could also just look at the steering function, and work your way back to rawFollowLine, this should hopefully shorten the code.
void rawFollowLine(int speed, float Kp, float Ki, float Kd){
_checkInit();
set_sensor_mode(sn_lx_color, "RGB-RAW");
set_sensor_mode(sn_rx_color, "RGB-RAW");
//printAllSensors();
int wasBlackCounter = 0;
int wasBlack = 0;
int lastBlack = 0;
for (int i = 0; i < 2000; i++)
{
if (isTerminating == 1)
{
killMotors(0);
break;
}
int greenCheck = rawGreenCheck(&wasBlack, &wasBlackCounter, &lastBlack);
if (wasBlack == 1){
wasBlackCounter++;
if (wasBlackCounter > 50){
wasBlackCounter = 0;
wasBlack = 0;
}
}
if (greenCheck == 1)
{
// lx is green
killMotors(1);
usleep(400 * 1000);
drive(200, 70);
waitIfMotorIsRunning();
killMotors(1);
pivotTurn(-90);
}
else if (greenCheck == 2)
{
// rx is green
killMotors(1);
usleep(400 * 1000);
drive(200, 70);
waitIfMotorIsRunning();
killMotors(1);
pivotTurn(90);
}
else if (greenCheck == 3)
{
// both rx and lx are green
killMotors(1);
turn(180);
}
else if (greenCheck == 5)
{
if(lastBlack == 2)
{
lastBlack = 0;
drive(100, -200);
//pivotTurn(50);
}
else if (lastBlack == 1)
{
lastBlack = 0;
drive(100, -200);
//pivotTurn(-50);
} else {
pidLineRaw(speed, Kp, Ki, Kd, &lastBlack);
}
}
else
{
pidLineRaw(speed, Kp, Ki, Kd, &lastBlack);
}
}
killMotors(1);
}
int rawGreenCheck(int *wasBlack, int *wasBlackCounter, int *lastBlack)
{
// Some documentation
// return nums:
// 3 = double green
// 2 = right green
// 1 = left green
// 0 = no green
int lx_red;
int lx_green;
int lx_blue;
int rx_red;
int rx_green;
int rx_blue;
get_sensor_value(0, sn_lx_color, &lx_red);
get_sensor_value(0, sn_rx_color, &rx_red);
get_sensor_value(1, sn_lx_color, &lx_green);
get_sensor_value(1, sn_rx_color, &rx_green);
get_sensor_value(2, sn_lx_color, &lx_blue);
get_sensor_value(2, sn_rx_color, &rx_blue);
//printf("rx_red %d\n", rx_red);
rx_red = (rx_red * rx_ratio_r);
rx_green = (rx_green * rx_ratio_g);
rx_blue = (rx_blue * rx_ratio_b);
//printf("rx_red (again) %d\n", rx_red);
if(
lx_red < 55 &&
lx_green > 90 &&
lx_blue < 55 &&
rx_red < 55 &&
rx_green > 90 &&
rx_blue < 55
)
{
// rx and lx see green
if (*wasBlack == 1)
{
// Apparently we crossed an intersection!
printf("Apparently we crossed an intersection!\n");
// We need to go straight.
*wasBlack = 0;
*wasBlackCounter = 0;
return 0;
}
else
{
return 3;
}
}
else if(lx_red < 55 && lx_green > 90 && lx_blue < 55)
{
// lx sees green
return 1;
}
else if(rx_red < 55 && rx_green > 90 && rx_blue < 55)
{
// rx sees green
return 2;
}
else if(rx_red < 50 && rx_green < 50 && rx_blue < 50 && lx_red < 50 && lx_green < 50 && lx_blue < 50)
{
// rx and lx see black
// this is needed if the intersection has the green tiles after the black line
printf("We are on the line? Is this an intersection?\n");
*wasBlack = 1;
return 0;
}
else if(lx_red < 55 && lx_green < 55 && lx_blue < 55)
{
// lx = right sees black
// this is needed if the intersection has the green tiles after the black line
//printf("We are on the line? Is this an intersection?\n");
killMotor(1, motor[R]);
rotateTillBlack(motor[L], sn_rx_color);
//printf("ASS2\n");
return 0;
}
else if(rx_red < 55 && rx_green < 55 && rx_blue < 55)
{
// rx = left sees black
killMotor(1, motor[L]);
rotateTillBlack(motor[R], sn_lx_color);
//printf("ASS1\n");
return 0;
}
//*lx_color_status = 0;
//*rx_color_status = 0;
*lastBlack = 0;
return 0;
}
void pidLineRaw(int speed, float Kp, float Ki, float Kd, int *lastBlack)
{
int red_lx_color;
int red_rx_color;
int green_lx_color;
int green_rx_color;
int blue_lx_color;
int blue_rx_color;
int lx_color;
int rx_color;
int last_error = 0;
int integral = 0;
int derivative = 0;
//float Kp = 0.1;
//float Ki = 0;
//float Kd = 0;
//set_sensor_mode(sn_lx_color, "COL-REFLECT");
//set_sensor_mode(sn_rx_color, "COL-REFLECT");
get_sensor_value(0, sn_lx_color, &red_lx_color);
get_sensor_value(0, sn_rx_color, &red_rx_color);
get_sensor_value(1, sn_lx_color, &green_lx_color);
get_sensor_value(1, sn_rx_color, &green_rx_color);
get_sensor_value(2, sn_lx_color, &blue_lx_color);
get_sensor_value(2, sn_rx_color, &blue_rx_color);
lx_color = (red_lx_color + green_lx_color+ blue_lx_color)/3;
rx_color = ( (red_rx_color*rx_ratio_r) + (green_rx_color*rx_ratio_g) + (blue_rx_color*rx_ratio_b))/3;
if(*lastBlack == 0)
{
int error = lx_color - rx_color;
integral = integral + error;
derivative = error - last_error;
last_error = error;
int steering_val = (error * Kp) + (integral * Ki) + (derivative * Kd);
// printf("error: %d\nsteering: %d\n",error, steering_val);
move_steering(-steering_val, speed, 1, 0);
} else if (*lastBlack == 1)
{
printf("lx_color_status\n");
move_steering(35, speed, 1, 0);
move_steering(-2, speed, 1, 0);
}
else if (*lastBlack == 2)
{
printf("rx_color_status\n");
move_steering(-35, speed, 1, 0);
move_steering(2, speed, 1, 0);
}
else
{
printf("HMMM: %d\n", *lastBlack);
exit(666);
}
}
static void _getSteeringSpeed(int speed, int *lx_speed, int *rx_speed, int steering)
{
if(steering > 100 || steering < -100)
{
printf("Yo wtf steering is %d\n", steering);
}
else
{
int speed_factor = (50 - abs(steering)) / 50;
*lx_speed = speed;
*rx_speed = speed;
if(steering >= 0)
{
*rx_speed = *rx_speed * speed_factor;
}
else
{
*lx_speed = *lx_speed * speed_factor;
}
}
}
Some parts are omitted, yes, they are not required to solve the problem.
I am also extremely sorry as there might be unused variables and such. I am working on refactoring the project, I'll update the post when I'm done.
So, summing everything up, I need to make sure that the steering part properly turns and follows the line. How do I do that? Is the code that I wrote even suitable? I'm guessing the steering itself might need some sort of feedback loop, to check if it's on the line?
I've tried a few things, any it seems that at best I'm 1.5x slower than the printf() family of functions, which boggles my mind a bit. I think what I'm up against in this situation is the addressing of my device is 32bit, and I don't have an FPU. I've tried a couple of "ftoa()" implementations and constrained them to only look for 2 digits on the left of the decimal point, and left myself some breadcrumbs as to what the total length is of a larger overall string that I'm trying to build. At the end of the day, it seems like the nature of an array of 8-bit elements on a 32bit system is leading to a bunch of hidden shift operations, bitwise "OR" and bitwise NAND operations that are just slowing things down ridiculously...
Anyone have any general tips for this situation? (other than a re-architect to an 8.24 fixed point design) I've tried the compiler optimizations from wysiwyg to execution speed focused, nothing seems to beat snprintf.
Here's the fastest one that I had tried:
#if (__DEBUG)
#define DATA_FIFO_SIZE (8)
#else
#define DATA_FIFO_SIZE (1024)
#endif
typedef struct
{
int32_t rval[4];
double cval[4];
uint16_t idx;
uint16_t padding; //#attention the compiler was padding with 2 bytes to align to 32bit
} data_fifo_entry;
const char V_ERR_MSG[7] = "ERROR,\0";
static data_fifo_entry data_fifo[DATA_FIFO_SIZE];
static char embed_text[256];
/****
* float to ASCII, adapted from
* https://stackoverflow.com/questions/2302969/how-to-implement-char-ftoafloat-num-without-sprintf-library-function-i#7097567
*
****/
//#attention the following floating point #defs are linked!!
#define MAX_DIGITS_TO_PRINT_FLOAT (6)
#define MAX_SUPPORTED_PRINTABLE_FLOAT (+999999.99999999999999999999999999)
#define MIN_SUPPORTED_PRINTABLE_FLOAT (-999999.99999999999999999999999999)
#define FLOAT_TEST6 (100000.0)
#define FLOAT_TEST5 (10000.0)
#define FLOAT_TEST4 (1000.0)
#define FLOAT_TEST3 (100.0)
#define FLOAT_TEST2 (10.0)
#define FLOAT_TEST1 (1.0)
static inline int ftoa(char *s, const float f_in, const uint8_t precision)
{
float f_p = 0.0001;
float n = f_in;
int neg = (n < 0.0);
int length = 0;
switch (precision)
{
case (1):
{
f_p = 0.1;
break;
}
case (2):
{
f_p = 0.01;
break;
}
case (3):
{
f_p = 0.001;
break;
}
//case (4) is the default assumption
case (5):
{
f_p = 0.00001;
break;
}
case (6):
{
f_p = 0.000001;
break;
}
default: //already assumed, no assignments here
{
break;
}
} /* switch */
// handle special cases
if (isnan(n))
{
strcpy(s, "nan\0");
length = 4;
}
else if ((isinf(n)) || (n >= MAX_SUPPORTED_PRINTABLE_FLOAT) ||
((-1.0 * n) < MIN_SUPPORTED_PRINTABLE_FLOAT))
{
strcpy(s, "inf\0");
length = 4;
}
else if (n == 0.0)
{
int idx;
s[length++] = '+';
s[length++] = '0';
s[length++] = '.';
for (idx = 0; idx < precision; idx++)
{
s[length++] = '0';
}
s[length++] = '\0';
}
else if (((n > 0.0) && (n < f_p)) || ((n < 0.0) && ((-1.0 * n) < f_p)))
{
int idx;
if (n >= 0.0)
{
s[length++] = '+';
}
else
{
s[length++] = '-';
}
s[length++] = '0';
s[length++] = '.';
for (idx = 1; idx < precision; idx++)
{
s[length++] = '0';
}
s[length++] = '\0';
}
else
{
int digit, m;
if (neg)
{
n = -n;
}
// calculate magnitude
if (n >= FLOAT_TEST6)
{
m = 6;
}
else if (n >= FLOAT_TEST5)
{
m = 5;
}
else if (n >= FLOAT_TEST4)
{
m = 4;
}
else if (n >= FLOAT_TEST3)
{
m = 3;
}
else if (n >= FLOAT_TEST2)
{
m = 2;
}
else if (n >= FLOAT_TEST1)
{
m = 1;
}
else
{
m = 0;
}
if (neg)
{
s[length++] = '-';
}
else
{
s[length++] = '+';
}
// set up for scientific notation
if (m < 1.0)
{
m = 0;
}
// convert the number
while (n > f_p || m >= 0)
{
double weight = pow(10.0, m);
if ((weight > 0) && !isinf(weight))
{
digit = floor(n / weight);
n -= (digit * weight);
s[length++] = '0' + digit;
}
if ((m == 0) && (n > 0))
{
s[length++] = '.';
}
m--;
}
s[length++] = '\0';
}
return (length - 1);
} /* ftoa */
static inline void print2_and_idx(int8_t idx1, int8_t idx2, uint16_t fifo_idx)
{
//#attention 10 characters already in the buffer, idx does NOT start at zero
uint8_t idx = V_PREFIX_LENGTH;
char scratch[16] = {'\0'};
char * p_fifo_id;
if ((idx1 >= 0) && (idx1 < MAX_IDX) && (idx2 >= 0) && (idx2 < MAX_IDX) &&
(fifo_idx >= 0) && (fifo_idx < DATA_FIFO_SIZE))
{
ftoa(scratch, data_fifo[fifo_idx].cval[idx1], 4);
memcpy((void *)&embed_text[idx += 7], (void *)scratch, 7);
embed_text[idx++] = ',';
ftoa(scratch, data_fifo[fifo_idx].cval[idx2], 4);
memcpy((void *)&embed_text[idx += 7], (void *)scratch, 7);
embed_text[idx++] = ',';
//!\todo maybe print the .idx as fixed width, zero pad to 5 digits
p_fifo_id = utoa((char *)&embed_text[idx], (unsigned int)data_fifo[fifo_idx].idx, 10);
idx += strlen(p_fifo_id);
embed_text[idx++] = ',';
}
else
{
memcpy((void *)&embed_text[idx], (void *)V_ERR_MSG, 7);
}
} /* print2_and_idx */
Instead of using *printf() with FP arguments, convert the FP values first into scaled integers.
With still calling snprintf(), yet with integer and simple character arguments, my code was about 20x faster than the baseline.
Your mileage may vary. YMMV.
//baseline
void format2double_1(char *mystr, double pi, double e) {
snprintf(mystr, 22, "{%+0.4f,%+0.4f}", pi, e);
//puts(mystr);
}
void format2double_2(char *mystr, double pi, double e) {
int pi_i = (int) lrint(pi * 10000.0);
int api_i = abs(pi_i);
int e_i = (int) lrint(e * 10000.0);
int ae_i = abs(e_i);
snprintf(mystr, 22, "{%c%d.%04d,%c%d.%04d}", //
"+-"[pi_i < 0], api_i / 10000, api_i % 10000, //
"+-"[e_i < 0], ae_i / 10000, ae_i % 10000);
//puts(mystr);
}
[edit]
For a proper -0.0 text, use "+-"[!!signbit(pi)]
[edit]
Some idea for OP to consider as a ftoa() replacement. Central code is lrint(f_in * fscale[precision]); which rounds and scales. Untested.
#define PRINTABLE_MAGNITUDE_LIMIT 1000000
int ftoa_1(char *s, const float f_in, const uint8_t precision) {
int n;
sprintf(s, "%+.*f%n", precision, f_in, &n);
return n;
}
int ftoa_2(char *s, const float f_in, const uint8_t precision) {
float fscale[] = { 1, 10, 100, 1000, 10000, 100000, 1000000 };
long iscale[] = { 1, 10, 100, 1000, 10000, 100000, 1000000 };
assert(precision > 0 && precision < sizeof fscale / sizeof fscale[0]);
// gross range check
if (f_in > -PRINTABLE_MAGNITUDE_LIMIT && f_in < PRINTABLE_MAGNITUDE_LIMIT) {
long value = lrint(f_in * fscale[precision]);
value = labs(value);
long scale = iscale[precision];
long ipart = value / scale;
long fpart = value % scale;
// fine range check
if (ipart < PRINTABLE_MAGNITUDE_LIMIT) {
int n;
sprintf(s, "%c%ld:%0*ld%n", signbit(f_in) ? '-' : '+', ipart, precision,
fpart, &n);
return n;
}
}
// Out of range values need not be of performance concern for now.
return ftoa_1(s, f_in, precision);
}
[edit]
To convert a positive or 0 integer to a string quickly without the need to shift the buffer or reverse it, see below. It also returns the string length for subsequent string building.
// Convert an unsigned to a decimal string and return its length
size_t utoa_length(char *dest, unsigned u) {
size_t len = 0;
if (u >= 10) {
len = utoa_length(dest, u/10);
dest += len;
}
dest[0] = '0' + u%10;
dest[1] = '\0';
return len + 1;
}
In a similar vein of #chux's answer, if the remaining snprintf is still slow you can go down the rabbit hole of hand-composing strings/hand-rendering integers.
char *fmtp04f(char *buf, char *lim, double d) {
// if there's no space at all don't bother
if(buf==lim) return buf;
// 10 characters in maximum 32 bit integer, one for the dot,
// one for the terminating NUL in debug prints
char b[12];
// current position in the buffer
char *bp = b;
// scale and round
int32_t i = lrint(d * 10000.);
// write sign and fix i sign
// (we do have at least one character available in buf)
if(signbit(d)) {
*buf++='-';
i = -i;
} else {
*buf++='+';
}
// *always* write down the last 4 digits, even if they are zeroes
// (they'll become the 4 digits after the decimal dot)
for(; bp!=b+4; ) {
*bp++ = '0' + i%10;
i/=10;
}
*bp++='.';
// write down the remaining digits, writing at least one
do {
*bp++ = '0' + i%10;
i/=10;
} while(i != 0);
// bp is at the character after the last, step back
--bp;
// data is now into b *in reversed order*;
// reverse-copy it into the user-provided buffer
while(buf!=lim) {
*buf++ = *bp;
// check before decrementing, as a pointer to one-before-first
// is not allowed in C
if(bp == b) break;
--bp;
}
if(buf!=lim) *buf=0; // "regular" case: terminate *after*
else lim[-1]=0; // bad case: truncate
return buf;
}
void doformat(char *buf, char *lim, double a, double b) {
if(buf==lim) return; // cannot do anything
*buf++='{';
if(buf==lim) goto end;
buf = fmtp04f(buf, lim, a);
if(buf==lim) return; // already terminated by fmtp04f
*buf++=',';
if(buf==lim) goto end;
buf = fmtp04f(buf, lim, b);
if(buf==lim) return; // idem
*buf++='}';
if(buf==lim) goto end;
*buf++=0;
end:
lim[-1]=0; // always terminate
}
It passes some random tests, so I'm reasonably confident that it is not too wrong.
For some reason, #chux version on my machine (64 bit Linux, gcc 6.3) is generally 2/3 times faster than the baseline, while my version is usually 10/30 times faster than the baseline. I don't know if this is because my snprintf is particularly good or particularly bad. As said above, YMMV.
I have a simple (brute-force) recursive solver algorithm that takes lots of time for bigger values of OpxCnt variable. For small values of OpxCnt, no problem, works like a charm. The algorithm gets very slow as the OpxCnt variable gets bigger. This is to be expected but any optimization or a different algorithm ?
My final goal is that :: I want to read all the True values in the map array by
executing some number of read operations that have the minimum operation
cost. This is not the same as minimum number of read operations.
At function completion, There should be no True value unread.
map array is populated by some external function, any member may be 1 or 0.
For example ::
map[4] = 1;
map[8] = 1;
1 read operation having Adr=4,Cnt=5 has the lowest cost (35)
whereas
2 read operations having Adr=4,Cnt=1 & Adr=8,Cnt=1 costs (27+27=54)
#include <string.h>
typedef unsigned int Ui32;
#define cntof(x) (sizeof(x) / sizeof((x)[0]))
#define ZERO(x) do{memset(&(x), 0, sizeof(x));}while(0)
typedef struct _S_MB_oper{
Ui32 Adr;
Ui32 Cnt;
}S_MB_oper;
typedef struct _S_MB_code{
Ui32 OpxCnt;
S_MB_oper OpxLst[20];
Ui32 OpxPay;
}S_MB_code;
char map[65536] = {0};
static int opx_ListOkey(S_MB_code *px_kod, char *pi_map)
{
int cost = 0;
char map[65536];
memcpy(map, pi_map, sizeof(map));
for(Ui32 o = 0; o < px_kod->OpxCnt; o++)
{
for(Ui32 i = 0; i < px_kod->OpxLst[o].Cnt; i++)
{
Ui32 adr = px_kod->OpxLst[o].Adr + i;
// ...
if(adr < cntof(map)){map[adr] = 0x0;}
}
}
for(Ui32 i = 0; i < cntof(map); i++)
{
if(map[i] > 0x0){return -1;}
}
// calculate COST...
for(Ui32 o = 0; o < px_kod->OpxCnt; o++)
{
cost += 12;
cost += 13;
cost += (2 * px_kod->OpxLst[o].Cnt);
}
px_kod->OpxPay = (Ui32)cost; return cost;
}
static int opx_FindNext(char *map, int pi_idx)
{
int i;
if(pi_idx < 0){pi_idx = 0;}
for(i = pi_idx; i < 65536; i++)
{
if(map[i] > 0x0){return i;}
}
return -1;
}
static int opx_FindZero(char *map, int pi_idx)
{
int i;
if(pi_idx < 0){pi_idx = 0;}
for(i = pi_idx; i < 65536; i++)
{
if(map[i] < 0x1){return i;}
}
return -1;
}
static int opx_Resolver(S_MB_code *po_bst, S_MB_code *px_wrk, char *pi_map, Ui32 *px_idx, int _min, int _max)
{
int pay, kmax, kmin = 1;
if(*px_idx >= px_wrk->OpxCnt)
{
return opx_ListOkey(px_wrk, pi_map);
}
_min = opx_FindNext(pi_map, _min);
// ...
if(_min < 0){return -1;}
kmax = (_max - _min) + 1;
// must be less than 127 !
if(kmax > 127){kmax = 127;}
// is this recursion the last one ?
if(*px_idx >= (px_wrk->OpxCnt - 1))
{
kmin = kmax;
}
else
{
int zero = opx_FindZero(pi_map, _min);
// ...
if(zero > 0)
{
kmin = zero - _min;
// enforce kmax limit !?
if(kmin > kmax){kmin = kmax;}
}
}
for(int _cnt = kmin; _cnt <= kmax; _cnt++)
{
px_wrk->OpxLst[*px_idx].Adr = (Ui32)_min;
px_wrk->OpxLst[*px_idx].Cnt = (Ui32)_cnt;
(*px_idx)++;
pay = opx_Resolver(po_bst, px_wrk, pi_map, px_idx, (_min + _cnt), _max);
(*px_idx)--;
if(pay > 0)
{
if((Ui32)pay < po_bst->OpxPay)
{
memcpy(po_bst, px_wrk, sizeof(*po_bst));
}
}
}
return (int)po_bst->OpxPay;
}
int main()
{
int _max = -1, _cnt = 0;
S_MB_code best = {0};
S_MB_code work = {0};
// SOME TEST DATA...
map[ 4] = 1;
map[ 8] = 1;
/*
map[64] = 1;
map[72] = 1;
map[80] = 1;
map[88] = 1;
map[96] = 1;
*/
// SOME TEST DATA...
for(int i = 0; i < cntof(map); i++)
{
if(map[i] > 0)
{
_max = i; _cnt++;
}
}
// num of Opx can be as much as num of individual bit(s).
if(_cnt > cntof(work.OpxLst)){_cnt = cntof(work.OpxLst);}
best.OpxPay = 1000000000L; // invalid great number...
for(int opx_cnt = 1; opx_cnt <= _cnt; opx_cnt++)
{
int rv;
Ui32 x = 0;
ZERO(work); work.OpxCnt = (Ui32)opx_cnt;
rv = opx_Resolver(&best, &work, map, &x, -42, _max);
}
return 0;
}
You can use dynamic programming to calculate the lowest cost that covers the first i true values in map[]. Call this f(i). As I'll explain, you can calculate f(i) by looking at all f(j) for j < i, so this will take time quadratic in the number of true values -- much better than exponential. The final answer you're looking for will be f(n), where n is the number of true values in map[].
A first step is to preprocess map[] into a list of the positions of true values. (It's possible to do DP on the raw map[] array, but this will be slower if true values are sparse, and cannot be faster.)
int pos[65536]; // Every position *could* be true
int nTrue = 0;
void getPosList() {
for (int i = 0; i < 65536; ++i) {
if (map[i]) pos[nTrue++] = i;
}
}
When we're looking at the subproblem on just the first i true values, what we know is that the ith true value must be covered by a read that ends at i. This block could start at any position j <= i; we don't know, so we have to test all i of them and pick the best. The key property (Optimal Substructure) that enables DP here is that in any optimal solution to the i-sized subproblem, if the read that covers the ith true value starts at the jth true value, then the preceding j-1 true values must be covered by an optimal solution to the (j-1)-sized subproblem.
So: f(i) = min(f(j) + score(pos(j+1), pos(i)), with the minimum taken over all 1 <= j < i. pos(k) refers to the position of the kth true value in map[], and score(x, y) is the score of a read from position x to position y, inclusive.
int scores[65537]; // We effectively start indexing at 1
scores[0] = 0; // Covering the first 0 true values requires 0 cost
// Calculate the minimum score that could allow the first i > 0 true values
// to be read, and store it in scores[i].
// We can assume that all lower values have already been calculated.
void calcF(int i) {
int bestStart, bestScore = INT_MAX;
for (int j = 0; j < i; ++j) { // Always executes at least once
int attemptScore = scores[j] + score(pos[j + 1], pos[i]);
if (attemptScore < bestScore) {
bestStart = j + 1;
bestScore = attemptScore;
}
}
scores[i] = bestScore;
}
int score(int i, int j) {
return 25 + 2 * (j + 1 - i);
}
int main(int argc, char **argv) {
// Set up map[] however you want
getPosList();
for (int i = 1; i <= nTrue; ++i) {
calcF(i);
}
printf("Optimal solution has cost %d.\n", scores[nTrue]);
return 0;
}
Extracting a Solution from Scores
Using this scheme, you can calculate the score of an optimal solution: it's simply f(n), where n is the number of true values in map[]. In order to actually construct the solution, you need to read back through the table of f() scores to infer which choice was made:
void printSolution() {
int i = nTrue;
while (i) {
for (int j = 0; j < i; ++j) {
if (scores[i] == scores[j] + score(pos[j + 1], pos[i])) {
// We know that a read can be made from pos[j + 1] to pos[i] in
// an optimal solution, so let's make it.
printf("Read from %d to %d for cost %d.\n", pos[j + 1], pos[i], score(pos[j + 1], pos[i]));
i = j;
break;
}
}
}
}
There may be several possible choices, but all of them will produce optimal solutions.
Further Speedups
The solution above will work for an arbitrary scoring function. Because your scoring function has a simple structure, it may be that even faster algorithms can be developed.
For example, we can prove that there is a gap width above which it is always beneficial to break a single read into two reads. Suppose we have a read from position x-a to x, and another read from position y to y+b, with y > x. The combined costs of these two separate reads are 25 + 2 * (a + 1) + 25 + 2 * (b + 1) = 54 + 2 * (a + b). A single read stretching from x-a to y+b would cost 25 + 2 * (y + b - x + a + 1) = 27 + 2 * (a + b) + 2 * (y - x). Therefore the single read costs 27 - 2 * (y - x) less. If y - x > 13, this difference goes below zero: in other words, it can never be optimal to include a single read that spans a gap of 12 or more.
To make use of this property, inside calcF(), final reads could be tried in decreasing order of start-position (i.e. in increasing order of width), and the inner loop stopped as soon as any gap width exceeds 12. Because that read and all subsequent wider reads tried would contain this too-large gap and therefore be suboptimal, they need not be tried.
For solving project euler problem 20 to find the sum of digits in 100! i am running the following program , it is working for factorial of small numbers but not for 100.which data type should i use or is it necessary to use an array for storing the digits?
int rec(int);
void main()
{
int f=1,i=1,z,s=0,r,n;
while(i<=100)
{
f=f*i;
f=rec(f);
i++;
}
n=f;
while(n!=0)
{
r=n%10;
n=n/10;
s=s+r;
}
printf("\n%d",s);
}
int rec(int t)
{
if(t%10==0)
{
t=t/10;
rec(t);
}
return t;
}
Approximate factorial of 100 can be calculated using the double type. You can also use the Stirling's formula, stating that
n! ≈ sqrt(2*M_PI*n) * pow(n/exp(0),n)
If you plug in the numbers, you'll get n! ≈ 9*10157. That means your type needs to be able to hold 158 decimal digits or, equivalently, ~log2(9*10157) = 525 bits or 66 8-bit bytes.
No fundamental numeric type in C is big enough. The largest you are guaranteed to get is 64 bits (if you use unsigned long long).
So, if you want to calculate n! in C, you either need to construct long arithmetic multiplication by hand or use a special library that can do that for you.
For this relatively simple task you can actually implement long multiplication and use it to get the factorial value by repeated multiplication.
In the following program I've used an in-place multiplication algorithm, which modifies one of the multiplicands in the process and eventually replaces it with the product. The algorithm can be derived directly from the long multiplication known from school.
This program calculates factorials of integers from 1 up to and including 100:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
typedef unsigned char uint8;
typedef unsigned short uint16;
#if UINT_MAX >= 0xFFFFFFFF
typedef unsigned uint32;
#else
typedef unsigned long uint32;
#endif
typedef unsigned uint;
void MulInPlace(uint8* dst/* n bytes */,
const uint8* src/* n bytes */,
uint n)
{
uint c1, c2;
if (n >= 0xFFFF) abort();
for (c1 = n - 1; c1 != ~0u; c1--)
{
uint16 s = 0;
uint32 p = 0; // p must be able to store ceil(log2(n))+2*8 bits
for (c2 = c1; c2 != ~0u; c2--)
{
p += dst[c2] * src[c1 - c2];
}
dst[c1] = (uint8)(p & 0xFF);
for (c2 = c1 + 1; c2 < n; c2++)
{
p >>= 8;
s += dst[c2] + (uint8)(p & 0xFF);
dst[c2] = (uint8)(s & 0xFF);
s >>= 8;
}
}
}
int ByteDivInPlace(uint8* dst/* n bytes */,
uint n,
uint8 divisor,
uint8* remainder)
{
uint rem = 0;
int nonzero = 0;
while (n)
{
rem += dst[n - 1];
nonzero |= (dst[n - 1] = rem / divisor);
rem = (rem % divisor) << 8;
n--;
}
if (remainder != NULL)
*remainder = (uint8)(rem >> 8);
return nonzero; // 1 if the quotient is non-zero, 0 otherwise
}
void IncInPlace(uint8* dst/* n bytes */,
uint n)
{
uint c = 1;
while (n-- && c)
{
c += *dst;
*dst++ = c & 0xFF;
c >>= 8;
}
}
void DestroyingDecimalPrint(uint8* dst, uint n)
{
uint8 r;
if (ByteDivInPlace(dst, n, 10, &r))
DestroyingDecimalPrint(dst, n);
printf("%d", r);
}
int main(void)
{
int i;
uint8 factorial[66];
uint8 factor[sizeof(factorial)];
uint8 tmp[sizeof(factorial)];
// factor = 1
memset(factor, 0, sizeof(factor));
factor[0] = 1;
// factorial = 1
memcpy(factorial, factor, sizeof(factorial));
for (i = 1; i <= 100; i++)
{
// factorial *= factor
MulInPlace(factorial, factor, sizeof(factorial));
// tmp = factorial
memcpy(tmp, factorial, sizeof(factorial));
// print i and tmp
printf("%i! = ", i);
DestroyingDecimalPrint(tmp, sizeof(tmp));
printf("\n");
// factor += 1
IncInPlace(factor, sizeof(factor));
}
return 0;
}
Output (ideone):
1! = 1
2! = 2
3! = 6
4! = 24
5! = 120
6! = 720
7! = 5040
8! = 40320
9! = 362880
10! = 3628800
11! = 39916800
12! = 479001600
13! = 6227020800
14! = 87178291200
15! = 1307674368000
16! = 20922789888000
17! = 355687428096000
18! = 6402373705728000
19! = 121645100408832000
20! = 2432902008176640000
21! = 51090942171709440000
22! = 1124000727777607680000
23! = 25852016738884976640000
24! = 620448401733239439360000
25! = 15511210043330985984000000
26! = 403291461126605635584000000
27! = 10888869450418352160768000000
28! = 304888344611713860501504000000
29! = 8841761993739701954543616000000
30! = 265252859812191058636308480000000
31! = 8222838654177922817725562880000000
32! = 263130836933693530167218012160000000
33! = 8683317618811886495518194401280000000
34! = 295232799039604140847618609643520000000
35! = 10333147966386144929666651337523200000000
36! = 371993326789901217467999448150835200000000
37! = 13763753091226345046315979581580902400000000
38! = 523022617466601111760007224100074291200000000
39! = 20397882081197443358640281739902897356800000000
40! = 815915283247897734345611269596115894272000000000
41! = 33452526613163807108170062053440751665152000000000
42! = 1405006117752879898543142606244511569936384000000000
43! = 60415263063373835637355132068513997507264512000000000
44! = 2658271574788448768043625811014615890319638528000000000
45! = 119622220865480194561963161495657715064383733760000000000
46! = 5502622159812088949850305428800254892961651752960000000000
47! = 258623241511168180642964355153611979969197632389120000000000
48! = 12413915592536072670862289047373375038521486354677760000000000
49! = 608281864034267560872252163321295376887552831379210240000000000
50! = 30414093201713378043612608166064768844377641568960512000000000000
51! = 1551118753287382280224243016469303211063259720016986112000000000000
52! = 80658175170943878571660636856403766975289505440883277824000000000000
53! = 4274883284060025564298013753389399649690343788366813724672000000000000
54! = 230843697339241380472092742683027581083278564571807941132288000000000000
55! = 12696403353658275925965100847566516959580321051449436762275840000000000000
56! = 710998587804863451854045647463724949736497978881168458687447040000000000000
57! = 40526919504877216755680601905432322134980384796226602145184481280000000000000
58! = 2350561331282878571829474910515074683828862318181142924420699914240000000000000
59! = 138683118545689835737939019720389406345902876772687432540821294940160000000000000
60! = 8320987112741390144276341183223364380754172606361245952449277696409600000000000000
61! = 507580213877224798800856812176625227226004528988036003099405939480985600000000000000
62! = 31469973260387937525653122354950764088012280797258232192163168247821107200000000000000
63! = 1982608315404440064116146708361898137544773690227268628106279599612729753600000000000000
64! = 126886932185884164103433389335161480802865516174545192198801894375214704230400000000000000
65! = 8247650592082470666723170306785496252186258551345437492922123134388955774976000000000000000
66! = 544344939077443064003729240247842752644293064388798874532860126869671081148416000000000000000
67! = 36471110918188685288249859096605464427167635314049524593701628500267962436943872000000000000000
68! = 2480035542436830599600990418569171581047399201355367672371710738018221445712183296000000000000000
69! = 171122452428141311372468338881272839092270544893520369393648040923257279754140647424000000000000000
70! = 11978571669969891796072783721689098736458938142546425857555362864628009582789845319680000000000000000
71! = 850478588567862317521167644239926010288584608120796235886430763388588680378079017697280000000000000000
72! = 61234458376886086861524070385274672740778091784697328983823014963978384987221689274204160000000000000000
73! = 4470115461512684340891257138125051110076800700282905015819080092370422104067183317016903680000000000000000
74! = 330788544151938641225953028221253782145683251820934971170611926835411235700971565459250872320000000000000000
75! = 24809140811395398091946477116594033660926243886570122837795894512655842677572867409443815424000000000000000000
76! = 1885494701666050254987932260861146558230394535379329335672487982961844043495537923117729972224000000000000000000
77! = 145183092028285869634070784086308284983740379224208358846781574688061991349156420080065207861248000000000000000000
78! = 11324281178206297831457521158732046228731749579488251990048962825668835325234200766245086213177344000000000000000000
79! = 894618213078297528685144171539831652069808216779571907213868063227837990693501860533361810841010176000000000000000000
80! = 71569457046263802294811533723186532165584657342365752577109445058227039255480148842668944867280814080000000000000000000
81! = 5797126020747367985879734231578109105412357244731625958745865049716390179693892056256184534249745940480000000000000000000
82! = 475364333701284174842138206989404946643813294067993328617160934076743994734899148613007131808479167119360000000000000000000
83! = 39455239697206586511897471180120610571436503407643446275224357528369751562996629334879591940103770870906880000000000000000000
84! = 3314240134565353266999387579130131288000666286242049487118846032383059131291716864129885722968716753156177920000000000000000000
85! = 281710411438055027694947944226061159480056634330574206405101912752560026159795933451040286452340924018275123200000000000000000000
86! = 24227095383672732381765523203441259715284870552429381750838764496720162249742450276789464634901319465571660595200000000000000000000
87! = 2107757298379527717213600518699389595229783738061356212322972511214654115727593174080683423236414793504734471782400000000000000000000
88! = 185482642257398439114796845645546284380220968949399346684421580986889562184028199319100141244804501828416633516851200000000000000000000
89! = 16507955160908461081216919262453619309839666236496541854913520707833171034378509739399912570787600662729080382999756800000000000000000000
90! = 1485715964481761497309522733620825737885569961284688766942216863704985393094065876545992131370884059645617234469978112000000000000000000000
91! = 135200152767840296255166568759495142147586866476906677791741734597153670771559994765685283954750449427751168336768008192000000000000000000000
92! = 12438414054641307255475324325873553077577991715875414356840239582938137710983519518443046123837041347353107486982656753664000000000000000000000
93! = 1156772507081641574759205162306240436214753229576413535186142281213246807121467315215203289516844845303838996289387078090752000000000000000000000
94! = 108736615665674308027365285256786601004186803580182872307497374434045199869417927630229109214583415458560865651202385340530688000000000000000000000
95! = 10329978488239059262599702099394727095397746340117372869212250571234293987594703124871765375385424468563282236864226607350415360000000000000000000000
96! = 991677934870949689209571401541893801158183648651267795444376054838492222809091499987689476037000748982075094738965754305639874560000000000000000000000
97! = 96192759682482119853328425949563698712343813919172976158104477319333745612481875498805879175589072651261284189679678167647067832320000000000000000000000
98! = 9426890448883247745626185743057242473809693764078951663494238777294707070023223798882976159207729119823605850588608460429412647567360000000000000000000000
99! = 933262154439441526816992388562667004907159682643816214685929638952175999932299156089414639761565182862536979208272237582511852109168640000000000000000000000
100! = 93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000
You should look for overflow, print the value after each iteration.
Note that rec(t); doesn't do anything as it doesn't use the returned value... you want t = rec(t);.
int is definitely too short, try long long... if that's still overflowing, you need another data structure.. eg: GMP Library.
Note: using some "proper" language for the job might give you some insight to the range you have to support... e.g. with python:
>>> import math
>>> math.factorial(100)
93326215443944152681699238856266700490715968264381621468592963895217599993229915608941463976156518286253697920827223758251185210916864000000000000000000000000L
private static void problem20()
{
string muliplent = "100";
for (int i = 99; i > 1; i--)
{
muliplent = getproduct(muliplent, i);
}
int sum = 0;
char[] result=muliplent.ToCharArray();
int count = muliplent.ToCharArray().Count();
for (int j = 0; j < count; j++)
{
sum = sum + (result[j] - '0');
}
Console.WriteLine("sum is {0}", sum);
Console.ReadLine();
}
private static string getproduct(string multiplent, int multiplier)
{
StringBuilder str = new StringBuilder();
int product = 0;
int remainder = 0;
int dividend = 0;
char[] c = multiplent.ToCharArray();
for (int i = c.Count() - 1; i >= 0; i--)
{
product = (((c[i] - '0') * multiplier) + dividend);
remainder = product % 10;
dividend = product / 10;
if (i != 0)
{
str.Insert(0, remainder);
}
}
str.Insert(0, product);
return str.ToString();
}
I am trying to implement a linear least squares fit onto 2 arrays of data: time vs amplitude. The only technique I know so far is to test all of the possible m and b points in (y = m*x+b) and then find out which combination fits my data best so that it has the least error. However, I think iterating so many combinations is sometimes useless because it tests out everything. Are there any techniques to speed up the process that I don't know about? Thanks.
Try this code. It fits y = mx + b to your (x,y) data.
The arguments to linreg are
linreg(int n, REAL x[], REAL y[], REAL* b, REAL* m, REAL* r)
n = number of data points
x,y = arrays of data
*b = output intercept
*m = output slope
*r = output correlation coefficient (can be NULL if you don't want it)
The return value is 0 on success, !=0 on failure.
Here's the code
#include "linreg.h"
#include <stdlib.h>
#include <math.h> /* math functions */
//#define REAL float
#define REAL double
inline static REAL sqr(REAL x) {
return x*x;
}
int linreg(int n, const REAL x[], const REAL y[], REAL* m, REAL* b, REAL* r){
REAL sumx = 0.0; /* sum of x */
REAL sumx2 = 0.0; /* sum of x**2 */
REAL sumxy = 0.0; /* sum of x * y */
REAL sumy = 0.0; /* sum of y */
REAL sumy2 = 0.0; /* sum of y**2 */
for (int i=0;i<n;i++){
sumx += x[i];
sumx2 += sqr(x[i]);
sumxy += x[i] * y[i];
sumy += y[i];
sumy2 += sqr(y[i]);
}
REAL denom = (n * sumx2 - sqr(sumx));
if (denom == 0) {
// singular matrix. can't solve the problem.
*m = 0;
*b = 0;
if (r) *r = 0;
return 1;
}
*m = (n * sumxy - sumx * sumy) / denom;
*b = (sumy * sumx2 - sumx * sumxy) / denom;
if (r!=NULL) {
*r = (sumxy - sumx * sumy / n) / /* compute correlation coeff */
sqrt((sumx2 - sqr(sumx)/n) *
(sumy2 - sqr(sumy)/n));
}
return 0;
}
Example
You can run this example online.
int main()
{
int n = 6;
REAL x[6]= {1, 2, 4, 5, 10, 20};
REAL y[6]= {4, 6, 12, 15, 34, 68};
REAL m,b,r;
linreg(n,x,y,&m,&b,&r);
printf("m=%g b=%g r=%g\n",m,b,r);
return 0;
}
Here is the output
m=3.43651 b=-0.888889 r=0.999192
Here is the Excel plot and linear fit (for verification).
All values agree exactly with the C code above (note C code returns r while Excel returns R**2).
There are efficient algorithms for least-squares fitting; see Wikipedia for details. There are also libraries that implement the algorithms for you, likely more efficiently than a naive implementation would do; the GNU Scientific Library is one example, but there are others under more lenient licenses as well.
From Numerical Recipes: The Art of Scientific Computing in (15.2) Fitting Data to a Straight Line:
Linear Regression:
Consider the problem of fitting a set of N data points (xi, yi) to a straight-line model:
Assume that the uncertainty: sigmai associated with each yi and that the xi’s (values of the dependent variable) are known exactly. To measure how well the model agrees with the data, we use the chi-square function, which in this case is:
The above equation is minimized to determine a and b. This is done by finding the derivative of the above equation with respect to a and b, equate them to zero and solve for a and b. Then we estimate the probable uncertainties in the estimates of a and b, since obviously the measurement errors in the data must introduce some uncertainty in the determination of those parameters. Additionally, we must estimate the goodness-of-fit of the data to the
model. Absent this estimate, we have not the slightest indication that the parameters a and b in the model have any meaning at all.
The below struct performs the mentioned calculations:
struct Fitab {
// Object for fitting a straight line y = a + b*x to a set of
// points (xi, yi), with or without available
// errors sigma i . Call one of the two constructors to calculate the fit.
// The answers are then available as the variables:
// a, b, siga, sigb, chi2, and either q or sigdat.
int ndata;
double a, b, siga, sigb, chi2, q, sigdat; // Answers.
vector<double> &x, &y, &sig;
// Constructor.
Fitab(vector<double> &xx, vector<double> &yy, vector<double> &ssig)
: ndata(xx.size()), x(xx), y(yy), sig(ssig), chi2(0.), q(1.), sigdat(0.)
{
// Given a set of data points x[0..ndata-1], y[0..ndata-1]
// with individual standard deviations sig[0..ndata-1],
// sets a,b and their respective probable uncertainties
// siga and sigb, the chi-square: chi2, and the goodness-of-fit
// probability: q
Gamma gam;
int i;
double ss=0., sx=0., sy=0., st2=0., t, wt, sxoss; b=0.0;
for (i=0;i < ndata; i++) { // Accumulate sums ...
wt = 1.0 / SQR(sig[i]); //...with weights
ss += wt;
sx += x[i]*wt;
sy += y[i]*wt;
}
sxoss = sx/ss;
for (i=0; i < ndata; i++) {
t = (x[i]-sxoss) / sig[i];
st2 += t*t;
b += t*y[i]/sig[i];
}
b /= st2; // Solve for a, b, sigma-a, and simga-b.
a = (sy-sx*b) / ss;
siga = sqrt((1.0+sx*sx/(ss*st2))/ss);
sigb = sqrt(1.0/st2); // Calculate chi2.
for (i=0;i<ndata;i++) chi2 += SQR((y[i]-a-b*x[i])/sig[i]);
if (ndata>2) q=gam.gammq(0.5*(ndata-2),0.5*chi2); // goodness of fit
}
// Constructor.
Fitab(vector<double> &xx, vector<double> &yy)
: ndata(xx.size()), x(xx), y(yy), sig(xx), chi2(0.), q(1.), sigdat(0.)
{
// As above, but without known errors (sig is not used).
// The uncertainties siga and sigb are estimated by assuming
// equal errors for all points, and that a straight line is
// a good fit. q is returned as 1.0, the normalization of chi2
// is to unit standard deviation on all points, and sigdat
// is set to the estimated error of each point.
int i;
double ss,sx=0.,sy=0.,st2=0.,t,sxoss;
b=0.0; // Accumulate sums ...
for (i=0; i < ndata; i++) {
sx += x[i]; // ...without weights.
sy += y[i];
}
ss = ndata;
sxoss = sx/ss;
for (i=0;i < ndata; i++) {
t = x[i]-sxoss;
st2 += t*t;
b += t*y[i];
}
b /= st2; // Solve for a, b, sigma-a, and sigma-b.
a = (sy-sx*b)/ss;
siga=sqrt((1.0+sx*sx/(ss*st2))/ss);
sigb=sqrt(1.0/st2); // Calculate chi2.
for (i=0;i<ndata;i++) chi2 += SQR(y[i]-a-b*x[i]);
if (ndata > 2) sigdat=sqrt(chi2/(ndata-2));
// For unweighted data evaluate typical
// sig using chi2, and adjust
// the standard deviations.
siga *= sigdat;
sigb *= sigdat;
}
};
where struct Gamma:
struct Gamma : Gauleg18 {
// Object for incomplete gamma function.
// Gauleg18 provides coefficients for Gauss-Legendre quadrature.
static const Int ASWITCH=100; When to switch to quadrature method.
static const double EPS; // See end of struct for initializations.
static const double FPMIN;
double gln;
double gammp(const double a, const double x) {
// Returns the incomplete gamma function P(a,x)
if (x < 0.0 || a <= 0.0) throw("bad args in gammp");
if (x == 0.0) return 0.0;
else if ((Int)a >= ASWITCH) return gammpapprox(a,x,1); // Quadrature.
else if (x < a+1.0) return gser(a,x); // Use the series representation.
else return 1.0-gcf(a,x); // Use the continued fraction representation.
}
double gammq(const double a, const double x) {
// Returns the incomplete gamma function Q(a,x) = 1 - P(a,x)
if (x < 0.0 || a <= 0.0) throw("bad args in gammq");
if (x == 0.0) return 1.0;
else if ((Int)a >= ASWITCH) return gammpapprox(a,x,0); // Quadrature.
else if (x < a+1.0) return 1.0-gser(a,x); // Use the series representation.
else return gcf(a,x); // Use the continued fraction representation.
}
double gser(const Doub a, const Doub x) {
// Returns the incomplete gamma function P(a,x) evaluated by its series representation.
// Also sets ln (gamma) as gln. User should not call directly.
double sum,del,ap;
gln=gammln(a);
ap=a;
del=sum=1.0/a;
for (;;) {
++ap;
del *= x/ap;
sum += del;
if (fabs(del) < fabs(sum)*EPS) {
return sum*exp(-x+a*log(x)-gln);
}
}
}
double gcf(const Doub a, const Doub x) {
// Returns the incomplete gamma function Q(a, x) evaluated
// by its continued fraction representation.
// Also sets ln (gamma) as gln. User should not call directly.
int i;
double an,b,c,d,del,h;
gln=gammln(a);
b=x+1.0-a; // Set up for evaluating continued fraction
// by modified Lentz’s method with with b0 = 0.
c=1.0/FPMIN;
d=1.0/b;
h=d;
for (i=1;;i++) {
// Iterate to convergence.
an = -i*(i-a);
b += 2.0;
d=an*d+b;
if (fabs(d) < FPMIN) d=FPMIN;
c=b+an/c;
if (fabs(c) < FPMIN) c=FPMIN;
d=1.0/d;
del=d*c;
h *= del;
if (fabs(del-1.0) <= EPS) break;
}
return exp(-x+a*log(x)-gln)*h; Put factors in front.
}
double gammpapprox(double a, double x, int psig) {
// Incomplete gamma by quadrature. Returns P(a,x) or Q(a, x),
// when psig is 1 or 0, respectively. User should not call directly.
int j;
double xu,t,sum,ans;
double a1 = a-1.0, lna1 = log(a1), sqrta1 = sqrt(a1);
gln = gammln(a);
// Set how far to integrate into the tail:
if (x > a1) xu = MAX(a1 + 11.5*sqrta1, x + 6.0*sqrta1);
else xu = MAX(0.,MIN(a1 - 7.5*sqrta1, x - 5.0*sqrta1));
sum = 0;
for (j=0;j<ngau;j++) { // Gauss-Legendre.
t = x + (xu-x)*y[j];
sum += w[j]*exp(-(t-a1)+a1*(log(t)-lna1));
}
ans = sum*(xu-x)*exp(a1*(lna1-1.)-gln);
return (psig?(ans>0.0? 1.0-ans:-ans):(ans>=0.0? ans:1.0+ans));
}
double invgammp(Doub p, Doub a);
// Inverse function on x of P(a,x) .
};
const Doub Gamma::EPS = numeric_limits<Doub>::epsilon();
const Doub Gamma::FPMIN = numeric_limits<Doub>::min()/EPS
and stuct Gauleg18:
struct Gauleg18 {
// Abscissas and weights for Gauss-Legendre quadrature.
static const Int ngau = 18;
static const Doub y[18];
static const Doub w[18];
};
const Doub Gauleg18::y[18] = {0.0021695375159141994,
0.011413521097787704,0.027972308950302116,0.051727015600492421,
0.082502225484340941, 0.12007019910960293,0.16415283300752470,
0.21442376986779355, 0.27051082840644336, 0.33199876341447887,
0.39843234186401943, 0.46931971407375483, 0.54413605556657973,
0.62232745288031077, 0.70331500465597174, 0.78649910768313447,
0.87126389619061517, 0.95698180152629142};
const Doub Gauleg18::w[18] = {0.0055657196642445571,
0.012915947284065419,0.020181515297735382,0.027298621498568734,
0.034213810770299537,0.040875750923643261,0.047235083490265582,
0.053244713977759692,0.058860144245324798,0.064039797355015485
0.068745323835736408,0.072941885005653087,0.076598410645870640,
0.079687828912071670,0.082187266704339706,0.084078218979661945,
0.085346685739338721,0.085983275670394821};
and, finally fuinction Gamma::invgamp():
double Gamma::invgammp(double p, double a) {
// Returns x such that P(a,x) = p for an argument p between 0 and 1.
int j;
double x,err,t,u,pp,lna1,afac,a1=a-1;
const double EPS=1.e-8; // Accuracy is the square of EPS.
gln=gammln(a);
if (a <= 0.) throw("a must be pos in invgammap");
if (p >= 1.) return MAX(100.,a + 100.*sqrt(a));
if (p <= 0.) return 0.0;
if (a > 1.) {
lna1=log(a1);
afac = exp(a1*(lna1-1.)-gln);
pp = (p < 0.5)? p : 1. - p;
t = sqrt(-2.*log(pp));
x = (2.30753+t*0.27061)/(1.+t*(0.99229+t*0.04481)) - t;
if (p < 0.5) x = -x;
x = MAX(1.e-3,a*pow(1.-1./(9.*a)-x/(3.*sqrt(a)),3));
} else {
t = 1.0 - a*(0.253+a*0.12); and (6.2.9).
if (p < t) x = pow(p/t,1./a);
else x = 1.-log(1.-(p-t)/(1.-t));
}
for (j=0;j<12;j++) {
if (x <= 0.0) return 0.0; // x too small to compute accurately.
err = gammp(a,x) - p;
if (a > 1.) t = afac*exp(-(x-a1)+a1*(log(x)-lna1));
else t = exp(-x+a1*log(x)-gln);
u = err/t;
// Halley’s method.
x -= (t = u/(1.-0.5*MIN(1.,u*((a-1.)/x - 1))));
// Halve old value if x tries to go negative.
if (x <= 0.) x = 0.5*(x + t);
if (fabs(t) < EPS*x ) break;
}
return x;
}
Here is my version of a C/C++ function that does simple linear regression. The calculations follow the wikipedia article on simple linear regression. This is published as a single-header public-domain (MIT) library on github: simple_linear_regression. The library (.h file) is tested to work on Linux and Windows, and from C and C++ using -Wall -Werror and all -std versions supported by clang/gcc.
#define SIMPLE_LINEAR_REGRESSION_ERROR_INPUT_VALUE -2
#define SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC -3
int simple_linear_regression(const double * x, const double * y, const int n, double * slope_out, double * intercept_out, double * r2_out) {
double sum_x = 0.0;
double sum_xx = 0.0;
double sum_xy = 0.0;
double sum_y = 0.0;
double sum_yy = 0.0;
double n_real = (double)(n);
int i = 0;
double slope = 0.0;
double denominator = 0.0;
if (x == NULL || y == NULL || n < 2) {
return SIMPLE_LINEAR_REGRESSION_ERROR_INPUT_VALUE;
}
for (i = 0; i < n; ++i) {
sum_x += x[i];
sum_xx += x[i] * x[i];
sum_xy += x[i] * y[i];
sum_y += y[i];
sum_yy += y[i] * y[i];
}
denominator = n_real * sum_xx - sum_x * sum_x;
if (denominator == 0.0) {
return SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC;
}
slope = (n_real * sum_xy - sum_x * sum_y) / denominator;
if (slope_out != NULL) {
*slope_out = slope;
}
if (intercept_out != NULL) {
*intercept_out = (sum_y - slope * sum_x) / n_real;
}
if (r2_out != NULL) {
denominator = ((n_real * sum_xx) - (sum_x * sum_x)) * ((n_real * sum_yy) - (sum_y * sum_y));
if (denominator == 0.0) {
return SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC;
}
*r2_out = ((n_real * sum_xy) - (sum_x * sum_y)) * ((n_real * sum_xy) - (sum_x * sum_y)) / denominator;
}
return 0;
}
Usage example:
#define SIMPLE_LINEAR_REGRESSION_IMPLEMENTATION
#include "simple_linear_regression.h"
#include <stdio.h>
/* Some data that we want to find the slope, intercept and r2 for */
static const double x[] = { 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83 };
static const double y[] = { 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46 };
int main() {
double slope = 0.0;
double intercept = 0.0;
double r2 = 0.0;
int res = 0;
res = simple_linear_regression(x, y, sizeof(x) / sizeof(x[0]), &slope, &intercept, &r2);
if (res < 0) {
printf("Error: %s\n", simple_linear_regression_error_string(res));
return res;
}
printf("slope: %f\n", slope);
printf("intercept: %f\n", intercept);
printf("r2: %f\n", r2);
return 0;
}
The original example above worked well for me with slope and offset but I had a hard time with the corr coef. Maybe I don't have my parenthesis working the same as the assumed precedence? Anyway, with some help of other web pages I finally got values that match the linear trend-line in Excel. Thought I would share my code using Mark Lakata's variable names. Hope this helps.
double slope = ((n * sumxy) - (sumx * sumy )) / denom;
double intercept = ((sumy * sumx2) - (sumx * sumxy)) / denom;
double term1 = ((n * sumxy) - (sumx * sumy));
double term2 = ((n * sumx2) - (sumx * sumx));
double term3 = ((n * sumy2) - (sumy * sumy));
double term23 = (term2 * term3);
double r2 = 1.0;
if (fabs(term23) > MIN_DOUBLE) // Define MIN_DOUBLE somewhere as 1e-9 or similar
r2 = (term1 * term1) / term23;
as an assignment I had to code in C a simple linear regression using RMSE loss function. The program is dynamic and you can enter your own values and choose your own loss function which is for now limited to Root Mean Square Error. But first here are the algorithms I used:
now the code... you need gnuplot to display the chart, sudo apt install gnuplot
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/types.h>
#define BUFFSIZE 64
#define MAXSIZE 100
static double vector_x[MAXSIZE] = {0};
static double vector_y[MAXSIZE] = {0};
static double vector_predict[MAXSIZE] = {0};
static double max_x;
static double max_y;
static double mean_x;
static double mean_y;
static double teta_0_intercept;
static double teta_1_grad;
static double RMSE;
static double r_square;
static double prediction;
static char intercept[BUFFSIZE];
static char grad[BUFFSIZE];
static char xrange[BUFFSIZE];
static char yrange[BUFFSIZE];
static char lossname_RMSE[BUFFSIZE] = "Simple Linear Regression using RMSE'";
static char cmd_gnu_0[BUFFSIZE] = "set title '";
static char cmd_gnu_1[BUFFSIZE] = "intercept = ";
static char cmd_gnu_2[BUFFSIZE] = "grad = ";
static char cmd_gnu_3[BUFFSIZE] = "set xrange [0:";
static char cmd_gnu_4[BUFFSIZE] = "set yrange [0:";
static char cmd_gnu_5[BUFFSIZE] = "f(x) = (grad * x) + intercept";
static char cmd_gnu_6[BUFFSIZE] = "plot f(x), 'data.temp' with points pointtype 7";
static char const *commands_gnuplot[] = {
cmd_gnu_0,
cmd_gnu_1,
cmd_gnu_2,
cmd_gnu_3,
cmd_gnu_4,
cmd_gnu_5,
cmd_gnu_6,
};
static size_t size;
static void user_input()
{
printf("Enter x,y vector size, MAX = 100\n");
scanf("%lu", &size);
if (size > MAXSIZE) {
printf("Wrong input size is too big\n");
user_input();
}
printf("vector's size is %lu\n", size);
size_t i;
for (i = 0; i < size; i++) {
printf("Enter vector_x[%ld] values\n", i);
scanf("%lf", &vector_x[i]);
}
for (i = 0; i < size; i++) {
printf("Enter vector_y[%ld] values\n", i);
scanf("%lf", &vector_y[i]);
}
}
static void display_vector()
{
size_t i;
for (i = 0; i < size; i++){
printf("vector_x[%lu] = %lf\t", i, vector_x[i]);
printf("vector_y[%lu] = %lf\n", i, vector_y[i]);
}
}
static void concatenate(char p[], char q[]) {
int c;
int d;
c = 0;
while (p[c] != '\0') {
c++;
}
d = 0;
while (q[d] != '\0') {
p[c] = q[d];
d++;
c++;
}
p[c] = '\0';
}
static void compute_mean_x_y()
{
size_t i;
double tmp_x = 0.0;
double tmp_y = 0.0;
for (i = 0; i < size; i++) {
tmp_x += vector_x[i];
tmp_y += vector_y[i];
}
mean_x = tmp_x / size;
mean_y = tmp_y / size;
printf("mean_x = %lf\n", mean_x);
printf("mean_y = %lf\n", mean_y);
}
static void compute_teta_1_grad()
{
double numerator = 0.0;
double denominator = 0.0;
double tmp1 = 0.0;
double tmp2 = 0.0;
size_t i;
for (i = 0; i < size; i++) {
numerator += (vector_x[i] - mean_x) * (vector_y[i] - mean_y);
}
for (i = 0; i < size; i++) {
tmp1 = vector_x[i] - mean_x;
tmp2 = tmp1 * tmp1;
denominator += tmp2;
}
teta_1_grad = numerator / denominator;
printf("teta_1_grad = %lf\n", teta_1_grad);
}
static void compute_teta_0_intercept()
{
teta_0_intercept = mean_y - (teta_1_grad * mean_x);
printf("teta_0_intercept = %lf\n", teta_0_intercept);
}
static void compute_prediction()
{
size_t i;
for (i = 0; i < size; i++) {
vector_predict[i] = teta_0_intercept + (teta_1_grad * vector_x[i]);
printf("y^[%ld] = %lf\n", i, vector_predict[i]);
}
printf("\n");
}
static void compute_RMSE()
{
compute_prediction();
double error = 0;
size_t i;
for (i = 0; i < size; i++) {
error = (vector_predict[i] - vector_y[i]) * (vector_predict[i] - vector_y[i]);
printf("error y^[%ld] = %lf\n", i, error);
RMSE += error;
}
/* mean */
RMSE = RMSE / size;
/* square root mean */
RMSE = sqrt(RMSE);
printf("\nRMSE = %lf\n", RMSE);
}
static void compute_loss_function()
{
int input = 0;
printf("Which loss function do you want to use?\n");
printf(" 1 - RMSE\n");
scanf("%d", &input);
switch(input) {
case 1:
concatenate(cmd_gnu_0, lossname_RMSE);
compute_RMSE();
printf("\n");
break;
default:
printf("Wrong input try again\n");
compute_loss_function(size);
}
}
static void compute_r_square(size_t size)
{
double num_err = 0.0;
double den_err = 0.0;
size_t i;
for (i = 0; i < size; i++) {
num_err += (vector_y[i] - vector_predict[i]) * (vector_y[i] - vector_predict[i]);
den_err += (vector_y[i] - mean_y) * (vector_y[i] - mean_y);
}
r_square = 1 - (num_err/den_err);
printf("R_square = %lf\n", r_square);
}
static void compute_predict_for_x()
{
double x = 0.0;
printf("Please enter x value\n");
scanf("%lf", &x);
prediction = teta_0_intercept + (teta_1_grad * x);
printf("y^ if x = %lf -> %lf\n",x, prediction);
}
static void compute_max_x_y()
{
size_t i;
double tmp1= 0.0;
double tmp2= 0.0;
for (i = 0; i < size; i++) {
if (vector_x[i] > tmp1) {
tmp1 = vector_x[i];
max_x = vector_x[i];
}
if (vector_y[i] > tmp2) {
tmp2 = vector_y[i];
max_y = vector_y[i];
}
}
printf("vector_x max value %lf\n", max_x);
printf("vector_y max value %lf\n", max_y);
}
static void display_model_line()
{
sprintf(intercept, "%0.7lf", teta_0_intercept);
sprintf(grad, "%0.7lf", teta_1_grad);
sprintf(xrange, "%0.7lf", max_x + 1);
sprintf(yrange, "%0.7lf", max_y + 1);
concatenate(cmd_gnu_1, intercept);
concatenate(cmd_gnu_2, grad);
concatenate(cmd_gnu_3, xrange);
concatenate(cmd_gnu_3, "]");
concatenate(cmd_gnu_4, yrange);
concatenate(cmd_gnu_4, "]");
printf("grad = %s\n", grad);
printf("intercept = %s\n", intercept);
printf("xrange = %s\n", xrange);
printf("yrange = %s\n", yrange);
printf("cmd_gnu_0: %s\n", cmd_gnu_0);
printf("cmd_gnu_1: %s\n", cmd_gnu_1);
printf("cmd_gnu_2: %s\n", cmd_gnu_2);
printf("cmd_gnu_3: %s\n", cmd_gnu_3);
printf("cmd_gnu_4: %s\n", cmd_gnu_4);
printf("cmd_gnu_5: %s\n", cmd_gnu_5);
printf("cmd_gnu_6: %s\n", cmd_gnu_6);
/* print plot */
FILE *gnuplot_pipe = (FILE*)popen("gnuplot -persistent", "w");
FILE *temp = (FILE*)fopen("data.temp", "w");
/* create data.temp */
size_t i;
for (i = 0; i < size; i++)
{
fprintf(temp, "%f %f \n", vector_x[i], vector_y[i]);
}
/* display gnuplot */
for (i = 0; i < 7; i++)
{
fprintf(gnuplot_pipe, "%s \n", commands_gnuplot[i]);
}
}
int main(void)
{
printf("===========================================\n");
printf("INPUT DATA\n");
printf("===========================================\n");
user_input();
display_vector();
printf("\n");
printf("===========================================\n");
printf("COMPUTE MEAN X:Y, TETA_1 TETA_0\n");
printf("===========================================\n");
compute_mean_x_y();
compute_max_x_y();
compute_teta_1_grad();
compute_teta_0_intercept();
printf("\n");
printf("===========================================\n");
printf("COMPUTE LOSS FUNCTION\n");
printf("===========================================\n");
compute_loss_function();
printf("===========================================\n");
printf("COMPUTE R_square\n");
printf("===========================================\n");
compute_r_square(size);
printf("\n");
printf("===========================================\n");
printf("COMPUTE y^ according to x\n");
printf("===========================================\n");
compute_predict_for_x();
printf("\n");
printf("===========================================\n");
printf("DISPLAY LINEAR REGRESSION\n");
printf("===========================================\n");
display_model_line();
printf("\n");
return 0;
}
Look at Section 1 of this paper. This section expresses a 2D linear regression as a matrix multiplication exercise. As long as your data is well-behaved, this technique should permit you to develop a quick least squares fit.
Depending on the size of your data, it might be worthwhile to algebraically reduce the matrix multiplication to simple set of equations, thereby avoiding the need to write a matmult() function. (Be forewarned, this is completely impractical for more than 4 or 5 data points!)
The fastest, most efficient way to solve least squares, as far as I am aware, is to subtract (the gradient)/(the 2nd order gradient) from your parameter vector. (2nd order gradient = i.e. the diagonal of the Hessian.)
Here is the intuition:
Let's say you want to optimize least squares over a single parameter. This is equivalent to finding the vertex of a parabola. Then, for any random initial parameter, x0, the vertex of the loss function is located at x0 - f(1) / f(2). That's because adding - f(1) / f(2) to x will always zero out the derivative, f(1).
Side note: Implementing this in Tensorflow, the solution appeared at w0 - f(1) / f(2) / (number of weights), but I'm not sure if that's due to Tensorflow or if it's due to something else..