Related
Here is my test code to find 1st clipping area on the screen.
Two subroutines and dummy loops in the code to compare the performance of them.
point_in_neon (NEON version) and point_in (regular version) do the same thing:
find out the first clipping area (contains given point) in given list and return -1 if there is no matching area.
I expected NEON version is faster than regular version.
Unfortunately, it is slower than regular version. Is there another way to speed it up?
The compiler command is:
${CC} -O2 -ftree-vectorize -o vcomp vcomp.c
Thanks,
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <arm_neon.h>
#define WIDTH (4096)
#define HEIGHT (4096)
#define CLIPS (32)
/*
 * Returns the current wall-clock time in microseconds, as reported by
 * gettimeofday().
 *
 * The seconds field is widened to 64 bits before it is scaled, so the
 * multiplication cannot overflow on targets where tv_sec is a 32-bit type.
 */
static inline uint64_t now(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (uint64_t)tv.tv_sec * 1000000u + (uint64_t)tv.tv_usec;
}
/*
 * Axis-aligned clipping rectangle.
 * x/y are the signed coordinates of the first column/row; width/height are
 * the unsigned extents (the hit tests in this file treat x+width-1 and
 * y+height-1 as the last covered column/row).
 */
typedef struct _rect_t {
int32_t x;
int32_t y;
uint32_t width;
uint32_t height;
} rect_t;
/* A single signed 2-D coordinate that is tested against rect_t areas. */
typedef struct _point_t {
int32_t x;
int32_t y;
} point_t;
/*
 * Tests one point against four rectangles at once using NEON compares.
 *
 * pt : point to test.
 * rs : exactly four rectangles.
 *
 * Returns the index (0..3) of the first rectangle that contains the point
 * (edges inclusive), or -1 when none of the four does.
 */
int32_t inline point_in_neon(const point_t *pt, const rect_t rs[4]) {
    /* Last covered column/row of each rectangle (inclusive bounds). */
    const int32_t right[4]={
        rs[0].x+rs[0].width-1,
        rs[1].x+rs[1].width-1,
        rs[2].x+rs[2].width-1,
        rs[3].x+rs[3].width-1
    }, bottom[4]={
        rs[0].y+rs[0].height-1,
        rs[1].y+rs[1].height-1,
        rs[2].y+rs[2].height-1,
        rs[3].y+rs[3].height-1
    };
    int32x4_t p;
    /* Start from a defined value: the lane loads below read `r` as an input. */
    int32x4_t r=vdupq_n_s32(0);
    uint32x4_t t;
    uint32_t res[4];
    //p = <Xp, Xp, Xp, Xp>
    p=vld1q_dup_s32(&pt->x);
    //r = <Left0, Left1, Left2, Left3>
    r=vld1q_lane_s32(&rs[0].x, r, 0);
    r=vld1q_lane_s32(&rs[1].x, r, 1);
    r=vld1q_lane_s32(&rs[2].x, r, 2);
    r=vld1q_lane_s32(&rs[3].x, r, 3);
    //t = (p >= r)
    t=vcgeq_s32(p, r);
    //r = <Right0, Right1, Right2, Right3>
    //(the array decays to const int32_t *, which is what vld1q_s32 expects)
    r=vld1q_s32(right);
    //t = t & (r >= p)
    t=vandq_u32(t, vcgeq_s32(r, p));
    //p = <Yp, Yp, Yp, Yp>
    p=vld1q_dup_s32(&pt->y);
    //r = <Top0, Top1, Top2, Top3>
    r=vld1q_lane_s32(&rs[0].y, r, 0);
    r=vld1q_lane_s32(&rs[1].y, r, 1);
    r=vld1q_lane_s32(&rs[2].y, r, 2);
    r=vld1q_lane_s32(&rs[3].y, r, 3);
    //t = t & (p >= r)
    t=vandq_u32(t, vcgeq_s32(p, r));
    //r = <Bottom0, Bottom1, Bottom2, Bottom3>
    r=vld1q_s32(bottom);
    //t = t & (r >= p)
    t=vandq_u32(t, vcgeq_s32(r, p));
    /* Spill the lane masks and report the lowest matching lane. */
    vst1q_u32(res, t);
    if(res[0])
        return 0;
    else if(res[1])
        return 1;
    else if(res[2])
        return 2;
    else if(res[3])
        return 3;
    return -1;
}
/*
 * Scalar reference hit test.
 * Walks rs[0..len-1] in order and returns the index of the first rectangle
 * whose inclusive bounds contain *pt, or -1 when there is none.
 */
int32_t inline point_in(const point_t *pt, const rect_t *rs, uint32_t len) {
    int32_t idx=0;
    while(idx<len) {
        const rect_t *rc=&rs[idx];
        /* inclusive far edges of the current rectangle */
        int32_t last_col=rc->x+rc->width-1;
        int32_t last_row=rc->y+rc->height-1;
        int outside=pt->x<rc->x || pt->x>last_col ||
                    pt->y<rc->y || pt->y>last_row;
        if(!outside)
            return idx;
        ++idx;
    }
    return -1;
}
/*
 * Benchmark driver: fills CLIPS random rectangles, then runs the scalar and
 * the NEON hit test for every pixel of a WIDTH x HEIGHT screen and prints the
 * elapsed microseconds of each pass.
 */
int32_t main(int32_t argc, char *argv[]) {
    rect_t rs[CLIPS];
    int32_t i, j;
    uint64_t ts0, ts1;
    int32_t res[2][CLIPS];
    (void)argc;
    (void)argv;
    srand((unsigned int)time(NULL));
    for(i=0;i<CLIPS;i++) {
        rs[i].x=rand()%WIDTH;
        rs[i].y=rand()%HEIGHT;
        rs[i].width=rand()%WIDTH;
        rs[i].height=rand()%HEIGHT;
    }
    memset(res, 0, sizeof(res));
    /* Pass 1: scalar version. */
    ts0=now();
    for(i=0;i<HEIGHT;i++) {
        for(j=0;j<WIDTH;j++) {
            /* i walks rows (y), j walks columns (x) */
            point_t p={j, i};
            int32_t idx=point_in(&p, rs, CLIPS);
            if(idx>=0)
                res[0][idx]=1;
        }
    }
    ts0=now()-ts0;
    /* Pass 2: NEON version, four rectangles per call. */
    ts1=now();
    for(i=0;i<HEIGHT;i++) {
        for(j=0;j<WIDTH;j++) {
            int32_t k, idx;
            point_t p={j, i};
            for(k=0, idx=-1;k<CLIPS/4;k++) {
                idx=point_in_neon(&p, &rs[k*4]);
                if(idx>=0)
                    break;
            }
            /* k still names the batch that produced the hit */
            if(idx>=0)
                res[1][k*4+idx]=1;
        }
    }
    ts1=now()-ts1;
    /*
    for(i=0;i<CLIPS;i++) {
    if(res[0][i]!=res[1][i]) {
    printf("error.\n");
    return 1;
    }
    }
    */
    /* uint64_t is not `unsigned long` on every target; print via a known type. */
    printf("regular = %llu\n", (unsigned long long)ts0);
    printf("neon = %llu\n", (unsigned long long)ts1);
    return 0;
}
Following Peter Cordes's suggestion, I replaced the data-loading parts of the point_in_neon subroutine with the vld4q_s32 intrinsic, so the subsequent right and bottom calculations can be vectorized as well. Now the code is shorter and faster than the regular version.
/*
 * Tests one point against four rectangles at once.
 *
 * The four rect_t records are loaded with a de-interleaving vld4q_s32 so each
 * field ends up in its own vector; right/bottom are then computed in SIMD.
 *
 * Returns the index (0..3) of the first rectangle that contains the point
 * (edges inclusive), or -1 when none does.
 */
int32_t inline point_in_neon(const point_t *pt, const rect_t rs[4]) {
    int32x4x4_t r;
    int32x4_t right, bottom, p;
    uint32x4_t t;
    uint32_t res[4];
    /*
    r.val[0] = <X0, X1, X2, X3>
    r.val[1] = <Y0, Y1, Y2, Y3>
    r.val[2] = <Width0, Width1, Width2, Width3>
    r.val[3] = <Height0, Height1, Height2, Height3>
    */
    /* vld4q_s32 takes a const int32_t *; rect_t is four 32-bit fields in a row. */
    r=vld4q_s32((const int32_t *)rs);
    //right = <Right0, Right1, Right2, Right3>
    right=vsubq_s32(vaddq_s32(r.val[0], r.val[2]), vdupq_n_s32(1));
    //bottom = <Bottom0, Bottom1, Bottom2, Bottom3>
    bottom=vsubq_s32(vaddq_s32(r.val[1], r.val[3]), vdupq_n_s32(1));
    //p = <Xp, Xp, Xp, Xp>
    p=vld1q_dup_s32(&pt->x);
    //t = (p >= left)
    t=vcgeq_s32(p, r.val[0]);
    //t = t & (right >= p)
    t=vandq_u32(t, vcgeq_s32(right, p));
    //p = <Yp, Yp, Yp, Yp>
    p=vld1q_dup_s32(&pt->y);
    //t = t & (p >= top)
    t=vandq_u32(t, vcgeq_s32(p, r.val[1]));
    //t = t & (bottom >= p)
    t=vandq_u32(t, vcgeq_s32(bottom, p));
    /* Spill the lane masks and report the lowest matching lane. */
    vst1q_u32(res, t);
    if(res[0])
        return 0;
    else if(res[1])
        return 1;
    else if(res[2])
        return 2;
    else if(res[3])
        return 3;
    return -1;
}
Starting with your original point_in method, we can clean up a little bit here by removing the -1's, and changing <= to <.
/*
 * Branch-light scalar hit test using half-open bounds [x, x+width) and
 * [y, y+height). Returns the index of the first rectangle in rs[0..len-1]
 * containing *pt, or -1.
 */
int32_t inline point_in(const point_t *pt, const rect_t *rs, uint32_t len) {
    for(int32_t n = 0; n < len; ++n)
    {
        const rect_t *rc = rs + n;
        // exclusive far edges; storing max x / max y in the rect would
        // make these two additions unnecessary
        const int32_t x_end = rc->x + rc->width;
        const int32_t y_end = rc->y + rc->height;
        const bool inside_x = (pt->x >= rc->x) & (pt->x < x_end);
        const bool inside_y = (pt->y >= rc->y) & (pt->y < y_end);
        if(inside_x & inside_y)
            return n;
    }
    return -1;
}
Next obvious thing to point out:
// your screen size...
#define WIDTH (4096)
#define HEIGHT (4096)
// yet your structures use uint32 as storage???
/* Rectangle as declared in the question: signed origin, unsigned 32-bit extents. */
typedef struct _rect_t {
int32_t x;
int32_t y;
uint32_t width;
uint32_t height;
} rect_t;
/* Point as declared in the question: two signed 32-bit coordinates. */
typedef struct _point_t {
int32_t x;
int32_t y;
} point_t;
If you can get away with using 16bit integers, this will go at twice the speed (because you can fit 8x 16bit numbers in a SIMD register, v.s. 4x 32bit). Whilst we're at it, we might as well change the data layout to structure of array at the same time.
I'm also going to hoist the pointless p.x + width out, and store it as xmax/ymax instead (removes duplicated computation in your loops).
/*
 * Eight rectangles in structure-of-arrays form: lane k of every member
 * belongs to the same rectangle. xmax/ymax hold the precomputed far edges.
 */
typedef struct rect_x8_t {
int16x8_t x;
int16x8_t y;
int16x8_t xmax; //< x + width
int16x8_t ymax; //< y + height
} rect_x8_t;
/* A point whose x and y are each held in an 8-lane 16-bit vector. */
typedef struct point_x8_t {
int16x8_t x;
int16x8_t y;
} point_x8_t;
On the assumption you don't have a number of clips that's divisible by 8, we'll need to pad the number slightly (not a big deal)
// assuming this has already been initialised
rect_t rs[CLIPS];
// how many batches of 8 do we need?
uint32_t CLIPS8 = (CLIPS / 8) + (CLIPS & 7 ? 1 : 0);
// allocate in batches of 8
rect_x8_t rs8[CLIPS8] = {};
// I'm going to do this rubbishly as an pre-process step.
// I don't care too much about efficiency here...
for(uint32_t i = 0; i < CLIPS; ++i) {
rs8[i / 8].x[i & 7] = rs[i].x;
rs8[i / 8].y[i & 7] = rs[I].y;
rs8[i / 8].xmax[i & 7] = rs[i].x + rs[i].width;
rs8[i / 8].ymax[i & 7] = rs[i].y + rs[i].height;
}
I have a couple of concerns here:
// Per-pixel scan quoted from the question, annotated with review remarks:
// every pixel is tested against the clip list and the first hit is flagged.
for(i=0;i<HEIGHT;i++) {
for(j=0;j<WIDTH;j++) {
// This seems wrong? Shouldn't it be p = {j, i} ?
point_t p={i, j};
int32_t idx=point_in(&p, rs, CLIPS);
// I'm not quite sure what the result says about your
// image data and clip regions???
//
// This seems like a really silly way of asking
// a simple question about the clip regions. The pixels
// don't have any effect here.
if(idx >= 0)
res[0][idx] = 1;
}
}
Anyhow, now refactoring the point_in method to use int16x8_t, we get:
/*
 * Tests one (splatted) point against batches of eight rectangles.
 *
 * pt  : point with x and y replicated across all eight lanes.
 * rs  : array of `len` eight-rectangle batches.
 * len : number of batches.
 *
 * Returns 8*batch + lane of the first rectangle whose half-open bounds
 * [x, xmax) x [y, ymax) contain the point, or -1 when there is none.
 */
inline int32_t point_in_x8(const point_x8_t pt,
                           const rect_x8_t* rs,
                           uint32_t len) {
    for(uint32_t i = 0; i < len; i++) {
        // perform comparisons on 8 rects at a time
        uint16x8_t cmp0 = vcgeq_s16(pt.x, rs[i].x);
        uint16x8_t cmp1 = vcgeq_s16(pt.y, rs[i].y);
        uint16x8_t cmp2 = vcltq_s16(pt.x, rs[i].xmax);
        uint16x8_t cmp3 = vcltq_s16(pt.y, rs[i].ymax);
        // combine to single comparison value
        uint16x8_t cmp01 = vandq_u16(cmp0, cmp1);
        uint16x8_t cmp23 = vandq_u16(cmp2, cmp3);
        uint16x8_t cmp0123 = vandq_u16(cmp01, cmp23);
        // use a horizontal max to see if any lanes are true
        if(vmaxvq_u16(cmp0123)) {
            // at least one lane matched: report the lowest one
            for(int32_t j = 0; j < 8; ++j) {
                if(cmp0123[j])
                    return (int32_t)(8*i) + j;
            }
        }
    }
    return -1;
}
Any additional padded elements in the rect_x8_t structs should end up being ignored (since they should be 0/0, 0/0, which will always end up being false).
Then finally...
// Scan every pixel with the 8-wide test. vdupq_n_s16 broadcasts a scalar
// value into all lanes (vld1q_dup_s16 would need a pointer to load from).
for(i = 0; i < HEIGHT; i++) {
    point_x8_t p;
    // splat the y value
    p.y = vdupq_n_s16((int16_t)i);
    for(j = 0; j < WIDTH; j++) {
        // splat the x value
        p.x = vdupq_n_s16((int16_t)j);
        int32_t idx = point_in_x8(p, rs8, CLIPS8);
        if(idx >= 0)
            res[1][idx] = 1;
    }
}
The vld4 instruction actually has a fairly high latency. Given that WIDTH * HEIGHT is actually a very big number, pre-swizzling here (as a pre-processing step) makes a lot more sense imho.
HOWEVER
This whole algorithm could be massively improved by simply ignoring the pixels, and working on CLIP regions directly.
A clip region will be false if it is entirely contained by the preceding clip regions
// Decide, per clip region, whether it can ever be the *first* match for some
// pixel, by comparing regions against each other instead of scanning pixels.
for(i = 0; i < CLIPS; i++) {
    // if region is empty, ignore.
    if(rs[i].width == 0 || rs[i].height == 0) {
        res[0][i] = 0;
        continue;
    }
    // first region will always be true (unless it's of zero size)
    if(i == 0) {
        res[0][i] = 1;
        continue;
    }
    uint32_t how_many_intersect = 0;
    bool entirely_contained = false;
    uint32_t intersection_indices[CLIPS] = {0};
    // do a lazy test first.
    for(j = i - 1; j >= 0; --j) {
        // if this region is entirely contained by a preceding
        // one, it will be false. exit loop.
        if(region_is_entirely_contained(rs[i], rs[j])) {
            res[0][i] = 0;
            entirely_contained = true;
            break;
        }
        // do the regions intersect?
        if(region_intersects(rs[i], rs[j])) {
            intersection_indices[how_many_intersect] = j;
            ++how_many_intersect;
        }
    }
    // if one region entirely contains this clip region, skip it.
    if(entirely_contained) {
        continue;
    }
    // if you only intersect one or no regions, the result is true.
    if(how_many_intersect <= 1) {
        res[0][i] = 1;
        continue;
    }
    // If you get here, the result is *probably* true, however
    // you will need to split this clip region against the previous
    // ones to be fully sure. If all regions are fully contained,
    // the answer is false.
    // I won't implement it, but something like this:
    //  * split rs[i] against each rs[intersection_indices[]].
    //  * Throw away the rectangles that are entirely contained.
    //  * Each bit that remains should be tested against each rs[intersection_indices[]]
    //  * If you find any split rectangle that isn't contained,
    //    set to true and move on.
}
I tried to implement this code and it works up to a certain point (x<0.6). I am just wondering why it outputs 'inf', although the stop criterion should terminate the program when it reaches the maximum accuracy of double.
#include <stdio.h>
#include <math.h>
/*
 * Returns n! as a double.
 *
 * The product is accumulated in double rather than int: an int overflows
 * from 13! onwards, which is what made the original return garbage and
 * eventually 0. Any n <= 1 (including negative n) yields 1.
 */
double fak(int n) {
    double f = 1.0;
    for(int i = 2; i <= n; ++i)
        f *= i;
    return f;
}
/*
 * Approximates e^x for x = 1 by summing the series sum_k x^k / k! until
 * adding another term no longer changes the double result.
 *
 * Each term is derived from the previous one (term_k = term_{k-1} * x / k),
 * so no factorial is ever formed and nothing can overflow to 0 or inf.
 */
double func_e() {
    const double x = 1;
    double res = 0;
    double res_old;
    double term = 1; /* x^0 / 0! */
    int k = 0;
    do {
        res_old = res;
        res += term;
        ++k;
        term *= x / k; /* advance to x^k / k! */
    } while(res != res_old);
    return res;
}
/* Prints the series approximation produced by func_e(), followed by a newline. */
int main(void) {
    const double approx = func_e();
    printf("%f", approx);
    printf("\n");
    return 0;
}
Check the return value of your function fak. It will overflow and at a certain point return 0. The division by 0.0 results in inf.
When I modify function fak as
/*
 * Diagnostic variant of fak: multiplies 1..n in an int, prints the int
 * result, and returns it converted to double. The int arithmetic is kept
 * on purpose, since this version exists to show what the int product becomes.
 */
double fak(int n) {
    int product = 1;
    for(int factor = 1; factor <= n; ++factor)
        product *= factor;
    printf("fak(%d) = %d\n", n, product);
    return product;
}
and run it on https://onlinegdb.com/ZxaXfI5xcG, the output is
fak(0) = 1
fak(1) = 1
fak(2) = 2
fak(3) = 6
fak(4) = 24
fak(5) = 120
fak(6) = 720
fak(7) = 5040
fak(8) = 40320
fak(9) = 362880
fak(10) = 3628800
fak(11) = 39916800
fak(12) = 479001600
fak(13) = 1932053504
fak(14) = 1278945280
fak(15) = 2004310016
fak(16) = 2004189184
fak(17) = -288522240
fak(18) = -898433024
fak(19) = 109641728
fak(20) = -2102132736
fak(21) = -1195114496
fak(22) = -522715136
fak(23) = 862453760
fak(24) = -775946240
fak(25) = 2076180480
fak(26) = -1853882368
fak(27) = 1484783616
fak(28) = -1375731712
fak(29) = -1241513984
fak(30) = 1409286144
fak(31) = 738197504
fak(32) = -2147483648
fak(33) = -2147483648
fak(34) = 0
fak(35) = 0
inf
This means your loop ends when both res and res_old have the value inf.
Additional remark:
In func_e you use double k; and pass this to double fak(int n) which converts the value to int. Function fak does the calculation in int and implicitly converts the result to double in the return statement.
I suggest to avoid these conversions. (Or at least think about the possible problems.) The compiler may warn about this if you enable all warnings.
The problem is the following: Given "ABC+DEF=GHI" format string, where A,B,C etc. represent unique digits, find the expression that gives maximum GHI. Ex: Input string is AAB+AAB=AAB, then there's no solution. If it is instead AAA + BBB = AAA, a solution is 999 + 000 = 999. Another example string: ABC + CBA = GGG, a result is => 543 + 345 = 888.
I have ruled out impossible cases easily. The algorithm I have in mind is a bruteforce, that simply tries maximizing the rhs first. However my problem was doing this fast, and also watching out for the unique digits. What's an efficient way to solve this problem?
Notes: I wish to solve this in a singlethreaded approach, and my current problem is detecting if a unique digit is used in "assign_value" function. Perhaps a better method to assign values is there?
EDIT: As per smci's suggestion, here's what I want to achieve, in the very end: ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828 -- A system that can handle not only strings of the form I provided in the beginning (3 digit number + 3 digit number = 3 digit number) but also those. However it doesn't hurt to start simple and go with the solution of format I gave :)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* 11 formula characters ("ABC+DEF=GHI") plus the terminating NUL.
   Parenthesised so the macro expands safely inside larger expressions. */
#define MAX_EXPRESSION_SIZE (11 + 1)
/* Number of distinct letters (A..I) a formula may use. */
#define MAX_VARIABLES 9
/* variables_read[k] is non-zero once letter 'A'+k has been seen while parsing. */
int variables_read[MAX_VARIABLES] = { 0 };
/*
 * One letter occurrence in the formula.
 *   coefficient - place value of this position (100, 10 or 1)
 *   ptr         - storage of the digit shared by all occurrences of the letter
 *   side        - 0 for the left-hand side, 1 for the right-hand side
 *   canhavezero - 0 when the position starts a number, 1 otherwise
 *   value_max   - number of candidate digits for this position
 *   value_count - candidates still untried; the solver reads and resets it
 *                 against value_max
 */
struct variable {
    int coefficient;
    int* ptr;
    int side;
    int canhavezero;
    unsigned value_max;
    unsigned value_count;
};
typedef struct variable Variable;
/*
 * Parsed formula: the letter occurrences in reading order plus counters.
 * var_count is the number of used entries in variables[]; distinct_on_rhs
 * counts right-hand-side occurrences whose letter had not been seen before.
 */
struct equation {
Variable* variables[9]; // max
unsigned distinct_on_rhs;
unsigned var_count;
};
typedef struct equation Equation;
/* Integer power: returns n multiplied by itself k times (1 when k <= 0). */
int int_pow(int n, int k) {
    int acc = 1;
    for(int remaining = k; remaining > 0; --remaining)
        acc *= n;
    return acc;
}
/*
 * Appends V to the equation's variable list.
 * The call is ignored when the fixed-size list is already full, so it can
 * never write past the end of E->variables.
 */
void AddVariable(Equation* E, Variable* V) {
    if(E->var_count >= sizeof(E->variables) / sizeof(E->variables[0]))
        return;
    E->variables[E->var_count++] = V;
}
/*
 * Quick rejection test for an "ABC+DEF=GHI" style formula.
 * Returns 1 when the three numbers start with the same letter, or when all
 * three 3-letter groups are identical; otherwise 0.
 */
int IsImpossible(char* expression) {
    int same_leading = expression[0] == expression[4];
    if(same_leading)
        same_leading = expression[0] == expression[8];
    if(same_leading)
        return 1;
    int same_groups = strncmp(expression, expression + 4, 3) == 0;
    if(same_groups)
        same_groups = strncmp(expression, expression + 8, 3) == 0;
    return same_groups ? 1 : 0;
}
/*
 * Gives the variable at index pos the largest digit still available.
 *
 * E      - equation whose variable is assigned.
 * pos    - index into E->variables.
 * values - pool of 10 digits; a used digit is overwritten with -1.
 *
 * Returns 0 after assigning a digit (or after resetting an exhausted
 * variable), and 2 when nothing can be assigned: pos is out of range, the
 * first variable is exhausted, or the digit pool is empty.
 */
int assign_value(Equation* E, int pos, int* values) {
    /* Validate pos before touching E->variables[pos]. */
    if(pos < 0 || (unsigned) pos >= E->var_count)
        return 2;
    Variable* V = E->variables[pos];
    if(!V->value_count) {
        /* Nothing precedes the first variable, so there is nothing to borrow from. */
        if(pos == 0)
            return 2;
        // if no possible values left, reset this, but take one value count from the closest variable
        if(E->variables[pos - 1]->value_count > 0)
            E->variables[pos - 1]->value_count--;
        V->value_count = V->value_max;
        return 0;
    }
    /* Find the largest digit that has not been handed out yet. */
    int i;
    for(i = 9; i >= 0 && values[i] == -1; --i)
        ;
    if(i < 0)
        return 2; /* every digit is already in use */
    printf("Assigning %d to %c\n", values[i], 'A' + (int) (V->ptr - E->variables[0]->ptr));
    *(V->ptr) = values[i];
    values[i] = -1; // we have unique numbers
    return 0;
}
/*
 * Evaluates the equation with the digits currently stored behind each
 * variable: left-hand terms are added, right-hand terms subtracted.
 * Prints the digits and the resulting sum; returns 1 when the sum is 0.
 */
int isSolved(Equation E) {
    int total = 0;
    printf("Trying...\n");
    for(int idx = 0; idx < E.var_count; ++idx) {
        const Variable* v = E.variables[idx];
        int term = v->coefficient * (*v->ptr);
        printf("%d ", *v->ptr);
        /* side != 0 marks the right-hand side, which counts negatively */
        total += v->side ? -term : term;
    }
    printf("\nSum was %d\n", total);
    return total == 0;
}
/* Returns a malloc'ed copy of msg including its NUL (NULL if allocation fails). */
static char* copy_message(const char* msg) {
    char* out = (char *) malloc(strlen(msg) + 1);
    if(out)
        strcpy(out, msg);
    return out;
}

/* Frees every Variable stored in E and empties the list. */
static void release_variables(Equation* E) {
    for(unsigned j = 0; j < E->var_count; ++j)
        free(E->variables[j]);
    E->var_count = 0;
}

/*
 * Tries to solve a formula of the form "ABC+DEF=GHI".
 *
 * Returns a heap-allocated string the caller must free(): either the formula
 * with every letter replaced by its digit, or "No Solution!". Returns NULL
 * only when memory allocation fails.
 */
char* evaluate(char* expression) {
    // check for impossible cases first
    if(IsImpossible(expression))
        return copy_message("No Solution!");
    // now try to find solutions, first describe the given characters as equations
    Equation E;
    E.var_count = 0;
    E.distinct_on_rhs = 0;
    int side_mode = 0, powcounter = 0;
    /* One digit slot per letter A..I; -1 means "not assigned yet". Kept in one
       array so letter indices can be recovered by pointer subtraction. */
    int digits[MAX_VARIABLES];
    int* max_variables[MAX_VARIABLES];
    for(int k = 0; k < MAX_VARIABLES; ++k) {
        digits[k] = -1;
        max_variables[k] = &digits[k];
    }
    /* The bookkeeping is global; start every formula from a clean slate. */
    memset(variables_read, 0, sizeof(variables_read));
    for(int j = 0; j < MAX_EXPRESSION_SIZE - 1 && expression[j] != '\0'; ++j) {
        if(expression[j] == '+')
            continue;
        if(expression[j] == '=') {
            side_mode = 1;
            continue;
        }
        int letter = expression[j] - 'A';
        /* Reject anything that is not A..I, and formulas with too many letters. */
        if(letter < 0 || letter >= MAX_VARIABLES || E.var_count >= MAX_VARIABLES) {
            release_variables(&E);
            return copy_message("No Solution!");
        }
        Variable* V = (Variable *) malloc(sizeof(Variable));
        if(!V) {
            release_variables(&E);
            return NULL;
        }
        // we know we always get 3 digit numbers but we can easily change if we need to
        V->coefficient = int_pow(10, 2 - (powcounter % 3));
        V->ptr = max_variables[letter];
        V->side = side_mode;
        E.distinct_on_rhs += side_mode && !variables_read[letter];
        if(!(powcounter % 3)) { // beginning of a number
            V->value_count = 9;
            V->value_max = 9;
            V->canhavezero = 0;
        }
        else {
            V->value_count = 10;
            V->value_max = 10;
            V->canhavezero = 1;
        }
        AddVariable(&E, V);
        variables_read[letter] = 1;
        ++powcounter;
    }
    for(int j = 0; j < (int) E.var_count; ++j)
        printf("%d %c %d\n", E.variables[j]->coefficient, 'A' + (int) (E.variables[j]->ptr - max_variables[0]), E.variables[j]->side);
    // we got a representaion of the equation, now try to solve it
    int solved = 0;
    // O(9^N), where N is number of distinct variables.
    // An optimization we can do is, we first assign possible max values to rhs number, then go down. We need max number.
    printf("Distincts: %d\n", E.distinct_on_rhs);
    /* NOTE(review): the search strategy below is unchanged from the original;
       whether it terminates for every formula has not been verified. */
    do {
        // try to assign values to all variables and try if it solves the equation
        // but first try to assign rhs as max as possible
        int values[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        int temp = E.var_count - E.distinct_on_rhs;
        while(temp < (int) E.var_count) {
            solved = assign_value(&E, temp, values);
            ++temp;
        }
        for(int j = (int) E.var_count - 1 - (int) E.distinct_on_rhs; j >= 0; --j)
            solved = assign_value(&E, j, values);
        if(solved) // can return no solution
            break;
        printf("Solving...\n");
        solved = isSolved(E);
        system("PAUSE");
    } while(!solved);
    char* res;
    if(solved == 2) {
        res = copy_message("No Solution!");
    }
    else {
        /* Render the solution: copy the formula, replacing letters by digits. */
        size_t len = strlen(expression);
        res = (char *) malloc(len + 1);
        if(res) {
            for(size_t k = 0; k < len; ++k) {
                char c = expression[k];
                if(c >= 'A' && c < 'A' + MAX_VARIABLES)
                    res[k] = (char) ('0' + digits[c - 'A']);
                else
                    res[k] = c;
            }
            res[len] = '\0';
        }
    }
    release_variables(&E);
    return res;
}
/*
 * Interactive loop: reads one formula per iteration, prints the result of
 * evaluate(), and stops on end of input or when the input starts with '-'.
 */
int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    for(;;) {
        printf("Enter the formula: ");
        /* At most 11 characters fit next to the NUL; longer input is left in
           stdin and picked up by the next read instead of overflowing. */
        if(scanf("%11s", expression) != 1)
            break;
        if(expression[0] == '-')
            break;
        char* res = evaluate(expression);
        printf("%s\n", res ? res : "No Solution!");
        free(res);
    }
    return 0;
}
I would start with the result. There are not that many different cases:
AAA
AAB, ABA, BAA
ABC
All other cases can be reduced to these by renaming the variables. ABC + CBA = GGG would become DBC + CBD = AAA.
Then you have
10 possible solutions for the one-variable case AAA
90 (10*9) for the two variable cases
720 (10*9*8) for the three variable case
assuming that zero is allowed anywhere. If not, you can filter out those that are not allowed.
This sets the variables for the right side of the equation. Each variable that appears only on the left, adds possible solutions. B adds a factor of 9, C a factor of 8, D 7 and so forth.
The most "efficient" solution would use all knowledge of the task and simply print the result. So the question is how much of the conditions can be coded, and where, and how much flexibility is needed.
An alternative is to view the generation of test cases and evaluation of them separately.
A simple recursive function can generate the 10! (3628800) test cases of unique digits.
/* Number of candidate digit assignments examined so far. */
unsigned long long count = 0;
/* Number of assignments that satisfied the equation. */
unsigned long long sol = 0;

/*
 * Checks one assignment: object[0..2], object[3..5] and object[6..8] are the
 * digits of the two addends and the sum. Counts the call and, when the sum
 * holds, numbers and prints the solution.
 */
void evaluate(int object[]) {
    ++count;
    const int lhs1 = (object[0] * 10 + object[1]) * 10 + object[2];
    const int lhs2 = (object[3] * 10 + object[4]) * 10 + object[5];
    const int total = (object[6] * 10 + object[7]) * 10 + object[8];
    if (lhs1 + lhs2 != total)
        return;
    ++sol;
    printf("%4llu %03d + %03d = %03d\n", sol, lhs1, lhs2, total);
}
/*
 * Recursively fills object[object_count .. object_count_max-1] with every
 * ordered selection of distinct entries from pool[0..pool_count-1] and calls
 * evaluate(object) for each complete selection.
 * The pool is modified during the recursion and restored before returning.
 */
void form_combos(int pool[], size_t pool_count, int object[],
                 size_t object_count, size_t object_count_max) {
    if (object_count < object_count_max) {
        assert(pool_count > 0);
        const size_t last = pool_count - 1;
        size_t slot = 0;
        while (slot < pool_count) {
            const int picked = pool[slot]; // remove this entry from the pool...
            pool[slot] = pool[last];       // ...by moving the last entry into its place
            object[object_count] = picked;
            form_combos(pool, last, object, object_count + 1, object_count_max);
            pool[slot] = picked;           // put it back for the next choice
            ++slot;
        }
        return;
    }
    // every position is filled: hand the candidate over
    evaluate(object);
}
/* Enumerates all 9-digit selections from the digits 0..9 and reports how many were tried. */
int main() {
    int pool[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    #define object_count 9
    int object[object_count];
    form_combos(pool, sizeof pool / sizeof *pool, object, 0, object_count);
    printf("Evaluate() iterations %llu\n", count);
}
Output
1 091 + 762 = 853
2 091 + 763 = 854
3 091 + 735 = 826
...
1726 874 + 061 = 935
1727 875 + 046 = 921
1728 876 + 045 = 921
Evaluate() iterations 3628800
What is nice about this approach is that if the task was now find
ABC*ABC + DEF*DEF == GHI*GHI
Changing only 2 lines of code:
if (ABC*ABC + DEF*DEF == GHI*GHI) {
printf("%4llu sqr(%03d) + sqr(%03d) = sqr(%03d)\n", ++sol, ABC,DEF,GHI);
}
results in
1 sqr(534) + sqr(712) = sqr(890)
2 sqr(546) + sqr(728) = sqr(910)
3 sqr(712) + sqr(534) = sqr(890)
4 sqr(728) + sqr(546) = sqr(910)
Evaluate() iterations 3628800
Ok, so for a trivial solution (a base to build a generalization on, so far it only works on the format <3 digit number> + <3 digit number> = <3 digit number>) inspired from #chux and #alain's suggestions is the following code. It truly runs on O(10^N) where N is the distinct number of digits present, or variables if you'd like to call them that. I'll see if I can generalize this even further.
Note that this is for the initial problem of finding the largest rhs. Take that into account as well.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_DIGITS 10
#define MAX_VARIABLES 9
#define MAX_EXPRESSION_SIZE 11
/*
 * Cheap pre-check for "ABC+DEF=GHI" formulas: returns 1 if all three numbers
 * begin with the same letter or all three 3-letter groups are equal, else 0.
 */
int IsImpossible(char* expression) {
    if(expression[0] == expression[4] && expression[0] == expression[8])
        return 1;
    if(strncmp(expression, expression + 4, 3) != 0)
        return 0;
    return strncmp(expression, expression + 8, 3) == 0 ? 1 : 0;
}
/*
 * Returns 1 when none of the MAX_VARIABLES digit slots reached through
 * `pointers` still holds the "unassigned" marker -1, otherwise 0.
 */
int ArePointersAssigned(int*** pointers) {
    int i = 0;
    while(i < MAX_VARIABLES) {
        const int* slot = *pointers[i];
        if(*slot == -1)
            return 0;
        ++i;
    }
    return 1;
}
/*
 * Reads nine digits through `pointers` (three per number, most significant
 * first) and returns 1 when first + second == third, otherwise 0.
 */
int evaluate(int*** pointers) {
    int number[3];
    for(int n = 0; n < 3; ++n) {
        number[n] = 0;
        for(int d = 0; d < 3; ++d)
            number[n] = number[n] * 10 + **pointers[3 * n + d];
    }
    // with depth-first search the first hit is the answer, so just report it
    return (number[0] + number[1] == number[2]) ? 1 : 0;
}
// Depth-first search over digit assignments.
//   pool / pool_count : digits still available at this depth
//   object_count      : index (into pointers) of the slot handled at this depth;
//                       it wraps to 0 on reaching MAX_DIGITS - 1
//   pointers          : per-position indirection to the digit slots
//   solved            : set to non-zero once evaluate() accepts an assignment;
//                       used to escape the recursion early
void form_combos(int pool[], int pool_count, int object_count, int*** pointers, int* solved) {
if(object_count == MAX_DIGITS - 1)
object_count = 0;
if(*solved) // if a branch solved this, escape recursion
return;
if (ArePointersAssigned(pointers)) { // that means we got a full equation set
*solved = evaluate(pointers);
if(*solved)
return;
}
int *pool_end = pool + pool_count - 1;
// candidates are tried from the end of the pool towards the front
for (int p = pool_count - 1; p >= 0 && !*solved; p--) {
int sample = pool[p]; // take one out
pool[p] = *pool_end; // replace it with the end
// remember the slot's current value so it can be restored when backtracking
int temp = **pointers[object_count];
// only a slot that is still unassigned (-1) receives the sample
if(**pointers[object_count] == -1)
**pointers[object_count] = sample;
form_combos(pool, pool_count - 1, object_count + 1, pointers, solved);
pool[p] = sample; // restore pool item
// keep the assignment in place once a solution has been found
if(!*solved)
**pointers[object_count] = temp;
}
}
/*
 * Interactive driver: reads formulas such as "ABC+DEF=GHI" until end of
 * input or an input starting with '-', and prints one solution (or
 * "No solution!") for each.
 */
int main() {
    /* One extra byte: an 11-character formula also needs room for its NUL. */
    char expression[MAX_EXPRESSION_SIZE + 1] = { 0 };
    for(;;) {
        printf("Enter the formula: ");
        /* The width keeps scanf inside the buffer (11 characters + NUL). */
        if(scanf("%11s", expression) != 1 || expression[0] == '-')
            break;
        if(IsImpossible(expression))
            printf("No solution!\n");
        else {
            int digits[MAX_DIGITS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            int object[MAX_VARIABLES] = { -1, -1, -1, -1, -1, -1, -1, -1, -1 }; // stack for dfs
            int *A = &object[0], *B = &object[1], *C = &object[2],
                *D = &object[3], *E = &object[4], *F = &object[5],
                *G = &object[6], *H = &object[7], *I = &object[8];
            // set same pointers
            int** pointers[MAX_VARIABLES] = { &A, &B, &C, &D, &E, &F, &G, &H, &I };
            // analyze the equation
            int var = 0;
            /* Stop at the end of the text and never link more than MAX_VARIABLES positions. */
            for(int p = 0; p < MAX_EXPRESSION_SIZE && expression[p] != '\0' && var < MAX_VARIABLES; ++p) {
                if(expression[p] >= 'A' && expression[p] <= 'I') {
                    *pointers[var++] = &object[expression[p] - 'A']; // link same pointers
                }
            }
            int solved = 0;
            /* Start at position 6 (the first digit of the right-hand side). */
            form_combos(digits, MAX_DIGITS, MAX_DIGITS - 4, pointers, &solved);
            if(!solved) // it can be unsolvable still
                printf("No solution!\n");
            else
                printf("%d%d%d + %d%d%d = %d%d%d\n", *A, *B, *C, *D, *E, *F, *G, *H, *I);
        }
    }
    return 0;
}
I am trying to implement a linear least squares fit onto 2 arrays of data: time vs amplitude. The only technique I know so far is to test all of the possible m and b points in (y = m*x+b) and then find out which combination fits my data best so that it has the least error. However, I think iterating so many combinations is sometimes useless because it tests out everything. Are there any techniques to speed up the process that I don't know about? Thanks.
Try this code. It fits y = mx + b to your (x,y) data.
The arguments to linreg are
linreg(int n, const REAL x[], const REAL y[], REAL* m, REAL* b, REAL* r)
n = number of data points
x,y = arrays of data
*b = output intercept
*m = output slope
*r = output correlation coefficient (can be NULL if you don't want it)
The return value is 0 on success, !=0 on failure.
Here's the code
#include "linreg.h"
#include <stdlib.h>
#include <math.h> /* math functions */
//#define REAL float
#define REAL double
inline static REAL sqr(REAL x) {
return x*x;
}
int linreg(int n, const REAL x[], const REAL y[], REAL* m, REAL* b, REAL* r){
REAL sumx = 0.0; /* sum of x */
REAL sumx2 = 0.0; /* sum of x**2 */
REAL sumxy = 0.0; /* sum of x * y */
REAL sumy = 0.0; /* sum of y */
REAL sumy2 = 0.0; /* sum of y**2 */
for (int i=0;i<n;i++){
sumx += x[i];
sumx2 += sqr(x[i]);
sumxy += x[i] * y[i];
sumy += y[i];
sumy2 += sqr(y[i]);
}
REAL denom = (n * sumx2 - sqr(sumx));
if (denom == 0) {
// singular matrix. can't solve the problem.
*m = 0;
*b = 0;
if (r) *r = 0;
return 1;
}
*m = (n * sumxy - sumx * sumy) / denom;
*b = (sumy * sumx2 - sumx * sumxy) / denom;
if (r!=NULL) {
*r = (sumxy - sumx * sumy / n) / /* compute correlation coeff */
sqrt((sumx2 - sqr(sumx)/n) *
(sumy2 - sqr(sumy)/n));
}
return 0;
}
Example
You can run this example online.
/* Fits a line to six sample points and prints slope, intercept and correlation. */
int main()
{
    int n = 6;
    REAL x[6]= {1, 2, 4, 5, 10, 20};
    REAL y[6]= {4, 6, 12, 15, 34, 68};
    REAL m,b,r;
    /* linreg reports a singular system through a non-zero return value */
    if (linreg(n,x,y,&m,&b,&r) != 0) {
        printf("linreg failed: singular system\n");
        return 1;
    }
    printf("m=%g b=%g r=%g\n",m,b,r);
    return 0;
}
Here is the output
m=3.43651 b=-0.888889 r=0.999192
Here is the Excel plot and linear fit (for verification).
All values agree exactly with the C code above (note C code returns r while Excel returns R**2).
There are efficient algorithms for least-squares fitting; see Wikipedia for details. There are also libraries that implement the algorithms for you, likely more efficiently than a naive implementation would do; the GNU Scientific Library is one example, but there are others under more lenient licenses as well.
From Numerical Recipes: The Art of Scientific Computing in (15.2) Fitting Data to a Straight Line:
Linear Regression:
Consider the problem of fitting a set of N data points (xi, yi) to a straight-line model: $y(x) = y(x; a, b) = a + b\,x$
Assume that the uncertainty sigmai associated with each yi is known, and that the xi’s (values of the independent variable) are known exactly. To measure how well the model agrees with the data, we use the chi-square function, which in this case is: $\chi^2(a, b) = \sum_{i=0}^{N-1} \left( \frac{y_i - a - b\,x_i}{\sigma_i} \right)^2$
The above equation is minimized to determine a and b. This is done by finding the derivative of the above equation with respect to a and b, equate them to zero and solve for a and b. Then we estimate the probable uncertainties in the estimates of a and b, since obviously the measurement errors in the data must introduce some uncertainty in the determination of those parameters. Additionally, we must estimate the goodness-of-fit of the data to the
model. Absent this estimate, we have not the slightest indication that the parameters a and b in the model have any meaning at all.
The below struct performs the mentioned calculations:
// Straight-line fit y = a + b*x. Both constructors do all the work; the
// results are left in the public members. The struct keeps references to the
// caller's vectors, so those vectors must outlive the Fitab object.
// NOTE(review): relies on vector, SQR, sqrt and Gamma being declared
// elsewhere - not visible in this excerpt.
struct Fitab {
// Object for fitting a straight line y = a + b*x to a set of
// points (xi, yi), with or without available
// errors sigma i . Call one of the two constructors to calculate the fit.
// The answers are then available as the variables:
// a, b, siga, sigb, chi2, and either q or sigdat.
int ndata;
double a, b, siga, sigb, chi2, q, sigdat; // Answers.
vector<double> &x, &y, &sig;
// Constructor.
Fitab(vector<double> &xx, vector<double> &yy, vector<double> &ssig)
: ndata(xx.size()), x(xx), y(yy), sig(ssig), chi2(0.), q(1.), sigdat(0.)
{
// Given a set of data points x[0..ndata-1], y[0..ndata-1]
// with individual standard deviations sig[0..ndata-1],
// sets a,b and their respective probable uncertainties
// siga and sigb, the chi-square: chi2, and the goodness-of-fit
// probability: q
Gamma gam;
int i;
double ss=0., sx=0., sy=0., st2=0., t, wt, sxoss; b=0.0;
for (i=0;i < ndata; i++) { // Accumulate sums ...
wt = 1.0 / SQR(sig[i]); //...with weights
ss += wt;
sx += x[i]*wt;
sy += y[i]*wt;
}
// sxoss is the weighted mean of x
sxoss = sx/ss;
for (i=0; i < ndata; i++) {
t = (x[i]-sxoss) / sig[i];
st2 += t*t;
b += t*y[i]/sig[i];
}
b /= st2; // Solve for a, b, sigma-a, and simga-b.
a = (sy-sx*b) / ss;
siga = sqrt((1.0+sx*sx/(ss*st2))/ss);
sigb = sqrt(1.0/st2); // Calculate chi2.
for (i=0;i<ndata;i++) chi2 += SQR((y[i]-a-b*x[i])/sig[i]);
// q keeps its initial value 1.0 when there are no degrees of freedom
if (ndata>2) q=gam.gammq(0.5*(ndata-2),0.5*chi2); // goodness of fit
}
// Constructor.
// The sig reference is bound to xx here only because a reference member
// must be bound to something; it is not read in this constructor.
Fitab(vector<double> &xx, vector<double> &yy)
: ndata(xx.size()), x(xx), y(yy), sig(xx), chi2(0.), q(1.), sigdat(0.)
{
// As above, but without known errors (sig is not used).
// The uncertainties siga and sigb are estimated by assuming
// equal errors for all points, and that a straight line is
// a good fit. q is returned as 1.0, the normalization of chi2
// is to unit standard deviation on all points, and sigdat
// is set to the estimated error of each point.
int i;
double ss,sx=0.,sy=0.,st2=0.,t,sxoss;
b=0.0; // Accumulate sums ...
for (i=0; i < ndata; i++) {
sx += x[i]; // ...without weights.
sy += y[i];
}
ss = ndata;
sxoss = sx/ss;
for (i=0;i < ndata; i++) {
t = x[i]-sxoss;
st2 += t*t;
b += t*y[i];
}
b /= st2; // Solve for a, b, sigma-a, and sigma-b.
a = (sy-sx*b)/ss;
siga=sqrt((1.0+sx*sx/(ss*st2))/ss);
sigb=sqrt(1.0/st2); // Calculate chi2.
for (i=0;i<ndata;i++) chi2 += SQR(y[i]-a-b*x[i]);
// sigdat stays 0 for ndata <= 2, which zeroes siga and sigb below
if (ndata > 2) sigdat=sqrt(chi2/(ndata-2));
// For unweighted data evaluate typical
// sig using chi2, and adjust
// the standard deviations.
siga *= sigdat;
sigb *= sigdat;
}
};
where struct Gamma:
// Incomplete gamma functions P(a,x) and Q(a,x) = 1 - P(a,x).
// gammp/gammq are the entry points; they pick a series, a continued fraction
// or Gauss-Legendre quadrature depending on a and x. The helper methods store
// ln(Gamma(a)) in gln as a side effect.
// NOTE(review): Int, Doub, MAX, MIN and gammln are declared elsewhere - not
// visible in this excerpt.
struct Gamma : Gauleg18 {
    // Object for incomplete gamma function.
    // Gauleg18 provides coefficients for Gauss-Legendre quadrature.
    static const Int ASWITCH=100; // When to switch to quadrature method.
    static const double EPS; // See end of struct for initializations.
    static const double FPMIN;
    double gln;
    double gammp(const double a, const double x) {
        // Returns the incomplete gamma function P(a,x)
        if (x < 0.0 || a <= 0.0) throw("bad args in gammp");
        if (x == 0.0) return 0.0;
        else if ((Int)a >= ASWITCH) return gammpapprox(a,x,1); // Quadrature.
        else if (x < a+1.0) return gser(a,x); // Use the series representation.
        else return 1.0-gcf(a,x); // Use the continued fraction representation.
    }
    double gammq(const double a, const double x) {
        // Returns the incomplete gamma function Q(a,x) = 1 - P(a,x)
        if (x < 0.0 || a <= 0.0) throw("bad args in gammq");
        if (x == 0.0) return 1.0;
        else if ((Int)a >= ASWITCH) return gammpapprox(a,x,0); // Quadrature.
        else if (x < a+1.0) return 1.0-gser(a,x); // Use the series representation.
        else return gcf(a,x); // Use the continued fraction representation.
    }
    double gser(const Doub a, const Doub x) {
        // Returns the incomplete gamma function P(a,x) evaluated by its series representation.
        // Also sets ln (gamma) as gln. User should not call directly.
        double sum,del,ap;
        gln=gammln(a);
        ap=a;
        del=sum=1.0/a;
        for (;;) {
            ++ap;
            del *= x/ap;
            sum += del;
            // stop once the newest term is negligible relative to the sum
            if (fabs(del) < fabs(sum)*EPS) {
                return sum*exp(-x+a*log(x)-gln);
            }
        }
    }
    double gcf(const Doub a, const Doub x) {
        // Returns the incomplete gamma function Q(a, x) evaluated
        // by its continued fraction representation.
        // Also sets ln (gamma) as gln. User should not call directly.
        int i;
        double an,b,c,d,del,h;
        gln=gammln(a);
        b=x+1.0-a; // Set up for evaluating continued fraction
        // by modified Lentz’s method with with b0 = 0.
        c=1.0/FPMIN;
        d=1.0/b;
        h=d;
        for (i=1;;i++) {
            // Iterate to convergence.
            an = -i*(i-a);
            b += 2.0;
            d=an*d+b;
            if (fabs(d) < FPMIN) d=FPMIN;
            c=b+an/c;
            if (fabs(c) < FPMIN) c=FPMIN;
            d=1.0/d;
            del=d*c;
            h *= del;
            if (fabs(del-1.0) <= EPS) break;
        }
        return exp(-x+a*log(x)-gln)*h; // Put factors in front.
    }
    double gammpapprox(double a, double x, int psig) {
        // Incomplete gamma by quadrature. Returns P(a,x) or Q(a, x),
        // when psig is 1 or 0, respectively. User should not call directly.
        int j;
        double xu,t,sum,ans;
        double a1 = a-1.0, lna1 = log(a1), sqrta1 = sqrt(a1);
        gln = gammln(a);
        // Set how far to integrate into the tail:
        if (x > a1) xu = MAX(a1 + 11.5*sqrta1, x + 6.0*sqrta1);
        else xu = MAX(0.,MIN(a1 - 7.5*sqrta1, x - 5.0*sqrta1));
        sum = 0;
        for (j=0;j<ngau;j++) { // Gauss-Legendre.
            t = x + (xu-x)*y[j];
            sum += w[j]*exp(-(t-a1)+a1*(log(t)-lna1));
        }
        ans = sum*(xu-x)*exp(a1*(lna1-1.)-gln);
        return (psig?(ans>0.0? 1.0-ans:-ans):(ans>=0.0? ans:1.0+ans));
    }
    double invgammp(Doub p, Doub a);
    // Inverse function on x of P(a,x) .
};
// Out-of-class definitions of Gamma's static constants.
// EPS is numeric_limits<Doub>::epsilon(); FPMIN is numeric_limits<Doub>::min() divided by EPS.
const Doub Gamma::EPS = numeric_limits<Doub>::epsilon();
const Doub Gamma::FPMIN = numeric_limits<Doub>::min()/EPS;
and struct Gauleg18:
struct Gauleg18 {
    // Abscissas and weights for Gauss-Legendre quadrature.
    static const Int ngau = 18; // Number of entries in y[] and w[].
    static const Doub y[18];    // Abscissas.
    static const Doub w[18];    // Weights; w[j] pairs with y[j].
};
const Doub Gauleg18::y[18] = {0.0021695375159141994,
    0.011413521097787704,0.027972308950302116,0.051727015600492421,
    0.082502225484340941, 0.12007019910960293,0.16415283300752470,
    0.21442376986779355, 0.27051082840644336, 0.33199876341447887,
    0.39843234186401943, 0.46931971407375483, 0.54413605556657973,
    0.62232745288031077, 0.70331500465597174, 0.78649910768313447,
    0.87126389619061517, 0.95698180152629142};
// Each row holds three weights; every entry must be comma-separated so that
// the initializer supplies exactly 18 values.
const Doub Gauleg18::w[18] = {0.0055657196642445571,
    0.012915947284065419,0.020181515297735382,0.027298621498568734,
    0.034213810770299537,0.040875750923643261,0.047235083490265582,
    0.053244713977759692,0.058860144245324798,0.064039797355015485,
    0.068745323835736408,0.072941885005653087,0.076598410645870640,
    0.079687828912071670,0.082187266704339706,0.084078218979661945,
    0.085346685739338721,0.085983275670394821};
and, finally, function Gamma::invgammp():
double Gamma::invgammp(double p, double a) {
    // Returns x such that P(a,x) = p for an argument p between 0 and 1.
    // Throws a C-string message when a <= 0.
    // Side effect: stores gammln(a) in gln.
    int j;
    // lna1 and afac are only meaningful (and only read) when a > 1.
    double x,err,t,u,pp,lna1=0.0,afac=0.0,a1=a-1;
    const double EPS=1.e-8; // Accuracy is the square of EPS. (Local; hides Gamma::EPS.)
    if (a <= 0.) throw("a must be pos in invgammap"); // Reject bad a before it reaches gammln.
    gln=gammln(a);
    if (p >= 1.) return MAX(100.,a + 100.*sqrt(a));
    if (p <= 0.) return 0.0;
    if (a > 1.) {
        // Initial guess for a > 1.
        lna1=log(a1);
        afac = exp(a1*(lna1-1.)-gln);
        pp = (p < 0.5)? p : 1. - p;
        t = sqrt(-2.*log(pp));
        x = (2.30753+t*0.27061)/(1.+t*(0.99229+t*0.04481)) - t;
        if (p < 0.5) x = -x;
        x = MAX(1.e-3,a*pow(1.-1./(9.*a)-x/(3.*sqrt(a)),3));
    } else {
        // Initial guess for a <= 1. The comment that accompanied this line was
        // truncated in the source to "and (6.2.9)."; it names the equations
        // the guess is based on.
        t = 1.0 - a*(0.253+a*0.12);
        if (p < t) x = pow(p/t,1./a);
        else x = 1.-log(1.-(p-t)/(1.-t));
    }
    // Refine the guess; at most 12 iterations.
    for (j=0;j<12;j++) {
        if (x <= 0.0) return 0.0; // x too small to compute accurately.
        err = gammp(a,x) - p;
        if (a > 1.) t = afac*exp(-(x-a1)+a1*(log(x)-lna1));
        else t = exp(-x+a1*log(x)-gln);
        u = err/t;
        // Halley's method.
        x -= (t = u/(1.-0.5*MIN(1.,u*((a-1.)/x - 1))));
        // Halve old value if x tries to go negative.
        if (x <= 0.) x = 0.5*(x + t);
        if (fabs(t) < EPS*x ) break;
    }
    return x;
}
Here is my version of a C/C++ function that does simple linear regression. The calculations follow the wikipedia article on simple linear regression. This is published as a single-header public-domain (MIT) library on github: simple_linear_regression. The library (.h file) is tested to work on Linux and Windows, and from C and C++ using -Wall -Werror and all -std versions supported by clang/gcc.
#define SIMPLE_LINEAR_REGRESSION_ERROR_INPUT_VALUE -2
#define SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC -3
/*
 * Least-squares fit of a straight line through the n points (x[i], y[i]).
 *
 * Each of slope_out, intercept_out and r2_out may be NULL, in which case
 * that result is not stored.
 *
 * Returns 0 on success,
 * SIMPLE_LINEAR_REGRESSION_ERROR_INPUT_VALUE when x or y is NULL or n < 2,
 * SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC when a denominator is exactly zero.
 * When only the r2 denominator is zero, the slope and intercept have already
 * been stored by the time the error is returned.
 */
int simple_linear_regression(const double * x, const double * y, const int n, double * slope_out, double * intercept_out, double * r2_out) {
    double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0, syy = 0.0;
    double count = 0.0;
    double x_term = 0.0;
    double xy_term = 0.0;
    double fit_slope = 0.0;
    int k = 0;

    if (n < 2 || x == NULL || y == NULL) {
        return SIMPLE_LINEAR_REGRESSION_ERROR_INPUT_VALUE;
    }

    /* One pass to gather the five sums the closed-form solution needs. */
    count = (double)(n);
    for (k = 0; k < n; k++) {
        const double xk = x[k];
        const double yk = y[k];
        sx += xk;
        sxx += xk * xk;
        sxy += xk * yk;
        sy += yk;
        syy += yk * yk;
    }

    x_term = count * sxx - sx * sx;
    if (x_term == 0.0) {
        return SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC;
    }
    xy_term = count * sxy - sx * sy;
    fit_slope = xy_term / x_term;

    if (slope_out != NULL) {
        *slope_out = fit_slope;
    }
    if (intercept_out != NULL) {
        *intercept_out = (sy - fit_slope * sx) / count;
    }
    if (r2_out != NULL) {
        const double r2_denominator = x_term * ((count * syy) - (sy * sy));
        if (r2_denominator == 0.0) {
            return SIMPLE_LINEAR_REGRESSION_ERROR_NUMERIC;
        }
        *r2_out = xy_term * xy_term / r2_denominator;
    }
    return 0;
}
Usage example:
#define SIMPLE_LINEAR_REGRESSION_IMPLEMENTATION
#include "simple_linear_regression.h"
#include <stdio.h>
/* Some data that we want to find the slope, intercept and r2 for */
static const double x[] = { 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83 };
static const double y[] = { 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46 };
/*
 * Fits the sample x/y data declared above and prints the slope, intercept
 * and r2. If the fit reports a negative status, prints the matching error
 * string and returns that status instead.
 */
int main() {
    double slope = 0.0, intercept = 0.0, r2 = 0.0;
    const int res = simple_linear_regression(x, y, sizeof(x) / sizeof(x[0]), &slope, &intercept, &r2);

    if (res >= 0) {
        printf("slope: %f\n", slope);
        printf("intercept: %f\n", intercept);
        printf("r2: %f\n", r2);
        return 0;
    }
    printf("Error: %s\n", simple_linear_regression_error_string(res));
    return res;
}
The original example above worked well for me with slope and offset but I had a hard time with the corr coef. Maybe I don't have my parenthesis working the same as the assumed precedence? Anyway, with some help of other web pages I finally got values that match the linear trend-line in Excel. Thought I would share my code using Mark Lakata's variable names. Hope this helps.
/* term1 is the numerator shared by the slope and by r2. */
double term1 = ((n * sumxy) - (sumx * sumy));
double term2 = ((n * sumx2) - (sumx * sumx));
double term3 = ((n * sumy2) - (sumy * sumy));
double term23 = (term2 * term3);
double slope = term1 / denom;
double intercept = ((sumy * sumx2) - (sumx * sumxy)) / denom;
/* r2 stays at 1.0 when the product of the two spreads is too small to divide by. */
/* Define MIN_DOUBLE somewhere as 1e-9 or similar */
double r2 = (fabs(term23) > MIN_DOUBLE) ? (term1 * term1) / term23 : 1.0;
as an assignment I had to code in C a simple linear regression using RMSE loss function. The program is dynamic and you can enter your own values and choose your own loss function which is for now limited to Root Mean Square Error. But first here are the algorithms I used:
now the code... you need gnuplot to display the chart, sudo apt install gnuplot
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/types.h>
#define BUFFSIZE 64
#define MAXSIZE 100
static double vector_x[MAXSIZE] = {0};
static double vector_y[MAXSIZE] = {0};
static double vector_predict[MAXSIZE] = {0};
static double max_x;
static double max_y;
static double mean_x;
static double mean_y;
static double teta_0_intercept;
static double teta_1_grad;
static double RMSE;
static double r_square;
static double prediction;
static char intercept[BUFFSIZE];
static char grad[BUFFSIZE];
static char xrange[BUFFSIZE];
static char yrange[BUFFSIZE];
static char lossname_RMSE[BUFFSIZE] = "Simple Linear Regression using RMSE'";
static char cmd_gnu_0[BUFFSIZE] = "set title '";
static char cmd_gnu_1[BUFFSIZE] = "intercept = ";
static char cmd_gnu_2[BUFFSIZE] = "grad = ";
static char cmd_gnu_3[BUFFSIZE] = "set xrange [0:";
static char cmd_gnu_4[BUFFSIZE] = "set yrange [0:";
static char cmd_gnu_5[BUFFSIZE] = "f(x) = (grad * x) + intercept";
static char cmd_gnu_6[BUFFSIZE] = "plot f(x), 'data.temp' with points pointtype 7";
static char const *commands_gnuplot[] = {
cmd_gnu_0,
cmd_gnu_1,
cmd_gnu_2,
cmd_gnu_3,
cmd_gnu_4,
cmd_gnu_5,
cmd_gnu_6,
};
static size_t size;
/*
 * Prompts on stdout and reads from stdin the number of samples, then every
 * x value, then every y value, storing them in the file-scope `size`,
 * `vector_x` and `vector_y`.
 *
 * The size prompt repeats until a value no larger than MAXSIZE is read, and
 * the samples are read exactly once afterwards. If stdin ends before a size
 * is read, `size` is set to 0 and the function returns without reading any
 * samples.
 */
static void user_input()
{
    size_t i;

    for (;;) {
        int fields;

        printf("Enter x,y vector size, MAX = 100\n");
        fields = scanf("%zu", &size);
        if (fields == EOF) {
            size = 0;
            return;
        }
        if (fields != 1) {
            /* Drop the token that is not a number so the next read can make progress. */
            scanf("%*s");
            continue;
        }
        if (size <= MAXSIZE) {
            break;
        }
        printf("Wrong input size is too big\n");
    }
    printf("vector's size is %zu\n", size);
    for (i = 0; i < size; i++) {
        printf("Enter vector_x[%zu] values\n", i);
        scanf("%lf", &vector_x[i]);
    }
    for (i = 0; i < size; i++) {
        printf("Enter vector_y[%zu] values\n", i);
        scanf("%lf", &vector_y[i]);
    }
}
/*
 * Prints the first `size` entries of vector_x and vector_y, one pair per line.
 * Indices are size_t, so they are printed with %zu.
 */
static void display_vector()
{
    size_t i;
    for (i = 0; i < size; i++){
        printf("vector_x[%zu] = %lf\t", i, vector_x[i]);
        printf("vector_y[%zu] = %lf\n", i, vector_y[i]);
    }
}
/*
 * Appends the NUL-terminated string q to the end of the NUL-terminated
 * string p, in place. No bounds checking is done: p must have room for
 * both strings plus the terminator.
 */
static void concatenate(char p[], char q[]) {
    char *dst = p;
    const char *src = q;

    /* Walk to the terminator of p. */
    while (*dst != '\0') {
        ++dst;
    }
    /* Copy q over it, one character at a time. */
    for (; *src != '\0'; ++src, ++dst) {
        *dst = *src;
    }
    *dst = '\0';
}
/*
 * Stores the arithmetic means of the first `size` entries of vector_x and
 * vector_y in mean_x and mean_y, then prints both.
 */
static void compute_mean_x_y()
{
    double total_x = 0.0;
    double total_y = 0.0;
    size_t k = 0;

    while (k < size) {
        total_x += vector_x[k];
        total_y += vector_y[k];
        ++k;
    }
    mean_x = total_x / size;
    mean_y = total_y / size;
    printf("mean_x = %lf\n", mean_x);
    printf("mean_y = %lf\n", mean_y);
}
/*
 * Computes the regression slope as
 *   sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2)
 * over the first `size` samples, stores it in teta_1_grad and prints it.
 * Relies on mean_x and mean_y having been computed already.
 */
static void compute_teta_1_grad()
{
    double cross_sum = 0.0;
    double square_sum = 0.0;
    size_t k;

    /* Both sums run over the same samples, so gather them in one pass. */
    for (k = 0; k < size; k++) {
        const double dx = vector_x[k] - mean_x;
        cross_sum += dx * (vector_y[k] - mean_y);
        square_sum += dx * dx;
    }
    teta_1_grad = cross_sum / square_sum;
    printf("teta_1_grad = %lf\n", teta_1_grad);
}
/*
 * Derives the intercept from the slope and the two means
 * (mean_y - teta_1_grad * mean_x), stores it in teta_0_intercept and prints it.
 */
static void compute_teta_0_intercept()
{
    const double slope_part = teta_1_grad * mean_x;

    teta_0_intercept = mean_y - slope_part;
    printf("teta_0_intercept = %lf\n", teta_0_intercept);
}
/*
 * Evaluates the fitted line at every input x, stores the results in
 * vector_predict and prints each one, followed by a blank line.
 * Indices are size_t, so they are printed with %zu.
 */
static void compute_prediction()
{
    size_t i;
    for (i = 0; i < size; i++) {
        vector_predict[i] = teta_0_intercept + (teta_1_grad * vector_x[i]);
        printf("y^[%zu] = %lf\n", i, vector_predict[i]);
    }
    printf("\n");
}
/*
 * Refreshes vector_predict via compute_prediction(), prints the squared
 * error of every sample, then stores the root of their mean in RMSE and
 * prints it.
 *
 * The squared errors are summed in a local so that a previous value of the
 * global RMSE never leaks into a new computation.
 */
static void compute_RMSE()
{
    double squared_error_sum = 0.0;
    double error = 0;
    size_t i;

    compute_prediction();
    for (i = 0; i < size; i++) {
        error = (vector_predict[i] - vector_y[i]) * (vector_predict[i] - vector_y[i]);
        printf("error y^[%zu] = %lf\n", i, error);
        squared_error_sum += error;
    }
    /* mean, then square root of the mean */
    RMSE = sqrt(squared_error_sum / size);
    printf("\nRMSE = %lf\n", RMSE);
}
/*
 * Asks which loss function to use and runs it. Only option 1 (RMSE) exists:
 * it appends the RMSE title to cmd_gnu_0 and calls compute_RMSE().
 *
 * Any other answer prints a retry message and asks again. The retry is a
 * loop, so repeated bad answers cannot exhaust the stack, and the function
 * returns without computing anything if stdin ends.
 */
static void compute_loss_function()
{
    for (;;) {
        int input = 0;
        int fields;

        printf("Which loss function do you want to use?\n");
        printf(" 1 - RMSE\n");
        fields = scanf("%d", &input);
        if (fields == EOF) {
            return;
        }
        if (fields != 1) {
            /* Drop the token that is not a number so the next read can make progress. */
            scanf("%*s");
            input = 0;
        }
        switch(input) {
        case 1:
            concatenate(cmd_gnu_0, lossname_RMSE);
            compute_RMSE();
            printf("\n");
            return;
        default:
            printf("Wrong input try again\n");
            break;
        }
    }
}
/*
 * Computes R^2 = 1 - SS_res / SS_tot over the first `size` samples, where
 * SS_res uses vector_predict and SS_tot uses mean_y, stores it in r_square
 * and prints it. The `size` parameter hides the file-scope variable of the
 * same name.
 */
static void compute_r_square(size_t size)
{
    double residual_ss = 0.0;
    double total_ss = 0.0;
    size_t k = 0;

    while (k < size) {
        const double residual = vector_y[k] - vector_predict[k];
        const double deviation = vector_y[k] - mean_y;
        residual_ss += residual * residual;
        total_ss += deviation * deviation;
        ++k;
    }
    r_square = 1 - (residual_ss/total_ss);
    printf("R_square = %lf\n", r_square);
}
/*
 * Reads one x value from stdin, evaluates the fitted line there, stores the
 * result in `prediction` and prints it next to the x that was read.
 */
static void compute_predict_for_x()
{
    double query = 0.0;

    printf("Please enter x value\n");
    scanf("%lf", &query);
    prediction = (teta_1_grad * query) + teta_0_intercept;
    printf("y^ if x = %lf -> %lf\n",query, prediction);
}
/*
 * Stores the largest of the first `size` entries of vector_x in max_x and of
 * vector_y in max_y, then prints both.
 *
 * The running maxima start from the first sample, not from 0, so the
 * result is also correct when every value is negative. With no samples,
 * max_x and max_y are left untouched.
 */
static void compute_max_x_y()
{
    size_t i;

    if (size > 0) {
        max_x = vector_x[0];
        max_y = vector_y[0];
    }
    for (i = 1; i < size; i++) {
        if (vector_x[i] > max_x) {
            max_x = vector_x[i];
        }
        if (vector_y[i] > max_y) {
            max_y = vector_y[i];
        }
    }
    printf("vector_x max value %lf\n", max_x);
    printf("vector_y max value %lf\n", max_y);
}
/* Appends src to the NUL-terminated string in dst without writing past dst[cap - 1]. */
static void append_bounded(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (used + 1 < cap) {
        strncat(dst, src, cap - used - 1);
    }
}

/*
 * Completes the gnuplot command strings with the fitted intercept, slope and
 * axis ranges, echoes them, writes the samples to "data.temp" and sends the
 * commands to a `gnuplot -persistent` process.
 *
 * The data file is closed before gnuplot is started so that the plot command
 * reads complete data, and the pipe is closed once all commands are sent.
 * If the data file or the pipe cannot be opened, the reason is reported with
 * perror() and nothing is plotted.
 */
static void display_model_line()
{
    FILE *gnuplot_pipe;
    FILE *temp;
    size_t i;
    const size_t command_count = sizeof(commands_gnuplot) / sizeof(commands_gnuplot[0]);

    /* Bounded formatting: a very large value is truncated, never overflows. */
    snprintf(intercept, sizeof(intercept), "%0.7lf", teta_0_intercept);
    snprintf(grad, sizeof(grad), "%0.7lf", teta_1_grad);
    snprintf(xrange, sizeof(xrange), "%0.7lf", max_x + 1);
    snprintf(yrange, sizeof(yrange), "%0.7lf", max_y + 1);
    append_bounded(cmd_gnu_1, sizeof(cmd_gnu_1), intercept);
    append_bounded(cmd_gnu_2, sizeof(cmd_gnu_2), grad);
    append_bounded(cmd_gnu_3, sizeof(cmd_gnu_3), xrange);
    append_bounded(cmd_gnu_3, sizeof(cmd_gnu_3), "]");
    append_bounded(cmd_gnu_4, sizeof(cmd_gnu_4), yrange);
    append_bounded(cmd_gnu_4, sizeof(cmd_gnu_4), "]");
    printf("grad = %s\n", grad);
    printf("intercept = %s\n", intercept);
    printf("xrange = %s\n", xrange);
    printf("yrange = %s\n", yrange);
    printf("cmd_gnu_0: %s\n", cmd_gnu_0);
    printf("cmd_gnu_1: %s\n", cmd_gnu_1);
    printf("cmd_gnu_2: %s\n", cmd_gnu_2);
    printf("cmd_gnu_3: %s\n", cmd_gnu_3);
    printf("cmd_gnu_4: %s\n", cmd_gnu_4);
    printf("cmd_gnu_5: %s\n", cmd_gnu_5);
    printf("cmd_gnu_6: %s\n", cmd_gnu_6);

    /* create data.temp */
    temp = fopen("data.temp", "w");
    if (temp == NULL) {
        perror("data.temp");
        return;
    }
    for (i = 0; i < size; i++)
    {
        fprintf(temp, "%f %f \n", vector_x[i], vector_y[i]);
    }
    fclose(temp);

    /* display gnuplot */
    gnuplot_pipe = popen("gnuplot -persistent", "w");
    if (gnuplot_pipe == NULL) {
        perror("gnuplot");
        return;
    }
    for (i = 0; i < command_count; i++)
    {
        fprintf(gnuplot_pipe, "%s \n", commands_gnuplot[i]);
    }
    pclose(gnuplot_pipe);
}
/* Prints a section title framed by two separator lines. */
static void print_banner(const char *title)
{
    printf("===========================================\n");
    printf("%s\n", title);
    printf("===========================================\n");
}

/*
 * Runs the whole interactive session in order: read the samples, compute the
 * means, maxima, slope and intercept, evaluate the chosen loss function and
 * R^2, predict y for one user-supplied x, and finally plot the model.
 */
int main(void)
{
    print_banner("INPUT DATA");
    user_input();
    display_vector();
    printf("\n");

    print_banner("COMPUTE MEAN X:Y, TETA_1 TETA_0");
    compute_mean_x_y();
    compute_max_x_y();
    compute_teta_1_grad();
    compute_teta_0_intercept();
    printf("\n");

    print_banner("COMPUTE LOSS FUNCTION");
    compute_loss_function();

    print_banner("COMPUTE R_square");
    compute_r_square(size);
    printf("\n");

    print_banner("COMPUTE y^ according to x");
    compute_predict_for_x();
    printf("\n");

    print_banner("DISPLAY LINEAR REGRESSION");
    display_model_line();
    printf("\n");
    return 0;
}
Look at Section 1 of this paper. This section expresses a 2D linear regression as a matrix multiplication exercise. As long as your data is well-behaved, this technique should permit you to develop a quick least squares fit.
Depending on the size of your data, it might be worthwhile to algebraically reduce the matrix multiplication to simple set of equations, thereby avoiding the need to write a matmult() function. (Be forewarned, this is completely impractical for more than 4 or 5 data points!)
The fastest, most efficient way to solve least squares, as far as I am aware, is to subtract (the gradient)/(the 2nd order gradient) from your parameter vector. (2nd order gradient = i.e. the diagonal of the Hessian.)
Here is the intuition:
Let's say you want to optimize least squares over a single parameter. This is equivalent to finding the vertex of a parabola. Then, for any random initial parameter, x0, the vertex of the loss function is located at x0 - f(1) / f(2). That's because adding - f(1) / f(2) to x will always zero out the derivative, f(1).
Side note: Implementing this in Tensorflow, the solution appeared at w0 - f(1) / f(2) / (number of weights), but I'm not sure if that's due to Tensorflow or if it's due to something else..
Given an array of positive integers, what's the most efficient algorithm to find non-consecutive elements from this array which, when added together, produce the maximum sum?
Dynamic programming? Given an array A[0..n], let M(i) be the optimal solution using the elements with indices 0..i. Then M(-1) = 0 (used in the recurrence), M(0) = A[0], and M(i) = max(M(i - 1), M(i - 2) + A[i]) for i = 1, ..., n. M(n) is the solution we want. This is O(n). You can use another array to store which choice is made for each subproblem, and so recover the actual elements chosen.
Let arr be the given array and Sum be another array such that Sum[i] represents the maximum sum of non-consecutive elements from arr[0]..arr[i].
We have:
Sum[0] = arr[0]
Sum[1] = max(Sum[0],arr[1])
Sum[2] = max(Sum[0]+arr[2],Sum[1])
...
Sum[i] = max(Sum[i-2]+arr[i],Sum[i-1]) when i>=2
If size is the number of elements in arr then sum[size-1] will be the answer.
One can code a simple recursive method in top down order as:
/*
 * Largest sum of non-adjacent elements within arr[0..i], computed by plain
 * recursion: either arr[i] is taken together with the best of arr[0..i-2],
 * or it is skipped in favour of the best of arr[0..i-1].
 */
int sum(int *arr,int i) {
    int taking_current;
    int skipping_current;

    switch (i) {
    case 0:
        return arr[0];
    case 1:
        return max(arr[0],arr[1]);
    default:
        break;
    }
    taking_current = sum(arr,i-2)+arr[i];
    skipping_current = sum(arr,i-1);
    return max(taking_current,skipping_current);
}
The above code is very inefficient as it makes exhaustive duplicate recursive calls. To avoid this we use memoization by using an auxiliary array called sum as:
/*
 * Largest sum of non-adjacent elements of arr[0..size-1], computed bottom-up
 * with an auxiliary table where best[i] is the answer for arr[0..i].
 *
 * Returns 0 when arr is NULL or size <= 0, and -1 when the table cannot be
 * allocated. The table is released before returning.
 */
int sum(int *arr,int size) {
    int *best;
    int result;
    int i;

    if (arr == NULL || size <= 0) {
        return 0;
    }
    /* The cast keeps the code valid when it is compiled as C++ as well. */
    best = (int *)malloc(sizeof(int) * (size_t)size);
    if (best == NULL) {
        return -1;
    }
    for(i=0;i<size;i++) {
        if(i==0) {
            best[0] = arr[0];
        }else if(i==1) {
            best[1] = (best[0] > arr[1]) ? best[0] : arr[1];
        }else{
            const int taking_current = best[i-2]+arr[i];
            best[i] = (taking_current > best[i-1]) ? taking_current : best[i-1];
        }
    }
    result = best[size-1];
    free(best);
    return result;
}
Which is O(N) in both space and time.
O(N) in time and O(1) in space (DP) solution:
int dp[2] = {a[0], a[1]};
for(int i = 2; i < a.size(); i++)
{
int temp = dp[1];
dp[1] = dp[0] + a[i];
dp[0] = max(dp[0], temp);
}
int answer = max(dp[0], dp[1]);
/**
 * Maximum sum of elements of an array of positive numbers such that no two
 * picked elements are adjacent.
 * Top-down recursion without memoisation, as an alternative to the
 * bottom-up approach.
 */
public class MaxSumNonConsec {
    /**
     * Best non-adjacent sum within a[start..end] (both inclusive).
     * Every index in the range is tried as a picked element, combined with
     * the best sums of the ranges two or more positions to either side.
     */
    public static int maxSum(int a[], int start, int end) {
        // Trivial cases, checked in this exact order.
        if (start == end) {
            return a[start];
        }
        if (start > end) {
            return 0;
        }
        if (end - start == 1) {
            return Math.max(a[start], a[end]);
        }
        if (start < 0) {
            return 0;
        }
        if (end >= a.length) {
            return 0;
        }

        // Pick a[pivot] and solve the two sub-ranges that stay available.
        int best = 0;
        for (int pivot = start; pivot <= end; pivot++) {
            int rightBest = maxSum(a, pivot + 2, end);
            int leftBest = maxSum(a, start, pivot - 2);
            best = Math.max(best, rightBest + leftBest + a[pivot]);
        }
        return best;
    }

    public static void main(String args[]) {
        int a[] = { 8, 6, 11, 10, 11, 10 };
        System.out.println(maxSum(a, 0, a.length - 1));
    }
}
The solution by @Ismail Badawi does not seem to work in the following case. Take the array 8, 3, 1, 7: in this case the algorithm returns a maximum sum of 9, whereas it should be 15.
A solution to correct it is given an array A[0..n], let M(i) be the optimal solution using the elements with indices 0..i. Then M(0) = A[0], and M(i) = max(M(i - 1), M(i - 2) + A[i], M(i-3) + A[i]) for i = 3, ..., n. M(n) is the solution we want. This is O(n).
IIUC: say your array is 1,2,3,4,5 then 3+5 would be 'correct' and 4+5 not, this means you'll have to find the largest numbers and check if they are consecutive. So an algorithm would be to make use of a second array, for the number of elements you need to add which you fill by traversing the original array and finding the largest non-consecutive integers, then add this up.
For the above array I guess [1,3], [1,4], [1,5], [1,3,5], [2,4], [2,5], [3,5] would be valid non-consecutive integers to be summed, the max sum would be 9 in this case [1,3,5]. So, to adapt the above algorithm, I would suggest you step through the array using several temporary arrays to find all the non-consecutive integer lists, and then check which is the largest. Keep in mind that 'most elements' does not mean 'largest sum'.
Dynamic programming solution is the most elegant of all.
And it serves for any value of the distance between two numbers that should not be considered.
But for k= 1, which is for consecutive numbers constraint, I tried using backtracking.
There are different patterns to be compared for the maximum sum. Below is the list :
Number of patterns for 1 = 1
[1]
Number of patterns for 2 = 2
[1][2]
Number of patterns for 3 = 2
[1, 3][2]
Number of patterns for 4 = 3
[1, 3][1, 4][2, 4]
Number of patterns for 5 = 4
[1, 3, 5][1, 4][2, 4][2, 5]
Number of patterns for 6 = 5
[1, 3, 5][1, 3, 6][1, 4, 6][2, 4, 6][2, 5]
Number of patterns for 7 = 7
[1, 3, 5, 7][1, 3, 6][1, 4, 6][1, 4, 7][2, 4, 6][2, 4, 7][2, 5, 7]
Number of patterns for 8 = 9
[1, 3, 5, 7][1, 3, 5, 8][1, 3, 6, 8][1, 4, 6, 8][1, 4, 7][2, 4, 6, 8][2, 4, 7][2, 5, 7][2, 5, 8]
Number of patterns for 9 = 12
[1, 3, 5, 7, 9][1, 3, 5, 8][1, 3, 6, 8][1, 3, 6, 9][1, 4, 6, 8][1, 4, 6, 9][1, 4, 7, 9][2, 4, 6, 8][2, 4, 6, 9][2, 4, 7, 9][2, 5, 7, 9][2, 5, 8]
Following is the code in java:
// Backtracking search over lists of 1-based positions into inputArry.
// Every completed list is scored by updateMax(); the best sum seen ends up in
// maxSum and the positions that produced it in outArry.
// NOTE(review): List and ArrayList are used but no java.util import is
// visible in this excerpt - confirm it exists at the top of the file.
public class MaxSeqRecursive {
// Number of positions considered; position k refers to inputArry[k - 1].
private static int num = 5;
private static int[] inputArry = new int[] { 1,3,9,20,7 };
// Positions of the best list found so far (set by updateMax).
private static Object[] outArry;
// Sum of the best list found so far.
private static int maxSum = 0;
// Starts the search from the list [1], then prints the best positions
// separated by ':' followed by the best sum.
public static void main(String[] args) {
List<Integer> output = new ArrayList<Integer>();
output.add(1);
convert(output, -1);
for (int i = 0; i < outArry.length; i++) {
System.out.print(outArry[i] + ":");
}
System.out.print(maxSum);
}
// One step of the search, repeated through the recursive call at the end.
// posArry is the list being built. prevValue is -1 right after a position
// was appended, otherwise the position that was just removed from the end.
public static void convert( List<Integer> posArry, int prevValue) {
int currentValue = -1;
if (posArry.size() == 0) {
// The list ran empty. If the position removed last was 2 the search
// stops; otherwise it restarts from the list [2].
if (prevValue == 2) {
return;
} else {
posArry.add(2);
prevValue = -1;
}
}
currentValue = (int) posArry.get(posArry.size() - 1);
if (currentValue == num || currentValue == num - 1) {
// The list ends on one of the last two positions: score it, then drop
// its last position and remember which one was dropped.
updateMax(posArry);
prevValue = (int) posArry.get(posArry.size() - 1);
posArry.remove(posArry.size() - 1);
} else {
int returnIndx = getNext(posArry, prevValue);
// getNext() as written here only returns -1 or a position, so this
// -2 branch is not taken by the visible code.
if (returnIndx == -2)
return;
if (returnIndx == -1) {
// No further position to try after the current last one: drop it.
prevValue = (int) posArry.get(posArry.size() - 1);
posArry.remove(posArry.size() - 1);
} else {
// Extend the list with the proposed position.
posArry.add(returnIndx);
prevValue = -1;
}
}
convert(posArry, prevValue);
}
// Proposes the next position to append after the current last position
// `value`, or -1 when there is none:
// - prevValue == -1: propose value + 2;
// - otherwise, if prevValue - value < 3: propose prevValue + 1;
// - otherwise -1.
// A proposal larger than num is turned into -1.
public static int getNext( List<Integer> posArry, int prevValue) {
int currIndx = posArry.size();
int returnVal = -1;
int value = (int) posArry.get(currIndx - 1);
if (prevValue < num) {
if (prevValue == -1)
returnVal = value + 2;
else if (prevValue - value < 3)
returnVal = prevValue + 1;
else
returnVal = -1;
}
if (returnVal > num)
returnVal = -1;
return returnVal;
}
// Sums the inputArry values at the listed positions; when the sum is strictly
// larger than maxSum, records it and a snapshot of the positions.
public static void updateMax(List posArry) {
int sum = 0;
for (int i = 0; i < posArry.size(); i++) {
sum = sum + inputArry[(Integer) posArry.get(i) - 1];
}
if (sum > maxSum) {
maxSum = sum;
outArry = posArry.toArray();
}
}
}
Time complexity: O( number of patterns to be compared)
Another Java Implementation ( runs in linear time )
public class MaxSum {
private static int ofNonConsecutiveElements (int... elements) {
int maxsofar,maxi2,maxi1;
maxi1 = maxsofar = elements[0];
maxi2 = 0;
for (int i = 1; i < elements.length; i++) {
maxsofar = Math.max(maxi2 + elements[i], maxi1);
maxi2 = maxi1;
maxi1 = maxsofar;
}
return maxsofar;
}
public static void main(String[] args) {
System.out.println(ofNonConsecutiveElements(6, 4, 2, 8, 1));
}
}
My solution is O(N) time and O(1) space.
/** Largest sum of non-adjacent elements of a. */
private int largestSumNonConsecutive(int[] a) {
    final int[] pair = largestSumNonConsecutive(a, a.length-1);
    return pair[1];
}

/**
 * Returns a two-element array {largest(end-1), largest(end)}, where
 * largest(k) is the best non-adjacent sum within a[0..k].
 * Recurses once per index down to end == 0.
 */
private int[] largestSumNonConsecutive(int[] a, int end) {
    if (end==0) {
        return new int[]{0,a[0]};
    }
    final int[] pair = largestSumNonConsecutive(a, end-1);
    final int previousBest = pair[1];
    final int takingCurrent = pair[0] + a[end];
    pair[1] = Math.max(takingCurrent, previousBest);
    pair[0] = previousBest;
    return pair;
}
// Largest sum of non-adjacent elements within a[0..n], by plain recursion:
// either a[n] is skipped (best of a[0..n-1]) or taken (a[n] plus the best of
// a[0..n-2]). Returns 0 for n < 0.
// The vector is taken by const reference so that the recursive calls do not
// copy it.
int nonContigousSum(const std::vector<int>& a, int n) {
    if (n < 0) {
        return 0;
    }
    return std::max(nonContigousSum(a, n - 1), nonContigousSum(a, n - 2) + a[n]);
}
this is the recursive approach with the help of which we can solve this question
(OPTIMAL SUB-STRUCTURE: A HALLMARK OF DYNAMIC PROGRAMMING.)
Here we are considering two cases, in first we exclude a[n] and in the second we include a[n] and return the max of those sub cases found.
We are basically exploring all the non-contiguous subsets of the array and returning the maximum sum found among them.
Use tabulation or memoization for avoiding same sub-problems.
A penny from me.
public class Problem {
    /**
     * Solving by recursion, top down approach. Always try this recursion approach and then go with
     * iteration. We have to add dp table to optimize the time complexity.
     *
     * Returns the largest sum of non-adjacent elements within arr[0..i]; 0 when i is negative.
     */
    public static int maxSumRecur(int arr[], int i) {
        if(i < 0) return 0;
        if(i == 0) return arr[0];
        if(i == 1) return Math.max(arr[0], arr[1]);
        int includeIthElement = arr[i] + maxSumRecur(arr, i-2);
        int excludeIthElement = maxSumRecur(arr, i-1);
        return Math.max(includeIthElement, excludeIthElement);
    }

    /**
     * Solving by iteration. Bottom up approach.
     *
     * Prints the array and then the largest sum of non-adjacent elements.
     * Arrays with fewer than two elements are handled too: an empty array
     * reports 0, matching maxSumRecur for a negative index.
     */
    public static void maxSumIter(int arr[]) {
        System.out.println(Arrays.toString(arr));
        int best = 0;
        if (arr.length > 0) {
            int dp[] = new int[arr.length];
            dp[0] = arr[0];
            if (arr.length > 1) {
                dp[1] = Math.max(arr[0], arr[1]);
            }
            for(int i=2; i <= arr.length - 1; i++) {
                dp[i] = Math.max(arr[i] + dp[i-2], dp[i-1]);
            }
            best = dp[arr.length - 1];
        }
        System.out.println("Max subsequence sum by Iteration " + best + "\n");
    }

    /** Prints the array and then the result of maxSumRecur over the whole array. */
    public static void maxSumRecurUtil(int arr[]) {
        System.out.println(Arrays.toString(arr));
        System.out.println("Max subsequence sum by Recursion " + maxSumRecur(arr, arr.length - 1) +
                "\n");
    }

    public static void main(String[] args) {
        maxSumRecurUtil(new int[]{5, 5, 10, 100, 10, 5});
        maxSumRecurUtil(new int[]{20, 1, 2, 3});
        maxSumIter(new int[]{5, 5, 10, 100, 10, 5});
        maxSumIter(new int[]{20, 1, 2, 3});
    }
}
Make a list of numbers that is the odd or even sums corresponding to each number so far; e.g. for input of [1,2,4,1,2,3,5,3,1,2,3,4,5,2] the odd-even sums would be [1,2,5,3,7,6,12,9,13,11,16,15,21,17]
Now walk the list backwards greedily summing but skipping those elements whose odd/even sum is less than that of next-to-be-considered element.
# Builds, for every index, the sum of the elements that share its parity up to
# that index, then walks the list backwards keeping index i whenever its
# running sum is larger than the one at i - 1. Index 0 is always kept.
# Written so that it prints the same text under Python 2 and Python 3.
src = [1,2,4,1,2,3,5,3,1,2,3,4,5,2]

# odd_even_sums[i] == src[i] + odd_even_sums[i - 2]
odd_even_sums = src[:2]
for value in src[2:]:
    odd_even_sums.append(value + odd_even_sums[-2])

best = []
for i in reversed(range(len(src))):
    if i == 0:
        best.append(i)
        continue
    previous_sum = odd_even_sums[i-1]
    current_sum = odd_even_sums[i]
    if previous_sum == current_sum:
        # Ties are deliberately left unresolved.
        raise Exception("an exercise for the reader")
    if previous_sum < current_sum:
        best.append(i)
best.reverse()

print("Best: " + ",".join("%s=%s"%(b,src[b]) for b in best))
print("Scores: %s" % sum(odd_even_sums[b] for b in best))
Outputs:
Best: 0=1,1=2,2=4,4=2,6=5,8=1,10=3,12=5
Scores: 77
/**
 * Largest sum of non-adjacent elements of a, in one pass with two running
 * values. Returns 0 for an empty array.
 */
public static int findMaxSum(int[] a){
    int bestTwoBack = 0; // best sum up to index i-2
    int bestOneBack = 0; // best sum up to index i-1
    for (int idx = 0; idx < a.length; idx++) {
        // Either take a[idx] on top of bestTwoBack, or keep bestOneBack.
        final int bestHere = Math.max(bestTwoBack + a[idx], bestOneBack);
        bestTwoBack = bestOneBack;
        bestOneBack = bestHere;
    }
    return bestOneBack;
}
Below is the crux of algorithm:
max(max sum till (i-2)+a[i], max sum till (i-1))
O(N) time complexity and O(1) space complexity.
A rather naive yet complete implementation.
Recursion equation is T(n) = n^2 + nT(n-3), which if I'm not wrong leads to exponential time. The (n-3) comes from the fact a number cannot add with itself/previous/next numbers.
The program reports the constituent list that makes up the sum (there are multiple, exponentially growing, of these lists, but it just picks one).
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
 * Naive recursive search for the largest sum of non-adjacent elements that
 * also reports one list of elements producing that sum.
 */
public class MaxSumNoAdjacent {
    /** A sum together with the values that were added up to reach it. */
    private static class Sum {
        int sum;
        List<Integer> constituents = new ArrayList<>();

        Sum(int sum, List<Integer> constituents) {
            this.sum = sum;
            this.constituents = constituents;
        }

        @Override
        public String toString() {
            return "sum: " + sum + " " + constituents.toString();
        }
    }

    /**
     * Returns the best sum found for arr, or null when arr holds no usable
     * value. Entries equal to Integer.MIN_VALUE are treated as "unreachable"
     * markers and dropped before anything else happens.
     */
    public static Sum maxSum(int[] arr) {
        List<Integer> input = new ArrayList<>();
        for (int i : arr) {
            if (i != Integer.MIN_VALUE) { //Integer.MIN_VALUE indicates unreachability
                input.add(i);
            }
        }

        // Base cases: nothing left, one value, or two values (take the larger).
        if (input.size() == 0) {
            return null;
        }
        if (input.size() == 1) {
            List<Integer> constituents = new ArrayList<>();
            constituents.add(input.get(0));
            return new Sum(input.get(0), constituents);
        }
        if (input.size() == 2) {
            int max = Math.max(input.get(0), input.get(1));
            List<Integer> constituents = new ArrayList<>();
            constituents.add(max);
            return new Sum(max, constituents);
        }

        // For every value, build the array of values it may be combined with:
        // itself and its direct neighbours are marked unreachable.
        // The map is keyed by value, so a repeated value replaces the entry
        // stored for its earlier occurrence.
        Map<Integer, int[]> numberAndItsReachability = new HashMap<>();
        for (int i = 0; i < input.size(); i++) {
            int[] neighbours = new int[input.size()];
            if (i > 0) {
                neighbours[i-1] = Integer.MIN_VALUE; //unreachable to previous
            }
            if (i < input.size()-1) {
                neighbours[i+1] = Integer.MIN_VALUE; //unreachable to next
            }
            neighbours[i] = Integer.MIN_VALUE; //unreachable to itself
            for (int j = 0; j < neighbours.length; j++) {
                if (neighbours[j] == 0) {
                    neighbours[j] = input.get(j); //remember values of reachable neighbours
                }
            }
            numberAndItsReachability.put(input.get(i), neighbours);
        }

        // Try every value as a member of the sum and keep the best total.
        Sum maxSum = new Sum(Integer.MIN_VALUE, null);
        for (Entry<Integer, int[]> pair : numberAndItsReachability.entrySet()) {
            Sum sumMinusThisNumber = maxSum(pair.getValue()); //call recursively on its reachable neighbours
            if (sumMinusThisNumber != null) {
                int candidateSum = sumMinusThisNumber.sum + pair.getKey();
                if (maxSum.sum < candidateSum) {
                    sumMinusThisNumber.constituents.add(pair.getKey());
                    maxSum = new Sum(candidateSum, sumMinusThisNumber.constituents);
                }
            }
        }
        return maxSum;
    }

    public static void main(String[] args) {
        int[] arr1 = {3,2,5,10,7};
        int[] arr2 = {3,2,7,10};
        int[] arr3 = {5,5,10,40,50,35};
        int[] arr4 = {4,4,4,4};
        System.out.println(maxSum(arr1).toString());
        System.out.println(maxSum(arr2).toString());
        System.out.println(maxSum(arr3).toString());
        System.out.println(maxSum(arr4).toString());
    }
}
Here is a C# version for reference (you may refer to: http://dream-e-r.blogspot.com/2014/07/maximum-sum-of-non-adjacent-subsequence.html):
In-order to solve a problem using dynamic programming there should be a solution which has optimal substructure and overlapping sub problems properties. And the current problem has optimal substructure property.
Say, f(i) is defined as maximum subsequence sum of non adjacent elements for 'i' items, then
f( i) = 0 if i = 0
max (f(i-1), f(i-2) + a[i])
Below is the algorithm for the same (note it can be solved without encapsulating the data in 'record' - I just preferred it this way) - which should illustrate the above idea:
// Largest sum of a subsequence of `a` in which no two items are adjacent.
// Returns 0 for an empty array. `a` is first passed to ThrowIfNull("a").
int FindMaxNonAdjuscentSubsequentSum(int[] a)
{
    a.ThrowIfNull("a");
    if (a.Length == 0)
    {
        return 0;
    }

    // `best` describes the prefix processed so far: its best sum that uses
    // the last item, and its best sum that does not.
    Record best = new Record()
    {
        max_including_item = a[0],
        max_excluding_item = 0
    };
    for (int idx = 1; idx < a.Length; idx++)
    {
        // Both values are computed from the previous record before `best`
        // is replaced:
        // 1. using a[idx] means the previous item was not used;
        // 2. skipping a[idx] keeps the best sum of the previous prefix.
        best = new Record()
        {
            max_including_item = best.max_excluding_item + a[idx],
            max_excluding_item = best.Max
        };
    }
    return best.Max;
}
Unit Tests
// Checks FindMaxNonAdjuscentSubsequentSum against five hand-computed cases,
// including arrays that contain negative values.
[TestMethod]
[TestCategory(Constants.DynamicProgramming)]
public void MaxNonAdjascentSubsequenceSum()
{
    int[][] inputs = new int[][]
    {
        new int[] { 3, 2, 5, 10, 7 },
        new int[] { 3, 2, 5, 10 },
        new int[] { 5, 10, 40, 50, 35 },
        new int[] { 1, -1, 6, -4, 2, 2 },
        new int[] { 1, 6, 10, 14, -5, -1, 2, -1, 3 }
    };
    int[] expected = new int[] { 15, 13, 80, 9, 25 };

    for (int i = 0; i < inputs.Length; i++)
    {
        Assert.IsTrue(expected[i] == this.FindMaxNonAdjuscentSubsequentSum(inputs[i]));
    }
}
where
// Returns the larger of the two arguments (b when they are equal).
public static int Max(int a, int b)
{
    if (a > b)
    {
        return a;
    }
    return b;
}
// Pair of running sums kept while scanning an array; both start at int.MinValue.
class Record
{
    // Running sum for the case where the current item is used.
    public int max_including_item = int.MinValue;
    // Running sum for the case where the current item is not used.
    public int max_excluding_item = int.MinValue;

    // The larger of the two sums.
    public int Max
    {
        get
        {
            return (max_including_item > max_excluding_item)
                ? max_including_item
                : max_excluding_item;
        }
    }
}
/**
 * Largest sum of non-adjacent elements of nums, where picking nothing (sum 0)
 * is allowed, so the result is never negative.
 * Returns 0 for an empty array and max(0, nums[0]) for a single element.
 */
public static int maxSumNoAdj(int[] nums){
    if (nums.length == 0) {
        return 0;
    }
    // dp[i] is the best sum within nums[0..i]; it never decreases with i.
    int[] dp = new int[nums.length];
    dp[0] = Math.max(0, nums[0]); // for dp[0], select the greater value (0,num[0])
    if (nums.length == 1) {
        return dp[0];
    }
    dp[1] = Math.max(nums[1], dp[0]);
    for(int i = 2; i < nums.length; i++){
        // Either take nums[i] on top of dp[i-2], or keep dp[i-1]. Because dp
        // is non-decreasing, dp[i-1] already covers the option of keeping dp[i-2].
        dp[i] = Math.max(nums[i] + dp[i-2], dp[i-1]);
    }
    return dp[nums.length - 1];
}
/** Prints the result of maxSumNoAdj for one sample array that mixes negative and positive values. */
public static void main(String[] args) {
    System.out.println(maxSumNoAdj(new int[] {-9, 2, 3, -7, 1, 1}));
}