OpenCl: addition of large amount of arrays - arrays

In the code bellow, each work items generate a array sum_qcos_i.
In order to add them I first make local addition using the local array sum_qcos_tmp.
Then I copy every local array in a global one dimensional matrix sum_qcos_part.
I need to add that matrix columns for my purpose, that's what does each work item before using the result.
Here is the code
__kernel __attribute__((vec_type_hint(double4))) void energy_forces( const int atom_number,
const int nvect,
__global double4 *kvect,__global double *qcos,__global double *qsin,
__global double *cst_ewald ,
__global double4 *positions,
__global double4 *forces_r,
__global double *sum_qcos_part,__global double *sum_qsin_part)
{
int i = 0 ;
int gti = 0 , ggi = 0 , lti = 0;
double kr = (double)0.0 ;
double ss = (double)0.0 , cc = (double)0.0 ;
double prod = (double)0.0 ;
double valqcos = 0. , valqsin = 0. ;
double4 zeroes_4 = (double4){ 0.0,0.0,0.0,0.0 };
double sum_qcos_i[NVECTOR_MAX] ;
double sum_qsin_i[NVECTOR_MAX] ;
#if defined NVECTOR_MAX
__local double sum_qcos_tmp[NVECTOR_MAX] ;
__local double sum_qsin_tmp[NVECTOR_MAX] ;
#endif
lti = get_local_id(0);
ggi = get_group_id(0);
for (k=0;k<nvect;k++) { /*k-vectors*/
sum_qcos_tmp[k] = .0 ;
sum_qsin_tmp[k] = .0 ;
sum_qcos_i[k] = .0 ;
sum_qsin_i[k] = .0 ;
}
double fk = (double)0.0 ;
double4 fr_i = zeroes_4 ;
double4 kvec_i = zeroes_4;
for (gti = get_global_id(0); gti < atom_number; gti += get_global_size(0))
{
pos_i = positions[gti];
for (k=0;k<nvect;k++) { /* sum over k-vectors to compute QCOS and QSIN for Ewald sum*/
prod = dot((double4)pos_i,(double4)kvect[k]);
ss = (double)sincos(-prod,&cc);
valqcos = cc ;
valqsin = ss ;
// valqcos = 1. ;
// valqsin = 1. ;
qcos[gti*NVECTOR_MAX+k] = valqcos ;
qsin[gti*NVECTOR_MAX+k] = valqsin ;
sum_qcos_i[k] = valqcos ; /* private variable */
sum_qsin_i[k] = valqsin ;
} /* end sum over k-vectors to compute QCOS and QSIN for Ewald sum*/
} // end for gti
int ii = 0 ;
for ( ii = 0;ii<get_local_size(0);ii++ )
{
if (lti == ii)
{
for (k=0;k<nvect;k++)
{ /* k-vectors */
sum_qcos_tmp[k] += sum_qcos_i[k] ; /* accumulates private data to local variable */
sum_qsin_tmp[k] += sum_qsin_i[k] ;
}
barrier(CLK_LOCAL_MEM_FENCE|CLK_GLOBAL_MEM_FENCE) ;
}
}
if (lti == 0)
{
for (k=0;k<nvect;k++) {
sum_qcos_part[ggi*NVECTOR_MAX+k] = sum_qcos_tmp[k] ; /* cp local data to global array */
sum_qsin_part[ggi*NVECTOR_MAX+k] = sum_qsin_tmp[k] ;
}
}
int iii = 0 ;
for (gti = get_global_id(0); gti < atom_number; gti += get_global_size(0))
{
fr_i = zeroes_4 ;
barrier(CLK_LOCAL_MEM_FENCE|CLK_GLOBAL_MEM_FENCE) ;
for (k=0;k<nvect;k++)
{
sum_qcos_i[k] = .0 ;
sum_qsin_i[k] = .0 ;
for (iii=0;iii<get_num_groups(0);iii++)
{
sum_qcos_i[k] += sum_qcos_part[iii*NVECTOR_MAX+k] ;
sum_qsin_i[k] += sum_qsin_part[iii*NVECTOR_MAX+k] ;
}
}
barrier(CLK_LOCAL_MEM_FENCE|CLK_GLOBAL_MEM_FENCE) ;
for (k=0;k<nvect;k++)
{
fk = ( sum_qcos_i[k]*qsin[gti*NVECTOR_MAX+k] - sum_qsin_i[k]*qcos[gti*NVECTOR_MAX+k] ) ;
fr_i += cst_ewald[k] * fk * kvect[k] ;
}
#if defined(SCALAR_KERNELS)
forces_r[gti].x = fr_i.x;
forces_r[gti].y = fr_i.y;
forces_r[gti].z = fr_i.z;
forces_r[gti].w = .0 ;
#elif defined(VECTOR_KERNELS)
forces_r[gti] = fr_i;
#endif
} // end for gti
}
This kernel doesn't work and I can't figure why.
Some hints would be very helpful here.
Thank you.

Adding a barrier made the trick:
for ( ii = 0;ii<get_local_size(0);ii++ )
{
if (lti == ii)
{
barrier(CLK_LOCAL_MEM_FENCE|CLK_GLOBAL_MEM_FENCE) ;
for (k=0;k<nvect;k++)
{ /* k-vectors */
sum_qcos_tmp[k] += sum_qcos_i[k] ; /* accumulates private data to local variable */
sum_qsin_tmp[k] += sum_qsin_i[k] ;
}
}
}
The final of the the vector forces_r is still wrong abut it's always the same now.

Actually the problem is not solved.
I define the global work size based on the number of particles so that each work item takes care of one particles.
In my calculations, I dot product each particles coordinates with a certain number of vectors.
My issue now is that the result of those dot products depends on the group size. I put the vectors in a array of double4. The global work size goes from something like 1000 to 10000, whereas the number of vectors I use for the dot product is always around 200.
I'm wondering whether there is a requirement on arrays size regarding the global work size.
for (gti = get_global_id(0); gti < atom_number; gti += get_global_size(0))
{
pos_i = positions[gti];
for (k=0;k<nvect;k++) { /* sum over k-vectors to compute QCOS and QSIN for Ewald sum*/
prod = dot((double4)pos_i,(double4)kvect[k]);
ss = (double)sincos(-prod,&cc);
valqcos = cc ;
valqsin = ss ;
qcos[gti*NVECTOR_MAX+k] = valqcos ;
qsin[gti*NVECTOR_MAX+k] = valqsin ;
sum_qcos_i[k] = valqcos ; /* private variable */
sum_qsin_i[k] = valqsin ;
} /* end sum over k-vectors to compute QCOS and QSIN for Ewald sum*/
} // end for gtiforgot
Any hints here?

Related

Loop through 2 arrays in one for loop?

anyone know how we can loop through two arrays in one for loop?
function setwinner() internal returns(address){
for (uint stime = 0 ; stime < squareStartTimeArray.length; stime++ & uint etime = 0; etime = squareEndTimeArray.length etime++) {
if (winningTime >= stime & winningTime <= etime) {
winningIndex = stime;
if (assert(stime == etime) == true) {
winningAddress = playerArray[stime];
}
}
}
}
To loop through multiple arrays in the same loop you should make sure that they both have the same length first. then you can use this:
require(arrayOne.length == arrayTwo.length)
for (i; arrayOne.length > i; i++) {
arrayOne[i] = ....;
arrayTwo[i] = ....;
}

How to accumulate arrays of data efficiently in C

The problem is that I have a huge matrix A, and given an (quite large) integer array, for example, say my matrix is:
[0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,
4,4,4,4,4,4,4,4,
...............]
and the integer array is [0, 2, 4]
Then the desired answer is [6,6,6,6,6,6,6,6] by accumulating [0,0,0,0,0,0,0,0], [2,2,2,2,2,2,2,2],[4,4,4,4,4,4,4,4]
This is a simple problem, but a naive C implementation seems to be very slow. This is especially the case when accumulating a lot of rows.
manually loop_unrolling doesn't seem to help. I am not familiar with inline assembly, any suggestions? I am wondering if there is a known library for such operations as well.
Below is my current implementation:
void accumulateRows(int* js, int num_j, Dtype* B, int nrow, int ncol, int incRowB, Dtype* buffer){
int i = 0;
int num_accumulated_rows = (num_j / 8) * 8;
int remaining_rows = num_j - num_accumulated_rows;
// unrolling factor of 8, each time, accumulate 8 rows
for(; i < num_accumulated_rows; i+=8){
int r1 = js[i];
int r2 = js[i+1];
int r3 = js[i+2];
int r4 = js[i+3];
int r5 = js[i+4];
int r6 = js[i+5];
int r7 = js[i+6];
int r8 = js[i+7];
register Dtype* B1_row = &B[r1*incRowB];
register Dtype* B2_row = &B[r2*incRowB];
register Dtype* B3_row = &B[r3*incRowB];
register Dtype* B4_row = &B[r4*incRowB];
register Dtype* B5_row = &B[r5*incRowB];
register Dtype* B6_row = &B[r6*incRowB];
register Dtype* B7_row = &B[r7*incRowB];
register Dtype* B8_row = &B[r8*incRowB];
for(int j = 0; j < ncol; j+=1){
register Dtype temp = B1_row[j] + B2_row[j] + B3_row[j] + B4_row[j];
temp += B5_row[j] + B6_row[j] + B7_row[j] + B8_row[j];
buffer[j] += temp;
}
}
// left_over from the loop unrolling
for(; i < remaining_rows; i++){
int r = js[i];
Dtype* B_row = &B[r*incRowB];
for(int i = 0; i < n; i++){
buffer[i] += B_row[i];
}
}
}
EDIT
I think this accumulation is very common in database, for example when we want to make a query about the total sales made in any Monday, Tueday, etc.
I know gcc supports Intel SSE, and I am looking to learn how to apply that to this problem, since this is very much SIMD
here is one way to implement the function, along with a few suggestions about further speedups
#include <stdlib.h> // size_t
typedef int Dtype;
// Note:
// following function assumes a 'contract' with the caller
// that no entry in 'whichRows[]'
// is larger than (number of rows in 'baseArray[][]' -1)
void accumulateRows(
// describe source 2d array
/* size_t numRows */ size_t numCols, Dtype BaseArray[][ numCols ],
// describe row selector array
size_t numSelectRows, size_t whichRows[ numSelectRows ],
// describe result array
Dtype resultArray[ numCols ] )
{
size_t colIndex;
size_t selectorIndex;
// initialize resultArray to all 0
for( colIndex = 0; colIndex < numCols; colIndex++ )
{
resultArray[colIndex] = 0;
}
// accumulate totals for each column of selected rows
for( selectorIndex = 0; selectorIndex < numSelectRows; selectorIndex++ )
{
for( colIndex = 0; colIndex < numCols; colIndex++ )
{
resultArray[colIndex] += BaseArray[ whichRows[selectorIndex] ][colIndex];
} // end for each column
} // end for each selected row
}
#if 0
// you might want to unroll the "initialize resultArray" loop
// by replacing the loop with
resultArray[0] = 0;
resultArray[1] = 0;
resultArray[2] = 0;
resultArray[3] = 0;
resultArray[4] = 0;
resultArray[5] = 0;
resultArray[6] = 0;
resultArray[7] = 0;
// however, that puts a constraint on the number of columns always being 8
#endif
#if 0
// you might want to unroll the 'sum of columns' loop by replacing the loop with
resultArray[0] += BaseArray[ whichRows[selectorIndex] ][0];
resultArray[1] += BaseArray[ whichRows[selectorIndex] ][1];
resultArray[2] += BaseArray[ whichRows[selectorIndex] ][2];
resultArray[3] += BaseArray[ whichRows[selectorIndex] ][3];
resultArray[4] += BaseArray[ whichRows[selectorIndex] ][4];
resultArray[5] += BaseArray[ whichRows[selectorIndex] ][5];
resultArray[6] += BaseArray[ whichRows[selectorIndex] ][6];
resultArray[7] += BaseArray[ whichRows[selectorIndex] ][7];
// however, that puts a constraint on the number of columns always being 8
#endif
#if 0
// on Texas Instrument DSPs ,
// could use a #pragma to unroll the loop
// or (better)
// make use of the built-in loop table
// to massively speed up the execution of the loop(s)
#endif

One of my value in array gives "0"

In my file includes two columns and I'm try to gather data from file. I need to compare two value in one column. For example, if array[5] is higher than array[4], do something. Here my code:
int control(double col2[], double col3[], int subscript){
double a, b, fcontrol ;
int k /* group */ ;
/* some necessary values for JD controlling */
a = col2[subscript] ;
b = col2[subscript-1] ;
/* for JD controlling */
fcontrol = a - b ;
printf("kontrol = %.12f a = %.12f b = %.12f\n", fcontrol, a, b) ;
/* if value of between two data is equal or higher than 10 hour return 1 */
if(fcontrol >= 0.416666666667){
return 1 ;
}
else{
return 0 ;
}
b is always 0. How can I fix it?
My terminal is :
kontrol = 258.426728989849 a = 258.426728989849 b = 0.000000000000
kontrol = 258.447161800788 a = 258.447161800788 b = 0.000000000000
kontrol = 258.467594711488 a = 258.467594711488 b = 0.000000000000
kontrol = 260.245248070103 a = 260.245248070103 b = 0.000000000000
kontrol = 260.265680861012 a = 260.265680861012 b = 0.000000000000
kontrol = 260.286113551461 a = 260.286113551461 b = 0.000000000000
kontrol = 260.306546441912 a = 260.306546441912 b = 0.000000000000
Here my all code :
/* TASK */
#include<stdio.h>
int kontrol(double col2[], double col3[], int subscript) ;
int main(){
int kolon1,
n = 0, /* for array */
j, z, /* for "for" loopr */
flag = 0 ;
int grup = 0 ;
double kolon2, kolon3,
col2[100000], col3[100000] ;
char ignore[100];
FILE *okuPtr ;
FILE *yazPtr ;
char oku_tbl[100] ;
sprintf(oku_tbl, "deneme.tbl") ;
/* error if file isnt opened*/
if ((okuPtr = fopen(oku_tbl, "r")) == NULL)
printf("%s Acilamadi", oku_tbl) ;
/* file is opened */
else{
char yaz_tbl[100] ;
sprintf(yaz_tbl, "deneme_data.tbl") ;
/* errof if file isnt opened */
if((yazPtr = fopen(yaz_tbl, "w")) == NULL)
printf("%s acilamadi\n", yaz_tbl) ;
/* file is opened */
else{
/* first read */
fscanf(okuPtr, "%d%lf%lf", &kolon1, &kolon2, &kolon3) ;
/* until end of file */
while (!feof(okuPtr)){
/* ignore first 3 line */
fgets(ignore, 100, okuPtr) ;
col2[n] = kolon2 ;
col3[n] = kolon3 ;
flag = control(col2, col3, n) ;
n++ ;
/* if flag == 1 */
if (flag == 1){
for (z = 0 ; z <= --n ; z++){
fprintf(yazPtr, "%d\t%.12f\t%.12f\n", grup, col2[z], col3[z]) ;
}
n = 0 ;
grup++ ;
}
/* yeni veri oku */
fscanf(okuPtr, "%d%lf%lf", &kolon1, &kolon2, &kolon3) ;
}
/* diziyi yazdir
for (j = 0 ; j <= n-1 ; j++){
printf("%d\t%-.12f\t%-.12f\n", k, col2[j], col3[j]) ;
} */
}
}
return 0 ;
}
int control(double col2[], double col3[], int subscript){
double a, b,
fcontrol ;
int k /* group */ ;
/* some necessary values for JD controlling */
a = col2[subscript] ;
b = col2[subscript-1] ;
/* for JD controlling */
fcontrol = a - b ;
printf("kontrol = %.12f a = %.12f b = %.12f\n", fcontrol, a, b) ;
/* if value of between two data is equal or higher than 10 hour return 1 */
if(fcontrol >= 0.416666666667){
return 1 ;
}
else{
return 0 ;
}
}
Problem is that when you call control for first time when n=0
flag = control(col2,col3, n);
But notice here b=col2[subscript-1] will actually be b=col2[-1] which is creating problem as array indexing start with 0 thus your program has this behaviour .
while (!feof(okuPtr))
It is always wrong. See here-Why is “while ( !feof (file) )” always wrong?
Instead use this -
while(fgets(ignore, 100, okuPtr))
{
/*Your code*/
}

Exc_Bad_Access in C

I am new to C and at the moment I am trying to debug my code. I am hitting a dead end on this particular error. It gives me an error :
THREAD 1: EXC_BAD_ACCESS(code=2, address = 0x7fff5fc00000)
The error appears at the section krec[0].
I am still yet to figure out what is allow in C and what is not allow when writing c. Would love to have some suggestion to make my code more efficient too. Here is my code :
int k_vector_calculation(int krec[3])
{
int Boxx[3], Boxy[3], Boxz [3];
Boxx[0] = lx ;
Boxx[1] = 0 ;
Boxx[2] = 0 ;
Boxy[0] = 0 ;
Boxy[1] = ly ;
Boxy[2] = 0 ;
Boxz[0] = 0 ;
Boxz[1] = 0 ;
Boxz[2] = lz ;
int Productbc[3], Productac[3], Productab[3] ;
Productbc[0] = Boxy[1] * Boxz[2] ;
Productbc[1] = 0 ;
Productbc[2] = 0 ;
Productac[0] = 0 ;
Productac[1] = Boxx [0]* Boxz [2] ;
Productac[2] = 0 ;
Productab[0] = 0 ;
Productab[1] = 0 ;
Productab[2] = Boxx [0]* Boxy [1] ;
float V0 = 0;
V0 = lx*ly*lz ;
int u[3], v[3], w[3] ;
u[0] = 2*PI/V0*Productbc[0] ;
u[1] = 2*PI/V0*Productbc[1] ;
u[2] = 2*PI/V0*Productbc[2] ;
v[0] = 2*PI/V0*Productac[0] ;
v[1] = 2*PI/V0*Productac[1] ;
v[2] = 2*PI/V0*Productac[2] ;
w[0] = 2*PI/V0*Productab[0] ;
w[1] = 2*PI/V0*Productab[1] ;
w[2] = 2*PI/V0*Productab[2] ;
int l, m, o;
l = nearbyint(lx) ;
m = nearbyint(ly) ;
o = nearbyint(lz) ;
krec[0] = l*u[0]+m*v[0]+o*w[0] ;
krec[1] = l*u[1]+m*v[1]+o*w[1] ;
krec[2] = l*u[2]+m*v[2]+o*w[2] ;
return k_vector_calculation(&krec[3]);
}
The problem is here
return k_vector_calculation(&krec[3]);
You have passed an int array krek[3] in which the valid locations are krek[0],krek[1] and krek[2].Also,you have an infinite recursion as you call the function again and again in the last return statement.

how to find max value with less iterations

I am changing the phase of signal from 0 to 360 by each degree to get max voltage value.Because if i change phase of the signal the voltage also changes.I have the fallowing code to find max value.
void Maxphase(float *max, unsigned int *index)
{
*max = 0.0;
float value;
unsigned int i, data;
for (i=0;i<=360;i++)
{
phaseset(i);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > *max) //find max value
{
*max = value; //max voltage
*index = i;
}
}
}
from the above code I am getting Max value(voltage) after 38 sec(360*100) because for every read operation my device needs 100ms delay. This is too large, I can't change hardware thus i want to get the max value within 2 to 3 sec by optimizing software.
then I have tried with the fallowing code.
void Maxphase(float *max1, unsigned int *index1)
{
max = 0.0;
float value;
unsigned int i,j,data;
for (i=0;i<=360;i+=10)
{
phaseset(i);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > max) //find max value
{
max = value; //max voltage
index = i;
}
}
*max1=max;
*index1=index;
for (i=*index1-9;i<=*index1+9;i+=1)
{
j=i;
phaseset(j);
delay_ms(100);
data = readvalue();
value = voltage(mux1);
if(value > *max1) //find max value
{
*max1 = value; //max voltage
*index1 = i;
}
}
}
I have reduced time from 45 sec to 7 sec. i have reduced iterations 360 to 54(54*100). I want to reduce it 7 sec to 2 sec.
Can any one help me with better algorithm that i can get max value from (0 to 360) with in 2 sec.
I have measured the voltage values using scope by changing phase. I have written below how it vary voltage with phase.
Phase (degree) voltage(max)
0 0.9mv
45 9.5mv
90 9.0mv
135 0.9mv
180 292mv
225 601mv
270 555mv
315 230mv
360 0.9mv
I am new to C programming. Can anyone provide sample code for the best algorithm.
Golden section search is probably what you are after. It is effective, but still pretty simple.
If you want something even faster and more sophisticated, you can use Brent's method.
If you can be sure that there is only a single highest point on your 360 degrees you can do a recursive divide and conquer.
You start by looking e.g. at 0, 180, 270. Let's say you find the answer is that 180 + 270 together have the highest value. Than you start by looking in at 210.... Which side is higher? And so on ...
Exploiting the various comments and suggestions here, I present this untested piece of code. I don't know whether this works at all or is an improvement over the existing source, but it was fun to try, anyway:
extern void phaseset(int);
extern void delay_ms(int);
extern float readvalue();
extern float voltage(int);
extern int mux1;
float probe(int phase)
{
float data;
phaseset(phase);
delay_ms(100);
data = readvalue(); /* data is ignored? */
return voltage(mux1); /* mux1? */
}
/* helper routine, find the max in a given range [phase1, phase2] */
void maxphase_aux(int phase1, float vol1, int phase2, float vol2, int *phaseret, float *volret)
{
float xvol1 = 0, xvol2 = 0;
int xphase1 = -1, xphase2 = -1;
/* test the voltage in the middle */
int phasem = abs(phase2 - phase1) / 2;
float volm = probe(phasem);
if (volm > vol1 && volm > vol2) {
/* middle point is the highest so far,
* search left and right for maximum */
*volret = volm;
*phaseret = phasem;
maxphase_aux(phase1, vol1, phasem, volm, &xphase1, &xvol1);
maxphase_aux(phase2, vol2, phasem, volm, &xphase2, &xvol2);
} else if (volm < vol1 && volm > vol2) {
/* vol1 is the highest so far,
* search between volm and vol1 for maximum */
maxphase_aux(phase1, vol1, phasem, volm, &xphase1, &xvol1);
} else if (volm > vol1 && volm < vol2) {
/* vol2 is the highest so far,
* search between volm and vol2 for maximum */
maxphase_aux(phase2, vol2, phasem, volm, &xphase2, &xvol2);
} else {
/* not possible? */
return;
}
if (xvol1 > volm) {
*volret = xvol1;
*phaseret = xphase1;
}
if (xvol2 > volm) {
*volret = xvol2;
*phaseret = xphase2;
}
}
void maxphase(int *phaseret, float *volret)
{
float v0 = probe(0);
float v360 = probe(360);
maxphase_aux(0, v0, 360, v360, phaseret, volret);
}
UPDATE: 2012-11-10.
#include <stdio.h>
#include <string.h>
#include <math.h>
#define FAKE_TARGET 89
unsigned fake_target = FAKE_TARGET;
float probe_one(unsigned int phase);
void Maxphase(float *max, unsigned int *index);
void Maxphase(float *max, unsigned int *index)
{
unsigned int aim, idx, victim;
struct best {
unsigned pos;
float val;
} samples[4] = {{0, 0.0}, };
for (aim = 0;aim < 360;aim += 90) {
idx=aim/90;
samples[idx].pos = aim;
samples[idx].val = probe_one(samples[idx].pos);
if (!idx || samples[idx].val < samples[victim].val ) victim = idx;
}
/* eliminate the weakist postion, and rotate the rest,
** such that:
** samples[0] := lower boundary.
** samples[1] := our best guess
** samples[2] := upper boundary
** samples[3] := scratch/probe element
*/
fprintf(stderr, "Victim=%u\n", victim );
switch(victim) {
case 0: samples[0] = samples[1]; samples[1] = samples[2]; samples[2] = samples[3]; break;
case 1: samples[1] = samples[3]; samples[3] = samples[0]; samples[0] = samples[2]; samples[2] = samples[3]; break;
case 2: samples[2] = samples[1]; samples[1] = samples[0]; samples[0] = samples[3]; break;
case 3: break;
}
/* Calculation is easier if the positions are increasing.
** (We can always perform the modulo 360 if needed)
*/
if (samples[0].pos > samples[1].pos ) samples[1].pos += 360;
if (samples[1].pos > samples[2].pos ) samples[2].pos += 360;
while( 1) {
int step;
step = samples[2].pos - samples[0].pos;
if (step < 3) break;
do {
fprintf(stderr, "\n[%u %u %u] Diff=%d\n"
, samples[0].pos , samples[1].pos , samples[2].pos , step);
if (step > 0) step++; else step--;
step /= 2;
aim = (samples[0].pos + step ) ;
/* avoid hitting the middle cell twice */
if (aim %360 != samples[1].pos %360) break;
step += 1;
aim = (samples[0].pos + step ) ;
if (aim %360 != samples[1].pos %360) break;
step -= 2;
aim = (samples[0].pos + step ) ;
break;
} while(0);
fprintf(stderr, "Step=%d Aim=%u, Idx=%u\n",step, aim,idx );
samples[3].pos = aim;
samples[3].val = probe_one( samples[3].pos );
victim= (samples[3].pos > samples[1].pos ) ? 2 : 0;
if (samples[3].val > samples[1].val) idx= 1; else idx = victim;
fprintf(stderr, "Victim=%u, TargetIdx=%u\n", victim, idx );
/* This should not happen */
if (samples[3].val < samples[victim].val) break;
if (idx != victim) samples[2-victim] = samples[idx];
samples[idx] = samples[3];
}
*max = samples[1].val;
*index = samples[1].pos % 360;
}
float probe_one(unsigned int phase)
{
float value;
#ifdef FAKE_TARGET
int dif;
dif = fake_target-phase;
if (dif < -180) dif = 360+dif;
else if (dif > 180) dif = 360-dif;
/* value = 1.0 / (1 + pow(phase-231, 2)); */
value = 1.0 / (1 + pow(dif, 2));
fprintf(stderr, "Target = %d: Probe(%d:%d) := %f\n", fake_target, phase, dif, value );
sleep (1);
#else
unsigned int data;
phase %= 360;
phaseset(phase);
delay_ms(100);
data = readvalue(); // what is this ?
value = voltage(mux1);
#endif
return value;
}
int main(int argc, char **argv)
{
float value;
unsigned int index;
if (argv[1]) sscanf (argv[1], "%u", &fake_target);
fake_target %= 360;
Maxphase(&value, &index) ;
printf("Phase=%u Max=%f\n", index, value );
return 0;
}

Resources