How do I find the mean and median of a set of numbers without using arrays in C?
The question is not really how to find the mean or median, but how to store a set of numbers and perform operations on them if the use of arrays is not allowed.
A fun problem.
The key is to find a place to store all the numbers. Code could use a file, a linked list, etc. The code below uses a linked list and recursion: each recursive call keeps one number in a local variable, and the nodes are linked back through the call stack.
Refinements such as handling multiple modes, or the median when the count is even, are left to the OP.
#include <stdio.h>
#include <math.h>

typedef struct num_S {
  struct num_S *prev;
  double x;
} num_T;

void GetNum(num_T *prev) {
  num_T current;
  current.prev = prev;
  // If a new number was read ...
  if (scanf("%lf", &current.x) == 1) {
    num_T *p = &current;
    // Sort the new entry into the list
    while (p->prev != NULL && p->x < p->prev->x) {
      double t = p->x;
      p->x = p->prev->x;
      p->prev->x = t;
      p = p->prev;
    }
    GetNum(&current);
  // End of list - now process the list
  } else {
    unsigned ModeCount = 0;
    double Mode = 0.0;
    unsigned ModeCandidateCount = 0;
    double ModeCandidate = 0.0 / 0.0; // NaN, so the first element never matches
    unsigned Count = 0;
    double SumX = 0.0;
    double SumXX = 0.0;
    num_T *p = current.prev;
    while (p != NULL) {
      Count++;
      SumX += p->x;
      SumXX += p->x * p->x;
      // Equal values are adjacent in the sorted list, so a running
      // candidate count is enough to find the mode.
      if (p->x == ModeCandidate) {
        ModeCandidateCount++;
      } else {
        ModeCandidateCount = 1;
        ModeCandidate = p->x;
      }
      if (ModeCandidateCount > ModeCount) {
        ModeCount = ModeCandidateCount;
        Mode = p->x;
      }
      p = p->prev;
    }
    printf("Count = %u\n", Count);
    if (Count > 0) {
      printf("Mode = %lf Count %u\n", Mode, ModeCount);
      printf("Mean = %lf\n", SumX / Count);
      // Population standard deviation from the running sums
      printf("STD = %lf\n", sqrt(Count * SumXX - SumX * SumX) / Count);
      // Walk halfway down the sorted list to the median element
      Count /= 2;
      num_T *p = current.prev;
      while (Count-- > 0) {
        p = p->prev;
      }
      printf("Median = %lf\n", p->x);
    }
    fflush(stdout);
  }
}

int main(void) {
  GetNum(NULL);
  return 0;
}
Input: 4 3 4 2 4 1 EOF
Output:
Count = 6
Mode = 4.000000 Count 3
Mean = 3.000000
STD = 1.154701
Median = 3.000000
STD Ref: Standard deviation, Rapid calculation methods
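For reference, the single-pass standard deviation printed above follows from expanding the definition of the population variance; with N = Count, S1 = SumX and S2 = SumXX:

STD = sqrt(N*S2 - S1*S1) / N

For the sample input (N = 6, S1 = 18, S2 = 62) this gives sqrt(6*62 - 18*18)/6 = sqrt(48)/6 ≈ 1.154701, matching the output above.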
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
//#include<igraph.h>
#define NUM_VERTICES1 15000 // No. of data for Newman Watts to be used 15000:
//#define strings 10 // No. of base strings to be used 160:

// Function for generating infection rates randomly:
void unifRand(double *x, double *x1, double *x2)
{
    int i;
    const int n = 200; // 20
    srand((unsigned)time(NULL));
    for(i = 0; i < n - 1; i++)
    {
        //x2[i] = rand()/(double)RAND_MAX; //generate random number for choosing the infected neighbors(m):
        x[i] = 0.2 + (0.4 - 0.2)*rand()/(double)RAND_MAX;
        x2[i] = 0.02; // fix the neighbor m and check:
        x1[i] = log(1 - x[i]); // Infection rate lambda:
        printf("%lf\t%lf\t%lf\t%d\t%d\t\n", x[i], x1[i], x2[i], rand(), RAND_MAX);
    }
}
// Function 2:
struct Edge {
    int vertex;
    struct Edge *next;
};

// Inserts Node to the Linked List by Head Insertion - O(1)
// Returns address of head which is the newly created node.
struct Edge *addEdge(struct Edge *currentHead, int newVertex)
{
    struct Edge *newHead = (struct Edge *)malloc(sizeof(struct Edge));
    newHead->vertex = newVertex;
    newHead->next = currentHead;
    return newHead;
}
int main(void)
{
    FILE *wg = NULL;
    FILE *ob = NULL;
    wg = fopen("ncwang1.txt", "w");
    ob = fopen("obs.txt", "w");
    if (wg == NULL)
    {
        printf("Error in opening file wg!\n");
    }
    if (ob == NULL)
    {
        printf("Error in opening file ob!\n");
    }
    int vertices = 200, edges = 400, i; // 20,50:(100,50)
    int strings = 160;
    int nobs = 10;
    int v1, v2;
    double j;
    int k;
    double t = 0.0;
    double dt = 0.1;
    double b;
    double x[vertices], x1[vertices];
    double x2[vertices];
    unifRand(x, x1, x2);
    // printf("Enter the Number of Vertices -\n");
    // scanf("%d", &vertices);
    // printf("\nEnter the Number of Edges -\n");
    // scanf("%d", &edges);
    struct Edge *adjacencyList[vertices + 1];
    // Size is made (vertices + 1) to use the
    // array as 1-indexed, for simplicity
    // Initialize the array:
    for (i = 0; i <= vertices; ++i) {
        adjacencyList[i] = NULL;
    }
    // Add "edges" random edges, skipping self-loops
    for (i = 0; i < edges; ++i) {
        // scanf("%d%d", &v1, &v2);
        v1 = rand() % vertices;
        v2 = rand() % vertices;
        if (v1 != v2) {
            // Add edge v1 --> v2
            adjacencyList[v1] = addEdge(adjacencyList[v1], v2);
            // Add edge v2 --> v1
            // Remove this if you want a directed graph
            adjacencyList[v2] = addEdge(adjacencyList[v2], v1);
        }
    }
    // Printing the adjacency list
    printf("\nAdjacency List -\n\n");
    for (j = 0; j < strings; j++) {
        for (i = 0; i <= vertices; ++i) {
            printf("adjacencyList[%d] -> ", i);
            struct Edge *traverse = adjacencyList[i];
            while (traverse != NULL)
            {
                b = (double)j / vertices;
                fprintf(wg, "%d \t%d \t\t%0.6lf\t\t%0.1lf\t\t%0.8lf\t\n", i, traverse->vertex,
                        -(x1[i] * (traverse->vertex)) / 100, b, x[i]);
                //fprintf(ob, "%d\t%0.2lf\t%0.1lf\n", k, (-log(1 - x[i]) * (traverse->vertex)), b);
                printf("%d -> ", traverse->vertex);
                traverse = traverse->next;
            }
            printf("NULL\n");
        }
    }
    // Close the output files before exiting
    fclose(wg);
    fclose(ob);
    wg = NULL;
    ob = NULL;
    return 0;
}
I have written the above code to study network reconstruction performance, following a research paper. I have to plot 'b' versus (-log(1-x[i])*(traverse->vertex)) from the output. The authors of the paper mention that "the results are obtained by ensemble averaging over 10 independent realizations". How can I implement this in my code? As I am new to statistical physics, I do not know how to implement it. Any suggestions will be helpful. The current output gives only a single line at b = 0.1, 0.2, ..., 1.0, which is not the expected output.
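One way to implement ensemble averaging, sketched below under some assumptions: run the whole simulation 10 times, reseeding the random generator differently each time so the realizations are independent, accumulate the observable for each (vertex, b) pair across runs, and divide by the number of realizations at the end. The function runOneRealization and the accumulator layout are hypothetical names for illustration; they are not part of the code above.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NREAL 10   /* number of independent realizations (nobs above) */
#define NVERT 200  /* vertices, matching the code above */
#define NB 160     /* number of b values ("strings" above) */

/* Hypothetical stub: move the graph construction and the computation of
   -log(1 - x[i]) * (traverse->vertex) from the program above into here,
   writing one observable per (vertex, b-index) pair into obs. */
void runOneRealization(double obs[NVERT][NB])
{
    int v, s;
    for (v = 0; v < NVERT; v++)
        for (s = 0; s < NB; s++)
            obs[v][s] = 0.0; /* placeholder value */
}

int main(void)
{
    static double sum[NVERT][NB] = {{0.0}};
    static double obs[NVERT][NB];
    int r, v, s;

    for (r = 0; r < NREAL; r++) {
        srand((unsigned)time(NULL) + r); /* a different seed per realization */
        runOneRealization(obs);
        for (v = 0; v < NVERT; v++)
            for (s = 0; s < NB; s++)
                sum[v][s] += obs[v][s];
    }

    /* Ensemble average: divide the accumulated sums by the number of runs */
    for (v = 0; v < NVERT; v++)
        for (s = 0; s < NB; s++)
            printf("%d\t%0.1lf\t%lf\n", v, (double)s / NVERT, sum[v][s] / NREAL);
    return 0;
}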
#include <stdio.h>
#include <stdlib.h>
typedef struct listNode *listPointer;
struct listNode{
    int data;
    listPointer link;
};

int boolean_search(listPointer f, int q) {
    listPointer t = f;
    while(t){
        if(q == t->data){
            return 1;
        }
        t = t->link;
    }
    return 0;
}

listPointer elem_search(listPointer f, int q) {
    listPointer t = (listPointer) malloc(sizeof(listPointer));
    t->data = -1;
    t->link = f;
    while(t){
        if(q == t->data){
            //printf("Match Found");
            return t;
        }
        t = t->link;
    }
    return NULL;
}

void del_num(listPointer f, int q) {
    listPointer temp = elem_search(f, q), z;
    if(temp) {
        z = temp;
        temp = temp->link;
        free(z);
    }
}

void delete_trial(listPointer x, int num) {
    del_num(x, num);
}

listPointer create(int start, int finish)
{
    int i = 0, flag = 1;
    listPointer created = (listPointer) malloc(sizeof(listPointer)), st = NULL;
    for( i = start; i < finish; i++)
    {
        listPointer temp = (listPointer) malloc(sizeof(listPointer));
        int num = rand() % 50;
        //printf("%d",search(st, num));
        if(flag == 1 && num != 0) {
            temp->data = num;
            temp->link = NULL;
            created = temp;
            st = created;
            flag = 0;
        } else if(!boolean_search(st, num) && num != 0)
        {
            //printf("I am here");
            temp->data = num;
            temp->link = NULL;
            created->link = temp;
            created = temp;
        }
    }
    return st;
}

void display(listPointer start){
    listPointer temp = start;
    printf("\nContents of given pointer : ");
    while(temp){
        printf("%d ", temp->data);
        temp = temp->link;
    }
}

int count_list(listPointer x) {
    listPointer y = x;
    int count = 0;
    while(y) {
        count++;
        y = y->link;
    }
    return count;
}

int count_non_zero(listPointer x) {
    listPointer y = x;
    int count = 0;
    while(y) {
        if(y->data != 0) count++;
        y = y->link;
    }
    return count;
}

int scan_least_no(listPointer x){
    int least = x->data, z;
    listPointer y = x;
    while(y)
    {
        z = y->data;
        if((z < least) && z != 0){
            least = z;
        }
        y = y->link;
    }
    return least;
}

listPointer scan_least_ptr(listPointer x){
    int least = x->data, z;
    listPointer y = x, f = x;
    while(y)
    {
        z = y->data;
        if((z < least) && z != 0){
            least = z;
            f = y;
        }
        y = y->link;
    }
    return f;
}

listPointer merge_asc(listPointer a, listPointer b) {
    listPointer result = (listPointer) malloc(sizeof(listPointer));
    result->data = -1;
    result->link = NULL;
    int countA = count_non_zero(a), countB = count_non_zero(b), flag = 0; //flag 0 for A, flag 1 for B
    int leastNum = 0, leastNumA = 0, leastNumB = 0;
    listPointer result_start = result, copyA = a, copyB = b;
    printf("Count of listA : %d, listB : %d", countA, countB);
    //scanning least element
    //compare every element to existing, the least one is connected to the link of that node
    while((countA + countB) != 0)
    {
        printf("Stuck in here");
        if(countA > 0)
        {
            //if least from here flag 0
            leastNumA = scan_least_no(copyA);
        } else if(countB > 0)
        {
            //if least from here flag 1
            leastNumB = scan_least_no(copyA);
        }
        if(leastNumA < leastNumB)
        {
            flag = 0;
            leastNum = leastNumA;
        } else {
            flag = 1;
            leastNum = leastNumB;
        }
        if(flag == 0)
        {
            result->link = elem_search(copyA, leastNum);
            result = result->link;
            delete_trial(a, leastNum);
        } else if(flag == 1){
            result->link = elem_search(copyB, leastNum);
            result = result->link;
            delete_trial(b, leastNum);
        }
        countA = count_non_zero(copyA);
        countB = count_non_zero(copyB);
    }
    printf("\nMerged Pointer : ");
    display(result_start);
    return result_start;
}

int main()
{
    listPointer a = create(10, 20), b = create(1, 30), c;
    display(a);
    display(b);
    c = merge_asc(a, b);
    display(c);
    return 0;
}
This program takes two linked lists and makes a new linked list from the existing ones.
Constraints: elements should be in ascending order, and the new linked list should use the existing nodes.
Q: "Let x = (x1, x2, x3, ..., xn) and y = (y1, y2, y3, ..., yn) be two linked lists. Write a program to merge the two lists together to form a new linked list z in which the nodes are in ascending order. Following the merge, x and y do not exist as individual lists. Each node initially in x and y is now in z."
I couldn't figure out what is wrong with my merge_asc function, because the output gets stuck at "Contents of given pointer : 33 36 27 15 43 35 42 49 21".
I suspect that in the delete function, the call to free is doing something I didn't consider in my program.
Can anyone tell me what is wrong, or what the solution is, and what approach I should take in these situations?
I have tested every other function and they are working fine.
I am using Ubuntu 14.04 and gcc 4.8.4.
Given two unsorted lists
2 -> 7 -> 1
5 -> 3 -> 4
If the input lists were sorted, you could merge them (as in merge sort) in linear time, but since these are unsorted lists you will need at least N log N time. I would advise starting by sorting one of the input lists and then doing an insertion sort with the elements of the other list.
so start by sorting the first list to get
1 -> 2 -> 7
then run an insertion sort with the other list to get
1 -> 2 -> 5 -> 7
1 -> 2 -> 3 -> 5 -> 7
1 -> 2 -> 3 -> 4 -> 5 -> 7
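A minimal sketch of that insertion step, assuming the first list is already sorted and reusing the listNode/listPointer types from the question above (sortedInsert and merge_sorted_insert are hypothetical names, not from the question). It relinks the existing nodes rather than allocating new ones, as the problem statement requires.

/* Insert node into the sorted list *headRef, keeping ascending order. */
void sortedInsert(listPointer *headRef, listPointer node)
{
    listPointer *pp = headRef;
    while (*pp && (*pp)->data < node->data)
        pp = &(*pp)->link;      /* walk to the insertion point */
    node->link = *pp;
    *pp = node;
}

/* Detach each node of y and insert it into the sorted list x. */
listPointer merge_sorted_insert(listPointer x, listPointer y)
{
    while (y) {
        listPointer next = y->link; /* save before relinking */
        sortedInsert(&x, y);
        y = next;
    }
    return x;
}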
I'm trying to integrate the function 1/((1+x^2)x^0.5) using the trapezium rule. I need the precision to be as high as possible, so I am increasing the number of strips, N, until the computer cannot detect a change in the total between consecutive values of N. However, the end condition currently never triggers, so the integration runs forever. Does anyone have any better suggestions than my current code?
Many thanks,
Beth
#include<stdio.h>
#include<math.h>
#include<float.h>
double inter(double x, double h, double y, double N, double total)
{
    total = total + 0.5*(1/((1+pow(x,2))*sqrt(x)));
    x = x + h;
    while (x < y)
    {
        total = total + (1/((1+pow(x,2))*sqrt(x)));
        x = x + h;
        //printf("x - %.16lf \n", x);
    }
    total = total + 0.5*(1/((1+pow(x,2))*sqrt(x)));
    total = total*h;
    //printf("t - %lf \n", total);
    return total;
}
main()
{
    double x, y, total, h, value, newvalue, f, N;
    int finish;
    x = 0.1;
    y = 1000;
    total = 0;
    N = 1000;
    finish = 0;
    value = 0;
    while(finish == 0)
    {
        h = (y-x)/(N-1);
        newvalue = inter(x, h, y, N, total);
        printf("h-%.16lf\n", h);
        printf("N-%.16lf\n", N);
        printf("New value %.16lf\n", newvalue);
        printf("c-%.16lf\n", value);
        if(value == newvalue)
        {
            finish = 1;
            printf("finish-%d\n", finish);
        }
        else
        {
            value = newvalue;
            newvalue = newvalue - 3;
            N = N + 1000;
            printf("newvalue-%lf\n", newvalue);
            printf("value-%lf\n", value);
        }
    }
    printf("%lf\n", value);
}
If you wish to create an automatic refinement of your numerical integration, one technique is to look at the relative convergence of successive estimates.
double previous = 0;
double current = inter( x, (y-x)/(N-1), y, N, total ); // Solve some baseline
do
{
    N = N + 1000;
    h = (y-x)/(N-1);
    previous = current;
    current = inter( x, h, y, N, total );
} while( fabs( current - previous ) / current > 0.001 ); // fabs (from <math.h>), not the integer abs()
That code will stop once you observe less than 0.1% relative refinement in your estimate. Decreasing 0.001 will effectively increase your accuracy. Usually the best way to compare doubles is through a tolerance check like:
fabs( a - b ) < k
where k is some factor of the order of the accuracy you wish to achieve.
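For illustration, here are both checks as tiny helpers (a sketch; the helper names are mine, not from the answer above): the absolute form from the line above, and the relative form used in the loop's stopping condition.

#include <math.h>

/* Absolute tolerance: |a - b| < k. Good when you know the expected scale. */
int close_abs(double a, double b, double k) { return fabs(a - b) < k; }

/* Relative tolerance: |a - b| <= k * |b|. Good when magnitudes vary widely. */
int close_rel(double a, double b, double k) { return fabs(a - b) <= k * fabs(b); }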
This integral is difficult because f(x) -> ∞ as x -> 0. In this example, I changed the range to 1 to 1000. I also used a summation function to minimize rounding error when summing up a large number of values. The integral from Wolfram Alpha is ~= 0.487474; this program gives ~= 0.487475. The exact integral can be found using this link:
integral 1/((1+x^2)sqrt(x))
#include<stdio.h>
#include<math.h>
#include<float.h>
/* clear array */
void clearsum(double asum[2048])
{
    size_t i;
    for(i = 0; i < 2048; i++)
        asum[i] = 0.;
}

/* add a number into array */
void addtosum(double d, double asum[2048])
{
    size_t i;
    while(1){
        /* i = exponent of d */
        i = ((size_t)((*(unsigned long long *)&d)>>52))&0x7ff;
        if(i == 0x7ff){         /* max exponent, could be overflow */
            asum[i] += d;
            return;
        }
        if(asum[i] == 0.){      /* if empty slot store d */
            asum[i] = d;
            return;
        }
        d += asum[i];           /* else add slot to d, clear slot */
        asum[i] = 0.;           /* and continue until empty slot */
    }
}

/* return sum from array */
double returnsum(double asum[2048])
{
    double sum = 0.;
    size_t i;
    for(i = 0; i < 2048; i++)
        sum += asum[i];
    return sum;
}

double fx(double x)
{
    return 1./((1.+x*x)*sqrt(x));
}

double inter(double x, double y, double n)
{
    double asum[2048];          /* for summation functions */
    double h;
    double d;
    if(n < 1.){
        n = 1.;
        h = 0.;
    } else {
        h = (y-x)/(n-1.0);
    }
    y -= h/2.;
    clearsum(asum);
    d = .5*h*fx(x);
    addtosum(d, asum);
    for( ; x < y; x += h){
        d = h*fx(x);
        addtosum(d, asum);
    }
    d = .5*h*fx(x);
    addtosum(d, asum);
    d = returnsum(asum);
    return d;
}

int main()
{
    double x, y, n, value, newvalue;
    x = 1.0;
    y = 1000.;
    value = 0.;
    for(n = 100000000.; 1; n += 100000000.)
    {
        newvalue = inter(x, y, n);
        printf("new value %.16lf %.0lf\n", newvalue, n);
        if(fabs(newvalue-value) < (newvalue*1E-7))
            break;
        value = newvalue;
    }
    return 0;
}
Using Simpson's rule, the results are more accurate and converge at much smaller values for n:
#include<stdio.h>
#include<math.h>
#include<float.h>
/* clear array */
void clearsum(double asum[2048])
{
    size_t i;
    for(i = 0; i < 2048; i++)
        asum[i] = 0.;
}

/* add a number into array */
void addtosum(double d, double asum[2048])
{
    size_t i;
    while(1){
        /* i = exponent of d */
        i = ((size_t)((*(unsigned long long *)&d)>>52))&0x7ff;
        if(i == 0x7ff){         /* max exponent, could be overflow */
            asum[i] += d;
            return;
        }
        if(asum[i] == 0.){      /* if empty slot store d */
            asum[i] = d;
            return;
        }
        d += asum[i];           /* else add slot to d, clear slot */
        asum[i] = 0.;           /* and continue until empty slot */
    }
}

/* return sum from array */
double returnsum(double asum[2048])
{
    double sum = 0.;
    size_t i;
    for(i = 0; i < 2048; i++)
        sum += asum[i];
    return sum;
}

double fx(double x)
{
    return 1./((1.+x*x)*sqrt(x));
}

double simpson(double x, double y, double n)
{
    double asum[2048];          /* for summation functions */
    double h;
    double a;
    if(n < 1.){
        n = 1.;
        h = 0.;
    } else {
        h = (y-x)/(n-1.0);
    }
    y += h/2.;
    clearsum(asum);
    for( ; x < y; x += h){
        a = h/6.*(fx(x) + 4.*fx(x + h/2.) + fx(x + h));
        addtosum(a, asum);
    }
    a = returnsum(asum);
    return a;
}

int main()
{
    double x, y, n, value, newvalue;
    x = 1.0;
    y = 1000.;
    value = 0.;
    for(n = 1000.; 1; n += 1000.)
    {
        newvalue = simpson(x, y, n);
        printf("new value %.16lf %.0lf\n", newvalue, n);
        if(fabs(newvalue-value) < (newvalue*1E-10))
            break;
        value = newvalue;
    }
    return 0;
}
Possible Duplicate:
Is this C-program correct(pointers and arrays)?
My program crashes when I free the malloc'd array at the end. Why?
Also, I'm not 100% sure how to allocate it in the first place. The program works as intended, though, except for the crash when I free the pointer.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/* Approximates a solution to a differential equation of the form:
y'(t) + ay(t) = x(t)
y(0) = b
*/
double* runge_kutta_2nd_order(double stepSize, double a, double b, double (*x)(double), double upto)
{
    int resultSize = ((int) (upto / stepSize)) + 1;
    double yt = b;
    double time;
    double k1, k2, ystar1, ystar2;
    int index = 1;
    double *results = (double*) malloc(resultSize * (sizeof(double)));
    if(results == NULL)
        exit(0);
    results[0] = b;
    for(time = 0; time <= upto; time += stepSize)
    {
        k1 = x(time) - a * yt;
        ystar1 = yt + stepSize * k1;
        k2 = x(time + stepSize) - a * ystar1;
        ystar2 = yt + (k1 + k2) / 2 * stepSize;
        yt = ystar2;
        results[index] = ystar2;
        index++;
    }
    return results;
}

void free_results(double *r)
{
    free(r);
    r = NULL;
}

double insignal(double t)
{
    return exp(t/2)*(sin(5*t) - 10*cos(5*t));
}

int main(void)
{
    int i;
    double *res = runge_kutta_2nd_order(0.01, -1, 0, &insignal, 10);
    printf("\nRunge Kutta 2nd order approximation of the differential equation:");
    printf("\ny'(t) - y(t) = e^(t/2) * (sin(5t) - 10cos(5t))");
    printf("\ny(0) = 0");
    printf("\n0 <= t <= 10");
    for(i = 0; i < 1001; i++){
        printf("\ni = %lf => y = ", 0.01*i);
        printf("%lf", res[i]);
    }
    printf("\n");
    free_results(res);
    return 0;
}
You have a heap overflow in runge_kutta_2nd_order. Carefully check the loop to ensure that index < resultSize always holds.
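As a sketch of one possible fix (an assumption about the intent: one value per step plus the initial value), size the buffer and bound the loop by the same integer count, rather than accumulating floating-point time, which can overshoot by one step; variable names follow the question's code.

int resultSize = ((int)(upto / stepSize)) + 1;
double *results = malloc(resultSize * sizeof(double));
if (results == NULL)
    exit(EXIT_FAILURE);
results[0] = b;
for (index = 1; index < resultSize; index++) {
    double time = (index - 1) * stepSize;  /* derive time from the index */
    k1 = x(time) - a * yt;
    ystar1 = yt + stepSize * k1;
    k2 = x(time + stepSize) - a * ystar1;
    yt = yt + (k1 + k2) / 2 * stepSize;
    results[index] = yt;                   /* index stays < resultSize */
}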
Here is my perceptron implementation in ANSI C:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
float randomFloat()
{
    srand(time(NULL));
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    // X, Y coordinates of the training set.
    float x[208], y[208];

    // Training set outputs.
    int outputs[208];

    int i = 0; // iterator

    FILE *fp;

    if ((fp = fopen("test1.txt", "r")) == NULL)
    {
        printf("Cannot open file.\n");
    }
    else
    {
        while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF)
        {
            if (outputs[i] == 0)
            {
                outputs[i] = -1;
            }
            printf("%f %f %d\n", x[i], y[i], outputs[i]);
            i++;
        }
    }

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);
            printf("Iteration %d Error %.2f %.2f\n", iteration, globalError, localError);
            iteration++;
        }
        system("PAUSE");
    } while (globalError != 0);

    system("PAUSE");
    return 0;
}
The training set I'm using: Data Set
I have removed all irrelevant code. Basically, what it does now is read the test1.txt file and load the values from it into three arrays: x, y, outputs.
Then there is a perceptron learning algorithm which, for some reason, is not converging to 0 (globalError should converge to 0), and therefore I get an infinite do-while loop.
When I use a smaller training set (like 5 points), it works pretty well. Any ideas where the problem could be?
I wrote this algorithm very similarly to this C# Perceptron algorithm:
EDIT:
Here is an example with a smaller training set:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X coordinates of the training set.
    float x[] = { -3.2, 1.1, 2.7, -1 };

    // Y coordinates of the training set.
    float y[] = { 1.5, 3.3, 5.12, 2.1 };

    // The training set outputs.
    int outputs[] = { 1, -1, -1, 1 };

    int i = 0; // iterator

    FILE *fp;

    system("PAUSE");

    int patternCount = sizeof(x) / sizeof(int);

    float weights[2];
    weights[0] = randomFloat();
    weights[1] = randomFloat();

    float learningRate = 0.1;

    int iteration = 0;
    float globalError;

    do {
        globalError = 0;
        int p = 0; // iterator
        for (p = 0; p < patternCount; p++)
        {
            // Calculate output.
            int output = calculateOutput(weights, x[p], y[p]);

            // Calculate error.
            float localError = outputs[p] - output;

            if (localError != 0)
            {
                // Update weights.
                for (i = 0; i < 2; i++)
                {
                    float add = learningRate * localError;
                    if (i == 0)
                    {
                        add *= x[p];
                    }
                    else if (i == 1)
                    {
                        add *= y[p];
                    }
                    weights[i] += add;
                }
            }

            // Convert error to absolute value.
            globalError += fabs(localError);
            printf("Iteration %d Error %.2f\n", iteration, globalError);
        }
        iteration++;
    } while (globalError != 0);

    // Display network generalisation.
    printf("X Y Output\n");
    float j, k;
    for (j = -1; j <= 1; j += .5)
    {
        for (k = -1; k <= 1; k += .5) // inner loop varies k while the outer loop varies j
        {
            // Calculate output.
            int output = calculateOutput(weights, j, k);
            printf("%.2f %.2f %s\n", j, k, (output == 1) ? "Blue" : "Red");
        }
    }

    // Display modified weights.
    printf("Modified weights: %.2f %.2f\n", weights[0], weights[1]);

    system("PAUSE");
    return 0;
}
In your current code, the perceptron successfully learns the direction of the decision boundary BUT is unable to translate it.
y y
^ ^
| - + \\ + | - \\ + +
| - +\\ + + | - \\ + + +
| - - \\ + | - - \\ +
| - - + \\ + | - - \\ + +
---------------------> x --------------------> x
stuck like this need to get like this
(as someone pointed out, here is a more accurate version)
The problem lies in the fact that your perceptron has no bias term, i.e. a third weight component connected to an input of value 1.
          w0 -----
    x ----->|     |
            |  f  |----> output (+1/-1)
    y ----->|     |
          w1 -----
                ^ w2
    1 (bias) ---|
The following is how I corrected the problem:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define LEARNING_RATE 0.1
#define MAX_ITERATION 100
float randomFloat()
{
    return (float)rand() / (float)RAND_MAX;
}

int calculateOutput(float weights[], float x, float y)
{
    float sum = x * weights[0] + y * weights[1] + weights[2];
    return (sum >= 0) ? 1 : -1;
}

int main(int argc, char *argv[])
{
    srand(time(NULL));

    float x[208], y[208], weights[3], localError, globalError;
    int outputs[208], patternCount, i, p, iteration, output;

    FILE *fp;
    if ((fp = fopen("test1.txt", "r")) == NULL) {
        printf("Cannot open file.\n");
        exit(1);
    }

    i = 0;
    while (fscanf(fp, "%f %f %d", &x[i], &y[i], &outputs[i]) != EOF) {
        if (outputs[i] == 0) {
            outputs[i] = -1;
        }
        i++;
    }
    patternCount = i;

    weights[0] = randomFloat();
    weights[1] = randomFloat();
    weights[2] = randomFloat();

    iteration = 0;
    do {
        iteration++;
        globalError = 0;
        for (p = 0; p < patternCount; p++) {
            output = calculateOutput(weights, x[p], y[p]);

            localError = outputs[p] - output;
            weights[0] += LEARNING_RATE * localError * x[p];
            weights[1] += LEARNING_RATE * localError * y[p];
            weights[2] += LEARNING_RATE * localError;

            globalError += (localError*localError);
        }

        /* Root Mean Squared Error */
        printf("Iteration %d : RMSE = %.4f\n",
               iteration, sqrt(globalError/patternCount));
    } while (globalError > 0 && iteration <= MAX_ITERATION);

    printf("\nDecision boundary (line) equation: %.2f*x + %.2f*y + %.2f = 0\n",
           weights[0], weights[1], weights[2]);

    return 0;
}
... with the following output:
Iteration 1 : RMSE = 0.7206
Iteration 2 : RMSE = 0.5189
Iteration 3 : RMSE = 0.4804
Iteration 4 : RMSE = 0.4804
Iteration 5 : RMSE = 0.3101
Iteration 6 : RMSE = 0.4160
Iteration 7 : RMSE = 0.4599
Iteration 8 : RMSE = 0.3922
Iteration 9 : RMSE = 0.0000
Decision boundary (line) equation: -2.37*x + -2.51*y + -7.55 = 0
And here's a short animation of the code above using MATLAB, showing the decision boundary at each iteration:
It might help if you put the seeding of the random generator at the start of your main instead of reseeding on every call to randomFloat, i.e.
float randomFloat()
{
    float r = (float)rand() / (float)RAND_MAX;
    return r;
}

// ...

int main(int argc, char *argv[])
{
    srand(time(NULL));

    // X, Y coordinates of the training set.
    float x[208], y[208];
Some small errors I spotted in your source code:
int patternCount = sizeof(x) / sizeof(int);
Better change this to
int patternCount = i;
so you don't have to rely on your x array having the right size.
You increment iteration inside the p loop, whereas the original C# code does this outside the p loop. Better to move the printf and the iteration++ outside the p loop, before the PAUSE statement; also, I'd remove the PAUSE statement, or change it to
if ((iteration % 25) == 0) system("PAUSE");
Even with all those changes, your program still doesn't terminate on your data set, but the output is more consistent, giving an error oscillating somewhere between 56 and 60.
The last thing you could try is to test the original C# program on this dataset; if it also doesn't terminate, there's something wrong with the algorithm (because your dataset looks correct, see my visualization comment).
globalError will not become zero; it will converge to zero as you said, i.e. it will become very small.
Change your loop like so:
int maxIterations = 1000000; // stop after one million iterations regardless
float maxError = 0.001;      // one in a thousand points in the wrong class
do {
    // loop stuff here

    // convert to a fractional error
    globalError = globalError / ((float)patternCount);
} while ((globalError > maxError) && (iteration < maxIterations));
Give maxIterations and maxError values applicable to your problem.