Error Back Propagation gets stuck in a wrong value

Error Back Propagation gets stuck in a wrong value - c

I am currently doing an assignment which requires me to make a neural network of f(x) = x(1-x).
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define ITER 300000
#define L_RATE 0.5
#define N 11
#define I 1
#define J 4
#define K 1
#define SIGMOID(x) (1/(1+exp(-(x))))
So I am currently using 4 hiden nodes(J) and a learning rate of 0.5(L_RATE) and planning to have 11 datas(N) as inputs for training.
double error_v(double o, double t, double h)
{
return -(t-o)*o*(1-o)*h;
}
double error_w(double x, double h, double w, double t, double o)
{
return -x*h*(1-h)*w*(t-o)*o*(1-o);
}
These would be the error gradient for weight of each layers.
int main(void)
{
double x[N][I];
double t[N][K];
double w[J][I+1];
double v[K][J+1];
double net_w[N][J];
double net_v[N][K];
double h[N][J];
double o[N][K];
for(int n=0; n<N; n++)
{
for(int i=0; i<I; i++)
{
x[n][i] = 0.1 * n;
t[n][i] = x[n][i] * (1 - x[n][i]);
}
}
//INITIALIZE WEIGHT
srand(time(NULL));
for(int j=0; j<J; j++)
{
for(int i=0; i<I+1; i++)
{
w[j][i] = (double)rand()/RAND_MAX*2.0-1.0;
printf("%.2f ", w[j][i]);
}
}
for(int k=0; k<K; k++)
{
for(int j=0; j<J+1; j++)
{
v[k][j] = (double)rand()/RAND_MAX*2.0-1.0;
printf("%.2f ", v[k][j]);
}
}
for(int iter=0; iter<ITER; iter++)
{
for(int n=0; n<N; n++)
{
//Initialize net zero
for(int j=0; j<J; j++)
{
net_w[n][j] = 0;
}
for(int k=0; k<K; k++)
{
net_v[n][k] = 0;
}
//Sum up net_w and produce h
for(int j=0; j<J; j++)
{
for(int i=0; i<I; i++)
{
net_w[n][j] += x[n][i] * w[j][i];
}
net_w[n][j] += 1 * w[j][I];
h[n][j] = SIGMOID(net_w[n][j]);
}
//Sum up net_v and produce o
for(int k=0; k<K; k++)
{
for(int j=0; j<J; j++)
{
net_v[n][k] += h[n][k] * v[k][j];
}
net_v[n][k] += 1 * v[k][J];
o[n][k] = SIGMOID(net_v[n][k]);
}
}
So until here, I calculated the first net value by multiplying the input(including one bias) and weight(w) and the second net value by multiplying the 'sigmoided' value of the first net(including one bias) and weight(v).
for(int n=0; n<N; n++)
{
//error_v
for(int k=0; k<K; k++)
{
for(int j=0; j<J; j++)
{
v[k][j] -= L_RATE * error_v(o[n][k], t[n][k], h[n][j]);
}
v[k][J] -= L_RATE * error_v(o[n][k], t[n][k], 1);
}
}
for(int n=0; n<N; n++)
{
//error_w
for(int j=0; j<J; j++)
{
for(int i=0; i<I; i++)
{
for(int k=0; k<K; k++)
{
w[j][i] -= L_RATE * error_w(x[n][i], h[n][j], w[k][j], t[n][k], o[n][k]);
}
}
for(int k=0; k<K; k++)
{
w[j][I] -= L_RATE * error_w(1, h[n][j], w[k][j], t[n][k], o[n][k]);
}
}
}
}
But here's the problem. I corrected the error of the weight by multiplying the learning rate and the error gradient by the following formula.
I thought I had to use 3-for-loop for the second layer and 4-for-loop in respect of each number of input layer, hidden layer, output layer and training data.
printf("INPUT\n");
for(int n=0; n<N; n++)
{
printf("%.2f ", x[n][0]);
}
printf("\n");
printf("OUTPUT\n");
for(int n=0; n<N; n++)
{
printf("%.2f ", o[n][0]);
}
printf("\n");
printf("EXPECTED\n");
for(int n=0; n<N; n++)
{
printf("%.2f ", t[n][0]);
}
printf("\n");
return 0;
}
But when I print out the trained output, It doesn't get close to the expected data but rather gets stuck in a particular value.
I saw many others had been through this kind of problem, and their solution was giving random non zero values to the initial weight or changing the learning rate or number of iterations. I've done this numerous times but It doesn't seem to match my problem. I would be very thankful if someone could tell me what I am doing wrong right now.

In the loop //Sum up net_v and produce o where the v-weighted output from the hidden layer's neurons' activations h is accumulated in net_v, there's a wrong index k:
net_v[n][k] += h[n][k] * v[k][j];
should be
net_v[n][k] += h[n][j] * v[k][j];

Related

Dynamic numeric arrays

How do this in C? Fill the matrix with random numbers.
Mapping the upper half of the matrix to the lower half
mirror symmetrically along the horizontal axis.
I made a function to output a multidimensional array and generate a random number. I am stuck on the next step: I don't know how to map the top half of the matrix to the bottom half mirror symmetrically along the horizontal axis.
const int m = 3, n = 4;
void InputMatrix(int x[m][n])
{
int i, j;
for (i = 0; i < m; i++)
for (j = 0; j < n; j++)
{
printf("Enter [%d][%d]:", i, j);
scanf("%d", &x[i][j]);
}
}
void PrintMatrix(int x[m][n])
{
int i, j;
printf("\n");
for (i = 0; i < m; i++)
{
for (j = 0; j < n; j++)
printf("%d\t", x[i][j]);
printf("\n");
}
}
void FillMatrix(int x[m][n])
{
int i, j;
for (i = 0; i < m; i++)
for (j = 0; j < n; j++)
{
x[i][j] = rand() % 100 - 50;
}
}

Expression must have arithmetic type issue

I need to multiply two square matrixes A and B 15x15.
Unfortunately, I'm getting this kind of error.
I know the problem is in pointers while calculating matrix C.
C[i][j] += *(A + k) * *(B + k)
I hope you can explain me what's wrong. I'm a beginner xD.
Thank you in advance.
#include <stdio.h>
#define N 15
#define _CRT_SECURE_NO_WARNINGS
int main() {
int A[N][N];
int B[N][N];
int C[N][N];
printf("Input matrix A.\n");
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("Enter your element:\n");
scanf_s("%d", &A[i][j]);
}
printf("\n");
}
printf("Input matrix B.\n");
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("Enter your element:\n");
scanf_s("%d", &B[i][j]);
}
printf("\n");
}
printf("Matrix A.\n");
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("%d\t", A[i][j]);
}
printf("\n");
}
printf("Matrix B.\n");
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("%d\t", B[i][j]);
}
printf("\n");
}
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
C[i][j] = 0;
for (int k = 0; k < 14; k++) {
C[i][j] += *(A + k) * *(B + k);
k++;
}
}
}
printf("Your result:\n");
printf("Matrix C.\n");
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("%d\t", C[i][j]);
}
printf("\n");
}
return 0;
}

The problem in the multiplication is that A+k and B+k have type int (*)[15] which means dereferencing it once only makes a pointer out of them; furthermore, you need to take row and column items individually, which means A[i][k] and B[k][j], right? (also, there's no point on using confusing syntax, as the underlying operation is exactly the same).
Here's a fixed and improved version:
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#define N 15
/* Improvement 1 (type abstraction) */
typedef int NxN_int_matrix[N][N];
/* Improvement 2 (input function & wrapper) */
#define input_matrix(var) input_matrix_ex((var), #var)
static void input_matrix_ex(NxN_int_matrix dst, char *name)
{
printf("Input matrix %s.\n", name);
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
/* Improvement 3 (nicer prompt) */
printf("%s[%2d][%2d]: ", name, i, j);
fflush(stdout);
scanf_s("%d", &dst[i][j]);
}
}
printf("\n");
}
/* Improvement 4 (print function) */
#define print_matrix(var) print_matrix_ex(#var, (var))
static void print_matrix_ex(char *name, NxN_int_matrix M)
{
printf("Matrix %s.\n", name);
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
printf("%d\t", M[i][j]);
}
printf("\n");
}
}
/* Improvement 5 (move multiplication to a function too, and fix it) */
static void mult_matrix(NxN_int_matrix dst, NxN_int_matrix a, NxN_int_matrix b)
{
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
/* Improvement 6 (don't write out intermediate values) */
int tmp = 0;
for (int k = 0; k < N; k++)
tmp += a[i][k] * b[k][j];
dst[i][j] = tmp;
}
}
}
int main()
{
NxN_int_matrix A, B, C;
input_matrix(A);
input_matrix(B);
print_matrix(A);
print_matrix(B);
mult_matrix(C, A, B);
printf("Your result:\n");
print_matrix(C);
return 0;
}
/* Possible further improvements:
* - using a transposed B might make multiplication faster
*/

How to raise a matrix to a power with double pointers in C

I am trying to raise a matrix to a power, using pointers but there is a mistake in my code I can't find.
#include <stdlib.h>
#include <stdio.h>
#include <conio.h>
>
int **alloc(int r, int c) {
int **d;
d = (int **)malloc(r * sizeof(int *));
for (int i = 0; i < r; i++) {
d[i] = (int *)malloc(c * sizeof(int));
}
return d;
void input(int **A, int r, int c) {
for (int i = 0; i < r; i++) {
for (int j = 0; j < c; j++) {
printf("[%d][%d]=", i, j);
scanf_s("%d", &A[i][j]);
}
}
}
void output(int **A, int r, int c) {
for (int i = 0; i < r; i++) {
for (int j = 0; j < c; j++) {
printf("%d ", A[i][j]);
}
printf("\n");
}
}
void power(int **A,int**D, int r, int c,int p) {
int i, j,k;
for (i = 0; i < r; i++) {
for (j = 0; j < c; j++) {
D[i][j] = A[i][j];
}
}
while (p) {
// this is the matrix multiplication, where I attempt to multiply my matrix A with itself, and store the result in D, which initially started as A's copy, and p is the power I'm raising it to.
for (i = 0; i < r; i++) {
for (j = 0; j < c; j++) {
for (k = 0; k < c; k++)
D[i][j] = D[i][j] + A[i][k] * D[k][j];
}
}
p--;
}
}
void main() {
int r, c;
int **A, **D;
printf("rows A: ");
scanf_s("%d", &r);
printf("columns A: ");
scanf_s("%d", &c);
A = alloc(r, c);
printf("\nValues of A:\n");
input(A, r, c);
printf("\nMatrIX A is:\n");
output(A, r, c);
D = alloc(r, c);
printf("input the value you want to raise your matrix to: ");
int p;
scanf_s("%d", &p);
power(A, D, r, c, p);
printf("\nMatrix raised to said power is:\n");
output(D, r, c);
_getch();
}
When I input the rows and columns as 2 each, and input all values in the matrix as 1 and raise it to the power of 3, my answer should be
4 4
4 4
instead of
72 72
232 232
What is wrong in my code? If I were to print the D matrix before the multiplication, it would print it correctly, as:
1 1
1 1

Your two-step allocation looks okay, except that you don't free the memory after you're done. Your problem is in how you multiply the matrix:
Raising a matrix to a power involves multiplying it to itself. You can only do that if the matrix is square. You can replace all occurrences of rows r and columns c with a single dimension n.
When you do the actual multiplication:
D[i][j] = D[i][j] + A[i][k] * D[k][j];
you assign to D and read from it at the same time. Subsequent calculations (of the same multiplication) will see a changed value of D[i][j]. You will need a temporary "scratch" matrix.
Your code multiplies once too many. Also, Raising a matrix to the power of zero should yield the identity matrix.
Here's how your power function could look like:
void power(int **A, int **D, int n, int p)
{
int i, j,k;
// assign identity matrix to result D
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
D[i][j] = (i == j);
}
}
// create a scratch matrix
int **tmp = alloc(n);
while (p-- > 0) {
// multiply [tmp] = [A] * [D]
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
tmp[i][j] = 0;
for (k = 0; k < n; k++)
tmp[i][j] += A[i][k] * D[k][j];
}
}
// copy [D] = [tmp]
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
D[i][j] = tmp[i][j];
}
}
}
// TODO: clean up the scratch matrix
}

C - Two dimensional matrix, add in a shape of two symmetrical up-down triangles (almost like X)

The objective: Add only the pieces of the matrix that are part of a full X (upper and lower triangle).
1 1 1
0 1 0
1 1 1
Like this, middle one should add only once.
I can't add the lower triangle properly. Help much appreciated :)
void write(int niz[20][20], int n){
int i, j;
for(i=0; i<n; i++){
for(j=0; j<n; j++){
scanf("%d", &niz[i][j]);
}
}
}
void x(int niz[20][20], int n){
//Upper triangle
int i, j, pr=n, suma=0;
for(i=0; i<n/2 + n%2; i++,pr--){
for(j=i; j<pr; j++){
suma += niz[i][j];
}
}
printf("%d\n",suma);
//Lower triangle
pr = n;
for(i=n; i>n/2 + n%2; i--,pr--){
printf("%d",pr);
for(j=n-i; j<pr; j++){
printf("\n%d", niz[i][j]);
suma += niz[i][j];
}
}
printf("%d", suma);
}
int main()
{
int n;
printf("Matrix dimensions: ");
scanf("%d", &n);
printf("Numbers in the matrix: \n");
int niz[n][n];
write(niz, n);
x(niz, n);
}

Instead of writing separate functions for each lower, upper & diagonals you can do all together with little tricks, but it works only if row == column and thats's what you want I think.
int main() {
/* it can be anything like a[3][3] or a[7][7] and elements can
be all one or all 2 or any number */
int arr[5][5] = { {1,1,1,1,1},
{0,0,1,0,0},
{0,0,1,0,0},
{0,0,1,0,0},
{1,1,1,1,1} };
int row = sizeof(arr)/sizeof(arr[0]);
int col = sizeof(arr[0])/sizeof(arr[0][0]);
int sum = 0;
for(int index = 0; index < row; index++) {
for(int sub_index = 0; sub_index < col; sub_index++) {
if(index == 0 || (index == row-1) || sub_index == row/2)
sum = sum + arr[index][sub_index];
}
}
printf("sum = %d \n",sum);
return 0;
}
Its fine if it helps you otherwise write your own logic.

There are some mismatches between the declarations and types of the arguments passed to OP's function. While in main they declare a variable length array, named niz:
int n;
// ...
int niz[n][n];
The posted signature of both write and x requires an int niz(*)[20]. It should be changed to:
void write(int n, int niz[n][n]);
// this ^^^ may be a size_t, just remember to write it before the array
About the pattern you have to follow for the sum, I can't say to fully understand your requirement, but if I'm not completely wrong, it could be done this way:
#include <stdio.h>
#include <stdlib.h>
void read_matrix(int n, int niz[n][n])
{
for(int i=0; i<n; i++) {
for(int j=0; j<n; j++) {
scanf("%d", &niz[i][j]);
}
}
}
// Separate the calculation from the printing
int hourglass_sum(int n, int niz[n][n])
{
int sum = 0;
int i = 0;
//Upper triangle
for(int k = n; i < k; ++i, --k) {
for(int j = i; j < k; ++j) {
sum += niz[i][j];
}
}
//Lower triangle
for(int k = i + 1; i < n; ++i, ++k) {
for(int j = n - i - 1; j < k; ++j) {
sum += niz[i][j];
}
}
return sum;
}
int main()
{
int n;
printf("Matrix dimensions: ");
scanf("%d", &n);
int niz[n][n];
read_matrix(n, niz);
printf("\nSum: %d", hourglass_sum(n, niz));
}

Varying Results When Displaying Result Matrix

I am currently working on a code that calculates the following equation:
W = (XT * X)^-1 * XT * Y
with XT being the transpose of X and ^-1 the inverse of the product XT*X.
To do this, I am passing a text file with a matrix to the program, and then performing operations on it to obtain the proper matrix formats to use in the equation. Although the code works some times, there are instances when I will run the code and receive the proper answer, but others when a random number will be produced.
For example, when using the matrix [first integer = columns-1, second integer = rows]:
4
4
4.000000,3.000000,2.000000,3.000000,200.000000
5.000000,2.000000,1.000000,7.000000,300.000000
1.000000,4.000000,2.000000,1.000000,500.000000
8.000000,8.000000,9.000000,3.000000,200.000000
it will produce an answer for XT * (XT * X)^-1 as:
0.497617 -0.166646 0.061712 -0.137570
-61.086998 -0.258283 -0.340935 -0.064228
0.186411 -0.083895 0.285920 -0.082587
-0.722773 0.207515 -0.009408 0.238550
-0.579552 0.345476 0.154362 0.055048
and the numbers will not remain the same through each test run. It does this with this result matrix multiplied by Y [the last column in the original matrix] as well. Below is a sample of the code I have written thus far:
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char* argv[]){
if(argc < 2){
printf("error.");
return 0;
}
FILE *fptrain = fopen(argv[1], "r");
int row, col, i, j;
fscanf(fptrain, "%d", &col);
col = col+1;
fscanf(fptrain, "%d", &row);
char ch;
//creates the original X and Y matrix
float trainX[row][col];
float trainY[row][1];
for(i=0; i<row; i++)
{
trainX[i][0] = 1.000000;
for(j=1; j<col; j++)
{
fscanf(fptrain, "%f%c", &trainX[i][j], &ch);
}
fscanf(fptrain, "%f%c", &trainY[i][0], &ch);
}
//creates the X transposed matrix
float trainXtrans[col][row];
for(i=0; i<row; i++)
{
for(j=0; j<col; j++)
{
trainXtrans[j][i] = trainX[i][j];
}
}
//multiplies X and X transposed
float trainXtemp[row][row];
int s;
int num=0;
for(i=0; i<row; i++)
{
for(j=0; j<row; j++)
{
for(s=0; s<col; s++)
{
num = num + trainX[i][s]*trainXtrans[s][j];
}
trainXtemp[i][j] = num;
num =0;
}
}
//finds the identity matrix of X times X transposed
float trainXinden[row][row*2];
for(i=0; i<row; i++)
{
for(j=0; j<row; j++)
{
trainXinden[i][j] = trainXtemp[i][j];
}
for(j=row; j<row*2; j++)
{
if(j==i+row)
{
trainXinden[i][j] = 1;
}
else{
trainXinden[i][j] = 0;
}
}
}
//finds the inverse of X times X transposed through Gauss Jordan Elimination
int k;
float divscalar;
for(i=0; i<row; i++)
{
divscalar = trainXinden[i][i];
for(j=0; j<row*2; j++)
{
trainXinden[i][j] = trainXinden[i][j]/divscalar;
}
for(k=0; k<row; k++)
{
if(i!=k)
{
float subscalar = trainXinden[k][i];
for(j=0; j<row*2; j++)
{
trainXinden[k][j] = trainXinden[k][j] - subscalar*trainXinden[i][j];
}
}
}
}
//copies over the result of gauss jordan elimination
float trainXinverse[row][row];
for(i=0; i<row; i++)
{
for(j=0; j<row; j++)
{
trainXinverse[i][j] = trainXinden[i][j+row];
}
}
//multiplies (X times X transpose) inverse by (X transposed)
float trainXinvXt[col][row];
for(i=0; i<col; i++)
{
for(j=0; j<row; j++)
{
for(s=0; s<row; s++)
{
trainXinvXt[i][j] += trainXtrans[i][s]*trainXinverse[s][j];
}
}
}
//multiples (trainXinvXt) by Y
float weight[row][1];
for(i=0; i<col; i++)
{
for(s=0; s<col-1; s++)
{
weight[i][0] += trainXinvXt[i][s]*trainY[s][0];
}
}
return 0;
}
is this perhaps a memory issue or is my Gauss-Jordan Elimination method throwing something off?