Vector equations in C - c

Im trying to code this function which computes the projection formula
P(x) = x + (c- a dot x)a * (1/|a|^2). Note that x and a are vectors and c is a scalar. Also note that a dot x is the product dot/inner product of a and x. Here is what I have,
double dotProduct(double *q, double *b, int length) {
double runningSum = 0;
for (int index = 0; index < length; index++)
runningSum += q[index] * b[index];
return runningSum;
}
void project(double *x ,int n, double *a, double c)
{ double m_sum = 0.0;
for (int i = 0; i < n; i++) {
m_sum += a[i]*a[i];
}
for ( int j = 0; j < n; j++) {
x[j] = x[j] + (1/ m_sum)* (c - dotProduct(a, x, n))*a[j];
}
}
So my question is how do I create a test file to check the consistency of the project function since it doesn’t return anything. Is my code even right? does not return anything.

Related

I am using wrong indexing in one of the loops but can't figure out which one. I have made the changes which were suggested

#include<stdio.h>
#include<math.h>
#include<stdlib.h>
const int N = 3;
void LUBKSB(double b[], double a[N][N], int N, int *indx)
{
int i, ii, ip, j;
double sum;
ii = 0;
for(i=0;i<N;i++)
{
ip = indx[i];
sum = b[ip];
b[ip] = b[i];
if (ii)
{
for(j = ii;j<i-1;j++)
{
sum = sum - a[i][j] * b[j];
}
}
else if(sum)
{
ii = i;
}
b[i] = sum;
}
for(i=N-1;i>=0;i--)
{
sum = b[i];
for (j = i; j<N;j++)
{
sum = sum - a[i][j] * b[j];
}
b[i] = sum/a[i][i];
}
for (i=0;i<N;i++)
{
printf("b[%d]: %lf \n",i,b[i]);
}
}
void ludecmp(double a[][3], int N)
{
int i, imax, j, k;
double big, dum, sum, temp, d;
double *vv = (double *) malloc(N * sizeof(double));
int *indx = (int *) malloc(N * sizeof(double));
double TINY = 0.000000001;
double b[3] = {2*M_PI,5*M_PI,-8*M_PI};
d = 1.0;
for(i=0;i<N;i++)
{
big = 0.0;
for(j=0;j<N;j++)
{
temp = fabs(a[i][j]);
if (temp > big)
{
big = temp;
}
}
if (big == 0.0)
{
printf("Singular matrix\n");
exit(1);
}
vv[i] = 1.0/big;
}
for(j=0;j<N;j++)
{
for(i=0;i<j-1;i++)
{
sum = a[i][j];
for(int k=0;k<i-1;k++)
{
sum = sum - (a[i][k] * a[k][j]);
}
a[i][j] = sum;
}
big = 0.0;
for(i=j;i<N;i++)
{
sum = a[i][j];
for(k=0;k<j-1;k++)
{
sum = sum - a[i][k] * a[k][j];
}
a[i][j] =sum;
dum = vv[i] * fabs(a[i][j]);
if(dum >= big)
{
big = dum;
imax = i;
}
}
if(j != imax)
{
for(k=0;k<N;k++)
{
dum = a[imax][k];
a[imax][k] = a[j][k];
a[j][k] = dum;
}
d = -d;
vv[imax] = vv[j];
}
indx[j] = imax;
if (a[j][j] == 0)
{
a[j][j] = TINY;
}
if (j != N)
{
dum = 1.0/a[j][j];
for(i = j; i<N; i++)
{
a[i][j] = a[i][j] * dum;
}
}
}
LUBKSB(b,a,N,indx);
free(vv);
free(indx);
}
int main()
{
int N, i, j;
N = 3;
double a[3][3] = { 1, 2, -1, 6, -5, 4, -9, 8, -7};
ludecmp(a,N);
}
I am using these algorithms to find LU decomposition of matrix and trying to find solution A.x = b
Given a N ×N matrix A denoted as {a}N,Ni,j=1, the routine replaces it by the LU
decomposition of a rowwise permutation of itself. “a” and “N” are input. “a” is also output,
modified to apply the LU decomposition; {indxi}N
i=1 is an output vector that records the
row permutation effected by the partial pivoting; “d” is output and adopts ±1 depending on
whether the number of row interchanges was even or odd. This routine is used in combination
with algorithm 2 to solve linear equations or invert a matrix.
Solves the set of N linear equations A . x = b. Matrix {a}
N,N
i,j=1 is actually the
LU decomposition of the original matrix A, obtained from algorithm 1. Vector {indxi}
N
i=1 is
input as the permutation vector returned by algorithm 1. Vector {bi}
N
i=1 is input as the righthand side vector B but returns with the solution vector X. Inputs {a}
N,N
i,j=1, N, and {indxi}
N
i=1
are not modified in this algorithm.
There are a number of problems with your code:
In your for-loops, i <= N should be i < N and i = N should be i = N - 1.
The absolute value of a double is returned by fabs, not abs.
The statement exit should be exit(1) or exit(EXIT_FALILURE).
Two of your functions lack a return statement.
You should also free the memory you have allocated with the function free. When you compile a C program you should also enable all warnings.

Gradient descent returning nan

I need to write a function to get a curve fit of a dataset. The code below is what I have. It attempts to use gradient descent to find polynomial coefficients which best fit the data.
//solves for y using the form y = a + bx + cx^2 ...
double calc_polynomial(int degree, double x, double* coeffs) {
double y = 0;
for (int i = 0; i <= degree; i++)
y += coeffs[i] * pow(x, i);
return y;
}
//find polynomial fit
//returns an array of coefficients degree + 1 long
double* poly_fit(double* x, double* y, int count, int degree, double learningRate, int iterations) {
double* coeffs = malloc(sizeof(double) * (degree + 1));
double* sums = malloc(sizeof(double) * (degree + 1));
for (int i = 0; i <= degree; i++)
coeffs[i] = 0;
for (int i = 0; i < iterations; i++) {
//reset sums each iteration
for (int j = 0; j <= degree; j++)
sums[j] = 0;
//update weights
for (int j = 0; j < count; j++) {
double error = calc_polynomial(degree, x[j], coeffs) - y[j];
//update sums
for (int k = 0; k <= degree; k++)
sums[k] += error * pow(x[j], k);
}
//subtract sums
for (int j = 0; j <= degree; j++)
coeffs[j] -= sums[j] * learningRate;
}
free(sums);
return coeffs;
}
And my testing code:
double x[] = { 0, 1, 2, 3, 4 };
double y[] = { 5, 3, 2, 3, 5 };
int size = sizeof(x) / sizeof(*x);
int degree = 1;
double* coeffs = poly_fit(x, y, size, degree, 0.01, 1000);
for (int i = 0; i <= degree; i++)
printf("%lf\n", coeffs[i]);
The code above works when degree = 1, but anything higher causes the coefficients to come back as nan.
I've also tried replacing
coeffs[j] -= sums[j] * learningRate;
with
coeffs[j] -= (1/count) * sums[j] * learningRate;
but then I get back 0s instead of nan.
Anyone know what I'm doing wrong?
I tried degree = 2, iteration = 10 and got results other than nan (values around a few thousands) Adding one to iteration seems making magnitude of the results larger by about 3 times after that.
From this observation, I guessed that the results are being multiplied by count.
In the expression
coeffs[j] -= (1/count) * sums[j] * learningRate;
Both of 1 and count are integers, so integer division is done in 1/count and it will become zero if count is larger than 1.
Instead of that, you can divide the result of multiplication by count.
coeffs[j] -= sums[j] * learningRate / count;
Another way is using 1.0 (double value) instead of 1.
coeffs[j] -= (1.0/count) * sums[j] * learningRate;
Aside:
A candidate NAN source is adding opposite signed values where one is an infinity. Given OP is using pow(x, k), which grows rapidly, using other techniques help.
Consider a chained multiplication rather than pow(). The result is usually more numerically stable. calc_polynomial() for example:
double calc_polynomial(int degree, double x, double* coeffs) {
double y = 0;
// for (int i = 0; i <= degree; i++)
for (int i = degree; i >= 0; i--)
//y += coeffs[i] * pow(x, i);
y = y*x + coeffs[i];
}
return y;
}
Similar code could be used for the main() body.

problem calculating the inverse of a matrix

I'm trying to calculate the inverse of a square matrix of any rank N x N. I'm using a struct to store the values of the matrix which I can to effectively and I am already able to calculate the determinant. But there must be some issue with the inverse function. This is the code
struct m{
size_t row;
size_t col;
double *data;
};
void inverse(size_t n, struct m *A) /*Calculate the inverse of A */
{
size_t i,j,i_count,j_count, count=0;
double det = determinant(n, A);
size_t id = 0;
double *d;
struct m C; /*The Adjoint matrix */
C.data = malloc(sizeof(double) * n * n);
C.row = n;
C.col = n;
struct m *minor; /*matrices obtained by removing the i row and j column*/
if (!(minor = malloc(n*n*(n+1)*sizeof *minor))) {
perror ("malloc-minor");
exit(-1);
}
if (det == 0){
printf("The matrix is singular\n");
exit(1);
}
for(id=0; id < n*n; id++){
d = minor[id].data = malloc(sizeof(double) * (n-1) * (n-1));
for(count=0; count < n; count++)
{
//Creating array of Minors
i_count = 0;
for(i = 0; i < n; i++)
{
j_count=0;
for(j = 0; j < n; j++)
{
if(j == count)
continue; // don't copy the minor column element
*d = A->data[i * A->col + j];
d++;
j_count++;
}
i_count++;
}
}
}
for(id=0; id < n*n; id++){
for(i=0; i < n; i++){
for(j=0; j < n; j++)
C.data[i * C.col + j] = determinant(n-1,&minor[id]);//Recursive call
}
}
transpose(&C);
scalar_product(1/det, &C);
*A = C;
}
The determinant is calculated recursively with this algorithm:
double determinant(size_t n, struct m *A)
{
size_t i,j,i_count,j_count, count=0;
double det = 0;
if(n < 1)
{
printf("Error\n");
exit(1);
}
if(n==1) return A->data[0];
else if(n==2) return (A->data[0]* A->data[1 * A->col + 1] - A->data[0 + 1] * A->data[1*A->col + 0]);
else{
struct m C;
C.row = A->row-1;
C.col = A->col-1;
C.data = malloc(sizeof(double) * (A->row-1) * (A->col-1));
for(count=0; count < n; count++)
{
//Creating array of Minors
i_count = 0;
for(i = 1; i < n; i++)
{
j_count=0;
for(j = 0; j < n; j++)
{
if(j == count)
continue; // don't copy the minor column element
C.data[i_count * C.col + j_count] = A->data[i * A->col + j];
j_count++;
}
i_count++;
}
det += pow(-1, count) * A->data[count] * determinant(n-1,&C);//Recursive call
}
free(C.data);
return det;
}
}
You can find the complete code here: https://ideone.com/gQRwVu.
Use some other variable in the loop after :
det + =pow(-1,count) * A->data[count] *determinant (n-1,&C)
Your calculation of the inverse doesn't quite correspond to the algorithm described e. g. for Inverse of a Matrix
using Minors, Cofactors and Adjugate, even taken into account that you for now omitted the adjugate and division step. Compare your outermost for loop in inverse() to this working implementation:
double Rdata[(n-1)*(n-1)]; // remaining data values
struct m R = { n-1, n-1, Rdata }; // matrix structure for them
for (count = 0; count < n*n; count++) // Create n*n Matrix of Minors
{
int row = count/n, col = count%n;
for (i_count = i = 0; i < n; i++)
if (i != row) // don't copy the current row
{
for (j_count = j = 0; j < n; j++)
if (j != col) // don't copy the current column
Rdata[i_count*R.col+j_count++] = A->data[i*A->col+j];
i_count++;
}
// transpose by swapping row and column
C.data[col*C.col+row] = pow(-1, row&1 ^ col&1) * determinant(n-1, &R) / det;
}
It yields for the given input data the correct inverse matrix
1 2 -4.5
0 -1 1.5
0 0 0.5
(already transposed and divided by the determinant of the original matrix).
Minor notes:
The *A = C; at the end of inverse() loses the original data pointer of *A.
The formatting function f() is wrong for negative values, since the fraction is also negative in this case. You could write if (fabs(f)<.00001).

Speed up matrix-matrix multiplication using SSE vector instructions

I have some trouble in vectorize some C code using SSE vector instructions. The code which I have to victorize is
#define N 1000
void matrix_mul(int mat1[N][N], int mat2[N][N], int result[N][N])
{
int i, j, k;
for (i = 0; i < N; ++i)
{
for (j = 0; j < N; ++j)
{
for (k = 0; k < N; ++k)
{
result[i][k] += mat1[i][j] * mat2[j][k];
}
}
}
}
Here is what I got so far:
void matrix_mul_sse(int mat1[N][N], int mat2[N][N], int result[N][N])
{
int i, j, k; int* l;
__m128i v1, v2, v3;
v3 = _mm_setzero_si128();
for (i = 0; i < N; ++i)
{
for (j = 0; j < N; j += 4)
{
for (k = 0; k < N; k += 4)
{
v1 = _mm_set1_epi32(mat1[i][j]);
v2 = _mm_loadu_si128((__m128i*)&mat2[j][k]);
v3 = _mm_add_epi32(v3, _mm_mul_epi32(v1, v2));
_mm_storeu_si128((__m128i*)&result[i][k], v3);
v3 = _mm_setzero_si128();
}
}
}
}
After execution I got wrong result. I know that the reason is the loading from memory to v2. I loop through mat1 in row major order so I need to load mat2[0][0], mat2[1][0], mat2[2][0], mat2[3][0].... but what actually loaded is mat2[0][0], mat2[0][1], mat2[0][2], mat2[0][3]... because mat2 has stored in the memory in row major order. I tried to fix this problem but without any improvement.
Can anyone help me please.
Below fixed your implementation:
void matrix_mul_sse(int mat1[N][N], int mat2[N][N], int result[N][N])
{
int i, j, k;
__m128i v1, v2, v3, v4;
for (i = 0; i < N; ++i)
{
for (j = 0; j < N; ++j) // 'j' must be incremented by 1
{
// read mat1 here because it does not use 'k' index
v1 = _mm_set1_epi32(mat1[i][j]);
for (k = 0; k < N; k += 4)
{
v2 = _mm_loadu_si128((const __m128i*)&mat2[j][k]);
// read what's in the result array first as we will need to add it later to our calculations
v3 = _mm_loadu_si128((const __m128i*)&result[i][k]);
// use _mm_mullo_epi32 here instead _mm_mul_epi32 and add it to the previous result
v4 = _mm_add_epi32(v3, _mm_mullo_epi32(v1, v2));
// store the result
_mm_storeu_si128((__m128i*)&result[i][k], v4);
}
}
}
}
In short _mm_mullo_epi32 (requires SSE4.1) produces 4 x int32 results as opposed to _mm_mul_epi32 which does 2 x int64 results. If you cannot use SSE4.1 then have a look at the answer here for an alternative SSE2 solution.
Full description by Intel Intrinsic Guide:
_mm_mullo_epi32: Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store
the low 32 bits of the intermediate integers in dst.
_mm_mul_epi32: Multiply the low 32-bit integers from each packed 64-bit element in a and b, and store the
signed 64-bit results in dst.
I kinda changed around your code to make the addressing explicit [ it helps in this case ].
#define N 100
This is a stub for the vector unit multiple & accumulate operation; you should be able to replace NV with whatever throw your vector unit has, and put the relevant opcodes in here.
#define NV 8
int Vmacc(int *A, int *B) {
int i = 0;
int x = 0;
for (i = 0; i < NV; i++) {
x += *A++ * *B++;
}
return x;
}
This multiply has two notable variations from the norm:
1. It caches the columnar vector into a contiguous one.
2. It attempts to push slices of the multiply accumulate into a vector-like func.
Even without using the vector unit, this takes half the time of naive version just because of better cache/prefetch utilization.
void mm2(int *A, int *B, int n, int *C) {
int c, r;
int stride = 0;
int cache[N];
for (c = 0; c < n; c++) {
/* cache cumn i: */
for (r = 0; r < n; r++) {
cache[r] = B[c + r*n];
}
for (r = 0; r < n; r++) {
int k = 0;
int x = 0;
int *Av = A + r*n;
for (k = 0; k+NV-1 < n; k += NV) {
x += Vmacc(Av+k, cache+k);
}
while (k < n) {
x += Av[k] * cache[k];
k++;
}
C[r*n + c] = x;
}
}
}

Incorrect checksum for free freed object - 2D double array in C

I'm getting the error:
malloc: *** error for object 0x100502048: incorrect checksum for freed
object - object was probably modified after being freed.
The thing is, this error happens randomly. Sometimes it the program executes and I get the answer I am looking for, and sometimes this error pops up.
I am using xcode to debug, and it points to this function definition:
double **Hermite_coeff(double *z, double *output, double *deriv, int n)
{
int i, j;
double **H;
H = calloc(2*n, sizeof(double*)); // <-----Error points to here
for (i = 0; i < 2*n; ++i)
H[i] = calloc((i+1),sizeof(double));
for (i = 0; i < n; ++i)
{
H[2*i][0] = output[i];
H[2*i+1][0] = output[i];
H[2*i+1][1] = deriv[i];
if (i != 0)
{
H[2*i][1] = (H[2*i][0] - H[2*i-1][0])/(z[2*i] - z[2*i-1]);
}
}
for (i = 2; i < 2*n; ++i)
{
for (j = 2; j <= i; j++)
{
H[i][j] = (H[i][j-1] - H[i-1][j-1])/(z[i] - z[i-j]);
}
}
return H;
}
This is the function that generates the double *z.
double *Hermite_z_sequence(double *input, int n)
{
int i;
double *z;
if ((z = calloc(2*n, sizeof(double))) == NULL)
{
printf("Malloc failed in Hermite_z_sequence\n");
return NULL;
}
for (i = 0; i < 2*n; ++i)
{
z[2*i] = input[i];
z[2*i+1] = input[i];
}
return z;
}
This is ultimately what I am trying to run.
double Hermite_interpolation(double *z, double **coeff, int n, double x)
{
int i, j;
double result, sum;
result = coeff[0][0];
for (i = 1; i < 2*n; i++)
{
sum = 1;
for (j = 1; j <= i; j++)
sum *= (x - z[j-1]);
result += (coeff[i][i]*sum);
}
return result;
}
This is how I define my input, output, and deriv:
// Input
double input[] = {0.30, 0.32, 0.35};
// Output
double sin_x[] = {0.29552, 0.31457, 0.34290};
// Derivative of sin_x
double cos_x[] = {0.95534, 0.94924, 0.93937};
My main():
int main(int argc, char **argv)
{
// initializing the given parameters for the assignment
int n;
double actual_output, x, *z, **h_coeff, hermite_result;
double input[] = {0.30, 0.32, 0.35};
double sin_x[] = {0.29552, 0.31457, 0.34290};
double cos_x[] = {0.95534, 0.94924, 0.93937};
n = 3;
x = 0.34;
z = Hermite_z_sequence(input, n);
h_coeff = Hermite_coeff(z, sin_x, cos_x, n);
hermite_result = Hermite_interpolation(z, h_coeff, n, x);
actual_output = sin(x);
printf("Hermite H_5(%.2f) = %.7f\n", x, hermite_result);
printf("Relative error: %.7f\n\n", relative_error(actual_output, hermite_result));
h_coeff = destroy_diagonal_2D_array(h_coeff, 2*n);
free(z);
return 0;
}
And sometimes this shows:
Hermite H_5(0.34) = 0.3334889
Relative error: 0.0000054
And at other times this shows:
malloc: *** error for object 0x1004090e8: incorrect checksum for freed object - object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
(lldb)
Have a look at this, and tell me if it looks right to you:
for (i = 0; i < 2*n; ++i)
{
z[2*i] = input[i];
z[2*i+1] = input[i];
}
Assuming we have
z = calloc(2*n, sizeof(double))
In that loop, you step far over 2*n. In the condition for for, you probably meant to write i < n instead of i < 2*n

Resources