Cost function not decreasing in gradient descent implementation - c

I am trying implemented batch gradient descent in C language. The problem is, my cost function increases dramatically in every turn and I am not able to understand what is wrong. I checked my code several times and it seems to me that I coded exactly the formulas. Do you have any suggestions or ideas what might be the wrong in the implementation?
My data set is here: https://archive.ics.uci.edu/ml/datasets/Housing
And I reference these slides for the algorithm (I googled this): http://asv.informatik.uni-leipzig.de/uploads/document/file_link/527/TMI04.2_linear_regression.pdf
I read the data set correctly into the main memory. Below part shows how I store the data set information in main memory. It is straight-forward.
//Definitions
#define NUM_OF_ATTRIBUTES 13
#define NUM_OF_SETS 506
#define LEARNING_RATE 0.07
//Data holder
struct data_set_s
{
double x_val[NUM_OF_SETS][NUM_OF_ATTRIBUTES + 1];
double y_val[NUM_OF_SETS];
double teta_val[NUM_OF_ATTRIBUTES + 1];
};
//RAM
struct data_set_s data_set;
Teta values are initialized to 0 and x0 values are initialized to 1.
Below section is the hypothesis function, which is the standart polynomial function.
double perform_hypothesis_a(unsigned short set_index)
{
double result;
int i;
result = 0;
for(i = 0; i < NUM_OF_ATTRIBUTES + 1; i++)
result += data_set.teta_val[i] * data_set.x_val[set_index][i];
return result;
}
Below section is the cost function.
double perform_simplified_cost_func(double (*hypothesis_func)(unsigned short))
{
double result, val;
int i;
result = 0;
for(i = 0; i < NUM_OF_SETS; i++)
{
val = hypothesis_func(i) - data_set.y_val[i];
result += pow(val, 2);
}
result = result / (double)(2 * NUM_OF_SETS);
return result;
}
Below section is the gradient descent function.
double perform_simplified_gradient_descent(double (*hypothesis_func)(unsigned short))
{
double temp_teta_val[NUM_OF_ATTRIBUTES + 1], summation, val;
int i, j, k;
for(i = 0; i < NUM_OF_ATTRIBUTES + 1; i++)
temp_teta_val[i] = 0;
for(i = 0; i < 10; i++) //assume this is "while not converged"
{
for(j = 0; j < NUM_OF_ATTRIBUTES + 1; j++)
{
summation = 0;
for(k = 0; k < NUM_OF_SETS; k++)
{
summation += (hypothesis_func(k) - data_set.y_val[k]) * data_set.x_val[k][j];
}
val = ((double)LEARNING_RATE * summation) / NUM_OF_SETS);
temp_teta_val[j] = data_set.teta_val[j] - val;
}
for(j = 0; j < NUM_OF_ATTRIBUTES + 1; j++)
{
data_set.teta_val[j] = temp_teta_val[j];
}
printf("%lg\n ", perform_simplified_cost_func(hypothesis_func));
}
return 1;
}
While it seems correct to me, when I print the cost function at the end of the every gradient descent, it goes like: 1.09104e+011, 5.234e+019, 2.51262e+028, 1.20621e+037...

Related

As a result of processing arrays -nan(ind)

I am writing a program that creates arrays of a given length and manipulates them. You cannot use other libraries.
First, an array M1 of length N is formed, after which an array M2 of length N is formed/2.
In the M1 array, the division by Pi operation is applied to each element, followed by elevation to the third power.
Then, in the M2 array, each element is alternately added to the previous one, and the tangent modulus operation is applied to the result of addition.
After that, exponentiation is applied to all elements of the M1 and M2 array with the same indexes and the resulting array is sorted by dwarf sorting.
And at the end, the sum of the sines of the elements of the M2 array is calculated, which, when divided by the minimum non-zero element of the M2 array, give an even number.
The problem is that the result X gives is -nan(ind). I can't figure out exactly where the error is.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
const int A = 441;
const double PI = 3.1415926535897931159979635;
inline void dwarf_sort(double* array, int size) {
size_t i = 1;
while (i < size) {
if (i == 0) {
i = 1;
}
if (array[i - 1] <= array[i]) {
++i;
}
else
{
long tmp = array[i];
array[i] = array[i - 1];
array[i - 1] = tmp;
--i;
}
}
}
inline double reduce(double* array, int size) {
size_t i;
double min = RAND_MAX, sum = 0;
for (i = 0; i < size; ++i) {
if (array[i] < min && array[i] != 0) {
min = array[i];
}
}
for (i = 0; i < size; ++i) {
if ((int)(array[i] / min) % 2 == 0) {
sum += sin(array[i]);
}
}
return sum;
}
int main(int argc, char* argv[])
{
int i, N, j;
double* M1 = NULL, * M2 = NULL, * M2_copy = NULL;
double X;
unsigned int seed = 0;
N = atoi(argv[1]); /* N равен первому параметру командной строки */
M1 = malloc(N * sizeof(double));
M2 = malloc(N / 2 * sizeof(double));
M2_copy = malloc(N / 2 * sizeof(double));
for (i = 0; i < 100; i++)
{
seed = i;
srand(i);
/*generate*/
for (j = 0; j < N; ++j) {
M1[j] = (rand_r(&seed) % A) + 1;
}
for (j = 0; j < N / 2; ++j) {
M2[j] = (rand_r(&seed) % (10 * A)) + 1;
}
/*map*/
for (j = 0; j < N; ++j)
{
M1[j] = pow(M1[j] / PI, 3);
}
for (j = 0; j < N / 2; ++j) {
M2_copy[j] = M2[j];
}
M2[0] = fabs(tan(M2_copy[0]));
for (j = 0; j < N / 2; ++j) {
M2[j] = fabs(tan(M2[j] + M2_copy[j]));
}
/*merge*/
for (j = 0; j < N / 2; ++j) {
M2[j] = pow(M1[j], M2[j]);
}
/*sort*/
dwarf_sort(M2, N / 2);
/*sort*/
X = reduce(M2, N / 2);
}
printf("\nN=%d.\n", N);
printf("X=%f\n", X);
return 0;
}
Knowledgeable people, does anyone see where my mistake is? I think I'm putting the wrong data types to the variables, but I still can't solve the problem.
Replace the /* merge */ part with this:
/*merge*/
for (j = 0; j < N / 2; ++j) {
printf("%f %f ", M1[j], M2[j]);
M2[j] = pow(M1[j], M2[j]);
printf("%f\n", M2[j]);
}
This will print the values and the results of the pow operation. You'll see that some of these values are huge resulting in an capacity overflow of double.
Something like pow(593419.97, 31.80) will not end well.

Can't figure out what's wrong with my code for a HackerRank problem in C

I'm sorry to ask help for a HackerRank problem here, I know it's not really the right place but nobody is answering me on HackerRank. Also, I'm new in C, so don't be to rude please.
Problem's description:
You are given n triangles, specifically, their sides a, b and c. Print them in the same style but sorted by their areas from the smallest one to the largest one. It is guaranteed that all the areas are different.
Link to the problem : https://www.hackerrank.com/challenges/small-triangles-large-triangles/problem
We can only edit the sort_by_area function.
First of all, I didn't calculate the triangles' area, I've just calculated the perimeter of each triangle, because the formula is simpler to read and to execute. Normally, that doesn't change anything for the result since a bigger perimeter means a bigger area. Tell me if I'm wrong.
The problem is that I have unexpected results: there's numbers on a line from my output that I really don't know from where they come. See:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
typedef struct {
int a;
int b;
int c;
} triangle;
void sort_by_area(triangle *tr, int n) {
// Array for storing the perimeter.
int *size = malloc(100 * sizeof(*size));
// Adding perimeters in size array.
for (int i = 0; i < n; i++) {
size[i] = tr[i].a + tr[i].b + tr[i].c;
}
// Sort.
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (size[j] > size[j + 1]) {
// Sort in size array.
int temp = size[j];
size[j] = size[j + 1];
size[j + 1] = temp;
// Sort in tr array.
temp = tr[j].a;
tr[j].a = tr[j + 1].a;
tr[j + 1].a = temp;
temp = tr[j].b;
tr[j].b = tr[j + 1].b;
tr[j + 1].b = temp;
temp = tr[j].c;
tr[j].c = tr[j + 1].c;
tr[j + 1].c = temp;
}
}
}
}
int main() {
int n;
scanf("%d", &n);
triangle *tr = malloc(n * sizeof(triangle));
for (int i = 0; i < n; i++) {
scanf("%d%d%d", &tr[i].a, &tr[i].b, &tr[i].c);
}
sort_by_area(tr, n);
for (int i = 0; i < n; i++) {
printf("%d %d %d\n", tr[i].a, tr[i].b, tr[i].c);
}
return 0;
}
Input:
3
7 24 25
5 12 13
3 4 5
Output:
0 417 0 // Unexpected results on this line.
3 4 5
5 12 13
Expected output:
3 4 5
5 12 13
7 24 25
It seems that an error occurs from the 7 24 25 triangle, but for me, my code seems to be good.... Can you help to find out what's wrong ? I really want to understand before going to another problem.
The assumption that a greater parameter implies a greater area is incorrect. Why? Imagine an isosceles triangle with a base of 1000 units and a height of 1e-9 units. The area is minuscule, compared to an equilateral triangle with unit length whereas the former has a huge perimeter (~2000 units) compared to the latter (3 units). That's just an (extreme) example to convey the flaw in your assumption.
I'd suggest you roll up your own area function. It's even mentioned on the problem page to use Heron's formula. Since it's just to be used in the comparison, then we don't need the exact area but an indicative area. So something like
double area(triangle const* tr) {
if(tr) {
double semiPerimeter = (tr->a + tr->b + tr->c)/2.0;
return semiPerimeter* (semiPerimeter - tr->a) * (semiPerimeter - tr->b) * (semiPerimeter - tr->c);
} else {
return 0;
}
}
Where we don't really need to calculate the square root since we just need to compare the areas across triangles and comparing the square of areas across triangles should be fine.
After this, it's just a matter of plugging this into whatever you did, after correcting the inner j loop to run only till n-1 (as the other answer has also explained)
void sort_by_area(triangle* tr, int n) {
/**
* Sort an array a of the length n
*/
double areaArr[n];
for(size_t i = 0; i < n; ++i) {
areaArr[i] = area(&tr[i]);
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < n - 1; j++) {
if (areaArr[j] > areaArr[j + 1]) {
// Sort in area array.
int temp = areaArr[j];
areaArr[j] = areaArr[j + 1];
areaArr[j + 1] = temp;
// Sort in tr array.
triangle tmp = tr[j];
tr[j] = tr[j + 1];
tr[j + 1] = tmp;
}
}
}
}
You could directly use qsort too here since the problem doesn't prohibit using standard functions, something like:
int qsortCompare(void const* a, void const* b) {
triangle const* trA = a;
triangle const* trB = b;
if(trA && trB) {
double areaA = area(trA);
double areaB = area(trB);
return (areaA < areaB) ? -1 :
((areaA > areaB)? 1: 0);
}
return 0;
}
void sort_by_area(triangle* tr, int n) {
qsort(tr, n, sizeof(triangle), &qsortCompare);
}
Also, don't be restricted to add functions in the problem solution. The actual driver code only calls sort_by_area() but you can write other functions in the solution and call them from sort_by_area().
The inner loop does not need to run till n, only till n-1
for (int j = 0; j < n - 1; j++)
Because when j == n, then you are comparing with random junk outside of your respective arrays by accessing size[j+1] and tr[j+1].
Also, when swapping, you don't need to copy the structure members one-by-one. You can simply do:
// Sort in tr array.
triangle tmp = tr[j];
tr[j] = tr[j + 1];
tr[j + 1] = tmp;
Edit: As #CiaPan pointed out:
You have a memory leak. You need to call free() after you are done with using the malloc'd memory.
You are not allocating the right amount of memory. If you are passed more than 100 triangles, your code might behave weirdly or randomly crash.
int *size = malloc(n* sizeof(*size));
Full code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
typedef struct {
int a;
int b;
int c;
} triangle;
void sort_by_area(triangle *tr, int n) {
// Array for storing the perimeter.
int *size = malloc(n* sizeof(*size));
// Adding perimeters in size array.
for (int i = 0; i < n; i++) {
size[i] = tr[i].a + tr[i].b + tr[i].c;
}
// Sort.
for (int i = 0; i < n; i++) {
for (int j = 0; j < n - 1; j++) {
if (size[j] > size[j + 1]) {
// Sort in size array.
int temp = size[j];
size[j] = size[j + 1];
size[j + 1] = temp;
// Sort in tr array.
triangle tmp = tr[j];
tr[j] = tr[j + 1];
tr[j + 1] = tmp;
}
}
}
}
int main() {
int n;
scanf("%d", &n);
triangle *tr = malloc(n * sizeof(triangle));
for (int i = 0; i < n; i++) {
scanf("%d%d%d", &tr[i].a, &tr[i].b, &tr[i].c);
}
sort_by_area(tr, n);
for (int i = 0; i < n; i++) {
printf("%d %d %d\n", tr[i].a, tr[i].b, tr[i].c);
}
return 0;
}

I am trying to improve the performance speed of my cross-correlation algorithm. What things can I do to make my C code run faster?

I created a cross-correlation algorithm, and I am trying to maximize its performance by reducing the time it takes for it to run. First of all, I reduced the number of function calls within the "crossCorrelationV2" function. Second, I created several macros at the top of the program for constants. Third, I reduced the number of loops that are inside the "crossCorrelationV2" function. The code that you see is the most recent code that I have.
Are there any other methods I can use to try and reduce the processing time of my code?
Let's assume that I am only focused on the functions "crossCorrelationV2" and "createAnalyzingWave".
I would be glad for any advice, whether in general about programming or pertaining to those two specific functions; I am a beginner programmer. Thanks.
#include <stdio.h>
#include <stdlib.h>
#define ARRAYSIZE 4096
#define PULSESNUMBER 16
#define DATAFREQ 1300
// Print the contents of the array onto the console.
void printArray(double array[], int size){
int k;
for (k = 0; k < size; k++){
printf("%lf ", array[k]);
}
printf("\n");
}
// Creates analyzing square wave. This square wave has unity (1) magnitude.
// The number of high values in each period is determined by high values = (analyzingT/2) / time increment
void createAnalyzingWave(double analyzingFreq, double wave[]){
int highValues = (1 / analyzingFreq) * 0.5 / ((PULSESNUMBER * (1 / DATAFREQ) / ARRAYSIZE));
int counter = 0;
int p;
for(p = 1; p <= ARRAYSIZE; p++){
if ((counter % 2) == 0){
wave[p - 1] = 1;
} else{
wave[p - 1] = 0;
}
if (p % highValues == 0){
counter++;
}
}
}
// Creates data square wave (for testing purposes, for the real implementation actual ADC data will be used). This
// square wave has unity magnitude.
// The number of high values in each period is determined by high values = array size / (2 * number of pulses)
void createDataWave(double wave[]){
int highValues = ARRAYSIZE / (2 * PULSESNUMBER);
int counter = 0;
int p;
for(p = 0; p < ARRAYSIZE; p++){
if ((counter % 2) == 0){
wave[p] = 1;
} else{
wave[p] = 0;
}
if ((p + 1) % highValues == 0){
counter++;
}
}
}
// Finds the average of all the values inside an array
double arrayAverage(double array[], int size){
int i;
double sum = 0;
// Same thing as for(i = 0; i < arraySize; i++)
for(i = size; i--; ){
sum = array[i] + sum;
}
return sum / size;
}
// Cross-Correlation algorithm
double crossCorrelationV2(double dataWave[], double analyzingWave[]){
int bigArraySize = (2 * ARRAYSIZE) - 1;
// Expand analyzing array into array of size 2arraySize-1
int lastArrayIndex = ARRAYSIZE - 1;
int lastBigArrayIndex = 2 * ARRAYSIZE - 2; //bigArraySize - 1; //2 * arraySize - 2;
double bigAnalyzingArray[bigArraySize];
int i;
int b;
// Set first few elements of the array equal to analyzingWave
// Set remainder of big analyzing array to 0
for(i = 0; i < ARRAYSIZE; i++){
bigAnalyzingArray[i] = analyzingWave[i];
bigAnalyzingArray[i + ARRAYSIZE] = 0;
}
double maxCorrelationValue = 0;
double currentCorrelationValue;
// "Beginning" of correlation algorithm proper
for(i = 0; i < bigArraySize; i++){
currentCorrelationValue = 0;
for(b = lastBigArrayIndex; b > 0; b--){
if (b >= lastArrayIndex){
currentCorrelationValue = dataWave[b - lastBigArrayIndex / 2] * bigAnalyzingArray[b] + currentCorrelationValue;
}
bigAnalyzingArray[b] = bigAnalyzingArray[b - 1];
}
bigAnalyzingArray[0] = 0;
if (currentCorrelationValue > maxCorrelationValue){
maxCorrelationValue = currentCorrelationValue;
}
}
return maxCorrelationValue;
}
int main(){
int samplesNumber = 25;
double analyzingFreq = 1300;
double analyzingWave[ARRAYSIZE];
double dataWave[ARRAYSIZE];
createAnalyzingWave(analyzingFreq, analyzingWave);
//createDataWave(arraySize, pulsesNumber, dataWave);
double maximumCorrelationArray[samplesNumber];
int i;
for(i = 0; i < samplesNumber; i++){
createDataWave(dataWave);
maximumCorrelationArray[i] = crossCorrelationV2(dataWave, analyzingWave);
}
printf("Average of the array values: %lf\n", arrayAverage(maximumCorrelationArray, samplesNumber));
return 0;
}
The first point is that you are explicitly shifting the analizingData array, this way you are required twice as much memory and moving the items is about 50% of your time. In a test here using crossCorrelationV2 takes 4.1 seconds, with the implementation crossCorrelationV3 it runs in ~2.0 seconds.
The next thing is that you are spending time multiplying by zero on the padded array, removing that, and also removing the padding, and simplifying the indices we end with crossCorrelationV4 that makes the program to run in ~1.0 second.
// Cross-Correlation algorithm
double crossCorrelationV3(double dataWave[], double analyzingWave[]){
int bigArraySize = (2 * ARRAYSIZE) - 1;
// Expand analyzing array into array of size 2arraySize-1
int lastArrayIndex = ARRAYSIZE - 1;
int lastBigArrayIndex = 2 * ARRAYSIZE - 2; //bigArraySize - 1; //2 * arraySize - 2;
double bigAnalyzingArray[bigArraySize];
int i;
int b;
// Set first few elements of the array equal to analyzingWave
// Set remainder of big analyzing array to 0
for(i = 0; i < ARRAYSIZE; i++){
bigAnalyzingArray[i] = analyzingWave[i];
bigAnalyzingArray[i + ARRAYSIZE] = 0;
}
double maxCorrelationValue = 0;
double currentCorrelationValue;
// "Beginning" of correlation algorithm proper
for(i = 0; i < bigArraySize; i++){
currentCorrelationValue = 0;
// Instead of checking if b >= lastArrayIndex inside the loop I use it as
// a stopping condition.
for(b = lastBigArrayIndex; b >= lastArrayIndex; b--){
// instead of shifting bitAnalizing[b] = bigAnalyzingArray[b-1] every iteration
// I simply use bigAnalizingArray[b-i]
currentCorrelationValue = dataWave[b - lastBigArrayIndex / 2] * bigAnalyzingArray[b - i] + currentCorrelationValue;
}
bigAnalyzingArray[0] = 0;
if (currentCorrelationValue > maxCorrelationValue){
maxCorrelationValue = currentCorrelationValue;
}
}
return maxCorrelationValue;
}
// Cross-Correlation algorithm
double crossCorrelationV4(double dataWave[], double analyzingWave[]){
int bigArraySize = (2 * ARRAYSIZE) - 1;
// Expand analyzing array into array of size 2arraySize-1
int lastArrayIndex = ARRAYSIZE - 1;
int lastBigArrayIndex = 2 * ARRAYSIZE - 2; //bigArraySize - 1; //2 * arraySize - 2;
// I will not allocate the bigAnalizingArray here
// double bigAnalyzingArray[bigArraySize];
int i;
int b;
// I will not copy the analizingWave to bigAnalyzingArray
// for(i = 0; i < ARRAYSIZE; i++){
// bigAnalyzingArray[i] = analyzingWave[i];
// bigAnalyzingArray[i + ARRAYSIZE] = 0;
// }
double maxCorrelationValue = 0;
double currentCorrelationValue;
// Compute the correlation by symmetric paris
// the idea here is to simplify the indices of the inner loops since
// they are computed more times.
for(i = 0; i < lastArrayIndex; i++){
currentCorrelationValue = 0;
for(b = lastArrayIndex - i; b >= 0; b--){
// instead of shifting bitAnalizing[b] = bigAnalyzingArray[b-1] every iteration
// I simply use bigAnalizingArray[b-i]
currentCorrelationValue += dataWave[b] * analyzingWave[b + i];
}
if (currentCorrelationValue > maxCorrelationValue){
maxCorrelationValue = currentCorrelationValue;
}
if(i != 0){
currentCorrelationValue = 0;
// Correlate shifting to the other side
for(b = lastArrayIndex - i; b >= 0; b--){
// instead of shifting bitAnalizing[b] = bigAnalyzingArray[b-1] every iteration
// I simply use bigAnalizingArray[b-i]
currentCorrelationValue += dataWave[b + i] * analyzingWave[b];
}
if (currentCorrelationValue > maxCorrelationValue){
maxCorrelationValue = currentCorrelationValue;
}
}
}
return maxCorrelationValue;
}
If you want more optimization you can unroll some iterations of the loop and enable some compiler optimizations like vector extension.

Segmentation fault in function implementing Ford-Fulkerson

I'm working on a class assignment and I've run into an issue I haven't been able to figure out. I'm implementing the Ford-Fulkerson algorithm using BFS to find max flow. But while trying to set my Residual Capacity matrix to the given capacity, I hit a segmentation fault. In the test code we received, I can see that the original capacity matrix was passed by value by its address, but I have a feeling that in my code I'm not interacting with it the way I think I am? Which leads me to believe that I may have the same issue recurring elsewhere. I worked with gdb and saw that I hit a segmentation fault on this line here in my nested for loop :
resCap[i][j] = *(capacity + i*n + j);
However, nothing I have tried has worked for me though so I am pretty stumped.
void maximum_flow(int n, int s, int t, int *capacity, int *flow)
{
int i, j, resCap[n][n], path[n]; // residual capacity and BFS augmenting path
int min_path = INT_MAX; // min of the augmenting path
// Assign residual capacity equal to the given capacity
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
{
resCap[i][j] = *(capacity + i*n + j);
*(flow + i*n + j) = 0; // no initial flow
}
// Augment path with BFS from source to sink
while (bfs(n, s, t, &(resCap[0][0]), path))
{
// find min of the augmenting path
for (j = t; j != s; j = path[j])
{
i = path[j];
min_path = min(min_path, resCap[i][j]);
}
// update residual capacities and flows on both directions
for (j = t; j != s; j = path[j])
{
i = path[j];
if(*(capacity + i*n + j) > 0)
*(flow + i*n + j) += min_flow_path;
else
*(flow + j*n + i) -= min_flow_path;
resCap[i][j] -= min_flow_path;
resCap[j][i] += min_flow_path;
}
}
}
And here is the test code provided to us in case it is needed:
int main(void)
{ int cap[1000][1000], flow[1000][1000];
int i,j, flowsum;
for(i=0; i< 1000; i++)
for( j =0; j< 1000; j++ )
cap[i][j] = 0;
for(i=0; i<499; i++)
for( j=i+1; j<500; j++)
cap[i][j] = 2;
for(i=1; i<500; i++)
cap[i][500 + (i/2)] =4;
for(i=500; i < 750; i++ )
{ cap[i][i-250]=3;
cap[i][750] = 1;
cap[i][751] = 1;
cap[i][752] = 5;
}
cap[751][753] = 5;
cap[752][753] = 5;
cap[753][750] = 20;
for( i=754; i< 999; i++)
{ cap[753][i]=1;
cap[i][500]=3;
cap[i][498]=5;
cap[i][1] = 100;
}
cap[900][999] = 1;
cap[910][999] = 1;
cap[920][999] = 1;
cap[930][999] = 1;
cap[940][999] = 1;
cap[950][999] = 1;
cap[960][999] = 1;
cap[970][999] = 1;
cap[980][999] = 1;
cap[990][999] = 1;
printf("prepared capacity matrix, now executing maxflow code\n");
maximum_flow(1000,0,999,&(cap[0][0]),&(flow[0][0]));
for(i=0; i<=999; i++)
for(j=0; j<=999; j++)
{ if( flow[i][j] > cap[i][j] )
{ printf("Capacity violated\n"); exit(0);}
}
flowsum = 0;
for(i=0; i<=999; i++)
flowsum += flow[0][i];
printf("Outflow of 0 is %d, should be 10\n", flowsum);
flowsum = 0;
for(i=0; i<=999; i++)
flowsum += flow[i][999];
printf("Inflow of 999 is %d, should be 10\n", flowsum);
printf("End Test\n");
}
This line is likely going to segfault, it does using Clang.
int i, j, resCap[n][n], path[n];
You're declaring a very large array on the stack. Just how big can be seen when you try and allocated it using calloc. Try this instead and don't forget to free it using the same sort of loop.
int **resCap2 = calloc(1, n * sizeof(int *));
assert(resCap2);
for (i = 0; i < n; i++) {
resCap2[i] = calloc(1, n * sizeof(int));
assert(resCap2[i]);
}
This is a lot of space ie
(1000 * sizeof(int*) * (1000 * n * sizeof(int)))

Program Bugs with large sequences (C) [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 7 years ago.
Improve this question
I am trying to code the Waterman algorithm in C.
Now when the length of the sequence exceeds 35 the program just lags.
I have no idea where to start looking, tried but got nothing worked out.
Here's the code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Max Function Prototype.
int maxfunction(int, int);
// Prototype of the random Sequences generator Function.
void gen_random(char *, const int);
int main(int argc, char *argv[]) {
// Looping variable and Sequences.
int i = 0, j = 0, k = 0;
char *X, *Y;
int length1, length2;
// Time Variables.
time_t beginning_time, end_time;
// Getting lengths of sequences
printf("Please provide the length of the first Sequence\n");
scanf("%d", &length1);
printf("Please provide the length of the second Sequence\n");
scanf("%d", &length2);
X = (char*)malloc(sizeof(char) * length1);
Y = (char*)malloc(sizeof(char) * length2);
int m = length1 + 1;
int n = length2 + 1;
int L[m][n];
int backtracking[m + n];
gen_random(X, length1);
gen_random(Y, length2);
printf("First Sequence\n");
for (i = 0; i < length1; i++) {
printf("%c\n", X[i]);
}
printf("\nSecond Sequence\n");
for (i = 0; i < length2; i++) {
printf("%c\n", Y[i]);
}
// Time calculation beginning.
beginning_time = clock();
// Main Part--Core of the algorithm.
for (i = 0; i <= m; i++) {
for (j = 0; j <= n; j++) {
if (i == 0 || j == 0) {
L[i][j] = 0;
} else
if (X[i-1] == Y[j-1]) {
L[i][j] = L[i-1][j-1] + 1;
backtracking[i] = L[i-1][j-1];
} else {
L[i][j] = maxfunction(L[i-1][j], L[i][j-1]);
backtracking[i] = maxfunction(L[i-1][j], L[i][j-1]);
}
}
}
// End time calculation.
end_time = clock();
for (i = 0; i < m; i++) {
printf(" ( ");
for (j = 0; j < n; j++) {
printf("%d ", L[i][j]);
}
printf(")\n");
}
// Printing out the result of backtracking.
printf("\n");
for (k = 0; k < m; k++) {
printf("%d\n", backtracking[k]);
}
printf("Consumed time: %lf", (double)(end_time - beginning_time));
return 0;
}
// Max Function.
int maxfunction(int a, int b) {
if (a > b) {
return a;
} else {
return b;
}
}
// Random Sequence Generator Function.
void gen_random(char *s, const int len) {
int i = 0;
static const char alphanum[] = "ACGT";
for (i = 0; i < len; ++i) {
s[i] = alphanum[rand() % (sizeof(alphanum) - 1)];
}
s[len] = 0;
}
Since you null terminate the sequence in gen_random with s[len] = 0;, you should allocate 1 more byte for each sequence:
X = malloc(sizeof(*X) * (length1 + 1));
Y = malloc(sizeof(*Y) * (length2 + 1));
But since you define variable length arrays for other variables, you might as well define these as:
char X[length1 + 1], Y[length2 + 1];
Yet something else is causing a crash on my laptop: your nested loops iterate from i = 0 to i <= m, and j = 0 to j <= n. That's one step too many, you index out of bounds into L.
Here is a corrected version:
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
The resulting code executes very quickly, its complexity is O(m*n) in both time and space, but m and n are reasonably small at 35. It runs in less than 50ms for 1000 x 1000.
Whether it implements Smith-Waterman's algorithm correctly is another question.

Resources