how can I transpose a 2D matrix in place in C? - c

I'm trying to transpose a 2D matrix (10x10) in place:
/* Question's attempt at an in-place transpose: visits every (a, b) pair of the 10x10 matrix. */
for (a = 0; a < 10; a++) {
for (b = 0; b < 10; b++) {
tmp = matrix[a][b];
/* NOTE: matrix[b][a] is overwritten here before its old value has been saved anywhere. */
matrix[b][a] = matrix[a][b];
/* tmp still holds matrix[a][b], so this store leaves matrix[a][b] unchanged. */
matrix[a][b] = tmp;
}
}
If I can increase the starting value 'b' of the inner for statement by 1, it works fine.
However, when one loop is turned, the value of the variable is set to 0. It is very natural.
Is there a way to increase the starting value 'b' of the inner for loop after running around a loop?
I really want to solve this problem.
Can you use global variables or any other way to solve this problem?

Your swapping code is incorrect: you should overwrite the saved value first.
Furthermore, you must stop the inner loop when b == a, otherwise the values would be swapped twice, and the transposition would fail.
Here is a corrected version:
/* In-place transpose of the 10x10 matrix: swap each element below the main
 * diagonal (b < a) with its mirror image above the diagonal. */
for (a = 1; a < 10; a++) {
/* b stays strictly below a, so each off-diagonal pair is swapped exactly once */
for (b = 0; b < a; b++) {
int tmp = matrix[a][b]; /* save the lower-triangle value first */
matrix[a][b] = matrix[b][a];
matrix[b][a] = tmp;
}
}
This simple algorithm is not cache optimal for large matrices, especially for power of 2 sizes. More elaborate algorithms have been developed for in-place matrix transposition.
For example, here is a benchmark for 1024x1024 matrices comparing the naive algorithm with an advanced recursive approach:
#include <stdio.h>
#include <time.h>
#define SIZE 1024
static int mat[SIZE][SIZE];
/*
 * Fill `matrix` with the consecutive values 0, 1, 2, ... in row-major order,
 * so that matrix[a][b] == a * SIZE + b afterwards.
 *
 * The previous body ignored its parameter and always wrote to the file-scope
 * array `mat`; it now initializes the array it is actually given.
 */
void initialize_matrix(int matrix[SIZE][SIZE]) {
    int x = 0;
    for (int a = 0; a < SIZE; a++) {
        for (int b = 0; b < SIZE; b++) {
            matrix[a][b] = x++;
        }
    }
}
/*
 * Check that `matrix` holds the transpose of the 0, 1, 2, ... row-major
 * pattern, i.e. that matrix[b][a] == a * SIZE + b for every a and b.
 *
 * Returns 0 when every element matches and 1 at the first mismatch.
 *
 * The previous body ignored its parameter and always inspected the file-scope
 * array `mat`; it now checks the array it is actually given.
 */
int check_transpose_matrix(int matrix[SIZE][SIZE]) {
    int x = 0;
    for (int a = 0; a < SIZE; a++) {
        for (int b = 0; b < SIZE; b++) {
            if (matrix[b][a] != x++)
                return 1;
        }
    }
    return 0;
}
/*
 * Transpose the SIZE x SIZE matrix in place by exchanging every element that
 * lies below the main diagonal with its mirror image above it.
 */
void naive_transpose(int matrix[SIZE][SIZE]) {
    for (int row = 1; row < SIZE; row++) {
        int *lower = matrix[row];
        /* col < row keeps us strictly below the diagonal: each pair is swapped once */
        for (int col = 0; col < row; col++) {
            int saved = lower[col];
            lower[col] = matrix[col][row];
            matrix[col][row] = saved;
        }
    }
}
#define THRESHOLD 4
/*
 * Exchange every element of the size x size tile whose top-left corner is
 * (row, col) with its mirror image across the main diagonal.
 *
 * Tiles larger than THRESHOLD are split into four quadrants of size / 2 and
 * handled recursively; smaller tiles are swapped element by element.
 * NOTE(review): size / 2 truncates, so this presumably expects sizes that halve
 * evenly down to THRESHOLD - confirm before using other sizes.
 */
void transpose_tile(int row, int col, int size, int matrix[SIZE][SIZE]) {
    if (size <= THRESHOLD) {
        for (int dr = 0; dr < size; dr++) {
            for (int dc = 0; dc < size; dc++) {
                int r = row + dr;
                int c = col + dc;
                int saved = matrix[r][c];
                matrix[r][c] = matrix[c][r];
                matrix[c][r] = saved;
            }
        }
        return;
    }

    int half = size / 2;
    transpose_tile(row, col, half, matrix);
    transpose_tile(row, col + half, half, matrix);
    transpose_tile(row + half, col, half, matrix);
    transpose_tile(row + half, col + half, half, matrix);
}
/*
 * Transpose in place the size x size tile that sits on the main diagonal with
 * its top-left corner at (pos, pos).
 *
 * Tiles larger than THRESHOLD are split: the two diagonal halves recurse into
 * this function and the off-diagonal quadrant is handed to transpose_tile().
 * Smaller tiles swap each below-diagonal element with its mirror image.
 */
void transpose_tile_diag(int pos, int size, int matrix[SIZE][SIZE]) {
    if (size <= THRESHOLD) {
        for (int dr = 1; dr < size; dr++) {
            /* dc < dr: strictly below the diagonal, so each pair is swapped once */
            for (int dc = 0; dc < dr; dc++) {
                int r = pos + dr;
                int c = pos + dc;
                int saved = matrix[r][c];
                matrix[r][c] = matrix[c][r];
                matrix[c][r] = saved;
            }
        }
        return;
    }

    int half = size / 2;
    transpose_tile_diag(pos, half, matrix);
    transpose_tile(pos, pos + half, half, matrix);
    transpose_tile_diag(pos + half, half, matrix);
}
/* Transpose `matrix` in place by running the recursive diagonal-tile routine
 * over the whole SIZE x SIZE array, starting at position 0. */
void advanced_transpose(int matrix[SIZE][SIZE]) {
transpose_tile_diag(0, SIZE, matrix);
}
int main(int argc, char *argv[]) {
clock_t t_min;
initialize_matrix(mat);
naive_transpose(mat);
if (check_transpose_matrix(mat)) {
printf("naive_transpose failed!\n");
return 1;
}
/* benchmark naive algorithm */
t_min = 0;
for (int i = 0; i < 100; i++) {
clock_t t = clock();
naive_transpose(mat);
t = clock() - t;
if (i == 0 || t_min > t)
t_min = t;
}
printf("naive: %.3fms\n", t_min * 1000.0 / CLOCKS_PER_SEC);
initialize_matrix(mat);
advanced_transpose(mat);
if (check_transpose_matrix(mat)) {
printf("advanced_transpose failed!\n");
return 1;
}
/* benchmark advanced algorithm */
t_min = 0;
for (int i = 0; i < 100; i++) {
clock_t t = clock();
advanced_transpose(mat);
t = clock() - t;
if (i == 0 || t_min > t)
t_min = t;
}
printf("advanced: %.3fms\n", t_min * 1000.0 / CLOCKS_PER_SEC);
return 0;
}
Output on my 5 year old macbook:
naive: 7.299ms
advanced: 1.157ms

Related

As a result of processing arrays -nan(ind)

I am writing a program that creates arrays of a given length and manipulates them. You cannot use other libraries.
First, an array M1 of length N is formed, after which an array M2 of length N/2 is formed.
In the M1 array, the division by Pi operation is applied to each element, followed by elevation to the third power.
Then, in the M2 array, each element is alternately added to the previous one, and the tangent modulus operation is applied to the result of addition.
After that, exponentiation is applied to all elements of the M1 and M2 array with the same indexes and the resulting array is sorted by dwarf sorting.
And at the end, the sum of the sines of the elements of the M2 array is calculated, which, when divided by the minimum non-zero element of the M2 array, give an even number.
The problem is that the result X gives is -nan(ind). I can't figure out exactly where the error is.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
const int A = 441;
const double PI = 3.1415926535897931159979635;
/*
 * Sort array[0..size-1] into ascending order in place using gnome ("dwarf")
 * sort: walk forward while neighbours are in order, otherwise swap them and
 * step back one position.
 *
 * Fixes over the previous version:
 *  - the swap temporary is a double (it was a long, which truncated every
 *    value that got moved);
 *  - the function is `static inline`, because a plain `inline` definition
 *    provides no external definition in C and can fail to link;
 *  - neighbours are swapped only when array[i - 1] > array[i], so a NaN
 *    element is never swapped and the loop always terminates (with `<=` as the
 *    "in order" test a NaN was swapped back and forth forever).
 *
 * A size of 0 or 1 (or a negative size) leaves the array untouched.
 */
static inline void dwarf_sort(double* array, int size) {
    int i = 1;
    while (i < size) {
        if (i == 0 || !(array[i - 1] > array[i])) {
            /* at the left edge, or this pair is already in order: move on */
            ++i;
        } else {
            double tmp = array[i];
            array[i] = array[i - 1];
            array[i - 1] = tmp;
            --i;
        }
    }
}
inline double reduce(double* array, int size) {
size_t i;
double min = RAND_MAX, sum = 0;
for (i = 0; i < size; ++i) {
if (array[i] < min && array[i] != 0) {
min = array[i];
}
}
for (i = 0; i < size; ++i) {
if ((int)(array[i] / min) % 2 == 0) {
sum += sin(array[i]);
}
}
return sum;
}
/*
 * Usage: program N   (N >= 2)
 *
 * Repeats 100 times, seeding the generator with the iteration number:
 *   generate: M1 gets N values in [1, A], M2 gets N/2 values in [1, 10*A];
 *   map:      M1[j] = (M1[j] / PI)^3, and each M2 element is added to the
 *             previous original element before |tan()| is applied;
 *   merge:    M2[j] = M1[j] ^ M2[j];
 *   sort:     M2 is sorted with dwarf_sort();
 *   reduce:   X = reduce(M2).
 * Finally prints N and the X of the last iteration.
 *
 * Fixes over the previous version: the command-line argument and every
 * allocation are checked, the buffers are freed, and the "add the previous
 * element" loop starts at index 1 and uses M2_copy[j - 1] (it started at 0 and
 * added each element to itself, which also overwrote the special-cased M2[0]).
 *
 * Returns 0 on success, 1 on a usage or allocation error.
 */
int main(int argc, char* argv[])
{
    int i, N, j, half;
    double* M1 = NULL, * M2 = NULL, * M2_copy = NULL;
    double X = 0;
    unsigned int seed = 0;
    char* end = NULL;
    long parsed;

    if (argc < 2) {
        fprintf(stderr, "usage: <program> N (N >= 2)\n");
        return 1;
    }
    /* N equals the first command-line parameter */
    parsed = strtol(argv[1], &end, 10);
    N = (int)parsed;
    /* (long)N != parsed rejects values that do not fit in an int */
    if (end == argv[1] || *end != '\0' || parsed < 2 || (long)N != parsed) {
        fprintf(stderr, "N must be an integer >= 2\n");
        return 1;
    }
    half = N / 2;

    M1 = malloc((size_t)N * sizeof *M1);
    M2 = malloc((size_t)half * sizeof *M2);
    M2_copy = malloc((size_t)half * sizeof *M2_copy);
    if (M1 == NULL || M2 == NULL || M2_copy == NULL) {
        fprintf(stderr, "out of memory\n");
        free(M1);
        free(M2);
        free(M2_copy);
        return 1;
    }

    for (i = 0; i < 100; i++)
    {
        seed = i;
        srand(i);
        /*generate*/
        for (j = 0; j < N; ++j) {
            M1[j] = (rand_r(&seed) % A) + 1;
        }
        for (j = 0; j < half; ++j) {
            M2[j] = (rand_r(&seed) % (10 * A)) + 1;
        }
        /*map*/
        for (j = 0; j < N; ++j)
        {
            M1[j] = pow(M1[j] / PI, 3);
        }
        /* keep the original M2 values so "previous element" means the unmodified one */
        for (j = 0; j < half; ++j) {
            M2_copy[j] = M2[j];
        }
        /* the first element has no predecessor */
        M2[0] = fabs(tan(M2_copy[0]));
        for (j = 1; j < half; ++j) {
            M2[j] = fabs(tan(M2_copy[j] + M2_copy[j - 1]));
        }
        /*merge*/
        /* NOTE: pow() overflows to +inf when the exponent is large (tan near a pole) */
        for (j = 0; j < half; ++j) {
            M2[j] = pow(M1[j], M2[j]);
        }
        /*sort*/
        dwarf_sort(M2, half);
        /*reduce*/
        X = reduce(M2, half);
    }
    printf("\nN=%d.\n", N);
    printf("X=%f\n", X);

    free(M1);
    free(M2);
    free(M2_copy);
    return 0;
}
Knowledgeable people, does anyone see where my mistake is? I think I'm putting the wrong data types to the variables, but I still can't solve the problem.
Replace the /* merge */ part with this:
/*merge*/
/* Debugging variant of the merge step: prints base, exponent and result of every pow() call. */
for (j = 0; j < N / 2; ++j) {
printf("%f %f ", M1[j], M2[j]); /* base and exponent, before M2[j] is overwritten */
M2[j] = pow(M1[j], M2[j]);
printf("%f\n", M2[j]); /* result of pow() */
}
This will print the values and the results of the pow operation. You'll see that some of these values are huge, resulting in an overflow of the range of double.
Something like pow(593419.97, 31.80) will not end well.

Optimized Bubble Sort time for sorting int arrays

I'm currently working on an essay, in which I compare sorting times of certain algorithms. I've written an optimized version of bubble sort, which checks if there is a swap, if not, it stops.
/*
 * Sort tab[0..n-1] into ascending order in place with bubble sort.
 * Full passes over the array are repeated until one pass makes no swap.
 */
void bubble_sort(int* tab, int n) {
    int swapped = 1;
    while (swapped) {
        swapped = 0;
        for (int right = 1; right < n; ++right) {
            int left = right - 1;
            if (tab[left] > tab[right]) {
                int held = tab[left];
                tab[left] = tab[right];
                tab[right] = held;
                swapped = 1;
            }
        }
    }
}
I have found that it takes almost 0s to sort array sorted in ascending order, now I'm testing it on an array sorted in descending order, and times are almost the same as for ascending order. Is it normal?
Code tested:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Sort tab[lewy..prawy] (both bounds inclusive) into DESCENDING order in place
 * with quicksort, using the middle element as the pivot.
 *
 * Fixes over the previous version:
 *  - ranges with fewer than two elements return immediately;
 *  - the partition loop advances both cursors right after a swap and stops as
 *    soon as they cross, so it never reads outside [lewy, prawy] (the old loop
 *    kept scanning after swapping an element with itself at either edge of the
 *    range and read tab[lewy - 1] or tab[prawy + 1]);
 *  - the pivot index is computed without the lewy + prawy overflow.
 */
void quickSort(int* tab, int lewy, int prawy) {
    if (lewy >= prawy) {
        return;
    }

    int pivot = tab[lewy + (prawy - lewy) / 2];
    int y = lewy;
    int z = prawy;

    do {
        /* descending order: larger values stay on the left */
        while (tab[y] > pivot) {
            ++y;
        }
        while (tab[z] < pivot) {
            --z;
        }
        if (y <= z) {
            int x = tab[y];
            tab[y] = tab[z];
            tab[z] = x;
            ++y;
            --z;
        }
    } while (y <= z);

    if (z > lewy) {
        quickSort(tab, lewy, z);
    }
    if (y < prawy) {
        quickSort(tab, y, prawy);
    }
}
/*
 * Sort tab[0..n-1] into ascending order in place with bubble sort.
 * Full passes over the array are repeated until one pass makes no swap.
 */
void bubble_sort(int* tab, int n) {
    int swapped = 1;
    while (swapped) {
        swapped = 0;
        for (int right = 1; right < n; ++right) {
            int left = right - 1;
            if (tab[left] > tab[right]) {
                int held = tab[left];
                tab[left] = tab[right];
                tab[right] = held;
                swapped = 1;
            }
        }
    }
}
/*
 * Read an array size from standard input, fill an array with random values in
 * [-100, 99], sort it into descending order with quickSort() and then time how
 * long bubble_sort() needs to sort it into ascending order.
 *
 * Fixes over the previous version: the array is allocated with the element
 * size (it used sizeof(int*)), quickSort() gets the inclusive upper bound
 * n - 1 (it was passed n and touched tab[n]), the scanf() and malloc() results
 * are checked, and the array is freed.
 *
 * Returns 0 on success, 1 on invalid input or allocation failure.
 */
int main(void) {
    int n;

    srand((unsigned)time(NULL));
    if (scanf("%d", &n) != 1 || n <= 0) { /* user input: array size */
        fprintf(stderr, "Expected a positive array size\n");
        return 1;
    }

    int *tab = malloc((size_t)n * sizeof *tab);
    if (tab == NULL) {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    for (int counter = 0; counter < n; ++counter) {
        tab[counter] = (rand() % 200) - 100; /* values in [-100, 99] */
    }

    quickSort(tab, 0, n - 1); /* descending order: the input for the timed sort */

    clock_t start = clock();
    bubble_sort(tab, n);
    clock_t end = clock();

    float seconds = (float)(end - start) / CLOCKS_PER_SEC;
    printf("Time elapsed: %f", seconds);

    free(tab);
    return 0;
}

I am trying to improve the performance speed of my cross-correlation algorithm. What things can I do to make my C code run faster?

I created a cross-correlation algorithm, and I am trying to maximize its performance by reducing the time it takes for it to run. First of all, I reduced the number of function calls within the "crossCorrelationV2" function. Second, I created several macros at the top of the program for constants. Third, I reduced the number of loops that are inside the "crossCorrelationV2" function. The code that you see is the most recent code that I have.
Are there any other methods I can use to try and reduce the processing time of my code?
Let's assume that I am only focused on the functions "crossCorrelationV2" and "createAnalyzingWave".
I would be glad for any advice, whether in general about programming or pertaining to those two specific functions; I am a beginner programmer. Thanks.
#include <stdio.h>
#include <stdlib.h>
#define ARRAYSIZE 4096
#define PULSESNUMBER 16
#define DATAFREQ 1300
// Writes the first `size` elements of `array` to standard output, each followed
// by a single space, and finishes the line with a newline.
void printArray(double array[], int size){
    int idx = 0;
    while (idx < size){
        printf("%lf ", array[idx]);
        idx++;
    }
    printf("\n");
}
// Creates the analyzing square wave of unity (1) magnitude in
// wave[0..ARRAYSIZE-1]: runs of `highValues` ones alternate with runs of
// `highValues` zeros, starting with ones.
//
// highValues = (analyzing period / 2) / time increment, where
// time increment = PULSESNUMBER * (1 / DATAFREQ) / ARRAYSIZE. That simplifies to
// 0.5 * ARRAYSIZE * DATAFREQ / (PULSESNUMBER * analyzingFreq).
//
// The previous code evaluated 1 / DATAFREQ in integer arithmetic, which is 0,
// so the time increment was 0 and the result of the division could not be
// converted to int. The computation is now done in floating point. The result
// is clamped to [1, ARRAYSIZE], which also covers a zero, negative or NaN
// analyzingFreq and avoids a modulo by zero below.
void createAnalyzingWave(double analyzingFreq, double wave[]){
    double samples = (0.5 * ARRAYSIZE * DATAFREQ) / (PULSESNUMBER * analyzingFreq);
    int highValues;
    if (!(samples >= 1)){
        // too small, negative or NaN
        highValues = 1;
    } else if (samples > ARRAYSIZE){
        // a run longer than the array gives the same wave as a run of ARRAYSIZE
        highValues = ARRAYSIZE;
    } else{
        highValues = (int)samples;
    }

    int counter = 0;
    int p;
    for(p = 1; p <= ARRAYSIZE; p++){
        wave[p - 1] = ((counter % 2) == 0) ? 1 : 0;
        // switch level after every highValues samples
        if (p % highValues == 0){
            counter++;
        }
    }
}
// Fills wave[0..ARRAYSIZE-1] with a unity-magnitude square wave used as test
// data (the comment in the original post says real ADC data replaces it later).
// Runs of highValues = ARRAYSIZE / (2 * PULSESNUMBER) ones alternate with runs
// of the same number of zeros, starting with ones.
void createDataWave(double wave[]){
    const int highValues = ARRAYSIZE / (2 * PULSESNUMBER);
    for(int p = 0; p < ARRAYSIZE; p++){
        // p / highValues is the index of the run that sample p belongs to;
        // even-numbered runs are high, odd-numbered runs are low
        wave[p] = ((p / highValues) % 2 == 0) ? 1 : 0;
    }
}
// Returns the arithmetic mean of array[0..size-1].
// The elements are accumulated from the last one down to the first.
double arrayAverage(double array[], int size){
    double total = 0;
    int idx = size;
    while (idx-- != 0){
        total = array[idx] + total;
    }
    return total / size;
}
// Cross-correlation algorithm (explicit-shift version).
//
// Copies analyzingWave into a zero-padded array of 2 * ARRAYSIZE - 1 elements,
// then for each of the 2 * ARRAYSIZE - 1 shifts multiplies dataWave with the
// upper ARRAYSIZE elements of that array, sums the products, and shifts the
// padded array one element to the right. Returns the largest sum found; the
// maximum starts at 0, so the result is never negative.
//
// Fix over the previous version: the zero padding covers indices
// ARRAYSIZE .. 2 * ARRAYSIZE - 2 only. The old loop also wrote index
// 2 * ARRAYSIZE - 1, one element past the end of the array.
double crossCorrelationV2(double dataWave[], double analyzingWave[]){
    int bigArraySize = (2 * ARRAYSIZE) - 1;
    int lastArrayIndex = ARRAYSIZE - 1;
    int lastBigArrayIndex = bigArraySize - 1;
    double bigAnalyzingArray[bigArraySize];
    int i;
    int b;

    // First ARRAYSIZE elements: the analyzing wave. Remainder: zeros.
    for(i = 0; i < ARRAYSIZE; i++){
        bigAnalyzingArray[i] = analyzingWave[i];
    }
    for(i = ARRAYSIZE; i < bigArraySize; i++){
        bigAnalyzingArray[i] = 0;
    }

    double maxCorrelationValue = 0;
    double currentCorrelationValue;
    // Correlation proper: one pass per shift
    for(i = 0; i < bigArraySize; i++){
        currentCorrelationValue = 0;
        // b runs downwards so that element b is read before it is overwritten
        // by its left neighbour
        for(b = lastBigArrayIndex; b > 0; b--){
            if (b >= lastArrayIndex){
                currentCorrelationValue = dataWave[b - lastArrayIndex] * bigAnalyzingArray[b] + currentCorrelationValue;
            }
            bigAnalyzingArray[b] = bigAnalyzingArray[b - 1];
        }
        // a zero is shifted in on the left
        bigAnalyzingArray[0] = 0;
        if (currentCorrelationValue > maxCorrelationValue){
            maxCorrelationValue = currentCorrelationValue;
        }
    }
    return maxCorrelationValue;
}
// Builds the analyzing wave once, then 25 times regenerates the test data wave
// and stores the maximum cross-correlation value; finally prints the average of
// those 25 maxima.
int main(){
    int samplesNumber = 25;
    double analyzingFreq = 1300;
    double analyzingWave[ARRAYSIZE];
    double dataWave[ARRAYSIZE];
    double maximumCorrelationArray[samplesNumber];

    createAnalyzingWave(analyzingFreq, analyzingWave);

    int sample = 0;
    while (sample < samplesNumber){
        createDataWave(dataWave);
        maximumCorrelationArray[sample] = crossCorrelationV2(dataWave, analyzingWave);
        sample++;
    }

    double average = arrayAverage(maximumCorrelationArray, samplesNumber);
    printf("Average of the array values: %lf\n", average);
    return 0;
}
The first point is that you are explicitly shifting the analyzing array; this way you require twice as much memory, and moving the items takes about 50% of your time. In a test here, using crossCorrelationV2 takes 4.1 seconds; with the implementation crossCorrelationV3 it runs in ~2.0 seconds.
The next thing is that you are spending time multiplying by zero on the padded array. Removing that, also removing the padding, and simplifying the indices, we end up with crossCorrelationV4, which makes the program run in ~1.0 second.
// Cross-correlation algorithm (index-offset version).
//
// Same computation as the explicit-shift version, but instead of moving the
// padded array one element to the right after every pass, pass i reads element
// b - i of the unshifted array.
//
// Fixes over the previous version:
//  - the zero padding no longer writes one element past the end of the array;
//  - the inner loop stops at b == i, so b - i is never negative (those terms
//    correspond to zeros shifted in on the left and contribute nothing);
//  - the leftover "bigAnalyzingArray[0] = 0" from the shifting version is gone:
//    without the shift it erased analyzingWave[0] after the first pass.
double crossCorrelationV3(double dataWave[], double analyzingWave[]){
    int bigArraySize = (2 * ARRAYSIZE) - 1;
    int lastArrayIndex = ARRAYSIZE - 1;
    int lastBigArrayIndex = bigArraySize - 1;
    double bigAnalyzingArray[bigArraySize];
    int i;
    int b;

    // First ARRAYSIZE elements: the analyzing wave. Remainder: zeros.
    for(i = 0; i < ARRAYSIZE; i++){
        bigAnalyzingArray[i] = analyzingWave[i];
    }
    for(i = ARRAYSIZE; i < bigArraySize; i++){
        bigAnalyzingArray[i] = 0;
    }

    double maxCorrelationValue = 0;
    double currentCorrelationValue;
    for(i = 0; i < bigArraySize; i++){
        currentCorrelationValue = 0;
        // Only the upper ARRAYSIZE elements take part, and only those whose
        // shifted source index b - i is still inside the array.
        int firstB = (i > lastArrayIndex) ? i : lastArrayIndex;
        for(b = lastBigArrayIndex; b >= firstB; b--){
            currentCorrelationValue = dataWave[b - lastArrayIndex] * bigAnalyzingArray[b - i] + currentCorrelationValue;
        }
        if (currentCorrelationValue > maxCorrelationValue){
            maxCorrelationValue = currentCorrelationValue;
        }
    }
    return maxCorrelationValue;
}
// Cross-correlation algorithm (no padded copy).
//
// For every lag i in 0 .. ARRAYSIZE-1 the two waves are correlated directly,
// once with analyzingWave advanced by i samples and, for i != 0, once with
// dataWave advanced by i samples. Only overlapping samples are multiplied, so
// no zero-padded copy of the analyzing wave is needed. Returns the largest sum
// found; the maximum starts at 0, so the result is never negative.
//
// Fix over the previous version: the lag loop now includes
// i == ARRAYSIZE - 1 (it stopped one short, skipping the two single-sample
// overlaps that the padded versions do evaluate). Unused locals and the
// commented-out padding code were removed.
double crossCorrelationV4(double dataWave[], double analyzingWave[]){
    int lastArrayIndex = ARRAYSIZE - 1;
    int i;
    int b;
    double maxCorrelationValue = 0;
    double currentCorrelationValue;

    // Symmetric pairs of lags share the same loop bounds, which keeps the
    // indices of the hot inner loops simple.
    for(i = 0; i <= lastArrayIndex; i++){
        // analyzingWave advanced by i samples
        currentCorrelationValue = 0;
        for(b = lastArrayIndex - i; b >= 0; b--){
            currentCorrelationValue += dataWave[b] * analyzingWave[b + i];
        }
        if (currentCorrelationValue > maxCorrelationValue){
            maxCorrelationValue = currentCorrelationValue;
        }
        if(i != 0){
            // dataWave advanced by i samples (the shift to the other side)
            currentCorrelationValue = 0;
            for(b = lastArrayIndex - i; b >= 0; b--){
                currentCorrelationValue += dataWave[b + i] * analyzingWave[b];
            }
            if (currentCorrelationValue > maxCorrelationValue){
                maxCorrelationValue = currentCorrelationValue;
            }
        }
    }
    return maxCorrelationValue;
}
If you want more optimization you can unroll some iterations of the loop and enable some compiler optimizations like vector extension.

what is the correct time complexity of this function?

what my function does -> Given K sorted arrays arranged in form of a matrix. The task is to merge them. You need to complete mergeKArrays() function which takes 2 arguments, an arr[k][k] 2D Matrix containing k sorted arrays and an integer k denoting the number of sorted arrays. The function should return a pointer to the merged sorted arrays.
/*
 * Merge the k sorted rows arr[0] .. arr[k-1], each holding k values, into one
 * sorted array of k * k values.
 *
 * The rows are folded in one at a time: the result so far is merged with the
 * next row into a scratch buffer, and the two buffers then swap roles.
 * When two values compare equal, the one from the newly added row is emitted
 * first, as before.
 *
 * Returns a heap buffer of k * k ints that the caller must free(), or NULL
 * when k <= 0. The process exits with EXIT_FAILURE if memory cannot be
 * allocated.
 *
 * Fixes over the previous version: one new buffer was allocated per row and
 * none was ever freed; for k == 1 the function returned arr[0] itself, so the
 * caller could not tell whether the result had to be freed; and an allocation
 * failure ended the program with the success status 0.
 */
int *mergeKArrays(int arr[][N], int k)
{
    if (k <= 0) {
        return NULL;
    }

    size_t rowLen = (size_t)k;
    size_t total = rowLen * rowLen;
    /* calloc also rejects a total * sizeof(int) that would overflow */
    int *merged = calloc(total, sizeof *merged);
    int *scratch = calloc(total, sizeof *scratch);
    if (merged == NULL || scratch == NULL) {
        free(merged);
        free(scratch);
        exit(EXIT_FAILURE);
    }

    /* start with a copy of the first row */
    size_t mergedLen = rowLen;
    for (size_t j = 0; j < rowLen; ++j) {
        merged[j] = arr[0][j];
    }

    for (int row = 1; row < k; ++row) {
        const int *b = arr[row];
        size_t ia = 0, ib = 0, ic = 0;

        while (ia < mergedLen && ib < rowLen) {
            if (merged[ia] < b[ib]) {
                scratch[ic++] = merged[ia++];
            } else {
                scratch[ic++] = b[ib++];
            }
        }
        /* at most one of the two inputs still has elements left */
        while (ia < mergedLen) {
            scratch[ic++] = merged[ia++];
        }
        while (ib < rowLen) {
            scratch[ic++] = b[ib++];
        }

        /* the scratch buffer now holds the result; swap roles */
        int *swap = merged;
        merged = scratch;
        scratch = swap;
        mergedLen = ic;
    }

    free(scratch);
    return merged;
}
my approach : the for loop runs x = (k-1)times....
each time array of size k is merged with i*k size array...(k+k) + (2k + k) + ....x times
= (k + k +...x times) +(k + 1k + 2k ....x times) = kx + kx(x-1)/2
which gives O(k^3). is this right ?
the actual size of arr given in main is k^2 (k by k matrix)
so n = k^2 => k = n^(0.5)
=> T(n) = O(n^(3/2)) ..??

How to optimize my c code?

I tried to implement C code for Wavelet transform in FPGA (Zynq ZC 702) but the code get stuck and this is because of memory problem so I should optimize my code but I don't know how.
Can anyone please give me some ideas how to do that ?
This is the main of the code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "wavemin.h"
#include "waveaux.h"
#include "waveaux.c"
#include "wavemin.c"
/*
 * Runs a 4-level "db4" discrete wavelet transform with symmetric extension on
 * a 1280-sample all-zero input and prints the detail coefficients of every
 * level followed by the transform summary. The "Hello WorldN" lines are the
 * progress traces of the original program and are kept as they were.
 *
 * Fixes over the previous version: "wt - length[k]" was a typo for
 * "wt->length[k]", the empty initializer "= {}" is replaced by "= {0}", and
 * the results of wave_init(), wt_init() and malloc() are checked before use.
 *
 * Returns 0 on success, 1 if an object or buffer could not be created.
 */
int main() {
    printf("Hello World1 \n\r");
    wave_object obj;
    wt_object wt;
    float *inp, *out;
    int N, i, J, k;
    float temp[1280] = {0};
    char *name = "db4";

    obj = wave_init(name);
    if (obj == NULL) {
        printf("wave_init failed \n\r");
        return 1;
    }
    printf("Hello World2 \n\r");

    N = 1280;
    inp = (float*)malloc(sizeof(float) * N);
    out = (float*)malloc(sizeof(float) * N);
    if (inp == NULL || out == NULL) {
        printf("Memory allocation failed \n\r");
        free(inp);
        free(out);
        wave_free(obj);
        return 1;
    }

    for (i = 0; i < N; ++i) {
        inp[i] = temp[i];
        printf("Hello World3 \n\r");
    }

    J = 4; // Decomposition levels
    wt = wt_init(obj, "dwt", N, J); // Initialize the wavelet transform object
    if (wt == NULL) {
        printf("wt_init failed \n\r");
        free(inp);
        free(out);
        wave_free(obj);
        return 1;
    }
    printf("Hello World4 \n\r");
    setDWTExtension(wt, "sym");
    printf("Hello World5 \n\r");
    setWTConv(wt, "direct");
    printf("Hello World6 \n\r");
    dwt(wt, inp); // Perform DWT
    printf("Hello World7 \n\r");

    for (k = 1; k <= J; ++k) {
        getDWTDetail(wt, out, wt->length[k], k);
        printf("Detail Coefficients Level %d Length %d \n",
               k, wt->length[k]);
        for (i = 0; i < wt->length[k]; ++i) {
            printf("%g ", out[i]);
        }
        printf("\n\n");
    }
    wt_summary(wt); // Prints the full summary.
    printf("Hello World8 \n\r");

    wave_free(obj);
    wt_free(wt);
    free(inp);
    free(out);
    return 0;
}
The other part of the code where there is the function used in the main function:
#include "wavemin.h"
/*
 * Allocate a wave object for the wavelet called `wname`.
 *
 * The object and its four filters (lpd, hpd, lpr, hpr) live in one allocation:
 * the filters are consecutive slices of obj->params, each filtlength(wname)
 * floats long, and are filled in by filtcoef().
 *
 * Returns the new object (release it with wave_free()), or NULL when wname is
 * NULL, when filtlength() reports a negative length, or when memory cannot be
 * allocated.
 *
 * Fixes over the previous version: wname was passed to strcpy() before the
 * NULL check, and the malloc() result was used unchecked.
 */
wave_object wave_init(char *wname) {
    wave_object obj = NULL;
    int retval;

    if (wname == NULL) {
        return NULL;
    }
    retval = filtlength(wname);
    if (retval < 0) {
        /* a negative length would turn into a huge allocation size below */
        return NULL;
    }

    obj = (wave_object)malloc(sizeof(struct wave_set) + sizeof(float) * 4 *
                              retval);
    if (obj == NULL) {
        return NULL;
    }

    obj->filtlength = retval;
    obj->lpd_len = obj->hpd_len = obj->lpr_len = obj->hpr_len = obj->filtlength;
    /* NOTE(review): obj->wname's capacity is not visible here - confirm wname fits */
    strcpy(obj->wname, wname);
    filtcoef(wname, obj->params, obj->params + retval, obj->params + 2 *
             retval, obj->params + 3 * retval);

    obj->lpd = &obj->params[0];
    obj->hpd = &obj->params[retval];
    obj->lpr = &obj->params[2 * retval];
    obj->hpr = &obj->params[3 * retval];
    return obj;
}
/*
 * Allocate a wavelet transform object for a signal of `siglength` samples and
 * `J` decomposition levels, using the filters of `wave`.
 *
 * Only the methods "dwt" and "DWT" are supported. The output buffer
 * (obj->params, also reachable as obj->output) holds
 * siglength + 2 * J * (filter length + 1) floats and is zeroed. The extension
 * defaults to "sym" and the convolution method to "direct".
 *
 * Returns the new object (release it with wt_free()), or NULL when wave or
 * method is NULL, when the method is not supported, or when memory cannot be
 * allocated.
 *
 * Fixes over the previous version: for any other method the object pointer
 * stayed NULL and was dereferenced right away, and the malloc() result was
 * used unchecked.
 */
wt_object wt_init(wave_object wave, char *method, int siglength, int J) {
    int size, i, MaxIter, outlength;
    wt_object obj = NULL;

    if (wave == NULL || method == NULL) {
        return NULL;
    }
    if (strcmp(method, "dwt") != 0 && strcmp(method, "DWT") != 0) {
        return NULL;
    }

    size = wave->filtlength;
    MaxIter = wmaxiter(siglength, size);
    outlength = siglength + 2 * J * (size + 1);

    obj = (wt_object)malloc(sizeof(struct wt_set) + sizeof(float) * outlength);
    if (obj == NULL) {
        return NULL;
    }

    obj->outlength = outlength; // Default
    strcpy(obj->ext, "sym"); // Default
    obj->wave = wave;
    obj->siglength = siglength;
    obj->J = J;
    obj->MaxIter = MaxIter;
    strcpy(obj->method, method);
    obj->even = (siglength % 2 == 0) ? 1 : 0;
    strcpy(obj->cmethod, "direct"); // Default
    obj->cfftset = 0;
    obj->lenlength = J + 2;
    obj->output = &obj->params[0];
    for (i = 0; i < outlength; ++i) {
        obj->params[i] = 0.0;
    }
    return obj;
}
/*
 * One decomposition step with symmetric extension: for each i in
 * 0..len_cA-1, cA[i] and cD[i] are the sums of the wave's lpd and hpd filter
 * taps multiplied by the input samples at index t - l, where t = 2 * i + 1 and
 * l is the tap index (the loop variable "ell", not the number one).
 * Indices that fall outside inp[0..N-1] are mirrored back into range.
 * len_cD is not used in this body. Each inner iteration also prints a trace line.
 */
static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA,
float *cD, int len_cD) {
int i, l, t, len_avg;
/* number of filter taps; the same count is used for lpd and hpd below */
len_avg = wt->wave->lpd_len;
for (i = 0; i < len_cA; ++i) {
t = 2 * i + 1;
cA[i] = 0.0;
cD[i] = 0.0;
for (l = 0; l < len_avg; ++l) {
/* index inside the signal: use the sample directly */
if ((t - l) >= 0 && (t - l) < N) {
cA[i] += wt->wave->lpd[l] * inp[t - l];
cD[i] += wt->wave->hpd[l] * inp[t - l];
printf("world1 \n\r");
}
/* index before the start: mirror it, -t + l - 1 == -(t - l) - 1 */
else if ((t - l) < 0) {
cA[i] += wt->wave->lpd[l] * inp[-t + l - 1];
cD[i] += wt->wave->hpd[l] * inp[-t + l - 1];
printf("world2 \n\r");
}
/* index past the end: mirror it, 2 * N - t + l - 1 == 2 * N - (t - l) - 1 */
else if ((t - l) >= N) {
cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1];
cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1];
printf("world3 \n\r");
}
}
}
}
/*
 * Run the J-level discrete wavelet transform of inp[0..wt->siglength-1] and
 * store the coefficients in wt->params.
 *
 * The per-level lengths are written to wt->length[0..J+1] and their sum to
 * wt->outlength. Each level calls dwt_sym(): the detail coefficients go
 * straight into wt->params, the approximation coefficients become the input of
 * the next level, and the final approximation is copied to the start of
 * wt->params. Only the "sym" extension is supported; anything else prints a
 * message and exits. The "HelloN" lines are the progress traces of the
 * original program and are kept as they were.
 *
 * Fixes over the previous version: both work buffers are checked after
 * malloc() (on failure a message is printed and the process exits, like the
 * other error path here), and the "zpad == 1" branch was removed because zpad
 * is set to 0 just before it, so it could never run.
 */
void dwt(wt_object wt, float *inp) {
    int i, J, temp_len, iter, N, lp;
    int len_cA;
    float *orig, *orig2;

    temp_len = wt->siglength;
    J = wt->J;
    wt->length[J + 1] = temp_len;
    wt->outlength = 0;
    wt->zpad = 0;

    orig = (float*)malloc(sizeof(float) * temp_len);
    orig2 = (float*)malloc(sizeof(float) * temp_len);
    if (orig == NULL || orig2 == NULL) {
        printf("dwt: memory allocation failed");
        free(orig);
        free(orig2);
        exit(-1);
    }

    for (i = 0; i < wt->siglength; ++i) {
        orig[i] = inp[i];
        printf("Hello1 \n\r");
    }

    N = temp_len;
    lp = wt->wave->lpd_len;
    if (!strcmp(wt->ext, "sym")) {
        /* coefficient count of every level, from the finest (J) down to 1 */
        i = J;
        while (i > 0) {
            N = N + lp - 2;
            N = (int)ceil((float)N / 2.0);
            wt->length[i] = N;
            wt->outlength += wt->length[i];
            i--;
        }
        /* the final approximation is as long as the coarsest detail level */
        wt->length[0] = wt->length[1];
        wt->outlength += wt->length[0];
        N = wt->outlength;
        printf("Hello3 \n\r");
        for (iter = 0; iter < J; ++iter) {
            len_cA = wt->length[J - iter];
            /* N walks backwards through params: details are stored from the end */
            N -= len_cA;
            dwt_sym(wt, orig, temp_len, orig2, len_cA, wt->params + N, len_cA);
            temp_len = wt->length[J - iter];
            printf("Hello4 \n\r");
            if (iter == J - 1) {
                /* last level: the approximation goes to the front of params */
                for (i = 0; i < len_cA; ++i) {
                    wt->params[i] = orig2[i];
                    printf("Hello5 \n\r");
                }
            } else {
                /* the approximation becomes the input of the next level */
                for (i = 0; i < len_cA; ++i) {
                    orig[i] = orig2[i];
                    printf("Hello6 \n\r");
                }
            }
        }
    } else {
        printf("Signal extension can be either per or sym");
        exit(-1);
    }
    free(orig);
    free(orig2);
}
/*
 * Select the signal extension mode of `wt`. Only "sym" is accepted; any other
 * value prints a message and terminates the process with exit(-1).
 */
void setDWTExtension(wt_object wt, char *extension) {
    if (strcmp(extension, "sym") != 0) {
        printf("Signal extension can be either per or sym");
        exit(-1);
    }
    strcpy(wt->ext, "sym");
}
/*
 * Select the convolution method of `wt`. Only "direct" is recognised; any
 * other value leaves the object unchanged.
 */
void setWTConv(wt_object wt, char *cmethod) {
    if (strcmp(cmethod, "direct") != 0) {
        return;
    }
    strcpy(wt->cmethod, "direct");
}
/*
 * Copy N detail coefficients of decomposition level `level` into `detail`.
 *
 * The decomposition is stored in wt->output as [A(J) D(J) D(J-1) ... D(1)].
 * Use getDWTAppx() to get A(J).
 *   level 1: the length of D(J)   is stored in wt->length[1]
 *   level 2: the length of D(J-1) is stored in wt->length[2]
 *   ...
 *   level J: the length of D(1)   is stored in wt->length[J]
 * The copy therefore starts after wt->length[0] + ... + wt->length[level - 1]
 * elements of wt->output.
 *
 * Fix over the previous version: for level > J a message was printed but the
 * copy still ran with an offset past the stored levels. The function now
 * returns after the message and leaves `detail` untouched.
 */
void getDWTDetail(wt_object wt, float *detail, int N, int level) {
    int i, iter, J;

    J = wt->J;
    if (level > J) {
        printf("The decomposition only has %d levels", J);
        return;
    }

    /* skip the approximation block and the detail blocks stored before this level */
    iter = wt->length[0];
    for (i = 1; i < level; ++i) {
        iter += wt->length[i];
    }
    for (i = 0; i < N; ++i) {
        detail[i] = wt->output[i + iter];
    }
}
/*
 * Copy the first N values of wt->output into `appx`.
 *
 * The decomposition is stored as [A(J) D(J) D(J-1) ... D(1)], so these are the
 * approximation coefficients A(J) when N is wt->length[0].
 */
void getDWTAppx(wt_object wt, float *appx, int N) {
    int pos = 0;
    while (pos < N) {
        appx[pos] = wt->output[pos];
        ++pos;
    }
}
/*
 * Print where each block of coefficients sits in the output vector: first the
 * approximation block (offset 0, wt->length[0] values), then one line per
 * detail level with its starting offset and length.
 */
void wt_summary(wt_object wt) {
    int levels = wt->J;
    int offset = wt->length[0];
    int level;

    printf("Wavelet Coefficients are contained in vector : %s \n", "output");
    printf("\n");
    printf("Approximation Coefficients \n");
    printf("Level %d Access : output[%d] Length : %d \n",
           1, 0, wt->length[0]);
    printf("\n");
    printf("Detail Coefficients \n");
    for (level = 1; level <= levels; ++level) {
        int len = wt->length[level];
        printf("Level %d Access : output[%d] Length : %d \n",
               level, offset, len);
        offset += len;
    }
    printf("\n");
}
/* Release a wave object by passing it to free(). */
void wave_free(wave_object object) {
free(object);
}
/* Release a wavelet transform object by passing it to free(). */
void wt_free(wt_object object) {
free(object);
}
enter image description here
In your code
Always check if malloc has returned non NULL value
Check your stack and heap settings in the linker file as you declare massive local variables and do a lots of mallocs - I suspect the (nomen omen)stack overflow, or failed mallocs.
Is it a bare metal program or you run it under some kind of OS?
Just for a matter of style and concision, I would rewrite this:
/* Original form: sets obj->even to 1 when siglength is even and to 0 when it is odd. */
if (siglength % 2 == 0) {
obj->even = 1;
}
else {
obj->even = 0;
}
Into the following code:
obj->even = !(siglength % 2); /* 1 when siglength is even, 0 when it is odd */
Or, alternatively:
obj->even = (siglength % 2) ? 0 : 1; /* a non-zero remainder means odd, so even = 0 */
Also, I think there is room for optimization in this function:
/*
 * Quoted from the question: one decomposition step with symmetric extension.
 * For each i, cA[i] and cD[i] accumulate the lpd and hpd filter taps multiplied
 * by the input samples at index t - l, where l is the tap index (the loop
 * variable "ell", not the number one). Out-of-range indices are mirrored back
 * into inp[0..N-1]. len_cD is not used in this body.
 */
static void dwt_sym(wt_object wt, float *inp, int N, float *cA, int len_cA,
float *cD, int len_cD) {
int i, l, t, len_avg;
len_avg = wt->wave->lpd_len;
for (i = 0; i < len_cA; ++i) {
t = 2 * i + 1;
cA[i] = 0.0;
cD[i] = 0.0;
for (l = 0; l < len_avg; ++l) {
/* index inside the signal */
if ((t - l) >= 0 && (t - l) < N) {
cA[i] += wt->wave->lpd[l] * inp[t - l];
cD[i] += wt->wave->hpd[l] * inp[t - l];
printf("world1 \n\r");
}
/* index before the start: mirrored */
else if ((t - l) < 0) {
cA[i] += wt->wave->lpd[l] * inp[-t + l - 1];
cD[i] += wt->wave->hpd[l] * inp[-t + l - 1];
printf("world2 \n\r");
}
/* index past the end: mirrored */
else if ((t - l) >= N) {
cA[i] += wt->wave->lpd[l] * inp[2 * N - t + l - 1];
cD[i] += wt->wave->hpd[l] * inp[2 * N - t + l - 1];
printf("world3 \n\r");
}
}
}
}
First, you are always referring to t - 1 and never t itself, so why not have:
/* NOTE(review): only valid if every use of t is really "t - 1"; the quoted loop
 * indexes with t - l, where l is the inner loop variable, so verify before applying. */
t = 2 * i;
And, I can guess that a lot of computation can be placed outside of the inner loop... If you want to optimize, there are many good candidate here.
One last word about optimization!
You should first profile your software and see where you spend the most time before thinking about optimization. You cannot optimize "in the air" without knowing where your software does really struggle. Consider using gprof.
PS: You should never ever use the letter l (ell) as a variable name... it is way too close to the number 1 (one). Consider changing this as well; it can improve readability.

Resources