Reduce function in c - c

So I'm trying learn how to reduce my lines of code, and I came across one of my "larger" functions I wanted to look at.
int DTWDistance(int* x, int xsize, int* y, int ysize){
const double LARGE_VALUE = 1e30;
const int min_window = abs(xsize - ysize);
int i, j, minj, maxj, window;
double dist, min;
double **distances = malloc((xsize + 1) * sizeof(double *));
for(i = 0; i < xsize + 1; ++i)
distances[i] = malloc((ysize + 1) * sizeof(double));
window = 1*ysize;
if(xsize > ysize)
window = 1*xsize;
if(window < min_window)
window = min_window;
for(i = 0; i <= xsize; ++i)
for(j = 0; j <= ysize; ++j)
distances[i][j] = LARGE_VALUE;
distances[0][0] = 0;
for(i = 0; i < xsize; ++i)
{
minj = i - window;
if(minj < 0)
minj = 0;
maxj = i + window;
if(maxj > ysize)
maxj = ysize;
for(j = minj; j < maxj; ++j)
{
dist = abs(x[i] - y[j]);
min = distances[i][j];
if(min > distances[i][j+1])
min = distances[i][j+1];
if(min > distances[i+1][j])
min = distances[i+1][j];
distances[i+1][j+1] = dist + min;
}
}
dist = distances[xsize][ysize];
for(i = 0; i < xsize + 1; ++i)
free(distances[i]);
free(distances);
return dist;
}
To me it looks alright, but it might be because I've looked so many times at it now. So now I'm gonna ask a fresh pair of eye to look at this. Can you see an easier way of writing this or should I just go with this?
Note: this is for me to learn how I can write my code in another, maybe smarter way?
EDIT reduced code
//DTW - Dynamic Time Warping - Compare two -usually temporal- sequences
int DTWDistance(int* x, int xsize, int* y, int ysize){
const int LARGE_VALUE = INT_MAX;
int i, j, minj, maxj, fr, myMin, dist;
int **distances = malloc((xsize + 1) * sizeof(int *));
for(i = 0; i < xsize + 1; ++i)
distances[i] = malloc((ysize + 1) * sizeof(int));
for(i = 0; i <= xsize; ++i)
for(j = 0; j <= ysize; ++j)
distances[i][j] = LARGE_VALUE;
distances[0][0] = 0;
for(i = 0; i < xsize; ++i)
{
minj = max(i - ysize, 0);
maxj = min(i + ysize, ysize);
for(j = minj; j < maxj; ++j)
{
dist = abs(x[i] - y[j]);
fr = min(distances[i][j + 1], distances[i + 1][j]);
myMin = min(distances[i][j], fr);
distances[i+1][j+1] = dist + myMin;
}
}
dist = distances[xsize][ysize];
for(i = 0; i < xsize + 1; ++i)
free(distances[i]);
free(distances);
return dist;
}

Another simplification is using Variable Length Array.
If xsize * ysize is reasonable (up to about 100k) then you can use VLA with automatic storage (usually allocated on stack).
You can replace:
double **distances = malloc((xsize + 1) * sizeof(double *));
for(i = 0; i < xsize + 1; ++i)
distances[i] = malloc((ysize + 1) * sizeof(double));
with
double distances[xsize + 1][ysize + 1];
and remove all deallocation code:
for(i = 0; i < xsize + 1; ++i)
free(distances[i]);
free(distances);
If sizes are large then you can use VLA with dynamic storage:
double (*distances)[ysize + 1] = malloc((xsize + 1) * sizeof *distances);
and free it at the end with:
free(distances)
VLAs have other advantages over more popular "array of arrays".
Single large allocations is generally a lot faster than a bunch of smaller ones. Moreover, accessing a true 2D array usually more cache friendly and easier to be autovectorized by a compiler.

A lot of your code can be reduced if you make use of min and max macros:
#define min(a, b) ((a) < (b)) ? (a) : (b);
#define max(a, b) ((a) > (b)) ? (a) : (b);
This makes it possible to turn this into one single line:
// Old:
min = distances[i][j];
if(min > distances[i][j+1])
min = distances[i][j+1];
if(min > distances[i+1][j])
min = distances[i+1][j];
// New:
// note: renamed 'min' to avoid naming conflict with macro
myMin = min(distances[i][j], min(distances[i][j + 1], distances[i + 1][j]);
Likewise:
if(minj < 0)
minj = 0;
Could be replaced with:
minj = max(i - window, 0);
And:
if(maxj > ysize)
maxj = ysize;
Could be:
maxj = min(i + window, ysize);
Again:
if(window < min_window)
window = min_window;
Can be:
window = min(window, min_window);
There, I just turned your 51 line function into a 40 line function.

Related

As a result of processing arrays -nan(ind)

I am writing a program that creates arrays of a given length and manipulates them. You cannot use other libraries.
First, an array M1 of length N is formed, after which an array M2 of length N is formed/2.
In the M1 array, the division by Pi operation is applied to each element, followed by elevation to the third power.
Then, in the M2 array, each element is alternately added to the previous one, and the tangent modulus operation is applied to the result of addition.
After that, exponentiation is applied to all elements of the M1 and M2 array with the same indexes and the resulting array is sorted by dwarf sorting.
And at the end, the sum of the sines of the elements of the M2 array is calculated, which, when divided by the minimum non-zero element of the M2 array, give an even number.
The problem is that the result X gives is -nan(ind). I can't figure out exactly where the error is.
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
const int A = 441;
const double PI = 3.1415926535897931159979635;
inline void dwarf_sort(double* array, int size) {
size_t i = 1;
while (i < size) {
if (i == 0) {
i = 1;
}
if (array[i - 1] <= array[i]) {
++i;
}
else
{
long tmp = array[i];
array[i] = array[i - 1];
array[i - 1] = tmp;
--i;
}
}
}
inline double reduce(double* array, int size) {
size_t i;
double min = RAND_MAX, sum = 0;
for (i = 0; i < size; ++i) {
if (array[i] < min && array[i] != 0) {
min = array[i];
}
}
for (i = 0; i < size; ++i) {
if ((int)(array[i] / min) % 2 == 0) {
sum += sin(array[i]);
}
}
return sum;
}
int main(int argc, char* argv[])
{
int i, N, j;
double* M1 = NULL, * M2 = NULL, * M2_copy = NULL;
double X;
unsigned int seed = 0;
N = atoi(argv[1]); /* N равен первому параметру командной строки */
M1 = malloc(N * sizeof(double));
M2 = malloc(N / 2 * sizeof(double));
M2_copy = malloc(N / 2 * sizeof(double));
for (i = 0; i < 100; i++)
{
seed = i;
srand(i);
/*generate*/
for (j = 0; j < N; ++j) {
M1[j] = (rand_r(&seed) % A) + 1;
}
for (j = 0; j < N / 2; ++j) {
M2[j] = (rand_r(&seed) % (10 * A)) + 1;
}
/*map*/
for (j = 0; j < N; ++j)
{
M1[j] = pow(M1[j] / PI, 3);
}
for (j = 0; j < N / 2; ++j) {
M2_copy[j] = M2[j];
}
M2[0] = fabs(tan(M2_copy[0]));
for (j = 0; j < N / 2; ++j) {
M2[j] = fabs(tan(M2[j] + M2_copy[j]));
}
/*merge*/
for (j = 0; j < N / 2; ++j) {
M2[j] = pow(M1[j], M2[j]);
}
/*sort*/
dwarf_sort(M2, N / 2);
/*sort*/
X = reduce(M2, N / 2);
}
printf("\nN=%d.\n", N);
printf("X=%f\n", X);
return 0;
}
Knowledgeable people, does anyone see where my mistake is? I think I'm putting the wrong data types to the variables, but I still can't solve the problem.
Replace the /* merge */ part with this:
/*merge*/
for (j = 0; j < N / 2; ++j) {
printf("%f %f ", M1[j], M2[j]);
M2[j] = pow(M1[j], M2[j]);
printf("%f\n", M2[j]);
}
This will print the values and the results of the pow operation. You'll see that some of these values are huge resulting in an capacity overflow of double.
Something like pow(593419.97, 31.80) will not end well.

Access Violation writing location C++ 0x02D1F000

I'm attempting to initialize, populate and parse through an array in order to determine its "stability." To avoid a stack overflow, I decided to create dynamic arrays. The problem is that when it comes to populating the array, I get an exception regarding an access violation to a random location. I don't know if its something in the initialization or in the nested for loop when populating the array. I just can't seem to find anything wrong, nor my classmates/TAs. Thanks in advance for your help! I have tried compiling in VS, XCode, and g++ I have tried commenting out the dynamic array loops as well as the delete loops and gone for "regular arrays" such as float array[x][y] and I still get the same error.
#include <iostream>
#include <array>
#include <iomanip>
#include <cmath>
using namespace std;
int main() {
int check = 0;
int iteration = 0;
int newIteration = 0;
int newNewIteration = 0;
int const DIMENSION = 1024;
//Initializing the dynamic arrays in
//heap to avoid a stack overflow
float** firstGrid = new float*[DIMENSION];
for (int a = 0; a < DIMENSION; ++a) {
firstGrid[a] = new float[DIMENSION];
}
float** secondGrid = new float*[DIMENSION];
for (int b = 0; b < DIMENSION; ++b) {
secondGrid[b] = new float[DIMENSION];
}
float** thirdGrid = new float*[DIMENSION];
for (int c = 0; c < DIMENSION; ++c) {
thirdGrid[c] = new float[DIMENSION];
}
//Populating the arrays
//All points inside first array
for (int i = 0; i < DIMENSION; ++i) {
for (int j = 0; i < DIMENSION; ++j) {
firstGrid[i][j] = 0.0; //exception occurs here
}
}
for (int i = 1; i < DIMENSION - 1; ++i) {
for (int j = 1; i < DIMENSION - 1; ++j) {
firstGrid[i][j] = 50.0;
}
}
//Pre-setting second array
for (int i = 0; i < DIMENSION; ++i) {
for (int j = 0; i < DIMENSION; ++j) {
secondGrid[i][j] = 0.0;
}
}
for (int i = 1; i < DIMENSION - 1; ++i) {
for (int j = 1; i < DIMENSION - 1; ++j) {
secondGrid[i][j] = 50.0;
}
}
//Pre-setting third array
for (int i = 0; i < DIMENSION; ++i) {
for (int j = 0; i < DIMENSION; ++j) {
thirdGrid[i][j] = 0.0;
}
}
for (int i = 1; i < DIMENSION - 1; ++i) {
for (int j = 1; i < DIMENSION - 1; ++j) {
thirdGrid[i][j] = 50.0;
}
}
//Checking and Populating new arrays
for (int p = 1; p < DIMENSION - 1; ++p) {
for (int q = 1; q < DIMENSION - 1; ++p) {
check = abs((firstGrid[p - 1][q] + firstGrid[p][q - 1] + firstGrid[p + 1][q] + firstGrid[p][q + 1]) / 4
- firstGrid[p][q]);
if (check > 0.1) {
secondGrid[p][q] = (firstGrid[p - 1][q] + firstGrid[p][q - 1] + firstGrid[p + 1][q] + firstGrid[p][q + 1]) / 4;
iteration = iteration + 1;
}
}
}
for (int p = 1; p < DIMENSION - 1; ++p) {
for (int q = 1; q < DIMENSION - 1; ++p) {
check = abs((secondGrid[p - 1][q] + secondGrid[p][q - 1] + secondGrid[p + 1][q] + secondGrid[p][q + 1]) / 4
- secondGrid[p][q]);
if (check > 0.1) {
thirdGrid[p][q] = (secondGrid[p - 1][q] + secondGrid[p][q - 1] + secondGrid[p + 1][q] + secondGrid[p][q + 1]) / 4;
newIteration = newIteration + 1;
}
}
}
for (int p = 1; p < DIMENSION - 1; ++p) {
for (int q = 1; q < DIMENSION - 1; ++p) {
check = abs((thirdGrid[p - 1][q] + thirdGrid[p][q - 1] + thirdGrid[p + 1][q] + thirdGrid[p][q + 1]) / 4
- thirdGrid[p][q]);
if (check > 0.1) {
newNewIteration = newNewIteration + 1;
}
}
}
//Deleting arrays and freeing memory
for (int x = 0; x < DIMENSION; ++x) {
delete [] firstGrid[x];
}
delete [] firstGrid;
for (int x = 0; x < DIMENSION; ++x) {
delete [] secondGrid[x];
}
delete [] secondGrid;
for (int x = 0; x < DIMENSION; ++x) {
delete [] thirdGrid[x];
}
delete [] thirdGrid;
//iteration checking
cout << iteration << endl << newIteration << endl << newNewIteration;
if (iteration == 179 || newIteration == 179 || newNewIteration == 179) {
return 0;
}
else {
return 1;
}
}
You should use j consistently in your second for-loop (where the error occurs):
for(j=0; j < DIMENSION; j++)

How can I improve locality of reads and writes in the following code?

I'm working on the following image convolution code:
typedef struct fmatrix{
int rows;
int cols;
float** array;
} fmatrix;
typedef struct image{
unsigned char* data;
int w;
int h;
int c;
} image;
typedef struct kernel{
fmatrix* psf;
int divisor;
} kernel;
void convolve_sq(image* src, image* dst, kernel* psf, int pixel){
int size = psf->psf->rows * psf->psf->cols;
float tmp[size];
int n, m; //for psf
int x, y, x0, y0, cur; //for image
y0 = pixel / (src->w * src->c);
x0 = (pixel / src->c) % src->w;
for (n = 0; n < psf->psf->rows; ++n){
for (m = 0; m < psf->psf->cols; ++m){
y = n - (psf->psf->rows / 2);
x = m - (psf->psf->cols / 2);
if ((y + y0) < 0 || (y + y0) >= src->h || (x + x0) < 0 || (x + x0) >= src->w){
tmp[n*psf->psf->rows+m] = 255 * psf->psf->array[n][m];
}
else{
cur = (pixel + y * src->w * src->c + x * src->c);
tmp[n*psf->psf->rows+m] = src->data[cur] * psf->psf->array[n][m]; //misses on read
}
}
}
m = 0;
for (n = 0; n < size; ++n){
m += (int) tmp[n];
}
m /= psf->divisor;
if (m < 0) m = 0;
if (m > 255) m = 255;
dst->data[pixel] = m; //misses on write
}
void convolve_image(image* src, image* dst, kernel* psf){
int i, j, k;
for (i = 0; i < src->h; ++i){
for (j = 0; j < src->w; ++j){
for (k = 0; k < src->c; ++k){
convolve_sq(src, dst, psf, (i * src->w * src->c + j * src->c + k) );
}
}
}
}
Running cachegrind, I've determined two places where there are a substantial number of cache misses, which I've annotated in the code above. For the line marked "misses on read", there were 97,205 D1mr and 97,201 DLmr. For the line marked "misses on write", there were 97,201 D1mw and DLmw. These lines read and write directly to/from the image respectively.
How can I make this code more efficient, in terms of avoiding cache misses?

Segmentation fault in function implementing Ford-Fulkerson

I'm working on a class assignment and I've run into an issue I haven't been able to figure out. I'm implementing the Ford-Fulkerson algorithm using BFS to find max flow. But while trying to set my Residual Capacity matrix to the given capacity, I hit a segmentation fault. In the test code we received, I can see that the original capacity matrix was passed by value by its address, but I have a feeling that in my code I'm not interacting with it the way I think I am? Which leads me to believe that I may have the same issue recurring elsewhere. I worked with gdb and saw that I hit a segmentation fault on this line here in my nested for loop :
resCap[i][j] = *(capacity + i*n + j);
However, nothing I have tried has worked for me though so I am pretty stumped.
void maximum_flow(int n, int s, int t, int *capacity, int *flow)
{
int i, j, resCap[n][n], path[n]; // residual capacity and BFS augmenting path
int min_path = INT_MAX; // min of the augmenting path
// Assign residual capacity equal to the given capacity
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
{
resCap[i][j] = *(capacity + i*n + j);
*(flow + i*n + j) = 0; // no initial flow
}
// Augment path with BFS from source to sink
while (bfs(n, s, t, &(resCap[0][0]), path))
{
// find min of the augmenting path
for (j = t; j != s; j = path[j])
{
i = path[j];
min_path = min(min_path, resCap[i][j]);
}
// update residual capacities and flows on both directions
for (j = t; j != s; j = path[j])
{
i = path[j];
if(*(capacity + i*n + j) > 0)
*(flow + i*n + j) += min_flow_path;
else
*(flow + j*n + i) -= min_flow_path;
resCap[i][j] -= min_flow_path;
resCap[j][i] += min_flow_path;
}
}
}
And here is the test code provided to us in case it is needed:
int main(void)
{ int cap[1000][1000], flow[1000][1000];
int i,j, flowsum;
for(i=0; i< 1000; i++)
for( j =0; j< 1000; j++ )
cap[i][j] = 0;
for(i=0; i<499; i++)
for( j=i+1; j<500; j++)
cap[i][j] = 2;
for(i=1; i<500; i++)
cap[i][500 + (i/2)] =4;
for(i=500; i < 750; i++ )
{ cap[i][i-250]=3;
cap[i][750] = 1;
cap[i][751] = 1;
cap[i][752] = 5;
}
cap[751][753] = 5;
cap[752][753] = 5;
cap[753][750] = 20;
for( i=754; i< 999; i++)
{ cap[753][i]=1;
cap[i][500]=3;
cap[i][498]=5;
cap[i][1] = 100;
}
cap[900][999] = 1;
cap[910][999] = 1;
cap[920][999] = 1;
cap[930][999] = 1;
cap[940][999] = 1;
cap[950][999] = 1;
cap[960][999] = 1;
cap[970][999] = 1;
cap[980][999] = 1;
cap[990][999] = 1;
printf("prepared capacity matrix, now executing maxflow code\n");
maximum_flow(1000,0,999,&(cap[0][0]),&(flow[0][0]));
for(i=0; i<=999; i++)
for(j=0; j<=999; j++)
{ if( flow[i][j] > cap[i][j] )
{ printf("Capacity violated\n"); exit(0);}
}
flowsum = 0;
for(i=0; i<=999; i++)
flowsum += flow[0][i];
printf("Outflow of 0 is %d, should be 10\n", flowsum);
flowsum = 0;
for(i=0; i<=999; i++)
flowsum += flow[i][999];
printf("Inflow of 999 is %d, should be 10\n", flowsum);
printf("End Test\n");
}
This line is likely going to segfault, it does using Clang.
int i, j, resCap[n][n], path[n];
You're declaring a very large array on the stack. Just how big can be seen when you try and allocated it using calloc. Try this instead and don't forget to free it using the same sort of loop.
int **resCap2 = calloc(1, n * sizeof(int *));
assert(resCap2);
for (i = 0; i < n; i++) {
resCap2[i] = calloc(1, n * sizeof(int));
assert(resCap2[i]);
}
This is a lot of space ie
(1000 * sizeof(int*) * (1000 * n * sizeof(int)))

Allocate 3D matrix in one big chunk

I'd like to allocate a 3D matrix in one big chunk. It should be possible to access this matrix in the [i][j][k] fashion, without having to calculate the linearized index every time.
I think it should be something like below, but I'm having trouble filling the ...
double ****matrix = (double ****) malloc(...)
for (int i = 0; i < imax; i++) {
matrix[i] = &matrix[...]
for (int j = 0; j < jmax; j++) {
matrix[i][j] = &matrix[...]
for (int k = 0; k < kmax; k++) {
matrix[i][j][k] = &matrix[...]
}
}
}
For the single allocation to be possible and work, you need to lay out the resulting memory like this:
imax units of double **
imax * jmax units of double *
imax * jmax * kmax units of double
Further, the 'imax units of double **' must be allocated first; you can reorder the other two sections, but it is most sensible to deal with them in the order listed.
You also need to be able to assume that double and double * (and double **, but that's not much of a stretch) are sufficiently well aligned that you can simply allocate the chunks contiguously. That is going to hold OK on most 64-bit systems with type double, but be aware of the possibility that it does not hold on 32-bit systems or for other types than double (basically, the assumption could be problematic when sizeof(double) != sizeof(double *)).
With those caveats made, then this code works cleanly (tested on Mac OS X 10.10.2 with GCC 4.9.1 and Valgrind version valgrind-3.11.0.SVN):
#include <stdio.h>
#include <stdlib.h>
typedef double Element;
static Element ***alloc_3d_matrix(size_t imax, size_t jmax, size_t kmax)
{
size_t i_size = imax * sizeof(Element **);
size_t j_size = imax * jmax * sizeof(Element *);
size_t k_size = imax * jmax * kmax * sizeof(Element);
Element ***matrix = malloc(i_size + j_size + k_size);
if (matrix == 0)
return 0;
printf("i = %zu, j = %zu, k = %zu; sizes: i = %zu, j = %zu, k = %zu; "
"%zu bytes total\n",
imax, jmax, kmax, i_size, j_size, k_size, i_size + j_size + k_size);
printf("matrix = %p .. %p\n", (void *)matrix,
(void *)((char *)matrix + i_size + j_size + k_size));
Element **j_base = (void *)((char *)matrix + imax * sizeof(Element **));
printf("j_base = %p\n", (void *)j_base);
for (size_t i = 0; i < imax; i++)
{
matrix[i] = &j_base[i * jmax];
printf("matrix[%zu] = %p (%p)\n",
i, (void *)matrix[i], (void *)&matrix[i]);
}
Element *k_base = (void *)((char *)j_base + imax * jmax * sizeof(Element *));
printf("k_base = %p\n", (void *)k_base);
for (size_t i = 0; i < imax; i++)
{
for (size_t j = 0; j < jmax; j++)
{
matrix[i][j] = &k_base[(i * jmax + j) * kmax];
printf("matrix[%zu][%zu] = %p (%p)\n",
i, j, (void *)matrix[i][j], (void *)&matrix[i][j]);
}
}
/* Diagnostic only */
for (size_t i = 0; i < imax; i++)
{
for (size_t j = 0; j < jmax; j++)
{
for (size_t k = 0; k < kmax; k++)
printf("matrix[%zu][%zu][%zu] = %p\n",
i, j, k, (void *)&matrix[i][j][k]);
}
}
return matrix;
}
int main(void)
{
size_t i_max = 3;
size_t j_max = 4;
size_t k_max = 5;
Element ***matrix = alloc_3d_matrix(i_max, j_max, k_max);
if (matrix == 0)
{
fprintf(stderr, "Failed to allocate matrix[%zu][%zu][%zu]\n", i_max, j_max, k_max);
return 1;
}
for (size_t i = 0; i < i_max; i++)
{
for (size_t j = 0; j < j_max; j++)
{
for (size_t k = 0; k < k_max; k++)
matrix[i][j][k] = (i + 1) * 100 + (j + 1) * 10 + k + 1;
}
}
for (size_t i = 0; i < i_max; i++)
{
for (size_t j = 0; j < j_max; j++)
{
for (size_t k = k_max; k > 0; k--)
printf("[%zu][%zu][%zu] = %6.0f\n", i, j, k-1, matrix[i][j][k-1]);
}
}
free(matrix);
return 0;
}
Example output (with some boring bits omitted):
i = 3, j = 4, k = 5; sizes: i = 24, j = 96, k = 480; 600 bytes total
matrix = 0x100821630 .. 0x100821888
j_base = 0x100821648
matrix[0] = 0x100821648 (0x100821630)
matrix[1] = 0x100821668 (0x100821638)
matrix[2] = 0x100821688 (0x100821640)
k_base = 0x1008216a8
matrix[0][0] = 0x1008216a8 (0x100821648)
matrix[0][1] = 0x1008216d0 (0x100821650)
matrix[0][2] = 0x1008216f8 (0x100821658)
matrix[0][3] = 0x100821720 (0x100821660)
matrix[1][0] = 0x100821748 (0x100821668)
matrix[1][1] = 0x100821770 (0x100821670)
matrix[1][2] = 0x100821798 (0x100821678)
matrix[1][3] = 0x1008217c0 (0x100821680)
matrix[2][0] = 0x1008217e8 (0x100821688)
matrix[2][1] = 0x100821810 (0x100821690)
matrix[2][2] = 0x100821838 (0x100821698)
matrix[2][3] = 0x100821860 (0x1008216a0)
matrix[0][0][0] = 0x1008216a8
matrix[0][0][1] = 0x1008216b0
matrix[0][0][2] = 0x1008216b8
matrix[0][0][3] = 0x1008216c0
matrix[0][0][4] = 0x1008216c8
matrix[0][1][0] = 0x1008216d0
matrix[0][1][1] = 0x1008216d8
matrix[0][1][2] = 0x1008216e0
matrix[0][1][3] = 0x1008216e8
matrix[0][1][4] = 0x1008216f0
matrix[0][2][0] = 0x1008216f8
…
matrix[2][2][4] = 0x100821858
matrix[2][3][0] = 0x100821860
matrix[2][3][1] = 0x100821868
matrix[2][3][2] = 0x100821870
matrix[2][3][3] = 0x100821878
matrix[2][3][4] = 0x100821880
[0][0][4] = 115
[0][0][3] = 114
[0][0][2] = 113
[0][0][1] = 112
[0][0][0] = 111
[0][1][4] = 125
[0][1][3] = 124
[0][1][2] = 123
[0][1][1] = 122
[0][1][0] = 121
[0][2][4] = 135
…
[2][2][0] = 331
[2][3][4] = 345
[2][3][3] = 344
[2][3][2] = 343
[2][3][1] = 342
[2][3][0] = 341
There is a lot of diagnostic output in the code shown.
This code will work with C89 (and C99 and C11), without requiring support for variable-length arrays or VLAs — though since I declare variables in for loops, the code as written requires C99 or later, but it can easily be fixed to declare the variables outside the for loops and it can then compile with C89.
This can be done with one simple malloc() call in C (not in C++, though, there are no variable length arrays in C++):
void foo(int imax, int jmax, int kmax) {
double (*matrix)[jmax][kmax] = malloc(imax*sizeof(*matrix));
//Allocation done. Now fill the matrix:
for(int i = 0; i < imax; i++) {
for(int j = 0; j < jmax; j++) {
for(int k = 0; k < kmax; k++) {
matrix[i][j][k] = ...
}
}
}
}
Note that C allows jmax and kmax to be dynamic values that are only known at runtime. That is the ability that's missing in C++, which makes C arrays much more powerful than their C++ counterpart.
The only drawback of this approach, as WhozCraig rightly notes, is that you can't return the resulting matrix as the return value of the function without resorting to a void*. However, you can return it by reference like this:
void foo(int imax, int jmax, int kmax, double (**outMatrix)[jmax][kmax]) {
*outMatrix = malloc(imax*sizeof(**outMatrix));
double (*matrix)[jmax][kmax] = *outMatrix; //avoid having to write (*outMatrix)[i][j][k] everywhere
... //as above
}
This function would need to be called like this:
int imax = ..., jmax = ..., kmax = ...;
double (*myMatrix)[jmax][kmax];
foo(imax, jmax, kmax, &myMatrix);
That way you get full type checking on the inner two dimension sizes even though they are runtime values.
Note: This was intended to be a comment but it got too long, until it turned into a proper answer.
You can't use a single chunk of memory without performing some calculations.
Note that the beginning of each row is marked by the formula
// row_begin is the memory address of the row at index row_idx
row_begin = row_idx * jmax * kmax
And then, each column depends on where the row starts:
// column_begin is the memory address of the column
// at index column_idx of the row starting at row_begin
column_begin = row_begin + column_idx * kmax
Which, using absolute addresses (relative to the matrix pointer, of course) translates to:
column_begin = (row_idx * jmax * kmax) + column_idx * kmax
Finally, getting the k-index of an element is very straightforward, following the previous rule this could turn in an infinite recursion:
// element address = row_address + column_address + element_k_index
element_k_idx = column_begin + element_k_idx
Which translates to
element_k_idx = (row_idx * jmax * kmax) + column_idx * kmax + element_k_idx
This works for me:
void foo(int imax, int jmax, int kmax)
{
// Allocate memory for all the numbers.
// Think of this as (imax*jmax) number of memory chunks,
// with each chunk containing kmax doubles.
double* data_0 = malloc(imax*jmax*kmax*sizeof(double));
// Allocate memory for the previus dimension of pointers.
// This of this as imax number of memory chunks,
// with each chunk containing jmax double*.
double** data_1 = malloc(imax*jmax*sizeof(double*));
// Allocate memory for the previus dimension of pointers.
double*** data_2 = malloc(imax*sizeof(double**));
for (int i = 0; i < imax; i++)
{
data_2[i] = &data_1[i*jmax];
for (int j = 0; j < jmax; j++)
{
data_1[i*jmax+j] = &data_0[(i*jmax+j)*kmax];
}
}
// That is the matrix.
double ***matrix = data_2;
for (int i = 0; i < imax; i++)
{
for (int j = 0; j < jmax; j++)
{
for (int k = 0; k < kmax; k++)
{
matrix[i][j][k] = i+j+k;
}
}
}
for (int i = 0; i < imax; i++)
{
for (int j = 0; j < jmax; j++)
{
for (int k = 0; k < kmax; k++)
{
printf("%lf ", matrix[i][j][k]);
}
printf("\n");
}
}
// Deallocate memory
free(data_2);
free(data_1);
free(data_0);
}

Resources