Allocating dynamic array of structs with dynamic arrays in C - c

I am trying to allocate an array of structs, with each struct also containing dynamic arrays. They will later be communicated via MPI_Sendrecv:
struct cell {
double a, b, c, *aa, *bb;
} *Send_l, *Send_r;
I want Send_l and Send_r to have count number of elements, the arrays aa and bb should contain sAS number of elements. This is all done after MPI_Init.
void allocateForSendRecv(int count) {
int sAS = 5;
int iter = 0;
Send_l = (struct cell *)malloc(count * (sizeof(struct cell)));
for (iter = 0; iter < count; iter++) {
Send_l[iter].aa = (double *)malloc((sAS - 1) * sizeof(double));
Send_l[iter].bb = (double *)malloc((sAS - 1) * sizeof(double));
}
//sAS-1, as sizeof(struct cell) already contains a single (double) for aa and bb.
Send_r = (struct cell *)malloc(count * (sizeof(struct cell)));
for (iter = 0; iter < count; iter++) {
Send_r[iter].aa = (double *)malloc((sAS - 1) * sizeof(double));
Send_r[iter].bb = (double *)malloc((sAS - 1) * sizeof(double));
}
}
With this, I can freely allocate, fill and deallocate, however when I call the following, my results diverge from my reference (using all stack arrays).
MPI_Sendrecv(&(Send_r[0]), count, ..., &(Send_l[0]), count, ...)
I haven't found the exact reason, but posts about similar issues made me assume its due to my non-contiguous memory allocation. Ive tried to solve the problem by using a single malloc call, only to get a segmentation fault when I fill my arrays aa and bb:
Send_l = malloc(count * (sizeof(*Send_l)) + count *(sizeof(*Send_l) + 2 * (sAS - 1) * sizeof(double)));
Send_r = malloc(count * (sizeof(*Send_r)) + count *(sizeof(*Send_r) + 2 * (sAS - 1) * sizeof(double)));
I have reused some code to allocate 2D arrays and applied it to this struct problem, but haven't been able to make it work. Am I right in assuming that, with a functioning single malloc call and therefore contiguous memory allocation, my MPI_Sendrecv would work fine? Alternatively, would using MPI_Type_create_struct solve my non-contiguous memory problem?
Minimal example (without MPI) of segmentation fault. Using allocateSendRecv, everything is fine. But the single alloc in allocateInOneSendRecv gives me issues.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
struct cell {
double a, b, c, *aa, *bb;
} *Send_l, *Send_r;
void allocateSendRecv(int count, int sAS);
void fillSendRecv(int count, int sAS);
void freeSendRecv(int count);
void printSendRecv(int count, int sAS);
void allocateInOneSendRecv(int count, int sAS);
int main(int argc, char *argv[])
{
const int count = 2;
const int sAS = 9;
allocateSendRecv(count, sAS);
//allocateInOneSendRecv(count, sAS);
fillSendRecv(count, sAS);
printSendRecv(count, sAS);
freeSendRecv(count);
return 0;
}
void allocateSendRecv(int count, int sAS) {
int iter = 0;
printf("Allocating!\n");
Send_r = (struct cell *)malloc(count * (sizeof(struct cell)));
for (iter = 0; iter < count; iter++) {
Send_r[iter].aa = (double *)malloc((sAS - 1) * sizeof(double));
Send_r[iter].bb = (double *)malloc((sAS - 1) * sizeof(double));
}
Send_l = (struct cell *)malloc(count * (sizeof(struct cell)));
for (iter = 0; iter < count; iter++) {
Send_l[iter].aa = (double *)malloc((sAS - 1) * sizeof(double));
Send_l[iter].bb = (double *)malloc((sAS - 1) * sizeof(double));
}
}
void allocateInOneSendRecv(int count, int sAS) {
printf("Allocating!\n");
Send_l = malloc(count * (sizeof(*Send_l)) + count *(sizeof(*Send_l) + 2 * (sAS - 1) * sizeof(double)));
Send_r = malloc(count * (sizeof(*Send_r)) + count *(sizeof(*Send_r) + 2 * (sAS - 1) * sizeof(double)));
}
void freeSendRecv(int count) {
int iter = 0;
printf("Deallocating!\n");
free(Send_r);
free(Send_l);
}
void fillSendRecv(int count, int sAS) {
int iter = 0;
int iter2= 0;
double dummyDouble = 5.0;
printf("Filling!\n");
for (iter = 0; iter < count; iter++) {
Send_l[iter].a = dummyDouble;
Send_l[iter].b = dummyDouble;
Send_l[iter].c = dummyDouble;
for (iter2 = 0; iter2 < sAS; iter2++) {
Send_l[iter].aa[iter2] = dummyDouble;
Send_l[iter].bb[iter2] = dummyDouble;
}
dummyDouble++;
Send_r[iter].a = dummyDouble;
Send_r[iter].b = dummyDouble;
Send_r[iter].c = dummyDouble;
for (iter2 = 0; iter2 < sAS; iter2++) {
Send_r[iter].aa[iter2] = dummyDouble;
Send_r[iter].bb[iter2] = dummyDouble;
}
dummyDouble++;
}
}
void printSendRecv(int count, int sAS) {
int iter = 0;
printf("Printing!\n");
for (iter = 0; iter < count; iter++) {
printf("%f \n", Send_l[iter].a);
printf("%f \n", Send_l[iter].b);
printf("%f \n", Send_l[iter].c);
printf("%f \n", Send_l[iter].aa[sAS - 1]);
printf("%f \n\n", Send_l[iter].bb[sAS - 1]);
printf("%f \n", Send_r[iter].a);
printf("%f \n", Send_r[iter].b);
printf("%f \n", Send_r[iter].c);
printf("%f \n", Send_r[iter].aa[sAS - 1]);
printf("%f \n\n", Send_r[iter].bb[sAS - 1]);
}
}

Your current problem is that you can only pass the start address of Send_l (resp. Send_r). From that point, all memory has to be contiguous and you must know its total size and give it later to MPI_SendRecv.
But after allocation, you must ensure that aa and bb members are correctly initialized to point inside the allocated bloc of memory.
A possible code could be:
void allocateSendRecv(int count, int subCount) {
int iter;
// total size of each struct
size_t sz = sizeof(struct cell) + 2 * subCount * sizeof(double);
// one single contiguous allocation
Send_r = malloc(count * sz); // nota: never cast malloc in C language!
// per each cell make aa and bb point into the allocated memory
for (iter = 0; iter < count; iter++) {
Send_r[iter].aa = ((double*)(Send_r + count)) + 2 * subCount * iter;
Send_r[iter].bb = Send_r[iter].aa + subCount;
}
// id. for Send_l
Send_l = malloc(count * sz);
for (iter = 0; iter < count; iter++) {
Send_l[iter].aa = ((double*)(Send_l + count)) + 2 * subCount * iter;
Send_l[iter].bb = Send_l[iter].aa + subCount;
}
}
Here I have first the array of cell structures and then 1 aa array and 1 bb array per structure in that order.
That is enough to get rid of the segmentation fault...

The single global struct
struct cell
{
double a, b, c, *aa, *bb;
} * Send_l, *Send_r;
is a bit fragile:
aa and bb are allocated as arrays of double but the subCount -1 size is not there. It is buried into the code.
Send_l and Send_r are also pointers to arrays of struct cell but the count size is not there. It is also buried into the code. The single struct is global and it is also weak.
This makes hard to test, allocate or free data. I will left a C example using a bit of encapsulation and that you can adapt to your case under MPI. I will use your code and functions with a bit of OOP orientation :)
The example includes 2 programs and functions to serialize and deserialize the data. For testing, the data is written to a file by the 1st program and read back by the second one. The same printSendRecv() shows the data before and after the data is written to disk.
A Cell structure
typedef struct
{
double a;
double b;
double c;
double* aa;
double* bb;
} Cell;
The Send structure
typedef struct
{
Cell l;
Cell r;
} Send;
The Set structure
typedef struct
{
unsigned count;
unsigned subCount;
Send* send;
} Set;
So a Set has all that is needed to describe its contents.
function prototypes
Set* allocateSendRecv(size_t, size_t);
int fillSendRecv(Set*);
Set* freeSendRecv(Set*);
int printSendRecv(Set*, const char*);
Using encapsulation and a bit of RAII from C++ you can rewrite allocateSendRecv() and freeSendRecv() as constructor and destructor of the struct as:
Set* allocateSendRecv(size_t count, size_t subCount)
{
// count is the number of send buffers
// subcount is the size of the arrays inside each cell
printf(
"AllocateSendRecv(count = %llu, subCount = %llu)\n", count,
subCount);
Set* nw = (Set*)malloc(sizeof(Set));
nw->count = count;
nw->subCount = subCount;
nw->send = (Send*)malloc(count * sizeof(Send));
// now that we have Send allocate the Cell arrays
for (size_t i = 0; i < count; i++)
{
nw->send[i].l.aa =
(double*)malloc(subCount * sizeof(double));
nw->send[i].l.bb =
(double*)malloc(subCount * sizeof(double));
nw->send[i].r.aa =
(double*)malloc(subCount * sizeof(double));
nw->send[i].r.bb =
(double*)malloc(subCount * sizeof(double));
}
return nw;
}
Set* freeSendRecv(Set* set)
{
if (set == NULL) return NULL;
printf(
"\nDeallocating(count = %llu, subCount = %llu)\n",
set->count, set->subCount);
for (size_t i = 0; i < set->count; i++)
{
free(set->send[i].l.aa);
free(set->send[i].l.bb);
}
free(set->send);
free(set);
return NULL;
}
Writing this way the tst pointer is invalidated in the call to freeSendRecv(). In this case tst is allocated with count and subCount as 2 and 5 and this goes inside the Set.
fillSendRecv() uses incremental fill values to make it easy to pinpoint some eventual displacement. printSendRecv() accpets a string for an optional message. Values are printed before and after the creation of the Set.
Example: serialize and deserialize a buffer
serialize()
In order to write to disk or to transmit the data first aa and bb arrays must be expanded. The example uses v2-out x y 4 file to create and show a struct using these values and then write if to file
int main(int argc, char** argv)
{
char f_name[256] = {0};
if (argc < 3) usage();
strcpy(f_name, argv[3]);
size_t count = atoll(argv[1]);
size_t subCount = atoll(argv[2]);
Set* tst = allocateSendRecv(count,subCount);
fillSendRecv(tst);
printSendRecv(tst, "printSendRecv(): ");
to_disk(tst, f_name);
tst = freeSendRecv(tst);
return 0;
}
These functions take a Set and write to a file:
int to_disk(Set*, const char*);
int write_cell(Cell*, const size_t, FILE*);
deserialize()
Since the Set has all that is needed to recreate the Set just the file name is needed. The example uses v2-in file to read back the data from file and show it on screen
int main(int argc,char** argv)
{
char f_name[256] = {0};
if (argc < 2) usage();
strcpy(f_name, argv[1]);
Set* tst = from_disk(f_name);
printSendRecv(tst, "As read from disk: ");
tst = freeSendRecv(tst);
return 0;
}
These functions read a file and return a pointer to a Set with the data:
Set* from_disk(const char*);
int read_cell(FILE*, Cell*, const size_t);
output of an example
Here the programs are
v2-out to create a Set and write to a file in disk
v2-in to read a file created by v2-out and load into a new Set
dump.bin is created and Set has count = 2 and subCount = 4
PS C:\SO>
PS C:\SO> .\v2-out 2 4 dump-2-4.bin
AllocateSendRecv(count = 2, subCount = 4)
FillSendRecv()
printSendRecv(): Count is 2, subCount is 4
Set 1 of 2
l:
[a,b,c] = [ 42.001, 42.002, 42.003]
aa: 42.004 42.005 42.006 42.007
bb: 42.008 42.009 42.010 42.011
r:
[a,b,c] = [ 42.012, 42.013, 42.014]
aa: 42.015 42.016 42.017 42.018
bb: 42.019 42.020 42.021 42.022
Set 2 of 2
l:
[a,b,c] = [ 42.023, 42.024, 42.025]
aa: 42.026 42.027 42.028 42.029
bb: 42.030 42.031 42.032 42.033
r:
[a,b,c] = [ 42.034, 42.035, 42.036]
aa: 42.037 42.038 42.039 42.040
bb: 42.041 42.042 42.043 42.044
writing 'Set' to "dump-2-4.bin"
Deallocating(count = 2, subCount = 4)
PS C:\SO> .\v2-in dump-2-4.bin
read 'Set' from "dump-2-4.bin"
From disk: Count = 2, SubCount = 4
AllocateSendRecv(count = 2, subCount = 4)
new 'Set' created
As read from disk: Count is 2, subCount is 4
Set 1 of 2
l:
[a,b,c] = [ 42.001, 42.002, 42.003]
aa: 42.004 42.005 42.006 42.007
bb: 42.008 42.009 42.010 42.011
r:
[a,b,c] = [ 42.012, 42.013, 42.014]
aa: 42.015 42.016 42.017 42.018
bb: 42.019 42.020 42.021 42.022
Set 2 of 2
l:
[a,b,c] = [ 42.023, 42.024, 42.025]
aa: 42.026 42.027 42.028 42.029
bb: 42.030 42.031 42.032 42.033
r:
[a,b,c] = [ 42.034, 42.035, 42.036]
aa: 42.037 42.038 42.039 42.040
bb: 42.041 42.042 42.043 42.044
Deallocating(count = 2, subCount = 4)
The example in 2 files
a header v2.h
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
typedef struct
{
double a;
double b;
double c;
double* aa;
double* bb;
} Cell;
typedef struct
{
Cell l;
Cell r;
} Send;
typedef struct
{
size_t count;
size_t subCount;
Send* send;
} Set;
Set* allocateSendRecv(size_t, size_t);
int fillSendRecv(Set*);
Set* freeSendRecv(Set*);
int printSendRecv(Set*, const char*);
// helpers
Set* from_disk(const char*);
double get_next(void);
int print_cell(Cell*, size_t, const char*);
int read_cell(FILE*, Cell*, const size_t);
int to_disk(Set*, const char*);
int write_cell(Cell*, const size_t, FILE*);
code in file v2.c
#include "v2.h"
#include <stdio.h>
#pragma pack(show)
Set* allocateSendRecv(size_t count, size_t subCount)
{
// count is the number of send buffers
// subcount is the size of the arrays inside each cell
printf(
"AllocateSendRecv(count = %llu, subCount = %llu)\n", count,
subCount);
Set* nw = (Set*)malloc(sizeof(Set));
nw->count = count;
nw->subCount = subCount;
nw->send = (Send*)malloc(count * sizeof(Send));
// now that we have Send allocate the Cell arrays
for (size_t i = 0; i < count; i++)
{
nw->send[i].l.aa =
(double*)malloc(subCount * sizeof(double));
nw->send[i].l.bb =
(double*)malloc(subCount * sizeof(double));
nw->send[i].r.aa =
(double*)malloc(subCount * sizeof(double));
nw->send[i].r.bb =
(double*)malloc(subCount * sizeof(double));
}
return nw;
}
int fillSendRecv(Set* s)
{
printf("FillSendRecv()\n");
if (s == NULL) return -1;
for (size_t i = 0; i < s->count; i += 1)
{
// l
s->send[i].l.a = get_next();
s->send[i].l.b = get_next();
s->send[i].l.c = get_next();
for (size_t j = 0; j < s->subCount; j += 1)
s->send[i].l.aa[j] = get_next();
for (size_t j = 0; j < s->subCount; j += 1)
s->send[i].l.bb[j] = get_next();
// r
s->send[i].r.a = get_next();
s->send[i].r.b = get_next();
s->send[i].r.c = get_next();
for (size_t j = 0; j < s->subCount; j += 1)
s->send[i].r.aa[j] = get_next();
for (size_t j = 0; j < s->subCount; j += 1)
s->send[i].r.bb[j] = get_next();
}
return 0;
}
Set* freeSendRecv(Set* set)
{
if (set == NULL) return NULL;
printf(
"\nDeallocating(count = %llu, subCount = %llu)\n",
set->count, set->subCount);
for (size_t i = 0; i < set->count; i++)
{
free(set->send[i].l.aa);
free(set->send[i].l.bb);
}
free(set->send);
free(set);
return NULL;
}
int printSendRecv(Set* s, const char* msg)
{
if (s == NULL) return -1;
if (msg != NULL) printf("%s", msg);
printf(
" Count is %llu, subCount is %llu\n", s->count,
s->subCount);
for (size_t i = 0; i < s->count; i += 1)
{
printf("\tSet %llu of %llu\n", 1 + i, s->count);
print_cell(&s->send[i].l, s->subCount, "\tl:\n");
print_cell(&s->send[i].r, s->subCount, "\tr:\n");
printf("\n");
}
printf("\n");
return 0;
}
// helpers
Set* from_disk(const char* file)
{
printf("read 'Set' from \"%s\"\n", file);
FILE* in = fopen(file, "rb");
if (in == NULL) return NULL;
size_t res = 0;
size_t count = 0;
res = fread(&count, sizeof(count), 1, in);
size_t subCount = 0;
res = fread(&subCount, sizeof(subCount), 1, in);
printf("From disk: Count = %llu, SubCount = %llu\n",
count,subCount);
Set* nw = allocateSendRecv(count, subCount);
if (nw == NULL)
{
fclose(in);
return NULL; // could not alloc
}
printf("new 'Set' created\n");
nw->count = count;
nw->subCount = subCount;
// so we have the exact structure to hold ALL data
for (size_t i = 0; i < nw->count; i += 1)
{
read_cell(in, &nw->send[i].l, nw->subCount);
read_cell(in, &nw->send[i].r, nw->subCount);
}
fclose(in);
return nw;
}
double get_next(void)
{
static double ix = 42.;
ix += .001;
return ix;
}
int print_cell(Cell* cell, size_t sz, const char* msg)
{
printf(
"%s\t[a,b,c] = [%10.3f,%10.3f,%10.3f]\n", msg,
cell->a, cell->b, cell->c);
printf("\taa: ");
for (size_t j = 0; j < sz; j += 1)
printf("%10.3f ", cell->aa[j]);
printf("\n\tbb: ");
for (size_t j = 0; j < sz; j += 1)
printf("%10.3f ", cell->bb[j]);
printf("\n\n");
return 0;
}
int read_cell(FILE* in, Cell* cell, const size_t size)
{
if (in == NULL) return -2;
if (cell == NULL) return -1;
size_t res = 0;
// a,b,c,aa,bb
res += fread(&cell->a, 1, 3 * sizeof(double), in);
res += fread(cell->aa, 1, size * sizeof(double), in);
res += fread(cell->bb, 1, size * sizeof(double), in);
return 0;
}
int to_disk(Set* set, const char* file)
{
printf("writing 'Set' to \"%s\"\n", file);
FILE* out = fopen(file, "wb");
if (out == NULL) return -1;
size_t res = 0;
res = fwrite(&set->count, sizeof(set->count), 1, out);
res = fwrite(&set->subCount, sizeof(set->subCount), 1, out);
for (size_t i = 0; i < set->count; i += 1)
{
write_cell(&set->send[i].l, set->subCount, out);
write_cell(&set->send[i].r, set->subCount, out);
}
fclose(out);
return 0;
}
int write_cell(Cell* cell, const size_t size, FILE* out)
{
if (cell == NULL) return -1;
if (out == NULL) return -2;
size_t res = 0;
// a,b,c, aa, bb
res += fwrite(&cell->a, 1, 3 * sizeof(double), out);
res += fwrite(cell->aa, 1, size * sizeof(double), out);
res += fwrite(cell->bb, 1, size * sizeof(double), out);
//printf("write_cell(): %llu bytes written to disk\n", res);
return 0;
}
main() for the 2 examples is above in text
casting the return for malloc()
Yes, I always cast the return of malloc() as I and many others do no like anything implicit. And also because malloc() accepts any expression that evaluates to a size an lloking at the expression not always say something about the area. Many times the program allocates data for many structures, some enclosed. This little program has 3. So using the cast works as a reminder for the programmmers of what the program intends to allocate, and can avoid many bugs, since the expression many times is not sufficient to show what is what.
This thing about malloc() and cast comes from the C-FAQ, an old never-updated thing that is a compilation of articles from usenet all dating before 2000. And even in that time people wrote there about the possible reasons to CAST the pointer.
One of the reason pro-casting in the (C-FAQ)[https://c-faq.com/malloc/sd3.html] is that it could alert the programmer for have forgotten to use an include for stdlib.h. I mean it:
Suppose that you call malloc but forget to #include <stdlib.h>.
The compiler is likely to assume that malloc is a function
returning int, which is of course incorrect, and will lead to trouble
Therefore, the seemingly redundant casts are used by people who are
(a) concerned with portability to all pre-ANSI compilers, or
(b) of the opinion that implicit conversions are a bad thing.
I would add the reason I described above.

You can use anonymous struct but it has some caveats:
#define CELL(n) \
struct { \
double a, b, c, aa[n], bb[n]; \
}
the limitations are you need to cannot use global variables as is, and you have to pass void * to subroutines (and then cast inside the body). If you need global variables, you can only use pointers declared as void *
For example
#include <stdio.h>
#include <stdlib.h>
#define CELL(n) \
struct { \
double a, b, c, aa[n], bb[n]; \
}
void * Send_r;
void * Send_l;
void * allocateCells(int count, int sAS) {
return malloc (count * sizeof(CELL(sAS))); // no cast here
}
void fillCells(void * _cells, int count, int sAS, double dummyDouble) {
int iter = 0;
int iter2= 0;
printf("Filling!\n");
CELL(sAS) * cells = _cells;
for (iter = 0; iter < count; iter++) {
cells[iter].a = dummyDouble;
cells[iter].b = dummyDouble;
cells[iter].c = dummyDouble;
for (iter2 = 0; iter2 < sAS; iter2++) {
cells[iter].aa[iter2] = dummyDouble;
cells[iter].bb[iter2] = dummyDouble;
}
}
}
void dumpCells(void * _cells, int count, int sAS, char *file) {
FILE *fd = fopen(file, "w");
CELL(sAS) * cells = _cells;
fwrite(cells, sizeof(*cells), count, fd);
fclose(fd);
}
int main(int argc, char *argv[]) {
int sAS = 5;
int count1 = 10;
Send_r = allocateCells(count1, sAS);
fillCells(Send_r, count1, sAS, 5.0);
dumpCells(Send_r, count1, sAS, "1.bin");
int sAS2 = 20;
int count2 = 30;
Send_l = allocateCells(count2, sAS2);
fillCells(Send_l, count2, sAS2, 6.0);
dumpCells(Send_l, count2, sAS2, "2.bin");
}

Related

double** pointer being realloc'd was not allocated

I have to implement a clustering algorithm, after loading the dataset, I go to check for each point in which cluster it can be inserted. If points cannot be inserted into any cluster, I have to move them from the dataset and insert them into the retained set. Since I do not know a priori the size of the retained set, I allocate an area of memory initially equal to 0 and that is incremented by the bytes size needed to hold a point each time I have to insert a point into the retained set.
It works for some iterations (4 to be precise) and then stops
This is what I try:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <malloc/malloc.h>
#include <float.h>
#include <stdbool.h>
double **load_dataset(char *filename, int d, int chunk_size);
int assign_point_to_cluster(double **clusters, int **set, double **retained_set, double *point,double *standard_deviation,
int d, int k, int *chunk_size, int p_in_r);
int find_candidate_cluster(double **clusters, double *point, double *std_deviation, int d, int k);
double mean(const double *vector, int d);
double mahalanobis(const double *cluster, const double *point, const double *standard_deviation, int d);
void compute_std_dev(const double *cluster, double *standard_deviation_vector, int d);
int inizialize_cluster(double **dataset, int d, int k, double **clusters, int **set, int chunk_size, bool retain);
double compute_sum_of_euclidean_distance(double **center_points, double *point, int n, int d);
void feature_scaling(double **dataset, int d, int chunk_size);
int main(int argc, char **argv) {
if(argc < 6){
printf("Error parameters! Usage: ./main <input_file> <total number of point> <chunk_size> <points_dimension> <cluster_number>");
return 0;
}
char* filename = argv[1];
int d = atoi(argv[4]), k = atoi(argv[5]), chunk_size = atoi(argv[3]), total = atoi(argv[2]);
int k_compressed = 0;
printf("Path: %s\n", filename);
printf("Number of point in set %i\n", total);
printf("chunk size: %i\n", chunk_size);
printf("Dimension of points: %i\n", d);
printf("Number of cluster: %i\n", k);
printf("----------------\n");
double **clusters = malloc(k * sizeof(double *));
double *standard_deviation = malloc(d * sizeof(double));
int **discard_set = malloc(k * sizeof(int *));
double **retained_set = malloc(1);
double * cohesion = malloc(2 * sizeof(double));
double* radius = NULL;
double **mini_cluster = NULL;
double* temp_cluster = NULL;
int** compressed_set = NULL;
double** mini_cluster_temp = NULL;
int p_in_r = 0;
double **dataset = load_dataset(filename, d, chunk_size);
/**
* Rescaling of variables
*/
//feature_scaling(dataset, d, chunk_size); TODO: Something is wrong
/**
* Cluster initialization
*/
if(!clusters || !discard_set || !standard_deviation || !retained_set || !cohesion){
printf("Something went wrong in main(), memory allocation failed!");
exit(1);
}
chunk_size = inizialize_cluster(dataset, d, k, clusters, discard_set, chunk_size, false);
/**
* At this point we are only interested in keeping a "summary" of the data that will be placed within a cluster.
* In dataset we put the id of the points that are added to a cluster, while cluster contains the statistics
* useful to perform clustering
**/
/**
* We start processing the points the (CHUNK - 1)eighth point in the dataset is assigned to the cluster if the
* mahalanobis distance is less than a threshold and if it is the closest.
* Clusetering dataset -> discard_set
*/
while (chunk_size > 0) {
p_in_r += assign_point_to_cluster(clusters, discard_set, retained_set, dataset[chunk_size - 1], standard_deviation, d, k, &chunk_size, p_in_r);
/**
* always working on the last element of the dataset, it is not necessary to move the list of points,
* just delete the last element
*/
free(dataset[chunk_size]);
dataset[chunk_size] = NULL;
dataset = realloc(dataset, chunk_size * sizeof(double *));
if(dataset == NULL){
printf("Something went wrong in main(), memory allocation failed!");
exit(1);
}
}
free(dataset);
dataset = NULL;
return 0;
}
int inizialize_cluster(double **dataset, int d, int k, double ** clusters, int** set, int chunk_size, bool retain) {
double ** center_point = malloc(k * sizeof(double *));
for (int i = 0; i < k; i++) {
center_point[i] = malloc((d + 1) * sizeof(double));
if(center_point[i] == NULL){
printf("Something went wrong in inizialize_cluster(), memory allocation failed!");
exit(1);
}
}
/**
* The point representing the center of the first cluster is chosen as the first point in the dataset
**/
memcpy(*center_point, *dataset, (d + 1) * sizeof(double));
/**
* The first point can be removed from the dataset or
* in case we are working on the retained set, move it to the end.
**/
chunk_size--;
if(retain){
double* temp = malloc(sizeof(double *));
memcpy(temp, dataset, sizeof(double *));
memcpy(dataset, dataset+1, chunk_size * sizeof(double *));
memcpy(dataset+chunk_size-1, temp, sizeof(double *));
/*for (int i = 0; i < CHUNK; ++i) {
printf("id[%i]: %f", dataset[i][0]);
}*/
}
else{
free(dataset[0]);
memcpy(dataset, dataset+1, chunk_size * sizeof(double *));
dataset[chunk_size] = NULL;
dataset = realloc(dataset, chunk_size * sizeof(double *));
if(dataset == NULL){
printf("Something went wrong in inizialize_cluster(), memory allocation failed!");
exit(1);
}
}
/**
* The centers of the next clusters are chosen as those that are furthest apart
**/
double max;
int pos;
double distance;
for (int i = 1; i < k; i++) {
/**
* I choose the point that maximizes the sum of the distances from the centerpieces
*/
max = -1;
for (int j = 0; j < chunk_size; j++){
distance = compute_sum_of_euclidean_distance(center_point, dataset[j], i, d);
if (distance > max) {
pos = j;
max = distance;
}
}
memcpy(*(center_point + i), *(dataset + pos), (d + 1) * sizeof(double));
/**
* When a point is chosen as the center of a cluster, I remove it from the dataset
**/
chunk_size--;
if(retain){
double** temp = malloc(sizeof(double *));
memcpy(temp, dataset + pos, sizeof(double *));
memcpy(dataset + pos, dataset + pos + 1, (chunk_size - pos) * sizeof(double *));
memcpy(dataset + chunk_size - 1, temp, sizeof(double *));
}
else{
free(dataset[pos]);
memcpy(dataset + pos, dataset + pos + 1, (chunk_size - pos) * sizeof(double *));
dataset = realloc(dataset, chunk_size * sizeof(double *));
if(dataset == NULL){
printf("Something went wrong in inizialize_cluster(), memory allocation failed!");
exit(1);
}
}
}
/**
* When I have found k points that can be used as the initial centres of the k clusters,
* I summarize them (calculate cluster statistics) and enter them into the discard set.
*/
for (int i = 0; i < k; i++) {
/**
* Cluster and discard set initialization
*/
clusters[i] = malloc(((2 * d) + 1) * sizeof(double));
set[i] = malloc(sizeof(int ));
if(clusters[i] == NULL || set[i] == NULL){
printf("Something went wrong in in inizialize_cluster(), memory allocation failed!");
exit(1);
}
clusters[i][0]=1;
set[i][0] = (int) center_point[i][0];
for (int j = 1; j < d + 1; j++) {
clusters[i][j] = center_point[i][j];
clusters[i][j + d] = pow(center_point[i][j], 2);
}
free(center_point[i]);
center_point[i] = NULL;
}
free(center_point);
center_point = NULL;
return chunk_size;
}
double **load_dataset(char *filename, int d, int chunk_size) {
double **dataset = malloc(chunk_size * sizeof(double *));
if(dataset == NULL){
printf("Something went wrong in load_dataset(), memory allocation failed!");
exit(1);
}
for (int i = 0; i < chunk_size; i++) {
dataset[i] = malloc((d + 1) * sizeof(double));
if(dataset[i] == NULL){
printf("Something went wrong in load_dataset(), memory allocation failed!");
exit(1);
}
}
FILE *file;
file=fopen(filename, "r");
if (file == NULL){
printf("Something went wrong in load_dataset(), file opening failed! (row 162)");
exit(1);
}
char *line = NULL, *token;
size_t len = 0;
int i = 0;
int j = 0;
int first_line = 0;
while ((getline(&line, &len, file)) != -1 && i < chunk_size) {
if(first_line != 0) {
while ((token = strsep(&line, ",")) != NULL) {
dataset[i][j] = atof(token);
j++;
}
j = 0;
i++;
} else{
first_line = 1;
}
}
fclose(file);
return dataset;
}
int assign_point_to_cluster(double **clusters, int **set, double **retained_set, double *point,double *standard_deviation,
int d, int k, int *chunk_size, int p_in_r) {
/**
* For each point I assess which cluster it can go into
*/
int candidate;
candidate = find_candidate_cluster(clusters, point, standard_deviation, d, k);
/**
* After identifying the candidate cluster (if there is one), I add the point to the discard set and update the
* cluster statistics otherwise I go ahead and put the point in the retained set
*/
(*chunk_size)--;
if(candidate > -1){
/**
* I add the point to the discard/compressed set
*/
clusters[candidate][0]++;
set[candidate] = realloc(set[candidate], (unsigned long)clusters[candidate][0] * sizeof(int));
if(set[candidate] == NULL){
printf("Something went wrong in in assign_point_to_cluster(), memory allocation failed!");
exit(1);
}
set[candidate][(int) clusters[candidate][0] - 1] = (int) point[0];
/**
* I update the cluster statistics
*/
for (int i = 1; i < d + 1; i++) {
clusters[candidate][i] += point[i];
clusters[candidate][i + d] += pow(point[i], 2);
}
}
else if(retained_set){
/**
* I insert the point in the retained set
*/
p_in_r++;
retained_set = realloc(retained_set, p_in_r * sizeof(double *));
retained_set[p_in_r - 1] = malloc((d + 1) * sizeof(double));
memcpy(*(retained_set + p_in_r - 1), point, (d + 1) * sizeof(double ));
return 1;
}
return 0;
}
int find_candidate_cluster(double **clusters, double *point, double *std_deviation, int d, int k) {
double actual = DBL_MAX;
int candidate = -1;
double threshold;
double distance;
for (int j = 0; j < k; j++) {
/**
* Calculation of varainza,threshold and mahalanobis' distance
*/
compute_std_dev(clusters[j], std_deviation, d);
//TODO: Would it be okay as a threshold? An alternative could be the module?
threshold = 3.5 * mean(std_deviation, d);
distance = mahalanobis(clusters[j], point, std_deviation, d);
if(distance < threshold && distance < actual){
/**
* the cluster is a candidate for the point
*/
candidate = j;
actual = distance;
}
}
return candidate;
}
double mean(const double *vector, int d) {
double sum = 0;
for (int i = 0; i < d; ++i) {
sum += vector[i];
}
return sum/d;
}
void compute_std_dev(const double *cluster, double *standard_deviation_vector, int d) {
double sigma;
/**
* Vector of the variances of the components of the cluster elements
*/
for (int i = 0; i < d; i++) {
sigma = sqrt(fabs(cluster[i + 1 + d]/cluster[0] - pow(cluster[i + 1]/cluster[0], 2)));
if( sigma == 0)
sigma = 1;
standard_deviation_vector[i] = sigma;
}
}
double mahalanobis(const double *cluster, const double *point, const double *standard_deviation, int d) {
double distance=0;
for (int i = 1; i < d; ++i) {
distance += pow((point[i] - cluster[i]) / standard_deviation[i - 1], 2);
}
return sqrt(distance)/d; //TODO: can it be okay? I thought so since the threshold is the average of the st.dev.
}
double compute_sum_of_euclidean_distance(double **center_points, double *point, int n, int d) {
double component_sum = 0;
double final_sum = 0;
for (int i = 0; i < n; i++) {
for (int j = 1; j < d + 1; j++){
component_sum += pow(center_points[i][j] - point[j], 2);
}
final_sum += sqrt(component_sum);
}
return final_sum;
}
void feature_scaling(double **dataset, int d, int chunk_size) {
/**
* We perform a Z-score Normalization
**/
double mean;
double sigma;
double sum;
double sumQ;
double variance;
/**
* We calculate mean and variance for each column
**/
for (int i = 1; i < d + 1; i++) {
sum = 0;
for (int j = 0; j < chunk_size; j++) {
sum += dataset[j][i];
}
mean = sum / chunk_size;
sumQ = 0;
for (int j = 0; j < chunk_size; j++) {
sumQ += pow((dataset[j][i] - mean), 2);
}
variance = sumQ / chunk_size;
sigma = sqrt(variance);
if( sigma == 0)
sigma = 1;
/**
* Feature scaling: (x-x_med)/sigma
**/
for (int j = 0; j < chunk_size; j++) {
dataset[j][i] = (dataset[j][i] - mean) / sigma;
}
}
}
The command I use when run is:
./main "db.csv" 100 35 4 3
It works if the 3rd argument is less then 34
The file db.csv contains:
CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100),cluster
1,0,19,15,39,4
2,0,21,47,81,3
3,1,20,56,6,4
4,1,23,16,77,3
5,1,31,17,40,4
6,1,22,17,76,3
7,1,35,18,6,4
8,1,23,18,94,3
9,0,64,19,3,4
10,1,30,19,72,3
11,0,67,19,14,4
12,1,35,19,99,3
13,1,58,20,15,4
14,1,24,20,77,3
15,0,37,20,13,4
16,0,22,20,79,3
17,1,35,21,35,4
18,0,20,21,66,3
19,0,52,23,29,4
20,1,35,23,98,3
21,0,35,24,35,4
22,0,25,24,73,3
23,1,46,25,5,4
24,0,31,25,73,3
25,1,54,28,14,4
26,0,29,28,82,3
27,1,45,28,32,4
28,0,35,28,61,3
29,1,40,29,31,4
30,1,23,29,87,3
31,0,60,30,4,4
32,1,21,30,73,3
33,0,53,33,4,4
34,0,18,33,92,3
35,1,49,33,14,4
36,1,21,33,81,3
37,1,42,34,17,4
38,1,30,34,73,3
39,1,36,37,26,4
40,1,20,37,75,3
41,1,65,38,35,0
42,0,24,38,92,3
43,0,48,39,36,0
44,1,31,39,61,5
45,1,49,39,28,4
46,1,24,39,65,3
47,1,50,40,55,0
48,1,27,40,47,5
49,1,29,40,42,5
50,1,31,40,42,5
51,1,49,42,52,0
52,0,33,42,60,5
53,1,31,43,54,5
54,0,59,43,60,0
55,1,50,43,45,0
56,0,47,43,41,0
57,1,51,44,50,0
58,0,69,44,46,0
59,1,27,46,51,5
60,0,53,46,46,0
61,0,70,46,56,0
62,0,19,46,55,5
63,1,67,47,52,0
64,1,54,47,59,0
65,0,63,48,51,0
66,0,18,48,59,5
67,1,43,48,50,0
68,1,68,48,48,0
69,0,19,48,59,5
70,1,32,48,47,5
71,0,70,49,55,0
72,1,47,49,42,0
73,1,60,50,49,0
74,1,60,50,56,0
75,0,59,54,47,0
76,0,26,54,54,5
77,1,45,54,53,0
78,0,40,54,48,5
79,1,23,54,52,5
80,1,49,54,42,0
81,0,57,54,51,0
82,0,38,54,55,5
83,0,67,54,41,0
84,1,46,54,44,0
85,1,21,54,57,5
86,0,48,54,46,0
87,1,55,57,58,0
88,1,22,57,55,5
89,1,34,58,60,5
90,1,50,58,46,0
91,1,68,59,55,0
92,0,18,59,41,5
93,0,48,60,49,0
94,1,40,60,40,5
95,1,32,60,42,5
96,0,24,60,52,5
97,1,47,60,47,0
98,1,27,60,50,5
99,0,48,61,42,0
100,0,20,61,49,5
101,1,23,62,41,5
102,1,49,62,48,0
103,0,67,62,59,0
104,0,26,62,55,5
105,0,49,62,56,0
106,1,21,62,42,5
107,1,66,63,50,0
108,0,54,63,46,0
109,0,68,63,43,0
110,0,66,63,48,0
111,0,65,63,52,0
112,1,19,63,54,5
113,1,38,64,42,5
114,0,19,64,46,5
115,1,18,65,48,5
116,1,19,65,50,5
117,1,63,65,43,0
118,1,49,65,59,0
119,1,51,67,43,0
120,1,50,67,57,0
121,0,27,67,56,5
122,1,38,67,40,5
123,1,40,69,58,5
124,0,39,69,91,1
125,1,23,70,29,5
126,1,31,70,77,1
127,0,43,71,35,2
128,0,40,71,95,1
129,0,59,71,11,2
130,0,38,71,75,1
131,0,47,71,9,2
132,0,39,71,75,1
133,1,25,72,34,5
134,1,31,72,71,1
135,0,20,73,5,2
136,1,29,73,88,1
137,1,44,73,7,2
138,0,32,73,73,1
139,0,19,74,10,2
140,1,35,74,72,1
141,1,57,75,5,2
142,0,32,75,93,1
143,1,28,76,40,5
144,1,32,76,87,1
145,0,25,77,12,2
146,0,28,77,97,1
147,0,48,77,36,2
148,1,32,77,74,1
149,1,34,78,22,2
150,0,34,78,90,1
151,0,43,78,17,2
152,0,39,78,88,1
153,1,44,78,20,2
154,1,38,78,76,1
155,1,47,78,16,2
156,1,27,78,89,1
157,0,37,78,1,2
158,1,30,78,78,1
159,0,34,78,1,2
160,1,30,78,73,1
161,1,56,79,35,2
162,1,29,79,83,1
163,0,19,81,5,2
164,1,31,81,93,1
165,0,50,85,26,2
166,1,36,85,75,1
167,0,42,86,20,2
168,1,33,86,95,1
169,1,36,87,27,2
170,0,32,87,63,1
171,0,40,87,13,2
172,0,28,87,75,1
173,0,36,87,10,2
174,0,36,87,92,1
175,1,52,88,13,2
176,1,30,88,86,1
177,0,58,88,15,2
178,0,27,88,69,1
179,0,59,93,14,2
180,0,35,93,90,1
181,1,37,97,32,2
182,1,32,97,86,1
183,0,46,98,15,2
184,1,29,98,88,1
185,1,41,99,39,2
186,0,30,99,97,1
187,1,54,101,24,2
188,0,28,101,68,1
189,1,41,103,17,2
190,1,36,103,85,1
191,1,34,103,23,2
192,1,32,103,69,1
193,0,33,113,8,2
194,1,38,113,91,1
195,1,47,120,16,2
196,1,35,120,79,1
197,1,45,126,28,2
198,0,32,126,74,1
199,0,32,137,18,2
200,0,30,137,83,1
download it from mega: db.csv.
Originally found on Kaggle but I made some modifications.
Edit: I included the whole code
Edit: I alse get this error trying to see what is in retained_set : read memory from 0x3d2fdfcb8030 failed (0 of 8 bytes read)
Edit: I translate the comment in the code and added the file I use as input
assign_point_to_cluster has a local variable double **retained_set. This means that you cannot do retained_set = realloc(retained_set, ... or you will just change where that local variable points at, not where the pointer-to-pointer on the caller side points at. And because of that you also create a memory leak. See this FAQ: Dynamic memory access only works inside function
As for how to solve it, it appears that encapsulating all of this data into structs would simplify the program a lot. You could also implement it as an "opaque type" (How to do private encapsulation in C?) and get rid of the caller's responsibility to handle dynamic allocation.
Using 2D arrays instead of pointer-to-pointers might also simplify the program and improve performance. For example if you could use a "pointer to array pointer" parameter double (**retained_set)[x][y]) then you could do double (*tmp)[x][y] = realloc(*retained_set,...) and then *retained_set = tmp;, which would affect the caller. But structs would be easier to read so that should be the first option.
Also note that malloc.h has been obsolete since forever. difference between <stdlib.h> and <malloc.h>

sort array in C, return sorted indices

I'm using an example from https://phoxis.org/2012/07/12/get-sorted-index-orderting-of-an-array/ where he returns the sort indices from a sort of an array, i.e.
3,4,2,6,8 returns 4,3,1,0,2 (+1 for each index in R). This is the equivalent of R's order function
I've translated his/her code to work as a function returning an array of sorted indices. The code gives the correct answer.
keeping track of the original indices of an array after sorting in C has a similar response, but as #BLUEPIXY warns, his solution doesn't work in all circumstances. I need something that will work in all circumstances, including ties.
however, the original author uses a global pointer, which causes a memory leak, and free() doesn't fix it. which I don't know how to do this without the global pointer.
How can I fix this memory leak, or at least return sorted indices in C that will always work?
#include <stdio.h>
#include <stdlib.h>
/* holds the address of the array of which the sorted index
* order needs to be found
*/
int * base_arr = NULL;
/* Note how the compare function compares the values of the
* array to be sorted. The passed value to this function
* by `qsort' are actually the `idx' array elements.
*/
static int compar_increase (const void * a, const void * b) {
int aa = *((int * ) a), bb = *((int *) b);
if (base_arr[aa] < base_arr[bb]) {
return 1;
} else if (base_arr[aa] == base_arr[bb]) {
return 0;
} else {
// if (base_arr[aa] > base_arr[bb])
return -1;
}
}
int * order_int (const int * ARRAY, const size_t SIZE) {
int * idx = malloc(SIZE * sizeof(int));
base_arr = malloc(sizeof(int) * SIZE);
for (size_t i = 0; i < SIZE; i++) {
base_arr[i] = ARRAY[i];
idx[i] = i;
}
qsort(idx, SIZE, sizeof(int), compar_increase);
free(base_arr); base_arr = NULL;
return idx;
}
int main () {
const int a[] = {3,4,2,6,8};
int * b = malloc(sizeof(int) * sizeof(a) / sizeof (*a));
b = order_int(a, sizeof(a) / sizeof(*a));
for (size_t i = 0; i < sizeof(a)/sizeof(*a); i++) {
printf("b[%lu] = %d\n", i, b[i]+1);
}
free(b); b = NULL;
return 0;
}
A straightforward approach without using a global variable can look the following way
#include <stdio.h>
#include <stdlib.h>
int cmp_ptr(const void *a, const void *b)
{
const int **left = (const int **)a;
const int **right = (const int **)b;
return (**left < **right) - (**right < **left);
}
size_t * order_int(const int *a, size_t n)
{
const int **pointers = malloc(n * sizeof(const int *));
for (size_t i = 0; i < n; i++) pointers[i] = a + i;
qsort(pointers, n, sizeof(const int *), cmp_ptr);
size_t *indices = malloc(n * sizeof(size_t));
for (size_t i = 0; i < n; i++) indices[i] = pointers[i] - a;
free(pointers);
return indices;
}
int main( void )
{
const int a[] = { 3,4,2,6,8 };
const size_t N = sizeof(a) / sizeof(*a);
size_t *indices = order_int(a, N);
for (size_t i = 0; i < N; i++) printf("%d ", a[indices[i]]);
putchar('\n');
free(indices);
return 0;
}
The program output is
8 6 4 3 2
As for the memory leak then it is due to overwriting the value of the pointer to redundantly allocated memory.
int * b = malloc(sizeof(int) * sizeof(a) / sizeof (*a));
b = order_int(a, sizeof(a) / sizeof(*a));
The memory allocation does not make sense.
The problem I see is that within main function - you are allocating pointer b some memory -
int * b = malloc(sizeof(int) * sizeof(a) / sizeof (*a));
The next line calls order_int(...) that returns a pointer to already allocated memory -
b = order_int(a, sizeof(a) / sizeof(*a));
Looking at the order_int function -
int * order_int (const int * ARRAY, const size_t SIZE) {
int * idx = malloc(SIZE * sizeof(int));
base_arr = malloc(sizeof(int) * SIZE);
for (size_t i = 0; i < SIZE; i++) {
base_arr[i] = ARRAY[i];
idx[i] = i;
}
qsort(idx, SIZE, sizeof(int), compar_increase);
free(base_arr); base_arr = NULL;
return idx;
}
.. you see that idx has been already been allocated the correct memory.
I would suggest removing the malloc from b - see below.
int * b = NULL;

wrong allocate memory matrix C?

I'm developing a program that read from CSV file and calculate score with a method "calculateMLpa". The method receive array of char and array of 10 float, and transform array of float in matrix 3x3. When read the position 3rd number from array, insert in matrix the 4th number and same for 6th number.
I.E.
array value[]={0.000000;123.814934;234.000000;100.000000;166.000000; 203.086639;383.000000;186.000000;338.000000;173.098419 }
array traj[]={"0-0";"0-1";"0-2";"1-0";"1-1";"1-2";"2-0";"2-1";"2-2"}
Xn_val[]={"0","1","2"}
When transform in matrix the result is:
123.814934 234.000000 166.000000
166.000000 203.086639 186.000000
186.000000 338.000000 173.098419
While the expected for [0;2] is 100.000000 and for [1;2]=383.000000, but when print the currently value of traj it's correct.
How can I fix this problem?
The code is all here:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>
#define ARRAYSIZE(x) (sizeof(x)/sizeof(*(x)))
int csv_parse ( char *line, int size )
{
char *p;
char *dp;
int inquote;
int na;
int nTo_comma;
char prevc = ',';
char *list[256];
dp = NULL;
// inquote = 0;
na = 0;
prevc = ';';
nTo_comma=0;
for ( p = line; *p != '\n'; p++ )
{
nTo_comma++;
list[nTo_comma] = p;
if(*p == prevc)
{
printf("%s\t", list);
return na;
}
}
printf("\n");
return na;
}
double calculateMLpa(const char *Xn_val[], char *traj[], float value[], double alphaxixj, double tauxi, int sz, int dim) {
double mlx = 0;
double v;
double alphaxi;
char *state;
int i;
int p;
int j;
int k;
// int sz = sizeof(Xn_val) / sizeof(int);
// int dim = sizeof(traj) / sizeof(char);
double trns[sz][sz];
double m[sz];
char *trat="-";
// m[xi] values: the number of transitions leaving the state xi
printf("%d %d \n",sz,dim);
int cont=0;
for (i = 0; i <= sz; i++) {
m[i] = 0.0;
for (j = 0; j <= sz; j++) {
v = 0.0;
int newlength = strlen(Xn_val[i])+strlen(trat)+strlen(Xn_val[j])+1;
state = malloc(sizeof(char)*newlength);
if(state != NULL){
state[0] = '\0';
strcat(state,Xn_val[i]);
strcat(state,trat);
strcat(state,Xn_val[j]);
printf("%s ",state);
}else {
printf(stderr,"malloc failed!\n");
}
// for (k=0; k<=dim;++k){
if (traj[cont] != NULL ){
if (strcmp(traj[cont],state)==0){
v = value[cont+1];
printf("%f \n",v);
}
}
trns[i][j] = v;
printf("%f - \n",trns[i][j]);
if (strcmp(Xn_val[i],Xn_val[j])!=0)
m[i] = m[i] + v;
cont++;
}
}
for (i=0;i<=sz;++i){
for(j=0;j<=sz;++j){
printf("%f ",trns[i][j]);
}
printf("\n");
}
for (p=0;p<=sz;++p){
printf("%f - \n",m[p]);
}
printf("%f %f\n",trns[0][1],trns[0][2]);
alphaxi = alphaxixj * (((double) sz) - 1.0);
alphaxi = alphaxixj;
printf("%d ",sz);
for (i = 0; i <= sz; i++) {
for (j = 0; j <= sz; j++) {
// xi!=xj
if (strcmp(Xn_val[i], Xn_val[j])!=0) {
mlx = mlx + lgamma(alphaxixj + trns[i][j]) - lgamma(alphaxixj);
}
// xi
else {
mlx = mlx + lgamma(alphaxi) - lgamma(alphaxi + m[i]);
mlx = mlx + lgamma(alphaxi + m[i] + 1.0)+ (alphaxi + 1.0) * log(tauxi);
mlx = mlx - lgamma(alphaxi + 1.0)- (alphaxi + m[i] + 1.0) * log(tauxi + trns[i][j]);
}
}
}
return (mlx);
}
#define MAXFLDS 200 /* maximum possible number of fields */
#define MAXFLDSIZE 32 /* longest possible field + 1 = 31 byte field */
void parse(char *record, char *delim, char arr[][MAXFLDSIZE], int *fldcnt) {
char*p = strtok(record, delim);
int fld = 0;
while (p) {
strcpy(arr[fld], p);
fld++;
p = strtok('\0', delim);
}
*fldcnt = fld;
}
void main() {
printf("inizio\n");
FILE *pf;
int N=20;
bool first=true;
const char *a[]={"0","1","2"};
char *traject[]={"0-0","0-1","0-2","1-0","1-1","1-2","2-0","2-1","2-2"};
double bs=0;
char *trat="-";
pf=fopen("//home//user//prova.csv","r");
float array[10][10];
float *t;
char *str= "hello";
char *state;
t = (float *)malloc(N * sizeof(float));
int f=0;
if (pf)
{
size_t i, j, k;
char buffer[BUFSIZ], *ptr;
/*
* Read each line from the file.
*/
for ( i = 0; fgets(buffer, sizeof buffer, pf); ++i )
{
/*
* Parse the comma-separated values from each line into 'array'.
*/
for ( j = 0, ptr = buffer; j < ARRAYSIZE(*array); ++j, ++ptr )
{
array[i][j] = strtof(ptr, &ptr);
}
}
fclose(pf);}
else /* fopen() returned NULL */
{
perror(pf);
}
for(f=0; f<10; ++f){
if(f==0){}
else if(f==1 && array[f][8]==0)
array[f][8]=123.8149353;
t[f]=array[f][8];
//printf("%f \n",t[f]);
}
for (f=0;f<10; ++f){
printf("%f - ",t[f]);
}
//printf("%s, %s, %s \n",a[0],a[1],a[2]);
printf("start\n");
int sz = sizeof(a) / sizeof(char);
int dim = sizeof(traject) / sizeof(char);
printf("%d , %d \n",sz,dim);
bs=calculateMLpa(a,traject,t,1.0,0.1,sz,dim);
printf("done \n");
printf("%f ",bs);
}
EDIT
I try to pass array size
sz=sizeof(a)/sizeof(char)
dim = sizeof(traject) / sizeof(char);
but their value is 24 and 72 respectively, and the execution stops at 0-2 value 100.000000
Arrays passed to functions decay to pointers to the start of the array. So
#define ARRAYSIZE(x) (sizeof(x)/sizeof(*(x)))
Will not return anything meaningful when checking for its size in that case
To fix, pass the Array size as an additional Argument.
One major problem is that when you pass arrays to functions, they decay to pointers, and the sizeof trick you use to get the array size will not work.
You need to pass the actual array sizes as arguments.

Dynamic multidimensional array on the heap

I want to create a function which can allocate a multidimensional array on the heap with only one call to malloc. (Pointer array) So a function call would look like this:
size_t dim[2] = {2, 4};
int **_2darray = alloc_array(sizeof(int), dim, 2);
// ^ should be the "same" as:
int __2darray[2][4];
What I have so far is the SIZE computation of the whole block needed to hold the array and the pointers:
void *alloc_array(size_t element_size, size_t dimensions[static 1], size_t ndims)
{
unsigned char *DATA = NULL;
size_t SIZE = 0;
size_t multiplicators[ndims];
// Calculate for each dimension the multiplier
// SIZE 3d array: (N1 * sizeof(T **) + (N1 * N2 + sizeof(T *) + (N1 * N2 * n3 + sizeof(T))
// ^- first mulitplier ^ second multiplier ^ third multiplier
for (size_t i = 0; i < ndims; ++i) {
multiplicators[i] = dimensions[i];
for (size_t j = 0; j < i; ++j) {
multiplicators[i] *= dimensions[j];
}
}
SIZE = 0;
for (size_t dimI = 0; dimI < ndims; ++dimI) {
size_t mulval = multiplicators[dimI];
// The elements are in the "last" dimension
if (dimI+1 == ndims) {
SIZE += element_size * mulval;
} else {
// All other elements are pointers to the specific element
SIZE += sizeof(void *) * mulval;
}
}
DATA = malloc(SIZE);
return DATA;
}
So by now the SIZE calculation works. But now I'm stuck with setting the pointers to the right element. I know it's easy with dealing with static dimensions but I want this to be done with dynamic dimensions.
#include <stdlib.h>
#include <stdio.h>
void fill_array_pointers (void** pointers, char* elements,
size_t element_size, size_t total_elements_size,
size_t dimensions[], size_t ndims)
{
if (ndims == 2)
{
size_t i;
for (i = 0; i < dimensions[0]; ++i)
{
pointers[i] = elements + i * element_size * dimensions[1];
}
}
else
{
size_t i;
size_t block_size = total_elements_size / dimensions[0];
for (i = 0; i < dimensions[0]; ++i)
{
pointers[i] = pointers + dimensions[0] + i * dimensions[1];
fill_array_pointers (pointers + dimensions[0]
+ i * dimensions[1],
elements + block_size * i,
element_size, block_size,
dimensions+1, ndims-1);
}
}
}
void* alloc_array (size_t element_size, size_t dimensions[],
size_t ndims)
{
size_t total_elements_size = element_size;
int i;
// total size of elements
for (i = 0; i < ndims; ++i)
total_elements_size *= dimensions[i];
// total size of pointers
size_t total_pointers_size = 0;
int mulval = 1;
for (i = 0; i < ndims-1; ++i)
{
total_pointers_size += dimensions[i] * sizeof(void*) * mulval;
mulval *= dimensions[i];
}
size_t total_size = total_pointers_size;
size_t oddball = total_pointers_size % element_size;
// really needs to be alignof but we don't have it
if (oddball) total_size += (element_size - oddball);
total_size += total_elements_size;
void* block = malloc (total_size);
void** pointers = block;
char* elements = (char*)block + total_size - total_elements_size;
fill_array_pointers (pointers, elements, element_size,
total_elements_size, dimensions, ndims);
return block;
}
Test drive:
int main ()
{
size_t dims[] = { 2, 3, 4 };
int*** arr = alloc_array(sizeof(int), dims, 3);
int i, j, k;
for (i = 0; i < dims[0]; ++i)
for (j = 0; j < dims[1]; ++j)
for (k = 0; k < dims[2]; ++k)
{
arr[i][j][k] = i*100+j*10+k;
}
for (i = 0; i < dims[0]*dims[1]*dims[2]; ++i)
{
printf ("%03d ", (&arr[0][0][0])[i]);
}
printf ("\n");
free (arr);
}
This will not work for multidimensional char arrays on systems where sizeof(char*) != sizeof(char**); such systems exist but are rare. Multidimensional char arrays are pointless anyway.
The test runs cleanly under valgrind.
This is more an intellectual exercise than anything else. If you need maximum performance, don't use arrays of pointers, use a flat array and ugly but efficient explicit index calculations. If you need clear and concise code, you are probably better off allocating each level separately.

Using a structure in a recursive function (referenced structure)

I'm having problems understanding how to write code that solves the following problem: I have a structure containing a 2D-array. Then I have a recursive function that take a pointer to the structure as an argument and I want the recursive function to be able to manipulate the structure sent, not a local copy.
The struct is initialized in the function initStruct, where memory for the 2D-array is allocated. The recursive function builds up an array and at a specific point calls a function to insert it into the structure's array.
The code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int** spBasis(int);
void mpBasis(int**, int, int, int, int, int, int, int*, struct mpBasis *, int, int);
void initMpBasis(struct mpBasis *, int, int);
void insertMpState(struct mpBasis *, int *);
struct mpBasis {
int** basis;
int size;
int capacity;
};
int main() {
int a, b, c, d;
char maxE[256];
char noParticles[256];
char P[256];
char M[256];
FILE *fp;
int **spStates;
struct mpBasis *mp;
int mpState[6] = {0, 0, 0, 0, 0, 0};
printf("Input max e for sp states, no of particles, parity (1 for odd and 0 for even) and magnetic projection: ");
gets(maxE);
gets(noParticles);
gets(P);
gets(M);
spStates = spBasis(atoi(maxE));
fp = fopen("spStates.txt", "a+");
fprintf(fp, "E\tj\tl\tm\n");
for (a = 0; a < 330; a++) {
fprintf(fp, "State %d: ", a+1);
for (b = 0; b < 4; b++) {
fprintf(fp, "%d\t", spStates[a][b]);
}
fprintf(fp, "\n");
}
mp = malloc(sizeof(struct mpBasis));
initMpBasis(mp, 5449, 6);
for (c = 0; c < 5449; c++) {
for (d = 0; d < 6; d++) {
fprintf(fp, "%d: %d\t", c, mp->basis[c][d]);
}
fprintf(fp, "\n");
}
printf("%p\n", (void*) mp);
printf("hello 3");
mpBasis(spStates, 0, atoi(maxE), 0, atoi(M), 0, atoi(P), mpState, mp, 0, 0);
fclose(fp);
return 0;
}
int** spBasis(int maxE) {
int c;
int i, j, k, l;
int q = 0;
int** spStates;
spStates = (int**)malloc(330 * sizeof(int *));
for (c = 0; c < 330; c++) {
spStates[c] = malloc(4 * sizeof(int));
}
for (i = 0; i <= maxE; i++) {
for (j = i % 2; j <= i; j += 2) {
for (k = -(2 * j + 1); k <= (2 * j + 1); k += 2) {
spStates[q][0] = i;
spStates[q][1] = j;
spStates[q][2] = 2 * j + 1;
spStates[q][3] = k;
q += 1;
}
for (l = -(2 * j - 1); l <= (2 * j - 1); l += 2) {
spStates[q][0] = i;
spStates[q][1] = j;
spStates[q][2] = 2 * j - 1;
spStates[q][3] = l;
q += 1;
}
}
}
return spStates;
}
void mpBasis(int** spStates, int e, int maxE, int m, int M, int l,
int P, int * mpState, struct mpBasis *mpB, int position, int lastSpState) {
int i;
for (i = lastSpState; i < 330; i++) {
if (e > maxE) {
break;
} else if (position == 5) {
if (m == M && l % 2 == P) {
insertMpState(mpB, mpState);
break;
}
} else {
// add spState to mpState and make the recursive call for the next position
mpState[position] = i;
mpBasis(spStates, e + spStates[i][0], maxE, m + spStates[i][3], M,
l + spStates[i][1], P, mpState, mpB, position+1, i);
}
}
}
void initMpBasis(struct mpBasis *a, int initialSize, int sizeY) {
int c;
a->basis = (int **)malloc(initialSize * sizeof(int*));
for (c = 0; c < initialSize; c++) {
a->basis[c] = (int *) malloc(sizeY * sizeof(int));
}
a->size = 0;
a->capacity = initialSize;
}
void insertMpState(struct mpBasis *a, int* mpState) {
/*if (a->size == a->capacity) {
a->size *= 2;
a->basis = (int **)realloc(a->basis, a->size * sizeof(int));
}*/
a->basis[a->size++] = mpState;
}
Added all the code.
The problem is that after the recursive function has been called, the "basis" array in structure mpBasis still only contains random values, i.e. the mpBasis function hasn't done anything with it. Am I passing the mp argument by value here?
Thanks for your help!
The first step is to compile with warnings enabled. Eg if you are using GCC you can use option -Wall -Wextra.
EDIT:
(previous listing of >20 errors removed)
Ok, since you are using Visual Studio, enable warnings like this:
Open the project's Property Pages dialog box.
Select C/C++.
On the General property page, modify the Warning Level to /W4

Resources