I've started working my way through the Princeton Algorithms course on Coursera. The course uses Java, but I decided to follow along with C as it is what I am most comfortable with. One of the assignments has you write a program to estimate the value of the percolation threshold via a Monte Carlo simulation (https://coursera.cs.princeton.edu/algs4/assignments/percolation/specification.php). I have written all the code, but my program's output is not the same as the one on the site (it is not completely off, but still incorrect.) e.g.
Their implementation:
~/Desktop/percolation> java-algs4 PercolationStats 200 100
mean = 0.5929934999999997
stddev = 0.00876990421552567
95% confidence interval = [0.5912745987737567, 0.5947124012262428]
~/Desktop/percolation> java-algs4 PercolationStats 2 100000
mean = 0.6669475
stddev = 0.11775205263262094
95% confidence interval = [0.666217665216461, 0.6676773347835391]
Mine:
~/percolation> ./percolation_stats 200 100
mean = 0.628564
stddev = 0.206286
95% confidence interval = [0.588132, 0.668996]
~/percolation> ./percolation_stats 2 100000
mean = 0.728548
stddev = 0.189745
95% confidence interval = [0.727371, 0.729724]
Here is my code:
percolation_stats.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include "percolation.h"
double mean(double *, int);
double stddev(double *, int);
double confidencelo(double *, int);
double confidencehi(double *, int);
int main(int argc, char **argv) {
int n, T, row, col;
percolation *p;
double *samp;
srand((unsigned) time(NULL));
sscanf(argv[1], "%d", &n);
sscanf(argv[2], "%d", &T);
samp = malloc(T * sizeof *samp);
for (int i = 0; i < T; ++i) {
p = creategrid(n);
while (!percolates(p)) {
do {
row = rand() % n + 1;
col = rand() % n + 1;
} while (is_open(p, row, col));
open(p, row, col);
}
samp[i] = (double) number_of_open_sites(p) / (n * n);
}
printf("mean = %g\n", mean(samp, T));
printf("stddev = %g\n", stddev(samp, T));
printf("95%% confidence interval = [%g, %g]\n", confidencelo(samp, T),
confidencehi(samp, T));
return 0;
}
// mean: sample mean of percolation threshold
double mean(double *a, int len) {
double sum = 0.0;
for (int i = 0; i < len; ++i) {
sum += a[i];
}
return sum / len;
}
// stddev: sample standard deviation of percolation threshold
double stddev(double *a, int len) {
double mean(double *, int);
double sum = 0.0;
double avg = mean(a, len);
for (int i = 0; i < len; ++i) {
sum += (a[i] - avg) * (a[i] - avg);
}
return sqrt(sum / (len - 1));
}
// confidencelo: low endpoint of 95% confidence interval
double confidencelo(double *a, int len) {
double mean(double *, int);
double stddev(double *, int);
return mean(a, len) - (1.96 * stddev(a, len)) / sqrt(len);
}
// confidencehi: high endpoint of 95% confidence interval
double confidencehi(double *a, int len) {
double mean(double *, int);
double stddev(double *, int);
return mean(a, len) + (1.96 * stddev(a, len)) / sqrt(len);
}
percolation.h
#include <stdbool.h>
typedef struct percolation percolation;
percolation *creategrid(int);
void open(percolation *, int, int);
bool is_open(percolation *, int, int);
bool is_full(percolation *, int, int);
int number_of_open_sites(percolation *);
bool percolates(percolation *);
percolation.c
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "quick_union.h"
#define pos(p, x, y) (((x) - 1) * (p)->width + ((y) - 1))
typedef struct percolation {
int width;
int open_sites;
UF *uf;
bool *open;
} percolation;
// creategrid: creates n-by-n grid, with all sites initially blocked
percolation *creategrid(int n) {
if (n <= 0) {
fprintf(stderr, "open: illegal argument\n");
exit(1);
}
percolation *p;
p = malloc(sizeof *p);
p->width = n;
p->open_sites = 0;
p->uf = createuf(n * n + 2);
p->open = malloc(n * n * sizeof *p->open);
for (int i = 0; i < n * n; ++i) {
p->open[i] = false;
}
for (int i = 0; i < n; ++i) {
connect(p->uf, n * n, pos(p, 1, i));
connect(p->uf, n * n + 1, pos(p, n, i));
}
return p;
}
// open: opens the site (row, col) if it is not open already
void open(percolation *p, int row, int col) {
if (row < 1 || row > p->width || col < 1 || col > p->width) {
fprintf(stderr, "open: illegal argument\n");
exit(1);
}
if (p->open[pos(p, row, col)]) {
return;
}
p->open[pos(p, row, col)] = true;
++p->open_sites;
if (col > 1 && p->open[pos(p, row, col - 1)]) {
connect(p->uf, pos(p, row, col), pos(p, row, col - 1));
}
if (col < p->width && p->open[pos(p, row, col + 1)]) {
connect(p->uf, pos(p, row, col), pos(p, row, col + 1));
}
if (row > 1 && p->open[pos(p, row - 1, col)]) {
connect(p->uf, pos(p, row, col), pos(p, row - 1, col));
}
if (row < p->width && p->open[pos(p, row + 1, col)]) {
connect(p->uf, pos(p, row, col), pos(p, row + 1, col));
}
}
// is_open: is the site (row, col) open?
bool is_open(percolation *p, int row, int col) {
if (row < 1 || row > p->width || col < 1 || col > p->width) {
fprintf(stderr, "is_open: illegal argument\n");
exit(1);
}
return p->open[pos(p, row, col)];
}
// is_full: is the site (row, col) full?
bool is_full(percolation *p, int row, int col) {
if (row < 1 || row > p->width || col < 1 || col > p->width) {
fprintf(stderr, "is_full: illegal argument\n");
exit(1);
}
return !p->open[pos(p, row, col)];
}
// number_of_open_sites: returns the number of open sites
int number_of_open_sites(percolation *p) {
return p->open_sites;
}
// percolates: does the system percolate?
bool percolates(percolation *p) {
return connected(p->uf, p->width * p->width, p->width + p->width + 1);
}
quick_union.h
#include <stdbool.h>
typedef struct UF UF;
UF *createuf(int);
void connect(UF *, int, int);
bool connected(UF *, int, int);
int find(UF *, int);
quick_union.c
#include <stdlib.h>
#include <stdbool.h>
#define max(a, b) (((a) > (b)) ? (a) : (b))
typedef struct UF {
int count;
int *id;
int *sz;
int *largest;
} UF;
// createuf: return pointer to UF with n elements
UF *createuf(int n) {
UF *uf;
uf = malloc(sizeof *uf);
uf->count = n;
uf->id = malloc(n * sizeof *uf->id);
uf->sz = malloc(n * sizeof *uf->sz);
uf->largest = malloc(n * sizeof *uf->largest);
for (int i = 0; i < n; ++i) {
uf->id[i] = uf->largest[i] = i;
uf->sz[i] = 1;
}
return uf;
}
// connect: connect elements p and q
void connect(UF *uf, int p, int q) {
int root(UF *, int);
int i = root(uf, p);
int j = root(uf, q);
if (i == j) {
return;
}
if (uf->sz[i] <= uf->sz[j]) {
uf->id[i] = j;
uf->sz[j] += uf->sz[i];
uf->largest[j] = max(uf->largest[i], uf->largest[j]);
} else {
uf->id[j] = i;
uf->sz[i] += uf->sz[j];
uf->largest[i] = max(uf->largest[j], uf->largest[i]);
}
}
// connected: return true if elements p and q are connected
bool connected(UF *uf, int p, int q) {
int root(UF *, int);
return root(uf, p) == root(uf, q);
}
// find: return largest element in i's connected component
int find(UF *uf, int i) {
int root(UF *, int);
return uf->largest[root(uf, i)];
}
// root: return root element of i
int root(UF *uf, int i) {
while (i != uf->id[i]) {
uf->id[i] = uf->id[uf->id[i]];
i = uf->id[i];
}
return i;
}
Where did I go wrong?
Related
SOLVED: the answer was - the range of inputs was too small :) Thanks #sagi !
I've implemented a Quicksort algorithm for my classes and had to time the results for random data inputs (from -100 to 100).
Turns out, it follows N^2 time complexity almost to the dot and I have no idea why.
Here's the results of timing:
Time results
My time calculating function.
void Timing (void(*f)(int*, int), int *tab, int n, char *wiadomosc) {
clock_t start = clock();
f(tab, n); //losowe dane
clock_t end = clock();
printf("%s %lf \n", wiadomosc, (double)(end - start) / (CLOCKS_PER_SEC));
}
void QUICK_SORT_F(int *tab, int n) { //pivot as last element
quick_sort(tab, 0, n-1);
}
void quick_sort(int *tab, int first_indx, int last_indx) { //pivot as last element
while (first_indx < last_indx) {
int P = Part(tab, first_indx, last_indx);
if (P - first_indx > last_indx - P) {
quick_sort(tab, first_indx, P-1);
first_indx = P + 1;
} else {
quick_sort(tab, P + 1, last_indx);
last_indx = P - 1;
}
}
}
int Part(int* tab, int minIndx, int maxIndx) {
int pivot = tab[maxIndx];
int P = minIndx - 1;
for (int i = minIndx; i < maxIndx; ++i) {
if (tab[i] < pivot) {
++P;
SWAP(tab, i, P);
}
}
SWAP(tab, P + 1, maxIndx);
return P + 1;
} //for quicksort
void SWAP(int *tab, int x, int y) { //swaps tab[x] with tab[y]
int hld = tab[x];
tab[x] = tab[y];
tab[y] = hld;
}
As for popular request, here's the code to generate data:
int generuj(int min, int max) {
return (rand() % (max - min) + min);
}
void TESTY(void(*f)(int*, int),int n, char* metoda) {
FILE* FWB = fopen(PLIK_Z_DANYMI, "wb");
for (int i = 0; i < n; ++i) {
int r = generuj(-100, 100);
fwrite(&r, sizeof(r), 1, FWB);
}
fclose(FWB);
testowanko(f, n,metoda);
}
void testowanko(void(*f)(int*, int), int n, char* metoda) {
printf("\t %s \t \n",metoda);
FILE* RF = fopen(PLIK_Z_DANYMI, "rb");
int* tab = malloc(sizeof(int) * n);
fread(tab, sizeof(int), n, RF);
Timowanie(f, tab, n, "czas sortowania przy losowych danych = ");
}
I'm tryingto compare chars from a matrix, but it's not adding any values and i don't know why
so here's my code:
#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#define MAX_LINES 1000
#define MAX_LINE_LENGTH 1000
//---------------------
//READING & WRITING
//---------------------
char *ints_new(int n)
{
return (char *) malloc(n * sizeof(char));
}
char **ints2_new(int rows, int cols)
{
char **result = (char **) malloc(rows * sizeof(char *));
char *p = ints_new(rows * cols);
for (int i = 0; i < rows; i++, p += cols)
result[i] = p;
return result;
}
int str_readline(FILE *f, char *s)
{
int result = EOF;
char *p = fgets(s, INT_MAX, f);
if (p != NULL)
{
result = (int) strlen(s);
if (result > 0 && s[result-1] == '\n')
s[--result] = '\0';
}
return result;
}
char *str_dup(const char *s)
{
char *result = (char *) malloc(strlen(s) + 1);
strcpy(result, s);
return result;
}
int strings_read(FILE *f, char **a)
{
int result = 0;
char line[MAX_LINE_LENGTH + 2];
while (str_readline(f, line) != EOF)
a[result++] = str_dup(line);
return result;
}
// --------------------
// Problema A
// --------------------
void values_to_m(char **m, int rows, int cols, char **readings)
{
int i;
int j;
int k = 0;
int l = 0;
for(i = 0; i < rows; i++)
{
for(j = 0; j < cols; j++)
{
m[i][j] = readings[k][l];
l++;
}
k++;
l = 0;
}
}
int count_points(char **m, int i, int j, int rows, int cols)
{
int result = 0;
if(i < rows-2)
{
if(m[i][j] == m[i+1][j] == m[i+2][j])
result++;
if(j < cols-2)
{
if(m[i][j] == m[i][j+1] == m[i][j+2])
result++;
if(m[i][j] == m[i+1][j+1] == m[i+2][j+2])
result++;
}
if(j > 1)
{
if(m[i][j] == m[i+1][j-1] == m[i+2][j-2])
result++;
}
}
else
{
if(j < cols-2)
{
if(m[i][j] == m[i][j+1] == m[i][j+2])
result++;
}
}
printf("%d\n", result);
return result;
}
void points(char **m, int rows, int cols)
{
int i;
int j;
int player1 = 0; //O's
int player2 = 0; //X's
for(i = 0; i < rows; i++)
{
for(j = 0; j < cols; j++)
{
int count;
count = count_points(m, i, j, rows, cols); //counts points
if (m[i][j] == 'X') //if values i'm couning are X, points go to player 2
player2 += count;
else if(m[i][j] == 'O') //if O go to player 1
player1 += count;
}
}
printf("%d %d\n", player1, player2);
}
// --------------------
// --------------------
void test_problem_A()
{
char **readings = malloc((MAX_LINES * MAX_LINE_LENGTH) * sizeof(char) + 1);
int rows = strings_read(stdin, readings); //to read from console
int cols = strlen(readings[0]);
printf("%d\n%d\n", rows, cols); //just to make sure nr of rows and cols is right
char **m = ints2_new(rows, cols); //create matrix
values_to_m(m, rows, cols, readings); //put the values to matrix
points(m, rows, cols); //calculate points
ints2_printf(m, rows, cols, "%c");
}
// --------------------
// --------------------
int main(int argc, char **argv)
{
test_problem_A();
return 0;
}
My programm has to read a bunch of 'X', 'O' and '.'.
If there are 3 'X' in a row(vertical, horizontal or diagonal) player 2 gets 1 point, if the same happens to 'O', player 1 gets 1 point. '.' don't count any points.
my matrix had to have minimum 3 rows and cols and maximum 1000 rows and cols.
example:
If i put in console
XXO
OXO
OXO
player 1 and 2 each get 1 point
if i put:
XXXXXO //(int this line Player 2 get 3 points because there are 3 times 3 X in a row)
OXOXOO
OXOOXO
OXOXOO
player 1 gets 5 points
and player 2 gets 6 points
So my problema is with function "count_points" it's not counting any points, when I print "result" it always gives me 0 points.
Can't I compare 2 chars if they belong in a matrix?
Thanks
In count_points, you try to compare three values with expressions like
if (a == b == c) ...
This doesn't do what you think it does. You treat it like a comparison in mathematical notation, but C interprets it as:
if ((a == b) == c) ...
The comparison a == b yields either 0 or 1. That result is then compared with c.
You could rewrite your desired expression as
if (a == b && b == c) ...
Given that your a, b and c are compound expressions, you could write a small function for that:
static int eq3(int a, int b, int c)
{
return (a == b && b == c);
}
int count_points(char **m, int i, int j, int rows, int cols)
{
int result = 0;
if (i < rows-2) {
if (eq3(m[i][j], m[i+1][j], m[i+2][j]))
result++;
if (j < cols - 2) {
if (eq3(m[i][j], m[i][j+1], m[i][j+2]))
result++;
if (eq3(m[i][j], m[i+1][j+1], m[i+2][j+2]))
result++;
}
if (j > 1) {
if (eq3(m[i][j], m[i+1][j-1], m[i+2][j-2]))
result++;
}
} else {
if (j < cols-2) {
if (eq3(m[i][j], m[i][j+1], m[i][j+2]))
result++;
}
}
return result;
}
As for the allocation of your matrix, see alk's answer. Your method of allocation - one char ** for the rows and then string duplication for the row data, could leave you with a ragged array and you may not safely access m[j + 1][i] for some cases where i is a valid index for row j, but not for row j + 1.
For starters, here you want to allocate pointers to char:
char **readings = malloc((MAX_LINES * MAX_LINE_LENGTH) * sizeof(char) + 1);
So do so:
char **readings = malloc((MAX_LINES * MAX_LINE_LENGTH) * sizeof(char*) + 1);
or even better:
char **readings = malloc((MAX_LINES * MAX_LINE_LENGTH) * sizeof *readings + 1);
I'm not C expert and I've read through the forum, but I still need some advice regarding a sorting problem on C.
I have 4 dynamic arrays of doubles in C. All of them are the same size, and lets say n. What I want to do is to sort all of them using one of the arrays as first order and a second array as my second order. So if the arrays are *x, *y, *w and *z. I want to sort them according to the values of *x, then *y.
I must do this efficiently because the arrays are quite large.
Any help will be much appreciated.
The easy way to do this would be to map your four separate arrays onto a single array of a struct type like
struct rec {
double x;
double y;
double w;
double z;
};
struct rec *arr = malloc( sizeof *arr * N ); // where N is the number of
// elements in each array
if ( !arr )
// malloc failed, handle error somehow
for ( size_t i = 0; i < N; i++ )
{
arr[i].x = x[i];
arr[i].y = y[i];
arr[i].w = w[i];
arr[i].z = z[i];
}
and then create a comparison function to pass to qsort:
int cmpRec( const void *lhs, const void *rhs )
{
struct rec *l = lhs;
struct rec *r = rhs;
if ( l->x < r->x )
return -1;
else if ( l->x > r->x )
return 1;
else
{
if ( l->y < r->y )
return -1;
else if ( l->y > r->y )
return 1;
else
return 0;
}
return 0;
}
Now you can use the qsort library function to sort that array of struct:
qsort( arr, N, sizeof *arr, cmpRec );
Once that array is sorted, you can map the results back onto your four original arrays.
Clearly, sorting this using standard qsort() is not going to work; there isn't a mechanism for passing four arrays.
Equally clearly, if the data were structured as an array of structures, then using qsort() would be feasible.
Question 1: Is it feasible to create an array of structures, load it, sort it, and then unload back into the original arrays?
Question 2: Another option is to sort an array of integers:
int indexes[n];
for (int i = 0; i < n; i++)
indexes[i] = i;
qsort(indexes, n, sizeof(indexes[0]), comparator);
The comparator function would have to be able to access the x and y arrays as file scope variables:
int comparator(void const *v1, void const *v2)
{
int i1 = *(int *)v1;
int i2 = *(int *)v2;
extern double *x, *y;
if (x[i1] > x[i2])
return +1;
else if (x[i1] < x[i2])
return -1;
else if (y[i1] > y[i2])
return +1;
else if (y[i1] < y[i2])
return -1;
else
return 0;
}
You'd then be able to access the arrays using x[indexes[i]] etc to access the ith element in sorted order.
Is that acceptable?
If that is not convenient either, then you will end up writing your own sort; it isn't horribly painful, but will require some care.
I spent some time adapting an existing sort test framework to this scenario. The full code is quite large because it includes a lot of testing support code. The core function (compare, swap, partition and quicksort) are here (122 lines, including comment and blank lines):
/* SO 20271977 - sort arrays x, y, z, w (type double, size n) in parallel based on values in x and y */
/*
** To apply this to the real code, where there are 4 arrays to be sorted
** in parallel, you might write:
**
** Array4 a;
** a.x = x;
** a.y = y;
** a.z = z;
** a.w = w;
** a.n = n;
** quicksort_random(&a);
**
** Or even:
**
** quicksort_random((Array4){ .n = n, .x = x, .y = y, .z = z, .w = w });
**
** combining designated initializers and compound literals. Or you could write a
** trivial wrapper so that you can call:
**
** quicksort_random_wrapper(n, x, y, z, w);
*/
/* SOF so-20271977.h */
#include <stddef.h>
typedef struct Array4
{
size_t n;
double *x;
double *y;
double *z;
double *w;
} Array4;
extern void quicksort_random(Array4 *A);
/* EOF so-20271977.h */
#include <assert.h>
#include <stdlib.h> /* lrand48() */
/*
** Note that a more careful implementation would use nrand48() instead
** of lrand48() to prevent its random number generation from interfering
** with other uses of the x-rand48() functions.
*/
typedef size_t (*Part)(Array4 *A, size_t p, size_t r);
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition);
static size_t partition_random(Array4 *A, size_t p, size_t r);
/* Quick Sort Wrapper function - specifying random partitioning */
void quicksort_random(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_random);
}
/* Main Quick Sort function */
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition)
{
if (p < r)
{
size_t q = (*partition)(A, p, r);
assert(p <= q && q <= r);
if (q > 0)
quicksort_partition(A, p, q-1, partition);
quicksort_partition(A, q+1, r, partition);
}
}
static inline int compare(Array4 const *A, size_t p, size_t r)
{
if (A->x[p] < A->x[r])
return -1;
else if (A->x[p] > A->x[r])
return +1;
if (A->y[p] < A->y[r])
return -1;
else if (A->y[p] > A->y[r])
return +1;
else
return 0;
}
static inline size_t random_int(size_t p, size_t r)
{
return(lrand48() % (r - p + 1) + p);
}
static inline void swap(Array4 *A, size_t i, size_t j)
{
double d;
d = A->x[i];
A->x[i] = A->x[j];
A->x[j] = d;
d = A->y[i];
A->y[i] = A->y[j];
A->y[j] = d;
d = A->z[i];
A->z[i] = A->z[j];
A->z[j] = d;
d = A->w[i];
A->w[i] = A->w[j];
A->w[j] = d;
}
static size_t partition_random(Array4 *A, size_t p, size_t r)
{
size_t pivot = random_int(p, r);
swap(A, pivot, r);
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
The test framework (quite ridiculously elaborate if it weren't that I already had a variant of it on hand) is 369 lines including blank lines and comment lines — and all the code above:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define FLTFMT "%13.6f"
typedef struct Array4
{
size_t n;
double *x;
double *y;
double *z;
double *w;
} Array4;
static int trace = 0;
static void *xmalloc(size_t size)
{
void *space = malloc(size);
if (space == 0)
{
fprintf(stderr, "Out of memory (%zu)\n", size);
exit(1);
}
return space;
}
void quicksort_last(Array4 *A);
void quicksort_random(Array4 *A);
void selectionsort(Array4 *A);
static inline int compare(Array4 const *A, size_t p, size_t r)
{
if (A->x[p] < A->x[r])
return -1;
else if (A->x[p] > A->x[r])
return +1;
if (A->y[p] < A->y[r])
return -1;
else if (A->y[p] > A->y[r])
return +1;
else
return 0;
}
static void dump_array(char const *tag, Array4 const *A)
{
printf("%s [%zu..%zu]:\n", tag, (size_t)0, A->n-1);
for (size_t i = 0; i < A->n; i++)
printf("(" FLTFMT ", " FLTFMT ", " FLTFMT ", " FLTFMT ")\n",
A->x[i], A->y[i], A->z[i], A->w[i]);
}
static void chk_sort(Array4 const *A)
{
for (size_t i = 0; i < A->n - 1; i++)
{
//if (compare(A, i, i+1) > 0)
{
if (A->x[i] > A->x[i+1])
{
printf("Out of order: A.x[%zu] = " FLTFMT ", A.x[%zu] = " FLTFMT "\n",
i, A->x[i], i+1, A->x[i+1]);
}
else if ((A->x[i] == A->x[i+1] && A->y[i] > A->y[i+1]))
{
printf("Out of order: A.x[%zu] = " FLTFMT ", A.x[%zu] = " FLTFMT ", "
"A.y[%zu] = " FLTFMT ", A.y[%zu] = " FLTFMT "\n",
i, A->x[i], i+1, A->x[i+1], i, A->y[i], i+1, A->y[i+1]);
}
}
}
}
static inline void set(Array4 *A, size_t p, double d)
{
A->x[p] = d;
A->y[p] = d + drand48() - 0.5;
A->z[p] = d / 2.0;
A->w[p] = d * 2.0;
}
static void load_random(Array4 *A)
{
size_t size = A->n;
for (size_t i = 0; i < size; i++)
{
A->x[i] = drand48() * size;
A->y[i] = drand48() * size + drand48() - 0.5;
A->z[i] = drand48() * size / 2.0;
A->w[i] = drand48() * size * 2.0;
}
}
static void load_ascending(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, i);
}
static void load_descending(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, A->n - i);
}
static void load_uniform(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, A->n);
}
static void load_organpipe(Array4 *A)
{
for (size_t i = 0; i <= A->n / 2; i++)
set(A, i, i);
for (size_t i = A->n / 2 + 1; i < A->n; i++)
set(A, i, A->n - i);
}
static void load_invorganpipe(Array4 *A)
{
size_t range = A->n / 2;
for (size_t i = 0; i < A->n / 2; i++)
set(A, i, range - i);
for (size_t i = A->n / 2 + 1; i < A->n; i++)
set(A, i, i - range);
}
typedef void (*Load)(Array4 *A);
typedef void (*Sort)(Array4 *A);
typedef size_t (*Part)(Array4 *A, size_t p, size_t r);
static void test_one_sort(Array4 *A, Sort sort, char const *s_tag,
char const *l_tag, char const *z_tag)
{
if (trace)
{
printf("%s-%s-%s:", z_tag, l_tag, s_tag);
dump_array("Before", A);
}
clock_t start = clock();
(*sort)(A);
clock_t finish = clock();
double sec = (finish - start) / (double)CLOCKS_PER_SEC;
printf("%s-%s-%s: %13.6f\n", z_tag, l_tag, s_tag, sec);
chk_sort(A);
if (trace)
{
printf("%s-%s-%s:", z_tag, l_tag, s_tag);
dump_array("After", A);
}
fflush(stdout);
}
static Array4 *alloc_array(size_t size)
{
Array4 *A = xmalloc(sizeof(*A));
A->n = size;
A->x = xmalloc(size * sizeof(A->x[0]));
A->y = xmalloc(size * sizeof(A->y[0]));
A->z = xmalloc(size * sizeof(A->z[0]));
A->w = xmalloc(size * sizeof(A->w[0]));
return A;
}
static Array4 *dup_array(Array4 *A)
{
size_t size = A->n;
Array4 *B = alloc_array(size);
if (B != 0)
{
B->n = size;
memmove(B->x, A->x, size * sizeof(A->x[0]));
memmove(B->y, A->y, size * sizeof(A->y[0]));
memmove(B->z, A->z, size * sizeof(A->z[0]));
memmove(B->w, A->w, size * sizeof(A->w[0]));
}
return B;
}
static void free_array(Array4 *A)
{
free(A->x);
free(A->y);
free(A->z);
free(A->w);
free(A);
}
static void test_set_sorts(Array4 *A, char const *l_tag, char const *z_tag)
{
struct sorter
{
Sort function;
char const *tag;
} sort[] =
{
{ quicksort_last, "QS.L" },
{ quicksort_random, "QS.R" },
{ selectionsort, "SS.N" },
};
enum { NUM_SORTS = sizeof(sort) / sizeof(sort[0]) };
for (int i = 0; i < NUM_SORTS; i++)
{
Array4 *B = dup_array(A);
test_one_sort(B, sort[i].function, sort[i].tag, l_tag, z_tag);
free(B);
}
}
static void test_set_loads(size_t size, char const *z_tag)
{
struct loader
{
Load function;
char const *tag;
} load[] =
{
{ load_random, "R" },
{ load_ascending, "A" },
{ load_descending, "D" },
{ load_organpipe, "O" },
{ load_invorganpipe, "I" },
{ load_uniform, "U" },
};
enum { NUM_LOADS = sizeof(load) / sizeof(load[0]) };
Array4 *A = alloc_array(size);
for (int i = 0; i < NUM_LOADS; i++)
{
load[i].function(A);
test_set_sorts(A, load[i].tag, z_tag);
}
free_array(A);
}
/* Main Quick Sort function */
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition)
{
if (p < r)
{
size_t q = (*partition)(A, p, r);
assert(p <= q && q <= r);
if (q > 0)
quicksort_partition(A, p, q-1, partition);
quicksort_partition(A, q+1, r, partition);
}
}
static size_t partition_random(Array4 *A, size_t p, size_t r);
static size_t partition_last(Array4 *A, size_t p, size_t r);
/* Quick Sort Wrapper function - specifying random partitioning */
void quicksort_random(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_random);
}
/* Quick Sort Wrapper function - specifying partitioning about last element */
void quicksort_last(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_last);
}
static inline size_t random_int(size_t p, size_t r)
{
return(lrand48() % (r - p + 1) + p);
}
static inline void swap(Array4 *A, size_t i, size_t j)
{
double d;
d = A->x[i];
A->x[i] = A->x[j];
A->x[j] = d;
d = A->y[i];
A->y[i] = A->y[j];
A->y[j] = d;
d = A->z[i];
A->z[i] = A->z[j];
A->z[j] = d;
d = A->w[i];
A->w[i] = A->w[j];
A->w[j] = d;
}
static size_t partition_random(Array4 *A, size_t p, size_t r)
{
size_t pivot = random_int(p, r);
swap(A, pivot, r);
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
static size_t partition_last(Array4 *A, size_t p, size_t r)
{
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
/* Selection Sort algorithm */
void selectionsort(Array4 *A)
{
size_t r = A->n;
for (size_t p = 0; p < r; p++)
{
for (size_t i = p; i < r; i++)
{
if (compare(A, p, i) > 0)
swap(A, p, i);
}
}
}
/*
** To apply this to the real code, where there are 4 arrays to be sorted
** in parallel, you might write:
**
** Array4 a;
** a.x = x;
** a.y = y;
** a.z = z;
** a.w = w;
** a.n = n;
** quicksort_random(&a);
**
** Or even:
**
** quicksort_random((Array4){ .n = n, .x = x, .y = y, .z = z, .w = w });
**
** combining designated initializers and compound literals. Or you could write a
** trivial wrapper so that you can call:
**
** quicksort_random_wrapper(n, x, y, z, w);
*/
int main(void)
{
srand48((long)time(0));
for (size_t i = 10; i <= 40; i += 10)
{
char buffer[10];
snprintf(buffer, sizeof(buffer), "%zuK", i);
test_set_loads(1000*i, buffer);
}
return 0;
}
If you can't use qsort with
typedef struct Point {
double x;
double y;
double w;
double z;
} Point;
Use qsort with
typedef struct UglyThing {
double x;
int i;
} UglyThing;
Create an array of size n, fill x with x values, i with index.
Call qsort. At the end, i will store the permutation order.
Swap the three other arrays according to the permutation order.
Then do the same with little arrays ("with same x") in the y direction.
If this ugly trick is not possible, then I don't see any other solution than reinventing the wheel.
(edit : I have just seen Andrew said something very close to this answer...sorry!)
Bye,
Francis
I'm writing a CUDA kernel and each thread has to complete the following task: suppose I have an ordered array a of n unsigned integers (the first one is always 0) stored in shared memory, each thread has to find the array index i such that a[i] ≤ threadIdx.x and a[i + 1] > threadIdx.x.
A naive solution could be:
for (i = 0; i < n - 1; i++)
if (a[i + 1] > threadIdx.x) break;
but I suppose this is not the optimal way to do it... can anyone suggest anything better?
Like Robert, I was thinking that a binary search has got to be faster that a naïve loop -- the upper bound of operation count for a binary search is O(log(n)), compared to O(N) for the loop.
My extremely simple implementation:
#include <iostream>
#include <climits>
#include <assert.h>
__device__ __host__
int midpoint(int a, int b)
{
return a + (b-a)/2;
}
__device__ __host__
int eval(int A[], int i, int val, int imin, int imax)
{
int low = (A[i] <= val);
int high = (A[i+1] > val);
if (low && high) {
return 0;
} else if (low) {
return -1;
} else {
return 1;
}
}
__device__ __host__
int binary_search(int A[], int val, int imin, int imax)
{
while (imax >= imin) {
int imid = midpoint(imin, imax);
int e = eval(A, imid, val, imin, imax);
if(e == 0) {
return imid;
} else if (e < 0) {
imin = imid;
} else {
imax = imid;
}
}
return -1;
}
__device__ __host__
int linear_search(int A[], int val, int imin, int imax)
{
int res = -1;
for(int i=imin; i<(imax-1); i++) {
if (A[i+1] > val) {
res = i;
break;
}
}
return res;
}
template<int version>
__global__
void search(int * source, int * result, int Nin, int Nout)
{
extern __shared__ int buff[];
int tid = threadIdx.x + blockIdx.x*blockDim.x;
int val = INT_MAX;
if (tid < Nin) val = source[threadIdx.x];
buff[threadIdx.x] = val;
__syncthreads();
int res;
switch(version) {
case 0:
res = binary_search(buff, threadIdx.x, 0, blockDim.x);
break;
case 1:
res = linear_search(buff, threadIdx.x, 0, blockDim.x);
break;
}
if (tid < Nout) result[tid] = res;
}
int main(void)
{
const int inputLength = 128000;
const int isize = inputLength * sizeof(int);
const int outputLength = 256;
const int osize = outputLength * sizeof(int);
int * hostInput = new int[inputLength];
int * hostOutput = new int[outputLength];
int * deviceInput;
int * deviceOutput;
for(int i=0; i<inputLength; i++) {
hostInput[i] = -200 + 5*i;
}
cudaMalloc((void**)&deviceInput, isize);
cudaMalloc((void**)&deviceOutput, osize);
cudaMemcpy(deviceInput, hostInput, isize, cudaMemcpyHostToDevice);
dim3 DimBlock(256, 1, 1);
dim3 DimGrid(1, 1, 1);
DimGrid.x = (outputLength / DimBlock.x) +
((outputLength % DimBlock.x > 0) ? 1 : 0);
size_t shmsz = DimBlock.x * sizeof(int);
for(int i=0; i<5; i++) {
search<1><<<DimGrid, DimBlock, shmsz>>>(deviceInput, deviceOutput,
inputLength, outputLength);
}
for(int i=0; i<5; i++) {
search<0><<<DimGrid, DimBlock, shmsz>>>(deviceInput, deviceOutput,
inputLength, outputLength);
}
cudaMemcpy(hostOutput, deviceOutput, osize, cudaMemcpyDeviceToHost);
for(int i=0; i<outputLength; i++) {
int idx = hostOutput[i];
int tidx = i % DimBlock.x;
assert( (hostInput[idx] <= tidx) && (tidx < hostInput[idx+1]) );
}
cudaDeviceReset();
return 0;
}
gave about a five times speed up compared to the loop:
>nvprof a.exe
======== NVPROF is profiling a.exe...
======== Command: a.exe
======== Profiling result:
Time(%) Time Calls Avg Min Max Name
60.11 157.85us 1 157.85us 157.85us 157.85us [CUDA memcpy HtoD]
32.58 85.55us 5 17.11us 16.63us 19.04us void search<int=1>(int*, int*, int, int)
6.52 17.13us 5 3.42us 3.35us 3.73us void search<int=0>(int*, int*, int, int)
0.79 2.08us 1 2.08us 2.08us 2.08us [CUDA memcpy DtoH]
I'm sure that someoneclever could do a lot better than that. But perhaps this gives you at least a few ideas.
can anyone suggest anything better?
A brute force approach would be to have each thread do a binary search (on threadIdx.x + 1).
// sets idx to the index of the first element in a that is
// equal to or larger than key
__device__ void bsearch_range(const int *a, const int key, const unsigned len_a, unsigned *idx){
unsigned lower = 0;
unsigned upper = len_a;
unsigned midpt;
while (lower < upper){
midpt = (lower + upper)>>1;
if (a[midpt] < key) lower = midpt +1;
else upper = midpt;
}
*idx = lower;
return;
}
__global__ void find_my_idx(const int *a, const unsigned len_a, int *my_idx){
unsigned idx = (blockDim.x * blockIdx.x) + threadIdx.x;
unsigned sp_a;
int val = idx+1;
bsearch_range(a, val, len_a, &sp_a);
my_idx[idx] = ((val-1) < a[sp_a]) ? sp_a:-1;
}
This is coded in browser, not tested. It's hacked from a piece of working code, however. If you have trouble making it work, I can revisit it. I don't recommend this approach on a device without caches (cc 1.x device).
This is actually searching on the full unique 1D thread index (blockDim.x * blockIdx.x + threadIdx.x + 1) You can change val to be anything you like.
You could also add an appropriate thread check, if the number of threads you intend to launch is greater than the length of your my_idx result vector.
I imagine there is a more clever approach that may use something akin to prefix sums.
This is the best algorithm so far. It's called: LPW Indexed Search
__global__ void find_position_lpw(int *a, int n)
{
int idx = threadIdx.x;
__shared__ int aux[ MAX_THREADS_PER_BLOCK /*1024*/ ];
aux[idx] = 0;
if (idx < n)
atomicAdd( &aux[a[idx]], 1); // atomics in case there are duplicates
__syncthreads();
int tmp;
for (int j = 1; j <= MAX_THREADS_PER_BLOCK / 2; j <<= 1)
{
if( idx >= j ) tmp = aux[idx - j];
__syncthreads();
if( idx >= j ) aux[idx] += tmp;
__syncthreads();
}
// result in "i"
int i = aux[idx] - 1;
// use "i" here...
// ...
}
I'm not C expert and I've read through the forum, but I still need some advice regarding a sorting problem on C.
I have 4 dynamic arrays of doubles in C. All of them are the same size, and lets say n. What I want to do is to sort all of them using one of the arrays as first order and a second array as my second order. So if the arrays are *x, *y, *w and *z. I want to sort them according to the values of *x, then *y.
I must do this efficiently because the arrays are quite large.
Any help will be much appreciated.
The easy way to do this would be to map your four separate arrays onto a single array of a struct type like
struct rec {
double x;
double y;
double w;
double z;
};
struct rec *arr = malloc( sizeof *arr * N ); // where N is the number of
// elements in each array
if ( !arr )
// malloc failed, handle error somehow
for ( size_t i = 0; i < N; i++ )
{
arr[i].x = x[i];
arr[i].y = y[i];
arr[i].w = w[i];
arr[i].z = z[i];
}
and then create a comparison function to pass to qsort:
int cmpRec( const void *lhs, const void *rhs )
{
struct rec *l = lhs;
struct rec *r = rhs;
if ( l->x < r->x )
return -1;
else if ( l->x > r->x )
return 1;
else
{
if ( l->y < r->y )
return -1;
else if ( l->y > r->y )
return 1;
else
return 0;
}
return 0;
}
Now you can use the qsort library function to sort that array of struct:
qsort( arr, N, sizeof *arr, cmpRec );
Once that array is sorted, you can map the results back onto your four original arrays.
Clearly, sorting this using standard qsort() is not going to work; there isn't a mechanism for passing four arrays.
Equally clearly, if the data were structured as an array of structures, then using qsort() would be feasible.
Question 1: Is it feasible to create an array of structures, load it, sort it, and then unload back into the original arrays?
Question 2: Another option is to sort an array of integers:
int indexes[n];
for (int i = 0; i < n; i++)
indexes[i] = i;
qsort(indexes, n, sizeof(indexes[0]), comparator);
The comparator function would have to be able to access the x and y arrays as file scope variables:
int comparator(void const *v1, void const *v2)
{
int i1 = *(int *)v1;
int i2 = *(int *)v2;
extern double *x, *y;
if (x[i1] > x[i2])
return +1;
else if (x[i1] < x[i2])
return -1;
else if (y[i1] > y[i2])
return +1;
else if (y[i1] < y[i2])
return -1;
else
return 0;
}
You'd then be able to access the arrays using x[indexes[i]] etc to access the ith element in sorted order.
Is that acceptable?
If that is not convenient either, then you will end up writing your own sort; it isn't horribly painful, but will require some care.
I spent some time adapting an existing sort test framework to this scenario. The full code is quite large because it includes a lot of testing support code. The core function (compare, swap, partition and quicksort) are here (122 lines, including comment and blank lines):
/* SO 20271977 - sort arrays x, y, z, w (type double, size n) in parallel based on values in x and y */
/*
** To apply this to the real code, where there are 4 arrays to be sorted
** in parallel, you might write:
**
** Array4 a;
** a.x = x;
** a.y = y;
** a.z = z;
** a.w = w;
** a.n = n;
** quicksort_random(&a);
**
** Or even:
**
** quicksort_random((Array4){ .n = n, .x = x, .y = y, .z = z, .w = w });
**
** combining designated initializers and compound literals. Or you could write a
** trivial wrapper so that you can call:
**
** quicksort_random_wrapper(n, x, y, z, w);
*/
/* SOF so-20271977.h */
#include <stddef.h>
typedef struct Array4
{
size_t n;
double *x;
double *y;
double *z;
double *w;
} Array4;
extern void quicksort_random(Array4 *A);
/* EOF so-20271977.h */
#include <assert.h>
#include <stdlib.h> /* lrand48() */
/*
** Note that a more careful implementation would use nrand48() instead
** of lrand48() to prevent its random number generation from interfering
** with other uses of the x-rand48() functions.
*/
typedef size_t (*Part)(Array4 *A, size_t p, size_t r);
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition);
static size_t partition_random(Array4 *A, size_t p, size_t r);
/* Quick Sort Wrapper function - specifying random partitioning */
void quicksort_random(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_random);
}
/* Main Quick Sort function */
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition)
{
if (p < r)
{
size_t q = (*partition)(A, p, r);
assert(p <= q && q <= r);
if (q > 0)
quicksort_partition(A, p, q-1, partition);
quicksort_partition(A, q+1, r, partition);
}
}
static inline int compare(Array4 const *A, size_t p, size_t r)
{
if (A->x[p] < A->x[r])
return -1;
else if (A->x[p] > A->x[r])
return +1;
if (A->y[p] < A->y[r])
return -1;
else if (A->y[p] > A->y[r])
return +1;
else
return 0;
}
static inline size_t random_int(size_t p, size_t r)
{
return(lrand48() % (r - p + 1) + p);
}
static inline void swap(Array4 *A, size_t i, size_t j)
{
double d;
d = A->x[i];
A->x[i] = A->x[j];
A->x[j] = d;
d = A->y[i];
A->y[i] = A->y[j];
A->y[j] = d;
d = A->z[i];
A->z[i] = A->z[j];
A->z[j] = d;
d = A->w[i];
A->w[i] = A->w[j];
A->w[j] = d;
}
static size_t partition_random(Array4 *A, size_t p, size_t r)
{
size_t pivot = random_int(p, r);
swap(A, pivot, r);
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
The test framework (quite ridiculously elaborate if it weren't that I already had a variant of it on hand) is 369 lines including blank lines and comment lines — and all the code above:
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define FLTFMT "%13.6f"
typedef struct Array4
{
size_t n;
double *x;
double *y;
double *z;
double *w;
} Array4;
static int trace = 0;
static void *xmalloc(size_t size)
{
void *space = malloc(size);
if (space == 0)
{
fprintf(stderr, "Out of memory (%zu)\n", size);
exit(1);
}
return space;
}
void quicksort_last(Array4 *A);
void quicksort_random(Array4 *A);
void selectionsort(Array4 *A);
static inline int compare(Array4 const *A, size_t p, size_t r)
{
if (A->x[p] < A->x[r])
return -1;
else if (A->x[p] > A->x[r])
return +1;
if (A->y[p] < A->y[r])
return -1;
else if (A->y[p] > A->y[r])
return +1;
else
return 0;
}
static void dump_array(char const *tag, Array4 const *A)
{
printf("%s [%zu..%zu]:\n", tag, (size_t)0, A->n-1);
for (size_t i = 0; i < A->n; i++)
printf("(" FLTFMT ", " FLTFMT ", " FLTFMT ", " FLTFMT ")\n",
A->x[i], A->y[i], A->z[i], A->w[i]);
}
static void chk_sort(Array4 const *A)
{
for (size_t i = 0; i < A->n - 1; i++)
{
//if (compare(A, i, i+1) > 0)
{
if (A->x[i] > A->x[i+1])
{
printf("Out of order: A.x[%zu] = " FLTFMT ", A.x[%zu] = " FLTFMT "\n",
i, A->x[i], i+1, A->x[i+1]);
}
else if ((A->x[i] == A->x[i+1] && A->y[i] > A->y[i+1]))
{
printf("Out of order: A.x[%zu] = " FLTFMT ", A.x[%zu] = " FLTFMT ", "
"A.y[%zu] = " FLTFMT ", A.y[%zu] = " FLTFMT "\n",
i, A->x[i], i+1, A->x[i+1], i, A->y[i], i+1, A->y[i+1]);
}
}
}
}
static inline void set(Array4 *A, size_t p, double d)
{
A->x[p] = d;
A->y[p] = d + drand48() - 0.5;
A->z[p] = d / 2.0;
A->w[p] = d * 2.0;
}
static void load_random(Array4 *A)
{
size_t size = A->n;
for (size_t i = 0; i < size; i++)
{
A->x[i] = drand48() * size;
A->y[i] = drand48() * size + drand48() - 0.5;
A->z[i] = drand48() * size / 2.0;
A->w[i] = drand48() * size * 2.0;
}
}
static void load_ascending(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, i);
}
static void load_descending(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, A->n - i);
}
static void load_uniform(Array4 *A)
{
for (size_t i = 0; i < A->n; i++)
set(A, i, A->n);
}
static void load_organpipe(Array4 *A)
{
for (size_t i = 0; i <= A->n / 2; i++)
set(A, i, i);
for (size_t i = A->n / 2 + 1; i < A->n; i++)
set(A, i, A->n - i);
}
static void load_invorganpipe(Array4 *A)
{
size_t range = A->n / 2;
for (size_t i = 0; i < A->n / 2; i++)
set(A, i, range - i);
for (size_t i = A->n / 2 + 1; i < A->n; i++)
set(A, i, i - range);
}
typedef void (*Load)(Array4 *A);
typedef void (*Sort)(Array4 *A);
typedef size_t (*Part)(Array4 *A, size_t p, size_t r);
static void test_one_sort(Array4 *A, Sort sort, char const *s_tag,
char const *l_tag, char const *z_tag)
{
if (trace)
{
printf("%s-%s-%s:", z_tag, l_tag, s_tag);
dump_array("Before", A);
}
clock_t start = clock();
(*sort)(A);
clock_t finish = clock();
double sec = (finish - start) / (double)CLOCKS_PER_SEC;
printf("%s-%s-%s: %13.6f\n", z_tag, l_tag, s_tag, sec);
chk_sort(A);
if (trace)
{
printf("%s-%s-%s:", z_tag, l_tag, s_tag);
dump_array("After", A);
}
fflush(stdout);
}
static Array4 *alloc_array(size_t size)
{
Array4 *A = xmalloc(sizeof(*A));
A->n = size;
A->x = xmalloc(size * sizeof(A->x[0]));
A->y = xmalloc(size * sizeof(A->y[0]));
A->z = xmalloc(size * sizeof(A->z[0]));
A->w = xmalloc(size * sizeof(A->w[0]));
return A;
}
static Array4 *dup_array(Array4 *A)
{
size_t size = A->n;
Array4 *B = alloc_array(size);
if (B != 0)
{
B->n = size;
memmove(B->x, A->x, size * sizeof(A->x[0]));
memmove(B->y, A->y, size * sizeof(A->y[0]));
memmove(B->z, A->z, size * sizeof(A->z[0]));
memmove(B->w, A->w, size * sizeof(A->w[0]));
}
return B;
}
static void free_array(Array4 *A)
{
free(A->x);
free(A->y);
free(A->z);
free(A->w);
free(A);
}
static void test_set_sorts(Array4 *A, char const *l_tag, char const *z_tag)
{
struct sorter
{
Sort function;
char const *tag;
} sort[] =
{
{ quicksort_last, "QS.L" },
{ quicksort_random, "QS.R" },
{ selectionsort, "SS.N" },
};
enum { NUM_SORTS = sizeof(sort) / sizeof(sort[0]) };
for (int i = 0; i < NUM_SORTS; i++)
{
Array4 *B = dup_array(A);
test_one_sort(B, sort[i].function, sort[i].tag, l_tag, z_tag);
free(B);
}
}
static void test_set_loads(size_t size, char const *z_tag)
{
struct loader
{
Load function;
char const *tag;
} load[] =
{
{ load_random, "R" },
{ load_ascending, "A" },
{ load_descending, "D" },
{ load_organpipe, "O" },
{ load_invorganpipe, "I" },
{ load_uniform, "U" },
};
enum { NUM_LOADS = sizeof(load) / sizeof(load[0]) };
Array4 *A = alloc_array(size);
for (int i = 0; i < NUM_LOADS; i++)
{
load[i].function(A);
test_set_sorts(A, load[i].tag, z_tag);
}
free_array(A);
}
/* Main Quick Sort function */
static void quicksort_partition(Array4 *A, size_t p, size_t r, Part partition)
{
if (p < r)
{
size_t q = (*partition)(A, p, r);
assert(p <= q && q <= r);
if (q > 0)
quicksort_partition(A, p, q-1, partition);
quicksort_partition(A, q+1, r, partition);
}
}
static size_t partition_random(Array4 *A, size_t p, size_t r);
static size_t partition_last(Array4 *A, size_t p, size_t r);
/* Quick Sort Wrapper function - specifying random partitioning */
void quicksort_random(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_random);
}
/* Quick Sort Wrapper function - specifying partitioning about last element */
void quicksort_last(Array4 *A)
{
quicksort_partition(A, 0, A->n - 1, partition_last);
}
static inline size_t random_int(size_t p, size_t r)
{
return(lrand48() % (r - p + 1) + p);
}
static inline void swap(Array4 *A, size_t i, size_t j)
{
double d;
d = A->x[i];
A->x[i] = A->x[j];
A->x[j] = d;
d = A->y[i];
A->y[i] = A->y[j];
A->y[j] = d;
d = A->z[i];
A->z[i] = A->z[j];
A->z[j] = d;
d = A->w[i];
A->w[i] = A->w[j];
A->w[j] = d;
}
static size_t partition_random(Array4 *A, size_t p, size_t r)
{
size_t pivot = random_int(p, r);
swap(A, pivot, r);
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
static size_t partition_last(Array4 *A, size_t p, size_t r)
{
size_t i = p-1;
size_t j = p;
while (j <= r)
{
if (compare(A, j, r) <= 0)
swap(A, j, ++i);
j++;
}
return i;
}
/* Selection Sort algorithm */
void selectionsort(Array4 *A)
{
size_t r = A->n;
for (size_t p = 0; p < r; p++)
{
for (size_t i = p; i < r; i++)
{
if (compare(A, p, i) > 0)
swap(A, p, i);
}
}
}
/*
** To apply this to the real code, where there are 4 arrays to be sorted
** in parallel, you might write:
**
** Array4 a;
** a.x = x;
** a.y = y;
** a.z = z;
** a.w = w;
** a.n = n;
** quicksort_random(&a);
**
** Or even:
**
** quicksort_random((Array4){ .n = n, .x = x, .y = y, .z = z, .w = w });
**
** combining designated initializers and compound literals. Or you could write a
** trivial wrapper so that you can call:
**
** quicksort_random_wrapper(n, x, y, z, w);
*/
int main(void)
{
srand48((long)time(0));
for (size_t i = 10; i <= 40; i += 10)
{
char buffer[10];
snprintf(buffer, sizeof(buffer), "%zuK", i);
test_set_loads(1000*i, buffer);
}
return 0;
}
If you can't use qsort with
typedef struct Point {
double x;
double y;
double w;
double z;
} Point;
Use qsort with
typedef struct UglyThing {
double x;
int i;
} UglyThing;
Create an array of size n, fill x with x values, i with index.
Call qsort. At the end, i will store the permutation order.
Swap the three other arrays according to the permutation order.
Then do the same with little arrays ("with same x") in the y direction.
If this ugly trick is not possible, then I don't see any other solution than reinventing the wheel.
(edit : I have just seen Andrew said something very close to this answer...sorry!)
Bye,
Francis