Let's say I have structure like this:
typedef struct MyStruct{
char *string1;
int number1, number2, number3;
char string2[11], string3[9];
char *string4;
char *string5;
}MyStruct;
Programs prompts user to choose by what field it should sort the data. I am having trouble thinking of a way to sort array effectively. Do I really need to write separate sorting functions for each field? There must be some other way, because writing 8 functions, where 2 would suffice, doesn't look rational.
Look up qsort() from <stdlib.h>. It takes a comparator function. You can write separate comparator functions for the different sort orders, but still use the standard library qsort() to do the sorting.
For example:
int ms_cmp_string1(const void *vp1, const void *vp2)
{
const MyStruct *ms1 = vp1;
const MyStruct *ms2 = vp2;
int cmp = strcmp(ms1->string1, ms1->string2);
if (cmp != 0)
return cmp;
else if (ms1->number1 < ms2->number1)
return -1;
else if (ms1->number1 > ms2->number1)
return +1;
//...other comparisons as required...
else
return 0;
}
This is a decent outline for comparators. This one sorts on string1 and then by number1. You can either write variants that sort on different fields, or devise a scheme that applies the various possible tests in an order of your choosing. But the basic outline works pretty well and is suitable for passing to qsort() without any casts necessary.
You don't need to write 8 functions if only 2 are needed. Build your own qsort function and send a last parameter containing the member offset to the compare function, then, in your compare function, cast pointer + offset to the right type.
Something like:
int comp_int(const void *pa, const void *pb, size_t offset)
{
const int *a = (const int *)((const char *)pa + offset);
const int *b = (const int *)((const char *)pb + offset);
return *a - *b;
}
int comp_string(const void *pa, const void *pb, size_t offset)
{
const char *a = (const char *)pa + offset;
const char *b = (const char *)pb + offset;
return strcmp(a, b);
}
void swap(void *v[], int a, int b)
{
void *temp;
temp = v[a];
v[a] = v[b];
v[b] = temp;
}
void sort(void *v[], int left, int right, size_t offset, int (*comp)(const void *, const void *, size_t))
{
int i, last;
if (left >= right) return;
swap(v, left, (left + right) / 2);
last = left;
for (i = left + 1; i <= right; i++) {
if ((*comp)(v[i], v[left], offset) < 0)
swap(v, ++last, i);
}
swap(v, left, last);
sort(v, left, last - 1, offset, comp);
sort(v, last + 1, right, offset, comp);
}
offsetof can help
Here is a sample of using qsort from my another answer:
struct stringcase { char* string; void (*func)(void); };
void funcB1();
void funcAzA();
struct stringcase cases [] =
{ { "B1", funcB1 }
, { "AzA", funcAzA }
};
struct stringcase work_cases* = NULL;
int work_cases_cnt = 0;
// comparator function
int stringcase_cmp( const void *p1, const void *p2 )
{
return strcasecmp( ((struct stringcase*)p1)->string, ((struct stringcase*)p2)->string);
}
// prepare the data for searching
void prepare() {
// allocate the work_cases and copy cases values from it to work_cases
qsort( cases, i, sizeof( struct stringcase ), stringcase_cmp );
}
If you're using the GNU C library, there's an extension called qsort_r() that lets you pass an extra parameter to the comparison function.
Using some macros:
#include <stdio.h>
#include <stdlib.h>
struct data {
int x, y, z;
};
#define comp(member) comp_##member
#define comp_build(member) \
int comp_##member(const void *pa, const void *pb) \
{ \
const struct data *a = pa, *b = pb; \
return (a->member < b->member) ? -1 : (a->member > b->member); \
}
comp_build(x)
comp_build(y)
comp_build(z)
int main(void)
{
#define ROWS 3
struct data v[] = {
{3, 2, 1},
{1, 3, 2},
{2, 1, 3}
};
int i;
puts("Unsorted");
for (i = 0; i < ROWS; i++) printf("%d %d %d\n", v[i].x, v[i].y, v[i].z);
qsort(v, ROWS, sizeof(struct data), comp(x));
puts("Sorted by x");
for (i = 0; i < ROWS; i++) printf("%d %d %d\n", v[i].x, v[i].y, v[i].z);
puts("Sorted by y");
qsort(v, ROWS, sizeof(struct data), comp(y));
for (i = 0; i < ROWS; i++) printf("%d %d %d\n", v[i].x, v[i].y, v[i].z);
puts("Sorted by z");
qsort(v, ROWS, sizeof(struct data), comp(z));
for (i = 0; i < ROWS; i++) printf("%d %d %d\n", v[i].x, v[i].y, v[i].z);
return 0;
}
Related
Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 10 months ago.
Improve this question
Can anyone tell me what am I doing wrong in this generic quicksort code following this pseudocode Quicksort & Partition, the algorithm works, because I have already done it with integers only without the compare function by passing an int array to the quicksort and partition functions, but I have tried to make it work for both int and strings. In this code I have tested only the int values, but the code doesn't work, the output is the initial value of the array, it's the same exact thing for the strings I get the same initial array as an output. I have commented the string part because they get sorted the same way as the integers. This is the integer code that works Integer working code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//prototipi delle funzioni
typedef int (*compare_function)(const void *, const void *);
void generic_quicksort(void *v, int i, int f, size_t size, compare_function compare);
void generic_swap(void *a, void *b, size_t size);
int generic_partition(void *v, int i, int f, size_t size, compare_function compare);
void print_int_array(const int *array, size_t len) {
size_t i;
for (i = 0; i < len; i++)
printf("%d | ", array[i]);
putchar('\n');
}
//funzione di confronto
int compare_int(const void *, const void *);
int compare_str(const void *a, const void *b) {
const char **ia = (const char **)a;
const char **ib = (const char **)b;
return strcmp(*ia, *ib);
/* strcmp functions works exactly as expected from
comparison function */
}
void print_cstring_array(char **array, size_t len) {
size_t i;
for (i = 0; i < len; i++)
printf("%s | ", array[i]);
putchar('\n');
}
int main() {
int v[] = { 5, 4, 3, 2, 1 };
char *strings[] = { "Zorro", "Alex", "Celine", "Bill", "Forest", "Dexter" };
int n = sizeof(v) / sizeof(int);
print_int_array(v, n);
generic_quicksort((void *)v, 0, n - 1, sizeof(int), compare_int);
print_int_array(v, n);
/*
int s = sizeof(strings) / sizeof(*char);
print_cstring_array(strings, s);
generic_quicksort((void *)strings, 0, s - 1, sizeof(*char), compare_str);
print_cstring_array(strings, s);
*/
return 0;
}
int compare_int(const void *a, const void *b) {
return *((int*)a) - *((int*)b);
}
void generic_quicksort(void *v, int i, int f, size_t size, int (*comp)(const void *, const void *)) {
if (i >= f)
return;
int p = generic_partition(v, i, f, size, comp);
generic_quicksort(v, i, p - 1, size, comp);
generic_quicksort(v, p + 1, f, size, comp);
}
void generic_swap(void *a, void *b, size_t size) {
void *tmp = malloc(size);
memcpy(tmp, a, size);
memcpy(a, b, size);
memcpy(b, tmp, size);
free(tmp);
}
int generic_partition(void *v, int i, int f, size_t size, int (*comp)(const void *, const void *)) {
void *x = malloc(size);
int k, j;
memcpy(x, v + (i * size), size);
k = i - 1;
for (j = i; j <= f - 1; j++) {
if (comp(v + (j * size), x) <= 0) {
k++;
generic_swap(v + (k * size), v + (j * size), size);
}
}
generic_swap(v + ((k + 1) * size), v + (f * size), size);
free(x);
return (k + 1);
}
There are multiple problems in the code:
int n = sizeof(v) / sizeof(int); is risky: there is a silent assumption about the type of v. You should write int n = sizeof(v) / sizeof(*v);
The convention to pass the indices of the first and last elements of the slice is confusing and not idiomatic in C, you should pass the index of the first element and the index of the element after the last one. This allows for unsigned index types and empty arrays.
v + (j * size) uses void pointer arithmetics, which is an extension not available on all systems. Use unsigned char pointers for this.
the comparison function for integers has undefined behavior for large absolute values because subtracting them may cause an arithmetic overflow. You should use this instead:
int compare_int(const void *a, const void *b) {
int ia = *(const int *)a;
int ib = *(const int *)b;
return (ia > ib) - (ia < ib);
}
generic_swap uses malloc and memcpy. This causes much overhead for small elements, you should use a simple loop:
void generic_swap(void *a, void *b, size_t size) {
unsigned char *pa = (unsigned char *)a;
unsigned char *pb = (unsigned char *)b;
while (size-- > 0) {
unsigned char c = *pa;
*pa++ = *pb;
*pb++ = c;
}
}
The generic_partition in the reference uses the last element as the pivot, but you initialize x from the first element. You should write memcpy(x, v + (f * size), size);. This is causing the failure. The current code might work by coincidence for the int version. Using the first or the last element as a pivot causes worst case behavior on sorted arrays.
Here is a modified version:
#include <stdio.h>
#include <string.h>
//prototipi delle funzioni
typedef int (*compare_function)(const void *, const void *);
void generic_quicksort(void *v, int i, int f, size_t size, compare_function compare);
//funzione di confronto
int compare_int(const void *a, const void *b) {
int ia = *(const int *)a;
int ib = *(const int *)b;
return (ia > ib) - (ia < ib);
}
int compare_str(const void *a, const void *b) {
const char *sa = *(const char * const *)a;
const char *sb = *(const char * const *)b;
return strcmp(sa, sb);
}
void print_int_array(const int *array, size_t len) {
size_t i;
if (len > 0) {
printf("%d", array[0]);
for (i = 1; i < len; i++)
printf("| %d", array[i]);
}
putchar('\n');
}
void print_cstring_array(const char * const *array, size_t len) {
size_t i;
if (len > 0) {
printf("%s", array[0]);
for (i = 1; i < len; i++)
printf(" | %s", array[i]);
}
putchar('\n');
}
static void generic_swap(void *a, void *b, size_t size) {
unsigned char *pa = (unsigned char *)a;
unsigned char *pb = (unsigned char *)b;
while (size-- > 0) {
unsigned char c = *pa;
*pa++ = *pb;
*pb++ = c;
}
}
static int generic_partition(void *v, int i, int f, size_t size,
int (*comp)(const void *, const void *))
{
unsigned char *p = (unsigned char *)v;
int j, k = i;
// using first element as pivot
for (j = i + 1; j < f; j++) {
if (comp(p + j * size, p + i * size) <= 0) {
k++;
generic_swap(p + k * size, p + j * size, size);
}
}
/* swap the pivot to the end of the left part */
generic_swap(p + i * size, p + k * size, size);
return k;
}
void generic_quicksort(void *v, int i, int f, size_t size,
int (*comp)(const void *, const void *))
{
if (f > i + 1) {
int p = generic_partition(v, i, f, size, comp);
generic_quicksort(v, i, p, size, comp);
generic_quicksort(v, p + 1, f, size, comp);
}
}
int main() {
int v[] = { 5, 4, 3, 2, 1 };
int n = sizeof(v) / sizeof(*v);
const char *strings[] = { "Zorro", "Alex", "Celine", "Bill", "Forest", "Dexter" };
int s = sizeof(strings) / sizeof(*strings);
print_int_array(v, n);
generic_quicksort((void *)v, 0, n, sizeof(*v), compare_int);
print_int_array(v, n);
print_cstring_array(strings, s);
generic_quicksort((void *)strings, 0, s, sizeof(*strings), compare_str);
print_cstring_array(strings, s);
return 0;
}
Note that choosing the first or the last element as the pivot leads to worst case complexity for a sorted array. The depth of recursion for generic_quicksort will be the length of the array, potentially causing a stack overflow.
Here is a modified version that is protected against this, but still has quadratic time complexity on a sorted array:
void generic_quicksort(void *v, int i, int f, size_t size,
int (*comp)(const void *, const void *))
{
while (f > i + 1) {
int p = generic_partition(v, i, f, size, comp);
if (p - i < f - p) {
generic_quicksort(v, i, p, size, comp);
i = p + 1;
} else {
generic_quicksort(v, p + 1, f, size, comp);
f = p;
}
}
}
i'm trying to make a qsort function from scratch that sorts an array of pointers to structs
this is the code i have right now
static void swap(int *a, int *b) {
int tmp = *a;
*a = *b;
*b = tmp;
}
void _qsort(void* list, int list_len, int left, int right,
int(*comp)(const struct shpg_item *a, const struct shpg_item *b)) {
void *vt, *v3;
int i, last, mid = (left + right) / 2;
if (left >= right)
return;
void* vl = (char*)(list + (left * list_len));
void* vr = (char*)(list + (mid * list_len));
swap(vl, vr);
last = left;
for (i = left + 1; i <= right; i++) {
// vl and vt will have the starting address
// of the elements which will be passed to
// comp function.
vt = (char*)(list + (i * list_len));
if ((*comp)(vl, vt) > 0) {
++last;
v3 = (char*)(list + (last * list_len));
swap(vt, v3);
}
}
v3 = (char*)(list + (last * list_len));
swap(vl, v3);
_qsort(list,list_len, left, last - 1, comp);
trace_int(1);
_qsort(list, list_len, last + 1, right, comp);
}
void list_sort(struct shpg_item **list, int list_len,
int(*comp)(const struct shpg_item *a, const struct shpg_item *b)) {
_qsort(*list,list_len,0,(list_len-1),comp);
}
but this gives a segmentation fault error , can any one tell me why and help me ?
void * pointer addition
void * pointer addition is undefined behavior. But since the usual UB is OK, this may or may not be OP's trouble.
void _qsort(void* list, int list_len, int left, ...
...
(list + (left * list_len)) // UB
Instead recommend casting before addition.
// void* vl = (char*)(list + (left * list_len));
void* vl = ((char*) list) + (left * list_len);
Other issues may exist
I haven't check the entire code but your swap function seems wrong. Depending on the comment lines in your code;
// vl and vt will have the starting address
// of the elements which will be passed to
// comp function.
if (list + (left * list_len)) and (list + (last * list_len)) are pointers to be swapped (pointers to a string or a struct, for example), your swap function decoration & your caller line should read as:
Swapping two integers, floats, doubles, etc (in general swapping values only):
void swap(int *a, int *b) {
int t = *a;
*a = *b;
*b = t;
}
...
int x = 5;
int y = 3;
swap(&x, &y);
If you need to swap two pointers (a char * string or another type of pointer pointing to a struct), you can just swap pointer values without swapping the content pointed in the actual memory:
void swap(void **a, void **b) {
void *t = *a;
*a = *b;
*b = t;
}
...
char *x = "some string";
char *y = "some other string";
swap(&x, &y);
I've included a working example in the middle part of this answer, and also added an example using qsort.
Taking a quick look at the code I see problem here:
void _qsort(void* list, ...
Since list is an array of pointers it should be:
void _qsort(void** list, ...
or
void _qsort(void* list[], ...
With this declaration, pointer arithmetic will not be an issue, for example, list+3 == &list[3] == pointer to the 3rd pointer in the array. There's no need to cast list, as void** list will work fine in the main part of the code. The only code that will do any casting is the caller's compare function.
You can choose to emulate qsort's compare function parameters using type void **: compare(list+i, list+j), but it would be simpler to use type void *: compare(list[i], list[j]).
Swap should use void** as parameters. The call would be
swap(list+i, list+j)
/* ... */
void swap(void **i, void **j){
void * t;
t = *i;
*i = *j;
*j = t;
}
There are some comments about a void pointer possibly having a different size than a struct pointer or any type of data pointer, and that this could cause an issue. If this was true, then the C library function qsort() would not work because the first parameter for qsort is a void pointer, which will result in the caller's pointer being cast to a void pointer. In the caller's compare function, both parameters are const void pointers which the caller's compare function has to cast to the actual pointer types. With qsort() and the caller's compare function, parameters are being cast both to and from void pointers without issue.
C guarantees that a void pointer can be used to hold any type of data pointer, so in essence a void pointer is a generic data pointer (in 16 bit segment or selector environments, a generic "near" data pointer).
This is a working example, using typical Lomuto partition scheme (pivot = a[hi]):
#include <stdio.h>
#include <stdlib.h>
typedef struct {
int data;
char name[32];
}XMPL;
int cmpr(void * pi, void *pj)
{
if(((XMPL *)pi)->data < ((XMPL *)pj)->data)
return -1;
if(((XMPL *)pi)->data > ((XMPL *)pj)->data)
return 1;
return 0;
}
void swap(void **i, void **j){
void * t;
t = *i;
*i = *j;
*j = t;
}
void QuickSort(void **a, int lo, int hi, int(*cmpp)(void *, void *))
{
void *p;
int i, j;
while(lo < hi){
p = a[hi];
i = lo;
for(j = lo; j < hi; ++j){
if((cmpp(a[j], p) < 0)){
swap(a+i, a+j);
++i;
}
}
swap(a+i, a+hi);
if(i - lo <= hi - i){ /* avoid stack overflow */
QuickSort(a, lo, i-1, cmpp);
lo = i+1;
} else {
QuickSort(a, i+1, hi, cmpp);
hi = i-1;
}
}
}
#define COUNT (1024)
int main(int argc, char**argv)
{
XMPL *ax; /* array of structures */
XMPL **pax; /* array of pointers to structures */
int i;
ax = malloc(COUNT * sizeof(XMPL));
pax = malloc(COUNT * sizeof(void **));
for(i = 0; i < COUNT; i++){ /* init structs, array of ptrs */
ax[i].data = rand();
pax[i] = ax+i;
}
QuickSort(pax, 0, COUNT-1, cmpr);
for(i = 1; i < COUNT; i++){
if(pax[i-1]->data > pax[i]->data){
break;
}
}
if(i == COUNT)
printf("passed\n");
else
printf("failed\n");
free(pax);
free(ax);
return(0);
}
Hoare parition scheme will probably be a bit faster. However, in this case, merge sort should be faster than quick sort. Merge sort does more moves but fewer compares than quick sort, and in this case, only pointers are being moved, while the compare involves an indirection via a pointer and a call to a compare function via a pointer.
Same basic code, but using qsort. Note that the cmpr() function needed one more dereference for each parameter.
#include <stdio.h>
#include <stdlib.h>
typedef struct {
int data;
char name[32];
}XMPL;
int cmpr(const void * pi, const void *pj)
{
if((*(XMPL **)pi)->data < (*(XMPL **)pj)->data)
return -1;
if((*(XMPL **)pi)->data > (*(XMPL **)pj)->data)
return 1;
return 0;
}
#define COUNT (1024)
int main(int argc, char**argv)
{
XMPL *ax; /* array of structures */
XMPL **pax; /* array of pointers to structures */
int i;
ax = malloc(COUNT * sizeof(XMPL));
pax = malloc(COUNT * sizeof(void **));
for(i = 0; i < COUNT; i++){ /* init structs, array of ptrs */
ax[i].data = rand();
pax[i] = ax+i;
}
qsort(pax, COUNT, sizeof(XMPL *), cmpr);
for(i = 1; i < COUNT; i++){
if(pax[i-1]->data > pax[i]->data){
break;
}
}
if(i == COUNT)
printf("passed\n");
else
printf("failed\n");
free(pax);
free(ax);
return(0);
}
I'm working for this ADT project and i need to implement the insertion sort algorithm and verify that it works fine in an appropriate test function, that apply the algorithm to an array of double, a string and a struct.
I'm using as a guideline this pseudocode:
procedure InsertionSort(a, n)
for i <- 1, (n-1) do
j <- 1
while (j>0) and (a[j] < a[j-1]) do
Swap(a, j-1, j)
end while
end for
end procedure
I can't understand what the problem is.
The test function gives me error on the first assert () [ in test_sort_algorithm(...) ], therefore telling me that the algorithm is not working properly. But I can't understand where the error is. I've tried to recreate the algorithm for a normal array, without using a void pointer, and everything works. So I guess my problem is that I didn't understand the use of void pointers.
Can anyone please help me understand what's wrong with my Insertion sort algorithm?
Thank you.
This is my attempt:
/**
* \brief Sorts the given array according to the insertion sort algorithm.
*
* \param base Pointer to the start of the input array.
* \param n Number of elements in the input array.
* \param size The size (in bytes) of each element of the array.
* \param cmp Pointer to the comparison function used to sort the array in
* ascending order.
* The comparison function is called with two arguments that point to the
* objects being compared and must return an interger less than, equal to, or
* greater than zero if the first argument is considered to be respectively
* less than, equal to, or greater than the second.
*/
void upo_insertion_sort(void *base, size_t n, size_t size, upo_sort_comparator_t cmp)
{
size_t i, j;
unsigned char *ptr = base;
for (i = 1; i <= n-1; i++)
{
j = i;
while ( (j > 0) && (cmp(ptr+j*size, ptr+(j-1)*size) < 0) )
{
swap(ptr+(j-1)*size, ptr+j*size, size);
j = j - 1;
}
}
}
void swap(void *a, void *b, size_t n)
{
void *tmp = malloc(n);
if (tmp == NULL) { abort(); }
memmove(tmp, a, n);
memmove(a, b, n);
memmove(b, tmp, n);
free(tmp);
}
upo_sort_comparator_t cmp is a pointer to a comparison function. Declaration:
/** \brief Type definition for comparison functions used to compare two elements */
typedef int (*upo_sort_comparator_t)(const void*, const void*);
As I say before this function must be tested, to see if the algorithm work properly.
Code:
#define N 9
struct item_s
{
long id;
char *name;
};
typedef struct item_s item_t;
static double da[] = {3.0,1.3,0.4,7.8,13.2,-1.1,6.0,-3.2,78};
static double expect_da[] = {-3.2,-1.1,0.4,1.3,3.0,6.0,7.8,13.2,78.0};
static const char *sa[] = {"The","quick","brown","fox","jumps","over","the","lazy","dog"};
static const char *expect_sa[] = {"The","brown","dog","fox","jumps","lazy","over","quick","the"};
static item_t ca[] = {{9,"john"},{8,"jane"},{7,"mary"},{6,"anthony"},{5,"stevie"},{4,"bob"},{3,"ann"},{2,"claire"},{1,"alice"}};
static item_t expect_ca[] = {{1,"alice"},{2,"claire"},{3,"ann"},{4,"bob"},{5,"stevie"},{6,"anthony"},{7,"mary"},{8,"jane"},{9,"john"}};
/* Comparators */
static int double_comparator(const void *a, const void *b);
static int string_comparator(const void *a, const void *b);
static int item_comparator(const void *a, const void *b);
/* Test cases */
void test_sort_algorithm(void (*sort)(void*,size_t,size_t,upo_sort_comparator_t));
static void test_insertion_sort();
int double_comparator(const void *a, const void *b)
{
const double *aa = a;
const double *bb = b;
return (*aa > *bb) - (*aa < *bb);
}
int string_comparator(const void *a, const void *b)
{
const char **aa = (const char**) a;
const char **bb = (const char**) b;
return strcmp(*aa, *bb);
}
int item_comparator(const void *a, const void *b)
{
const item_t *aa = a;
const item_t *bb = b;
return (aa->id > bb->id) - (aa->id < bb->id);
}
void test_sort_algorithm(void (*sort)(void*,size_t,size_t,upo_sort_comparator_t))
{
int ok = 1;
size_t i = 0;
double *da_clone = NULL;
char **sa_clone = NULL;
item_t *ca_clone = NULL;
ok = 1;
da_clone = malloc(N*sizeof(double));
assert( da_clone != NULL );
memcpy(da_clone, da, N*sizeof(double));
sort(da_clone, N, sizeof(double), double_comparator);
for (i = 0; i < N; ++i)
{
ok &= !double_comparator(&da_clone[i], &expect_da[i]);
}
free(da_clone);
assert( ok );
ok = 1;
sa_clone = malloc(N*sizeof(char*));
assert( sa_clone != NULL );
memcpy(sa_clone, sa, N*sizeof(char*));
sort(sa_clone, N, sizeof(char*), string_comparator);
for (i = 0; i < N; ++i)
{
ok &= !string_comparator(&sa_clone[i], &expect_sa[i]);
}
free(sa_clone);
assert( ok );
ok = 1;
ca_clone = malloc(N*sizeof(item_t));
assert( ca_clone != NULL );
memcpy(ca_clone, ca, N*sizeof(item_t));
sort(ca_clone, N, sizeof(item_t), item_comparator);
for (i = 0; i < N; ++i)
{
ok &= !item_comparator(&ca_clone[i], &expect_ca[i]);
}
free(ca_clone);
assert( ok );
}
void test_insertion_sort()
{
test_sort_algorithm(upo_insertion_sort);
}
int main()
{
printf("Test case 'insertion sort'... ");
fflush(stdout);
test_insertion_sort();
printf("OK\n");
return 0;
}
There is an array of structures that I want to sort by the value of specific fields. The data types in the fields are identical. In c++ I used pointer to member to avoid rewriting same sort for different fields.
#include<string.h>
typedef struct {
int id;
int year;
int price;
} example_struct;
void sort_arr(example_struct a[5], const char* usr_field) {
int example_struct::*field = nullptr;
if (strcmp(usr_field, "id") == 0)
field = &example_struct::id;
else if (strcmp(usr_field, "year") == 0)
field = &example_struct::year;
else if (strcmp(usr_field, "price") == 0)
field = &example_struct::price;
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5 - i; j++) {
if (a[j].*field > a[j + 1].*field) {
buff = a[j];
a[j] = a[j+1];
a[j+1] = buff;
}
}
}
}
int main {
example_struct a[5];
fill_arr(a); //somehow filling the array
sort_arr(a,"year");
}
I want to know, whether this code can be somehow emulated on C and how to do this
You should be able to use offsetof macro to get the offset to the member you want to compare, then use pointer arithmetic to get the value of the member. Note you won't get any type information for that member, just the offset into the struct so you'll want to ensure you know the sizes of the member fields.
So to do your compares, you could do this:
void sort_arr(example_struct a[5], const char* usr_field) {
size_t offset;
if (strcmp(usr_field, "id") == 0)
offset = offsetof(example_struct, id);
else if (strcmp(usr_field, "year") == 0)
offset = offsetof(example_struct, year);
else if (strcmp(usr_field, "price") == 0)
offset = offsetof(example_struct, price);
...
if (*((int*)((char*)&a[j])+offset) > *((int*)((char*)&a[j+1])+offset)) {
...
It may help to define some macros to make the member access a little more pleasant to work with.
#define memberat(ref, offset, membertype) *((membertype*)((char*)ref)+(size_t)offset)
if (memberat(&a[j], offset, int) > memberat(&a[j+1], offset, int))
If you're dealing with members of different types, you'll have to use function pointers to handle the comparisons since how they are compared will differ.
The marco offsetof is the answer, but sorting this way is overly hard and not especially type-safe, (if one changes the type, one will probably not receive an error or maybe even a warning.) This is typical C code to sort; one has one function that picks out the int in the struct that one wants for each int value and compares for qsort.
#include <stdlib.h> /* EXIT_* qsort rand */
#include <stdio.h> /* printf */
#include <string.h> /* strcmp */
#include <assert.h> /* assert */
struct ExampleStruct { int id, year, price; };
static void fill(struct ExampleStruct *const a) {
assert(a);
/* <http://c-faq.com/lib/randrange.html> */
a->id = rand() / (RAND_MAX / 99998 + 1) + 1;
a->year = rand() / (RAND_MAX / 119 + 1) + 1900;
a->price = rand() / (RAND_MAX / 999999 + 1) + 1;
}
static void print(struct ExampleStruct *const a) {
assert(a);
printf("%05d\t%d\t$%d\n", a->id, a->year, a->price);
}
static void for_each(struct ExampleStruct *const a, const size_t a_size,
void (*const action)(struct ExampleStruct *const)) {
size_t i;
assert(a && action);
for(i = 0; i < a_size; i++) action(&a[i]);
}
static int cmp_int(const int a, const int b) { return (a > b) - (b > a); }
static int cmp_id(const void *const va, const void *const vb) {
const struct ExampleStruct *const a = va, *const b = vb;
return cmp_int(a->id, b->id);
}
static int cmp_year(const void *const va, const void *const vb) {
const struct ExampleStruct *const a = va, *const b = vb;
return cmp_int(a->year, b->year);
}
static int cmp_price(const void *const va, const void *const vb) {
const struct ExampleStruct *const a = va, *const b = vb;
return cmp_int(a->price, b->price);
}
int main(void) {
struct ExampleStruct a[5];
size_t a_size = sizeof a / sizeof *a;
for_each(a, a_size, &fill);
printf("Sorted by id.\n");
qsort(a, a_size, sizeof *a, &cmp_id);
for_each(a, a_size, &print);
printf("Sorted by year.\n");
qsort(a, a_size, sizeof *a, &cmp_year);
for_each(a, a_size, &print);
printf("Sorted by price.\n");
qsort(a, a_size, sizeof *a, &cmp_price);
for_each(a, a_size, &print);
return EXIT_SUCCESS;
}
I am working on a generic merge sort algorithm. Now the problem is I keep getting garbage values when I print the content of the supposedly "sorted" array.
The merge sort algorithm:
void merge(void *a, int n, int size, int (*fcmp)(const void *, const void *)){
int i, j, k, mid=n/2;
void * temp = (void *)malloc(n*size);
for(i=0, j=mid, k=0; k<n; k++){
if((i<mid)&&(j>= n)){
memcpy(temp+(k*size), a+i*size, size);
i++;
}
else if((i<mid)&&(fcmp(a + i*size, a+j*size) <= 0)){
memcpy(temp+(k*size), a+j*size, size);
j++;
}
}
for(i=0, j=0; j<n; i++, j++)
memcpy(a+(j*size),temp+(i*size),size);
free(temp);
}
void genmsort(void *a, int n, int size, int (*fcmp)(const void *, const void *)){
if(n>1){
genmsort(a, n/2, size, (int(*)(const void *, const void *)) compnode);
genmsort(a+(n/2)*size, n-n/2, size, (int(*)(const void *, const void *)) compnode);
merge(a, n, size, (int(*)(const void *, const void *)) compnode);
}
}
The compnode function:
int compnode(node *a, node *b){
return (strcmp(a->name, b->name));
}
The initialization function:
void init_node(node a[], int n){
int i;
for(i=0; i<n; i++){
a[i].stdno=i+1;
sprintf(a[i].name, "%li", a[i].stdno);
}
srand(8);
for(i=0; i<n; i++)
genswap(a+i, a+(rand()%n), sizeof(node));
}
And the main function:
int main(){
int n=10;
clock_t t1, t2;
node *b;
b=(node *)malloc(n*sizeof(node));
init_node(b, n);
t1=clock();
genmsort(b, n, sizeof(node), (int(*)(const void *, const void *)) compnode);
t2=clock();
free(b);
}
What could be wrong here? I'm sorry for the lengthy code but I hope you can understand it. I would really appreciate your help because I am stuck with this code for some time now.
The latitude of things in this code are copious. Some are show-stoppers, but ultimately it is your merge function. A typical merge algorithm moves one item into the target buffer with each iteration until such time as one list or the other is exhausted. Once that happens the remaining items in the remaining list are bulk-copied into place and the algorithm terminates.
You have a fundamental flaw, and we'll cover that now. Your main loop runs k all the way through to n, and at least that is right. But, look at your first expressions in your if-else-if conditions:
if((i<mid)&&(j>= n))
{
memcpy(temp+(k*size), a+i*size, size);
i++;
}
else if((i<mid)&&(fcmp(a + i*size, a+j*size) <= 0))
{
memcpy(temp+(k*size), a+j*size, size);
j++;
}
They both have i<mid, so this could be simplified to be:
if (i<mid)
{
if (j>=n)
{
memcpy(temp+(k*size), a+i*size, size);
i++;
}
else if (fcmp(a + i*size, a+j*size) <= 0))
{
memcpy(temp+(k*size), a+j*size, size);
j++;
}
}
which means if your i-side is ever exhausted before your j-side, you simply do nothing from that point on, just incrementing k until it reaches n. The rest of the j-side of the split-list is completely ignored. Then, at the end of the function you copy uninitialized data right over the top of your original array.
Some things to consider. First, typedef your comparator function requirements and stick to it. It is the responsibility of the comparator to adhere to the requirements of the callback-requestor; not the other way around.
typedef int (*fn_cmp)(const void*, const void*);
and use this correctly by implementing your callback to that standard.
// compare two nodes.
int compare_node(const void* lhs, const void* rhs)
{
const node* lhn = lhs;
const node* rhn = rhs;
return (strcmp(lhn->name, rhn->name));
}
This also makes your generic mergesort much cleaner:
// generic mergesort algorithm
void genmsort(void *src, unsigned int len, unsigned int size, fn_cmp fcmp)
{
if (len < 2)
return;
unsigned int mid = len/2;
genmsort(src, mid, size, fcmp);
genmsort((unsigned char*)src+(mid*size), len - mid, size, fcmp);
merge(src, mid, len-mid, size, fcmp);
}
Readability aside, the biggest difference between the following merge and yours is the addition of the second length parameter (the fact that this one works is considered a bonus). You're code inferred this value from the single length originally passed in; something you did in an entirely separate place in your code when calculating your recursive partition sizes Those same sizes need to be passed here as well, for multiple reasons that include consistency and usability).
Consider the following please. If it is possible to annotate this algorithm better, or make it clearer, I'm at a loss to see how:
// merges two lists back to back in a single sequence.
void merge(void *src,
unsigned int alen, // note parition size.
unsigned int blen, // and again here.
unsigned int size,
fn_cmp fcmp)
{
void *bsrc = (unsigned char*)src + alen * size;
void *dst = malloc((alen + blen)*size);
unsigned int a = 0, b = 0, k = 0;
for (k=0; k<(alen+blen); ++k)
{
// still got a's ?
if (a < alen)
{
// still got b's ?
if (b < blen)
{
// get "lesser" of the two.
if (fcmp((const unsigned char*)src + a*size,
(const unsigned char*)bsrc + b*size) <= 0)
{
// a is less. move it in.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)src + a++*size, size);
}
else
{ // b is less. move it in.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)bsrc + b++*size, size);
}
}
else
{ // no more b's. move the rest of the a's
// into the target and leave.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)src + a*size, (alen - a)*size);
k += (alen-a);
}
}
else
{ // else no a's. move the rest of the b's into
// the target and leave.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)bsrc + b*size, (blen - b)*size);
k += (blen-b);
}
}
// copy final output.
memcpy(src, dst, (alen+blen)*size);
free(dst);
}
Finally, this codes does not require any compiler extensions such as the standard-violating incremental void* you so-heavily exploited in your code. I strongly advise you stay clear of such extensions.
The following is the full test program used to verify the algorithm above and its interface. Read it carefully.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <time.h>
// simple node definition.
typedef struct node
{
char name[32];
int id;
} node;
// compare two nodes.
int compare_node_names(const void* lhs, const void* rhs)
{
const node* lhn = lhs;
const node* rhn = rhs;
return (strcmp(lhn->name, rhn->name));
}
// compare two nodes.
int compare_node_ids(const void* lhs, const void* rhs)
{
const node* lhn = lhs;
const node* rhn = rhs;
return (lhn->id - rhn->id);
}
// comparator requirements.
typedef int (*fn_cmp)(const void*, const void*);
// merges two lists back to back in a single sequence.
void merge(void *src,
unsigned int alen, // note parition size.
unsigned int blen, // and again here.
unsigned int size,
fn_cmp fcmp)
{
void *bsrc = (unsigned char*)src + alen * size;
void *dst = malloc((alen + blen)*size);
unsigned int a = 0, b = 0, k = 0;
for (k=0; k<(alen+blen); ++k)
{
// still got a's ?
if (a < alen)
{
// still got b's ?
if (b < blen)
{
// get "lesser" of the two.
if (fcmp((const unsigned char*)src + a*size,
(const unsigned char*)bsrc + b*size) <= 0)
{
// a is less. move it in.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)src + a++*size, size);
}
else
{ // b is less. move it in.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)bsrc + b++*size, size);
}
}
else
{ // no more b's. move the rest of the a's
// into the target and leave.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)src + a*size, (alen - a)*size);
k += (alen-a);
}
}
else
{ // else no a's. move the rest of the b's into
// the target and leave.
memcpy((unsigned char *)dst + k*size,
(const unsigned char*)bsrc + b*size, (blen - b)*size);
k += (blen-b);
}
}
// copy final output.
memcpy(src, dst, (alen+blen)*size);
free(dst);
}
// generic mergesort algorithm
void genmsort(void *src, unsigned int len, unsigned int size, fn_cmp fcmp)
{
if (len < 2)
return;
unsigned int mid = len/2;
genmsort(src, mid, size, fcmp);
genmsort((unsigned char*)src+(mid*size), len - mid, size, fcmp);
merge(src, mid, len-mid, size, fcmp);
}
int main()
{
static const unsigned int N = 50;
node *data = malloc(N * sizeof(*data));
int i=0;
srand((unsigned)time(NULL));
for (i=0;i<N;++i)
{
data[i].id = i+1;
sprintf(data[i].name, "String%.3d", 1 + rand() % 999);
}
// sort on names.
genmsort(data, N, sizeof(data[0]), compare_node_names);
for (i=0;i<N;++i)
printf("%s : %u\n", data[i].name, data[i].id);
printf("\n");
// use a different comparator, this time by id.
genmsort(data, N, sizeof(data[0]), compare_node_ids);
for (i=0;i<N;++i)
printf("%s : %u\n", data[i].name, data[i].id);
printf("\n");
free(data);
return 0;
}
Output
String053 : 49
String097 : 38
String104 : 46
String122 : 41
String129 : 8
String139 : 3
String168 : 30
String184 : 22
String222 : 16
String230 : 28
String249 : 4
String265 : 34
String285 : 44
String295 : 20
String298 : 47
String300 : 19
String321 : 2
String375 : 37
String396 : 50
String408 : 13
String430 : 31
String466 : 35
String483 : 24
String484 : 27
String491 : 25
String494 : 39
String507 : 10
String513 : 7
String514 : 11
String539 : 5
String556 : 29
String570 : 43
String583 : 33
String584 : 42
String620 : 15
String632 : 12
String671 : 21
String705 : 23
String710 : 14
String714 : 45
String724 : 18
String733 : 9
String755 : 48
String805 : 36
String814 : 6
String847 : 32
String876 : 40
String893 : 26
String906 : 17
String972 : 1
String972 : 1
String321 : 2
String139 : 3
String249 : 4
String539 : 5
String814 : 6
String513 : 7
String129 : 8
String733 : 9
String507 : 10
String514 : 11
String632 : 12
String408 : 13
String710 : 14
String620 : 15
String222 : 16
String906 : 17
String724 : 18
String300 : 19
String295 : 20
String671 : 21
String184 : 22
String705 : 23
String483 : 24
String491 : 25
String893 : 26
String484 : 27
String230 : 28
String556 : 29
String168 : 30
String430 : 31
String847 : 32
String583 : 33
String265 : 34
String466 : 35
String805 : 36
String375 : 37
String097 : 38
String494 : 39
String876 : 40
String122 : 41
String584 : 42
String570 : 43
String285 : 44
String714 : 45
String104 : 46
String298 : 47
String755 : 48
String053 : 49
String396 : 50
Auxilliary problems
Transcribed from the comments to the question.
For pity's sake, write compnode() sanely so that you don't have to go through the ghastly casts! Write it to take two const void * arguments and convert them in the code (it'll be a no-op):
int compnode(const void *v1, const void *v2)
{
const node *a = v1;
const node *b = v2;
return strcmp(a->name, b->name);
}
Also, don't use GCC's extensions. It is a bad habit if you have any pretensions towards writing portable code. Writing a+(n/2)*size where the argument is void *a is undefined behaviour per the C standard. You have to convert to char * (or some other type other than void *) before adding.
In genmnode(), you should be passing fcmp to the recursive functions and the merge() function, instead of passing compnode() directly.
Gannicus asked:
What do you mean pass fcmp instead of compnode?
WhozCraig explained:
[It] means you're passing your custom comparator function to the "generic" sort function as the fcmp parameter. Within that function, you blindly pass compnode to the recursive calls. You should be passing fcmp to those recursive calls instead, or your "generic" ideology just went out the window.
Primary problem
The primary problem is in your merge() function. The interface to that is most unusual. Normally, you pass two arrays to be merged, along with the size of each. You've chosen to pass one array and do some fancy footwork. The code in the main for loop in screws everything up.
void merge(void *a, int n, int size, int (*fcmp)(const void *, const void *)){
int i, j, k, mid=n/2;
void * temp = (void *)malloc(n*size);
for(i=0, j=mid, k=0; k<n; k++){
if((i<mid)&&(j>= n)){
memcpy(temp+(k*size), a+i*size, size);
i++;
}
else if((i<mid)&&(fcmp(a + i*size, a+j*size) <= 0)){
memcpy(temp+(k*size), a+j*size, size);
j++;
}
}
for(i=0, j=0; j<n; i++, j++)
memcpy(a+(j*size),temp+(i*size),size);
free(temp);
}
The trailing loop should be a single memcpy() operation, but what's there will work.
You have a single array, a, with a total of n elements of the given size. It must be treated as two sub-arrays, one of elements [0..mid), the LHS, and the other of elements [mid..n), the RHS. The ranges include the lower bound and exclude the upper bound.
The first condition inside the loop says 'if there is an element left in LHS and nothing left in RHS, copy the LHS element to the output'. The second condition says 'if there is an element left in the LHS (and, by elimination, there is an element in the RHS too), and the LHS compares smaller than the RHS, then copy the RHS element to the output'.
There are different and ultimately equivalent ways to write the merge process, but
normally the easiest to understand is:
while (item left in LHS and item left in RHS)
{
if (item in LHS is smaller than item in RHS)
copy LHS to result
else
copy RHS to result
}
while (item left in LHS)
copy item to result
while (item left in RHS)
copy item to result
The loop implemented does not come close to implementing that logic, or one of its equivalents.
Working code
Here's my diagnostic version of your code. The memset() at the top of merge() should not matter; you should be copying to temp and writing over all the X's. In practice, you are not.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node node;
struct node
{
long stdno;
char name[20];
};
static
void genswap(void *v1, void *v2, size_t size)
{
char v3[size];
memmove(v3, v1, size);
memmove(v1, v2, size);
memmove(v2, v3, size);
}
static
void print_node(const char *tag, node a[], int n)
{
printf("%s\n", tag);
for (int i = 0; i < n; i++)
printf("%2d: %p %2ld %s\n", i, &a[i], a[i].stdno, a[i].name);
}
static
void merge(void *a, int n, int size, int (*fcmp)(const void *, const void *))
{
int i, j, k, mid = n/2;
void *temp = (void *)malloc(n*size);
memset(temp, 'X', n*size);
printf("-->> %s\n", __func__);
print_node("Before Merge", (node *)a, n);
for (i = 0, j = mid, k = 0; k < n; k++)
{
if ((i < mid) && (j >= n))
{
memcpy(temp+(k*size), a+i*size, size);
i++;
}
else if ((i < mid) && (fcmp(a + i*size, a+j*size) <= 0))
{
memcpy(temp+(k*size), a+j*size, size);
j++;
}
}
print_node("Mid Merge", (node *)temp, n);
for (i = 0, j = 0; j < n; i++, j++)
memcpy(a+(j*size), temp+(i*size), size);
free(temp);
print_node("After Merge", (node *)a, n);
printf("<<-- %s\n", __func__);
}
static
void genmsort(void *a, int n, int size, int (*fcmp)(const void *, const void *))
{
if (n > 1)
{
genmsort(a, n/2, size, fcmp);
genmsort(a+(n/2)*size, n-n/2, size, fcmp);
merge(a, n, size, fcmp);
}
}
static
int compnode(const void *v1, const void *v2)
{
const node *a = v1;
const node *b = v2;
printf("%s: (%ld:%s) vs (%ld:%s)\n", __func__, a->stdno, a->name, b->stdno, b->name);
return(strcmp(a->name, b->name));
}
static
void init_node(node a[], int n)
{
for (int i = 0; i < n; i++)
{
a[i].stdno = i+1;
sprintf(a[i].name, "%li", a[i].stdno);
}
srand(8);
for (int i = 0; i < n; i++)
genswap(a+i, a+(rand()%n), sizeof(node));
}
int main(void)
{
int n = 10;
node *b = (node *)malloc(n*sizeof(node));
init_node(b, n);
print_node("Before:", b, n);
genmsort(b, n, sizeof(node), compnode);
print_node("After:", b, n);
free(b);
return 0;
}