Hash functions not sensitive to element order - c

I'm working with integer sequences with non-repeating elements, for some reasons I am tring to remove duplicates by building a hashset.
int * a = {123, 145, 210, 77};
int * b = {145, 77, 123, 210}; // should be removed
int * c = {123, 37, 16};
int * d = {123, 145, 72, 91};
Is there order insensitive hash functions that return same result for a and b?
I have come out of some solutions, but they performs poorly:
sorting - The sequences are immutable, sorting will invlove extra space and O(NlogN) time.
xor - The element in sequences ranges from 0 to hundreds, many bits of hash value may be wasted.
Is there other methods?

Hashing is not the only thing you should do. Because of information loss, completely different arrays could return the same hash. In order to only remove duplicates, you would need to check for equivalence too. A hashset does that too, but places the elements based on a hash, so you can find them more easily.
Here is an example implementation:
#include <stdlib.h>
#include <stdbool.h>
struct hashset {
int count;
int capacity;
struct hashset_element {
int hash;
int arrlen;
int *arrval;
} *elements;
};
void init_hashset(struct hashset *set) {
set->count = 0;
set->capacity = 0;
set->elements = NULL;
}
int hash(int *arr, int len) {
// you can replace this hash function
// this is a pretty simple one
int out = 0;
for (int i = 0; i < len; i++) {
out += arr[i];
}
return out;
}
void arrequals(int *arr1, int len1, int *arr2, int len2) {
if (len1 != len2)
return false;
arr1srt = sort(arr1, len1);
arr2srt = sort(arr2, len2);
for (int i = 0; i < len1; i++) {
if (arr1srt[i] != arr2srt[i])
free(arr1srt);
free(arr2srt);
return false;
}
free(arr1srt);
free(arr2srt);
return true;
}
bool hashset_contains(struct hashset *set, int *arr, int len) {
int rawhash = hash(arr, len);
int hash = rawhash % set->capacity;
for (int i = hash; i < set->capacity; i++) {
if (set->elements[i]->arrval == NULL)
return false;
if (arrequals(set->elements[i]->arrval,
set->elements[i]->arrlen, arr, len)
return true;
}
for (int i = 0; i < hash; i++) {
if (set->elements[i]->arrval == NULL)
return false;
if (set->elements[i]->hash == rawhash &&
arrequals(set->elements[i]->arrval,
set->elements[i]->arrlen, arr, len)
return true;
}
return false;
}
void hashset_realloc(struct hashset *set) {
struct hashset_element* oldarr = set->elements;
int old_capacity = set->capacity;
set->elements = malloc(sizeof(struct hash_element) * set->capacity + 1024);
set->capacity += 1024;
for (int i = 0; i < set->capacity; i++) {
if (oldarr[i]->arrval != NULL)
hashset_add_element(set, oldarr[i]->arrval, oldarr[i]->arrlen);
}
}
void hashset_add_element(struct hashset *set, int *arr, int len) {
if (!hashset_contains(set, arr, len)) {
if (set->count >= set->capacity / 2) {
realloc_hashset(set);
}
int rawhash = hash_element(arr, len);
int hash = rawhash % set->capacity;
for (int i = hash; i < set->capacity; i++) {
if (set->elements[i]->arrval == NULL) {
set->elements[i]->hash = rawhash;
set->elements[i]->arrval = arr;
set->elements[i]->arrlen = len;
set->count++;
return;
}
}
for (int i = 0; i < hash; i++) {
if (set->elements[i]->arrval == NULL) {
set->elements[i]->hash = rawhash;
set->elements[i]->arrval = arr;
set->elements[i]->arrlen = len;
set->count++;
return;
}
}
}
}
void destroy_hashset(struct hashset *set) {
if (set->elements != NULL)
free(set->elements);
}
int hashset_to_array(struct hashset *set, int **arrout, int *lenout, int maxlen) {
int w = 0;
for (int i = 0; i < capacity; i++) {
if (w >= maxlen)
break;
arrout[w] = set->elements[i]->arrval;
lenout[w] = set->elements[i]->arrlen;
w++;
}
return set->count;
}
I did not test this code, but give it a try and feel free to correct me, if there is an error in my code. You have to implement your sort function yourself. I don't know how big the arrays are, you are working with, so I can't pick an ideal algorithm for you. Order insensitive comparing is only possible in O(n*log(n)). O(n) is possible, if the integers have a maximum size, small enough to use a counting table.
In ideal cases this hashset has a runtime of O(1). The worst case runtime is O(n), which is very unlikely to happen. The hash algorithm has a runtime of O(n), which is not ideal but okay for small arrays.

Related

Best way to increase all elements in array until they reach a specific value in C

I am writing a function to increase the values of all elements in an array until they all reach a specified value, this value can be different for each item. This is what I have so far.
Edit: I forgot to mention that the reason not to set it directly as in arr[i].value = arr[i].setpoint (as pointed out by kaylum) it is because there is a limitation where, while incrementing, the values in the array can only differ by the same unit of increment, in this case 1. So abs(arr[i].value - arr[i+1].value) <= 1. Once an element reaches the setpoint it is free of this limitation (so that the other elements can reach its setpoint)
#include <stdio.h>
typedef struct
{
unsigned value;
unsigned setpoint;
} ValSet;
void set_all(ValSet arr[static 1], size_t arr_size)
{
for (size_t cnt = arr_size; cnt;)
{
cnt = arr_size;
for (size_t i = 0; i < arr_size; i++)
{
if (arr[i].value != arr[i].setpoint)
{
arr[i].value++;
}
else
{
cnt--;
}
}
}
}
int main()
{
ValSet arr[2] = {{0, 2}, {0, 4}};
set_all(arr, 2);
return 0;
}
This generates the desired output so, after the call to set_all(arr, 2), arr[0].value = 2 and arr[1].value = 4, but I am not sure this is the best idea. Is there a better way to achieve this?
You were a bit unspecific about what you want exactly, so here are some solutions:
typedef struct {
unsigned value;
unsigned setpoint;
} ValSet;
/**
* for each element:
* increases value by one, until value is not smaller than setpoint
* will never decrease value
*/
void increase_all_slow(ValSet* arr, int len) {
for (int i = 0; i < len; i++) {
while (arr[i].value < arr[i].setpoint) {
arr[i].value++;
}
}
}
/**
* for each element:
* sets value to setpoint, if value is smaller than setpoint
* will never decrease value
*/
void increase_all(ValSet* arr, int len) {
for (int i = 0; i < len; i++) {
if (arr[i].value < arr[i].setpoint) {
arr[i].value = arr[i].setpoint;
}
}
}
/**
* for each element:
* sets value to setpoint
* decreases value if less than setpoint
*/
void set_all(ValSet* arr, int len) {
for (int i = 0; i < len; i++) {
arr[i].value = arr[i].setpoint;
}
}
Your implementation has a lot of redundant checks, every element will be compared if (arr.value[i] != arr.setpoint[i]) as long as any element is not set correctly. This will make it very slow.
I hope my code is self explanatory (I even added comments where usually you don't need them). If not, feel free to ask
Edit:
Now I know what you want. Next time tell us before:
typedef struct {
unsigned value;
unsigned setpoint;
} ValSet;
/**
* Until every element is equal (or bigger) to its corresponding setpoint.
* Every elements value that is not, will be increased.
*/
void increase_all(ValSet* arr, int len) {
int max_allowed = INT_MAX;
for (int i = 0; i < len; i++) {
if (arr[i].value < max_allowed) {
max_allowed = arr[i].value;
}
}
bool done = false;
while (!done) {
max_allowed++;
done = true;
for (int i = 0; i < len; i++) {
if (arr[i].value < arr[i].setpoint && arr[i].value < max_allowed) {
arr[i].value++;
done = false;
}
}
}
}

Why does the point crash when free it?

In the following code, the result is ok, but the code will be crash when executing finish, and increase one error: Heap corruption detected, the free list is damaged at 0x600000008f50
int *mergeSort(int *a,int count) {
int leftCount = count / 2;
int rightCount = count - leftCount;
int *leftData = getData(a, 0, leftCount);
int *rightData = getData(a, leftCount, count);
int *sortedLeftData = mergeSort(leftData, leftCount);
int *sortedRightData = mergeSort(rightData, rightCount);
int *resultData = mergeData(sortedLeftData, sortedRightData, leftCount,
rightCount);
return resultData;
}
int *getData(int *a,int from, int to) {
if (from > to) { return nil; }
int *res = malloc(to - from + 1);
for (int index = from; index < to; index ++) {
int value = a[index];
res[index-from] = value;
}
return res;
}
int *mergeData(int *a, int *b, int acount, int bcount) {
int *result = malloc(acount + bcount);
int aindex,bindex,rindex;
aindex = bindex = rindex = 0;
while (aindex < acount | bindex < bcount) {
int value,avalue = INT_MAX,bvalue = INT_MAX;
if (aindex < acount) { avalue = a[aindex]; }
if (bindex < bcount) { bvalue = b[bindex]; }
// get value from a point.
if (avalue <= bvalue) {
value = avalue;
aindex ++;
}else {
// get value from b point.
value = bvalue;
bindex ++;
}
result[rindex] = value;
rindex ++;
}
return result;
}
I don't understand why does crash when free the point, any answer will helpfull, thanks.
All of your allocations are too small, and thus you are overflowing your buffers.
The malloc function allocates the requested number of bytes. You need to multiply the number of elements you require by sizeof(int) if your elements are int type. e.g.
int *result = malloc((acount + bcount) * sizeof(int));
Other potential problems I spotted while reading your code are:
Using the bitwise-or operator instead of logical-or:
while (aindex < acount | bindex < bcount)
// ^ should be ||
You never free your temporary buffers, thus your program will blow out memory by leaking like crazy. You must free leftData, rightData, sortedLeftData and sortedRightData in the mergeSort function after you are finished with them.
Note that merge sort actually does not require so much allocation. Doing so will have a huge impact on performance. An efficient implementation only requires a single additional buffer for scratch operations, which can be allocated at the beginning.
I did implementation merge sort use single buffer, as the following code:
void mergeSort(int *a, int count) {
int *tempBuffer = malloc(count * sizeof(int));
mergeSortWithBuffer(a, 0, 0, count - 1,tempBuffer);
free(tempBuffer);
}
void mergeSortWithBuffer(int *a, int leftStart, int rightStart, int end, int *tempBuffer) {
int leftCount = rightStart - leftStart;
int rightCount = end - rightStart + 1;
if (leftCount + rightCount <= 1) { return; }
if (leftCount != 0) {
// left dichotomy
int lls = leftStart;
int lrs = leftStart + leftCount/2;
int lnd = rightStart - 1;
mergeSortWithBuffer(a, lls, lrs, lnd,tempBuffer);
}
if (rightCount != 0) {
// right dichotomy
int rls = rightStart;
int rrs = rightStart + rightCount/2;
int rnd = end;
mergeSortWithBuffer(a, rls, rrs, rnd,tempBuffer);
}
mergeData(a, leftStart, rightStart, end, tempBuffer);
}
void mergeData(int *a, int leftStart, int rightStart, int end,int *tempBuffer) {
int leftCount = rightStart - leftStart;
int rightCount = end - rightStart + 1;
int lindex,rindex;
lindex = rindex = 0;
while (lindex < leftCount || rindex < rightCount) {
int lv = INT_MAX,rv = INT_MAX;
if (lindex < leftCount) { lv = a[leftStart + lindex]; }
if (rindex < rightCount) { rv = a[rightStart + rindex]; }
if (lv <= rv) {
tempBuffer[leftStart + lindex + rindex] = lv;
lindex ++;
}else {
tempBuffer[leftStart + lindex + rindex] = rv;
rindex ++;
}
}
for (int index = 0; index < end - leftStart + 1; index ++) {
a[leftStart + index] = tempBuffer[leftStart + index];
}
}
I thought the mergeData function can replace data in the point a each other without the temp buffer, but the logic is too complex and the efficient is not fast, so i add the temp buffer in this function.
Would you have better suggestions if you have willing?

Dynamically exclude some numbers from randomly generated sequence

I want to produce a random sequence of numbers between a range, for example 100 to 200.
After a while, depending on some events, I want to produce a new sequence between the same range (100 to 200), but this time I want to exclude some numbers. For example I don't want [150,165,170].
And the next time, these excluded numbers may or may not be included in the sequence.
One possible approach could be an array of numbers like this:
int rndm[] {100,101,102,103,...};
and use the index of the array to generate a random number at a time:
random(rndm[0-99]);
But I need to use as few instruction/data structures as possible in order to achieve performance.
I am using C for this code and I use random() or randomSeed(seed) and I want to know what the most efficient approach to handle this issue is, in terms of data structures should be used for the speed and memory.
This solution is efficient in the case that there are not many exclusions during the lifetime, once the exclusion function is quadratic.
There is a struct called RandomArray that holds a pointer to and array with size N. N is the desired size of the sequence. The time and space complexity is linear O(N) for the create function.
When an event happens it shall call the function excludeValue, with a time complexity of O(N) and space complexity of 1.
If it is desired to exclude a bunch of values, the function excludeValues (pay attention to s at the end) shall be called. In this case the complexity is O(N x K) and the space complexity is 1. K is the amount of values that shall be excluded.
#include <stdio.h>
#include <stdlib.h>
struct RandomArray {
int *pData;
size_t dataLen;
int excludedIdx;
};
struct RandomArray *excludeValue(struct RandomArray *pAr, int val) {
size_t i;
for (i = 0; i < pAr->excludedIdx; ++i) {
if (pAr->pData[i] == val) {
pAr->excludedIdx--;
int tmp = pAr->pData[i];
pAr->pData[i] = pAr->pData[pAr->excludedIdx];
pAr->pData[pAr->excludedIdx] = tmp;
// Do test again the position
--i;
}
} return pAr;
}
struct RandomArray *excludeValues(struct RandomArray *pAr, int *pVals, size_t len) {
size_t i;
for (i = 0; i < len; ++i)
excludeValue(pAr, pVals[i]);
}
struct RandomArray *destroyRandomArray(struct RandomArray *pAr) {
if (pAr) {
if (pAr->pData)
free(pAr->pData);
pAr->dataLen = 0;
}
return pAr;
}
struct RandomArray *createRandomArray(
struct RandomArray *pAr,
size_t dataLen,
int lowLimit, int highLimit) {
int i;
int range = (highLimit - lowLimit);
pAr->pData = (int*)malloc(sizeof(int) * dataLen);
pAr->dataLen = dataLen;
srand(time(NULL));
for (i = 0; i < dataLen; ++i) {
pAr->pData[i] = rand() % (range + 1) + lowLimit;
}
// Clear excluded indexs
pAr->excludedIdx = pAr->dataLen; return pAr;
}
void printRandomArray(struct RandomArray *pAr) {
size_t i;
printf("Random Array (len = %d): ", pAr->dataLen);
for (i =0; i < pAr->dataLen; ++i) {
printf(" %d", pAr->pData[i]);
}
printf("\n");
}
void printValidRandomArray(struct RandomArray *pAr) {
size_t i;
printf("Valid Random Array (len = %d): ", pAr->excludedIdx);
for (i =0; i < pAr->excludedIdx; ++i) {
printf(" %d", pAr->pData[i]);
}
printf("\n");
}
void printExcludedRandomArray(struct RandomArray *pAr) {
size_t i;
printf("Excluded Random Array (len = %d): ", pAr->dataLen - pAr->excludedIdx);
for (i = pAr->excludedIdx; i < pAr->dataLen; ++i) {
printf(" %d", pAr->pData[i]);
}
printf("\n");
}
void printAllRandomArray(struct RandomArray *pAr) {
printRandomArray(pAr);
printValidRandomArray(pAr);
printExcludedRandomArray(pAr);
}
int main() {
int lowLimit = 100;
int highLimit = 105;
int arrayLen = 10;
struct RandomArray myAr;
createRandomArray(&myAr, arrayLen, lowLimit, highLimit);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 100);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 101);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 102);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 103);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 104);
printAllRandomArray(&myAr);
printf("\n");
excludeValue(&myAr, 105);
printAllRandomArray(&myAr);
destroyRandomArray(&myAr);
createRandomArray(&myAr, arrayLen, lowLimit, highLimit);
printf("\n\n\n");
printAllRandomArray(&myAr);
printf("\n");
int vals[] = { 102, 105, 104 };
excludeValues(&myAr, vals, sizeof(vals) / sizeof(vals[0]));
printAllRandomArray(&myAr);
destroyRandomArray(&myAr);
}
This was asked here on the Arduino forum but I saw it here too. My answer is in Arduino's flavor of C++ since it was posted there...
Of course, performance varies as the set of excluded numbers grows relative to the set of numbers to be used to create your "new sequence."
void setup() {
Serial.begin(115200);
randomSeed(analogRead(A0));
}
void loop() {
// create an arbitray sized array to be filled with unique values to exclude from desired array
const int arraySize = 5;
int exclusions[arraySize];
for (int i = 0; i < arraySize; i++) {
// fill the array with unique values...
int val;
do {
val = random(100, 200);
} while([&]() {
for (auto j : exclusions) {
if (val == j) {
return true;
}
}
return false;
}());
exclusions[i] = val;
}
Serial.print(F("Exclusion Array: "));
for (int i = 0; i < arraySize; i++) {
Serial.print(exclusions[i]);
if (i < arraySize - 1)
Serial.print(F(", "));
}
Serial.println();
// create a new array of arbitrary length of unique random numbers in >>>the same<<< range as above (but not necessary)
Serial.print(F("Starting...\n"));
uint32_t start = millis();
const int listSize = 32;
int list[listSize];
for (int i = 0; i < listSize; i++) {
// fill the array with unique values that will exclude exclusions[]
int val;
do {
val = random(100, 200);
} while([&]() {
for (auto j : list) {
if (val == j) {
return true;
}
for (auto k : exclusions) {
if (val == k) {
return true;
}
}
}
return false;
}());
list[i] = val;
}
uint32_t end = millis();
Serial.println(end - start);
// OPTIONAL -> lets sort the final arry to make spotting the duplicates easier:
for (int i = 0; i < listSize; i++) {
for (int j = i + 1; j < listSize; j++) {
if (list[j] < list[i]) {
int temp = list[i];
list[i] = list[j];
list[j] = temp;
}
}
}
// output the final array
Serial.print(F("Final Array: "));
for (int i = 0; i < listSize; i++) {
Serial.print(list[i]);
if (i < listSize - 1)
Serial.print(F(", "));
}
Serial.print(F("\n\n\n"));
delay(1000);
}

sort and remove duplicates from int array in c

I am learning C and came over the topic of sorting. I wrote a comp() function in and used qsort to sort an array of int. Now for the next task I need to remove the duplicates from the array.
Is it possible to sort and remove duplicates at the same time?
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
int indexes[10] = { 0, 98, 45, 65, 45, 98, 78, 56, 65, 45 };
int comp(const void * elem1, const void * elem2) {
int f = *((int*) elem1);
int s = *((int*) elem2);
if (f > s) {
return 1;
}
if (f < s) {
return -1;
}
return 0;
}
void printIndexArray() {
int i = 0;
for (i = 0; i < 10; i++) {
printf("i is %d\n", indexes[i]);
}
}
int main() {
qsort(indexes, sizeof(indexes) / sizeof(int), sizeof(int), comp);
printIndexArray();
return 0;
}
Since your numbers are already sorted, removing dupes is easy. In C++, it's even built in as std::unique:
http://en.cppreference.com/w/cpp/algorithm/unique
Assuming you want to do it yourself, you can do it the same way unique does it:
int* unique (int* first, int* last)
{
if (first==last) return last;
int* result = first;
while (++first != last)
{
if (!(*result == *first))
*(++result)=*first;
}
return ++result;
}
Yes
This can be achieved by mergesort. If both left and right are the same just merge the one value
That's the code that removes the duplicates using mergesort. This snippet of code does the removing work:
else if(a[p1] == a[p2])
{
merged[p] = a[p1];
p1++;
p2++;
}
That's the iterative merge sort while the recursive version would be easier.
#include <stdio.h>
#include <stdlib.h>
#define min(a,b) (((a) < (b)) ? (a) : (b))
int indexes[10] = { 0, 98, 45, 65, 45, 98, 78, 56, 65, 45 };
void merge(int *a, int s, int m, int e)
{
int p1 = s;
int p2 = m + 1;
int * merged = (int*)malloc(sizeof(int) * (e - s + 1));
int p = 0;
while(p1 < m + 1 && p2 < e + 1)
{
if(a[p1] > a[p2])
{
merged[p] = a[p2];
p2++;
}
else if(a[p1] == a[p2])
{
merged[p] = a[p1];
p1++;
p2++;
}
else
{
merged[p] = a[p1];
p1++;
}
p++;
}
while(p1 < m + 1)
{
merged[p++] = a[p1++];
}
while(p2 < e + 1)
merged[p++] = a[p2++];
int i;
for(i = 0;i < (e -s+1); i++)
{
a[s + i] = merged[i];
}
free(merged);
}
void merge_sort(int *a, int n)
{
int width;
for(width = 1; width < n; width = 2 * width)
{
int i;
for(i = 0; i < n; i = i + 2 * width)
{
merge(a, i, min(i + width - 1, n - 1), min(i + 2 * width - 1, n - 1) );
}
}
}
void printIndexArray()
{
int i = 0;
for(i = 0; i < 10; i++)
{
printf("i is %d\n", indexes[i]);
}
}
int main()
{
merge_sort(indexes, sizeof(indexes) / sizeof(int) );
printIndexArray();
return 0;
}
#include <stdio.h>
#include <stdlib.h>
int indexes[10] = { 0, 98, 45, 65, 45, 98, 78, 56, 65, 45 };
size_t undup(int array[], size_t len)
{
size_t src,dst;
if (!len) return 0;
for (src=dst=1; src < len; src++) {
if (array[dst-1] == array[src]) continue;
array[dst++] = array[src];
}
return dst;
}
int comp(const void * elem1, const void * elem2) {
int f = *((int*) elem1);
int s = *((int*) elem2);
if (f > s) return 1;
if (f < s) return -1;
return 0;
}
void printIndexArray(size_t len) {
size_t i = 0;
for (i = 0; i < len; i++) {
printf("array[%zu] is %d\n", i, indexes[i]);
}
}
int main() {
size_t len = 10;
printf("Before sort\n" );
printIndexArray(len);
qsort(indexes, sizeof indexes / sizeof indexes[0], sizeof indexes[0], comp);
printf("After sort\n" );
printIndexArray(len);
len = undup(indexes,10);
printf("After undup\n" );
printIndexArray(len);
return 0;
}
The short answer is: yes.
The long answer is: it is always possible, but the complexity to do it depends heavily on the algorithm you use.
The more complex algorithms like quick-sort, slow-sort, bucket-sort, and straight-radix-sort do not lend themselves to such an enhancement, because they rely on the data being in a consecutive array, that can implicitly be split into subarrays. So, when you detect a duplicate, you cannot easily take it out. Again, it is possible, but certainly not a problem for beginners.
The less complex in-place algorithms like bubble-sort, insertion-sort, and shell-sort make it relatively easy: you can just replace one of the duplicates you detect with a sentinel value that sorts greater than all legal values, and let it rise to the top. After that, you just need to scoop off the cream of sentinel values and you are done.
The algorithms that really lend themselves to removing duplicates, are the ones that use intermediate arrays that grow/shrink in the process; in these cases you can just shrink or skip growing one of these intermediate arrays when you detect a duplicate. Candidates are merge-sort and heap-sort.
Note, however, that it is more prudent to just sort the array, and eliminate duplicates in a second, separate step. Why? Because eliminating duplicates adds complexity to the inner loop of the sorting algorithm, which is of O(n*log(n)) in most relevant cases. But eliminating duplicates from a sorted array is an O(n) operation, making the split operation faster than the fused one.

Trouble creating a descending heap sort in C

void heapSort(int list[], int last)
{
// Local Declarations
int sorted;
int holdData;
int walker;
// Statements
for (walker = 1; walker <= last; walker++)
reheapUp (list, walker);
// Min Heap created. Now to sort!
sorted = last;
while (sorted > 0)
{
holdData = list[0];
list[0] = list[sorted];
list[sorted] = holdData;
sorted--;
reheapDown (list, 0, sorted, moves, comparisons);
}
return;
}
void reheapUp (int heap[], int newNode)
{
// Local Declarations
int parent;
int hold;
// Create a min heap
// Statements
if (newNode)
{
parent = (newNode - 1) / 2;
if (heap[newNode] > heap[parent]) // Only change made from ascending order
{
hold = heap[parent];
heap[parent] = heap[newNode];
heap[newNode] = hold;
reheapUp (heap, parent);
}
}
return;
}
void reheapDown (int heap[], int root, int last)
{
// Local Declarations
int hold;
int leftKey;
int rightKey;
int largeChildKey;
int largeChildIndex;
// Statements
if ((root * 2 + 1) <= last)
{
// There is atleast one child
leftKey = heap[root * 2 + 1];
if ((root * 2 + 2) <= last) {
rightKey = heap[root * 2 + 2];
}
else
rightKey = -1;
// Determine which child is larger
if (leftKey > rightKey)
{
largeChildKey = leftKey;
largeChildIndex = root * 2 + 1;
}
else
{
largeChildKey = rightKey;
largeChildIndex = root * 2 + 2;
}
// Test if root > large subtree
if (heap[root] < heap[largeChildIndex])
{
// parent < child
hold = heap[root];
heap[root] = heap[largeChildIndex];
heap[largeChildIndex] = hold;
reheapDown(heap, largeChildIndex, last);
}
}
return;
}
I got ascending order to heap sort to function by creating a max heap. I read that to create a descending order heap sort I need to create a min heap which I did as shown by changing heap[newNode] < heap[parent] to heap[newNode] > heap[parent] as shown in the code. However, it is still out order. Therefore, I wanted to do what are the other steps? Do I need to alter reheapDown somehow as well?
You need to change all value comparisons you make like heap[root] < heap[largeChildIndex] you didn't mention you changed.
First of all you need to change every comparison operators accordingly, just take them all and think of the problem.
Secondly you only have to reheapUp to (last/2) to create the heap, because the key at (last/2+1) doesn't have any childs.
And I made some heap-sort in C before and I had way less lines of code, and only had one "heapify" function. You might want to look at your code and try to simplify things.
EDIT : if you want some inspiration here is what I did
void fixHeap(int position,int length)
{
int child = (2*position)+1;
int temp;
while (child<=length)
{
if (child<length && vector[child]<vector[child+1])
{
child++;
}
if (vector[position]<vector[child])
{
temp = vector[position];
vector[position] = vector[child];
vector[child] = temp;
position = child;
child = (2*position)+1;
}
else
{
return;
}
}
}
void heapSort(int vector[],int N)
{
int counter;
int temp;
for (counter=(N-1)/2; counter>=0; counter--)
{
fixHeap(counter,N-1);
}
for (counter=N-1; counter>0; counter--)
{
temp = vector[counter];
vector[counter] = vector[0];
vector[0] = temp;
fixHeap(0,counter-1);
}
}
Here is heap sort using min heap implementation. Have a look, if it helps!
#include "stdafx.h"
#define LEFT(i) (2 * (i))
#define RIGHT(i) (((2 * (i)) + 1))
#define PARENT(i) ((i) / 2))
void print_heap(int input[], int n)
{
int i;
printf("Printing heap: \n");
for (i = 0; i < n; i++)
printf("%d ", input[i]);
printf("\n");
}
void swap_nodes(int *a, int *b)
{
int tmp;
tmp = *a;
*a = *b;
*b = tmp;
}
void min_heapify(int input[], int i, int n)
{
int least;
int l = LEFT(i + 1) - 1; // Get 0 based array index
int r = RIGHT(i + 1) - 1; // Get 0 based array index
if (l < n && input[l] < input[i]) {
least = l;
} else {
least = i;
}
if (r < n && input[r] < input[least]) {
least = r;
}
if (least != i) {
swap_nodes(&input[i], &input[least]);
min_heapify(input, least, n);
}
}
void heapify(int input[], int n)
{
for (int i = n/2; i >= 0; i--)
min_heapify(input, i, n);
}
void heap_sort(int input[], int n)
{
heapify(input, n);
for (int i = n - 1; i >= 1; i--) {
swap_nodes(&input[0], &input[i]);
n = n - 1;
min_heapify(input, 0, n);
}
}
int _tmain(int argc, _TCHAR* argv[])
{
int input[] = {5, 3, 17, 10, 84, 19, 6, 22, 9, 1};
int n = sizeof(input) / sizeof(input[0]);
print_heap(input, n);
heap_sort(input, n);
print_heap(input, n);
return 0;
}

Resources