comparison of shell sorting and merge sorting - c

Comparing the running time of these two sorts, for some reason I get that they work in almost the same time, and on sorted arrays, shell sorting works 4 times faster. Although this can't be, because in the best case, shell sorting is performed in time n(log n)^2, while merge sorting is performed in nlogn time, which is faster than shell sorting. What could be the problem? I tried it on different PCs, but shell sorting is still faster.
UPD: For arrays filled randomly, it works as it should, but for already sorted ones, shell sorting works twice as fast.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#define DIFF 3276
double wtime(void);
void randArray(int* array, int size);
void shellSort(int a[], int size);
void merge(int arr[], int l, int m, int r);
void mergeSort(int arr[], int l, int r);
int main(void)
{
FILE* file1;
FILE* file2;
int* array = NULL;
unsigned seed = time(NULL);
if (!(file1 = fopen("shell.txt", "w"))) {
return 1;
}
for (int step = DIFF; step <= 32760; step += DIFF) {
srand(seed);
array = (int*)malloc(step * sizeof(int));
double start, end;
randArray(array, step);
start = wtime();
shellSort(array, step);
end = wtime();
free(array);
fprintf(file1, "%d\t%lf\n", step, end - start);
}
fclose(file1);
if (!(file2 = fopen("merge.txt", "w"))) {
return 1;
}
for (int step = DIFF; step <= 32760; step += DIFF) {
srand(seed);
array = (int*)malloc(step * sizeof(int));
double start, end;
randArray(array, step);
start = wtime();
mergeSort(array, 0, step - 1);
end = wtime();
free(array);
fprintf(file2, "%d\t%lf\n", step, end - start);
}
fclose(file2);
}
void randArray(int* array, int size)
{
for (int i = 0; i < size; i++) {
array[i] = i;
}
}
double wtime(void)
{
struct timeval t;
gettimeofday(&t, NULL);
return (double)t.tv_sec + (double)t.tv_usec * 1E-6;
}
void shellSort(int a[], int size)
{
int i, j;
int s = size / 2;
while (s > 0) {
for (i = s; i < size; i++) {
int temp = a[i];
for (j = i - s; (j >= 0) && (a[j] > temp); j -= s)
a[j + s] = a[j];
a[j + s] = temp;
}
s /= 2;
}
}
void merge(int arr[], int l, int m, int r)
{
int i, j, k;
int n1 = m - l + 1;
int n2 = r - m;
int L[n1], R[n2];
for (i = 0; i < n1; i++)
L[i] = arr[l + i];
for (j = 0; j < n2; j++)
R[j] = arr[m + 1 + j];
i = 0;
j = 0;
k = l;
while (i < n1 && j < n2) {
if (L[i] <= R[j]) {
arr[k] = L[i];
i++;
} else {
arr[k] = R[j];
j++;
}
k++;
}
while (i < n1) {
arr[k] = L[i];
i++;
k++;
}
while (j < n2) {
arr[k] = R[j];
j++;
k++;
}
}
void mergeSort(int arr[], int l, int r)
{
if (l < r) {
int m = l + (r - l) / 2;
mergeSort(arr, l, m);
mergeSort(arr, m + 1, r);
merge(arr, l, m, r);
}
}

Your shell sort works in-place and for a sorted array it never swaps a single element. So it only has the cost of the compares and the branch predictor will predict the compare perfectly every time.
Your merge sort on the other hand copies the data to temp arrays and back in every step. That's 2 * log(n) copies of the whole array. The extra memory needed might also exceed the L1 cache of your CPU for the larger test runs making this magnitudes worse. You can cut that in half by alternating between 2 arrays, well almost log(n) + 1 copies worst case. Branch prediction for the sorted case is perfect too, it's just the copies that cost you.

Related

How to do merge sort without using additional arrays for splitting the initial array?

I was trying to solve a problem which asks to write merge sort code but without using additional arrays for partitioning the initial array. I guess the code is wrote is almost good but the problem I am facing is that I can't figure out how to maintain and update the array while being sorted. I know the problem is in Merge function.
How can I fix the code?
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void PrintArray(int A[], int n)
{
for(int i=0; i < n; i++)
printf("%d ", A[i]);
printf("\n");
}
void merge(int A[], int left, int mid, int right, int n){
int B[n];
int i = left, j = mid+1, k=0;
while(i<=mid && j <= right){
if(A[i]>=A[j]){
B[k++] = A[i++];
}
else {
B[k++] = A[j++];
}
}
while(i<=mid){
B[k++] = A[i++];
}
while(j<=right){
B[k++] = A[j++];
}
for(i=0; i<n; i++){
A[i] = B[i];
}
}
void MergeSort(int A[], int left, int right, int n)
{
if(left<right){
int mid;
mid = floor((left+right)/2);
MergeSort(A,left,mid,n/2);
MergeSort(A,mid+1,right,n/2);
merge(A,left,mid,right,n);
}
else return;
}
int main()
{
int n;
scanf("%d",&n);
int A[n];
for(int i=0; i < n; i++) scanf("%d", &A[i]);
MergeSort(A, 0, n-1, n);
PrintArray(A, n);
return 0;
}
In the final for loop in merge, change:
A[i] = B[i];
Into:
A[left + i] = B[i];
Edit: Even after that fix, the sort was still wrong. The correct fix for the final loop is:
for (i = left; i <= right; ++i)
A[i] = B[i - left];
The original for (i = 0; i < n; ++i) didn't work because just passing n / 2 could pass a value that was one less than needed. With this new fix, n doesn't need to be passed to merge at all. So, n is really only needed for the public function. See the UPDATE section below.
Side notes:
You don't need to use floor at all. It's superfluous for integer math [and might make the results less accurate].
You are sorting in reverse order (e.g. 3, 2, 1 instead of 1, 2, 3). To sort in ascending order, in merge, change: if (A[i] >= A[j]) to if (A[i] <= A[j])
You are not creating an initial extra array, but you have B on the stack in merge, so, you are using an auxiliary/temp array. This is true regardless whether you copy from A to B at the start of merge or copy back from B to A at the end of merge
So, you don't have a true "in-place" algorithm.
In fact, for large enough arrays, having B on the stack would cause a stack overflow. It might be better to use a heap allocation for B.
You could put this in a global/public "wrapper" function for mergeSort (e.g. mergeSortPublic). Do (e.g.) B = malloc(sizeof(int) * n) at the start and do free(B) at the end. You can make B global scope or pass it as an extra arg to your merge functions
UPDATE:
Here's a fully cleaned up version that adds diagnostic tests.
Because of the change in the final loop in merge, it no longer needs the n value. So, it's no longer needed in mergeSort either with the mergeSortPub change.
I refactored the first loop in merge to be slightly faster by not refetching already fetched array values. The optimizer might have found this speedup, but I think it's better to state it explicitly.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void
PrintArray(int A[], int n)
{
int totlen = 0;
for (int i = 0; i < n; i++) {
totlen += printf(" %d", A[i]);
if (totlen >= 72) {
printf("\n");
totlen = 0;
}
}
if (totlen > 0)
printf("\n");
}
void
merge(int A[], int left, int mid, int right, int *B)
{
int i = left,
j = mid + 1,
k = 0;
int Ai = A[i];
int Aj = A[j];
while (i <= mid && j <= right) {
if (Ai <= Aj) {
B[k++] = Ai;
Ai = A[++i];
}
else {
B[k++] = Aj;
Aj = A[++j];
}
}
while (i <= mid)
B[k++] = A[i++];
while (j <= right)
B[k++] = A[j++];
// original code
#if 0
for (i = 0; i < n; i++)
A[i] = B[i];
#endif
// first fix -- still broken
#if 0
for (i = 0; i < n; i++)
A[left + i] = B[i];
#endif
// correct fix
#if 1
for (i = left; i <= right; ++i)
A[i] = B[i - left];
#endif
}
void
MergeSort(int A[], int left, int right, int *B)
{
if (left < right) {
int mid = (left + right) / 2;
MergeSort(A, left, mid, B);
MergeSort(A, mid + 1, right, B);
merge(A, left, mid, right, B);
}
}
void
MergeSortPub(int A[], int n)
{
int *B = malloc(sizeof(*B) * n);
MergeSort(A,0,n - 1,B);
free(B);
}
void
dotest(int tstno)
{
int n = rand() % 1000;
int *A = malloc(sizeof(*A) * n);
for (int i = 0; i < n; ++i)
A[i] = n - i;
MergeSortPub(A,n);
int old = A[0];
int bad = 0;
for (int i = 1; i < n; ++i) {
int cur = A[i];
if (cur < old) {
if (! bad)
printf("dotest: %d -- i=%d old=%d cur=%d\n",tstno,i,old,cur);
bad = 1;
}
old = cur;
}
if (bad) {
PrintArray(A,n);
exit(1);
}
}
int
main(void)
{
int n;
#if 0
scanf("%d", &n);
int A[n];
for (int i = 0; i < n; i++)
scanf("%d", &A[i]);
MergeSortPub(A, n);
PrintArray(A, n);
#else
for (int tstno = 1; tstno <= 1000; ++tstno)
dotest(tstno);
#endif
return 0;
}
There are a few variations of merge sort that do not use any additional space other than local variables. Optimal implementations of this are complicated and about 50% slower than conventional merge sort, and most of these implementations are for academic research.
There is a wiki article for one variation, that is hybrid of insertion and merge sort.
https://en.wikipedia.org/wiki/Block_sort
Link to a more optimized version in grailsort.h in this github repository. The void GrailSort(SORT_TYPE *arr,int Len) function does not use any additional buffer.
https://github.com/Mrrl/GrailSort

C Code doesn't run all the way through on CMD but works fine on other IDE's Sorting Algorithms

The main.c file works fine in Repl.it, OnlineGDB, and Mimir. But I had originally written the code in VSCode but the code will stop running at random points, only on command prompt. Sometimes it will only run two lines, or all the way to 40,000, and rarely have I gotten it to run all the way through. It seems as though there is some sort of limitation on command prompt or my compiler. Attached is my main.c file and a screenshot of what my command prompt output looks like. Each time I run the code it stops at a random point. Jamila suggested adding system(“PAUSE”); before return 0; in the main function but that did not do it. I had Jon try the code through his command prompt and he didn’t have an issue either. So it seems it comes down to my computer. I have reinstalled MinGW according to the instructions from Intro to C but the issue is still present. I have an i9 processor & 16gb of Ram, so it shouldn’t be a hardware limitation. This is just odd behavior and I want to understand why it is only my computer that has this problem. I have also tried running it with the leak_detector_c.c but that makes no difference as well. Code works fine in Mimir, OnlineGDB, and Repl.it.
IMAGE 1 IMAGE 2 IMAGE 3
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MAXVAL 100000
void randArray(int A[], int size, int maxval)
{
int i;
for(i=0l; i<size; i++)
A[i] = rand()%maxval + 1;
}
void arrayCopy(int from[], int to[], int size)
{
int j;
for(j=0; j<size; j++)
to[j] = from[j];
}
long timediff(clock_t t1, clock_t t2)
{
long elapsed;
elapsed = ((double)t2-t1) / CLOCKS_PER_SEC * 1000;
return elapsed;
}
void swap(int *a, int *b)
{
int temp = *a;
*a = *b;
*b = temp;
}
void bubbleSort(int A[], int n)
{
int i, j;
for(i=n-2; i>=0; i--)
{
for(j=0; j<=i; j++)
if(A[j] > A[j+1])
swap(&A[j], &A[j]+1);
}
}
void insertionSort(int arr[], int n)
{
int i, item, j;
for (i = 1; i < n; i++)
{
item = arr[i];
/* Move elements of arr[0..i-1], that are
greater than key, to one position ahead
of their current position */
for(j=i-1; j>=0; j--)
{
if(arr[j]>item)
arr[j+1] = arr[j];
else
break;
}
arr[j+1] = item;
}
}
void merge(int arr[], int l, int m, int r)
{
int i, j, k;
int n1 = m - l + 1;
int n2 = r - m;
/* create temp arrays */
int *L = (int*) malloc(n1*sizeof(int));
int *R = (int*) malloc(n2*sizeof(int));
/* Copy data to temp arrays L[] and R[] */
for (i = 0; i < n1; i++)
L[i] = arr[l + i];
for (j = 0; j < n2; j++)
R[j] = arr[m + 1+ j];
/* Merge the temp arrays back into arr[l..r]*/
i = 0; // Initial index of first subarray
j = 0; // Initial index of second subarray
k = l; // Initial index of merged subarray
while (i < n1 && j < n2)
{
if (L[i] <= R[j])
{
arr[k] = L[i];
i++;
}
else
{
arr[k] = R[j];
j++;
}
k++;
}
/* Copy the remaining elements of L[], if there
are any */
while (i < n1)
{
arr[k] = L[i];
i++;
k++;
}
/* Copy the remaining elements of R[], if there
are any */
while (j < n2)
{
arr[k] = R[j];
j++;
k++;
}
free(L);
free(R);
}
void mergeSort(int arr[], int l, int r)
{
if (l < r)
{
// get the mid point
int m = (l+r)/2;
// Sort first and second halves
mergeSort(arr, l, m);
mergeSort(arr, m+1, r);
// printf("Testing l=%d r=%d m=%d\n", l, r, m);
merge(arr, l, m, r);
}
}
int partition(int *vals, int low, int high)
{
// Pick a random partition element and swap it into index low.
int i = low + rand()%(high-low+1);
swap(&vals[low], &vals[i]);
int lowpos = low; //here is our pivot located.
low++; //our starting point is after the pivot.
// Run the partition so long as the low and high counters don't cross.
while(low<=high)
{
// Move the low pointer until we find a value too large for this side.
while(low<=high && vals[low]<=vals[lowpos]) low++;
// Move the high pointer until we find a value too small for this side.
while(high>=low && vals[high] > vals[lowpos]) high--;
// Now that we've identified two values on the wrong side, swap them.
if (low<high)
swap(&vals[low], &vals[high]);
}
// Swap the pivot element element into its correct location.
swap(&vals[lowpos], &vals[high]);
return high; //return the partition point
}
// Pre-condition: s and f are value indexes into numbers.
// Post-condition: The values in numbers will be sorted in between indexes s
// and f.
void quickSort(int* numbers, int low, int high) {
// Only have to sort if we are sorting more than one number
if (low < high) {
int split = partition(numbers,low,high);
quickSort(numbers,low,split-1);
quickSort(numbers,split+1,high);
}
}
void selectionSort(int arr[], int n)
{
int i, j, min_idx, temp;
// One by one move boundary of unsorted subarray
for (i = 0; i < n-1; i++)
{
//printf("\nIteration# %d\n",i+1);
// Find the minimum element in unsorted array
min_idx = i;
for (j = i+1; j < n; j++)
if (arr[j] < arr[min_idx])
min_idx = j;
// Swap the found minimum element with the first element
temp = arr[i];
arr[i] = arr[min_idx];
arr[min_idx] = temp;
}
}
int main()
{
int sizes[] = {1000, 10000, 20000, 40000, 50000, 100000, 1000000};
int *originalArray;
int* sortedArray;
int i, j;
long elapsed;
clock_t start, end;
for(i=0; i<7; i++)
{
originalArray = malloc(sizeof(int)*sizes[i]);
sortedArray = malloc(sizeof(int)*sizes[i]);
randArray(originalArray, sizes[i], MAXVAL);
arrayCopy(originalArray, sortedArray, sizes[i]);
start = clock();
bubbleSort(sortedArray, sizes[i]);
end= clock();
elapsed=timediff(start,end);
printf("Sorting %d values took %ld milliseconds for Bubble sort.\n", sizes[i], elapsed);
arrayCopy(originalArray, sortedArray, sizes[i]);
start = clock();
insertionSort(sortedArray, sizes[i]);
end= clock();
elapsed=timediff(start, end);
printf("Sorting %d values took %ld milliseconds for Insertion sort.\n", sizes[i], elapsed);
arrayCopy(originalArray, sortedArray, sizes[i]);
start = clock();
mergeSort(sortedArray, 0, sizes[i]);
end = clock();
elapsed=timediff(start, end);
printf("Sorting %d values took %ld milliseconds for Merge sort.\n", sizes[i], elapsed);
arrayCopy(originalArray, sortedArray, sizes[i]);
start = clock();
selectionSort(sortedArray, sizes[i]);
end = clock();
elapsed=timediff(start, end);
printf("Sorting %d values took %ld milliseconds for Selection sort.\n", sizes[i], elapsed);
arrayCopy(originalArray, sortedArray, sizes[i]);
start = clock();
quickSort(sortedArray, 0, sizes[i]);
end = clock();
elapsed=timediff(start, end);
printf("Sorting %d values took %ld milliseconds for Quick sort.\n", sizes[i], elapsed);
free(sortedArray);
free(originalArray);
}
return 0;
}
This answer was wrong, but I don't delete it yet so I can reply to comments.
Another guess: quicksort & partition look like you assumed low & high both inclusive. If so, the first call should be
quickSort(sortedArray, 0, sizes[i] - 1);
instead of
quickSort(sortedArray, 0, sizes[i]);
Unlike all other sorting routines, this one expects the ending index, not the array's length as its last parameter.

I have written a code of mergeSort(from clrs book) but not getting an output

I have taken a reference from clrs book of Introduction to Algorithms of Merge Sort Algorithms and written a program in C language. Though I had check it manually by pen and paper the code seems to be correct then also I am not getting the right output.
The Output is shown below:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
void merge(int array[], int start, int middle, int end) {
int n1 = middle - start + 1;
int n2 = end - start;
int i;
int leftarray[n1 + 1], rightarray[n2 + 1];
for (i = 0; i < n1; i++) {
leftarray[i] = array[start + i];
}
for (int j = 0; i < n2; i++) {
rightarray[j] = array[middle + j + 1];
}
leftarray[n1 + 1] = 1000000;
rightarray[n2 + 1] = 1000000;
int k, j = 0;
i = 0;
for (k = start; k <= end; k++) {
if (leftarray[i] > rightarray[j]) {
array[k] = rightarray[j];
j++;
} else {
array[k] = leftarray[i];
i++;
}
}
}
void mergeSort(int array[], int start, int end) {
if (start < end) {
int middle = (start + end) / 2;
mergeSort(array, start, middle);
mergeSort(array, middle + 1, end);
merge(array, start, middle, end);
}
}
void sorting(int array[], int length) {
int i;
for (i = 0; i < length; i++) {
printf("%d ", array[i]);
}
}
int main() {
int noOfelements;
scanf("%d", &noOfelements);
int array[noOfelements];
for (int i = 0; i < noOfelements; i++) {
scanf("%d", &array[i]);
}
printf("Before Sorting: ");
sorting(array, noOfelements);
mergeSort(array, 0, noOfelements - 1);
printf("After Sorting: ");
sorting(array, noOfelements);
return 0;
}
Output of the Above Program:
5
5 4 3 2 1
Before Sorting: 5 4 3 2 1 After Sorting: 2 0 5 0 32
Although the book Introduction to Algorithms by Thomas H Cormen, Charles E Leiserson , Ronald L Rivest, and Clifford Stein, is considered an excellent textbook, the algorithm you implemented has several shortcomings:
it uses the concept of sentinel values, values set at the end of the arrays to be merged, supposedly larger than any existing values, to hopefully simplify the merging process. In reality, there are no such values as if there were any, why should they not appear in the arrays as regular values?
it uses end as the index of the last element in the array: it would be much simpler to use end as the index of the first element beyond the array, thus removing the need for confusing +1 / -1 adjustments and allowing for empty arrays.
you use int middle = (start + end) / 2; which could overflow for large values of start and end, although other parts of your implementation would fail if you try and sort such a huge array. Still it is safer to write int middle = start + (end - start) / 2;
Your code fails because you set the sentinel values one position too far. You should write:
leftarray[n1] = 1000000;
rightarray[n2] = 1000000;
Here is a better approach:
#include <stdio.h>
void merge(int array[], int start, int middle, int end) {
int n1 = middle - start;
int i, j, k;
// save the elements from the left half, no need to save the right half
int leftarray[n1];
for (i = 0; i < n1; i++) {
leftarray[i] = array[start + i];
}
for (i = 0, j = middle, k = start; i < n1; k++) {
if (j >= end || leftarray[i] <= array[j]) {
array[k] = leftarray[i];
i++;
} else {
array[k] = array[j];
j++;
}
}
}
void mergeSort(int array[], int start, int end) {
if (end - start >= 2) {
int middle = start + (end - start) / 2;
if (middle - start > 100000) {
// avoid stack overflow: allocate at most 100k ints for `leftarray`
middle = start + 100000;
}
mergeSort(array, start, middle);
mergeSort(array, middle, end);
merge(array, start, middle, end);
}
}
void print_array(const char *prefix, const int array[], int length) {
printf("%s:", prefix);
for (int i = 0; i < length; i++) {
printf(" %d", array[i]);
}
printf("\n");
}
int main() {
int noOfelements;
if (scanf("%d", &noOfelements) != 1 || noOfelements < 0)
return 1;
int array[noOfelements];
for (int i = 0; i < noOfelements; i++) {
if (scanf("%d", &array[i]) != 1)
return 1;
}
print_array("Before Sorting", array, noOfelements);
mergeSort(array, 0, noOfelements);
print_array("After Sorting", array, noOfelements);
return 0;
}

Get the sorted indices of an array using quicksort

I have changed to quicksort code to sort an array of floats which I got from tutorialgatway.org. However I need the sorted indices. I am aware of the qsort library function that can be used to get the sorted indices and I can implement that. However, I want to avoid standard library (I know this is not recommendation). The reason for not using a standard library is that I need to sort large number of arrays in a loop, which I need to parallelize using openMP, therefore writing function explicitly would allow me to parallelize quicksort function in a loop.
/* C Program for Quick Sort */
#include <stdio.h>
void Swap(float *x, float *y) {
float Temp;
Temp = *x;
*x = *y;
*y = Temp;
}
void quickSort(float a[], int first, int last) {
int i, j;
int pivot;
if (first < last) {
pivot = first;
i = first;
j = last;
while (i < j) {
while (a[i] <= a[pivot] && i < last)
i++;
while (a[j] > a[pivot])
j--;
if (i < j) {
Swap(&a[i], &a[j]);
}
}
Swap(&a[pivot], &a[j]);
quickSort(a, first, j - 1);
quickSort(a, j + 1, last);
}
}
int main() {
int number, i;
float a[100];
printf("\n Please Enter the total Number of Elements : ");
scanf("%d", &number);
printf("\n Please Enter the Array Elements : ");
for (i = 0; i < number; i++)
scanf("%f", &a[i]);
quickSort(a, 0, number - 1);
printf("\n Selection Sort Result : ");
for (i = 0; i < number; i++) {
printf(" %f \t", a[i]);
}
printf("\n");
return 0;
}
How can I return the sorted indices in the code ?
You need to generate an array of indexes from 0 to size-1, then sort the array of indexes according to the array values. So the code does compares using array[index[...]], and does swaps on index[...].
An alternative is to generate an array of pointers from &array[0] to &array[size-1]. When the pointers are sorted, you can convert them to indexes by using: index[i] = pointer[i] - &array[0] (could use a union for the indexes and pointers).
Example program with standard version of Hoare partition scheme to sort array of indexes in I[] according to floats in A[]:
#include <stdio.h>
#include <stdlib.h>
void QuickSort(float A[], size_t I[], size_t lo, size_t hi)
{
if (lo < hi)
{
float pivot = A[I[lo + (hi - lo) / 2]];
size_t t;
size_t i = lo - 1;
size_t j = hi + 1;
while (1)
{
while (A[I[++i]] < pivot);
while (A[I[--j]] > pivot);
if (i >= j)
break;
t = I[i];
I[i] = I[j];
I[j] = t;
}
QuickSort(A, I, lo, j);
QuickSort(A, I, j + 1, hi);
}
}
#define COUNT (4*1024*1024) // number of values to sort
int main(int argc, char**argv)
{
int r; // random number
size_t i;
float * A = (float *) malloc(COUNT*sizeof(float));
size_t * I = (size_t *) malloc(COUNT*sizeof(size_t));
for(i = 0; i < COUNT; i++){ // random floats
r = (((rand()>>4) & 0xff)<< 0);
r += (((rand()>>4) & 0xff)<< 8);
r += (((rand()>>4) & 0xff)<<16);
r += (((rand()>>4) & 0xff)<<24);
A[i] = (float)r;
}
for(i = 0; i < COUNT; i++) // array of indexes
I[i] = i;
QuickSort(A, I, 0, COUNT-1);
for(i = 1; i < COUNT; i++){
if(A[I[i-1]] > A[I[i]]){
printf("error\n");
break;
}
}
free(I);
free(A);
return(0);
}
This version of quicksort avoids stack overflow by only using recursion of the smaller side of the partition. Worst case time complexity will still be O(n^2), but the stack space complexity is limited to O(log(n)).
void QuickSort(float A[], size_t I[], size_t lo, size_t hi)
{
while (lo < hi)
{
float pivot = A[I[lo + (hi - lo) / 2]];
size_t t;
size_t i = lo - 1;
size_t j = hi + 1;
while (1)
{
while (A[I[++i]] < pivot);
while (A[I[--j]] > pivot);
if (i >= j)
break;
t = I[i];
I[i] = I[j];
I[j] = t;
}
/* avoid stack overflow */
if((j - lo) < (hi - j)){
QuickSort(A, I, lo, j);
lo = j+1;
} else {
QuickSort(A, I, j + 1, hi);
hi = j;
}
}
}

Merge sort algorithm in C not working as expected

I am trying to implement the merge sort algorithm in C. I understand how the algorithm is supposed to work however I am encountering some difficulties with the implementation.
I understand that there are hundreds of examples and source code for it's implementation but I was hoping someone could help me understand why mine is not working correctly.
My code is below and after the code I explain what I have tried so far.
#include <stdio.h>
void merge(int a[], int L[], int R[],int nL, int nR) //nL and nR are the lengths of L[] and R[]
{
int i = 0 , j = 0, k = 0;
while(i<nL && j <nR)
{
if(L[i] <= R[j]){
a[k] = L[i];
i++;
}
else{
a[k] = R[j];
j++;
}
k++;
}
while(i < nL){
a[k] = L[i];
i++;
k++;
}
while(j < nR) {
a[k] = R[j];
j++;
k++;
}
}
void mergesort(int a[],int n) //n is the length of a[]
{
if(n < 2) return; //BASE CASE
int mid = n / 2;
int left[mid];
int right[n-mid];
for(int i = 0; i < mid; i++)
{
left[i] = a[i];
}
for(int i = mid; i < n-1; i++)
{
right[i-mid] = a[i];
}
int nL = sizeof(left) / sizeof(left[0]);
int nR = sizeof(right) / sizeof(right[0]);
mergesort(left, nL);
mergesort(right, nR);
merge(a,left,right,nL,nR);
}
int main(void)
{
printf("Initial:\n");
printf("3 4 1 6\n");
int numbers[4] = {3,4,1,6};
int n = sizeof(numbers) / sizeof(int);
mergesort(numbers,n);
printf("Sorted:\n");
for(int i =0 ; i < 4; i++)
{
printf("%d ", numbers[i]);
}
return 0;
}
As it is and with the unsorted array [3,4,1,6] the output is 0 0 1 3.
Clearly the 1 and 3 are in the right order relative to each other but the two zeros at the beginning are clearly wrong. At first it seemed to me that I was inserting 4 and 6 to the right and out of bounds of the array.
I used some print statements to try and debug but I haven't been able to figure out what was going on. I even tried to follow my code with gdb but I still could not sort it.
Does any one have any ideas of what might be happening?
A more nearly idiomatic way of writing the merge() code would be:
void merge(int a[], int L[], int R[],int nL, int nR)
{
int i = 0, j = 0, k = 0;
while (i < nL && j < nR)
{
if (L[i] <= R[j])
a[k++] = L[i++];
else
a[k++] = R[j++];
}
while (i < nL)
a[k++] = L[i++];
while (j < nR)
a[k++] = R[j++];
}
That's about half the number of lines of your code, and within broad limits, the less code there is to read, the better. There are those who insist on having braces after each loop or conditional. I don't think that's necessary (or particularly helpful), but if that's the style you like, you can use it.
Your mergesort() code is less flabby, but could be changed to:
void mergesort(int a[],int n) //n is the length of a[]
{
if (n < 2)
return; //BASE CASE
int mid = n / 2;
int left[mid];
int right[n-mid];
for (int i = 0; i < mid; i++)
left[i] = a[i];
for (int i = mid; i < n; i++)
right[i-mid] = a[i];
mergesort(left, mid);
mergesort(right, n - mid);
merge(a, left, right, mid, n - mid);
}
This includes the fix for your main problem — the loop loading the right array was leaving the last element uncopied.
With a debugging function such as:
void dump_array(const char *tag, int n, int *a)
{
printf("%s:%d:", tag, n);
for (int i = 0; i < n; i++)
printf(" %3d", a[i]);
putchar('\n');
}
You can do a lot of effective debugging with:
void mergesort(int a[],int n)
{
if (n < 2)
return;
int mid = n / 2;
int left[mid];
int right[n-mid];
dump_array("-->>mergesort()", n, a);
for (int i = 0; i < mid; i++)
left[i] = a[i];
dump_array("left", mid, left);
for (int i = mid; i < n; i++)
right[i-mid] = a[i];
dump_array("right", n - mid, right);
mergesort(left, mid);
dump_array("merged-L", mid, left);
mergesort(right, n - mid);
dump_array("merged-R", n - mid, right);
merge(a, left, right, mid, n - mid);
dump_array("<<--mergesort()", n, a);
}
In your code, the output with the tag right would show 0 or semi-random data for the last element, rather than what you're expecting. This would be a hint as to where the trouble is. Keep the dump_array() function around; it is a useful creature to have. It's a simple-minded version; you can invent more complex versions which outputs a newline at intermediate positions for long arrays, for example.
The issue is in the following code:
for(int i = mid; i < n-1; i++)
{
right[i-mid] = a[i];
}
It should be:
for(int i = mid; i < n; i++) // right should range from mid to n - 1 *inclusive*
{
right[i-mid] = a[i];
}
This is simple implementation of merge sort without any complications. Just pass the array pointer and total number of entires in the array.
void merge(int *a, int top)// Array pointer and max entries
{
int l1, k, l2, u1, u2, size = 1, i, j;
int *sa;
sa = (int *)calloc(top, sizeof(int));
while (size < top)
{
l1 = 0;
k = 0;
while (l1 + size < top)
{
l2 = l1 + size;
u1 = l2 - 1;
u2 = ((l2 + size - 1) < top ? l2 + size - 1 : top - 1);
for (i = l1, j = l2; i <= u1 && j <= u2; )// Merging
{
sa[k++] = a[i] <= a[j] ? a[i++] : a[j++];
}
for ( ; i <= u1; )
sa[k++] = a[i++];
for ( ; j <= u2; )
sa[k++] = a[j++];
l1 = u2 + 1;
}
for (i = l1; i < top; i++) // For the left outs of the process
sa[k++] = a[i];
for (i = 0; i < top; i++)
a[i] = sa[i];
size *= 2;
}
}

Resources