segfault in merge sort implementation

segfault in merge sort implementation - c

I'm working on a C implementation as an exercise (I'm a student). I have the logic fine (I've used the implementation itself before), but I get a segfault when actually running it. I've looked for a long time, and I can't understand what's causing it. Here is my complete code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ARRAY_CAPACITY 50
void do_sort(int* list);
void merge_sort(int* list_of, int* buffer_of, int start, int end);
/*
 * Sorts the ARRAY_CAPACITY integers in `list` by handing the half-open
 * range [0, ARRAY_CAPACITY) to merge_sort() together with a scratch buffer.
 *
 * NOTE: the scratch buffer only has room for ARRAY_CAPACITY / 2 ints, so
 * merge_sort() must never write more than that many elements into it.
 */
void do_sort(int* list)
{
    const int half = ARRAY_CAPACITY / 2;
    int scratch[half];

    merge_sort(list, scratch, 0, ARRAY_CAPACITY);
}
/*
 * Recursive top-down merge sort over the half-open index range [start, end)
 * of list_of, using buffer_of as scratch space for the merge step.
 *
 * Known defects in this version (the first is the crash the question is about):
 *  - The copy loop always copies ARRAY_CAPACITY elements into buffer_of, no
 *    matter what start/end are. do_sort() passes a buffer with room for only
 *    ARRAY_CAPACITY / 2 ints, so the copy writes past the end of that buffer.
 *  - The merge reads the left run as buffer_of[0 .. mid-start), but the copy
 *    stored list_of[i] at buffer_of[i]; the two only line up when start == 0.
 */
void merge_sort(int* list_of, int* buffer_of, int start, int end)
{
/* Debug trace; executed on every call, including the recursive ones. */
printf("%s", "hi!");
int i, t;
/* A range of zero or one element is already sorted. */
if((end - start) < 2) return;
int mid = (start + end) / 2;
/* Sort the two halves [start, mid) and [mid, end) independently. */
merge_sort(list_of, buffer_of, start, mid);
merge_sort(list_of, buffer_of, mid, end);
/* left counts elements consumed from the buffered left run (0-based);
   right is an absolute index into list_of for the right run. */
int left = 0;
int right = mid;
/* Copies the WHOLE list into the buffer (see the defects listed above). */
for(i = 0; i < ARRAY_CAPACITY; i++)
{
buffer_of[i] = list_of[i];
}
/* Merge back into list_of[start .. end). Writing in place is safe for the
   right run because t never gets ahead of right. */
for(t = start; t < end; t++)
{
/* Take from the left run while it is not exhausted and either the right
   run is exhausted or the left element is strictly smaller. */
if((left < (mid - start)) && (right == end || buffer_of[left] < list_of[right]))
{
list_of[t] = buffer_of[left];
left++;
}
else
{
list_of[t] = list_of[right];
right++;
}
}
}
/*
 * Test driver: fills an array with ARRAY_CAPACITY pseudo-random values in
 * 0..99, prints one sample element and the unsorted array, sorts the array
 * with do_sort(), then prints it again.
 */
int main()
{
    int number_array[ARRAY_CAPACITY];
    int pos;

    srand(time(NULL));

    /* Fill the array with values in the range 0..99. */
    pos = 0;
    while(pos < ARRAY_CAPACITY)
    {
        number_array[pos] = rand() % 100;
        pos++;
    }

    /* Print a single element as a quick check that the fill worked. */
    printf("%d\n", number_array[3]);

    printf("%s\n", "Pre-Sorted Array: ");
    for(pos = 0; pos < ARRAY_CAPACITY; pos++)
        printf("%d ", number_array[pos]);

    do_sort(number_array);

    for(pos = 0; pos < ARRAY_CAPACITY; pos++)
        printf("%d ", number_array[pos]);
    printf("\n");
}
The output is as follows:
50 (this is a random number, but it always prints successfully)
Pre-Sorted Array:
Segmentation fault
So the segfault triggers when I try to loop to print the pre sorted array, but I've just proven that the array values were properly set, so I can't fathom this error. Help?

You have the following code:
void merge_sort(int* list_of, int* buffer_of, int start, int end)
{
...
for(i = 0; i < ARRAY_CAPACITY; i++)
{
buffer_of[i] = list_of[i];
}
...
That code will get called, at one point, with the following arguments:
list_of is an array of 50 integers.
buffer_of is an array of 25 integers.
start is 0.
end is 50.
You will copy 50 elements of list_of into buffer_of, but buffer_of has only room for 25 elements.

Related

Array declaration fails while trying to declare int array

I've been learning & coding sorting algorithms for some time and recently I've coded merge sort in C, and I've also coded a sort_test function to test the function that I write. In the sort test function, I'm declaring an array and assigning random values to it, but when the array size gets to 1,000,000 the program crashes. Why is that happening?
sort_test.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include "merge_sort.h"
#include "sort_test.h"
// test size
#define MIN 10
#define MAX 1000000
/*
 * int comparator with the argument order reversed:
 * returns -1 when *elem2 < *elem1, 1 when *elem2 > *elem1, 0 when equal.
 */
int cmpInt(const void *elem1,const void * elem2){
    const int lhs = *(const int *)elem1;
    const int rhs = *(const int *)elem2;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (rhs > lhs) - (rhs < lhs);
}
/*
 * double comparator with the argument order reversed:
 * returns -1 when *elem2 < *elem1, 1 when *elem2 > *elem1, 0 otherwise.
 */
int cmpDouble(const void *elem1,const void *elem2){
    const double lhs = *(const double *)elem1;
    const double rhs = *(const double *)elem2;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (rhs > lhs) - (rhs < lhs);
}
/* Seeds rand() from the current calendar time. */
void initSeed(){
    srand((unsigned int)time(NULL));
}
/*
 * Self-test for mergesort() on int arrays: for each size i = MIN, MIN*10, ...
 * up to MAX it fills an array with rand() values, sorts it, and asserts that
 * every adjacent pair is in non-decreasing order.
 *
 * Known problems in this version:
 *  - arr is a variable-length array with automatic storage; at i == MAX
 *    (1,000,000) it alone needs 1,000,000 ints of stack.
 *  - mergesort() is called with end == i, but merge() in merge_sort.c treats
 *    end as an inclusive index (its loops run while j <= end), so arr[i],
 *    one past the last element, is accessed.
 *  - i is a size_t but is printed with %7d.
 */
void intSortTest(){
initSeed();
for(size_t i = MIN;i <= MAX;i *=10){
/* VLA on the stack, sized by the current test size. */
int arr[i];
for(size_t j = 0; j < i;j++){
arr[j] = rand();
}
// sorting the array
mergesort(arr,0,i);
// checking if sorted array hold the
// condition i[0] <= i[1] ... <= i[n].
for(size_t j = 1;j < i;j++){
int *e1 = &arr[j-1];
int *e2 = &arr[j];
/* cmpInt(e2,e1) is <= 0 exactly when arr[j-1] <= arr[j]. */
assert(cmpInt(e2,e1) <= 0);
}
printf("INT TEST : %7d\tPASSED\n",i);
}
printf("\n");
}
/*
 * Self-test skeleton for sorting doubles: for each size i = MIN, MIN*10, ...
 * up to MAX it fills an array with values in 1.0..100.0 and asserts that the
 * array is in non-decreasing order.
 *
 * NOTE: the sort call is commented out, so the assertion is run against the
 * unsorted random data. The array is a stack-allocated VLA of i doubles.
 */
void doubleSortTest(){
    int i = MIN;

    initSeed();
    while (i <= MAX) {
        double arr[i];
        int j;

        for (j = 0; j < i; j++) {
            arr[j] = (double)(rand() % 100) + 1.0;
        }

        // perform sort
        //insertion_sort(arr,sizeof (double),i,cmpDouble);

        for (j = 1; j < i; j++) {
            double *e1 = &arr[j-1];
            double *e2 = &arr[j];
            assert(cmpDouble(e2,e1) <= 0);
        }

        printf("Double Test : %5d\tPASSED\n",i);
        i *= 10;
    }
    printf("\n");
}
sort_test.h
/* Declarations for the sort self-tests and comparators defined in sort_test.c. */
#ifndef SORT_TEST_H
#define SORT_TEST_H
/* Seeds rand() from the current time. */
void initSeed();
/* Sorts random int arrays of increasing size with mergesort() and asserts the result is ordered. */
void intSortTest();
/* Same size progression for double arrays (the sort call itself is commented out in sort_test.c). */
void doubleSortTest();
/* Comparators returning -1, 0 or 1; both compare *elem2 against *elem1. */
int cmpDouble(const void *elem1,const void *elem2);
int cmpInt(const void *elem1,const void * elem2);
#endif // SORT_TEST_H
merge_sort.h
/* Declarations for the merge sort defined in merge_sort.c. */
#ifndef MERGE_SORT_H
#define MERGE_SORT_H
/* Recursively sorts arr between indices start and end; merge_sort.c treats both as inclusive. */
void mergesort(int *arr,int start,int end);
/* Merges the two adjacent sorted runs of arr that are split at med. */
void merge(int *arr,int start,int med,int end);
#endif // MERGE_SORT_H
merge_sort.c
#include <stdio.h>
#include "sort_test.h"
#include "merge_sort.h"
/* Entry point: runs the int sort self-test (doubleSortTest() is not called). */
int main(){
intSortTest();
return 0;
}
/*
 * Recursive merge sort of arr[start..end], with BOTH bounds inclusive:
 * the halves are arr[start..median] and arr[median+1..end].
 */
void mergesort(int *arr,int start,int end){
    /* Zero or one element: nothing to sort. */
    if (start >= end) {
        return;
    }

    const int median = (end + start) / 2;

    mergesort(arr, start, median);
    mergesort(arr, median + 1, end);
    merge(arr, start, median, end);
}
/*
 * Merges the adjacent sorted runs arr[start..median] and arr[median+1..end]
 * (all bounds inclusive) into one sorted run, via a temporary array.
 *
 * NOTE: the temporary is a variable-length array with automatic storage,
 * sized end+1 no matter how short the range is, although only
 * end-start+1 slots are written. For large `end` this is the stack
 * allocation the answers identify as the cause of the crash.
 */
void merge(int *arr,int start,int median,int end){
/* i walks the left run, j the right run. */
int i = start; int j = median+1;
int copy[end+1];
/* Next free slot in copy; also the number of elements merged so far. */
int cIndex = 0;
/* Take the smaller head while both runs have elements left
   (on ties the right-run element is taken first). */
while(i <= median && j <= end) {
if(arr[j] <= arr[i]){
copy[cIndex++] = arr[j++];
} else {
copy[cIndex++] = arr[i++];
}
}
/* Drain whichever run still has elements. */
while(i <= median){
copy[cIndex++] = arr[i++];
}
while(j <= end){
copy[cIndex++] = arr[j++];
}
/* Copy the merged result back over arr[start..end]. */
for(int k = 0; k < cIndex; k++){
arr[start++] = copy[k];
}
}

It is because you are allocating the arrays on the stack. Try the following code instead.
/*
 * Self-test for mergesort() on int arrays: for each size i = MIN, MIN*10, ...
 * up to MAX it fills a heap-allocated array with rand() values, sorts it, and
 * asserts that every adjacent pair is in non-decreasing order.
 *
 * The array lives on the heap so that the largest size does not exhaust the
 * stack. The program exits with a diagnostic if the allocation fails.
 */
void intSortTest(){
    initSeed();
    for(size_t i = MIN;i <= MAX;i *=10){
        int *arr = malloc(i * sizeof *arr); // <-- changed this
        if (arr == NULL) {
            perror("intSortTest: malloc");
            exit(EXIT_FAILURE);
        }
        for(size_t j = 0; j < i;j++){
            arr[j] = rand();
        }
        // sorting the array
        // end == i is only valid when mergesort() treats end as exclusive,
        // as the corrected mergesort()/merge() in this answer do.
        mergesort(arr,0,(int)i);
        // checking if sorted array hold the
        // condition i[0] <= i[1] ... <= i[n].
        for(size_t j = 1;j < i;j++){
            int *e1 = &arr[j-1];
            int *e2 = &arr[j];
            assert(cmpInt(e2,e1) <= 0);
        }
        // i is a size_t, so it needs %zu rather than %d
        printf("INT TEST : %7zu\tPASSED\n",i);
        free(arr); // <-- added this
    }
    printf("\n");
}
EDIT
Also the merge algorithm is incorrect. More precisely, you have a problem with the value list boundaries.
When you define the start and end index of a value list, the values are in arr[start] to arr[end-1], not arr[end]. The number of values is then end-start. With this convention, you have an empty list when start == end.
As a consequence, the function mergesort becomes:
/*
 * Recursive merge sort of the half-open range arr[start..end):
 * `end` is one past the last element, so start == end is an empty range.
 */
void mergesort(int *arr,int start,int end){
    if (start+1 < end) {
        /* Two or more values: sort each half, then merge them. */
        const int median = (end + start) / 2;

        mergesort(arr, start, median);
        mergesort(arr, median, end);
        merge(arr, start, median, end);
    }
    /* Otherwise: a list with 0 or 1 values is already sorted. */
}
The merge function then becomes as follows:
/*
 * Merges the adjacent sorted runs arr[start..median) and arr[median..end)
 * (half-open: `median` and `end` are one past the last element of each run)
 * into a single sorted run arr[start..end).
 *
 * Requires start <= median <= end. The temporary buffer holds exactly
 * end - start ints and is heap-allocated so large ranges cannot overflow the
 * stack. If the allocation fails the program exits with a diagnostic.
 */
void merge(int *arr,int start,int median,int end){
    if (end <= start) {
        return; // empty range: nothing to merge, and avoids malloc(0)
    }

    size_t count = (size_t)(end - start);
    int *copy = malloc(count * sizeof *copy); // use malloc for huge arrays
    if (copy == NULL) {
        perror("merge: malloc");
        exit(EXIT_FAILURE);
    }

    int i = start; int j = median;
    size_t cIndex = 0;
    while(i < median && j < end) { // not i <= median && j <= end
        if(arr[j] <= arr[i]){
            copy[cIndex++] = arr[j++];
        } else {
            copy[cIndex++] = arr[i++];
        }
    }
    while(i < median){ // not i <= median
        copy[cIndex++] = arr[i++];
    }
    while(j < end){ // not j <= end
        copy[cIndex++] = arr[j++];
    }
    for(size_t k = 0; k < cIndex; k++){
        arr[start++] = copy[k];
    }
    free(copy);
}
As you can see, there are only minor differences.
With this code, your program runs without error.

Now that the code is visible, it is fairly easy to see that you are indeed blowing the stack as I suggested in one of my many comments.
In merge(), you have:
int copy[end+1];
as well as in intSortTest() having:
int arr[i];
where i reaches 1,000,000.
When end is 1,000,000 — it is set from i — you have an array of one million int values being sorted, and a copy with another one million int values (plus 1), so you attempt to place two million 4-byte int values on the stack — and 8,000,000 bytes blows the stack limits. Since 800,000 bytes (the previous size) fits on the stack in both Unix and Windows, it isn't 100% clear which you are using. There isn't much margin for error on Unix/Linux; the limit is thoroughly blown on Windows because neither 4 MB array fits on the stack.
The recommended fix is to use dynamic memory allocation (malloc() et al) instead of stack allocation — in both the sort test function and in the main merge() code.

How can I find why my merge sorting algorithm crash when sorting an array of 1 million element?

I'm a French student and trying to calculate the execution time of the Merge Sort algorithm for different size of array.
I also want to write the different execution time in a .csv file. But when my program tries to sort an array with 1 million elements the process returns -1073741571 (0xC00000FD) in Code::Blocks. So if you could point me to a way to find a solution I would be very grateful!
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Fills tab[0..n-1] with pseudo-random values in the range 0..99. */
void genTab(int *tab, int n) {
    int pos = 0;

    while (pos < n) {
        tab[pos] = rand() % 100;
        pos++;
    }
}
/*
 * Merge step: merges the adjacent sorted runs tab[deb..mid] and
 * tab[mid+1..fin] (all bounds inclusive) into one sorted run.
 *
 * NOTE: temp is a variable-length array with automatic storage, sized
 * fin+1 regardless of the length of the range, because it is indexed with
 * the same absolute positions as tab (k starts at deb). Only temp[deb..fin]
 * is used. For large fin this is the stack allocation the answers point to.
 */
void fusion(int *tab, int deb, int mid, int fin) {
/* i walks the left run, j the right run, k the write position in temp. */
int i = deb;
int j = mid + 1;
int k = deb;
int temp[fin + 1];
/* Take the smaller head while both runs have elements left
   (on ties the left-run element is taken first). */
while ((i <= mid) && (j <= fin)) {
if (tab[i] <= tab[j]) {
temp[k] = tab[i];
i++;
} else {
temp[k] = tab[j];
j++;
}
k++;
}
/* Drain whichever run still has elements. */
while (i <= mid) {
temp[k] = tab[i];
i++;
k++;
}
while (j <= fin) {
temp[k] = tab[j];
k++;
j++;
}
/* Copy the merged result back over tab[deb..fin]. */
for (i = deb; i <= fin; i++) {
tab[i] = temp[i];
}
}
/*
 * Recursive merge sort of tab[i..j], both bounds inclusive: sorts the two
 * halves tab[i..mid] and tab[mid+1..j], then merges them with fusion().
 */
void triFusion(int *tab, int i, int j) {
    if (i >= j) {
        return; /* zero or one element: already sorted */
    }

    const int mid = (i + j) / 2;

    triFusion(tab, i, mid);
    triFusion(tab, mid + 1, j);
    fusion(tab, i, mid, j);
}
/* Copies the first n ints of tab1 into tab2, front to back. */
void reset(int *tab1, int *tab2, int n) {
    for (int remaining = n; remaining > 0; --remaining) {
        *tab2++ = *tab1++;
    }
}
/*
 * Benchmark driver: for each array size in nbrTest, generates a random
 * array, times triFusion() on it with clock(), and appends "size;seconds"
 * to exeTime.csv.
 *
 * Returns 0 on success and EXIT_FAILURE if the output file cannot be opened
 * or closed, or if an array cannot be allocated.
 */
int main() {
    static const int nbrTest[] = {
        1000, 5000, 10000, 50000, 80000, 100000, 120000, 140000,
        150000, 180000, 200000, 250000, 300000, 450000, 1000000
    };
    const size_t nbrTestCount = sizeof nbrTest / sizeof nbrTest[0];
    const char *tpsExecution = "exeTime.csv";

    srand(time(NULL));

    FILE *fp = fopen(tpsExecution, "w");
    if (fp == NULL) {
        perror(tpsExecution);
        return EXIT_FAILURE;
    }
    fprintf(fp, "Array Size; Merge Time");

    for (size_t i = 0; i < nbrTestCount; i++) {
        int n = nbrTest[i];
        printf("Calculating time for an array of %d \n", n);

        int *tab = malloc(sizeof *tab * (size_t)n);
        int *copie = malloc(sizeof *copie * (size_t)n);
        if (tab == NULL || copie == NULL) {
            perror("malloc");
            free(tab);
            free(copie);
            fclose(fp);
            return EXIT_FAILURE;
        }

        genTab(tab, n);
        reset(tab, copie, n); /* keep a copy of the unsorted data */

        /* Only the sort itself is timed. */
        clock_t start = clock();
        triFusion(tab, 0, n - 1);
        clock_t end = clock();
        float tpsFusion = (float)(end - start) / CLOCKS_PER_SEC;

        /* NOTE(review): this copies the now-sorted tab over copie; if the
           intent was to restore tab from the saved copy, the first two
           arguments look swapped. Harmless here since both are freed next. */
        reset(tab, copie, n);

        printf("writing in the file\n");
        fprintf(fp, "\n%d;%f", n, tpsFusion);
        free(tab);
        free(copie);
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(fp) != 0) {
        perror(tpsExecution);
        return EXIT_FAILURE;
    }
    return 0;
}

int temp[fin+1]; may exceed the space limit for the stack. You should allocate it with malloc instead, and free it with free.
If you want to exclude malloc and free from the timed code, the allocation could be performed outside the timed code and passed in as work space.

(Note: posted after the answer from @Eric Postpischil).
The function
void fusion(int * tab, int deb, int mid, int fin)
Has the line
int temp[fin+1];
and the value of fin comes through another function from the number of elements n to be sorted
triFusion(tab, 0, n-1);
and as an automatic variable, breaks the stack when n is large.
I suggest replacing the line with
int *temp = malloc((fin+1) * sizeof *temp);
if(temp == NULL) {
puts("malloc");
exit(1);
}
// ...
free(temp);

fusion() is always allocating the full size of the array for temp, even when only a small fraction of temp is being used. You could change this to:
int k = 0;
...
int temp[fin+1-deb];
...
tab[i]=temp[i-deb];
still this will exceed stack space if n is large. So as suggested in the other answers:
int k = 0;
...
int *temp = malloc((fin+1-deb)*sizeof(int));
...
tab[i]=temp[i-deb];
...
free(temp)
or better still, do a one-time allocation of a second array in main or in a "helper" function, then pass a pointer to that second array into the merge sort functions.

How to delete multiple elements from a array at the same time

I want to delete multiple elements from array using index array,this is my code:
// b is an index array, n is size of b,
// player is the array need to be delete elements,
// size is the size of player
/*
 * Deletes from player, one at a time and in the order given, the cards at
 * the n positions listed in b.
 *
 * NOTE: every deletion shifts the later cards one slot to the left, so the
 * positions still waiting in b no longer refer to the cards they referred
 * to before the first deletion.
 */
void play_cards(int b[],int n,int player[],int *size){
    int k = 0;

    while (k < n) {
        delete_cards(b[k], player, size);
        ++k;
    }
}
/*
 * Removes the card at position n from player by shifting every later card
 * one slot to the left, then decrements *size.
 *
 * n is not range-checked: it must be >= 0. When n >= *size no card is moved,
 * but *size is still decremented.
 */
void delete_cards(int n,int player[],int *size){
    /* Stop at the last valid source index (*size - 1), so player[*size],
       which may lie past the end of the array, is never read. */
    for (int i = n; i + 1 < *size; i++)
        player[i] = player[i + 1];
    *size -= 1;
}
/* Demo: removes the cards at positions 2..7 and prints what is left, '|'-separated. */
int main(void){
    int player[10] = {1,2,3,3,4,4,5,5,6,7};
    int index[6] = {2,3,4,5,6,7};
    int size = 10;

    play_cards(index,6,player,&size);

    int i = 0;
    while (i < size) {
        printf("%d|",player[i]);
        i++;
    }
    puts("");
    return 0;
}
I expect print the player should be 1,2,6,7 instead of 1,2,3,4. How should I fix it?

First I would not call the function delete_cards as it suggests that it deletes multiple cards which it does not - just delete_card would make things more clear.
Anyway - when you change the player array before you have played all cards in the index array, you change the meaning of the indexes. This is why your current code doesn't work.
So you can do two things:
a) Play all the cards first and then delete the cards that were played. This can be done by first marking each played card with -1 and then running a loop that removes every element equal to -1
or
b) Play a card, delete it and adjust the remaining elements in index by decrementing them by one. Note: This solution requires that index is sorted (lowest first).
Solution a) could look something like this:
/*
 * Compacts player in place: every card not equal to -1 is moved towards the
 * front, keeping its relative order, and *size is reduced by the number of
 * -1 entries found. A slot whose card was moved is overwritten with -1.
 */
void delete_played_cards(int player[],int *size)
{
    int write = 0;   /* next slot to receive a kept card */
    int removed = 0; /* number of -1 markers seen */

    for (int read = 0; read < *size; read++)
    {
        if (player[read] == -1)
        {
            removed++;
            continue;
        }

        player[write] = player[read];
        if (read != write)
            player[read] = -1; /* the card moved, so clear its old slot */
        write++;
    }

    *size -= removed;
}
/*
 * Plays the cards at the n positions listed in b: each position is first
 * marked with -1, and all marked cards are then removed in a single pass by
 * delete_played_cards(). Because nothing moves until every position has been
 * marked, the positions in b all refer to the original layout of player.
 *
 * Positions outside 0 .. *size-1 are ignored rather than written through.
 * -1 is reserved as the "played" marker, so it cannot be a real card value.
 */
void play_cards(int b[],int n,int player[],int *size)
{
    for (int i = 0; i < n; i++)
    {
        int pos = b[i];
        if (pos < 0 || pos >= *size)
            continue; // not a card currently held; writing there would be out of bounds
        player[pos] = -1; // Mark card as played
    }
    delete_played_cards(player,size);
}
/* Demo: plays the cards at positions 2..7 and prints the remaining hand, '|'-separated. */
int main(void)
{
    int player[10] = {1,2,3,3,4,4,5,5,6,7};
    int index[6] = {2,3,4,5,6,7};
    int size = 10;
    int shown;

    play_cards(index,6,player,&size);

    for (shown = 0; shown != size; ++shown)
    {
        printf("%d|",player[shown]);
    }
    puts("");
    return 0;
}

Modify play_cards:
/*
 * Deletes the cards at the n positions listed in b, walking b from its last
 * entry to its first. When b is in ascending order this removes the highest
 * position first, so the positions not yet processed are unaffected by the
 * shifting that each deletion causes.
 */
void play_cards(int b[], int n, int player[], int *size)
{
    int i = n;

    while (i-- > 0)
        delete_cards(b[i], player, size);
}
This will start deleting from the end of array.
As BLUEPIXY mentioned.

here is a pseudocode that you can work with:
given a sorted list, 1..n
for i = 2 up to length of list:
if list[i] is equal to list[i-1]:
shift the sublist [2..] 1 position to the left
else
increment i by 1

If you want to delete easily and efficiently without using loop you can use memcpy
#include <stdio.h>
#include <string.h>
#define INDEX_MAX 6
/*
 * Demo for this specific input only: positions 2..7 are removed by copying
 * the two trailing elements (positions 8 and 9) down to position 2 with one
 * memcpy. The offsets 2 and 8 are hard-coded; the index array is not read.
 */
int main ()
{
    int src[] = {1,2,3,3,4,4,5,5,6,7};
    int index[] = {2,3,4,5,6,7};
    int size = 10 - INDEX_MAX; /* number of elements that remain */
    int x = 0;

    /* size - 2 elements are moved, since positions 0 and 1 stay where they are. */
    memcpy(&src[2], &src[8], (size - 2) * sizeof(int));

    while (x < size) {
        printf("%d",src[x]);
        x++;
    }
    return 0;
}

Make a program run linear in C

So based in the following problem from cumulative sum query I created the solution. But is any other way to solve the problem in C with linear complexity O(N)?
Problem description:
William Macfarlane wants to look at an array.
You are given a list of N numbers and Q queries. Each query is
specified by two numbers i and j; the answer to each query is the sum
of every number between the range [i, j] (inclusive).
Note: the query ranges are specified using 0-based indexing.
Input
The first line contains N, the number of integers in our list (N <=
100,000). The next line holds N numbers that are guaranteed to fit
inside an integer. Following the list is a number Q (Q <= 10,000). The
next Q lines each contain two numbers i and j which specify a query
you must answer (0 <= i, j <= N-1). Output
Output
For each query, output the answer to that query on its own line in the
order the queries were made.
Here is the solution:
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
/* One range query: the 0-based bounds of the range to sum; sum_array() includes both. */
struct node {
int first;
int last;
};
/*
 * Returns array[first] + ... + array[last], both ends included.
 * Returns 0 when first > last. The running total is a plain int.
 */
int sum_array(int *array, int first, int last) {
    int total = 0;
    int pos = first;

    while (pos <= last) {
        total += array[pos];
        ++pos;
    }
    return total;
}
int main() {
FILE* input = fopen("share.in","r");
int N = 0;
fscanf(input,"%d",&N);
int *array = (int*)malloc(N * sizeof(int));
for (int i = 0; i < N; i++) {
fscanf(input,"%d",&array[i]);
}
int Q = 0;
fscanf(input,"%d",&Q);
struct node query[Q];
for (int i=0; i < Q; i++) {
fscanf(input,"%d",&query[i].first);
fscanf(input,"%d",&query[i].last);
}
fclose(input);
int sum = 0;
for ( int i = 0; i < Q ; i++) {
int first = query[i].first;
int last = query[i].last;
sum = sum_array(array,first,last);
printf("Number of queries : %d , sum is %d\n",i ,sum);
}
free(array);
return 0;
}
Update:
The answer given is good. But for some reason I couldn't make it work.
So here is the code rewritten and if someone can explain me what I do wrong I will be happy! Keep in mind we want the range to be [first,last]
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
/* One range query: the 0-based bounds of the range to sum (the question wants both ends included). */
struct node {
int first;
int last;
};
/*
 * Returns array[first] + ... + array[last], both ends included.
 * Returns 0 when first > last. The running total is a plain int.
 */
int sum_array(int *array, int first, int last) {
    int total = 0;
    int pos = first;

    while (pos <= last) {
        total += array[pos];
        ++pos;
    }
    return total;
}
/*
 * Second attempt: builds integralArray while reading the input and answers
 * each query with one subtraction instead of a loop.
 *
 * What goes wrong in this version:
 *  - integralArray[i] is built from array[i-1], so it holds only
 *    array[i-1] + array[i], not the running total of array[0..i]. A prefix
 *    sum must add integralArray[i-1] instead.
 *  - The query formula reads integralArray[first - 1]; when first == 0 that
 *    is integralArray[-1], outside the array.
 *  - integralArray is never freed, and the results of fopen, fscanf and
 *    malloc are not checked.
 */
int main() {
FILE* input = fopen("share.in","r");
int N = 0;
fscanf(input,"%d",&N);
int *array = (int*)malloc(N * sizeof(int));
int *integralArray = (int*)malloc(N * sizeof(int));
for (int i = 0; i < N; i++) {
fscanf(input,"%d",&array[i]);
/* BUG: uses array[i-1]; a running total needs integralArray[i-1] here. */
integralArray[i] = array[i] + ((i > 0) ? array[i-1] : 0);
}
int Q = 0;
fscanf(input,"%d",&Q);
/* Variable-length array sized by a value read from the file. */
struct node query[Q];
for (int i=0; i < Q; i++) {
fscanf(input,"%d",&query[i].first);
fscanf(input,"%d",&query[i].last);
}
fclose(input);
int sum = 0;
for (int i = 0; i < Q ; i++) {
int first = query[i].first;
int last = query[i].last;
/* With a correct prefix sum this is the inclusive sum of [first, last],
   but first == 0 needs a special case: there is no element at index -1. */
sum = integralArray[last] - integralArray[first - 1];
printf("Number of queries : %d , sum is %d\n",i ,sum);
}
free(array);
return 0;
}

You'd form the integral array. Modify to something like:
int *array = (int*)malloc(N * sizeof(int));
int *integralArray = (int*)malloc(N * sizeof(int));
for (int i = 0; i < N; i++) {
fscanf(input,"%d",&array[i]);
integralArray[i] = array[i] + ((i > 0) ? integralArray[i-1] : 0);
}
So the element at integralArray[i] is the sum of all elements in array from 0 to i.
Then, to get the sum from a to b, where a < b, integralArray[b] is the sum from 0 to b and integralArray[a] is the sum from 0 to a so you can just compute integralArray[b] - integralArray[a] to get the total from a to b. Intuitively, integralArray[b] includes the numbers you want but it also includes the numbers up to and including a. You don't want those so you take them off again.
Vary appropriately for inclusion or exclusion of the number at a and the number at b. That as given will include the number at b but not that at a. You could adjust your integralArray to be one earlier (so integralArray[b] is the sum from 0 to b-1) or adjust your indices.

C: reverse array in subrange

I am trying to implement left array rotation using the method described here: http://www.cs.bell-labs.com/cm/cs/pearls/s02b.pdf (under section Reversal algorithm)
I am having trouble reversing the array when the start index isn't 0.
Here is what I have so far:
/*
 * Intended to reverse the elements a[start .. end-1] in place.
 *
 * Defect (the subject of the question): i is an absolute index that begins
 * at start, whereas len is a length counted from 0. The loop test
 * i < --len and the partner subscript a[len] therefore only pair up the
 * right elements when start == 0.
 */
void reverse_arr(int *a, int start, int end)
{
int i;
/* Number of elements in the range to reverse. */
int len = end - start;
//printf("Len pre loop: %d\n", len);
int swap;
/* --len is decremented before each comparison, so inside the body len is
   the index (relative to the beginning of a) of the partner element. */
for(i = start; i < --len; i++)
{
//printf("start: %d\tlen: %d\n", start, len);
swap = a[i];
a[i] = a[len];
a[len] = swap;
}
}
This works great when the start index is 0, but when it is anything else it never reverses all of the elements.
ex:
int test[] = {1,2,3,4,5,6};
reverse_arr(test, 0, 2); //reverse the first 2 elements of the array
results in: {2,1,3,4,5,6} which is expected
int test[] = {1,2,3,4,5,6};
reverse_arr(test, 2, 6); //reverse the last 4 elements of the array
results in: {2,1,4,3,5,6} which is not expected, only the 3 and 4 were reversed.
Any help would be appreciated.

You need to adjust the other subscript in the swap:
/*
 * Reverses the elements a[start .. end-1] in place (end is exclusive).
 * Ranges of zero or one element are left unchanged.
 *
 * i and len are both offsets relative to start, so the loop test
 * i < --len compares like with like, and both subscripts in the swap are
 * shifted by start.
 */
void reverse_arr(int *a, int start, int end)
{
    int len = end - start;
    for (int i = 0; i < --len; i++)
    {
        /* After the decrement, len is the offset of i's mirror element. */
        int swap = a[start + i];
        a[start + i] = a[start + len];
        a[start + len] = swap;
    }
}
You could probably also use:
/*
 * Reverses the elements a[start .. end-1] in place (end is exclusive).
 * Ranges of zero or one element are left unchanged.
 *
 * Two absolute indices walk towards each other from the ends of the range;
 * the loop stops when they meet or cross.
 */
void reverse_arr(int *a, int start, int end)
{
    end--; /* from here on, end is the index of the last element */
    for (int i = start; i < end; i++, end--)
    {
        int swap = a[i];
        a[i] = a[end];
        a[end] = swap;
    }
}
Working test code
/*
 * Two interchangeable implementations of reverse_arr(), selected at compile
 * time with -DVERSION1. Both reverse a[start .. end-1] in place (end is
 * exclusive) and leave ranges of zero or one element unchanged.
 */
#if defined(VERSION1)
/* Offset-based: i and len are both relative to start. */
static void reverse_arr(int *a, int start, int end)
{
    int i;
    int len = end - start;
    int swap;
    for (i = 0; i < --len; i++)
    {
        /* After the decrement, len is the offset of i's mirror element. */
        swap = a[start + i];
        a[start + i] = a[start + len];
        a[start + len] = swap;
    }
}
#else
/* Index-based: two absolute indices walk towards each other. */
static void reverse_arr(int *a, int start, int end)
{
    int i;
    end--; /* from here on, end is the index of the last element */
    for (i = start; i < end; i++, end--)
    {
        int swap = a[i];
        a[i] = a[end];
        a[end] = swap;
    }
}
#endif
#define DIM(x) (sizeof(x)/sizeof(x[0]))
#include <stdio.h>
/* Prints the size elements of array on one line, each preceded by a space. */
static void print_array(int *array, size_t size)
{
    size_t pos = 0;

    while (pos < size)
    {
        printf(" %d", array[pos]);
        pos++;
    }
    putchar('\n');
}
/*
 * Runs reverse_arr() on a fresh copy of {1,2,3,4,5,6} with the bounds
 * (lo, hi) and prints the array before and after the call.
 */
static void tester(int lo, int hi)
{
    int test[] = {1,2,3,4,5,6};
    const size_t count = DIM(test);

    printf("Before: (%d, %d)\n", lo, hi);
    print_array(test, count);

    reverse_arr(test, lo, hi); /* reverse the range selected by lo and hi */

    printf("After: (%d, %d)\n", lo, hi);
    print_array(test, count);
    putchar('\n');
}
/* Exercises the two cases from the question: the first two elements, then the last four. */
int main(void)
{
    tester(0, 2);
    tester(2, 6);

    return 0;
}
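A correct reversal prints the following, whether or not the code is compiled with -DVERSION1:
Before: (0, 2)
1 2 3 4 5 6
After: (0, 2)
2 1 3 4 5 6
Before: (2, 6)
1 2 3 4 5 6
After: (2, 6)
1 2 6 5 4 3
(The listing originally posted here ended with `1 2 6 4 5 3`: only the outer pair had been swapped. That happens when the loop test `i < --len` compares an absolute index against a shrinking length, which ends the loop one swap early whenever start is not 0. Check that the version of reverse_arr you compile reverses the whole range.)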

You should drop len altogether, and use end instead. While you are at it, you can also drop i, and use start. Your code would become a lot more readable that way:
/*
 * Reverses the elements a[start .. end-1] in place (end is exclusive) by
 * swapping from both ends towards the middle. No length variable is needed.
 */
void reverse_arr(int *a, int start, int end)
{
    int lo = start;
    int hi = end;

    while (lo < hi)
    {
        int tmp;

        --hi; /* hi is exclusive, so step onto the element before using it */
        tmp = a[lo];
        a[lo] = a[hi];
        a[hi] = tmp;
        ++lo;
    }
}

It's because you're comparing i against len which is fine if it starts at 0 but not if it starts at anything else (the base values are different).
For example, if you want to rotate three characters starting at offset 12, nothing will happen since the for loop begins in an exit state.
A quick fix is to simply adjust a before beginning the loop so that it effectively thinks the (local value) a is the start of the array:
a += start;
for (i = start; i < --len; i++) // No change on this line.