C: segfault on merge sort only on large files - c

The following code sorts an array of words, working on small arrays, and segfaulting on large ones (>400000 words, though I haven't found a limit). It is being called by a program that passes it an array of words (read from a file) to be sorted and tests its success:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "csort.h"
#include "sort.h"
// array points to array of pointers to strings, count is number of entries in array
void sortC(char** array, unsigned int count){
array = merge_sort(array, count);
// testing:
/*for (int i = 0; i < count; i++){
printf("%s ", array[i]);
}*/
}
char** merge_sort(char** array, int count){
if (count <= 1) return array;
else {
int lcount = 0;
int rcount = 0;
int middle = count/2;
lcount = middle;
char* left[lcount];
subArray(array, left, 0, middle);
rcount = count-middle;
char* right[rcount];
subArray(array, right, middle, count);
return merge(merge_sort(left, lcount), merge_sort(right, rcount), array, 0, lcount, rcount);
}
}
void subArray(char** array, char** subarray, int start, int end){
int ai; // index in original array
int si; // index in subarray
for (ai = start, si = 0; ai < end; ai++, si++){
subarray[si] = array[ai];
}
}
char** merge(char** left, char** right, char** output, int oi, int lcount, int rcount){
if (lcount > 0 && rcount > 0){
int lmin = findMinimum(left, lcount);
int rmin = findMinimum(right, rcount);
if (strcmp(left[lmin], right[rmin]) < 0){
output[oi] = left[lmin];
removeFromArray(left, lmin, lcount);
lcount--;
}
else {
output[oi] = right[rmin];
removeFromArray(right, rmin, rcount);
rcount--;
}
}
else if (lcount == 0) {
if (rcount == 1) {
output[oi] = right[0];
return output;
} else {
int rmin = findMinimum(right, rcount);
output[oi] = right[rmin];
removeFromArray(right, rmin, rcount);
rcount--;
}
}
else if (rcount == 0) {
if (lcount == 1) {
output[oi] = left[0];
return output;
} else {
int lmin = findMinimum(left, lcount);
output[oi] = left[lmin];
removeFromArray(left, lmin, lcount);
lcount--;
}
}
return merge(left, right, output, ++oi, lcount, rcount);
}
int findMinimum(char** array, int count){
char* minvalue = array[0];
char* currentvalue = minvalue;
int minindex = 0;
for (int i = 1; i < count; i++){
currentvalue = array[i];
if (strcmp(currentvalue, minvalue) < 0){
minvalue = currentvalue;
minindex = i;
}
}
return minindex;
}
void removeFromArray(char** array, int index, int count){
// removes specified index from an array
for (int i = index; i < count; i++){
if (i+1 == count){
array[i] = 0; // this entry will be gone when count decrements
} else {
array[i] = array[i+1];
}
}
}

If there's no bug on your code then the problem might be how you are storing the data. Do you use malloc() to allocate the array to store your data or are you declaring an array that is big enough?
For large data sets you must use malloc(), which will allocate space on the HEAP instead of the stack. The stack has a limited space. This would explain why with smaller data your program works and with bigger data sets it crashes.
Also one very important point is that you are using recursion: merge() calling merge(). Too many recursive calls could lead to a stack overflow (segfault).

Looks like a stack overflow, you are allocating automatic arrays of thousands if items in each invocation, and then recursing.
These lines, to be specific:
char* left[lcount];
and
char* right[rcount];
For the values in your comment, where count == 7157, this would be quite costly in terms of stack space.
Consider either using malloc() for these, or figure out a way to represent a sub-array without requiring new memory.

Related

Printf function who returning an array of int

I'm trying this exercice but I don't know how to printf my function in main.
Exercice:
1) Write a function who returning an int tab with all values between min and max
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max)
{
int len;
int *tab;
len = min;
while (len < max)
len++;
tab = (int *)malloc(sizeof(*tab) * len + 1);
while (min < max)
{
*tab = min;
min++;
}
return(tab);
}
int main()
{
ft_range(0, 10);
return(0);
}
returning an int tab with all values between min and max
Depending on the idea of "between", it is an open question if the end values should be included. Given OP's mis-coded +1 in sizeof(*tab) * len + 1, I'll go with the idea both ends should be included.
Miscalculation of len
Rather than loop, simply subtract
//len = min;
//while (len < max)
// len++;
len = max - min + 1;
Allocation miscalculated
Good to use sizeof *pointer, yet the + 1 makes little sense. If anything the ... * len + 1 should have been ... * (len + 1). Yet the +1 is handled with the above fix. Also cast not needed in C.
// tab = (int *)malloc(sizeof(*tab) * len + 1);
tab = malloc(sizeof *tab * len);
Wrong assignment
Code repeatedly assigned the same *tab location.
//while (min < max)
//{
// *tab = min;
// min++;
//}
for (int i = min; i <= max; i++) {
tab[i - min] = i;
}
No allocation error checking nor min, max validation
Potential for int overflow with mix - min
Be sure to free allocations
Alternative
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max) {
if (min > max) {
return NULL;
}
size_t len = (size_t)max - min + 1;
int *tab = malloc(sizeof *tab * len);
if (tab == NULL) {
return NULL;
}
for (size_t i = 0; i < len; i++) {
tab[i] = int(min + i);
}
return tab;
}
int main() {
int mn = 0;
int mx = 10;
int *ft = ft_range(mn, mx);
if (ft) {
int *p = ft;
for (int i = mn; i <= mx; i++) {
printf("%d ", *p++);
}
free(ft);
}
return 0;
}
inside "ft_range", when you are trying to calculate the length the array needs to do, all you have to do is subtract the minimum from the maximum. what you did is much slower and unnecessary.
when allocating memory, you did not need to add a "+1" at the end. you may have seen it done in other examples, but it does not apply here.
the "while" loop inside "ft_range" needs to have a "<=" sign, otherwise it will stop before it reaches the "max" value.
when adding a value to the "tab" int array, you are always doing so by dereferencing it (putting a "*" before it), so every one of your values will come on the first position of the array and overwrite themselves. you need to have another "int i" to keep track of the current index of the array.
make sure to free the memory you allocated with "malloc" after you finish your enumeration. it does not matter right now, but if you ever get to writing more complex programs you will need to do so to keep the performance up, which can be critical.
here's a working code, with a few comments (i'm shit at comments, if you don't understand them, just ask me bro)
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max)
{
int len;
int * tab;
len = max-min;
tab = (int *)malloc(sizeof(*tab) * len);
// create an index to track the position inside "tab"
int i = 0;
// sign needs to be "<=" so it does not stop before it reaches the max value
while (min <= max)
{
tab[i] = min;
// ++ needs to come before so the variable's value is updated right here
++min;
// increase the i index to the next position in "tab"
++i;
}
return(tab);
}
int main()
{
int min = 5;
int max = 10;
int len = max-min;
int * range = ft_range(min, max);
for(int i = 0; i <= len; ++i)
{
// %d = integer
// \n = move to next line
printf("%d\n", range[i]);
}
getchar();
return(0);
}
You correctly allocate memory to the table, you just need to printf the value in your main by using a loop trough your tab, i write this code who work perfectly.
#include <stdlib.h>
int *ft_range(int min, int max)
{
int *ptr;
int mi;
int i;
int range;
range = (max - min);
mi = min;
ptr = NULL;
if (min > max)
return (NULL);
else
ptr = malloc(sizeof(int) * range);
i = 0;
while (i < range)
{
ptr[i] = mi + i;
i++;
}
return (ptr);
}
// #include <stdio.h>
// int main()
// {
// //int i = 0;
// int min = 1;
// int max = 30;
// while(min < max)
// {
// printf("%d\n", *ft_range(min, max));
// min++;
// }
// }

Counting swaps in Quicksort in C

I am trying to count the number of swaps that occur in my quicksort in C. However, I am getting values that are incorrect and not sure where I went wrong. I am using a structures as my arrays to be sorted.
struct anArray{
int numbers[maxSize];
int swaps;
};
/* Partition function */
int partition(struct anArray *array, int start, int end){
if(start == end){
return start;
}
int pivot = array->numbers[end];
int low = start - 1;
int high = end;
for(;;){
do{
low++;
} while(array->numbers[low] < pivot);
do{
high--;
} while(array->numbers[high] > pivot);
/* Detector for when the cells meet */
if(low >= high){
swap(array, low, end);
return low;
}
}
/* Swapping the values */
swap(array, low, high);
}
This is my partition function used to "separate" the arrays.
void quickSort(struct anArray *array, int start, int end){
if(end - start <= 0){ return; }
else{
int pivot = array->numbers[end];
int partitionPoint = partition(array, start, end);
quickSort(array, start, partitionPoint - 1);
quickSort(array, partitionPoint + 1, end);
}
}
This is my quicksorting function. It's a recursive function.
My swap function increments counter by 1 every time it's called.
In my main, I set
myArray->swaps = counter;
But the number of times the swaps occurs isn't right. For example, if I sort an array that goes from 1 to 9, the number of swaps should be 0 but I get 9. I've tried incrementing counter when it's in the partition function only but it gives me the same result.
Is there something wrong with my partition function?
Thank you very much
Edit 1:
Here's my swap function.
void swap(struct anArray *array, int first, int second){
int temp = array->numbers[first];
array->numbers[first] = array->numbers[second];
array->numbers[second] = temp;
counter++;
}
I've tried using
void swap(struct anArray *array, int first, int second, int swapCount)
and then have swapCount be array->swaps when calling the swap function, and incrementing it by 1 but it gives me the same answer.
Here's a part of my main.
int main(){
struct anArray *ascending = (struct anArray*)malloc(10 * sizeof(struct anArray));
int ascend[maxSize] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
initArray(ascending, ascend);
quickSort(ascending, 0, maxSize - 1);
ascending->swaps = counter;
printf("Test: Unique random values\nSorted: [ ");
for(int i = 0; i < maxSize; i++){
printf("%i ", ascending->numbers[i]);
}
printf("]\nSwaps: %i\nComps: \n\n", ascending->swaps);
The other parts of my main are just other arrays to be sorted. The initArray is used to set the values of array->numbers and also reset array->swaps to 0.
Your quicksort code seems pretty good. I didn't examine it rigorously, but it passed a simple test, so I didn't investigate further. (Edit: Based on your feedback, I created a third version in my second update that shows that the sort has an issue for larger data inputs).
The main bug was the malloc at the top of main. We do not want an array of the struct anArray:
struct anArray *ascending = malloc(10 * sizeof(struct anArray));
That is, we do not want (e.g.) 10 structs, we want a single struct and to fill in 10 ints that go into the numbers field that is in that single struct.
The initArray function was not posted, so I had to guess/deduce what it might be. Based on the above bug, I'm not sure that numbers would have been initialized correctly.
From the code fragments posted, I was able to piece together a whole program. I've created two versions:
One with the bugs annotated [but not fixed] that compiles cleanly.
And, a second that is fully cleaned up, working, and generalized for arbitrary array sizes [please pardon the gratuitous style cleanup]
Here is [something close to] your original code with the bugs annotated:
#include <stdio.h>
#include <stdlib.h>
// NOTE/BUG: this was not defined and _fixed_ defines should be all caps
#define maxSize 10
struct anArray {
int numbers[maxSize];
int swaps;
};
int counter;
void
initArray(struct anArray *array,const int *src)
{
for (int idx = 0; idx < maxSize; ++idx)
array->numbers[idx] = src[idx];
array->swaps = 0;
}
void
swap(struct anArray *array, int first, int second)
{
int temp = array->numbers[first];
array->numbers[first] = array->numbers[second];
array->numbers[second] = temp;
counter++;
}
/* Partition function */
int
partition(struct anArray *array, int start, int end)
{
if (start == end) {
return start;
}
int pivot = array->numbers[end];
int low = start - 1;
int high = end;
for (;;) {
do {
low++;
} while (array->numbers[low] < pivot);
do {
high--;
} while (array->numbers[high] > pivot);
/* Detector for when the cells meet */
if (low >= high) {
swap(array, low, end);
return low;
}
}
/* Swapping the values */
swap(array, low, high);
}
void
quickSort(struct anArray *array, int start, int end)
{
if (end - start <= 0) {
return;
}
else {
// NOTE/BUG: pivot is _not_ used
int pivot = array->numbers[end];
int partitionPoint = partition(array, start, end);
quickSort(array, start, partitionPoint - 1);
quickSort(array, partitionPoint + 1, end);
}
}
int
main(void)
{
// NOTE/BUG: we do _not_ want an array of the struct, but an array of int
// that is allocated for "number" _inside_ the struct
struct anArray *ascending = malloc(10 * sizeof(struct anArray));
int ascend[maxSize] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
// NOTE/BUG: this was not defined
initArray(ascending, ascend);
quickSort(ascending, 0, maxSize - 1);
ascending->swaps = counter;
printf("Test: Unique random values\nSorted: [ ");
for (int i = 0; i < maxSize; i++) {
printf("%i ", ascending->numbers[i]);
}
printf("]\nSwaps: %i\nComps: \n\n", ascending->swaps);
return 0;
}
Here is a cleaned up and working version. I've generalized it so it can take an arbitrarily long array. I've also done a bit of style and code cleanup:
#include <stdio.h>
#include <stdlib.h>
typedef struct {
int *numbers;
int size;
int swaps;
} Array;
Array *
initArray(const int *src,int size)
{
Array *array = malloc(sizeof(Array));
array->numbers = malloc(size * sizeof(int));
array->size = size;
// store in reverse order so the sort will actually do something
for (int idx = 0; idx < size; ++idx)
array->numbers[size - 1 - idx] = src[idx];
array->swaps = 0;
return array;
}
void
freeArray(Array *array)
{
free(array->numbers);
free(array);
}
void
swap(Array *array, int first, int second)
{
int temp = array->numbers[first];
array->numbers[first] = array->numbers[second];
array->numbers[second] = temp;
array->swaps += 1;
}
/* Partition function */
int
partition(Array *array, int start, int end)
{
if (start == end)
return start;
int pivot = array->numbers[end];
int low = start - 1;
int high = end;
for (;;) {
do {
low++;
} while (array->numbers[low] < pivot);
do {
high--;
} while (array->numbers[high] > pivot);
/* Detector for when the cells meet */
if (low >= high) {
swap(array, low, end);
return low;
}
}
/* Swapping the values */
swap(array, low, high);
}
void
quickSort(Array *array, int start, int end)
{
if (end - start <= 0)
return;
//int pivot = array->numbers[end];
int partitionPoint = partition(array, start, end);
quickSort(array, start, partitionPoint - 1);
quickSort(array, partitionPoint + 1, end);
}
int
main(void)
{
// NOTE/BUG: we do _not_ want an array of the struct, but an array of int
// that is allocated for "number" _inside_ the struct
int original[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
int size = sizeof(original) / sizeof(original[0]);
Array *ascending = initArray(original, size);
quickSort(ascending, 0, ascending->size - 1);
printf("Test: Unique random values\nSorted: [ ");
for (int i = 0; i < ascending->size; i++) {
int expected = original[i];
int actual = ascending->numbers[i];
printf("%d%s ", actual, (actual == expected) ? "" : "???");
}
printf("]\nSwaps: %i\nComps: \n\n", ascending->swaps);
freeArray(ascending);
return 0;
}
UPDATE:
What does the line int size = sizeof(original) / sizeof(original[0]); do exactly?
Does it give me an integer for size which I set to be the size of how many numbers I can hold in an array?
Yes, that is common/idiomatic trick to get the count of the number of elements of a fixed size array:
int array[] = { 1, 2, 3 };
size_t count = sizeof(array) / sizeof(array[0]);
Here, sizeof(array) is 3 times the size [in bytes] of the individual elements [which are int, which is 4 bytes], so we have 3 * 4 or 12.
sizeof(array[0]) is the size of the single, first element of the array, which is [again] an int, so this is 4.
So, when we divide the two, we have 12 / 4 or 3, which is the number of elements.
If so, wouldn't the amount of numbers I can hold be really small if sizeof(original[0]) happens to be very large?
No, because of the division. It doesn't care how large the element size [in bytes] is, because the ratio always produces the number of elements.
The sizeof(arr) / sizeof(arr[0]) trick is useful to get the count when we do: int arr[] = { ... };
If we do:
#define ARRCOUNT 3
int arr[ARRCOUNT] = { 1, 2, 3 };
We already know the count (i.e. it is ARRCOUNT).
The [slight] advantage to the sizeof/sizeof trick is that if we had incorrectly defined ARRCOUNT as 4 by mistake, it would still compile, link, and run, but would produce incorrect results [because there were only 3 elements].
This is a common enough trick that we can define a generic macro [that we can reuse by putting it a .h file]:
#define ARRAY_COUNT(arr_) (sizeof(arr_) / sizeof(arr_))
UPDATE #2:
I've tried your code (even tried copying and pasting it) but my swaps is still showing 9 despite my array to be sorted is just going from { 1 to 10}. Not sure why this keeps occurring.
I believe [now] you have a bug in the sort itself.
I've produced another version that has much more extensive test data generation and comparison.
At a minimum, because of the way the tests are structured, the first element of the sorted array should always have a value of 1.
The test that fails is the one that does a random shuffle of the original array before sending it in to be sorted.
You can add other tests as needed. The array needn't be so large to show the problem. For example, the following single test is enough to produce the error:
bigtest(100,237,1);
Anyway, here is the enhanced diagnostic code:
#include <stdio.h>
#include <stdlib.h>
#define MAXLEN 60
typedef struct {
int *numbers;
int size;
int swaps;
} Array;
Array *
initArray(const int *src,int size,int randshuf)
{
int idx;
Array *array = malloc(sizeof(Array));
array->numbers = malloc(size * sizeof(int));
array->size = size;
array->swaps = 0;
// store in reverse order so the sort will actually do something
switch (randshuf) {
case 0: // reverse the numbers
for (idx = 0; idx < size; ++idx)
array->numbers[size - 1 - idx] = src[idx];
break;
default: // do _crude_ random shuffle
for (idx = 0; idx < size; ++idx)
array->numbers[idx] = 0;
for (idx = 0; idx < size; ++idx) {
while (1) {
int ridx = rand() % size;
if (array->numbers[ridx] == 0) {
array->numbers[ridx] = src[idx];
break;
}
}
}
break;
}
return array;
}
void
freeArray(Array *array)
{
free(array->numbers);
free(array);
}
void
swap(Array *array, int first, int second)
{
int temp = array->numbers[first];
array->numbers[first] = array->numbers[second];
array->numbers[second] = temp;
array->swaps += 1;
}
/* Partition function */
int
partition(Array *array, int start, int end)
{
if (start == end)
return start;
int pivot = array->numbers[end];
int low = start - 1;
int high = end;
for (;;) {
do {
low++;
} while (array->numbers[low] < pivot);
do {
high--;
} while (array->numbers[high] > pivot);
/* Detector for when the cells meet */
if (low >= high) {
swap(array, low, end);
return low;
}
}
/* Swapping the values */
swap(array, low, high);
}
void
quickSort(Array *array, int start, int end)
{
if (end - start <= 0)
return;
//int pivot = array->numbers[end];
int partitionPoint = partition(array, start, end);
quickSort(array, start, partitionPoint - 1);
quickSort(array, partitionPoint + 1, end);
}
void
print_orig(const int *orig,int count)
{
int len = 0;
printf("Test: Original numbers (%d):\n",count);
for (int idx = 0; idx < count; ++idx) {
len += printf(" %10d ", orig[idx]);
if (len >= MAXLEN) {
printf("\n");
len = 0;
}
}
if (len > 0)
printf("\n");
}
int
print_array(Array *array,const int *orig,const char *reason)
{
int len = 0;
int cmp;
int err = -1;
printf("Test: Array Values (%s):\n",reason);
for (int idx = 0; idx < array->size; ++idx) {
int actual = array->numbers[idx];
if (orig != NULL) {
int expected = orig[idx];
cmp = (actual == expected);
}
else
cmp = 1;
len += printf(" %10d%c", actual, cmp ? ' ' : '?');
if (len >= MAXLEN) {
printf("\n");
len = 0;
}
if (cmp)
continue;
if (err < 0)
err = idx;
}
if (orig != NULL)
printf("\nSwaps: %i\nComps: \n\n", array->swaps);
else {
if (len > 0)
printf("\n");
}
return err;
}
void
bigtest(int count,int randgap,int randshuf)
// count -- number of elements (negative means random)
// randgap -- gap between element values (negative means random)
// randshuf -- 0=simple reverse, 1=random shuffle
{
int *orig;
Array *array;
printf("\n");
for (int idx = 1; idx <= 80; ++idx)
printf("-");
printf("\n");
printf("COUNT: %d, RANDGAP: %d, RANDSHUF: %d\n",count,randgap,randshuf);
// get number of elements
if (count < 0)
count = (rand() % count) + 1;
// get element gap (e.g. 1 --> {1, 2, 3}, 2 --> { 1, 3, 5 }
if (randgap < 0)
randgap = (rand() % randgap) + 1;
printf("COUNT: %d, RANDGAP: %d, RANDSHUF: %d\n",count,randgap,randshuf);
// get original array
orig = malloc(sizeof(int) * count);
// fill in original array
do {
int val = 1;
// simple gap
if (randgap >= 0) {
if (randgap == 0)
randgap = 1;
for (int idx = 0; idx < count; ++idx, val += randgap)
orig[idx] = val;
break;
}
// random gap
int gap;
for (int idx = 0; idx < count; ++idx, val += gap) {
orig[idx] = val;
gap = (rand() % randgap) + 1;
}
} while (0);
print_orig(orig,count);
array = initArray(orig,count,randshuf);
print_array(array,NULL,"Shuffled");
quickSort(array, 0, array->size - 1);
print_array(array,orig,"Sorted");
freeArray(array);
free(orig);
}
int
main(void)
{
bigtest(10,0,0);
bigtest(-100,23,0);
bigtest(-1000,-2337,0);
bigtest(-1000,-2337,1);
return 0;
}

I've got a segmentation fault but I dont find oO?

It should sort with merge. There are two functions the merge and the sort merge. Some not known functions (read array from file and print array) are totally functional in an input file.
Valgrind show me that the failure is at the allocation from array2 and when it read and write at the 3rd while-loop in void merge.
void merge(int* array, int start, int middle, int end) {
int size = end - start + 1;
int *array2 = malloc(size*sizeof(array2));
int k = start;
int m = middle + 1;
int i = 0;
int j = 0;
while ( k <= middle && m <= end ) {
if ( array[k] <= array[m] ) {
array2[i] = array[k];
k++;
}
else {
array2[i] = array[m];
m++;
}
i++;
}
while ( k <= middle ) {
array2[i] = array[k];
k++;
i++;
}
while ( m <= end ) {
array2[i] = array[k];
k++;
i++;
}
while ( j < i ) {
array[ start + j -1 ] = array2[j];
j++;
}
free(array2);
}
void merge_sort(int* array, int first, int last) {
int middle;
if ( first < last ) {
middle = ((first+last) / 2);
merge_sort (array, first, middle);
merge_sort (array, middle + 1, last);
merge (array, first, middle, last);
}
}
int main (int argc, char *argv[])
{
if (argc!=3) {
printf ("usage: %s <maximale anzahl> <dateipfad>\n", argv[0]);
exit(2);
}
char *filename = argv[2];
int *array;
int size = atoi(argv[1]);
array = malloc(size*sizeof(array));
int len = read_array_from_file(array, atoi(argv[1]), filename);
printf("Eingabe:\n");
print_array(array, len);
merge_sort(array, array[0], len);
printf("Sortiert:\n");
print_array(array, len);
free(array);
return 0;
}
At least this is wrong:
int *array2 = malloc(size*sizeof(array2));
I think you mean:
int *array2 = malloc(size * sizeof(*array2));
You want to allocate size times the size of each entry, not the size of the array pointer.
But (on a 64-bit machine) this will actually make your array half the number of bytes, causing your overrun to happen sooner. You have a logic error that you need to figure out by stepping through your code with a debugger.

Manipulating a global array in a recursive function

I'm working through an algorithms MOOC and have a small program that takes an array A of ints in arbitrary order, counts the number of inversions (an inversion being the number of pairs (i,j) of array indices with i<j and A[i] > A[j]).
Below is the code I've written. I'm trying to tackle it using a "divide and conquer" approach where we recursively split the input array into two halves, sort each half individually while counting the inversions and then merge the two halves.
The trick is I need to keep track of the number of inversions and sort the arrays, so I pass the original array around the various recursive calls as an argument to the function and pass the count of inversions as a return value.
The code executes correctly through the first set of recursive calls that successively divide and sort [1,5,3], however when I get to the 3rd invocation of mergeAndCountSplitInv it crashes at the line:
sortedArrayLeft = realloc(sortedArrayLeft, sizeof(int)*(rightLen + leftLen));
with the error:
malloc: *** error for object 0x100103abc: pointer being realloc'd was not allocated
I can't see where I'm not using malloc correctly and I've combed through this checking to see I'm doing the pointer arithmetic correctly and can't spot any errors, but clearly error(s) exist.
Any help is appreciated.
// main.c
// inversionInC
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// function to help with debugging array/pointer arithmetic
void logArrayLenAndContents (char *arrayName, int arrayToPrint[], int arrayLen){
printf("%s\n", arrayName);
printf("len:%d\n", arrayLen);
for (int idx = 0; idx < arrayLen; idx++) {
printf("array[%d]: %d\n", idx, arrayToPrint[idx]);
}
}
int mergeAndCountSplitInv(int sortedArrayLeft[], int leftLen, int sortedArrayRight[], int rightLen)
{
printf("Calling mergeAndCount with sortedArrayLeft:\n");
logArrayLenAndContents("left Array", sortedArrayLeft, leftLen);
printf("...and sortedArrayRight:\n");
logArrayLenAndContents("right Array", sortedArrayRight, rightLen);
int i = 0;
int j = 0;
int k = 0;
int v = 0; // num of split inversions
int* outArray;
outArray = malloc((leftLen + rightLen) * sizeof(int));
while (i < leftLen && j < rightLen) {
if (sortedArrayLeft[i] < sortedArrayRight[j]) {
outArray[k] = sortedArrayLeft[i];
i++;
} else{
outArray[k] = sortedArrayRight[j];
v += leftLen - i;
j++;
}
k++;
}
// if at the end of either array then append the remaining elements
if (i < leftLen) {
while (i < leftLen) {
outArray[k] = sortedArrayLeft[i];
i++;
k++;
}
}
if (j < rightLen) {
while (j < rightLen) {
outArray[k] = sortedArrayRight[j];
j++;
k++;
}
}
printf("Wrapping up mergeAndCount where outArray contains:\n");
logArrayLenAndContents("outArray", outArray, k);
sortedArrayLeft = realloc(sortedArrayLeft, sizeof(int)*(rightLen + leftLen));
return v;
}
int sortAndCount(int inArray[], int inLen){
printf("Calling sortAndCount with:\n");
logArrayLenAndContents("inArray", inArray, inLen);
if (inLen < 2) {
return 0;
}
int inArrayLenPart1 = ceil(inLen/2.0);
int inArrayLenPart2 = inLen - inArrayLenPart1;
int* rightArray = malloc(sizeof(int) * inArrayLenPart2);
rightArray = &inArray[inArrayLenPart1];
int x = sortAndCount(inArray, inArrayLenPart1);
printf("sortAndCount returned x = %d\n\n", x);
int y = sortAndCount(rightArray, inArrayLenPart2);
printf("sortAndCount returned y = %d\n\n", y);
int z = mergeAndCountSplitInv(inArray, inArrayLenPart1, rightArray, inArrayLenPart2);
printf("mergeAndCount returned z = %d\n", z);
return x+y+z;
}
int main(int argc, const char * argv[])
{
static int* testArray;
testArray = malloc(5 * sizeof(int));
for (int i = 0; i<=4; i++) {
testArray[0] = 1;
testArray[1] = 5;
testArray[2] = 3;
testArray[3] = 2;
testArray[4] = 4;
}
int x = sortAndCount(testArray, 5);
printf("x = %d\n", x);
return 0;
}
This happens because the value of sortedArrayLeft gets lost as soon as the function returns. The realocated value does not make it to the caller, so inArray of the sortAndCount may be pointing to freed memory if realloc needs to reallocate and copy.
In order to fix this, pass a pointer to the pointer, letting sortedArrayLeft to propagate back to inArray of sortAndCount:
int mergeAndCountSplitInv(int **sortedArrayLeft, int leftLen, int sortedArrayRight[], int rightLen) {
...
*sortedArrayLeft = realloc(*sortedArrayLeft, sizeof(int)*(rightLen + leftLen));
return v;
}
...
int sortAndCount(int **inArray, int inLen) {
...
int z = mergeAndCountSplitInv(inArray, inArrayLenPart1, rightArray, inArrayLenPart2);
}
...
int x = sortAndCount(&testArray, 5);

Removing Duplicates from an Array using C [duplicate]

This question already has answers here:
Algorithm: efficient way to remove duplicate integers from an array
(34 answers)
Closed 8 years ago.
I want small clarification in array concept in C.
I have array:
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
I want result like this:
{1,2,3,4,5,11,16}
Means I want remove duplicates.
How is it possible?
You can't readily resize arrays in C - at least, not arrays as you've declared that one. Clearly, if the data is in sorted order, it is straight-forward to copy the data to the front of the allocated array and treat it as if it was of the correct smaller size (and it is a linear O(n) algorithm). If the data is not sorted, it gets messier; the trivial algorithm is quadratic, so maybe a sort (O(N lg N)) followed by the linear algorithm is best for that.
You can use dynamically allocated memory to manage arrays. That may be beyond where you've reached in your studies, though.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
static int intcmp(const void *pa, const void *pb)
{
int a = *(int *)pa;
int b = *(int *)pb;
if (a > b)
return +1;
else if (a < b)
return -1;
else
return 0;
}
static int compact(int *array, int size)
{
int i;
int last = 0;
assert(size >= 0);
if (size <= 0)
return size;
for (i = 1; i < size; i++)
{
if (array[i] != array[last])
array[++last] = array[i];
}
return(last + 1);
}
static void print(int *array, int size, const char *tag, const char *name)
{
int i;
printf("%s\n", tag);
for (i = 0; i < size; i++)
printf("%s[%d] = %d\n", name, i, array[i]);
}
int main(void)
{
int a[11] = {1,2,3,4,5,11,11,11,11,16,16};
int a_size = sizeof(a) / sizeof(a[0]);
print(a, a_size, "Before", "a");
a_size = compact(a, a_size);
print(a, a_size, "After", "a");
int b[11] = {11,1,11,3,16,2,5,11,4,11,16};
int b_size = sizeof(b) / sizeof(b[0]);
print(b, b_size, "Before", "b");
qsort(b, b_size, sizeof(b[0]), intcmp);
print(b, b_size, "Sorted", "b");
b_size = compact(b, b_size);
print(b, b_size, "After", "b");
return 0;
}
#define arraysize(x) (sizeof(x) / sizeof(x[0])) // put this before main
int main() {
bool duplicate = false;
int a[11] = {1,2,3,4,5,11,11,11,11,16,16}; // doesnt have to be sorted
int b[11];
int index = 0;
for(int i = 0; i < arraysize(a); i++) { // looping through the main array
for(int j = 0; j < index; j++) { // looping through the target array where we know we have data. if we haven't found anything yet, this wont loop
if(a[i] == b[j]) { // if the target array contains the object, no need to continue further.
duplicate = true;
break; // break from this loop
}
}
if(!duplicate) { // if our value wasn't found in 'b' we will add this non-dublicate at index
b[index] = a[i];
index++;
}
duplicate = false; // restart
}
// optional
int c[index]; // index will be the number of objects we have in b
for(int k = 0; k < index; k++) {
c[k] = b[k];
}
}
If you really have to you can create a new array where that is the correct size and copy this into it.
As you can see, C is a very basic (but powerful) language and if you can, use a vector to but your objects in instead (c++'s std::vector perhaps) which can easily increase with your needs.
But as long as you only use small numbers of integers you shouldn't loose to much. If you have big numbers of data, you can always allocate the array on the heap with "malloc()" and pick a smaller size (maybe half the size of the original source array) that you then can increase (using realloc()) as you add more objects to it. There is some downsides reallocating the memory all the time as well but it is a decision you have to make - fast but allocation more data then you need? or slower and having the exact number of elements you need allocated (which you really cant control since malloc() might allocate more data then you need in some cases).
//gcc -Wall q2.cc -o q2 && q2
//Write a program to remove duplicates from a sorted array.
/*
The basic idea of our algorithm is to compare 2 adjacent values and determine if they
are the same. If they are not the same and we weren't already looking previusly at adjacent pairs
that were the same, then we output the value at the current index. The algorithm does everything
in-place and doesn't allocate any new memory. It outputs the unique values into the input array.
*/
#include <stdio.h>
#include <assert.h>
int remove_dups(int *arr, int n)
{
int idx = 0, odx = -1;
bool dup = false;
while (idx < n)
{
if (arr[idx] != arr[idx+1])
{
if (dup)
dup = false;
else
{
arr[++odx] = arr[idx];
}
} else
dup = true;
idx++;
}
return (odx == -1) ? -1 : ++odx;
}
int main(int argc, char *argv[])
{
int a[] = {31,44,44,67,67,99,99,100,101};
int k = remove_dups(a,9);
assert(k == 3);
for (int i = 0;i<k;i++)
printf("%d ",a[i]);
printf("\n\n");
int b[] = {-5,-3,-2,-2,-2,-2,1,3,5,5,18,18};
k = remove_dups(b,12);
assert(k == 4);
for (int i = 0;i<k;i++)
printf("%d ",b[i]);
printf("\n\n");
int c[] = {1,2,3,4,5,6,7,8,9};
k = remove_dups(c,9);
assert(k == 9);
for (int i = 0;i<k;i++)
printf("%d ",c[i]);
return 0;
}
you should create a new array and you should check the array if contains the element you want to insert before insert new element to it.
The question is not clear. Though, if you are trying to remove duplicates, you can use nested 'for' loops and remove all those values which occur more than once.
C does not have a built in data type that supports what you want -- you would need to create your own.
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
As this array is sorted array, you can achieve very easily by following code.
int LengthofArray = 11;
//First elemnt can not be a duplicate so exclude the same and start from i = 1 than 0.
for(int i = 1; i < LengthofArray; i++);
{
if(a[i] == a[i-1])
RemoveArrayElementatIndex(i);
}
//function is used to remove the elements in the same as index passed to remove.
RemoveArrayElementatIndex(int i)
{
int k = 0;
if(i <=0)
return;
k = i;
int j =1; // variable is used to next item(offset) in the array from k.
//Move the next items to the array
//if its last item then the length of the array is updated directly, eg. incase i = 10.
while((k+j) < LengthofArray)
{
if(a[k] == a[k+j])
{
//increment only j , as another duplicate in this array
j = j +1 ;
}
else
{
a[k] = a[k+j];
//increment only k , as offset remains same
k = k + 1;
}
}
//set the new length of the array .
LengthofArray = k;
}
You could utilise qsort from stdlib.h to ensure your array is sorted into ascending order to remove the need for a nested loop.
Note that qsort requires a pointer to a function (int_cmp in this instance), i've included it below.
This function, int_array_unique returns the duplicate free array 'in-place' i.e. it overwrites the original and returns the length of the duplicate free array via the pn pointer
/**
* Return unique version of int array (duplicates removed)
*/
int int_array_unique(int *array, size_t *pn)
{
size_t n = *pn;
/* return err code 1 if a zero length array is passed in */
if (n == 0) return 1;
int i;
/* count the no. of unique array values */
int c=0;
/* sort input array so any duplicate values will be positioned next to each
* other */
qsort(array, n, sizeof(int), int_cmp);
/* size of the unique array is unknown at this point, but the output array
* can be no larger than the input array. Note, the correct length of the
* data is returned via pn */
int *tmp_array = calloc(n, sizeof(int));
tmp_array[c] = array[0];
c++;
for (i=1; i<n; i++) {
/* true if consecutive values are not equal */
if ( array[i] != array[i-1]) {
tmp_array[c] = array[i];
c++;
}
}
memmove(array, tmp_array, n*sizeof(int));
free(tmp_array);
/* set return parameter to length of data (e.g. no. of valid integers not
* actual allocated array length) of the uniqe array */
*pn = c;
return 0;
}
/* qsort int comparison function */
int int_cmp(const void *a, const void *b)
{
const int *ia = (const int *)a; // casting pointer types
const int *ib = (const int *)b;
/* integer comparison: returns negative if b > a
and positive if a > b */
return *ia - *ib;
}
Store the array element with small condition into new array
**just run once 100% will work
!)store the first value into array
II)store the another element check with before stored value..
III)if it exists leave the element--and check next one and store
here the below code run this u will understand better
int main()
{
int a[10],b[10],i,n,j=0,pos=0;
printf("\n enter a n value ");
scanf("%d",&n);
printf("\n enter a array value");
for(i=0;i<n;i++)
{
scanf("%d",&a[i]);//gets the arry value
}
for(i=0;i<n;i++)
{
if(check(a[i],pos,b)==0)//checks array each value its exits or not
{
b[j]=a[i];
j++;
pos++;//count the size of new storing element
}
}
printf("\n after updating array");
for(j=0;j<pos;j++)
{
printf("\n %d",b[j]);
} return 0;
}
int check(int x,int pos,int b[])
{ int m=0,i;
for(i=0;i<pos;i++)//checking the already only stored element
{
if(b[i]==x)
{
m++; //already exists increment the m value
}
}
return m;
}

Resources