Quickselect C-implementation returning unexpected values - c

I've implemented the following quick-select algorithm in C using pointers instead of arrays to allow for dynamic memory allocation.
#include <stdio.h>
#include <stdlib.h>
int random(int min, int max)
{
return rand() % (max + 1 - min) + min;
}
int quickselect(int *a, int l, int k)
{
int piv = *(a+random(0, l));
int *less = malloc(sizeof(int)*l), *great = malloc(sizeof(int)*l); //allocate to the max possible size
int l_len = 0, g_len = 0;
for (int i = 0; i <= l; i++)
if (*(a+i) < piv)
*(less+l_len++) = a[i]; //set "array indice" (dereferneced pointer) and increment array len
else if (*(a+i) > piv)
*(great+g_len++) = a[i];
less = realloc(less, sizeof(int)*l_len); //re-allocate appropiate to the proper size
great = realloc(great, sizeof(int)*g_len);
if (k <= l_len)
return quickselect(less, l_len, k);
else if (k > l - g_len)
return quickselect(great, g_len, k - (l - g_len));
else
return piv;
}
int main()
{
int a[] = {5,2,6,3,7,1,0};
printf("%d", quickselect(a, sizeof(a)/sizeof(*a), 1));
return 0;
}
When run, I'm getting unexpected values ranging from the negative millions to the positive millions. The randomness of the function makes this hard to debug. Any help?

Related

Printf function who returning an array of int

I'm trying this exercice but I don't know how to printf my function in main.
Exercice:
1) Write a function who returning an int tab with all values between min and max
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max)
{
int len;
int *tab;
len = min;
while (len < max)
len++;
tab = (int *)malloc(sizeof(*tab) * len + 1);
while (min < max)
{
*tab = min;
min++;
}
return(tab);
}
int main()
{
ft_range(0, 10);
return(0);
}
returning an int tab with all values between min and max
Depending on the idea of "between", it is an open question if the end values should be included. Given OP's mis-coded +1 in sizeof(*tab) * len + 1, I'll go with the idea both ends should be included.
Miscalculation of len
Rather than loop, simply subtract
//len = min;
//while (len < max)
// len++;
len = max - min + 1;
Allocation miscalculated
Good to use sizeof *pointer, yet the + 1 makes little sense. If anything the ... * len + 1 should have been ... * (len + 1). Yet the +1 is handled with the above fix. Also cast not needed in C.
// tab = (int *)malloc(sizeof(*tab) * len + 1);
tab = malloc(sizeof *tab * len);
Wrong assignment
Code repeatedly assigned the same *tab location.
//while (min < max)
//{
// *tab = min;
// min++;
//}
for (int i = min; i <= max; i++) {
tab[i - min] = i;
}
No allocation error checking nor min, max validation
Potential for int overflow with mix - min
Be sure to free allocations
Alternative
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max) {
if (min > max) {
return NULL;
}
size_t len = (size_t)max - min + 1;
int *tab = malloc(sizeof *tab * len);
if (tab == NULL) {
return NULL;
}
for (size_t i = 0; i < len; i++) {
tab[i] = int(min + i);
}
return tab;
}
int main() {
int mn = 0;
int mx = 10;
int *ft = ft_range(mn, mx);
if (ft) {
int *p = ft;
for (int i = mn; i <= mx; i++) {
printf("%d ", *p++);
}
free(ft);
}
return 0;
}
inside "ft_range", when you are trying to calculate the length the array needs to do, all you have to do is subtract the minimum from the maximum. what you did is much slower and unnecessary.
when allocating memory, you did not need to add a "+1" at the end. you may have seen it done in other examples, but it does not apply here.
the "while" loop inside "ft_range" needs to have a "<=" sign, otherwise it will stop before it reaches the "max" value.
when adding a value to the "tab" int array, you are always doing so by dereferencing it (putting a "*" before it), so every one of your values will come on the first position of the array and overwrite themselves. you need to have another "int i" to keep track of the current index of the array.
make sure to free the memory you allocated with "malloc" after you finish your enumeration. it does not matter right now, but if you ever get to writing more complex programs you will need to do so to keep the performance up, which can be critical.
here's a working code, with a few comments (i'm shit at comments, if you don't understand them, just ask me bro)
#include <stdlib.h>
#include <stdio.h>
int *ft_range(int min, int max)
{
int len;
int * tab;
len = max-min;
tab = (int *)malloc(sizeof(*tab) * len);
// create an index to track the position inside "tab"
int i = 0;
// sign needs to be "<=" so it does not stop before it reaches the max value
while (min <= max)
{
tab[i] = min;
// ++ needs to come before so the variable's value is updated right here
++min;
// increase the i index to the next position in "tab"
++i;
}
return(tab);
}
int main()
{
int min = 5;
int max = 10;
int len = max-min;
int * range = ft_range(min, max);
for(int i = 0; i <= len; ++i)
{
// %d = integer
// \n = move to next line
printf("%d\n", range[i]);
}
getchar();
return(0);
}
You correctly allocate memory to the table, you just need to printf the value in your main by using a loop trough your tab, i write this code who work perfectly.
#include <stdlib.h>
int *ft_range(int min, int max)
{
int *ptr;
int mi;
int i;
int range;
range = (max - min);
mi = min;
ptr = NULL;
if (min > max)
return (NULL);
else
ptr = malloc(sizeof(int) * range);
i = 0;
while (i < range)
{
ptr[i] = mi + i;
i++;
}
return (ptr);
}
// #include <stdio.h>
// int main()
// {
// //int i = 0;
// int min = 1;
// int max = 30;
// while(min < max)
// {
// printf("%d\n", *ft_range(min, max));
// min++;
// }
// }

Array declaration fails while trying to declare int array

I've been learning & coding sorting algorithms for some time and recently I've coded merge sort in C, and I've also coded a sort_test function to test the function that I write. In the sort test function, I'm declaring an array and assigning random values to it, but when the array size gets to 1,000,000 the program crashes. Why is that happening?
sort_test.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "merge_sort.h"
#include "sort_test.h"
// test size
#define MIN 10
#define MAX 1000000
// int comparator
int cmpInt(const void *elem1,const void * elem2){
int e1 = *(int *)elem1; // i-1
int e2 = *(int *)elem2; // i
if(e2 < e1){
return -1;
} else if(e2 > e1){
return 1;
} else {
return 0;
}
}
// double comparator
int cmpDouble(const void *elem1,const void *elem2){
double e1 = *(double *)elem1;
double e2 = *(double *)elem2;
if(e2 < e1){
return -1;
} else if(e2 > e1){
return 1;
} else {
return 0;
}
}
void initSeed(){
srand(time(NULL));
}
void intSortTest(){
initSeed();
for(size_t i = MIN;i <= MAX;i *=10){
int arr[i];
for(size_t j = 0; j < i;j++){
arr[j] = rand();
}
// sorting the array
mergesort(arr,0,i);
// checking if sorted array hold the
// condition i[0] <= i[1] ... <= i[n].
for(size_t j = 1;j < i;j++){
int *e1 = &arr[j-1];
int *e2 = &arr[j];
assert(cmpInt(e2,e1) <= 0);
}
printf("INT TEST : %7d\tPASSED\n",i);
}
printf("\n");
}
void doubleSortTest(){
initSeed();
for(int i = MIN; i <= MAX; i *= 10){
double arr[i];
for(int j = 0 ; j < i;j++){
arr[j] = (double)(rand() % 100) + 1.0;
}
// perform sort
//insertion_sort(arr,sizeof (double),i,cmpDouble);
for(int j = 1; j < i;j++){
double *e1 = &arr[j-1];
double *e2 = &arr[j];
assert(cmpDouble(e2,e1) <= 0);
}
printf("Double Test : %5d\tPASSED\n",i);
}
printf("\n");
}
sort_test.h
#ifndef SORT_TEST_H
#define SORT_TEST_H
void initSeed();
void intSortTest();
void doubleSortTest();
int cmpDouble(const void *elem1,const void *elem2);
int cmpInt(const void *elem1,const void * elem2);
#endif // SORT_TEST_H
merge_sort.h
#ifndef MERGE_SORT_H
#define MERGE_SORT_H
void mergesort(int *arr,int start,int end);
void merge(int *arr,int start,int med,int end);
#endif // MERGE_SORT_H
merge_sort.c
#include <stdio.h>
#include "sort_test.h"
#include "merge_sort.h"
int main(){
intSortTest();
return 0;
}
void mergesort(int *arr,int start,int end){
if(start < end){
int median = (end + start) / 2;
mergesort(arr,start,median);
mergesort(arr,median+1,end);
merge(arr,start,median,end);
}
}
void merge(int *arr,int start,int median,int end){
int i = start; int j = median+1;
int copy[end+1];
int cIndex = 0;
while(i <= median && j <= end) {
if(arr[j] <= arr[i]){
copy[cIndex++] = arr[j++];
} else {
copy[cIndex++] = arr[i++];
}
}
while(i <= median){
copy[cIndex++] = arr[i++];
}
while(j <= end){
copy[cIndex++] = arr[j++];
}
for(int k = 0; k < cIndex; k++){
arr[start++] = copy[k];
}
}
It is because you are allocating the arrays on the stack. Try the following code instead.
void intSortTest(){
initSeed();
for(size_t i = MIN;i <= MAX;i *=10){
int *arr = malloc(i*sizeof(int)); // <-- changed this
for(size_t j = 0; j < i;j++){
arr[j] = rand();
}
// sorting the array
mergesort(arr,0,i);
// checking if sorted array hold the
// condition i[0] <= i[1] ... <= i[n].
for(size_t j = 1;j < i;j++){
int *e1 = &arr[j-1];
int *e2 = &arr[j];
assert(cmpInt(e2,e1) <= 0);
}
printf("INT TEST : %7d\tPASSED\n",i);
free(arr); // <-- added this
}
printf("\n");
}
EDIT
Also the merge algorithm is incorrect. More precisely, you have a problem with the value list boundaries.
When you define the start and end index of a value list, the values are in arr[start] to arr[end-1], not arr[end]. The number of values is then end-start. With this convention, you have an empty list when start == end.
As a consequence, the function mergesort becomes:
void mergesort(int *arr,int start,int end){
if (start+1 >= end)
return; // a list with 0 or 1 values is already sorted
int median = (end + start) / 2;
mergesort(arr,start,median);
mergesort(arr,median,end);
merge(arr,start,median,end);
}
The merge function then become as follow:
void merge(int *arr,int start,int median,int end){
int i = start; int j = median;
int *copy = malloc((end-start)*sizeof(int)); // use malloc for huge arrays
int cIndex = 0;
while(i < median && j < end) { // not i <= median && j <= end
if(arr[j] <= arr[i]){
copy[cIndex++] = arr[j++];
} else {
copy[cIndex++] = arr[i++];
}
}
while(i < median){ // not i <= median
copy[cIndex++] = arr[i++];
}
while(j < end){ // not j <= median
copy[cIndex++] = arr[j++];
}
for(int k = 0; k < cIndex; k++){
arr[start++] = copy[k];
}
free(copy);
}
As you can see, there are only minor differences.
With this code, your program runs without error.
Now that the code is visible, it is fairly easy to see that you are indeed blowing the stack as I suggested in one of my many comments.
In merge(), you have:
int copy[end+1];
as well as in intSortTest() having:
int arr[i];
where i reaches 1,000,000.
When end is 1,000,000 — it is set from i — you have an array of one million int values being sorted, and a copy with another one million int values (plus 1), so you attempt to place two million 4-byte int values on the stack — and 8,000,000 bytes blows the stack limits. Since 800,000 bytes (the previous size) fits on the stack in both Unix and Windows, it isn't 100% clear which you are using. There isn't much margin for error on Unix/Linux; the limit is thoroughly blown on Windows because neither 4 MB array fits on the stack.
The recommended fix is to use dynamic memory allocation (malloc() et al) instead of stack allocation — in both the sort test function and in the main merge() code.

How can I find why my merge sorting algorithm crash when sorting an array of 1 million element?

I'm a French student and trying to calculate the execution time of the Merge Sort algorithm for different size of array.
I also want to write the different execution time in a .csv file. But when my program tries to sort an array with 1 million elements the process returns -1073741571 (0xC00000FD) in Code::Blocks. So if you could point me to a way to find a solution I would be very grateful!
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
void genTab(int *tab, int n) {
int i;
for (i = 0; i < n; i++) {
tab[i] = rand() % 100;
}
}
void fusion(int *tab, int deb, int mid, int fin) {
int i = deb;
int j = mid + 1;
int k = deb;
int temp[fin + 1];
while ((i <= mid) && (j <= fin)) {
if (tab[i] <= tab[j]) {
temp[k] = tab[i];
i++;
} else {
temp[k] = tab[j];
j++;
}
k++;
}
while (i <= mid) {
temp[k] = tab[i];
i++;
k++;
}
while (j <= fin) {
temp[k] = tab[j];
k++;
j++;
}
for (i = deb; i <= fin; i++) {
tab[i] = temp[i];
}
}
void triFusion(int *tab, int i, int j) {
if (i < j) {
triFusion(tab, i, (int)((i + j) / 2));
triFusion(tab, (int)((i + j) / 2 + 1), j);
fusion(tab, i, (int)((i + j) / 2), j);
}
}
void reset(int *tab1, int *tab2, int n) {
for (int i = 0; i < n; i++) {
tab2[i] = tab1[i];
}
}
int main() {
srand(time(NULL));
clock_t start, end;
int nbrTest[15] = {
1000, 5000, 10000, 50000, 80000, 100000, 120000, 140000,
150000, 180000, 200000, 250000, 300000, 450000, 1000000
};
FILE *fp;
char *tpsExecution = "exeTime.csv";
fp = fopen(tpsExecution, "w");
fprintf(fp, "Array Size; Merge Time");
for (int i = 0; i < 15; i++) {
int n = nbrTest[i];
printf("Calculating time for an array of %d \n", n);
int *tab = malloc(sizeof(int) * n);
genTab(tab, n);
int *copie = malloc(sizeof(int) * n);
reset(tab, copie, n);
start = clock();
triFusion(tab, 0, n - 1);
end = clock();
float tpsFusion = (float)(end - start) / CLOCKS_PER_SEC;
reset(tab, copie, n);
printf("writing in the file\n");
fprintf(fp, "\n%d;%f", n, tpsFusion);
free(tab);
free(copie);
}
fclose(fp);
return 0;
}
int temp[fin+1]; may exceed the space limit for the stack. You should allocate it with malloc instead, and free it with free.
If you want to exclude malloc and free from the timed code, the allocation could be performed outside the timed code and passed in as work space.
(Note: posted after the answer from #Eric Postpischil).
The function
void fusion(int * tab, int deb, int mid, int fin)
Has the line
int temp[fin+1];
and the value of fin comes through another function from the number of elements n to be sorted
triFusion(tab, 0, n-1);
and as an automatic variable, breaks the stack when n is large.
I suggest replacing the line with
int *temp = malloc((fin+1) * sizeof *temp);
if(temp == NULL) {
puts("malloc");
exit(1);
}
// ...
free(temp);
fusion() is always allocating the full size of the array for temp, even when only a small fraction of temp is being used. You could change this to:
int k = 0;
...
int temp[fin+1-deb];
...
tab[i]=temp[i-deb];
still this will exceed stack space if n is large. So as suggested in the other answers:
int k = 0;
...
int *temp = malloc((fin+1-deb)*sizeof(int));
...
tab[i]=temp[i-deb];
...
free(temp)
or better still, do a one time allocation of a second array in main or in a "helper" function, the include a pointer to the second array in the merge sort functions.

How to check to make sure there are no repeats when generating 6 random numbers?

I can get the random numbers into an array but I can't figure out how to check to make sure that they aren't repeating. I print out the code but there are no numbers in the array (prints out nothing).
//puts random numbers into an array
i = 0, j = 0;
srand(time(NULL));
for (i = 0; i < arrSize; i++)
{
randArr[i] = randNums(1,50);
}
i = 0;
for(i = 0; i < arrSize; i++)
{
printf("%d ", randArr[i]);
}
printf("\n\n");
//checks to make sure there are no duplicates
i = 0, j = 0, k = 0, temp = 0;
for (i = 0; i < arrSize; i++)
{
for (j = 1; j <= arrSize;)
{
if (randArr[j] == randArr[i])
{
for (k = j; k <= arrSize; k++)
{
temp = randNums(1,50);
randArr[k + 1] = temp;
}
arrSize--;
}
else
j++;
}
}
//generates random numbers between the inputed max and min
int randNums(int min, int max)
{
int result = 0, low = 0, high = 0;
if (min < max)
{
low = min;
high = max + 1;
}
else
{
low = max + 1;
high = min;
}
result = (rand() % (high - low)) + low;
return (result);
}
Beware! There are many different solutions to this problem and they all have one or another downside. If I was to quickly implement it, I would go for something like this (without too much C-magic going on):
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE (30)
#define RAND_MIN (1)
#define RAND_MAX (50)
static int randNums(int min, int max) {
// ...
}
int main(void) {
(void) srand(time(NULL));
int arr[SIZE];
int used = 0;
while (used < SIZE) {
int num = randNums(RAND_MIN, RAND_MAX);
bool exists = false;
for (int i = 0; i < used; ++i) {
if (arr[i] == num)
exists = true;
}
if (exists == false)
arr[used++] = num;
}
for (int i = 0; i < SIZE; ++i)
(void) printf("%d\n", arr[i]);
return EXIT_SUCCESS;
}
I hope it helps a bit :)
Like this answer, you can do rejection sampling, but the uniform distribution of a fixed number of samples is perfect for a very simple hash set. (Though the asymptotic runtime might be irrelevant for n=6.)
#include <stdlib.h> /* (s)rand */
#include <stdio.h> /* printf */
#include <time.h> /* clock */
#include <assert.h> /* assert */
/* Double-pointers are confusing. */
struct Reference { int *ref; };
/* Simple fixed hash set. */
static struct Reference bins[256];
static int nums[6];
static const size_t no_bins = sizeof bins / sizeof *bins,
no_nums = sizeof nums / sizeof *nums;
static size_t count_num;
/* Uniformly distributed numbers are great for hashing, but possibly clump
together under linear probing. */
static size_t hash(const int n) { return ((size_t)n * 21) % no_bins; }
/* Linear probing. */
static struct Reference *probe(const int n) {
size_t bin_index;
struct Reference *bin;
assert(sizeof bins > sizeof nums);
for(bin_index = hash(n); bin = bins + bin_index,
bin->ref && *bin->ref != n; bin_index = (bin_index + 1) % no_bins);
return bin;
}
/* Return whether it's a new value. */
static int put_in_set(const int n) {
struct Reference *bin = probe(n);
int *num;
assert(count_num < no_nums);
if(bin->ref) return 0; /* Already in hash. */
num = nums + count_num++;
*num = n;
bin->ref = num;
return 1;
}
/* http://c-faq.com/lib/randrange.html */
static int rand_range(const unsigned n) {
unsigned int x = (RAND_MAX + 1u) / n;
unsigned int y = x * n;
unsigned int r;
assert(n > 0);
do {
r = rand();
} while(r >= y);
return r / x;
}
/* Generates random numbers between the inputed max and min without
repetition; [min, max] inclusive. */
static int unique_uniform(const int min, const int max) {
int n;
assert(min <= max && (size_t)(max - min) >= count_num);
do { n = rand_range(max - min + 1) + min; } while(!put_in_set(n));
return n;
}
int main(void) {
int n = 6;
srand((int)clock()), rand(); /* My computer always picks the same first? */
while(n--) { printf("%d\n", unique_uniform(1, 50)); }
return EXIT_SUCCESS;
}
However, if the numbers are densely packed, (eg, unique_uniform(1, 6),) it's going to reject a lot of numbers. Another solution is to take a Poisson distributed numbers as a running sum, (recurrence T(n+1)=T(n)+\mu_{n+1},) where the expected value is the range of numbers divided by the total samples, then take a random permutation.

palindrome checker algorithm

i'm having problems writing this excercise.
this should evaluate if a given array contains a palindrome sequence of numbers, the program builds correctly but doesn't run (console remains black). where am i wrong on this? thanks for all help!
#include <stdio.h>
#include <stdlib.h>
#define SIZE 15
//i'm using const int as exercise demand for it
//should i declare size as int when giving it to function? also if it's been declared?
//i'm a bit confused about that
int palindrome(const int a[], int p, int size);
int main()
{
int a[SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2, 1, 0};
int p = 1; //i'm not using boolean values, but i think should work anyway, right?
p = palindrome(a, p, SIZE);
if (p)
printf("\nseries is palindrome\n");
else
printf("\nseries isn't palindrome\n");
return 0;
}
int palindrome(const int a[], int p, int size)
{
int mid, j;
mid = size / 2;
while (p) {
for (j = 0; j < (SIZE / 2); j++){
if (a[mid + (j + 1)] != a[mid - (j + 1)]) //i think i might be wrong on this, but don't know where i'm in fault
p = 0;
}
}
return p;
}
p.s.
how can i activate debugger "watches" on Code Blocks to look at others function variables? (i put a stop on main function)
You don't need while (p) { loop. It is possible to have infinite loop here (and you have it!), because if you don't change p, this loop never stops.
You mix size and SIZE in the implementation of palindrome() (mid is half of size, but the whole loop is from 0 to SIZE-1).
Also it is better to move int p = 1; in the beginning of implementation of palindrome() (and to remove int p from list of it's parameters).
Just try this:
int palindrome(const int a[], int p, int size)
{
int mid, j;
mid = size / 2;
for (j = 0; j < (size / 2); j++){
if (a[mid + (j + 1)] != a[mid - (j + 1)]);
p = 0;
break;
}
}
return p;
}
here's an alternative without p where palindrome returns 0 or 1
int palindrome(const int a[], int size)
{
int j , k , ret;
for (j = 0 , k = size - 1 ; j < k; j++ , k--)
{
if (a[j)] != a[k])
{
ret = 0;
break;
}
}
if(j >= k)
ret = 1;
return ret;
}
you can call palindrome in the if statement in main like this :
if(palindrome(a , SIZE))
printf("\nseries is palindrome\n");
else
printf("\nseries isn't palindrome\n");

Resources