Quicksort string array in C - c

I'm writing a quicksort algorithm to sort an array of strings.
The problem is that my array with the data seem to be overwritten with something right after i allocate the right and left quicksort arrays, because i print the array and its all there, but after i use malloc to allocate the others arrays, i print it again and i'm missing some elements.
Here's the output:
Pivot: 2
Emma, Olivia, Victoria, Gwyneth, Chloe, Hayley, Scarlett,
Emma, Olivia, Victoria, Gwyneth, , , ,
Anyone knows whats happening? What am missing?
char **concatenate(char **array1, int n1, char *pivot, char **array2, int n2, int len){
int i=0, j=0;
int elements = n1 + n2 + 1;
// alocating array
char **concat = (char**) malloc(sizeof(*concat) * elements);
concat[0] = (char*) malloc(sizeof(*concat) * elements * len);
for(i=1; i<elements; i++)
concat[i] = &(concat[0][i*len]);
// concatenating
for(i=0; i<n1; i++)
concat[i] = array1[i];
concat[i++] = pivot;
for(j=0; j<n2; j++)
concat[i++] = array2[j];
// returning
return concat;
}
char **quicksort(char **array, int elements, int len){
// array is already sorted
if(elements < 2)
return array;
int pivot;
int i=0, l=0, r=0;
// selecting the pivot (median)
if(elements % 2 == 0)
pivot = ((elements + 1) / 2) -1;
else
pivot = (elements / 2) -1;
//REMOVE
printf("Pivot: %d\n", pivot);
for(i=0; i<elements; i++)
printf("%s, ", array[i]);
printf("\n");
// alocating arrays
char **left = (char**) malloc(sizeof(*left) * pivot);
left[0] = (char*) malloc(sizeof(*left) * pivot * len);
for(i=1; i<pivot; i++)
left[i] = &(left[0][i*len]);
char **rigth = (char**) malloc(sizeof(*rigth) * pivot);
rigth[0] = (char*) malloc(sizeof(*rigth) * pivot * len);
for(i=1; i<pivot; i++)
rigth[i] = &(rigth[0][i*len]);
//REMOVE
for(i=0; i<elements; i++)
printf("%s, ", array[i]);
printf("\n");
//quicksorting
for(i=0; i<elements; i++){
if(array[i] == array[pivot])
continue;
int comp = strcmp(array[i], array[pivot]);
//REMOVE
printf("%d: strcmp %s, %s is %d\n", i, array[i], array[pivot], comp);
if(comp < pivot)
left[l++] = array[i];
else
rigth[r++] = array[i];
}
//REMOVE
printf("concatenate(");
for(i=0; i<l; i++)
printf("%s ", left[i]);
printf("|%s| ", array[pivot]);
for(i=0; i<r; i++)
printf("%s ", rigth[i]);
printf(")\n");
// recursion and return
return concatenate(quicksort(left, l, len), l, array[pivot], quicksort(rigth, r, len), r, len);
}
int main(int argc, char *argv[]){
int i, j, aux;
char **teste = (char**) malloc(sizeof(*teste) * 7);
teste[0] = (char*) malloc(sizeof(*teste) * 7 * 128);
for(i=1; i<7; i++)
teste[i] = &(teste[0][i*128]);
teste[0] = "Emma";
teste[1] = "Olivia";
teste[2] = "Victoria";
teste[3] = "Gwyneth";
teste[4] = "Chloe";
teste[5] = "Hayley";
teste[6] = "Scarlett";
quicksort(teste, 7, 128);
printf("AFTER\n");
for(i=0; i<7; i++)
printf("%s, ", teste[i]);
printf("\n");
return 0;
}

There is zero reason to allocate for quicksort, and in fact the function can easily suffice in your case with a simple interface of quicksort(char *arr[], unsigned int len), using pointer-math for the subsequence invocations.
Provide a swap algorithm for exchanging pointers:
void swap_str_ptrs(char const **arg1, char const **arg2)
{
const char *tmp = *arg1;
*arg1 = *arg2;
*arg2 = tmp;
}
Then the algorithm is:
void quicksort_strs(char const *args[], unsigned int len)
{
unsigned int i, pvt=0;
if (len <= 1)
return;
// swap a randomly selected value to the last node
swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);
// reset the pivot index to zero, then scan
for (i=0;i<len-1;++i)
{
if (strcmp(args[i], args[len-1]) < 0)
swap_str_ptrs(args+i, args+pvt++);
}
// move the pivot value into its place
swap_str_ptrs(args+pvt, args+len-1);
// and invoke on the subsequences. does NOT include the pivot-slot
quicksort_strs(args, pvt++);
quicksort_strs(args+pvt, len - pvt);
}
Thats everything. including the partitioning.
How It Works
There are two general recursive quicksort algorithms: the squeeze, and the sweep. This is the sweep algorithm. We march up the sequence, swapping any element "less" than than pivot value (which is swapped to the end of the sequence before the loop starts) to a target slot, the index of which is initially the beginning of the sequence and increases with each swap operation. When the "sweep" is finished, the pvt index is where the pivot value belongs, as everything below that slot is "less" than the that value. So one more swap is made to put the pivot value into position. After that we have two partitions, which are recursed. It is vital that the slot we just identified as the pivot location is not included in either of those partitions. It is the only value we know is in its final resting place.
Test Harnass
Including the above code, we test this with a basic set of strings purposely out of order:
void print_list(char const *args[], unsigned len)
{
unsigned i=0;
for (;i<len;++i)
puts(args[i]);
}
int main()
{
char const *args[] =
{
"this", "is", "a", "test", "of", "quicksort", "with", "strings"
};
srand((unsigned)time(NULL));
quicksort_strs(args, sizeof(args)/sizeof(*args));
print_list(args, sizeof(args)/sizeof(*args));
return 0;
}
Output
a
is
of
quicksort
strings
test
this
with
Non-recursive implementation
It should be noted that the above algorithm lends itself beautifully to a non-recursive implementation. A local dynamic stack is used for holding pairs of data: an pointer and a length. Optimized to not push trivial segments (segments of length 1 or 0) on to the stack, one implementation would like like this:
void quicksort_strs(char const *args[], unsigned int len)
{
// holds our non-recursive stack of segments
struct segment
{
char const **arr;
unsigned int len;
struct segment* next;
} *stack = NULL;
stack = malloc(sizeof(*stack));
stack->arr = args;
stack->len = len;
stack->next = NULL;
while (stack != NULL)
{
unsigned int i, pvt=0;
struct segment *tmp = stack;
stack = stack->next;
// pull values and delete segment record
args = tmp->arr;
len = tmp->len;
free(tmp);
// nothing to unary segments
if (len <= 1)
continue;
// swap a randomly selected value to the last node
swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);
// reset the pivot index to zero, then scan
for (i=0;i<len-1;++i)
{
if (strcmp(args[i], args[len-1]) < 0)
swap_str_ptrs(args+i, args+pvt++);
}
// move the pivot value into its place
swap_str_ptrs(args+pvt, args+len-1);
// lhs segment push
if (pvt > 1)
{
tmp = malloc(sizeof(*tmp));
tmp->arr = args;
tmp->len = pvt;
tmp->next = stack;
stack = tmp;
}
// rhs segment push
if ((len - ++pvt) > 1)
{
tmp = malloc(sizeof(*tmp));
tmp->arr = args+pvt;
tmp->len = len-pvt;
tmp->next = stack;
stack = tmp;
}
}
}
Obviously having a canned node-stack implementation would shorten this up considerably, but the idea should be readily apparent. A realloc() schema for holding nodes on the end of the "stack" rather than the beginning would be equally interesting, as it would eliminate the need to next pointer management, replaced with a top index instead.
Anyway, good luck, and I hope it helps.

Related

Seg fault issue in mergesort algorithm with dynamic array

I'm trying to implement the mergesort algorithm using a dynamic array structure in c, but when i call the function to split the original array instead of getting two subarrays i get a seg fault error.
I'm pretty sure it has something to deal with how i define the size of my structure, but i cannot get over it. Here's how i've defined my structure and how i create and initialize it:
typedef struct dynarray
{
void **memory;
size_t allocated; //total size of the array
size_t used; //used size of the array
int index;
} dynarray;
//creates a new, empty, dynarray
void create_dynarray(dynarray **array, size_t size)
{
*array = calloc(size, sizeof(array));
(*array)->memory = NULL;
(*array)->allocated = 0;
(*array)->used = 0;
(*array)->index = -1;
}
This how i've defined my mergesort functions
//function used to slice the dynarray in two subarrays and call merge function
void* dynarray_mergesort(dynarray *param){
if(dynarray_length(param)>1){
param->index = 0;
printf("index of first:%d\t", param->index);
size_t size = param->used;
size_t m = size/2;
size_t n = size - size/2;
struct dynarray *l;
create_dynarray(&l, m);
printf("index of left:%d\t", l->index);
struct dynarray *r;
create_dynarray(&r, n);
printf("index of right:%d\n", r->index);
for(int i = 0 ; i < m; i++){
add_elem(l, param->memory[i]);
}for(int j = m; j < n; j++){
add_elem(r, param->memory[j]);
}
puts("first");
print_array(l);
puts("second");
print_array(r);
dynarray_mergesort(l);
dynarray_mergesort(r);
//dynarray_merge(param, l , r, size);
}
return param;
}
//function used to mergesort the array
void* dynarray_merge(dynarray *param, dynarray *l, dynarray *r, int size){
int i,j,k;
while(i < size/2 && j < size-size/2){
if(l->memory[i] < r->memory[j]){
param->memory[k] = l->memory[i];
i++;
k++;
}else{
param->memory[k] = r->memory[j];
j++;
k++;
}
}
while(i < size/2)
param->memory[k++] = l->memory[i++];
}while(j < size-size/2){
param->memory[k++] = r->memory[j++];
}
return param;
}
//function used to mergesort the array
void* dynarray_merge(dynarray *param, dynarray *l, dynarray *r, int size){
int i,j,k;
while(i < size/2 && j < size-size/2){
if(l->memory[i] < r->memory[j]){
param->memory[k] = l->memory[i];
i++;
k++;
}else{
param->memory[k] = r->memory[j];
j++;
k++;
}
}
while(i < size/2){
param->memory[k++] = l->memory[i++];
}while(j < size-size/2){
param->memory[k++] = r->memory[j++];
}
return param;
}
Probably i'm confused on how the size of my dynamic array is defined and how i have to treat it in my functions. Here's a compilable example to help you understand the problem. It is pretty long but most functions can be ignored as they are utility functions and they seem to work good. The problem is located in the mergesort function, but i'm afraid it could be related on how i've defined my dynarraystructure.
Ps. the line calling the dynarray_merge(param, l , r, size); is commented because i'm working on problems located in the dynarray_mergesort(dynarray *param);
Ps2: the printf functions called inside the dynarray_mergesort(dynarray *param); are used as debugging infos.
#include<stdio.h>
#include<stdlib.h>
typedef struct dynarray
{
void **memory;
size_t allocated;
size_t used;
int index;
} dynarray;
//get length of the dynarray
int dynarray_length(dynarray *array)
{
return array->index + 1;
}
//retrieves an element in a specific position of the dynarray
void* get_i_elem(dynarray *array,int index)
{
if (index < 0 || index > array->index) return NULL;
return array->memory[index];
}
//print arrays, useful to test
void print_array(dynarray *array)
{
for(int i = 0; i < dynarray_length(array); i++) {
printf("%d\t", *(int *)get_i_elem(array, i));
//puts("");
}
}
//creates a new, empty, dynarray
void create_dynarray(dynarray **array, size_t size)
{
*array = calloc(size, sizeof(array));
(*array)->memory = NULL;
(*array)->allocated = 0;
(*array)->used = 0;
(*array)->index = -1;
}
//adds a new element at the bottom of dynarray
void add_elem(dynarray *array, void *data)
{
size_t toallocate;
size_t size = sizeof(void *);
if ((array->allocated - array->used) < size){ // if M - N ...
toallocate = array->allocated == 0 ? size : (array->allocated * 2);
array->memory = realloc(array->memory, toallocate);
array->allocated = toallocate;
}
array->memory[++array->index] = data;
array->used = array->used + size;
}
//function used to slice the dynarray in two subarrays and call merge function
void* dynarray_mergesort(dynarray *param){
if(dynarray_length(param)>1){
param->index = 0;
printf("index of first:%d\t", param->index);
size_t size = param->used;
size_t m = size/2;
size_t n = size - size/2;
struct dynarray *l;
create_dynarray(&l, m);
printf("index of left:%d\t", l->index);
struct dynarray *r;
create_dynarray(&r, n);
printf("index of right:%d\n", r->index);
for(int i = 0 ; i < m; i++){
add_elem(l, param->memory[i]);
}for(int j = m; j < n; j++){
add_elem(r, param->memory[j]);
}
puts("first");
print_array(l);
puts("second");
print_array(r);
dynarray_mergesort(l);
dynarray_mergesort(r);
//dynarray_merge(param, l , r, size);
}
return param;
}
//function used to mergesort the array
void* dynarray_merge(dynarray *param, dynarray *l, dynarray *r, int size){
int i,j,k;
while(i < size/2 && j < size-size/2){
if(l->memory[i] < r->memory[j]){
param->memory[k] = l->memory[i];
i++;
k++;
}else{
param->memory[k] = r->memory[j];
j++;
k++;
}
}
while(i < size/2){
param->memory[k++] = l->memory[i++];
}while(j < size-size/2){
param->memory[k++] = r->memory[j++];
}
return param;
}
int main(){
struct dynarray *a;
create_dynarray(&a, 5);
int arr[5] = {18,14, 20,16,12};
int *ap = malloc(sizeof(int));
int *bp = malloc(sizeof(int));
int *cp = malloc(sizeof(int));
int *dp = malloc(sizeof(int));
int *ep = malloc(sizeof(int));
*ap = arr[0];
*bp = arr[1];
*cp = arr[2];
*dp = arr[3];
*ep = arr[4];
add_elem(a, ap);
add_elem(a, bp);
add_elem(a, cp);
add_elem(a, dp);
add_elem(a, ep);
dynarray_mergesort(a);
print_array(a);
}
In addition to the allocation shortfall mentioned in the comments below your question (e.g. needing *array = calloc(size, sizeof **array);), you have a simple error leading to your SegFault (you have other errors as well). You are storing the number of bytes in the size variable in dynarray_mergesort, not the number of pointers. So in dynarray_mergesort when you declare size_t size = param->used; your value of size is multiple of sizeof(void*) (e.g. sizeof(a_pointer)) times the number of pointers you have actually used. This leads to incorrect values for m and n.
To remedy the problem, you can simply do:
size_t size = param->used / sizeof(void*);
You have another error with your loop limits in:
for(size_t j = m; j < n; j++){
add_elem(r, param->memory[j]);
}
Where m = size/2; and n = size - size/2;. You actually need your limits of m -> size, e.g.:
for(size_t j = m; j < size; j++){
add_elem(r, param->memory[j]);
}
(note: above the proper type for i and j are both size_t to correspond to m and n and prevent "comparison between signed and unsigned integer expressions")
As noted in my comment, you have uninitialized value problems in dynarray_merge. You need to initialize i and k, e.g.
int i=0, j=0, k=0;
before you attempt:
i++;
k++;
With those changes your code runs to the end without problems (other than leaking memory):
$ ./bin/dynarraymergeorig
index of first:0 index of left:-1 index of right:-1
first
18 14
second
20 16 12
index of first:0 index of left:-1 index of right:-1
first
18
second
14
index of first:0 index of left:-1 index of right:-1
first
20
second
16 12
index of first:0 index of left:-1 index of right:-1
first
16
second
12
18
You still have problems merging your list (that is left to you to further investigate), but your SegFault issue is solved. Let me know if you have further questions. (other than on the changes required to fix your merge algorithm left to you)
The inside of the create_dynarray function
*array = calloc(size, sizeof(array));
should be changed to:
*array = calloc(size, sizeof(**array))
to do what you actually want to do (allocate a memory for the array with an element size dynarray * size).

Remove duplicates from const array in c

I have to implement a function in C with the following signature: int *unici(const int *vec, size_t size, size_t *newsize), where vec is a const array of ints, size is the size of the array, and *newsize is the size of the new array without duplicates.
This function has to create a new array on the heap and put the values from vec into the array without duplicates.
Example: If vec is [2, 4, 5, 4, 5, 5, 7, 9], size is 8, then the new array should be [2, 4, 5, 7, 9] with *newsize equal to 5.
I tried to implement it, but I don't know how to remove duplicates and put into new array.
int cmpfunc(const void * a, const void * b)
{
return (*(int*)a - *(int*)b);
}
int *unici(const int *vec, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
qsort(vec, size, sizeof(int), cmpfunc);
size_t count = 0;
for (size_t i = 0; i < size; i++)
{
//finding for duplicates
if (vec[i] == vec[i + 1])
count++;
}
*newsize = size - count;
int *tmp = malloc(*newsize * sizeof(int));
//now I've to put in tmp values from vec without duplicates
}
EDIT: that's my solution, I figured out, by the way, thank you all!
int *unici(const int *vec, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
int *tmp = malloc(size * sizeof(int));
for (size_t i = 0; i < size; i++)
{
tmp[i] = vec[i];
}
for (size_t i = 0; i < size; i++)
{
for (size_t j = i + 1; j < size; j++)
{
if (tmp[i] == tmp[j])
{
for (size_t k = j; k<size; k++)
{
tmp[k] = tmp[k + 1];
}
size--;
j--;
}
}
}
*newsize = size;
return tmp;
}
There are two basic approaches.
Duplicate the original array. Sort the elements in the new array, and use a loop to keep only the first one of any runs (more than one same value):
int *result; /* This is the duplicate array; sorted */
size_t i = 0; /* Loop index */
size_t n = 0; /* Unique elements in the duplicate array */
while (i < size) {
const int c = result[i++];
/* Skip if there are more than one consecutive c */
while (i < size && c == result[i])
i++;
/* Copy the unique elements back to the beginning
of the array. */
result[n++] = c;
}
You can, if you want, reallocate result to n * sizeof result[0] bytes.
Store the number of unique elements n to *newsize, and return result.
Allocate the result array, but don't bother copying the values yet. Instead of sorting (to make duplicate values consecutive), use a double loop to check whether each value is unique (already in the result array) or not, and only copy the unique ones to the result array:
int *result; /* Allocated for 'size' elements */
size_t i, j; /* Loop indexes */
size_t n = 0; /* Unique elements in the duplicate array */
for (i = 0; i < size; i++) {
/* Find first duplicate in result. */
for (j = 0; j < n; j++)
if (result[j] == vec[i])
break;
/* If no duplicates found, add to result. */
if (j >= n)
result[n++] = vec[i];
}
You can, if you want, reallocate result to n * sizeof result[0] bytes.
Store the number of unique elements n to *newsize, and return result.
Which one is the better approach, depends on how the result set is used, and whether it is useful for it to be in sorted order. If sorted order is useful, or if speed is important and the order does not matter, the sort approach is likely better.
(The efficiency of the sort approach depends on the efficiency of the sort function. Many sort functions are known that have O(size × log size) time complexity; for a truly huge amount of data, an O(size) radix sort can be used (because the number of values is known beforehand). Note that a radix sort will only beat other sorts for very large sizes, typically in the millions.)
In some cases, it might be important that the result set is in the same order as the vec was, but with the duplicates removed. Then, the second approach is the obvious choice. Its time complexity is O(size × n), which means that it slows down the larger the array and the set of unique elements are.
First of all you don't want const int* in unici it throws the warning
passing argument 1 of ‘qsort’ discards ‘const’ qualifier from pointer target type
And then we go on, to allocating memory for each of the integers that are non-duplicate.
int *unici(int *v, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
qsort(v, size, sizeof(int), cmpfunc);
int *temp = malloc(sizeof *temp);
if (temp == NULL){
perror("Error ");
exit(EXIT_FAILURE);
}
(*newsize)= 0;
temp[(*newsize)++] = v[0];
for (size_t i = 1; i < size; i++)
{
if (v[i] != v[i-1]){
int *tt = realloc(temp,(*newsize+1)*sizeof *tt);
if (tt == NULL){
perror("Error ");
exit(EXIT_FAILURE);
}
temp = tt;
temp[(*newsize)++] = v[i];
}
}
return temp;
}
There are two points with this code
Here the array is supposed to be constant. So it must do duplicate of the array and then do the sorting and then eliminate duplicates and resize again as per the unique elements present. This is having the benefit that the const removal as mentioned - you don't have to do that.
In my code I have used reallocate for each of the elements - which is an overkill. So what to do then? Well as said earlier we will allocate it to maximum size and then reduce the size to the unique list. That again leaves us to debate should we reduce it every time? (Suppose 1-2 positions left out.) Well then it is not so much needed to resize but well one can do that. It's implementor's choice to some extent.
The idea of this simple shrinking is being implemented here:-
int *unici(const int *vv, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
int *v = malloc(sizeof *v * size);
if (v == NULL){
perror("Error in malloc");
exit(EXIT_FAILURE);
}
memcpy(v, vv, size*sizeof*v);
qsort(v, size, sizeof(int), cmpfunc);
(*newsize)= 0;
int last = v[0];
for (size_t i = 1; i < size; i++)
{
if (v[i] != last){
v[(*newsize)++] = last;
last = v[i];
}
}
v[(*newsize)++] = v[size-1];
int *temp = realloc(v, (*newsize)*sizeof *v);
if (temp == NULL){
perror("Error in realloc");
exit(EXIT_FAILURE);
}
v = temp;
return v;
}
Since you sorted your vector it should be easy. Iterate over the vector.. and copy values if they don't match with the previous value. Something like this (not verified compilable code):
size_t src;
size_t dst;
for (src = 0, dst = 0; src < size; src++)
{
// skip check for first element; compare with previous and if they are the same just move on
if (src > 0 && vec[src] == vec[src - 1])
continue;
tmp[dst] = vec[src];
dst++;
}

I've got a segmentation fault but I dont find oO?

It should sort with merge. There are two functions the merge and the sort merge. Some not known functions (read array from file and print array) are totally functional in an input file.
Valgrind show me that the failure is at the allocation from array2 and when it read and write at the 3rd while-loop in void merge.
void merge(int* array, int start, int middle, int end) {
int size = end - start + 1;
int *array2 = malloc(size*sizeof(array2));
int k = start;
int m = middle + 1;
int i = 0;
int j = 0;
while ( k <= middle && m <= end ) {
if ( array[k] <= array[m] ) {
array2[i] = array[k];
k++;
}
else {
array2[i] = array[m];
m++;
}
i++;
}
while ( k <= middle ) {
array2[i] = array[k];
k++;
i++;
}
while ( m <= end ) {
array2[i] = array[k];
k++;
i++;
}
while ( j < i ) {
array[ start + j -1 ] = array2[j];
j++;
}
free(array2);
}
void merge_sort(int* array, int first, int last) {
int middle;
if ( first < last ) {
middle = ((first+last) / 2);
merge_sort (array, first, middle);
merge_sort (array, middle + 1, last);
merge (array, first, middle, last);
}
}
int main (int argc, char *argv[])
{
if (argc!=3) {
printf ("usage: %s <maximale anzahl> <dateipfad>\n", argv[0]);
exit(2);
}
char *filename = argv[2];
int *array;
int size = atoi(argv[1]);
array = malloc(size*sizeof(array));
int len = read_array_from_file(array, atoi(argv[1]), filename);
printf("Eingabe:\n");
print_array(array, len);
merge_sort(array, array[0], len);
printf("Sortiert:\n");
print_array(array, len);
free(array);
return 0;
}
At least this is wrong:
int *array2 = malloc(size*sizeof(array2));
I think you mean:
int *array2 = malloc(size * sizeof(*array2));
You want to allocate size times the size of each entry, not the size of the array pointer.
But (on a 64-bit machine) this will actually make your array half the number of bytes, causing your overrun to happen sooner. You have a logic error that you need to figure out by stepping through your code with a debugger.

Keeping two arrays in the same ordering when sorting

I have an integer array that I need to sort containing unix times. I was going to use qsort to sort it which is fairly trivial. However I also have an array of "strings" that needs to remain in the same order as the integer array.
So position 2 in the integer array would correspond with an element in position two of the other array.
Is there anyway using qsort to maintain such a relationship?
Do it like this
#include <stdlib.h>
#include <stdio.h>
struct Data
{
long int time;
const char *string;
};
int
datacmp(const void *const x, const void *const y)
{
return ((struct Data *) x)->time - ((struct Data *) y)->time;
}
int
main(void)
{
struct Data array[] = {
{1234, "1234 Text"},
{1034, "1034 Text"},
{1041, "1041 Text"}
};
size_t count;
count = sizeof(array) / sizeof(array[0]);
for (size_t i = 0 ; i < count ; ++i)
{
fprintf(stderr, "Entry %zu:\n\ttime : %ld\n\tstring: %s\n\n",
i, array[i].time, array[i].string);
}
fprintf(stderr, "\n");
qsort(array, count, sizeof(array[0]), datacmp);
fprintf(stderr, "---- Sorted array:\n");
for (size_t i = 0 ; i < count ; ++i)
{
fprintf(stderr, "Entry %zu:\n\ttime : %ld\n\tstring: %s\n\n",
i, array[i].time, array[i].string);
}
return 0;
}
A more generic solution that actually sorts 2 (or more) arrays, according to one of the arrays, by sorting an array of pointers to the key array, then reordering all of the arrays to sort them (it also restores the array of pointers back to their initial state). The compare function only needs to know the type that the pointers point to. The reorder in place takes O(n) (linear) time as every move places a value in it's final sorted location. In this example, a[] is an array of integers, b[] is an array of pointers to strings (char *).
int compare(const void *pp0, const void *pp1)
{
int i0 = **(int **)pp0;
int i1 = **(int **)pp1;
if(i0 > i1)return -1;
if(i0 < i1)return 1;
return 0;
}
/* ... */
int *pa = malloc(...); /* array of pointers */
int ta; /* temp value for a */
char *tb; /* temp value for b */
/* ... */
/* initialize array of pointers to a[] */
for(i = 0; i < sizeof(a)/sizeof(a[0]); i++)
pa[i] = &a[i];
/* sort array of pointers */
qsort(pa, sizeof(a)/sizeof(a[0]), sizeof(pa[0]), compare);
/* reorder a[] and b[] according to the array of pointers */
for(i = 0; i < sizeof(a)/sizeof(a[0]); i++){
if(i != pa[i]-a){
ta = a[i];
tb = b[i];
k = i;
while(i != (j = pa[k]-a)){
a[k] = a[j];
b[k] = b[j];
pa[k] = &a[k];
k = j;
}
a[k] = ta;
b[k] = tb;
pa[k] = &a[k];
}
}

Removing Duplicates from an Array using C [duplicate]

This question already has answers here:
Algorithm: efficient way to remove duplicate integers from an array
(34 answers)
Closed 8 years ago.
I want small clarification in array concept in C.
I have array:
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
I want result like this:
{1,2,3,4,5,11,16}
Means I want remove duplicates.
How is it possible?
You can't readily resize arrays in C - at least, not arrays as you've declared that one. Clearly, if the data is in sorted order, it is straight-forward to copy the data to the front of the allocated array and treat it as if it was of the correct smaller size (and it is a linear O(n) algorithm). If the data is not sorted, it gets messier; the trivial algorithm is quadratic, so maybe a sort (O(N lg N)) followed by the linear algorithm is best for that.
You can use dynamically allocated memory to manage arrays. That may be beyond where you've reached in your studies, though.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
static int intcmp(const void *pa, const void *pb)
{
int a = *(int *)pa;
int b = *(int *)pb;
if (a > b)
return +1;
else if (a < b)
return -1;
else
return 0;
}
static int compact(int *array, int size)
{
int i;
int last = 0;
assert(size >= 0);
if (size <= 0)
return size;
for (i = 1; i < size; i++)
{
if (array[i] != array[last])
array[++last] = array[i];
}
return(last + 1);
}
static void print(int *array, int size, const char *tag, const char *name)
{
int i;
printf("%s\n", tag);
for (i = 0; i < size; i++)
printf("%s[%d] = %d\n", name, i, array[i]);
}
int main(void)
{
int a[11] = {1,2,3,4,5,11,11,11,11,16,16};
int a_size = sizeof(a) / sizeof(a[0]);
print(a, a_size, "Before", "a");
a_size = compact(a, a_size);
print(a, a_size, "After", "a");
int b[11] = {11,1,11,3,16,2,5,11,4,11,16};
int b_size = sizeof(b) / sizeof(b[0]);
print(b, b_size, "Before", "b");
qsort(b, b_size, sizeof(b[0]), intcmp);
print(b, b_size, "Sorted", "b");
b_size = compact(b, b_size);
print(b, b_size, "After", "b");
return 0;
}
#define arraysize(x) (sizeof(x) / sizeof(x[0])) // put this before main
int main() {
bool duplicate = false;
int a[11] = {1,2,3,4,5,11,11,11,11,16,16}; // doesnt have to be sorted
int b[11];
int index = 0;
for(int i = 0; i < arraysize(a); i++) { // looping through the main array
for(int j = 0; j < index; j++) { // looping through the target array where we know we have data. if we haven't found anything yet, this wont loop
if(a[i] == b[j]) { // if the target array contains the object, no need to continue further.
duplicate = true;
break; // break from this loop
}
}
if(!duplicate) { // if our value wasn't found in 'b' we will add this non-dublicate at index
b[index] = a[i];
index++;
}
duplicate = false; // restart
}
// optional
int c[index]; // index will be the number of objects we have in b
for(int k = 0; k < index; k++) {
c[k] = b[k];
}
}
If you really have to you can create a new array where that is the correct size and copy this into it.
As you can see, C is a very basic (but powerful) language and if you can, use a vector to but your objects in instead (c++'s std::vector perhaps) which can easily increase with your needs.
But as long as you only use small numbers of integers you shouldn't loose to much. If you have big numbers of data, you can always allocate the array on the heap with "malloc()" and pick a smaller size (maybe half the size of the original source array) that you then can increase (using realloc()) as you add more objects to it. There is some downsides reallocating the memory all the time as well but it is a decision you have to make - fast but allocation more data then you need? or slower and having the exact number of elements you need allocated (which you really cant control since malloc() might allocate more data then you need in some cases).
//gcc -Wall q2.cc -o q2 && q2
//Write a program to remove duplicates from a sorted array.
/*
The basic idea of our algorithm is to compare 2 adjacent values and determine if they
are the same. If they are not the same and we weren't already looking previusly at adjacent pairs
that were the same, then we output the value at the current index. The algorithm does everything
in-place and doesn't allocate any new memory. It outputs the unique values into the input array.
*/
#include <stdio.h>
#include <assert.h>
int remove_dups(int *arr, int n)
{
int idx = 0, odx = -1;
bool dup = false;
while (idx < n)
{
if (arr[idx] != arr[idx+1])
{
if (dup)
dup = false;
else
{
arr[++odx] = arr[idx];
}
} else
dup = true;
idx++;
}
return (odx == -1) ? -1 : ++odx;
}
int main(int argc, char *argv[])
{
int a[] = {31,44,44,67,67,99,99,100,101};
int k = remove_dups(a,9);
assert(k == 3);
for (int i = 0;i<k;i++)
printf("%d ",a[i]);
printf("\n\n");
int b[] = {-5,-3,-2,-2,-2,-2,1,3,5,5,18,18};
k = remove_dups(b,12);
assert(k == 4);
for (int i = 0;i<k;i++)
printf("%d ",b[i]);
printf("\n\n");
int c[] = {1,2,3,4,5,6,7,8,9};
k = remove_dups(c,9);
assert(k == 9);
for (int i = 0;i<k;i++)
printf("%d ",c[i]);
return 0;
}
you should create a new array and you should check the array if contains the element you want to insert before insert new element to it.
The question is not clear. Though, if you are trying to remove duplicates, you can use nested 'for' loops and remove all those values which occur more than once.
C does not have a built in data type that supports what you want -- you would need to create your own.
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
As this array is sorted array, you can achieve very easily by following code.
int LengthofArray = 11;
//First elemnt can not be a duplicate so exclude the same and start from i = 1 than 0.
for(int i = 1; i < LengthofArray; i++);
{
if(a[i] == a[i-1])
RemoveArrayElementatIndex(i);
}
//function is used to remove the elements in the same as index passed to remove.
RemoveArrayElementatIndex(int i)
{
int k = 0;
if(i <=0)
return;
k = i;
int j =1; // variable is used to next item(offset) in the array from k.
//Move the next items to the array
//if its last item then the length of the array is updated directly, eg. incase i = 10.
while((k+j) < LengthofArray)
{
if(a[k] == a[k+j])
{
//increment only j , as another duplicate in this array
j = j +1 ;
}
else
{
a[k] = a[k+j];
//increment only k , as offset remains same
k = k + 1;
}
}
//set the new length of the array .
LengthofArray = k;
}
You could utilise qsort from stdlib.h to ensure your array is sorted into ascending order to remove the need for a nested loop.
Note that qsort requires a pointer to a function (int_cmp in this instance), i've included it below.
This function, int_array_unique returns the duplicate free array 'in-place' i.e. it overwrites the original and returns the length of the duplicate free array via the pn pointer
/**
* Return unique version of int array (duplicates removed)
*/
int int_array_unique(int *array, size_t *pn)
{
size_t n = *pn;
/* return err code 1 if a zero length array is passed in */
if (n == 0) return 1;
int i;
/* count the no. of unique array values */
int c=0;
/* sort input array so any duplicate values will be positioned next to each
* other */
qsort(array, n, sizeof(int), int_cmp);
/* size of the unique array is unknown at this point, but the output array
* can be no larger than the input array. Note, the correct length of the
* data is returned via pn */
int *tmp_array = calloc(n, sizeof(int));
tmp_array[c] = array[0];
c++;
for (i=1; i<n; i++) {
/* true if consecutive values are not equal */
if ( array[i] != array[i-1]) {
tmp_array[c] = array[i];
c++;
}
}
memmove(array, tmp_array, n*sizeof(int));
free(tmp_array);
/* set return parameter to length of data (e.g. no. of valid integers not
* actual allocated array length) of the uniqe array */
*pn = c;
return 0;
}
/* qsort int comparison function */
int int_cmp(const void *a, const void *b)
{
const int *ia = (const int *)a; // casting pointer types
const int *ib = (const int *)b;
/* integer comparison: returns negative if b > a
and positive if a > b */
return *ia - *ib;
}
Store the array element with small condition into new array
**just run once 100% will work
!)store the first value into array
II)store the another element check with before stored value..
III)if it exists leave the element--and check next one and store
here the below code run this u will understand better
int main()
{
int a[10],b[10],i,n,j=0,pos=0;
printf("\n enter a n value ");
scanf("%d",&n);
printf("\n enter a array value");
for(i=0;i<n;i++)
{
scanf("%d",&a[i]);//gets the arry value
}
for(i=0;i<n;i++)
{
if(check(a[i],pos,b)==0)//checks array each value its exits or not
{
b[j]=a[i];
j++;
pos++;//count the size of new storing element
}
}
printf("\n after updating array");
for(j=0;j<pos;j++)
{
printf("\n %d",b[j]);
} return 0;
}
int check(int x,int pos,int b[])
{ int m=0,i;
for(i=0;i<pos;i++)//checking the already only stored element
{
if(b[i]==x)
{
m++; //already exists increment the m value
}
}
return m;
}

Resources