I have to implement a function in C with the following signature: int *unici(const int *vec, size_t size, size_t *newsize), where vec is a const array of ints, size is the size of the array, and *newsize is the size of the new array without duplicates.
This function has to create a new array on the heap and put the values from vec into the array without duplicates.
Example: If vec is [2, 4, 5, 4, 5, 5, 7, 9], size is 8, then the new array should be [2, 4, 5, 7, 9] with *newsize equal to 5.
I tried to implement it, but I don't know how to remove duplicates and put into new array.
int cmpfunc(const void * a, const void * b)
{
return (*(int*)a - *(int*)b);
}
int *unici(const int *vec, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
qsort(vec, size, sizeof(int), cmpfunc);
size_t count = 0;
for (size_t i = 0; i < size; i++)
{
//finding for duplicates
if (vec[i] == vec[i + 1])
count++;
}
*newsize = size - count;
int *tmp = malloc(*newsize * sizeof(int));
//now I've to put in tmp values from vec without duplicates
}
EDIT: that's my solution, I figured out, by the way, thank you all!
int *unici(const int *vec, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
int *tmp = malloc(size * sizeof(int));
for (size_t i = 0; i < size; i++)
{
tmp[i] = vec[i];
}
for (size_t i = 0; i < size; i++)
{
for (size_t j = i + 1; j < size; j++)
{
if (tmp[i] == tmp[j])
{
for (size_t k = j; k<size; k++)
{
tmp[k] = tmp[k + 1];
}
size--;
j--;
}
}
}
*newsize = size;
return tmp;
}
There are two basic approaches.
Duplicate the original array. Sort the elements in the new array, and use a loop to keep only the first one of any runs (more than one same value):
int *result; /* This is the duplicate array; sorted */
size_t i = 0; /* Loop index */
size_t n = 0; /* Unique elements in the duplicate array */
while (i < size) {
const int c = result[i++];
/* Skip if there are more than one consecutive c */
while (i < size && c == result[i])
i++;
/* Copy the unique elements back to the beginning
of the array. */
result[n++] = c;
}
You can, if you want, reallocate result to n * sizeof result[0] bytes.
Store the number of unique elements n to *newsize, and return result.
Allocate the result array, but don't bother copying the values yet. Instead of sorting (to make duplicate values consecutive), use a double loop to check whether each value is unique (already in the result array) or not, and only copy the unique ones to the result array:
int *result; /* Allocated for 'size' elements */
size_t i, j; /* Loop indexes */
size_t n = 0; /* Unique elements in the duplicate array */
for (i = 0; i < size; i++) {
/* Find first duplicate in result. */
for (j = 0; j < n; j++)
if (result[j] == vec[i])
break;
/* If no duplicates found, add to result. */
if (j >= n)
result[n++] = vec[i];
}
You can, if you want, reallocate result to n * sizeof result[0] bytes.
Store the number of unique elements n to *newsize, and return result.
Which one is the better approach, depends on how the result set is used, and whether it is useful for it to be in sorted order. If sorted order is useful, or if speed is important and the order does not matter, the sort approach is likely better.
(The efficiency of the sort approach depends on the efficiency of the sort function. Many sort functions are known that have O(size × log size) time complexity; for a truly huge amount of data, an O(size) radix sort can be used (because the number of values is known beforehand). Note that a radix sort will only beat other sorts for very large sizes, typically in the millions.)
In some cases, it might be important that the result set is in the same order as the vec was, but with the duplicates removed. Then, the second approach is the obvious choice. Its time complexity is O(size × n), which means that it slows down the larger the array and the set of unique elements are.
First of all you don't want const int* in unici it throws the warning
passing argument 1 of ‘qsort’ discards ‘const’ qualifier from pointer target type
And then we go on, to allocating memory for each of the integers that are non-duplicate.
int *unici(int *v, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
qsort(v, size, sizeof(int), cmpfunc);
int *temp = malloc(sizeof *temp);
if (temp == NULL){
perror("Error ");
exit(EXIT_FAILURE);
}
(*newsize)= 0;
temp[(*newsize)++] = v[0];
for (size_t i = 1; i < size; i++)
{
if (v[i] != v[i-1]){
int *tt = realloc(temp,(*newsize+1)*sizeof *tt);
if (tt == NULL){
perror("Error ");
exit(EXIT_FAILURE);
}
temp = tt;
temp[(*newsize)++] = v[i];
}
}
return temp;
}
There are two points with this code
Here the array is supposed to be constant. So it must do duplicate of the array and then do the sorting and then eliminate duplicates and resize again as per the unique elements present. This is having the benefit that the const removal as mentioned - you don't have to do that.
In my code I have used reallocate for each of the elements - which is an overkill. So what to do then? Well as said earlier we will allocate it to maximum size and then reduce the size to the unique list. That again leaves us to debate should we reduce it every time? (Suppose 1-2 positions left out.) Well then it is not so much needed to resize but well one can do that. It's implementor's choice to some extent.
The idea of this simple shrinking is being implemented here:-
int *unici(const int *vv, size_t size, size_t *newsize)
{
if (size == 0)
return NULL;
int *v = malloc(sizeof *v * size);
if (v == NULL){
perror("Error in malloc");
exit(EXIT_FAILURE);
}
memcpy(v, vv, size*sizeof*v);
qsort(v, size, sizeof(int), cmpfunc);
(*newsize)= 0;
int last = v[0];
for (size_t i = 1; i < size; i++)
{
if (v[i] != last){
v[(*newsize)++] = last;
last = v[i];
}
}
v[(*newsize)++] = v[size-1];
int *temp = realloc(v, (*newsize)*sizeof *v);
if (temp == NULL){
perror("Error in realloc");
exit(EXIT_FAILURE);
}
v = temp;
return v;
}
Since you sorted your vector it should be easy. Iterate over the vector.. and copy values if they don't match with the previous value. Something like this (not verified compilable code):
size_t src;
size_t dst;
for (src = 0, dst = 0; src < size; src++)
{
// skip check for first element; compare with previous and if they are the same just move on
if (src > 0 && vec[src] == vec[src - 1])
continue;
tmp[dst] = vec[src];
dst++;
}
Related
The title may be a little confusing so I will explain my goal a bit more in detail here;
I want to write a code that takes an input array of the x numbers
index_x = [0,0,1,0,1,0,0,0,1,0,0,1,0];
Then I want to write a logic that will generate an array that has the index of all the 1s
index_ones = [3,5,9,12];
Current code slightly modified from Jacon:
Edit 1:
#include <stdio.h>
int index_x[] = {0,0,1,0,1,0,0,0,1,0,0,1,0}; // any number of elements
int len = 12;
int main(void) {
int arr[len];
int j = 0;
for (int i = 0; i < len; i++)
if (index_x[i])
arr[j++] = i; // save and advance j only if value is "1"
for (int i = 0; i < j; i++) // only print below j !
printf("%d\n", arr[i]);
}
Output:
2
4
8
11
From this output, I would like to generate another array that is the difference between these elements. In this case the new array would be {2,4,3}.
Edit 2: I will move this to another thread as the discussion has now moved from one problem to another. Do not want to complicate things for future users.
As you do not know how many indexes you need you will need to allocate the memory dynamically. You need also to remember the number of indexes
struct index
{
size_t size;
size_t indexes[];
};
struct index *addIndex(struct index *index, size_t pos)
{
size_t new_size = index ? index -> size + 1 : 1;
index = realloc(index, sizeof(*index) + new_size * sizeof(index -> indexes[0]));
if(index)
{
index -> size = new_size;
index -> indexes[new_size - 1] = pos;
}
return index;
}
struct index *buildIndex(int *arr, size_t arr_size, int val)
{
struct index *index = NULL, *tmp;
for(size_t pos = 0; pos < arr_size; pos++)
{
if(arr[pos] == val)
{
tmp = addIndex(index, pos);
if(tmp) index = tmp;
else { /* error handling */ }
}
}
return index;
}
Find array element count of index_x[].
int index_x[] = {0,0,1,0,1,0,0,0,1,0,0,1,0};
...
// e.g. 13, not 12 as implied with int len = 12;
size_t index_x_count = sizeof index_x / sizeof index_x[0];
Then run through index_x[] to find numbers of the ones.
size_t ones = 0;
for (size_t i = 0; i < index_x_count; i++) {
if (index_x[i] == 1) {
ones++;
}
}
Now we know the size needed for "generate an array that has the index of all the 1s"
// Error check, can't have array size 0
if (ones == 0) Handle_patholocial_case();
// We should avoid forming huge arrays. 10000 is arbitrary.
// When large, rather than form _arrays_, allocate memory (not shown).
if (ones > 10000) Handle_patholocial_case();
// Form the array
int arr[ones];
size_t a = 0;
for (size_t i = 0; i < ones; i++) {
if (index_x[i] == 1) {
arr[a++] = i;
}
}
generate another array that is the difference between these elements.
// Error check for at least one difference, can't have array size 0
if (ones <= 1) Handle_patholocial_case();
// Form the array
size_t diff_count = ones - 1;
int diff[diff_count];
for (size_t d = 0; d < diff_count; d++) {
diff[a] = arr[d+1] - arr[d];
}
Do something with diff[]
for (size_t d = 0; d < diff_count; d++) {
printf("%d ", diff[d]);
}
For example:
[1,2,3] -> [2,4,6]
[9,1] -> [1,8,2]
[6,7,5] -> [1,3,5,0]
I got this question on my first tech interview yesterday (did it in C because that's my best language, so a C answer would be help more) and completely blanked :(
This is what I was thinking:
Start at the end of the array and keeping moving left
At every arr[i], multiply by 2 and see if there're 2 digits (if arr[i]/10 != 0) and if there is a left most digit, carry it over to arr[i-1] as long as a[i-1] != NULL.
I just could not figure out how to actually do this in C. I had something like:
int* multTwo(int* arr, int len) {
int *newarr; // I know i have to malloc, but not sure what size because
// wouldnt the size depend on the new number's size?
int temp, i;
for (i=len-1; i>=0; i--) {
temp = arr[i]*2;
newarr[i] = temp%2;
if(temp/10 != 0)
newarr[i-1] = temp/2;
}
return newarr;
}
But there are a lot of bugs in my code. Is there a better way or am I on the right track?
Some pseudo code. The main idea is to show the depth of C knowledge as part of the interview, not Code golf.
What signature?
// arr is not changed, use `const`
// array indexing best done with `size_t`
int* multTwo(const int* arr, size_t len) {
Size needed and show error handling. Maybe also detect arr == NULL when len > 0
need = len;
// if lead element is 5 or more, add 1.
// Error if element is not in 0-9 range
Allocate memory. Allocating to size of variable de-referenced type is less error prone, easier to review and maintain than coding the variable type. Showing maintenance concerns during a C interview is a good thing. Think if later code changed to unsigned char* multTwo(const unsigned char* arr, size_t len) {, no need to change newarr = malloc(sizeof *newarr * need).
newarr = malloc(sizeof *newarr * need)
Check allocation. An allocation of 0 is OK to return NULL. Yet maybe this routine should still allocate 1 byte, a tad wasteful, to insure a NULL return is an error. Discussing issues like with the interviewer is good. Shows you want to clearly understand the customer's need not just in the meat of the function, but the corner cases.
if (newarr == NULL && need > 0) fail()
Loop though and populate the new array much like OP coded with meaningful variable names and using unsigned array indexing.
size_t arr_i=len;
size_t newarr_i=need;
int carry = 0;
while (arr_i > 0)
sum = arr[--arr_i]*2 + carry;
newarr[--newarr_i] = sum%10;
carry = sum/10;
}
if (carry) {
newarr[--newarr_i] = carry;
}
Return newarr
Best I can think in a short time, like an interview
#include <stdio.h>
#include <stdlib.h>
void invert (int *head, int *tail)
{
int temp;
if (head < tail)
{
temp = *head;
*head = *tail;
*tail = temp;
invert(++head, --tail);
}
}
int* multTwo(int* arr, size_t len)
{
int value = 0;
int n_digits =0 ;
// CONVERT THE ARRAY TO NUMBER
while(len--)
{
value += *arr;
value *=10;
arr++;
}
value /= 10;
// DOUBLE THE NUMBER
value *= 2;
// CONVERT IT TO BUFFER
int *digits = malloc(sizeof(*digits));
while ((value>0) && (digits != NULL))
{
digits[n_digits++] = value%10;
value /= 10;
digits = realloc( digits, sizeof(*digits) * (n_digits+1) );
}
if (digits != NULL)
{
invert(digits, &digits[n_digits-1]);
printf("[ ");
for (int i=0; i<n_digits; i++)
printf("%d, ", digits[i]);
printf("]\n");
}
return digits;
}
int main(void)
{
int array[] = {6,7,5};
multTwo(array, sizeof(array)/sizeof(array[0]));
return 0;
}
I would start by looking to see if either the first digit in arr is 5 or more to check if the newarr array needs to be 1 larger than the original array.
So something like this for initialization:
int* newarr;
int newlen;
if (*arr >= 5)
newlen = len + 1;
else
newlen = len;
newarr = (int*)malloc(sizeof(int) * newlen);
memset(newarr, 0, newlen); //initialize all newarr values to 0
Now obviously we have to do our multiplication now. To get the 1's digit we do use the modulo operator %, and to get the 10's digit we use the division operator /. Of course we only need to do the division if our multiplied value is 10 or greater. So our loop to populate newarr will look something like this:
int i, temp;
for (i = 1; i <= len; i++) {
temp = *(arr + i - 1) * 2;
if (temp < 10) {
*(newarr + i - 1) += temp;
}
else {
*(newarr + i - 1) += temp / 10; //inset 10's digit
*(newarr + i) += temp % 10; //inset 1's digit
}
}
So our full function ends up being
#include <stdlib.h>
#include <string.h>
int* multTwo(int* arr, int len)
{
int* newarr;
int newlen;
if (*arr >= 5)
newlen = len + 1;
else
newlen = len;
newarr = (int*)malloc(sizeof(int) * newlen);
memset(newarr, 0, newlen); //initialize all newarr values to 0
int i, temp;
for (i = 1; i <= len; i++) {
temp = *(arr + i - 1) * 2;
if (temp < 10) {
*(newarr + i - 1) += temp;
}
else {
*(newarr + i - 1) += temp / 10; //insert 10's digit
*(newarr + i) += temp % 10; //inset 1's digit
}
}
return newarr; //don't forget to free once you're done with newarr!
}
I have an integer array that I need to sort containing unix times. I was going to use qsort to sort it which is fairly trivial. However I also have an array of "strings" that needs to remain in the same order as the integer array.
So position 2 in the integer array would correspond with an element in position two of the other array.
Is there anyway using qsort to maintain such a relationship?
Do it like this
#include <stdlib.h>
#include <stdio.h>
struct Data
{
long int time;
const char *string;
};
int
datacmp(const void *const x, const void *const y)
{
return ((struct Data *) x)->time - ((struct Data *) y)->time;
}
int
main(void)
{
struct Data array[] = {
{1234, "1234 Text"},
{1034, "1034 Text"},
{1041, "1041 Text"}
};
size_t count;
count = sizeof(array) / sizeof(array[0]);
for (size_t i = 0 ; i < count ; ++i)
{
fprintf(stderr, "Entry %zu:\n\ttime : %ld\n\tstring: %s\n\n",
i, array[i].time, array[i].string);
}
fprintf(stderr, "\n");
qsort(array, count, sizeof(array[0]), datacmp);
fprintf(stderr, "---- Sorted array:\n");
for (size_t i = 0 ; i < count ; ++i)
{
fprintf(stderr, "Entry %zu:\n\ttime : %ld\n\tstring: %s\n\n",
i, array[i].time, array[i].string);
}
return 0;
}
A more generic solution that actually sorts 2 (or more) arrays, according to one of the arrays, by sorting an array of pointers to the key array, then reordering all of the arrays to sort them (it also restores the array of pointers back to their initial state). The compare function only needs to know the type that the pointers point to. The reorder in place takes O(n) (linear) time as every move places a value in it's final sorted location. In this example, a[] is an array of integers, b[] is an array of pointers to strings (char *).
int compare(const void *pp0, const void *pp1)
{
int i0 = **(int **)pp0;
int i1 = **(int **)pp1;
if(i0 > i1)return -1;
if(i0 < i1)return 1;
return 0;
}
/* ... */
int *pa = malloc(...); /* array of pointers */
int ta; /* temp value for a */
char *tb; /* temp value for b */
/* ... */
/* initialize array of pointers to a[] */
for(i = 0; i < sizeof(a)/sizeof(a[0]); i++)
pa[i] = &a[i];
/* sort array of pointers */
qsort(pa, sizeof(a)/sizeof(a[0]), sizeof(pa[0]), compare);
/* reorder a[] and b[] according to the array of pointers */
for(i = 0; i < sizeof(a)/sizeof(a[0]); i++){
if(i != pa[i]-a){
ta = a[i];
tb = b[i];
k = i;
while(i != (j = pa[k]-a)){
a[k] = a[j];
b[k] = b[j];
pa[k] = &a[k];
k = j;
}
a[k] = ta;
b[k] = tb;
pa[k] = &a[k];
}
}
I'm writing a quicksort algorithm to sort an array of strings.
The problem is that my array with the data seem to be overwritten with something right after i allocate the right and left quicksort arrays, because i print the array and its all there, but after i use malloc to allocate the others arrays, i print it again and i'm missing some elements.
Here's the output:
Pivot: 2
Emma, Olivia, Victoria, Gwyneth, Chloe, Hayley, Scarlett,
Emma, Olivia, Victoria, Gwyneth, , , ,
Anyone knows whats happening? What am missing?
char **concatenate(char **array1, int n1, char *pivot, char **array2, int n2, int len){
int i=0, j=0;
int elements = n1 + n2 + 1;
// alocating array
char **concat = (char**) malloc(sizeof(*concat) * elements);
concat[0] = (char*) malloc(sizeof(*concat) * elements * len);
for(i=1; i<elements; i++)
concat[i] = &(concat[0][i*len]);
// concatenating
for(i=0; i<n1; i++)
concat[i] = array1[i];
concat[i++] = pivot;
for(j=0; j<n2; j++)
concat[i++] = array2[j];
// returning
return concat;
}
char **quicksort(char **array, int elements, int len){
// array is already sorted
if(elements < 2)
return array;
int pivot;
int i=0, l=0, r=0;
// selecting the pivot (median)
if(elements % 2 == 0)
pivot = ((elements + 1) / 2) -1;
else
pivot = (elements / 2) -1;
//REMOVE
printf("Pivot: %d\n", pivot);
for(i=0; i<elements; i++)
printf("%s, ", array[i]);
printf("\n");
// alocating arrays
char **left = (char**) malloc(sizeof(*left) * pivot);
left[0] = (char*) malloc(sizeof(*left) * pivot * len);
for(i=1; i<pivot; i++)
left[i] = &(left[0][i*len]);
char **rigth = (char**) malloc(sizeof(*rigth) * pivot);
rigth[0] = (char*) malloc(sizeof(*rigth) * pivot * len);
for(i=1; i<pivot; i++)
rigth[i] = &(rigth[0][i*len]);
//REMOVE
for(i=0; i<elements; i++)
printf("%s, ", array[i]);
printf("\n");
//quicksorting
for(i=0; i<elements; i++){
if(array[i] == array[pivot])
continue;
int comp = strcmp(array[i], array[pivot]);
//REMOVE
printf("%d: strcmp %s, %s is %d\n", i, array[i], array[pivot], comp);
if(comp < pivot)
left[l++] = array[i];
else
rigth[r++] = array[i];
}
//REMOVE
printf("concatenate(");
for(i=0; i<l; i++)
printf("%s ", left[i]);
printf("|%s| ", array[pivot]);
for(i=0; i<r; i++)
printf("%s ", rigth[i]);
printf(")\n");
// recursion and return
return concatenate(quicksort(left, l, len), l, array[pivot], quicksort(rigth, r, len), r, len);
}
int main(int argc, char *argv[]){
int i, j, aux;
char **teste = (char**) malloc(sizeof(*teste) * 7);
teste[0] = (char*) malloc(sizeof(*teste) * 7 * 128);
for(i=1; i<7; i++)
teste[i] = &(teste[0][i*128]);
teste[0] = "Emma";
teste[1] = "Olivia";
teste[2] = "Victoria";
teste[3] = "Gwyneth";
teste[4] = "Chloe";
teste[5] = "Hayley";
teste[6] = "Scarlett";
quicksort(teste, 7, 128);
printf("AFTER\n");
for(i=0; i<7; i++)
printf("%s, ", teste[i]);
printf("\n");
return 0;
}
There is zero reason to allocate for quicksort, and in fact the function can easily suffice in your case with a simple interface of quicksort(char *arr[], unsigned int len), using pointer-math for the subsequence invocations.
Provide a swap algorithm for exchanging pointers:
void swap_str_ptrs(char const **arg1, char const **arg2)
{
const char *tmp = *arg1;
*arg1 = *arg2;
*arg2 = tmp;
}
Then the algorithm is:
void quicksort_strs(char const *args[], unsigned int len)
{
unsigned int i, pvt=0;
if (len <= 1)
return;
// swap a randomly selected value to the last node
swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);
// reset the pivot index to zero, then scan
for (i=0;i<len-1;++i)
{
if (strcmp(args[i], args[len-1]) < 0)
swap_str_ptrs(args+i, args+pvt++);
}
// move the pivot value into its place
swap_str_ptrs(args+pvt, args+len-1);
// and invoke on the subsequences. does NOT include the pivot-slot
quicksort_strs(args, pvt++);
quicksort_strs(args+pvt, len - pvt);
}
Thats everything. including the partitioning.
How It Works
There are two general recursive quicksort algorithms: the squeeze, and the sweep. This is the sweep algorithm. We march up the sequence, swapping any element "less" than than pivot value (which is swapped to the end of the sequence before the loop starts) to a target slot, the index of which is initially the beginning of the sequence and increases with each swap operation. When the "sweep" is finished, the pvt index is where the pivot value belongs, as everything below that slot is "less" than the that value. So one more swap is made to put the pivot value into position. After that we have two partitions, which are recursed. It is vital that the slot we just identified as the pivot location is not included in either of those partitions. It is the only value we know is in its final resting place.
Test Harnass
Including the above code, we test this with a basic set of strings purposely out of order:
void print_list(char const *args[], unsigned len)
{
unsigned i=0;
for (;i<len;++i)
puts(args[i]);
}
int main()
{
char const *args[] =
{
"this", "is", "a", "test", "of", "quicksort", "with", "strings"
};
srand((unsigned)time(NULL));
quicksort_strs(args, sizeof(args)/sizeof(*args));
print_list(args, sizeof(args)/sizeof(*args));
return 0;
}
Output
a
is
of
quicksort
strings
test
this
with
Non-recursive implementation
It should be noted that the above algorithm lends itself beautifully to a non-recursive implementation. A local dynamic stack is used for holding pairs of data: an pointer and a length. Optimized to not push trivial segments (segments of length 1 or 0) on to the stack, one implementation would like like this:
void quicksort_strs(char const *args[], unsigned int len)
{
// holds our non-recursive stack of segments
struct segment
{
char const **arr;
unsigned int len;
struct segment* next;
} *stack = NULL;
stack = malloc(sizeof(*stack));
stack->arr = args;
stack->len = len;
stack->next = NULL;
while (stack != NULL)
{
unsigned int i, pvt=0;
struct segment *tmp = stack;
stack = stack->next;
// pull values and delete segment record
args = tmp->arr;
len = tmp->len;
free(tmp);
// nothing to unary segments
if (len <= 1)
continue;
// swap a randomly selected value to the last node
swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);
// reset the pivot index to zero, then scan
for (i=0;i<len-1;++i)
{
if (strcmp(args[i], args[len-1]) < 0)
swap_str_ptrs(args+i, args+pvt++);
}
// move the pivot value into its place
swap_str_ptrs(args+pvt, args+len-1);
// lhs segment push
if (pvt > 1)
{
tmp = malloc(sizeof(*tmp));
tmp->arr = args;
tmp->len = pvt;
tmp->next = stack;
stack = tmp;
}
// rhs segment push
if ((len - ++pvt) > 1)
{
tmp = malloc(sizeof(*tmp));
tmp->arr = args+pvt;
tmp->len = len-pvt;
tmp->next = stack;
stack = tmp;
}
}
}
Obviously having a canned node-stack implementation would shorten this up considerably, but the idea should be readily apparent. A realloc() schema for holding nodes on the end of the "stack" rather than the beginning would be equally interesting, as it would eliminate the need to next pointer management, replaced with a top index instead.
Anyway, good luck, and I hope it helps.
This question already has answers here:
Algorithm: efficient way to remove duplicate integers from an array
(34 answers)
Closed 8 years ago.
I want small clarification in array concept in C.
I have array:
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
I want result like this:
{1,2,3,4,5,11,16}
Means I want remove duplicates.
How is it possible?
You can't readily resize arrays in C - at least, not arrays as you've declared that one. Clearly, if the data is in sorted order, it is straight-forward to copy the data to the front of the allocated array and treat it as if it was of the correct smaller size (and it is a linear O(n) algorithm). If the data is not sorted, it gets messier; the trivial algorithm is quadratic, so maybe a sort (O(N lg N)) followed by the linear algorithm is best for that.
You can use dynamically allocated memory to manage arrays. That may be beyond where you've reached in your studies, though.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
static int intcmp(const void *pa, const void *pb)
{
int a = *(int *)pa;
int b = *(int *)pb;
if (a > b)
return +1;
else if (a < b)
return -1;
else
return 0;
}
static int compact(int *array, int size)
{
int i;
int last = 0;
assert(size >= 0);
if (size <= 0)
return size;
for (i = 1; i < size; i++)
{
if (array[i] != array[last])
array[++last] = array[i];
}
return(last + 1);
}
static void print(int *array, int size, const char *tag, const char *name)
{
int i;
printf("%s\n", tag);
for (i = 0; i < size; i++)
printf("%s[%d] = %d\n", name, i, array[i]);
}
int main(void)
{
int a[11] = {1,2,3,4,5,11,11,11,11,16,16};
int a_size = sizeof(a) / sizeof(a[0]);
print(a, a_size, "Before", "a");
a_size = compact(a, a_size);
print(a, a_size, "After", "a");
int b[11] = {11,1,11,3,16,2,5,11,4,11,16};
int b_size = sizeof(b) / sizeof(b[0]);
print(b, b_size, "Before", "b");
qsort(b, b_size, sizeof(b[0]), intcmp);
print(b, b_size, "Sorted", "b");
b_size = compact(b, b_size);
print(b, b_size, "After", "b");
return 0;
}
#define arraysize(x) (sizeof(x) / sizeof(x[0])) // put this before main
int main() {
bool duplicate = false;
int a[11] = {1,2,3,4,5,11,11,11,11,16,16}; // doesnt have to be sorted
int b[11];
int index = 0;
for(int i = 0; i < arraysize(a); i++) { // looping through the main array
for(int j = 0; j < index; j++) { // looping through the target array where we know we have data. if we haven't found anything yet, this wont loop
if(a[i] == b[j]) { // if the target array contains the object, no need to continue further.
duplicate = true;
break; // break from this loop
}
}
if(!duplicate) { // if our value wasn't found in 'b' we will add this non-dublicate at index
b[index] = a[i];
index++;
}
duplicate = false; // restart
}
// optional
int c[index]; // index will be the number of objects we have in b
for(int k = 0; k < index; k++) {
c[k] = b[k];
}
}
If you really have to you can create a new array where that is the correct size and copy this into it.
As you can see, C is a very basic (but powerful) language and if you can, use a vector to but your objects in instead (c++'s std::vector perhaps) which can easily increase with your needs.
But as long as you only use small numbers of integers you shouldn't loose to much. If you have big numbers of data, you can always allocate the array on the heap with "malloc()" and pick a smaller size (maybe half the size of the original source array) that you then can increase (using realloc()) as you add more objects to it. There is some downsides reallocating the memory all the time as well but it is a decision you have to make - fast but allocation more data then you need? or slower and having the exact number of elements you need allocated (which you really cant control since malloc() might allocate more data then you need in some cases).
//gcc -Wall q2.cc -o q2 && q2
//Write a program to remove duplicates from a sorted array.
/*
The basic idea of our algorithm is to compare 2 adjacent values and determine if they
are the same. If they are not the same and we weren't already looking previusly at adjacent pairs
that were the same, then we output the value at the current index. The algorithm does everything
in-place and doesn't allocate any new memory. It outputs the unique values into the input array.
*/
#include <stdio.h>
#include <assert.h>
int remove_dups(int *arr, int n)
{
int idx = 0, odx = -1;
bool dup = false;
while (idx < n)
{
if (arr[idx] != arr[idx+1])
{
if (dup)
dup = false;
else
{
arr[++odx] = arr[idx];
}
} else
dup = true;
idx++;
}
return (odx == -1) ? -1 : ++odx;
}
int main(int argc, char *argv[])
{
int a[] = {31,44,44,67,67,99,99,100,101};
int k = remove_dups(a,9);
assert(k == 3);
for (int i = 0;i<k;i++)
printf("%d ",a[i]);
printf("\n\n");
int b[] = {-5,-3,-2,-2,-2,-2,1,3,5,5,18,18};
k = remove_dups(b,12);
assert(k == 4);
for (int i = 0;i<k;i++)
printf("%d ",b[i]);
printf("\n\n");
int c[] = {1,2,3,4,5,6,7,8,9};
k = remove_dups(c,9);
assert(k == 9);
for (int i = 0;i<k;i++)
printf("%d ",c[i]);
return 0;
}
you should create a new array and you should check the array if contains the element you want to insert before insert new element to it.
The question is not clear. Though, if you are trying to remove duplicates, you can use nested 'for' loops and remove all those values which occur more than once.
C does not have a built in data type that supports what you want -- you would need to create your own.
int a[11]={1,2,3,4,5,11,11,11,11,16,16};
As this array is sorted array, you can achieve very easily by following code.
int LengthofArray = 11;
//First elemnt can not be a duplicate so exclude the same and start from i = 1 than 0.
for(int i = 1; i < LengthofArray; i++);
{
if(a[i] == a[i-1])
RemoveArrayElementatIndex(i);
}
//function is used to remove the elements in the same as index passed to remove.
RemoveArrayElementatIndex(int i)
{
int k = 0;
if(i <=0)
return;
k = i;
int j =1; // variable is used to next item(offset) in the array from k.
//Move the next items to the array
//if its last item then the length of the array is updated directly, eg. incase i = 10.
while((k+j) < LengthofArray)
{
if(a[k] == a[k+j])
{
//increment only j , as another duplicate in this array
j = j +1 ;
}
else
{
a[k] = a[k+j];
//increment only k , as offset remains same
k = k + 1;
}
}
//set the new length of the array .
LengthofArray = k;
}
You could utilise qsort from stdlib.h to ensure your array is sorted into ascending order to remove the need for a nested loop.
Note that qsort requires a pointer to a function (int_cmp in this instance), i've included it below.
This function, int_array_unique returns the duplicate free array 'in-place' i.e. it overwrites the original and returns the length of the duplicate free array via the pn pointer
/**
* Return unique version of int array (duplicates removed)
*/
int int_array_unique(int *array, size_t *pn)
{
size_t n = *pn;
/* return err code 1 if a zero length array is passed in */
if (n == 0) return 1;
int i;
/* count the no. of unique array values */
int c=0;
/* sort input array so any duplicate values will be positioned next to each
* other */
qsort(array, n, sizeof(int), int_cmp);
/* size of the unique array is unknown at this point, but the output array
* can be no larger than the input array. Note, the correct length of the
* data is returned via pn */
int *tmp_array = calloc(n, sizeof(int));
tmp_array[c] = array[0];
c++;
for (i=1; i<n; i++) {
/* true if consecutive values are not equal */
if ( array[i] != array[i-1]) {
tmp_array[c] = array[i];
c++;
}
}
memmove(array, tmp_array, n*sizeof(int));
free(tmp_array);
/* set return parameter to length of data (e.g. no. of valid integers not
* actual allocated array length) of the uniqe array */
*pn = c;
return 0;
}
/* qsort int comparison function */
int int_cmp(const void *a, const void *b)
{
const int *ia = (const int *)a; // casting pointer types
const int *ib = (const int *)b;
/* integer comparison: returns negative if b > a
and positive if a > b */
return *ia - *ib;
}
Store the array element with small condition into new array
**just run once 100% will work
!)store the first value into array
II)store the another element check with before stored value..
III)if it exists leave the element--and check next one and store
here the below code run this u will understand better
int main()
{
int a[10],b[10],i,n,j=0,pos=0;
printf("\n enter a n value ");
scanf("%d",&n);
printf("\n enter a array value");
for(i=0;i<n;i++)
{
scanf("%d",&a[i]);//gets the arry value
}
for(i=0;i<n;i++)
{
if(check(a[i],pos,b)==0)//checks array each value its exits or not
{
b[j]=a[i];
j++;
pos++;//count the size of new storing element
}
}
printf("\n after updating array");
for(j=0;j<pos;j++)
{
printf("\n %d",b[j]);
} return 0;
}
int check(int x,int pos,int b[])
{ int m=0,i;
for(i=0;i<pos;i++)//checking the already only stored element
{
if(b[i]==x)
{
m++; //already exists increment the m value
}
}
return m;
}