This function f is to find common elements in an array and return result array and i am using 4 four loops to accomplish this task which i feel is no the best use of the loops,
Another problem is, how can i determine the size of the returned array so that my loop is within bounds
here is the code
#include <stdio.h>
#include <stdlib.h>
int *f(int first[], int second[], int size_first, int size_second);
int main(void) {
int arr1[]={1, 8, 3, 2, 6};
int arr2[]= {2, 6, 1};
int size1 = sizeof(arr1)/sizeof(arr1[0]);
int size2 = sizeof(arr2)/sizeof(arr2[0]);
int *intersection = f(arr1, arr2, size1, size2);
for(int i=0;i<3; i++){
printf("%d ", intersection[i]);
}
return 0;
}
// function to find common elements in 2 arrays
int *f(int first[], int second[], int size_first, int size_second){
int k=0, count=0;
//loop through the array to find the number common elements and store in count for dynamic memory allocation in future
for(int i=0;i<size_first;i++){
for(int j=0;j<size_second;j++){
if(first[i]==second[j]){
count ++;
}
}
}
// allocate memory for the common elements by making use of count
int * common_elements = (int*)malloc(count*sizeof(int));
// store the common elements in the new memory location
for(int i=0;i<size_first;i++){
for(int j=0;j<size_second;j++){
if(first[i]==second[j]){
common_elements[k]=first[i];
k++;
}
}
}
return common_elements;
free(common_elements);
}
If you are allowed to waste some memory, note that the intersection cannot have cardinality larger than the number of elements in the smaller set. Therefore, you can allocate more memory than you might need and avoid having to count first and allocate later.
Or, you can realloc as you go.
In general, you need a good data structure for checking set membership more quickly than scanning an entire array although for small sizes which fit in various caches, the linear scan will not perform too shabbily either.
For larger sets, however, you'll want to load the larger of the sets into an AVL tree or Scapegoat tree.
For really large data sets, you'll need to look into Bloom filters and related data structures depending on the use case.
I am including below the most naive improvement in your code which still has the nested loop and wastes memory up to the size of the smaller set to avoid counting common elements first.
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
// TODO: What about duplicates in smaller set?
int *
int_set_intersection(
const int *first,
const int *second,
const size_t size_first,
const size_t size_second,
size_t *n
)
{
size_t K = 0; // number of common elements
const int is_first_smaller = (size_first < size_second);
// Done this way so I can declare variables as consts
const int *set_smaller = is_first_smaller ? first : second;
const int *set_larger = is_first_smaller ? second : first;
const size_t size_smaller = is_first_smaller ? size_first : size_second;
const size_t size_larger = is_first_smaller ? size_second : size_first;
int *common = malloc(size_smaller * sizeof(*common));
if (!common) {
fprintf(stderr, "Failed to allocate memory for %z ints\n", size_smaller);
perror("Cannot allocate memory for common elements");
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < size_smaller; ++i) {
for (size_t j = 0; j < size_larger; ++j) {
if (set_smaller[i] == set_larger[j]) {
common[K] = set_smaller[i];
++K;
break;
}
}
}
*n = K;
return common;
}
void
int_set_print(const int *set, size_t n, FILE *f)
{
FILE *out = f ? f : stdout;
size_t i = 0;
fputs("{ ", out);
for (i = 0; i < n - 1; ++i) {
fprintf(out, "%d, ", set[i]);
}
fprintf(out, "%d }\n", set[i]);
}
int
main(void) {
int arr1[] = {1, 8, 3, 2, 6};
int arr2[] = {2, 5, 1};
size_t n = 0;
const int *intersection = int_set_intersection(
arr1,
arr2,
sizeof(arr1)/sizeof(arr1[0]),
sizeof(arr2)/sizeof(arr2[0]),
&n
);
int_set_print(intersection, n, NULL);
free(intersection); // not really needed, but good hygiene
return 0;
}
For larger arrays, one option is to sort the contents first to make it easier to check for common elements, as shown in the code below. If the original array contents cannot be changed, first copy them into dynamically allocated memory. Dynamically allocated memory is also needed to hold the list of common elements, but that can use the same storage as one of the copies.
OP's original function returns a pointer to dynamically allocated memory containing the array of common elements, but does not indicate the length of the array. I added a parameter to return the length.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int *f(int first[], int second[], int size_first, int size_second, int *size_common);
int main(void) {
int arr1[]={1, 8, 3, 2, 6};
int arr2[]= {2, 6, 1};
int size1 = sizeof(arr1)/sizeof(arr1[0]);
int size2 = sizeof(arr2)/sizeof(arr2[0]);
int size_common;
int *intersection = f(arr1, arr2, size1, size2, &size_common);
for(int i=0;i<size_common; i++){
printf("%d ", intersection[i]);
}
free(intersection);
return 0;
}
static int cmp_int(const void *ap, const void *bp) {
int a = *(const int *)ap;
int b = *(const int *)bp;
return (a > b) - (a < b);
}
// function to find common elements in 2 arrays
int *f(int first[], int second[], int size_first, int size_second,
int *size_common) {
int *copy1;
int *copy2;
int idx1;
int idx2;
int count;
// allocate memory local copies of the arrays
copy1 = malloc(size_first * sizeof (int));
copy2 = malloc(size_second * sizeof (int));
if (!copy1 || !copy2) {
// allocation error
free(copy1);
free(copy2);
*size_common = -1; // use -1 to report error
return NULL;
}
// copy the arrays
memcpy(copy1, first, size_first * sizeof (int));
memcpy(copy2, second, size_second * sizeof (int));
// sort the copies in ascending order
qsort(copy1, size_first, sizeof (int), cmp_int);
qsort(copy2, size_second, sizeof (int), cmp_int);
// find common elements
idx1 = 0;
idx2 = 0;
count = 0;
while (idx1 < size_first && idx2 < size_second) {
if (copy1[idx1] < copy2[idx2]) {
idx1++;
} else if (copy1[idx1] > copy2[idx2]) {
idx2++;
} else {
// common element found!
// use copy1[] to store common elements
copy1[count] = copy1[idx1];
count++;
idx1++;
idx2++;
}
}
// common elements are in copy1[].
// finished with copy2, so free it.
free(copy2);
if (count == 0) {
// no common elements
free(copy1); // free the memory
copy1 = NULL; // and make the function return NULL
} else {
// try to reduce memory for common elements
copy2 = realloc(copy1, count * sizeof (int));
if (copy2) {
// reallocation successful
copy1 = copy2;
} // else, never mind, copy1 is still valid
}
// return the common elements
*size_common = count;
return copy1;
}
If your arrays are of comparable elements (you use integers, which are comparable), the best way in my opinion is to sort both arrays and traverse both in parallel, looking at both sides and comparing the elements at both sides. If there's a lowest element, advance its pointer, leaving the other waiting.... if there's a match (they are equal), you can mark it (more on this later) and advance both pointers, until you reach the end in one array (the sortest). You will get the marks on the matching positions, but if you reorder the array, exchanging the found element with the first of the yet, unmatched elements, you will have all matching elements in the first positions of both arrays, letting you to return only the number of matches from the function and the matches themselves in the first positions of both arrays.
The complexity of this algorithm should be O(n*log(n)) (because of the quicksorts) if you use quicksort, plus O(n) (which doesn't affect the final O) for the matching, so O(n*log(n)) should be the big O complexity, as a general case. Below is a sample code, with a run:
comp.c
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#define N(arr) (sizeof(arr)/sizeof((arr)[0]))
void swap(int *ref_a, int *ref_b)
{
if (ref_a == ref_b)
return; /* nothing to do. */
int temp = *ref_a;
*ref_a = *ref_b;
*ref_b = temp;
}
int int_cmp(const void *_a, const void *_b)
{
const int *a = _a, *b = _b;
return *a - *b;
}
void print(int v[], int v_sz, const char *fmt, ...)
{
va_list p;
va_start(p, fmt);
vprintf(fmt, p);
va_end(p);
char *sep = "[";
for (int i = 0; i < v_sz; i++) {
printf("%s%d", sep, v[i]);
sep = ", ";
}
printf("]\n");
}
int find_matches(int a[], int b[], int a_sz, int b_sz)
{
print(a, a_sz, "a(unsorted)");
print(b, b_sz, "b(unsorted)");
qsort(a, a_sz, sizeof a[0], int_cmp);
qsort(b, b_sz, sizeof b[0], int_cmp);
print(a, a_sz, "a(sorted)");
print(b, b_sz, "b(sorted)");
int i = 0;
for (int i_a = 0, i_b = 0; i_a < a_sz && i_b < b_sz;) {
if (a[i_a] < b[i_b]) {
i_a++;
continue;
} else if (a[i_a] > b[i_b]) {
i_b++;
continue;
}
/* a[i_a] == b[i_b] */
swap(&a[i_a], &a[i]);
swap(&b[i_b], &b[i]);
print(a, a_sz, "after #%d, a:", i);
print(b, b_sz, "after #%d, b:", i);
i_a++; i_b++; i++;
}
return i;
}
int main()
{
int arr1[] = {1, 8, 3, 2, 6, 7};
int arr2[] = {2, 6, 1, 7, 4, 1, 9, 6};
int size1 = N(arr1);
int size2 = N(arr2);
int match = find_matches(arr1, arr2, size1, size2);
for (int i = 0; i < match; i++) {
printf("Match #%d: %d\n", i, arr1[i]);
}
}
It will produce:
$ comp
a(unsorted)[1, 8, 3, 2, 6, 7]
b(unsorted)[2, 6, 1, 7, 4, 1, 9, 6]
a(sorted)[1, 2, 3, 6, 7, 8]
b(sorted)[1, 1, 2, 4, 6, 6, 7, 9]
after #0, a:[1, 2, 3, 6, 7, 8]
after #0, b:[1, 1, 2, 4, 6, 6, 7, 9]
after #1, a:[1, 2, 3, 6, 7, 8]
after #1, b:[1, 2, 1, 4, 6, 6, 7, 9]
after #2, a:[1, 2, 6, 3, 7, 8]
after #2, b:[1, 2, 6, 4, 1, 6, 7, 9]
after #3, a:[1, 2, 6, 7, 3, 8]
after #3, b:[1, 2, 6, 7, 1, 6, 4, 9]
Match #0: 1
Match #1: 2
Match #2: 6
Match #3: 7
$ _
A good interface is to switch in both algorithms the matched elements with the first of the non-yet-matched elements in both arrays, so in this way you can return an integer (the one you use to know the start of the non-yet-matched elements) that tells you the number of matched elements, and you will get them from any of the two arrays.
If the elements are not comparable, and they can be just be compared for equity, then you have to compare each element with any other for a match, take them off from the arrays (this can be done swapping them with the first of the not yet matched elemnts, and advance the pointers), and start again with the reduced versions of them. Some way of doing this is, when you find a match, to exchange them with the first, second, third elements of each array, and use a variation of the above algorithm (you reorder as you match) In this case you compare at first time n*m (but not all), when you get a match, (n-1)*(m-1), ... and so until the last comparition in which you fail all comparitions to (m-k)*(n-k). This is, in the average, m*n/2 + (m-1)*(n-1)/2 +...+ (m-k)*(n-k). something in the range of m(m-1)*n(n-1)/k^2, which is O(m^2*n^2):
comp2.c
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#define N(arr) (sizeof(arr)/sizeof((arr)[0]))
void swap(int *ref_a, int *ref_b)
{
if (ref_a == ref_b)
return; /* nothing to do. */
int temp = *ref_a;
*ref_a = *ref_b;
*ref_b = temp;
}
int int_cmp(const void *_a, const void *_b)
{
const int *a = _a, *b = _b;
return *a - *b;
}
void print(int v[], int v_sz, const char *fmt, ...)
{
va_list p;
va_start(p, fmt);
vprintf(fmt, p);
va_end(p);
char *sep = "[";
for (int i = 0; i < v_sz; i++) {
printf("%s%d", sep, v[i]);
sep = ", ";
}
printf("]\n");
}
int find_matches(int a[], int b[], int a_sz, int b_sz)
{
print(a, a_sz, "a(unsorted)");
print(b, b_sz, "b(unsorted)");
int i = 0;
loop:
for (int i_a = 0; i_a + i < a_sz; i_a++) {
for (int i_b = 0; i_b + i < b_sz; i_b++) {
/* we can only compare for equality */
if (a[i + i_a] == b[i + i_b]) {
swap(&a[i + i_a], &a[i]);
swap(&b[i + i_b], &b[i]);
i++;
goto loop;
}
}
}
print(a, a_sz, "a(final)");
print(b, b_sz, "b(final)");
return i;
}
int main()
{
int arr1[] = {1, 8, 3, 2, 6, 7};
int arr2[] = {2, 6, 1, 7, 4, 1, 9, 6};
int size1 = N(arr1);
int size2 = N(arr2);
int match = find_matches(arr1, arr2, size1, size2);
for (int i = 0; i < match; i++) {
printf("Match #%d: %d\n", i, arr1[i]);
}
}
which produces, when running, the following output:
$ comp2
a(unsorted)[1, 8, 3, 2, 6, 7]
b(unsorted)[2, 6, 1, 7, 4, 1, 9, 6]
a(final)[1, 2, 6, 7, 3, 8]
b(final)[1, 2, 6, 7, 4, 1, 9, 6]
Match #0: 1
Match #1: 2
Match #2: 6
Match #3: 7
$ _
You can reorder the values, there's no difference, in this case you had a two level for loop, mixed with a third level go back to the beginning and start again loop. The loop is warranted to finish, as when you go back to the top, you have increased i, which means the nested for loops will be shorter each time. We can rewrite the find_matches routine in this case by adjusting the array start points, in this manner:
comp3.c
/* ... as before */
int find_matches(int a[], int b[], int a_sz, int b_sz)
{
print(a, a_sz, "a(unsorted)");
print(b, b_sz, "b(unsorted)");
int i = 0;
loop:
for (int i_a = 0; i_a < a_sz; i_a++) {
for (int i_b = 0; i_b < b_sz; i_b++) {
/* we can only compare for equality */
if (a[i_a] == b[i_b]) {
swap(&a[i_a], &a[0]);
swap(&b[i_b], &b[0]);
i++;
print(a++, a_sz--, "a(after match)");
print(b++, b_sz--, "b(after match)");
goto loop;
}
}
}
print(a, a_sz, "a(final)");
print(b, b_sz, "b(final)");
return i;
}
/* ... as before */
that will produce this result (I changed the initial sort order to see how it affects the final result):
$ comp3
a(unsorted): [7, 8, 2, 3, 6, 1]
b(unsorted): [2, 6, 1, 7, 4, 1, 9, 6]
a(after match): [7, 8, 2, 3, 6, 1]
b(after match): [7, 6, 1, 2, 4, 1, 9, 6]
a(after match): [2, 8, 3, 6, 1]
b(after match): [2, 1, 6, 4, 1, 9, 6]
a(after match): [6, 3, 8, 1]
b(after match): [6, 1, 4, 1, 9, 6]
a(after match): [1, 8, 3]
b(after match): [1, 4, 1, 9, 6]
a(final): [8, 3]
b(final): [4, 1, 9, 6]
Match #0: 7
Match #1: 2
Match #2: 6
Match #3: 1
$ _
I would need to replace some data sequences with others, in an array, such as in this example (whose signature I imagined for the replacement function):
seq_replace(
int *array, size_t size0, // The array to modify + its size
int *to_replace, size_t size1, // The sequence to be replaced + its size
int *to_place, size_t size2); // The sequence to be placed + its size
int array[] = {0, 6, 3, 0, 6, 2};
seq_replace(
&array, 6,
(const int[]){0, 6}, 2,
(const int[]){9}, 1);
And would be obtain my array with values {9, 3, 9, 2}.
I imagine that linked lists would be more suitable for this style of task, but I work throughout my project with arrays, and making conversions between types of container would cost time.
Anyway, I don't know of any algorithm to do this kind of thing, and I haven't found anything interesting on the Internet either. I therefore turn to this site for advice on how to carry out this task.
Here is a code that works in the case array is known to be large enough for any replacement. The function returns the new logical size of array. The // p += size2; commented instruction makes it possible to recursively replace inside the result of to_place (as it is the case for a peephole optimizer). For instance in {0,6,6} replacing {0,6} by {0} gives {0}.
There is a main() to test various cases.
#include <stdio.h>
#include <string.h>
void show_array(const int *array, size_t size0)
{
int i;
for(i = 0; i < size0; i++)
printf("%3d", array[i]);
puts("");
}
int seq_replace(int *array, size_t size0, // The array to modify + its size
const int *to_replace, size_t size1, // The sequence to be replaced + its size
const int *to_place, size_t size2) // The sequence to be placed + its size
{
int *p = array, *end = array + size0;
while(p < end)
{
if (p + size1 <= end && memcmp(p, to_replace, size1 * sizeof(*p)) == 0)
{
memmove(p + size2, p + size1, (end - p - size1) * sizeof(*p));
memcpy(p, to_place, size2 * sizeof(*p));
// p += size2; // uncomment to avoid replacements in to_place itself
size0 = size0 - size1 + size2;
end = array + size0;
}
else
{
p++;
}
}
return size0; // return logical new size
}
#define N_ELEM(p) (sizeof(p) / sizeof(*(p)))
// array is physically large enough
int array[1000] = {0, 6, 6, 0, 6, 2, 0, 6, 0, 6};
int size0 = 10; // its logical length
int to_replace[] = {0, 6};
int to_place[] = {0}; // try {9, 8}, {9, 8, 7}, {9} and even {}
int main()
{
printf("initial array; length: %d\n", size0);
show_array(array, size0);
size0 = seq_replace(array, size0,
to_replace, N_ELEM(to_replace),
to_place, N_ELEM(to_place));
printf("final array, new length: %d\n", size0);
show_array(array, size0);
}
I wrote the following insertion sort yesterday (I started learning C 3 days ago). For some reason, the sort does not modify the array AT ALL.
#include <stdio.h>
int *insert(int arr[], int index, int item);
int *isort(int arr[]);
int main() {
int a[17] = {1, 2, 9, 5, 3, 2, 1, 6, 5, 9, 0, 1, 3, 4, 2, 3, 4};
int *b = isort(a);
for (int i = 0; i < 17; i += 1) {
printf("%d ", b[i]);
}
return 0;
}
int *insert(int arr[], int index, int item) {
--index;
while (index >= 0 && item < arr[index]) {
arr[index + 1] = arr[index];
--index;
}
arr[index + 1] = item;
return arr;
}
int *isort(int arr[]) {
for (int i = 1; i < sizeof(arr) - 1; i++) {
arr = insert(arr, i, arr[i]);
}
return arr;
}
I'm thinking it could be my compiler, as I'm running a compiler that is on a non unix machine: lcc-win, but I'm not sure. Is there some fundamental thing I'm missing here?
int *isort(int arr[]) {
for (int i = 1; i < sizeof(arr) - 1; i++) {
arr = insert(arr, i, arr[i]);
}
return arr;
}
In this function sizeof(arr) actually returns the size of the pointer and not the size of the array.
In C a special rule says an array parameter is actually adjusted to a parameter of the corresponding pointer type.
That is:
int *isort(int arr[]) { /* ... */ }
is equivalent to this:
int *isort(int *arr) { /* ... */ }
To fix this, add a new parameter in your function that takes the size of the array:
int *isort(int arr[], size_t size) { /* ... */ }
The first problem, as has been pointed out, is that the isort function uses the sizeof operator on a pointer. The way C treats arrays is a little strange at first glance. The name of an array is a pointer to its first element. So when you call isort like this:
int *b = isort(a);
you are simply pushing a pointer to the array onto the stack. In the definition of isort,
int *isort(int arr[])
declares arr to be a pointer to int just like
int *isort(int *arr)
C is even more confusing in this respect: if you had said:
int *isort(int arr[17])
the arr variable is still just a pointer to int ... the "17" here is discarded! Even with this syntax, sizeof(arr) will still be the size of a pointer to int.
On a 32-bit system (ILP32), sizeof(arr) will always be 4, however big the array is.
Therefore, you need to pass the size of the array to isort. A good general way to do this is to define a macro like this:
#define NITEMS(arr) (sizeof(arr)/sizeof(arr[0]))
This will calculate the number of elements in an array of any type.
Your next problem is more a style one than an actual error:
arr = insert(arr, i, arr[i]);
This calls the insert function with a reference to "arr". The array is modified through this reference, and then a pointer to that array is returned. It will always be the same pointer as you sent in the first place, so this assignment actually does nothing, harmlessly. Like I say, a style problem, not a code error.
The final issue is your isort function stops one short (after you correct the sizeof problem), since you went from 1 to sizeof-1. Here is a fixed version:
#include <stdio.h>
#define NITEMS(arr) (sizeof(arr)/sizeof(arr[0]))
int *insert(int arr[], int index, int item);
int *isort(int arr[], size_t nitems);
int main() {
int a[17] = {1, 2, 9, 5, 3, 2, 1, 6, 5, 9, 0, 1, 3, 4, 2, 3, 4};
int *b = isort(a, NITEMS(a));
for (int i = 0; i < NITEMS(a); i += 1) {
printf("%d ", b[i]);
}
printf("\n");
return 0;
}
int *insert(int arr[], int index, int item) {
--index;
while (index >= 0 && item < arr[index]) {
arr[index + 1] = arr[index];
--index;
}
arr[index + 1] = item;
return arr;
}
int *isort(int arr[], size_t nitems) {
for (int i = 1; i < nitems; i++) {
insert(arr, i, arr[i]);
}
return arr;
}