Is it possible to improve this array intersection code - c

This is the minimum code to obtain arrays intersection without any repetition in the final array. Can it be improved ?
I think it can't because it uses the minimum number of iteration thanks to the break in the inner loop and also that it can't be parallelized due to a critical section inside the if clause, am I wrong ?
I tried to try this function and the Matlab one (intersect) with the same output and the latter is much faster, how is it possible ?
int intersection(int* array1, int* array2, int len1, int len2, int size) {
int j, k, t, intersectC = 0;
int* tmp = (int*)malloc(sizeof(int) * size);
for (j = 0; j < len1; j++) {
for (k = 0; k < len2; k++) {
if (array1[j] == array2[k]) {
for (t = 0; t < intersectC; t++) {
if (tmp[t] == array1[j]) {
break;
}
}
if (t == intersectC) {
tmp[intersectC++] = array1[j];
}
}
}
}
free(tmp);
return intersectC;
}
P.S. size is the greatest between len1 and len2

Your algorithm is O(N3), which is insanely bad considering it can be done quickly in O(N).
The following sorts the arrays (using a base2 radix sort), and then uses an approach akin to merge sort to find the intersection of the sorted arrays.
(I used uint32_t. I leave it to you to adapt to int.)
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define ARRAY_LEN(a) ( sizeof(a) / sizeof(*a) )
#define MALLOC(t, n) ( (t*)malloc(sizeof(t) * n) )
#define REALLOC(p, t, n) ( (t*)realloc(p, sizeof(t) * n) )
static void _sort_uint32s(uint32_t *a, size_t n, uint32_t mask) {
if ( n <= 1 )
return;
uint32_t *p = a;
uint32_t *q = a + n;
while (1) {
while (1) {
if ( ( *p & mask ) != 0 )
break;
if ( ++p == q )
goto DONE_GROUPING;
}
while (1) {
if ( p == --q )
goto DONE_GROUPING;
if ( ( *q & mask ) == 0 )
break;
}
uint32_t tmp = *p;
*p = *q;
*q = tmp;
}
DONE_GROUPING:
mask >>= 1;
if ( !mask )
return;
if ( q > a )
_sort_uint32s(a, q-a, mask);
if ( q < a+n )
_sort_uint32s(q, a+n-q, mask);
}
static void sort_uint32s(uint32_t *a, size_t n) {
_sort_uint32s(a, n, 0x80000000);
}
static size_t min_size_t(size_t a, size_t b) {
return a < b ? a : b;
}
// Returns 0 on success.
// Returns -1 and sets errno on error.
// Will modify (sort) a1 and a2.
// Note that *set_p == NULL is possible on success.
static int intersect(uint32_t *a1, size_t n1, uint32_t *a2, size_t n2, uint32_t **set_p, size_t *n_p) {
size_t n = min_size_t(n1, n2);
uint32_t *set = MALLOC(uint32_t, n);
if (!set) {
*set_p = NULL;
*n_p = 0;
return -1;
}
sort_uint32s(a1, n1);
sort_uint32s(a2, n2);
n = 0;
while ( n1 && n2 ) {
if ( *a1 < *a2 ) {
while ( --n1 && *(++a1) < *a2 );
}
else if ( *a2 < *a1 ) {
while ( --n2 && *(++a2) < *a1 );
}
else {
uint32_t v = *a1;
set[n++] = v;
while ( --n1 && *(++a1) == v );
while ( --n2 && *(++a2) == v );
}
}
if ( !n ) {
free(set);
*set_p = NULL;
*n_p = 0;
return 0;
}
uint32_t *tmp = REALLOC(set, uint32_t, n);
*set_p = tmp ? tmp : set;
*n_p = n;
return 0;
}
int main(void) {
uint32_t a1[] = { 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15 };
size_t n1 = ARRAY_LEN(a1);
uint32_t a2[] = { 12, 1, 5, 2, 2 };
size_t n2 = ARRAY_LEN(a2);
uint32_t *set;
size_t n;
if ( intersect(a1, n1, a2, n2, &set, &n) < 0 ) {
perror(NULL);
exit(1);
}
printf("Intersection:");
for (size_t i=0; i<n; ++i)
printf(" %" PRIu32, set[i]);
printf("\n");
free(set);
return 0;
}

In the end, following comments to this question, I used the radix sort to sort the two arrays and a classic intersection algorithm with O(n+m).
Probably, this code is a bit slower than the one posted by #ikegami but much faster than the one in the question.
int getMax(int* arr, int n){
int mx = arr[0];
for (int i = 1; i < n; i++)
if (arr[i] > mx)
mx = arr[i];
return mx;
}
void countSort(int* arr, int* output, int n, int exp){
int i, count[10] = { 0 };
for (i = 0; i < n; i++)
count[(arr[i] / exp) % 10]++;
for (i = 1; i < 10; i++)
count[i] += count[i - 1];
for (i = n - 1; i >= 0; i--) {
output[count[(arr[i] / exp) % 10] - 1] = arr[i];
count[(arr[i] / exp) % 10]--;
}
for (i = 0; i < n; i++)
arr[i] = output[i];
}
void radixsort(int* arr, int n){
int m = getMax(arr, n);
int* output = (int*)malloc(sizeof(int) * n);
for (int exp = 1; m / exp > 0; exp *= 10) {
countSort(arr, output, n, exp);
}
free(output);
}
int intersection(int* arr1, int* arr2, int len1, int len2, int size){
int t, intersectC = 0;
int* tmp = (int*)malloc(sizeof(int) * size);
radixsort(arr1, len1);
radixsort(arr2, len2);
int i = 0, j = 0;
while (i < len1 && j < len2) {
if (arr1[i] < arr2[j])
i++;
else if (arr2[j] < arr1[i])
j++;
else /* if arr1[i] == arr2[j] */
{
for (t = 0; t < intersectC; t++) {
if (tmp[t] == arr1[j]) {
break;
}
}
if (t == intersectC) {
tmp[intersectC++] = arr1[j];
}
i++;
}
}
free(tmp);
return intersectC;
}

Related

Bigger number in array than the sum of the numbers to the right of it

I'm trying to create a program in C. I'm trying to find "special numbers" in a array. A special number is a number that is bigger than the sum of the numbers to the right of it.
I have this array
int input[] = {20,5,16,17,4,3,5,2,1};
Special numbers in this array are 17, 5, 2, 1... because 17 > 4+3+5+2; ...
I have function int special_numbers(). This function should find this special numbers and store them into array called result() and return it.
I have been trying for a very long time but I can't find a solution.
My code:
#include <stdio.h>
#include <math.h>
int special_numbers();
int main(){
int input[] = {20,5,16,17,4,3,5,2,1};
int result[9];
for(int i = 0; i < count; i++){
printf("%d ", result[i]);
}
printf("%d", special_numbers(input, 9));
return 0;
}
int special_numbers(const int input[], const int array_size, int result[]){
int result = 0;
for (int i = 0; i < array_size; i++){
if(input[i] > input[i+1]){
}
}
return result;
}
There are more efficient ways, but the simple way is to use a nested loop that gets the sum of all the elements after i.
int special_numbers(const int input_array[], const int array_size, int result_array[]){
int result = 0;
for (int i = 0; i < array_size; i++){
int sum = 0;
for (int j = i+1; j < array_size; j++) {
sum += input_array[j];
}
if (array[i] > sum) {
result_array[result++] = array[i];
}
}
return result;
}
You are calling the function twice
int count = special_numbers(input_array, 6, result_array);
//...
printf("%d", special_numbers(input_array, 6));
where the second call is redundant because you already have the variable count and is invalid because you forgot to specify the third argument.
Also it is a bad idea to use magic numbers as 6.
Within the function you need to calculate the sum of elements that follow the current element. It is better to declare the variable that will store the sum as having the type long long int to avoid an overflow.
The function can be defined the following way as it is shown in the demonstration program below.
#include <stdio.h>
size_t special_numbers( const int input_array[], size_t array_size, int result_array[] )
{
size_t result = 0;
long long int sum = 0;
for ( size_t i = array_size; i != 0; --i )
{
if ( i == array_size || sum < input_array[i - 1] ) ++result;
sum += input_array[i-1];
}
sum = 0;
for ( size_t i = array_size, j = result; i != 0; --i )
{
if ( i == array_size || sum < input_array[i - 1] )
{
result_array[--j] = input_array[i-1];
}
sum += input_array[i-1];
}
return result;
}
int main( void )
{
int input_array[] = { 16, 17, 4, 3, 5, 2 };
int result_array[sizeof( input_array ) / sizeof( *input_array )];
const size_t N = sizeof( input_array ) / sizeof( *input_array );
size_t count = special_numbers( input_array, N, result_array );
printf( "%zu: ", count );
for ( size_t i = 0; i < count; i++ )
{
printf( "%d ", result_array[i] );
}
putchar( '\n' );
return 0;
}
The program output is
3: 17 5 2
If the last element of the input array shall be greater than 0 then change the function the following way
size_t special_numbers( const int input_array[], size_t array_size, int result_array[] )
{
size_t result = 0;
long long int sum = 0;
for ( size_t i = array_size; i != 0; --i )
{
if ( sum < input_array[i - 1] ) ++result;
sum += input_array[i-1];
}
sum = 0;
for ( size_t i = array_size, j = result; i != 0; --i )
{
if ( sum < input_array[i - 1] )
{
result_array[--j] = input_array[i-1];
}
sum += input_array[i-1];
}
return result;
}

How to get rid of segmentation fault?

#include <stdio.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
int getCharNum(int a) { return a / (8 * sizeof(char)); }
int getBitNum(int a) { return a % (8 * sizeof(char)); }
int fromCharNum(int a) { return a * 8 * sizeof(char); }
int get2DimI(int a) { return getCharNum(a) / 187500; }
int get2DimJ(int a) { return getCharNum(a) % 187500; }
void rle_compress(char *src, char *dst, int ls, int *ld) {
uint8_t t[129];
int i, j = 0, k = 0, keep;
char out[187500];
t[0] = src[j];
while (j++ < ls) {
t[1] = src[j];
if (t[0] != t[1]) {
i = 1;
if (j < ls)
do
t[++i] = src[++j];
while (j < ls && i < 128 && t[i] != t[i - 1]);
if ((keep = t[i] == t[i - 1]))
--i;
out[k++] = (char)i;
t[0] = t[i];
if (!keep)
continue;
}
i = 2;
do
t[1] = src[++j];
while (++i < 130 && t[0] == t[1]);
out[k++] = i + 125;
out[k++] = t[0];
t[0] = t[1];
}
ld = &k;
dst = out;
}
void rle_extract(char *src, char *dst, int ls) {
int i, j, l = 0, k = 0, max;
char out[187500];
j = 0;
while (k + 2 < ls) {
i = src[k++]; //segfault
j = src[k++];
max = i + (i < 128 ? 1 : -126);
while (max--)
out[l++] = j;
}
dst = out;
return 0;
}
int main(void) {
int32_t n = 0;
scanf("%d", &n);
int32_t a[n];
int32_t b[] = { -1, -1, -1 };
char **count;
count = (char**)malloc(1000 * sizeof(char*));
int count_l[] = { [999] = 0 };
for (int i = 0; i < 1000; ++i) {
count[i] = (char*)malloc(187500 * sizeof(char));
char *temp = NULL;
rle_compress(count[i], temp, 187500, &count_l[i]);
free(count[i]);
count[i] = temp;
}
for (int i = 0; i < n; i++)
scanf("%d", &a[i]);
for (int i = 0; i < n; i++) {
char *src = count[get2DimI(a[i]) / 187500];
char dst[187500];
rle_extract(src, dst, count_l[i / 187500]);
dst[get2DimJ(a[i])] ^= 1 << (getBitNum(a[i]));
rle_compress(dst, count[get2DimI(a[i]) / 187500], 187500, &count_l[i]);
}
int32_t mv = 187500000 / (8 * sizeof(char));
int j = 0;
for (int i = 0; i < mv; i++) {
char *src = count[i / 187500];
char dst[187500];
rle_extract(src, dst, count_l[i / 187500]);
int32_t x = dst[i % 187500];
if (x == 0)
continue;
for (int k = 0; k < 8 * sizeof(char); k++) {
if ((x >> (k)) & 1) {
b[j++] = fromCharNum(i) + k;
}
}
//free(dst);
}
int m1 = min(b[0], min(b[1], b[2])),
m3 = max(b[0], max(b[1], b[2])),
m2 = b[0] + b[1] + b[2] - m1 - m3;
printf("%d %d %d", m1, m2, m3);
for (int i = 0; i < 1000; ++i)
free(count[i]);
free(count);
return 0;
}
How to fix this code?
I'm trying to compress byte array (which should compress greatly as n should be <=1500000 and numbers are from 0 to 1.5*10^9), but code gives me segfault on all testing inputs which I've tried. Without compress everything worked like a charm, but needed a lot of memory (and limits are 64MiB).
The code is obscure but there are some major problems:
void rle_extract(char *src, char *dst, int ls) does not take the output buffer from its caller, nor does it return a pointer to it: dst = out; just updates the argument value, not the caller's variable passed as an argument. Furthermore return 0; from a void function is incorrect too.
in any case, rle_extract should not return its local out buffer because it is only defined during the execution of the function and is discarded as soon as the function returns.
You should either pass the buffer as an argument or allocate it locally and return the pointer to the caller.
There might be other problems, there is no explanation for what the code is supposed to do.

Access violation writing location 0x00000000 pointers in recursive function

This is my code calculating the determinant of a Matrix of complex numbers. I have to define matrices like this(double pointer) because I am working with a really old project with C.(dont ask why)
int i, j, k, c1, c2;
typedef struct {
double re;
double im;
} cmplx;
cmplx** Create2DMatrixCmplx(int d1, int d2)
{
cmplx **matCC = (cmplx**)malloc(d1 * sizeof(cmplx*));
for (i = 0; i < d1; i++)
matCC[i] = ((cmplx*)malloc(d2 * sizeof(cmplx)));
if ( matCC == NULL )
{
printf("Error: out of memory.\n");
return;
}
return matCC;
}
void matrixCAssign(cmplx** a1, cmplx** a2, int l1, int l2)
{
for (i = 0; i < l1; i++)
for (j = 0; j < l2; j++)
a1[i][j] = a2[i][j];
}
cmplx determinantC(cmplx **A, int len)
{
cmplx det, temp;
cmplx** Matrix = Create2DMatrixCmplx(len, len);
cmplx** Minor = Create2DMatrixCmplx(len, len);
matrixCAssign( Matrix, A, len, len );
if(len == 1)
{
det = Matrix[0][0];
}
else if(len == 2)
{
det = (Matrix[0][0] * Matrix[1][1]) - (Matrix[0][1] * Matrix[1][0]));
}
else
{
for(i = 0 ; i < len ; i++)
{
c1 = 0, c2 = 0;
for(j = 0 ; j < len ; j++)
{
for(k = 0 ; k < len ; k++)
{
if(j != 0 && k != i)
{
Minor[c1][c2] = Matrix[j][k];
c2++;
if( c2 > len-2 )
{
c1++;
c2=0;
}
}
}
}
temp = determinantC(Minor,len-1);
det += ( Matrix[0][i] * temp) * O;
O = -1 * O;
}
}
return det;
}
main()
{
cmplx **A= Create2DMatrixCmplx(1024, 1024);
// set data to A
cmplx det = determinantC( A, 1024 );
}
I get Writing Access error in this line:
Minor[c1][c2] = Matrix[j][k];
when the error happen, the values are:
c1=337, c2=338, len=974,
"Minor" dimentions are len*len in each iteration and c1, c2 values are smaller.
So I guess the problem must be with pointers. I am making a new instance of "cmplx** Matrix" in each iteration but I am calling the function with the pointer type. Do I get a new instance of the object when I call it with pointer? If so why is it no problem in iterations before? Value of len at the begining is 1024.
Can somebody help me find the problem? I can not see it.

Runtime error: insertatIndex

#include <stdio.h>
#include <malloc.h>
int insertAt(int *Arr, int len, int num) {
for (int i = 0; i < len; ++i) {
if (num <= Arr[0])
return 0;
else if (num >= Arr[len])
return len + 1;
else if (num >= Arr[i - 1] && num <= Arr[i])
return i;
}
}
int * sortedArrayInsertNumber(int *Arr, int len, int num){
int *output = (int *)malloc((len + 1)*sizeof(int));
if (len <= 0)
return NULL;
for (int i = 0, j = 0; j <= len+1; ++i, ++j) {
if (i == insertAt(Arr, len, num) && j==i) {
output[j] = num;
--i;
}
else if(insertAt(Arr,len,num)==len+1) {
output[j] = num;
}
else {
output[j] = Arr[i];
}
}
return output;
}
int main() {
int input[5] = {2,4,6,8,10};
int *out = (int*)malloc(6*sizeof(int));
out = sortedArrayInsertNumber(input, 5, 12);
for(int i=0;i<6;++i) {
printf("%d\n", out[i]);
}
}
When I try out this test case, it gives me a runtime error sometimes. Other times, it outputs:
2
4
6
8
10
12
Which makes no sense?
Also, is there a way to make my code better?
The question requires me to insert a value num at its appropriate index.
In the function insertAt there is at least two attempts to access memory beyond the array. The first one is in the statement
else if (num >= Arr[len])
^^^^
And the second one is in the statement
else if (num >= Arr[i - 1] && num <= Arr[i])
^^^^^^
when the variable i is equal to 0.
The function sortedArrayInsertNumber starts with a potential memory leak when the variable len is equal at least 0 because at first a memory is allocated and then there is exit from the function with A NULL pointer.
int * sortedArrayInsertNumber(int *Arr, int len, int num){
int *output = (int *)malloc((len + 1)*sizeof(int));
if (len <= 0)
return NULL;
// ...
Also it is a bad idea when the length of an array has type int instead of type size_t.
To call several times the function insertAt in the function sortedArrayInsertNumber does not make sense and breaks the loop.
In the main there is again a memory leak
int *out = (int*)malloc(6*sizeof(int));
out = sortedArrayInsertNumber(input, 5, 12);
The program can look the following way.
#include <stdio.h>
#include<stdlib.h>
size_t insertAt( const int *a, size_t n, int num )
{
size_t i = 0;
while ( i < n && !( num < a[i] ) ) i++;
return i;
}
int * sortedArrayInsertNumber( const int *a, size_t n, int num )
{
int *b = malloc( ( n + 1 ) * sizeof( int ) );
if ( b )
{
size_t pos = insertAt( a, n, num );
size_t i = 0;
for ( ; i < pos; i++ ) b[i] = a[i];
b[i] = num;
for ( ; i < n; i++ ) b[i+1] = a[i];
}
return b;
}
int main(void)
{
int input[] = { 2, 4, 6, 8, 10 };
const size_t N = sizeof( input ) / sizeof( *input );
int *out = sortedArrayInsertNumber( input, N, 12 );
if ( out )
{
for ( size_t i = 0; i < N + 1; i++ )
{
printf( "%d ", out[i] );
}
putchar( '\n' );
}
free( out );
return 0;
}
Its output is
2 4 6 8 10 12
Instead of the loops in the function sortedArrayInsertNumber you can use standard C function memcpy declared in header <string.h>.

Sorting array only with while and if

I get a message when I try to run the program. Why?
Segmentation fault
my code:
#include <stdio.h>
void sort_array(int *arr, int s);
int main() {
int arrx[] = { 6, 3, 6, 8, 4, 2, 5, 7 };
sort_array(arrx, 8);
for (int r = 0; r < 8; r++) {
printf("index[%d] = %d\n", r, arrx[r]);
}
return(0);
}
sort_array(int *arr, int s) {
int i, x, temp_x, temp;
x = 0;
i = s-1;
while (x < s) {
temp_x = x;
while (i >= 0) {
if (arr[x] > arr[i]) {
temp = arr[x];
arr[x] = arr[i];
arr[i] = temp;
x++;
}
i++;
}
x = temp_x + 1;
i = x;
}
}
I think that the problem is in the if statement.
What do you think? Why does it happen? I think that I use in positive way with the pointer to the array.
Thank you!
This loop in your program
while (i >= 0) {
//...
i++;
}
does not make sense because i is increased unconditionly.
The program can look the following way
#include <stdio.h>
void bubble_sort( int a[], size_t n )
{
while ( !( n < 2 ) )
{
size_t i = 0, last = 1;
while ( ++i < n )
{
if ( a[i] < a[i-1] )
{
int tmp = a[i];
a[i] = a[i-1];
a[i-1] = tmp;
last = i;
}
}
n = last;
}
}
int main( void )
{
int a[] = { 6, 3, 6, 8, 4, 2, 5, 7 };
const size_t N = sizeof( a ) / sizeof( *a );
for ( size_t i = 0; i < N; i++ ) printf( "%d ", a[i] );
printf( "\n" );
bubble_sort( a, N );
for ( size_t i = 0; i < N; i++ ) printf( "%d ", a[i] );
printf( "\n" );
return 0;
}
The program output is
6 3 6 8 4 2 5 7
2 3 4 5 6 6 7 8
If you want that the sorting function had only one while loop then you can implement it the following way
void bubble_sort( int a[], size_t n )
{
size_t i = 0;
while ( ++i < n )
{
if ( a[i] < a[i-1] )
{
int tmp = a[i];
a[i] = a[i-1];
a[i-1] = tmp;
i = 0;
}
}
}
In your inner loop, you increment i beyond the size of the array. Your algorithm should require you to decrement i instead, but I am not sure this would be enough to fix the sorting algorithm.
You should first try to implement Bubble sort with a single while loop where you compare adjacent items and step back whenever you swap them.

Resources