Suffix array implementation - c

Can anyone give me hint how to develop the suffix array part? I know the concept; LCP array design but I am not getting how to implement it in C? Can anyone please help? I know the uses, algorithm of the suffix array as I have read a lot on it. I want the implementation hint of the part in which I have to sort the suffixes of a string.
For example, if the string is given as 'banana', then:
Data structure should be like this:($ -> mnemonic)
banana
anana
nana
ana
na
a
$
Then, after keeping it, I need to sort it, which means the lowest substring should be at the top most point. So how to do that? Strings can be of large length. How to do this thing? Can you please give hint or link? I have tried and now thinking from your help.

You may want to have a look at this article
At the end of the article, you will find this implementation of the suffix tree:
NOTE: the following code is C++, however if you substitute the new[] and delete[] operators with C like heap allocation you could reuse it very easily.
inline bool leq(int a1, int a2, int b1, int b2) { // lexic. order for pairs
return(a1 < b1 || a1 == b1 && a2 <= b2);
} // and triples
inline bool leq(int a1, int a2, int a3, int b1, int b2, int b3) {
return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3));
}
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
static void radixPass(int* a, int* b, int* r, int n, int K)
{ // count occurrences
int* c = new int[K + 1]; // counter array
for (int i = 0; i <= K; i++) c[i] = 0; // reset counters
for (int i = 0; i < n; i++) c[r[a[i]]]++; // count occurences
for (int i = 0, sum = 0; i <= K; i++) { // exclusive prefix sums
int t = c[i]; c[i] = sum; sum += t;
}
for (int i = 0; i < n; i++) b[c[r[a[i]]]++] = a[i]; // sort
delete [] c;
}
// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n]=s[n+1]=s[n+2]=0, n>=2
void suffixArray(int* s, int* SA, int n, int K) {
int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2;
int* s12 = new int[n02 + 3]; s12[n02]= s12[n02+1]= s12[n02+2]=0;
int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
int* s0 = new int[n0];
int* SA0 = new int[n0];
// generate positions of mod 1 and mod 2 suffixes
// the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
for (int i=0, j=0; i < n+(n0-n1); i++) if (i%3 != 0) s12[j++] = i;
// lsb radix sort the mod 1 and mod 2 triples
radixPass(s12 , SA12, s+2, n02, K);
radixPass(SA12, s12 , s+1, n02, K);
radixPass(s12 , SA12, s , n02, K);
// find lexicographic names of triples
int name = 0, c0 = -1, c1 = -1, c2 = -1;
for (int i = 0; i < n02; i++) {
if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) {
name++; c0 = s[SA12[i]]; c1 = s[SA12[i]+1]; c2 = s[SA12[i]+2];
}
if (SA12[i] % 3 == 1) { s12[SA12[i]/3] = name; } // left half
else { s12[SA12[i]/3 + n0] = name; } // right half
}
// recurse if names are not yet unique
if (name < n02) {
suffixArray(s12, SA12, n02, name);
// store unique names in s12 using the suffix array
for (int i = 0; i < n02; i++) s12[SA12[i]] = i + 1;
} else // generate the suffix array of s12 directly
for (int i = 0; i < n02; i++) SA12[s12[i] - 1] = i;
// stably sort the mod 0 suffixes from SA12 by their first character
for (int i=0, j=0; i < n02; i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
radixPass(s0, SA0, s, n0, K);
// merge sorted SA0 suffixes and sorted SA12 suffixes
for (int p=0, t=n0-n1, k=0; k < n; k++) {
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
int i = GetI(); // pos of current offset 12 suffix
int j = SA0[p]; // pos of current offset 0 suffix
if (SA12[t] < n0 ?
leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]) :
leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
{ // suffix from SA12 is smaller
SA[k] = i; t++;
if (t == n02) { // done --- only SA0 suffixes left
for (k++; p < n0; p++, k++) SA[k] = SA0[p];
}
} else {
SA[k] = j; p++;
if (p == n0) { // done --- only SA12 suffixes left
for (k++; t < n02; t++, k++) SA[k] = GetI();
}
}
}
delete [] s12; delete [] SA12; delete [] SA0; delete [] s0;
}

This might help you.
http://code.google.com/p/code-share/source/browse/trunk/cpp/algo/suffix_array/SuffixArray.h
#ifndef _SUFFIX_ARRAY_H
#define _SUFFIX_ARRAY_H
#include<algorithm>
#include<cstring>
#include <stdexcept>
using namespace std;
template<class T>
struct comp_func
{
bool operator()(const T l,const T r)
{
return strcmp(l,r) < 0;
}
};
template<class T =char>
class SuffixArray
{
int len_;
T **data_;
public:
T *operator[](int i)
{
if(i<0 || i>len_)
throw std::out_of_range("Out of range error\n");
return data_[i];
}
SuffixArray(T *str):len_(strlen(str)),data_(new T*[len_])
{
//len_ = strlen(str);
//data_= new T*[len];
for(int i =0;i<len_;++i)
{
data_[i] = &str[i];
cout << data_[i] << endl;
}
std::sort(&data_[0],&data_[len_],comp_func<T *>());
}
void Print()
{
for(int i =0;i<len_;++i)
{
cout << data_[i] << endl;
}
}
};
#endif

Related

dealing with dups in end of the array

This is the task I have got:
I need to write a function (not recursive) which has two parameters.
An array of integers.
An integer representing the size of the array.
The function will move the duplicates to an end of the array.
And will give the size of the different digits.
Example:
5 , 2 , 4 , 5 , 6 , 7 , 2, n = 7
we will get back 5 , 2 , 4 , 6 , 7 , 5 , 2 and 5
We must keep the original sort as it is (which means like in example 5 must)
It does not matter how we sort the duplicates ones but just keep the sort for the original array as it is)
The function has to print the number of different digits (like in example 5)
The the input range of numbers in array [-n,n]
I can only use 1 additional array for help.
It has to be O(n)
I tried it so many times and feel like am missing something. Would appreciate any advice/suggestions.
int moveDup(int* arr, int n)
{
int* C = (int*)calloc(n * 2 + 1, sizeof(int));
assert(C);
/*int* count = C + n;*/
int *D = arr[0];
int value = 0, count = 0;
for (int i = 0; i < n; i++)
{
value = arr[i];
if (C[value + n] == 0)
{
*D = arr[i];
D++;
count++;
}
C[value + n] = C[value + n] + 1;
}
while (1 < C[value + n])
{
*D = i;
D++;
C[value + n]--;
}
free(C);
return count;
}
This algorithm will produce the required results in O(n) arithmetic complexity:
Input is an array A with n elements indexed from A0 to An−1 inclusive. For each Ai, −n ≤ Ai ≤ n.
Create an array C that can be indexed from C−n to C+n, inclusive. Initialize C to all zeros.
Define a pointer D. Initialize D to point to A0.
For 0 ≤ i < n:
If CAi=0, copy Ai to where D points and advance D one element.
Increment CAi.
Set r to the number of elements D has been advanced from A0.
For −n ≤ i ≤ +n:
While 1 < CAi:
Copy i to where D points and advance D one element.
Decrement CAi.
Release C.
Return r. A contains the required values.
A sample implementation is:
#include <stdio.h>
#include <stdlib.h>
#define NumberOf(a) (sizeof (a) / sizeof *(a))
int moveDuplicates(int Array[], int n)
{
int *memory = calloc(2*n+1, sizeof *Array);
if (!memory)
{
fprintf(stderr, "Error, unable to allocate memory.\n");
exit(EXIT_FAILURE);
}
int *count = memory + n;
int *destination = Array;
for (int i = 0; i < n; ++i)
// Count each element. If it is unique, move it to the front.
if (!count[Array[i]]++)
*destination++ = Array[i];
// Record how many unique elements were found.
int result = destination - Array;
// Append duplicates to back.
for (int i = -n; i <= n; ++i)
while (0 < --count[i])
*destination++ = i;
free(memory);
return result;
}
int main(void)
{
int Array[] = { 5, 2, 4, 5, 6, 7, 2 };
printf("There are %d different numbers.\n",
moveDuplicates(Array, NumberOf(Array)));
for (int i = 0; i < NumberOf(Array); ++i)
printf(" %d", Array[i]);
printf("\n");
}
here is the right answer, figured it out by myself.
int moveDup(int* arr, int n)
{
int* seen_before = (int*)calloc(n * 2 + 1, sizeof(int));
assert(seen_before);
int val = 0, count = 0, flag = 1;
int j = 0;
for (int i = 0; i < n; i++)
{
val = arr[i];
if (seen_before[arr[i] + n] == 0)
{
seen_before[arr[i] + n]++;
count++;
continue;
}
else if (flag)
{
j = i + 1;
flag = 0;
}
while (j < n)
{
if (seen_before[arr[j] + n] == 0)
{
count++;
seen_before[arr[j] + n]++;
swap(&arr[i], &arr[j]);
j++;
if (j == n)
{
free(seen_before);
return count;
}
break;
}
/*break;*/
j++;
if (j == n)
{
free(seen_before);
return count;
}
}
}
}
second right answer
int* mem = (int*)calloc(2 * n + 1, sizeof * arr);
assert(mem);
int* count = mem + n;
int* dest = arr;
for (i = 0; i < n; ++i)
{
if (count[arr[i]]++ == 0)
{
*dest = arr[i];
*dest++;
}
}
res = dest - arr;
for (i = -n; i <= n; ++i)
{
while (0 < --count[i])
{
*dest++ = i;
}
}
free(mem);
return res;

Access violation writing location 0x00000000 pointers in recursive function

This is my code calculating the determinant of a Matrix of complex numbers. I have to define matrices like this(double pointer) because I am working with a really old project with C.(dont ask why)
int i, j, k, c1, c2;
typedef struct {
double re;
double im;
} cmplx;
cmplx** Create2DMatrixCmplx(int d1, int d2)
{
cmplx **matCC = (cmplx**)malloc(d1 * sizeof(cmplx*));
for (i = 0; i < d1; i++)
matCC[i] = ((cmplx*)malloc(d2 * sizeof(cmplx)));
if ( matCC == NULL )
{
printf("Error: out of memory.\n");
return;
}
return matCC;
}
void matrixCAssign(cmplx** a1, cmplx** a2, int l1, int l2)
{
for (i = 0; i < l1; i++)
for (j = 0; j < l2; j++)
a1[i][j] = a2[i][j];
}
cmplx determinantC(cmplx **A, int len)
{
cmplx det, temp;
cmplx** Matrix = Create2DMatrixCmplx(len, len);
cmplx** Minor = Create2DMatrixCmplx(len, len);
matrixCAssign( Matrix, A, len, len );
if(len == 1)
{
det = Matrix[0][0];
}
else if(len == 2)
{
det = (Matrix[0][0] * Matrix[1][1]) - (Matrix[0][1] * Matrix[1][0]));
}
else
{
for(i = 0 ; i < len ; i++)
{
c1 = 0, c2 = 0;
for(j = 0 ; j < len ; j++)
{
for(k = 0 ; k < len ; k++)
{
if(j != 0 && k != i)
{
Minor[c1][c2] = Matrix[j][k];
c2++;
if( c2 > len-2 )
{
c1++;
c2=0;
}
}
}
}
temp = determinantC(Minor,len-1);
det += ( Matrix[0][i] * temp) * O;
O = -1 * O;
}
}
return det;
}
main()
{
cmplx **A= Create2DMatrixCmplx(1024, 1024);
// set data to A
cmplx det = determinantC( A, 1024 );
}
I get Writing Access error in this line:
Minor[c1][c2] = Matrix[j][k];
when the error happen, the values are:
c1=337, c2=338, len=974,
"Minor" dimentions are len*len in each iteration and c1, c2 values are smaller.
So I guess the problem must be with pointers. I am making a new instance of "cmplx** Matrix" in each iteration but I am calling the function with the pointer type. Do I get a new instance of the object when I call it with pointer? If so why is it no problem in iterations before? Value of len at the begining is 1024.
Can somebody help me find the problem? I can not see it.

I need help creating a k-combinations algorithm non-recursively

I've looked around online for an non-recursive k-combinations algorithm, but have had trouble understanding all of the reindexing involved; The code I've found online is not commented well, or crashes.
For example, if I have the collection, {'a', 'b', 'c', 'd', 'e'} and I want to find a 3 combinations; ie,
abc
abd
abe
acd
ace
ade
bcd
bce
bde
cde
How can I implement an algorithm to do this? When I write down the general procedure, this it is clear. That is; I increment the last element in a pointer until it points to 'e', increment the second to last element and set the last element to the second to last element + 1, then increment the last element again until it reaches 'e' again, and so on and so forth, as illustrated by how I printed the combinations. I looked at Algorithm to return all combinations of k elements from n for inspiration, but my code only prints 'abc'. Here is a copy of it:
#include <stdio.h>
#include <stdlib.h>
static void
comb(char *buf, int n, int m)
{
// Initialize a pointer representing the combinations
char *ptr = malloc(sizeof(char) * m);
int i, j, k;
for (i = 0; i < m; i++) ptr[i] = buf[i];
while (1) {
printf("%s\n", ptr);
j = m - 1;
i = 1;
// flag used to denote that the end substring is at it's max and
// the j-th indice must be incremented and all indices above it must
// be reset.
int iter_down = 0;
while((j >= 0) && !iter_down) {
//
if (ptr[j] < (n - i) ) {
iter_down = 1;
ptr[j]++;
for (k = j + 1; k < m; k++) {
ptr[k] = ptr[j] + (k - j);
}
}
else {
j--;
i++;
}
}
if (!iter_down) break;
}
}
int
main(void)
{
char *buf = "abcde";
comb(buf, 5, 3);
return 1;
}
The very big problem with your code is mixing up indices and values. You have an array of chars, but then you try to increment the chars as if they were indices into the buffer. What you really need is an array of indices. The array of chars can be discarded, since the indices provide all you need, or you can keep the array of chars separately.
I found a psuedocode description here, http://www4.uwsp.edu/math/nwodarz/Math209Files/209-0809F-L10-Section06_03-AlgorithmsForGeneratingPermutationsAndCombinations-Notes.pdf
and implemented it in C by
#include <stdlib.h>
#include <stdio.h>
// Prints an array of integers
static void
print_comb(int *val, int len) {
int i;
for (i = 0; i < len; i++) {
printf("%d ", val[i]);
}
printf("\n");
}
// Calculates n choose k
static int
choose(int n, int k)
{
double i, l = 1.0;
double val = 1.0;
for (i = 1.0; i <= k; i++) {
l = ((double)n + 1 - i) / i;
val *= l;
}
return (int) val;
}
static void
comb(int n, int r)
{
int i, j, m, max_val;
int s[r];
// Initialize combinations
for (i = 0; i < r; i++) {
s[i] = i;
}
print_comb(s, r);
// Iterate over the remaining space
for (i = 1; i < choose(n, r); i++) {
// use for indexing the rightmost element which is not at maximum value
m = r - 1;
// use as the maximum value at an index, specified by m
max_val = n - 1; // use for
while(s[m] == max_val) {
m--;
max_val--;
}
// increment the index which is not at it's maximum value
s[m]++;
// iterate over the elements after m increasing their value recursively
// ie if the m-th element is incremented, all elements afterwards are
// incremented by one plus it's offset from m
// For example, this is responsible for switching 0 3 4 to 1 2 3 in
// comb(5, 3) since 3 and 4 in the first combination are at their maximum
// value
for (j = m; j < r - 1; j++) {
s[j + 1] = s[j] + 1;
}
print_comb(s, r);
}
}
int
main(void)
{
comb(5, 3);
return 1;
}

Find the maximum value in an array that's the sum of 3 other values

Given a very large integer array, I need to find the maximum value of a4, such that:
a4 = a1 + a2 + a3
Where the ai's are all values in the array.
How would I do this?
Note: Using 4 for loops is not the ideal solution.
There is a simple (expected) O(n^2) solution:
Iterate through all pairs of array elements (a, b) and store their sum in a hash table.
Iterate through all candidate pairs (a4, a1) and check whether a4 - a1 is in the table. The maximum over all valid a4 is the solution. Of course you should process a4 from largest to smallest, but that doesn't affect the asymptotics.
If you want to avoid using an element more than once, you need some additional information stored in the hash table so that you can filter out pairs that colide with a1 or a4 fast.
If the integers in the array are bounded (max - min <= C), it might be useful to know that you can achieve O(n + C log C) using a discrete fourier transform (solvable using FFT).
First of all you should ascending sort your array. then start from the last (biggest) member of the array.
For example, for [1,2,3,777,999,111,665] you'll have sortedArray = {1,2,3,111,665, 777, 999}
then select 999 as a4 and try to create it with other members. So you should select as a3 and try to create (999 - 777) = 222 as a1+a2 since your array is sorted you only need to consider subarray {1,2,3,111}. if there is no pair satisfying this condition, try next biggest member (777) and retry above scenario to find the solution
Based on #Niklas answer, I wrote the following program in Java.
public static int sumOfThree(int [] arr) {
int arrlen = arr.length;
int arrijv [][] = new int [arrlen * (arrlen - 1) / 2][3];
int arrijvlen = 0;
quickSortInDescendingOrder(arr, 0, arrlen - 1); // sorts array into descending order
System.out.println(Arrays.toString(arr));
// populates array with sum of all pair values
for (int i = 0; i < arrlen - 1; i++) {
for (int j = i + 1; j < arrlen; j++) {
// if ((arr[i] + arr[j]) < arr[0]) { // can be added if no negative values
arrijv[arrijvlen][0] = i;
arrijv[arrijvlen][1] = j;
arrijv[arrijvlen][2] = arr[i] + arr[j];
arrijvlen++;
// }
}
}
System.out.print('[');
for (int i = 0; i < arrijvlen; i++) {
System.out.print(arrijv[i][2] + " ");
}
System.out.print("]\n");
// checks for a match of difference of other pair in the populated array
for (int i = 0; i < arrlen - 1; i++) {
for (int j = i + 1; j < arrlen; j++) {
int couldBeA4 = arr[i];
if(isAvailable(arrijv, arrijvlen, couldBeA4 - arr[j], i, j)){
System.out.println(" i3-" + j + " i4-" + i);
return couldBeA4;
}
}
}
return -1;
}
private static boolean isAvailable(int[][] arrijv, int len, int diff, int i, int j) {
boolean output = false;
// returns true if the difference is matched with other combination of i,j
for (int k = 0; k < len; k++) {
if (arrijv[k][2] == diff) {
int pi = arrijv[k][0];
int pj = arrijv[k][1];
if (pi != i && pi != j && pj != i && pj != j) {
System.out.print("i1-" + pj + " i2-" + pi);
output = true;
break;
}
}
}
return output;
}
private static void quickSortInDescendingOrder(int[] array, int low, int high) { // solely used for sorting input array into descending array
if (low < high) {
int partition = getPartitionIndex(array, low, high);
quickSortInDescendingOrder(array, low, partition);
quickSortInDescendingOrder(array, partition + 1, high);
}
}
private static int getPartitionIndex(int[] arr, int lo, int hi) {
int pivot = arr[(lo + hi) / 2];
while (true) {
while (arr[lo] > pivot) {
lo++;
}
while (arr[hi] < pivot) {
hi--;
}
if (arr[lo] == arr[hi]) { // can be removed if no duplicate values
return lo;
} else if (lo < hi) {
int temp = arr[lo];
arr[lo] = arr[hi];
arr[hi] = temp;
} else {
return hi;
}
}
}
Please verify that it works and suggest for further improvements.

Find kthsmallest element from two sorted arrays

I have implemented a working function that does it. But it is not very efficient because it copies a new copy in each call. I am having trouble converting it to using only a_start, a_end, b_start, b_end. I have tried a couple of ways to convert it, but none of them are working for all cases. How can I convert it so that it takes in a start and end pointers for both array a and b?
I have tried the following and modified k-i-1 and k-j-1 so that it only takes in k, but did not work.
int m = a_right-a_left, n=b_right-b_left;
int i = (a_left+a_right)/2;
or int i = (int)((m* (k-1)) / (m+n) );
Below is my working code using a new copy of array each call.
public static int kthSmallest(int[] a, int[] b, int k) {
if (a.length==0)
return b[k-1];
else if (b.length==0)
return a[k-1];
else if (b.length<a.length)
return kthSmallest(b, a, k);
// make sure i + j = k - 1
int m = a.length, n=b.length;
int i = (int)((double)m / (m+n) * (k-1)); // make sure i won't be out of bounds
int j = k - 1 - i;
int bj_1 = 0, ai_1 = 0;
if (i==0) { ai_1 = Integer.MIN_VALUE; } // in case i = 0, outOfBound
else { ai_1 = a[i-1]; }
if (j==0) { bj_1 = Integer.MIN_VALUE; } // in case j = 0, outOfBound
else { bj_1 = b[j-1]; }
if (bj_1 < a[i] && a[i] < b[j]) // kth smallest found, b[j-1] < a[i] < b[j]
return a[i];
if (ai_1 < b[j] && b[j] < a[i]) // kth smallest found, a[i-1] < b[j] < a[i]
return b[j];
if ( a[i] < b[j] ) // if true, exclude a's lower bound (if 2 arrays merged, a's lower bound must
// reside before kth smallest, so also update k.
// also exclude b's upper bound, since they are all greater than kth element.
return kthSmallest(Arrays.copyOfRange(a, i+1, a.length), Arrays.copyOfRange(b, 0, j), k-i-1);
else
return kthSmallest(Arrays.copyOfRange(a, 0, i), Arrays.copyOfRange(b, j+1, b.length), k-j-1);
}
Here's O(log a.length + log b.length) algorithm from the answer to "How to find the kth smallest element in the union of two sorted arrays?" question. It is a direct port from C++ recursive implementation to Java:
public static int ksmallest(int[] a, int[] b,
int a1, int a2, int b1, int b2,
int k) {
int lena = a2 - a1;
int lenb = b2 - b1;
assert (0 <= k && k < (lena + lenb));
if (lena == 0) {
return b[b1 + k];
}
if (lenb == 0) {
return a[a1 + k];
}
int mida = lena / 2;
int midb = lenb / 2;
int ma = a[a1 + mida];
int mb = b[b1 + midb];
if ((mida + midb) < k) {
return (mb < ma) ?
ksmallest(a, b, a1, a2, b1 + midb + 1, b2, k - (midb + 1)) :
ksmallest(a, b, a1 + mida + 1, a2, b1, b2, k - (mida + 1));
}
else {
return (mb < ma) ?
ksmallest(a, b, a1, a1 + mida, b1, b2, k) :
ksmallest(a, b, a1, a2, b1, b1 + midb, k);
}
}
There is also C++ iterative implementation with the same time complexity (without recursion). It could be ported to Java the same way as the recursive version.
Sanity check for the recursive version:
/** concatenate a, b arrays */
public static int[] concatenate(int[] a, int[] b) {
int lena = a.length;
int lenb = b.length;
int[] c = new int[lena + lenb];
System.arraycopy(a, 0, c, 0, lena);
System.arraycopy(b, 0, c, lena, lenb);
return c;
}
public static void main(String[] args) {
int a[] = {0, 3, 7, 8};
int b[] = {0, 2, 3};
int c[] = concatenate(a, b);
Arrays.sort(c);
for (int n = 0; n < (a.length + b.length); n++) {
int k = ksmallest(a, b, 0, a.length, 0, b.length, n);
if (k != c[n]) {
System.out.println(n + ": expected " + c[n] + " got " + k);
}
}
}
On success, it prints nothing.
An algorithm with O(n) but simple to understand;
//both arrays are sorted
private int getKthSmallestElement(int[] array1, int[] array2, int k) {
int elem=-1;
int index1=0,index2=0;
while(k != 0 && (index1<array1.length) && (index2 < array2.length))
{
if(array1[index1] < array2[index2])
{
index1++;
}
else
index2++;
k--;
}
if((index1<array1.length) && (index2 < array2.length))
return array1[index1] > array2[index2] ? array2[index2] :array1[index1] ;
else
{
if(index1 >= array1.length)
{
return array2[index2+k];
}
else{
return array1[index1+k];
}
}
}
Here is a O((logn)^2) simple recursive solution without copying :-
public class KthElement {
public static int binSearch(int[] arr,int low,int high,int key) {
int mid=0;
while(high>=low) {
mid = (high+low)/2;
if(arr[mid]==key) {
return(mid);
}
else if(arr[mid]<key) {
low = mid+1;
}
else {
high = mid-1;
}
}
if(arr[mid]>key) {
return(mid-1);
}
return(mid);
}
public static int kthElement(int[] arr1,int[] arr2,int s1,int h1,int s2,int h2,int k) {
int len1 = (h1-s1+1);
int len2 = (h2-s2+1);
if(len1<=0) {
return(arr2[s2+k-1]);
}
if(len2<=0) {
return(arr1[s1+k-1]);
}
if(k>(len1+len2)) {
return(-1);
}
int mid = (s1+h1)/2;
int i = binSearch(arr2,s2,h2,arr1[mid]);
int size = mid+i-s1-s2+2;
//System.out.println(mid+" "+i+" "+size);
if(size==k){
return(arr1[mid]);
}
if(size>k) {
return(kthElement(arr1, arr2, s1,mid-1, s2,i, k));
}
else {
return(kthElement(arr1, arr2, mid+1,h1,i+1,h2, k-size));
}
}
public static void main(String[] args) {
int[] arr1 = {1,3,5,7,9,11,13};
int[] arr2 = {2,4,6,8,10,12};
int k = 6;
System.out.println(k+"th Element : "+kthElement(arr1, arr2,0,arr1.length-1, 0,arr2.length-1, k));
}
}
Here is JAVA iterative solution :
O( log A.length + log B.length ) time complexity
public static int kthsmallest(int []A, int []B, int K){
int begin = Math.max(0,K-B.length); // binary search begin index
int end = Math.min(A.length,K); // binary search end end index
while(begin < end){
// search until mid = k
int mid = begin +(end-begin)/2;
if(mid<A.length && K-mid>0 && A[mid]<B[K-mid-1]){
begin = mid+1;
}else if( mid > 0 && K-mid <B.length && A[mid-1]>B[K-mid]){
end = mid;
}else{
begin=mid;
break;
}
}
if(begin ==0){
return B[K-1];
}else if(begin == K){
return A[K-1];
}else{
return Math.max(A[begin -1],B[K-begin-1]);
}
}
Same logic as mentioned in the question, but changed it a little bit to not create new arrays for every call.
//alow, ahigh are used to maintain the size of a array we are using instead
//of creating a new array. imilarly for blow, bhigh
public static int search(int[] a, int[] b, int alow, int ahigh, int blow, int bhigh, int k){
if (alow>ahigh) // if "a" array is of zero length, then take kth element from "b"
return b[blow+k-1];
else if (blow>bhigh || bhigh-blow == 0) // Similarly, // if "b" array is of zero length, then take kth element from "a"
return a[alow+k-1];
else if ((bhigh-blow)<(ahigh-alow)) //always make sure that a's length is lower length
return search(b, a,blow,bhigh,alow,ahigh, k);
// make sure i + j = k - 1
int m = ahigh-alow+1, n=bhigh-blow+1;
int i = (int)((double)m / (m+n) * (k-1)); // make sure i won't be out of bounds
int j = Math.min(k - 1 - i,n-1);
int bj_1 = 0, ai_1 = 0;
if (i==0) { ai_1 = Integer.MIN_VALUE; } // in case i = 0, outOfBound
else { ai_1 = a[i-1]; }
if (j==0) { bj_1 = Integer.MIN_VALUE; } // in case j = 0, outOfBound
else { bj_1 = b[j-1]; }
if (bj_1 < a[i] && a[i] < b[j]) // kth smallest found, b[j-1] < a[i] < b[j]
return a[i];
if (ai_1 < b[j] && b[j] < a[i]) // kth smallest found, a[i-1] < b[j] < a[i]
return b[j];
if ( a[i] < b[j] ) // if true, exclude a's lower bound (if 2 arrays merged, a's lower bound must
// reside before kth smallest, so also update k.
// also exclude b's upper bound, since they are all greater than kth element.
return search(a, b,alow+i+1,ahigh, blow,j-1, k-i-1);
else
return search(a,b,alow, i-1, blow+j+1,bhigh, k-j-1);
}

Resources