LCP array for Suffix Array - suffix-array

How to compute the LCP array for a suffix array? It doesn't have to be the most efficient. O(n log n) or O(n) will do. Something relatively easy to code if possible.

Here is a simple C++ implementation.
Longest common prefix(LCP) will be saved in lcp[MAX] array :)
char str[MAX];
int n,gap,sa[MAX],pos[MAX],tmp[MAX],lcp[MAX];
// sa stores the sorted index of the suffixes
// pos stores the serial number of a index in the sorted sequence
bool sufCmp(int i, int j)
{
if(pos[i]!=pos[j])
return pos[i]<pos[j];
i+=gap;
j+=gap;
return (i<n&&j<n)?pos[i]<pos[j]:i>j;
}
// Builds the suffix array of the NUL-terminated global `str` by prefix doubling.
// On return sa[r] is the start index of the r-th smallest suffix and pos[i] is
// the rank of the suffix starting at i (pos is the inverse of sa).
// An empty string leaves sa/pos untouched.
void buildSA()
{
    n = strlen(str);
    if (n == 0)
        return;                 // nothing to sort; also avoids reading tmp[-1] below

    // Initial ranks are the character codes themselves.
    for (int i = 0; i < n; i++) {
        sa[i] = i;
        pos[i] = str[i];
    }

    for (gap = 1;; gap *= 2) {
        // Order suffixes by their first 2*gap characters.
        sort(sa, sa + n, sufCmp);

        // Re-rank: neighbours that compare equal share a rank.
        tmp[0] = 0;             // seed explicitly instead of relying on zero-initialised storage
        for (int i = 0; i < n - 1; i++)
            tmp[i + 1] = tmp[i] + sufCmp(sa[i], sa[i + 1]);
        for (int i = 0; i < n; i++)
            pos[sa[i]] = tmp[i];

        // All ranks distinct: the order is final.
        if (tmp[n - 1] == n - 1)
            break;
    }
}
// Fills lcp[] from sa[], pos[] and str as left by buildSA().
// lcp[r] is the length of the longest common prefix of the suffix at sorted
// position r and the one at position r + 1; lcp[n-1] is 0.
// Suffixes are visited in text order so that the match length k found for
// suffix i can be reused (minus one) as the starting point for suffix i + 1.
void buildLCP()
{
    int k = 0;
    for (int i = 0; i < n; ++i) {
        const int rank = pos[i];
        if (rank == n - 1) {
            // Last suffix in sorted order has no successor.
            lcp[rank] = 0;
            continue;
        }
        const int j = sa[rank + 1];
        // The comparison stops at the terminating NUL at the latest, because
        // i and j are different start positions.
        while (str[i + k] == str[j + k])
            ++k;
        lcp[rank] = k;
        if (k > 0)
            --k;
    }
}

Related

bucket sort Implementation without using vector,pointer and counting sort

We want to use Bucket sort to sort numbers between 1 to 2001. the count of numbers can be 10E6.
I know the bucket sort algorithm. But the issue is that in this question, we are not permitted to use variable-length array, vector and pointer. (The only pointer related thing allowed is "pass by reference" of the array) The only solution I found is using using counting sort for each bucket, like the code below, so the code is more like counting sort than the bucket sort: (C language)
#include <stdio.h>
/* Occurrence table: buckets[v / 10][v % 10] counts how often value v was seen. */
int buckets[201][10] = {0};
/* Input buffer filled by main(). */
int numbers[1000001] = {0};

/*
 * Sorts a[0..n-1] in ascending order, in place.
 * Every value is tallied in the global `buckets` table (row = value / 10,
 * column = value % 10); the table is then walked in row/column order and each
 * value is written back as many times as it was counted.  The table is left
 * all-zero again afterwards.
 */
void bucket_sort(int a[], int n)
{
    /* Tally pass. */
    for (int idx = 0; idx < n; idx++) {
        int value = a[idx];
        buckets[value / 10][value % 10] += 1;
    }

    /* Write-back pass, draining each counter as it goes. */
    int out = 0;
    for (int tens = 0; tens < 201; tens++) {
        for (int ones = 0; ones < 10; ones++) {
            for (; buckets[tens][ones] != 0; buckets[tens][ones]--) {
                a[out] = tens * 10 + ones;
                out++;
            }
        }
    }
}
int main() {
int n;
scanf("%d",&n);
if (n==0)
{
return 0;
}
for (int i =0;i<=n-1;i++)
{
scanf("%d",&numbers[i]);
numbers[i];
}
bucket_sort(numbers,n);
for (int i =0;i<=n-1 ;i++)
{
printf("%d\n", numbers[i]);
}
return 0; }
I want to know can bucket sort be implemented without variable-length array, vector and pointer and also without counting sort. Probably using Insertion or Bubble sort. Note that it must be a reasonable bucket-sort algorithm. So defining very big buckets like int bucket [201][1000000]; is also an unacceptable approach.
Given that you can't use variable length arrays or pointers, one of which is required for a bucket sort, your best bet is to go with a counting sort. You only have about 2000 possible values, so create an array with one counter per possible value and, for each value you find, increment the corresponding array element.
/*
 * Sorts a[0..n-1] in ascending order, in place, by counting occurrences.
 * Every element is used directly as an index into a 2002-entry counter
 * array, so values must lie in 0..2001.
 */
void counting_sort(int a[], int n)
{
    int count[2002] = { 0 };

    /* Histogram of the input. */
    for (int k = 0; k < n; k++) {
        count[a[k]] += 1;
    }

    /* Emit values in increasing order, consuming one count per output slot. */
    int value = 0;
    for (int out = 0; out < n; out++) {
        for (; count[value] == 0; value++) {
            /* skip values that do not occur (any more) */
        }
        a[out] = value;
        count[value] -= 1;
    }
}

Radix sort gives wrong answer by changing just one loop of count subroutine

It seems a very trivial problem but after a lot of thinking I still can't figure it out. I wrote these two programs for Radix sort.
Code 1
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#define BUCKET_SIZE 10
/* Prints the first n elements of arr on one line, each followed by a space,
 * then a newline. */
void prin(int* arr,int n)
{
    for (int idx = 0; idx < n; idx++) {
        printf("%d ", arr[idx]);
    }
    printf("\n");
}
/* Returns the largest of arr[0..n-1], but never less than 0: the running
 * maximum starts at 0, so an empty or all-negative array yields 0. */
int maxi(int* arr,int n)
{
    int best = 0;
    for (int idx = 0; idx < n; idx++) {
        best = (arr[idx] > best) ? arr[idx] : best;
    }
    return best;
}
/*
 * One stable counting pass of the radix sort.
 * Orders the n elements of arr by the decimal digit selected by k
 * ((arr[i] / k) % 10, so k = 1 picks the units digit, k = 10 the tens, ...)
 * and returns the result in a newly malloc'ed array that the caller must
 * free.  arr itself is not modified.  Returns NULL if memory cannot be
 * allocated.
 * NOTE(review): a negative element would give a negative digit index; the
 * callers shown only make sense for non-negative input - confirm.
 */
int* count(int *arr,int n,int k)
{
    /* One counter per digit value; calloc zero-fills them. */
    int *counts = calloc(BUCKET_SIZE, sizeof *counts);
    int *output = malloc((size_t)n * sizeof *output);
    int i, index;

    if (counts == NULL || output == NULL) {
        free(counts);
        free(output);
        return NULL;
    }

    /* Histogram of the selected digit. */
    for (i = 0; i < n; i++) {
        index = (arr[i] / k) % 10;
        counts[index]++;
    }

    /* Running totals: counts[d] becomes one past the last output slot for
     * digit d.  Start at 1 - there is no counts[-1] to add. */
    for (i = 1; i < BUCKET_SIZE; i++) {
        counts[i] += counts[i - 1];
    }

    /* Walk the input backwards while filling each bucket from its end, so
     * elements with equal digits keep their relative order. */
    for (i = n - 1; i >= 0; i--) {
        index = (arr[i] / k) % 10;
        output[--counts[index]] = arr[i];
    }

    free(counts);
    return output;
}
/*
 * Least-significant-digit radix sort built on count().
 * Returns an array holding the elements of arr in ascending order.  The
 * result is a fresh allocation the caller must free, except when maxi()
 * reports 0, in which case no pass runs and arr itself is returned.  The
 * caller's arr is never modified or freed.  Returns NULL if a pass fails to
 * allocate.
 */
int* radixsort(int* arr,int n)
{
    int *current = arr;
    int remaining = maxi(arr, n);   /* one pass per decimal digit of the maximum */
    int k = 1;

    while (remaining > 0) {
        int *next = count(current, n, k);

        /* Release the previous pass's buffer, but never the caller's array. */
        if (current != arr) {
            free(current);
        }
        if (next == NULL) {
            return NULL;
        }
        current = next;

        remaining /= 10;
        /* Only advance k when another pass follows; multiplying after the
         * last pass would overflow int for 10-digit maxima. */
        if (remaining > 0) {
            k *= 10;
        }
    }
    return current;
}
/*
 * Reads a count n and n integers, radix-sorts them and prints the result.
 * Returns 0 on success and 1 on malformed input or allocation failure.
 */
int main(void)
{
    int n, i;
    int *arr;
    int *sorted;

    if (scanf("%d", &n) != 1 || n < 0) {
        return 1;
    }
    arr = malloc((size_t)n * sizeof *arr);
    if (arr == NULL && n > 0) {
        return 1;
    }
    for (i = 0; i < n; i++) {
        if (scanf("%d", arr + i) != 1) {
            free(arr);
            return 1;
        }
    }

    sorted = radixsort(arr, n);
    if (sorted == NULL && n > 0) {
        free(arr);
        return 1;
    }
    prin(sorted, n);

    /* radixsort() may hand back either a new buffer or arr itself. */
    if (sorted != arr) {
        free(sorted);
    }
    free(arr);
    return 0;
}
Now if I change the sort subroutine like below, this code will not sort the given array and I can't figure why this happened, I am still traversing the whole array so and I am still calculating the right index so my elements should be filled in the right place and I should have a sorted array.
Code 2
Only count function last loop changed.
/*
 * Variant of the counting pass whose final loop walks the input from front
 * to back while still filling every bucket from its last slot downwards, so
 * elements that share a digit come out in the reverse of their input order.
 * Returns a newly malloc'ed array of n ints; arr itself is not modified.
 */
int* count(int *arr,int n,int k)
{
int* count,i,index;
int* output;
/* NOTE(review): only BUCKET_SIZE-1 counters are allocated, yet the loops
 * below index count[0] .. count[BUCKET_SIZE-1]. */
count=(int*)calloc(BUCKET_SIZE-1,sizeof(int));
output=(int*)malloc(n*sizeof(int));
/* Histogram of the decimal digit selected by k. */
for(i=0;i<n;i++)
{
index=(arr[i]/k)%10;
count[index]++;
}
/* Running totals: count[d] becomes one past the last output slot of digit d.
 * NOTE(review): the first iteration (i == 0) reads count[-1]. */
for(i=0;i<BUCKET_SIZE;i++)
count[i]+=count[i-1];
/* Scatter, reading the input front to back but writing each bucket back to
 * front: the first element seen for a digit lands in that bucket's last slot. */
for(i=0;i<n;i++)
{
index=(arr[i]/k)%10;
output[count[index]-1]=arr[i];
count[index]--;
}
/* The counter array is not freed before returning. */
return output;
}
When I am doing just counting sort both functions work well. Can someone point out where I am going wrong with radix sort, or what I am missing, and why both work well in counting sort?
Thanks.
In your final loop in your count function,
when these lines copy the contents of each "bucket",
they write the last element of the output "bucket" first,
followed by the next-to-last, ending with the first element:
output[count[index]-1]=arr[i];
count[index]--;
In the first version of your program, since you visit the elements of the input array starting at the end of the array and working your way back toward the beginning,
you encounter the last element of each bucket first (and therefore put it in the last position in the output bucket), then the next-to-last element
(which you put in the next-to-last position in the output),
and so forth. The first element of each bucket is the last copied
and is copied to the first position in the bucket.
In the second version of your program, you continue to fill in the spaces in each output bucket from back to front, but you read the input from front to back. This has the result of putting the first element of each bucket in the last position within that bucket, and the last element of the bucket in the first position.
That is, each time you run the count function it reverses the order of elements within each bucket.
If you want to copy the input array reading it from front to back,
you need to fill in each output bucket from front to back
by using ++count[index] instead of --count[index].
You also have to start each entry of count[index] at a lower number so that you write to the correct locations.
Aside: your program does a lot more allocation than it needs to, and doesn't free any memory, so you have a potentially massive memory leak.
You might consider passing already-allocated arrays into count instead of always allocating new ones.
Here is a front to back example, that also replaces the original array with a sorted array, freeing the original array. An alternative would be to do a one time allocation of a second working array, radix sort back and forth between original and working arrays, then keep the sorted array, and free the "other" array.
#include <stdio.h>
#include <stdlib.h>
#define BUCKET_SIZE 10
/* Writes arr[0..n-1] to stdout on a single line, a space after every value,
 * terminated by a newline. */
void prin(int* arr, int n)
{
    const int *cursor = arr;
    int remaining = n;

    while (remaining > 0) {
        printf("%d ", *cursor);
        cursor++;
        remaining--;
    }
    printf("\n");
}
/* Largest element of arr[0..n-1], floored at 0 (the initial value of the
 * running maximum); returns 0 for n <= 0. */
int maxi(int* arr, int n)
{
    int largest = 0;
    int idx = n - 1;

    while (idx >= 0) {
        if (largest < arr[idx]) {
            largest = arr[idx];
        }
        idx--;
    }
    return largest;
}
/*
 * Stable counting pass on the decimal digit selected by k.
 * Reads the array at *parr front to back, writes the reordered elements into
 * a freshly malloc'ed array, frees the original array and stores the new one
 * in *parr.
 */
void count(int** parr, int n, int k)
{
    int *src = *parr;
    int *slots = calloc(BUCKET_SIZE, sizeof(int));
    int *dst = malloc(n*sizeof(int));
    int pos;

    /* How many elements carry each digit value. */
    for (pos = 0; pos < n; pos++) {
        slots[(src[pos]/k)%10] += 1;
    }

    /* Exclusive prefix sum: slots[d] becomes the first output index of digit d. */
    int running = 0;
    for (int digit = 0; digit < BUCKET_SIZE; digit++) {
        int here = slots[digit];
        slots[digit] = running;
        running += here;
    }

    /* Front-to-back scatter; each bucket is filled from its first slot upward. */
    for (pos = 0; pos < n; pos++) {
        int digit = (src[pos]/k)%10;
        dst[slots[digit]] = src[pos];
        slots[digit] += 1;
    }

    free(src);
    free(slots);
    *parr = dst;
}
/*
 * Least-significant-digit radix sort: runs one count() pass per decimal digit
 * of the value reported by maxi().  Each pass replaces the array behind *parr,
 * so on return *parr points at the sorted array.
 */
void radixsort(int** parr,int n)
{
    int remaining = maxi(*parr, n);
    int k = 1;

    while (remaining > 0) {
        count(parr, n, k);
        remaining /= 10;
        /* Advance k only if another pass follows: multiplying after the last
         * pass would overflow int when the maximum has 10 digits. */
        if (remaining > 0) {
            k *= 10;
        }
    }
}
/*
 * Reads a count n and n integers, sorts them with radixsort() and prints the
 * result.  Returns 0 on success and 1 on malformed input or allocation
 * failure.
 */
int main()
{
    int n, i;
    int *arr;

    if (scanf("%d", &n) != 1 || n < 0) {
        return 1;
    }
    arr = malloc((size_t)n * sizeof *arr);
    if (arr == NULL && n > 0) {
        return 1;
    }
    for (i = 0; i < n; i++) {
        if (scanf("%d", &arr[i]) != 1) {
            free(arr);
            return 1;
        }
    }
    radixsort(&arr, n);   /* may replace arr with a new allocation */
    prin(arr, n);
    free(arr);
    return 0;
}

How to get the row size of an 2 dim array in C

Hi there, I was trying to make a variation of bucket sort. My program is simple because I'm a beginner in C, but I don't know how to print the 2-dim array after one row has been realloc'ed while putting in the numbers. I have tried int lenght= sizeof(buckets[0])/sizeof(buckets[0][0]) but then lenght=1. My alternative solution is in main, and it is not very dynamic, so can anybody tell me how to get the size of a row in a 2-dim array? If I add another number to the first bucket, the bucket will be too small, so I realloc the bucket, but how can I print it now?
Sorry for the bad english :)
#include <stdlib.h>
#include <stdio.h>
#define MAXZAHL 50
/*
 * qsort() comparator for int elements, ascending.
 * Returns a negative, zero or positive value when *a is less than, equal to
 * or greater than *b.  Uses comparisons rather than subtraction so operands
 * that are far apart (e.g. INT_MIN and INT_MAX) cannot overflow.
 */
int compare(const void * a, const void * b){
    const int lhs = *(const int *)a;
    const int rhs = *(const int *)b;
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Allocates `teile` rows of `teilgr` ints each, all set to 0, prints the
 * (all-zero) table to stdout and returns the array of row pointers.
 * The caller owns the result and must free every row and then the pointer
 * array.  Returns NULL if an allocation fails.  The parameter l is not used.
 */
int** init_bucket(int l,int teile,int teilgr){
    (void)l;

    /* The outer array holds row POINTERS, so size it by the pointer type. */
    int **bucket = malloc((size_t)teile * sizeof *bucket);
    if (bucket == NULL && teile > 0) {
        return NULL;
    }

    for (int i = 0; i < teile; i++) {
        /* calloc hands back zero-filled rows. */
        bucket[i] = calloc((size_t)teilgr, sizeof *bucket[i]);
        if (bucket[i] == NULL && teilgr > 0) {
            while (i-- > 0) {
                free(bucket[i]);
            }
            free(bucket);
            return NULL;
        }
    }

    /* Show the freshly initialised table, one row per line. */
    for (int i = 0; i < teile; i++) {
        for (int j = 0; j < teilgr; j++) {
            printf("%3i", bucket[i][j]);
        }
        printf("\n");
    }
    return bucket;
}
/*
 * Stores nextVal in the first slot that still holds 0 of the row chosen by
 * its value: row 0 takes every value below bucketsize, and row r (r = 1..4)
 * takes values in [r*bucketsize, (r+1)*bucketsize).  Slots 0 .. n-1 are
 * probed in order.  Values of 5*bucketsize or more match no branch and are
 * not stored.
 * A slot containing 0 counts as empty, so storing nextVal == 0 leaves the row
 * looking unchanged.
 * NOTE(review): nothing in this function knows how many ints a row really
 * holds - confirm that n never exceeds the row length the caller allocated.
 */
void bucket_int(int**bucket,int bucketsize,int nextVal,int n){// to write the array in the buckets
int start=bucketsize;
for (int i=0;i<n;i++){
/* Row 0: first empty slot. */
if(nextVal< start && bucket[0][i]==0){
bucket[0][i]=nextVal;
break;
}
/* Row 0, reached when slot n-1 is occupied too.
 * NOTE(review): realloc is applied to `bucket` (the array of row pointers,
 * not row 0) with a new size of sizeof(int); the returned pointer is cast to
 * int and stored in bucket[0][n], which is then overwritten with nextVal
 * (i is n after the increment).  This does not enlarge row 0. */
if(nextVal< start && bucket[0][i]!=0&& i==n-1){
bucket[0][n]=(int)realloc(bucket,sizeof(int));
++i;
bucket[0][i]=nextVal;
break;
}
/* Rows 1..4: first empty slot of the row whose range contains nextVal. */
if((nextVal<(start*2)&& nextVal>=(start))&&(bucket[1][i]==0)){
bucket[1][i]=nextVal;
break;
}
if((nextVal<(start*3)&&nextVal>=(start*2))&&(bucket[2][i]==0)){
bucket[2][i]=nextVal;
break;
}
if((nextVal<(start*4)&&nextVal>=(start*3))&&(bucket[3][i]==0)){
bucket[3][i]=nextVal;
break;
}
if((nextVal<(start*5)&&nextVal>=(start*4))&&(bucket[4][i]==0)){
bucket[4][i]=nextVal;
break;
}
}
}
/*
 * Demo driver: distributes a fixed array over `teile` buckets, sorts each
 * bucket with qsort(), prints the buckets before and after sorting, copies
 * the values back in bucket order and prints the sorted array.
 */
int main(){
    int arr[]={3,26,2,10,33,45,20,15,11,9,34,40,19,16,4,5,26,49,1,0,6,8,7,3};
    int len=sizeof(arr)/sizeof(int);
    int teile=5;
    int teilgr=MAXZAHL/5;
    int zeros=0;   /* input zeros, counted separately (see below) */
    int **buckets=init_bucket(len,teile,teilgr);
    if (buckets==NULL){
        return EXIT_FAILURE;
    }

    /* Write the numbers into the buckets.  The buckets use 0 as their
     * "empty slot" marker, so a 0 from the input cannot be told apart from
     * an empty slot once stored; count those here and re-emit them later. */
    for (int i=0;i<len;i++){
        if (arr[i]==0){
            zeros++;
            continue;
        }
        bucket_int(buckets,teilgr,arr[i],len);
    }
    printf("\n");

    /* Print the unsorted buckets. */
    for (int i=0;i<teile;i++){
        for (int j=0;j<teilgr;j++){
            printf("%3i",buckets[i][j]);
        }
        printf("\n");
    }

    /* Sort each bucket once; the elements are ints, not pointers. */
    for (int i=0;i<teile;i++){
        qsort(buckets[i],teilgr,sizeof(int),compare);
    }
    printf("\n");

    /* Print the sorted buckets. */
    for (int i=0;i<teile;i++){
        for (int j=0;j<teilgr;j++){
            printf("%3i",buckets[i][j]);
        }
        printf("\n");
    }
    printf("\n");

    /* Copy back: zeros first (they are the smallest values handled here),
     * then every occupied slot in bucket order. */
    int f=0;
    for (int z=0;z<zeros;z++){
        arr[f]=0;
        f++;
    }
    for (int i=0;i<teile;i++){
        for (int j=0;j<teilgr;j++){
            if (buckets[i][j]!=0){
                arr[f]=buckets[i][j];
                f++;
            }
        }
    }

    /* Print exactly the f values that were written back. */
    printf("The sorted Array:\n");
    for (int i=0;i<f;i++){
        printf("%i ",arr[i]);
    }
    printf("\n");

    for (int i=0;i<teile;i++){
        free(buckets[i]);
    }
    free(buckets);
    return EXIT_SUCCESS;
}
how to get the size of the row
Read it from where you stored it, when you still knew it.
In C you cannot derive from a pointer to how much memory it points.
sizeof() should do the trick, there was a similar question on SO, did you search?

sort a 2d array and find the index and store in a array

I have a 2d array which has same numbers in a row.
I have to find the index of the elements in increasing order and put it in another array.
For example, assume that the input array has the following numbers:
int test[5][2]= { {12,12},{3,3},{14,14},{5,5},{8,8} }.
I have to output in result array with:
result[5] = {1,3,4,0,2}.
Just the index of the elements in increasing order...
I wrote this program, but the result array is always 1.
/*
 * Attempt to fill result[] with the row indices of test[][] in increasing
 * order of the first column, then print result[].
 */
int main()
{
int N=5;                /* not used below */
int result[5];
int test[5][2] = { {12,12},{3,3},{14,14},{5,5},{8,8} };
int i,j;
int smallindex = 0;
for (j=0; j<5; j++)
{
/* Compares each row only with the row directly before it and remembers the
 * index of the last row that is smaller than its predecessor. */
for (i=1; i<5; i++)
{
if (test[i][0] < test[i-1][0])
{
smallindex=i;
}
}
/* The inner loop does not depend on j and smallindex is never reset, so
 * every result[j] receives the same value. */
result[j]=smallindex;
}
for (j=0; j<5; j++)
{
printf("%d \t ", result[j]);
}
}
Can anyone tell me what is wrong in this?.
thanks
Make little modification for if statement in your code.
/* For every row i, count how many rows must come before it in increasing
 * order; that count is row i's position in the sorted sequence, so the index
 * i is stored at result[position].  For {12,3,14,5,8} this yields
 * result = {1,3,4,0,2}. */
for(i=0;i<5;i++)
{
    smallindex=0;   /* number of rows that sort before row i */
    for(j=0;j<5;j++) {
        /* Equal values are ordered by index so every row gets a distinct slot. */
        if(test[j][0]<test[i][0] || (test[j][0]==test[i][0] && j<i))
            smallindex++;
    }
    result[smallindex]=i;
}

counting number of swaps in insertion sort

In the problem given here, i have to count total no. of swaps required while sorting an array using insertion sort.
here is my approach
#include <stdio.h>
/*
 * For each of t test cases: reads N and then N integers, insertion-sorts them
 * as they arrive and prints how many element shifts ("swaps") that took.
 * Returns 0 on success and 1 on malformed input.
 */
int main()
{
    int t;

    if (scanf("%d", &t) != 1) {
        return 1;
    }
    while (t-- > 0) {
        int N;

        if (scanf("%d", &N) != 1) {
            return 1;
        }
        if (N <= 0) {
            /* Nothing to sort; also keeps the array size below positive. */
            printf("0\n");
            continue;
        }

        int arr[N];
        /* Up to N*(N-1)/2 shifts are possible, which exceeds int for large N. */
        long long swaps = 0;

        for (int i = 0; i < N; ++i) {
            int temp;
            if (scanf("%d", &temp) != 1) {
                return 1;
            }
            /* Shift larger elements right to open the slot for temp. */
            int j = i;
            while (j > 0 && arr[j-1] > temp) {
                arr[j] = arr[j-1];
                ++swaps;
                --j;
            }
            arr[j] = temp;
        }
        printf("%lld\n", swaps);
    }
    return 0;
}
but, this soln is giving time limit exceeded.
How can i make it more fast?
and, what are the other better solutions of this problem?
this is a standard problem named inversion count
This can be solved using mergesort in O(n*lg(n)). Here is my code for counting the inversions
/* Array being sorted and the running inversion total. */
int a[200001];
long long int count;

/*
 * Merges the sorted runs a[p..q] and a[q+1..r] into a[p..r] and adds to
 * `count` the number of pairs (x in the left run, y in the right run) with
 * x > y.
 * The runs are merged from a scratch copy using explicit bounds instead of
 * sentinel values, so any int (including INT_MAX) is handled and no sentinel
 * has to fit the element type.
 */
void Merge(int p,int q,int r)
{
    static int scratch[sizeof a / sizeof a[0]];
    int li = p;        /* next unread element of the left run  */
    int ri = q + 1;    /* next unread element of the right run */

    for (int i = p; i <= r; i++) {
        scratch[i] = a[i];
    }

    for (int out = p; out <= r; out++) {
        if (ri > r || (li <= q && scratch[li] <= scratch[ri])) {
            /* Left element is not greater: no new inversions. */
            a[out] = scratch[li++];
        } else {
            /* Right element is smaller than everything still unread on the
             * left; each of those q - li + 1 elements forms an inversion. */
            a[out] = scratch[ri++];
            count += q - li + 1;
        }
    }
}
/* Recursively sorts a[p..r] by splitting at the midpoint, sorting both
 * halves and combining them with Merge().  Ranges with fewer than two
 * elements are left as they are. */
void mergesort(int p,int r)
{
    if (p >= r)
        return;

    const int mid = (p + r) / 2;
    mergesort(p, mid);
    mergesort(mid + 1, r);
    Merge(p, mid, r);
}
/*
 * Reads n and then n integers into the global array `a`, merge-sorts them
 * and prints the inversion total accumulated in `count`.
 * Returns 0 on success and 1 when the input is malformed or n does not fit
 * in `a`.
 */
int main()
{
    int n;

    if (scanf("%d",&n) != 1 || n < 0 || n > (int)(sizeof a / sizeof a[0]))
        return 1;
    for (int i = 0; i < n; i++) {
        if (scanf("%d",&a[i]) != 1)
            return 1;
    }
    count = 0;
    mergesort(0, n-1);
    printf("%lld\n",count);
    return 0;
}
Basically the problem of inversion count is to find the no. of pairs i and j where j>i such that a[i]>a[j]
To know the idea behind this you should know the basic merge sort algorithm
http://en.wikipedia.org/wiki/Merge_sort
Idea:
Use divide and conquer
divide: size of sequence n to two lists of size n/2
conquer: count recursively two lists
combine: this is a trick part (to do it in linear time)
combine use merge-and-count. Suppose the two lists are A, B. They are already sorted. Produce an output list L from A, B while also counting the number of inversions, (a,b) where a is-in A, b is-in B and a>b.
The idea is similar to "merge" in merge-sort. Merge two sorted lists into one output list, but we also count the inversion.
Everytime a_i is appended to the output, no new inversions are encountered, since a_i is smaller than everything left in list B. If b_j is appended to the output, then it is smaller than all the remaining items in A, we increase the number of count of inversions by the number of elements remaining in A.
This reminds me of a similar problem you may want to look at: http://www.spoj.pl/problems/YODANESS/
In your problem, you can't afford the time to swap everything in case there are many swaps required. (imagine if the input was in reverse order 9,8,7,6.. then you would have to swap everything with everything basically.
I think in your case, each number must be swapped with all the numbers to the left of it that are larger than it.
I suggest you use a range tree http://en.wikipedia.org/wiki/Range_tree
The great thing about a range tree is each node can know how many nodes are to its left and to its right. You could ask the tree "how many numbers are there greater than 10" very efficiently and that's how many swaps you would have for a 9 say.
The trick is to build the range tree as you move from i=0 to i=N-1. At each point you can query the tree against the ith number before inserting the ith number into the range tree.
good luck!
I did the same code in c++, and it is getting accepted,it is taking time about 4.2 seconds on spoj(http://www.spoj.com/submit/CODESPTB/).
here is the code snippet:
//http://www.spoj.com/problems/CODESPTB/
//mandeep singh #msdeep14
#include<iostream>
using namespace std;
// Insertion-sorts arr[0..s-1] in ascending order, in place, and returns the
// number of element shifts performed (one per element moved one slot right).
int insertionsort(int arr[], int s)
{
    int shifts = 0;
    for (int next = 1; next < s; ++next) {
        const int key = arr[next];
        int slot = next;
        // Slide larger elements right until key's position is found.
        while (slot > 0 && key < arr[slot - 1]) {
            arr[slot] = arr[slot - 1];
            --slot;
            ++shifts;
        }
        arr[slot] = key;
    }
    return shifts;
}
// For each of t test cases: reads n and n integers, insertion-sorts them and
// prints the shift count returned by insertionsort().
// Returns 0 on success and 1 when the input is malformed or n does not fit
// the fixed-size buffer.
int main()
{
    int t,n,i,res;
    int arr[100000];
    const int capacity = sizeof arr / sizeof arr[0];

    if(!(cin>>t))
        return 1;
    while(t-- > 0)
    {
        // Reject sizes that would run past the end of arr.
        if(!(cin>>n) || n<0 || n>capacity)
            return 1;
        for(i=0;i<n;i++)
        {
            if(!(cin>>arr[i]))
                return 1;
        }
        res=insertionsort(arr,n);
        cout<<res<<endl;
    }
    return 0;
}
#include <stdio.h>
/*
 * Reads N and then N integers, insertion-sorts them as they arrive and prints
 * the number of element shifts ("swaps") without a trailing newline.
 * Returns 0 on success and 1 on malformed input.
 */
int main() {
    int N;

    if (scanf("%d", &N) != 1) {
        return 1;
    }
    if (N <= 0) {
        /* Nothing to sort; also keeps the array size below positive. */
        printf("0");
        return 0;
    }

    int arr[N];
    /* Up to N*(N-1)/2 shifts are possible, which exceeds int for large N. */
    long long swaps = 0;

    for (int i = 0; i < N; i++) {
        /* A single scalar is enough for the incoming value; a fixed
         * 100-element buffer would overflow for N > 100. */
        int temp;
        if (scanf("%d", &temp) != 1) {
            return 1;
        }
        int j = i;
        while (j > 0 && arr[j - 1] > temp) {
            arr[j] = arr[j - 1];
            ++swaps;
            --j;
        }
        arr[j] = temp;
    }
    printf("%lld", swaps);
    return 0;
}

Resources