I've been trying to do a Dynamic Programming assignment for university but I had no success so far.
The problem:
Given a DNA string and a list of mutation locations (for exemple, pieces 0 and 2 are mutations), find the longest palindromic sub-sequence that contains the most mutations on it.
Input: a string S with 0 to 2000 chars; an integer N such that 0<=N<=|S| and N positions (numbers from 0 to |S|) of mutations.
Output: an integer representing the size of the longest palindromic sub-sequence containing the maximum number of mutations.
Examples:
Input: CAGACAT 0
Output: 5
Input: GATTACA 1 0
Output: 1
Input: GATTACA 3 0 4 5
Output: 3
Input: TATACTATA 2 4 8
Output: 7
We have to code it in C, but what I really need are ideas, any language or pseudo-code is good to me.
My code to find the LPS (in C)
int find_lps(char *input)
{
int len = strlen(input), i, cur_len;
int c[len][len];
for (i = 0; i < len; i++)
c[i][i] = 1;
for (cur_len = 1; cur_len < len; cur_len++) {
for (i = 0; i < len - cur_len; i++) {
int j = i + cur_len;
if (input[i] == input[j]) {
c[i][j] = c[i + 1][j - 1] + 2;
} else {
c[i][j] = max(c[i + 1][j], c[i][j - 1]);
}
}
}
return c[0][len - 1];
}
What I tried to do for the mutations:
1- Creating an array of places where the LPS is changed. That doesn't work, and really, I have no idea of what to do.
More details about the problem:
In a situation where you have n palindromic subsequences, both of them with the same size of mutations inside, I need the longest of them. Given that you have n palindromic subsequences with X mutations, (we have M mutations), I need the longest palindromic subsequence of X mutations, considering you don't have a palindromic subsequence with M mutations. If you do, then you should choose the other subsequence, even if it's shorter. So, first criteria: most mutations in a palindromic subsequence. If we have the same amount, then the longest of the subsequences.
Any help is appreciated, thank you.
Lets define C[i][j] to store 2 values:
1- The length of the longest palindromic sub-sequence in the sub-string S(i,j) that contains the most mutations in it, and lets denote it by C[i][j].len
2- The number of mutations in the longest palindromic sub-sequence in the sub-string S(i,j) that contains the most mutations in it, and lets denote it by C[i][j].ms
Then the result of the problem would be C[0][|S|-1].len
Note: m[i] = 1 means the character s[i] is a mutation, otherwise m[i] = 0
Here is the full code written in c++:
#include <iostream>
#include <string>
using namespace std;
string s;
int m[2001];
struct Node {
int ms;//number of mutations
int len;
Node() {
ms = len = 0;
}
Node(int v1,int v2) {
ms = v1;
len = v2;
}
};
Node C[2001][2001];
Node getBestNode(Node n1, Node n2) {
if (n1.ms > n2.ms)
return n1;
if (n1.ms < n2.ms)
return n2;
if (n1.len > n2.len)
return n1;
if (n1.len < n2.len)
return n2;
return n1;
}
void init() {
for (int i = 0; i < 2001; i++) {
m[i] = 0;
for (int j = 0; j < 2001; j++) C[i][j] = Node(0,0);
}
}
void solve() {
int len = s.length();
// initializing the ranges of length = 1
for (int i = 0; i < len; i++)
C[i][i] = Node( m[i],1 );
// initializing the ranges of length = 2
for (int i = 0; i < len - 1; i++)
if (s[i] == s[i + 1])
C[i][i + 1] = Node(m[i] + m[i + 1],2);
else if (m[i] || m[i + 1])
C[i][i + 1] = Node(1,1) ;
// for ranges of length >= 3
for (int cur_len = 3; cur_len <= len; cur_len++)
for (int i = 0; i <= len - cur_len; i++) {
int j = i + cur_len - 1;
C[i][j] = getBestNode(C[i + 1][j], C[i][j-1]);
if (s[i] == s[j]) {
Node nn = Node(
C[i + 1][j - 1].ms + m[i] + m[j] ,
C[i + 1][j - 1].len + 2
);
C[i][j] = getBestNode(C[i][j], nn);
}
}
}
int main() {
int n;
cin >> s >> n;
init();//initializing the arrays with zeros
for (int i = 0; i < n; i++) {
int x; cin >> x;
m[x] = 1;
}
solve();
cout << C[0][s.length()-1].len << endl;
return 0;
}
The function getBestNode() is returning the best of 2 solutions by considering the number of mutations then the length of the sub-sequence.
Note: The code can be shorter, but I made it this way for clarity.
DISCLAIMER:
Described problem looks like a task from a competition. I'm not participating in any of them, I'm not aware about any ongoing competitions, which might involve the problem. If there are any of them, I'll close the question to stay fair!
I have a problem:
given an array A of values and integer K, split A into exactly K non-overlapping contiguous subarrays in such way that difference between a subarray with minimal and a subarray maximum sums is minimal. It is allowed to rotate A by any number in any direction.
Consider an example:
Input: A = [5 1 1 1 3 2], K = 3
Output: [5][1 1 1][3 2], maximum sum = 5, minimum sum = 3, result = 2
I have partially working code (terribly ugly, my bad, but it does not meant to be production quality):
#include <climits>
#include <cstdio>
#include <cstring>
const int max_n = 50;
const int max_k = 20;
int deps[max_n];
int max (int x, int y) {
return x > y ? x : y;
}
int min (int x, int y) {
return x < y ? x : y;
}
int sum (int a[], int start, int end) {
int res = 0;
for (int i = start; i <= end; ++i) res += a[i];
return res;
}
int k_partitioning(int k, int n, int deps[]) {
int res = INT_MAX;
// consider all possible rotations/shifts
for(int offset = 0; offset < n; ++offset) {
for(int l_min = 0; l_min < n; ++l_min) {
for(int r_min = l_min; r_min < n; ++r_min) {
// check minimal sum subarray
int min_sum = sum (deps, l_min, r_min);
int dp[k][n];
for (int s = 0; s < k; ++s) {
for (int q = 0; q < n; ++q) {
dp[s][q] = 0;
}
}
// assuming that current sum is a target sum
dp[0][r_min-l_min] = min_sum;
for(int p = 1; p < k; ++p) {
for(int l_max = 0; l_max < n; ++l_max) {
for(int r_max = 0; r_max < n; ++r_max) {
int max_sum = sum(deps, l_max, r_max);
if (max_sum >= min_sum) dp[p][r_max] = max(dp[p-1][l_max], max_sum);
} // l_maxs
} // r_maxs
} // partitions
// printing dp
// skip incorrect partitioning, when not all K partitions were used
if (dp[k-1][n-1] == 0) continue;
// update difference
res = min (res, dp[k-1][n-1] - min_sum);
} // end min sum seg
} // start min sum seg
//break;
} // cuts
return res;
}
int main(int argc, char* argv[]) {
int k = 0;
scanf("%d", &k);
int n = 0;
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%d", &deps[i]);
}
printf ("%d\n", k_partitioning(k, n, deps));
return 0;
}
The idea is simple: assume that current partition has minimal sum, enumerate all possible maximal partitions, setup dynamic programming for generating maximum sum with minimal value, check for difference. Total complexity: O(K*N^4).
My problem is that it fails some tests and I'm stuck with troubleshooting it. Could someone help me with it?
Failed test, for example:
N = 4, K = 2, A = [6 13 10 2]
UPDATE
This version should fix some previous issues. First, it removes wasteful loop over "offsets" and adds just an array rotation in the end of l_min loop. Second, I've noticed, that dp can't be initialized with 0 - this is minimization task, so it should be initialized with some large value (depends on a problem's constants, max_value here already is out of value domain). Finally, intervals should not overlap anymore - each sum exclude left end of an interval. However, it still does not produce expected results.
#include <climits>
#include <cstdio>
#include <cstring>
const int max_value = 200000;
const int max_n = 50;
const int max_k = 20;
int deps[max_n];
int max (int x, int y) {
return x > y ? x : y;
}
int min (int x, int y) {
return x < y ? x : y;
}
int sum (int a[], int start, int end) {
int res = 0;
for (int i = start; i <= end; ++i) res += a[i];
return res;
}
int k_partitioning(int k, int n, int deps[]) {
int res = max_value;
for(int l_min = 0; l_min < n; ++l_min) {
for(int r_min = l_min; r_min < n; ++r_min) {
int min_sum = sum (deps, l_min+1, r_min);
int dp[k][n];
for (int s = 0; s < k; ++s) {
for (int q = 0; q < n; ++q) {
dp[s][q] = max_value;
}
}
// assuming that current sum is a target sum
dp[0][r_min-l_min] = min_sum;
for(int p = 1; p < k; ++p) {
for(int l_max = 0; l_max < n; ++l_max) {
for(int r_max = l_max; r_max < n; ++r_max) {
int max_sum = sum(deps, l_max+1, r_max);
if (max_sum >= min_sum) dp[p][r_max] = max(dp[p-1][l_max], max_sum);
} // l_maxs
} // r_maxs
} // partitions
// skip incorrect partitioning, when not all K partitions were used
if (dp[k-1][n-1] == max_value) continue;
// update difference
res = min (res, dp[k-1][n-1] - min_sum);
} // end min sum seg
// rotate an array to consider different starting points
int tmp[n];
for (int i = 0; i < n; ++i) {
int new_idx = i + n + 1;
tmp[new_idx % n] = deps[i];
}
for(int i = 0; i < n; ++i) deps[i] = tmp[i];
} // start min sum seg
return res;
}
int main(int argc, char* argv[]) {
int k = 0;
scanf("%d", &k);
int n = 0;
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%d", &deps[i]);
}
printf ("%d\n", k_partitioning(k, n, deps));
return 0;
}
Ok, I think I did it!
The idea is following: we assume that minimum sum interval always starts from 0. Then we start to enumerate maximum sum intervals, starting from the right boundary of the minimal interval. We build DP problem for current max interval to determine a minimum maximal sum. After that you update result and rotate an array by one.
My code is not perfect in a way that I compute current sums each iteration. One can pre-compute them and just index them each time.
This code might have some bugs, but it passes all test that I have.
#include <climits>
#include <cstdio>
#include <cstring>
const int max_value = 200000;
const int max_n = 50;
const int max_k = 20;
int deps[max_n];
int max (int x, int y) {
return x > y ? x : y;
}
int min (int x, int y) {
return x < y ? x : y;
}
int sum (int a[], int start, int end) {
int res = 0;
for (int i = start; i <= end; ++i) res += a[i];
return res;
}
int k_partitioning(int k, int n, int deps[]) {
int res = max_value;
for(int offset = 0; offset < n; ++offset) {
int l_min = 0;
for(int r_min = l_min; r_min < n; ++r_min) {
int min_sum = sum (deps, l_min, r_min);
int dp[k][n];
for (int s = 0; s < k; ++s) {
for (int q = 0; q < n; ++q) {
dp[s][q] = max_value;
}
}
// assuming that current sum is a target sum
dp[0][r_min-l_min] = min_sum;
for(int p = 1; p < k; ++p) {
for(int l_max = r_min; l_max < n; ++l_max) {
for(int r_max = l_max; r_max < n; ++r_max) {
int max_sum = sum(deps, l_max+1, r_max);
if (max_sum >= min_sum) {
dp[p][r_max] = min(dp[p][r_max], max(dp[p-1][l_max], max_sum));
}
} // l_maxs
} // r_maxs
} // partitions
// skip incorrect partitioning, when not all K partitions were used
if (dp[k-1][n-1] == max_value) continue;
// update difference
res = min (res, dp[k-1][n-1] - min_sum);
} // end min sum seg
int tmp[n];
for (int i = 0; i < n; ++i) {
int new_idx = i + n - 1;
tmp[new_idx % n] = deps[i];
}
for(int i = 0; i < n; ++i) deps[i] = tmp[i];
} // start min sum seg
return res;
}
int main(int argc, char* argv[]) {
int k = 0;
scanf("%d", &k);
int n = 0;
scanf("%d", &n);
for (int i = 0; i < n; ++i) {
scanf("%d", &deps[i]);
}
printf ("%d\n", k_partitioning(k, n, deps));
return 0;
}
Solution without rotations:
1) Compute max M and total S of the array - O(n)
2) Let there be a function F(P), which returns True if it is possible to get a Sum P or less with k (>= 0) partitions still remaining.
3) Do a binary search on range(M, S) using F. - O(log(S-M))
4) Logic behind F: Fill a bucket till it's not greater than S/K. Then move onto next bucket. If there are still items remaining and no buckets remaining, then the answer is false - O(n)
Time Complexity = O(n) + O(n) * (log(S-M)) = O(n*log(S-M))
Solution with Rotations:
For all rotations in [0, 1, ... N-1], compute min sum.
Total Time Complexity = O(n) * O(nlog(S-M)) = O(n^2*log(S-M))
Now that you've got your code working, here's an alternative method :)
Consider that for each k, we can pair a sum growing from A[i] to the left (sum A[i-j..i]) with all available intervals recorded for f(k-1, i-j-1) and update them - for each interval, (low, high), if the sum is greater than high, then new_interval = (low, sum) and if the sum is lower than low, then new_interval = (sum, high); otherwise, the interval stays the same. For example,
i: 0 1 2 3 4 5
A: [5 1 1 1 3 2]
k = 3
i = 3, j = 0
The ordered intervals available for f(3-1, 3-0-1) = f(2,2) are:
(2,5), (1,6) // These were the sums, (A[1..2], A[0]) and (A[2], A[0..1])
Sum = A[3..3-0] = 1
Update intervals: (2,5) -> (1,5)
(1,6) -> (1,6) no change
Now, we can make this iteration much more efficient by recognizing and pruning intervals during the previous k round.
Watch:
A: [5 1 1 1 3 2]
K = 1:
N = 0..5; Intervals: (5,5), (6,6), (7,7), (8,8), (11,11), (13,13)
K = 2:
N = 0: Intervals: N/A
N = 1: Intervals: (1,5)
N = 2: (1,6), (2,5)
Prune: remove (1,6) since any sum <= 1 would be better paired with (2,5)
and any sum >= 6 would be better paired with (2,5)
N = 3: (1,7), (2,6), (3,5)
Prune: remove (2,6) and (1,7)
N = 4: (3,8), (4,7), (5,6), (5,6)
Prune: remove (3,8) and (4,7)
N = 5: (2,11), (5,8), (6,7)
Prune: remove (2,11) and (5,8)
For k = 2, we are now left with the following pruned record:
{
k: 2,
n: {
1: (1,5),
2: (2,5),
3: (3,5),
4: (5,6),
5: (6,7)
}
}
We've cut down the iteration of k = 3 from a list of n choose 2 possible splits to n relevant splits!
The general algorithm applied to k = 3:
for k' = 1 to k
for sum A[i-j..i], for i <- [k'-1..n], j <- [0..i-k'+1]:
for interval in record[k'-1][i-j-1]: // records are for [k'][n']
update interval
prune intervals in k'
k' = 3
i = 2
sum = 1, record[2][1] = (1,5) -> no change
i = 3
// sums are accumulating right to left starting from A[i]
sum = 1, record[2][2] = (2,5) -> (1,5)
sum = 2, record[2][1] = (1,5) -> no change
i = 4
sum = 3, record[2][3] = (3,5) -> no change
sum = 4, record[2][2] = (2,5) -> no change
sum = 5, record[2][1] = (1,5) -> no change
i = 5
sum = 2, record[2][4] = (5,6) -> (2,6)
sum = 5, record[2][3] = (3,5) -> no change
sum = 6, record[2][2] = (2,5) -> (2,6)
sum = 7, record[2][1] = (1,5) -> (1,7)
The answer is 5 paired with record[2][3] = (3,5), yielding the updated interval, (3,5). I'll leave the pruning logic for the reader to work out. If we wanted to continue, here's the pruned list for k = 3
{
k: 3
n: {
2: (1,5),
3: (1,5),
4: (3,5),
5: (3,5)
}
}
I finally solved this question : Split array into three subarrays, It may help you.
here I'm splitting a array into three sub-array with java.
package com.array2;
public class SplitArray {
public static void main(String[] args) {
// TODO Auto-generated method stub
int a[] = { 1, 2, 3, 5, 4, 6, 9, 8, 15, 52, 4, 6, 89 };
splitArray(a);
}
private static void splitArray(int[] a) {
// TODO Auto-generated method stub
int a_l = a.length;
int[] a1 = new int[a.length / 3];
int[] a2 = new int[a.length / 3];
int[] a3 = new int[a.length / 3 + a.length % 3];
for (int i = 0; i < a3.length; i++) {
if (i < a1.length) {
a1[i] = a[i];
a2[i] = a[a1.length + i];
a3[i] = a[a1.length + a2.length + i];
} else {
a3[i] = a[a1.length + a2.length + i];
}
}
}
}
I have this homework:
Given an array consisting of N integers, you are required to print the minimum contiguous sum that can be obtained by performing at most K swaps. During a swap any 2 elements of the given array could be swapped.
I tried this
int currentSum = 0;
int currentMin = 0;
for (int j = 0; j < input.Length; j++)
{
if (input[j] >= 0)
continue;
currentSum += input[j];
if (currentMin > currentSum)
currentMin = currentSum;
}
It will give the minimum sum without any swappings, but how can I improve in no more than K swaps?
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.Iterator;
import java.util.PriorityQueue;
import java.util.Scanner;
import java.util.ArrayList;
import java.util.List;
class TestClass {
static Scanner scanner;
public static void main(String args[] ) throws Exception {
scanner=new Scanner(System.in);
int T=scanner.nextInt();
while(T>0){
int N=scanner.nextInt();
int K=scanner.nextInt();
int[] array=new int[N];
for(int i=0;i<array.length;i++)
{
array[i]=scanner.nextInt();
}
System.out.println(findingMinimumSumSubarray(array, K));
T--;
}
}
public static int findingMinimumSumSubarray(int[] values, int k) {
int N = values.length;
int res = values[0];
for (int L = 0; L < N; L++) {
for (int R = L; R < N; R++) {
List<Integer> A= new ArrayList<Integer>();
List<Integer> B = new ArrayList<Integer>();
int ashu = 0;
for (int i = 0; i < N; i++) {
if (i >= L && i <= R) {
A.add(values[i]);
ashu += values[i];
} else {
B.add(values[i]);
}
}
Collections.sort(A);
Collections.sort(B);
Collections.reverse(B);
res = Math.min(res, ashu);
for (int t = 1; t <= k; t++) {
if (t > A.size() || t > B.size()) break;
ashu -= A.get(A.size() - t);
ashu += B.get(B.size() - t);
res = Math.min(res, ashu);
}
}
}
return res;
}
}
You solution is not correct even without swap.
Test: [-1, 2, -1]. Your answer on this test is -2. Correct answer: -1
I hope that my solution is not best and there is better approach.
Simple O(N^3) complexity solution.
Let's assume that our final minimum contiguous segment will be [L, R] for some 0 <= L <= R < N. Now we have two multiset: A and B. A - multiset with "inner" numbers (numbers that are inside range [L, R]) and B - multiset with "outer" numbers (numbers that are outside of range [L, R]). Out goal is to minimize sum of numbers in A - sum(A). Making swap inside A or B is meaningful, because it will not affect to sum(A). We can swap one element from A with other element in B. We have no more than K swaps, and it means that no more than K elements in A will be swapped with no more than K elements in B. To reach minimum value of sum(A) we will take some maximum elements in A and swap them with minimum elements in B. For example:
A = {-3, -3, -1, 2}; B = {-4, 1, 3, 6}; K = 2;
We can make 0 swaps, A = {-3, -3, -1, 2}; B = {-4, 1, 3, 6}; then sum(A) == -3
We can make 1 swaps, A = {-3, -3, -1, -4}; B = {2, 1, 3, 6}; then sum(A) == -11
We can make 2 swaps, A = {-3, -3, 1, -4}; B = {2, -1, 3, 6}; then sum(A) == -9
Answer is sum(A) == -11
For range [L, R] we can get minimum possible sum. To obtain answer for our initial problem we will iterate over all possible ranges [L, R]. 0 <= L <= R < N
Naive implementation. O(N^3logn) complexity.
int get_minimum_contiguous_sum(vector <int> values, int k) {
int N = values.size();
int ans = values[0]; // initializing with any possible sums
for (int L = 0; L < N; L++) {
for (int R = L; R < N; R++) {
vector <int> A, B; // our "inner" and "outer" sets
int suma = 0; // will store initial sum of elements in A
for (int i = 0; i < N; i++) {
if (i >= L && i <= R) {
A.push_back(values[i]);
suma += values[i];
} else {
B.push_back(values[i]);
}
}
// Sorting set A in non-descending order
sort(A.begin(), A.end());
// Sorting set B in non-increasing order
sort(B.begin(), B.end());
reverse(B.begin(), B.end());
ans = min(ans, suma); // Updating answer with initial state
// Iterating number of swaps that we will make
for (int t = 1; t <= k; t++) {
// if some of two sets contain less than t elements
// then we cannot provide this number of swaps
if (t > A.size() || t > B.size()) break;
// Swapping t-th maximum of A with t-th minimum of B
// It means that t-th maximum of A subtracts from suma
// and t-th minimum of B added to suma
suma -= A[A.size() - t];
suma += B[B.size() - t];
ans = min(ans, suma);
}
}
}
return ans;
}
Optimization
Let's assume that for the range [L, R] we already know sorted set A and reverse sorted set B. When we will compute for the range [L, R + 1] exactly one element will be deleted from B and inserted in A(this number is exactly values[R+1]). C++ has containers set and multiset that can allow us to insert and remove in O(log) time and iterate in O(n) time. Other programming languages also has same containers (in java it is TreeSet/SortedSet). So when we move R to R+1, we will make some simple queries to multiset(insert/remove).
O(N^3) solution.
int get_minimum_contiguous_sum(vector <int> values, int k) {
int N = values.size();
int ans = values[0]; // initializing with any possible sums
for (int L = 0; L < N; L++) {
// "inner" multiset
// Stores in non-increasing order to iterate from beginning
multiset<int, greater<int> > A;
// "outer" multiset
// multiset by defaul stres in non-decreasing order
multiset<int> B;
// Initially all elements of array in B
for (int i = 0; i < N; i++) {
B.insert(values[i]);
}
int suma = 0; // Empty set has sum=0
for (int R = L; R < N; R++) {// Iterate over all possible R
// Removing element from B and inserting to A
B.erase(B.find(values[R]));
A.insert(values[R]);
suma += values[R];
ans = min(ans, suma);
__typeof(A.begin()) it_a = A.begin();
__typeof(B.begin()) it_b = B.begin();
int cur = suma;
for (int i = 1; i <= k; i++) {
if (it_a != A.end() && it_b != B.end())
break;
cur -= *it_a;
cur += *it_b;
ans = min(ans, cur);
it_a++;
it_b++;
}
}
}
return ans;
}