The question at hand is:
Q8. Given an unsorted array A[]. The task is to print all unique pairs in the unsorted array with equal sum. Consider the Input: A[] = {6, 4, 12, 10, 22, 54, 32, 42, 21, 11}
Explain the approach of solving the above problem, and write the code in any one programming language C/C++/Python/Java. What is the time complexity of the above problem?
Here is my solution to the above problem (in C) :
#include <stdio.h>

int main() {
    int arr[] = {6, 4, 12, 10, 22, 54, 32, 42, 21, 11};
    int len = sizeof(arr) / sizeof(arr[0]);
    for (int i = 0; i < len; i++)
        for (int j = i + 1; j < len; j++)
            for (int k = i + 1; k < len; k++)
                for (int l = k + 1; l < len; l++)
                    if (arr[i] + arr[j] == arr[l] + arr[k])
                        printf("(%d,%d),(%d,%d)\n", arr[i], arr[j], arr[k], arr[l]);
    return 0;
}
My logic is to take one element at a time, take its sum with every other element, and for each such iteration compare the sum of every other unique pair of elements to it.
For example, when i=0, j=3, arr[i]+arr[j]=16. When k=1,l=2, arr[k]+arr[l]=16. Since the pairs are unique (6,10) and (4,12) and their sum is equal, I print them.
Note that the pairs are assumed to be unordered pairs so that (a,b) is the same as (b,a) and so we don't need to repeat that, as they have to be unique.
My question is: I know that my code is essentially O(n^4). How can I improve/optimise it further?
First, precompute the sum of each pair and keep the results in a matrix PAIRSUM.
PAIRSUM(0, 0) = 12
PAIRSUM(0, 1) = 10, and so on.
Next, loop over PAIRSUM and find entries that are equal.
So you have reduced the big problem to a smaller one, in which you check the equality of two numbers rather than of two sums of numbers.
For this, keep a vector PAIR in which, at index X, you store the entries of PAIRSUM whose sum is X.
For example, PAIR(10) = { {0, 1} }.
You only need one half of PAIRSUM (one side of the diagonal), i.e. the entries (i, j) with i < j, since the pairs are unordered.
It would be easier in C++, Python, or Java because those languages provide high-level containers. In Python, you could use a defaultdict(list) where the keys are the sums and the values are lists of the pairs giving that sum.
Then you only have to process the unique pairs, roughly N²/2 of them:
import collections

result = collections.defaultdict(list)
for i, a in enumerate(A):
    for b in A[i+1:]:
        result[a+b].append((a, b))
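Every key in result whose list ends up with more than one pair is an answer; printing those lists gives all the groups of pairs with equal sums.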
It will be slightly more complex in C because you do not have a high-level, direct-access dict. If you can afford to waste some memory and the numbers are small, as they are here, you know that the highest possible sum is less than twice the biggest number in the input array, so you can directly allocate an array of that size. That gives you direct access from a sum to its slot. From there, you just attach a linked list of pairs to each slot and that is all. As a bonus, you even get a sorted list of sums.
If you cannot assume that the numbers are small, you will have to build a direct-access container yourself. A hash-type container using N*N/2 as its size (N being the length of A) and sum % size as the hash function should be enough.
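For illustration, here is a minimal sketch of the small-numbers variant described above (it assumes non-negative values so that an array indexed directly by the sum fits in memory; the struct name pair_list is made up for this sketch):

#include <stdio.h>
#include <stdlib.h>

struct pair_list {              /* one pair contributing to a given sum */
    int first, second;
    struct pair_list *next;
};

int main(void) {
    int A[] = {6, 4, 12, 10, 22, 54, 32, 42, 21, 11};
    int N = sizeof(A) / sizeof(A[0]);

    /* find the maximum element: every pair sum is at most 2 * max */
    int max = A[0];
    for (int i = 1; i < N; i++)
        if (A[i] > max) max = A[i];
    int limit = 2 * max + 1;

    /* direct-access table: index = sum, value = list of pairs with that sum
       (calloc gives zeroed memory, used here as empty lists) */
    struct pair_list **by_sum = calloc(limit, sizeof(*by_sum));

    for (int i = 0; i < N - 1; i++)
        for (int j = i + 1; j < N; j++) {
            struct pair_list *p = malloc(sizeof(*p));
            p->first = A[i];
            p->second = A[j];
            p->next = by_sum[A[i] + A[j]];
            by_sum[A[i] + A[j]] = p;
        }

    /* sums with at least two pairs are the answers; iterating by index
       also yields them in increasing order of sum */
    for (int s = 0; s < limit; s++) {
        if (by_sum[s] && by_sum[s]->next) {
            printf("%d :", s);
            for (struct pair_list *p = by_sum[s]; p; p = p->next)
                printf(" (%d,%d)", p->first, p->second);
            printf("\n");
        }
    }
    /* cleanup of the lists omitted for brevity */
    return 0;
}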
For completeness, here is possible C code that does not make the small-numbers assumption (this code displays all pairs, not only the ones with duplicated sums):
#include <stdio.h>
#include <stdlib.h>

// a node in a linked list of pairs
struct pair_node {
    int first;
    int second;
    struct pair_node *next;
};

// a slot for a hash-type container of pairs indexed by their sum
struct slot {
    int number;
    struct pair_node *nodes;
    struct slot *next;
};

// utility function to find (or create) a slot for a sum
struct slot *find_slot(struct slot **slots, int size, int sum) {
    struct slot *slt = slots[sum % size];
    while (slt != NULL) {
        if (slt->number == sum) {
            return slt;
        }
        slt = slt->next;
    }
    slt = malloc(sizeof(struct slot));
    slt->number = sum;
    slt->nodes = NULL;
    slt->next = slots[sum % size];
    slots[sum % size] = slt;
    return slt;
}

int main() {
    int A[] = {6, 4, 12, 10, 22, 54, 32, 42, 21, 11}; // the array of numbers
    int N = sizeof(A) / sizeof(A[0]);
    int arr_size = N * N / 2; // size of the hash table of pairs
    struct slot **result = malloc(arr_size * sizeof(struct slot *));
    for (int i = 0; i < arr_size; i++) {
        result[i] = NULL;
    }
    // process unique pairs
    for (int i = 0; i < N - 1; i++) {
        for (int j = i + 1; j < N; j++) {
            int sum = A[i] + A[j];
            // allocate and initialize a node
            struct pair_node *node = malloc(sizeof(*node));
            node->first = A[i];
            node->second = A[j];
            // store the node in the hash container
            struct slot *slt = find_slot(result, arr_size, sum);
            node->next = slt->nodes;
            slt->nodes = node;
        }
    }
    // display the result and free what has been allocated
    for (int i = 0; i < arr_size; i++) {
        for (struct slot *slt = result[i]; slt != NULL;) {
            printf("%d :", slt->number);
            struct pair_node *node = slt->nodes;
            while (node != NULL) {
                printf(" (%d,%d)", node->first, node->second);
                struct pair_node *old_node = node;
                node = node->next;
                free(old_node); // free the node we just printed
            }
            printf("\n");
            struct slot *old = slt;
            slt = slt->next;
            free(old);
        }
    }
    free(result);
    return EXIT_SUCCESS;
}
Here is C code that calculates all the sums and stores each sum together with its two indexes in an array of structures. We then sort the structures by sum and print adjacent elements that have the same sum.
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <errno.h>
#include <assert.h>

// for debugging
#define debug(...) ((void)0) // printf(__VA_ARGS__)

// two indexes and a sum
struct is_s {
    // one index inside the array
    size_t i;
    // the other index, also inside the array
    size_t j;
    // the sum arr[i] + arr[j]
    int sum;
};

// used for qsorting the struct is_s by sum
static int is_qsort_compare_sum(const void *a0, const void *b0) {
    const struct is_s * const a = a0;
    const struct is_s * const b = b0;
    return a->sum - b->sum;
}

int unique_pairs(const size_t len, const int arr[len]) {
    if (len <= 1) return 0;
    // The number of unordered combinations is n!/(2!*(n-2)!) = n*(n-1)/2
    const size_t islen = len * (len - 1) / 2; // #MOehm
    debug("%zu\n", islen);
    struct is_s * const is = malloc(islen * sizeof(*is));
    if (is == NULL) {
        return -ENOMEM;
    }
    size_t isidx = 0;
    for (size_t i = 0; i < len; ++i) {
        for (size_t j = i + 1; j < len; ++j) {
            assert(isidx < islen); // just for safety
            struct is_s * const p = &is[isidx++];
            p->i = i;
            p->j = j;
            p->sum = arr[i] + arr[j];
            debug("[%zu]=[%zu]=%d [%zu]=%d %d\n", isidx, p->i, arr[p->i], p->j, arr[p->j], p->sum);
        }
    }
    qsort(is, islen, sizeof(*is), is_qsort_compare_sum);
    for (size_t i = 0; i < islen - 1; ++i) {
        debug("([%zu]=%d,[%zu]=%d)%d = ([%zu]=%d,[%zu]=%d)%d\n",
              is[i].i, arr[is[i].i], is[i].j, arr[is[i].j], is[i].sum,
              is[i+1].i, arr[is[i+1].i], is[i+1].j, arr[is[i+1].j], is[i+1].sum);
        if (is[i].sum == is[i + 1].sum) {
            printf("(%d,%d),(%d,%d) = %d\n",
                   arr[is[i].i], arr[is[i].j],
                   arr[is[i+1].i], arr[is[i+1].j], is[i].sum);
        }
    }
    free(is);
    return 0;
}

int main(void) {
    const int arr[] = {6,4,12,10,22,54,32,42,21,11};
    return unique_pairs(sizeof(arr)/sizeof(*arr), arr);
}
The result I get is:
(6,10),(4,12) = 16
(10,22),(21,11) = 32
(12,21),(22,11) = 33
(22,21),(32,11) = 43
(32,21),(42,11) = 53
(12,42),(22,32) = 54
(10,54),(22,42) = 64
I wonder if this is correct. As #Bathsheba noted, there are O(n*n) pairs, so building them costs O(n*n); the qsort over them makes the whole approach O(n*n*log(n)).
It can be done in O(N^2 * log(N^2) * M), where M is the maximum number of pairs (i, j) that have the same sum, so in the worst case it would be O(N^3 * log(N)).
Iterate over every pair 0 <= i < j < N in some fixed order (increasing or decreasing). As you go, save the sum of every previous pair (i, j), so that you know which previous pairs have a given sum; this can be done with a map that has an integer key and a vector of pairs as the mapped value. Then, for every pair (i, j), you look up its sum in the map (key = A[i] + A[j]), and all the pairs stored in map[sum] are answers for this pair (i, j).
You don't have to worry about the later pairs that have the same sum, because they will account for it when they are processed.
Here is a Java solution:
import java.util.*;

class Duplicate {
    public static void main(String[] args) {
        int[] a = {5,3,1,4,5,6,3,7,0,10,6,4,9,1};
        List<Integer> li = new ArrayList<Integer>();
        int p1 = 0, p2 = 0;
        for (int i = 0; i < a.length; i++) {
            for (int j = i + 1; j < a.length; j++) {
                if (a[i] + a[j] == 10) {
                    p1 = a[i];
                    p2 = a[j];
                    if (!li.contains(Math.abs(p2 - p1))) {
                        li.add(Math.abs(p2 - p1));
                        System.out.println("Pairs" + ":" + p1 + "," + p2);
                    }
                }
                p1 = 0;
                p2 = 0;
            }
        }
    }
}
Here is the program which I am working on.
void primMST(int graph[N][N])
{
    int parent[N];  // Array to store constructed MST
    int key[N];     // Key values used to pick minimum weight edge in cut
    bool mstSet[N]; // To represent set of vertices not yet included in MST
    int i, count, v;

    // Initialize all keys as INFINITE
    for (i = 0; i < N; i++)
        key[i] = INT_MAX, mstSet[i] = false;

    // Always include the 1st vertex in the MST.
    key[0] = 0;     // Make key 0 so that this vertex is picked as the first vertex
    parent[0] = -1; // First node is always root of MST

    // The MST will have N vertices
    for (count = 0; count < N-1; count++)
    {
        // Pick the minimum key vertex from the set of vertices
        // not yet included in MST
        int u = minKey(key, mstSet);

        // Add the picked vertex to the MST set
        mstSet[u] = true;

        // Update key value and parent index of the adjacent vertices of
        // the picked vertex. Consider only those vertices which are not yet
        // included in MST
        for (v = 0; v < N; v++)
            // graph[u][v] is non zero only for adjacent vertices of u
            // mstSet[v] is false for vertices not yet included in MST
            // Update the key only if graph[u][v] is smaller than key[v]
            if (graph[u][v] && mstSet[v] == false && graph[u][v] < key[v])
                parent[v] = u, key[v] = graph[u][v];
    }

    // print the constructed MST
    printMST(parent, N, graph);
}
int main()
{
    int i, j;
    int **graph = (int **) malloc(sizeof(int *) * N);
    for (i = 0; i < N; i++)
        graph[i] = (int *) malloc(sizeof(int) * N);

    FILE *fp;
    fp = fopen("AdjacencyMatrix_of_Graph_G_N.txt", "r");
    char c;
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            fscanf(fp, "%c ", &c);
            graph[i][j] = c - '0';
        }
    }
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            printf("%d ", graph[i][j]);
        }
        printf("\n");
    }
    primMST(graph);
    fclose(fp);
    return 0;
}
And I am getting the following warnings:
C:\TURBOC3\BIN\PROJECT\MST.c:125:10: warning: passing argument 1 of 'primMST' from incompatible pointer type
primMST(graph);
^
C:\TURBOC3\BIN\PROJECT\MST.c:59:6: note: expected 'int (*)[30]' but argument is of type 'int **'
void primMST(int graph[N][N])
^
Compilation results...
--------
- Errors: 0
- Warnings: 2
- Output Filename: C:\TURBOC3\BIN\PROJECT\MST.exe
- Output Size: 132.0068359375 KiB
- Compilation Time: 0.48s
As people have commented here, int ** is not the same as int[][], and that is why the compiler complains.
Also, I can see that you are dynamically allocating the rows one by one; this has two drawbacks:
1. malloc is a fairly expensive operation, and calling it N times seems unnecessary.
2. The result will not be a contiguous block of memory, which is what int[N][N] expects.
Assuming that N is a #define, I would suggest allocating graph[N][N] on the stack in main() rather than dynamically allocating it, as sketched below.
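For illustration, a sketch of how main() could look with a stack-allocated graph (this assumes N is a compile-time #define and that primMST() and its helpers are defined in the same file, as in the question):

/* assumes: #define N ... at the top of the file, and primMST() defined as above */
int main(void)
{
    int graph[N][N]; /* ordinary 2D array: one contiguous block whose type matches int[N][N] */
    int i, j;
    char c;

    FILE *fp = fopen("AdjacencyMatrix_of_Graph_G_N.txt", "r");
    if (fp == NULL) {
        perror("fopen");
        return 1;
    }
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            fscanf(fp, "%c ", &c);
            graph[i][j] = c - '0';
        }
    }
    fclose(fp);

    primMST(graph); /* no warning: the argument decays to int (*)[N], which is what primMST() expects */
    return 0;
}

If the matrix really must live on the heap (for example because N is large), int (*graph)[N] = malloc(N * sizeof *graph); gives a single contiguous block that can still be indexed as graph[i][j] and passed to primMST() unchanged.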
I have implemented my graph as an adjacency list. How can I determine whether one vertex is reachable from another when the user provides the two indexes?
int isReachable(int nodes, int graph[nodes][nodes], int src, int dest)
Checking for direct neighbors is easy, but I struggle with implementing the algorithm as a whole.
Code from: http://www.geeksforgeeks.org/transitive-closure-of-a-graph/
void transitiveClosure(int graph[][V])
{
    int reach[V][V], i, j, k;

    /* Initialize the solution matrix same as the input graph matrix. Or
       we can say the initial values of shortest distances are based
       on shortest paths considering no intermediate vertex. */
    for (i = 0; i < V; i++)
        for (j = 0; j < V; j++)
            reach[i][j] = graph[i][j];

    /* Add all vertices one by one to the set of intermediate vertices.
       ---> Before the start of an iteration, we have reachability values for
            all pairs of vertices such that the reachability values
            consider only the vertices in set {0, 1, 2, .. k-1} as
            intermediate vertices.
       ---> After the end of an iteration, vertex no. k is added to the
            set of intermediate vertices and the set becomes {0, 1, .. k} */
    for (k = 0; k < V; k++)
    {
        // Pick all vertices as source one by one
        for (i = 0; i < V; i++)
        {
            // Pick all vertices as destination for the
            // above picked source
            for (j = 0; j < V; j++)
            {
                // If vertex k is on a path from i to j,
                // then make sure that the value of reach[i][j] is 1
                reach[i][j] = reach[i][j] || (reach[i][k] && reach[k][j]);
            }
        }
    }

    // Print the reachability matrix
    printSolution(reach);
}
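To answer a single query with the signature given above, a minimal sketch (assuming the matrix representation used in that signature, where graph[u][v] != 0 means there is an edge from u to v; the queue array is just for this sketch) is a breadth-first search from src that stops as soon as dest is seen:

#include <stdlib.h>

/* Returns 1 if dest is reachable from src, 0 otherwise. */
int isReachable(int nodes, int graph[nodes][nodes], int src, int dest)
{
    if (src == dest)
        return 1;

    int *visited = calloc(nodes, sizeof(int));
    int *queue = malloc(nodes * sizeof(int));
    int head = 0, tail = 0;

    visited[src] = 1;
    queue[tail++] = src;

    int found = 0;
    while (head < tail && !found) {
        int u = queue[head++];
        for (int v = 0; v < nodes; v++) {
            if (graph[u][v] && !visited[v]) {
                if (v == dest) { found = 1; break; }
                visited[v] = 1;
                queue[tail++] = v;
            }
        }
    }
    free(visited);
    free(queue);
    return found;
}

If you need many reachability queries on the same graph, computing the full transitive closure once (as above) and then simply returning reach[src][dest] is the better trade-off.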
SOLUTION: the problem was specific to my code -- I placed srand(time(NULL)); inside the loop when it should have been placed outside it.
I'm trying to count the number of comparisons in a quick sort algorithm. I had a recursive version working fine, but it kept segfaulting for large array sizes -- it was running out of stack space.
So now I've resorted to an iterative approach, and it works. That is, except for my counter for the number of comparisons.
It's returning inconsistent results such as...
unsorted: [9][8][7][6][5][4][3][2][1][0]
sorted: [0][1][2][3][4][5][6][7][8][9]
Numer of comparisons: 22
unsorted: [9][8][7][6][5][4][3][2][1][0]
sorted: [0][1][2][3][4][5][6][7][8][9]
Numer of comparisons: 19749794
unsorted: [9][8][7][6][5][4][3][2][1][0]
sorted: [0][1][2][3][4][5][6][7][8][9]
Numer of comparisons: 6088231
my code for the iterative quick sort is...
#include <time.h>

#define BUFLEN 6400

extern int buf[BUFLEN];
extern int quick_count; // comparison count

struct stack {
    int stk[BUFLEN];
    int top;
};
struct stack s;

void push(int x);
int pop();

void iterative_quick_sort(int buf[], int n) {
    int left_ptr, right_ptr, pivot_index, pivot, temp, l, r;
    if (n < 2) // case the partitioning has reached the atomic element
        return;
    r = n - 1;
    l = 0;
    s.top = -1;
loop:
    do {
        srand(time(NULL));
        if ((r - l) == 0)
            pivot_index = 1;
        else {
            pivot_index = rand() % (r - l);
            pivot_index += l;
        }
        pivot = buf[pivot_index]; // pivot holds the value of the pivot element
        left_ptr = l;
        right_ptr = r;
        if ((r - l) != 0 || (r - l) != 1) {
            while (1) {
                while (buf[left_ptr] < pivot) { // loop and increment left_ptr until an element on the left side is larger than the pivot
                    left_ptr++;
                } // now left_ptr holds the index of the value that needs to be swapped with an element from the right side
                while (pivot < buf[right_ptr]) { // loop and decrement right_ptr until an element on the right side is smaller than the pivot
                    right_ptr--;
                } // now right_ptr holds the index of the value that needs to be swapped with an element from the left side
                quick_count++;
                if (left_ptr >= right_ptr)
                    break; // once the pointers reach or overlap each other, break the loop
                // perform swap with temporary variable temp
                temp = buf[left_ptr];
                buf[left_ptr] = buf[right_ptr];
                buf[right_ptr] = temp;
            }
        }
        if (l == (n - 2))
            break;
        else if ((r - l) >= 2) {
            // goto loop with left side values
            push(r);
            r = pivot_index + 1;
            goto loop;
        }
        else {
            // goto loop with right side values
            l = r;
            r = pop();
            goto loop;
        }
    } while (1);
}

// cite http://www.sanfoundry.com/c-program-stack-implementation/
void push(int x) {
    s.top = s.top + 1;
    s.stk[s.top] = x;
}

int pop() {
    int x = s.stk[s.top];
    s.top = s.top - 1;
    return x;
}
Per request, I've added the code that calls the quick sort (note: quick_count is initialized to zero as a global variable -- it is used as an extern).
int unsorted_quick[] = {9,8,7,6,5,4,3,2,1,0}; // n = 10

// print unsorted_quick
printf("\nSecond, we sort the following array by using the quick sort algorithm\n");
for (i = 0; i < 10; i++) {
    printf("[%d]", unsorted_quick[i]);
}
printf("\n");
// fill buf with the unsorted quick array
for (i = 0; i < 10; i++) {
    buf[i] = unsorted_quick[i];
}
iterative_quick_sort(buf, 10); // call iterative_quick_sort()
// print sorted
for (i = 0; i < 10; i++) {
    printf("[%d]", buf[i]);
}
printf("\nNumber of comparisons: %d\n", quick_count); // print count
You are calling srand(time(NULL)) inside the loop that chooses the random pivot. This function must be called only once, to initialise the state of the random number generator.
The generator needs a starting seed, which is set by calling srand(). Then, given that seed, each subsequent call to rand() gives you the next number of a reproducible sequence.
Starting from the same seed, you will get the same random sequence.
The problem is that you set the seed inside the loop, and the seed is the same number each time, so you keep restarting the sequence and always get the same "random" value. This happens because time(NULL) is the current time in seconds, which means it returns the same value within the same second.
You must put the srand() call before the loop: do {
Here is a nice explanation of what is happening: Problems when calling srand(time(NULL)) inside rollDice function
And also here: srand() — why call it only once?
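A minimal sketch of the fix in the structure of the code above (only the placement of srand changes; everything else stays as in the question):

srand(time(NULL)); // seed once, before the partitioning loop
loop:
do {
    if ((r - l) == 0)
        pivot_index = 1;
    else {
        pivot_index = rand() % (r - l); // now a fresh value on every iteration
        pivot_index += l;
    }
    /* ... rest of the loop unchanged ... */
} while (1);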
I'm in the process of creating a maze simulation of a mouse running through a maze. Dijkstra's algorithm is great and all, but it isn't particularly effective when cats are involved, which is why I'm trying to modify my existing Dijkstra implementation into an A* search with a heuristic for avoiding the cats which move throughout the maze.
The problem I'm having while looking through some pseudocode is that I am unsure which of my structures are equivalent and what I will need to introduce to get this working. Can anyone provide any tips or nudges in the right direction?
struct path_node *shortestPath(float A[GsizeSqr][GsizeSqr], int xi, int yi, int xf, int yf)
{
    /*
      Solves for the shortest path between grid point (xi,yi) and (xf,yf)
      on the graph encoded by A using Dijkstra's shortest path method.
      The shortest path is returned as a linked list of nodes to be visited.
      Keep track of visited nodes, and the predecessor
      for each node that has been explored while computing the shortest path.
    */
    if (xi < 0 || xi >= Gsize || yi < 0 || yi >= Gsize || xf < 0 || xf >= Gsize || yf < 0 || yf >= Gsize)
    {
        fprintf(stderr, "shortestPath(): Endpoint(s) outside of the graph!\n");
        return (NULL);
    }

    int i, j, pCount, findN, row, col, icnt, stNode, finNode, xcnt, ycnt;

    finNode = yf * ceil(sqrt(GsizeSqr)) + xf; // index of the finish node given its row and col values
    stNode  = yi * ceil(sqrt(GsizeSqr)) + xi; // index of the start node given its row and col values

    int p[GsizeSqr];     // predecessors
    int d[GsizeSqr];     // distance from source
    int flags[GsizeSqr]; // 0 for unvisited, 1 for visited
    int g_score[GsizeSqr];
    int f_score[GsizeSqr];

    PriorityQueue Q; // Initialize priority queue that stores (priority, key) values
    Q = init_heap(GsizeSqr);

    path_node *start; // Maintain a pointer to the starting node
    start = newPathNode(xi, yi);
    start->next = NULL;

    // Initialize p and d with "null" and "infinity" values (note: -1 means null and 10000000 means inf)
    for (i = 0; i < GsizeSqr; i++) {
        p[i] = -1;
        d[i] = 10000000;
        flags[i] = 0;
    }
    for (i = 0; i < GsizeSqr; i++) {
        node in;
        in = create_node(10000000, i);
        enqueue(Q, in);
    }

    // (Note: PQ uses 0 as a sentinel node to make calculating left, right, and parent indexes easier; elements begin at 1)
    decrease_priority(Q, stNode + 1, 0); // setting start node in PQ
    d[stNode] = 0;
    g_score[stNode] = 0;
    // For my heuristic, I'm thinking of just using Manhattan distances between the mouse and cat agents
    f_score[stNode] = g_score[stNode] + heuristic(xi, yi, xf, yf);

    while (Q->heap_size != 1) { // while Q not empty
        node u;
        u = dequeue(Q);
        flags[u.key] = 1;
        // For each adjacent node A[u.key][i]
        for (i = 0; i < GsizeSqr; i++) {
            if (A[u.key][i] != 0) {
                findN = find_node(Q, i);
                if (flags[i] == 0) { // If it is unvisited and the new path distance is shorter
                    if (findN != 0 && (d[i] >= A[u.key][i] + d[u.key])) { // reset values, update the PQ and mark visited
                        d[i] = A[u.key][i] + d[u.key];
                        p[i] = u.key;
                        flags[i] = 1;
                        decrease_priority(Q, findN, d[i]);
                    }
                }
            }
        }
    }

    // Begin selectively filling our LL with values from p[]
    icnt = finNode;
    appendLL(start, xf, yf);
    while (icnt != stNode) {
        icnt = p[icnt];
        xcnt = icnt % (int) ceil(sqrt(GsizeSqr));
        ycnt = icnt / (int) ceil(sqrt(GsizeSqr));
        appendLL(start, xcnt, ycnt);
    }
    clean_heap(Q);
    return reverseLL(start);
}
You possibly already know this, but the only theoretical difference between A* and Dijkstra's algorithm in terms of best-first search is the cost function f(n). Dijkstra's algorithm is f(n) = g(n) whilst A* is f(n) = g(n) + h(n). Read AIMA for details.
In terms of your code, it currently stores g(n) = A[u.key][i] + d[u.key] in d[i], so you need to change it to account for g(n) + h(n). You don't need the new g_score and f_score variables; just add the heuristic to the end of that line and to the initialization of d[stNode].
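For illustration, here is one correct way to wire the heuristic into the relaxation step above. This is a variation on the suggestion: it keeps the true path cost g(n) in d[] and uses f(n) = g(n) + h(n) only as the priority handed to the queue, with heuristic() assumed to be the Manhattan-distance function already referenced in the question's code (a sketch, not a drop-in replacement for the whole loop):

/* hypothetical A*-style relaxation, adapted from the Dijkstra loop above */
int xn = i % (int) ceil(sqrt(GsizeSqr)); /* column of neighbour i */
int yn = i / (int) ceil(sqrt(GsizeSqr)); /* row of neighbour i */

int g_new = A[u.key][i] + d[u.key];      /* g(n): cost of reaching i through u */
if (findN != 0 && g_new < d[i]) {
    d[i] = g_new;                        /* keep storing g(n) here */
    p[i] = u.key;
    /* priority is f(n) = g(n) + h(n) */
    decrease_priority(Q, findN, g_new + heuristic(xn, yn, xf, yf));
}

With an admissible heuristic (Manhattan distance on a grid with unit step costs), this explores fewer nodes than plain Dijkstra while still returning a shortest path.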