I was trying to solve the problem where if I was given an array of int which allows duplicate,
I need to find the count of how many permutation of this array there are such that each adjacent pair of integer in the array whose sum is a perfect square. I have derived the dp solution, but it was wrong, I looked at the solution, it was very similar to mine but with slight difference, can someone please look at it and tell me why mines won't work but the sample answer does?
My train of thought is, if I have g[i][j] == 1 telling me i and j forms a pair whose sum is a perfect square, 0 otherwise. And I have a dp[s][j] tells me if my node visited state is s, whose binary representation tells me all the node index that I have visited, and it ends in node with index j, I need DP for hamitonian path in a graph to calculate all possible ways to reach state s that ends in node index j, then the answer will be the sum of state s where all node is visited and it ends in node from 0 to n - 1. In terms of avoiding duplicate, I sort the input array of number and if in same layer of search, if we have nums[i - 1] == nums[i] but we have not visited nums[i], it means we just back out from an earlier dfs that visited the same number, we will not do that again.
I will paste the code here
The below is my answer, which will fail if the array contains duplicate
int n = nums.length;
int[][] dp = new int[1 << n][n];
int[][] g = new int[n][n];
Arrays.sort(nums);
for(int i = 0; i < n; i++) {
for(int j = 0; j < n; j++) {
if(i != j && (Math.ceil(Math.sqrt(nums[i] + nums[j])) ==
Math.sqrt(nums[i] + nums[j]))) {
g[i][j] = 1;
}
}
}
for(int s = 0; s < (1 << n); s++) {
for(int j = 0; j < n; j++) {
if(s == (1 << j)) {
dp[1 << j][j] = (j == 0 || nums[j - 1] != nums[j]) ? 1 : 0;
continue;
}
if((s & (1 << j)) == 0) continue;
for(int i = 0; i < n; i++) {
if(g[i][j] == 0) continue;
if((s & (1 << i)) == 0) continue;
if(i > 0 && nums[i - 1] == nums[i] && ((s & (1 << (i - 1))) == 0)) continue;
dp[s][j] += dp[s & ~(1 << j)][i];
}
}
}
int res = 0;
int finish = (1 << n) - 1;
for(int i = 0; i < n; i++) {
res += dp[finish][i];
}
return res;
The below is the sample answer:
int n = nums.length;
boolean[][] g = new boolean[n][n];
int[][] dp = new int[1 << n][n];
Arrays.sort(nums);
for(int i = 0; i < n; i++) {
for(int j = 0; j < n; j++) {
if(Math.floor(Math.sqrt(nums[i] + nums[j])) * Math.floor(Math.sqrt(nums[i] + nums[j])) == nums[i] + nums[j]) {
g[i][j] = g[j][i] = true;
}
}
}
for(int i = 0; i < nums.length; i++) {
if(i == 0 || (i > 0 && nums[i - 1] != nums[i]))
dp[1 << i][i] = 1;
}
for(int s = 0; s < (1 << n); s++) {
for(int j = 0; j < n; j++) {
if((s & (1 << j)) == 0) continue;
for(int i = 0; i < n; i++) {
if(g[i][j]) {
if((s & (1 << i)) != 0) continue;
if(i > 0 && nums[i] == nums[i - 1] && ((s & (1 << (i - 1))) == 0)) continue;
dp[s | (1 << i)][i] += dp[s][j];
}
}
}
}
int ans = 0;
for(int l = 0; l < n; l++) {
ans += dp[(1 << n) - 1][l];
}
return ans;
Related
I have a function that checks all the rows of a sudoku board and outputs and exits if invalid.
(a valid row has 1-9 on a 9 square row)
I've been staring at my logic for 30 minutes and can't see why a board that I know is valid keeps spitting out invalid.
To try and make it a little easier to read, here's the part in question...
void* checkRow(void* p){
int check[9] = {0};
parameters* temp = (parameters*) p;
int tempRow = temp -> row;
int tempCol = temp -> col;
for (int i = 0; i < SIZE; i++){ // looping to find each # 1-9
for (int j = 0; j < SIZE; j++){
if (board[i][j] == 1)
check[0] = 1;
if (board[i][j] == 2)
check[1] = 1;
if (board[i][j] == 3)
check[2] = 1;
if (board[i][j] == 4)
check[3] = 1;
if (board[i][j] == 5)
check[4] = 1;
if (board[i][j] == 6)
check[5] = 1;
if (board[i][j] == 7)
check[6] = 1;
if (board[i][j] == 8)
check[7] = 1;
if (board[i][j] == 9) // changing value to 1 if found
check[8] = 1;
int k = 0;
while(k < 9){
if (check[k] == 0){
printf("invalid solution"); // should only print if 1-9 isn't found right?
exit(0);
}
k++;
}
memset(check, 0, sizeof(check)); // resetting array to zero
}
}
}
Here's everything, just in case.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#define SIZE 9
typedef struct{ // referenced from assignment page
int row;
int col;
} parameters;
int board[SIZE][SIZE]; // global variable holding board
pthread_t tRow, tCol, t1, t2, t3, t4, t5, t6, t7, t8, t9;
void setThreads();
void* checkRow(void* p);
int main(int argc, char **argv){ // referenced project 1. Supplies command line input for file
FILE *fp;
fp = fopen(argv[1], "r");
if (fp == NULL) // validity check
exit(1);
for (int i = 0; i < SIZE; i++){
for(int j = 0; j < SIZE; j++){
fscanf(fp, "%d", &board[i][j]);
}
}
setThreads();
printf("rows check out");
return 0;
}
void setThreads(){
parameters *rowcolparameter = (parameters *) malloc(sizeof(parameters));
rowcolparameter -> row = 0;
rowcolparameter -> col = 0;
parameters *square1 = (parameters *) malloc(sizeof(parameters));
square1 -> row = 0;
square1 -> col = 0;
parameters *square2 = (parameters *) malloc(sizeof(parameters));
square2 -> row = 0;
square2 -> col = 3;
parameters *square3 = (parameters *) malloc(sizeof(parameters));
square3 -> row = 0;
square3 -> col = 6;
parameters *square4 = (parameters *) malloc(sizeof(parameters));
square4 -> row = 3;
square4 -> col = 0;
parameters *square5 = (parameters *) malloc(sizeof(parameters));
square5 -> row = 3;
square5 -> col = 3;
parameters *square6 = (parameters *) malloc(sizeof(parameters));
square6 -> row = 3;
square6 -> col = 6;
parameters *square7 = (parameters *) malloc(sizeof(parameters));
square7 -> row = 6;
square7 -> col = 0;
parameters *square8 = (parameters *) malloc(sizeof(parameters));
square8 -> row = 6;
square8 -> col = 3;
parameters *square9 = (parameters *) malloc(sizeof(parameters));
square9 -> row = 6;
square9 -> col = 6;
pthread_create(&tRow, NULL, checkRow, rowcolparameter);
pthread_join(tRow, NULL);
}
void* checkRow(void* p){
int check[9] = {0};
parameters* temp = (parameters*) p;
int tempRow = temp -> row;
int tempCol = temp -> col;
for (int i = 0; i < SIZE; i++){ // looping to find each # 1-9
for (int j = 0; j < SIZE; j++){
if (board[i][j] == 1)
check[0] = 1;
if (board[i][j] == 2)
check[1] = 1;
if (board[i][j] == 3)
check[2] = 1;
if (board[i][j] == 4)
check[3] = 1;
if (board[i][j] == 5)
check[4] = 1;
if (board[i][j] == 6)
check[5] = 1;
if (board[i][j] == 7)
check[6] = 1;
if (board[i][j] == 8)
check[7] = 1;
if (board[i][j] == 9)
check[8] = 1;
int k = 0;
while(k < 9){
if (check[k] == 0){
printf("invalid solution"); // it should only say invalid if 1-9 wasn't found right?
exit(0);
}
k++;
}
memset(check, 0, sizeof(check)); // resetting array to 0
}
}
}
Thank you.
It seems that the "check part" is inside the inner loop! Shouldn't it be outside the inner loop. Like:
for (int i = 0; i < SIZE; i++){ // looping to find each # 1-9
for (int j = 0; j < SIZE; j++){
if (board[i][j] == 1)
check[0] = 1;
if (board[i][j] == 2)
check[1] = 1;
if (board[i][j] == 3)
check[2] = 1;
if (board[i][j] == 4)
check[3] = 1;
if (board[i][j] == 5)
check[4] = 1;
if (board[i][j] == 6)
check[5] = 1;
if (board[i][j] == 7)
check[6] = 1;
if (board[i][j] == 8)
check[7] = 1;
if (board[i][j] == 9) // changing value to 1 if found
check[8] = 1;
} // End the inner loop
// Now do the check
int k = 0;
while(k < 9){
if (check[k] == 0){
printf("invalid solution"); // should only print if 1-9 isn't found right?
exit(0);
}
k++;
}
memset(check, 0, sizeof(check)); // resetting array to zero
}
BTW: you can reduce the inner-loop code a lot like this:
// inner loop
for (int j = 0; j < SIZE; j++){
if (board[i][j] < 1 || board[i][j] > 9) exit(1); // Illegal board value
check[board[i][j] - 1] = 1; // Mark value as found
}
I was trying to optimize the Radix Sort code, because I felt there was room for it as traditional codes in books and on web seem a direct copy of one another and also they work very slow as they take an arbitrary number such as 10 for modulo operation. I have optimized the code as far as I could go, maybe I might have missed some optimization techniques. In that case please enlighten me.
Motivation for optimization:
http://codercorner.com/RadixSortRevisited.htm
http://stereopsis.com/radix.html
I was unable to implement all the optimizations in the articles, mostly it was beyond my skills and understanding and lack of sufficient time, if you can feel free to implement them.
EDIT 4
This Java version of Radix Sort calculates all histograms in 1 read and does not need to fill array Z with zeros after every LSB sort along with the usual ability to skip sorting and jump to next LSB sorting if all previous LSB's are same. As usual this is only for 32-bit integers but a 64-bit version can be created from it.
protected static int[] DSC(int A[])// Sorts in descending order
{
int tmp[] = new int[A.length] ;
int Z[] = new int[1024] ;
int i, Jump, Jump2, Jump3, Jump4, swap[] ;
Jump = A[0] & 255 ;
Z[Jump] = 1 ;
Jump2 = ((A[0] >> 8) & 255) + 256 ;
Z[Jump2] = 1 ;
Jump3 = ((A[0] >> 16) & 255) + 512 ;
Z[Jump3] = 1 ;
Jump4 = (A[0] >> 24) + 768 ;
Z[Jump4] = 1 ;
// Histograms creation
for (i = 1 ; i < A.length; ++i)
{
++Z[A[i] & 255] ;
++Z[((A[i] >> 8) & 255) + 256] ;
++Z[((A[i] >> 16) & 255) + 512] ;
++Z[(A[i] >> 24) + 768] ;
}
// 1st LSB Byte Sort
if( Z[Jump] != A.length )
{
Z[0] = A.length - Z[0];
for (i = 1; i < 256; ++i)
{
Z[i] = Z[i - 1] - Z[i];
}
for (i = 0; i < A.length; ++i)
{
tmp[Z[A[i] & 255]++] = A[i];
}
swap = A ; A = tmp ; tmp = swap ;
}
// 2nd LSB Byte Sort
if( Z[Jump2] != A.length )
{
Z[256] = A.length - Z[256];
for (i = 257; i < 512; ++i)
{
Z[i] = Z[i - 1] - Z[i];
}
for (i = 0; i < A.length; ++i)
{
tmp[Z[((A[i] >> 8) & 255) + 256]++] = A[i];
}
swap = A ; A = tmp ; tmp = swap ;
}
// 3rd LSB Byte Sort
if( Z[Jump3] != A.length )
{
Z[512] = A.length - Z[512];
for (i = 513; i < 768; ++i)
{
Z[i] = Z[i - 1] - Z[i];
}
for (i = 0; i < A.length; ++i)
{
tmp[Z[((A[i] >> 16) & 255) + 512]++] = A[i];
}
swap = A ; A = tmp ; tmp = swap ;
}
// 4th LSB Byte Sort
if( Z[Jump4] != A.length )
{
Z[768] = A.length - Z[768];
for (i = 769; i < Z.length; ++i)
{
Z[i] = Z[i - 1] - Z[i];
}
for (i = 0; i < A.length; ++i)
{
tmp[Z[(A[i] >> 24) + 768]++] = A[i];
}
return tmp ;
}
return A ;
}
The Java version ran faster with != sign than == sign
if( Z[Jump] != A.length )
{
// lines of code
}...
but in C the below version was on average, 25% faster (with equalto sign) than its counterpart with != sign. Your hardware might react differently.
if( Z[Jump] == A.length );
else
{
// lines of code
}...
Below is the C code ( "long" on my machine is 32 bits )
long* Radix_2_ac_long(long *A, size_t N, long *Temp)// Sorts in ascending order
{
size_t Z[1024] = {0};
long *swp;
size_t i, Jump, Jump2, Jump3, Jump4;
// Sort-circuit set-up
Jump = *A & 255;
Z[Jump] = 1;
Jump2 = ((*A >> 8) & 255) + 256;
Z[Jump2] = 1;
Jump3 = ((*A >> 16) & 255) + 512;
Z[Jump3] = 1;
Jump4 = (*A >> 24) + 768;
Z[Jump4] = 1;
// Histograms creation
for(i = 1 ; i < N ; ++i)
{
++Z[*(A+i) & 255];
++Z[((*(A+i) >> 8) & 255) + 256];
++Z[((*(A+i) >> 16) & 255) + 512];
++Z[(*(A+i) >> 24) + 768];
}
// 1st LSB byte sort
if( Z[Jump] == N );
else
{
for( i = 1 ; i < 256 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i < N ; --i )
{
*(--Z[*(A+i) & 255] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 2nd LSB byte sort
if( Z[Jump2] == N );
else
{
for( i = 257 ; i < 512 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i < N ; --i )
{
*(--Z[((*(A+i) >> 8) & 255) + 256] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 3rd LSB byte sort
if( Z[Jump3] == N );
else
{
for( i = 513 ; i < 768 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i < N ; --i )
{
*(--Z[((*(A+i) >> 16) & 255) + 512] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 4th LSB byte sort
if( Z[Jump4] == N );
else
{
for( i = 769 ; i < 1024 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i < N ; --i )
{
*(--Z[(*(A+i) >> 24) + 768] + Temp) = *(A+i);
}
return Temp;
}
return A;
}
EDIT 5
The sort now handles negative numbers too. Only some minor/negligible tweaks to the code did it. It runs a little slower as a result but the effect is not significant. Coded in C, below ( "long" on my system is 32 bits )
long* Radix_Sort(long *A, size_t N, long *Temp)
{
size_t Z[1024] = {0};
long *swp;
size_t Jump, Jump2, Jump3, Jump4;
long i;
// Sort-circuit set-up
Jump = *A & 255;
Z[Jump] = 1;
Jump2 = ((*A >> 8) & 255) + 256;
Z[Jump2] = 1;
Jump3 = ((*A >> 16) & 255) + 512;
Z[Jump3] = 1;
Jump4 = ((*A >> 24) & 255) + 768;
Z[Jump4] = 1;
// Histograms creation
for(i = 1 ; i < N ; ++i)
{
++Z[*(A+i) & 255];
++Z[((*(A+i) >> 8) & 255) + 256];
++Z[((*(A+i) >> 16) & 255) + 512];
++Z[((*(A+i) >> 24) & 255) + 768];
}
// 1st LSB byte sort
if( Z[Jump] == N );
else
{
for( i = 1 ; i < 256 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i >= 0 ; --i )
{
*(--Z[*(A+i) & 255] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 2nd LSB byte sort
if( Z[Jump2] == N );
else
{
for( i = 257 ; i < 512 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i >= 0 ; --i )
{
*(--Z[((*(A+i) >> 8) & 255) + 256] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 3rd LSB byte sort
if( Z[Jump3] == N );
else
{
for( i = 513 ; i < 768 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i >= 0 ; --i )
{
*(--Z[((*(A+i) >> 16) & 255) + 512] + Temp) = *(A+i);
}
swp = A;
A = Temp;
Temp = swp;
}
// 4th LSB byte sort and negative numbers sort
if( Z[Jump4] == N );
else
{
for( i = 897 ; i < 1024 ; ++i )// -ve values frequency starts after index 895, i.e at 896 ( 896 = 768 + 128 ), goes upto 1023
{
Z[i] = Z[i-1] + Z[i];
}
Z[768] = Z[768] + Z[1023];
for( i = 769 ; i < 896 ; ++i )
{
Z[i] = Z[i-1] + Z[i];
}
for( i = N-1 ; i >= 0 ; --i )
{
*(--Z[((*(A+i) >> 24) & 255) + 768] + Temp) = *(A+i);
}
return Temp;
}
return A;
}
EDIT 6
Below is the pointer optimized version ( accesses array locations via pointers ) that takes on average, approximately 20% less time to sort than the one above. It also uses 4 separate arrays for faster address calculation ( "long" on my system is 32 bits ).
long* Radix_Sort(long *A, size_t N, long *Temp)
{
long Z1[256] ;
long Z2[256] ;
long Z3[256] ;
long Z4[256] ;
long T = 0 ;
while(T != 256)
{
*(Z1+T) = 0 ;
*(Z2+T) = 0 ;
*(Z3+T) = 0 ;
*(Z4+T) = 0 ;
++T;
}
size_t Jump, Jump2, Jump3, Jump4;
// Sort-circuit set-up
Jump = *A & 255 ;
Z1[Jump] = 1;
Jump2 = (*A >> 8) & 255 ;
Z2[Jump2] = 1;
Jump3 = (*A >> 16) & 255 ;
Z3[Jump3] = 1;
Jump4 = (*A >> 24) & 255 ;
Z4[Jump4] = 1;
// Histograms creation
long *swp = A + N;
long *i = A + 1;
for( ; i != swp ; ++i)
{
++Z1[*i & 255];
++Z2[(*i >> 8) & 255];
++Z3[(*i >> 16) & 255];
++Z4[(*i >> 24) & 255];
}
// 1st LSB byte sort
if( Z1[Jump] == N );
else
{
swp = Z1+256 ;
for( i = Z1+1 ; i != swp ; ++i )
{
*i = *(i-1) + *i;
}
swp = A-1;
for( i = A+N-1 ; i != swp ; --i )
{
*(--Z1[*i & 255] + Temp) = *i;
}
swp = A;
A = Temp;
Temp = swp;
}
// 2nd LSB byte sort
if( Z2[Jump2] == N );
else
{
swp = Z2+256 ;
for( i = Z2+1 ; i != swp ; ++i )
{
*i = *(i-1) + *i;
}
swp = A-1;
for( i = A+N-1 ; i != swp ; --i )
{
*(--Z2[(*i >> 8) & 255] + Temp) = *i;
}
swp = A;
A = Temp;
Temp = swp;
}
// 3rd LSB byte sort
if( Z3[Jump3] == N );
else
{
swp = Z3 + 256 ;
for( i = Z3+1 ; i != swp ; ++i )
{
*i = *(i-1) + *i;
}
swp = A-1;
for( i = A+N-1 ; i != swp ; --i )
{
*(--Z3[(*i >> 16) & 255] + Temp) = *i;
}
swp = A;
A = Temp;
Temp = swp;
}
// 4th LSB byte sort and negative numbers sort
if( Z4[Jump4] == N );
else
{
swp = Z4 + 256 ;
for( i = Z4+129 ; i != swp ; ++i )
{
*i = *(i-1) + *i;
}
*Z4 = *Z4 + *(Z4+255) ;
swp = Z4 + 128 ;
for( i = Z4+1 ; i != swp ; ++i )
{
*i = *(i-1) + *i;
}
swp = A - 1;
for( i = A+N-1 ; i != swp ; --i )
{
*(--Z4[(*i >> 24) & 255] + Temp) = *i;
}
return Temp;
}
return A;
}
The edit 4 version is good enough if the original and temp arrays fit in cache. If the array size is much greater than cache size, most of the overhead is due to the random order writes to the arrays. A hybrid msb/lsb radix sort can avoid this issue. For example split the array into 256 bins according to the most significant byte, then do a lsb radix sort on each of the 256 bins. The idea here is that a pair (original and temp) of bins will fit within the cache, where random order writes are not an issue (for most cache implementations).
For a 8MB cache, the goal is for each of the bins to be < 4MB in size = 1 million 32 bit integers if the integers evenly distribute into the bins. This strategy would work for array size up to 256 million 32 bit integers. For larger arrays, the msb phase could split up the array into 1024 bins, for up to 1 billion 32 bit integers. On my system, sorting 16,777,216 (2^24) 32 bit integers with a classic 8,8,8,8 lsb radix sort took 0.45 seconds, while the hybrid 8 msb : 8,8,8 lsb took 0.24 seconds.
// split array into 256 bins according to most significant byte
void RadixSort(uint32_t * a, size_t count)
{
size_t aIndex[260] = {0}; // count / array
uint32_t * b = new uint32_t [count]; // allocate temp array
size_t i;
for(i = 0; i < count; i++) // generate histogram
aIndex[1+((size_t)(a[i] >> 24))]++;
for(i = 2; i < 257; i++) // convert to indices
aIndex[i] += aIndex[i-1];
for(i = 0; i < count; i++) // sort by msb
b[aIndex[a[i]>>24]++] = a[i];
for(i = 256; i; i--) // restore aIndex
aIndex[i] = aIndex[i-1];
aIndex[0] = 0;
for(i = 0; i < 256; i++) // radix sort the 256 bins
RadixSort3(&b[aIndex[i]], &a[aIndex[i]], aIndex[i+1]-aIndex[i]);
delete[] b;
}
// sort a bin by 3 least significant bytes
void RadixSort3(uint32_t * a, uint32_t *b, size_t count)
{
size_t mIndex[3][256] = {0}; // count / matrix
size_t i,j,m,n;
uint32_t u;
if(count == 0)
return;
for(i = 0; i < count; i++){ // generate histograms
u = a[i];
for(j = 0; j < 3; j++){
mIndex[j][(size_t)(u & 0xff)]++;
u >>= 8;
}
}
for(j = 0; j < 3; j++){ // convert to indices
m = 0;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 3; j++){ // radix sort
for(i = 0; i < count; i++){ // sort by current lsb
u = a[i];
m = (size_t)(u>>(j<<3))&0xff;
b[mIndex[j][m]++] = u;
}
std::swap(a, b); // swap ptrs
}
}
Example code for classic lsb radix sorts:
Example C++ lsb radix sort using 8,8,8,8 bit fields:
typedef unsigned int uint32_t;
void RadixSort(uint32_t * a, size_t count)
{
size_t mIndex[4][256] = {0}; // count / index matrix
uint32_t * b = new uint32_t [count]; // allocate temp array
size_t i,j,m,n;
uint32_t u;
for(i = 0; i < count; i++){ // generate histograms
u = a[i];
for(j = 0; j < 4; j++){
mIndex[j][(size_t)(u & 0xff)]++;
u >>= 8;
}
}
for(j = 0; j < 4; j++){ // convert to indices
m = 0;
for(i = 0; i < 256; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 4; j++){ // radix sort
for(i = 0; i < count; i++){ // sort by current lsb
u = a[i];
m = (size_t)(u>>(j<<3))&0xff;
b[mIndex[j][m]++] = u;
}
std::swap(a, b); // swap ptrs
}
delete[] b;
}
Example C++ code using 16,16 bit fields:
typedef unsigned int uint32_t;
uint32_t * RadixSort(uint32_t * a, size_t count)
{
size_t mIndex[2][65536] = {0}; // count / index matrix
uint32_t * b = new uint32_t [count]; // allocate temp array
size_t i,j,m,n;
uint32_t u;
for(i = 0; i < count; i++){ // generate histograms
u = a[i];
for(j = 0; j < 2; j++){
mIndex[j][(size_t)(u & 0xffff)]++;
u >>= 16;
}
}
for(j = 0; j < 2; j++){ // convert to indices
m = 0;
for(i = 0; i < 65536; i++){
n = mIndex[j][i];
mIndex[j][i] = m;
m += n;
}
}
for(j = 0; j < 2; j++){ // radix sort
for(i = 0; i < count; i++){ // sort by current lsb
u = a[i];
m = (size_t)(u>>(j<<4))&0xffff;
b[mIndex[j][m]++] = u;
}
std::swap(a, b); // swap ptrs
}
delete[] b;
return(a);
}
N & 15 , N & 31 , N & 63 .... and so on , which of these bitwise
operations takes least time?
They are same. Do not take it bad, but optimizing for speed without knowing how long things last may end up quite bad. And even when you know the timing, hardware is very complicated nowadays and quite unpredictable. You program in java, that is another layer of insanely complex system. The same code may be faster today and slower tomorrow. Your say approximately 2.232891909840167 times faster. In reality, you have measurement on one hardware and software configuration with one set of data and you can only hope the measurement is representative enough. Unfortunately, it is not always the case.
I rewrote your function. It is shorter and simpler, yet does not seem to be slower. Compilers tend to like code that is not too clever, as there are many optimizations for simple cases. The correction for negative numbers is not particulary nice, you can delete it if you do not like it. It seems to work best for 8 bits and 11 bits, probably due to cache sizes, have a look at comments of rcgldr.
EDIT
#ytoamn you are right, if all is in the first bucket the loop should continue, not break. That was a bug. To the other changes, I would rather avoid the contract you have done now. I think there are three natural contracts for sorting function. First one is sorting the original array and returning null. Second is sorting the original array and return it. The third is returning new sorted array and keeping the original array intact. I like the first one, as its behaviour is unambiguous. The way you have it now you should add big warning to the documentation, that the original array has changed and is returned from the function is some cases and in other not. Second thing I would avoid is the old C code style. You should define loop variable in the loop if you need it only there. Defining it globally injects dependency that may lead to bugs. And it has no advantages here, as properly defined loop variables would share the space in the end anyway. Compiler is well aware of the scope, you should use the smallest scope you need.
EDIT2
Feel free to comment directly under my post :-) Local variables are just addresses on the stack. You allocate memory when constructing object which is not the case here. As for the array, think about this code:
public static void Tst(int[] A) {
int[] tmp = new int[A.length];
A[0] = 6;
A = tmp; // changes what parameter A contains
A[0] = 7;
}
public static void main(String[] args) {
int[] A = new int[1];
A[0] = 5;
Tst(A);
System.out.println(A[0]); //prints 6
}
It prints 6. Number 7 is written into tmp array only. Array A in main is not affected.
protected static void ASC2(int A[], int bits) {
int[] origA = A;
int[] tmp = new int[A.length];
int[] Z = new int[1 << bits];
int mask = (1 << bits) - 1;
for (int shift = 0; shift < 32; shift += bits) {
if (shift > 0) {
Arrays.fill(Z, 0);
}
for (int i = 0; i < A.length; ++i) {
Z[(A[i] >> shift) & mask]++;
}
if (Z[0] == A.length) {
continue; // all in first bucket
}
Z[Z.length - 1] = A.length - Z[Z.length - 1];
for (int i = Z.length - 2; i >= 0; --i) {
Z[i] = Z[i + 1] - Z[i];
}
if (shift + bits > 31) { // negative numbers correction
int halfLength = Z.length / 2;
int positSum = Z[halfLength];
int negSum = A.length - positSum;
if (negSum > 0) {
for (int i = 0; i < halfLength; ++i) {
Z[i] += negSum;
}
for (int i = halfLength; i < Z.length; ++i) {
Z[i] -= positSum;
}
}
}
for (int i = 0; i < A.length; ++i) {
tmp[Z[(A[i] >> shift) & mask]++] = A[i];
}
int[] swap = A;
A = tmp;
tmp = swap;
}
if (A != origA) {
System.arraycopy(A, 0, origA, 0, A.length);
}
}
EDIT3
Loop unroll is a valid technique, improving short circuiting is really nice. But with using array lengths as constants you definitely start to be too clever. If you hard coded the base size, why not hard code it all like this:
protected static int[] DSC2(int A[])// sorts in descending order
{
int tmp[] = new int[A.length];
int Z[] = new int[256];
int sample, swap[];
// 1st LSB byte extraction
sample = A[0] & 255;
for (int i = 0; i < A.length; ++i) {
Z[A[i] & 255]++;
}
if (Z[sample] != A.length) {
Z[0] = A.length - Z[0];
for (int i = 1; i < Z.length; ++i) {
Z[i] = Z[i - 1] - Z[i];
}
for (int i = 0; i < A.length; ++i) {
tmp[Z[A[i] & 255]++] = A[i];
}
swap = A;
A = tmp;
tmp = swap;
Arrays.fill(Z, 0);
} else {
Z[sample] = 0;
}
// 2nd LSB byte extraction
sample = (A[0] >> 8) & 255;
for (int i = 0; i < A.length; ++i) {
Z[(A[i] >> 8) & 255]++;
}
if (Z[sample] != A.length) {
Z[0] = A.length - Z[0];
for (int i = 1; i < Z.length; ++i) {
Z[i] = Z[i - 1] - Z[i];
}
for (int i = 0; i < A.length; ++i) {
tmp[Z[(A[i] >> 8) & 255]++] = A[i];
}
swap = A;
A = tmp;
tmp = swap;
Arrays.fill(Z, 0);
} else {
Z[sample] = 0;
}
// 3rd LSB byte extraction
sample = (A[0] >> 16) & 255;
for (int i = 0; i < A.length; ++i) {
Z[(A[i] >> 16) & 255]++;
}
if (Z[sample] != A.length) {
Z[0] = A.length - Z[0];
for (int i = 1; i < Z.length; ++i) {
Z[i] = Z[i - 1] - Z[i];
}
for (int i = 0; i < A.length; ++i) {
tmp[Z[(A[i] >> 16) & 255]++] = A[i];
}
swap = A;
A = tmp;
tmp = swap;
Arrays.fill(Z, 0);
} else {
Z[sample] = 0;
}
// 4th LSB byte extraction
sample = (A[0] >> 24) & 255;
for (int i = 0; i < A.length; ++i) {
Z[(A[i] >> 24) & 255]++;
}
if (Z[sample] != A.length) {
Z[0] = A.length - Z[0];
for (int i = 1; i < Z.length; ++i) {
Z[i] = Z[i - 1] - Z[i];
}
for (int i = 0; i < A.length; ++i) {
tmp[Z[(A[i] >> 24) & 255]++] = A[i];
}
A = tmp;
}
return A;
}
I am trying to implement an algorithm to solve the Knapsack problem:
cst = 1;
for (j = 0; j < 200; j++) {
if (kk - cst < 0) {
continue;
cst++;
}
for (i = kk - cst; i >= 0; --i) {
C[i + cst] = max(C[i + cst], C[i] + index[cst]);
}
cst++;
}
The index array has the values of respective items represented by index of the array. I want to know where I'm going wrong.
In your code
if(kk-cst < 0)
{
continue;
cst++;
}
is wrong. the cst++ will never be executed. Please check and change your logic accordingly.
The increment to cst is unreachable code here. Swap the two lines
if (kk - cst < 0) {
cst++;
continue;
}
A simple dynamic programming knapsack implementation would be
int KnapSack(int W, int wt[], int val[], int n) {
int i, w;
int K[n + 1][W + 1];
for (i = 0; i <= n; i++) {
for (w = 0; w <= W; w++) {
if (i == 0 || w == 0) K[i][w] = 0;
else if (wt[i - 1] <= w)
K[i][w] = max(val[i - 1] + K[i - 1][w - wt[i - 1]], K[i - 1][w]);
else K[i][w] = K[i - 1][w];
}
}
return K[n][W];
}
as listed in http://www.geeksforgeeks.org/dynamic-programming-set-10-0-1-knapsack-problem/
I am trying to write a minesweeper program in C.
What I am trying to achieve here is when user steps on one cell, the cells near without bombs and hint numbers will be revealed.
For example, if x is the cell stepped on, o is an empty but concealed square, . is an empty but revealed cell and * is the bomb (hidden when playing of course):
x o o o o
o o o * o
o o o o o
will result in:
. . 1 o o
. . 1 * o
. . 1 o o
Here is part of the code:
while (1)
{
printf("Row? ");
scanf("%d", &row);
printf("column? ");
scanf("%d", &clos);
if (row < 9 && row >= 0 && clos < 8 && clos >= 0)
break;
printf("\nInvalid Location\n\n");
}
if (real_map[row][clos] =='*')
{
print_map_win(display_map,real_map);
printf("\n");
printf("Flags Left = %d\n\n\n", flag_left);
printf("Game Over\n");
exit(0);
}
else
{
if (real_map[row][clos] == ' ')
{
display_map[row][clos] = real_map[row][clos];
bonos_reveal(display_map, real_map, clos, row);
// [[[bonos_reveal is the function I am asking for]]]
printf("\n");
}
else
{
display_map[row][clos] = real_map[row][clos];
}
}
in which real_map has the hint number and bombs in it, and display_map is the current state of the map.
edit: I have the following code, and it only reveals in one direction:
int bonos_reveal(int disp_map[MAP_ROWS][MAP_COLS], int real_map[MAP_ROWS][MAP_COLS], int clos, int row)
{
disp_map[row][clos] = real_map[row][clos];
if (row < 9 && row >= 0 && clos < 8 && clos >= 0)
{
if (real_map[row][clos+1] == ' ')
{
bonos_reveal(disp_map, real_map, clos + 1, row);
}
else
{
disp_map[row][clos+1] = real_map[row][clos+1];
return 1;
}
}
else
{
return 1;
}
return 1;
}
I have no idea how to loop through the cells.
Okay, here's an example implementation. It uses the following values for tiles:
0 to 8: an unmined tile; the number represents the pre-calculated number of adjacent mines
9: a mine; this special value is defined as BOMB.
Covered tiles have 10 added to that, flagged tiles (not used here) have 20 added to that. You can test whether a tile is mined with:
board[row][col] % 10 == BOMB
I'll let the code do the explaining:
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define ROWS 12
#define COLS 20
#define BOMBS 8
#define BOMB 9
void inc(int board[ROWS][COLS], int row, int col)
{
if (row < 0 || row >= ROWS) return;
if (col < 0 || col >= COLS) return;
if (board[row][col] % 10 == BOMB) return;
board[row][col]++;
}
/*
* Set up board and pre-calculate adjacent bombs
*/
void board_init(int board[ROWS][COLS])
{
int i, j, n;
for (j = 0; j < ROWS; j++) {
for (i = 0; i < COLS; i++) {
board[j][i] = 10;
}
}
n = 0;
while (n < BOMBS) {
j = rand() % ROWS;
i = rand() % COLS;
if (board[j][i] % 10 != BOMB) {
board[j][i] = 19;
inc(board, j - 1, i - 1);
inc(board, j - 1, i);
inc(board, j - 1, i + 1);
inc(board, j, i - 1);
inc(board, j, i + 1);
inc(board, j + 1, i - 1);
inc(board, j + 1, i);
inc(board, j + 1, i + 1);
n++;
}
}
}
/*
* Reveal tile and propagate revelation
*/
void board_reveal(int board[ROWS][COLS], int row, int col)
{
if (row < 0 || row >= ROWS) return; /* skip off-board tiles */
if (col < 0 || col >= COLS) return;
if (board[row][col] < 10) return; /* already revealed, skip */
if (board[row][col] >= 20) return; /* must remove flag first, skip */
if (board[row][col] % 10 == BOMB) {
int i, j;
printf("Bang!\n");
for (j = 0; j < ROWS; j++) {
for (i = 0; i < COLS; i++) {
if (board[j][i] % 10 == BOMB) board[j][i] = BOMB;
}
}
} else {
board[row][col] %= 10;
if (board[row][col] == 0) {
board_reveal(board, row - 1, col);
board_reveal(board, row, col - 1);
board_reveal(board, row, col + 1);
board_reveal(board, row + 1, col);
}
}
}
void board_print(int board[ROWS][COLS])
{
int i, j;
for (j = 0; j < ROWS; j++) {
putchar(' ');
for (i = 0; i < COLS; i++) {
const char *tile = ".12345678*##########PPPPPPPPPP";
int k = board[j][i];
putchar(tile[k]);
}
putchar('\n');
}
}
int main()
{
int board[ROWS][COLS];
srand(time(NULL));
board_init(board);
board_reveal(board, 0, 0);
board_print(board);
return 0;
}
I'm working on a program which is going to be used to sort students test scores and eventually retrieve the mean, median, and the mode of the scores. For some strange reason my bubble sort is not working.. I'm unsure why.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 3
int main (void)
{
char vStudents[N][15], trans = 'y', vTemp2;
int vScores[N], vTemp, x, i = 0, j=0, NewN;
printf("\t\tWhatsamatta U Scores System\n\n");
do
{
printf("Please Enter Students Name: ");
gets(vStudents[i]);
trans = 'N';
while (trans == 'N')
{
printf("Enter Students Score: ");
scanf("%d", &vScores[i]);
fflush(stdin);
if (vScores[i] >= 0 & vScores[i] <= 100)
trans = 'y';
else
printf("Score is invalid, please re-enter score.\n");
}
i++;
j++;
} while (j != N);
for(x = 0; x < N - 1; x++)
{
if ((x < N - 1) && (vScores[i] > vScores[i + 1]))
{
vTemp = vScores[i];
vScores[i] = vScores[i + 1];
vScores[i + 1] = vTemp;
x = -1;
}
}
printf("%d %d %d\n\n", vScores[0], vScores[1], vScores[2]);
system("Pause");
return 0;
Any help would be useful, thanks in advance!
At least one error:
for(x = 0; x < vScores[N] - 1; x++)
{
if ((x < vScores[N] - 1) && (vScores[N] > vScores[N + 1]))
{
should be
for(x = 0; x <N - 1; x++)
{
if ((x < N - 1) && (vScores[N] > vScores[N + 1]))
{
//^^you should not compare index x with array elements
N is always 3. if we replace N in your code with 3, does it still make sense?
for(x = 0; x < vScores[3] - 1; x++)
{
if ((x < vScores[3] - 1) && (vScores[3] > vScores[3 + 1]))
{
vTemp = vScores[3];
vScores[3] = vScores[3 + 1];
vScores[3 + 1] = vTemp;
x = -1;
}
}
Ok, now that it is this:
for(x = 0; x < N - 1; x++)
{
if ((x < N - 1) && (vScores[i] > vScores[i + 1]))
{
vTemp = vScores[i];
vScores[i] = vScores[i + 1];
vScores[i + 1] = vTemp;
x = -1;
}
}
Ask, when does i change?