hash code no working - c

I found this Hash function written in Java and with some help from stackoverflow converted it to C. The problem is it gives a different hash value each time it runs on the same word.
Here's the original function:
long sfold(String s, int M)
{
int intLength = s.length() / 4;
long sum = 0;
for (int j = 0; j < intLength; j++)
{
char c[] = s.substring(j * 4, (j * 4) + 4).toCharArray();
long mult = 1;
for (int k = 0; k < c.length; k++)
{
sum += c[k] * mult;
mult *= 256;
}
}
char c[] = s.substring(intLength * 4).toCharArray();
long mult = 1;
for (int k = 0; k < c.length; k++)
{
sum += c[k] * mult;
mult *= 256;
}
return(Math.abs(sum) % M);
}
And here's how we rewrote it:
include <stdlib.h>
include <stdio.h>
include <math.h>
include <string.h>
long sfold(char * s, int M);
int main(void)
{
char * s = "test";
int M;
long x;
M = 525;
x = sfold(s,M);
printf("%ld\n",x);
}
long sfold(char * s, int M)
{
int intLength = strlen(s) / 4;
long sum = 0;
for (int j = 0; j < intLength; j++)
{
char c[4];
memcpy(c, s + 4 * j, 4);
//char c[] = s.substring(j * 4, (j * 4) + 4).toCharArray();
long mult = 1;
for (int k = 0; k < strlen(c); k++)
{
sum += c[k] * mult;
mult *= 256;
}
}
char c[intLength];
memcpy(c,s,intLength);
//char c[] = s.substring(intLength * 4).toCharArray();
long mult = 1;
for (int k = 0; k < strlen(c); k++)
{
sum += c[k] * mult;
mult *= 256;
}
return(abs(sum) % M);
}
Shouldn't this give the same value each time we run the program? Anyone see what's wrong?

All that string copying is really silly. What's the point of copying if all you need is the character value?
Here's how it might look in C:
long sfold(char* s, unsigned long M) {
unsigned long mult = 1, sum = 0;
while (*s) {
sum += (uint8_t)(*s++) * mult;
mult *= 256;
if (!mult) mult = 1;
}
return sum % M;
}
But it's a terrible hash algorithm. You'd be better off with a simple modular hash (which is also not great, but it's not as bad):
/* This could be any small prime */
static const unsigned long mult = 31;
long sfold(char* s, unsigned long M) {
/* Avoid having the hash of the empty string be 0 */
unsigned long sum = 0xBEA00D1FUL;
while (*s)
sum += (uint8_t)(*s++) * mult;
return sum % M;
}

I think I took care of most of the bugs for you. I made it C99 compliant, mainly out of habit. The major problem was using strlen(c): c is a character array, not a string (which is a character array terminated with the null '\0' character). You'll need to rewrite your function so that if calloc()/malloc() fails, the function terminates with an error. Or you can go back to variable length arrays like you were using before if your compiler supports it. There are likely better hash functions in other posts on StackOverflow, but this at least helps you getting yours working in a deterministic manner without invoking undefined behavior.
Code Listing
/******************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#define BUF_SIZE (4)
/******************************************************************************/
long sfold(const char* s, int M);
/******************************************************************************/
int main(void) {
const char* s = "test string";
int M;
long x;
M = 525;
x = sfold(s,M);
printf("String:%s - Hash:%ld\n", s, x);
}
/******************************************************************************/
long sfold(const char* s, int M) {
int intLength = strlen(s) / 4;
char* c = calloc(intLength, sizeof(char)); /* Warning, test if c==NULL, this
* call can fail.
*/
long sum = 0;
int j, k;
for (j=0; j<intLength; j++) {
char c[BUF_SIZE];
memcpy(c, s + BUF_SIZE * j, BUF_SIZE);
//char c[] = s.substring(j * 4, (j * 4) + 4).toCharArray();
long mult = 1;
for (k=0; k<BUF_SIZE; k++) {
sum += c[k] * mult;
mult *= 256;
}
}
memcpy(c, s, intLength);
//char c[] = s.substring(intLength * 4).toCharArray();
long mult = 1;
for (k=0; k<BUF_SIZE; k++) {
sum += c[k] * mult;
mult *= 256;
}
free(c);
return(abs(sum) % M);
}
Sample Output
for i in $(seq 1 5); do echo $i; ./a.out; done
1
String:test string - Hash:384
2
String:test string - Hash:384
3
String:test string - Hash:384
4
String:test string - Hash:384
5
String:test string - Hash:384

Related

runtime error: signed integer overflow: 99998 * 100000 cannot be represented in type 'int' [solution.c]

Trying to solve https://leetcode.com/problems/k-concatenation-maximum-sum/submissions/, I still got integer overflow when the data type is long
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
int max(int a, int b) {
return a > b ? a : b;
}
int sum(int *nums, int numSize) {
int ans = 0;
for (int i = 0; i < numSize; ++i)
ans += nums[i];
return ans;
}
int KandaneAlgo(int *nums, int numSize) {
int i, max_overall_so_far = 0, max_ending_here = 0;
for (i = 0; i < numSize; ++i) {
max_ending_here += nums[i];
if (max_ending_here < 0)
max_ending_here = 0;
if (max_overall_so_far < max_ending_here)
max_overall_so_far = max_ending_here;
}
return max_overall_so_far;
}
int kConcatenationMaxSum(int *arr, int arrSize, int k) {
int mod = pow(10, 9) + 7;
int ans;
if (k > 1) {
long tem = (k - 2) * max(sum(arr, arrSize), 0);
int t1 = tem % mod;
printf("%ld, %d, %d \n", tem, t1, mod);
int arr2[2 * arrSize];
for (int i = 0; i < 2 * arrSize; ++i)
arr2[i] = arr[i - i / arrSize * arrSize];
ans = (int)t1 + KandaneAlgo(arr2, 2 * arrSize) % mod;
} else {
ans = KandaneAlgo(arr, arrSize) % mod;
}
return ans;
}
int main() {
int arr[10] = { [0 ... 9] = 10000 };
for (int i = 0; i < 10; ++i)
printf("%d ", arr[i]);
printf("\n");
int tot = kConcatenationMaxSum(arr, 10, 100000);
printf("%d \n", tot);
return 0;
}
I locally debug with lldb and can see the output message of variable tem is wrong indeed, it should be 9999800000.
This is because 9999800000 is larger than what can be stored in 32 bits. long only provides a minimum size guarantee of 32 bits. If you use long long for all the operands and result variable in the expression, it evaluates to the correct value. long long provides a minimum guarantee of 64 bits.
Check this for more details - https://en.wikipedia.org/wiki/C_data_types#Main_types
The following snippet worked for me:
long long tem = (long long)(k-2) * (long long)max(sum(arr, arrSize), 0);
Not sure about the rest of the algorithm, but this puts the correct value into tem.

make two big number add by array in c code

I want to construct two big number by array in c programming and make them add.
The following is my code:
void add(unsigned char* a, unsigned char* b, unsigned int len)
{
int i;
unsigned short T;
unsigned char carry = 0;
for (i = len - 1; i >= 0; --i)
{
T = (unsigned short)(a[i]) + (unsigned short)(b[i]) + (unsigned short)carry;
//T = a[i] + b[i] + carry;
if (T > 0xFF)
carry = 1;
else
carry = 0;
a[i] = (unsigned char)T;
}
}
The max value in array a and b for every element is 255.
EDIT1: The highest carry is discarded. The result is save in array a.
EDIT2: replace "Byte" with "carry".
The original code is :
Integer B1(B, SM3_BLOCK_SIZE);
++B1;
for (i = 0; i < ILen; i += v)
(Integer(I + i, v) + B1).Encode(I + i, v);
I write two new function. One is as the Above add(), The other is as following:
void add_one(unsigned char *arr, unsigned int len)
{
int i;
for (i = len-1; i >= 0; --i)
{
arr[len] += 1;
if (arr[len] != 0)
return;
}
}
If my code is rigth, the original code is as following:
add_one(B, SM3_BLOCK_SIZE);
for (i = 0; i < ILen; i += v)
add(I + i, B, SM3_BLOCK_SIZE);
There is (at least) one bug. Look at this code:
void add_one(unsigned char *arr, unsigned int len)
{
int i;
for (i = len-1; i >= 0; --i)
{
arr[len] += 1; // Indexing using len is wrong
if (arr[len] != 0) // Indexing using len is wrong
return;
}
}
You probably want to use i as index.
I assumed you know that you are implementing the add function for a bigendian positive integer.
Avoid using for (i = len-1; i >= 0; --i). You can catch a runtime error when i is unsigned and len is 0. Instead, use for (i = len; i-- > 0;).
If you need a little-endian integer than use for (int i = 0; i < len; ++i)
char add(unsigned char* a, unsigned char* b, unsigned int len)
{
unsigned short carry = 0;
//for (int i = 0; i < len; ++i) // for little-endian
for (int i = len; i-- > 0;) // for big-endian
{
carry += a[i] + b[i];
a[i] = carry & 0xFF;
carry >>= 8;
}
return carry;
}
Tests
unsigned char a[5] = {255,2,3,4,5};
unsigned char b[5] = {255,256-2,256-3,4,5};
char overflow = add(a,b,5);
printf("%d %d %d %d %d / %d",a[0],a[1],a[2],a[3],a[4] , overflow);
Output
255 1 0 8 10 / 1

Runtime error on a Codechef problem: modified Fibonacci series. What's the mistake?

I'm trying to solve a problem on codechef, here's the link:
https://www.codechef.com/problems/KFIB
The given problem statement is:
Chef recently had been studying about Fibonacci numbers and wrote a code to print out the k-th term of the Fibonacci series (1, 1, 2, 3, 5, 8, 13….). He was wondering whether he could write a program to generate the k-th term for similar series. More specifically:
T(n, k) is 1 if n <= k and
T(n, k) = T(n-1, k) + T(n-2, k) + T(n-3, k) … + T(n-k, k) if n > k.
Given n and k, output T(n, k) % (1000000007) as the answer could be very large
Input : Two integers, N and K
Output : One integer, the nth term of the series mod 1000000007
Constraints : 1 ≤ N, K ≤ 2*105
example:
Input: 7 5
Output: 9
The series is as follows {1, 1, 1, 1, 1, 5, 9}
void fibo(int n, unsigned long k) {
unsigned long *a, c;
a = (unsigned long *)malloc(sizeof(unsigned long) * k);
for (unsigned long i = 0; i < k; i++) { //T(n,k)=1 when n<=k
*(a + i)=1;
}
for (unsigned long m = 0; m < n - 1; m++) {
c = *(a);
for (unsigned long j = 0; j < k - 1; j++) {
*(a + j) = *(a + j + 1);
c = c + *(a + j);
}
*(a + k - 1) = c;
}
printf("%d ", *(a) % 1000000007);
}
This works with smaller values but not with very large values. I got the result of the example but when I enter the values 200000 500, I get incorrect answers
The problem is you compute the value modulo ULONG_MAX and reduce the result modulo 1000000007 at the end. This does not give the correct result. You must reduce modulo 1000000007 at each step to avoid potential arithmetic overflow (which does not cause undefined behavior for type unsigned long but gives a different result from the expected one).
Here is a modified version of your code with a faster alternative (more than twice as fast on my laptop):
#include <stdio.h>
#include <stdlib.h>
#define DIVIDER 1000000007ul
unsigned long fibo(unsigned long n, unsigned long k) {
unsigned long c = 1;
if (n > k) {
unsigned long *a = (unsigned long *)malloc(sizeof(*a) * k);
for (unsigned long i = 0; i < k; i++) { //T(n,k)=1 when n<=k
a[i] = 1;
}
for (unsigned long m = k; m < n; m++) {
c = a[0];
for (unsigned long j = 0; j < k - 1; j++) {
a[j] = a[j + 1];
#if 0
// slower version using modulo
c = (c + a[j]) % DIVIDER;
#else
// faster version with a test
if ((c += a[j]) >= DIVIDER)
c -= DIVIDER;
#endif
}
a[k - 1] = c;
}
free(a);
}
return c;
}
int main(int argc, char *argv[]) {
if (argc <= 2) {
printf("usage: fibo n k");
return 1;
} else {
unsigned long n = strtoul(argv[1], NULL, 10);
unsigned long k = strtoul(argv[2], NULL, 10);
printf("%lu\n", fibo(n, k));
}
return 0;
}
Output:
$ time ./fibo 200000 100000
871925546
real 0m34.667s
user 0m34.288s
sys 0m0.113s
$ time ./fibo-faster 200000 100000
871925546
real 0m15.073s
user 0m14.846s
sys 0m0.064s
Given the restrictions on input values:
the values of T(n, k) are in the range [0..1000000006] which fits in an int32_t.
the sum of k terms is in the range [0..200000*1000000006] which fits in an int64_t.
hence we can compute the next term in 64 bits and use a single modulo on the result.
This gives an even faster version (more than 3 times faster):
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define DIVIDER 1000000007
uint32_t fibo(uint32_t n, uint32_t k) {
uint32_t c = 1;
if (n > k) {
uint32_t *a = (uint32_t *)malloc(sizeof(*a) * k);
uint64_t temp;
for (uint32_t i = 0; i < k; i++) { //T(n,k)=1 when n<=k
a[i] = 1;
}
for (uint32_t m = k; m < n; m++) {
temp = a[0];
for (uint32_t j = 0; j < k - 1; j++) {
temp += a[j] = a[j + 1];
}
a[k - 1] = c = temp % DIVIDER;
}
free(a);
}
return c;
}
int main(int argc, char *argv[]) {
if (argc <= 2) {
printf("usage: fibo n k");
return 1;
} else {
uint32_t n = strtoul(argv[1], NULL, 10);
uint32_t k = strtoul(argv[2], NULL, 10);
printf("%lu\n", (unsigned long)fibo(n, k));
}
return 0;
}
Output:
$ time ./fibo-faster 200000 100000
871925546
real 0m3.854s
user 0m3.800s
sys 0m0.018s
To avoid overflow, you can change below statement
c=c+*(a+j);
To
c=(c+*(a+j))%1000000007;
That means only the remainder will be keep in your heap. This won't impact the final results.
Here is the updated code and compiled by clang.(updated according to #bruno's comments)
#include <stdlib.h>
#include <stdio.h>
#define DIVIDER 1000000007ul
#define U4 unsigned long
U4 fibo(U4 n,U4 k)
{
U4 *a,c ;
if(n<=k) return 1;
a= (U4*) malloc (sizeof(U4)*k);
for (U4 i=0;i<k;i++) //T(n,k)=1 when n<=k
{
*(a+i)=1;
}
for (U4 m=k;m<n; m++)
{
c=*(a);
for (U4 j=0;j<k-1;j++)
{
*(a+j)= *(a+j+1);
c=(c+*(a+j))%DIVIDER;
}
*(a+k-1)=c;
}
free(a);
return c;
}
int main(int argc, char *argv[])
{
U4 n, k;
char *endptr;
if(argc <= 2){
printf("usage: t.exe n k");
return 0;
}
n = strtoul(argv[1], &endptr, 10);
k = strtoul(argv[2], &endptr, 10);
printf("%lu", fibo(n,k));
}
Compiler command:
$ clang test.c -o test.exe
$ test.exe 200000 500
80391289

Why isnt my program returning the correct result for Rabin-Karp substring searching algorithm?

I have to implement the Rabin-Karp string searching algorithm.
I think there is something wrong with my rkhash_next function. My professor told us to compute the next hash value in the string using this equation:
yi+1 = 256^m-1*Y[i+1] + 256^m-2*Y[i+2] + ... + Y[i+m]
= 256 * ( 256^m-2*Y[i+1] + ... Y[i+m-1]) + Y[i+m]
= 256 * ((256^m-1*Y[i] + 256^m-2*Y[i+1] + ... Y[i+m-1]) - 256^m-1*Y[i]) + Y[i+m]
= 256 * ( yi - 256^m-1 * Y[i]) + Y[i+m]
= 256 * yi - 256^m * Y[i] + Y[i+m]
I did this, but my program keeps giving me surprising hash values as it "rolls" through the string. Here is my general code for my rkhash_substring_match:
int rk_substring_match(const char *pattern, const char *doc, int *first_match_ind)
{
int plength = strlen(pattern);
int dlength = strlen(doc);
int i,j,x;
int counter = 0;
int first_match = 0;
first_match_ind = &first_match;
long long hash = 256;
long long *h;
long long phash = 0;
long long dhash = 0;
for (x = 0; x <= plength-1; x++)
hash *= 256;
phash = rkhash_init(pattern, plength,h);
dhash = rkhash_init(doc, plength,h);
for(i = 0; i <= dlength - plength; i++) {
if (phash == dhash) {
for (j = 0; j < plength; j++)
if (doc[i+j] != pattern[j])
break;
if (j == plength && counter == 0)
first_match = i;
if (j == plength)
counter++;
}
printf("%lld\n", dhash);
dhash = rkhash_next(dhash,hash,doc[i + 1], doc[i+plength]);
}
return counter;
}
In my class, long long variables are used to store hash values and 256^m for later usage in the program. Below is rkhash_init which generates the hash value and stores 256^m in the value of h:
long long rkhash_init(const char *charbuf, int m, long long *h)
{
int i,j;
long long value = 1;
long long hash = 0;
for (j = 0; j < m; j++)
value *= 256;
h = &value;
long long val = value;
for (i = 0; i < m -1 ; i++) {
value = value/256;
hash += mmul(value, charbuf[i]);
}
hash += charbuf[m-1];
return hash;
}
And here is the problem child, rkhash_next:
long long rkhash_next(long long curr_hash, long long h, char leftmost, char rightmost)
{
return madd(msub(mmul(curr_hash, 256), mmul(leftmost, h)), rightmost);
}
madd, msub, and mmul are functions given to us by the professor. They perform modulus arithmetic after completing either addition, subtraction or multiplication. For example, here is the madd function:
long long madd(long long a, long long b)
{
return (a + b) % PRIME;
}
Can anyone please help me find out why my program is not working!?

Run-time error occurs when input value is greater than 100000

I am trying to implement the sieve of eratosthenes in C. The code works for small input values, but once the input goes beyond a certain range, a run- time error is thrown. This is the second problem in the classical section of the SPOJ base. What is the mistake?
#include<stdio.h>
#include<math.h>
int prime(unsigned long int, unsigned long int);
int main()
{
int nitem;
unsigned long int sn,fn;
scanf("%d", &nitem);
while(nitem)
{
scanf("%lu", &fn);
//printf("%d",fn);
scanf("%lu", &sn);
prime(fn, sn);
nitem--;
}
return 0;
}
int prime(unsigned long int fn, unsigned long int sn)
{
unsigned long int prim[100000];
int i,j,k;
for(i = 0; i < 100000; i++)
{
prim[i] = 1;
}
prim[0] = 0;
prim[1] = 0;
//printf("%d", sn);
//printf("%d", k);
//printf("%d", (k <= sn));
for(k = 2; k <= sqrt(sn); k++)
{
// printf("alksnc%5d", k);
if(prim[k] == 1)
{
for(j = 2; (k * j) <= sn; j++)
{
//printf("%d", prim[k]);
prim[k * j] = 0;
}
}
}
for(int i = 0; i <= sn; i++)
{
if(prim[i] !=0 && i >= fn)
{
printf("%lu\n", i);
}
}
printf("\n");
return;
}
Input:
1
100000 100345
output:
run time error
Input:
1
3 5
output:
3
5
We can make more efficient use of memory (2x) by only sieving odd numbers as all the even numbers you're processing waste time and space. It's trickier to work out but gives us something like:
#include <math.h>
#include <libc.h>
#define MAX_ODD_PRIMES 1048576
void prime(unsigned long fn, unsigned long sn)
{
unsigned char primes[MAX_ODD_PRIMES];
for (unsigned long i = 0; i < MAX_ODD_PRIMES; i++)
{
primes[i] = TRUE;
}
primes[0] = 0; // preset first odd, '1'
for (unsigned long k = 3; k <= sqrt(sn) + 1; k += 2)
{
if (primes[k / 2])
{
for (unsigned long j = 3; (k * j) <= sn; j += 2)
{
primes[k * j / 2] = FALSE;
}
}
}
if (fn <= 2)
{
printf("2\n");
fn = 3;
}
for (unsigned long i = fn / 2; i * 2 + 1 <= sn; i++)
{
if (primes[i])
{
printf("%lu\n", i * 2 + 1);
}
}
}
EXAMPLE
> ./a.out
1 1999900 2000000
1999957
1999969
1999979
1999993
>
1) Array range error.
By changing code
for (j = 2; (k * j) <= sn; j++) {
if (k * j >= 100000) {
printf("Out of range %d %d\n", k, j);
exit(1);
}
prim[k * j] = 0;
}
}
With input 2, 100000
Output
Out of range 2 50000
By using an array (VLA) sized to the task, this is avoided. Many other optimizations available. Consider also a malloc() array.
void prime(unsigned long int fn, unsigned long int sn) {
unsigned long int prim[sn + 1];
2) int prime() eventually performs return; where return something; is expected. Suggest changing function to void prime()
int prime(unsigned long int fn, unsigned long int sn) {
unsigned long int prim[100000];
...
printf("\n");
return;
}

Resources