Can anyone help me out with the pollard rho implementation? I have implemented this in C. It's working fine for numbers upto 10 digits but it's not able to handle greater numbers.
Please help me out to improve it to carry out factorization of numbers upto 18 digits . My code is this:
#include<stdio.h>
#include<math.h>
int gcd(int a, int b)
{
if(b==0) return a ;
else
return(gcd(b,a%b)) ;
}
long long int mod(long long int a , long long int b , long long int n )
{
long long int x=1 , y=a ;
while(b>0)
{
if(b%2==1) x = ((x%n)*(y%n))%n ;
y = ((y%n)*(y%n))%n ;
b/=2 ;
}
return x%n ;
}
int isprimes(long long int u)
{
if(u==3)
return 1 ;
int a = 2 , i ;
long long int k , t = 0 , r , p ;
k = u-1 ;
while(k%2==0)
{ k/=2 ; t++ ; }
while(a<=3) /*der are no strong pseudoprimes common in base 2 and base 3*/
{
r = mod(a,k,u) ;
for(i = 1 ; i<=t ; i++)
{
p = ((r%u)*(r%u))%u ;
if((p==1)&&(r!=1)&&(r!=(u-1)))
{ return 0 ; }
r = p ;
}
if(p!=1)
return 0 ;
else
a++ ;
}
if(a==4)
return 1 ;
}
long long int pol(long long int u)
{
long long int x = 2 , k , i , a , y , c , s;
int d = 1 ;
k = 2 ;
i = 1 ;
y = x ;
a = u ;
if(isprimes(u)==1)
{
return 1;
}
c=-1 ;
s = 2 ;
while(1)
{
i++;
x=((x%u)*(x%u)-1)% u ;
d = gcd(abs(y-x),u) ;
if(d!=1&&d!=u)
{ printf("%d ",d);
while(a%d==0) { a=a/d; }
x = 2 ;
k = 2 ;
i = 1 ;
y = x ;
if(a==1)
{ return 0 ; }
if(isprimes(a)!=0)
{ return a ; }
u=a ;
}
if(i==k)
{y = x ; k*=2 ; c = x ;} /*floyd cycle detection*/
if(c==x)
{ x = ++s ; }
}
return ;
}
int main()
{
long long int t ;
long long int i , n , j , k , a , b , u ;
while(scanf("%lld",&n)&&n!=0)
{ u = n ; k = 0 ;
while(u%2==0)
{ u/=2 ; k = 1 ; }
if(k==1) printf("2 ") ;
if(u!=1)
t = pol(u) ;
if(u!=1)
{
if(t==1)
{ printf("%lld",u) ; }
else
if(t!=0)
{ printf("%lld",t) ; }
}
printf("\n");
}
return 0;
}
sorry for the long code ..... I am a new coder.
When you're multiplying two numbers modulo m, the intermediate product can become nearly m^2. So if you use a 64-bit unsigned integer type, the maximal modulus it can handle is 2^32, if the modulus is larger, overflow may happen. It will be rare when the modulus is only slightly larger, but that makes it only less obvious, you cannot rely on being lucky if the modulus allows the possibility of overflow.
You can gain a larger range by a factor of two if you choose a representative of the residue class modulo m of absolute value at most m/2 or something equivalent:
uint64_t mod_mul(uint64_t x, uint64_t y, uint64_t m)
{
int neg = 0;
// if x is too large, choose m-x and note that we need one negation for that at the end
if (x > m/2) {
x = m - x;
neg = !neg;
}
// if y is too large, choose m-y and note that we need one negation for that at the end
if (y > m/2) {
y = m - y;
neg = !neg;
}
uint64_t prod = (x * y) % m;
// if we had negated _one_ factor, and the product isn't 0 (mod m), negate
if (neg && prod) {
prod = m - prod;
}
return prod;
}
So that would allow moduli of up to 2^33 with a 64-bit unsigned type. Not a big step.
The recommended solution to the problem is the use of a big-integer library, for example GMP is available as a distribution package on most if not all Linux distros, and also (relatively) easily installable on Windows.
If that is not an option (really, are you sure?), you can get it to work for larger moduli (up to 2^63 for an unsigned 64-bit integer type) using Russian peasant multiplication:
x * y = 2 * (x * (y/2)) + (x * (y % 2))
so for the calculation, you only need that 2*(m-1) doesn't overflow.
uint64_t mod_mult(uint64_t x, uint64_t y, uint64_t m)
{
if (y == 0) return 0;
if (y == 1) return x % m;
uint64_t temp = mod_mult(x,y/2,m);
temp = (2*temp) % m;
if (y % 2 == 1) {
temp = (temp + x) % m;
}
return temp;
}
Note however that this algorithm needs O(log y) steps, so it's rather slow in practice. For smaller m you can speed it up, if 2^k*(m-1) doesn't overflow, you can proceed in steps of k bits instead of single bits (x*y = ((x * (y >> k)) << k) + (x * (y & ((1 << k)-1)))), which is a good improvement if your moduli are never larger than 48 or 56 bits, say.
Using that variant of modular multiplication, your algorithm will work for larger numbers (but it will be significantly slower). You can also try test for the size of the modulus and/or the factors to determine which method to use, if m < 2^32 or x < (2^64-1)/y, the simple (x * y) % m will do.
You can try this C implementation of Pollard Rho :
unsigned long long pollard_rho(const unsigned long long N) {
// Require : a composite number N, not a square.
// Ensure : res is a non-trivial factor of N.
// Option : define a timeout, define a rand function.
static const int timeout = 18;
static unsigned long long rand_val = 2994439072U;
rand_val = (rand_val * 1025416097U + 286824428U) % 4294967291LLU;
unsigned long long res = 1, a, b, c, i = 0, j = 1, x = 1, y = 1 + rand_val % (N - 1);
for (; res == 1; ++i) {
if (i == j) {
if (j >> timeout)
break;
j <<= 1;
x = y;
}
a = y, b = y;
for (y = 0; a; a & 1 ? b >= N - y ? y -= N : 0, y += b : 0, a >>= 1, (c = b) >= N - b ? c -= N : 0, b += c);
y = (1 + y) % N;
for (a = N, b = y > x ? y - x : x - y; (a %= b) && (b %= a););
res = a | b;
}
return res;
}
Otherwise there is a pure C quadratic sieve which factors numbers from 0 to 300-bit.
Related
I try two function for modular exponentiation for big base return wrong results,
One of the function is:
uint64_t modular_exponentiation(uint64_t x, uint64_t y, uint64_t p)
{
uint64_t res = 1; // Initialize result
x = x % p; // Update x if it is more than or
// equal to p
while (y > 0)
{
// If y is odd, multiply x with result
if (y & 1)
res = (res*x) % p;
// y must be even now
y = y>>1; // y = y/2
x = (x*x) % p;
}
return res;
}
For input x = 1103362698 ,y = 137911680 , p=1217409241131113809;
It return the value (x^y mod p):749298230523009574(Incorrect).
The correct value is:152166603192600961
The other function i try, gave same result, What is wrong with these functions?
The other one is :
long int exponentMod(long int A, long int B, long int C)
{
// Base cases
if (A == 0)
return 0;
if (B == 0)
return 1;
// If B is even
long int y;
if (B % 2 == 0) {
y = exponentMod(A, B / 2, C);
y = (y * y) % C;
}
// If B is odd
else {
y = A % C;
y = (y * exponentMod(A, B - 1, C) % C) % C;
}
return (long int)((y + C) % C);
}
With p = 1217409241131113809, this value as well as any intermediate values for res and x will be larger than 32 bits. This means that multiplying two of these numbers could result in a value larger than 64 bits which overflows the datatype you're using.
If you restrict the parameters to 32 bit datatypes and use 64 bit datatypes for intermediate values then the function will work. Otherwise you'll need to use a big number library to get correct output.
I want to make a C program compatible for DEV-C++ 4.9.9.2 to find integer triplets (x,y,z) such that for any integer n the equation n^x + n^y = n^z holds where n is any integer in the range [a,b]. The c program would have an input of only a and b and find such possible triplets.
The code that I wrote isn't working. What's the error in it?
for (n = a ; n <= b ; n++) {
for (x = a ; x < b ; x++) {
for (y = a ; y < b ; y++) {
for (z = a ; z = b ; z++) {
c = pow(n, x);
d = pow(n, y);
e = pow(n, z);
f = c + d;
if (e = f) {
printf("(%d , %d , %d) : %d", x,y,z,n);
}
}
}
}
}
I'm a novice in C.
C correction
Try changing
if (e=f)
into
if (e==f)
The first does assignment, the second tests equality.
(Note that you may also get overflow if the numbers tested get larger than your datatype.)
Maths approach
If y==x, then:
n^x + n^x = n^z
2n^x = n^z
=> n == 0 or n == 2
Now, assume y>x and n!=0.
n^x + n^y = n^z
n^x ( 1 + n^(y-x)) = n^z
=> 1+n^(y-x) = n^(z-x)
=> 1 = 0 ( modulo n)
=> impossible unless n==0 (in which case any x,y works) or n==1 (which does not work)
So this equation has solutions for any x,y if n==0.
Otherwise, the only solutions are with n==2, x==y and z=x+1.
Change
if (e = f)
to
if (e == f)
The first one assigns f to e, enable compiler warnings for such mistakes. The second one equates the LHS to the RHS.
Secondly, assuming your program is a brute force, i.e., loops for all values of x, y and z, you might want to change this statement:
for (z = a ; z = b ; z++)
to
for (z = a ; z < b ; z++)
Your implementation is O(n^4) , actually it can be done in O(n^3) .Here is the code
for (n = a ; n <= b ; n++) {
for (x = a ; x < b ; x++) {
for (y = a ; y < b ; y++) {
{
c = pow(n, x);
d = pow(n, y);
f = c + d;
e = pow(f,1.0/n);
if (e >= a && e < b) {
z = e;
printf("(%d , %d , %d) : %d", x,y,z,n);
}
}
}
}
}
I want to implement the pseudo-random number generator in xv6. I am trying to implement Linear congruential generator algorithm, but I am not getting how to seed it. Here is the piece of my code. I know this code won't work because X is not changing globally. I am not getting how doing that.
static int X = 1;
int random_g(int M)
{
int a = 1103515245, c = 12345;
X = (a * X + c) % M;
return X;
}
Incorrect code.
Do not use % on X, the random state variable, to update the state. Use % to form the return value.
Use unsigned types to avoid signed integer overflow (UB) - Perhaps unsigned, unsigned long, unsigned long long. Wider affords a longer sequence.
To match a = 1103515245, c = 12345, we want m = 31.
static unsigned long X = 1;
int random_g(int M) {
const unsigned long a = 1103515245, c = 12345;
#define m 0x80000000
int r = (X % M) + 1; // [1 ... M]
X = (a * X + c) % m;
return r;
}
Additional code needed to remove the typical M bias. Many SO post on that.
Ref: Why 1103515245 is used in rand? and http://wiki.osdev.org/Random_Number_Generator
I don't know how much that helps you, but if you have an Intel Ivy Bridge or later generation processor, you could try to use the RDRAND instruction. Something along these lines:
static int X;
int
random_g (int M)
{
asm volatile("byte $0x48; byte $0x0F; byte $0xC7; byte $0xF0"); // RDRAND AX
asm volatile("mov %%ax, %0": "=r"(X)); // X = rdrand_val
int a = 1103515245, c = 12345;
X = (a * X + c) % M;
return X;
}
I haven't tested the above code, as I can't build xv6 right now, but it should give you a hint as to how you can work; utilising your processor's rng.
In the following code, random_g is a self-seeding random number generator that returns values between 1 and M. The main function tests the function for the specific case where M is 8.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>
int random_g( int M )
{
static uint32_t X = 1;
static bool ready = false;
if ( !ready )
{
X = (uint32_t)time(NULL);
ready = true;
}
static const uint32_t a = 1103515245;
static const uint32_t c = 12345;
X = (a * X + c);
uint64_t temp = (uint64_t)X * (uint64_t)M;
temp >>= 32;
temp++;
return (int)temp;
}
int main(void)
{
int i, r;
int M = 8;
int *histogram = calloc( M+1, sizeof(int) );
for ( i = 0; i < 1000000; i++ )
{
r = random_g( M );
if ( i < 10 )
printf( "%d\n", r );
if ( r < 1 || r > M )
{
printf( "bad number: %d\n", r );
break;
}
histogram[r]++;
}
printf( "\n" );
for ( i = 1; i <= M; i++ )
printf( "%d %6d\n", i, histogram[i] );
free( histogram );
}
I am looking for an efficient algorithm to find nth root of a number. The answer must be an integer. I have found that newtons method and bisection method are popular methods. Are there any efficient and simple methods for integer output?
#include <math.h>
inline int root(int input, int n)
{
return round(pow(input, 1./n));
}
This works for pretty much the whole integer range (as IEEE754 8-byte doubles can represent the whole 32-bit int range exactly, which are the representations and sizes that are used on pretty much every system). And I doubt any integer based algorithm is faster on non-ancient hardware. Including ARM. Embedded controllers (the microwave washing machine kind) might not have floating point hardware though. But that part of the question was underspecified.
I know this thread is probably dead, but I don't see any answers I like and that bugs me...
int root(int a, int n) {
int v = 1, bit, tp, t;
if (n == 0) return 0; //error: zeroth root is indeterminate!
if (n == 1) return a;
tp = iPow(v,n);
while (tp < a) { // first power of two such that v**n >= a
v <<= 1;
tp = iPow(v,n);
}
if (tp == a) return v; // answer is a power of two
v >>= 1;
bit = v >> 1;
tp = iPow(v, n); // v is highest power of two such that v**n < a
while (a > tp) {
v += bit; // add bit to value
t = iPow(v, n);
if (t > a) v -= bit; // did we add too much?
else tp = t;
if ( (bit >>= 1) == 0) break;
}
return v; // closest integer such that v**n <= a
}
// used by root function...
int iPow(int a, int e) {
int r = 1;
if (e == 0) return r;
while (e != 0) {
if ((e & 1) == 1) r *= a;
e >>= 1;
a *= a;
}
return r;
}
This method will also work with arbitrary precision fixed point math in case you want to compute something like sqrt(2) to 100 decimal places...
I question your use of "algorithm" when speaking of C programs. Programs and algorithms are not the same (an algorithm is mathematical; a C program is expected to be implementing some algorithm).
But on current processors (like in recent x86-64 laptops or desktops) the FPU is doing fairly well. I guess (but did not benchmark) that a fast way of computing the n-th root could be,
inline unsigned root(unsigned x, unsigned n) {
switch (n) {
case 0: return 1;
case 1: return x;
case 2: return (unsigned)sqrt((double)x);
case 3: return (unsigned)cbrt((double)x);
default: return (unsigned) pow (x, 1.0/n);
}
}
(I made a switch because many processors have hardware to compute sqrt and some have hardware to compute cbrt ..., so you should prefer these when relevant...).
I am not sure that n-th root of a negative number makes sense in general. So my root function takes some unsigned x and returns some unsigned number.
Here is an efficient general implementation in C, using a simplified version of the "shifting nth root algorithm" to compute the floor of the nth root of x:
uint64_t iroot(const uint64_t x, const unsigned n)
{
if ((x == 0) || (n == 0)) return 0;
if (n == 1) return x;
uint64_t r = 1;
for (int s = ((ilog2(x) / n) * n) - n; s >= 0; s -= n)
{
r <<= 1;
r |= (ipow(r|1, n) <= (x >> s));
}
return r;
}
It needs this function to compute the nth power of x (using the method of exponentiation by squaring):
uint64_t ipow(uint64_t x, unsigned n)
{
if (x <= 1) return x;
uint64_t y = 1;
for (; n != 0; n >>= 1, x *= x)
if (n & 1)
y *= x;
return y;
}
and this function to compute the floor of base-2 logarithm of x:
int ilog2(uint64_t x)
{
#if __has_builtin(__builtin_clzll)
return 63 - ((x != 0) * (int)__builtin_clzll(x)) - ((x == 0) * 64);
#else
int y = -(x == 0);
for (unsigned k = 64 / 2; k != 0; k /= 2)
if ((x >> k) != 0)
{ x >>= k; y += k; }
return y;
#endif
}
Note: This assumes that your compiler understands GCC's __has_builtin test and that your compiler's uint64_t type is the same size as an unsigned long long.
You can try this C function to get the nth_root of an unsigned integer :
unsigned initial_guess_nth_root(unsigned n, unsigned nth){
unsigned res = 1;
for(; n >>= 1; ++res);
return nth ? 1 << (res + nth - 1) / nth : 0 ;
}
// return a number that, when multiplied by itself nth times, makes N.
unsigned nth_root(const unsigned n, const unsigned nth) {
unsigned a = initial_guess_nth_root(n , nth), b, c, r = nth ? a + (n > 0) : n == 1 ;
for (; a < r; b = a + (nth - 1) * r, a = b / nth)
for (r = a, a = n, c = nth - 1; c && (a /= r); --c);
return r;
}
Example of output :
24 == (int) pow(15625, 1.0/3)
25 == nth_root(15625, 3)
0 == nth_root(0, 0)
1 == nth_root(1, 0)
4 == nth_root(4096, 6)
13 == nth_root(18446744073709551614, 17) // 64-bit 20 digits
11 == nth_root(340282366920938463463374607431768211454, 37) // 128-bit 39 digits
Here is the github source.
I'm trying to convert a function that finds the nth root in C for a double value from the following link
http://rosettacode.org/wiki/Nth_root#C
to find the nth root for 8 floats at once using AVX.
Part of that code uses DBL_EPSILON * 10. However, when I convert this to use float with AVX I have to use FLT_EPSILON*1000 or the code hangs and does not converge. When I print out FLT_EPSILON I see it is order 1E-7. But this link, http://www.cplusplus.com/reference/cfloat/
, says it should be 1E-5. When I print out DBL_EPSILON it's 1E-16 but the link says it should only be 1E-9. What's going on?
Here is the code so far (not fully optimized).
#include <stdio.h>
#include <float.h>
#include <immintrin.h> // AVX
inline double abs_(double x) {
return x >= 0 ? x : -x;
}
double pow_(double x, int e)
{
double ret = 1;
for (ret = 1; e; x *= x, e >>= 1) {
if ((e & 1)) ret *= x;
}
return ret;
}
double root(double a, int n)
{
double d, x = 1;
x = a/n;
if (!a) return 0;
//if (n < 1 || (a < 0 && !(n&1))) return 0./0.; /* NaN */
int cnt = 0;
do {
cnt++;
d = (a / pow_(x, n - 1) - x) / n;
x+= d;
} while (abs_(d) >= abs_(x) * (DBL_EPSILON * 10));
printf("%d\n", cnt);
return x;
}
__m256 pow_avx(__m256 x, int e) {
__m256 ret = _mm256_set1_ps(1.0f);
for (; e; x = _mm256_mul_ps(x,x), e >>= 1) {
if ((e & 1)) ret = _mm256_mul_ps(x,ret);
}
return ret;
}
inline __m256 abs_avx (__m256 x) {
return _mm256_max_ps(_mm256_sub_ps(_mm256_setzero_ps(), x), x);
//return x >= 0 ? x : -x;
}
int get_mask(const __m256 d, const __m256 x) {
__m256 ad = abs_avx(d);
__m256 ax = abs_avx(x);
__m256i mask = _mm256_castps_si256(_mm256_cmp_ps(ad, ax, _CMP_GT_OQ));
return _mm_movemask_epi8(_mm256_castsi256_si128(mask)) + _mm_movemask_epi8(_mm256_extractf128_si256(mask,1));
}
__m256 root_avx(__m256 a, int n) {
printf("%e\n", FLT_EPSILON);
printf("%e\n", DBL_EPSILON);
printf("%e\n", FLT_EPSILON*1000.0f);
__m256 d;
__m256 x = _mm256_set1_ps(1.0f);
//if (!a) return 0;
//if (n < 1 || (a < 0 && !(n&1))) return 0./0.; /* NaN */
__m256 in = _mm256_set1_ps(1.0f/n);
__m256 xtmp;
do {
d = _mm256_rcp_ps(pow_avx(x, n - 1));
d = _mm256_sub_ps(_mm256_mul_ps(a,d),x);
d = _mm256_mul_ps(d,in);
//d = (a / pow_avx(x, n - 1) - x) / n;
x = _mm256_add_ps(x, d); //x+= d;
xtmp =_mm256_mul_ps(x, _mm256_set1_ps(FLT_EPSILON*100.0f));
//} while (abs_(d) >= abs_(x) * (DBL_EPSILON * 10));
} while (get_mask(d, xtmp));
return x;
}
int main()
{
__m256 d = _mm256_set1_ps(16.0f);
__m256 out = root_avx(d, 4);
float result[8];
int i;
_mm256_storeu_ps(result, out);
for(i=0; i<8; i++) {
printf("%f\n", result[i]);
} printf("\n");
//double x = 16;
//printf("root(%g, 15) = %g\n", x, root(x, 4));
//double x = pow_(-3.14159, 15);
//printf("root(%g, 15) = %g\n", x, root(x, 15));
return 0;
}
_mm256_rcp_ps, which maps to the rcpps instruction, performs only an approximate reciprocal. The Intel 64 and IA-32 Architectures Software Developer’s Manual says its relative error may be up to 1.5•2-12. This is insufficient to cause the root finder to converge with accuracy 100*FLT_EPSILON.
You could use an exact division, such as:
d = pow_avx(x, n-1);
d = _mm256_sub_ps(_mm256_div_ps(a, d), x);
or add some refinement steps for the reciprocal estimate.
Incidentally, if your compiler supports using regular C operators with SIMD objects, consider using the regular C operators instead:
d = pow_avx(x, n-1);
d = a/d - x;
1e-5 is simply the maximum value the C standard allows an implementation to use for FLT_EPSILON. In practice, you'll be using IEEE-754 single-precision, which has an epsilon of 2-23, which is approximately 1e-7.