I've come up with a very efficient algorithm for calculating prime numbers. It uses bit arithmetics ie AND , OR, XOR etc. and its based on the sieve of eratosthenes.
For numbers below 32 it works. For example when n = 31 I get the output:
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31,
But when I use a bigger value such as 40 I get a different output:
2,
I am at a loss to explain why this is so, I need guidance.
Below is the code:
https://github.com/rsgilbert/c/blob/master/chp2/sieve-bitset.c
#include <stdio.h>
#include <math.h>
#define MAXLINE 1000
#define A_1 1 // first number in GP
// sieve of eratosthenes
// Using bit arithmetic
long btoi(char s[]);
void itob(long n, char s[]);
long runningPrimes(long max);
void printBits(long num);
long gpSum(long a_1, long r, long n);
long numElementsInGP(long a_1, long a_n, long r);
long lastElementInBitset(long width);
long width(long factor, long n);
long paddedSum(long sum, long n, long i);
long commonRatio(long i);
long flip(long bitset);
void sieve(long n, long primes[]);
long dropRightBits(long bitset, long noToDrop);
void primeBitsetToArray(long bitset, long primes[]);
void printPrimes(long primes[], size_t size);
long greaterFirstBit(long bitset);
long main()
{
long n = 31; // 32 is not supported (??) I dont know why
long runner = runningPrimes(n);
long primes[n];
sieve(n, primes);
printPrimes(primes, n);
}
/**
* sieve of eratosthenes
* Algorithm for finding all prime numbers upto a given limit.
* We go through natural numbers starting with 2 removing out multiples of each.
*/
void sieve(long n, long primes[])
{
// 1. fill in numbers
long runner = runningPrimes(n);
long start_no = 2;
long i = start_no;
while(i <= n)
{
// printf("%d\n", i);
long w = width(i, n);
long lastEl = lastElementInBitset(w);
long r = commonRatio(i);
long numEls = numElementsInGP(A_1, lastEl, r);
long sum = gpSum(A_1, r, numEls);
// printf("non padded sum: w %d , r %d , numEls %d ", w, r, numEls);
// printBits(sum);
sum = paddedSum(sum, n, i);
// We need to flip `sum` bits because currently 1s in sum represent multiples
// If we flip 1010 it becomes 101. But notice we also want to remove 1000.
// If we dont handle this, 4 will show up in the primes
// So we first get a copy of first bit
long grtrBitset = greaterFirstBit(sum);
sum = flip(sum);
sum = grtrBitset | sum;
//
// printf("sum: gr %d ", grtrBitset);
// printBits(grtrBitset);
// printf("sum ");
// printBits(sum);
// Cancel out bits that represent multiples of i
// We are going to drop some bits. The ones that are multiples of i
// We first store some values
long bitsOnRightToDrop = n - (2 * i) + 1;
long notToChangeBits = dropRightBits(runner, bitsOnRightToDrop);
long withoutSum = runner & sum;
// printf("Runner: ");
runner = notToChangeBits | withoutSum;
// printBits(runner);
i++;
}
primeBitsetToArray(runner, primes);
}
long runningPrimes(long max)
{
return pow(2, max) - 1;
}
long paddedSum(long sum, long n, long i) {
return sum * pow(2, (n % i));
}
/** Find sum of a geometric progression
* a_1: first element in GP
* r: common ratio
* n: number of elements in GP
*/
long gpSum(long a_1, long r, long n)
{
return a_1 * (pow(r, n) - 1) / (r - 1);
}
/* Compute common ratio to be used for a given number */
long commonRatio(long i)
{
return pow(2, i);
}
/** Find number of elements in a GP
* a_1: first element in GP
* a_n: last element in GP
* r : common ratio
*/
long numElementsInGP(long a_1, long a_n, long r)
{
return log2(a_n / a_1) / log2(r) + 1;
}
/* Find last element in bitset as decimal integer
* For example if bitset is 10010 , last element is 10000 = 16
* width: Number of characters in bitset.
*/
long lastElementInBitset(long width)
{
return pow(2, (width - 1));
}
/* Produces the number of bits from first multiple greater than factor to last multiple less than n. */
long width(long factor, long n)
{
long firstMultGR = 2 * factor;
long lastMultLess = n - (n % factor);
return lastMultLess - firstMultGR + 1;
}
// -- Bit functions --
/** Convert binary to decimal integer */
long btoi(char s[])
{
long result = 0;
long i = 0;
while (s[i] != 0)
{
if (s[i] != '0' && s[i] != '1')
return -1;
result *= 2;
result += s[i] - '0';
i++;
}
return result;
}
/** convert decimal integer to binary */
void itob(long n, char s[])
{
if (n == 0)
{
s[0] = '0';
s[1] = 0;
return;
};
if (n < 0)
{
s[0] = '-';
s[1] = '1';
s[2] = 0;
return;
}
long pos = log2(n);
s[pos + 1] = 0;
while (pos != 0)
{
s[pos] = '0' + n % 2;
n = n / 2;
pos--;
}
// pos will be 0
s[pos] = '1';
}
/**
* Prints binary representation of set
*/
void printBits(long num)
{
char result[MAXLINE];
itob(num, result);
printf("%s\n", result);
}
// Flip bits. For example 10110 becomes 1001
long flip(long bitset)
{
long mask = pow(2, (long) log2(bitset) + 1) - 1;
return bitset ^ mask;
}
// Drop some bits from the right side of a bitset. For example dropRighBits(btoi("1001100110"), 6) produces 1001000000
long dropRightBits(long bitset, long noToDrop) {
long mask = pow(2, noToDrop) - 1;
long rightFlippedBitset = bitset ^ mask;
return bitset & rightFlippedBitset;
}
/* Copy bitset representing prime positions into an array of prime numbers */
void primeBitsetToArray(long bitset, long primes[])
{
char temp[MAXLINE];
itob(bitset, temp);
// printf("bitset %s\n", temp);
// in the bitset, the first position represents number 1 then 2 ... etc So our primes will start at index 1
long i = 1;
long j = 0;
while(temp[i] != 0)
{
if(temp[i] == '1')
{
// in the bitset, the first position is 1 then 2 ... etc
primes[j] = i + 1;
// printf("%d\n", i + 1);
j++;
}
i++;
}
primes[j] = -1;
}
/* Return bitset for the first bit in a given bitset.
* For example bitsetForFirstBit(10) = 8
*/
long bitsetForFirstBit(long bitset) {
return pow(2, log2(bitset));
}
/* Return bitset that is greater than given bitset but also a multiple of 2.
* For example bitsetForFirstBit(10) = 16
*/
long greaterFirstBit(long bitset) {
return pow(2, (long) log2(bitset) + 1);
}
void printPrimes(long primes[], size_t size)
{
long i = 0;
while(i < size && primes[i] != -1)
{
printf("%d, ", primes[i]);
i++;
}
printf("\n");
}
Related
Let's say I've been given two integers a, b where a is a positive integer and is smaller than b. I have to find an efficient algorithm that's going to give me the sum of number of base2 digits (number of bits) over the interval [a, b]. For example, in the interval [0, 4] the sum of digits is equal to 9 because 0 = 1 digit, 1 = 1 digit, 2 = 2 digits, 3 = 2 digits and 4 = 3 digits.
My program is capable of calculating this number by using a loop but I'm looking for something more efficient for large numbers. Here are the snippets of my code just to give you an idea:
int numberOfBits(int i) {
if(i == 0) {
return 1;
}
else {
return (int) log2(i) + 1;
}
}
The function above is for calculating the number of digits of one number in the interval.
The code below shows you how I use it in my main function.
for(i = a; i <= b; i++) {
l = l + numberOfBits(i);
}
printf("Digits: %d\n", l);
Ideally I should be able to get the number of digits by using the two values of my interval and using some special algorithm to do that.
Try this code, i think it gives you what you are needing to calculate the binaries:
int bit(int x)
{
if(!x) return 1;
else
{
int i;
for(i = 0; x; i++, x >>= 1);
return i;
}
}
The main thing to understand here is that the number of digits used to represent a number in binary increases by one with each power of two:
+--------------+---------------+
| number range | binary digits |
+==============+===============+
| 0 - 1 | 1 |
+--------------+---------------+
| 2 - 3 | 2 |
+--------------+---------------+
| 4 - 7 | 3 |
+--------------+---------------+
| 8 - 15 | 4 |
+--------------+---------------+
| 16 - 31 | 5 |
+--------------+---------------+
| 32 - 63 | 6 |
+--------------+---------------+
| ... | ... |
A trivial improvement over your brute force algorithm would then be to figure out how many times this number of digits has increased between the two numbers passed in (given by the base two logarithm) and add up the digits by multiplying the count of numbers that can be represented by the given number of digits (given by the power of two) with the number of digits.
A naive implementation of this algorithm is:
int digits_sum_seq(int a, int b)
{
int sum = 0;
int i = 0;
int log2b = b <= 0 ? 1 : floor(log2(b));
int log2a = a <= 0 ? 1 : floor(log2(a)) + 1;
sum += (pow(2, log2a) - a) * (log2a);
for (i = log2b; i > log2a; i--)
sum += pow(2, i - 1) * i;
sum += (b - pow(2, log2b) + 1) * (log2b + 1);
return sum;
}
It can then be improved by the more efficient versions of the log and pow functions seen in the other answers.
First, we can improve the speed of log2, but that only gives us a fixed factor speed-up and doesn't change the scaling.
Faster log2 adapted from: https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
The lookup table method takes only about 7 operations to find the log
of a 32-bit value. If extended for 64-bit quantities, it would take
roughly 9 operations. Another operation can be trimmed off by using
four tables, with the possible additions incorporated into each. Using
int table elements may be faster, depending on your architecture.
Second, we must re-think the algorithm. If you know that numbers between N and M have the same number of digits, would you add them up one by one or would you rather do (M-N+1)*numDigits?
But if we have a range where multiple numbers appear what do we do? Let's just find the intervals of same digits, and add sums of those intervals. Implemented below. I think that my findEndLimit could be further optimized with a lookup table.
Code
#include <stdio.h>
#include <limits.h>
#include <time.h>
unsigned int fastLog2(unsigned int v)
{
static const char LogTable256[256] =
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
-1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};
register unsigned int t, tt; // temporaries
if (tt = v >> 16)
{
return (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else
{
return (t = v >> 8) ? 8 + LogTable256[t] : LogTable256[v];
}
}
unsigned int numberOfBits(unsigned int i)
{
if (i == 0) {
return 1;
}
else {
return fastLog2(i) + 1;
}
}
unsigned int findEndLimit(unsigned int sx, unsigned int ex)
{
unsigned int sy = numberOfBits(sx);
unsigned int ey = numberOfBits(ex);
unsigned int mx;
unsigned int my;
if (sy == ey) // this also means sx == ex
return ex;
// assumes sy < ey
mx = (ex - sx) / 2 + sx; // will eq. sx for sx + 1 == ex
my = numberOfBits(mx);
while (ex - sx != 1) {
mx = (ex - sx) / 2 + sx; // will eq. sx for sx + 1 == ex
my = numberOfBits(mx);
if (my == ey) {
ex = mx;
ey = numberOfBits(ex);
}
else {
sx = mx;
sy = numberOfBits(sx);
}
}
return sx+1;
}
int main(void)
{
unsigned int a, b, m;
unsigned long l;
clock_t start, end;
l = 0;
a = 0;
b = UINT_MAX;
start = clock();
unsigned int i;
for (i = a; i < b; ++i) {
l += numberOfBits(i);
}
if (i == b) {
l += numberOfBits(i);
}
end = clock();
printf("Naive\n");
printf("Digits: %ld; Time: %fs\n",l, ((double)(end-start))/CLOCKS_PER_SEC);
l=0;
start = clock();
do {
m = findEndLimit(a, b);
l += (b-m + 1) * (unsigned long)numberOfBits(b);
b = m-1;
} while (b > a);
l += (b-a+1) * (unsigned long)numberOfBits(b);
end = clock();
printf("Binary search\n");
printf("Digits: %ld; Time: %fs\n",l, ((double)(end-start))/CLOCKS_PER_SEC);
}
Output
From 0 to UINT_MAX
$ ./main
Naive
Digits: 133143986178; Time: 25.722492s
Binary search
Digits: 133143986178; Time: 0.000025s
My findEndLimit can take long time in some edge cases:
From UINT_MAX/16+1 to UINT_MAX/8
$ ./main
Naive
Digits: 7784628224; Time: 1.651067s
Binary search
Digits: 7784628224; Time: 4.921520s
Conceptually, you would need to split the task to two subproblems -
1) find the sum of digits from 0..M, and from 0..N, then subtract.
2) find the floor(log2(x)), because eg for the number 77 the numbers 64,65,...77 all have 6 digits, the next 32 have 5 digits, the next 16 have 4 digits and so on, which makes a geometric progression.
Thus:
int digits(int a) {
if (a == 0) return 1; // should digits(0) be 0 or 1 ?
int b=(int)floor(log2(a)); // use any all-integer calculation hack
int sum = 1 + (b+1) * (a- (1<<b) +1); // added 1, due to digits(0)==1
while (--b)
sum += (b + 1) << b; // shortcut for (b + 1) * (1 << b);
return sum;
}
int digits_range(int a, int b) {
if (a <= 0 || b <= 0) return -1; // formulas work for strictly positive numbers
return digits(b)-digits(a-1);
}
As efficiency depends on the tools available, one approach would be doing it "analog":
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
unsigned long long pow2sum_min(unsigned long long n, long long unsigned m)
{
if (m >= n)
{
return 1;
}
--n;
return (2ULL << n) + pow2sum_min(n, m);
}
#define LN(x) (log2(x)/log2(M_E))
int main(int argc, char** argv)
{
if (2 >= argc)
{
fprintf(stderr, "%s a b\n", argv[0]);
exit(EXIT_FAILURE);
}
long a = atol(argv[1]), b = atol(argv[2]);
if (0L >= a || 0L >= b || b < a)
{
puts("Na ...!");
exit(EXIT_FAILURE);
}
/* Expand intevall to cover full dimensions: */
unsigned long long a_c = pow(2, floor(log2(a)));
unsigned long long b_c = pow(2, floor(log2(b+1)) + 1);
double log2_a_c = log2(a_c);
double log2_b_c = log2(b_c);
unsigned long p2s = pow2sum_min(log2_b_c, log2_a_c) - 1;
/* Integral log2(x) between a_c and b_c: */
double A = ((b_c * (LN(b_c) - 1))
- (a_c * (LN(a_c) - 1)))/LN(2)
+ (b+1 - a);
/* "Integer"-integral - integral of log2(x)'s inverse function (2**x) between log(a_c) and log(b_c): */
double D = p2s - (b_c - a_c)/LN(2);
/* Corrective from a_c/b_c to a/b : */
double C = (log2_b_c - 1)*(b_c - (b+1)) + log2_a_c*(a - a_c);
printf("Total used digits: %lld\n", (long long) ((A - D - C) +.5));
}
:-)
The main thing here is the number and kind of iterations done.
Number is
log(floor(b_c)) - log(floor(a_c))
times
doing one
n - 1 /* Integer decrement */
2**n + s /* One bit-shift and one integer addition */
for each iteration.
Here's an entirely look-up based approach. You don't even need the log2 :)
Algorithm
First we precompute interval limits where the number of bits would change and create a lookup table. In other words we create an array limits[2^n], where limits[i] gives us the biggest integer that can be represented with (i+1) bits. Our array is then {1, 3, 7, ..., 2^n-1}.
Then, when we want to determine the sum of bits for our range, we must first match our range limits a and b with the smallest index for which a <= limits[i] and b <= limits[j] holds, which will then tell us that we need (i+1) bits to represent a, and (j+1) bits to represent b.
If the indexes are the same, then the result is simply (b-a+1)*(i+1), otherwise we must separately get the number of bits from our value to the edge of same number of bits interval, and add up total number of bits for each interval between as well. In any case, simple arithmetic.
Code
#include <stdio.h>
#include <limits.h>
#include <time.h>
unsigned long bitsnumsum(unsigned int a, unsigned int b)
{
// generate lookup table
// limits[i] is the max. number we can represent with (i+1) bits
static const unsigned int limits[32] =
{
#define LTN(n) n*2u-1, n*4u-1, n*8u-1, n*16u-1, n*32u-1, n*64u-1, n*128u-1, n*256u-1
LTN(1),
LTN(256),
LTN(256*256),
LTN(256*256*256)
};
// make it work for any order of arguments
if (b < a) {
unsigned int c = a;
a = b;
b = c;
}
// find interval of a
unsigned int i = 0;
while (a > limits[i]) {
++i;
}
// find interval of b
unsigned int j = i;
while (b > limits[j]) {
++j;
}
// add it all up
unsigned long sum = 0;
if (i == j) {
// a and b in the same range
// conveniently, this also deals with j == 0
// so no danger to do [j-1] below
return (i+1) * (unsigned long)(b - a + 1);
}
else {
// add sum of digits in range [a, limits[i]]
sum += (i+1) * (unsigned long)(limits[i] - a + 1);
// add sum of digits in range [limits[j], b]
sum += (j+1) * (unsigned long)(b - limits[j-1]);
// add sum of digits in range [limits[i], limits[j]]
for (++i; i<j; ++i) {
sum += (i+1) * (unsigned long)(limits[i] - limits[i-1]);
}
return sum;
}
}
int main(void)
{
clock_t start, end;
unsigned int a=0, b=UINT_MAX;
start = clock();
printf("Sum of binary digits for numbers in range "
"[%u, %u]: %lu\n", a, b, bitsnumsum(a, b));
end = clock();
printf("Time: %fs\n", ((double)(end-start))/CLOCKS_PER_SEC);
}
Output
$ ./lookup
Sum of binary digits for numbers in range [0, 4294967295]: 133143986178
Time: 0.000282s
Algorithm
The main idea is to find the n2 = log2(x) rounded down. That is the number of digits in x. Let pow2 = 1 << n2. n2 * (pow2 - x + 1) is the number of digits in the values [x...pow2]. Now find the sun of digits in the powers of 2 from 1 to n2-1
Code
I am certain various simplifications can be made.
Untested code. Will review later.
// Let us use unsigned for everything.
unsigned ulog2(unsigned value) {
unsigned result = 0;
if (0xFFFF0000u & value) {
value >>= 16; result += 16;
}
if (0xFF00u & value) {
value >>= 8; result += 8;
}
if (0xF0u & value) {
value >>= 4; result += 4;
}
if (0xCu & value) {
value >>= 2; result += 2;
}
if (0x2 & value) {
value >>= 1; result += 1;
}
return result;
}
unsigned bit_count_helper(unsigned x) {
if (x == 0) {
return 1;
}
unsigned n2 = ulog2(x);
unsigned pow2 = 1u << n;
unsigned sum = n2 * (pow2 - x + 1u); // value from pow2 to x
while (n2 > 0) {
// ... + 5*16 + 4*8 + 3*4 + 2*2 + 1*1
pow2 /= 2;
sum += n2 * pow2;
}
return sum;
}
unsigned bit_count(unsigned a, unsigned b) {
assert(a < b);
return bit_count_helper(b - 1) - bit_count_helper(a);
}
For this problem your solution is the simplest, the one called "naive" where you look for every element in the sequence or in your case interval for check something or execute operations.
Naive Algorithm
Assuming that a and b are positive integers with b greater than a let's call the dimension/size of the interval [a,b], n = (b-a).
Having our number of elements n and using some notations of algorithms (like big-O notation link), the worst case cost is O(n*(numberOfBits_cost)).
From this we can see that we can speed up our algorithm by using a faster algorithm for computing numberOfBits() or we need to find a way to not look at every element of the interval that costs us n operations.
Intuition
Now looking at a possible interval [6,14] you can see that for 6 and 7 we need 3 digits, with 4 need for 8,9,10,11,12,13,14. This results in calling numberOfBits() for every number that use the same number of digits to be represented, while the following multiplication operation would be faster:
(number_in_subinterval)*digitsForThisInterval
((14-8)+1)*4 = 28
((7-6)+1)*3 = 6
So we reduced the looping on 9 elements with 9 operations to only 2.
So writing a function that use this intuition will give us a more efficient in time, not necessarily in memory, algorithm. Using your numberOfBits() function I have created this solution:
int intuitionSol(int a, int b){
int digitsForA = numberOfBits(a);
int digitsForB = numberOfBits(b);
if(digitsForA != digitsForB){
//because a or b can be that isn't the first or last element of the
// interval that a specific number of digit can rappresent there is a need
// to execute some correction operation before on a and b
int tmp = pow(2,digitsForA) - a;
int result = tmp*digitsForA; //will containt the final result that will be returned
int i;
for(i = digitsForA + 1; i < digitsForB; i++){
int interval_elements = pow(2,i) - pow(2,i-1);
result = result + ((interval_elements) * i);
//printf("NumOfElem: %i for %i digits; sum:= %i\n", interval_elements, i, result);
}
int tmp1 = ((b + 1) - pow(2,digitsForB-1));
result = result + tmp1*digitsForB;
return result;
}
else {
int elements = (b - a) + 1;
return elements * digitsForA; // or digitsForB
}
}
Let's look at the cost, this algorithm costs is the cost of doing correction operation on a and b plus the most expensive one that of the for-loop. In my solution however I'm not looping over all elements but only on numberOfBits(b)-numberOfBits(a) that in the worst case, when [0,n], become log(n)-1 thats equivalent to O(log n).
To resume we passed from a linear operations cost O(n) to a logartmic one O(log n) in the worst case. Look on this diagram the diferinces between the two.
Note
When I talk about interval or sub-interval I refer to the interval of elements that use the same number of digits to represent the number in binary.
Following there are some output of my tests with the last one that shows the difference:
Considered interval is [0,4]
YourSol: 9 in time: 0.000015s
IntuitionSol: 9 in time: 0.000007s
Considered interval is [0,0]
YourSol: 1 in time: 0.000005s
IntuitionSol: 1 in time: 0.000005s
Considered interval is [4,7]
YourSol: 12 in time: 0.000016s
IntuitionSol: 12 in time: 0.000005s
Considered interval is [2,123456]
YourSol: 1967697 in time: 0.005010s
IntuitionSol: 1967697 in time: 0.000015s
I have a function print_number.
The function checks if in front of the number there exists '-', then it reverse the number and takes every digit and prints it. The algorithm works pretty good but if i give -2.147.483.648 ( which should be the bottom limit of an integer ) it pritns -0 and i don't know why.
#include<stdio.h>
void print_char(char character)
{
printf("%c",character);
}
void print_number(int nr)
{
int reverse=0;
if (nr < 0)
{
print_char('-');
nr *= -1;
}
while(nr > 9)
{
reverse = reverse * 10 + nr % 10;
nr = nr / 10;
}
print_char(nr + '0');
while(reverse)
{
print_char(reverse % 10 + '0');
reverse = reverse / 10;
}
}
When you are doing
if (nr < 0)
{
print_char('-');
nr *= -1;
}
It inverses negative number to the positive one.
If you will run it for -2.147.483.648, you will receive
nr = 2.147.483.648 // == binary 1 0000000000000000000000000000000
As INT is 32 BIT variable in your architecture (and at least 16 BIT variable by the spec), so '1' overflows it and so on
nr = 0 // For gcc-like C realisation
And accepting the ISO9899 spec, this behaviour of signed int overflow is realisation-specified thing and may not be predicted in common.
Use long long value if you're needing to use your program for larger values.
Something like:
#include<stdio.h>
void print_char(char character)
{
printf("%c",character);
}
void print_number(long long nr)
{
int reverse=0;
if (nr < 0)
{
print_char('-');
nr *= -1;
}
while(nr > 9)
{
reverse = reverse * 10 + nr % 10;
nr = nr / 10;
}
print_char(nr + '0');
while(reverse)
{
print_char(reverse % 10 + '0');
reverse = reverse / 10;
}
}
void main(void){
print_number(-2147483648LL);
}
And test:
> gcc test.c
> ./a.out
-2147483648
Firstly, the MAX and MIN range for an INT are -2,147,483,648 and 2,147,483,647 respectively.
Negating -2,147,483,648 means a positive value 2,147,483,648 would result in an overflow by 1 as it is out of bounds for the MAX range.
This operation will result in the same value of -2,147,483,648.
Secondly, you might encounter an overflow during the integer reversing process.
Example, reversing 2147483647 causes an overflow after the intermediate result of 746384741.
Therefore, you should handle that by throwing an exception or returning 0.
Thirdly, your loop for reversing the number is inaccurate. It should loop till while(nr != 0)
Here's the complete code.
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
int main()
{
void reverseNumber(int);
reverseNumber(124249732);
return 0;
}
void reverseNumber(int nr)
{
printf("nr = %d\n", nr);
int reverse = 0;
bool neg = false;
if (nr < 0) {
neg = true;
nr *= -1;
}
while (nr != 0) {
int digit = nr % 10;
int result = reverse * 10 + digit;
if ((result - digit) / 10 != reverse) {
printf("ERROR\n");
exit(0);
}
reverse = result;
nr = nr / 10;
}
if(neg) {
printf("%c", '-');
}
printf("%d\n", reverse);
}
nr *= -1; is a problme when nr == INT_MIN as that is signed integer overflow. The result is undefined behavior (UB). Best to avoid.
Wider integers are not always available.
Using OP's general, approach, do not change the sign of nr until it is reduced.
void print_number(int nr) {
int reverse = 0;
if (nr < 0) {
print_char('-');
//nr *= -1;
}
while (nr/10) { // new test
reverse = reverse * 10 + nr % 10;
nr = nr / 10;
}
reverse = abs(reverse); // reverse = |reverse|
nr = abs(nr); // nr = |nr|
print_char(nr + '0');
while (reverse) {
print_char(reverse % 10 + '0');
reverse = reverse / 10;
}
}
This program does Prime Factorization Of Numbers In C.
#include <stdio.h>
int main(void) {
int number, i, p, n, factors, count;
int numbers[1000000];
int counter = 0;
char text[100000];
for (count = 0; count < 1000000; count++) {
fgets(text, 10000000, stdin);
if (sscanf(text, "%d", &number) == 1) {
if (number == 0)
break;
numbers[count] = number;
} else {
numbers[count] = 0;
}
}
counter = 0;
for (i = 0; i < count; i++) {
if ((numbers[i] < 0) || (numbers[i] == 0)) {
fprintf(stderr, "Error: Wrong Input!\n");
return 100;
break;
}
number = numbers[i];
printf("Prime factorization of nubmer %d is:\n", number);
factors = 0;
for (p = 2; p * p <= number; p += 1 + (p & 1)) {
if (number % p == 0) {
n = 0;
factors++;
do {
number /= p;
n++;
} while (number % p == 0);
if (n == 1) {
printf("%d ", p);
++counter;
} else
printf("%d^%d ", p, n);
++counter;
if (count > 0 && number != 1)
printf("x ");
}
}
if (factors == 0 || number != 1)
printf("%d", number);
printf("\n");
}
return 0;
}
This program works fine for numbers smaller than 108. But my question is, if there is a way to make this program even for numbers like 1012. I know that int would not be enough, but when I tried for example long int, it didn't worked. Also I heard something about malloc, but I keep failing to implement (understand) it.
Factorising large numbers usually needs a more subtle approach than simple trial division. Here is a possible outline method:
Make a list of all the primes up to, say, 25,000.
Use the list to remove all prime factors below 25,000.
If there is a remainder > 1 then check if the remainder is prime with a Miller-Rabin test or similar.
If the remainder is prime, then you have found the last factor.
If the remainder is not prime, then you are going to have to factorise it. That will inevitably be slow I'm afraid.
You can use long long. But probably, the real problem is, that it will take a veeeeerrrryyy long time to do the factorization on numbers, that don't fit in a normal int. E.g. you're trying to factorize a prime number in the range 10^12, then you will have to do about 10^6 divisions.
The thing about malloc won't help you with this problem at all, because even bigger values will take even longer to factorize. So, if you want to know, how malloc works, I suggest opening a separate question for that.
Below is a rework of the code using unsigned long long. (I tossed the file stuff to keep this to a minimal example.) Whether this works for your purpose depends on how your system defines a long long (on my system it's 64 bits). I also redid the output format to be compatible with the Unix dc command's postfix notation so I could easily check if the results were correct:
#include <stdio.h>
#include <stdlib.h>
int main() {
unsigned long long large = 18446744073709551615ULL; // 2^64 - 1
for (unsigned long long counter = large - 1000; counter < large; counter++) {
unsigned long long number = counter;
printf("Prime factorization of %llu is:", number);
unsigned long factors = 0;
for (unsigned long long p = 2; p * p <= number; p += 1 + (p & 1)) {
if (number % p == 0) {
factors++;
unsigned long n = 0;
do {
number /= p;
n++;
} while (number % p == 0);
if (n == 1) {
printf(" %llu", p);
}
else {
printf(" %llu %lu ^", p, n);
}
if (number != 1 && factors > 1) {
printf(" *");
}
}
}
if (factors == 0 || number != 1) {
factors++;
printf(" %llu", number);
}
if (factors > 1) {
printf(" *");
}
printf("\n");
}
return 0;
}
SAMPLE OUTPUT
% ./a.out
Prime factorization of 18446744073709550615 is: 5 563 * 751 * 8725722280871 *
Prime factorization of 18446744073709550616 is: 2 3 ^ 3 * 41 * 7523 * 8243 * 14479 * 20879 *
Prime factorization of 18446744073709550617 is: 79 557 * 419215600611539 *
Prime factorization of 18446744073709550618 is: 2 2298974999 * 4011949691 *
Prime factorization of 18446744073709550619 is: 3 3 ^ 1008659 * 677347590683 *
Prime factorization of 18446744073709550620 is: 2 2 ^ 5 * 7 * 149 * 233 * 3795329598449 *
Prime factorization of 18446744073709550621 is: 11 23 * 72912031911895457 *
Prime factorization of 18446744073709550622 is: 2 3 * 479909 * 6406334004193 *
Prime factorization of 18446744073709550623 is: 3421377637 5391612979 *
Prime factorization of 18446744073709550624 is: 2 5 ^ 61 * 593 * 1699 * 9379762391 *
Prime factorization of 18446744073709550625 is: 3 5 4 ^ * 13 * 756789500459879 *
Prime factorization of 18446744073709550626 is: 2 3743461 * 2463862195133 *
Prime factorization of 18446744073709550627 is: 7 1283 * 4339 * 627089 * 754877 *
Prime factorization of 18446744073709550628 is: 2 2 ^ 3 2 ^ * 101 * 293 * 42751 * 405025111 *
Prime factorization of 18446744073709550629 is: 17 43 * 613 * 66457 * 619442699 *
...
This runs slower but reasonably. You can push this further on some systems by swapping unsigned long long for a uint128_t which some compilers support somewhat:
typedef unsigned __int128 uint128_t;
(And up the unsigned long declarations to unsigned long long.) You'd need to supply number printing routines for the uint128_t type as printf() isn't going to handle them directly. I tried this with the above code and it worked:
Prime factorization of 340282366920938463426481119284349108124 is: 2 2 ^ 31 * 6131 * 7654271 * 21163829 * 21491837 * 128562653437 *
% dc
2 2 ^ 31 * 6131 * 7654271 * 21163829 * 21491837 * 128562653437 * p
340282366920938463426481119284349108124
But I never saw it complete more than one number while running it!
Using type unsigned long long for number and the prime factors will take you to 1019 at the price of longer computation times.
Note however that defining a large local array with automatic storage may cause problems, especially when it reaches a size of 8 megabytes as would be the case for type unsigned long long (this type is at least 64-bit wide). Allocating it from the heap is safer.
Here is an adapted version of the code:
#include <stdio.h>
#include <stdlib.h>
#define NUMBER_MAX 1000000
int main(void) {
unsigned long long *numbers;
unsigned long long number, p;
int i, n, factors, count;
char text[100];
numbers = calloc(NUMBER_MAX, sizeof(*numbers));
if (numbers == NULL) {
printf("cannot allocate number array\n");
return 1;
}
for (count = 0; count < NUMBER_MAX; count++) {
if (!fgets(text, sizeof text, stdin)) {
break;
}
if (sscanf(text, "%llu", &number) == 1 && number > 0) {
numbers[count] = number;
} else {
fprintf(stderr, "Error: Wrong Input!\n");
return 100;
}
}
for (i = 0; i < count; i++) {
number = numbers[i];
printf("Prime factorization of nubmer %llu is:\n", number);
factors = 0;
for (p = 2; p < 0x100000000 && p * p <= number; p += 1 + (p & 1)) {
if (number % p == 0) {
n = 0;
factors++;
do {
number /= p;
n++;
} while (number % p == 0);
if (n == 1) {
printf("%llu ", p);
} else {
printf("%llu^%d ", p, n);
}
if (number != 1) {
printf("* ");
}
}
}
if (factors == 0 || number != 1) {
printf("%llu", number);
}
printf("\n");
}
free(numbers);
return 0;
}
The number can range from 1 to 1015.
I am using this code but it is running out of time.
int distinct(long long int a)
{
int ele[10]={0},i,c=0;
if(a==0) return 1;
if(a<0) a=a*-1;
while(a)
{
int t=a%10;
ele[t]=1;
a=a/10;
}
for (i=0;i<10;i++)
if (ele[i])
c++;
return c;
}
Incorporating various ideas and to resolve UB.
IMO, suspect there is something that OP has left out that is a significant cause of slowness.
// 1 to 10^15 only
int distinct_fast(long long int a) {
int ele[10]={0},i,c=0;
do {
ele[a%10]=1;
a /= 10;
} while(a);
i=10-1;
do {
c += ele[i]; // #barak manos
}
} while (i-- > 0);
return c;
}
// entire unsigned long long range method 1
int distinct_complete1(unsigned long long int a) {
... // same code as above
// entire long long range method 2
int distinct_complete2(long long int a) {
int ele[10]={0},i,c=0;
// Use (-) numbers as there are more (or the same) number of (+) numbers
if (a > 0) a = -a;
do {
ele[-(a % 10)] = 1;
a /= 10;
} while(a);
// same as above
...
Ideas for OP to explore:
unsigned char ele[10]={0}; // smaller flags
.
do {
if (ele[a%10]++ == 0) c++;
a /= 10;
} while(a);
// This eliminates need for following loop to add `ele[]`
.
// Invoke some strategy so when when a is small enough,
// use `long` ops rather than `long long`
if (a > 1000000000) {
for (i=6; i-- > 0; ) {
if (ele[a%10]++ == 0) c++;
a /= 10;
}
}
unsigned long b = a;
do {
if (ele[b%10]++ == 0) c++;
b /= 10;
} while(b);
.
int distinct_complete3(unsigned long long int a) {
unsigned char ele[10]={0};
int c = 0;
do {
if (ele[a%10]++ == 0) c++;
a /= 10;
} while(a);
return c;
}
Several possible optimizations:
you can trade a modulo for a multiply, usually much faster: q= a / 10; m= a - 10 * q;
you can avoid the final counting loop by packing all flags in a single integer, let mask; initialize it with mask= 0; every time you find a digit (m), flag it with mask|= (1 << m); in the end, the count will be given by bits[mask], where bits is a vector containing the precomputed counts for all integers from 0 to 1023=2^10-1.
int distinct(long long int a)
{
int mask= 0;
while (a)
{
int q= a / 10, m= a - 10 * q;
mask|= 1 << m;
a= q;
}
static short bits[1024]= { 0, 1, 1, 2, 1, 2, 2, 3, ...}; // Number of bits set
return bits[mask];
}
Even better, you can work with digits in groups, say of three. Instead of converting to base 10, convert to base 1000. And for every base 1000 "digit", compute the corresponding mask that flags the constituent decimal digits (for instance, 535 yields the mask 1<<5 | 1<<3 | 1<<5 = 40).
This should be about three times faster. Anyway, some care of the leading zeroes should be added, for instance by providing a distinct array of masks for the leading triple (..1 vs 001).
int distinct(long long int a)
{
int mask= 0;
while (true)
{
int q= a / 1000, m= a - 1000 * q;
if (q == 0)
{
static short leading[1000]= { 1, 2, 4, 8, 16, 32, 64, ...}; // Mask for the leading triples
mask|= leading[m];
break;
}
else
{
static short triple[1000]= { 1, 3, 5, 9, 17, 33, 65, ...}; // Mask for the ordinary triples
mask|= triple[m];
a= q;
}
}
static short bits[1024]= { 0, 1, 1, 2, 1, 2, 2, 3, ...}; // Number of bits set
return bits[mask];
}
Use static arrays to make sure they are loaded once for all.
I have an array of unsigned chars in c I am trying to print in base 10, and I am stuck. I think this will be better explained in code, so, given:
unsigned char n[3];
char[0] = 1;
char[1] = 2;
char[2] = 3;
I would like to print 197121.
This is trivial with small base 256 arrays. One can simply 1 * 256 ^ 0 + 2 * 256 ^ 1 + 3 * 256 ^ 2.
However, if my array was 100 bytes large, then this quickly becomes a problem. There is no integral type in C that is 100 bytes large, which is why I'm storing numbers in unsigned char arrays to begin with.
How am I supposed to efficiently print this number out in base 10?
I am a bit lost.
There's no easy way to do it using only the standard C library. You'll either have to write the function yourself (not recommended), or use an external library such as GMP.
For example, using GMP, you could do:
unsigned char n[100]; // number to print
mpz_t num;
mpz_import(num, 100, -1, 1, 0, 0, n); // convert byte array into GMP format
mpz_out_str(stdout, 10, num); // print num to stdout in base 10
mpz_clear(num); // free memory for num
When I saw this question, I purpose to solve it, but at that moment I was very busy.
This last weekend I've could gain some prize hours of free time so I considered my pending challenge.
First of all, I suggest you to considered above response. I never use GMP library but I'm sure that it's better solution than a handmade code.
Also, you could be interest to analyze code of bc calculator; it can works with big numbers and I used to test my own code.
Ok, if you are still interested in a code do it by yourself (only with support C language and Standard C library) may be I can give you something.
Before all, a little bit theory. In basic numeric theory (modular arithmetic level) theres is an algorithm that inspire me to arrive at one solution; Multiply and Power algorithm to solve a^N module m:
Result := 1;
for i := k until i = 0
if n_i = 1 then Result := (Result * a) mod m;
if i != 0 then Result := (Result * Result) mod m;
end for;
Where k is number of digits less one of N in binary representation, and n_i is i binary digit. For instance (N is exponent):
N = 44 -> 1 0 1 1 0 0
k = 5
n_5 = 1
n_4 = 0
n_3 = 1
n_2 = 1
n_1 = 0
n_0 = 0
When we make a module operation, as an integer division, we can lose part of the number, so we only have to modify algorithm to don't miss relevant data.
Here is my code (take care that it is an adhoc code, strong dependency of may computer arch. Basically I play with data length of C language so, be carefully because my data length could not be the same):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
enum { SHF = 31, BMASK = 0x1 << SHF, MODULE = 1000000000UL, LIMIT = 1024 };
unsigned int scaleBigNum(const unsigned short scale, const unsigned int lim, unsigned int *num);
unsigned int pow2BigNum(const unsigned int lim, unsigned int *nsrc, unsigned int *ndst);
unsigned int addBigNum(const unsigned int lim1, unsigned int *num1, const unsigned int lim2, unsigned int *num2);
unsigned int bigNum(const unsigned short int base, const unsigned int exp, unsigned int **num);
int main(void)
{
unsigned int *num, lim;
unsigned int *np, nplim;
int i, j;
for(i = 1; i < LIMIT; ++i)
{
lim = bigNum(i, i, &num);
printf("%i^%i == ", i, i);
for(j = lim - 1; j > -1; --j)
printf("%09u", num[j]);
printf("\n");
free(num);
}
return 0;
}
/*
bigNum: Compute number base^exp and store it in num array
#base: Base number
#exp: Exponent number
#num: Pointer to array where it stores big number
Return: Array length of result number
*/
unsigned int bigNum(const unsigned short int base, const unsigned int exp, unsigned int **num)
{
unsigned int m, lim, mem;
unsigned int *v, *w, *k;
//Note: mem has the exactly amount memory to allocate (dinamic memory version)
mem = ( (unsigned int) (exp * log10( (float) base ) / 9 ) ) + 3;
v = (unsigned int *) malloc( mem * sizeof(unsigned int) );
w = (unsigned int *) malloc( mem * sizeof(unsigned int) );
for(m = BMASK; ( (m & exp) == 0 ) && m; m >>= 1 ) ;
v[0] = (m) ? 1 : 0;
for(lim = 1; m > 1; m >>= 1)
{
if( exp & m )
lim = scaleBigNum(base, lim, v);
lim = pow2BigNum(lim, v, w);
k = v;
v = w;
w = k;
}
if(exp & 0x1)
lim = scaleBigNum(base, lim, v);
free(w);
*num = v;
return lim;
}
/*
scaleBigNum: Make an (num[] <- scale*num[]) big number operation
#scale: Scalar that multiply big number
#lim: Length of source big number
#num: Source big number (array of unsigned int). Update it with new big number value
Return: Array length of operation result
Warning: This method can write in an incorrect position if we don't previous reallocate num (if it's necessary). bigNum method do it for us
*/
unsigned int scaleBigNum(const unsigned short scale, const unsigned int lim, unsigned int *num)
{
unsigned int i;
unsigned long long int n, t;
for(n = 0, t = 0, i = 0; i < lim; ++i)
{
t = (n / MODULE);
n = ( (unsigned long long int) scale * num[i] );
num[i] = (n % MODULE) + t; // (n % MODULE) + t always will be smaller than MODULE
}
num[i] = (n / MODULE);
return ( (num[i]) ? lim + 1 : lim );
}
/*
pow2BigNum: Make a (dst[] <- src[] * src[]) big number operation
#lim: Length of source big number
#src: Source big number (array of unsigned int)
#dst: Destination big number (array of unsigned int)
Return: Array length of operation result
Warning: This method can write in an incorrect position if we don't previous reallocate num (if it's necessary). bigNum method do it for us
*/
unsigned int pow2BigNum(const unsigned int lim, unsigned int *src, unsigned int *dst)
{
unsigned int i, j;
unsigned long long int n, t;
unsigned int k, c;
for(c = 0, dst[0] = 0, i = 0; i < lim; ++i)
{
for(j = i, n = 0; j < lim; ++j)
{
n = ( (unsigned long long int) src[i] * src[j] );
k = i + j;
if(i != j)
{
t = 2 * (n % MODULE);
n = 2 * (n / MODULE);
// (i + j)
dst[k] = ( (k > c) ? ((c = k), 0) : dst[k] ) + (t % MODULE);
++k; // (i + j + 1)
dst[k] = ( (k > c) ? ((c = k), 0) : dst[k] ) + ( (t / MODULE) + (n % MODULE) );
++k; // (i + j + 2)
dst[k] = ( (k > c) ? ((c = k), 0) : dst[k] ) + (n / MODULE);
}
else
{
dst[k] = ( (k > c) ? ((c = k), 0) : dst[k] ) + (n % MODULE);
++k; // (i + j)
dst[k] = ( (k > c) ? ((c = k), 0) : dst[k] ) + (n / MODULE);
}
for(k = i + j; k < (lim + j); ++k)
{
dst[k + 1] += (dst[k] / MODULE);
dst[k] %= MODULE;
}
}
}
i = lim << 1;
return ((dst[i - 1]) ? i : i - 1);
}
/*
addBigNum: Make a (num2[] <- num1[] + num2[]) big number operation
#lim1: Length of source num1 big number
#num1: First source operand big number (array of unsigned int). Should be smaller than second
#lim2: Length of source num2 big number
#num2: Second source operand big number (array of unsigned int). Should be equal or greater than first
Return: Array length of operation result or 0 if num1[] > num2[] (dosen't do any op)
Warning: This method can write in an incorrect position if we don't previous reallocate num2
*/
unsigned int addBigNum(const unsigned int lim1, unsigned int *num1, const unsigned int lim2, unsigned int *num2)
{
unsigned long long int n;
unsigned int i;
if(lim1 > lim2)
return 0;
for(num2[lim2] = 0, n = 0, i = 0; i < lim1; ++i)
{
n = num2[i] + num1[i] + (n / MODULE);
num2[i] = n % MODULE;
}
for(n /= MODULE; n; ++i)
{
num2[i] += n;
n = (num2[i] / MODULE);
}
return (lim2 > i) ? lim2 : i;
}
To compile:
gcc -o bgn <name>.c -Wall -O3 -lm //Math library if you wants to use log func
To check result, use direct output as and input to bc. Easy shell script:
#!/bin/bash
select S in ` awk -F '==' '{print $1 " == " $2 }' | bc`;
do
0;
done;
echo "Test Finished!";
We have and array of unsigned int (4 bytes) where we store at each int of array a number of 9 digits ( % 1000000000UL ); hence num[0] we will have the first 9 digits, num[1] we will have digit 10 to 18, num[2]...
I use convencional memory to work but an improvement can do it with dinamic memory. Ok, but how length It could be the array? (or how many memory we need to allocate?). Using bc calculator (bc -l with mathlib) we can determine how many digits has a number:
l(a^N) / l(10) // Natural logarith to Logarithm base 10
If we know digits, we know amount integers we needed:
( l(a^N) / (9 * l(10)) ) + 1 // Truncate result
If you work with value such as (2^k)^N you can resolve it logarithm with this expression:
( k*N*l(2)/(9*l(10)) ) + 1 // Truncate result
to determine the exactly length of integer array. Example:
256^800 = 2^(8*800) ---> l(2^(8*800))/(9*l(10)) + 1 = 8*800*l(2)/(9*l(10)) + 1
The value 1000000000UL (10^9) constant is very important. A constant like 10000000000UL (10^10) dosen't work because can produce and indetected overflow (try what's happens with number 16^16 and 10^10 constant) and a constant more little such as 1000000000UL (10^8) are correct but we need to reserve more memory and do more steps. 10^9 is key constant for unsigned int of 32 bits and unsigned long long int of 64 bits.
The code has two parts, Multiply (easy) and Power by 2 (more hard). Multiply is just multiplication and scale and propagate the integer overflow. It take the principle of associative property in math to do exactly the inverse principle, so if k(A + B + C) we want kA + kB + kC where number will be k*A*10^18 + k*B*10^9 + kC. Obiously, kC operation can generate a number bigger than 999 999 999, but never more bigger than 0xFF FF FF FF FF FF FF FF. A number bigger than 64 bits can never occur in a multiplication because C is an unsigned integer of 32 bits and k is a unsigned short of 16 bits. In worts case, we will have this number:
k = 0x FF FF;
C = 0x 3B 9A C9 FF; // 999999999
n = k*C = 0x 3B 9A | 8E 64 36 01;
n % 1000000000 = 0x 3B 99 CA 01;
n / 1000000000 = 0x FF FE;
After Mul kB we need to add 0x FF FE from last multiplication of C ( B = kB + (C / module) ), and so on (we have 18 bits arithmetic offset, enough to guarantee correct values).
Power is more complex but is in essencial, the same problem (multiplication and add), so I give some tricks about code power:
Data types are important, very important
If you try to multiplication an unsigned integer with unsigned integer, you get another unsigned integer. Use explicit cast to get unsigned long long int and don't lose data.
Always use unsigned modifier, dont forget it!
Power by 2 can directly modify 2 index ahead of current index
gdb is your friend
I've developed another method that add big numbers. These last I don't prove so much but I think it works well. Don't be cruels with me if it has a bug.
...and that's all!
PD1: Developed in a
Intel(R) Pentium(R) 4 CPU 1.70GHz
Data length:
unsigned short: 2
unsigned int: 4
unsigned long int: 4
unsigned long long int: 8
Numbers such as 256^1024 it spend:
real 0m0.059s
user 0m0.033s
sys 0m0.000s
A bucle that's compute i^i where i goes to i = 1 ... 1024:
real 0m40.716s
user 0m14.952s
sys 0m0.067s
For numbers such as 65355^65355, spent time is insane.
PD2: My response is so late but I hope my code it will be usefull.
PD3: Sorry, explain me in english is one of my worst handicaps!
Last update: I just have had an idea that with same algorithm but other implementation, improve response and reduce amount memory to use (we can use the completely bits of unsigned int). The secret: n^2 = n * n = n * (n - 1 + 1) = n * (n - 1) + n.
(I will not do this new code, but if someone are interested, may be after exams... )
I don't know if you still need a solution, but I wrote an article about this problem. It shows a very simple algorithm which can be used to convert an arbitrary long number with base X to a corresponding number of base Y. The algorithm is written in Python, but it is really only a few lines long and doesn't use any Python magic. I needed such an algorithm for a C implementation, too, but decided to describe it using Python for two reasons. First, Python is very readable by anyone who understands algorithms written in a pseudo programming language and, second, I am not allowed to post the C version, because it I did it for my company. Just have a look and you will see how easy this problem can be solved in general. An implementation in C should be straight forward...
Here is a function that does what you want:
#include <math.h>
#include <stddef.h> // for size_t
double getval(unsigned char *arr, size_t len)
{
double ret = 0;
size_t cur;
for(cur = 0; cur < len; cur++)
ret += arr[cur] * pow(256, cur);
return ret;
}
That looks perfectly readable to me. Just pass the unsigned char * array you want to convert and the size. Note that it won't be perfect - for arbitrary precision, I suggest looking into the GNU MP BigNum library, as has been suggested already.
As a bonus, I don't like your storing your numbers in little-endian order, so here's a version if you want to store base-256 numbers in big-endian order:
#include <stddef.h> // for size_t
double getval_big_endian(unsigned char *arr, size_t len)
{
double ret = 0;
size_t cur;
for(cur = 0; cur < len; cur++)
{
ret *= 256;
ret += arr[cur];
}
return ret;
}
Just things to consider.
It may be too late or too irrelevant to make this suggestion, but could you store each byte as two base 10 digits (or one base 100) instead of one base 256? If you haven't implemented division yet, then that implies all you have is addition, subtraction, and maybe multiplication; those shouldn't be too hard to convert. Once you've done that, printing it would be trivial.
As I was not satisfied with the other answers provided, I decided to write an alternative solution myself:
#include <stdlib.h>
#define BASE_256 256
char *largenum2str(unsigned char *num, unsigned int len_num)
{
int temp;
char *str, *b_256 = NULL, *cur_num = NULL, *prod = NULL, *prod_term = NULL;
unsigned int i, j, carry = 0, len_str = 1, len_b_256, len_cur_num, len_prod, len_prod_term;
//Get 256 as an array of base-10 chars we'll use later as our second operand of the product
for ((len_b_256 = 0, temp = BASE_256); temp > 0; len_b_256++)
{
b_256 = realloc(b_256, sizeof(char) * (len_b_256 + 1));
b_256[len_b_256] = temp % 10;
temp = temp / 10;
}
//Our first operand (prod) is the last element of our num array, which we'll convert to a base-10 array
for ((len_prod = 0, temp = num[len_num - 1]); temp > 0; len_prod++)
{
prod = realloc(prod, sizeof(*prod) * (len_prod + 1));
prod[len_prod] = temp % 10;
temp = temp / 10;
}
while (len_num > 1) //We'll stay in this loop as long as we still have elements in num to read
{
len_num--; //Decrease the length of num to keep track of the current element
//Convert this element to a base-10 unsigned char array
for ((len_cur_num = 0, temp = num[len_num - 1]); temp > 0; len_cur_num++)
{
cur_num = (char *)realloc(cur_num, sizeof(char) * (len_cur_num + 1));
cur_num[len_cur_num] = temp % 10;
temp = temp / 10;
}
//Multiply prod by 256 and save that as prod_term
len_prod_term = 0;
prod_term = NULL;
for (i = 0; i < len_b_256; i++)
{ //Repeat this loop 3 times, one for each element in {6,5,2} (256 as a reversed base-10 unsigned char array)
carry = 0; //Set the carry to 0
prod_term = realloc(prod_term, sizeof(*prod_term) * (len_prod + i)); //Allocate memory to save prod_term
for (j = i; j < (len_prod_term); j++) //If we have digits from the last partial product of the multiplication, add it here
{
prod_term[j] = prod_term[j] + prod[j - i] * b_256[i] + carry;
if (prod_term[j] > 9)
{
carry = prod_term[j] / 10;
prod_term[j] = prod_term[j] % 10;
}
else
{
carry = 0;
}
}
while (j < (len_prod + i)) //No remaining elements of the former prod_term, so take only into account the results of multiplying mult * b_256
{
prod_term[j] = prod[j - i] * b_256[i] + carry;
if (prod_term[j] > 9)
{
carry = prod_term[j] / 10;
prod_term[j] = prod_term[j] % 10;
}
else
{
carry = 0;
}
j++;
}
if (carry) //A carry may be present in the last term. If so, allocate memory to save it and increase the length of prod_term
{
len_prod_term = j + 1;
prod_term = realloc(prod_term, sizeof(*prod_term) * (len_prod_term));
prod_term[j] = carry;
}
else
{
len_prod_term = j;
}
}
free(prod); //We don't need prod anymore, prod will now be prod_term
prod = prod_term;
len_prod = len_prod_term;
//Add prod (formerly prod_term) to our current number of the num array, expressed in a b-10 array
carry = 0;
for (i = 0; i < len_cur_num; i++)
{
prod[i] = prod[i] + cur_num[i] + carry;
if (prod[i] > 9)
{
carry = prod[i] / 10;
prod[i] -= 10;
}
else
{
carry = 0;
}
}
while (carry && (i < len_prod))
{
prod[i] = prod[i] + carry;
if (prod[i] > 9)
{
carry = prod[i] / 10;
prod[i] -= 10;
}
else
{
carry = 0;
}
i++;
}
if (carry)
{
len_prod++;
prod = realloc(prod, sizeof(*prod) * len_prod);
prod[len_prod - 1] = carry;
carry = 0;
}
}
str = malloc(sizeof(char) * (len_prod + 1)); //Allocate memory for the return string
for (i = 0; i < len_prod; i++) //Convert the numeric result to its representation as characters
{
str[len_prod - 1 - i] = prod[i] + '0';
}
str[i] = '\0'; //Terminate our string
free(b_256); //Free memory
free(prod);
free(cur_num);
return str;
}
The idea behind it all derives from simple math. For any base-256 number, its base-10 representation can be calculated as:
num[i]*256^i + num[i-1]*256^(i-1) + (···) + num[2]*256^2 + num[1]*256^1 + num[0]*256^0
which expands to:
(((((num[i])*256 + num[i-1])*256 + (···))*256 + num[2])*256 + num[1])*256 + num[0]
So all we have to do is to multiply, step-by step, each element of the number array by 256 and add to it the next element, and so on... That way we can get the base-10 number.