I have a question regarding how the printf() method prints integers, signed or unsigned. One day, I found myself thinking about how difficult it must be to convert a binary sequence into a sequence of decimal digits that a human can understand, given that a computer has no concept of decimal.
Below, I have a printf() method (from here) with its associated methods. I've tried to understand as much as I can about how printi() works, as you can see in the comments:
#define PAD_RIGHT 1
#define PAD_ZERO 2
#include <stdarg.h>
static void printchar(char **str, int c)
{
extern int putchar(int c);
if (str) {
**str = c;
++(*str);
}
else (void)putchar(c);
}
static int prints(char **out, const char *string, int width, int pad)
{
register int pc = 0, padchar = ' ';
if (width > 0) {
register int len = 0;
register const char *ptr;
for (ptr = string; *ptr; ++ptr) ++len;
if (len >= width) width = 0;
else width -= len;
if (pad & PAD_ZERO) padchar = '0';
}
if (!(pad & PAD_RIGHT)) {
for ( ; width > 0; --width) {
printchar (out, padchar);
++pc;
}
}
for ( ; *string ; ++string) {
printchar (out, *string);
++pc;
}
for ( ; width > 0; --width) {
printchar (out, padchar);
++pc;
}
return pc;
}
/* the following should be enough for 32 bit int */
#define PRINT_BUF_LEN 12
static int printi(char **out, int i, int b, int sg, int width, int pad, int letbase)
{
/*
i is the number we are turning into a string
b is the base, i.e. base 10 for decimal
sg is if the number is signed, i.e. 1 for signed (%d), 0 for unsigned (%u)
By default, width and pad are 0, letbase is 97
*/
char print_buf[PRINT_BUF_LEN];
register char *s;
register int t, neg = 0, pc = 0;
register unsigned int u = i;
if (i == 0)
{
print_buf[0] = '0';
print_buf[1] = '\0';
return prints(out, print_buf, width, pad);
}
if (sg && b == 10 && i < 0)
{
neg = 1;
u = -i;
}
s = print_buf + PRINT_BUF_LEN - 1;
*s = '\0';
while (u)
{
t = u % b;
if (t >= 10)
t += letbase - '0' - 10;
*--s = t + '0';
u /= b;
}
if (neg)
{
if (width && (pad & PAD_ZERO))
{
printchar(out, '-');
++pc;
--width;
}
else
*--s = '-';
}
return pc + prints(out, s, width, pad);
}
static int print(char** out, const char* format, va_list args)
{
register int width, pad;
register int pc = 0;
char scr[2];
for (; *format != 0; ++format)
{
if (*format == '%')
{
++format;
width = pad = 0;
if (*format == '\0')
break;
if (*format == '%')
goto out;
if (*format == '-')
{
++format;
pad = PAD_RIGHT;
}
while (*format == '0')
{
++format;
pad |= PAD_ZERO;
}
for (; *format >= '0' && *format <= '9'; ++format)
{
width *= 10;
width += *format - '0';
}
if (*format == 's')
{
register char* s = (char*) va_arg(args, int);
pc += prints(out, s ? s : "(null)", width, pad);
continue;
}
if (*format == 'd')
{
pc += printi(out, va_arg(args, int), 10, 1, width, pad, 'a');
continue;
}
if (*format == 'x')
{
pc += printi(out, va_arg(args, int), 16, 0, width, pad, 'a');
continue;
}
if (*format == 'X')
{
pc += printi(out, va_arg(args, int), 16, 0, width, pad, 'A');
continue;
}
if (*format == 'u')
{
pc += printi(out, va_arg(args, int), 10, 0, width, pad, 'a');
continue;
}
if (*format == 'c')
{
/* char are converted to int then pushed on the stack */
scr[0] = (char) va_arg(args, int);
scr[1] = '\0';
pc += prints(out, scr, width, pad);
continue;
}
}
else
{
out:
printchar (out, *format);
++pc;
}
}
if (out)
**out = '\0';
va_end(args);
return pc;
}
int printf(const char *format, ...)
{
va_list args;
va_start( args, format );
return print( 0, format, args );
}
If there's one thing I hate about reading library source code, it's that it's hardly ever readable. Variable names with one character and no comment to explain them are a pain.
Can you please explain, in a simple way, what exactly the method is doing to convert an integer into a string of decimal digits?
The code you've pasted is not difficult to read. I suspect you may have given up early.
Ignoring the potential for a negative number for a moment, this printi() routine:
creates a buffer to print the number into, 12 characters wide
sets a character pointer s to point to the end of that buffer
** NULL-terminates it, then moves the pointer one character to the "left"
Then the routine enters a loop, for as long as the number remains > 0
MOD by 10 (that is, divide by 10 and take the remainder)
this becomes the digit that s is pointing to, so the ASCII representation is put there
s is moved to the left again
set the number to itself / 10; this removes the digit that was just printed
repeat the loop as long as there are more digits to print
The only tricky thing here is the dealing with negative numbers, but if you understand how negative numbers are stored, it's not really tricky at all.
Maybe I've been staring at template library headers too long, but that library code looks pretty readable to me!
I will explain the main loop, since the rest (juggling the sign around etc.) should be fairly easy to figure out.
while (u)
{
t = u % b;
if (t >= 10)
t += letbase - '0' - 10;
*--s = t + '0';
u /= b;
}
Basically what we are doing is extracting digits one at a time, from right to left. Suppose b == 10 (i.e. the usual case of %d or %u). The % operator, called the modulo operator, calculates the remainder that is left after integer division. The first time the line t = u % b; runs, it calculates the rightmost digit of the output string -- what is left as a remainder after you divide the number u by 10. (Suppose the number u was 493: the remainder after dividing this by 10 is 3, the rightmost digit.)
After extracting this rightmost digit into t, the if statement decides what to "call" this digit if it is 10 or larger. This fixup amounts to adjusting t so that, when '0' (the ASCII value of the digit '0', which is 48) is added in the next line the result will be a letter starting at 'a' or 'A' (to produce hex digits and other digits for bases larger than 10).
The line after that writes the digit into the buffer. It goes into the rightmost character of the print_buf buffer (notice how s is earlier initialised to point to the end of this buffer, not the start as is usually the case). The pointer s is subsequently moved one character to the left in preparation for the next character.
The following line, u /= b, simply divides u by 10, effectively discarding the rightmost digit. (This works because integer division never produces fractions, and always rounds down.) This then opens up the second-rightmost digit for the next loop iteration to process. Rinse, repeat. The loop finally stops when there is nothing left (the condition while (u) is equivalent to the condition while (u != 0)).
The method to convert a positive integer I to base 10 is basically :
if (i == 0)
printf("0");
else while (i != 0) {
unsigned int j = i / 10;
unsigned int digit = i - 10 * j;
printf("%c", digit + '0');
i = j;
}
Except that this prints out the number backward.
Related
Short story. I made a program that does addition for binary integers. I need to make it work for binary real numbers (e.g. 1010.1010(binary)=10.625(decimal)
The input is given as a binary string.
I made a lot of attempts and I couldn't find a simple way to do it. Please help create such a program.
Example: {input: 1010.1010(10.625 in decimal) 0.1(0.5 in decimal)
output: 1011.001 (11.125 in decimal)}
Code:
#include <stdio.h>
#include <string.h>
void bin_add(int c[400], int d[400])
{
int car[400]; //carry
int i = 199;
car[i] = 0;
while (i >= 0)
{
//find carry and shift it left
//find the sum
car[i - 1] = (c[i] & d[i]) | (c[i] & car[i]) | (d[i] & car[i]);
c[i] = (c[i] ^ d[i]) ^ car[i];
printf("car[i-1]=%d c[i]=%d\n", car[i - 1], c[i]);
i--;
}
// printf("\n");
}
int main()
{
int l, l1, i;//l and l1 are lengths
char a[200], b[200]; //a and b are the inputs
int c[200], d[200]; //c and d are used for processing
for (i = 0; i < 200; i++)
{
c[i] = 0;
d[i] = 0;
}
gets(a);
gets(b);
l = strlen(a);
l1 = strlen(b);
for (int i = 0; i < l; i++)
{
c[200 - l + i] = a[i] - 48;
}
////////////////////////////////////////////
for (int i = 0; i < l1; i++)
{
d[200 - l1 + i] = b[i] - 48;
}
////////////////////////////////
bin_add(c, d);
for (i = 0; i < 200; i++)
printf("%d", c[i]);
return 0;
}
What you really want to do, is handle each digit in order of increasing importance. To make that easier, you should implement the following functions:
/* Return the number of fractional bits in bs */
int bs_fractbits(const char *bs);
/* Return the number of integer bits in bs */
int bs_intbits(const char *bs);
/* Return the bit in bs corresponding to value 2**i,
0 if outside the bit string */
int bs_bit(const char *bs, int i);
/* Return -1 if bs is negative,
0 if bs is zero or NULL,
+1 if bs is positive */
int bs_sign(const char *bs);
/* Return -1 if bs1 < bs2,
0 if bs1 == bs2,
+1 if bs1 > bs2. */
int bs_cmp(const char *bs1, const char *bs2);
To support negative values, you'll need to implement both addition and subtraction (of "unsigned" bit strings):
Addition: The result has as many fractional bits as the term that has most fractional bits, and possibly one more integer bit than the term that has most integer bits. Start at the least significant bit in either term, and work your way up to the most significant bit in either term, summing each bit, and keeping the "carry bit" along, just like you'd do by hand. If the carry is nonzero at end, you'll get that one additional bit.
Subtraction: Always subtract smaller from larger. If that changes the order of the terms, negate the result. The result has at most as many fractional bits as the term that has most fractional bits, and at most as many integer bits as the term that has most integer bits. This is just like addition, except you subtract the bits, and instead of "carry bit", you use a "borrow bit". Because you subtract smaller unsigned value from larger unsigned value, the "borrow bit" will be zero at end.
Multiplication: The integer part has the number of integer bits, and the number of fractional bits, as the terms have in total (summed). You can implement the operation as if multiplying two unsigned integer values, and just insert the bit at end. (So that the result has as many fractional bits as the input terms have in total.) This usually involves a double loop, just like in long multiplication by hand.
Note that the same logic also works if you use larger radix instead of 2. The "carry"/"borrow" is a digit, between zero and one less than the radix.
Personally, I'd be very tempted to use a structure to describe each digit string:
typedef struct {
int idigits; /* Number of integral digits before point */
int fdigits; /* Number of fractional digits after point */
int size; /* Number of chars dynamically allocated at data */
char *point; /* Location of decimal point */
char *data; /* Dynamically allocated buffer */
} digitstring;
#define DIGITSTRING_INIT { 0, 0, 0, NULL, NULL }
with an additional flag if negative digit strings are to be supported.
Digit D with numerical value D×Bi, where B is the radix (number of unique digits used) and i being the position of said digit, is located at point[-i] if i < 0 (and -i <= fdigits), or at point[-i-1] if i >= 0 (and i < idigits). point[0] itself is where the decimal point is, if there is one.
For example, if we have string 0100.00, then idigits = 4, fdigits = 2, and the only nonzero digit is at position 2. (Position 0 is on the left side of the decimal point, and -1 on the right side.)
size and data fields allow reuse of the dynamically allocated buffer. Each declaration of a digitstring must be initialized, digitstring value = DIGITSTRING_INIT;, because there is no initialization function; this way you are less likely to leak memory (unless you forget to free a digitstring when no longer needed):
/* Free the specified digit string. */
static inline void digitstring_free(digitstring *ds)
{
if (ds) {
if (ds->data)
free(ds->data);
ds->idigits = 0;
ds->fdigits = 0;
ds->size = 0;
ds->point = NULL;
ds->data = NULL;
}
}
To use the digit string as a C string, you use a helper function to obtain the pointer to the most significant digit in the digit string:
/* Return a pointer to a printable version of the digit string. */
static const char *digitstring_str(const digitstring *ds, const char *none)
{
if (ds && ds->point)
return (const char *)(ds->point - ds->idigits);
else
return none;
}
I've found that rather than crash, it is often useful to pass an extra parameter that is only used for the return value when the return value is otherwise undefined. For example, if you have an initialized digit string foo without any contents, then digitstring_str(&foo, "0") returns the string literal "0".
The main point of the digit string structure is to have accessor functions that get and set each individual digit:
/* Get the value of a specific digit. */
static inline unsigned int digitstring_get(const digitstring *ds, const int position)
{
if (ds) {
if (position < 0) {
if (-position <= ds->fdigits)
return digit_to_value(ds->point[-position]);
else
return 0;
} else {
if (position < ds->idigits)
return digit_to_value(ds->point[-position-1]);
else
return 0;
}
} else
return 0;
}
/* Set the value of a specific digit. */
static inline void digitstring_set(digitstring *ds, const int position, const unsigned int value)
{
if (!ds) {
fprintf(stderr, "digitstring_set(): NULL digitstring specified.\n");
exit(EXIT_FAILURE);
} else
if (position < 0) {
if (-position > ds->fdigits) {
fprintf(stderr, "digitstring_set(): Digit position underflow (in fractional part).\n");
exit(EXIT_FAILURE);
}
ds->point[-position] = value_to_digit(value);
} else {
if (position >= ds->idigits) {
fprintf(stderr, "digitstring_set(): Digit position overflow (in integer part).\n");
exit(EXIT_FAILURE);
}
ds->point[-position-1] = value_to_digit(value);
}
}
Above, value_to_digit() is a helper function that converts a numerical value to the corresponding character, and digit_to_value() converts a character to the corresponding numerical value.
All operations (from parsing to arithmetic operators) really need a "constructor", that creates a new digit string with sufficient number of digits. (The number of digits is known beforehand for each operation, and depends only on the number of significant digits in the terms.) For this, I created a function that constructs a zero of desired size:
/* Clear the specified digit string to zero. */
static inline void digitstring_zero(digitstring *ds, int idigits, int fdigits)
{
int size;
char *data;
if (!ds) {
fprintf(stderr, "digitstring_zero(): No digitstring specified.\n");
exit(EXIT_FAILURE);
}
/* Require at least one integral digit. */
if (idigits < 1)
idigits = 1;
if (fdigits < 0)
fdigits = 0;
/* Total number of chars needed, including decimal point
and string-terminating nul char. */
size = idigits + 1 + fdigits + 1;
/* Check if dynamically allocated buffer needs resizing. */
if (ds->size < size) {
if (ds->data)
data = realloc(ds->data, size);
else
data = malloc(size);
if (!data) {
fprintf(stderr, "digitstring_zero(): Out of memory.\n");
exit(EXIT_FAILURE);
}
ds->data = data;
ds->size = size;
} else {
data = ds->data;
size = ds->size;
}
/* Fill it with zeroes. */
memset(data, value_to_digit(0), idigits + 1 + fdigits);
/* Pad the unused space with nul chars, terminating the string. */
memset(data + idigits + 1 + fdigits, '\0', size - idigits - 1 - fdigits);
/* Assign the decimal point. */
ds->point = data + idigits;
/* If there are no decimals, no need for a decimal point either. */
if (fdigits > 0)
ds->point[0] = decimal_point;
else
ds->point[0] = '\0';
/* After setting the desired digits, use digitstring_trim(). */
ds->idigits = idigits;
ds->fdigits = fdigits;
}
It will ensure the digit string has enough room for the specified number of digits, reallocating its dynamically allocated buffer if necessary, reusing it if already large enough.
The idea is that to implement an operation, you first find out the maximum number of integral and fractional digits the result can have. You use the above to create the result digit string, then digitstring_set() to set each digit to their respective values. You will typically operate in increasing digit significance, which means increasing digit "positions".
If we have additional helper functions int digits(const char *src), which returns the number of consecutive valid digit characters starting at src, and int decimal_points(const char *src), which returns 1 if src points to a decimal point, and 0 otherwise, we can parse input strings into digit strings using
/* Parse a string into a digit string, returning the pointer
to the first unparsed character, or NULL if an error occurs. */
static const char *digitstring_parse(digitstring *ds, const char *src)
{
const int zero = value_to_digit(0);
const char *idigit, *fdigit;
int idigits, fdigits, fextra, n;
/* Fail if nothing to parse. */
if (!src)
return NULL;
/* Skip leading whitespace. */
while (isspace((unsigned char)(*src)))
src++;
/* Fail if nothing to parse. */
if (*src == '\0')
return NULL;
/* Scan integer digits. */
idigit = src;
src += digits(src);
idigits = (int)(src - idigit);
/* Decimal point? */
fextra = 0;
n = decimal_points(src);
if (n > 0) {
src += n;
/* Scan fractional digits. */
fdigit = src;
src += digits(src);
fdigits = (int)(src - fdigit);
if (fdigits < 1)
fextra = 1;
} else {
fdigit = src;
fdigits = 0;
}
/* No digits? */
if (idigit == 0 && fdigit == 0)
return NULL;
/* Trim leading zeroes. */
while (idigits > 1 && *idigit == zero) {
idigits--;
idigit++;
}
/* Trim trailing zeroes. */
while (fdigits > 1 && fdigit[fdigits - 1] == zero)
fdigits--;
/* Create the necessary digit string, */
digitstring_zero(ds, idigits, fdigits + fextra);
/* copy the integer digits, if any, */
if (idigits > 0)
memcpy(ds->point - idigits, idigit, idigits);
/* and the fractional digits, if any. */
if (fdigits > 0)
memcpy(ds->point + 1, fdigit, fdigits);
/* Return a pointer to the first unparsed character. */
return src;
}
After updating its digits, one can call a helper function to remove any extra leading zeroes:
static inline void digitstring_ltrim(digitstring *ds)
{
if (ds && ds->point) {
const int zero = value_to_digit(0);
while (ds->idigits > 1 && ds->point[-ds->idigits] == zero)
ds->idigits--;
}
}
Adding two (unsigned) digit strings, possibly reusing one of the terms, is now quite simple to implement:
static void digitstring_add(digitstring *to, const digitstring *src1, const digitstring *src2)
{
digitstring result = DIGITSTRING_INIT;
unsigned int carry = 0;
int i, idigits, fdigits;
if (!to || !src1 || !src2) {
fprintf(stderr, "digitstring_add(): NULL digitstring specified.\n");
exit(EXIT_FAILURE);
}
/* For addition, the result has as many digits
as the longer source term. */
idigits = (src1->idigits >= src2->idigits) ? src1->idigits : src2->idigits;
fdigits = (src1->fdigits >= src2->fdigits) ? src1->fdigits : src2->fdigits;
/* Result needs possibly one more integer digit,
in case carry overflows. */
digitstring_zero(&result, idigits + 1, fdigits);
/* Addition loop, in order of increasing digit significance. */
for (i = -fdigits; i < idigits; i++) {
const unsigned int sum = digitstring_get(src1, i)
+ digitstring_get(src2, i)
+ carry;
digitstring_set(&result, i, sum % RADIX);
carry = sum / RADIX;
}
digitstring_set(&result, idigits, carry);
/* Trim leading zeroes. */
digitstring_ltrim(&result);
/* At this point, we can discard the target, even if it is actually
one of the sources, and copy the result to it. */
digitstring_free(to);
*to = result;
}
where RADIX is the radix used (the number of unique digits, 2 for binary). Pay extra attention to the digit loop. -fdigits is the least significant position in the result, and idigits-1 the most significant position. We need the accessor functions, because the source terms might not contain those digits at all (they are logically zero then).
These functions have been tested to work on both binary and octal number strings. I like this implementation, because it omits the decimal point if all terms are integers (so you get 12 + 33 = 45), but (due to fextra in digitstring_parse()) if any of the terms have a decimal point, then the result will have at least one fractional digit (so 12. + 33 = 45.0).
After all the beautiful ideas in Animals' answer I felt the strange urge, to present my own solution:
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#define MAX(x, y) ((x) > (y) ? (x) : (y))
size_t gpp(char const *s)
{
char *n = strchr(s, '.');
return n ? n - s + 1 : 0;
}
char* bin_add(char const *a, char const *b)
{
char const *inp[] = { a, b };
size_t ll[] = { strlen(a), strlen(b) };
size_t pp[] = { gpp(a), gpp(b) };
size_t OO[2], off[2];
for (size_t i = 0; i < 2; ++i) {
OO[i] = pp[i] ? pp[i] - 1 : ll[i];
pp[i] = pp[i] ? ll[i] - pp[i] : 0;}
for (size_t i = 0; i < 2; ++i)
off[i] = OO[i] < OO[!i] ? OO[!i] - OO[i] : 0;
size_t ML = MAX(OO[0], OO[1]) + MAX(pp[0], pp[1]) + (!!pp[0] || !!pp[1]);
char *Ol = calloc(ML + 2, 1);
if(!Ol) return Ol;
char ops[2];
int xc = 0;
size_t lO = ML;
unsigned cc[2] = { 0 };
for (size_t i = ML; i; --i) {
bool pt = false;
for (size_t l = 0; l < 2; ++l) {
ops[l] = i <= ll[l] + off[l] && i - off[l] - 1
< ll[l] ? inp[l][i - off[l] - 1] : '0';
if (ops[l] == '.') {
if (cc[l]) {
free(Ol);
return NULL;
}
pt = true;
++cc[l];
}
ops[l] -= '0';
}
if (pt) {
Ol[i] = '.';
continue;
}
if ((Ol[i] = ops[0] + ops[1] + xc) > 1) {
Ol[i] = 0;
xc = 1;
}
else xc = 0;
lO = (Ol[i] += '0') == '1' ? i : lO;
}
if((Ol[0] = '0' + xc) == '1') return Ol;
for (size_t i = 0; i <= ML - lO + 1; ++i)
Ol[i] = Ol[lO + i];
return Ol;
}
int main(void)
{
char a[81], b[81];
while (scanf(" %80[0.1] %80[0.1]", a, b) & 1 << 1) {
char *c = bin_add(a, b);
if (!c && errno == ENOMEM) {
fputs("Not enough memory :(\n\n", stderr);
return EXIT_FAILURE;
}
else if (!c) {
fputs("Input error :(\n\n", stderr);
goto clear;
}
char* O[] = { a, b, c };
size_t lO[3], Ol = 0;
for (size_t i = 0; i < 3; ++i) {
lO[i] = gpp(O[i]);
lO[i] = lO[i] ? lO[i] : strlen(i[O]) + 1;
Ol = lO[i] > Ol ? lO[i] : Ol;
}
putchar('\n');
for (size_t i = 0; i < 3; ++i) {
for (size_t l = 0; l < Ol - lO[i]; ++l, putchar(' '));
puts(O[i]);
}
putchar('\n');
free(c);
clear :{ int c; while ((c = getchar()) != '\n' && c != EOF); }
}
}
Sample Output:
11001001 .11001001
11001001
.11001001
11001001.11001001
11001001 1010
11001001
1010
11010011
111111 1
111111
1
1000000
1010101 010111001.0101110101010
1010101
010111001.0101110101010
100001110.0101110101010
1001001.010111010101 10100101100.10010111101
1001001.010111010101
10100101100.10010111101
10101110101.111000001111
. .
.
.
0
.. .
Input error :(
A
Press any key to continue . . .
I contemplated wheter I should ask for a review at codereview. But I think I schould rather not.
There are two answers, depending upon whether you desire fixed- or floating- point arithmetic.
The first issue is reading the number. strtol() is your friend here:
char input[BUFFER_SIZE];
char * tmp;
long integral, fractional;
fgets(input, BUFFER_SIZE-1, stdin);
integral = strtol(input, &tmp, 2); /* Read BINARY integral part */
tmp++; /* Pass over the binary point. You may want to check that it is actually a dot */
fractional = strtol(tmp, null, 2); /* Read BINARY fractional part */
The next issue is figuring out how you will do the arithmetic. fractional must be bit-shifted an amount depending on how many digits past the point were provided and your desired precision. Fixed point arithmetic is simple: fractional <<= FRAC_BITS - strlen(tmp) then add the fractional parts together. Mask by ((1<<FRAC_BITS)-1) for the fractional part of the sum, shift the remaining bits and add them to the integral parts for the integral part of the sum. Floating-point is a little more finicky, but not too much harder.
For real numbers, convert non-fraction and fraction part to decimal, do the addition and print it as binary. This will require function to convert a number to binary string. Just a note that real numbers are float numbers in C and they are represented in binary with mantessa form like 2e^3 which is 2 multiplied by exponent to the power of 3.
I'm trying to write my first kernel module so I'm not able to include libraries for atoi, strtol, etc. How can I convert a string to int without these built-in functions? I tried:
int num;
num = string[0] - '0';
which works for the first character, but if I remove the [0] to try and convert the full string it gives me a warning: assignment makes integer from pointer without a cast. So what do I do?
When creating your own string to int function, make sure you check and protect against overflow. For example:
/* an atoi replacement performing the conversion in a single
pass and incorporating 'err' to indicate a failed conversion.
passing NULL as error causes it to be ignored */
int strtoi (const char *s, unsigned char *err)
{
char *p = (char *)s;
int nmax = (1ULL << 31) - 1; /* INT_MAX */
int nmin = -nmax - 1; /* INT_MIN */
long long sum = 0;
char sign = *p;
if (*p == '-' || *p == '+') p++;
while (*p >= '0' && *p <= '9') {
sum = sum * 10 - (*p - '0');
if (sum < nmin || (sign != '-' && -sum > nmax)) goto error;
p++;
}
if (sign != '-') sum = -sum;
return (int)sum;
error:
fprintf (stderr, "strtoi() error: invalid conversion for type int.\n");
if (err) *err = 1;
return 0;
}
You can't remove the [0]. That means that you are subtracting '0' from the pointer string, which is meaningless. You still need to dereference it:
num = string[i] - '0';
A string is an array of characters, represented by an address (a.k.a pointer).
An pointer has an value that might look something like 0xa1de2bdf. This value tells me where the start of the array is.
You cannot subtract a pointer type with a character type (e.g 0xa1de2bdf - 'b' does not really make sense).
To convert a string to a number, you could try this:
//Find the length of the string
int len = 0;
while (str[len] != '\0') {
len++;
}
//Loop through the string
int num = 0, i = 0, digit;
for (i=0; i<len; i++) {
//Extract the digit
digit = ing[i] - '0';
//Multiply the digit with its correct position (ones, tens, hundreds, etc.)
num += digit * pow(10, (len-1)-i);
}
Of course if you are not allowed to use math.h library, you could write your own pow(a,b) function which gives you the value of a^b.
int mypowfunc(int a, int b) {
int i=0, ans=1;
//multiply the value a for b number of times
for (i=0; i<b; i++) {
ans *= a;
}
return ans;
}
I have written the code above in a way that is simple to understand. It assumes that your string has a null character ('\0') right behind the last useful character (which is good practice).
Also, you might want to check that the string is actually a valid string with only digits (e.g '0', '1', '2', etc.). You could do this by including an if... else.. statement while looping through the string.
In modern kernels you want to use kstrto*:
http://lxr.free-electrons.com/source/include/linux/kernel.h#L274
274 /**
275 * kstrtoul - convert a string to an unsigned long
276 * #s: The start of the string. The string must be null-terminated, and may also
277 * include a single newline before its terminating null. The first character
278 * may also be a plus sign, but not a minus sign.
279 * #base: The number base to use. The maximum supported base is 16. If base is
280 * given as 0, then the base of the string is automatically detected with the
281 * conventional semantics - If it begins with 0x the number will be parsed as a
282 * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
283 * parsed as an octal number. Otherwise it will be parsed as a decimal.
284 * #res: Where to write the result of the conversion on success.
285 *
286 * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
287 * Used as a replacement for the obsolete simple_strtoull. Return code must
288 * be checked.
289 */
This function skips leading and trailing whitespace, handles one optional + / - sign, and returns 0 on invalid input,
// Convert standard null-terminated string to an integer
// - Skips leading whitespaces.
// - Skips trailing whitespaces.
// - Allows for one, optional +/- sign at the front.
// - Returns zero if any non-+/-, non-numeric, non-space character is encountered.
// - Returns zero if digits are separated by spaces (eg "123 45")
// - Range is checked against Overflow/Underflow (INT_MAX / INT_MIN), and returns 0.
int StrToInt(const char* s)
{
int minInt = 1 << (sizeof(int)*CHAR_BIT-1);
int maxInt = -(minInt+1);
char* w;
do { // Skip any leading whitespace
for(w=" \t\n\v\f\r"; *w && *s != *w; ++w) ;
if (*s == *w) ++s; else break;
} while(*s);
int sign = 1;
if ('-' == *s) sign = -1;
if ('+' == *s || '-' == *s) ++s;
long long i=0;
while('0' <= *s && *s <= '9')
{
i = 10*i + *s++ - '0';
if (sign*i < minInt || maxInt < sign*i)
{
i = 0;
break;
}
}
while (*s) // Skip any trailing whitespace
{
for(w=" \t\n\v\f\r"; *w && *s != *w; ++w) ;
if (*w && *s == *w) ++s; else break;
}
return (int)(!*s*sign*i);
}
" not able to include libraries" --> Unclear if code is allowed access to INT_MAX, INT_MIN. There is no way to determine the minimum/maximum signed integer in a completely portable fashion without using the language provided macros like INT_MAX, INT_MIN.
Use INT_MAX, INT_MIN is available. Else we could guess the char width is 8. We could guess there are no padding bits. We could guess that integers are 2's complement. With these reasonable assumptions, minimum and maximum are defined below.
Note: Shifting into the sign bit is undefined behavior (UB), so don't do that.
Let us add another restriction: make a solution that works for any signed integer from signed char to intmax_t. This disallows code from using a wider type, as there may not be a wider type.
typedef int Austin_int;
#define Austin_INT_MAXMID ( ((Austin_int)1) << (sizeof(Austin_int)*8 - 2) )
#define Austin_INT_MAX (Austin_INT_MAXMID - 1 + Austin_INT_MAXMID)
#define Austin_INT_MIN (-Austin_INT_MAX - 1)
int Austin_isspace(int ch) {
const char *ws = " \t\n\r\f\v";
while (*ws) {
if (*ws == ch) return 1;
ws++;
}
return 0;
}
// *endptr points to where parsing stopped
// *errorptr indicates overflow
Austin_int Austin_strtoi(const char *s, char **endptr, int *errorptr) {
int error = 0;
while (Austin_isspace(*s)) {
s++;
}
char sign = *s;
if (*s == '-' || *s == '+') {
s++;
}
Austin_int sum = 0;
while (*s >= '0' && *s <= '9') {
int ch = *s - '0';
if (sum <= Austin_INT_MIN / 10 &&
(sum < Austin_INT_MIN / 10 || -ch < Austin_INT_MIN % 10)) {
sum = Austin_INT_MIN;
error = 1;
} else {
sum = sum * 10 - ch;
}
s++;
}
if (sign != '-') {
if (sum < -Austin_INT_MAX) {
sum = Austin_INT_MAX;
error = 1;
} else {
sum = -sum;
}
}
if (endptr) {
*endptr = (char *) s;
}
if (errorptr) {
*errorptr = error;
}
return sum;
}
The above depends on C99 or later in the Austin_INT_MIN Austin_INT_MIN % 10 part.
This is the cleanest and safest way I could come up with
int str_to_int(const char * str, size_t n, int * int_value) {
int i;
int cvalue;
int value_muliplier = 1;
int res_value = 0;
int neg = 1; // -1 for negative and 1 for whole.
size_t str_len; // String length.
int end_at = 0; // Where loop should end.
if (str == NULL || int_value == NULL || n <= 0)
return -1;
// Get string length
str_len = strnlen(str, n);
if (str_len <= 0)
return -1;
// Is negative.
if (str[0] == '-') {
neg = -1;
end_at = 1; // If negative 0 item in 'str' is skipped.
}
// Do the math.
for (i = str_len - 1; i >= end_at; i--) {
cvalue = char_to_int(str[i]);
// Character not a number.
if (cvalue == -1)
return -1;
// Do the same math that is down below.
res_value += cvalue * value_muliplier;
value_muliplier *= 10;
}
/*
* "436"
* res_value = (6 * 1) + (3 * 10) + (4 * 100)
*/
*int_value = (res_value * neg);
return 0;
}
int char_to_int(char c) {
int cvalue = (int)c;
// Not a number.
// 48 to 57 is 0 to 9 in ascii.
if (cvalue < 48 || cvalue > 57)
return -1;
return cvalue - 48; // 48 is the value of zero in ascii.
}
I can't understand the following atoi implementation code, specifically this line:
k = (k << 3) + (k << 1) + (*p) - '0';
Here is the code:
int my_atoi(char *p) {
int k = 0;
while (*p) {
k = (k << 3) + (k << 1) + (*p) - '0';
p++;
}
return k;
}
Can someone explain it to me ?
Another question: what should be the algorithm of atof implementation ?
<< is bit shift, (k<<3)+(k<<1) is k*10, written by someone who thought he was more clever than a compiler (well, he was wrong...)
(*p) - '0' is subtracting the value of character 0 from the character pointed by p, effectively converting the character to a number.
I hope you can figure out the rest... just remember how the decimal system works.
Here is a specification for the standard function atoi. Sorry for not quoting the standard, but this will work just as fine (from: http://www.cplusplus.com/reference/clibrary/cstdlib/atoi/ )
The function first discards as many whitespace characters (as in
isspace) as necessary until the first non-whitespace character is
found. Then, starting from this character, takes an optional initial
plus or minus sign followed by as many base-10 digits as possible, and
interprets them as a numerical value.
The string can contain additional characters after those that form the
integral number, which are ignored and have no effect on the behavior
of this function.
If the first sequence of non-whitespace characters in str is not a
valid integral number, or if no such sequence exists because either
str is empty or it contains only whitespace characters, no conversion
is performed and zero is returned.
k = (k << 3) + (k << 1);
means
k = k * 2³ + k * 2¹ = k * 8 + k * 2 = k * 10
Does that help?
The *p - '0' term adds the value of the next digit; this works because C requires that the digit characters have consecutive values, so that '1' == '0' + 1, '2' == '0' + 2, etc.
As for your second question (atof), that should be its own question, and it's the subject for a thesis, not something simple to answer...
#include <stdio.h>
#include <errno.h>
#include <limits.h>
double atof(const char *string);
int debug=1;
int main(int argc, char **argv)
{
char *str1="3.14159",*str2="3",*str3="0.707106",*str4="-5.2";
double f1,f2,f3,f4;
if (debug) printf("convert %s, %s, %s, %s\n",str1,str2,str3,str4);
f1=atof(str1);
f2=atof(str2);
f3=atof(str3);
f4=atof(str4);
if (debug) printf("converted values=%f, %f, %f, %f\n",f1,f2,f3,f4);
if (argc > 1)
{
printf("string %s is floating point %f\n",argv[1],atof(argv[1]));
}
}
double atof(const char *string)
{
double result=0.0;
double multiplier=1;
double divisor=1.0;
int integer_portion=0;
if (!string) return result;
integer_portion=atoi(string);
result = (double)integer_portion;
if (debug) printf("so far %s looks like %f\n",string,result);
/* capture whether string is negative, don't use "result" as it could be 0 */
if (*string == '-')
{
result *= -1; /* won't care if it was 0 in integer portion */
multiplier = -1;
}
while (*string && (*string != '.'))
{
string++;
}
if (debug) printf("fractional part=%s\n",string);
// if we haven't hit end of string, go past the decimal point
if (*string)
{
string++;
if (debug) printf("first char after decimal=%c\n",*string);
}
while (*string)
{
if (*string < '0' || *string > '9') return result;
divisor *= 10.0;
result += (double)(*string - '0')/divisor;
if (debug) printf("result so far=%f\n",result);
string++;
}
return result*multiplier;
}
Interestingly, the man page for atoi doesn't indicate setting of errno so if you're talking any number > (2^31)-1, you're out of luck and similarly for numbers less than -2^31 (assuming 32-bit int). You'll get back an answer but it won't be what you want. Here's one that could take a range of -((2^31)-1) to (2^31)-1, and return INT_MIN (-(2^31)) if in error. errno could then be checked to see if it overflowed.
#include <stdio.h>
#include <errno.h> /* for errno */
#include <limits.h> /* for INT_MIN */
#include <string.h> /* for strerror */
extern int errno;
int debug=0;
int atoi(const char *c)
{
int previous_result=0, result=0;
int multiplier=1;
if (debug) printf("converting %s to integer\n",c?c:"");
if (c && *c == '-')
{
multiplier = -1;
c++;
}
else
{
multiplier = 1;
}
if (debug) printf("multiplier = %d\n",multiplier);
while (*c)
{
if (*c < '0' || *c > '9')
{
return result * multiplier;
}
result *= 10;
if (result < previous_result)
{
if (debug) printf("number overflowed - return INT_MIN, errno=%d\n",errno);
errno = EOVERFLOW;
return(INT_MIN);
}
else
{
previous_result *= 10;
}
if (debug) printf("%c\n",*c);
result += *c - '0';
if (result < previous_result)
{
if (debug) printf("number overflowed - return MIN_INT\n");
errno = EOVERFLOW;
return(INT_MIN);
}
else
{
previous_result += *c - '0';
}
c++;
}
return(result * multiplier);
}
int main(int argc,char **argv)
{
int result;
printf("INT_MIN=%d will be output when number too high or too low, and errno set\n",INT_MIN);
printf("string=%s, int=%d\n","563",atoi("563"));
printf("string=%s, int=%d\n","-563",atoi("-563"));
printf("string=%s, int=%d\n","-5a3",atoi("-5a3"));
if (argc > 1)
{
result=atoi(argv[1]);
printf("atoi(%s)=%d %s",argv[1],result,(result==INT_MIN)?", errno=":"",errno,strerror(errno));
if (errno) printf("%d - %s\n",errno,strerror(errno));
else printf("\n");
}
return(errno);
}
Here is my implementation(tested successfully with cases containing and starting with letters, +, - and zero's).
I tried to reverse-engineer atoi function in Visual Studio. If the input string only contained numerical characters, it could be implemented in one loop. but it gets complicated because you should take care of -,+ and letters.
int atoi(char *s)
{
int c=1, a=0, sign, start, end, base=1;
//Determine if the number is negative or positive
if (s[0] == '-')
sign = -1;
else if (s[0] <= '9' && s[0] >= '0')
sign = 1;
else if (s[0] == '+')
sign = 2;
//No further processing if it starts with a letter
else
return 0;
//Scanning the string to find the position of the last consecutive number
while (s[c] != '\n' && s[c] <= '9' && s[c] >= '0')
c++;
//Index of the last consecutive number from beginning
start = c - 1;
//Based on sign, index of the 1st number is set
if (sign==-1)
end = 1;
else if (sign==1)
end = 0;
//When it starts with +, it is actually positive but with a different index
//for the 1st number
else
{
end = 1;
sign = 1;
}
//This the main loop of algorithm which generates the absolute value of the
//number from consecutive numerical characters.
for (int i = start; i >=end ; i--)
{
a += (s[i]-'0') * base;
base *= 10;
}
//The correct sign of generated absolute value is applied
return sign*a;
}
about atoi() hint code from here:
and based on the atoi(), my implementation of atof():
[have same limitation of original code, doesn't check length, etc]
double atof(const char* s)
{
double value_h = 0;
double value_l = 0;
double sign = 1;
if (*s == '+' || *s == '-')
{
if (*s == '-') sign = -1;
++s;
}
while (*s >= 0x30 && *s <= 0x39)
{
value_h *= 10;
value_h += (double)(*s - 0x30);
++s;
}
// 0x2E == '.'
if (*s == 0x2E)
{
double divider = 1;
++s;
while (*s >= 0x30 && *s <= 0x39)
{
divider *= 10;
value_l *= 10;
value_l += (double)(*s - 0x30);
++s;
}
return (value_h + value_l/divider) * sign;
}
else
{
return value_h * sign;
}
}
I have an integer like 1191223
and I want to iterate over the digits. I am not sure how to do this in C, is there any easy way to do this?
Thanks.
Forwards, or backwards?
Assuming a positive integer:
unsigned int n = 1191223;
while (n != 0) {
doSomething (n % 10);
n /= 10;
}
…will work smallest to largest, or…
EDIT I'd forgotten all about this non-working solution I had here. Note that Very Smart People™ seem to use the smallest-to-largest iteration consistently (both Linux kernel and GLibC's printf, for example, just iterate backwards) but here's a lousy way to do it if you really don't want to use snprintf for some reason…
int left_to_right (unsigned int n) {
unsigned int digit = 0;
if (0 == n) {
doSomething (0);
} else {
digit = pow(10, 1.0+ floor(log10(n)));
while (digit /= 10) {
doSomething ( (n / digit) % 10 );
}
}
}
I assume that it's very silly to assume that you have log10 and pow but not snprintf, so an alternate plan would be
int left_to_right_fixed_max (unsigned int n) {
unsigned int digit = 1000000000; /* make this very big */
unsigned int n10 = 10 * n;
if (0 == n) {
doSomething (0);
} else {
while (digit > n10) { digit /= 10; }
while (digit /= 10) {
doSomething ( (n / digit) % 10 );
}
}
}
… or, if you really don't have hardware multiply/divide, you can resort to using a table of powers of ten.
int left_to_right (unsigned int n) {
static const unsigned int digit [] =
{ 1,
10,
100,
1000,
10000,
100000,
1000000,
10000000,
100000000,
1000000000 /* make this very big */
};
static const unsigned char max_place = 10;
/* length of the above array */
unsigned char decimal;
unsigned char place;
unsigned char significant = 0; /* boolean */
if (0 == n) {
doSomething (0);
} else {
place = max_place;
while (place--) {
decimal = 0;
while (n >= digit[place]) {
decimal++;
n -= digit[place];
}
if (decimal | significant) {
doSomething (decimal);
significant |= decimal;
}
}
}
}
…which I have adapted from http://www.piclist.com/techref/language/ccpp/convertbase.htm into a somewhat more general-purpose version.
In the following I assume you mean decimal digits (base 10). Probably you are able to adapt the solutions to other numeral systems by substituting the 10s.
Note, that the modulo operation is a tricky thing concerning negative operands. Therefore I have chosen the data type to be an unsigned integer.
If you want to process the least significant digit first, you could try the following untested approach:
uint32_t n = 1191223;
do {
uint32_t digit = n%10;
// do something with digit
}
while (n/=10);
If you prefer to walk through the digits starting from the most significant digit, you could try to adapt the following untested code:
uint32_t n = 1191223;
#define MAX_DIGITS 10 // log10((double)UINT32_MAX)+1
uint32_t div = pow(10, MAX_DIGITS);
// skip the leading zero digits
while ( div && !(n/div) ) div/=10;
if ( !div ) div = 10; // allow n being zero
do {
uint32_t digit = (n/div)%10;
// do something with digit
}
while (div/=10);
You want to iterate over base-10 digits, but an integer has no concept of arabic notation and digits. Convert it to a string first:
int i = 1191223;
char buffer[16];
char *j;
snprintf(buffer, 16, "%i", i);
for ( j = buffer; *j; ++j ) { /* digit is in *j - '0' */ }
You can use sprintf() to convert it into a char array, and then iterate through that, like so (untested, just to get you started):
int a = 1191223;
char arr[16];
int rc = sprintf(arr, "%d", a);
if (rc < 0) {
// error
}
for (int i = 0; i < rc; i++) {
printf("digit %d = %d\n", i, arr[i]);
}
void access_digits(int n)
{
int digit;
if (n < 0) n = -n;
do {
digit = n % 10;
/* Here you can do whatever you
want to do with the digit */
} while ((n/=10) > 0);
}
Something like this:
char data[128];
int digits = 1191223;
sprintf(data, "%d", digits);
int length = strlen(data);
for(int i = 0; i < length; i++) {
// iterate through each character representing a digit
}
Notice that if you use an octal number like 0100 you also need to change the sprintf(data, "%d", digits); to sprintf(data, "%o", digits);.
For my purposes the following short code did the trick.
Having a an integer variable the_integer, and an integer variable sum_of_digits initialized. (line 1) You could do the following:
1) Convert the integer variable to a variable of type string with use of the std::to_string(int) function.
2) Iterate of the characters of the resulting string. for(char& c: str::to_string(the_integer))
3) To convert the characters back to integers use c -'0' . For this solution take a look at the discussion in (Convert char to int in C and C++).
4) .. and adding them the digits together: sum_of_digits += c-'0'
*) you can then print your variables: lines 3 and 4.
int the_integer = 123456789; int sum_of_digits;
for (char& c: std::to_string(the_integer)) {sum_of_digits += c-'0';}
std::cout << "Integer: " << the_integer << std::endl;
std::cout << "Sum of Digits << sum_of_digits << std::endl;
Note that std::to_string() has some notes, please consult the c++ references to see if the code is still relevant for your purposes.
A hackish way is to convert this to string (see strtol) and then reconvert this to a number.
you could use something like character you want - '0'
Off the top of my head: "i % 100000", "i % 100000", ...
A recursive solution would let you start from "i%10".
Can anyone tell me what is wrong with the following code?
__inline__
char* ut_byte_to_long (ulint nb) {
char* a = malloc(sizeof(nb));
int i = 0;
for (i=0;i<sizeof(nb);i++) {
a[i] = (nb>>(i*8)) & 0xFF;
}
return a;
}
This string is then concatenated as part of a larger one using strcat. The string prints fine but for the integers which are represented as character symbols. I'm using %s and fprintf to check the result.
Thanks a lot.
EDIT
I took one of the comments below (I was adding the terminating \0 separately, before calling fprintf, but after strcat. Modifying my initial function...
__inline__
char* ut_byte_to_long (ulint nb) {
char* a = malloc(sizeof(nb) + 1);
int i = 0;
for (i=0;i<sizeof(nb);i++) {
a[i] = (nb>>(i*8)) & 0xFF;
}
a[nb] = '\0' ;
return a;
}
This sample code still isn't printing out a number...
char* tmp;
tmp = ut_byte_to_long(start->id);
fprintf(stderr, "Value of node is %s \n ", tmp);
strcat is expecting a null byte terminating the string.
Change your malloc size to sizeof(nb) + 1 and append '\0' to the end.
You have two problems.
The first is that the character array a contains numbers, such as 2, instead of ASCII codes representing those numbers, such as '2' (=50 on ASCII, might be different in other systems). Try modifying your code to
a[i] = (nb>>(i*8)) & 0xFF + '0';
The second problem is that the result of the above computation can be anything between 0 and 255, or in other words, a number which requires more than one digit to print.
If you want to print hexadecimal numbers (0-9, A-F), two digits per such computation will be enough, and you can write something like
a[2*i + 0] = int2hex( (nb>>(i*8)) & 0x0F ); //right hexa digit
a[2*i + 1] = int2hex( (nb>>(i*8+4)) & 0x0F ); //left hexa digit
where
char int2hex(int n) {
if (n <= 9 && n >= 0)
return n + '0';
else
return (n-10) + 'A';
}
if you dont want to use sprintf(target_string,"%lu",source_int) or the non standard itoa(), here is a version of the function that transform a long to a string :
__inline__
char* ut_byte_to_long (ulint nb) {
char* a = (char*) malloc(22*sizeof(char));
int i=21;
int j;
do
{
i--;
a[i] = nb % 10 + '0';
nb = nb/10;
}while (nb > 0);
// the number is stored from a[i] to a[21]
//shifting the string to a[0] : a[21-i]
for(j = 0 ; j < 21 && i < 21 ; j++ , i++)
{
a[j] = a[i];
}
a[j] = '\0';
return a;
}
I assumed that an unsigned long contain less than 21 digits. (biggest number is 18,446,744,073,709,551,615 which equals 2^64 − 1 : 20 digits)