I would like to convert a very long (arbitrary length, possibly 1000 characters long; university assignment) string into binary. How should I approach this problem? I have thought about it for a while, but I just can't seem to think of anything viable.
The string will be passed to me as const char *str. I want to read the number, which will be in Base 10, and convert it into binary.
Should I read certain number of least significant numbers and store them in unsigned long long int, and then work from there? Is there a better solution? I don't know how the method I suggested would pan out. I wanted to know if there's a better/easier way to do it.
Thank you.
Assuming your input is too large for the biggest integer type, you have to convert it to a unlimited size integer. For this purpose you can use gmplib. If you are not allowed to use external libraries, you can use a different approach:
is your string divisible by two (look at the last digit)?
if yes, write 0 to left side of your output
else, write 1 to left side of your output
divide the string by 2 (every digit)
repeat while string is not filled with 0
I am going to edit this answer, as soon as I wrote the code.
Here you go:
#include<stdbool.h>
#include<stdlib.h>
#include<memory.h>
#include<stdio.h>
typedef struct char_queue {
unsigned int len;
unsigned int capacity;
char* data;
} char_queue;
char_queue init_char_queue() {
return (char_queue) {
0,
4096,
malloc(4096)
};
}
void enqueue(char_queue* queue, char val) {
if (queue->len == queue->capacity) {
char* new_queue_data = malloc(queue->capacity + 4096);
memmove(new_queue_data, queue->data, queue->capacity);
free(queue->data);
queue->data = new_queue_data;
}
queue->len++;
queue->data[queue->capacity - queue->len] = val;
}
char* queue_get_arr(char_queue* queue) {
char* output = malloc(queue->len);
memcpy(output, &queue->data[queue->capacity - queue->len], queue->len);
return output;
}
void free_char_queue(char_queue* queue) {
if (queue->data) free(queue->data);
}
void convert_to_digit_arr(char* input, unsigned int len) {
for (unsigned int i = 0; i < len; i++) {
input[i] = input[i] - '0'; // '5' - '0' = 5
}
}
bool is_null(char* input, unsigned int len) {
for (unsigned int i = 0; i < len; i++) {
if (input[i] != 0) return false;
}
return true;
}
bool divisible_by_two(char* digit_arr, unsigned int len) {
return digit_arr[len - 1] % 2 == 0;
}
void divide_by_two(char* digit_arr, unsigned int len) {
for (unsigned int i = 0; i < len; i++) {
bool is_odd = digit_arr[i] % 2 == 1;
digit_arr[i] /= 2;
if (is_odd && i + 1 < len) { // and is not last (right) digit
digit_arr[i + 1] += 10;
}
}
}
int main(int argc, char** argv) {
for (int i = 1; i < argc; i++) {
unsigned int input_len = strlen(argv[i]);
char* input = malloc(input_len + 1);
strcpy(input, argv[i]);
convert_to_digit_arr(input, input_len);
char_queue queue = init_char_queue();
enqueue(&queue, 0); // null terminator to use the queue content as a string
while (!is_null(input, input_len)) {
enqueue(&queue, divisible_by_two(input, input_len) ? '0' : '1');
divide_by_two(input, input_len);
}
free(input);
char* output = queue_get_arr(&queue);
printf("%s\n", output);
free(output);
free_char_queue(&queue);
}
}
This is not the fastest approach, but it is very simple. Also feel free to optimize it.
How do I convert a really long string (as decimal characters) to binary?
Let us look at printing this.
print2(s)
If the decimal string is at least "2",
__ Divide the decimal string by 2 and notice its remainder.
__ Recursively call print2(s)
__ Print the remainder.
Else print the string.
Example code:
#include <stdio.h>
unsigned decimal_string_divide(char *dividend, unsigned divisor) {
// Remove a potential leading '0'
if (*dividend == '0') {
memmove(dividend, dividend+1, strlen(dividend));
}
// "divide", like we learned in grade school.
unsigned remainder = 0;
while (*dividend) {
unsigned sum = remainder*10 + (*dividend - '0');
remainder = sum%divisor;
*dividend = sum/divisor + '0';
dividend++;
}
return remainder;
}
void decimal_string_print_binary(char *dividend) {
//printf("<%s>\n", dividend); fflush(stdout);
if (dividend[0]) {
// If at least 2 digits or at least "2"
if (dividend[1] || (dividend[0] >= '2')) {
unsigned bit = decimal_string_divide(dividend, 2);
decimal_string_print_binary(dividend);
printf("%c", bit + '0');
} else {
printf("%c", *dividend);
}
}
}
void decimal_string_print_2(const char *dividend) {
printf("%-25s", dividend);
size_t sz = strlen(dividend) + 1;
char buf[sz]; // Use a VLA or allocate memory
strcpy(buf, dividend);
decimal_string_print_binary(buf);
printf("\n");
}
Test
int main(void) {
decimal_string_print_2("0");
decimal_string_print_2("1");
decimal_string_print_2("42");
decimal_string_print_2("8675309");
decimal_string_print_2("18446744073709551615");
}
Output
0 0
1 1
42 101010
8675309 100001000101111111101101
18446744073709551615 1111111111111111111111111111111111111111111111111111111111111111
To instead convert the string from decimal form into a binary string, allocate sufficient buffer (about log10(2) times string length) and instead of printing above, save to the buffer. Left for OP to do.
I am suggesting a better approach. whereby any arguments passed to a function that is not intended to be mutated, be received as a "const", and a local pointer be used to access the data of this "const".
IE:
void to_binary(const char *str) {
char *ptr = str;
...
Then use ptr.
I know that in this case, my argument is purely trivial and academic, but it is a good practice to get used to and may save you many headaches in the future.
Also, when dealing with binary data, use "unsigned char", to ensure that no type conversions are used. You will need bit 7 if the data is not ASCII or alike.
#include <stdio.h>
void DectoBin(int *n);
int *p;
int position;
int main()
{
int num;
printf("Input number : ");
scanf("%d", &num);
DectoBin(&num);
for (int i = position - 1; i >= 0; i--)
{
printf("%d", p[i]);
}
}
when launch this code, this code compile well...
but I have a error message 'zsh : segmentation fault'
void DectoBin(int *n)
{
int binary[20] = { 0, };
p = binary;
while (1)
{
binary[position++] = *n % 2;
*n = *n / 2;
if (n == 0)
break;
}
return;
}
so, What parts should be corrected to solve the problem??
I think the minimum change to make it seem to work is this:
// you use *n as an integer, but test "n"
// (which is its address, and will never be zero).
// So the while loop goes on indefinitely, eventually overflowing the buffer.
if (*n == 0)
break;
A more serious problem is that the buffer is allocated backwards: you have the pointer outside the function and the buffer is allocated on the stack inside the function. So, as soon as the function exits, the buffer is no longer "legal". The data is probably still there (on my system, it is) and you might even be able to use it as if nothing was amiss. On a short program, and depending on your system, you might not notice that the code has become a time bomb.
You ought to estimate how much of a buffer you need (how many binary digits), then allocate memory on the heap, using malloc(), check it worked, and pass that memory to the function - which will allocate nothing - together with the allocated size, so it can ensure it doesn't overflow the buffer.
The function would then return how many digits to actually print.
#include <stdio.h>
#include <malloc.h>
size_t DectoBin(size_t maxdigits, int *digits, int number);
int main() {
int num;
int *buffer;
size_t maxdigits, position;
printf("Input number : ");
scanf("%d", &num);
// Always check your inputs.
if (num < 0) {
printf("Negative numbers are not supported yet\n");
return 1;
}
maxdigits = 1;
{
// Calculate how many digits are required. Could use ceiling of base-2 logarithm of num.
int tmp = num;
while (tmp > 0) {
maxdigits ++;
tmp /= 2;
}
}
buffer = malloc(maxdigits * sizeof(int));
if (NULL == buffer) {
fprintf(stderr, "Out of memory\n");
return 2;
}
position = DectoBin(maxdigits, buffer, num);
for (size_t i = position; i > 0; i--) {
printf("%d", buffer[i-1]);
}
printf("\n");
return 0;
}
size_t DectoBin(size_t maxdigits, int *digits, int number) {
size_t pos = 0;
do {
digits[pos++] = number % 2;
number /= 2;
} while (number && pos < maxdigits);
return pos;
}
For starters there is a typo
if (n == 0)
It seems you mean
if (*n == 0)
Though there is no any sense to accept a number indirectly through a pointer to it.
You are using the local array binary with automatic storage duration within the function DectoBin
void DectoBin(int *n)
{
int binary[20] = {
0,
};
//...
that will not be alive after exiting the function. So the pointer p will have an invalid value.
Also it is unclear why you are using the magic number 20 in the array declaration. At least you should use the value of the expression sizeof( int ) * CHAR_BIT.
Also it is a bad idea to use global variables.
At Least you could declare the array within the function with the storage-class specifier static and return a pointer to the array from the function.
Pay attention to that for negative numbers you can get an incorrect result.
For example the function can be implemented the following way as shown in the demonstration program below.
#include <stdio.h>
#include <limits.h>
#include <string.h>
const char * DecToBin( int n )
{
static char binary[CHAR_BIT * sizeof( int ) + 1 ];
memset( binary, 0, sizeof( binary ) );
unsigned int un = n;
char *p = binary + sizeof( binary ) - 1;
do
{
*--p = '0' + ( un & 1 );
} while ( un >>= 1 );
return p;
}
int main( void )
{
printf( "%d -> %s\n", 123, DecToBin( 123 ) );
printf( "%d -> %s\n", -123, DecToBin( -123 ) );
}
The program output is
123 -> 1111011
-123 -> 11111111111111111111111110000101
int function(char * WordList[], int nSize, char * param_str)
{
int i = 0;
int bFlag = 0;
while(i < nSize && !bFlag)
{
if(WordList[i] == param_str){
return bFlag = 1;
}
i++;
}
return bFlag;
i tried using strcmp and made it work [if(strcmp(WordList[i], param_str) == 0)] but im wondering whats wrong with my current code.
WordList[i] is a pointer
param_str is another pointer.
WordList[i] == param_str tests if the 2 pointers point to the same location.
What these pointers reference is not considered. WordList[i] == param_str is not a string compare, just an address compare.
strcmp(a,b) compares the data at locations a and b.
To do the same, without strcmp(), form your own ...
int my_strcmp(const char *a, const char *b) {
// Pseudo code
while what `a` points to is the same as what `b` points to and `*a` is not 0
advance both pointers
return the sign of the difference of `*a` and `*b`.
}
How can I do reverse memory comparison? As in, I give the ends of two sequences and I want the pointer to be decremented towards the beginning, not incremented towards the end.
There's no built-in function in the C standard library to do it. Here's a simple way to roll your own:
int memrcmp(const void *s1, const void *s2, size_t n)
{
if(n == 0)
return 0;
// Grab pointers to the end and walk backwards
const unsigned char *p1 = (const unsigned char*)s1 + n - 1;
const unsigned char *p2 = (const unsigned char*)s2 + n - 1;
while(n > 0)
{
// If the current characters differ, return an appropriately signed
// value; otherwise, keep searching backwards
if(*p1 != *p2)
return *p1 - *p2;
p1--;
p2--;
n--;
}
return 0;
}
If you something high-performance, you should compare 4-byte words at a time instead of individual bytes, since memory latency will be the bottleneck; however, that solution is significantly more complex and not really worth it.
Much like in a post (C memcpy in reverse) originally linked by Vlad Lazarenko, here is a solution based on that, that I haven't yet tested but should get you started.
int reverse_memcmp(const void *s1, const void *s2, size_t n)
{
unsigned char *a, *b;
a = s1;
b = s2;
size_t i = 0;
// subtracting i from last position and comparing
for (i = 0; i < n; i++) {
if (a[n-1-i] != b[n-1-i]) {
// return differences between different byte, strcmp()-style
return (a[n-1-i] - b[n-1-i]);
}
}
return 0;
}
All you need to do is specify your two ends and the size that you'd like to compare, as well as a step size. Please note especially that the step size may be the most important part for getting the expected results. It will greatly ease the implementation if you restrict the sizes. For the size of a char you could do something like:
int compare (void *one, void *two, size_t size)
{
char *one_char = (char *)one;
char *two_char = (char *)two;
size_t i;
for (i = 0; i < size; i++)
{
if (*(one_char - i) != *(two_char - i))
return(NOT_EQUAL);
}
return(EQUAL);
}
shorter code (C code doesn't need force pointer type cast):
int reverse_memcmp(const void *end1, const void *end2, size_t n) {
const unsigned char *a = end1, *b = end2;
for (; n; --n)
if (*--a != *--b) return *a - *b;
return 0;
}
I was wondering if my implementation of an "itoa" function is correct. Maybe you can help me getting it a bit more "correct", I'm pretty sure I'm missing something. (Maybe there is already a library doing the conversion the way I want it to do, but... couldn't find any)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
char * itoa(int i) {
char * res = malloc(8*sizeof(int));
sprintf(res, "%d", i);
return res;
}
int main(int argc, char *argv[]) {
...
// Yet, another good itoa implementation
// returns: the length of the number string
int itoa(int value, char *sp, int radix)
{
char tmp[16];// be careful with the length of the buffer
char *tp = tmp;
int i;
unsigned v;
int sign = (radix == 10 && value < 0);
if (sign)
v = -value;
else
v = (unsigned)value;
while (v || tp == tmp)
{
i = v % radix;
v /= radix;
if (i < 10)
*tp++ = i+'0';
else
*tp++ = i + 'a' - 10;
}
int len = tp - tmp;
if (sign)
{
*sp++ = '-';
len++;
}
while (tp > tmp)
*sp++ = *--tp;
return len;
}
// Usage Example:
char int_str[15]; // be careful with the length of the buffer
int n = 56789;
int len = itoa(n,int_str,10);
The only actual error is that you don't check the return value of malloc for null.
The name itoa is kind of already taken for a function that's non-standard, but not that uncommon. It doesn't allocate memory, rather it writes to a buffer provided by the caller:
char *itoa(int value, char * str, int base);
If you don't want to rely on your platform having that, I would still advise following the pattern. String-handling functions which return newly allocated memory in C are generally more trouble than they're worth in the long run, because most of the time you end up doing further manipulation, and so you have to free lots of intermediate results. For example, compare:
void delete_temp_files() {
char filename[20];
strcpy(filename, "tmp_");
char *endptr = filename + strlen(filename);
for (int i = 0; i < 10; ++i) {
itoa(endptr, i, 10); // itoa doesn't allocate memory
unlink(filename);
}
}
vs.
void delete_temp_files() {
char filename[20];
strcpy(filename, "tmp_");
char *endptr = filename + strlen(filename);
for (int i = 0; i < 10; ++i) {
char *number = itoa(i, 10); // itoa allocates memory
strcpy(endptr, number);
free(number);
unlink(filename);
}
}
If you had reason to be especially concerned about performance (for instance if you're implementing a stdlib-style library including itoa), or if you were implementing bases that sprintf doesn't support, then you might consider not calling sprintf. But if you want a base 10 string, then your first instinct was right. There's absolutely nothing "incorrect" about the %d format specifier.
Here's a possible implementation of itoa, for base 10 only:
char *itobase10(char *buf, int value) {
sprintf(buf, "%d", value);
return buf;
}
Here's one which incorporates the snprintf-style approach to buffer lengths:
int itobase10n(char *buf, size_t sz, int value) {
return snprintf(buf, sz, "%d", value);
}
A good int to string or itoa() has these properties;
Works for all [INT_MIN...INT_MAX], base [2...36] without buffer overflow.
Does not assume int size.
Does not require 2's complement.
Does not require unsigned to have a greater positive range than int. In other words, does not use unsigned.
Allows use of '-' for negative numbers, even when base != 10.
Tailor the error handling as needed. (needs C99 or later):
char* itostr(char *dest, size_t size, int a, int base) {
// Max text needs occur with itostr(dest, size, INT_MIN, 2)
char buffer[sizeof a * CHAR_BIT + 1 + 1];
static const char digits[36] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
if (base < 2 || base > 36) {
fprintf(stderr, "Invalid base");
return NULL;
}
// Start filling from the end
char* p = &buffer[sizeof buffer - 1];
*p = '\0';
// Work with negative `int`
int an = a < 0 ? a : -a;
do {
*(--p) = digits[-(an % base)];
an /= base;
} while (an);
if (a < 0) {
*(--p) = '-';
}
size_t size_used = &buffer[sizeof(buffer)] - p;
if (size_used > size) {
fprintf(stderr, "Scant buffer %zu > %zu", size_used , size);
return NULL;
}
return memcpy(dest, p, size_used);
}
I think you are allocating perhaps too much memory. malloc(8*sizeof(int)) will give you 32 bytes on most machines, which is probably excessive for a text representation of an int.
i found an interesting resource dealing with several different issues with the itoa implementation
you might wanna look it up too
itoa() implementations with performance tests
I'm not quite sure where you get 8*sizeof(int) as the maximum possible number of characters -- ceil(8 / (log(10) / log(2))) yields a multiplier of 3*. Additionally, under C99 and some older POSIX platforms you can create an accurately-allocating version with sprintf():
char *
itoa(int i)
{
int n = snprintf(NULL, 0, "%d", i) + 1;
char *s = malloc(n);
if (s != NULL)
snprintf(s, n, "%d", i);
return s;
}
HTH
You should use a function in the printf family for this purpose. If you'll be writing the result to stdout or a file, use printf/fprintf. Otherwise, use snprintf with a buffer big enough to hold 3*sizeof(type)+2 bytes or more.
sprintf is quite slow, if performance matters it is probably not the best solution.
if the base argument is a power of 2 the conversion can be done with a shift and masking, and one can avoid reversing the string by recording the digits from the highest positions. For instance, something like this for base=16
int num_iter = sizeof(int) / 4;
const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
/* skip zeros in the highest positions */
int i = num_iter;
for (; i >= 0; i--)
{
int digit = (value >> (bits_per_digit*i)) & 15;
if ( digit > 0 ) break;
}
for (; i >= 0; i--)
{
int digit = (value >> (bits_per_digit*i)) & 15;
result[len++] = digits[digit];
}
For decimals there is a nice idea to use a static array big enough to record the numbers in the reversed order, see here
Integer-to-ASCII needs to convert data from a standard integer type
into an ASCII string.
All operations need to be performed using pointer arithmetic, not array indexing.
The number you wish to convert is passed in as a signed 32-bit integer.
You should be able to support bases 2 to 16 by specifying the integer value of the base you wish to convert to (base).
Copy the converted character string to the uint8_t* pointer passed in as a parameter (ptr).
The signed 32-bit number will have a maximum string size (Hint: Think base 2).
You must place a null terminator at the end of the converted c-string Function should return the length of the converted data (including a negative sign).
Example my_itoa(ptr, 1234, 10) should return an ASCII string length of 5 (including the null terminator).
This function needs to handle signed data.
You may not use any string functions or libraries.
.
uint8_t my_itoa(int32_t data, uint8_t *ptr, uint32_t base){
uint8_t cnt=0,sgnd=0;
uint8_t *tmp=calloc(32,sizeof(*tmp));
if(!tmp){exit(1);}
else{
for(int i=0;i<32;i++){
if(data<0){data=-data;sgnd=1;}
if(data!=0){
if(data%base<10){
*(tmp+i)=(data%base)+48;
data/=base;
}
else{
*(tmp+i)=(data%base)+55;
data/=base;
}
cnt++;
}
}
if(sgnd){*(tmp+cnt)=45;++cnt;}
}
my_reverse(tmp, cnt);
my_memcopy(tmp,ptr,cnt);
return ++cnt;
}
ASCII-to-Integer needs to convert data back from an ASCII represented string into an integer type.
All operations need to be performed using pointer arithmetic, not array indexing
The character string to convert is passed in as a uint8_t * pointer (ptr).
The number of digits in your character set is passed in as a uint8_t integer (digits).
You should be able to support bases 2 to 16.
The converted 32-bit signed integer should be returned.
This function needs to handle signed data.
You may not use any string functions or libraries.
.
int32_t my_atoi(uint8_t *ptr, uint8_t digits, uint32_t base){
int32_t sgnd=0, rslt=0;
for(int i=0; i<digits; i++){
if(*(ptr)=='-'){*ptr='0';sgnd=1;}
else if(*(ptr+i)>'9'){rslt+=(*(ptr+i)-'7');}
else{rslt+=(*(ptr+i)-'0');}
if(!*(ptr+i+1)){break;}
rslt*=base;
}
if(sgnd){rslt=-rslt;}
return rslt;
}
I don't know about good, but this is my implementation that I did while learning C
static int ft_getintlen(int value)
{
int l;
int neg;
l = 1;
neg = 1;
if (value < 0)
{
value *= -1;
neg = -1;
}
while (value > 9)
{
l++;
value /= 10;
}
if (neg == -1)
{
return (l + 1);
}
return (l);
}
static int ft_isneg(int n)
{
if (n < 0)
return (-1);
return (1);
}
static char *ft_strcpy(char *dest, const char *src)
{
unsigned int i;
i = 0;
while (src[i] != '\0')
{
dest[i] = src[i];
i++;
}
dest[i] = src[i];
return (dest);
}
char *ft_itoa(int n)
{
size_t len;
char *instr;
int neg;
neg = ft_isneg(n);
len = ft_getintlen(n);
instr = (char *)malloc((sizeof(char) * len) + 1);
if (n == -2147483648)
return (ft_strcpy(instr, "-2147483648"));
if (!instr)
return (NULL);
if (neg == -1)
n *= -1;
instr[len--] = 0;
if (n == 0)
instr[len--] = 48;
while (n)
{
instr[len--] = ((n % 10) + 48);
n /= 10;
}
if (neg == -1)
instr[len] = '-';
return (instr);
}
This should work:
#include <string.h>
#include <stdlib.h>
#include <math.h>
char * itoa_alloc(int x) {
int s = x<=0 ? 1 ? 0; // either space for a - or for a 0
size_t len = (size_t) ceil( log10( abs(x) ) );
char * str = malloc(len+s + 1);
sprintf(str, "%i", x);
return str;
}
If you don't want to have to use the math/floating point functions (and have to link in the math libraries) you should be able to find non-floating point versions of log10 by searching the Web and do:
size_t len = my_log10( abs(x) ) + 1;
That might give you 1 more byte than you needed, but you'd have enough.
There a couple of suggestions I might make. You can use a static buffer and strdup to avoid repeatedly allocating too much memory on subsequent calls. I would also add some error checking.
char *itoa(int i)
{
static char buffer[12];
if (snprintf(buffer, sizeof(buffer), "%d", i) < 0)
return NULL;
return strdup(buffer);
}
If this will be called in a multithreaded environment, remove "static" from the buffer declaration.
This is chux's code without safety checks and the ifs. Try it online:
char* itostr(char * const dest, size_t const sz, int a, int const base) {
bool posa = a >= 0;
char buffer[sizeof a * CHAR_BIT + 1];
static const char digits[36] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
char* p = &buffer[sizeof buffer - 1];
do {
*(p--) = digits[abs(a % base)];
a /= base;
} while (a);
*p = '-';
p += posa;
size_t s = &buffer[sizeof(buffer)] - p;
memcpy(dest, p, s);
dest[s] = '\0';
return dest;
}
main()
{
int i=1234;
char stmp[10];
#if _MSC_VER
puts(_itoa(i,stmp,10));
#else
puts((sprintf(stmp,"%d",i),stmp));
#endif
return 0;
}