I have a short string s (max 8 characters) which I want to search in many strings. Actually I want to search the first occurrence in each string of a stream. Finding the first index of s has to be as fast as possible for my use-case, because a massive amount of strings per second is processed and the latency is very critical. Of course, machines can be scaled, but a big point is to reduce costs (and latency).
In general I want to create a C (or C++) function which behaves like strstr, but for a fixed "needle". The needle is not known at compile-time, but only at runtime (at the startup). However, it's okay to generate code at runtime and compile it (or any other "expensive" initialization is fine). Once the needle is known it won't change anymore.
Another detail: The needle will be in almost every string of the input stream. So it's okay if the algorithm is slower for the case that the needle is not available (because that will almost never happen). Also maybe important: The input strings have always extra 64 byte allocated at the end (which might be helpful for SIMD operations).
I was actually surprised that strstr is already quite fast, but I guess there might be a more optimal algorithm for the case where the needle does not change?
Thanks a lot
If the needle searched starts with a byte value that is rare in the haystack, a simplistic implementation will beat more complicated alternatives:
/*
 * Find the first occurrence of `needle` in the NUL-terminated string `s`.
 *
 * Strategy: let strchr() locate each candidate for the first needle byte,
 * then compare the rest of the needle at that position.
 *
 * Returns a pointer to the match inside `s`, `s` itself for an empty
 * needle, or NULL when the needle does not occur.
 */
char *mystrstr_naive(const char *s, const char *needle) {
    char *p = (char *)(uintptr_t)s;   /* drop const the way strstr() does */
    int c = *needle++;                /* first byte; needle now points at the tail */
    if (c == '\0')
        return p;
    if (*needle == '\0')
        return strchr(p, c);          /* one-byte needle: strchr is the whole search */
    size_t len = strlen(needle);
    while ((p = strchr(p, c)) != NULL) {
        p++;
        /* strncmp stops at the haystack terminator, so a candidate close to
         * the end of `s` never causes a read beyond the string (memcmp of
         * `len` bytes could). */
        if (!strncmp(p, needle, len))
            return p - 1;
    }
    return NULL;
}
And even faster if the string length is known:
/*
 * Length-aware variant of mystrstr_naive(): search the first `slen` bytes
 * of `s` for `needle` (a NUL-terminated string).
 *
 * memchr() finds candidates for the first needle byte; the search window is
 * shortened so that the remaining needle bytes always fit inside the
 * haystack.
 *
 * Returns a pointer to the match, `s` for an empty needle, or NULL.
 */
char *mystrstr_naive_len(const char *s, size_t slen, const char *needle) {
    char *p = (char *)(uintptr_t)s;
    int c = *needle++;                /* first byte; needle now points at the tail */
    if (c == '\0')
        return p;
    if (*needle == '\0')
        return (char *)memchr(p, c, slen);   /* explicit cast: valid as C and as C++ */
    size_t len = strlen(needle);              /* length of the tail (needle length - 1) */
    if (len < slen) {
        /* One past the last position where the first byte may start so that
         * the tail still lies inside the haystack. */
        char *e = p + slen - len;
        while ((p = (char *)memchr(p, c, (size_t)(e - p))) != NULL) {
            p++;
            if (!memcmp(p, needle, len))
                return p - 1;
        }
    }
    return NULL;
}
On my system, if the needle searched starts with a byte value that is rare in the haystack, this is 10 to 20 times faster than strstr and the alternatives presented in my other answer.
To improve performance on specific data, you must study the data carefully. The needle being known in advance is an interesting clue, but some other characteristics might be more fruitful.
If your target handles unaligned reads gracefully, you could use this approach:
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/*
 * Search the NUL-terminated string `s` for a needle of 1 to 8 bytes that has
 * been packed into `str8` (needle bytes in memory order, unused bytes zero)
 * with `mask8` holding 0xFF in every byte position the needle occupies.
 *
 * Each step loads 8 bytes starting at the current position, so the buffer
 * must remain readable for up to 7 bytes beyond the terminator.
 *
 * Returns a pointer to the first match, or NULL.
 */
char *mystrstr8(const char *s, uint64_t str8, uint64_t mask8) {
    for (const char *p = s; *p; p++) {
        uint64_t window;
        /* memcpy is the well-defined way to do an unaligned load; compilers
         * turn it into a single load on targets that support it. */
        memcpy(&window, p, sizeof window);
        if ((window & mask8) == str8)
            return (char *)(uintptr_t)p;
    }
    return NULL;
}
If the string is modifiable, has extra slack and its length is provided, you can remove the terminator test:
#include <stddef.h>
#include <stdint.h>
/*
 * Search the `len` bytes at `s` for a needle packed into `str8`/`mask8`
 * (needle bytes in memory order, 0xFF mask bytes where the needle has a
 * byte, zero elsewhere in both words).
 *
 * The 8 bytes at s + len are temporarily overwritten with the needle so the
 * scan needs no end-of-buffer test; they are restored before returning.
 * The buffer must therefore be writable and have at least 8 bytes of slack
 * after `len`.
 *
 * Returns a pointer to the first match that lies completely inside the
 * `len` bytes, or NULL.
 */
char *mystrstr8_len(char *s, size_t len, uint64_t str8, uint64_t mask8) {
    char *end = s + len;
    uint64_t saved;
    size_t needle_len = 0;

    /* Needle length = number of non-zero bytes in the mask. */
    for (unsigned shift = 0; shift < 64; shift += 8)
        needle_len += ((mask8 >> shift) & 0xFF) != 0;

    memcpy(&saved, end, sizeof saved);   /* save the slack bytes ...          */
    memcpy(end, &str8, sizeof str8);     /* ... and plant the sentinel needle */

    for (char *p = s;; p++) {
        uint64_t window;
        memcpy(&window, p, sizeof window);   /* well-defined unaligned load */
        if ((window & mask8) == str8) {
            memcpy(end, &saved, sizeof saved);
            /* A hit that borrows bytes from the sentinel (possible when the
             * needle overlaps itself, e.g. "abab" after "xab") is not a
             * match in the real data. */
            return (needle_len <= (size_t)(end - p)) ? p : NULL;
        }
    }
}
str8 and mask8 must be precomputed from the bytes of the needle string and according to the target endianness. For example, to search for Hello on a little endian machine str8 is 0x6f6c6c6548 and mask8 is 0xffffffffff.
For short strings, this simplistic brute force approach might perform better than using a tailored Boyer Moore implementation, depending on your specific data: array and needle lengths and contents... You can start by comparing the performance with that of your standard library's strstr function.
Here is a benchmark for various string lengths:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
/*
 * Search the NUL-terminated string `s` for a needle of 1 to 8 bytes packed
 * into `str8` (memory order, unused bytes zero); `mask8` has 0xFF in every
 * byte position the needle occupies.
 *
 * Loads 8 bytes per position, so up to 7 bytes after the terminator must be
 * readable. Returns the first match or NULL.
 */
char *mystrstr8(const char *s, uint64_t str8, uint64_t mask8) {
    for (const char *p = s; *p; p++) {
        uint64_t window;
        memcpy(&window, p, sizeof window);   /* well-defined unaligned load */
        if ((window & mask8) == str8)
            return (char *)(uintptr_t)p;
    }
    return NULL;
}
/*
 * Special case of mystrstr8() for needles of exactly 8 bytes: no mask is
 * needed, the loaded word is compared with `str8` directly.
 *
 * Loads 8 bytes per position, so up to 7 bytes after the terminator must be
 * readable. Returns the first match or NULL.
 */
char *mystrstr8_8(const char *s, uint64_t str8) {
    for (const char *p = s; *p; p++) {
        uint64_t window;
        memcpy(&window, p, sizeof window);   /* well-defined unaligned load */
        if (window == str8)
            return (char *)(uintptr_t)p;
    }
    return NULL;
}
/*
 * Search the `len` bytes at `s` for a needle packed into `str8`/`mask8`.
 *
 * The 8 bytes at s + len are temporarily replaced by the needle (a sentinel
 * that guarantees termination) and restored before returning, so the buffer
 * must be writable with at least 8 bytes of slack after `len`.
 *
 * Returns the first match lying completely inside the `len` bytes, or NULL.
 */
char *mystrstr8_len(char *s, size_t len, uint64_t str8, uint64_t mask8) {
    char *end = s + len;
    uint64_t saved;
    size_t needle_len = 0;

    /* Needle length = number of non-zero bytes in the mask. */
    for (unsigned shift = 0; shift < 64; shift += 8)
        needle_len += ((mask8 >> shift) & 0xFF) != 0;

    memcpy(&saved, end, sizeof saved);
    memcpy(end, &str8, sizeof str8);

    for (char *p = s;; p++) {
        uint64_t window;
        memcpy(&window, p, sizeof window);   /* well-defined unaligned load */
        if ((window & mask8) == str8) {
            memcpy(end, &saved, sizeof saved);
            /* Reject hits that use sentinel bytes: they are not in the data. */
            return (needle_len <= (size_t)(end - p)) ? p : NULL;
        }
    }
}
/*
 * Variant of mystrstr8_len() for needles of exactly 8 bytes (no mask).
 *
 * The 8 bytes at s + len are temporarily replaced by the needle and restored
 * before returning, so the buffer must be writable with at least 8 bytes of
 * slack after `len`.
 *
 * Returns the first match lying completely inside the `len` bytes, or NULL.
 */
char *mystrstr8_len8(char *s, size_t len, uint64_t str8) {
    char *end = s + len;
    uint64_t saved;

    memcpy(&saved, end, sizeof saved);
    memcpy(end, &str8, sizeof str8);

    for (char *p = s;; p++) {
        uint64_t window;
        memcpy(&window, p, sizeof window);   /* well-defined unaligned load */
        if (window == str8) {
            memcpy(end, &saved, sizeof saved);
            /* All 8 needle bytes must come from the data, not the sentinel. */
            return ((size_t)(end - p) >= sizeof str8) ? p : NULL;
        }
    }
}
/*
 * Time strstr() against the mystrstr8* variants for one haystack length and
 * one needle, and print one result row.
 *
 * len    - haystack length; `a` is filled with 'x' and terminated at a[len].
 * needle - string to search for; only its first 8 bytes are packed.
 * a      - caller-provided work buffer (the mystrstr8* functions touch bytes
 *          past a[len], so the caller must provide slack).
 *
 * Returns 0 when the last results of all three searches agree, 1 otherwise.
 */
int benchmark(int len, const char *needle, char *a) {
// Pack at most 8 needle bytes into a zero-padded buffer.
char buf[9] = { 0 };
strncat(buf, needle, 8);
int needle_len = strlen(buf);
// 0xFF in the low needle_len bytes of the mask, 0 for an empty needle.
uint64_t mask8 = needle_len ? 0xFFFFFFFFFFFFFFFF >> (64 - needle_len * 8) : 0;
uint64_t str8;
memcpy(&str8, buf, 8);
// Haystack: all 'x', with the needle placed at the very end when it fits.
memset(a, 'x', len);
a[len] = '\0';
int pos = len - needle_len;
if (pos >= 0 && pos <= len - needle_len)
memcpy(a + pos, needle, needle_len);
clock_t c;
long c1, c2, c3;
// b1..b3: smallest clock() delta seen for one batch of each function.
long b1 = 1000000, b2 = 1000000, b3 = 1000000;
// n1..n3: accumulated clock() deltas.
long n1 = 0, n2 = 0, n3 = 0;
// Calls per timed batch: 100000 / len, but at least one.
int rep = 100000 / len;
rep += rep == 0;
int res = 0;
// Results are stored so the calls cannot be discarded as unused.
void *p1[rep], *p2[rep], *p3[rep];
// Repeat batches until strstr() has accumulated 10000 clock ticks.
while (n1 < 10000) {
c = clock();
for (int i = 0; i < rep; i++)
p1[i] = strstr(a, needle);
c1 = clock() - c;
if (needle_len == 8) {
// Full 8-byte needle: use the mask-free variants.
c = clock();
for (int i = 0; i < rep; i++)
p2[i] = mystrstr8_8(a, str8);
c2 = clock() - c;
c = clock();
for (int i = 0; i < rep; i++)
p3[i] = mystrstr8_len8(a, len, str8);
c3 = clock() - c;
} else {
c = clock();
for (int i = 0; i < rep; i++)
p2[i] = mystrstr8(a, str8, mask8);
c2 = clock() - c;
c = clock();
for (int i = 0; i < rep; i++)
p3[i] = mystrstr8_len(a, len, str8, mask8);
c3 = clock() - c;
}
n1 += c1;
n2 += c2;
n3 += c3;
// Branch-free minimum: bN = min(bN, cN).
b1 -= (b1 - c1) * (b1 > c1);
b2 -= (b2 - c2) * (b2 > c2);
b3 -= (b3 - c3) * (b3 > c3);
// Non-zero when the last results of this batch disagree.
res = (p1[rep - 1] != p2[rep - 1] || p1[rep - 1] != p3[rep - 1]);
}
if (p2[0] != p1[0]) {
printf("bench(%d, '%s'): mystrstr8 failure: %p, expected %p\n",
len, needle, p2[0], p1[0]);
}
if (p3[0] != p1[0]) {
printf("bench(%d, '%s'): mystrstr8_len failure: %p, expected %p\n",
len, needle, p3[0], p1[0]);
}
if (res == 0) {
// Report the best batch time divided by the number of calls per batch.
printf("%-8d %-8s %13.3f %13.3f %13.3f\n", len, needle,
(double)b1 / rep, (double)b2 / rep, (double)b3 / rep);
}
return res;
}
#define MAX_LEN 1000000
/*
 * Benchmark driver: runs benchmark() for haystack lengths 10, 100, ...,
 * MAX_LEN and needles of 1 to 8 bytes, printing one table row per run.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    /* 8 bytes of slack: the mystrstr8* functions access up to 8 bytes past
     * the end of the haystack. */
    char *a = (char *)malloc(MAX_LEN + 8);
    if (a == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }
    // ensure full output is buffered
    setvbuf(stdout, NULL, _IOFBF, 16384);
    printf("%-8s %-8s %13s %13s %13s\n",
           "len", "needle", "strstr", "mystrstr8", "mystrstr8_len");
    for (int len = 10; len <= MAX_LEN; len *= 10) {
        benchmark(len, "a", a);
        benchmark(len, "ab", a);
        benchmark(len, "abc", a);
        benchmark(len, "abcd", a);
        benchmark(len, "abcde", a);
        benchmark(len, "abcdef", a);
        benchmark(len, "abcdefg", a);
        benchmark(len, "abcdefgh", a);
    }
    free(a);
    return 0;
}
Here are the results on my 2015 Mac x86_64 laptop:
len needle strstr mystrstr8 mystrstr8_len
10 a 0.013 0.005 0.008
10 ab 0.013 0.005 0.008
10 abc 0.014 0.005 0.008
10 abcd 0.013 0.004 0.007
10 abcde 0.013 0.004 0.007
10 abcdef 0.013 0.003 0.007
10 abcdefg 0.012 0.003 0.007
10 abcdefgh 0.012 0.002 0.002
100 a 0.076 0.057 0.046
100 ab 0.076 0.056 0.045
100 abc 0.077 0.056 0.045
100 abcd 0.076 0.055 0.044
100 abcde 0.077 0.055 0.044
100 abcdef 0.076 0.054 0.044
100 abcdefg 0.076 0.054 0.043
100 abcdefgh 0.076 0.045 0.040
1000 a 0.610 0.480 0.410
1000 ab 0.610 0.470 0.410
1000 abc 0.610 0.480 0.410
1000 abcd 0.610 0.480 0.410
1000 abcde 0.610 0.470 0.400
1000 abcdef 0.610 0.470 0.410
1000 abcdefg 0.610 0.470 0.400
1000 abcdefgh 0.610 0.400 0.370
10000 a 5.900 4.800 4.100
10000 ab 5.900 4.800 4.100
10000 abc 5.900 4.800 4.100
10000 abcd 5.900 4.800 4.100
10000 abcde 5.900 4.800 4.100
10000 abcdef 5.900 4.800 4.100
10000 abcdefg 5.900 4.800 4.100
10000 abcdefgh 5.900 4.000 3.800
100000 a 59.000 50.000 41.000
100000 ab 59.000 49.000 41.000
100000 abc 59.000 49.000 41.000
100000 abcd 59.000 49.000 41.000
100000 abcde 59.000 49.000 41.000
100000 abcdef 59.000 49.000 41.000
100000 abcdefg 59.000 50.000 41.000
100000 abcdefgh 59.000 40.000 39.000
1000000 a 593.000 493.000 415.000
1000000 ab 589.000 472.000 415.000
1000000 abc 592.000 496.000 413.000
1000000 abcd 590.000 496.000 416.000
1000000 abcde 589.000 495.000 415.000
1000000 abcdef 589.000 495.000 416.000
1000000 abcdefg 589.000 495.000 417.000
1000000 abcdefgh 589.000 406.000 385.000
This hack consistently improves performance by 15 to 30% on long strings and even more on shorter ones. I made a special case of 8 byte needles that could be adapted for 1, 2 and 4 byte needles too.
Related
I am trying to use this code to compute CRC16 bypass. For regular characters it works but for a hex sequence like 0xA0 0x00 0x01 0x01 it fails, not returning the correct value. Go easy on me, usually I do not write C code.
#include <stdio.h>
#include <string.h>
/*
 * Compute a 16-bit CRC over a_sBufferLen bytes of a_szBuffuer:
 * polynomial 0x8005, initial value 0, most significant bit first, no final
 * XOR.
 *
 * Each input byte is XORed into the high byte of the CRC arithmetically, so
 * the result does not depend on host byte order or on the width of
 * unsigned short. A length of zero or less yields 0.
 */
unsigned short CalculateCRC(unsigned char* a_szBuffuer, short a_sBufferLen){
    unsigned int crc = 0;
    for (short j = 0; j < a_sBufferLen; j++)
    {
        crc ^= (unsigned int)a_szBuffuer[j] << 8;
        for (int bit = 0; bit < 8; bit++)
        {
            if (crc & 0x8000u)
                crc = ((crc << 1) ^ 0x8005u) & 0xFFFFu;
            else
                crc = (crc << 1) & 0xFFFFu;
        }
    }
    return (unsigned short)crc;
}
/*
 * Append the character `c` to the NUL-terminated string `s` and
 * re-terminate it. `s` must have room for one more byte.
 *
 * The end of the data is found with strlen(), so this only works for text:
 * appending '\0' leaves the string length unchanged and the next append
 * overwrites it.
 */
void append(char* s, char c)
{
    size_t len = strlen(s);   /* size_t: no truncation for very long strings */
    s[len] = c;
    s[len + 1] = '\0';
}
/*
 * Compute and print the CRC of the byte sequence A0 00 01 01.
 *
 * The bytes are kept in an array with an explicit count: building them with
 * strlen()-based appends loses the 0x00 byte, and printing them with %s
 * stops at it.
 */
int main() {
    unsigned char data[] = { 0xA0, 0x00, 0x01, 0x01 };
    const short count = (short)sizeof data;
    unsigned short CRC = CalculateCRC(data, count);
    printf("CRC = %i\n", CRC);
    /* Binary data: print every byte in hex instead of as a C string. */
    for (short i = 0; i < count; i++)
        printf("%02X ", data[i]);
    printf("\n");
    printf("%x\n", CRC);
    return 0;
}
strlen(s) finds the length of a string: it stops at the first '\0'. It cannot tell how many elements of a character array are in use, especially if the data may itself contain '\0'. #Eugene Sh.
Code needs to keep track of the array size used with another variable.
// void append(char* s, char c) {
/*
 * Store `c` at index *sz of `s` and advance *sz by one.
 * No terminator is written; the caller tracks the used size in *sz.
 */
void append(char* s, size_t *sz, char c) {
    const size_t slot = (*sz)++;
    s[slot] = c;
}
...
// Number of bytes stored in sss so far; updated by every append().
size_t size = 0;
append(sss, &size, d);
append(sss, &size, d1);
append(sss, &size, d2);
append(sss, &size, d3);
unsigned char* uCB1 = (unsigned char*)sss;
// Pass the tracked size rather than a hard-coded length.
unsigned short CRC= CalculateCRC(uCB1, size);
// %s would stop at the first 0 byte, so it cannot show binary data:
// printf("%s\n", sss);
write a loop to print each `sss[]`.
I have made the following code to perform multiplication between 2 arbitrary number stored in char * in C :
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * Multiply the two decimal digit strings n1 and n2 (schoolbook method) and
 * print the product followed by a newline.
 *
 * The result is printed with strlen(n1) + strlen(n2) digits, so it may start
 * with a '0'. The inputs are not modified. If memory cannot be allocated an
 * error is reported and nothing is printed.
 */
void mult(char *n1, char *n2)
{
    /* Compute the lengths once; indices are size_t so huge inputs are safe. */
    const size_t len1 = strlen(n1);
    const size_t len2 = strlen(n2);
    const size_t total = len1 + len2;
    char *res = (char *)malloc(total + 1);
    if (res == NULL)
    {
        perror("malloc");
        return;
    }
    memset(res, '0', total);
    res[total] = 0;
    for (size_t i = len1; i-- > 0;)
    {
        for (size_t j = len2; j-- > 0;)
        {
            const int mul = (n1[i] - '0') * (n2[j] - '0');
            /* Current digit plus the new partial product (at most 99). */
            const int acc = (res[i + j + 1] - '0') + mul;
            res[i + j] += acc / 10;             /* carry into the next digit */
            res[i + j + 1] = (acc % 10) + '0';
        }
    }
    printf("%s\n", res);
    free(res);
}
I compiled it with -O3 flag but it takes about 30 seconds for big numbers with 86 k digits.
How I can make it faster?
Summing up comments:
Try to avoid integer division. E.g. with lookup table. Product of two digits mean a maximum of 9^2. Use result of /10 or %10 for the other.
Make sure the strlen() calls are stored, explicitly dedicate variables for them.
Perform the +/-'0' trafo separately.
Consider the base 10000 suggestion.
Perhaps using a dedicated struct (dynamic array + maintained size info) would be better than a string. Computing the length and applying the +/-'0' conversion are then performed only when I/O of numbers is necessary.
The following would then be a somewhat optimized version, using many of the comments' suggestions:
/*
 * Multiply the two decimal digit strings n1 and n2 and print the product
 * followed by a newline.
 *
 * The product is accumulated as raw digit values (0..9) and converted to
 * ASCII once at the end. It is printed with strlen(n1) + strlen(n2) digits,
 * so it may start with a '0'.
 *
 * The inputs are only read, never modified, so string literals and aliased
 * arguments (n1 == n2) are fine. If memory cannot be allocated an error is
 * reported and nothing is printed.
 */
void mult(char *n1, char *n2)
{
    const size_t l1 = strlen(n1), l2 = strlen(n2);
    char *res = (char *)calloc(l1 + l2 + 1, sizeof *res);   /* zeroed, incl. terminator */
    if (res == NULL)
    {
        perror("calloc");
        return;
    }
    for (size_t i = l1; i-- > 0;)
    {
        const int d1 = n1[i] - '0';
        char *pres = res + i + 1;    /* pres[j] is the digit for n1[i] * n2[j] */
        int carry = 0;
        for (size_t j = l2; j-- > 0;)
        {
            /* at most 9 + 9*9 + 9 = 99 */
            const int acc = pres[j] + d1 * (n2[j] - '0') + carry;
            pres[j] = (char)(acc % 10);
            carry = acc / 10;
        }
        res[i] += (char)carry;       /* this position is untouched by earlier rows */
    }
    for (size_t k = 0; k < l1 + l2; k++)
        res[k] += '0';
    printf("%s\n", res);
    free(res);
}
Avoid calling strlen() repeatedly. #Michael Walz as for (j = strlen(n2) ... inside the for (i ... loop.
Rather than subtract/add '0' repeated, do it once at the beginning and end. This is worthwhile for very long strings, less so for short ones. #Michael Walz #Eugene Sh.
Rather than touch 2 product elements in the inner loop (once to add to product, once for the carry), do so once and form a carry.
Take advantage of leading zeros.
Some untested code. Be sure to free the returned pointer. Coded for clarity vs. code-golfing.
#include <stdlib.h>
#include <string.h>
/* Add `offset` to each of the first `sz` bytes of `s`, last byte first. */
static void string_offset(char *s, size_t sz, int offset) {
    for (char *cur = s + sz; cur != s; ) {
        --cur;
        *cur += offset;
    }
}
/*
 * Multiply two non-negative decimal digit strings.
 *
 * a, b - NUL-terminated strings of '0'..'9' (not validated). Leading zeros
 *        are ignored. The strings are only read, so they may be literals and
 *        may point to the same string.
 *
 * Returns a newly allocated string holding the product without leading
 * zeros ("0" for a zero product), or NULL if allocation fails. The caller
 * must free() the result.
 */
char * string_string_mult(char *a, char *b) {
    while (*a == '0') a++;
    while (*b == '0') b++;
    const size_t alen = strlen(a);
    const size_t blen = strlen(b);

    /* A product of alen and blen digits has at most alen + blen digits;
     * always reserve at least one digit so that zero can be represented. */
    const size_t ndigits = (alen + blen > 0) ? alen + blen : 1;
    char *product = (char *)calloc(ndigits + 1, sizeof *product);
    if (product == NULL) return NULL;

    /* Schoolbook multiplication on raw digit values; product[ndigits - 1]
     * is the least significant digit. */
    for (size_t ai = alen; ai-- > 0;) {
        const int da = a[ai] - '0';
        char *p = product + ai + blen;   /* slot for a[ai] * b[blen - 1] */
        int acc = 0;
        for (size_t bi = blen; bi-- > 0;) {
            acc += *p + da * (b[bi] - '0');   /* at most 9 + 81 + 9 */
            *p-- = (char)(acc % 10);
            acc /= 10;                        /* carry into the next slot */
        }
        *p += (char)acc;                      /* p now addresses product[ai] */
    }

    /* Skip leading zeros (keeping one digit) and convert to ASCII in place. */
    size_t first = 0;
    while (first + 1 < ndigits && product[first] == 0) first++;
    const size_t outlen = ndigits - first;
    for (size_t i = 0; i < outlen; i++) {
        product[i] = (char)(product[first + i] + '0');
    }
    product[outlen] = '\0';
    return product;
}
Details
a and b may point to the same string.
Memory allocation may fail.
size_t is the best type to use for array indexing.
The product may/may not form a final carry.
I checked most of the suggested optimizations (except LutzL's base 10000 scheme which has promise) and they don't make a significant difference. Since you've already identified bc as having the right stuff, why not simply use it instead of trying to reimplement it:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define BUFFER_SIZE 1024
/*
 * Multiply the decimal strings n1 and n2 by handing the expression to
 * /usr/bin/bc and copying bc's answer to stdout.
 *
 * Requires a popen() that supports the bidirectional "r+" mode; where it
 * does not, popen() fails and the program exits with an error message.
 * bc breaks long results into lines ending in a backslash; the backslash
 * and the line break after it are removed so the number is printed as one
 * line.
 */
void mult(const char *n1, const char *n2) {
    FILE *pipe = popen("/usr/bin/bc", "r+");
    if (pipe == NULL) {
        fprintf(stderr, "Could not start /usr/bin/bc.\n");
        exit(EXIT_FAILURE);
    }
    fprintf(pipe, "%s * %s\nquit\n", n1, n2);
    /* A stream opened for update must be flushed between writing and reading. */
    if (fflush(pipe) != 0 || ferror(pipe)) {
        fprintf(stderr, "Output to pipe failed.\n");
        exit(EXIT_FAILURE);
    }
    char buffer[BUFFER_SIZE];   // bc long lines are limited to 70 characters
    while (fgets(buffer, BUFFER_SIZE, pipe) != NULL) {
        char *s = strrchr(buffer, '\\');   // find and remove line continuation marker
        if (s != NULL) {
            *s = '\0';
        }
        fputs(buffer, stdout);
    }
    (void) pclose(pipe);
}
On my system this multiplied two 86K digit numbers in under a second.
Note that my system (OSX) has a bidirectional popen(), which is unidirectional on many systems, so you'll need to use the recommended workaround for bidirectional communication on your system.
The above code could use more error checking and you could play with the bc environment variable BC_LINE_LENGTH to generate longer lines for efficiency, and on some systems set it to zero to avoid line breaks.
For example if I have the number as 390000 it would need to be printed as 390 000.
If I have the number as 1200000 it would be printed as 1 200 000 etc.
Initially I thought about assigning the long long int to a string, and then writing if statements for each possible string length as there's only 11 possible lengths but this just seems really cumbersome and unnecessary.
Is there an easier way to do it?
If you're on a POSIX system and one of your locales uses the space as digit grouping symbol, use the apostrophe (') in printf() conversion specifier.
// "SOMETHING" is a placeholder: substitute the name of a locale whose
// digit grouping symbol is a space.
setlocale(LC_NUMERIC, "SOMETHING");
// The apostrophe flag asks printf to apply the locale's digit grouping.
printf("%'d", 345678); // 345 678
See a previous answer of mine to the question "convert astronomically large numbers into human readable form in C/C++"
You can do it fairly easily by starting with a buffer sufficient in size to store the full long long number (and separators), and then using pointer arithmetic to insert a separator every 3rd number. For example, where MAXN is 32, you can do something like:
/** separate long long value every 3rd char into 's' */
#ifndef MAXN
#define MAXN 32   /* buffer size used by callers; the text suggests 32 */
#endif
/*
 * Format `val` into `s` with a space between groups of three digits,
 * e.g. 1931583282 -> "1 931 583 282", -123456 -> "-123 456".
 *
 * `s` must have room for the result: at most 27 bytes for a 64-bit value
 * (sign, 19 digits, 6 separators, terminator). The buffer does not need to
 * be initialized. Returns `s`.
 */
char *sepnumber (char *s, long long val)
{
    char numstr[MAXN] = "";
    int len = snprintf (numstr, sizeof numstr, "%lld", val);
    const char *digits = numstr;
    char *out = s;

    if (*digits == '-') {           /* the sign is not part of any group */
        *out++ = *digits++;
        len--;
    }
    for (int i = 0; i < len; i++) {
        /* A separator goes wherever the number of remaining digits is a
         * non-zero multiple of three, except before the first digit. */
        if (i > 0 && (len - i) % 3 == 0)
            *out++ = ' ';
        *out++ = digits[i];
    }
    *out = '\0';
    return s;
}
A quick example would be:
#include <stdio.h>
#include <stdlib.h>
#define MAXN 32
char *sepnumber (char *s, long long val);
/*
 * Demo: format the number given as the first command-line argument
 * (default 1931583282) and print it next to the unformatted value.
 */
int main (int argc, char **argv) {
    char fmtnum[MAXN] = "";
    long long int lv = 1931583282;

    if (argc > 1)
        lv = strtoll (argv[1], NULL, 10);

    printf (" %lld => %s\n", lv, sepnumber (fmtnum, lv));
    return 0;
}
/** separate long long value every 3rd char into 's' */
#ifndef MAXN
#define MAXN 32   /* buffer size used by callers */
#endif
/*
 * Format `val` into `s` with a space between groups of three digits,
 * e.g. 1931583282 -> "1 931 583 282", -123456 -> "-123 456".
 *
 * `s` must have room for the result: at most 27 bytes for a 64-bit value
 * (sign, 19 digits, 6 separators, terminator). The buffer does not need to
 * be initialized. Returns `s`.
 */
char *sepnumber (char *s, long long val)
{
    char numstr[MAXN] = "";
    int len = snprintf (numstr, sizeof numstr, "%lld", val);
    const char *digits = numstr;
    char *out = s;

    if (*digits == '-') {           /* the sign is not part of any group */
        *out++ = *digits++;
        len--;
    }
    for (int i = 0; i < len; i++) {
        /* A separator goes wherever the number of remaining digits is a
         * non-zero multiple of three, except before the first digit. */
        if (i > 0 && (len - i) % 3 == 0)
            *out++ = ' ';
        *out++ = digits[i];
    }
    *out = '\0';
    return s;
}
Example Use/Output
$ ./bin/sepnum
1931583282 => 1 931 583 282
$ ./bin/sepnum 2212
2212 => 2 212
Here is a simple version without any nonsense. There are probably more efficient ways still, but if so, they do not include string handling or float numbers.
#include <stdio.h>
#include <inttypes.h>
#include <stdbool.h>
/*
 * Write `n` to `out` with a space between groups of three digits,
 * e.g. 1200000 -> "1 200 000". `out` needs room for 14 bytes
 * ("4 294 967 295" plus terminator).
 */
static void format_number_with_spaces (uint32_t n, char *out)
{
    /* Largest power of 1000 not exceeding n (1 for n < 1000). The quotient
     * test cannot overflow: scale stops at 10^9 for 32-bit values. */
    uint32_t scale = 1;
    while (n / scale >= 1000)
    {
        scale *= 1000;
    }

    /* Leading group without padding ... */
    out += sprintf(out, "%" PRIu32, n / scale);

    /* ... every following group zero-padded to three digits, including
     * groups that are entirely zero. */
    while (scale > 1)
    {
        n %= scale;
        scale /= 1000;
        out += sprintf(out, " %.3" PRIu32, n / scale);
    }
}

/*
 * Print `n` to stdout with a space between groups of three digits.
 * No trailing space or newline is printed. The function keeps no state, so
 * it can be called any number of times.
 */
void print_number_with_spaces (uint32_t n)
{
    char text[16];
    format_number_with_spaces(n, text);
    fputs(text, stdout);
}
/* Demo: print 1200000 with digit grouping. argc and argv are unused. */
int main(int argc, char* argv[])
{
print_number_with_spaces(1200000);
}
An easy way to print with spaces separating the thousands places uses recursion. Works for negative values too.
/*
 * Print `x` to stdout with a separator between groups of three digits.
 *
 * Recursion prints the more significant groups first. The leading group is
 * printed with "%d" and therefore carries the sign; every later group is
 * printed as three zero-padded digits of its absolute value.
 */
void print_1000(long long x) {
    const long long upper = x / 1000;   /* everything above the last group */
    const int lower = x % 1000;         /* last group, same sign as x */

    if (!upper) {
        printf("%d", lower);
        return;
    }

    print_1000(upper);
    const char separator = ' ';         // adjust as desired
    printf("%c%03d", separator, abs(lower));
}
Sample usage
/* Demo: zero, a small negative value, and both extremes of long long. */
int main(void) {
print_1000(0); puts("");
print_1000(-1); puts("");
print_1000(LLONG_MAX); puts("");
print_1000(LLONG_MIN); puts("");
return 0;
}
Output
0
-1
9 223 372 036 854 775 807
-9 223 372 036 854 775 808
Another way using a Duff's Device:
#include <stdio.h>
/*
 * Format `value` into `str` with a space between groups of three digits
 * and return `str`. `str` must be large enough for the digits, the
 * separators, an optional sign and the terminator.
 */
static char *sep(char *str, long long value)
{
char tmp[32];
char *p = tmp;
char *q = str;
int len;
len = snprintf(tmp, sizeof tmp, "%lld", value);
if (*p == '-') { // Support for negatives
*q++ = *p++;
len--;
}
/*
* Duff's device: the switch jumps into the middle of the do/while body.
* Consider 5 mod 3
* len % 3 is evaluated only once. E.g. 5 % 3 = 2
* jumps into the middle of the loop (in this case to "case 2:")
* case 2 and case 1 are evaluated
* loops until NUL is reached evaluating all cases
* The first pass copies the leading group of 1, 2 or 3 digits; every
* later pass starts at the top of the body, writes a separator and
* copies three digits.
*/
switch (len % 3) do {
*q++ = ' ';
case 0: *q++ = *p++; /* deliberate fall through */
case 2: *q++ = *p++; /* deliberate fall through */
case 1: *q++ = *p++;
} while (*p);
*q = '\0';
return str;
}
int main(void)
{
char str[32];
printf("%s\n", sep(str, 1200000));
return 0;
}
Output
1 200 000
A quick solution I came up with is:
def group_thousands(number):
    """Return the digit string `number` with a space between groups of three.

    Groups are counted from the right, so '1209873' becomes '1 209 873' and
    '123456' becomes '123 456' (no leading separator). `number` is expected
    to contain digits only; an empty string is returned unchanged.
    """
    groups = []
    # Walk the string from the right in steps of three characters.
    for end in range(len(number), 0, -3):
        groups.append(number[max(end - 3, 0):end])
    return ' '.join(reversed(groups))


number = '1209873'
spaced_number = group_thousands(number)
But I'm sure there are more efficient ways to solve your problem. ;)
I want to convert a float number for example 2.45 to the 4 byte char array.
So 2.45 should look like this: '@' 'FS' 'Ì' 'Í', which are the bytes 0x40 0x1C 0xCC 0xCD of the IEEE 754 single-precision representation of 2.45 (binary 01000000 00011100 11001100 11001101).
I've solved the problem but it has a bad complexity. do you have any good ideas?
Thanks for the good answers.
can you please tell me the way back from the char array to the float number ?
Just use memcpy:
#include <string.h>
float f = 2.45f;
/* One array element per byte of the float's object representation. */
char a[sizeof(float)];
/* Copies the bytes in the machine's native byte order. */
memcpy(a, &f, sizeof(float));
If you require the opposite endianness then it is a trivial matter to reverse the bytes in a afterwards, e.g.
/* Reverse a[] in place: swap the outermost pair and move inwards. */
int i, j;
for (i = 0, j = sizeof(float) - 1; i < j; ++i, --j)
{
char temp = a[i];
a[i] = a[j];
a[j] = temp;
}
You have a few ways of doing this, including these two:
Use typecasting and pointers:
float f = 2.45;
/* s points at the bytes of f itself: no copy is made and nothing
   terminates the bytes, so s must not be treated as a C string. */
char *s = (char *) &f;
Note that this isn't safe in any way and that there is no string terminator after the "string".
Use a union:
/* Overlays a float with an array of the bytes that represent it. */
union u
{
    float f;
    char s[sizeof(float)];   /* sizeof applied to a type needs parentheses */
};
union u foo;
/* Store a float; its bytes can then be read through foo.s. */
foo.f = 2.45;
The char array can now be accessed to get the byte values. Also note like the first alternative there is no string terminator.
one can convert a float to a char using typecasting and pointers as follows:
float t= -2.63646464;
char *float2CharArr;
/* Points at the bytes of t itself; no memory is allocated here. */
float2CharArr = (char*) &t;
Mind you above there's no \0 string terminator. To append it one can do it, for instance, like so:
char* tmp = realloc(float2CharArr, sizeof(*float2CharArr)+1);
tmp[sizeof(*float2CharArr)+1] = '\0';
float2CharArr=tmp;
Another, more elaborate way to convert a float value into a char string can be achieved using the following code below:
// Overlays a float with a 32-bit integer to reach its bit pattern.
union int32_Float_t
{
    int32_t Long;
    float Float;
};
#ifndef HUGE_VALF
#define HUGE_VALF (__builtin_huge_valf())
#endif
#ifndef FLT_MIN_EXP
#define FLT_MIN_EXP (-999)
#endif
#ifndef FLT_MAX_EXP
#define FLT_MAX_EXP (999)
#endif
#define _FTOA_TOO_LARGE -2 // |input| > 2147483520
#define _FTOA_TOO_SMALL -1 // |input| < 0.0000001
//precision 0-9
#define PRECISION 7
//_ftoa function
// Convert f to decimal text in p using integer arithmetic only.
//   f      - value to convert (assumes a 32-bit IEEE 754 float)
//   p      - output buffer; at most 20 bytes are written with PRECISION 7
//   status - set to 0 on success, _FTOA_TOO_LARGE or _FTOA_TOO_SMALL when
//            the binary exponent is outside the supported range; in that
//            case p receives an empty string
void _ftoa(float f, char *p, int *status)
{
    uint32_t bits, mantissa, frac_part;
    int32_t int_part;
    int16_t exp2;
    int32_Float_t x;
    *status = 0;
    if (f == 0.0)
    {
        *p++ = '0';
        *p++ = '.';
        *p++ = '0';
        *p = 0;
        return;
    }
    x.Float = f;
    bits = (uint32_t)x.Long;
    exp2 = (unsigned char)(bits >> 23) - 127;    // unbiased binary exponent
    mantissa = (bits & 0x7FFFFF) | 0x800000;     // 24 bits incl. the implicit 1
    frac_part = 0;
    int_part = 0;
    if (exp2 >= 31)
    {
        *status = _FTOA_TOO_LARGE;
        *p = 0;                                  // never leave the buffer unterminated
        return;
    }
    else if (exp2 < -23)
    {
        *status = _FTOA_TOO_SMALL;
        *p = 0;
        return;
    }
    else if (exp2 >= 23)
    {
        // All mantissa bits are integer bits; at most 31 bits after the shift.
        int_part = (int32_t)(mantissa << (exp2 - 23));
    }
    else if (exp2 >= 0)
    {
        int_part = (int32_t)(mantissa >> (23 - exp2));
        // Unsigned shift: high bits fall off harmlessly before the mask.
        frac_part = (mantissa << (exp2 + 1)) & 0xFFFFFF;
    }
    else
    {
        //if (exp2 < 0)
        frac_part = mantissa >> -(exp2 + 1);
    }
    if (x.Long < 0)
        *p++ = '-';
    if (int_part == 0)
        *p++ = '0';
    else
    {
        // Portable replacement for the non-standard ltoa(): collect the
        // digits least significant first, then emit them in reverse.
        char digits[10];
        int count = 0;
        uint32_t rest = (uint32_t)int_part;
        while (rest != 0)
        {
            digits[count++] = (char)('0' + rest % 10);
            rest /= 10;
        }
        while (count > 0)
            *p++ = digits[--count];
    }
    *p++ = '.';
    if (frac_part == 0)
        *p++ = '0';
    else
    {
        char m;
        // frac_part is a 24-bit binary fraction: multiplying by 10 pushes
        // the next decimal digit into bits 24 and above.
        for (m=0; m<PRECISION; m++)
        {
            //frac_part *= 10;
            frac_part = (frac_part<<3) + (frac_part<<1);
            *p++ = (char)((frac_part>>24) + '0');
            frac_part &= 0xFFFFFF;
        }
        //delete ending zeroes
        for (--p; p[0] == '0' && p[-1] != '.'; --p)
            ;
        ++p;
    }
    *p = 0;
}
Below is an example of how to use it in the Arduino IDE and related coding platforms for MCU programming:
// Arduino entry point: convert a few sample values with _ftoa() and print
// every successful conversion on the serial port.
void setup(void)
{
    int stat;
    char s[20];
    float f[] = { 0.0, -0.0, 42.0, 123.456789, 0.0000018, 555555.555,
                  -888888888.8888888, 11111111.2 };
    Serial.begin(9600);
    for ( unsigned i=0; i<sizeof f / sizeof f[0]; i++ )
    {
        // _ftoa() returns nothing; success is reported through stat.
        _ftoa(f[i], s, &stat);
        if ( stat == 0 )
        {
            Serial.print( "_ftoa: " );
            Serial.println( s );
        }
    }
}
// Intentionally empty: all work is done once in setup().
void loop(void) { }
This source code is posted around the internet in various places, mostly without attribution. For instance here and here. So credit to the anonymous author.
If I had two strings:
a = "1234"
b = "4321"
I could add the two numbers together like this:
/* Add the digits of a and b from the last digit towards the first; the
   digits of the sum are stored in answer in reverse order. */
for(i=0; i<width-1; i++){
/* 48 is the character code of '0'. */
sum = (a[width-2-i]-48) + (b[width-2-i]-48) + carry;
carry = 0;
if(sum > 9){
carry = 1;
sum-=10;
}
answer[i] = sum+48;
}
/* A final carry becomes one more leading digit. */
if(carry) answer[i++] = carry+48;
answer[i]= 0;
And then reverse it (width is equal to strlen(a)).
How could I do the same thing if the strings have different lengths, as in the following?
a = "12345"
b = "4321"
Would I need to reallocate memory? Or what?
(BTW - the problem I'm trying to solve is using many numbers all with 50 digits, so strtoul or strtoull is out of the question as I understand. Here's my code so far.)
/*
 * Return the numeric value of the digit `idx` places from the right end of
 * `s` (idx 0 is the last character), or 0 when `idx` is outside the string.
 */
int getcharval(const char *s, int idx) {
    const size_t n = strlen(s);
    if (!(idx < n))
        return 0;
    return s[n - idx - 1] - 48;   /* 48 is the character code of '0' */
}
/*
 * Add the two decimal strings "1234" and "13210" digit by digit and print
 * the sum. Each pair of digits is also printed while it is processed.
 */
void add() {
    const char *a = "1234";
    const char *b = "13210";
    char answer[256];
    int i, wa=strlen(a), wb=strlen(b), width, sum;
    int carry = 0;                  /* must start at zero for the first digit */
    width = wa > wb ? wa : wb;
    /* Digits are produced least significant first (reversed). */
    for(i=0; i<width; i++){
        char ca = getcharval(a, i);
        char cb = getcharval(b, i);
        printf("%d %d\n", ca, cb);
        sum = ca + cb + carry;
        carry = 0;
        if(sum > 9){
            carry = 1;
            sum-=10;
        }
        answer[i] = sum+48;
    }
    if(carry) answer[i++] = carry+48;
    answer[i]= 0;
    /* Reverse in place; i is the length of the answer. */
    for (int lo = 0, hi = i - 1; lo < hi; lo++, hi--) {
        char t = answer[lo];
        answer[lo] = answer[hi];
        answer[hi] = t;
    }
    printf("%s\n", answer);
}
If you insist on using the "elementary school addition", find the length of both strings, advance to their ends, and then move back until the shorter string's length is exhausted. Then continue moving in only the longer string, assuming that the remaining digits of the shorter string are zeros:
12345
04321
You need to move all the way to the beginning of the longer string, and process the carry there. Note that you need to allocate a new result anyway, because adding two N-digit numbers may result in a N+1-digit number due to the carry.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define c2d(c) ((c)-'0')   /* digit character -> value */
#define d2c(c) ((c)+'0')   /* value -> digit character */
/*
 * Add the non-negative decimal strings a and b and store the sum in ans.
 *
 * ans must have room for max(strlen(a), strlen(b)) + 2 bytes (one possible
 * carry digit plus the terminator) and must not overlap a or b.
 * Uses only standard library functions and allocates no memory.
 * Returns ans.
 */
char* add(const char *a, const char *b, char *ans){
    size_t alen = strlen(a);
    size_t blen = strlen(b);
    int carry = 0;
    if(alen<blen){
        /* make a the longer operand */
        const char *ts = a; a = b; b = ts;
        size_t tl = alen; alen = blen; blen = tl;
    }
    /* Fill ans[1..alen] from the right; ans[0] is reserved for a carry. */
    ans[alen+1]='\0';
    for(size_t ai=alen, bi=blen; ai>0; ){
        int sum = c2d(a[--ai])+(bi>0 ? c2d(b[--bi]) : 0)+carry;
        ans[ai+1] = d2c(sum % 10);
        carry = sum / 10;
    }
    if(carry){
        ans[0]='1';
    }else{
        /* no carry digit: shift the result (with terminator) to the front */
        memmove(ans, ans+1, alen+1);
    }
    return ans;
}
/* Demo: prints {12345}+{4321}={16666}. */
int main(){
    const char *a="12345";
    const char *b="4321";
    /* add() needs max length + 2 bytes: 5 digits, a possible carry digit
       and the terminator. */
    char ans[7];
    printf("{%s}+{%s}={%s}\n", a, b, add(a,b, ans));
    return 0;
}
Building on the answer to "C - Adding the numbers in 2 strings together if a different length",
I wrote a more readable version:
/*
 * Reverse the characters in the inclusive range [beg, end] in place.
 * Does nothing when either pointer is null or the range is empty.
 */
void str_reverse(char *beg, char *end){
    if(beg == NULL || end == NULL)
        return;
    for(; beg < end; ++beg, --end){
        const char held = *end;
        *end = *beg;
        *beg = held;
    }
}
#define c2d(c) ((c) - '0')   /* digit character -> value */
#define d2c(d) ((d) + '0')   /* value -> digit character */
/*
 * Add the non-negative decimal strings s1 and s2 and store the sum as a
 * NUL-terminated string in s_ret.
 *
 * s_ret must have room for max(strlen(s1), strlen(s2)) + 2 bytes and must
 * not overlap the inputs. It does not need to be initialized: the result is
 * always terminated here.
 */
void str_add(const char* s1, const char* s2, char* s_ret){
    size_t max_len = strlen(s1);
    size_t min_len = strlen(s2);
    const char *ps_max = s1;
    const char *ps_min = s2;
    if(min_len > max_len){
        /* make ps_max the longer operand */
        const char *ts = ps_max; ps_max = ps_min; ps_min = ts;
        size_t tl = max_len; max_len = min_len; min_len = tl;
    }
    int carry = 0;
    /* Fill s_ret[1..max_len] from the right; s_ret[0] is reserved for a
       final carry digit. */
    s_ret[max_len + 1] = '\0';
    for (size_t i = max_len, k = min_len; i > 0; ) {
        int sum = c2d(ps_max[--i]) + (k > 0 ? c2d(ps_min[--k]) : 0) + carry;
        carry = sum / 10;
        s_ret[i + 1] = d2c(sum % 10);
    }
    if(carry)
        s_ret[0] = '1';
    else
        memmove(s_ret, s_ret + 1, max_len + 1);   /* drop the unused slot */
}
test code as below:
// Runs str_add() on three pairs of operands and prints each sum on its own
// line: equal lengths, different lengths, and a carry into a new digit.
void test_str_str_add(){
    const char *cases[][2] = {
        { "123",   "456"    },
        { "123",   "456789" },
        { "99999", "21"     },
    };
    for (const auto &operands : cases) {
        char sum[10] = {'\0'};
        str_add(operands[0], operands[1], sum);
        std::cout<<sum<<std::endl;
    }
}
output:
579
456912
100020
/*
 * Return the numeric value of the digit character x.
 * Returns 0 when len is negative (the caller has run past the start of its
 * number) or when x is not a decimal digit.
 */
int num(char x,int len){
    if(len <0)
        return 0;
    /* '0' must map to 0; a chain of comparisons that falls through to 9
       gets that case wrong. */
    return (x >= '0' && x <= '9') ? x - '0' : 0;
}
/*
 * Add two decimal numbers given as strings of different lengths and print
 * the sum. Digits are added from the right; once one number is exhausted
 * its missing digits count as 0.
 */
int main(){
    int result[100];              /* digits of the sum, least significant first */
    int i=0;
    char num1[] = "123456789123456789";
    char num2[] = "1234567811111111111111111111";
    int carry = 0;
    int l1= strlen(num1)-1;       /* index of the current digit in num1 */
    int l2 = strlen(num2)-1;      /* index of the current digit in num2 */
    int result1;
    while(1){
        if(l1 < 0 && l2 <0 && carry == 0)
            break;
        /* Check the index before using it: reading num1[-1] or num2[-1]
           would access memory in front of the array. */
        result1 = (l1 >= 0 ? num(num1[l1],l1) : 0)
                + (l2 >= 0 ? num(num2[l2],l2) : 0)
                + carry;
        l1--;
        l2--;
        carry = result1 / 10;
        result[i] = (result1 % 10);
        i++;
    }
    i--;
    printf("\n");
    /* Print the most significant digit first. */
    while(i>=0){
        printf("%d",result[i]);
        i--;
    }
    return 0;
}