converting binary to corresponding ASCII character - c

So I'm trying to create a program where the user enters a 12-bit binary hamming code sequence like "100010010001" and it should print out its corresponding ASCII character, which in this case is 'A'.
I'm trying to get my program to ignore the 4 parity bits which are positioned in _ _ 0 _ 1 0 0 _ 0 0 0 1 and shift the other 8 bits over so they're together. In the else statement, I tried to convert the remaining 8 bits to a character. When I attempt to run the program however, the program crashes after I type my binary sequence and press enter. This is the part of the program that I'm struggling with and I was wondering if someone could help me or give me hints as to what I'm doing wrong?
char charToBin(char usersInput[]) {
char c = " ";
for (int i = 12; i >= 0; i--) {
if((i == 0) || (i == 1) || (i == 3) || (i == 7)){
usersInput[i] = usersInput[i + 1];
}else{
c = strtol(usersInput[i], (char **)NULL, 2);
}
}
return c;
}

For your code, you can't use "strtol" without a twist. The char array that you give to "strtol" may not end with "\0". Also, does not matter what you do your array will always have 12 indexes unless you copy a "\0" to index 9 so that "strtol" know that it is the end of the input.
Also, sometimes loops are not the best. For your case, you already know how many indexes you are working with. There is no point in using a loop. Nonetheless, I wrote two methods and included the test code as an example below.
#include <stdio.h>
/*
* This function generate a hammer binary digit string for testing.
* It does not care about the validity of the hammer bit.
* The array that is passed to this function should be the length of 12.
*/
void generateChar(int value, char * output){
output[0] = '0';
output[1] = '0';
output[3] = '0';
output[7] = '0';
output[2] = (value & 0b10000000) > 0? '1' : '0';
output[4] = (value & 0b01000000) > 0? '1' : '0';
output[5] = (value & 0b00100000) > 0? '1' : '0';
output[6] = (value & 0b00010000) > 0? '1' : '0';
output[8] = (value & 0b00001000) > 0? '1' : '0';
output[9] = (value & 0b00000100) > 0? '1' : '0';
output[10] = (value & 0b00000010) > 0? '1' : '0';
output[11] = (value & 0b00000001) > 0? '1' : '0';
}
/*
* First method.
*
*/
char charToBin(char usersInput[]) {
char c = 0;
c = usersInput[2] == '1'? c | 0b10000000 : c;
c = usersInput[4] == '1'? c | 0b01000000 : c;
c = usersInput[5] == '1'? c | 0b00100000 : c;
c = usersInput[6] == '1'? c | 0b00010000 : c;
c = usersInput[8] == '1'? c | 0b00001000 : c;
c = usersInput[9] == '1'? c | 0b00000100 : c;
c = usersInput[10] == '1'? c | 0b00000010 : c;
c = usersInput[11] == '1'? c | 0b00000001 : c;
return c;
}
/*
* Second method.
*/
char charToBin2(char usersInput[]) {
char temp[9];
int pos = 0;
temp[8] = '\0'; // Protect from overflow.
for ( int i = 2; i < 12; i++ ){
if ( i == 3 || i == 7 ) continue;
temp[pos] = usersInput[i];
pos++;
}
return (char) strtol(temp, (char **)NULL, 2);
}
int main(){
char a[] = "100010010001";
char t[12];
int b;
// Test for method 1
for ( int i = 0; i < 256; i++ ){
generateChar(i, t);
b = charToBin(t);
printf("%d ", (unsigned char) b );
}
printf("\n\n");
// Test for method 2
for ( int i = 0; i < 256; i++ ){
generateChar(i, t);
b = charToBin2(t);
printf("%d ", (unsigned char) b );
}
return 0;
}

if((i == 0) || (i == 1) || (i == 3) || (i == 7)){
usersInput[i] = usersInput[i + 1];
}else{
In here your if (condition), the curly bracket after that is not necessary.
if((i == 0) || (i == 1) || (i == 3) || (i == 7))
usersInput[i] = usersInput[i + 1];
else{
that would fix a bit maybe

You program has two compile error:
You can not assign string to character ( c= " ") ;
The strtol call takes a string, not a character
After fixing the compile error, two fixes are needed to logic:
1. Perform the filtering of the input string from left to right, to avoid copying position 12 to 11, 11 to 10, which will result in duplicating the last positions. An extra counter is needed to help with the compaction.
2. Perform the strtol once, after the input is fully compacted.
char charToBin(char usersInput[]) {
char j = 0 ;
// Copy relevant input positions, INCLUDING terminating NUL byte at position 12.
for (int i = 0; i <= 12 ; i++) {
if((i == 0) || (i == 1) || (i == 3) || (i == 7)){
continue ;
} ;
usersInput[j] = usersInput[i] ;
j++ ;
} ;
char c = strtol(usersInput, (char **)NULL, 2);
return c;
}

Alternatively you could use bit operations. Something like:
char charToBin(char usersInput[]) {
unsigned c = strtol(usersInput, NULL, 2);
unsigned part1 = c & 0xFu;
unsigned part2 = c >> 1u & 0x70u;
unsigned part3 = c >> 2u & 0x80u;
return (char) (part1 | part2 | part3);
}
Which would give with your input of
char userInput[] = "100010010001";
char ch = charToBin(userInput);
printf("result: %c(%d)\n", ch, ch);
the following output on the console:
result: A(65)

Related

A function encode to convert a number in base 10 to a number in base 'b' don't print nothing

I created this function to convert a number 'd' in base 10 to a number x in base 'b', unfortunately the function does not print, could someone help me?
// I don't understand why it doesn't print me the encode
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 20
// the function created to convert a number 'd' in base 10 to a number in base 'b'
char *encode(unsigned int d, unsigned char b){
if(b<2||b>16) return NULL;
char * resto = calloc (SIZE, sizeof(char));
int i=1;
while(d>0){//rimanenza d%b
char rimanenza = d%b ;
d = d/b;
if (b>10 && b<=16){//if the base is between 10 and 16 'rimanenza' can't be a number
if (rimanenza == 10) rimanenza = 'A';
if (rimanenza == 11) rimanenza = 'B';
if (rimanenza == 12) rimanenza = 'C';
if (rimanenza == 13) rimanenza = 'D';
if (rimanenza == 14) rimanenza = 'E';
if (rimanenza == 15) rimanenza = 'F';
}// save into resto from the end
resto [SIZE - i] = rimanenza;
i++;
}
return resto ;
}
int main (){
unsigned int d = 126;
unsigned char b = 3;
char *encoded = encode (d,b);
printf ("%u in base %u = %s\n",d,b,encoded);
free(encoded);
}
At least these problems:
Digit not converted to correct character when rimanenza < 10
When rimanenza is in the [0...9] range, convert it to characters ['0'...'9'].
rimanenza += '0';
while(d>0){ fails to form "0" when d originally 0.
Return wrong offset
return resto ; fails to return the beginning of the populated part of resto[].
More like:
// return resto ;
return memmove(resto, &resto[SIZE - i - 1], SIZE - i);
Fails to test if calloc() failed
Size too small
#define SIZE 20 is insufficient for encode(UINT_MAX, 2). #M Oehm
Lack of trailing null character
Code needs a '\0' after all the characters to be a string.
Code allocation is not sized to the data
Consider only allocating what is needed.
Untested repairs
#include <limits.h>
#define ENCODE_SZ (sizeof (unsigned) * CHAR_BIT + 1) // Size buffer to base 2 worst case
char* encode(unsigned d, unsigned b) {
if (b < 2 || b > 16) {
return NULL;
}
char buffer[ENCODE_SZ];
char *p = &buffer[ENCODE_SZ - 1]; // Point to last array element.
*p = '\0'; // Terminate array with a null character.
// Use a do loop to make sure code assigns at least 1 character.
do {
unsigned digit = d % b;
// Look-up character;
char ch = "0123456789ABCDEF"[digit];
p--;
*p = ch;
d /= b;
} while (d > 0);
// Determine string length
unsigned length = &buffer[ENCODE_SZ - 1] - p;
unsigned size = length + 1;
char *resto = malloc(size);
if (resto == NULL) {
return NULL;
}
// Copy and return.
return strcpy(resto, p);
}

C programming : String calculation Using ASCII code [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
If I get the numbers in character string like this and store them in array x[100] and y[100],
x[100] '1' '0' '0' '0' '\0'
y[100] '9' '9' '\0'
The added result should be stored as string, too.
result[100] '1' '0' '9' '9' '\0'
I have tried to use ASCII code to solve this problem, for example:
char number[2] = {'1','2'};
char result;
result = (number[0] - '0') + (number[1] - '0') + '0';
But I am having trouble adjusting digits of x[100] and y[100].
I really need your help :(
You may insist on avoiding the library functions to convert strings to numbers and vice versa, as mentioned by #sjsam.
If you think about it, numbers really start at the LSD (least significant digit, not acid), or on the right side. So start your loop at the last character of each array before the closing \0 and iterate backwards. It won't be trivial, but that's how atoi() works too.
It's rather unfortunate that the Romans started writing from left to right, but that's the root cause of this problem. Otherwise integration of right-to-left Arabic numbers would have been much easier.
Discussing the idiocy of Roman numerals is out of the scope of this answer, let's suffice it to say that their lack of zero prevented any of their programs to finish with a successful exit status, which in turn led to the collapse of the Roman Empire.
Instead of adding byte by byte you may use the functions atoi to convert a string to integer and once you calculate the sum, you may use itoa/snprintf to convert the sum(integer) to its string representation.
See an example below :
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
int main(void)
{
char buffer[100];
int no1,no2,no3;
char number1[] = {'1','0','0','0','\0'};
char number2[] = {'9','0','0','\0'};
/* Converting strings to respective integers */
no1=atoi(number1);
no2=atoi(number2);
no3=no1+no2;
/* Convert integer to a null terminated string
* You could also use itoa(no3,buffer,10);
*/
snprintf(buffer,99,"%d",no3);
printf("no1 : %d\n",no1);
printf("no2 : %d\n",no2);
printf("no3 : %s\n",buffer); // printing the string
return 0;
}
itoa is not supported by some compilers, so the best bet is to use snprintf.
Output
Here buffer is a null terminated array
{'1','9','0','0','\0'}
which we have printed using the %s format specifier.
References
itoa manaul.
snprintf manual.
Note
I have set the buffer size to 100 for this example. However, the maximum value of number of bytes that buffer can hold is the depends upon the maximum value an integer(signed in your case) can contain in your system. (Thanks #BLUEPIXY for reminding this stuff. :) )
Sum ASCII numbers while avoiding converting to binary/decimal:
#include <stdio.h>
#include <string.h>
#include <libc.h>
char *sum(char *x, char *y) {
size_t x_idx = strlen(x);
size_t y_idx = strlen(y);
size_t z_idx = MAX(x_idx, y_idx) + 1;
char *z = malloc(z_idx + 1);
char carry = '0';
while (x_idx > 0 || y_idx > 0 || carry == '1') {
char digit = carry;
if (x_idx > 0 && y_idx > 0) {
digit = (x[--x_idx] + y[--y_idx] + carry) % '0' + '0';
} else if (x_idx > 0) {
digit = (x[--x_idx] + carry) % '0' + '0';
} else if (y_idx > 0) {
digit = (y[--y_idx] + carry) % '0' + '0';
}
carry = (digit > '9') ? '1' : '0';
if (carry == '1') {
digit -= 10;
}
z[--z_idx] = digit;
}
while (z_idx > 0) {
z[--z_idx] = ' '; // pad for now; for production, shift array
}
return z;
}
int main(int argc, char* argv[]) {
char *x = argv[1];
char *y = argv[2];
char *z = sum(x, y);
printf("%s + %s = %s\n", x, y, z);
free(z);
}
USAGE
> ./a.out 1000 99
1000 + 99 = 1099
>
> ./a.out 999 999
999 + 999 = 1998
>
#include <stdio.h>
#include <string.h>
int main(void) {
char x[100] = "1000";
char y[100] = "99";
char result[100+1] = " ";
int x_i = strlen(x)-1;
int y_i = strlen(y)-1;
int r_i = 1 + ((x_i > y_i) ? x_i + 1 : y_i + 1);
int carray = 0, sum;
result[r_i] = 0;
while(--r_i>0){
if(x_i >= 0 && y_i >= 0)
sum = x[x_i--] - '0' + y[y_i--] - '0' + carray;
//else if(x_i < 0 && y_i < 0)
// sum = carray;
else if(y_i < 0)// && x_i >= 0){
sum = x[x_i--] - '0' + carray;
else// if (x_i < 0 && y_i >= 0){
sum = y[y_i--] - '0' + carray;
carray = sum > 9;
result[r_i] = sum % 10 + '0';
}
if(carray)
result[0] = '1';
printf("%s\n", result);
return 0;
}
Here's some code that should give you a good idea.
This begins to handle carrying, but doesn't cover all cases.
It should be a good start.
#include <stdio.h>
#include <string.h>
int main(void) {
char y[100] = "1032";
char x[100] = "2399";
int carry = 0;
char* b = (strlen(x) > strlen(y))? x : y;
char* s = (strlen(x) <= strlen(y))? x : y;
for(int i=strlen(s)-1, j=strlen(b)-1; i>=0; --i,--j)
{
b[j] = (b[j]+s[i]+carry-'0');
carry = 0;
if (b[j] > '9')
{
b[j] = (b[j]-'0')%10+'0';
carry = 1;
}
}
puts(b);
return 0;
}

C string to int without any libraries

I'm trying to write my first kernel module so I'm not able to include libraries for atoi, strtol, etc. How can I convert a string to int without these built-in functions? I tried:
int num;
num = string[0] - '0';
which works for the first character, but if I remove the [0] to try and convert the full string it gives me a warning: assignment makes integer from pointer without a cast. So what do I do?
When creating your own string to int function, make sure you check and protect against overflow. For example:
/* an atoi replacement performing the conversion in a single
pass and incorporating 'err' to indicate a failed conversion.
passing NULL as error causes it to be ignored */
int strtoi (const char *s, unsigned char *err)
{
char *p = (char *)s;
int nmax = (1ULL << 31) - 1; /* INT_MAX */
int nmin = -nmax - 1; /* INT_MIN */
long long sum = 0;
char sign = *p;
if (*p == '-' || *p == '+') p++;
while (*p >= '0' && *p <= '9') {
sum = sum * 10 - (*p - '0');
if (sum < nmin || (sign != '-' && -sum > nmax)) goto error;
p++;
}
if (sign != '-') sum = -sum;
return (int)sum;
error:
fprintf (stderr, "strtoi() error: invalid conversion for type int.\n");
if (err) *err = 1;
return 0;
}
You can't remove the [0]. That means that you are subtracting '0' from the pointer string, which is meaningless. You still need to dereference it:
num = string[i] - '0';
A string is an array of characters, represented by an address (a.k.a pointer).
An pointer has an value that might look something like 0xa1de2bdf. This value tells me where the start of the array is.
You cannot subtract a pointer type with a character type (e.g 0xa1de2bdf - 'b' does not really make sense).
To convert a string to a number, you could try this:
//Find the length of the string
int len = 0;
while (str[len] != '\0') {
len++;
}
//Loop through the string
int num = 0, i = 0, digit;
for (i=0; i<len; i++) {
//Extract the digit
digit = ing[i] - '0';
//Multiply the digit with its correct position (ones, tens, hundreds, etc.)
num += digit * pow(10, (len-1)-i);
}
Of course if you are not allowed to use math.h library, you could write your own pow(a,b) function which gives you the value of a^b.
int mypowfunc(int a, int b) {
int i=0, ans=1;
//multiply the value a for b number of times
for (i=0; i<b; i++) {
ans *= a;
}
return ans;
}
I have written the code above in a way that is simple to understand. It assumes that your string has a null character ('\0') right behind the last useful character (which is good practice).
Also, you might want to check that the string is actually a valid string with only digits (e.g '0', '1', '2', etc.). You could do this by including an if... else.. statement while looping through the string.
In modern kernels you want to use kstrto*:
http://lxr.free-electrons.com/source/include/linux/kernel.h#L274
274 /**
275 * kstrtoul - convert a string to an unsigned long
276 * #s: The start of the string. The string must be null-terminated, and may also
277 * include a single newline before its terminating null. The first character
278 * may also be a plus sign, but not a minus sign.
279 * #base: The number base to use. The maximum supported base is 16. If base is
280 * given as 0, then the base of the string is automatically detected with the
281 * conventional semantics - If it begins with 0x the number will be parsed as a
282 * hexadecimal (case insensitive), if it otherwise begins with 0, it will be
283 * parsed as an octal number. Otherwise it will be parsed as a decimal.
284 * #res: Where to write the result of the conversion on success.
285 *
286 * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error.
287 * Used as a replacement for the obsolete simple_strtoull. Return code must
288 * be checked.
289 */
This function skips leading and trailing whitespace, handles one optional + / - sign, and returns 0 on invalid input,
// Convert standard null-terminated string to an integer
// - Skips leading whitespaces.
// - Skips trailing whitespaces.
// - Allows for one, optional +/- sign at the front.
// - Returns zero if any non-+/-, non-numeric, non-space character is encountered.
// - Returns zero if digits are separated by spaces (eg "123 45")
// - Range is checked against Overflow/Underflow (INT_MAX / INT_MIN), and returns 0.
int StrToInt(const char* s)
{
int minInt = 1 << (sizeof(int)*CHAR_BIT-1);
int maxInt = -(minInt+1);
char* w;
do { // Skip any leading whitespace
for(w=" \t\n\v\f\r"; *w && *s != *w; ++w) ;
if (*s == *w) ++s; else break;
} while(*s);
int sign = 1;
if ('-' == *s) sign = -1;
if ('+' == *s || '-' == *s) ++s;
long long i=0;
while('0' <= *s && *s <= '9')
{
i = 10*i + *s++ - '0';
if (sign*i < minInt || maxInt < sign*i)
{
i = 0;
break;
}
}
while (*s) // Skip any trailing whitespace
{
for(w=" \t\n\v\f\r"; *w && *s != *w; ++w) ;
if (*w && *s == *w) ++s; else break;
}
return (int)(!*s*sign*i);
}
" not able to include libraries" --> Unclear if code is allowed access to INT_MAX, INT_MIN. There is no way to determine the minimum/maximum signed integer in a completely portable fashion without using the language provided macros like INT_MAX, INT_MIN.
Use INT_MAX, INT_MIN is available. Else we could guess the char width is 8. We could guess there are no padding bits. We could guess that integers are 2's complement. With these reasonable assumptions, minimum and maximum are defined below.
Note: Shifting into the sign bit is undefined behavior (UB), so don't do that.
Let us add another restriction: make a solution that works for any signed integer from signed char to intmax_t. This disallows code from using a wider type, as there may not be a wider type.
typedef int Austin_int;
#define Austin_INT_MAXMID ( ((Austin_int)1) << (sizeof(Austin_int)*8 - 2) )
#define Austin_INT_MAX (Austin_INT_MAXMID - 1 + Austin_INT_MAXMID)
#define Austin_INT_MIN (-Austin_INT_MAX - 1)
int Austin_isspace(int ch) {
const char *ws = " \t\n\r\f\v";
while (*ws) {
if (*ws == ch) return 1;
ws++;
}
return 0;
}
// *endptr points to where parsing stopped
// *errorptr indicates overflow
Austin_int Austin_strtoi(const char *s, char **endptr, int *errorptr) {
int error = 0;
while (Austin_isspace(*s)) {
s++;
}
char sign = *s;
if (*s == '-' || *s == '+') {
s++;
}
Austin_int sum = 0;
while (*s >= '0' && *s <= '9') {
int ch = *s - '0';
if (sum <= Austin_INT_MIN / 10 &&
(sum < Austin_INT_MIN / 10 || -ch < Austin_INT_MIN % 10)) {
sum = Austin_INT_MIN;
error = 1;
} else {
sum = sum * 10 - ch;
}
s++;
}
if (sign != '-') {
if (sum < -Austin_INT_MAX) {
sum = Austin_INT_MAX;
error = 1;
} else {
sum = -sum;
}
}
if (endptr) {
*endptr = (char *) s;
}
if (errorptr) {
*errorptr = error;
}
return sum;
}
The above depends on C99 or later in the Austin_INT_MIN Austin_INT_MIN % 10 part.
This is the cleanest and safest way I could come up with
int str_to_int(const char * str, size_t n, int * int_value) {
int i;
int cvalue;
int value_muliplier = 1;
int res_value = 0;
int neg = 1; // -1 for negative and 1 for whole.
size_t str_len; // String length.
int end_at = 0; // Where loop should end.
if (str == NULL || int_value == NULL || n <= 0)
return -1;
// Get string length
str_len = strnlen(str, n);
if (str_len <= 0)
return -1;
// Is negative.
if (str[0] == '-') {
neg = -1;
end_at = 1; // If negative 0 item in 'str' is skipped.
}
// Do the math.
for (i = str_len - 1; i >= end_at; i--) {
cvalue = char_to_int(str[i]);
// Character not a number.
if (cvalue == -1)
return -1;
// Do the same math that is down below.
res_value += cvalue * value_muliplier;
value_muliplier *= 10;
}
/*
* "436"
* res_value = (6 * 1) + (3 * 10) + (4 * 100)
*/
*int_value = (res_value * neg);
return 0;
}
int char_to_int(char c) {
int cvalue = (int)c;
// Not a number.
// 48 to 57 is 0 to 9 in ascii.
if (cvalue < 48 || cvalue > 57)
return -1;
return cvalue - 48; // 48 is the value of zero in ascii.
}

How does this function for reading a number from input work?

How has the string been manipulated to integer and what is use of bitwise operators..this function has been used in c to take input from string of numbers
"gc and ll are defined like this.. typedef long long LL;
#define gc getchar_unlocked()"
inline LL inp()
{
LL num = 0;
char p = gc;
while(p<33)p=gc;
while(p>33) {
num = (num << 3)+ (num << 1)+ (p -'0');
p = gc;
}
return num;
};
I gather that char p = gc obtains a character from the input.
while(p<33)p=gc; just keeps getting input until something other than a space is entered? (space is character 32 in decimal).
Then, while the input isn't a space,
(num << 3) + (num << 1) is equivalent to num * 10 (num << 3 is equivalent to num * 8, num << 1 is equivalent to num * 2, num * 8 + num * 2 can be written as num * (8+2) which simplifies to num * 10).
p - '0' turns the input character (say '9' [char 57] for example) into the corresponding integer (eg, just 9).
If the input is "123 " then num will be equal to 123, because:
num = 0;
num = 0 * 10 + 1; (== 1)
num = 1 * 10 + 2; (== 12)
num = 12 * 10 + 3; (== 123)
I hope that sheds some light. It is pretty bad code, and will not behave correctly if anything other than the numbers 0-9 are entered.
Writing it like this might be better:
// function to read one character from the 'input'
char gc();
// I'm not sure what LL is here, could be long long?
inline LL inp()
{
LL num = 0;
char p = gc();
// keep reading characters until a number is encountered
while((p < '0') || (p > '9'))
{
p = gc();
}
// loop until a non-number is encountered
while((p >= '0') && (p <= '9'))
{
// Shift the previously read digits up by one 'tens' column
num *= 10;
// Add in the newly read digit
num += p -'0';
// Read the next character
p = gc();
}
return num;
}

A possible algorithm for determining whether two strings are anagrams of one another? [closed]

Closed. This question is off-topic. It is not currently accepting answers.
Want to improve this question? Update the question so it's on-topic for Stack Overflow.
Closed 10 years ago.
Improve this question
I have this idea (using C language) for checking whether two strings formed from ASCII letters are anagrams of one another:
Check if the strings are the same length.
Check if the sum of the ASCII values of all chars is the same for both strings.
Check if the product of the ASCII values of all chars is the same for both strings.
I believe that if all three are correct, then the strings must be anagrams of one another. However, I can't prove it. Can someone help me prove or disprove that this would work?
Thanks!
I wrote a quick program to brute-force search for conflicts and found that this approach does not always work. The strings ABFN and AAHM have the same ASCII sum and product, but are not anagrams of one another. Their ASCII sum is 279 and ASCII product is 23,423,400.
There are a lot more conflicts than this. My program, searching over all length-four strings, found 11,737 conflicts.
For reference, here's the C++ source code:
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
int main() {
/* Sparse 2D table where used[sum][prod] is either nothing or is a string
* whose characters sum to "sum" and whose product is "prod".
*/
map<int, map<int, string> > used;
/* List of all usable characters in the string. */
vector<char> usable;
for (char ch = 'A'; ch <= 'Z'; ch++) {
usable.push_back(ch);
}
for (char ch = 'a'; ch <= 'z'; ch++) {
usable.push_back(ch);
}
/* Brute-force search over all possible length-four strings. To avoid
* iterating over anagrams, the search only explores strings whose letters
* are in increasing ASCII order.
*/
for (int a = 0; a < usable.size(); a++) {
for (int b = a; b < usable.size(); b++) {
for (int c = b; c < usable.size(); c++) {
for (int d = c; d < usable.size(); d++) {
/* Compute the sum and product. */
int sum = usable[a] + usable[b] + usable[c] + usable[d];
int prod = usable[a] * usable[b] * usable[c] * usable[d];
/* See if we have already seen this. */
if (used.count(sum) &&
used[sum].count(prod)) {
cout << "Conflict found: " << usable[a] << usable[b] << usable[c] << usable[d] << " conflicts with " << used[sum][prod] << endl;
}
/* Update the table. */
used[sum][prod] = string() + usable[a] + usable[b] + usable[c] + usable[d];
}
}
}
}
}
Hope this helps!
Your approach is false; I can't explain why because I don't understand it, but there are different sets at least for cardinality 3 that have the same sum and product: https://math.stackexchange.com/questions/38671/two-sets-of-3-positive-integers-with-equal-sum-and-product
The letters a-z and A-Z are used to index an array of 26 primes, and the product of these primes is used as a hash value for the word. Equal product <--> same letters.
(the order of the hashvalues in the primes26[] array in the below fragment is based on the letter frequencies in the Dutch language, as an attempt mimimise the expected product)
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define COUNTOF(a) (sizeof (a)/ sizeof (a)[0])
typedef unsigned long long HashVal;
HashVal hashmem (char *str, size_t len);
unsigned char primes26[] =
{
5,71,79,19,2,83,31,43,11,53,37,23,41,3,13,73,101,17,29,7,59,47,61,97,89,67,
};
struct anahash {
struct anahash *next;
unsigned freq;
HashVal hash;
char word[1];
};
struct anahash *hashtab[1024*1024] = {NULL,};
struct anahash *new_word(char *str, size_t len);
struct anahash **hash_find(struct anahash *wp);
/*********************************************/
HashVal hashmem (char *str, size_t len)
{
size_t idx;
HashVal val=1;
if (!len) return 0;
for (idx = 0; idx < len; idx++) {
char ch = str[idx];
if (ch >= 'A' && ch <= 'Z' ) val *= primes26[ ch - 'A'];
else if (ch >= 'a' && ch <= 'z' ) val *= primes26[ ch - 'a'];
else continue;
}
return val;
}
struct anahash *new_word(char *str, size_t len)
{
struct anahash *wp;
if (!len) len = strlen(str);
wp = malloc(len + sizeof *wp );
wp->hash = hashmem(str, len);
wp->next = NULL;
wp->freq = 0;
memcpy (wp->word, str, len);
wp->word[len] = 0;
return wp;
}
struct anahash **hash_find(struct anahash *wp)
{
unsigned slot;
struct anahash **pp;
slot = wp->hash % COUNTOF(hashtab);
for (pp = &hashtab[slot]; *pp; pp= &(*pp)->next) {
if ((*pp)->hash < wp->hash) continue;
if (strcmp( wp->word, (*pp)->word ) > 0) continue;
break;
}
return pp;
}
char buff [16*4096];
int main (void)
{
size_t pos,end;
struct anahash *wp, **pp;
HashVal val;
memset(hashtab, 0, sizeof hashtab);
while (fgets(buff, sizeof buff, stdin)) {
for (pos=0; pos < sizeof buff && buff[pos]; ) {
for(end = pos; end < sizeof buff && buff[end]; end++ ) {
if (buff[end] < 'A' || buff[end] > 'z') break;
if (buff[end] > 'Z' && buff[end] < 'a') break;
}
if (end > pos) {
wp = new_word(buff+pos, end-pos);
if (!wp) {pos=end; continue; }
pp = hash_find(wp);
if (!*pp) *pp = wp;
else if ((*pp)->hash == wp->hash
&& !strcmp((*pp)->word , wp->word)) free(wp);
else { wp->next = *pp; *pp = wp; }
(*pp)->freq +=1;
}
pos = end;
for(end = pos; end < sizeof buff && buff[end]; end++ ) {
if (buff[end] >= 'A' && buff[end] <= 'Z') break;
if (buff[end] >= 'z' && buff[end] <= 'a') break;
}
pos = end;
}
}
for (pos = 0; pos < COUNTOF(hashtab); pos++) {
if (! &hashtab[pos] ) continue;
for (pp = &hashtab[pos]; wp = *pp; pp = &wp->next) {
if (val != wp->hash) {
fprintf (stdout, "\nSlot:%u:\n", pos );
val = wp->hash;
}
fprintf (stdout, "\t%llx:%u:%s\n", wp->hash, wp->freq, wp->word);
}
}
return 0;
}
Thanks for such a great question! Instead of trying to disprove your proposition altogether, I spent sometime trying to find ways to augment it so it becomes true. I have the sense that if the standard deviations are equal then the two are equal. But instead of testing that far, I do a simpler test and have not found a counter example as yet. Here is what I have tested:
In addition to the conditions you mentioned before,
ASCII square-root of the sum of the squares must be equal:
I use the following python program. I have no complete proof, but maybe my response will help. Anyway, take a look.
from math import sqrt
class Nothing:
def equalString( self, strA, strB ):
prodA, prodB = 1, 1
sumA, sumB = 0, 0
geoA, geoB = 0, 0
for a in strA:
i = ord( a )
prodA *= i
sumA += i
geoA += ( i ** 2 )
geoA = sqrt( geoA )
for b in strB:
i = ord( b )
prodB *= i
sumB += i
geoB += ( i ** 2 )
geoB = sqrt( geoB )
if prodA == prodB and sumA == sumB and geoA == geoB:
return True
else:
return False
def compareStrings( self ):
first, last = ord( 'A' ), ord( 'z' )
for a in range( first, last + 1 ):
for b in range( a, last + 1 ):
for c in range( b, last + 1 ):
for d in range( c, last + 1 ):
strA = chr( a ) + chr( b ) + chr( c ) + chr( d )
strB = chr( d ) + chr( c ) + chr( b ) + chr( a )
if not self.equalString( strA, strB ):
print "%s and %s should be equal.\n" % ( strA, strB )
print "Done"
If you don't mind modifying the strings, sort each of them and compare the two signatures.

Resources