I have two bytes (8-bit octets) which should be read as [3 bits][4 bits][3 bits].
Example:
unsigned char octet1 = 0b11111111; // binary values
unsigned char octet2 = 0b00000011;
As integers: [7][15][7].
Can anybody give me a hint where to start?
In a kind of pseudocode:
octet1 = 0b11111111
octet2 = 0b00000011
word = octet1 | octet2<<8
n1 = word & 0b111
n2 = word>>3 & 0b1111
n3 = word>>7 & 0b111
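For reference, a minimal compilable C sketch of the same idea (hex masks are used because 0b literals are only a compiler extension before C23):
#include <stdio.h>

int main(void)
{
    unsigned char octet1 = 0xFF;                 /* 0b11111111 */
    unsigned char octet2 = 0x03;                 /* 0b00000011 */
    unsigned int word = octet1 | (octet2 << 8);  /* 10-bit value 0x3FF */
    unsigned int n1 = word & 0x7;                /* low 3 bits  */
    unsigned int n2 = (word >> 3) & 0xF;         /* next 4 bits */
    unsigned int n3 = (word >> 7) & 0x7;         /* top 3 bits  */
    printf("n1=%u n2=%u n3=%u\n", n1, n2, n3);   /* prints n1=7 n2=15 n3=7 */
    return 0;
}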
Hi, here is a method that I tested and compiled with VC++ 9.
#pragma pack( 1 )
union
{
struct
{
unsigned short val1:3;
unsigned short val2:4;
unsigned short val3:3;
unsigned short val4:6;
} vals;
struct
{
unsigned char octet1:8;
unsigned char octet2:8;
} octets;
short oneVal;
} u = {0xFFFF};
unsigned char octet1 = 0xFF; //1 1111 111
unsigned char octet2 = 0x03; //000000 11
//000000 111 1111 111 0 7 15 7
u.octets.octet1 = octet1;
u.octets.octet2 = octet2;
cout << "size of u.vals:" << sizeof(u.vals)<< endl;
cout << "size of u.octets:" << sizeof(u.octets)<< endl;
cout << "size of u.oneVal:" << sizeof(u.oneVal)<< endl;
cout << "size of u:" << sizeof(u)<< endl;
cout << endl;
cout << "Your values:" << endl;
cout << "oneVal in Hex: 0x";
cout.fill( '0' );
cout.width( 4 );
cout<< hex << uppercase << u.oneVal << endl;
cout << dec; // back to decimal so the field values below print as expected
cout << "val1: " << (int)u.vals.val1 << endl;
cout << "val2: " << (int)u.vals.val2 << endl;
cout << "val3: " << (int)u.vals.val3 << endl;
cout << "val4: " << (int)u.vals.val4 << endl;
cout << endl;
octet1 = 0xCC; //1 1001 100
octet2 = 0xFA; //111110 10
//111110 101 1001 100 62 5 9 4
u.octets.octet1 = octet1;
u.octets.octet2 = octet2;
cout << "Some other values:" << endl;
cout << "oneVal in Hex: 0x";
cout.fill( '0' );
cout.width( 4 );
cout<< hex << uppercase << u.oneVal << endl;
cout << dec;
cout << "val1: " << (int)u.vals.val1 << endl;
cout << "val2: " << (int)u.vals.val2 << endl;
cout << "val3: " << (int)u.vals.val3 << endl;
cout << "val4: " << (int)u.vals.val4 << endl;
cout << endl;
octet1 = 0xCC; //1 1001 100
octet2 = 0xFA; //111110 10
//111110 101 1001 100 62 5 9 4
u.oneVal = ( (( unsigned short )octet2 ) << 8 ) | ( unsigned short )octet1;
cout << "Some thing diffrent asignment:" << endl;
cout << "oneVal in Hex: 0x";
cout.fill( '0' );
cout.width( 4 );
cout<< hex << uppercase << u.oneVal << endl;
cout << dec;
cout << "val1: " << (int)u.vals.val1 << endl;
cout << "val2: " << (int)u.vals.val2 << endl;
cout << "val3: " << (int)u.vals.val3 << endl;
cout << "val4: " << (int)u.vals.val4 << endl;
cout << endl;
Also note that I use #pragma pack( 1 ) to set the structure packing to 1 byte.
I also included a way of assigning the two octets into the one short value, using bitwise shift "<<" and bitwise OR "|".
You can simplify the access to u by dropping the named structures. But I wanted to show the sizes that are used for the structures.
Like this:
union
{
struct
{
unsigned short val1:3;
unsigned short val2:4;
unsigned short val3:3;
unsigned short val4:6;
};
struct
{
unsigned char octet1:8;
unsigned char octet2:8;
};
short oneVal;
} u = {0xFFFF};
Access would now be as simple as
u.oneVal = 0xFACC;
or
u.octet1 = 0xCC;
u.octet2 = 0xFA;
You can also drop either oneVal or octet1/octet2, depending on which access method you prefer.
There is no need to put the two bytes together before extracting the bits we want.
#include <stdio.h>

int main(void)
{
    unsigned char octet1 = 0b11111111;            /* 0b literals: GCC extension / C23 */
    unsigned char octet2 = 0b00000011;
    unsigned char n1 = octet1 & 0b111;            /* low 3 bits  */
    unsigned char n2 = (octet1 >> 3) & 0b1111;    /* next 4 bits */
    unsigned char n3 = (octet1 >> 7) | (octet2 << 1); /* top bit of octet1 plus the 2 bits of octet2 */
    printf("octet1=%u octet2=%u n1=%u n2=%u n3=%u\n",
           octet1, octet2, n1, n2, n3);
    return 0;
}
oct2     | oct1
00000011 | 11111111
  000000  [111] [1111] [111]
             7    0xf    7
Just a hint (assuming it is homework).
Note: 0x11111111 doesn't mean 8 bits all set to 1. It's a hexadecimal number of 4 bytes, where every byte is set to 0x11.
0xFF is a single byte (8 bits) where every bit is set to 1.
Then, to achieve what you want, you could use some macros to isolate the bits you need:
#define TOKEN1(x) ((x)>>7)
#define TOKEN2(x) ( ((x)>>3) & (0xFF>>5) )
#define TOKEN3(x) ( ((x)>>5) & (0xFF>>5) )
Didn't test it.
Another idea could be to put a char and a struct of char bit-fields together in a union:
union {
struct { char a:3; char b:4; char c:3; };
char x;
};
This way you can use x to edit the whole octet, and a, b and c to access the individual tokens...
Edit: 3+4+3 != 8.
If you need 10 bits you should use a short instead of a char. If, instead, some of those bits overlap, the macro solution will probably be easier: you'd need more than one struct inside the union to achieve the same result with the second solution...
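For the non-overlapping case, a sketch of that corrected union using a 16-bit base type so all 10 bits fit (the names are illustrative):
union tokens {
    struct {
        unsigned short a : 3;  /* low 3 bits  */
        unsigned short b : 4;  /* next 4 bits */
        unsigned short c : 3;  /* next 3 bits */
        unsigned short   : 6;  /* padding     */
    };
    unsigned short x;          /* the whole value */
};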
You can use bitwise operations to get and set the individual values.
It's unclear which bits of your octets apply to which values, but you only need & (bitwise AND), | (bitwise OR), << (left shift) and >> (right shift) to do this.
Start by writing packing/unpacking functions for your 2-byte hybrids.
If you do the work in C/C++, you can use the language's built-in bit-field support for this:
struct Int3 {
int a : 3;
int b : 4;
int c : 3;
};
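As a concrete sketch of the packing/unpacking approach mentioned above (assuming the question's [3][4][3] layout with the first 3-bit field in the low bits; pack343/unpack343 are illustrative names):
#include <stdint.h>

/* Pack three small values into the low 10 bits of a 16-bit word. */
static uint16_t pack343(unsigned a, unsigned b, unsigned c)
{
    return (uint16_t)((a & 0x7) | ((b & 0xF) << 3) | ((c & 0x7) << 7));
}

/* Unpack them again. */
static void unpack343(uint16_t w, unsigned *a, unsigned *b, unsigned *c)
{
    *a = w & 0x7;          /* 3 bits */
    *b = (w >> 3) & 0xF;   /* 4 bits */
    *c = (w >> 7) & 0x7;   /* 3 bits */
}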
Bitfields could be one option. You may need to pad your struct to get the bits to line up the way you want them to.
Refer to Bitfields in C++ or search Stack Overflow for C++ and bitfields, or you can use bitwise operators as outlined in the other answers.
Do you mean, if we "concatenate" octets as octet2.octet1, we will get 000000[111][1111][111]?
Then you may use bit operations:
">>" and "<<" to shift bits in your value,
"&" to mask bits
"+" to combine values
For example, "middle" value (which is 4-bits-length) may be received in the following way:
middle = 15 & (octet1 >> 3);
assert(CHAR_BIT == 8);
unsigned int twooctets = (octet2 << 8) | (octet1); /* thanks, Jonas */
unsigned int bits0to2 = (twooctets & /* 0b0000_0000_0111 */ 0x007) >> 0;
unsigned int bits3to6 = (twooctets & /* 0b0000_0111_1000 */ 0x078) >> 3;
unsigned int bits7to9 = (twooctets & /* 0b0011_1000_0000 */ 0x380) >> 7;
I do not know what title would best explain the problem, so feel free to edit it if you have a more informative one.
To understand the problem, let me explain what I am doing.
I have created a structure as following:
typedef union __attribute__ ((__packed__)) adcs_measurements_t
{
unsigned char raw[72];
struct __attribute__ ((__packed__)) //191
{
int magneticFieldX : 16;
int magneticFieldY : 16;
int magneticFieldZ : 16;
int coarseSunX : 16;
int coarseSunY : 16;
int coarseSunZ : 16;
int sunX : 16;
int sunY : 16;
int sunZ : 16;
int nadirX : 16;
int nadirY : 16;
int nadirZ : 16;
int XAngularRate : 16;
int YAngularRate : 16;
int ZAngularRate : 16;
int XWheelSpeed : 16;
int YWheelSpeed : 16;
int ZWheelSpeed : 16;
int star1BX : 16;
int star1BY : 16;
int star1BZ : 16;
int star1OX : 16;
int star1OY : 16;
int star1OZ : 16;
int star2BX : 16;
int star2BY : 16;
int star2BZ : 16;
int star2OX : 16;
int star2OY : 16;
int star2OZ : 16;
int star3BX : 16;
int star3BY : 16;
int star3BZ : 16;
int star3OX : 16;
int star3OY : 16;
int star3OZ : 16;
} fields;
} adcs_measurements_t;
I populate the structure by calling a function as following:
void adcsTM191_measurements(adcs_measurements_t* dataOut)
{
int pass;
unsigned char TMID = 191;
unsigned char readBuff[72] = {0};
pass = I2C_write(ADCS_ADDR, &TMID, 1);
if(pass != 0)
{
printf("write error %d\n", pass);
}
pass = I2C_read(ADCS_ADDR, readBuff, 72);
if(pass != 0)
{
printf("read error %d\n", pass);
}
dataOut->fields.magneticFieldX = (readBuff[1] & 0x00FF) << 8 | (readBuff[0] & 0x00FF);
dataOut->fields.magneticFieldY = (readBuff[3] & 0x00FF) << 8 | (readBuff[2] & 0x00FF);
dataOut->fields.magneticFieldZ = (readBuff[5] & 0x00FF) << 8 | (readBuff[4] & 0x00FF);
dataOut->fields.coarseSunX = (readBuff[7] & 0x00FF) << 8 | (readBuff[6] & 0x00FF);
dataOut->fields.coarseSunY = (readBuff[9] & 0x00FF) << 8 | (readBuff[8] & 0x00FF);
dataOut->fields.coarseSunZ = (readBuff[11] & 0x00FF) << 8 | (readBuff[10] & 0x00FF);
dataOut->fields.sunX = (readBuff[13] & 0x00FF) << 8 | (readBuff[12] & 0x00FF);
dataOut->fields.sunY = (readBuff[15] & 0x00FF) << 8 | (readBuff[14] & 0x00FF);
dataOut->fields.sunZ = (readBuff[17] & 0x00FF) << 8 | (readBuff[16] & 0x00FF);
dataOut->fields.nadirX = (readBuff[19] & 0x00FF) << 8 | (readBuff[18] & 0x00FF);
dataOut->fields.nadirY = (readBuff[21] & 0x00FF) << 8 | (readBuff[20] & 0x00FF);
dataOut->fields.nadirZ = (readBuff[23] & 0x00FF) << 8 | (readBuff[22] & 0x00FF);
dataOut->fields.XAngularRate = (readBuff[25] & 0x00FF) << 8 | (readBuff[24] & 0x00FF);
dataOut->fields.YAngularRate = (readBuff[27] & 0x00FF) << 8 | (readBuff[26] & 0x00FF);
dataOut->fields.ZAngularRate = (readBuff[29] & 0x00FF) << 8 | (readBuff[28] & 0x00FF);
dataOut->fields.XWheelSpeed = (readBuff[31] & 0x00FF) << 8 | (readBuff[30] & 0x00FF);
dataOut->fields.YWheelSpeed = (readBuff[33] & 0x00FF) << 8 | (readBuff[32] & 0x00FF);
dataOut->fields.ZWheelSpeed = (readBuff[35] & 0x00FF) << 8 | (readBuff[34] & 0x00FF);
dataOut->fields.star1BX = (readBuff[37] & 0x00FF) << 8 | (readBuff[36] & 0x00FF);
dataOut->fields.star1BY = (readBuff[39] & 0x00FF) << 8 | (readBuff[38] & 0x00FF);
dataOut->fields.star1BZ = (readBuff[41] & 0x00FF) << 8 | (readBuff[40] & 0x00FF);
dataOut->fields.star1OX = (readBuff[43] & 0x00FF) << 8 | (readBuff[42] & 0x00FF);
dataOut->fields.star1OY = (readBuff[45] & 0x00FF) << 8 | (readBuff[44] & 0x00FF);
dataOut->fields.star1OZ = (readBuff[47] & 0x00FF) << 8 | (readBuff[46] & 0x00FF);
dataOut->fields.star2BX = (readBuff[49] & 0x00FF) << 8 | (readBuff[48] & 0x00FF);
dataOut->fields.star2BY = (readBuff[51] & 0x00FF) << 8 | (readBuff[50] & 0x00FF);
dataOut->fields.star2BZ = (readBuff[53] & 0x00FF) << 8 | (readBuff[52] & 0x00FF);
dataOut->fields.star2OX = (readBuff[55] & 0x00FF) << 8 | (readBuff[54] & 0x00FF);
dataOut->fields.star2OY = (readBuff[57] & 0x00FF) << 8 | (readBuff[56] & 0x00FF);
dataOut->fields.star2OZ = (readBuff[59] & 0x00FF) << 8 | (readBuff[58] & 0x00FF);
dataOut->fields.star3BX = (readBuff[61] & 0x00FF) << 8 | (readBuff[60] & 0x00FF);
dataOut->fields.star3BY = (readBuff[63] & 0x00FF) << 8 | (readBuff[62] & 0x00FF);
dataOut->fields.star3BZ = (readBuff[65] & 0x00FF) << 8 | (readBuff[64] & 0x00FF);
dataOut->fields.star3OX = (readBuff[67] & 0x00FF) << 8 | (readBuff[66] & 0x00FF);
dataOut->fields.star3OY = (readBuff[69] & 0x00FF) << 8 | (readBuff[68] & 0x00FF);
dataOut->fields.star3OZ = (readBuff[71] & 0x00FF) << 8 | (readBuff[70] & 0x00FF);
}
Finally I print, for instance, YWheelSpeed:
adcsTM191_measurements(&temp);
printf("structure y wheel speed is: %d \n", temp.fields.YWheelSpeed);
This should print a negative value, and it does:
structure y wheel speed is: -97
Now here is the thing: if I print (readBuff[27] & 0x00FF) << 8 | (readBuff[26] & 0x00FF), which corresponds to what was populated inside the Y wheel speed variable, anywhere inside adcsTM191_measurements(adcs_measurements_t* dataOut), it does not print this negative value. Rather, it prints the maximum value of an unsigned 16-bit integer (65,535).
int y = (int) (readBuff[33] & 0x00FF) << 8 | (readBuff[32] & 0x00FF);
printf("inside struct y is: %d", y);
I am expecting that storing the value inside the structure does a kind of implicit cast, so it prints the negative value as expected. How is it doing that? How can I print the correct value without using the structure?
According to C 2018 footnote 128, it is implementation-defined whether a bit-field defined with int, as in int YWheelSpeed is signed or unsigned. Since your implementation is showing a negative value for it, presumably it is signed, and therefore, as a 16-bit signed integer, it can represent values from −32,768 to 32,767.
We can also deduce that int in your implementation is more than 16 bits, likely 32 bits (from the fact that “65535” is printed in one case when int y is printed with “%d”).
Consider this assignment:
dataOut->fields.YWheelSpeed = (readBuff[33] & 0x00FF) << 8 | (readBuff[32] & 0x00FF);
In this expression, readBuff[33] and readBuff[32] are converted to int by the usual promotions. 0x00FF is also an int.
If we suppose readBuff[33] is 255 and readBuff[32] is 159 (which is 2^8 − 97), then the value of the expression on the right side of the = is 65,439 (which is 2^16 − 97). In an assignment, the right operand is converted to the type of the left operand, which is a 16-bit signed integer. In this case, the value, 65,439, cannot be represented in a 16-bit signed integer. C 2018 6.3.1.3 3 tells us “either the result is implementation-defined or an implementation-defined signal is raised.”
A common implementation of this conversion is to produce the result modulo 2^16 or, equivalently, to reinterpret the 16 low bits of the int as a two’s complement 16-bit integer. This produces −97. Since your implementation subsequently showed −97 for the value, presumably this is what your implementation did.
Thus, dataOut->fields.YWheelSpeed is assigned the value −97. When it is later printed with:
printf("structure y wheel speed is: %d \n", temp.fields.YWheelSpeed);
then the default argument promotions, which include the usual integer promotions, convert temp.fields.YWheelSpeed from a signed 16-bit integer with value −97 to an int with value −97, and “-97” is printed.
In contrast, suppose (readBuff[33] & 0x00FF) << 8 | (readBuff[32] & 0x00FF) is printed with %d. As we saw above, the value of this expression is 65,439, so “65439” should be printed.
The question states:
Now here is the thing, if I print (readBuff[27] & 0x00FF) << 8 | (readBuff[26] & 0x00FF), which corresponds to what was populated inside the Y wheel speed variable,… it prints the maximum value of an unsigned 16-bit integer (65,535).
However, (readBuff[27] & 0x00FF) << 8 | (readBuff[26] & 0x00FF) is not the value that was assigned to YWheelSpeed, which is presumably the “Y wheel speed variable”. YWheelSpeed was assigned from readBuff elements 32 and 33, not 26 and 27. Thus we should not be surprised that some different value is printed rather than 65,439.
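To see the same negative value without the structure, the computed 16-bit pattern can be reinterpreted explicitly. A minimal sketch, assuming <stdint.h> is available and using the hypothetical bytes 159 and 255 from above:
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned char readBuff[2] = { 159, 255 };   /* hypothetical bytes 32 and 33 */
    int raw = (readBuff[1] & 0x00FF) << 8 | (readBuff[0] & 0x00FF); /* 65439 */
    int16_t y = (int16_t)raw;  /* the conversion is implementation-defined, but wraps
                                  to -97 on the usual two's-complement implementations */
    printf("raw=%d  as int16_t=%d\n", raw, (int)y);
    return 0;
}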
You probably have 32-bit int, so the initialization never sets the sign bit. But the structure field is only 16 bits, and will be sign-extended when it's converted to int for the printf() call.
I want to extend my uint8 variable with 2 more bits to get, let's say, a uint10. For that I used this method, but it only takes the mask into consideration.
void splitbits(unsigned int x){ //x=11001010 and i want to have this for any given X
split[0]=(x>> 6) & 0x01 ; //split[0]=11
split[1]=(x>> 4) & 0x01 ; //split[1]=00
split[2]=(x>> 2) & 0x01 ; //split[2]=10
split[3]=x & 0x01 ; //split[3]=10
}
split[0]=(x>> 6) & 0x01 ; //split[0]=11
The comment is factually incorrect. You mask out all but the least significant bit, not the two least significant bits. You need to mask with binary 11 which is 3 in decimal or hex.
split[0]=(x>> 6) & 0x03 ; //split[0]=11
// ^ binary 11
However, if you want to make a 10-bit number out of another number, you can do it all in one go, i.e.
uint16_t my10BitNumber = anotherNumber & 0x3ff; // 0x3ff is 1111111111 in binary
I used uint16_t because that is the smallest portable type that will contain 10 bits.
Some code for splitting a buffer of bytes into pairs of bits:
#include <cstdlib>   // malloc / free
#include <iostream>
using namespace std;

unsigned char* splitcouplebits(unsigned char* buffer, int couplecount){
unsigned char* v1 = (unsigned char*)malloc(couplecount);
for (int i = 0; i < couplecount; i++) {
int bn = i / 4;
int offs = 3 + bn * 2 - i;
offs *= 2;
v1[i] = (buffer[bn] >> offs) & 0x03;
}
return v1;
}
//... main
{
unsigned char src[2];
src[0] = 0xBB;
src[1] = 0x13;
//1011101100010011
unsigned char* splits = splitcouplebits((unsigned char*)&src, 8);
//first 5 couples of bits
cout << (int)splits[0] << endl; //10
cout << (int)splits[1] << endl; //11
cout << (int)splits[2] << endl; //10
cout << (int)splits[3] << endl; //11
cout << (int)splits[4] << endl; //00
//....
free(splits); // allocated with malloc, so release with free()
}
I am trying to print a Russian "ф" (U+0444 CYRILLIC SMALL LETTER EF) character, which is given a code of decimal 1092. Using C++, how can I print out this character? I would have thought something along the lines of the following would work, yet...
int main (){
wchar_t f = '1060';
cout << f << endl;
}
To represent the character you can use Universal Character Names (UCNs). The character 'ф' has the Unicode value U+0444 and so in C++ you could write it '\u0444' or '\U00000444'. Also if the source code encoding supports this character then you can just write it literally in your source code.
// both of these assume that the character can be represented with
// a single char in the execution encoding
char b = '\u0444';
char a = 'ф'; // this line additionally assumes that the source character encoding supports this character
Printing such characters out depends on what you're printing to. If you're printing to a Unix terminal emulator, the terminal emulator is using an encoding that supports this character, and that encoding matches the compiler's execution encoding, then you can do the following:
#include <iostream>
int main() {
std::cout << "Hello, ф or \u0444!\n";
}
This program does not require that 'ф' can be represented in a single char. On OS X and most any modern Linux install this will work just fine, because the source, execution, and console encodings will all be UTF-8 (which supports all Unicode characters).
Things are harder with Windows and there are different possibilities with different tradeoffs.
Probably the best, if you don't need portable code (you'll be using wchar_t, which should really be avoided on every other platform), is to set the mode of the output file handle to take only UTF-16 data.
#include <iostream>
#include <io.h>
#include <fcntl.h>
int main() {
_setmode(_fileno(stdout), _O_U16TEXT);
std::wcout << L"Hello, \u0444!\n";
}
Portable code is more difficult.
When compiling with -std=c++11, one can simply
const char *s = u8"\u0444";
cout << s << endl;
Ultimately, this is completely platform-dependent. Unicode support is, unfortunately, very poor in standard C++. For GCC you will have to make it a narrow string, as it uses UTF-8, while Windows wants a wide string, and you must output to wcout.
// GCC
std::cout << "ф";
// Windoze
wcout << L"ф";
This code works in Linux (C++11, geany, g++ 7.4.0):
#include <iostream>
using namespace std;
int utf8_to_unicode(string utf8_code);
string unicode_to_utf8(int unicode);
int main()
{
cout << unicode_to_utf8(36) << '\t';
cout << unicode_to_utf8(162) << '\t';
cout << unicode_to_utf8(8364) << '\t';
cout << unicode_to_utf8(128578) << endl;
cout << unicode_to_utf8(0x24) << '\t';
cout << unicode_to_utf8(0xa2) << '\t';
cout << unicode_to_utf8(0x20ac) << '\t';
cout << unicode_to_utf8(0x1f642) << endl;
cout << utf8_to_unicode("$") << '\t';
cout << utf8_to_unicode("¢") << '\t';
cout << utf8_to_unicode("€") << '\t';
cout << utf8_to_unicode("🙂") << endl;
cout << utf8_to_unicode("\x24") << '\t';
cout << utf8_to_unicode("\xc2\xa2") << '\t';
cout << utf8_to_unicode("\xe2\x82\xac") << '\t';
cout << utf8_to_unicode("\xf0\x9f\x99\x82") << endl;
return 0;
}
int utf8_to_unicode(string utf8_code)
{
unsigned utf8_size = utf8_code.length();
int unicode = 0;
for (unsigned p=0; p<utf8_size; ++p)
{
int bit_count = (p? 6: 8 - utf8_size - (utf8_size == 1? 0: 1)),
shift = (p < utf8_size - 1? (6*(utf8_size - p - 1)): 0);
for (int k=0; k<bit_count; ++k)
unicode += ((utf8_code[p] & (1 << k)) << shift);
}
return unicode;
}
string unicode_to_utf8(int unicode)
{
string s;
if (unicode>=0 and unicode <= 0x7f) // 7F(16) = 127(10)
{
s = static_cast<char>(unicode);
return s;
}
else if (unicode <= 0x7ff) // 7FF(16) = 2047(10)
{
unsigned char c1 = 192, c2 = 128;
for (int k=0; k<11; ++k)
{
if (k < 6) c2 |= (unicode % 64) & (1 << k);
else c1 |= (unicode >> 6) & (1 << (k - 6));
}
s = c1; s += c2;
return s;
}
else if (unicode <= 0xffff) // FFFF(16) = 65535(10)
{
unsigned char c1 = 224, c2 = 128, c3 = 128;
for (int k=0; k<16; ++k)
{
if (k < 6) c3 |= (unicode % 64) & (1 << k);
else if (k < 12) c2 |= (unicode >> 6) & (1 << (k - 6));
else c1 |= (unicode >> 12) & (1 << (k - 12));
}
s = c1; s += c2; s += c3;
return s;
}
else if (unicode <= 0x1fffff) // 1FFFFF(16) = 2097151(10)
{
unsigned char c1 = 240, c2 = 128, c3 = 128, c4 = 128;
for (int k=0; k<21; ++k)
{
if (k < 6) c4 |= (unicode % 64) & (1 << k);
else if (k < 12) c3 |= (unicode >> 6) & (1 << (k - 6));
else if (k < 18) c2 |= (unicode >> 12) & (1 << (k - 12));
else c1 |= (unicode >> 18) & (1 << (k - 18));
}
s = c1; s += c2; s += c3; s += c4;
return s;
}
else if (unicode <= 0x3ffffff) // 3FFFFFF(16) = 67108863(10)
{
; // actually, there are no 5-bytes unicodes
}
else if (unicode <= 0x7fffffff) // 7FFFFFFF(16) = 2147483647(10)
{
; // actually, there are no 6-bytes unicodes
}
else ; // incorrect unicode (< 0 or > 2147483647)
return "";
}
More:
https://sites.google.com/view/technik-informatyk-nysa/porady/porady-c-cpp#h.p_lz0skneGFILy
https://en.wikipedia.org/wiki/UTF-8
If you use Windows (note, we are using printf(), not cout):
//Save As UTF8 without signature
#include <stdio.h>
#include<windows.h>
int main (){
SetConsoleOutputCP(65001);
printf("ф\n");
}
Not Unicode but working - 1251 instead of UTF8:
//Save As Windows 1251
#include <iostream>
#include<windows.h>
using namespace std;
int main (){
SetConsoleOutputCP(1251);
cout << "ф" << endl;
}
'1060' is a four-character multi-character literal, not the number 1060; its value is implementation-defined (and compilers may reject it). You should just treat the character as a number, if your wide characters match 1:1 with Unicode (check your locale settings).
int main (){
wchar_t f = 1060;
wcout << f << endl;
}
I needed to show the string in the UI as well as save it to an XML configuration file. The format specified above is good for a C++ string; I would add that we can get the XML-compatible string for the special character by replacing "\u" with "&#x" and adding a ";" at the end.
For example :
C++ : "\u0444" --> XML : "ф"
In Linux, I can just do:
std::cout << "ф";
I just copy-pasted characters from here and it didn't fail for at least the random sample that I tried on.
Another solution in Linux:
string a = "Ф";
cout << "Ф = \xd0\xa4 = " << hex
<< int(static_cast<unsigned char>(a[0]))
<< int(static_cast<unsigned char>(a[1])) << " (" << a.length() << "B)" << endl;
string b = "√";
cout << "√ = \xe2\x88\x9a = " << hex
<< int(static_cast<unsigned char>(b[0]))
<< int(static_cast<unsigned char>(b[1]))
<< int(static_cast<unsigned char>(b[2])) << " (" << b.length() << "B)" << endl;
Special thanks to the answer here for more-or-less the same question.
For me, all I needed was setlocale(LC_ALL, "en_US.UTF-8");
Then, I could use even raw wchar_t characters.
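For reference, a minimal C sketch of that approach (using wprintf rather than wcout; it assumes the en_US.UTF-8 locale is actually installed on the system):
#include <locale.h>
#include <stdio.h>
#include <wchar.h>

int main(void)
{
    setlocale(LC_ALL, "en_US.UTF-8");  /* assumes this locale exists */
    wchar_t f = L'\u0444';             /* CYRILLIC SMALL LETTER EF */
    wprintf(L"%lc\n", f);              /* converted to UTF-8 by the locale */
    return 0;
}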
On Linux, Unicode character (UTF-16 / UTF-32) can be converted to UTF-8 and printed to std::cout. I used these functions.
What is the best way to zip two (or more) numbers' bit representations together in C/C++/Obj-C?
I have three numbers. Their binary representations are [abc, ABC, xyz]. I would like to produce a number with binary [aAxbBycCz]. I'm mainly working with numbers that are over 21 bits.
(Ignoring the limit on integers, endian-ness and whatnot).
Thanks, happy holidays guys :)
A solution that should work for any number of bits:
const unsigned int BITS = 21;
unsigned int zipper(unsigned a0, unsigned a1, unsigned a2)
{
unsigned int result = 0;
for (unsigned int mask = 1<<BITS; mask != 0; mask >>= 1)
{
result |= a0 & mask;
result <<= 1;
result |= a1 & mask;
result <<= 1;
result |= a2 & mask;
}
return result;
}
If you need more speed, do some precalculation:
static unsigned explode[]= { 0, 1, 0x1000, 0x1001, 0x1000000, 0x1000001, 0x1001000, 0x1001001 } ;
unsigned int zipper(unsigned a0, unsigned a1, unsigned a2)
{
return explode[a0] | ( explode[a1] << 1) | ( explode[a2] << 2 ) ;
}
With the usual caveats for out of bounds, etc.
I would just do it by brute force:
unsigned int binaryZip(unsigned int a0, unsigned int a1, unsigned int a2)
{
return ((a0 << 0) & 0x001) |
((a1 << 1) & 0x002) |
((a2 << 2) & 0x004) |
((a0 << 2) & 0x008) |
((a1 << 3) & 0x010) |
((a2 << 4) & 0x020) |
((a0 << 4) & 0x040) |
((a1 << 5) & 0x080) |
((a2 << 6) & 0x100);
}
I just want to ask if my method to convert from little endian to big endian is correct, just to make sure I understand the difference.
I have a number which is stored in little-endian, here are the binary and hex representations of the number:
0001 0010 0011 0100 0101 0110 0111 1000
12345678
In big-endian format I believe the bytes should be swapped, like this:
1000 0111 0110 0101 0100 0011 0010 0001
87654321
Is this correct?
Also, the code below attempts to do this but fails. Is there anything obviously wrong, or can I optimize something? If the code is bad for this conversion, can you please explain why and show a better method of performing the same conversion?
uint32_t num = 0x12345678;
uint32_t b0,b1,b2,b3,b4,b5,b6,b7;
uint32_t res = 0;
b0 = (num & 0xf) << 28;
b1 = (num & 0xf0) << 24;
b2 = (num & 0xf00) << 20;
b3 = (num & 0xf000) << 16;
b4 = (num & 0xf0000) << 12;
b5 = (num & 0xf00000) << 8;
b6 = (num & 0xf000000) << 4;
b7 = (num & 0xf0000000) << 4;
res = b0 + b1 + b2 + b3 + b4 + b5 + b6 + b7;
printf("%d\n", res);
OP's sample code is incorrect.
Endian conversion works at the bit and 8-bit byte level. Most endian issues deal with the byte level. OP's code is doing an endian change at the 4-bit nibble level. Recommend instead:
// Swap endian (big to little) or (little to big)
uint32_t num = 9;
uint32_t b0,b1,b2,b3;
uint32_t res;
b0 = (num & 0x000000ff) << 24u;
b1 = (num & 0x0000ff00) << 8u;
b2 = (num & 0x00ff0000) >> 8u;
b3 = (num & 0xff000000) >> 24u;
res = b0 | b1 | b2 | b3;
printf("%" PRIX32 "\n", res);
If performance is truly important, the particular processor would need to be known. Otherwise, leave it to the compiler.
[Edit] OP added a comment that changes things.
"32bit numerical value represented by the hexadecimal representation (st uv wx yz) shall be recorded in a four-byte field as (st uv wx yz)."
It appears in this case, the endian of the 32-bit number is unknown and the result needs to be store in memory in little endian order.
uint32_t num = 9;
uint8_t b[4];
b[0] = (uint8_t) (num >> 0u);
b[1] = (uint8_t) (num >> 8u);
b[2] = (uint8_t) (num >> 16u);
b[3] = (uint8_t) (num >> 24u);
[2016 Edit] Simplification
... The type of the result is that of the promoted left operand.... Bitwise shift operators C11 §6.5.7 3
Using a u after the shift constants (right operands) results in the same as without it.
b3 = (num & 0xff000000) >> 24u;
b[3] = (uint8_t) (num >> 24u);
// same as
b3 = (num & 0xff000000) >> 24;
b[3] = (uint8_t) (num >> 24);
Sorry, my answer is a bit too late, but it seems nobody mentioned the built-in functions to reverse byte order, which are very important in terms of performance.
Most modern processors are little-endian, while network protocols are big-endian. That is history, and you can find more on it on Wikipedia. But it means our processors convert between little- and big-endian millions of times while we browse the Internet.
That is why most architectures have dedicated processor instructions to facilitate this task. For x86 there is the BSWAP instruction, and for ARM there is REV. This is the most efficient way to reverse byte order.
To avoid assembly in our C code, we can use built-ins instead. For GCC there is the __builtin_bswap32() function and for Visual C++ there is _byteswap_ulong(). Those functions will generate just one processor instruction on most architectures.
Here is an example:
#include <stdio.h>
#include <inttypes.h>
int main()
{
uint32_t le = 0x12345678;
uint32_t be = __builtin_bswap32(le);
printf("Little-endian: 0x%" PRIx32 "\n", le);
printf("Big-endian: 0x%" PRIx32 "\n", be);
return 0;
}
Here is the output it produces:
Little-endian: 0x12345678
Big-endian: 0x78563412
And here is the disassembly (without optimization, i.e. -O0):
uint32_t be = __builtin_bswap32(le);
0x0000000000400535 <+15>: mov -0x8(%rbp),%eax
0x0000000000400538 <+18>: bswap %eax
0x000000000040053a <+20>: mov %eax,-0x4(%rbp)
There is just one BSWAP instruction indeed.
So, if we do care about the performance, we should use those built-in functions instead of any other method of byte reversing. Just my 2 cents.
I think you can use the function htonl(). Network byte order is big-endian.
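For example, a small sketch (htonl() is declared in <arpa/inet.h> on POSIX systems, or <winsock2.h> on Windows):
#include <arpa/inet.h>   /* htonl(); on Windows use <winsock2.h> instead */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t host = 0x12345678;
    uint32_t net = htonl(host); /* network (big-endian) order; a byte swap on little-endian hosts */
    printf("host = 0x%08x, network = 0x%08x\n", (unsigned)host, (unsigned)net);
    return 0;
}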
"I swap each bytes right?" -> yes, to convert between little and big endian, you just give the bytes the opposite order.
But at first realize few things:
the size of uint32_t is 32 bits, which is 4 bytes, which is 8 hex digits
the mask 0xf retrieves the 4 least significant bits; to retrieve 8 bits you need 0xff
so if you want to swap the order of the 4 bytes with that kind of mask, you could:
uint32_t res = 0;
b0 = (num & 0xff) << 24;        // least significant to most significant
b1 = (num & 0xff00) << 8;       // 2nd least sig. to 2nd most sig.
b2 = (num & 0xff0000) >> 8;     // 2nd most sig. to 2nd least sig.
b3 = (num & 0xff000000) >> 24;  // most sig. to least sig.
res = b0 | b1 | b2 | b3;
You could do this:
int x = 0x12345678;
x = ( x >> 24 ) | (( x << 8) & 0x00ff0000 )| ((x >> 8) & 0x0000ff00) | ( x << 24) ;
printf("value = %x", x); // x will be printed as 0x78563412
One slightly different way of tackling this that can sometimes be useful is to have a union of the sixteen or thirty-two bit value and an array of chars. I've just been doing this when getting serial messages that come in with big endian order, yet am working on a little endian micro.
union MessageLengthUnion
{
uint16_t asInt;
uint8_t asChars[2];
};
Then, when I get the messages in, I put the first received uint8 in .asChars[1] and the second in .asChars[0], then access it as the .asInt part of the union in the rest of my program.
If you have a thirty-two-bit value to store, you can make the array four bytes long.
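A usage sketch of that idea (the byte values are hypothetical, standing in for two bytes received most-significant first):
#include <stdint.h>
#include <stdio.h>

union MessageLengthUnion
{
    uint16_t asInt;
    uint8_t asChars[2];
};

int main(void)
{
    union MessageLengthUnion len;
    uint8_t firstReceived = 0x12, secondReceived = 0x34; /* hypothetical bytes off the wire */
    len.asChars[1] = firstReceived;   /* big-endian message on a little-endian micro */
    len.asChars[0] = secondReceived;
    printf("length = 0x%04x\n", len.asInt); /* 0x1234 on a little-endian machine */
    return 0;
}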
I am assuming you are on Linux.
Include <byteswap.h> and use int32_t bswap_32(int32_t argument);
That is the logical view; for the actual implementation, see /usr/include/byteswap.h.
One more suggestion:
unsigned int a = 0xABCDEF23;
a = ((a&(0x0000FFFF)) << 16) | ((a&(0xFFFF0000)) >> 16);
a = ((a&(0x00FF00FF)) << 8) | ((a&(0xFF00FF00)) >>8);
printf("%0x\n",a);
A simple C program to convert from little to big endian:
#include <stdio.h>
int main() {
unsigned int little=0x1234ABCD,big=0;
unsigned char tmp=0,l;
printf(" Little endian little=%x\n",little);
for(l=0;l < 4;l++)
{
tmp=0;
tmp = little | tmp;
big = tmp | (big << 8);
little = little >> 8;
}
printf(" Big endian big=%x\n",big);
return 0;
}
OP's code is incorrect for the following reasons:
The swaps are being performed on a nibble (4-bit) boundary, instead of a byte (8-bit) boundary.
The shift-left << operations of the final four swaps are incorrect; they should be shift-right >> operations, and their shift values would also need to be corrected.
The use of intermediary storage is unnecessary, and the code can therefore be rewritten to be more concise/recognizable. In doing so, some compilers will be able to better-optimize the code by recognizing the oft-used pattern.
Consider the following code, which efficiently converts an unsigned value:
// Swap endian (big to little) or (little to big)
uint32_t num = 0x12345678;
uint32_t res =
((num & 0x000000FF) << 24) |
((num & 0x0000FF00) << 8) |
((num & 0x00FF0000) >> 8) |
((num & 0xFF000000) >> 24);
printf("%0x\n", res);
The result is represented here in both binary and hex, notice how the bytes have swapped:
0111 1000 0101 0110 0011 0100 0001 0010
78563412
Optimizing
In terms of performance, leave it to the compiler to optimize your code when possible. You should avoid unnecessary data structures like arrays for simple algorithms like this; doing so will usually cause different instruction behavior, such as accessing RAM instead of using CPU registers.
#include <stdio.h>
#include <inttypes.h>
uint32_t le_to_be(uint32_t num) {
uint8_t b[4] = {0};
*(uint32_t*)b = num;
uint8_t tmp = 0;
tmp = b[0];
b[0] = b[3];
b[3] = tmp;
tmp = b[1];
b[1] = b[2];
b[2] = tmp;
return *(uint32_t*)b;
}
int main()
{
printf("big endian value is %x\n", le_to_be(0xabcdef98));
return 0;
}
You can use the lib functions. They boil down to assembly, but if you are open to alternate implementations in C, here they are (assuming int is 32 bits):
void byte_swap16(unsigned short int *pVal16) {
//#define method_one 1
// #define method_two 1
#define method_three 1
#ifdef method_one
unsigned char *pByte;
pByte = (unsigned char *) pVal16;
*pVal16 = (pByte[0] << 8) | pByte[1];
#endif
#ifdef method_two
unsigned char *pByte0;
unsigned char *pByte1;
pByte0 = (unsigned char *) pVal16;
pByte1 = pByte0 + 1;
*pByte0 = *pByte0 ^ *pByte1;
*pByte1 = *pByte0 ^ *pByte1;
*pByte0 = *pByte0 ^ *pByte1;
#endif
#ifdef method_three
unsigned char *pByte;
pByte = (unsigned char *) pVal16;
pByte[0] = pByte[0] ^ pByte[1];
pByte[1] = pByte[0] ^ pByte[1];
pByte[0] = pByte[0] ^ pByte[1];
#endif
}
void byte_swap32(unsigned int *pVal32) {
#ifdef method_one
unsigned char *pByte;
// 0x1234 5678 --> 0x7856 3412
pByte = (unsigned char *) pVal32;
*pVal32 = ( pByte[0] << 24 ) | (pByte[1] << 16) | (pByte[2] << 8) | ( pByte[3] );
#endif
#if defined(method_two) || defined (method_three)
unsigned char *pByte;
pByte = (unsigned char *) pVal32;
// move lsb to msb
pByte[0] = pByte[0] ^ pByte[3];
pByte[3] = pByte[0] ^ pByte[3];
pByte[0] = pByte[0] ^ pByte[3];
// swap the two middle bytes
pByte[1] = pByte[1] ^ pByte[2];
pByte[2] = pByte[1] ^ pByte[2];
pByte[1] = pByte[1] ^ pByte[2];
#endif
}
And the usage is performed like so:
unsigned short int u16Val = 0x1234;
byte_swap16(&u16Val);
unsigned int u32Val = 0x12345678;
byte_swap32(&u32Val);
Below is another approach that was useful for me:
void convertLittleEndianByteArrayToBigEndianByteArray (byte littlendianByte[], byte bigEndianByte[], int ArraySize){
int i =0;
for(i =0;i<ArraySize;i++){
bigEndianByte[i] = (littlendianByte[ArraySize-i-1] << 7 & 0x80) | (littlendianByte[ArraySize-i-1] << 5 & 0x40) |
(littlendianByte[ArraySize-i-1] << 3 & 0x20) | (littlendianByte[ArraySize-i-1] << 1 & 0x10) |
(littlendianByte[ArraySize-i-1] >>1 & 0x08) | (littlendianByte[ArraySize-i-1] >> 3 & 0x04) |
(littlendianByte[ArraySize-i-1] >>5 & 0x02) | (littlendianByte[ArraySize-i-1] >> 7 & 0x01) ;
}
}
The program below produces the result as needed:
#include <stdio.h>
unsigned int Little_To_Big_Endian(unsigned int num);
int main( )
{
int num = 0x11223344 ;
printf("\n Little_Endian = 0x%X\n",num);
printf("\n Big_Endian = 0x%X\n",Little_To_Big_Endian(num));
}
unsigned int Little_To_Big_Endian(unsigned int num)
{
return (((num >> 24) & 0x000000ff) | ((num >> 8) & 0x0000ff00) | ((num << 8) & 0x00ff0000) | ((num << 24) & 0xff000000));
}
And also below function can be used:
unsigned int Little_To_Big_Endian(unsigned int num)
{
return (((num & 0x000000ff) << 24) | ((num & 0x0000ff00) << 8 ) | ((num & 0x00ff0000) >> 8) | ((num & 0xff000000) >> 24 ));
}
#include<stdio.h>
int main(){
int var = 0X12345678;
var = ((0X000000FF & var)<<24)|
((0X0000FF00 & var)<<8) |
((0X00FF0000 & var)>>8) |
((0XFF000000 & var)>>24);
printf("%x",var);
}
Here is a little function I wrote that works pretty well. It's probably not portable to every single machine or as fast as a single CPU instruction, but it should work for most. It can handle numbers up to 32 bytes (256 bits) and works for both big- and little-endian swaps. The nicest part about this function is that you can point it at a byte array coming off or going onto the wire and swap the bytes in place before converting.
#include <stdio.h>
#include <string.h>
void byteSwap(char**,int);
int main() {
//32 bit
int test32 = 0x12345678;
printf("\n BigEndian = 0x%X\n",test32);
char* pTest32 = (char*) &test32;
//convert to little endian
byteSwap((char**)&pTest32, 4);
printf("\n LittleEndian = 0x%X\n", test32);
//64 bit
long int test64 = 0x1234567891234567LL;
printf("\n BigEndian = 0x%lx\n",test64);
char* pTest64 = (char*) &test64;
//convert to little endian
byteSwap((char**)&pTest64,8);
printf("\n LittleEndian = 0x%lx\n",test64);
//back to big endian
byteSwap((char**)&pTest64,8);
printf("\n BigEndian = 0x%lx\n",test64);
return 0;
}
void byteSwap(char** src,int size) {
int x = 0;
char b[32];
while(--size >= 0) { b[x++] = (*src)[size]; } /* pre-decrement so we stay within the buffer */
memcpy(*src,&b,x);
}
output:
$gcc -o main *.c -lm
$main
BigEndian = 0x12345678
LittleEndian = 0x78563412
BigEndian = 0x1234567891234567
LittleEndian = 0x6745239178563412
BigEndian = 0x1234567891234567