Im writing a go application that listens for UDP packets over a network and parses them.
the udp packets are written in C and the struct definition (according to their docs) is something like the below. (Please understand that new to C and networking in general)
typedef struct foo
{
int code;
char seg[10];
char sym[25];
short type;
long amtToday;
long price;
...etc
} foo;
A sample network packet is something like the below
[233 3 0 0 99 100 101 95 102 111 0 0 0 0 55 52 51 57 0 69 69 68 49 48 50 48 74 65 78 50 48 50 49 0 0 58 254 127 0 0 1 0 166 58 254 127 0 0 255 255 255 255 255 255 255 255 32 232 141 0 0 0 0 0 0 135 166 58 254 127 0 0 ... etc]
in short, im having trouble getting the right values after the sym field.
i read up a bit about struct alignment in C and guessing that im ignoring the padded values. But im a bit confused as to where the padding occurs
is it this
typedef struct foo
{
int code;
char seg[10];
**char pad[6];**
char sym[25];
**char pad[7];**
short type;
long amtToday;
long price;
...etc
} foo;
i.e padding is added after each char field
or is it more like this
typedef struct foo
{
int code;
char seg[10];
char sym[25];
**char pad[1];**
short type;
long amtToday;
long price;
...etc
} foo;
the problem is that there is no way for me to determine if either of these are right at my end. I need to parse the entire struct before confirming - but unable to do so because of the padding issue
or am i heading in the wrong direction parsing this packet?
The best way to convince yourself of how it works is probably to write C-code that computes the offsets:
#include <stdio.h>
typedef struct foo
{
int code;
char seg[10];
char sym[25];
short type;
long amtToday;
long price;
} foo;
int main() {
// What are the memory offsets between individual struct members?
foo x;
printf(
"code: %ld, seg: %ld, sym: %ld, type: %ld, amtToday: %ld, price: %ld\n",
(long)&x.code - (long)&x,
(long)&x.seg - (long)&x,
(long)&x.sym - (long)&x,
(long)&x.type - (long)&x,
(long)&x.amtToday - (long)&x,
(long)&x.price - (long)&x
);
// How much space does the struct take up if we create an array for it?
foo y[2];
printf("offset: %ld\n", (long)&y[1] - (long)&y[0]);
return 0;
}
Output:
code: 0, seg: 4, sym: 14, type: 40, amtToday: 48, price: 56
offset: 64
The offsets can depend on architecture and the compiler being used. If you are able to edit the C-program, adding explicit padding to the struct might be the best way to guarantee the same offsets on 32-bit and 64-bit systems.
Related
I transfered this C struct:
85 struct spi_ioc_transfer {
86 __u64 tx_buf;
87 __u64 rx_buf;
88
89 __u32 len;
90 __u32 speed_hz;
91
92 __u16 delay_usecs;
93 __u8 bits_per_word;
94 __u8 cs_change;
95 __u32 pad;
96
97 /* If the contents of 'struct spi_ioc_transfer' ever change
98 * incompatibly, then the ioctl number (currently 0) must change;
99 * ioctls with constant size fields get a bit more in the way of
100 * error checking than ones (like this) where that field varies.
101 *
102 * NOTE: struct layout is the same in 64bit and 32bit userspace.
103 */
104 };
to Rust:
struct SpiIocTransfer {
tx_buf: u64,
rx_buf: u64,
len: u32,
speed_hz: u32,
delay_usecs: u16,
bits_per_word: u8,
cs_change: u8,
pad: u32
/* If the contents of 'struct spi_ioc_transfer' ever change
* incompatibly, then the ioctl number (currently 0) must change;
* ioctls with constant size fields get a bit more in the way of
* error checking than ones (like this) where that field varies.
*
* NOTE: struct layout is the same in 64bit and 32bit userspace.
*/
}
However, I'm having problems in filling tx_buf and rx_buf:
C version:
int wiringPiSPIDataRW (int channel, unsigned char *data, int len)
{
struct spi_ioc_transfer spi ;
channel &= 1 ;
memset (&spi, 0, sizeof (spi)) ;
spi.tx_buf = (unsigned long)data ;
spi.rx_buf = (unsigned long)data ;
spi.len = len ;
My Rust version:
pub fn spi_data_rw(&mut self, channel_: i32, data: &[u8], len: u32) -> i32 {
let channel = channel_ & 1;
let spi = SpiIocTransfer::new();
spi.tx_buf = data.as_mut_ptr();//(unsigned long) ?
spi.rx_buf = data.as_mut_ptr();//(unsigned long) ?
Error:
error[E0308]: mismatched types
--> src/w25q.rs:68:29
|
68 | spi.rx_buf = data.as_mut_ptr();//(unsigned long) ?
| ^^^^^^^^^^^^^^^^^ expected `u64`, found *-ptr
|
= note: expected type `u64`
found raw pointer `*mut u8`
How do I get the address of the data slice as an u64?
I'm attempting to recreate the wc command in c and having issues getting the proper number of words in any file containing machine code (core files or compiled c). The number of logged words always comes up around 90% short of the amount returned by wc.
For reference here is the project info
Compile statement
gcc -ggdb wordCount.c -o wordCount -std=c99
wordCount.c
/*
* Author(s) - Colin McGrath
* Description - Lab 3 - WC LINUX
* Date - January 28, 2015
*/
#include<stdio.h>
#include<string.h>
#include<dirent.h>
#include<sys/stat.h>
#include<ctype.h>
struct counterStruct {
int newlines;
int words;
int bt;
};
typedef struct counterStruct ct;
ct totals = {0};
struct stat st;
void wc(ct counter, char *arg)
{
printf("%6lu %6lu %6lu %s\n", counter.newlines, counter.words, counter.bt, arg);
}
void process(char *arg)
{
lstat(arg, &st);
if (S_ISDIR(st.st_mode))
{
char message[4056] = "wc: ";
strcat(message, arg);
strcat(message, ": Is a directory\n");
printf(message);
ct counter = {0};
wc(counter, arg);
}
else if (S_ISREG(st.st_mode))
{
FILE *file;
file = fopen(arg, "r");
ct currentCount = {0};
if (file != NULL)
{
char holder[65536];
while (fgets(holder, 65536, file) != NULL)
{
totals.newlines++;
currentCount.newlines++;
int c = 0;
for (int i=0; i<strlen(holder); i++)
{
if (isspace(holder[i]))
{
if (c != 0)
{
totals.words++;
currentCount.words++;
c = 0;
}
}
else
c = 1;
}
}
}
currentCount.bt = st.st_size;
totals.bt = totals.bt + st.st_size;
wc(currentCount, arg);
}
}
int main(int argc, char *argv[])
{
if (argc > 1)
{
for (int i=1; i<argc; i++)
{
//printf("%s\n", argv[i]);
process(argv[i]);
}
}
wc(totals, "total");
return 0;
}
Sample wc output:
135 742 360448 /home/cpmcgrat/53/labs/lab-2/core.22321
231 1189 192512 /home/cpmcgrat/53/labs/lab-2/core.26554
5372 40960 365441 /home/cpmcgrat/53/labs/lab-2/file
24 224 12494 /home/cpmcgrat/53/labs/lab-2/frequency
45 116 869 /home/cpmcgrat/53/labs/lab-2/frequency.c
5372 40960 365441 /home/cpmcgrat/53/labs/lab-2/lineIn
12 50 1013 /home/cpmcgrat/53/labs/lab-2/lineIn2
0 0 0 /home/cpmcgrat/53/labs/lab-2/lineOut
39 247 11225 /home/cpmcgrat/53/labs/lab-2/parseURL
138 318 2151 /home/cpmcgrat/53/labs/lab-2/parseURL.c
41 230 10942 /home/cpmcgrat/53/labs/lab-2/roman
66 162 1164 /home/cpmcgrat/53/labs/lab-2/roman.c
13 13 83 /home/cpmcgrat/53/labs/lab-2/romanIn
13 39 169 /home/cpmcgrat/53/labs/lab-2/romanOut
7 6 287 /home/cpmcgrat/53/labs/lab-2/URLs
11508 85256 1324239 total
Sample rebuild output (./wordCount):
139 76 360448 /home/cpmcgrat/53/labs/lab-2/core.22321
233 493 192512 /home/cpmcgrat/53/labs/lab-2/core.26554
5372 40960 365441 /home/cpmcgrat/53/labs/lab-2/file
25 3 12494 /home/cpmcgrat/53/labs/lab-2/frequency
45 116 869 /home/cpmcgrat/53/labs/lab-2/frequency.c
5372 40960 365441 /home/cpmcgrat/53/labs/lab-2/lineIn
12 50 1013 /home/cpmcgrat/53/labs/lab-2/lineIn2
0 0 0 /home/cpmcgrat/53/labs/lab-2/lineOut
40 6 11225 /home/cpmcgrat/53/labs/lab-2/parseURL
138 318 2151 /home/cpmcgrat/53/labs/lab-2/parseURL.c
42 3 10942 /home/cpmcgrat/53/labs/lab-2/roman
66 162 1164 /home/cpmcgrat/53/labs/lab-2/roman.c
13 13 83 /home/cpmcgrat/53/labs/lab-2/romanIn
13 39 169 /home/cpmcgrat/53/labs/lab-2/romanOut
7 6 287 /home/cpmcgrat/53/labs/lab-2/URLs
11517 83205 1324239 total
Notice the difference in the word count (second int) from the first two files (core files) as well as the roman file and parseURL files (machine code, no extension).
C strings do not store their length. They are terminated by a single NUL (0) byte.
Consequently, strlen needs to scan the entire string, character by character, until it reaches the NUL. That makes this:
for (int i=0; i<strlen(holder); i++)
desperately inefficient: for every character in holder, it needs to count all the characters in holder in order to test whether i is still in range. That transforms a simple linear Θ(N) algorithm into an Θ(N2) cycle-burner.
But in this case, it also produces the wrong result, since binary files typically include lots of NUL characters. Since strlen will actually tell you where the first NUL is, rather than how long the "line" is, you'll end up skipping a lot of bytes in the file. (On the bright side, that makes the scan quadratically faster, but computing the wrong result more rapidly is not really a win.)
You cannot use fgets to read binary files because the fgets interface doesn't tell you how much it read. You can use the Posix 2008 getline interface instead, or you can do binary input with fread, which is more efficient but will force you to count newlines yourself. (Not the worst thing in the world; you seem to be getting that count wrong, too.)
Or, of course, you could read the file one character at a time with fgetc. For a school exercise, that's not a bad solution; the resulting code is easy to write and understand, and typical implementations of fgetc are more efficient than the FUD would indicate.
I find myself writing a simple program to extract data from a bmp file. I just got started and I am at one of those WTF moments.
When I run the program and supply this image: http://www.hack4fun.org/h4f/sites/default/files/bindump/lena.bmp
I get the output:
type: 19778
size: 12
res1: 0
res2: 54
offset: 2621440
The actual image size is 786,486 bytes. Why is my code reporting 12 bytes?
The header format specified in,
http://en.wikipedia.org/wiki/BMP_file_format matches my BMP_FILE_HEADER structure. So why is it getting filled with wrong information?
The image file doesn't appear to be corrupt and other images are giving equally wrong outputs. What am I missing?
#include <stdio.h>
#include <stdlib.h>
typedef struct {
unsigned short type;
unsigned int size;
unsigned short res1;
unsigned short res2;
unsigned int offset;
} BMP_FILE_HEADER;
int main (int args, char ** argv) {
char *file_name = argv[1];
FILE *fp = fopen(file_name, "rb");
BMP_FILE_HEADER file_header;
fread(&file_header, sizeof(BMP_FILE_HEADER), 1, fp);
if (file_header.type != 'MB') {
printf("ERROR: not a .bmp");
return 1;
}
printf("type: %i\nsize: %i\nres1: %i\nres2: %i\noffset: %i\n", file_header.type, file_header.size, file_header.res1, file_header.res2, file_header.offset);
fclose(fp);
return 0;
}
Here the header in hex:
0000000 42 4d 36 00 0c 00 00 00 00 00 36 00 00 00 28 00
0000020 00 00 00 02 00 00 00 02 00 00 01 00 18 00 00 00
The length field is the bytes 36 00 0c 00`, which is in intel order; handled as a 32-bit value, it is 0x000c0036 or decimal 786,486 (which matches the saved file size).
Probably your C compiler is aligning each field to a 32-bit boundary. Enable a pack structure option, pragma, or directive.
There are two mistakes I could find in your code.
First mistake: You have to pack the structure to 1, so every type size is exactly the size its meant to be, so the compiler doesn't align it for example in 4 bytes alignment. So in your code, short, instead of being 2 bytes, it was 4 bytes. The trick for this, is using a compiler directive for packing the nearest struct:
#pragma pack(1)
typedef struct {
unsigned short type;
unsigned int size;
unsigned short res1;
unsigned short res2;
unsigned int offset;
} BMP_FILE_HEADER;
Now it should be aligned properly.
The other mistake is in here:
if (file_header.type != 'MB')
You are trying to check a short type, which is 2 bytes, with a char type (using ''), which is 1 byte. Probably the compiler is giving you a warning about that, it's canonical that single quotes contain just 1 character with 1-byte size.
To get this around, you can divide this 2 bytes into 2 1-byte characters, which are known (M and B), and put them together into a word. For example:
if (file_header.type != (('M' << 8) | 'B'))
If you see this expression, this will happen:
'M' (which is 0x4D in ASCII) shifted 8 bits to the left, will result in 0x4D00, now you can just add or or the next character to the right zeroes: 0x4D00 | 0x42 = 0x4D42 (where 0x42 is 'B' in ASCII). Thinking like this, you could just write:
if (file_header.type != 0x4D42)
Then your code should work.
Is it possible to generate a specific permutation of an array with a macro in C?
i.e. If I have an array X with elements:
0 1 2 3 4 5
x = ["0","1","1","0","1","0"]
I was thinking there may be some macro foo for something like this:
#define S_2Permute(x) = [x[5], x[3], x[4], x[2], x[1]]
where I redefine the order of the array, so the element in the original position 5 is now in position 0.
Any ideas?
EXAMPLE USE
I am starting to create an implementation of the DES encryption algorithm. DES requires several permutation/expansions where I would have to re-order all of the elements in the array, sometimes shrinking the array and sometimes expanding it. I was hoping to just be able to define a macro to permute the arrays for me.
EDIT2
Well in DES the first step is something called the initial permutation. So initially I have some 64-bit key, which for this example can be 0-15 hex:
0123456789ABCDEF
which expands to:
0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
The IP (initial permutation) would permute this string so that every element in the array would be in a new position:
IP =
58 50 42 34 26 18 10 2
60 52 44 36 28 20 12 4
62 54 46 38 30 22 14 6
64 56 48 40 32 24 16 8
57 49 41 33 25 17 9 1
59 51 43 35 27 19 11 3
61 53 45 37 29 21 13 5
63 55 47 39 31 23 15 7
So the new 1st element in the bitstring would be the 58th element(bit) from the original bitstring.
So I would have all of these bits stored in an array of characters:
x = [0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,0,1,0,1,1,0,0,
1,1,1,1,0,0,0,1,0,0,1,1,0,1,0,1,0,1,1,1,1,0,0,1,1,0,1,1,1,1,0,1,1,1,1]
and then just call
IP_PERMUTE(x);
And macro magic will have moved all of the bits into the new correct positions.
Absolutely - you're almost there with your example. Try this:
#define S_2Permute(x) {x[5], x[3], x[4], x[2], x[1]}
Then later:
int x[] = {1,2,3,4,5,6};
int y[] = S_2Permute(x); // y is now {6,4,5,3,2}
Two things to remember:
1) in C, arrays are numbered from 0, so it's possible you meant:
#define S_2Permute(x) {x[4], x[2], x[3], x[1], x[0]}
2) If you're using gcc, you can compile with -E to see the output from the preprocessor (very good for debugging macro expansions).
However, I don't think I'd actually do it this way - I'd say the code will be easier to read (and potentially less error prone) if you generate the permutations programmatically - and I doubt that it'll be a large performance hit.
Since you say you're having trouble compiling this, here's a test program that works for me in gcc 4.6.1:
#include <stdio.h>
#define S_2Permute(x) {x[5], x[3], x[4], x[2], x[1]}
int main(void) {
int x[] = {1,2,3,4,5,6};
int y[] = S_2Permute(x);
for(int i = 0; i < 5; i++) {
printf("%d,",y[i]);
}
printf("\n");
}
I compiled with gcc test.c -std=c99 -Wall
I'm new so apologies if it's not ok to offer a different means of solution but have you considered using an inline function instead of a macro?
I love single lines of code that do a lot as much as the next guy, but it makes more sense to me to do it this way:
//I would have an array that defined how I wanted to swap the positions, I'll assume 5 elements
short reordering[5] = {4,1,3,2,0};
inline void permuteArray(char array[]) {
char swap = array[reordering[0]];
array[reordering[0]] = array[reordinger[1]];
array[reordering[1]] = array[reordinger[2]];
array[reordering[2]] = array[reordinger[3]];
array[reordering[3]] = array[reordinger[4]];
array[reordering[4]] = swap;
}
This may not be as pretty or efficient as a macro, but it could save you some headaches managing and maintaining your code (and could always be swapped for the macro version Timothy suggest.
I am doing something vary similar. This is my code. The variable that comes in is a ulong, so then i convert it to a bit array and then rearrange all the bits and then turn it back into a ulong.
public override ulong Permutation(ulong input, int[] permuation)
{
byte[] test = BitConverter.GetBytes(input);
BitArray test2 = new BitArray(test);
BitArray final = new BitArray(test);
ulong x = 0;
ulong y = 1;
for (int i = 0; i < permuation.Length; i++)
{
final[i] = test2[(permuation[i]-1)];
}
for (int i = 0; i < final.Length; i++)
{
if (final[i] == true)
{
x += (1 * y);
}
else
{
x += (0 * y);
}
y = y * 2;
}
return x;
}
Im trying to parse a bmp file with fread() and when I begin to parse, it reverses the order of my bytes.
typedef struct{
short magic_number;
int file_size;
short reserved_bytes[2];
int data_offset;
}BMPHeader;
...
BMPHeader header;
...
The hex data is 42 4D 36 00 03 00 00 00 00 00 36 00 00 00;
I am loading the hex data into the struct by fread(&header,14,1,fileIn);
My problem is where the magic number should be 0x424d //'BM' fread() it flips the bytes to be 0x4d42 // 'MB'
Why does fread() do this and how can I fix it;
EDIT: If I wasn't specific enough, I need to read the whole chunk of hex data into the struct not just the magic number. I only picked the magic number as an example.
This is not the fault of fread, but of your CPU, which is (apparently) little-endian. That is, your CPU treats the first byte in a short value as the low 8 bits, rather than (as you seem to have expected) the high 8 bits.
Whenever you read a binary file format, you must explicitly convert from the file format's endianness to the CPU's native endianness. You do that with functions like these:
/* CHAR_BIT == 8 assumed */
uint16_t le16_to_cpu(const uint8_t *buf)
{
return ((uint16_t)buf[0]) | (((uint16_t)buf[1]) << 8);
}
uint16_t be16_to_cpu(const uint8_t *buf)
{
return ((uint16_t)buf[1]) | (((uint16_t)buf[0]) << 8);
}
You do your fread into an uint8_t buffer of the appropriate size, and then you manually copy all the data bytes over to your BMPHeader struct, converting as necessary. That would look something like this:
/* note adjustments to type definition */
typedef struct BMPHeader
{
uint8_t magic_number[2];
uint32_t file_size;
uint8_t reserved[4];
uint32_t data_offset;
} BMPHeader;
/* in general this is _not_ equal to sizeof(BMPHeader) */
#define BMP_WIRE_HDR_LEN (2 + 4 + 4 + 4)
/* returns 0=success, -1=error */
int read_bmp_header(BMPHeader *hdr, FILE *fp)
{
uint8_t buf[BMP_WIRE_HDR_LEN];
if (fread(buf, 1, sizeof buf, fp) != sizeof buf)
return -1;
hdr->magic_number[0] = buf[0];
hdr->magic_number[1] = buf[1];
hdr->file_size = le32_to_cpu(buf+2);
hdr->reserved[0] = buf[6];
hdr->reserved[1] = buf[7];
hdr->reserved[2] = buf[8];
hdr->reserved[3] = buf[9];
hdr->data_offset = le32_to_cpu(buf+10);
return 0;
}
You do not assume that the CPU's endianness is the same as the file format's even if you know for a fact that right now they are the same; you write the conversions anyway, so that in the future your code will work without modification on a CPU with the opposite endianness.
You can make life easier for yourself by using the fixed-width <stdint.h> types, by using unsigned types unless being able to represent negative numbers is absolutely required, and by not using integers when character arrays will do. I've done all these things in the above example. You can see that you need not bother endian-converting the magic number, because the only thing you need to do with it is test magic_number[0]=='B' && magic_number[1]=='M'.
Conversion in the opposite direction, btw, looks like this:
void cpu_to_le16(uint8_t *buf, uint16_t val)
{
buf[0] = (val & 0x00FF);
buf[1] = (val & 0xFF00) >> 8;
}
void cpu_to_be16(uint8_t *buf, uint16_t val)
{
buf[0] = (val & 0xFF00) >> 8;
buf[1] = (val & 0x00FF);
}
Conversion of 32-/64-bit quantities left as an exercise.
I assume this is an endian issue. i.e. You are putting the bytes 42 and 4D into your short value. But your system is little endian (I could have the wrong name), which actually reads the bytes (within a multi-byte integer type) left to right instead of right to left.
Demonstrated in this code:
#include <stdio.h>
int main()
{
union {
short sval;
unsigned char bval[2];
} udata;
udata.sval = 1;
printf( "DEC[%5hu] HEX[%04hx] BYTES[%02hhx][%02hhx]\n"
, udata.sval, udata.sval, udata.bval[0], udata.bval[1] );
udata.sval = 0x424d;
printf( "DEC[%5hu] HEX[%04hx] BYTES[%02hhx][%02hhx]\n"
, udata.sval, udata.sval, udata.bval[0], udata.bval[1] );
udata.sval = 0x4d42;
printf( "DEC[%5hu] HEX[%04hx] BYTES[%02hhx][%02hhx]\n"
, udata.sval, udata.sval, udata.bval[0], udata.bval[1] );
return 0;
}
Gives the following output
DEC[ 1] HEX[0001] BYTES[01][00]
DEC[16973] HEX[424d] BYTES[4d][42]
DEC[19778] HEX[4d42] BYTES[42][4d]
So if you want to be portable you will need to detect the endian-ness of your system and then do a byte shuffle if required. There will be plenty of examples round the internet of swapping the bytes around.
Subsequent question:
I ask only because my file size is 3 instead of 196662
This is due to memory alignment issues. 196662 is the bytes 36 00 03 00 and 3 is the bytes 03 00 00 00. Most systems need types like int etc to not be split over multiple memory words. So intuitively you think your struct is laid out im memory like:
Offset
short magic_number; 00 - 01
int file_size; 02 - 05
short reserved_bytes[2]; 06 - 09
int data_offset; 0A - 0D
BUT on a 32 bit system that means files_size has 2 bytes in the same word as magic_number and two bytes in the next word. Most compilers will not stand for this, so the way the structure is laid out in memory is actually like:
short magic_number; 00 - 01
<<unused padding>> 02 - 03
int file_size; 04 - 07
short reserved_bytes[2]; 08 - 0B
int data_offset; 0C - 0F
So when you read your byte stream in the 36 00 is going into your padding area which leaves your file_size as getting the 03 00 00 00. Now if you used fwrite to create this data it should have been OK as the padding bytes would have been written out. But if your input is always going to be in the format you have specified it is not appropriate to read the whole struct as one with fread. Instead you will need to read each of the elements individually.
Writing a struct to a file is highly non-portable -- it's safest to just not try to do it at all. Using a struct like this is guaranteed to work only if a) the struct is both written and read as a struct (never a sequence of bytes) and b) it's always both written and read on the same (type of) machine. Not only are there "endian" issues with different CPUs (which is what it seems you've run into), there are also "alignment" issues. Different hardware implementations have different rules about placing integers only on even 2-byte or even 4-byte or even 8-byte boundaries. The compiler is fully aware of all this, and inserts hidden padding bytes into your struct so it always works right. But as a result of the hidden padding bytes, it's not at all safe to assume a struct's bytes are laid out in memory like you think they are. If you're very lucky, you work on a computer that uses big-endian byte order and has no alignment restrictions at all, so you can lay structs directly over files and have it work. But you're probably not that lucky -- certainly programs that need to be "portable" to different machines have to avoid trying to lay structs directly over any part of any file.