Find memory address in hex string in kernel module - c

I'm writing a kernel module to find the the memory address of do_debug (0xffffffff8134f709) by first searching for the hex bytes next to the address. I'm not sure I am using the correct hex bytes: "\xe8\x61\x07\x00\x00" (I wish to stick to C and not assembly.)
struct desc_ptr idt_register;
store_idt(&idt_register);
printk("idt_register.address: %lx\n", idt_register.address); // same as in /boot/System.map*
gate_desc *idt_table = (gate_desc *)idt_register.address;
unsigned char *debug = (unsigned char *)gate_offset(idt_table[0x1]);
printk("debug: %lx\n", (unsigned long)debug); // same as in /boot/System.map*
int count = 0;
while(count < 150){
if( (*(debug) == 0xe8) && (*(debug + 1) == 0x61) && (*(debug + 2) == 0x07) && (*(debug + 3) == 0x00) && (*(debug + 4) == 0x00) ){
debug += 5;
unsigned long *do_debug = (unsigned long *)(0xffffffff00000000 | *((unsigned long *)(debug)));
if((unsigned long)do_debug != 0xffffffff8134f709){
printk("do_debug: %lx\n", (unsigned long)do_debug); // wrong address !
return;
}
break;
}
debug++;
count++;
}
gdb:
gdb ./vmlinux-3.2.0-4-amd64
...
(gdb) info line debug
Line 54 of "/build/linux-s5x2oE/linux-3.2.46/drivers/pci/hotplug/pci_hotplug_core.c" is at address 0xffffffff811cf02b <power_read_file+2> but contains no code.
Line 1322 of "/build/linux-s5x2oE/linux-3.2.46/arch/x86/kernel/entry_64.S"
starts at address 0xffffffff8134ef80 and ends at 0xffffffff8134efc0.
(gdb) disas 0xffffffff8134ef80,0xffffffff8134efc0
Dump of assembler code from 0xffffffff8134ef80 to 0xffffffff8134efc0:
0xffffffff8134ef80: callq *0x2c687a(%rip) # 0xffffffff81615800
0xffffffff8134ef86: pushq $0xffffffffffffffff
0xffffffff8134ef88: sub $0x78,%rsp
0xffffffff8134ef8c: callq 0xffffffff8134ed40
0xffffffff8134ef91: mov %rsp,%rdi
0xffffffff8134ef94: xor %esi,%esi
0xffffffff8134ef96: subq $0x1000,%gs:0x1137c
0xffffffff8134efa3: callq 0xffffffff8134f709 <do_debug>
0xffffffff8134efa8: addq $0x1000,%gs:0x1137c
0xffffffff8134efb5: jmpq 0xffffffff8134f160
0xffffffff8134efba: nopw 0x0(%rax,%rax,1)
End of assembler dump.
(gdb) x/i 0xffffffff8134efa3
0xffffffff8134efa3: callq 0xffffffff8134f709 <do_debug>
(gdb) x/xw 0xffffffff8134efa3
0xffffffff8134efa3: 0x000761e8
(gdb)
readelf:
ffffffff8134ef96: 65 48 81 2c 25 7c 13 subq $0x1000,%gs:0x1137c
ffffffff8134ef9d: 01 00 00 10 00 00
ffffffff8134efa3: e8 61 07 00 00 callq ffffffff8134f709 <do_debug>
ffffffff8134efa8: 65 48 81 04 25 7c 13 addq $0x1000,%gs:0x1137c
ffffffff8134efaf: 01 00 00 10 00 00
EDIT:
(gdb) print do_debug
$1 = {void (struct pt_regs *, long int)} 0xffffffff8134f709 <do_debug>
(gdb)

I am not familiar with linux kernel.
In the while statement, when you go into the first if statement, the debug then point to ffffffff8134efa8
ffffffff8134efa8: 65 48 81 04 25 7c 13 addq $0x1000,%gs:0x1137c # debug point here
ffffffff8134efaf: 01 00 00 10 00 00
the next statement
unsigned long *do_debug = (unsigned long *)(0xffffffff00000000 |
*((unsigned long *)(debug)));
will get this result.
do_debug = (unsigned long *)(0xffffffff00000000 | 0x01137c2504814865) = 0xffffffff04814865
if you want to get the do_debug address, you should do this:
if( (*(debug) == 0xe8) && (*(debug + 1) == 0x61) && (*(debug + 2) == 0x07) && (*(debug + 3) == 0x00) && (*(debug + 4) == 0x00) ){
//debug += 5;
uint32_t offset = ntohl(*(unsigned int *)(debug+1)); //get the do_debug function offset
unsigned long *do_debug = (debug+5)+offset; // next_code_instruction + offset
//debug+5 point to ffffffff8134efa8
//offset is 0x761
//so ffffffff8134efa8+0x761 = 0xffffffff8134f709
if((unsigned long)do_debug != 0xffffffff8134f709){
printk("do_debug: %lx\n", (unsigned long)do_debug); // wrong address !
return;
}
break;
}

Related

Why final pointer is being aligned to size of int?

Here's the code under consideration:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
char buffer[512];
int pos;
int posf;
int i;
struct timeval *tv;
int main(int argc, char **argv)
{
pos = 0;
for (i = 0; i < 512; i++) buffer[i] = 0;
for (i = 0; i < 4; i++)
{
printf("pos = %d\n", pos);
*(int *)(buffer + pos + 4) = 0x12345678;
pos += 9;
}
for (i = 0; i < 9 * 4; i++)
{
printf(" %02X", (int)(unsigned char)*(buffer + i));
if ((i % 9) == 8) printf("\n");
}
printf("\n");
// ---
pos = 0;
for (i = 0; i < 512; i++) buffer[i] = 0;
*(int *)(buffer + 4) = 0x12345678;
*(int *)(buffer + 9 + 4) = 0x12345678;
*(int *)(buffer + 18 + 4) = 0x12345678;
*(int *)(buffer + 27 + 4) = 0x12345678;
for (i = 0; i < 9 * 4; i++)
{
printf(" %02X", (int)(unsigned char)*(buffer + i));
if ((i % 9) == 8) printf("\n");
}
printf("\n");
return 0;
}
And the output of code is
pos = 0
pos = 9
pos = 18
pos = 27
00 00 00 00 78 56 34 12 00
00 00 00 78 56 34 12 00 00
00 00 78 56 34 12 00 00 00
00 78 56 34 12 00 00 00 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
00 00 00 00 78 56 34 12 00
I can not get why
*(int *)(buffer + pos + 4) = 0x12345678;
is being placed into the address aligned to size of int (4 bytes). I expect the following actions during the execution of this command:
pointer to buffer, which is char*, increased by the value of pos (0, 9, 18, 27) and then increased by 4. The resulting pointer is char* pointing to char array index [pos + 4];
char* pointer in the brackets is being converted to the int*, causing resulting pointer addressing integer of 4 bytes size at base location (buffer + pos + 4) and integer array index [0];
resulting int* location is being stored with bytes 78 56 34 12 in this order (little endian system).
Instead I see pointer in brackets being aligned to size of int (4 bytes), however direct addressing using constants (see second piece of code) works properly as expected.
target CPU is i.MX287 (ARM9);
target operating system is OpenWrt Linux [...] 3.18.29 #431 Fri Feb 11 15:57:31 2022 armv5tejl GNU/Linux;
compiled on Linux [...] 4.15.0-142-generic #146~16.04.1-Ubuntu SMP Tue Apr 13 09:27:15 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux, installed in Virtual machine;
GCC compiler version gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609
I compile as a part of whole system image compilation, flags are CFLAGS = -Os -Wall -Wmissing-declarations -g3.
Update: thanks to Andrew Henle, I now replace
*(int*)(buffer + pos + 4) = 0x12345678;
with
buffer[pos + 4] = value & 0xff;
buffer[pos + 5] = (value >> 8) & 0xff;
buffer[pos + 6] = (value >> 16) & 0xff;
buffer[pos + 7] = (value >> 24) & 0xff;
and can't believe I must do it on 32-bit microprocessor system, whatever architecture it has, and that GCC is not able to properly slice int into bytes or partial int words and perform RMW for those parts.
char* pointer in the brackets is being converted to the int*, causing resulting pointer addressing integer of 4 bytes size at base location (buffer + pos + 4) and integer array index [0]
This incurs undefined behavior (UB) when the alignments requirements of int * are not met.
Instead copy with memcpy(). A good compiler will emit valid optimized code.
// *(int*)(buffer + pos + 4) = 0x12345678;
memcpy(buffer + pos + 4, &(int){0x12345678}, sizeof (int));

Pixel conversion in decompiled assembly from Ghidra

I'm reversing an assembly function that I believe is converting an array of pixels (RGB) or photosites (RGGB) into a final array with an original (I think) encoding made of 10bits or 12bits per pixel. My goal is to understand in terms of c/c++ operations or natural language in the context of image format conversion what is happening, and in the end name in a meaningful way the variables.
There are few operation that I do not understand such as:
In the case of 10 bits per sample (1 sample ie., R|G|B):
uVar7 = (int64)iVar3 / 3 + ((int64)iVar3 >> 0x3f) & 0xffffffff; // 3f = 63
int iVar4 = (int)uVar7 + (int)(uVar7 >> 0x1f); // 1f = 31
puVar1 = (uint*)((int64)outData + (int64)iVar4 * 4);
*puVar1 = *puVar1 | (*inPixel & 0x3ff) <<
(((char)iVar3 + (char)iVar4 * -3) * '\n' + 2U & 0x1f);
The first two lines are considering the strongest bit (>> 0x3f and >> 0x1f); but why divide by 3 ?
why add the strongest bit ? I don't get the 4th line at all; * -3 * '\n' in particular, and 2U & 0x1f: why 31 as mask ?
and in the case of 12 bits per samples:
uint uVar8 = *inPixel & 0xfff; // considering an array of 3 bits (likely 4 times at some point)
uint uVar5 = surfaceSizeBits + position_ >> 0x1f & 7; // 7 = b111
& 7 is zeroing all but the 3 lowests bits, but why after considering the strongest bit just before ?
dataSizeBytes_ = surfaceSizeBits + position_ + uVar5;
iVar3 = ((dataSizeBytes_ & 7) - uVar5) * 12;
I don't get the purpose of - uVar5 and then *12; it's possible that dataSizeBytes is not data size in bytes here.
uint uVar6 = iVar3 >> 0x1f & 0x1f;
uVar5 = iVar3 + uVar6;
iVar3 = (uVar5 & 0x1f) - uVar6;
puVar1 = (uint*)((int64)outData +
((int64)((int)dataSizeBytes_ >> 3) * 3 +
(int64)((int)uVar5 >> 5)) * 4);
>>3 is "dropping the 2 weakest bits" but that's all I understand here.
byte bVar2 = (byte)iVar3;
*puVar1 = *puVar1 | uVar8 << (bVar2 & 0x1f);
if (0x14 < iVar3) // 0x14 = 20
puVar1[1] = puVar1[1] | uVar8 >> (0x20 - bVar2 & 0x1f); //0x20 = 32
Here I understand the bit operations but I can't move to a higher level.
End of 12 bits case.
For the decompiled code, I provided those definitions in order to help understanding:
typedef unsigned int uint;
typedef unsigned long long uint64;
typedef long long int64;
typedef int undefined; //may be wrong, may be 1 byte (int4) instead
typedef long int32;
typedef long undefined8;
typedef int int16;
typedef int int16;
typedef short int8;
typedef unsigned short uint8;
typedef unsigned short ushort;
typedef long code;
char LogBuffer[256];
The undefined* are from ghidra, I did not check yet in dynamic analysis if my translation is correct.
The decompiled code from ghidra is:
(modulo few renamings and type definitions)
undefined8
PixelConversionInOut
(undefined8 param_1, void* inData, void* outData, int height, int widthStride,
int bitsPerSample, int dstBufferSize, uint* bufferResult)
{
uint* puVar1;
int surfaceSizeBits;
undefined8 status;
int iVar3;
uint64 uVar7;
int64 currentPosition;
uint dataSizeBytes_;
GetCurrentThreadId();
if (inData != (void*)nullptr && outData != (void*)nullptr)
{
int heightPadding = 0;
*bufferResult = 0;
if (bitsPerSample == 10)
{
if ((height == 0x818) || (height == 0x1010))
heightPadding = 4;
else if (height == 0x1018)
heightPadding = 2;
}
int heightPadded = heightPadding + height;
if (bitsPerSample == 10)
{
currentPosition = (int64)(heightPadded * widthStride * 4);
uVar7 = currentPosition / 3 + (currentPosition >> 0x3f) & 0xffffffff;
*bufferResult = (int)uVar7 + (int)(uVar7 >> 0x1f);
}
else
{
if (bitsPerSample == 0xc)
{
surfaceSizeBits = heightPadded * widthStride * 0xc;
dataSizeBytes_ = (int)(surfaceSizeBits + (surfaceSizeBits >> 0x1f & 7U)) >> 3;
}
else
dataSizeBytes_ = heightPadded * widthStride * 4 / 2;
*bufferResult = dataSizeBytes_;
}
dataSizeBytes_ = *bufferResult;
if (dstBufferSize < (int)dataSizeBytes_)
{
sprintf(LogBuffer, "dstBuffer must be allocated at least %d bytes.", (uint64)dataSizeBytes_);
status = 0x80000003;
}
else
{
if (bitsPerSample == 0x10)
{
memcpy(outData, inData, (int64)(int)dataSizeBytes_);
GetCurrentThreadId();
status = 0;
}
else
{
memset(outData, 0, (int64)(int)dataSizeBytes_);
currentPosition = 0;
if (0 < widthStride)
{
surfaceSizeBits = 0;
do
{
int position_ = 0;
if (0 < (int64)(heightPadded - heightPadding))
{
ushort* inPixel = (ushort*)((int64)inData + height * currentPosition * 2);
int64 rowsRemaining = (int64)(heightPadded - heightPadding);
do
{
if (bitsPerSample == 10)
{
iVar3 = surfaceSizeBits + position_;
uVar7 = (int64)iVar3 / 3 + ((int64)iVar3 >> 0x3f) & 0xffffffff;
int iVar4 = (int)uVar7 + (int)(uVar7 >> 0x1f);
puVar1 = (uint*)((int64)outData + (int64)iVar4 * 4);
*puVar1 = *puVar1 | (*inPixel & 0x3ff) <<
(((char)iVar3 + (char)iVar4 * -3) * '\n' + 2U & 0x1f);
}
else
{
if (bitsPerSample == 12)
{
uint uVar8 = *inPixel & 0xfff;
uint uVar5 = surfaceSizeBits + position_ >> 0x1f & 7;
dataSizeBytes_ = surfaceSizeBits + position_ + uVar5;
iVar3 = ((dataSizeBytes_ & 7) - uVar5) * 12;
uint uVar6 = iVar3 >> 0x1f & 0x1f;
uVar5 = iVar3 + uVar6;
iVar3 = (uVar5 & 0x1f) - uVar6;
puVar1 = (uint*)((int64)outData +
((int64)((int)dataSizeBytes_ >> 3) * 3 +
(int64)((int)uVar5 >> 5)) * 4);
byte bVar2 = (byte)iVar3;
*puVar1 = *puVar1 | uVar8 << (bVar2 & 0x1f);
if (0x14 < iVar3)
puVar1[1] = puVar1[1] | uVar8 >> (0x20 - bVar2 & 0x1f);
}
}
position_ = position_ + 1;
inPixel = inPixel + 1;
rowsRemaining = rowsRemaining + -1;
}
while (rowsRemaining != 0);
}
currentPosition = currentPosition + 1;
surfaceSizeBits = surfaceSizeBits + heightPadded;
}
while (currentPosition < widthStride);
}
GetCurrentThreadId();
status = 0;
}
}
return status;
}
return 0x80000023;
}
Here is the original assembly but ghidra did not complain or warn about the result.
**************************************************************
* FUNCTION *
**************************************************************
undefined8 __fastcall PixelConversionInOut(undefined8 pa
undefined8 RAX:8 <RETURN> XREF[1]: 180037b9c(W)
undefined8 RCX:8 param_1
void * RDX:8 inData
void * R8:8 outData
int R9D:4 height
int Stack[0x28]:4 widthStride XREF[1]: 180037b66(R)
int Stack[0x30]:4 bitsPerSample XREF[1]: 180037b26(R)
int Stack[0x38]:4 dstBufferSize XREF[1]: 180037bb8(R)
uint * Stack[0x40]:8 bufferResult XREF[1]: 180037b19(R)
undefined4 EDI:4 heightPadding XREF[2]: 180037b62(W),
180037d1e(W)
undefined4 EAX:4 surfaceSizeBits XREF[2]: 180037b9c(W),
180037bd7(W)
undefined8 RAX:8 status XREF[1]: 180037bd7(W)
undefined8 R10:8 inPixel XREF[1]: 180037c71(W)
undefined4 R11D:4 position_ XREF[1]: 180037d17(W)
undefined8 RDI:8 rowsRemaining XREF[1]: 180037d1e(W)
undefined8 RBP:8 currentPosition XREF[1]: 180037d2c(W)
undefined4 Stack[0x20]:4 local_res20 XREF[2]: 180037ae0(W),
180037c5c(R)
undefined8 Stack[0x18]:8 local_res18 XREF[2]: 180037b21(W),
180037be1(R)
undefined8 Stack[0x10]:8 local_res10 XREF[2]: 180037ae5(W),
180037c61(R)
undefined8 Stack[0x8]:8 local_res8 XREF[4]: 180037aea(W),
180037c23(W),
180037c40(R),
180037d27(R)
undefined8 Stack[-0x20]:8 local_20 XREF[2]: 180037c14(W),
180037d40(R)
undefined8 Stack[-0x28]:8 local_28 XREF[2]: 180037b2d(W),
180037bdc(R)
undefined8 Stack[-0x30]:8 local_30 XREF[2]: 180037b34(W),
180037be6(R)
undefined8 Stack[-0x38]:8 local_38 XREF[2]: 180037c33(W),
180037d3b(R)
undefined4 HASH:27f0811 heightPadded
undefined4 HASH:5fcc647 dataSizeBytes_
PixelConversionInOut XREF[2]: GetDecompressedData:1800384fa(c),
FUN_180038630:18003875a(c)
180037ae0 44 89 4c 24 20 MOV dword ptr [RSP + local_res20],height
180037ae5 48 89 54 24 10 MOV qword ptr [RSP + local_res10],inData
180037aea 48 89 4c 24 08 MOV qword ptr [RSP + local_res8],param_1
180037aef 56 PUSH RSI
180037af0 41 56 PUSH R14
180037af2 41 57 PUSH R15
180037af4 48 83 ec 40 SUB RSP,0x40
180037af8 41 8b f1 MOV ESI,height
180037afb 4d 8b f8 MOV R15,outData
180037afe 4c 8b f2 MOV R14,inData
180037b01 ff 15 29 7d CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037b07 4d 85 f6 TEST R14,R14
180037b0a 0f 84 42 02 JZ LAB_180037d52
00 00
180037b10 4d 85 ff TEST R15,R15
180037b13 0f 84 39 02 JZ LAB_180037d52
00 00
180037b19 4c 8b 94 24 MOV R10,qword ptr [RSP + bufferResult]
98 00 00 00
180037b21 48 89 5c 24 70 MOV qword ptr [RSP + local_res18],RBX
180037b26 8b 9c 24 88 MOV EBX,dword ptr [RSP + bitsPerSample]
00 00 00
180037b2d 48 89 7c 24 30 MOV qword ptr [RSP + local_28],RDI
180037b32 33 ff XOR EDI,EDI
180037b34 4c 89 64 24 28 MOV qword ptr [RSP + local_30],R12
180037b39 41 89 3a MOV dword ptr [R10],EDI
180037b3c 83 fb 0a CMP EBX,0xa
180037b3f 75 21 JNZ LAB_180037b62
180037b41 8b ce MOV param_1,ESI
180037b43 81 e9 18 08 SUB param_1,0x818
00 00
180037b49 74 12 JZ LAB_180037b5d
180037b4b 81 e9 f8 07 SUB param_1,0x7f8
00 00
180037b51 74 0a JZ LAB_180037b5d
180037b53 83 f9 08 CMP param_1,0x8
180037b56 75 0a JNZ LAB_180037b62
180037b58 8d 7b f8 LEA EDI,[RBX + -0x8]
180037b5b eb 05 JMP LAB_180037b62
LAB_180037b5d XREF[2]: 180037b49(j), 180037b51(j)
180037b5d bf 04 00 00 00 MOV EDI,0x4
LAB_180037b62 XREF[3]: 180037b3f(j), 180037b56(j),
180037b5b(j)
180037b62 44 8d 24 37 LEA R12D,[heightPadding + RSI*0x1]
180037b66 8b b4 24 80 MOV ESI,dword ptr [RSP + widthStride]
00 00 00
180037b6d 83 fb 0a CMP EBX,0xa
180037b70 75 1c JNZ LAB_180037b8e
180037b72 41 8b cc MOV param_1,R12D
180037b75 b8 56 55 55 55 MOV EAX,0x55555556
180037b7a 0f af ce IMUL param_1,ESI
180037b7d c1 e1 02 SHL param_1,0x2
180037b80 f7 e9 IMUL param_1
180037b82 8b c2 MOV EAX,inData
180037b84 c1 e8 1f SHR EAX,0x1f
180037b87 03 d0 ADD inData,EAX
180037b89 41 89 12 MOV dword ptr [R10],inData
180037b8c eb 27 JMP LAB_180037bb5
LAB_180037b8e XREF[1]: 180037b70(j)
180037b8e 41 8b c4 MOV EAX,R12D
180037b91 0f af c6 IMUL EAX,ESI
180037b94 83 fb 0c CMP EBX,0xc
180037b97 75 11 JNZ LAB_180037baa
180037b99 8d 04 40 LEA EAX,[RAX + RAX*0x2]
180037b9c c1 e0 02 SHL surfaceSizeBits,0x2
180037b9f 99 CDQ
180037ba0 83 e2 07 AND inData,0x7
180037ba3 03 c2 ADD surfaceSizeBits,inData
180037ba5 c1 f8 03 SAR surfaceSizeBits,0x3
180037ba8 eb 08 JMP LAB_180037bb2
LAB_180037baa XREF[1]: 180037b97(j)
180037baa c1 e0 02 SHL surfaceSizeBits,0x2
180037bad 99 CDQ
180037bae 2b c2 SUB surfaceSizeBits,inData
180037bb0 d1 f8 SAR surfaceSizeBits,1
LAB_180037bb2 XREF[1]: 180037ba8(j)
180037bb2 41 89 02 MOV dword ptr [R10],surfaceSizeBits
LAB_180037bb5 XREF[1]: 180037b8c(j)
180037bb5 49 63 02 MOVSXD surfaceSizeBits,dword ptr [R10]
180037bb8 3b 84 24 90 CMP surfaceSizeBits,dword ptr [RSP + dstBufferSize]
00 00 00
180037bbf 7e 34 JLE LAB_180037bf5
180037bc1 48 8d 15 30 LEA inData,[s_dstBuffer_must_be_allocated_at_l_180b675f8] = "dstBuffer must be allocated at least %d bytes."
fa b2 00
180037bc8 48 8d 0d 81 LEA param_1,[LogBuffer] = ??
db bc 00
180037bcf 44 8b c0 MOV outData,surfaceSizeBits
180037bd2 e8 bd 28 1c 00 CALL sprintf int sprintf(char * _Dest, char *
180037bd7 b8 03 00 00 80 MOV status,0x80000003
LAB_180037bdc XREF[2]: 180037c10(j), 180037d4d(j)
180037bdc 48 8b 7c 24 30 MOV heightPadding,qword ptr [RSP + local_28]
180037be1 48 8b 5c 24 70 MOV RBX,qword ptr [RSP + local_res18]
180037be6 4c 8b 64 24 28 MOV R12,qword ptr [RSP + local_30]
180037beb 48 83 c4 40 ADD RSP,0x40
180037bef 41 5f POP R15
180037bf1 41 5e POP R14
180037bf3 5e POP RSI
180037bf4 c3 RET
LAB_180037bf5 XREF[1]: 180037bbf(j)
180037bf5 4c 8b c0 MOV outData,status
180037bf8 49 8b cf MOV param_1,R15
180037bfb 83 fb 10 CMP EBX,0x10
180037bfe 75 12 JNZ LAB_180037c12
180037c00 49 8b d6 MOV inData,R14
180037c03 e8 f8 d9 1b 00 CALL memcpy void * memcpy(void * _Dst, void
180037c08 ff 15 22 7c CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037c0e 33 c0 XOR status,status
180037c10 eb ca JMP LAB_180037bdc
LAB_180037c12 XREF[1]: 180037bfe(j)
180037c12 33 d2 XOR inData,inData
180037c14 48 89 6c 24 38 MOV qword ptr [RSP + local_20],RBP
180037c19 e8 d2 17 1c 00 CALL memset void * memset(void * _Dst, int _
180037c1e 48 63 c6 MOVSXD status,ESI
180037c21 33 ed XOR EBP,EBP
180037c23 48 89 44 24 60 MOV qword ptr [RSP + local_res8],status
180037c28 85 f6 TEST ESI,ESI
180037c2a 0f 8e 10 01 JLE LAB_180037d40
00 00
180037c30 41 8b c4 MOV status,R12D
180037c33 4c 89 6c 24 20 MOV qword ptr [RSP + local_38],R13
180037c38 2b c7 SUB status,heightPadding
180037c3a 45 33 f6 XOR R14D,R14D
180037c3d 4c 63 e8 MOVSXD R13,status
180037c40 48 8b 44 24 60 MOV status,qword ptr [RSP + local_res8]
180037c45 66 66 66 0f NOP word ptr [RAX + RAX*0x1]
1f 84 00 00
00 00 00
LAB_180037c50 XREF[1]: 180037d35(j)
180037c50 45 33 db XOR R11D,R11D
180037c53 4d 85 ed TEST R13,R13
180037c56 0f 8e d0 00 JLE LAB_180037d2c
00 00
180037c5c 48 63 44 24 78 MOVSXD status,dword ptr [RSP + local_res20]
180037c61 48 8b 4c 24 68 MOV param_1,qword ptr [RSP + local_res10]
180037c66 49 8b fd MOV heightPadding,R13
180037c69 48 0f af c5 IMUL status,RBP
180037c6d 4c 8d 14 41 LEA R10,[param_1 + status*0x2]
LAB_180037c71 XREF[1]: 180037d21(j)
180037c71 45 0f b7 02 MOVZX outData,word ptr [inPixel]
180037c75 83 fb 0a CMP EBX,0xa
180037c78 75 37 JNZ LAB_180037cb1
180037c7a 43 8d 0c 1e LEA param_1,[R14 + R11*0x1]
180037c7e b8 56 55 55 55 MOV status,0x55555556
180037c83 41 81 e0 ff AND outData,0x3ff
03 00 00
180037c8a f7 e9 IMUL param_1
180037c8c 8b c2 MOV status,inData
180037c8e c1 e8 1f SHR status,0x1f
180037c91 03 d0 ADD inData,status
180037c93 48 63 c2 MOVSXD status,inData
180037c96 4d 8d 0c 87 LEA height,[R15 + status*0x4]
180037c9a 8d 04 52 LEA status,[RDX + RDX*0x2]
180037c9d 2b c8 SUB param_1,status
180037c9f 8d 0c 89 LEA param_1,[RCX + RCX*0x4]
180037ca2 8d 0c 4d 02 LEA param_1,[0x2 + param_1*0x2]
00 00 00
180037ca9 41 d3 e0 SHL outData,param_1
180037cac 45 09 01 OR dword ptr [height],outData
180037caf eb 66 JMP LAB_180037d17
LAB_180037cb1 XREF[1]: 180037c78(j)
180037cb1 83 fb 0c CMP EBX,0xc
180037cb4 75 61 JNZ LAB_180037d17
180037cb6 43 8d 04 1e LEA status,[R14 + R11*0x1]
180037cba 41 81 e0 ff AND outData,0xfff
0f 00 00
180037cc1 99 CDQ
180037cc2 83 e2 07 AND inData,0x7
180037cc5 03 c2 ADD status,inData
180037cc7 8b c8 MOV param_1,status
180037cc9 83 e0 07 AND status,0x7
180037ccc 2b c2 SUB status,inData
180037cce c1 f9 03 SAR param_1,0x3
180037cd1 8d 04 40 LEA status,[RAX + RAX*0x2]
180037cd4 48 63 c9 MOVSXD param_1,param_1
180037cd7 c1 e0 02 SHL status,0x2
180037cda 99 CDQ
180037cdb 83 e2 1f AND inData,0x1f
180037cde 03 c2 ADD status,inData
180037ce0 44 8b c8 MOV height,status
180037ce3 83 e0 1f AND status,0x1f
180037ce6 2b c2 SUB status,inData
180037ce8 41 c1 f9 05 SAR height,0x5
180037cec 48 8d 14 49 LEA inData,[RCX + RCX*0x2]
180037cf0 49 63 c9 MOVSXD param_1,height
180037cf3 48 03 d1 ADD inData,param_1
180037cf6 8b c8 MOV param_1,status
180037cf8 4d 8d 0c 97 LEA height,[R15 + inData*0x4]
180037cfc 41 8b d0 MOV inData,outData
180037cff d3 e2 SHL inData,param_1
180037d01 41 09 11 OR dword ptr [height],inData
180037d04 83 f8 14 CMP status,0x14
180037d07 7e 0e JLE LAB_180037d17
180037d09 b9 20 00 00 00 MOV param_1,0x20
180037d0e 2b c8 SUB param_1,status
180037d10 41 d3 e8 SHR outData,param_1
180037d13 45 09 41 04 OR dword ptr [height + 0x4],outData
LAB_180037d17 XREF[3]: 180037caf(j), 180037cb4(j),
180037d07(j)
180037d17 41 ff c3 INC position_
180037d1a 49 83 c2 02 ADD inPixel,0x2
180037d1e 48 ff cf DEC rowsRemaining
180037d21 0f 85 4a ff JNZ LAB_180037c71
ff ff
180037d27 48 8b 44 24 60 MOV status,qword ptr [RSP + local_res8]
LAB_180037d2c XREF[1]: 180037c56(j)
180037d2c 48 ff c5 INC currentPosition
180037d2f 45 03 f4 ADD R14D,R12D
180037d32 48 3b e8 CMP currentPosition,status
180037d35 0f 8c 15 ff JL LAB_180037c50
ff ff
180037d3b 4c 8b 6c 24 20 MOV R13,qword ptr [RSP + local_38]
LAB_180037d40 XREF[1]: 180037c2a(j)
180037d40 48 8b 6c 24 38 MOV currentPosition,qword ptr [RSP + local_20]
180037d45 ff 15 e5 7a CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037d4b 33 c0 XOR status,status
180037d4d e9 8a fe ff ff JMP LAB_180037bdc
LAB_180037d52 XREF[2]: 180037b0a(j), 180037b13(j)
180037d52 b8 23 00 00 80 MOV status,0x80000023
180037d57 48 83 c4 40 ADD RSP,0x40
180037d5b 41 5f POP R15
180037d5d 41 5e POP R14
180037d5f 5e POP RSI
180037d60 c3 RET
180037d61 cc cc cc cc align align(15)
cc cc cc cc
cc cc cc cc
I may add that it's an AMD64/Intelx64 64bits assembly for Windows 10.

correct way to copy binary file to buffer in c

I'm trying to open a bin file an get its data into a buffer and copy the data from that buffer to a section that I made called .my_data.
When I do hexdump binfile.bin I see
00000000 4455 e589 00bf 0000 e800 0000 00b8
00000010 5d00 00c3
00000015
and when I printf the buffer and buffer2 I see
55 48 89 e5 bf 00 00 00 00 e8 00 00 00 00 b8 00 00 00 00 5d c3
55 48 89 e5 bf 00 00 00 00 e8 00 00 00 00 b8 00 00 00 00 5d c3
which matches the output from hexdump. However when load buffer2 into my new section (I didn't include that part of the code), I used objdump -s -j .my_data foo and I see some other data not similar to the output above.
I apologize for the dumb question but I just want to confirm if the memcpy I used it's actually copying the data from buffer to buffer2 so I can discard this part and look for the error in the rest of my program. From the output of the second for loop I would think it is. I'm also using gcc, thanks.
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
int main (){
uint8_t buffer[21];
uint8_t buffer2[21];
FILE *ptr;
ptr = fopen("binfile.bin", "rb");
size_t file_size = 0;
if(!ptr)
{
printf("unable to open file");
return 1;
}
fseek(ptr, 0, SEEK_END);
file_size = ftell(ptr);
rewind(ptr);
int i = 0;
fread(buffer, sizeof(buffer), 1, ptr);
memcpy(buffer2, buffer, sizeof(buffer));
for(i = 0; i < 21; i++)
{
printf("%02x ", buffer[i]);
}
printf("\n\n");
for(i = 0; i < 21; i++)
{
printf("%02x ", buffer2[i]);
}
fclose(ptr);
return 0;
}
You are thinking right as far as how you are approaching copying buffer to buffer2, but you are failing to ensure you only attempt to access the elements of buffer that are validly initialized. Since your file contains only 20 bytes, when you attempt to access buffer[20] you are attempting to access a variable with automatic storage duration while the value is indeterminate invoking Undefined Behavior. All bets are off.
You can handle this by fully initializing buffer, e.g.
uint8_t buffer[21] = {0);
which if you fill less than all elements with fread is still okay because all elements are validly initialized.
The other way is simply to read with the size parameter to fread set at 1 so fread returns the number of bytes read. Then you can use that value to output your results, e.g.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXC 1024 /* max number of characters (bytes) */
int main (int argc, char **argv) {
size_t nbytes = 0;
unsigned char buf1[MAXC] = {0}, buf2[MAXC] = {0};
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "rb") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
nbytes = fread (buf1, 1, MAXC, fp); /* with size=1, no. of bytes returned */
if (fp != stdin) /* close file if not stdin */
fclose (fp);
memcpy (buf2, buf1, nbytes); /* copy nbytes buf1, buf2 */
for (size_t i = 0; i < nbytes; i++) /* outuput buf1 contents */
printf (" %02x", buf1[i]);
putchar ('\n');
for (size_t i = 0; i < nbytes; i++) /* output buf2 contents */
printf (" %02x", buf2[i]);
putchar ('\n');
}
Example Use/Output
With your data in the file dat/bytes20.bin, you would receive:
$ ./bin/fread_bytes dat/bytes20.bin
44 55 e5 89 00 bf 00 00 e8 00 00 00 00 b8 5d 00 00 c3
44 55 e5 89 00 bf 00 00 e8 00 00 00 00 b8 5d 00 00 c3
Let me know if you have further questions.
here is the corrected code: note the & 0xFF when printing
uint8_t buffer[21];
uint8_t buffer2[21];
FILE *ptr;
ptr = fopen("binfile.bin", "rb");
size_t file_size = 0;
if (!ptr)
{
printf("unable to open file");
return 1;
}
fseek(ptr, 0, SEEK_END);
file_size = ftell(ptr);
rewind(ptr);
int i = 0;
fread(buffer, sizeof(buffer), 1, ptr);
memcpy(buffer2, buffer, sizeof(buffer));
for (i = 0; i < sizeof buffer; i++)
{
printf("%02x ", buffer[i] & 0xFF);
}
printf("\n\n");
for (i = 0; i < sizeof buffer2; i++)
{
printf("%02x ", buffer2[i] & 0xFF);
}
fclose(ptr);
return 0;
here is the output
55 48 89 e5 bf 00 00 00 00 e8 00 00 00 00 b8 00 00 00 00 5d c3
55 48 89 e5 bf 00 00 00 00 e8 00 00 00 00 b8 00 00 00 00 5d c3

Trouble with compression in LZW

I'm having trouble while implementing the compressor of the LZW. The compressor seems to work fine but while processing some streams it doesn't put the end of stream character (defined with the value 256), the result is that the decompressor will loop infinitely.
The code of the compressor is the following:
int compress1(FILE* input, BIT_FILE* output) {
CODE next_code; // next node
CODE current_code; // current node
CODE index; // node of the found character
int character;
int ret;
next_code = FIRST_CODE;
dictionary_init();
if ((current_code = getc(input)) == EOF)
current_code = EOS;
while ((character = getc(input)) != EOF) {
index = dictionary_lookup(current_code, (SYMBOL)character);
if (dictionary[index].code != UNUSED) {
current_code = dictionary[index].code;
}
else {
if (next_code <= MAX_CODE-1) {
dictionary[index].code = next_code++;
dictionary[index].parent = current_code;
dictionary[index].symbol = (SYMBOL)character;
}
else {
// handling full dictionary
dictionary_init();
next_code = FIRST_CODE;
}
ret = bit_write(output, (uint64_t) current_code, BITS);
if( ret != 0)
return -1;
current_code = (CODE)character;
}
}
ret = bit_write(output, (uint64_t) current_code, BITS);
if (ret != 0)
return -1;
ret = bit_write(output, (uint64_t) EOS, BITS);
if (ret != 0)
return -1;
if (bit_close(output) == -1) {
printf("Ops: error during closing\n");
return -1;
}
return 0;
}
CODE and SYMBOL are typedef of, respectively, uint32_t and uint16_t, FIRST_CODE is defined as 257. The funtion dictionary_init() simply initializes the dictionary, dictionary_lookup() returns the index of a child having symbol "character" of the parent node "current_node" (if it exists).
The writing of the binary file is defined as:
int bit_write(BIT_FILE* bf, uint64_t data, int len)
{
int space, result, offset, wbits, udata;
uint64_t* p;
uint64_t tmp;
udata = (int)data;
if (bf == NULL || len < 1 || len > (8* sizeof(data)))
return -1;
if (bf->reading == true)
return -1;
while (len > 0) {
space = bf->end - bf->next;
if (space < 0) {
return -1;
}
// if buffer is full, flush data to file and reinit BIT_IO struct
if (space == 0) {
result = bit_flush(bf);
if (result < 0)
return -1;
}
p = bf->buf + (bf->next/64);
offset = bf->next % 64;
wbits = 64 - offset;
if (len < wbits)
wbits = len;
tmp = le64toh(*p);
tmp |= (data << offset);
*p = htole64(tmp);
bf->next += wbits;
len -= wbits;
data >>= wbits;
}
return 0;
}
I already opened the file using another function, so the bit_write take as input the pointer to the bf structure.
Can someone help me finding the error?
An example of when this problem arises is the following:
If the input string is "Nel mezzo del cammi" everything works fine and I have the following compressed file (in Hexadecimal, using 12 Bits for encoding symbols):
4E 50 06 6C 00 02 6D 50 06 7A A0 07 6F 00 02 64
20 10 20 30 06 61 D0 06 6D 90 06 0D A0 00 00 01
If I add another character to the string, in particular "Nel mezzo del cammin", I have the following result:
4E 50 06 6C 00 02 6D 50 06 7A A0 07 6F 00 02 64
20 10 20 30 06 61 D0 06 6D 90 06 6E D0 00 0A 00
10
In the second case it doesn't write the End of Stream correctly.
SOLUTION: check that there is enough space in the buffer for the whole coded symbol I am going to write. Just change:
if (space == 0)
to:
if(space == 0 && space < len)

How to parse hex dump

I have a flash memory dump file that spits out addresses and data.
I want to parse the data so that it will tell me the valid tags
The '002F0900' column are the starting addresses.
An example of a valid tag is "DC 08 00 06 00 00 07 26 01 25 05 09" where "DC 08" = tag number, "00 06" = tag data length, "00 00" = tag version. Tag data starts after the version and in this case would be "07 26 01 25 05 09" and the next tag would start "DC 33".
I'm able to print out the first tag up to the data length but I'm not sure how to print the data because I have to consider if the data will go onto the next line so I'd have to skip the address somehow. Each line contains 58 columns. Each address is 8 characters long plus a colon and 2 spaces until the next hex value starts.
I also will eventually have to consider when "DC" shows up in the address column.
If anyone could give some advice because I know how I'm doing this isn't the best way to do this. I'm just trying to get it to work first.
The text file is thousands of lines that look like this:
002F0900: 09 FF DC 08 00 06 00 00 07 26 01 25 05 09 DC 33
002F0910: 00 07 00 00 1F A0 26 01 25 05 09 FF 9C 3E 00 08
002F0920: 00 01 07 DD 0A 0D 00 29 35 AD 9C 41 00 0A 00 01
002F0930: 07 DD 0A 0D 00 29 36 1C 1D 01 9C 40 00 02 00 01
002F0940: 01 00 9C 42 00 0A 00 01 07 DD 0A 0D 00 29 36 21
002F0950: 1D AD 9C 15 00 20 00 00 01 00 00 00 00 04 AD AE
002F0960: C8 0B C0 8A 5B 52 01 00 00 00 00 00 FF 84 36 BA
002F0970: 4E 92 E4 16 28 86 75 C0 DC 10 00 05 00 00 00 00
002F0980: 00 00 01 FF DC 30 00 04 00 01 00 00 00 01 9C 41
Example output would be:
Tag Number: DC 08
Address: 002E0000
Data Length: 06
Tag Data: 07 26 01 25 05 09
Source Code:
#include<stdio.h>
FILE *fp;
main()
{
int i=0;
char ch;
char address[1024];
char tag_number[5];
char tag_length[4];
int number_of_addresses = 0;
long int length;
fp = fopen(FILE_NAME,"rb");
if(fp == NULL) {
printf("error opening file");
}
else {
printf("File opened\n");
while(1){
if((address[i]=fgetc(fp)) ==':')
break;
number_of_addresses++;
i++;
}
printf("\nAddress:");
for (i = 0; i < number_of_addresses;i++)
printf("%c",address[i]);
while((ch = fgetc(fp)) != 'D'){ //Search for valid tag
}
tag_number[0] = ch;
if((ch = fgetc(fp)) == 'C') //We have a valid TAG
{
tag_number[1] = ch;
tag_number[2] = fgetc(fp);
tag_number[3] = fgetc(fp);
tag_number[4] = fgetc(fp);
}
printf("\nNumber:");
for(i=0;i<5;i++)
printf("%c",tag_number[i]);
fgetc(fp); //For space
tag_length[0] = fgetc(fp);
tag_length[1] = fgetc(fp);
fgetc(fp); //For space
tag_length[2] = fgetc(fp);
tag_length[3] = fgetc(fp);
printf("\nLength:");
for(i=0;i<4;i++)
printf("%c",tag_length[i]);
length = strtol(tag_length,&tag_length[4], 16);
printf("\nThe decimal equilvant is: %ld",length);
for (i = 0;i<165;i++)
printf("\n%d:%c",i,fgetc(fp));
}
fclose(fp);
}
Update #ooga:The tags are written arbitrarily. If we also consider invalid tag in the logic then I should be able to figure out the rest if I spend some time. Thanks
This is just an idea to get you started since I'm not entirely sure what you need. The basic idea is that read_byte returns the next two-digit hex value as a byte and also returns its address.
#include <stdio.h>
#include <stdlib.h>
#define FILE_NAME "UA201_dump.txt"
void err(char *msg) {
fprintf(stderr, "Error: %s\n", msg);
exit(EXIT_FAILURE);
}
// read_byte
// Reads a single two-digit "byte" from the hex dump, also
// reads the address (if necessary).
// Returns the byte and current address through pointers.
// Returns 1 if it was able to read a byte, 0 otherwise.
int read_byte(FILE *fp, unsigned *byte, unsigned *addr_ret) {
// Save current column and address between calls.
static int column = 0;
static unsigned addr;
// If it's the beginning of a line...
if (column == 0)
// ... read the address.
if (fscanf(fp, "%x:", &addr) != 1)
// Return 0 if no address could be read.
return 0;
// Read the next two-digit hex value into *byte.
if (fscanf(fp, "%x", byte) != 1)
// Return 0 if no byte could be read.
return 0;
// Set return address to current address.
*addr_ret = addr;
// Increment current address for next time.
++addr;
// Increment column, wrapping back to 0 when it reaches 16.
column = (column + 1) % 16;
// Return 1 on success.
return 1;
}
int main() {
unsigned byte, addr, afterdc, length, version, i;
FILE *fp = fopen(FILE_NAME,"r");
if (!fp) {
fprintf(stderr, "Can't open %s\n", FILE_NAME);
exit(EXIT_FAILURE);
}
while (read_byte(fp, &byte, &addr)) {
if (byte == 0xDC) {
// Read additional bytes like this:
if (!read_byte(fp, &afterdc, &addr)) err("EOF 1");
if (!read_byte(fp, &length, &addr)) err("EOF 2");
if (!read_byte(fp, &byte, &addr)) err("EOF 3");
length = (length << 8) | byte;
if (!read_byte(fp, &version, &addr)) err("EOF 4");
if (!read_byte(fp, &byte, &addr)) err("EOF 5");
version = (version << 8) | byte;
printf("DC: %02X, %u, %u\n ", afterdc, length, version);
for (i = 0; i < length; ++i) {
if (!read_byte(fp, &byte, &addr)) err("EOF 6");
printf("%02X ", byte);
}
putchar('\n');
}
}
fclose(fp);
return 0;
}
Some explanation:
Every time read_byte is called, it reads the next printed byte (the two-digit hex values) from the hex dump. It returns that byte and also the address of that byte.
There are 16 two-digit hex values on each line. The column number (0 to 15) is retained in a static variable between calls. The column is incremented after reading each byte and reset to 0 every time the column reaches 16.
Any time the column number is 0, it reads the printed address, retaining it between calls in a static variable. It also increments the static addr variable so it can tell you the address of a byte anywhere in the line (when the column number is not zero).
As an example, you could use read_bye like this, which prints each byte value and it's address on a separate line:
// after opening file as fp
while (read_byte(fp, &byte, &addr))
printf("%08X- %02X\n", addr, byte);
(Not that it would be useful to do that, but to test it you could run it with the snippet you provided in your question.)

Resources