I'm reversing an assembly function that I believe is converting an array of pixels (RGB) or photosites (RGGB) into a final array with an original (I think) encoding made of 10bits or 12bits per pixel. My goal is to understand in terms of c/c++ operations or natural language in the context of image format conversion what is happening, and in the end name in a meaningful way the variables.
There are a few operations that I do not understand, such as:
In the case of 10 bits per sample (1 sample ie., R|G|B):
uVar7 = (int64)iVar3 / 3 + ((int64)iVar3 >> 0x3f) & 0xffffffff; // 3f = 63
int iVar4 = (int)uVar7 + (int)(uVar7 >> 0x1f); // 1f = 31
puVar1 = (uint*)((int64)outData + (int64)iVar4 * 4);
*puVar1 = *puVar1 | (*inPixel & 0x3ff) <<
(((char)iVar3 + (char)iVar4 * -3) * '\n' + 2U & 0x1f);
The first two lines look at the sign bit (>> 0x3f and >> 0x1f), but why divide by 3?
Why add the sign bit? I don't get the 4th line at all, in particular * -3 and * '\n'; and in 2U & 0x1f, why is 31 used as the mask?
and in the case of 12 bits per samples:
uint uVar8 = *inPixel & 0xfff; // considering an array of 3 bits (likely 4 times at some point)
uint uVar5 = surfaceSizeBits + position_ >> 0x1f & 7; // 7 = b111
& 7 zeroes all but the 3 lowest bits, but why do that right after isolating the sign bit?
dataSizeBytes_ = surfaceSizeBits + position_ + uVar5;
iVar3 = ((dataSizeBytes_ & 7) - uVar5) * 12;
I don't get the purpose of - uVar5 and then *12; it's possible that dataSizeBytes is not data size in bytes here.
uint uVar6 = iVar3 >> 0x1f & 0x1f;
uVar5 = iVar3 + uVar6;
iVar3 = (uVar5 & 0x1f) - uVar6;
puVar1 = (uint*)((int64)outData +
((int64)((int)dataSizeBytes_ >> 3) * 3 +
(int64)((int)uVar5 >> 5)) * 4);
>> 3 drops the 3 lowest bits (i.e. divides by 8), but that's all I understand here.
byte bVar2 = (byte)iVar3;
*puVar1 = *puVar1 | uVar8 << (bVar2 & 0x1f);
if (0x14 < iVar3) // 0x14 = 20
puVar1[1] = puVar1[1] | uVar8 >> (0x20 - bVar2 & 0x1f); //0x20 = 32
Here I understand the bit operations but I can't move to a higher level.
End of 12 bits case.
For the decompiled code, I provided those definitions in order to help understanding:
typedef unsigned int uint;
typedef unsigned long long uint64;
typedef long long int64;
typedef int undefined; //may be wrong, may be 1 byte (int4) instead
typedef long int32;
typedef long undefined8;
typedef int int16;
typedef int int16;
typedef short int8;
typedef unsigned short uint8;
typedef unsigned short ushort;
typedef long code;
char LogBuffer[256];
The undefined* are from ghidra, I did not check yet in dynamic analysis if my translation is correct.
The decompiled code from ghidra is:
(modulo few renamings and type definitions)
// PixelConversionInOut (Ghidra decompilation).
// Packs 16-bit input samples from inData into outData at bitsPerSample bits each:
//   10 -> three 10-bit samples per 32-bit word (bits 2..11, 12..21, 22..31; bits 0..1 unused)
//   12 -> eight 12-bit samples per three 32-bit words (96 bits, tightly packed)
//   16 -> plain memcpy
//   anything else -> outData is only zero-filled.
// *bufferResult receives the number of output bytes required.
// Returns 0 on success, 0x80000023 if inData or outData is null, and 0x80000003 if
// dstBufferSize is smaller than the required size.
// NOTE(review): the names "height"/"widthStride" come from the reversing session; in the
// loops below "height" is the number of samples read per input row and "widthStride" is
// the number of rows - confirm against the callers.
undefined8
PixelConversionInOut
(undefined8 param_1, void* inData, void* outData, int height, int widthStride,
int bitsPerSample, int dstBufferSize, uint* bufferResult)
{
uint* puVar1;
int surfaceSizeBits;
undefined8 status;
int iVar3;
uint64 uVar7;
int64 currentPosition;
uint dataSizeBytes_;
GetCurrentThreadId();
if (inData != (void*)nullptr && outData != (void*)nullptr)
{
int heightPadding = 0;
*bufferResult = 0;
// Only the 10-bit format pads the output row length, and only for three specific
// input lengths: 0x818 (2072) and 0x1010 (4112) get +4, 0x1018 (4120) gets +2.
if (bitsPerSample == 10)
{
if ((height == 0x818) || (height == 0x1010))
heightPadding = 4;
else if (height == 0x1018)
heightPadding = 2;
}
int heightPadded = heightPadding + height;
if (bitsPerSample == 10)
{
// Required bytes = sampleCount * 4 / 3 (three samples per 4-byte word).
// The two lines below are the decompiler's rendering of a signed 32-bit division
// by 3: the listing multiplies by the magic constant 0x55555556, keeps the high
// half of the product and adds its sign bit to round toward zero.
currentPosition = (int64)(heightPadded * widthStride * 4);
uVar7 = currentPosition / 3 + (currentPosition >> 0x3f) & 0xffffffff;
*bufferResult = (int)uVar7 + (int)(uVar7 >> 0x1f);
}
else
{
if (bitsPerSample == 0xc)
{
// Required bytes = (sampleCount * 12 bits) / 8. The "(x >> 31) & 7" term is the
// usual bias that makes an arithmetic shift behave like signed division by 8.
surfaceSizeBits = heightPadded * widthStride * 0xc;
dataSizeBytes_ = (int)(surfaceSizeBits + (surfaceSizeBits >> 0x1f & 7U)) >> 3;
}
else
// Every other depth (including 16): sampleCount * 4 / 2, i.e. 2 bytes per sample.
dataSizeBytes_ = heightPadded * widthStride * 4 / 2;
*bufferResult = dataSizeBytes_;
}
dataSizeBytes_ = *bufferResult;
if (dstBufferSize < (int)dataSizeBytes_)
{
sprintf(LogBuffer, "dstBuffer must be allocated at least %d bytes.", (uint64)dataSizeBytes_);
status = 0x80000003;
}
else
{
if (bitsPerSample == 0x10)
{
// 16-bit samples need no repacking.
memcpy(outData, inData, (int64)(int)dataSizeBytes_);
GetCurrentThreadId();
status = 0;
}
else
{
// The packing below ORs samples into place, so the destination must start zeroed.
memset(outData, 0, (int64)(int)dataSizeBytes_);
currentPosition = 0;
if (0 < widthStride)
{
// From here on surfaceSizeBits is reused as the output sample index of the first
// sample of the current row (it grows by heightPadded per row), not as a bit count.
surfaceSizeBits = 0;
do
{
int position_ = 0;
// heightPadded - heightPadding == height: padding samples are never read from
// the input, they simply stay zero in the output.
if (0 < (int64)(heightPadded - heightPadding))
{
// Input rows are `height` 16-bit samples long (2 bytes per sample).
ushort* inPixel = (ushort*)((int64)inData + height * currentPosition * 2);
int64 rowsRemaining = (int64)(heightPadded - heightPadding);
do
{
if (bitsPerSample == 10)
{
// iVar3 = output sample index; iVar4 = iVar3 / 3 = index of the 32-bit word
// (same magic-constant division by 3 as in the size computation).
iVar3 = surfaceSizeBits + position_;
uVar7 = (int64)iVar3 / 3 + ((int64)iVar3 >> 0x3f) & 0xffffffff;
int iVar4 = (int)uVar7 + (int)(uVar7 >> 0x1f);
puVar1 = (uint*)((int64)outData + (int64)iVar4 * 4);
// iVar3 + iVar4 * -3 is iVar3 % 3, and '\n' is the character with value 10, so
// the shift is (index % 3) * 10 + 2, giving 2, 12 or 22. "& 0x1f" only mirrors
// the x86 SHL instruction, which uses the low 5 bits of the shift count.
*puVar1 = *puVar1 | (*inPixel & 0x3ff) <<
(((char)iVar3 + (char)iVar4 * -3) * '\n' + 2U & 0x1f);
}
else
{
if (bitsPerSample == 12)
{
// uVar8 = the 12-bit sample value.
uint uVar8 = *inPixel & 0xfff;
// uVar5 = 7 if the sample index is negative, else 0: rounding bias for the
// signed division/modulo by 8 that follows.
uint uVar5 = surfaceSizeBits + position_ >> 0x1f & 7;
dataSizeBytes_ = surfaceSizeBits + position_ + uVar5;
// (dataSizeBytes_ & 7) - uVar5 is index % 8; times 12 gives the bit offset of
// this sample inside its 96-bit (three-word) group of eight samples.
iVar3 = ((dataSizeBytes_ & 7) - uVar5) * 12;
// Same bias trick for dividing the bit offset by 32:
// uVar5 >> 5 is the word inside the group, iVar3 becomes bitOffset % 32.
uint uVar6 = iVar3 >> 0x1f & 0x1f;
uVar5 = iVar3 + uVar6;
iVar3 = (uVar5 & 0x1f) - uVar6;
// Word address = (index / 8) * 3 words per group + word inside the group.
puVar1 = (uint*)((int64)outData +
((int64)((int)dataSizeBytes_ >> 3) * 3 +
(int64)((int)uVar5 >> 5)) * 4);
byte bVar2 = (byte)iVar3;
*puVar1 = *puVar1 | uVar8 << (bVar2 & 0x1f);
// A 12-bit sample fits in the word only while shift <= 20 (20 + 12 == 32);
// beyond that its upper bits spill into the low bits of the next word.
if (0x14 < iVar3)
puVar1[1] = puVar1[1] | uVar8 >> (0x20 - bVar2 & 0x1f);
}
}
position_ = position_ + 1;
inPixel = inPixel + 1;
rowsRemaining = rowsRemaining + -1;
}
while (rowsRemaining != 0);
}
currentPosition = currentPosition + 1;
surfaceSizeBits = surfaceSizeBits + heightPadded;
}
while (currentPosition < widthStride);
}
GetCurrentThreadId();
status = 0;
}
}
return status;
}
return 0x80000023;
}
Here is the original assembly but ghidra did not complain or warn about the result.
**************************************************************
* FUNCTION *
**************************************************************
undefined8 __fastcall PixelConversionInOut(undefined8 pa
undefined8 RAX:8 <RETURN> XREF[1]: 180037b9c(W)
undefined8 RCX:8 param_1
void * RDX:8 inData
void * R8:8 outData
int R9D:4 height
int Stack[0x28]:4 widthStride XREF[1]: 180037b66(R)
int Stack[0x30]:4 bitsPerSample XREF[1]: 180037b26(R)
int Stack[0x38]:4 dstBufferSize XREF[1]: 180037bb8(R)
uint * Stack[0x40]:8 bufferResult XREF[1]: 180037b19(R)
undefined4 EDI:4 heightPadding XREF[2]: 180037b62(W),
180037d1e(W)
undefined4 EAX:4 surfaceSizeBits XREF[2]: 180037b9c(W),
180037bd7(W)
undefined8 RAX:8 status XREF[1]: 180037bd7(W)
undefined8 R10:8 inPixel XREF[1]: 180037c71(W)
undefined4 R11D:4 position_ XREF[1]: 180037d17(W)
undefined8 RDI:8 rowsRemaining XREF[1]: 180037d1e(W)
undefined8 RBP:8 currentPosition XREF[1]: 180037d2c(W)
undefined4 Stack[0x20]:4 local_res20 XREF[2]: 180037ae0(W),
180037c5c(R)
undefined8 Stack[0x18]:8 local_res18 XREF[2]: 180037b21(W),
180037be1(R)
undefined8 Stack[0x10]:8 local_res10 XREF[2]: 180037ae5(W),
180037c61(R)
undefined8 Stack[0x8]:8 local_res8 XREF[4]: 180037aea(W),
180037c23(W),
180037c40(R),
180037d27(R)
undefined8 Stack[-0x20]:8 local_20 XREF[2]: 180037c14(W),
180037d40(R)
undefined8 Stack[-0x28]:8 local_28 XREF[2]: 180037b2d(W),
180037bdc(R)
undefined8 Stack[-0x30]:8 local_30 XREF[2]: 180037b34(W),
180037be6(R)
undefined8 Stack[-0x38]:8 local_38 XREF[2]: 180037c33(W),
180037d3b(R)
undefined4 HASH:27f0811 heightPadded
undefined4 HASH:5fcc647 dataSizeBytes_
PixelConversionInOut XREF[2]: GetDecompressedData:1800384fa(c),
FUN_180038630:18003875a(c)
180037ae0 44 89 4c 24 20 MOV dword ptr [RSP + local_res20],height
180037ae5 48 89 54 24 10 MOV qword ptr [RSP + local_res10],inData
180037aea 48 89 4c 24 08 MOV qword ptr [RSP + local_res8],param_1
180037aef 56 PUSH RSI
180037af0 41 56 PUSH R14
180037af2 41 57 PUSH R15
180037af4 48 83 ec 40 SUB RSP,0x40
180037af8 41 8b f1 MOV ESI,height
180037afb 4d 8b f8 MOV R15,outData
180037afe 4c 8b f2 MOV R14,inData
180037b01 ff 15 29 7d CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037b07 4d 85 f6 TEST R14,R14
180037b0a 0f 84 42 02 JZ LAB_180037d52
00 00
180037b10 4d 85 ff TEST R15,R15
180037b13 0f 84 39 02 JZ LAB_180037d52
00 00
180037b19 4c 8b 94 24 MOV R10,qword ptr [RSP + bufferResult]
98 00 00 00
180037b21 48 89 5c 24 70 MOV qword ptr [RSP + local_res18],RBX
180037b26 8b 9c 24 88 MOV EBX,dword ptr [RSP + bitsPerSample]
00 00 00
180037b2d 48 89 7c 24 30 MOV qword ptr [RSP + local_28],RDI
180037b32 33 ff XOR EDI,EDI
180037b34 4c 89 64 24 28 MOV qword ptr [RSP + local_30],R12
180037b39 41 89 3a MOV dword ptr [R10],EDI
180037b3c 83 fb 0a CMP EBX,0xa
180037b3f 75 21 JNZ LAB_180037b62
180037b41 8b ce MOV param_1,ESI
180037b43 81 e9 18 08 SUB param_1,0x818
00 00
180037b49 74 12 JZ LAB_180037b5d
180037b4b 81 e9 f8 07 SUB param_1,0x7f8
00 00
180037b51 74 0a JZ LAB_180037b5d
180037b53 83 f9 08 CMP param_1,0x8
180037b56 75 0a JNZ LAB_180037b62
180037b58 8d 7b f8 LEA EDI,[RBX + -0x8]
180037b5b eb 05 JMP LAB_180037b62
LAB_180037b5d XREF[2]: 180037b49(j), 180037b51(j)
180037b5d bf 04 00 00 00 MOV EDI,0x4
LAB_180037b62 XREF[3]: 180037b3f(j), 180037b56(j),
180037b5b(j)
180037b62 44 8d 24 37 LEA R12D,[heightPadding + RSI*0x1]
180037b66 8b b4 24 80 MOV ESI,dword ptr [RSP + widthStride]
00 00 00
180037b6d 83 fb 0a CMP EBX,0xa
180037b70 75 1c JNZ LAB_180037b8e
180037b72 41 8b cc MOV param_1,R12D
180037b75 b8 56 55 55 55 MOV EAX,0x55555556
180037b7a 0f af ce IMUL param_1,ESI
180037b7d c1 e1 02 SHL param_1,0x2
180037b80 f7 e9 IMUL param_1
180037b82 8b c2 MOV EAX,inData
180037b84 c1 e8 1f SHR EAX,0x1f
180037b87 03 d0 ADD inData,EAX
180037b89 41 89 12 MOV dword ptr [R10],inData
180037b8c eb 27 JMP LAB_180037bb5
LAB_180037b8e XREF[1]: 180037b70(j)
180037b8e 41 8b c4 MOV EAX,R12D
180037b91 0f af c6 IMUL EAX,ESI
180037b94 83 fb 0c CMP EBX,0xc
180037b97 75 11 JNZ LAB_180037baa
180037b99 8d 04 40 LEA EAX,[RAX + RAX*0x2]
180037b9c c1 e0 02 SHL surfaceSizeBits,0x2
180037b9f 99 CDQ
180037ba0 83 e2 07 AND inData,0x7
180037ba3 03 c2 ADD surfaceSizeBits,inData
180037ba5 c1 f8 03 SAR surfaceSizeBits,0x3
180037ba8 eb 08 JMP LAB_180037bb2
LAB_180037baa XREF[1]: 180037b97(j)
180037baa c1 e0 02 SHL surfaceSizeBits,0x2
180037bad 99 CDQ
180037bae 2b c2 SUB surfaceSizeBits,inData
180037bb0 d1 f8 SAR surfaceSizeBits,1
LAB_180037bb2 XREF[1]: 180037ba8(j)
180037bb2 41 89 02 MOV dword ptr [R10],surfaceSizeBits
LAB_180037bb5 XREF[1]: 180037b8c(j)
180037bb5 49 63 02 MOVSXD surfaceSizeBits,dword ptr [R10]
180037bb8 3b 84 24 90 CMP surfaceSizeBits,dword ptr [RSP + dstBufferSize]
00 00 00
180037bbf 7e 34 JLE LAB_180037bf5
180037bc1 48 8d 15 30 LEA inData,[s_dstBuffer_must_be_allocated_at_l_180b675f8] = "dstBuffer must be allocated at least %d bytes."
fa b2 00
180037bc8 48 8d 0d 81 LEA param_1,[LogBuffer] = ??
db bc 00
180037bcf 44 8b c0 MOV outData,surfaceSizeBits
180037bd2 e8 bd 28 1c 00 CALL sprintf int sprintf(char * _Dest, char *
180037bd7 b8 03 00 00 80 MOV status,0x80000003
LAB_180037bdc XREF[2]: 180037c10(j), 180037d4d(j)
180037bdc 48 8b 7c 24 30 MOV heightPadding,qword ptr [RSP + local_28]
180037be1 48 8b 5c 24 70 MOV RBX,qword ptr [RSP + local_res18]
180037be6 4c 8b 64 24 28 MOV R12,qword ptr [RSP + local_30]
180037beb 48 83 c4 40 ADD RSP,0x40
180037bef 41 5f POP R15
180037bf1 41 5e POP R14
180037bf3 5e POP RSI
180037bf4 c3 RET
LAB_180037bf5 XREF[1]: 180037bbf(j)
180037bf5 4c 8b c0 MOV outData,status
180037bf8 49 8b cf MOV param_1,R15
180037bfb 83 fb 10 CMP EBX,0x10
180037bfe 75 12 JNZ LAB_180037c12
180037c00 49 8b d6 MOV inData,R14
180037c03 e8 f8 d9 1b 00 CALL memcpy void * memcpy(void * _Dst, void
180037c08 ff 15 22 7c CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037c0e 33 c0 XOR status,status
180037c10 eb ca JMP LAB_180037bdc
LAB_180037c12 XREF[1]: 180037bfe(j)
180037c12 33 d2 XOR inData,inData
180037c14 48 89 6c 24 38 MOV qword ptr [RSP + local_20],RBP
180037c19 e8 d2 17 1c 00 CALL memset void * memset(void * _Dst, int _
180037c1e 48 63 c6 MOVSXD status,ESI
180037c21 33 ed XOR EBP,EBP
180037c23 48 89 44 24 60 MOV qword ptr [RSP + local_res8],status
180037c28 85 f6 TEST ESI,ESI
180037c2a 0f 8e 10 01 JLE LAB_180037d40
00 00
180037c30 41 8b c4 MOV status,R12D
180037c33 4c 89 6c 24 20 MOV qword ptr [RSP + local_38],R13
180037c38 2b c7 SUB status,heightPadding
180037c3a 45 33 f6 XOR R14D,R14D
180037c3d 4c 63 e8 MOVSXD R13,status
180037c40 48 8b 44 24 60 MOV status,qword ptr [RSP + local_res8]
180037c45 66 66 66 0f NOP word ptr [RAX + RAX*0x1]
1f 84 00 00
00 00 00
LAB_180037c50 XREF[1]: 180037d35(j)
180037c50 45 33 db XOR R11D,R11D
180037c53 4d 85 ed TEST R13,R13
180037c56 0f 8e d0 00 JLE LAB_180037d2c
00 00
180037c5c 48 63 44 24 78 MOVSXD status,dword ptr [RSP + local_res20]
180037c61 48 8b 4c 24 68 MOV param_1,qword ptr [RSP + local_res10]
180037c66 49 8b fd MOV heightPadding,R13
180037c69 48 0f af c5 IMUL status,RBP
180037c6d 4c 8d 14 41 LEA R10,[param_1 + status*0x2]
LAB_180037c71 XREF[1]: 180037d21(j)
180037c71 45 0f b7 02 MOVZX outData,word ptr [inPixel]
180037c75 83 fb 0a CMP EBX,0xa
180037c78 75 37 JNZ LAB_180037cb1
180037c7a 43 8d 0c 1e LEA param_1,[R14 + R11*0x1]
180037c7e b8 56 55 55 55 MOV status,0x55555556
180037c83 41 81 e0 ff AND outData,0x3ff
03 00 00
180037c8a f7 e9 IMUL param_1
180037c8c 8b c2 MOV status,inData
180037c8e c1 e8 1f SHR status,0x1f
180037c91 03 d0 ADD inData,status
180037c93 48 63 c2 MOVSXD status,inData
180037c96 4d 8d 0c 87 LEA height,[R15 + status*0x4]
180037c9a 8d 04 52 LEA status,[RDX + RDX*0x2]
180037c9d 2b c8 SUB param_1,status
180037c9f 8d 0c 89 LEA param_1,[RCX + RCX*0x4]
180037ca2 8d 0c 4d 02 LEA param_1,[0x2 + param_1*0x2]
00 00 00
180037ca9 41 d3 e0 SHL outData,param_1
180037cac 45 09 01 OR dword ptr [height],outData
180037caf eb 66 JMP LAB_180037d17
LAB_180037cb1 XREF[1]: 180037c78(j)
180037cb1 83 fb 0c CMP EBX,0xc
180037cb4 75 61 JNZ LAB_180037d17
180037cb6 43 8d 04 1e LEA status,[R14 + R11*0x1]
180037cba 41 81 e0 ff AND outData,0xfff
0f 00 00
180037cc1 99 CDQ
180037cc2 83 e2 07 AND inData,0x7
180037cc5 03 c2 ADD status,inData
180037cc7 8b c8 MOV param_1,status
180037cc9 83 e0 07 AND status,0x7
180037ccc 2b c2 SUB status,inData
180037cce c1 f9 03 SAR param_1,0x3
180037cd1 8d 04 40 LEA status,[RAX + RAX*0x2]
180037cd4 48 63 c9 MOVSXD param_1,param_1
180037cd7 c1 e0 02 SHL status,0x2
180037cda 99 CDQ
180037cdb 83 e2 1f AND inData,0x1f
180037cde 03 c2 ADD status,inData
180037ce0 44 8b c8 MOV height,status
180037ce3 83 e0 1f AND status,0x1f
180037ce6 2b c2 SUB status,inData
180037ce8 41 c1 f9 05 SAR height,0x5
180037cec 48 8d 14 49 LEA inData,[RCX + RCX*0x2]
180037cf0 49 63 c9 MOVSXD param_1,height
180037cf3 48 03 d1 ADD inData,param_1
180037cf6 8b c8 MOV param_1,status
180037cf8 4d 8d 0c 97 LEA height,[R15 + inData*0x4]
180037cfc 41 8b d0 MOV inData,outData
180037cff d3 e2 SHL inData,param_1
180037d01 41 09 11 OR dword ptr [height],inData
180037d04 83 f8 14 CMP status,0x14
180037d07 7e 0e JLE LAB_180037d17
180037d09 b9 20 00 00 00 MOV param_1,0x20
180037d0e 2b c8 SUB param_1,status
180037d10 41 d3 e8 SHR outData,param_1
180037d13 45 09 41 04 OR dword ptr [height + 0x4],outData
LAB_180037d17 XREF[3]: 180037caf(j), 180037cb4(j),
180037d07(j)
180037d17 41 ff c3 INC position_
180037d1a 49 83 c2 02 ADD inPixel,0x2
180037d1e 48 ff cf DEC rowsRemaining
180037d21 0f 85 4a ff JNZ LAB_180037c71
ff ff
180037d27 48 8b 44 24 60 MOV status,qword ptr [RSP + local_res8]
LAB_180037d2c XREF[1]: 180037c56(j)
180037d2c 48 ff c5 INC currentPosition
180037d2f 45 03 f4 ADD R14D,R12D
180037d32 48 3b e8 CMP currentPosition,status
180037d35 0f 8c 15 ff JL LAB_180037c50
ff ff
180037d3b 4c 8b 6c 24 20 MOV R13,qword ptr [RSP + local_38]
LAB_180037d40 XREF[1]: 180037c2a(j)
180037d40 48 8b 6c 24 38 MOV currentPosition,qword ptr [RSP + local_20]
180037d45 ff 15 e5 7a CALL qword ptr [->KERNEL32.DLL::GetCurrentThreadId]
9d 00
180037d4b 33 c0 XOR status,status
180037d4d e9 8a fe ff ff JMP LAB_180037bdc
LAB_180037d52 XREF[2]: 180037b0a(j), 180037b13(j)
180037d52 b8 23 00 00 80 MOV status,0x80000023
180037d57 48 83 c4 40 ADD RSP,0x40
180037d5b 41 5f POP R15
180037d5d 41 5e POP R14
180037d5f 5e POP RSI
180037d60 c3 RET
180037d61 cc cc cc cc align align(15)
cc cc cc cc
cc cc cc cc
I may add that it's an AMD64/Intelx64 64bits assembly for Windows 10.
Related
I'm trying to optimize my shellcode project by introducing a method to store strings as unit64_t values in an array. The shellcode project is written without any optimizations (Debug only) as I want to control everything at the opcode level i.e., the shellcode has precomputed offsets to local variables. If /O2 optimizations were enabled, it might rearrange the C shellcode and mess things up?
For example, currently the shellcode has these stack arrays defined
char calc[] = { 'c','a','l','c','.','e','x','e',0 };
char winexec[] = { 'W','i','n','E','x','e','c',0 };
char sleep[] = { 'S','l','e','e','p',0 };
char createtoolhelp32snapshot[] = { 'C','r','e','a','t','e','T','o','o','l','h','e','l','p','3','2','S','n','a','p','s','h','o','t',0 };
char process32first[] = { 'P','r','o','c','e','s','s','3','2','F','i','r','s','t',0 };
char process32next[] = { 'P','r','o','c','e','s','s','3','2','N','e','x','t',0 };
char closehandle[] = { 'C','l','o','s','e','H','a','n','d','l','e',0 };
char openprocess[] = { 'O','p','e','n','P','r','o','c','e','s','s',0 };
char virtualallocex[] = { 'V','i','r','t','u','a','l','A','l','l','o','c','E','x',0 };
char virtualfreeex[] = { 'V','i','r','t','u','a','l','F','r','e','e','E','x',0 };
char writeprocessmemory[] = { 'W','r','i','t','e','P','r','o','c','e','s','s','M','e','m','o','r','y',0 };
char createremotethread[] = { 'C','r','e','a','t','e','R','e','m','o','t','e','T','h','r','e','a','d',0 };
char comparestringa[] = { 'C','o','m','p','a','r','e','S','t','r','i','n','g','A',0};
char notepad[] = { 'n','o','t','e','p','a','d','.','e','x','e',0 };
char getcurrentprocessid[] = { 'G','e','t','C','u','r','r','e','n','t','P','r','o','c','e','s','s','I','d',0 };
char exitthread[] = { 'E','x','i','t','T','h','r','e','a','d',0 };
The generated assembly is
52: char calc[] = { 'c','a','l','c','.','e','x','e',0 };
00007FF6248B1C6D C6 45 08 63 mov byte ptr [calc],63h
00007FF6248B1C71 C6 45 09 61 mov byte ptr [rbp+9],61h
00007FF6248B1C75 C6 45 0A 6C mov byte ptr [rbp+0Ah],6Ch
00007FF6248B1C79 C6 45 0B 63 mov byte ptr [rbp+0Bh],63h
00007FF6248B1C7D C6 45 0C 2E mov byte ptr [rbp+0Ch],2Eh
00007FF6248B1C81 C6 45 0D 65 mov byte ptr [rbp+0Dh],65h
00007FF6248B1C85 C6 45 0E 78 mov byte ptr [rbp+0Eh],78h
00007FF6248B1C89 C6 45 0F 65 mov byte ptr [rbp+0Fh],65h
00007FF6248B1C8D C6 45 10 00 mov byte ptr [rbp+10h],0
53: char winexec[] = { 'W','i','n','E','x','e','c',0 };
00007FF6248B1C91 C6 45 38 57 mov byte ptr [winexec],57h
00007FF6248B1C95 C6 45 39 69 mov byte ptr [rbp+39h],69h
00007FF6248B1C99 C6 45 3A 6E mov byte ptr [rbp+3Ah],6Eh
00007FF6248B1C9D C6 45 3B 45 mov byte ptr [rbp+3Bh],45h
00007FF6248B1CA1 C6 45 3C 78 mov byte ptr [rbp+3Ch],78h
00007FF6248B1CA5 C6 45 3D 65 mov byte ptr [rbp+3Dh],65h
00007FF6248B1CA9 C6 45 3E 63 mov byte ptr [rbp+3Eh],63h
00007FF6248B1CAD C6 45 3F 00 mov byte ptr [rbp+3Fh],0
To improve performance, should those arrays be replaced with
uint64_t calc[] = { 0x6578652e636c6163, 0x0000000000000000 };
uint64_t winexec[] = { 0x00636578456e6957 };
uint64_t sleep[] = { 0x0000007065656c53 };
uint64_t createtoolhelp32snapshot[] = { 0x6f54657461657243, 0x3233706c65686c6f, 0x746f687370616e53, 0x0000000000000000 };
uint64_t process32first[] = { 0x33737365636f7250, 0x0000747372694632 };
uint64_t process32next[] = { 0x33737365636f7250, 0x0000007478654e32 };
uint64_t closehandle[] = { 0x6e614865736f6c43, 0x0000000000656c64 };
uint64_t openprocess[] = { 0x636f72506e65704f, 0x0000000000737365 };
uint64_t virtualallocex[] = { 0x416c617574726956, 0x00007845636f6c6c };
uint64_t virtualfreeex[] = { 0x466c617574726956, 0x0000007845656572 };
uint64_t writeprocessmemory[] = { 0x6f72506574697257, 0x6f6d654d73736563, 0x0000000000007972 };
uint64_t createremotethread[] = { 0x6552657461657243, 0x6572685465746f6d, 0x0000000000006461 };
uint64_t comparestringa[] = { 0x53657261706d6f43, 0x000041676e697274 };
uint64_t notepad[] = { 0x2e64617065746f6e, 0x0000000000657865 };
uint64_t getcurrentprocessid[] = { 0x6572727543746547, 0x7365636f7250746e, 0x0000000000644973 };
uint64_t exitthread[] = { 0x6572685474697845, 0x0000000000006461 };
The generated assembly is
71: unsigned long long calc[] = { 0x6578652e636c6163, 0x0000000000000000 };
00007FF7EF7C1C5C 48 B8 63 61 6C 63 2E 65 78 65 mov rax,6578652E636C6163h
00007FF7EF7C1C66 48 89 45 08 mov qword ptr [calc],rax
00007FF7EF7C1C6A 48 C7 45 10 00 00 00 00 mov qword ptr [rbp+10h],0
72: unsigned long long winexec[] = {0x00636578456e6957};
00007FF7EF7C1C72 48 B8 57 69 6E 45 78 65 63 00 mov rax,636578456E6957h
00007FF7EF7C1C7C 48 89 45 38 mov qword ptr [winexec],rax
Question
The shellcode works correctly with both definitions but is the extra performance worth degrading the readability?
Tool
https://godbolt.org/z/eYhWPG9E6
This is my ebpf program
#include <linux/ptrace.h>
#include <linux/version.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/types.h>
#define __TRACE_COMMON_H
#define SYSCALL(SYS) "__x64_" __stringify(SYS)
/*
 * Map handed to bpf_perf_event_output(). That helper only accepts a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY (one perf event fd per slot, indexed by CPU),
 * so a plain BPF_MAP_TYPE_ARRAY cannot be used here.
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(__u32),
	.max_entries = 42,
	.map_flags = 0
};
/*
 * kprobe on sys_write: emits one {pid, cookie} record per call through my_map.
 *
 * NOTE: bpf_perf_event_output() requires my_map to be a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY. The flags argument 0 selects slot 0 of it.
 */
SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
	struct S {
		int pid;
		int cookie;
	} data;

	/* The helper returns tgid << 32 | pid; the int assignment keeps the low 32 bits. */
	data.pid = bpf_get_current_pid_tgid();
	/* Must match the value the user-space reader checks for (0x12345678). */
	data.cookie = 0x12345678;
	bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
	return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = 190;
And I am trying to compile ebpf program and load it like following
root#this:/home/ubuntu/Desktop/ebpf/kern# clang -g -target bpf -c trace_output_kern.c -o trace_output_kern.o
but when I run it, it fails with the following error:
root#this:/home/ubuntu/Desktop/ebpf/kern# ./trace_output_user a
libbpf: Error loading BTF: Invalid argument(22)
libbpf: magic: 0xeb9f
version: 1
flags: 0x0
hdr_len: 24
type_off: 0
type_len: 900
str_off: 900
str_len: 600
btf_total_size: 1524
[1] PTR (anon) type_id=3
[2] INT int size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
[3] ARRAY (anon) type_id=2 index_type_id=4 nr_elems=4
[4] INT __ARRAY_SIZE_TYPE__ size=4 bits_offset=0 nr_bits=32 encoding=(none)
[5] PTR (anon) type_id=6
[6] ARRAY (anon) type_id=2 index_type_id=4 nr_elems=2
[7] STRUCT (anon) size=32 vlen=4
type type_id=1 bits_offset=0
key_size type_id=1 bits_offse
key_size type_id=1 bits_offset=64
value_size type_id=1 bits_offset=128
max_entries type_id=5 bits_offset=192
[8] VAR my_map type_id=7 linkage=1
[9] FUNC_PROTO (anon) return=2 args=(10 (anon))
[10] PTR (anon) type_id=11
[11] STRUCT pt_regs size=168 vlen=21
r15 type_id=12 bits_offset=0
r14 type_id=12 bits_offset=64
r13 type_id=12 bits_offset=128
r12 type_id=12 bits_offset=192
rbp type_id=12 bits_offset=256
rbx type_id=12 bits_offset=320
r11 type_id=12 bits_offset=384
r10 type_id=12 bits_offset=448
r9 type_id=12 bits_offset=512
r8 type_id=12 bits_offset=576
rax type_id=12 bits_offset=640
rcx type_id=12 bits_offset=704
rdx type_id=12 bits_offset=768
rsi type_id=12 bits_offset=832
rdi type_id=12 bits_offset=896
orig_rax type_id=12 bits_offset=960
rip type_id=12 bits_offset=1024
cs type_id=12 bits_offset=1088
eflags type_id=12 bits_offset=1152
rsp type_id=12 bits_offset=1216
ss type_id=12 bits_offset=1280
[12] INT long unsigned int size=8 bits_offset=0 nr_bits=64 encoding=(none)
[13] FUNC bpf_prog1 type_id=9
[14] PTR (anon) type_id=15
[15] FUNC_PROTO (anon) return=16 args=(void)
[16] TYPEDEF __u64 type_id=17
[17] INT long long unsigned int size=8 bits_offset=0 nr_bits=64 encoding=(none)
[18] VAR bpf_get_current_pid_tgid type_id=14 linkage=0
[19] PTR (anon) type_id=20
[20] FUNC_PROTO (anon) return=21 args=(22 (anon), 22 (anon), 16 (anon), 22 (anon), 16 (anon))
[21] INT long int size=8 bits_offset=0 nr_bits=64 encoding=SIGNED
[22] PTR (anon) type_id=0
[23] VAR bpf_perf_event_output type_id=19 linkage=0
[24] INT char size=1 bits_offset=0 nr_bits=8 encoding=SIGNED
[25] ARRAY (anon) type_id=24 index_type_id=4 nr_elems=4
[26] VAR _license type_id=25 linkage=1
[27] TYPEDEF uint32_t type_id=28
[28] TYPEDEF __u32 type_id=29
[29] INT unsigned int size=4 bits_offset=0 nr_bits=32 encoding=(none)
[30] VAR _version type_id=27 linkage=1
[31] DATASEC .data size=16 vlen=2
type_id=18 offset=0 size=8
type_id=23 offset=8 size=8
[32] DATASEC .maps size=32 vlen=1
type_id=8 offset=0 size=32
[33] DATASEC license size=4 vlen=1
type_id=26 offset=0 size=4
[34] DATASEC version size=4 vlen=1
type_id=30 offset=0 size=4
[13] FUNC bpf_prog1 type_id=9 Invalid arg#1
libbpf: Error loading .BTF into kernel: -22. BTF is optional, ignoring.
libbpf: load bpf program failed: Invalid argument
libbpf: -- BEGIN DUMP LOG ---
libbpf:
unknown opcode 8d
processed 0 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
libbpf: -- END LOG --
libbpf: failed to load program 'bpf_prog1'
libbpf: failed to load object './trace_output_kern.o'
ERROR: loading BPF object file failed
Update
and this is my trace_output_user.c
// SPDX-License-Identifier: GPL-2.0-only
#include <stdio.h>
#include <fcntl.h>
#include <poll.h>
#include <time.h>
#include <signal.h>
#include <bpf/libbpf.h>
//create .o file root#this:/home/ubuntu/Desktop/ebpf/kern# clang -I /lib/modules/5.14.1/build -I /usr/include/bpf/ -O2 -Wall -c trace_output_user.c
/* Current CLOCK_MONOTONIC reading, expressed in nanoseconds. */
static __u64 time_get_ns(void)
{
	struct timespec now;
	__u64 ns;

	clock_gettime(CLOCK_MONOTONIC, &now);
	/* Seconds are scaled in unsigned 64-bit arithmetic, then the sub-second part is added. */
	ns = now.tv_sec * 1000000000ull;
	ns += now.tv_nsec;
	return ns;
}
static __u64 start_time;
static __u64 cnt;
#define MAX_CNT 100000ll
/*
 * perf_buffer sample callback.
 *
 * Each sample is expected to be a {pid, cookie} pair of ints with
 * cookie == 0x12345678; anything else is reported as a bug. Valid samples are
 * counted, and when the count reaches MAX_CNT the event rate since start_time
 * is printed.
 *
 * ctx and cpu are unused.
 */
static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size)
{
	struct {
		int pid;
		int cookie;
	} *e = data;

	if (e->cookie != 0x12345678) {
		/*
		 * pid and cookie are 32-bit ints, so they are printed with %x;
		 * passing them to %llx is a format/argument mismatch.
		 */
		printf("BUG pid %x cookie %x sized %u\n",
		       (unsigned int)e->pid, (unsigned int)e->cookie, size);
		return;
	}

	cnt++;

	if (cnt == MAX_CNT) {
		printf("recv %lld events per sec\n",
		       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
		return;
	}
}
/*
 * Loads ./trace_output_kern.o, attaches bpf_prog1, and reads samples from
 * my_map through a perf buffer until MAX_CNT valid samples were counted or
 * polling fails. A background `dd` pinned to CPU 0 generates the write()
 * calls being traced.
 *
 * Returns 0 when the object cannot be opened (kept from the original), 1 when
 * the perf buffer cannot be created, otherwise the last value of ret.
 */
int main(int argc, char **argv)
{
	struct perf_buffer_opts pb_opts = {};
	struct bpf_link *link = NULL;
	struct perf_buffer *pb = NULL;
	struct bpf_program *prog;
	struct bpf_object *obj;
	int map_fd, ret = 0;
	FILE *f;

	obj = bpf_object__open_file("./trace_output_kern.o", NULL);
	if (libbpf_get_error(obj)) {
		fprintf(stderr, "ERROR: opening BPF object file failed\n");
		return 0;
	}

	/* load BPF program */
	if (bpf_object__load(obj)) {
		fprintf(stderr, "ERROR: loading BPF object file failed\n");
		goto cleanup;
	}

	map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
	if (map_fd < 0) {
		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
		goto cleanup;
	}

	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
	if (libbpf_get_error(prog)) {
		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
		goto cleanup;
	}

	link = bpf_program__attach(prog);
	if (libbpf_get_error(link)) {
		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
		link = NULL;
		goto cleanup;
	}

	pb_opts.sample_cb = print_bpf_output;
	pb = perf_buffer__new(map_fd, 8, &pb_opts);
	ret = libbpf_get_error(pb);
	if (ret) {
		printf("failed to setup perf_buffer: %d\n", ret);
		/*
		 * Go through cleanup instead of returning directly so the
		 * attached link and the loaded object are released.
		 */
		pb = NULL;
		ret = 1;
		goto cleanup;
	}

	/* Workload: dd pinned to CPU 0 issues a steady stream of write() calls. */
	f = popen("taskset 1 dd if=/dev/zero of=/dev/null", "r");
	(void) f;

	start_time = time_get_ns();
	while ((ret = perf_buffer__poll(pb, 1000)) >= 0 && cnt < MAX_CNT) {
	}
	/* Signals the whole process group, which stops the dd child. */
	kill(0, SIGINT);

cleanup:
	perf_buffer__free(pb);
	bpf_link__destroy(link);
	bpf_object__close(obj);
	return ret;
}
llvm-objdump output
trace_output_kern.o: file format elf64-bpf
Disassembly of section kprobe/sys_write:
0000000000000000 <bpf_prog1>:
; {
0: 7b 1a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r1
; data.pid = bpf_get_current_pid_tgid();
1: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
3: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0)
4: 8d 00 00 00 01 00 00 00 callx r1
5: 7b 0a e8 ff 00 00 00 00 *(u64 *)(r10 - 24) = r0
6: b7 01 00 00 78 56 34 12 r1 = 305419896
; data.cookie = 0x12345678;
7: 7b 1a f0 ff 00 00 00 00 *(u64 *)(r10 - 16) = r1
; bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
8: 18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
10: 79 10 00 00 00 00 00 00 r0 = *(u64 *)(r1 + 0)
11: 79 a1 f8 ff 00 00 00 00 r1 = *(u64 *)(r10 - 8)
12: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
14: b7 03 00 00 00 00 00 00 r3 = 0
15: 7b 3a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r3
16: bf a4 00 00 00 00 00 00 r4 = r10
; data.pid = bpf_get_current_pid_tgid();
17: 07 04 00 00 e8 ff ff ff r4 += -24
18: b7 05 00 00 10 00 00 00 r5 = 16
; bpf_perf_event_output(ctx, &my_map, 0, &data, sizeof(data));
19: 8d 00 00 00 00 00 00 00 callx r0
; return 0;
20: 79 a0 e0 ff 00 00 00 00 r0 = *(u64 *)(r10 - 32)
21: 95 00 00 00 00 00 00 00 exit
root#this:/home/ubuntu/Desktop/ebpf/kern# ^C
root#this:/home/ubuntu/Desktop/ebpf/kern# ^C
I also encountered the same problem. The solution is to add the compile option -O2 and to use LLVM_STRIP:
$(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) -c $(filter %.bpf.c,$^) -o $@
$(Q)$(LLVM_STRIP) -g $@ # strip useless DWARF info
I've tried to compare output from compiler with and without -O3 option, and I cannot follow how optimized code works.
Here's C code:
#include <stdio.h>
#include <limits.h>
/*
 * Optimization test case: starts from a = 3 and counts b (a copy of argc)
 * down to zero, incrementing a once per step, then prints a.
 */
int main(int argc, char *argv[]) {
int a = 3;
int b = argc;
/* Runs until b reaches 0; each pass moves one unit from b to a. */
while (b) {
++a;
--b;
}
printf("%d\n", a);
return 0;
}
And this is the objdump output without optimization. It looks pretty straightforward:
000000000040052d <main>:
#include <stdio.h>
#include <limits.h>
int main(int argc, char *argv[]) {
40052d: 55 push %rbp
40052e: 48 89 e5 mov %rsp,%rbp
400531: 48 83 ec 20 sub $0x20,%rsp
400535: 89 7d ec mov %edi,-0x14(%rbp)
400538: 48 89 75 e0 mov %rsi,-0x20(%rbp)
int a = 3;
40053c: c7 45 f8 03 00 00 00 movl $0x3,-0x8(%rbp)
int b = argc;
400543: 8b 45 ec mov -0x14(%rbp),%eax
400546: 89 45 fc mov %eax,-0x4(%rbp)
while (b) {
400549: eb 08 jmp 400553 <main+0x26>
++a;
40054b: 83 45 f8 01 addl $0x1,-0x8(%rbp)
--b;
40054f: 83 6d fc 01 subl $0x1,-0x4(%rbp)
int main(int argc, char *argv[]) {
int a = 3;
int b = argc;
while (b) {
400553: 83 7d fc 00 cmpl $0x0,-0x4(%rbp)
400557: 75 f2 jne 40054b <main+0x1e>
++a;
--b;
}
printf("%d\n", a);
400559: 8b 45 f8 mov -0x8(%rbp),%eax
40055c: 89 c6 mov %eax,%esi
40055e: bf 04 06 40 00 mov $0x400604,%edi
400563: b8 00 00 00 00 mov $0x0,%eax
400568: e8 a3 fe ff ff callq 400410 <printf#plt>
return 0;
40056d: b8 00 00 00 00 mov $0x0,%eax
}
But after adding -O3 option to gcc I got this:
0000000000400470 <main>:
#include <stdio.h>
#include <limits.h>
int main(int argc, char *argv[]) {
400470: 8d 47 03 lea 0x3(%rdi),%eax
400473: 48 83 ec 08 sub $0x8,%rsp
400477: ba 03 00 00 00 mov $0x3,%edx
40047c: 85 ff test %edi,%edi
}
__fortify_function int
printf (const char *__restrict __fmt, ...)
{
return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, __va_arg_pack ());
40047e: be 14 06 40 00 mov $0x400614,%esi
400483: bf 01 00 00 00 mov $0x1,%edi
400488: 0f 45 d0 cmovne %eax,%edx
40048b: 31 c0 xor %eax,%eax
40048d: e8 ce ff ff ff callq 400460 <__printf_chk#plt>
}
printf("%d\n", a);
return 0;
}
400492: 31 c0 xor %eax,%eax
400494: 48 83 c4 08 add $0x8,%rsp
400498: c3 retq
I can't see any jumps, or any effective add, sub instructions except manipulation with stack. Can someone explain this to me?
Thanks
Your code is optimized to printf("%d\n", 3 + argc), because gcc can figure out what the loop does.
The 3+argc is the first lea 0x3(%rdi),%eax.
Actually, gcc doesn't quite reduce the loop to a single addition:
The test/cmov stuff seems to be doing:
int a = 3;
if (argc != 0)
a = argc + 3;
aka
int a = argc ? 3 + argc : 3;
I'm trying to understand the OpenSSL library in more detail. So rather than using the set of higher-level EVP functions, I've been trying to use the AES_* functions. Following the general set of calls in this question (though I'm using CBC instead of counter mode), I've come up with this code:
/*
 * Reads ./input.txt in 16-byte blocks, encrypts each block with AES-128 in
 * CBC mode (despite the function's name) and writes the ciphertext to out.bin.
 *
 * The IV starts as eight zero bytes followed by the native-endian
 * unsigned long value 0xfd0; AES_cbc_encrypt() updates ivec after every call,
 * so successive blocks are chained.
 *
 * log: logger used for all diagnostics. Fatal errors go through logExit().
 */
void ctr(log_t* log)
{
    unsigned char ivec[16];
    /* Out buffer for ciphertext */
    unsigned char outBuf[16];
    blockReader_t* br = blockReaderInit(log, "./input.txt", 128);

    int outFD;
    if ((outFD = open("out.bin", O_WRONLY)) == -1)
    {
        /* Use the caller's logger directly, as every other call in this function does. */
        logPrint(log, LOG_ARGS, LOG_ERR, "open: %s", strerror(errno));
        logExit(log, LOG_ARGS, EXIT_FAILURE);
    }

    /*
     * Build the IV with memcpy rather than by storing through a casted
     * unsigned long pointer, which violates strict aliasing and alignment rules.
     */
    const unsigned long ivTail = 0xfd0;
    memset(ivec, 0, sizeof ivec);
    memcpy(ivec + 8, &ivTail, sizeof ivTail);

    AES_KEY aesKey;
    const char* myKey = "Pampers baby-dry";
    /*
     * The second argument is the key length in BITS (128 for this 16-byte key),
     * and the function returns 0 on success and a negative value on failure.
     */
    int res = AES_set_encrypt_key((const unsigned char*) myKey, 128, &aesKey);
    if (res != 0)
    {
        logPrint(log, LOG_ARGS, LOG_ERR, "AES_set_encrypt_key: returned %d", res);
        logExit(log, LOG_ARGS, EXIT_FAILURE);
    }

    unsigned char* buf;
    while ((buf = blockReaderGet(br)) != NULL)
    {
        logPrint(log, LOG_ARGS, LOG_INFO, "ivec =");
        logHexdump(log, LOG_ARGS, LOG_INFO, (char*) ivec, 16);
        logPrint(log, LOG_ARGS, LOG_INFO, "buf =");
        logHexdump(log, LOG_ARGS, LOG_INFO, (char*) buf, 16);

        /* Last argument 1 selects encryption. */
        AES_cbc_encrypt(buf, outBuf, 16, &aesKey, ivec, 1);

        logPrint(log, LOG_ARGS, LOG_INFO, "outBuf =");
        logHexdump(log, LOG_ARGS, LOG_INFO, (char*) outBuf, 16);

        /* Separate name so the key-setup result above is not shadowed. */
        int written = write(outFD, outBuf, 16);
        if (written == -1)
        {
            logPrint(log, LOG_ARGS, LOG_ERR, "write: %s", strerror(errno));
            logExit(log, LOG_ARGS, EXIT_FAILURE);
        }
        else if (written < 16)
        {
            logPrint(log, LOG_ARGS, LOG_WARN, "Unexpectedly wrote < 16 bytes");
        }
    }

    if ((close(outFD)) == -1)
    {
        logPrint(log, LOG_ARGS, LOG_ERR, "close: %s", strerror(errno));
        logExit(log, LOG_ARGS, EXIT_FAILURE);
    }
}
The log_t struct and calls to log*() are my own logging framework which I am using to help debug this code. blockReader_t is another framework for reading files in sets of bytes. blockReaderGet() simply fills the destination buffer with the predetermined number of bytes of data (in this case 128 bits/16 bytes).
Contents of input.txt:
$ hexdump -C input.txt
00000000 4d 69 64 6e 69 67 68 74 5f 4d 61 72 6c 69 6e 05 |Midnight_Marlin.|
00000010 52 69 63 68 61 72 64 52 69 63 68 61 72 64 06 07 |RichardRichard..|
00000020
Output (ran in GDB):
(gdb) run
Starting program: /home/adam/crypto/openssl/aes/aes_128
[ 0.000020] <aes_128.c:83> "main" INFO: Log library started (v1.9.0)
...
[ 0.000054] <aes_128.c:50> "ctr" INFO: ivec =
[ 0.000057] <aes_128.c:51> "ctr" INFO: HEX (16 bytes)
---BEGIN_HEX---
00000000 00 00 00 00 00 00 00 00 d0 0f 00 00 00 00 00 00 |................|
00000010
---END_HEX---
[ 0.000069] <aes_128.c:53> "ctr" INFO: buf =
[ 0.000071] <aes_128.c:54> "ctr" INFO: HEX (16 bytes)
---BEGIN_HEX---
00000000 4d 69 64 6e 69 67 68 74 5f 4d 61 72 6c 69 6e 05 |Midnight_Marlin.|
00000010
---END_HEX---
Program received signal SIGSEGV, Segmentation fault.
_x86_64_AES_encrypt_compact () at aes-x86_64.s:170
170 xorl 0(%r15),%eax
I'm using an OpenSSL from GitHub that I've built myself and linked against locally; specifically the OpenSSL_1_0_2e tag, which I gather is the latest stable version.
The Perl file that generates this assembly file uses the $key variable to name what r15 represents. But given that AES_set_encrypt_key() returns success, I'm not sure what's wrong.
Could anyone please offer any pointers to what might be wrong here?
EDIT:
Despite compiling OpenSSL with -g3 instead of -O3, the backtrace isn't useful:
(gdb) bt
#0 _x86_64_AES_encrypt_compact () at aes-x86_64.s:170
#1 0x0000000000402b6b in AES_cbc_encrypt () at aes-x86_64.s:1614
#2 0x00007fffffffe0a0 in ?? ()
#3 0x000080007dfc19a0 in ?? ()
#4 0x00007fffffffe050 in ?? ()
#5 0x0000000000635080 in ?? ()
#6 0x00007fffffffe1a0 in ?? ()
#7 0x0000000000000010 in ?? ()
#8 0x00007ffff7bdf9a0 in ?? ()
#9 0x00007fffffffe1b0 in ?? ()
#10 0x00007fff00000001 in ?? ()
#11 0x00007ffff7bdf4c8 in ?? ()
#12 0x00007fffffffda40 in ?? ()
#13 0x0000000000000000 in ?? ()
EDIT 2:
CFLAG has been changed:
CFLAG= -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -Wa,--noexecstack -m64 -DL_ENDIAN -O0 -ggdb -Wall -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_MONT5 -DOPENSSL_BN_ASM_GF2m -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DVPAES_ASM -DBSAES_ASM -DWHIRLPOOL_ASM -DGHASH_ASM -DECP_NISTZ256_ASM
Note the -O0 -ggdb. Backtrace is the same:
(gdb) bt
#0 _x86_64_AES_encrypt_compact () at aes-x86_64.s:170
#1 0x0000000000402b6b in AES_cbc_encrypt () at aes-x86_64.s:1614
#2 0x00007fffffffe0a0 in ?? ()
#3 0x000080007dfc19a0 in ?? ()
#4 0x00007fffffffe050 in ?? ()
#5 0x0000000000635080 in ?? ()
#6 0x00007fffffffe1a0 in ?? ()
#7 0x0000000000000010 in ?? ()
#8 0x00007ffff7bdf9a0 in ?? ()
#9 0x00007fffffffe1b0 in ?? ()
#10 0x00007fff00000001 in ?? ()
#11 0x00007ffff7bdf4c8 in ?? ()
#12 0x00007fffffffda40 in ?? ()
#13 0x0000000000000000 in ?? ()
EDIT: MCVE example
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <openssl/aes.h>
unsigned char input[] = {0x4du, 0x69u, 0x64u, 0x6eu, 0x69u, 0x67u, 0x68u, 0x74u,
0x5fu, 0x4du, 0x61u, 0x72u, 0x6cu, 0x69u, 0x6eu, 0x05u,
0x52u, 0x69u, 0x63u, 0x68u, 0x61u, 0x72u, 0x64u, 0x52u,
0x69u, 0x63u, 0x68u, 0x61u, 0x72u, 0x64u, 0x06u, 0x07u};
int main()
{
unsigned char ivec[16];
/* ivec[0..7] is the IV, ivec[8..15] is the big endian counter. */
unsigned char outBuf[16];
int outFD;
if ((outFD = open("out.bin", O_WRONLY)) == -1)
{
perror("open");
return EXIT_FAILURE;
}
memset(ivec, 0, 16);
unsigned char* ivec2 = ivec + 8;
unsigned long* ivec3 = (unsigned long*) ivec2;
*ivec3 = (unsigned long) 0xfd0;
AES_KEY aesKey;
char* myKey = "Pampers baby-dry";
int res;
if (!(res = AES_set_encrypt_key((unsigned char*) myKey, 16, &aesKey)))
{
fprintf(stderr, "AES_set_encrypt_key: returned %d", res);
return EXIT_FAILURE;
}
for (int i = 0; i < 32; i += 16)
{
printf("ivec = ");
for (int j = 0; j < 16; j++)
printf("%.02hhx ", ivec[j]);
putchar('\n');
printf("input = ");
for (int j = i; j < (i + 16); j++)
printf("%.02hhx ", input[j]);
putchar('\n');
AES_cbc_encrypt(&input[i], outBuf, 16, &aesKey, ivec, 1);
printf("outBuf = ");
for (int j = 0; j < 16; j++)
printf("%.02hhx ", outBuf[j]);
putchar('\n');
int res = write(outFD, outBuf, 16);
if (res == -1)
{
perror("write");
return EXIT_FAILURE;
}
else if (res < 16)
{
printf("Warning: unexpectedly wrote < 16 bytes");
}
}
if ((close(outFD)) == -1)
{
perror("close");
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
So there are several major bugs here. I'll go through all the ones I caught, but there may be more, as I didn't do a thorough code review.
You are using magic numbers everywhere (i.e., the integer literal 16). Swap these out for a preprocessor macro or, even better, a const int.
The output buffer needs to be at least as big as your input buffer, and should be rounded up to the nearest multiple of the block size, plus one more block.
You are looping through each element of the input data and trying to encrypt one byte at a time. Unless you are implementing some obscure layer on top of AES, this is wrong. You iterate over blocks of data, not individual bytes. The loop is completely unnecessary.
Your input data buffer appears to be bigger than your output data buffer. With your current implementation, the last 16 bytes I think will be truncated/lost, since the input buffer has 32 bytes of data, but the output buffer is 16 bytes. In your specific example, input should be 32 bytes, output should be 32+1.
In addition to the loop being unnecessary, with some modifications it would run (incorrectly, corrupting data), and eventually access invalid memory (ie: pointing to near the end of the input buffer, and telling the encrypt function to ask for 16 bytes of data after that point).
I've provided an updated code listing and sample output that should get you on the right track. Here's a working example that should also guide you along.
Good luck!
Modified Code Listing
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <openssl/aes.h>
#define BLOCK_SIZE (128)
unsigned char input[BLOCK_SIZE] = {
0x4du, 0x69u, 0x64u, 0x6eu, 0x69u, 0x67u, 0x68u, 0x74u,
0x5fu, 0x4du, 0x61u, 0x72u, 0x6cu, 0x69u, 0x6eu, 0x05u,
0x52u, 0x69u, 0x63u, 0x68u, 0x61u, 0x72u, 0x64u, 0x52u,
0x69u, 0x63u, 0x68u, 0x61u, 0x72u, 0x64u, 0x06u, 0x07u};
/*
 * Encrypts the whole BLOCK_SIZE-byte `input` buffer with AES-128-CBC in a
 * single AES_cbc_encrypt() call, prints the IV buffer, the plaintext and the
 * ciphertext as hex, and writes the ciphertext to out.bin.
 *
 * Note that BLOCK_SIZE (128) is used in two roles here: as the key length in
 * bits for AES_set_encrypt_key(), and as the byte length of the data and
 * buffers. Only the first 16 bytes of ivec are the CBC IV; the rest of that
 * buffer is printed but not used by the cipher.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if open/key setup/write/close fails.
 *
 * Changes from the earlier version:
 *  - open() with O_CREAT must be given a mode argument; without it the new
 *    file's permission bits are indeterminate. 0644 is now passed.
 *  - The IV bytes are written with memcpy() instead of through a cast
 *    `unsigned long *` (alignment / strict aliasing); same bytes result.
 *  - The inner `res` no longer shadows the outer one, and the leftover
 *    commented-out loop and its dead index were removed. Output is unchanged.
 */
int main()
{
    unsigned char ivec[BLOCK_SIZE];
    unsigned char outBuf[BLOCK_SIZE+1];

    int outFD = open("out.bin", O_CREAT | O_RDWR,
                     S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (outFD == -1)
    {
        perror("open");
        return EXIT_FAILURE;
    }

    /* IV: eight zero bytes followed by 0xfd0 in native byte order. */
    const unsigned long ivecTail = 0xfd0;
    memset(ivec, 0, BLOCK_SIZE);
    memcpy(ivec + 8, &ivecTail, sizeof ivecTail);

    AES_KEY aesKey;
    const char* myKey = "Pampers baby-dry";
    int res = AES_set_encrypt_key((const unsigned char*) myKey, BLOCK_SIZE, &aesKey);
    if (res < 0)
    {
        fprintf(stderr, "AES_set_encrypt_key: returned %d", res);
        return EXIT_FAILURE;
    }

    printf("ivec = ");
    for (int j = 0; j < BLOCK_SIZE; j++)
        printf("%.02hhx ", ivec[j]);
    putchar('\n');

    printf("input = ");
    for (int j = 0; j < BLOCK_SIZE; j++)
        printf("%.02hhx ", input[j]);
    putchar('\n');
    putchar('\n');
    putchar('\n');
    putchar('\n');

    AES_cbc_encrypt(input, outBuf, BLOCK_SIZE, &aesKey, ivec, AES_ENCRYPT);

    printf("outBuf = ");
    for (int j = 0; j < BLOCK_SIZE; j++)
        printf("%.02hhx ", outBuf[j]);
    putchar('\n');

    ssize_t written = write(outFD, outBuf, BLOCK_SIZE);
    if (written == -1)
    {
        perror("write");
        return EXIT_FAILURE;
    }
    else if (written < BLOCK_SIZE)
    {
        printf("Warning: unexpectedly wrote < %d bytes.\n", BLOCK_SIZE);
    }

    if (close(outFD) == -1)
    {
        perror("close");
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
Build Command
gcc -O0 -ggdb test.c --std=c99 -lssl -lcrypto && ./a.out
Sample Output
ivec = 00 00 00 00 00 00 00 00 d0 0f 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
input = 4d 69 64 6e 69 67 68 74 5f 4d 61 72 6c 69 6e 05 52 69 63 68 61 72 64 52 69 63 68 61 72 64 06 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
outBuf = 81 ee 91 c0 9f f6 40 db 3c 6d 32 dd 5e 86 6f f8 4e 7b aa 15 38 36 b8 20 bc 04 bd 4f 6c 53 0e 02 72 c2 b7 e8 79 35 f2 b2 e1 c1 6e 1e 3b 1e 75 81 6a 56 43 d8 9d 9c 4c 1e 04 bd 99 29 3a 55 c9 a4 90 48 20 13 5e 51 4a 0c 4b 35 bc db da 54 f1 2b 66 f6 1b 1a 42 25 33 30 0e 35 87 9d 4b 1f d5 3a 5d 3a 8e 8c c8 48 c0 52 72 c0 4e b3 b8 f5 37 03 1c 87 15 61 3b 64 2b 06 5e 12 8f c7 b5 21 98 06
I am trying to convert hexadecimal data coming from a port (stored in a buffer) into integer format using a C program. Before converting from the buffer, I wanted to test my code by giving it some input inside the program. The following is the program I am using, taken from an online source.
#include <stdio.h>
#include <stdlib.h>
/*
 * Parse a hexadecimal number from the beginning of a string.
 *
 * s: NUL-terminated text. A leading '0' is skipped, and so is an 'x' or 'X'
 *    that directly follows it, so "0x1F", "0X1f" and "1F" all parse alike.
 *
 * Returns the value of the longest run of hex digits (0-9, a-f, A-F) found
 * at that point; parsing stops at the first other character, including
 * whitespace and the terminating NUL. Returns 0 when there are no digits.
 *
 * The value is accumulated in an unsigned int so that input with more digits
 * than fit in an int wraps modulo 2^N instead of triggering signed-overflow
 * undefined behaviour; the final conversion back to int is
 * implementation-defined for values above INT_MAX.
 */
int hexToInt(const char s[]) {
    unsigned int value = 0;
    int i = 0;

    /* Optional "0x" / "0X" prefix. */
    if (s[i] == '0') {
        ++i;
        if (s[i] == 'x' || s[i] == 'X') {
            ++i;
        }
    }

    for (;; ++i) {
        const char c = s[i];
        unsigned int digit;

        if (c >= '0' && c <= '9') {
            digit = (unsigned int)(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            digit = (unsigned int)(c - 'a') + 10u;
        } else if (c >= 'A' && c <= 'F') {
            digit = (unsigned int)(c - 'A') + 10u;
        } else {
            break; /* first non-hex character ends the number */
        }
        value = 16u * value + digit;
    }
    return (int)value;
}
/*
 * Demo driver: runs hexToInt() on a fixed sample string and prints the
 * result. Command-line arguments are not used.
 */
int main(int argc, char** argv) {
    char sample[] = "93 BC";

    printf("The Integer is %d", hexToInt(sample));
    return 0;
}
When I run this program, it converts one input of hexadecimal into a integer. But if I had to convert an array of hex input as listed below:
00 00 00 05 00 00 00 01 93 BC C0 06 00 00 00 00 ................
00 28 17 00 FC 26 CC 62 00 00 00 07 00 00 00 01 .(...&.b........
00 00 00 D0 00 E3 37 19 00 00 00 1D 00 00 01 00 ......7.........
AB B6 CD 14 00 11 1F 3C 00 00 00 1D 00 00 00 00 .......<........
00 00 00 02 00 00 00 01 00 00 00 90 00 00 00 01 ................
00 00 05 EE 00 00 00 04 00 00 00 80 F0 92 1C 48 ...........�...H
C2 00 00 0E 0C 30 C7 C7 08 00 45 00 05 DC 32 70 .....0....E...2p
40 00 2D 06 41 C8 2D 3A 4A 01 93 BC C8 EC 01 BB @.-.A.-:J.......
C1 58 C5 8D 53 88 05 72 46 E6 80 10 00 53 DC 34
How can I convert it into the corresponding integer values?
To convert an array of hex input, change hexToInt() to hexToInt(const char *s, char **endptr) and have it set *endptr to where the parsing stopped. If no parsing occurred, have it set *endptr = s.
/*
 * Demo driver for the two-argument hexToInt(s, &endptr): repeatedly parses
 * from the current position and prints each value, stopping at the end of
 * the string or as soon as a call consumes nothing (endptr == start).
 */
int main(void) {
    char hex[] = "93 BC";
    char *cursor = hex;
    char *next;

    for (; *cursor != '\0'; cursor = next) {
        int value = hexToInt(cursor, &next);
        if (next == cursor) {
            /* No progress: bail out rather than loop forever. */
            break;
        }
        printf("The Integer is %d", value);
    }
    return 0;
}