So, I'm struggling a little on how Stack Machines & 3AC (Register) Machines interpret expressions. Take this expression for an example:
4 * 2 - 3
As a 3AC Machine (instruction sets feature three-operands, a type of Register Register Machine), I think it would look something like this
(Note - I use '#' for comments, pretty sure that this isn't the correct syntax):
LOAD 4, r0 # register 0 stores 4
LOAD 2, r1 # register 1 stores 2
MUL r0, r1, r0 # register 0 now stores 8 (4 x 2)
LOAD 3, r1 # register 1 now stores 3
SUB r0, r1, r0 # register 0 stores 5 (8 - 3)
Sorry about the formatting. I don't know how to get subscripts in code blocks.
I think a Stack Machine would look something like this:
LOAD 4
LOAD 2
MUL
LOAD 3
SUB # would this be 8 - 3 or 3 - 8?
NEG # if its 3 - 8 then I need to negate the top
PRINT # outputs the top of the stack
HALT
I was going to post this on codereview.stackexchange.com but the problem is this code does not work correctly.
I have an AT89C4051 processor which only has one hardware UART, however I will attach two UART-based devices to it of which one is my computer. The configuration across all devices is 9600,N,8,1 (9600bps, no parity, 8 data bits, 1 stop bit)
I attempted to create two software-based UARTs with machine-optimized code that can interface between the two devices, however the data I received on my computer is incorrect.
I was expecting 'ABCD' or 'abcd' to appear on the screen but all I received was 'DDDD'.
What can I do here to fix this problem without resorting to buying extra hardware?
;dual 9600bps channels
;RX1,TX1 = uart channel
;RX2,TX2 = uart channel
;start with uart configuration
org 0000h
ljmp uartcfg
;every 1/2 bit time execute function
org 000Bh
sjmp swuartint
SWCFG equ 20h ;serial config
;config for uart 1
SWRI bit 20h.0 ;data fully received if SWRI set
serrxslow bit 20h.2 ;bit flip-flop for receive. cpl 2x = 1 full bit time
SWTI bit 20h.1 ;data fully transmitted if SWTI set
sertxslow bit 20h.3 ;bit flip-flop for transmit. cpl 2x = 1 full bit time
;config for uart 2: same format
SWRI2 bit 20h.4
serrxslow2 bit 20h.6
SWTI2 bit 20h.5
sertxslow2 bit 20h.7
SWUARTNO bit 21h.0 ;current Software Uart Number
SWRNR bit 21h.1 ;Software Uart Receiver Not Ready
SWTMP equ 0Fh ;Temporary variable to pass indirect address value to register
SBUFR2 equ 0Eh ;Uart 2 received byte value
SBUFR equ 0Dh ;Uart 1 received byte value
SBUFT2 equ 0Ch ;Uart 2 sent byte value
SBUFT equ 0Bh ;Uart 1 sent byte value
swuartint:
;1/2 bit time detected.
;save registers
push PSW
push ACC
;set register bank RS1:RS0 = 1:1 or 1:0 depending on UART wanted
;so we have our own private set of R0-R7 variables for each UART
setb RS1
mov C,SWUARTNO
mov RS0,C
;Get bit from UART 2
mov C,RX2
jb RS0,nour1
mov C,RX1 ;Get bit from UART 1 if RS0 not set
nour1:
jb SWTI,extx
;Transmit routine is only used if transmit complete flag is NOT set.
cpl sertxslow
jnb sertxslow,extx
;Only do the real transmit work on full bit times
;save existing carry flag (it has received value)
push PSW
mov A,R2 ;R2=transmit counter to load
jnz tnz
;Counter=0: Load transmitted value to R3 through temporary variable
mov SWTMP,#R1
mov R3,SWTMP
;Set counter to 1 and clear C to output start bit (of 0)
inc A
clr C
sjmp extx1
tnz:
jnb ACC.3,tne
jnb ACC.0,tne
;Counter is 9 so set transmit complete flag
setb SWTI
clr A ;set counter to 0 and set C to output stop bit (of 1)
setb C
sjmp extx1
tne:
;Counter is between 1 and 8
;Save accumulator to R5 since push/pop wastes cycles
mov R5,A
mov A,R3 ;load accumulator with outgoing data
RRC A ;shift out one byte to C
mov R3,A ;save data
mov A,R5
inc A ;increment count
extx1:
;Use right transmit channel based on chosen UART number
jb RS0,nour2
mov TX1,C
sjmp extx2
nour2:
mov TX2,C
extx2:
;Save count
mov R2,A
;and load back receiver value C
pop PSW
extx:
mov A,R7 ;load receiving count
jnz rnz
;Counter=0. Store ready status
mov SWRNR,C
jc exrx
;increment counter only if start bit=0
inc A
clr serrxslow ;reset flip flop so we wait 1 bit time to get bit.
sjmp exrx
rnz:
cpl serrxslow
jb serrxslow,exrx
;Counter > 0, 1 bit time waited
jnb ACC.3,rne
jnb ACC.0,rne
;Counter = 9. Status = Stop bit. 1=complete. 0=fail.
mov SWRI,C
;Store data
mov SWTMP,R4
mov #R0,SWTMP
;Clear count
clr A
sjmp exrx
rne:
;Counter = 1 to 8.
;Load data same way as transmit routine
mov R6,A
mov A,R4
RRC A
mov R4,A
mov A,R6
inc A ;increment count
exrx:
mov R7,A ;save count to receive count variable
;Don't switch UARTs when in the middle of a bit time
jnb sertxslow,nextuart
;Don't switch UARTs if receiver has data to process
jnb SWRNR,nextuart
;Don't switch UARTs if transmitter has data to process
jnb SWTI,nextuart
;Switch UART
cpl SWUARTNO
;Flip nibbles in configuration as each nibble
;belongs to its own UART
mov A,SWCFG
swap A
mov SWCFG,A
nextuart:
;restore registers and exit
pop ACC
pop PSW
reti
uartcfg:
setb RS1
setb RS0
mov R0,#SBUFR2 ;If RS1=RS0=1, #R0 = contents of SBUFR2
mov R1,#SBUFT2 ;If RS1=RS0=1, #R1 = contents of SBUFT2
clr RS0
mov R0,#SBUFR ;If RS1=1 and RS0=0, #R0 = contents of SBUFR
mov R1,#SBUFT ;If RS1=1 and RS0=0, #R1 = contents of SBUFT
clr RS1 ;Restore register bank (RS1=0, RS0=0)
mov SWCFG,#22h ;Set transmit complete to both UARTs at once
mov TH0,#0A0h ;Value = 256-(((crystal/baud)/12)/2) = 9.6kbps 1/2 bit rate
mov TL0,#0A0h
mov TMOD,#22h ;8-bit timers auto reload
mov SP,#50h ;get stack pointer away from our work
setb TR0 ;enable timer
mov IE,#082h ;enable Timer 0 interrupt
ljmp startapp ;go to program
;serial port 1
;plugged into computer for data testing
RX1 equ P3.0
TX1 equ P3.1
;serial port 2
RX2 equ P3.3
TX2 equ P1.2
startapp:
; Trying to output 'ABCD' or 'abcd' but all I get is 'DDDD'
mov SBUFT,#0h ;Set character for UART 1
clr SWTI ;clear complete flag
jnb SWTI,$ ;stall until character transmitted
mov SBUFT,#'A' ;repeat...
clr SWTI
jnb SWTI,$
mov SBUFT,#'B'
clr SWTI
jnb SWTI,$
mov SBUFT,#'C'
clr SWTI
jnb SWTI,$
mov SBUFT,#'D'
clr SWTI
jnb SWTI,$
mov SBUFT2,#0h ;Set character for UART 2
clr SWTI2 ;clear complete flag
jnb SWTI2,$ ;stall until character transmitted
mov SBUFT2,#'a' ;repeat...
clr SWTI2
jnb SWTI2,$
mov SBUFT2,#'b'
clr SWTI2
jnb SWTI2,$
mov SBUFT2,#'c'
clr SWTI2
jnb SWTI2,$
mov SBUFT2,#'d'
clr SWTI2
jnb SWTI2,$
sjmp $
Using a disassembler i found a instruction as MOV [LOCAL.9] ,0.
What does LOCAL represents ?
What will this instruction do ?
I want to decoding a blx instruction on arm, and I have found a good answer here:
Decoding BLX instruction on ARM/Thumb (IOS)
But in my case, I follow this tip step by step, and get the wrong result, can anyone tell me why?
This is my test:
.plt: 000083F0 sub_83F0 ...
...
.text:00008436 FF F7 DC EF BLX sub_83F0
I parse the machine code 'FF F7 DC EF' by follow:
F7 FF EF DC
11110 1 1111111111 11 1 0 1 1111101110 0
S imm10H J1 J2 imm10L
I1 = NOT(J1 EOR S) = 1
I2 = NOT(J2 EOR S) = 1
imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
= SignExtend(1111111111111111110111000)
= SignExtend(0x1FFFFB8)
= ?
So the offset is 0xFFB8?
But 0x83F0-0X8436-4=0xFFB6
I need your help!!!
When the target of a BLX is 32-bit ARM code, the immediate value encoded in the BLX instruction is added to align(PC,4), not the raw value of PC.
PC during execution of the BLX instruction is 0x8436 + 4 == 0x843a due to the ARM pipeline
align(0x843a, 4) == 0x8438
So:
0x00008438 + 0ffffffb8 == 0x83f0
The ARM ARM mentions this in the assembler syntax for the <label> part of the instruction:
For BLX (encodings T2, A2), the assembler calculates the required value of the offset from the Align(PC,4) value of the BLX instruction to this label, then selects an encoding that sets imm32 to that offset.
The alignment requirement can also be found by careful reading of the Operation pseudocode in the ARM ARM:
if ConditionPassed() then
EncodingSpecificOperations();
if CurrentInstrSet == InstrSet_ARM then
next_instr_addr = PC - 4;
LR = next_instr_addr;
else
next_instr_addr = PC;
LR = next_instr_addr<31:1> : ‘1’;
if toARM then
SelectInstrSet(InstrSet_ARM);
BranchWritePC(Align(PC,4) + imm32); // <--- alignment of the current PC when BLX to non-Thumb ARM code
else
SelectInstrSet(InstrSet_Thumb);
BranchWritePC(PC + imm32);
F7FF
1111011111111111
111 10 11111111111 h = 10 offset upper = 11111111111
EFDC
1110111111011100
111 01 11111011100 h = 01 blx offset upper 11111011100
offset = 1111111111111111011100<<1
sign extended = 0xFFFFFFB8
0x00008436 + 2 + 0xFFFFFFB8 = 1000083F0
clip to 32 bits 0x000083F0
I'm interested in finding the fastest way (lowest cycle count) of comparing the values stored into NEON registers (say Q0 and Q3) on a Cortex-A9 core (VFP instructions allowed).
So far I have the following:
(1) Using the VFP floating point comparison:
vcmp.f64 d0, d6
vmrs APSR_nzcv, fpscr
vcmpeq.f64 d1, d7
vmrseq APSR_nzcv, fpscr
If the 64bit "floats" are equivalent to NaN, this version will not work.
(2) Using the NEON narrowing and the VFP comparison (this time only once and in a NaN-safe manner):
vceq.i32 q15, q0, q3
vmovn.i32 d31, q15
vshl.s16 d31, d31, #8
vcmp.f64 d31, d29
vmrs APSR_nzcv, fpscr
The D29 register is previously preloaded with the right 16bit pattern:
vmov.i16 d29, #65280 ; 0xff00
My question is: is there any better than this? Am I overseeing some obvious way to do it?
I believe you can reduce it by one instruction. By using the shift left and insert (VLSI), you can combine the 4 32-bit values of Q15 into 4 16-bit values in D31. You can then compare with 0 and get the floating point flags.
vceq.i32 q15, q0, q3
vlsi.32 d31, d30, #16
vcmp.f64 d31, #0
vmrs APSR_nzcv, fpscr