How to write in nasm without writing null bytes? - file

Pretty simple problem.
This nasm is supposed to write a user-written message (i.e. hello) to a file, again determined by user input from an argument. It does this just fine, but the problem is, it writes all the null bytes not used afterwards as well. For example, if I reserve 32 bytes for user input, and the user only uses four for his input, those for bytes will be printed, along with 28 null bytes.
How do I stop printing null bytes?
Code used:
global _start
section .text
_start:
mov rax, 0 ; get input to write to file
mov rdi, 0
mov rsi, msg
mov rdx, 32
syscall
mov rax, 2 ; open the file at the third part of the stack
pop rdi
pop rdi
pop rdi
mov rsi, 1
syscall
mov rdi, rax
mov rax, 1 ; write message to file
mov rsi, msg
mov rdx, 32
syscall
mov rax, 3 ; close file
syscall
mov rax, 1 ; print success message
mov rdi, 1
mov rsi, output
mov rdx, outputL
syscall
mov rax, 60 ; exit
mov rdi, 0
syscall
section .bss
msg: resb 32
section .data
output: db 'Success!', 0x0A
outputL: equ $-output

Well, after doing some digging in header files and experimenting, I figured it out on my own.
Basically, the way it works is that you have to put the user's string through a byte counting process that counts along the string until it finds a null byte, and then stores that number of non-null bytes.
I'll post the workaround I'm using for anyone who's had the same problem as me. Keep in mind that this solution is for 64-bit nasm, NOT 32!
For 32-bit coders, change:
all instances of "rax" with "eax"
all instances of "rdi" with "ebx"
all instances of "rsi" with "ecx"
all instances of "rdx" with "edx"
all instances of "syscall" with "int 80h" (or equivelant)
all instances of "r8" with "edx" (you'll have to juggle this and rdx)
Here's the solution I use, in full:
global _start
; stack: (argc) ./a.out input filename
section .text
_start:
getInput:
mov rax, 0 ; syscall for reading user input
mov rdi, 0
mov rsi, msg ; store user input in the "msg" variable
mov rdx, 32 ; max input size = 32 bytes
xor r8, r8 ; set r8 to zero for counting purposes (this is for later)
getInputLength:
cmp byte [msg + r8], 0 ; compare ((a byte of user input) + 0) to 0
jz open ; if the difference is zero, we've found the end of the string
; so we move on. The length of the string is stored in r9.
inc r8 ; if not, onto the next byte...
jmp getInputLength ; so we jump back up four lines and repeat!
open:
mov rax, 2 ; syscall for opening files
pop rdi
pop rdi
pop rdi ; get the file to open from the stack (third argument)
mov rsi, 1 ; open in write mode
syscall
; the open syscall above has made us a full file descriptor in rax
mov rdi, rax ; so we move it into rdi for later
write:
mov rax, 1 ; syscall for writing to files
; rdi already holds our file descriptor
mov rsi, msg ; set the message we're writing to the msg variable
mov rdx, r8 ; set write length to the string length we measured earlier
syscall
close:
mov rax, 3 ; syscall for closing files
; our file descriptor is still in fd
syscall
exit:
mov rax, 60 ; syscall number for program exit
mov rdi, 0 ; return 0
Keep in mind that this is not a complete program. It totally lacks error handling, offers no user instruction, etc. It is only an illustration of method.

Related

Making/opening/writing files in nasm assembly mac x86_64, 64 bit not working

I am new to assembly and I wanted to get into writing files. I could not find anything for my conditions (64 bit on mac using nasm). Here is my code so far. Its not doing anything.
global start
; default rel
section .text
start:
mov rax, 0x2000005
mov rdi, 2
mov rsi, file
mov rdx, lenfile
syscall
mov rax, 0x2000001 ;Exiting
xor rdi, rdi
syscall
section .data
file db "test.txt"
lenfile equ $ - file
What can I change or add to make a file and write hello world on it?
EDIT 1
I know this will not work but this is what I have so far. I don't know how to write to the file. The file is not being created either.
global start
; default rel
section .text
start:
;open
mov rax, 0x2000005
mov rdi, 2
mov rsi, file
mov rdx, lenfile
syscall
;write
mov rax, 0x2000004
mov rdi, 1
mov rsi, str
mov rdx, strlen
syscall
;close
mov rax, 0x2000006
mov rdi, 3
mov rsi, file
; mov rdx, lenfile
syscall
mov rax, 0x2000001 ;Exiting
xor rdi, rdi
syscall
section .data
str: db "Hello world", 0
strlen: equ $ - str
file: db "test.txt"
lenfile: equ $ - file
Edit 2
I think that I have properly opened it but now I need to properly close it. How would I do that. Forget the writing part for now. Here is what I have so far.
global start
; default rel
section .text
start:
;open
mov rax, 0x2000005
mov rdi, file
mov rsi, 1
syscall
;close
mov rax, 0x2000006
mov rdi, file
mov rsi, 1
syscall
mov rax, 0x2000001 ;Exiting
xor rdi, rdi
syscall
section .data
str: db "Hello world", 0
strlen: equ $ - str
file: db "test.txt"
lenfile: equ $ - file
While I was writing edit 2, I looked at the c version on the closing part. It's just a simple function. fclose(file); How could I do that in assembly?

how to pass a variable by reference to a c function `sscanf()`

the following code tries to read the command line arguments and then scans them with sscanf() and use the result to emit utf8 text.
I'm failing to call sscanf() and getting segfault error at the line where I call this function
I have already debugged this and I know where is the problem but not how to solve it.
global main
extern puts
extern sscanf
extern printf
extern emit_utf_8
section .text
main:
cmp rdi, 2
jl argumentsError
add rsi, 8 ; skip the name of the program
forloop:
mov r12, rsi
push rdi
push rsi
push r12
sub rsp, 8 ; must align stack before call
; start of for bloc
xor rax, rax
mov rdi, qword [r12]
mov rsi, codePointFormat
mov rdx, qword [codePoint]
call sscanf
cmp rax, 1
je ifthen
jmp else
ifthen:
mov rdi, codePoint
call emit_utf_8
jmp endif
else:
mov rdi, incorrectFormat
mov rsi, r12
call printf
endif:
; end of for bloc
add rsp, 8 ; restore %rsp to pre-aligned value
pop r12
pop rsi
pop rdi
add rsi, 8 ; point to next argument
dec rdi ; count down
jnz forloop ; if not done counting keep going
ret
argumentsError: mov rdi, argumentsRequiredMessage
call puts
mov rdi, argumentDescription
call puts
xor rax, rax
inc rax
ret
section .data
argumentsRequiredMessage:
db "This program requires one or more command line arguments,", 0
argumentDescription: db "one for each code point to encode as UTF-8.", 0
incorrectFormat: db "(%s incorrect format)", 0
codePointFormat: db "U+%6X", 0
section .bss
codePoint: resb 8 ; The code point from sscanf should go here.
Is there a way to pass that third argument?
sscanf() signature.
in __cdecl sscanf(const char *const _buffer, const char *const _Format, ...)
I'm using ubuntu 19.04 64 bit

Assembler stuck in sys_read loop

I've written a piece of code that takes a number in ASCII characters from the prompt, converts it into a decimal number and stores it in 'dnumber'. The conversion has been checked and goes well. It goes wrong at the prompt. It seems to be stuck in an infinite loop while asking the user for the ASCII character number. I want the program stop asking for input when the user presses ENTER, but that termination value never seems to be reached even though I think I've set it that way.
I've asked two related questions on this forum lately and it showed that I don't understand system calls properly. I've read all the documentation on 'http://www.tutorialspoint.com/assembly_programming', 'http://cs.lmu.edu/~ray/notes/nasmtutorial/' and some of 'http://www.x86-64.org/documentation/abi.pdf' and apparently I'm still not getting it. Hopefully you can show me the light.
Here is the compiler info:
nasm -f elf64 convinput.asm
ld -s -o convinput convinput.o
Here is the prompt:
$ ./convinput
Enter a number and press enter:
123
123
123
As you can see I've pressed ENTER twice, but the prompt still asks for input.
Here is the code:
section .text
global _start
_start:
mov eax, 4
mov ebx, 1
mov edx, lenmsg1
mov ecx, msg1
int 80h
xor eax, eax
xor ebx, ebx
xor edx, edx
mov esi, data
call input
mov esi, data
movzx ecx, byte [dignum]
xor ebx,ebx ; clear ebx
call string_to_int
mov dword [dnumber], eax
mov eax, 1
mov ebx, 0
int 80h
input:
mov eax, 3
mov ebx, 0
mov ecx, esi
mov edx, 1
int 80h
inc byte [dignum]
cmp byte [esi], 13
inc esi
jne input
ret
string_to_int:
xor ebx,ebx
movzx eax, byte [esi]
inc esi
sub al,'0' ; convert from ASCII to number4
mov ebx, 10
mul ebx
add ebx,eax ; ebx = ebx*10 + eax
dec byte [dignum]
cmp byte [dignum], 0
jne string_to_int
mov eax,ebx
ret
section .bss
dignum resb 1
data resb 1000
dnumber resd 1
section .data
msg1 db 'Enter a number and press enter: ', 10, 0
lenmsg1 equ $ -msg1
; ESI = pointer to the string to convert
; ECX = number of digits in the string (must be > 0)
; Output:
; EAX = integer valu
Judging by values you put into eax you are using a 32-bit syscall table, which on linux is different than 64-bit one.
You can easily see what syscalls (and with what arguments) are called by running your program under strace.

Reading text file line by line NASM

I'm new in assembly programming and I have an assignment in which I have to read a text file line by line and use what is written in the file and pass it to another function. My problem is that I'm not sure of how to read the text this way because from what I have discovered for reading a text file first I have to create a buffer reserving certain quantity of bytes for storing what is in the file. Howerver in this case I want to read line by line (like a loop) until the end of file so I dont know how much bytes I have to reserve. Thanks.
Btw here is the code I'm trying to use:
SECTION .data
file_name db 'instruct.txt',0
SECTION .bss
fd_out resb 1
fd_in resb 1
info resb 20
SECTION .text
global main
main: ;tell linker entry point
push ebp
mov ebp, esp
push ebx
;open the file for reading
mov eax, 5
mov ebx, file_name
mov ecx, 2
mov edx, 0777 ;read, write and execute by all
int 0x80
mov [fd_in], eax
loop:
;read from file
mov eax, 3
mov ebx, [fd_in]
mov ecx, info
mov edx, 5
int 0x80
cmp eax, 0
;check EOF
je exit
; print the info
mov eax, 4
mov ebx, 1
mov ecx, info
mov edx, 5
int 0x80
;
jmp loop
mov eax,1 ;system call number (sys_exit)
int 0x80 ;call kernel
exit:
; close the file
mov eax, 6
mov ebx, [fd_in]
pop ebx
mov esp, ebp
pop ebp
ret
use mmap syscall, read entire file to memory and search for 0x0A sequence. This is the ASCII code for end of line. Perhaps usefull too to check for 0x0D (in case you are dealing with windows text files. There the sequence 0x0A,0x0D indicates an new line and thus an end of line.
mmap will try to allocate memory for you without the overhead of administration for you. Otherwise determine the file length and reserve memory with syscall sbrk. Works also but you have to program a bit more. My suggestion is that mmap is the best way.

Write to own executable in Linux C program, error "Text file busy"

For a security class I am supposed to write self-modifying code for a program which finds its own executable on the disk, reads the binary data, and encrypts part of it before writing it back out to the disk. This is supposed to be like a polymorphic virus which changes itself to fool antivirus scanners which detect known signatures.
I have all the pieces pretty much in place:
I am finding the executable using /proc/self/exe.
I am using a simple AES implementation to encrypt 16 byte string in some dummy code in the executable.
I am able to read the binary data in and locate the part I need to encrypt.
My problem is that the only way I have been able to open the executable is in read-only mode "rb". If I try to open the file for writing in mode "wb" or "r+b" I get back the error "Text file busy". Is there anyway for me to write to a process's own executable in C? Can I do this by changing the permissions somehow?
EDIT: What I am trying to accomplish is to have an executable which will encrypt part of itself each time that it runs so that it will have a new checksum after every time it runs.
After reading data from the executable binary, how can I either write back to it or remove it and replace it with a new file with the same filename?
You cannot write to a file that is currently mapped as an executable. However, you can write to a file that has the same path as the current executable, so long as it isn't actually the same file — try unlinking the file you're being executed from and creating a new file in its place, for instance.
In order to do self modification too, I wrote a small code in nasm (which can be used as a stub), which opens itself and at the middle of the code (right after the mmap), we have a pointer which points to the bytes of the executable that we can modify.
The code looks like this:
BITS 64
section .text
global _start
_start:
call _main__
mov rax, 60
mov rdi, 0x0
syscall ; exit(0);
_main__:
push rbp
mov rbp, rsp
sub rsp, 144 ; stat_file
mov rdi, [rbp+0x18]
lea rsi, [rsp]
call _open_self ; open self
push r12 ; len file
push rax ; addr
mov r14, rsi
mov rdi, [rbp+0x18] ; pathname
pop rsi ; addr
pop rdx ; len
push rdx
push rsi
call __create
mov r13, rax ; second fd
mov rdi, r14 ; fd
pop rsi ; addr -> mmap
pop rdx ; len_file
call __close_unmap
mov rax, 87
mov rdi, [rbp+0x18]
syscall
mov rax, 0x3 ; close(scnd_fd);
mov rdi, r13
syscall
mov rax, 86
push 'nasm'
lea rdi, [rsp]
mov rsi, [rbp+0x18]
syscall ; link tmp name to original name
mov rax, 87
lea rdi, [rsp]
syscall ; delete old tmp file
leave
ret
; ===============================
; Open himself
_open_self:
push rbp
mov rbp, rsp
mov r15, rsi ; &stat_file
mov r12, rdi ; *pathname
mov rax, 0x2
mov rsi, 0x0 ; 0_RD
mov rdx, 509
syscall
push rax ; fd
mov rdi, rax ; fd
mov rsi, r15 ; struct stat
mov rax, 5 ; fstat
syscall
xor rdi, rdi
mov rsi, qword [r15+48]
mov rdx, 0x4
mov r10, 0x2
pop r8
push r8
mov r9, 0x0
mov rax, 9
syscall ; mmap
; rax -> byte of the executable that we gonna dump
mov r12, qword [r15+48]
pop rsi ; fd
leave
ret
; ===============================
; int __create(const char *pathname, void *addr, ssize_t len_bytes_mapped);
__create:
push rbp
mov rbp, rsp
push rsi ; addr
push rcx ; len
push 'nasm'
lea rdi, [rsp]
mov rax, 0x2
mov rsi, 0x42 ; 0_CREAT | O_RDWR
mov rdx, 509
syscall ; sys_open
add rsp, 0x8 ; 'nasm'
mov r9, rax ; fd
mov rdi, rax ; fd
mov rax, 0x1
pop rdx
pop rsi
syscall ; sys_write
mov rax, r9 ; fd final
leave
ret
; int __close_unmap(int fd, unsigned lon addr, ssize_t len_file);
__close_unmap:
push rbp
mov rbp, rsp
push rdi
mov rdi, rsi
mov rsi, rdx
mov rax, 11
syscall ; munmap(addr, len_file)
pop rdi
mov rax, 3
syscall ; close(fd);
leave
ret
It is a bit long but it makes just :
-Open it self in read mode (O_RD == 0x0)
-Do a stat(*pathname, &buffer_struct_stat);
-And then a mmap(0, buffer_struct_stat.st_size, 0x4, MAP_PRIVATE, fd_read_only, 0);
-Here you can edit your executable by editing the bytes at the address returned by mmap
-Create a tmp file named "nasm"
-Do a write(fd_tmp, address_of_mmap, buffer_struct_stat.st_size)
-Close the two file descriptors and munmap the mmap
-Now it's cool : unlink(pathname) and link("nasm", "pathname")

Resources