Notes on Assembling 32-bit Code on 64-bit Machines
--D. Thiebaut (talk) 11:12, 12 June 2014 (EDT)
This page collects notes, gathered from around the Web, on compiling and assembling C/C++ and assembly code in 32-bit mode on 64-bit machines.
Contents
Software Installation
- On a 64-bit (AMD64) machine, compiling with -m32 initially failed with an error about a header file that is not included in the default distribution:
gcc -m32 -o hello hello.c
In file included from /usr/include/features.h:398:0,
                 from /usr/include/stdio.h:27,
                 from hello.c:1:
/usr/include/gnu/stubs.h:7:27: fatal error: gnu/stubs-32.h: No such file or directory
 # include <gnu/stubs-32.h>
                           ^
compilation terminated.
- Solution is to install package libc6-dev-i386
sudo apt-get install libc6-dev-i386
- Also created links in /usr/include:
bits -> x86_64-linux-gnu/bits
sys -> x86_64-linux-gnu/sys
gnu -> x86_64-linux-gnu/gnu
- Alternatively, instead of linking the three directories, create them and link the individual header files:
# Create the three header directories directly under /usr/include.
# -p makes this step succeed when a directory is already present, so the
# script can be run again after a partial failure.
sudo mkdir -p /usr/include/gnu /usr/include/sys /usr/include/bits

# Each header below is named relative to /usr/include/x86_64-linux-gnu and is
# symlinked at the same relative path under /usr/include.
# ln is used without -f on purpose: an existing file is never overwritten.
for header in \
    sys/cdefs.h sys/types.h sys/select.h \
    gnu/stubs.h gnu/stubs-64.h \
    bits/wordsize.h bits/types.h bits/typesizes.h \
    bits/stdio_lim.h bits/sys_errlist.h \
    bits/huge_val.h bits/huge_valf.h bits/huge_vall.h \
    bits/inf.h bits/nan.h bits/mathdef.h bits/mathcalls.h \
    bits/posix1_lim.h bits/posix2_lim.h bits/xopen_lim.h \
    bits/wchar.h bits/time.h bits/endian.h bits/byteswap.h \
    bits/locale.h bits/waitflags.h bits/waitstatus.h \
    bits/local_lim.h bits/pthreadtypes.h bits/timex.h \
    bits/select.h bits/sigset.h bits/sched.h bits/setjmp.h \
    bits/posix_opt.h bits/environments.h bits/confname.h
do
    sudo ln -s "/usr/include/x86_64-linux-gnu/$header" "/usr/include/$header"
done
Example 1: Hello World in C in 32-bit Mode on 64-bit Arch
/*
 * hello.c
 * Compile & run as follows:
 *
 *     gcc -m32 -o hello hello.c && ./hello
 */
#include <stdio.h>

/* Prints "hello world!" followed by an empty line and exits with status 0. */
int main( int argc, char **argv ) {
    printf( "hello world!\n\n" );
    /* Explicit status: without it the value returned by main is only
       defined from C99 on; older modes (e.g. -std=gnu89) leave it open. */
    return 0;
}
Example 2: 32-bit Assembly Program Outputs Array of Stars
; hw2.asm
; Prints an array of 6 lines of stars
; Assemble, link, and run as follows:
;
;       nasm -f elf hw2.asm && ld -melf_i386 -o hw2 hw2.o && ./hw2
;
; Syntax: NASM (Intel operand order).
; Uses the 32-bit Linux int 0x80 interface: eax = call number,
; ebx / ecx / edx = first three arguments.

SYS_EXIT        equ     1               ; value placed in eax to exit
SYS_WRITE       equ     4               ; value placed in eax to write
STDOUT          equ     1               ; file descriptor written to

                section .data
stars:          db      "*********************", 10
                db      "*********************", 10
                db      "*********************", 10
                db      "*********************", 10
                db      "*********************", 10
                db      "*********************", 10
STARS_LEN       equ     $ - stars       ; 6 lines * 22 bytes = 132, computed
                                        ; by the assembler so it tracks edits

                section .text
                global  _start

_start:
;;; print stars: write( STDOUT, stars, STARS_LEN )
                mov     eax, SYS_WRITE
                mov     ebx, STDOUT
                mov     ecx, stars
                mov     edx, STARS_LEN
                int     0x80

;;; exit( 0 )
                mov     ebx, 0
                mov     eax, SYS_EXIT
                int     0x80
Example 3: 32-bit Assembly Prints using asm_io Library
driver.c
#include <stdio.h>
/* Entry point of the assembly program; it is defined in the .asm file
   that is linked with this driver. */
extern int asm_main( void );

/* Runs the assembly program once and exits with status 0. */
int main() {
    /* The value asm_main leaves in eax is ignored here. */
    asm_main();
    /* Explicit status so the exit code does not depend on whatever the
       last call left behind (falling off main is undefined before C99). */
    return 0;
}
factorial.asm
;;; factorial.asm
;;; D. Thiebaut
;;; Computes fibonacci[i] until overflow (number becomes negative)
;;; Requires driver.c, asm_io.o, and asm_io.inc
;;;
;;; Compile and link as follows:
;;;
;;; nasm -f elf -F stabs factorial.asm
;;; nasm -f elf -F stabs asm_io.asm
;;; gcc -m32 -o factorial driver.c asm_io.o factorial.o
;;; ./factorial
;;;
;;; fib(2) = 2
;;; fib(3) = 3
;;; fib(4) = 5
;;; fib(5) = 8
;;; fib(6) = 13
;;; fib(7) = 21
;;; fib(8) = 34
;;; fib(9) = 55
;;; fib(10) = 89
%include "asm_io.inc"

%assign EXIT    1                       ; not referenced by the code below
N               equ     100             ; capacity of the fib table, in bytes

;;; ---------------------------------------------------------
;;; initialized data: the two fixed pieces of an output line
;;; ---------------------------------------------------------
                section .data
msg1:           db      "fib(", 0
msg2:           db      ") = ", 0

;;; ---------------------------------------------------------
;;; uninitialized data: one byte per Fibonacci term
;;; ---------------------------------------------------------
                section .bss
fib:            resb    N

;;; ---------------------------------------------------------
;;; code
;;; ---------------------------------------------------------
                section .text
                global  asm_main
;;; ---------------------------------------------------------
;;; main program
;;; ---------------------------------------------------------
;;; int asm_main( void )
;;; Fills the byte table fib with Fibonacci terms and prints each new
;;; term, stopping when the 8-bit sum is no longer positive or when the
;;; table is full.
;;;   In:     nothing
;;;   Out:    eax = 0
;;;   Saved:  esi (callee-saved in the i386 C calling convention)
;;;
;;; C equivalent:
;;;     fib[0] = 1;
;;;     fib[1] = 1;
;;;     for ( i=2; i<N; i++ ) {
;;;         temp = fib[i-1]+fib[i-2];
;;;         if ( temp <= 0 ) break;
;;;         fib[i] = temp;
;;;         print i and temp
;;;     }
asm_main:
                push    esi                     ; the C caller expects esi back intact
                mov     byte [fib], 1
                mov     byte [fib+1], 1
                mov     esi, 2                  ; esi is i

for:            cmp     esi, N
                jge     done                    ; table full
                mov     al, byte [fib+esi-1]
                add     al, byte [fib+esi-2]    ; al = fib[i-1] + fib[i-2]
                cmp     al, 0
                jle     done                    ; signed byte wrapped: stop
                mov     byte [fib+esi], al
                call    printFibi               ; prints i (esi) and the term (al)
                inc     esi
                jmp     for

done:           pop     esi
                xor     eax, eax                ; defined return value for the int result
                ret
;;; printFibi: prints one line of the form  fib(<i>) = <term>
;;;   In:   esi = i
;;;         al  = term (the upper 24 bits of eax are discarded)
;;;   Out:  eax = term, zero-extended
;;; Relies on print_string / print_int / print_nl leaving esi unchanged.
printFibi:
                and     eax, 0x000000ff         ; keep al, clear the rest
                push    eax                     ; park the term while eax carries arguments

                mov     eax, msg1
                call    print_string            ; "fib("
                mov     eax, esi
                call    print_int               ; i
                mov     eax, msg2
                call    print_string            ; ") = "

                pop     eax                     ; term back in eax
                call    print_int               ; the term itself
                call    print_nl                ; end of line
                ret
asm_io.asm
;
; file: asm_io.asm
; Assembly I/O routines
; To assemble for DJGPP
; nasm -f coff -d COFF_TYPE asm_io.asm
; To assemble for Borland C++ 5.x
; nasm -f obj -d OBJ_TYPE asm_io.asm
; To assemble for Microsoft Visual Studio
; nasm -f win32 -d COFF_TYPE asm_io.asm
; To assemble for Linux
; nasm -f elf -d ELF_TYPE asm_io.asm
%define NL              10              ; line feed, appended to the format strings

; Bit masks used by sub_dump_regs to test individual bits of FLAGS
%define CF_MASK         0x00000001
%define PF_MASK         0x00000004
%define AF_MASK         0x00000010
%define ZF_MASK         0x00000040
%define SF_MASK         0x00000080
%define DF_MASK         0x00000400
%define OF_MASK         0x00000800

;
; Linux C doesn't put underscores on labels.
; The ELF_TYPE guard is commented out, so the underscore-free names below
; are used whether or not ELF_TYPE is defined on the command line.
;
;;; %ifdef ELF_TYPE
%define _scanf          scanf
%define _printf         printf
%define _getchar        getchar
%define _putchar        putchar
%define _fputs          fputs
;;; %endif
%ifdef OBJ_TYPE
                segment .data public align=4 class=data use32
%else
                segment .data
%endif

; ---- formats for print_int / read_int and print_string ----
int_format:             db      "%d", 0
string_format:          db      "%s", 0

; ---- sub_dump_regs: one format string spread over four lines ----
reg_format:             db      "Register Dump # %d", NL
                        db      "EAX = %.8X EBX = %.8X ECX = %.8X EDX = %.8X", NL
                        db      "ESI = %.8X EDI = %.8X EBP = %.8X ESP = %.8X", NL
                        db      "EIP = %.8X FLAGS = %.4X %s %s %s %s %s %s %s", NL
                        db      0

; ---- flag names substituted for the %s fields of reg_format ----
carry_flag:             db      "CF", 0
zero_flag:              db      "ZF", 0
sign_flag:              db      "SF", 0
parity_flag:            db      "PF", 0
overflow_flag:          db      "OF", 0
dir_flag:               db      "DF", 0
aux_carry_flag:         db      "AF", 0
unset_flag:             db      " ", 0

; ---- sub_dump_mem (mem_format2 is also used by print_hex) ----
mem_format1:            db      "Memory Dump # %d Address = %.8X", NL, 0
mem_format2:            db      "%.8X ", 0
mem_format3:            db      "%.2X ", 0

; ---- sub_dump_stack ----
stack_format:           db      "Stack Dump # %d", NL
                        db      "EBP = %.8X ESP = %.8X", NL, 0
stack_line_format:      db      "%+4d %.8X %.8X", NL, 0

; ---- sub_dump_math ----
math_format1:           db      "Math Coprocessor Dump # %d Control Word = %.4X"
                        db      " Status Word = %.4X", NL, 0
valid_st_format:        db      "ST%d: %.10g", NL, 0
invalid_st_format:      db      "ST%d: Invalid ST", NL, 0
empty_st_format:        db      "ST%d: Empty", NL, 0
;
; code goes into the text segment: "text" with Borland attributes when
; OBJ_TYPE is defined, plain ".text" otherwise
;
%ifdef OBJ_TYPE
                segment text public align=1 class=code use32
%else
                segment .text
%endif

; routines this file makes visible to other object files
                global  read_int
                global  print_int
                global  print_string
                global  read_char
                global  print_hex
                global  print_char
                global  print_nl
                global  sub_dump_regs
                global  sub_dump_mem
                global  sub_dump_math
                global  sub_dump_stack

; C library routines referenced by this file
                extern  _scanf
                extern  _printf
                extern  _getchar
                extern  _putchar
                extern  _fputs
; read_int: reads a decimal integer with scanf( "%d", &local )
;   In:     nothing
;   Out:    eax = value stored by scanf, or 0 if scanf stored nothing
;   Saved:  every other general register and FLAGS (pusha/pushf ... popf/popa)
; Local variables:
;   ebp-4   the integer scanf writes into
read_int:
                enter   4,0
                mov     dword [ebp-4], 0        ; defined result when the input is not a number
                pusha
                pushf

                lea     eax, [ebp-4]
                push    eax                     ; &local
                push    dword int_format        ; "%d"
                call    _scanf
                pop     ecx                     ; discard the two arguments
                pop     ecx

                popf
                popa
                mov     eax, [ebp-4]            ; loaded after popa so it is not overwritten
                leave
                ret
; print_int: printf( "%d", eax )
;   In:     eax = integer to print
;   Saved:  all general registers and FLAGS
print_int:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    eax                     ; value
                push    dword int_format        ; "%d"
                call    _printf
                add     esp, 8                  ; drop both arguments (FLAGS are restored next)

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret
; print_hex: printf( "%.8X ", eax )  -- eight hex digits and a space
;   In:     eax = value to print
;   Saved:  all general registers and FLAGS
print_hex:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    eax                     ; value
                push    dword mem_format2       ; "%.8X "
                call    _printf
                add     esp, 8                  ; drop both arguments (FLAGS are restored next)

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret
; print_string: printf( "%s", eax )
;   In:     eax = address of a 0-terminated string
;   Saved:  all general registers and FLAGS
print_string:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    eax                     ; string address
                push    dword string_format     ; "%s"
                call    _printf
                add     esp, 8                  ; drop both arguments (FLAGS are restored next)

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret
; read_char: eax = getchar()
;   In:     nothing
;   Out:    eax = value returned by getchar
;   Saved:  every other general register and FLAGS
; Local variables:
;   ebp-4   holds the result across popa
read_char:
                push    ebp
                mov     ebp, esp
                lea     esp, [ebp-4]            ; reserve the local; lea leaves FLAGS alone
                pusha
                pushf

                call    _getchar
                mov     [ebp-4], eax            ; popa would overwrite eax, so stash it

                popf
                popa
                mov     eax, [ebp-4]
                leave
                ret
; print_char: putchar( eax )
;   In:     eax = character to print
;   Saved:  all general registers and FLAGS
print_char:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    eax                     ; character
                call    _putchar
                add     esp, 4                  ; drop the argument (FLAGS are restored next)

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret
; print_nl: putchar( '\n' )
;   In:     nothing
;   Saved:  all general registers and FLAGS
print_nl:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    dword NL                ; NL is 10, the ASCII code for \n
                call    _putchar
                add     esp, 4                  ; drop the argument (FLAGS are restored next)

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret
; function sub_dump_regs
; Prints, with one printf call on reg_format, the dump number, the general
; registers, EIP, FLAGS and the names of the flags that are set.
; Parameters:
;   dump number - dword at [ebp+8]
; Local variables:
;   ebp-4   copy of FLAGS as saved by pushf
; Notes: the routine removes its own argument (ret 4); registers and FLAGS
;   are put back by popf/popa before it returns.
sub_dump_regs:
enter 4,0
pusha
pushf
mov eax, [esp] ; read FLAGS back off stack
mov [ebp-4], eax ; save flags
;
; show which FLAGS are set
;
; For each flag, push the address of its name when the bit is set and the
; address of unset_flag otherwise. The pushes run CF, PF, AF, ZF, SF, DF, OF;
; printf takes its arguments in reverse push order, so the %s fields of
; reg_format receive them as OF DF SF ZF AF PF CF.
test eax, CF_MASK
jz cf_off
mov eax, carry_flag
jmp short push_cf
cf_off:
mov eax, unset_flag
push_cf:
push eax
test dword [ebp-4], PF_MASK
jz pf_off
mov eax, parity_flag
jmp short push_pf
pf_off:
mov eax, unset_flag
push_pf:
push eax
test dword [ebp-4], AF_MASK
jz af_off
mov eax, aux_carry_flag
jmp short push_af
af_off:
mov eax, unset_flag
push_af:
push eax
test dword [ebp-4], ZF_MASK
jz zf_off
mov eax, zero_flag
jmp short push_zf
zf_off:
mov eax, unset_flag
push_zf:
push eax
test dword [ebp-4], SF_MASK
jz sf_off
mov eax, sign_flag
jmp short push_sf
sf_off:
mov eax, unset_flag
push_sf:
push eax
test dword [ebp-4], DF_MASK
jz df_off
mov eax, dir_flag
jmp short push_df
df_off:
mov eax, unset_flag
push_df:
push eax
test dword [ebp-4], OF_MASK
jz of_off
mov eax, overflow_flag
jmp short push_of
of_off:
mov eax, unset_flag
push_of:
push eax
push dword [ebp-4] ; FLAGS
; [ebp+4] is the return address into the caller.
; NOTE(review): the 10 subtracted below assumes the push + call sequence of
; the dump_regs macro assembles to exactly 10 bytes - confirm for the
; assembler and optimization level in use.
mov eax, [ebp+4]
sub eax, 10 ; EIP on stack is 10 bytes ahead of orig
push eax ; EIP
lea eax, [ebp+12] ; address just above the argument = ESP before the caller pushed it
push eax ; original ESP
push dword [ebp] ; original EBP
; edi, esi, edx, ecx and ebx have not been written since entry
push edi
push esi
push edx
push ecx
push ebx
push dword [ebp-8] ; original EAX (first register stored by pusha, right below the local)
push dword [ebp+8] ; # of dump
push dword reg_format
call _printf
add esp, 76 ; 19 dwords were pushed for printf: 7 flag names + 12 values/format
popf
popa
leave
ret 4
; function sub_dump_stack
; Prints a header (stack_format) with the caller's EBP and ESP, then one
; line per stack dword (stack_line_format): offset from the caller's EBP,
; address, and value, going from the highest address downwards.
; Parameters:
;   dump number         - dword at [ebp+8]
;   dword at [ebp+12]   - added to the line count; presumably the number of
;                         dwords to show below the caller's EBP (confirm
;                         against the dump_stack macro's callers)
;   # dwords above ebp  - dword at [ebp+16]
; Notes: the routine removes its own three arguments (ret 12); registers
;   and FLAGS are put back by popf/popa before it returns.
sub_dump_stack:
enter 0,0
pusha
pushf
lea eax, [ebp+20] ; address just above the three arguments
push eax ; original ESP
push dword [ebp] ; original EBP
push dword [ebp+8] ; # of dump
push dword stack_format
call _printf
add esp, 16
mov ebx, [ebp] ; ebx = original ebp
mov eax, [ebp+16] ; eax = # dwords above ebp
shl eax, 2 ; eax *= 4
add ebx, eax ; ebx = & highest dword in stack to display
mov edx, [ebp+16] ; edx = dword index relative to original ebp (counts down)
mov ecx, edx
add ecx, [ebp+12]
inc ecx ; ecx = # of dwords to display
stack_line_loop:
push edx
push ecx ; save ecx & edx
push dword [ebx] ; value on stack
push ebx ; address of value on stack
mov eax, edx
sal eax, 2 ; eax = 4*edx
push eax ; offset from ebp
push dword stack_line_format
call _printf
add esp, 16 ; drop the four printf arguments; the saved ecx/edx stay on the stack
pop ecx
pop edx
sub ebx, 4 ; next lower dword
dec edx
loop stack_line_loop
popf
popa
leave
ret 12
; function sub_dump_mem
; Prints a header (mem_format1), then 16-byte rows of memory: the row
; address, 16 bytes in hex, and the same 16 bytes as characters between
; double quotes ('?' stands in for bytes outside 32..126).
; Parameters:
;   # paragraphs  - dword at [ebp+8]; one more row than this is printed
;   address       - dword at [ebp+12]; rounded down to a multiple of 16
;   label         - dword at [ebp+16]; printed as the dump number
; Notes: the routine removes its own three arguments (ret 12); registers
;   and FLAGS are put back by popf/popa before it returns.
sub_dump_mem:
                push    ebp
                mov     ebp, esp
                pusha
                pushf

                push    dword [ebp+12]          ; address
                push    dword [ebp+16]          ; label
                push    dword mem_format1
                call    _printf
                add     esp, 12

                mov     esi, [ebp+12]
                and     esi, 0FFFFFFF0h         ; esi = start of the first row
                mov     ecx, [ebp+8]
                inc     ecx                     ; ecx = rows left to print

.row:
                push    ecx                     ; row counter is kept on the stack across the calls

                push    esi
                push    dword mem_format2       ; row address
                call    _printf
                add     esp, 8

                xor     ebx, ebx                ; ebx = byte index within the row
.hex:
                movzx   eax, byte [esi+ebx]
                push    eax
                push    dword mem_format3       ; one byte as two hex digits
                call    _printf
                add     esp, 8
                inc     ebx
                cmp     ebx, 16
                jl      .hex

                mov     eax, '"'
                call    print_char              ; opening quote

                xor     ebx, ebx
.chr:
                movzx   eax, byte [esi+ebx]
                cmp     al, 32                  ; signed compares: bytes >= 128 also count as too small
                jl      .substitute
                cmp     al, 126
                jle     .emit
.substitute:
                mov     eax, '?'
.emit:
                call    print_char
                inc     ebx
                cmp     ebx, 16
                jl      .chr

                mov     eax, '"'
                call    print_char              ; closing quote
                call    print_nl

                add     esi, 16                 ; next row
                pop     ecx
                dec     ecx
                jnz     .row

                popf
                popa
                pop     ebp                     ; esp is back at the saved ebp here
                ret     12
; function sub_dump_math
; prints out state of math coprocessor without modifying the coprocessor
; or regular processor state
; Prints one header line (math_format1) with the control and status words,
; then one line for each of the eight coprocessor registers: its value,
; "Invalid ST" or "Empty", chosen from the register's two tag bits.
; Parameters:
; dump number - dword at [ebp+8]
; Local variables:
; ebp-108 start of fsave buffer
; ebp-116 temp double
; Notes: This procedure uses the Pascal convention.
; (it removes its own argument with ret 4)
; fsave buffer structure:
; ebp-108 control word
; ebp-104 status word
; ebp-100 tag word
; ebp-80 ST0
; ebp-70 ST1
; ebp-60 ST2 ...
; ebp-10 ST7
;
sub_dump_math:
enter 116,0
pusha
pushf
fsave [ebp-108] ; save coprocessor state to memory
mov eax, [ebp-104] ; status word
and eax, 0FFFFh ; keep the low 16 bits only
push eax
mov eax, [ebp-108] ; control word
and eax, 0FFFFh ; keep the low 16 bits only
push eax
push dword [ebp+8] ; dump number
push dword math_format1
call _printf
add esp, 16
;
; rotate tag word so that tags in same order as numbers are
; in the stack
;
mov cx, [ebp-104] ; cx = status word
shr cx, 11
and cx, 7 ; cl = physical state of number on stack top
mov bx, [ebp-100] ; bx = tag word
shl cl,1 ; cl *= 2
ror bx, cl ; move top of stack tag to lowest bits
mov edi, 0 ; edi = stack number of number
lea esi, [ebp-80] ; esi = address of ST0
mov ecx, 8 ; ecx = loop counter
tag_loop:
push ecx ; loop counter is kept on the stack across the printf calls
mov ax, 3
and ax, bx ; ax = current tag
or ax, ax ; 00 -> valid number
je valid_st
cmp ax, 1 ; 01 -> zero
je zero_st
cmp ax, 2 ; 10 -> invalid number
je invalid_st
push edi ; 11 -> empty
push dword empty_st_format
call _printf
add esp, 8
jmp short cont_tag_loop
zero_st:
fldz ; load 0.0 so the zero case shares the printing code below
jmp short print_real
valid_st:
fld tword [esi] ; load the saved 10-byte value from the fsave buffer
print_real:
fstp qword [ebp-116] ; convert to a double in the temp local and pop it again
push dword [ebp-112] ; upper half of the double
push dword [ebp-116] ; lower half of the double
push edi
push dword valid_st_format
call _printf
add esp, 16
jmp short cont_tag_loop
invalid_st:
push edi
push dword invalid_st_format
call _printf
add esp, 8
cont_tag_loop:
ror bx, 2 ; mov next tag into lowest bits
inc edi
add esi, 10 ; mov to next number on stack
pop ecx
loop tag_loop
frstor [ebp-108] ; restore coprocessor state
popf
popa
leave
ret 4
asm_io.inc
extern read_int, print_int, print_string, print_hex
extern read_char, print_char, print_nl
extern sub_dump_regs, sub_dump_mem, sub_dump_math, sub_dump_stack
;
; usage: dump_regs dump-number
; Pushes the dump number and calls sub_dump_regs, which prints it together
; with the registers and removes the argument itself.
%macro dump_regs 1
push dword %1
call sub_dump_regs
%endmacro
;
; usage: dump_mem label, start-address, # paragraphs
; label is a number; sub_dump_mem prints it as the dump number.
; One 16-byte row more than "# paragraphs" is printed, starting at
; start-address rounded down to a multiple of 16.
; The arguments are pushed in the order given, so the last one ends up
; closest to the return address; sub_dump_mem removes all three itself.
%macro dump_mem 3
push dword %1
push dword %2
push dword %3
call sub_dump_mem
%endmacro
;
; usage: dump_math dump-number
; Pushes the dump number and calls sub_dump_math, which prints the
; coprocessor state and removes the argument itself.
%macro dump_math 1
push dword %1
call sub_dump_math
%endmacro
;
; usage: dump_stack dump-number, count-2, # dwords above ebp
; The arguments are pushed last-to-first, so sub_dump_stack finds the dump
; number at [ebp+8], the second argument at [ebp+12] and the number of
; dwords above ebp at [ebp+16]; it removes all three itself.
; NOTE(review): the second argument is presumably the number of dwords to
; show below ebp - sub_dump_stack only adds it to the line count; confirm.
%macro dump_stack 3
push dword %3
push dword %2
push dword %1
call sub_dump_stack
%endmacro