CSC231 Floating-Point Assembly Examples

From dftwiki3
Jump to: navigation, search

--D. Thiebaut 10:51, 12 December 2012 (EST)


Assembly Language Programs

Adding 2 Floats

Printing floats is not an easy to do in assembly, except if we use the standard C libraries to print them. The programs below use such an approach. The way it works is that we call the printf( ... ) function from within the program by first telling nasm that printf is a global function, and then using gcc instead of ld to generate the executable. To print a 64-bit float variable called temp in C we would write:

        printf( "z = %e\n", temp );

So when we want to print z from assembly, we pass the address of the string "temp = %e\n" in the stack, followed by 2 double words representing the value of temp. This is illustrated below in this example where we take two floats x and y equal to 1.5 and 2.5, respectively, and we add them together and store the result in z. Note here that we create two variables that contain the sum, one that is 32-bit in length, z, and one that is 64 bits in length (temp), which is what the printf() function needs.

; sumFloat.asm   use "C" printf on float
; 
; Assemble:	nasm -f elf sumFloat.asm
; Link:		gcc -m32 -o sumFloat sumFloat.o
; Run:		./sumFloat
; prints a single precision floating point number on the screen
; This program uses the external printf C  function which requires
; a format string defining what to print, and a variable number of
; variables to print.


%include "dumpRegs.asm"

        extern printf                   ; the C function to be called

        SECTION .data                   ; Data section

msg     db      "sum = %e",0x0a,0x00
x	dd	1.5
y	dd	2.5
z	dd	0
temp	dq	0
	
	
        SECTION .text                   ; Code section.

        global	main		        ; "C" main program 
main:				        ; label, start of main program
	
	fld	dword [x]	        ; need to convert 32-bit to 64-bit
	fld	dword [y]
	fadd
	fstp	dword [z]		; store sum in z

	mov	eax, [z]
        call    dumpRegs                ;just to see the binary version of z                                            

	fld	dword [z]     		; transform z in 64-bit word by pushing in stack
	fstp	qword [temp]            ; and popping it back as 64-bit quadword

		 
	push	dword [temp+4] 		; push temp as 2 32-bit words
	push	dword [temp]
        push    dword msg		; address of format string
        call    printf			; Call C function
        add     esp, 12			; pop stack 3*4 bytes

        mov     eax, 1			; exit code, 0=normal
	mov	ebx, 0
        int	0x80			;


Notes
  • The output of the program above is eax = 4080 0000 (which is indeed 4.0 in 32-bit IEEE format, and sum = 4.000000e+00.

Computing an Expression

The next program computes z = (x-1) * (y+3.5), where x is 1.5 and y is 2.5. This program also uses the external printf C function to display the value of z.


; 
; Assemble:	nasm -f elf float1.asm
; Link:		gcc -m32 -o float1 float1.o
; Run:		./float1
; Compute z = (x-1) * (y+3.5), where x is 1.5 and y is 2.5
; This program uses the external printf C  function which requires
; a format string defining what to print, and a variable number of
; variables to print.
%include "dumpRegs.asm"

        extern printf                   ; the C function to be called

        SECTION .data                   ; Data section

msg     db      "sum = %e",0x0a,0x00
x	dd	1.5
y	dd	2.5
z	dd	0
temp	dq	0
	
	
        SECTION .text                   ; Code section.

        global	main		        ; "C" main program 
main:				        ; label, start of main program

;;; compute x-1
	fld	dword [x]	        ; st0 <- x
	fld1				; st0 <- 1 st1 <- x
	fsub				; st0 <- x-1

;;; keep (x-1) in stack and compute y+3.5

	fld	dword [y]		; st0 <- y st1 <- x-1
	push	__float32__( 3.5 )	; put 32-bit float 3.5 in memory (actually in stack)
	fld	dword [esp]		; st0 <- 3.5 st1 <- y st2 <- x-1
	add	esp, 4			; undo push
	fadd				; st0 <- y+3.5 st1 <- x-1
	
	fadd				; st0 <- x-1 + y+3.5
	fst	dword [z]		; store sum in z


	fld	dword [z]     		; transform z in 64-bit word
	fstp	qword [temp]            ; store in 64-bit temp and pop stack top

		 
	push	dword [temp+4] 		; push temp as 2 32-bit words
	push	dword [temp]
        push    dword msg		; address of format string
        call    printf			; Call C function
        add     esp, 12			; pop stack 3*4 bytes

        mov     eax, 1			; exit code, 0=normal
	mov	ebx, 0
        int	0x80			;


Notes
  • The fld instruction cannot load an immediate value in the FPU. So we push the immediate value in the regular stack controlled by esp, and then from the stack into the FPU using fld.

Computing the Sum of an Array of Floats


; sumFloat4.asm   use "C" printf on float
; D. Thiebaut
; Assemble:	nasm -f elf sumFloat4.asm
; Link:		gcc -m32 -o sumFloat4 sumFloat4.o
; Run:		./sumFloat4
;
; Compute the sum of all the values in the array table.


        extern printf                   ; the C function to be called

        SECTION .data                   ; Data section
	
table		dd		 7.36464646465
		dd		 0.930984158273
		dd		 10.6047098049
		dd		 14.3058722306
		dd		 15.2983812149
		dd		 -17.4394255035
		dd		 -17.8120975978
		dd		 -12.4885670266
		dd		 3.74178604342
		dd		 16.3611827165
		dd		 -9.1182728262
		dd		 -11.4055038727
		dd		 4.68148165048
		dd		 -9.66095817322
		dd		 5.54394454154
		dd		 13.4203706426
		dd		 18.2194407176
		dd		 -7.878340987
		dd		 -6.60045833452
		dd		 -7.98961850398
N		equ		($-table)/4 	; number of items in table
	
;;; sum of all the numbers in table =  10.07955736
	
msg     db      "sum = %e",0x0a,0x00
temp	dq	0
sum	dd	0	
	
        SECTION .text                   ; Code section.

        global	main		        ; "C" main program 
main:				        ; label, start of main program

	mov	ecx, N
	mov	ebx, 0

	fldz				; st0 <- 0
for:	fld	dword [table + ebx*4]	; st0 <- new value, st1 <- sum of previous
	fadd				; st0 <- sum of new plus previous sum
	inc	ebx
	loop	for

;;; get sum back from FPU
	fstp	dword [sum]   		; put final sum in variable

;;; print resulting sum
	fld	dword [sum]    		; transform z in 64-bit word
	fstp	qword [temp]            ; store in 64-bit temp and pop stack top

		 
	push	dword [temp+4] 		; push temp as 2 32-bit words
	push	dword [temp]
        push    dword msg		; address of format string
        call    printf			; Call C function
        add     esp, 12			; pop stack 3*4 bytes

        mov     eax, 1			; exit code, 0=normal
	mov	ebx, 0
        int	0x80			;


Output
  sum = 1.007956e+01

Finding the Largest Element of an Array of Floats


; sumFloat5.asm   use "C" printf on float
; D. Thiebaut
; Assemble:	nasm -f elf sumFloat5.asm
; Link:		gcc -m32 -o sumFloat5 sumFloat5.o
; Run:		./sumFloat5
; Compute the max of an array of floats stored in table.
; This program uses the external printf C  function which requires
; a format string defining what to print, and a variable number of
; variables to print.


        extern printf                   ; the C function to be called

        SECTION .data                   ; Data section
	
max		dd		 0	
table		dd		 7.36464646465
		dd		 0.930984158273
		dd		 10.6047098049
		dd		 14.3058722306
		dd		 15.2983812149
		dd		 -17.4394255035
		dd		 -17.8120975978
		dd		 -12.4885670266
		dd		 3.74178604342
		dd		 16.3611827165
		dd		 -9.1182728262
		dd		 -11.4055038727
		dd		 4.68148165048
		dd		 -9.66095817322
		dd		 5.54394454154
		dd		 13.4203706426
		dd		 18.2194407176
		dd		 -7.878340987
		dd		 -6.60045833452
		dd		 -7.98961850398
N		equ		($-table)/4 	; number of items in table
	
;; max of all the numbers  = 1.821944e+01
	
msg     db      "max = %e",0x0a,0x00
temp	dq	0

	
        SECTION .text                   ; Code section.

        global	main		        ; "C" main program 
main:				        ; label, start of main program
	mov	eax, dword [table]
	mov	dword [max], eax
	
	mov	ecx, N-1
	mov	ebx, 1

	fld	dword [table]		; st0 <- table[0]
	
for:	fld	dword [table + ebx*4]	; st0 <- new value, st1 <- current max
	fcom				; compare st0 to st1
	fstsw	ax			; store fp status in ax
	and	ax, 100000000b		;
	jz	newMax
	jmp	continue
	
newMax:	fxch	st1
	
continue:
	fcomp				; pop st0 (don't care about compare)

	inc	ebx			; point to next fp number
	loop	for

;;; get sum back from FPU
	fstp	dword [max]   		; st0 is max.  Store it in mem

;;; print resulting sum
	fld	dword [max]    		; transform z in 64-bit word
	fstp	qword [temp]            ; store in 64-bit temp and pop stack top

		 
	push	dword [temp+4] 		; push temp as 2 32-bit words
	push	dword [temp]
        push    dword msg		; address of format string
        call    printf			; Call C function
        add     esp, 12			; pop stack 3*4 bytes

        mov     eax, 1			; exit code, 0=normal
	mov	ebx, 0
        int	0x80			;


Notes
  • The program gets the status register in the FPU, puts it in ax and then checks the 9th bit. This allows it to decide on the result of the comparison
  • The fcomp instruction is there just to pop st0. The result of that comparison is not used anywhere
  • There is another way to find the largest, using the double words as integers. Since the exponent of a larger float will be greater than the exponent of a smaller one (in absolute value), we don't really need the FPU to find the smallest or largest of the floating point array.
  • Output
   max = 1.821944e+01