Difference between revisions of "CSC231 Homework 10 Solution 2010"
Line 2: | Line 2: | ||
---- | ---- | ||
<onlysmith> | <onlysmith> | ||
+ | |||
+ | =Version 1= | ||
<code><pre> | <code><pre> | ||
Line 7: | Line 9: | ||
; D. Thiebaut | ; D. Thiebaut | ||
; | ; | ||
− | ; this program counts the number of primes number less than | + | ; this program counts the number of primes number less than 2,000,000. |
+ | ; | ||
+ | ; It is "improved" by using various compiler optimizations to reduce the number | ||
+ | ; of instructions used during processing: | ||
+ | ; * use registers instead of variables | ||
+ | ; * reorder instructions in order to avoid jumps | ||
+ | ; * use in-line functions instead of real functions and save on stack use | ||
; | ; | ||
; to assemble and run: | ; to assemble and run: | ||
Line 120: | Line 128: | ||
</pre></code> | </pre></code> | ||
+ | =Version 2= | ||
+ | |||
+ | * Version 2 is by Tiffany, and uses loop-unrolling to improve performance | ||
+ | |||
+ | <code><pre> | ||
+ | ;;; ; File: hw10.asm | ||
+ | ;;; ; Author: Tiffany Q. Liu | ||
+ | ;;; ; Acct: 231a-ac | ||
+ | ;;; ; Date: December 14, 2010 | ||
+ | ;;; ; Desc: Modified version of findPrime.asm using compiler optimization | ||
+ | ;;; ; techniques. The following optimization techniques were implemented: | ||
+ | ;;; ; 1) uses ecx to store n and ebx to store i instead of variables, | ||
+ | ;;; ; 2) uses function inlining instead of calling function isPrime, | ||
+ | ;;; ; 3) uses loop unrolling on forn loop - everytime an iteration of | ||
+ | ;;; ; for ( int n = 2; n < N; n++) is ran, 6 values of n gets tested | ||
+ | ;;; ; instead of just one. | ||
+ | ;;; ; Note: Since each unrolling of the forn loop is just a copy of the | ||
+ | ;;; ; loop body repeated several times, I only commented the first | ||
+ | ;;; ; copy of the loop body. | ||
+ | ;;; ; | ||
+ | ;;; ; to assemble and run: | ||
+ | ;;; ; | ||
+ | ;;; ; nasm -f elf -F stabs asm_io.asm | ||
+ | ;;; ; nasm -f elf -F stabs hw10.asm | ||
+ | ;;; ; gcc -c driver.c | ||
+ | ;;; ; gcc asm_io.o hw10.o driver.o -o hw10 | ||
+ | ;;; ; ./hw10 | ||
+ | ;;; ; | ||
+ | ;;; ; bash | ||
+ | ;;; ; for i in 1 2 3 4 ; do time findPrime; echo ; done | ||
+ | ;;; ; | ||
+ | ;;; ; Execution times (ran at 3pm Friday): | ||
+ | ;;; ; 148933 | ||
+ | ;;; ; | ||
+ | ;;; ; real 0m3.890s | ||
+ | ;;; ; user 0m3.886s | ||
+ | ;;; ; sys 0m0.003s | ||
+ | ;;; ; | ||
+ | ;;; ; 148933 | ||
+ | ;;; ; | ||
+ | ;;; ; real 0m3.894s | ||
+ | ;;; ; user 0m3.887s | ||
+ | ;;; ; sys 0m0.001s | ||
+ | ;;; ; | ||
+ | ;;; ; 148933 | ||
+ | ;;; ; | ||
+ | ;;; ; real 0m3.890s | ||
+ | ;;; ; user 0m3.887s | ||
+ | ;;; ; sys 0m0.002s | ||
+ | ;;; ; | ||
+ | ;;; ; 148933 | ||
+ | ;;; ; | ||
+ | ;;; ; real 0m3.894s | ||
+ | ;;; ; user 0m3.887s | ||
+ | ;;; ; sys 0m0.003s | ||
+ | ;;; ; ------------------------------------------------------------------- | ||
+ | |||
+ | %include "asm_io.inc" | ||
+ | |||
+ | ;;; ------------------------------------------------------------ | ||
+ | ;;; data areas | ||
+ | ;;; ------------------------------------------------------------ | ||
+ | |||
+ | section .data | ||
+ | N dd 0 ; typically set to 2,000,000 | ||
+ | count dd 0 ; number of primes found | ||
+ | n dd 0 ; variable tested for prime property | ||
+ | i dd 0 ; index of for loop | ||
+ | |||
+ | ;;; ------------------------------------------------------------ | ||
+ | ;;; code area | ||
+ | ;;; ------------------------------------------------------------ | ||
+ | |||
+ | section .text | ||
+ | global asm_main | ||
+ | |||
+ | asm_main: | ||
+ | mov dword[N], 2000000 ; initialize N to 2,000,000 | ||
+ | mov dword[count], 0 ; initialize count to 0 | ||
+ | mov ecx, 2 ; store n in ecx, initialized to 2 | ||
+ | |||
+ | ;;; for ( int n = 2; n < N; n++ ) | ||
+ | ;;; if ( isPrime( n ) ) | ||
+ | ;;; //cout << n << end1; | ||
+ | ;;; count++; | ||
+ | forn: | ||
+ | cmp ecx, dword[N] ; compare n to N | ||
+ | jl over ; continue to loop if n < N | ||
+ | jmp endForn | ||
+ | over: | ||
+ | ;;; ----------------------------------- first isPrime fcn inlining of forn | ||
+ | ;;; bool isPrime( int n ) { | ||
+ | ;;; for ( int i = 2; i*i <= n; i++ ) | ||
+ | ;;; if ( n % i == 0 ) | ||
+ | ;;; return 0; | ||
+ | ;;; return 1; | ||
+ | ;;; } | ||
+ | if1: | ||
+ | mov ebx, 2 ; store i in ebx, initialized to 2 | ||
+ | .for1: | ||
+ | mov eax, ebx ; compare n to i * i | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor1 ; continue to loop if n <= i * i | ||
+ | |||
+ | .if1: | ||
+ | xor edx, edx ; clear out edx register | ||
+ | mov eax, ecx ; prepare to test for prime, eax <- n | ||
+ | div ebx ; eax <- n / i, edx <- n % i | ||
+ | cmp edx, 0 ; n % i == 0 | ||
+ | jne .end_if1 ; if n % i == 0 : test next value of i | ||
+ | |||
+ | .notPrime1: ; else: | ||
+ | mov dl, 0 ; n is not prime, returning 0 | ||
+ | jmp .ret1 | ||
+ | |||
+ | .end_if1: | ||
+ | inc ebx ; i = i + 1 | ||
+ | jmp .for1 ; continue for1 | ||
+ | |||
+ | .endFor1: ; all values of i tested | ||
+ | mov dl, 1 ; n is prime, returning 1 | ||
+ | .ret1: | ||
+ | cmp dl, 1 ; n is prime? | ||
+ | jne endif1 ; if n is not prime, don't incr. count | ||
+ | inc dword[count] ; otherwise, incr. count by 1 | ||
+ | endif1: | ||
+ | inc ecx ; n = n + 1 | ||
+ | ;;; ----------------------------------- second isPrime fcn inlining of forn | ||
+ | if2: | ||
+ | mov ebx, 2 | ||
+ | .for2: | ||
+ | mov eax, ebx | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor2 | ||
+ | |||
+ | .if2: | ||
+ | xor edx, edx | ||
+ | mov eax, ecx | ||
+ | div ebx | ||
+ | cmp edx, 0 | ||
+ | jne .end_if2 | ||
+ | |||
+ | .notPrime2: | ||
+ | mov dl, 0 | ||
+ | jmp .ret2 | ||
+ | |||
+ | .end_if2: | ||
+ | inc ebx | ||
+ | jmp .for2 | ||
+ | |||
+ | .endFor2: | ||
+ | mov dl, 1 | ||
+ | .ret2: | ||
+ | cmp dl, 1 | ||
+ | jne endif2 | ||
+ | inc dword[count] | ||
+ | endif2: | ||
+ | inc ecx | ||
+ | ;;; ---------------------------------- third isPrime fcn inlining of forn | ||
+ | if3: | ||
+ | mov ebx, 2 | ||
+ | .for3: | ||
+ | mov eax, ebx | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor3 | ||
+ | |||
+ | .if3: | ||
+ | xor edx, edx | ||
+ | mov eax, ecx | ||
+ | div ebx | ||
+ | cmp edx, 0 | ||
+ | jne .end_if3 | ||
+ | |||
+ | .notPrime3: | ||
+ | mov dl, 0 | ||
+ | jmp .ret3 | ||
+ | |||
+ | .end_if3: | ||
+ | inc ebx | ||
+ | jmp .for3 | ||
+ | |||
+ | .endFor3: | ||
+ | mov dl, 1 | ||
+ | .ret3: | ||
+ | cmp dl, 1 | ||
+ | jne endif3 | ||
+ | inc dword[count] | ||
+ | endif3: | ||
+ | inc ecx | ||
+ | ;;; ---------------------------------- fourth isPrime fcn inlining of forn | ||
+ | if4: | ||
+ | mov ebx, 2 | ||
+ | .for4: | ||
+ | mov eax, ebx | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor4 | ||
+ | |||
+ | .if4: | ||
+ | xor edx, edx | ||
+ | mov eax, ecx | ||
+ | div ebx | ||
+ | cmp edx, 0 | ||
+ | jne .end_if4 | ||
+ | |||
+ | .notPrime4: | ||
+ | mov dl, 0 | ||
+ | jmp .ret4 | ||
+ | |||
+ | .end_if4: | ||
+ | inc ebx | ||
+ | jmp .for4 | ||
+ | |||
+ | .endFor4: | ||
+ | mov dl, 1 | ||
+ | .ret4: | ||
+ | cmp dl, 1 | ||
+ | jne endif4 | ||
+ | inc dword[count] | ||
+ | endif4: | ||
+ | inc ecx | ||
+ | ;;; ---------------------------------- fifth isPrime fcn inlining of forn | ||
+ | if5: | ||
+ | mov ebx, 2 | ||
+ | .for5: | ||
+ | mov eax, ebx | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor5 | ||
+ | |||
+ | .if5: | ||
+ | xor edx, edx | ||
+ | mov eax, ecx | ||
+ | div ebx | ||
+ | cmp edx, 0 | ||
+ | jne .end_if5 | ||
+ | |||
+ | .notPrime5: | ||
+ | mov dl, 0 | ||
+ | jmp .ret5 | ||
+ | |||
+ | .end_if5: | ||
+ | inc ebx | ||
+ | jmp .for5 | ||
+ | |||
+ | .endFor5: | ||
+ | mov dl, 1 | ||
+ | .ret5: | ||
+ | cmp dl, 1 | ||
+ | jne endif5 | ||
+ | inc dword[count] | ||
+ | endif5: | ||
+ | inc ecx | ||
+ | ;;; ---------------------------------- sixth isPrime fcn inlining of forn | ||
+ | if6: | ||
+ | mov ebx, 2 | ||
+ | .for6: | ||
+ | mov eax, ebx | ||
+ | mul ebx | ||
+ | cmp eax, ecx | ||
+ | ja .endFor6 | ||
+ | |||
+ | .if6: | ||
+ | xor edx, edx | ||
+ | mov eax, ecx | ||
+ | div ebx | ||
+ | cmp edx, 0 | ||
+ | jne .end_if6 | ||
+ | |||
+ | .notPrime6: | ||
+ | mov dl, 0 | ||
+ | jmp .ret6 | ||
+ | .end_if6: | ||
+ | inc ebx | ||
+ | jmp .for6 | ||
+ | |||
+ | .endFor6: | ||
+ | mov dl, 1 | ||
+ | .ret6: | ||
+ | cmp dl, 1 | ||
+ | jne endif6 | ||
+ | inc dword[count] | ||
+ | endif6: | ||
+ | inc ecx | ||
+ | ;;; ------------------------------ | ||
+ | jmp forn ; continue to next iteration of forn | ||
+ | endForn: ; when forn runs to completion: | ||
+ | mov eax, dword[count] ; eax <- #prime num in [2, 2,000,000) | ||
+ | call print_int ; print #prime num in eax use asm_io fcn | ||
+ | call print_nl ; print new line use asm_io fcn | ||
+ | |||
+ | ret ; return to C program | ||
+ | |||
+ | </pre></code> | ||
</onlysmith> | </onlysmith> | ||
<br /> | <br /> |
Revision as of 10:53, 15 December 2010
--D. Thiebaut 05:29, 8 December 2010 (UTC)