rayzor

diff src/swapbuf.asm @ 10:235c8b764c0b

optimized swap_buffers
author John Tsiombikas <nuclear@member.fsf.org>
date Thu, 10 Apr 2014 08:03:52 +0300
parents
children 5380ff64e83f
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/swapbuf.asm	Thu Apr 10 08:03:52 2014 +0300
     1.3 @@ -0,0 +1,46 @@
     1.4 +; vim:set ft=nasm:
     1.5 +	segment code use32
     1.6 +
     ; void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp)
     ;
     ; Copies xsz * ysz pixels from a 4-bytes-per-pixel source buffer to dest,
     ; packing them to the destination depth selected by bpp:
     ;   32 -> straight dword copy, no conversion
     ;   24 -> the low three bytes of every source dword are kept
     ;   16 -> not implemented yet, nothing is copied
     ;   any other value is ignored, nothing is copied
     ;
     ; Arguments:
     ;   dest -> eax
     ;   src  -> edx
     ;   xsz  -> ebx
     ;   ysz  -> ecx
     ;   bpp  -> [ebp + 8] (after pushing ebp)
     ;
     ; Clobbers: eax, ecx, edx, flags. ebx is only read; esi and edi are
     ; saved and restored.
     ; The direction flag is not touched: the string instructions below only
     ; walk forward if the caller has it clear.
     ; Only the low 32 bits of xsz * ysz are used as the pixel count.
	global swap_buffers_asm_
swap_buffers_asm_:
	push ebp
	mov ebp, esp
	push esi		; the string instructions need esi/edi, but the
	push edi		; caller's values must survive the call

	mov edi, eax		; edi = dest, frees up eax
	mov esi, edx		; esi = src, frees up edx
	; calculate pixel count -> ecx, frees up ebx
	mov eax, ebx
	mul ecx			; edx:eax = xsz * ysz (edx is discarded)
	mov ecx, eax		; ecx = pixel count
	test ecx, ecx
	jz .done		; nothing to copy; also keeps the 24bpp loop
				; counter from wrapping around

	mov eax, [ebp + 8]	; eax = bpp
	cmp eax, 32
	je .bpp32
	cmp eax, 24
	je .bpp24
	cmp eax, 16
	je .bpp16
	; invalid bpp, ignore
	jmp .done

.bpp32:				; 32bit block transfer, no conversion
	rep movsd		; esi, edi and ecx are already loaded
	jmp .done

.bpp24:				; 32bpp -> 24bpp conversion
	; Every pixel except the last is stored as a full dword: its 4th byte
	; spills into the next pixel's slot and is overwritten by the next
	; store. The last pixel has no successor, so it is written as exactly
	; three bytes to stay inside the destination buffer.
	dec ecx			; ecx = number of pixels that may use the dword store
	jz .bpp24_last
.bpp24_loop:
	movsd			; copy 4 bytes, esi += 4, edi += 4
	dec edi			; step dest back over the spilled byte
	dec ecx
	jnz .bpp24_loop
.bpp24_last:
	movsw			; final pixel: bytes 0 and 1
	movsb			; final pixel: byte 2
	jmp .done

.bpp16:				; TODO: 16bpp conversion is not implemented yet
.done:
	pop edi
	pop esi
	pop ebp
	; NOTE(review): bpp stays on the stack for the caller to remove. If the
	; C prototype uses a callee-cleanup convention (the trailing underscore
	; suggests the Watcom register convention), this must be "ret 4" -
	; confirm against the declaration on the C side.
	ret