rayzor

annotate src/swapbuf.asm @ 10:235c8b764c0b

optimized swap_buffers
author John Tsiombikas <nuclear@member.fsf.org>
date Thu, 10 Apr 2014 08:03:52 +0300
parents
children 5380ff64e83f
rev   line source
; vim:set ft=nasm:
segment code use32

;-----------------------------------------------------------------------
; void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp)
;
; Copies xsz * ysz pixels from the 32bpp buffer at src to dest,
; converting to the destination depth given by bpp:
;   32 -> straight dword block copy
;   24 -> each 32bit source pixel is packed into 3 destination bytes
;         (LSB-first; the top byte of every source pixel is dropped)
;   16 -> not implemented yet, nothing is copied
;   anything else is ignored
;
; In:    eax       = dest
;        edx       = src
;        ebx       = xsz
;        ecx       = ysz
;        [ebp + 8] = bpp (after pushing ebp)
; Out:   nothing
; Clobb: eax, ecx, edx, flags (esi and edi are saved and restored)
;
; Relies on the direction flag being clear on entry (string instructions
; must auto-increment).
;
; NOTE(review): bpp is passed on the stack but the routine returns with a
; plain `ret`, leaving the argument for the caller to remove. Confirm this
; matches how the C side declares the function.
;-----------------------------------------------------------------------
        global  swap_buffers_asm_
swap_buffers_asm_:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi/edi are used as copy pointers below
        push    edi                     ; and must survive for the caller

        mov     edi, eax                ; edi = dest, frees up eax
        mov     esi, edx                ; esi = src, frees up edx
        ; calculate pixel count -> ecx
        mov     eax, ebx
        mul     ecx                     ; edx:eax = xsz * ysz
        mov     ecx, eax                ; ecx = pixel count (low 32 bits)
        test    ecx, ecx
        jz      .done                   ; nothing to copy; also keeps the 24bpp
                                        ; loop counter from wrapping around

        mov     eax, [ebp + 8]          ; eax = bpp
        cmp     eax, 32
        je      .bpp32
        cmp     eax, 24
        je      .bpp24
        cmp     eax, 16
        je      .bpp16
        ; invalid bpp, ignore
        jmp     .done

.bpp32: ; 32bit block transfer, no conversion
        rep movsd                       ; esi, edi and ecx are already loaded
        jmp     .done

.bpp24: ; 32bpp -> 24bpp conversion (LSB-first)
        ; Every pixel but the last is moved as a full dword; the stray 4th
        ; byte is overwritten by the next pixel. The last pixel is moved as
        ; word + byte so nothing is written past the end of dest.
        dec     ecx                     ; ecx = pixels before the last one
        jz      .last24
.loop24:
        movsd                           ; copy 4 bytes, esi += 4, edi += 4
        dec     edi                     ; keep only 3 of them in dest
        dec     ecx
        jnz     .loop24
.last24:
        movsw                           ; final pixel: exactly 3 bytes
        movsb
        jmp     .done

.bpp16: ; 16bpp is not implemented yet (TODO)
.done:
        pop     edi
        pop     esi
        pop     ebp
        ret