rayzor
annotate src/swapbuf.asm @ 10:235c8b764c0b
optimized swap_buffers
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Thu, 10 Apr 2014 08:03:52 +0300 |
parents | |
children | 5380ff64e83f |
rev | line source |
---|---|
; vim:set ft=nasm:
segment code use32

BPP_ARG         equ     8               ; offset of bpp from ebp (saved ebp + return address)

;-----------------------------------------------------------------------
; void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp)
;
; Copies xsz * ysz pixels from a 32bpp source buffer to dest, converting
; to the destination pixel format selected by bpp:
;   32 -> straight 32bit block transfer, no conversion
;   24 -> the low 3 bytes of every source pixel are stored (LSB-first)
;   16 -> not implemented yet (TODO), nothing is copied
;   any other value is ignored, nothing is copied
;
; In:    eax = dest
;        edx = src
;        ebx = xsz
;        ecx = ysz
;        [ebp + BPP_ARG] = bpp (stack argument, valid after the prologue)
; Out:   nothing
; Clobb: eax, ecx, flags (direction flag is left cleared)
;        ebx, edx, esi, edi and ebp are preserved
;
; NOTE(review): the plain `ret` leaves the bpp stack argument for the
; caller to remove. If this is called through Watcom's default register
; convention the callee is expected to pop it (`ret 4`) -- confirm
; against the C-side declaration before changing it.
;-----------------------------------------------------------------------
global swap_buffers_asm_
swap_buffers_asm_:
        push    ebp
        mov     ebp, esp
        push    esi                     ; esi/edi are used by the string
        push    edi                     ; instructions; restore them on exit

        mov     edi, eax                ; edi = dest
        mov     esi, edx                ; esi = src
        imul    ecx, ebx                ; ecx = xsz * ysz = pixel count
        test    ecx, ecx                ; imul leaves ZF/SF undefined, so test explicitly
        jle     .done                   ; zero or negative pixel count: nothing to copy

        cld                             ; movs* below must walk towards higher addresses

        mov     eax, [ebp + BPP_ARG]    ; eax = bpp
        cmp     eax, 32
        je      .bpp32
        cmp     eax, 24
        je      .bpp24
        cmp     eax, 16
        je      .bpp16
        jmp     .done                   ; invalid bpp, ignore

.bpp32:         ; 32bit block transfer, no conversion
        rep movsd                       ; esi, edi and ecx are already loaded
        jmp     .done

.bpp24:         ; 32bpp -> 24bpp conversion (LSB-first)
        ; invariant: ecx >= 1 here (guarded above)
        dec     ecx                     ; the last pixel is stored separately below
        jz      .bpp24_last
.bpp24_loop:
        movsd                           ; copy a full 32bit pixel, esi += 4, edi += 4
        dec     edi                     ; back up one byte: the next store overwrites the 4th
        dec     ecx
        jnz     .bpp24_loop
.bpp24_last:
        movsw                           ; final pixel: store exactly 3 bytes so that
        movsb                           ; nothing is written past the end of dest
        jmp     .done

.bpp16:         ; 16bpp conversion is not implemented yet (TODO)
.done:
        pop     edi
        pop     esi
        pop     ebp
        ret