rayzor
changeset 10:235c8b764c0b
optimized swap_buffers
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Thu, 10 Apr 2014 08:03:52 +0300 |
parents | 70e332156d02 |
children | fe94d9e986ae |
files | Makefile src/main.cc src/swapbuf.asm src/timer.h util/fixcase |
diffstat | 5 files changed, 116 insertions(+), 19 deletions(-) [+] |
line diff
1.1 --- a/Makefile Thu Apr 10 02:31:31 2014 +0300 1.2 +++ b/Makefile Thu Apr 10 08:03:52 2014 +0300 1.3 @@ -1,4 +1,4 @@ 1.4 -baseobj = main.obj logger.obj screen.obj scrman.obj 1.5 +baseobj = main.obj logger.obj screen.obj scrman.obj swapbuf.obj 1.6 modelobj = modeller.obj min3d.obj m3drast.obj lines.obj 1.7 rendobj = renderer.obj vmath.obj 1.8 scnobj = scene.obj object.obj 1.9 @@ -6,11 +6,14 @@ 1.10 obj = $(baseobj) $(modelobj) $(rendobj) $(scnobj) $(sysobj) 1.11 bin = rayzor.exe 1.12 1.13 -#dbg = -d2 1.14 +opt = -5 -fp5 -otexan 1.15 +dbg = -d1 1.16 1.17 +AS = nasm 1.18 CC = wcc386 1.19 CXX = wpp386 1.20 -CFLAGS = $(dbg) -5 -fp5 -otexan -zq -bt=dos -Isrc\stl 1.21 +ASFLAGS = -fobj 1.22 +CFLAGS = $(dbg) $(opt) -zq -bt=dos -Isrc\stl 1.23 CXXFLAGS = $(CFLAGS) 1.24 LD = wlink 1.25 1.26 @@ -18,8 +21,9 @@ 1.27 %write objects.lnk file { $(obj) } 1.28 $(LD) debug all name $@ @objects $(LDFLAGS) 1.29 1.30 -.c: src\ 1.31 -.cc: src\ 1.32 +.c: src 1.33 +.cc: src 1.34 +.asm: src 1.35 1.36 .c.obj: .autodepend 1.37 $(CC) $(CFLAGS) $[* 1.38 @@ -27,6 +31,9 @@ 1.39 .cc.obj: .autodepend 1.40 $(CXX) $(CXXFLAGS) $[* 1.41 1.42 +.asm.obj: 1.43 + $(AS) $(ASFLAGS) -o $@ $[*.asm 1.44 + 1.45 clean: .symbolic 1.46 del *.obj 1.47 del $(bin)
2.1 --- a/src/main.cc Thu Apr 10 02:31:31 2014 +0300 2.2 +++ b/src/main.cc Thu Apr 10 08:03:52 2014 +0300 2.3 @@ -1,7 +1,6 @@ 2.4 #include <stdio.h> 2.5 #include <stdlib.h> 2.6 #include <string.h> 2.7 -#include <math.h> 2.8 #include <signal.h> 2.9 #include "inttypes.h" 2.10 #include "gfx.h" 2.11 @@ -14,6 +13,16 @@ 2.12 #include "modeller.h" 2.13 #include "renderer.h" 2.14 #include "scrman.h" 2.15 +#include "timer.h" 2.16 + 2.17 +#ifdef __DOS__ 2.18 +#undef USE_ASM_SWAPBUF 2.19 +#endif 2.20 + 2.21 +#ifdef USE_ASM_SWAPBUF 2.22 +// defined in swapbuf.asm 2.23 +extern "C" void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp); 2.24 +#endif 2.25 2.26 static bool init(); 2.27 static void cleanup(); 2.28 @@ -31,19 +40,22 @@ 2.29 int fb_bpp = 32; 2.30 Scene *scene; 2.31 2.32 -static int bytespp; 2.33 static bool novideo; 2.34 static void *fb; 2.35 static int rbits, gbits, bbits; 2.36 static int rshift, gshift, bshift; 2.37 static unsigned int rmask, gmask, bmask; 2.38 2.39 +static bool use_asm_swap = true; 2.40 static bool use_mouse; 2.41 static int mouse_x, mouse_y; 2.42 static bool quit; 2.43 2.44 int main(int argc, char **argv) 2.45 { 2.46 + unsigned long start_msec, msec; 2.47 + unsigned long nframes = 0; 2.48 + 2.49 if(!parse_args(argc, argv)) { 2.50 return 1; 2.51 } 2.52 @@ -51,6 +63,8 @@ 2.53 return 1; 2.54 } 2.55 2.56 + start_msec = get_msec(); 2.57 + 2.58 // main loop 2.59 for(;;) { 2.60 handle_keyboard(); 2.61 @@ -58,11 +72,16 @@ 2.62 if(quit) break; 2.63 2.64 display(); 2.65 + ++nframes; 2.66 2.67 if(novideo) break; 2.68 } 2.69 2.70 + msec = get_msec() - start_msec; 2.71 + 2.72 cleanup(); 2.73 + 2.74 + printf("Average framerate: %g\n", (float)nframes / ((float)msec / 1000.0f)); 2.75 printf("Thank you for using Rayzor!\n"); 2.76 return 0; 2.77 } 2.78 @@ -79,6 +98,8 @@ 2.79 signal(SIGILL, sig); 2.80 signal(SIGFPE, sig); 2.81 2.82 + init_timer(128); 2.83 + 2.84 if(!novideo) { 2.85 if(kb_init(32) == -1) { 2.86 fprintf(stderr, "failed to initialize keyboard driver\n"); 2.87 @@ -94,8 +115,8 @@ 2.88 get_color_bits(&rbits, &gbits, &bbits); 2.89 get_color_shift(&rshift, &gshift, &bshift); 2.90 get_color_mask(&rmask, &gmask, &bmask); 2.91 - bytespp = (int)ceil(fb_bpp / 8.0); 2.92 2.93 + printlog("video resolution: %dx%d\n", fb_width, fb_height); 2.94 printlog("bpp: %d (%d %d %d)\n", fb_bpp, rbits, gbits, bbits); 2.95 printlog("shift: %d %d %d\n", rshift, gshift, bshift); 2.96 printlog("mask: %x %x %x\n", rmask, gmask, bmask); 2.97 @@ -107,9 +128,8 @@ 2.98 } else { 2.99 logger_output(stdout); 2.100 printlog("novideo (debug) mode\n"); 2.101 - fb_bpp = 32; 2.102 + fb_bpp = 24; 2.103 rbits = gbits = bbits = 8; 2.104 - bytespp = 3; 2.105 } 2.106 2.107 fb_pixels = new uint32_t[fb_width * fb_height * 4]; 2.108 @@ -165,8 +185,12 @@ 2.109 } 2.110 2.111 if(!novideo) { 2.112 + wait_vsync(); 2.113 +#ifdef USE_ASM_SWAPBUF 2.114 + swap_buffers_asm(fb, fb_pixels, fb_width, fb_height, fb_bpp); 2.115 +#else 2.116 swap_buffers(); 2.117 - wait_vsync(); 2.118 +#endif 2.119 } 2.120 } 2.121 2.122 @@ -175,9 +199,9 @@ 2.123 (((g) << gshift) & gmask) | \ 2.124 (((b) << bshift) & bmask)) 2.125 2.126 -#define UNPACK_RED(c) (((c) >> 16) & 0xff) 2.127 +#define UNPACK_RED(c) ((c) & 0xff) 2.128 #define UNPACK_GREEN(c) (((c) >> 8) & 0xff) 2.129 -#define UNPACK_BLUE(c) ((c) & 0xff) 2.130 +#define UNPACK_BLUE(c) (((c) >> 16) & 0xff) 2.131 2.132 static void swap_buffers() 2.133 { 2.134 @@ -192,12 +216,13 @@ 2.135 case 24: 2.136 { 2.137 unsigned char *dest = (unsigned char*)fb; 2.138 - for(int i=0; i<num_pixels; i++) { 2.139 - uint32_t c = *src++; 2.140 - *dest++ = UNPACK_RED(c); 2.141 - *dest++ = UNPACK_GREEN(c); 2.142 - *dest++ = UNPACK_BLUE(c); 2.143 + for(int i=0; i<num_pixels-1; i++) { 2.144 + *((uint32_t*)dest) = *src++; 2.145 + dest += 3; 2.146 } 2.147 + *dest++ = UNPACK_RED(*src); 2.148 + *dest++ = UNPACK_GREEN(*src); 2.149 + *dest++ = UNPACK_BLUE(*src); 2.150 } 2.151 break; 2.152 2.153 @@ -257,6 +282,14 @@ 2.154 if(novideo) return; 2.155 2.156 while((key = kb_getkey()) != -1) { 2.157 + switch(key) { 2.158 + case '`': 2.159 + use_asm_swap = !use_asm_swap; 2.160 + break; 2.161 + 2.162 + default: 2.163 + break; 2.164 + } 2.165 scr->handle_keyboard(key, true); // TODO also generate release events... 2.166 } 2.167 } 2.168 @@ -285,6 +318,9 @@ 2.169 } 2.170 prev_mx = mx; 2.171 prev_my = my; 2.172 + 2.173 + mouse_x = mx; 2.174 + mouse_y = my; 2.175 } 2.176 2.177
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/swapbuf.asm Thu Apr 10 08:03:52 2014 +0300 3.3 @@ -0,0 +1,46 @@ 3.4 +; vim:set ft=nasm: 3.5 + segment code use32 3.6 + 3.7 + ; void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp) 3.8 + ; dest -> eax 3.9 + ; src -> edx 3.10 + ; xsz -> ebx 3.11 + ; ysz -> ecx 3.12 + ; bpp -> [ebp + 8] (after pushing ebp) 3.13 + global swap_buffers_asm_ 3.14 +swap_buffers_asm_: 3.15 + push ebp 3.16 + mov ebp, esp 3.17 + 3.18 + mov edi, eax ; let's hold dest ptr in edi, frees up eax 3.19 + mov esi, edx ; let's hold src ptr in esi, frees up edx 3.20 + ; calculate pixel count -> ecx, frees up ebx 3.21 + mov eax, ebx 3.22 + mul ecx 3.23 + mov ecx, eax ; now ecx = xsz * ysz 3.24 + 3.25 + mov eax, [ebp + 8] ; eax <- bpp 3.26 + cmp eax, 32 3.27 + je .bpp32 3.28 + cmp eax, 24 3.29 + je .bpp24 3.30 + cmp eax, 16 3.31 + je .bpp16 3.32 + ; invalid bpp, ignore 3.33 + jmp .done 3.34 + 3.35 +.bpp32: ; 32bit block transfer, no conversion 3.36 + rep movsd ; esi, edi, and ecx already loaded, just go... 3.37 + jmp .done 3.38 + 3.39 +.bpp24: ; 32bpp -> 24bpp conversion (LSB-first), 1 byte overrun! 3.40 + movsd ; transfer a full 32bit chunk and inc esi,edi by 4 3.41 + dec edi ; backtrack dest one byte after last transfer 3.42 + dec ecx 3.43 + jnz .bpp24 3.44 + jmp .done 3.45 + 3.46 +.bpp16: ; fuck 16bpp for now (TODO) 3.47 +.done: 3.48 + pop ebp 3.49 + ret
4.1 --- a/src/timer.h Thu Apr 10 02:31:31 2014 +0300 4.2 +++ b/src/timer.h Thu Apr 10 08:03:52 2014 +0300 4.3 @@ -18,6 +18,10 @@ 4.4 #ifndef TIMER_H_ 4.5 #define TIMER_H_ 4.6 4.7 +#ifdef __cplusplus 4.8 +extern "C" { 4.9 +#endif 4.10 + 4.11 /* expects the required timer resolution in hertz 4.12 * if res_hz is 0, the current resolution is retained 4.13 */ 4.14 @@ -26,4 +30,8 @@ 4.15 void reset_timer(void); 4.16 unsigned long get_msec(void); 4.17 4.18 +#ifdef __cplusplus 4.19 +} 4.20 +#endif 4.21 + 4.22 #endif /* TIMER_H_ */
5.1 --- a/util/fixcase Thu Apr 10 02:31:31 2014 +0300 5.2 +++ b/util/fixcase Thu Apr 10 08:03:52 2014 +0300 5.3 @@ -1,6 +1,6 @@ 5.4 #!/bin/sh 5.5 5.6 -src=`find \( -iname '*.c' -o -iname '*.cc' -o -iname '*.h' -o -iname '*.inl' \)` 5.7 +src=`find \( -iname '*.c' -o -iname '*.cc' -o -iname '*.h' -o -iname '*.inl' -o -iname '*.asm' \)` 5.8 for i in $src util/*; do 5.9 if echo $i | grep '[A-Z]' >/dev/null; then 5.10 fixed=`echo $i | tr '[:upper:]' '[:lower:]'`