rayzor

changeset 10:235c8b764c0b

optimized swap_buffers
author John Tsiombikas <nuclear@member.fsf.org>
date Thu, 10 Apr 2014 08:03:52 +0300
parents 70e332156d02
children fe94d9e986ae
files Makefile src/main.cc src/swapbuf.asm src/timer.h util/fixcase
diffstat 5 files changed, 116 insertions(+), 19 deletions(-) [+]
line diff
     1.1 --- a/Makefile	Thu Apr 10 02:31:31 2014 +0300
     1.2 +++ b/Makefile	Thu Apr 10 08:03:52 2014 +0300
     1.3 @@ -1,4 +1,4 @@
     1.4 -baseobj = main.obj logger.obj screen.obj scrman.obj
     1.5 +baseobj = main.obj logger.obj screen.obj scrman.obj swapbuf.obj
     1.6  modelobj = modeller.obj min3d.obj m3drast.obj lines.obj
     1.7  rendobj = renderer.obj vmath.obj
     1.8  scnobj = scene.obj object.obj
     1.9 @@ -6,11 +6,14 @@
    1.10  obj = $(baseobj) $(modelobj) $(rendobj) $(scnobj) $(sysobj)
    1.11  bin = rayzor.exe
    1.12  
    1.13 -#dbg = -d2
    1.14 +opt = -5 -fp5 -otexan
    1.15 +dbg = -d1
    1.16  
    1.17 +AS = nasm
    1.18  CC = wcc386
    1.19  CXX = wpp386
    1.20 -CFLAGS = $(dbg) -5 -fp5 -otexan -zq -bt=dos -Isrc\stl
    1.21 +ASFLAGS = -fobj
    1.22 +CFLAGS = $(dbg) $(opt) -zq -bt=dos -Isrc\stl
    1.23  CXXFLAGS = $(CFLAGS)
    1.24  LD = wlink
    1.25  
    1.26 @@ -18,8 +21,9 @@
    1.27  	%write objects.lnk file { $(obj) }
    1.28  	$(LD) debug all name $@ @objects $(LDFLAGS)
    1.29  
    1.30 -.c: src\
    1.31 -.cc: src\
    1.32 +.c: src
    1.33 +.cc: src
    1.34 +.asm: src
    1.35  
    1.36  .c.obj: .autodepend
    1.37  	$(CC) $(CFLAGS) $[*
    1.38 @@ -27,6 +31,9 @@
    1.39  .cc.obj: .autodepend
    1.40  	$(CXX) $(CXXFLAGS) $[*
    1.41  
    1.42 +.asm.obj:
    1.43 +	$(AS) $(ASFLAGS) -o $@ $[*.asm
    1.44 +
    1.45  clean: .symbolic
    1.46  	del *.obj
    1.47  	del $(bin)
     2.1 --- a/src/main.cc	Thu Apr 10 02:31:31 2014 +0300
     2.2 +++ b/src/main.cc	Thu Apr 10 08:03:52 2014 +0300
     2.3 @@ -1,7 +1,6 @@
     2.4  #include <stdio.h>
     2.5  #include <stdlib.h>
     2.6  #include <string.h>
     2.7 -#include <math.h>
     2.8  #include <signal.h>
     2.9  #include "inttypes.h"
    2.10  #include "gfx.h"
    2.11 @@ -14,6 +13,16 @@
    2.12  #include "modeller.h"
    2.13  #include "renderer.h"
    2.14  #include "scrman.h"
    2.15 +#include "timer.h"
    2.16 +
    2.17 +#ifdef __DOS__
    2.18 +#undef USE_ASM_SWAPBUF
    2.19 +#endif
    2.20 +
    2.21 +#ifdef USE_ASM_SWAPBUF
    2.22 +// defined in swapbuf.asm
    2.23 +extern "C" void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp);
    2.24 +#endif
    2.25  
    2.26  static bool init();
    2.27  static void cleanup();
    2.28 @@ -31,19 +40,22 @@
    2.29  int fb_bpp = 32;
    2.30  Scene *scene;
    2.31  
    2.32 -static int bytespp;
    2.33  static bool novideo;
    2.34  static void *fb;
    2.35  static int rbits, gbits, bbits;
    2.36  static int rshift, gshift, bshift;
    2.37  static unsigned int rmask, gmask, bmask;
    2.38  
    2.39 +static bool use_asm_swap = true;
    2.40  static bool use_mouse;
    2.41  static int mouse_x, mouse_y;
    2.42  static bool quit;
    2.43  
    2.44  int main(int argc, char **argv)
    2.45  {
    2.46 +	unsigned long start_msec, msec;
    2.47 +	unsigned long nframes = 0;
    2.48 +
    2.49  	if(!parse_args(argc, argv)) {
    2.50  		return 1;
    2.51  	}
    2.52 @@ -51,6 +63,8 @@
    2.53  		return 1;
    2.54  	}
    2.55  
    2.56 +	start_msec = get_msec();
    2.57 +
    2.58  	// main loop
    2.59  	for(;;) {
    2.60  		handle_keyboard();
    2.61 @@ -58,11 +72,16 @@
    2.62  		if(quit) break;
    2.63  
    2.64  		display();
    2.65 +		++nframes;
    2.66  
    2.67  		if(novideo) break;
    2.68  	}
    2.69  
    2.70 +	msec = get_msec() - start_msec;
    2.71 +
    2.72  	cleanup();
    2.73 +
    2.74 +	printf("Average framerate: %g\n", (float)nframes / ((float)msec / 1000.0f));
    2.75  	printf("Thank you for using Rayzor!\n");
    2.76  	return 0;
    2.77  }
    2.78 @@ -79,6 +98,8 @@
    2.79  	signal(SIGILL, sig);
    2.80  	signal(SIGFPE, sig);
    2.81  
    2.82 +	init_timer(128);
    2.83 +
    2.84  	if(!novideo) {
    2.85  		if(kb_init(32) == -1) {
    2.86  			fprintf(stderr, "failed to initialize keyboard driver\n");
    2.87 @@ -94,8 +115,8 @@
    2.88  		get_color_bits(&rbits, &gbits, &bbits);
    2.89  		get_color_shift(&rshift, &gshift, &bshift);
    2.90  		get_color_mask(&rmask, &gmask, &bmask);
    2.91 -		bytespp = (int)ceil(fb_bpp / 8.0);
    2.92  
    2.93 +		printlog("video resolution: %dx%d\n", fb_width, fb_height);
    2.94  		printlog("bpp: %d (%d %d %d)\n", fb_bpp, rbits, gbits, bbits);
    2.95  		printlog("shift: %d %d %d\n", rshift, gshift, bshift);
    2.96  		printlog("mask: %x %x %x\n", rmask, gmask, bmask);
    2.97 @@ -107,9 +128,8 @@
    2.98  	} else {
    2.99  		logger_output(stdout);
   2.100  		printlog("novideo (debug) mode\n");
   2.101 -		fb_bpp = 32;
   2.102 +		fb_bpp = 24;
   2.103  		rbits = gbits = bbits = 8;
   2.104 -		bytespp = 3;
   2.105  	}
   2.106  
   2.107  	fb_pixels = new uint32_t[fb_width * fb_height * 4];
   2.108 @@ -165,8 +185,12 @@
   2.109  	}
   2.110  
   2.111  	if(!novideo) {
   2.112 +		wait_vsync();
   2.113 +#ifdef USE_ASM_SWAPBUF
   2.114 +		swap_buffers_asm(fb, fb_pixels, fb_width, fb_height, fb_bpp);
   2.115 +#else
   2.116  		swap_buffers();
   2.117 -		wait_vsync();
   2.118 +#endif
   2.119  	}
   2.120  }
   2.121  
   2.122 @@ -175,9 +199,9 @@
   2.123  	 (((g) << gshift) & gmask) | \
   2.124  	 (((b) << bshift) & bmask))
   2.125  
   2.126 -#define UNPACK_RED(c)	(((c) >> 16) & 0xff)
   2.127 +#define UNPACK_RED(c)	((c) & 0xff)
   2.128  #define UNPACK_GREEN(c)	(((c) >> 8) & 0xff)
   2.129 -#define UNPACK_BLUE(c)	((c) & 0xff)
   2.130 +#define UNPACK_BLUE(c)	(((c) >> 16) & 0xff)
   2.131  
   2.132  static void swap_buffers()
   2.133  {
   2.134 @@ -192,12 +216,13 @@
   2.135  	case 24:
   2.136  		{
   2.137  			unsigned char *dest = (unsigned char*)fb;
   2.138 -			for(int i=0; i<num_pixels; i++) {
   2.139 -				uint32_t c = *src++;
   2.140 -				*dest++ = UNPACK_RED(c);
   2.141 -				*dest++ = UNPACK_GREEN(c);
   2.142 -				*dest++ = UNPACK_BLUE(c);
   2.143 +			for(int i=0; i<num_pixels-1; i++) {
   2.144 +				*((uint32_t*)dest) = *src++;
   2.145 +				dest += 3;
   2.146  			}
   2.147 +			*dest++ = UNPACK_RED(*src);
   2.148 +			*dest++ = UNPACK_GREEN(*src);
   2.149 +			*dest++ = UNPACK_BLUE(*src);
   2.150  		}
   2.151  		break;
   2.152  
   2.153 @@ -257,6 +282,14 @@
   2.154  	if(novideo) return;
   2.155  
   2.156  	while((key = kb_getkey()) != -1) {
   2.157 +		switch(key) {
   2.158 +		case '`':
   2.159 +			use_asm_swap = !use_asm_swap;
   2.160 +			break;
   2.161 +
   2.162 +		default:
   2.163 +			break;
   2.164 +		}
   2.165  		scr->handle_keyboard(key, true);	// TODO also generate release events...
   2.166  	}
   2.167  }
   2.168 @@ -285,6 +318,9 @@
   2.169  	}
   2.170  	prev_mx = mx;
   2.171  	prev_my = my;
   2.172 +
   2.173 +	mouse_x = mx;
   2.174 +	mouse_y = my;
   2.175  }
   2.176  
   2.177  
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/swapbuf.asm	Thu Apr 10 08:03:52 2014 +0300
     3.3 @@ -0,0 +1,46 @@
     3.4 +; vim:set ft=nasm:
     3.5 +	segment code use32
     3.6 +
     3.7 +	; void swap_buffers_asm(void *dest, void *src, int xsz, int ysz, int bpp)
     3.8 +	; dest	-> eax
     3.9 +	; src	-> edx
    3.10 +	; xsz	-> ebx
    3.11 +	; ysz	-> ecx
    3.12 +	; bpp	-> [ebp + 8] (after pushing ebp)
    3.13 +	global swap_buffers_asm_
    3.14 +swap_buffers_asm_:
    3.15 +	push ebp
    3.16 +	mov ebp, esp
    3.17 +
    3.18 +	mov edi, eax	; let's hold dest ptr in edi, frees up eax
    3.19 +	mov esi, edx	; let's hold src ptr in esi, frees up edx
    3.20 +	; calculate pixel count -> ecx, frees up ebx
    3.21 +	mov eax, ebx
    3.22 +	mul ecx
    3.23 +	mov ecx, eax	; now ecx = xsz * ysz	
    3.24 +	
    3.25 +	mov eax, [ebp + 8]	; eax <- bpp
    3.26 +	cmp eax, 32
    3.27 +	je .bpp32
    3.28 +	cmp eax, 24
    3.29 +	je .bpp24
    3.30 +	cmp eax, 16
    3.31 +	je .bpp16
    3.32 +	; invalid bpp, ignore
    3.33 +	jmp .done
    3.34 +
    3.35 +.bpp32:				; 32bit block transfer, no conversion
    3.36 +	rep movsd		; esi, edi, and ecx already loaded, just go...
    3.37 +	jmp .done
    3.38 +	
    3.39 +.bpp24:				; 32bpp -> 24bpp conversion (LSB-first), 1 byte overrun!
    3.40 +	movsd			; transfer a full 32bit chunk and inc esi,edi by 4
    3.41 +	dec edi			; backtrack dest one byte after last transfer
    3.42 +	dec ecx
    3.43 +	jnz .bpp24
    3.44 +	jmp .done
    3.45 +
    3.46 +.bpp16:				; fuck 16bpp for now (TODO)
    3.47 +.done:
    3.48 +	pop ebp
    3.49 +	ret	
     4.1 --- a/src/timer.h	Thu Apr 10 02:31:31 2014 +0300
     4.2 +++ b/src/timer.h	Thu Apr 10 08:03:52 2014 +0300
     4.3 @@ -18,6 +18,10 @@
     4.4  #ifndef TIMER_H_
     4.5  #define TIMER_H_
     4.6  
     4.7 +#ifdef __cplusplus
     4.8 +extern "C" {
     4.9 +#endif
    4.10 +
    4.11  /* expects the required timer resolution in hertz
    4.12   * if res_hz is 0, the current resolution is retained
    4.13   */
    4.14 @@ -26,4 +30,8 @@
    4.15  void reset_timer(void);
    4.16  unsigned long get_msec(void);
    4.17  
    4.18 +#ifdef __cplusplus
    4.19 +}
    4.20 +#endif
    4.21 +
    4.22  #endif	/* TIMER_H_ */
     5.1 --- a/util/fixcase	Thu Apr 10 02:31:31 2014 +0300
     5.2 +++ b/util/fixcase	Thu Apr 10 08:03:52 2014 +0300
     5.3 @@ -1,6 +1,6 @@
     5.4  #!/bin/sh
     5.5  
     5.6 -src=`find \( -iname '*.c' -o -iname '*.cc' -o -iname '*.h' -o -iname '*.inl' \)`
     5.7 +src=`find \( -iname '*.c' -o -iname '*.cc' -o -iname '*.h' -o -iname '*.inl' -o -iname '*.asm' \)`
     5.8  for i in $src util/*; do
     5.9  	if echo $i | grep '[A-Z]' >/dev/null; then
    5.10  		fixed=`echo $i | tr '[:upper:]' '[:lower:]'`