gba-x3dtest

changeset 19:62390f9cc93e

texture cache optimization failed
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 30 Jun 2014 09:07:41 +0300
parents f907b2c50a8b
children 2e903e27e35a
files Makefile src/game.c src/logger.c src/polyfill.c src/x3d.c
diffstat 5 files changed, 139 insertions(+), 27 deletions(-) [+]
line diff
     1.1 --- a/Makefile	Thu Jun 26 21:48:09 2014 +0300
     1.2 +++ b/Makefile	Mon Jun 30 09:07:41 2014 +0300
     1.3 @@ -17,7 +17,7 @@
     1.4  EMU = vbam
     1.5  
     1.6  opt = -O3 -fomit-frame-pointer -mcpu=arm7tdmi -mtune=arm7tdmi
     1.7 -#dbg = -g
     1.8 +dbg = -g
     1.9  
    1.10  CFLAGS = $(opt) $(dbg) -pedantic -Wall -I. -I../gbasys/src
    1.11  LDFLAGS = ../gbasys/libgbasys.a -lm
     2.1 --- a/src/game.c	Thu Jun 26 21:48:09 2014 +0300
     2.2 +++ b/src/game.c	Mon Jun 30 09:07:41 2014 +0300
     2.3 @@ -23,6 +23,7 @@
     2.4  static struct mesh box;
     2.5  static int tex;
     2.6  static int fps;
     2.7 +static short show_fps = 1;
     2.8  
     2.9  int game_init(void)
    2.10  {
    2.11 @@ -95,7 +96,9 @@
    2.12  	draw_mesh(&box);
    2.13  	x3d_disable_texture();
    2.14  
    2.15 -	draw_fps_meter(msec);
    2.16 +	if(show_fps) {
    2.17 +		draw_fps_meter(msec);
    2.18 +	}
    2.19  
    2.20  	flip();
    2.21  }
    2.22 @@ -109,7 +112,7 @@
    2.23  
    2.24  	++nframes;
    2.25  
    2.26 -	if(dt >= 1000) {
    2.27 +	if(dt >= 1500) {
    2.28  		last_msec = msec;
    2.29  		fps = 1000 * nframes / dt;
    2.30  		nframes = 0;
    2.31 @@ -119,7 +122,7 @@
    2.32  	bar_height = fps * 4;
    2.33  	if(bar_height > HEIGHT) bar_height = HEIGHT;
    2.34  
    2.35 -	draw_rect(0, HEIGHT - bar_height, 1, bar_height, RGB(0, 255, 0));
    2.36 +	draw_rect(0, HEIGHT - bar_height - 1, 1, bar_height, RGB(0, 255, 0));
    2.37  }
    2.38  
    2.39  void game_keyb(int key, int pressed)
    2.40 @@ -135,6 +138,10 @@
    2.41  		dbg_fill_dump = 1;
    2.42  		break;
    2.43  
    2.44 +	case KEY_START:
    2.45 +		show_fps = !show_fps;
    2.46 +		break;
    2.47 +
    2.48  	default:
    2.49  		break;
    2.50  	}
     3.1 --- a/src/logger.c	Thu Jun 26 21:48:09 2014 +0300
     3.2 +++ b/src/logger.c	Mon Jun 30 09:07:41 2014 +0300
     3.3 @@ -71,7 +71,7 @@
     3.4  	while(*str) putchr(*str++);
     3.5  }
     3.6  
     3.7 -#ifdef __ARM__
     3.8 +#ifdef __arm__
     3.9  static void agbprint(const char *str)
    3.10  {
    3.11  	asm volatile (
     4.1 --- a/src/polyfill.c	Thu Jun 26 21:48:09 2014 +0300
     4.2 +++ b/src/polyfill.c	Mon Jun 30 09:07:41 2014 +0300
     4.3 @@ -4,6 +4,9 @@
     4.4  #include "polyfill.h"
     4.5  #include "fixed.h"
     4.6  #include "gbasys.h"
     4.7 +#include "logger.h"
     4.8 +
     4.9 +#define USE_TEX
    4.10  
    4.11  /* TODO: constant interpolant optimization */
    4.12  
    4.13 @@ -16,6 +19,21 @@
    4.14  		int32_t v0, int32_t v1, uint16_t color, struct texture *tex);
    4.15  static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);
    4.16  
    4.17 +#ifdef USE_TEX
    4.18 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty);
    4.19 +
    4.20 +/* TODO currently the linkscript will put statics in iwram. make this explicit */
    4.21 +#define TCACHE_XSZ	16
    4.22 +#define TCACHE_YSZ	16
    4.23 +
    4.24 +static struct {
    4.25 +	uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ];
    4.26 +	struct texture *tex;
    4.27 +	int orig_x, orig_y;
    4.28 +} tcache;
    4.29 +#endif	/* USE_TEX */
    4.30 +
    4.31 +
    4.32  void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color,
    4.33  		struct texture *tex)
    4.34  {
    4.35 @@ -27,7 +45,12 @@
    4.36  	int start, end;
    4.37  	pvec3 v0, v1;
    4.38  
    4.39 -	int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy;	/* texture interpolants */
    4.40 +#ifdef USE_TEX
    4.41 +	int32_t ldudy, ldvdy, rdudy, rdvdy;	/* texture interpolants */
    4.42 +#else
    4.43 +	static	/* to avoid lu,lv,ru,rv uninitialized warnings */
    4.44 +#endif
    4.45 +	int32_t lu, lv, ru, rv;
    4.46  
    4.47  	v0.x = verts[1].x - verts[0].x;
    4.48  	v0.y = verts[1].y - verts[0].y;
    4.49 @@ -70,10 +93,12 @@
    4.50  	}
    4.51  	lx = verts[lidx[0]].x;
    4.52  	ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    4.53 +#ifdef USE_TEX
    4.54  	lu = texcoords[lidx[0]].x;
    4.55  	ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
    4.56  	lv = texcoords[lidx[0]].y;
    4.57  	ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
    4.58 +#endif
    4.59  
    4.60  	/* find starting right edge */
    4.61  	ridx[1] = VNEXT(ridx[0], num);
    4.62 @@ -91,10 +116,12 @@
    4.63  	}
    4.64  	rx = verts[ridx[0]].x;
    4.65  	rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
    4.66 +#ifdef USE_TEX
    4.67  	ru = texcoords[ridx[0]].x;
    4.68  	rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
    4.69  	rv = texcoords[ridx[0]].y;
    4.70  	rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
    4.71 +#endif
    4.72  
    4.73  	start = topy >> 16;
    4.74  	end = boty >> 16;
    4.75 @@ -113,18 +140,23 @@
    4.76  				break;
    4.77  			}
    4.78  
    4.79 +			if(ldy) {
    4.80 +				ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    4.81 +			} else {
    4.82 +				ldxdy = verts[lidx[1]].x - lx;
    4.83 +			}
    4.84 +
    4.85 +#ifdef USE_TEX
    4.86  			lu = texcoords[lidx[0]].x;
    4.87  			lv = texcoords[lidx[0]].y;
    4.88 -
    4.89  			if(ldy) {
    4.90 -				ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    4.91  				ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
    4.92  				ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
    4.93  			} else {
    4.94 -				ldxdy = verts[lidx[1]].x - lx;
    4.95  				ldudy = texcoords[lidx[1]].x - lu;
    4.96  				ldvdy = texcoords[lidx[1]].y - lv;
    4.97  			}
    4.98 +#endif	/* USE_TEX */
    4.99  		}
   4.100  		if(y >= verts[ridx[1]].y) {
   4.101  			rx = verts[ridx[1]].x;
   4.102 @@ -135,18 +167,23 @@
   4.103  				break;
   4.104  			}
   4.105  
   4.106 +			if(rdy) {
   4.107 +				rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
   4.108 +			} else {
   4.109 +				rdxdy = verts[ridx[1]].x - rx;
   4.110 +			}
   4.111 +
   4.112 +#ifdef USE_TEX
   4.113  			ru = texcoords[ridx[0]].x;
   4.114  			rv = texcoords[ridx[0]].y;
   4.115 -
   4.116  			if(rdy) {
   4.117 -				rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
   4.118  				rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
   4.119  				rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
   4.120  			} else {
   4.121 -				rdxdy = verts[ridx[1]].x - rx;
   4.122  				rdudy = texcoords[ridx[1]].x - ru;
   4.123  				rdvdy = texcoords[ridx[1]].y - rv;
   4.124  			}
   4.125 +#endif	/* USE_TEX */
   4.126  		}
   4.127  
   4.128  		if(i >= 0) {
   4.129 @@ -161,10 +198,12 @@
   4.130  		rx += rdxdy;
   4.131  		y += 65536;
   4.132  
   4.133 +#ifdef USE_TEX
   4.134  		lu += ldudy;
   4.135  		lv += ldvdy;
   4.136  		ru += rdudy;
   4.137  		rv += rdvdy;
   4.138 +#endif
   4.139  	}
   4.140  }
   4.141  
   4.142 @@ -173,8 +212,10 @@
   4.143  		int32_t v0, int32_t v1, uint8_t color)
   4.144  {
   4.145  	int ix0, ix1;
   4.146 +#ifdef USE_TEX
   4.147  	int32_t dx;
   4.148  	int32_t u, v, dudx, dvdx;
   4.149 +#endif
   4.150  
   4.151  	int i, num_pairs, num_pix;
   4.152  	uint16_t *pixels;
   4.153 @@ -186,6 +227,7 @@
   4.154  		x1 = tmp;
   4.155  	}
   4.156  
   4.157 +#ifdef USE_TEX
   4.158  	dx = x1 - x0;
   4.159  
   4.160  	u = u0;
   4.161 @@ -197,6 +239,7 @@
   4.162  		dudx = u1 - u0;
   4.163  		dvdx = v1 - v0;
   4.164  	}
   4.165 +#endif
   4.166  
   4.167  	ix0 = (x0 + 32768) >> 16;
   4.168  	ix1 = (x1 + 32768) >> 16;
   4.169 @@ -211,16 +254,20 @@
   4.170  		uint16_t pix = *pixels & 0xff;
   4.171  		*pixels++ = pix | ((uint16_t)color << 8);
   4.172  		--num_pix;
   4.173 +#ifdef USE_TEX
   4.174  		u += dudx;
   4.175  		v += dvdx;
   4.176 +#endif
   4.177  	}
   4.178  
   4.179  	num_pairs = (num_pix & 0xfffe) / 2;
   4.180  
   4.181  	for(i=0; i<num_pairs; i++) {
   4.182  		*pixels++ = colpair;
   4.183 +#ifdef USE_TEX
   4.184  		u += dudx * 2;
   4.185  		v += dvdx * 2;
   4.186 +#endif
   4.187  	}
   4.188  
   4.189  	if(num_pix & 1) {
   4.190 @@ -234,8 +281,10 @@
   4.191  {
   4.192  	int i, ix0, ix1;
   4.193  	uint16_t *pixels;
   4.194 +#ifdef USE_TEX
   4.195  	int32_t dx;
   4.196  	int32_t u, v, dudx, dvdx;
   4.197 +#endif
   4.198  
   4.199  	if(x0 > x1) {
   4.200  		int32_t tmp = x0;
   4.201 @@ -243,6 +292,7 @@
   4.202  		x1 = tmp;
   4.203  	}
   4.204  
   4.205 +#ifdef USE_TEX
   4.206  	dx = x1 - x0;
   4.207  
   4.208  	u = u0;
   4.209 @@ -254,6 +304,7 @@
   4.210  		dudx = u1 - u0;
   4.211  		dvdx = v1 - v0;
   4.212  	}
   4.213 +#endif
   4.214  
   4.215  	ix0 = (x0 + 32768) >> 16;
   4.216  	ix1 = (x1 + 32768) >> 16;
   4.217 @@ -262,24 +313,23 @@
   4.218  	if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
   4.219  
   4.220  	pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
   4.221 -	for(i=ix0; i<ix1; i++) {
   4.222 -		/**pixels++ = color;*/
   4.223 -		int cr = u >> 8;
   4.224 -		int cg = v >> 8;
   4.225 -		if(cr > 255) cr = 255;
   4.226 -		if(cg > 255) cg = 255;
   4.227 -
   4.228 -		if(tex) {
   4.229 +#ifdef USE_TEX
   4.230 +	if(tex) {
   4.231 +		for(i=ix0; i<ix1; i++) {
   4.232  			int tx = (u >> (16 - tex->ushift)) & tex->umask;
   4.233  			int ty = (v >> (16 - tex->vshift)) & tex->vmask;
   4.234 -			uint16_t texel = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx];
   4.235 -			*pixels++ = texel;
   4.236 -		} else {
   4.237 +			*pixels++ = tex_lookup(tex, tx, ty);
   4.238 +			//*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + yx];
   4.239 +
   4.240 +			u += dudx;
   4.241 +			v += dvdx;
   4.242 +		}
   4.243 +	} else
   4.244 +#endif
   4.245 +	{
   4.246 +		for(i=ix0; i<ix1; i++) {
   4.247  			*pixels++ = color;
   4.248  		}
   4.249 -
   4.250 -		u += dudx;
   4.251 -		v += dvdx;
   4.252  	}
   4.253  }
   4.254  
   4.255 @@ -310,3 +360,53 @@
   4.256  {
   4.257  	return x16mul(x0, y1) - x16mul(y0, x1);
   4.258  }
   4.259 +
   4.260 +#ifdef USE_TEX
   4.261 +#define MIN(a, b)		((a) < (b) ? (a) : (b))
   4.262 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty)
   4.263 +{
   4.264 +	int x, y, i, j;
   4.265 +	int width, height;
   4.266 +	uint16_t *ptr;
   4.267 +	int cx = tx - tcache.orig_x;
   4.268 +	int cy = ty - tcache.orig_y;
   4.269 +
   4.270 +	if(tcache.tex == tex && cx >= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) {
   4.271 +		return tcache.pixels[cy * TCACHE_XSZ + cx];
   4.272 +	}
   4.273 +
   4.274 +	tcache.tex = tex;
   4.275 +	x = tx - TCACHE_XSZ / 2;
   4.276 +	y = ty - TCACHE_YSZ / 2;
   4.277 +
   4.278 +	if(x + TCACHE_XSZ > tex->xsz) {
   4.279 +		x = tex->xsz - TCACHE_XSZ;
   4.280 +	}
   4.281 +	if(y + TCACHE_YSZ > tex->ysz) {
   4.282 +		y = tex->ysz - TCACHE_YSZ;
   4.283 +	}
   4.284 +
   4.285 +	if(x < 0) x = 0;
   4.286 +	if(y < 0) y = 0;
   4.287 +
   4.288 +	width = MIN(TCACHE_XSZ, tex->xsz - x);
   4.289 +	height = MIN(TCACHE_YSZ, tex->ysz - y);
   4.290 +
   4.291 +	tcache.orig_x = x;
   4.292 +	tcache.orig_y = y;
   4.293 +
   4.294 +	/*logmsg(LOG_DBG, "lookup(%d, %d): loading %dx%d+%d+%d into cache %p\n", tx, ty,
   4.295 +			width, height, x, y, tcache.pixels);
   4.296 +			*/
   4.297 +
   4.298 +	ptr = (uint16_t*)tex->pixels + y * tex->xsz + x;
   4.299 +	for(i=0; i<height; i++) {
   4.300 +		memcpy(tcache.pixels + i * TCACHE_XSZ, ptr, width * 2);
   4.301 +		ptr += tex->xsz;
   4.302 +	}
   4.303 +
   4.304 +	cx = tx - tcache.orig_x;
   4.305 +	cy = ty - tcache.orig_y;
   4.306 +	return tcache.pixels[cy * TCACHE_XSZ + cx];
   4.307 +}
   4.308 +#endif	/* USE_TEX */
     5.1 --- a/src/x3d.c	Thu Jun 26 21:48:09 2014 +0300
     5.2 +++ b/src/x3d.c	Mon Jun 30 09:07:41 2014 +0300
     5.3 @@ -1,5 +1,6 @@
     5.4  #include "config.h"
     5.5  #include <stdio.h>
     5.6 +#include <stdlib.h>
     5.7  #include <string.h>
     5.8  #include <math.h>
     5.9  #include "x3d.h"
    5.10 @@ -351,7 +352,9 @@
    5.11  
    5.12  	for(i=0; i<MAX_TEXTURES; i++) {
    5.13  		if(!textures[i].pixels) {
    5.14 -			textures[i].pixels = pixels;
    5.15 +			/*textures[i].pixels = pixels;*/
    5.16 +			textures[i].pixels = malloc(xsz * ysz * 2);
    5.17 +			memcpy(textures[i].pixels, pixels, xsz * ysz * 2);
    5.18  			textures[i].xsz = xsz;
    5.19  			textures[i].ysz = ysz;
    5.20  			textures[i].umask = xsz - 1;
    5.21 @@ -366,6 +369,8 @@
    5.22  				}
    5.23  			}
    5.24  
    5.25 +			logmsg(LOG_DBG, "create texture %dx%d: %p\n", xsz, ysz, pixels);
    5.26 +
    5.27  			return i;
    5.28  		}
    5.29  	}