gba-x3dtest

diff src/polyfill.c @ 19:62390f9cc93e

texture cache optimization failed
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 30 Jun 2014 09:07:41 +0300
parents 0a7f402892b3
children 2e903e27e35a
line diff
     1.1 --- a/src/polyfill.c	Thu Jun 26 21:48:09 2014 +0300
     1.2 +++ b/src/polyfill.c	Mon Jun 30 09:07:41 2014 +0300
     1.3 @@ -4,6 +4,9 @@
     1.4  #include "polyfill.h"
     1.5  #include "fixed.h"
     1.6  #include "gbasys.h"
     1.7 +#include "logger.h"
     1.8 +
     1.9 +#define USE_TEX
    1.10  
    1.11  /* TODO: constant interpolant optimization */
    1.12  
    1.13 @@ -16,6 +19,21 @@
    1.14  		int32_t v0, int32_t v1, uint16_t color, struct texture *tex);
    1.15  static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);
    1.16  
    1.17 +#ifdef USE_TEX
    1.18 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty);	/* texel fetch that goes through tcache; defined further down */
    1.19 +
    1.20 +/* TODO: currently the linker script will put statics in iwram; make this explicit */
    1.21 +#define TCACHE_XSZ	16	/* cache window width in texels */
    1.22 +#define TCACHE_YSZ	16	/* cache window height in texels */
    1.23 +
    1.24 +static struct {	/* single-window texel cache used by tex_lookup */
    1.25 +	uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ];	/* cached texels, rows are TCACHE_XSZ entries apart */
    1.26 +	struct texture *tex;	/* texture the current window was loaded from */
    1.27 +	int orig_x, orig_y;	/* texture coordinates of the window's first texel */
    1.28 +} tcache;
    1.29 +#endif	/* USE_TEX */
    1.30 +
    1.31 +
    1.32  void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color,
    1.33  		struct texture *tex)
    1.34  {
    1.35 @@ -27,7 +45,12 @@
    1.36  	int start, end;
    1.37  	pvec3 v0, v1;
    1.38  
    1.39 -	int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy;	/* texture interpolants */
    1.40 +#ifdef USE_TEX
    1.41 +	int32_t ldudy, ldvdy, rdudy, rdvdy;	/* texture interpolants */
    1.42 +#else
    1.43 +	static	/* to avoid lu,lv,ru,rv uninitialized warnings */
    1.44 +#endif
    1.45 +	int32_t lu, lv, ru, rv;
    1.46  
    1.47  	v0.x = verts[1].x - verts[0].x;
    1.48  	v0.y = verts[1].y - verts[0].y;
    1.49 @@ -70,10 +93,12 @@
    1.50  	}
    1.51  	lx = verts[lidx[0]].x;
    1.52  	ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    1.53 +#ifdef USE_TEX
    1.54  	lu = texcoords[lidx[0]].x;
    1.55  	ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
    1.56  	lv = texcoords[lidx[0]].y;
    1.57  	ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
    1.58 +#endif
    1.59  
    1.60  	/* find starting right edge */
    1.61  	ridx[1] = VNEXT(ridx[0], num);
    1.62 @@ -91,10 +116,12 @@
    1.63  	}
    1.64  	rx = verts[ridx[0]].x;
    1.65  	rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
    1.66 +#ifdef USE_TEX
    1.67  	ru = texcoords[ridx[0]].x;
    1.68  	rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
    1.69  	rv = texcoords[ridx[0]].y;
    1.70  	rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
    1.71 +#endif
    1.72  
    1.73  	start = topy >> 16;
    1.74  	end = boty >> 16;
    1.75 @@ -113,18 +140,23 @@
    1.76  				break;
    1.77  			}
    1.78  
    1.79 +			if(ldy) {
    1.80 +				ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    1.81 +			} else {
    1.82 +				ldxdy = verts[lidx[1]].x - lx;
    1.83 +			}
    1.84 +
    1.85 +#ifdef USE_TEX
    1.86  			lu = texcoords[lidx[0]].x;
    1.87  			lv = texcoords[lidx[0]].y;
    1.88 -
    1.89  			if(ldy) {
    1.90 -				ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    1.91  				ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
    1.92  				ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
    1.93  			} else {
    1.94 -				ldxdy = verts[lidx[1]].x - lx;
    1.95  				ldudy = texcoords[lidx[1]].x - lu;
    1.96  				ldvdy = texcoords[lidx[1]].y - lv;
    1.97  			}
    1.98 +#endif	/* USE_TEX */
    1.99  		}
   1.100  		if(y >= verts[ridx[1]].y) {
   1.101  			rx = verts[ridx[1]].x;
   1.102 @@ -135,18 +167,23 @@
   1.103  				break;
   1.104  			}
   1.105  
   1.106 +			if(rdy) {
   1.107 +				rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
   1.108 +			} else {
   1.109 +				rdxdy = verts[ridx[1]].x - rx;
   1.110 +			}
   1.111 +
   1.112 +#ifdef USE_TEX
   1.113  			ru = texcoords[ridx[0]].x;
   1.114  			rv = texcoords[ridx[0]].y;
   1.115 -
   1.116  			if(rdy) {
   1.117 -				rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
   1.118  				rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
   1.119  				rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
   1.120  			} else {
   1.121 -				rdxdy = verts[ridx[1]].x - rx;
   1.122  				rdudy = texcoords[ridx[1]].x - ru;
   1.123  				rdvdy = texcoords[ridx[1]].y - rv;
   1.124  			}
   1.125 +#endif	/* USE_TEX */
   1.126  		}
   1.127  
   1.128  		if(i >= 0) {
   1.129 @@ -161,10 +198,12 @@
   1.130  		rx += rdxdy;
   1.131  		y += 65536;
   1.132  
   1.133 +#ifdef USE_TEX
   1.134  		lu += ldudy;
   1.135  		lv += ldvdy;
   1.136  		ru += rdudy;
   1.137  		rv += rdvdy;
   1.138 +#endif
   1.139  	}
   1.140  }
   1.141  
   1.142 @@ -173,8 +212,10 @@
   1.143  		int32_t v0, int32_t v1, uint8_t color)
   1.144  {
   1.145  	int ix0, ix1;
   1.146 +#ifdef USE_TEX
   1.147  	int32_t dx;
   1.148  	int32_t u, v, dudx, dvdx;
   1.149 +#endif
   1.150  
   1.151  	int i, num_pairs, num_pix;
   1.152  	uint16_t *pixels;
   1.153 @@ -186,6 +227,7 @@
   1.154  		x1 = tmp;
   1.155  	}
   1.156  
   1.157 +#ifdef USE_TEX
   1.158  	dx = x1 - x0;
   1.159  
   1.160  	u = u0;
   1.161 @@ -197,6 +239,7 @@
   1.162  		dudx = u1 - u0;
   1.163  		dvdx = v1 - v0;
   1.164  	}
   1.165 +#endif
   1.166  
   1.167  	ix0 = (x0 + 32768) >> 16;
   1.168  	ix1 = (x1 + 32768) >> 16;
   1.169 @@ -211,16 +254,20 @@
   1.170  		uint16_t pix = *pixels & 0xff;
   1.171  		*pixels++ = pix | ((uint16_t)color << 8);
   1.172  		--num_pix;
   1.173 +#ifdef USE_TEX
   1.174  		u += dudx;
   1.175  		v += dvdx;
   1.176 +#endif
   1.177  	}
   1.178  
   1.179  	num_pairs = (num_pix & 0xfffe) / 2;
   1.180  
   1.181  	for(i=0; i<num_pairs; i++) {
   1.182  		*pixels++ = colpair;
   1.183 +#ifdef USE_TEX
   1.184  		u += dudx * 2;
   1.185  		v += dvdx * 2;
   1.186 +#endif
   1.187  	}
   1.188  
   1.189  	if(num_pix & 1) {
   1.190 @@ -234,8 +281,10 @@
   1.191  {
   1.192  	int i, ix0, ix1;
   1.193  	uint16_t *pixels;
   1.194 +#ifdef USE_TEX
   1.195  	int32_t dx;
   1.196  	int32_t u, v, dudx, dvdx;
   1.197 +#endif
   1.198  
   1.199  	if(x0 > x1) {
   1.200  		int32_t tmp = x0;
   1.201 @@ -243,6 +292,7 @@
   1.202  		x1 = tmp;
   1.203  	}
   1.204  
   1.205 +#ifdef USE_TEX
   1.206  	dx = x1 - x0;
   1.207  
   1.208  	u = u0;
   1.209 @@ -254,6 +304,7 @@
   1.210  		dudx = u1 - u0;
   1.211  		dvdx = v1 - v0;
   1.212  	}
   1.213 +#endif
   1.214  
   1.215  	ix0 = (x0 + 32768) >> 16;
   1.216  	ix1 = (x1 + 32768) >> 16;
   1.217 @@ -262,24 +313,23 @@
   1.218  	if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
   1.219  
   1.220  	pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
   1.221 -	for(i=ix0; i<ix1; i++) {
   1.222 -		/**pixels++ = color;*/
   1.223 -		int cr = u >> 8;
   1.224 -		int cg = v >> 8;
   1.225 -		if(cr > 255) cr = 255;
   1.226 -		if(cg > 255) cg = 255;
   1.227 -
   1.228 -		if(tex) {
   1.229 +#ifdef USE_TEX
   1.230 +	if(tex) {
   1.231 +		for(i=ix0; i<ix1; i++) {
   1.232  			int tx = (u >> (16 - tex->ushift)) & tex->umask;
   1.233  			int ty = (v >> (16 - tex->vshift)) & tex->vmask;
   1.234 -			uint16_t texel = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx];
   1.235 -			*pixels++ = texel;
   1.236 -		} else {
   1.237 +			*pixels++ = tex_lookup(tex, tx, ty);
   1.238 +			//*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx];
   1.239 +
   1.240 +			u += dudx;
   1.241 +			v += dvdx;
   1.242 +		}
   1.243 +	} else
   1.244 +#endif
   1.245 +	{
   1.246 +		for(i=ix0; i<ix1; i++) {
   1.247  			*pixels++ = color;
   1.248  		}
   1.249 -
   1.250 -		u += dudx;
   1.251 -		v += dvdx;
   1.252  	}
   1.253  }
   1.254  
   1.255 @@ -310,3 +360,53 @@
   1.256  {
   1.257  	return x16mul(x0, y1) - x16mul(y0, x1);
   1.258  }
   1.259 +
   1.260 +#ifdef USE_TEX
   1.261 +#define MIN(a, b)		((a) < (b) ? (a) : (b))	/* evaluates its arguments twice; only given side-effect-free operands below */
   1.262 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty)	/* returns texel (tx, ty) of tex via tcache, reloading the cache window on a miss */
   1.263 +{	/* assumes 0 <= tx < tex->xsz and 0 <= ty < tex->ysz (the visible caller masks with umask/vmask) - TODO confirm */
   1.264 +	int x, y, i, j;	/* x, y: origin of the window to load; j is unused */
   1.265 +	int width, height;	/* size of the region copied on a reload */
   1.266 +	uint16_t *ptr;	/* walks the source texture rows during a reload */
   1.267 +	int cx = tx - tcache.orig_x;	/* requested texel relative to the cached window */
   1.268 +	int cy = ty - tcache.orig_y;
   1.269 +
   1.270 +	if(tcache.tex == tex && cx >= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) {	/* hit: same texture and inside the window */
   1.271 +		return tcache.pixels[cy * TCACHE_XSZ + cx];
   1.272 +	}
   1.273 +
   1.274 +	tcache.tex = tex;	/* miss: the window is reloaded from this texture */
   1.275 +	x = tx - TCACHE_XSZ / 2;	/* center the new window on the requested texel */
   1.276 +	y = ty - TCACHE_YSZ / 2;
   1.277 +
   1.278 +	if(x + TCACHE_XSZ > tex->xsz) {	/* keep the window from running past the right edge */
   1.279 +		x = tex->xsz - TCACHE_XSZ;
   1.280 +	}
   1.281 +	if(y + TCACHE_YSZ > tex->ysz) {	/* ... and past the bottom edge */
   1.282 +		y = tex->ysz - TCACHE_YSZ;
   1.283 +	}
   1.284 +
   1.285 +	if(x < 0) x = 0;	/* clamp at the left/top edge; also catches textures smaller than the cache */
   1.286 +	if(y < 0) y = 0;
   1.287 +
   1.288 +	width = MIN(TCACHE_XSZ, tex->xsz - x);	/* less than the cache size only when the texture itself is smaller */
   1.289 +	height = MIN(TCACHE_YSZ, tex->ysz - y);
   1.290 +
   1.291 +	tcache.orig_x = x;
   1.292 +	tcache.orig_y = y;
   1.293 +
   1.294 +	/*logmsg(LOG_DBG, "lookup(%d, %d): loading %dx%d+%d+%d into cache %p\n", tx, ty,
   1.295 +			width, height, x, y, tcache.pixels);
   1.296 +			*/
   1.297 +
   1.298 +	ptr = (uint16_t*)tex->pixels + y * tex->xsz + x;	/* first texel of the window; texture rows are xsz texels apart */
   1.299 +	for(i=0; i<height; i++) {
   1.300 +		memcpy(tcache.pixels + i * TCACHE_XSZ, ptr, width * 2);	/* width * 2 = bytes per row of 16-bit texels; NOTE(review): memcpy needs <string.h>, include not visible in this diff - confirm */
   1.301 +		ptr += tex->xsz;
   1.302 +	}
   1.303 +
   1.304 +	cx = tx - tcache.orig_x;	/* recompute against the new window origin */
   1.305 +	cy = ty - tcache.orig_y;
   1.306 +	return tcache.pixels[cy * TCACHE_XSZ + cx];
   1.307 +}
   1.308 +#endif	/* USE_TEX */