gba-x3dtest
diff src/polyfill.c @ 19:62390f9cc93e
texture cache optimization failed
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Mon, 30 Jun 2014 09:07:41 +0300 |
parents | 0a7f402892b3 |
children | 2e903e27e35a |
line diff
```
1.1 --- a/src/polyfill.c Thu Jun 26 21:48:09 2014 +0300
1.2 +++ b/src/polyfill.c Mon Jun 30 09:07:41 2014 +0300
1.3 @@ -4,6 +4,9 @@
1.4 #include "polyfill.h"
1.5 #include "fixed.h"
1.6 #include "gbasys.h"
1.7 +#include "logger.h"
1.8 +
1.9 +#define USE_TEX
1.10
1.11 /* TODO: constant interpolant optimization */
1.12
1.13 @@ -16,6 +19,21 @@
1.14 int32_t v0, int32_t v1, uint16_t color, struct texture *tex);
1.15 static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);
1.16
1.17 +#ifdef USE_TEX
1.18 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty);
1.19 +
1.20 +/* TODO currently the linkscript will put statics in iwram. make this explicit */
1.21 +#define TCACHE_XSZ 16
1.22 +#define TCACHE_YSZ 16
1.23 +
1.24 +static struct {
1.25 + uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ];
1.26 + struct texture *tex;
1.27 + int orig_x, orig_y;
1.28 +} tcache;
1.29 +#endif /* USE_TEX */
1.30 +
1.31 +
1.32 void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color,
1.33 struct texture *tex)
1.34 {
1.35 @@ -27,7 +45,12 @@
1.36 int start, end;
1.37 pvec3 v0, v1;
1.38
1.39 - int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */
1.40 +#ifdef USE_TEX
1.41 + int32_t ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */
1.42 +#else
1.43 + static /* to avoid lu,lv,ru,rv uninitialized warnings */
1.44 +#endif
1.45 + int32_t lu, lv, ru, rv;
1.46
1.47 v0.x = verts[1].x - verts[0].x;
1.48 v0.y = verts[1].y - verts[0].y;
1.49 @@ -70,10 +93,12 @@
1.50 }
1.51 lx = verts[lidx[0]].x;
1.52 ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
1.53 +#ifdef USE_TEX
1.54 lu = texcoords[lidx[0]].x;
1.55 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
1.56 lv = texcoords[lidx[0]].y;
1.57 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
1.58 +#endif
1.59
1.60 /* find starting right edge */
1.61 ridx[1] = VNEXT(ridx[0], num);
1.62 @@ -91,10 +116,12 @@
1.63 }
1.64 rx = verts[ridx[0]].x;
1.65 rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
1.66 +#ifdef USE_TEX
1.67 ru = texcoords[ridx[0]].x;
1.68 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
1.69 rv = texcoords[ridx[0]].y;
1.70 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
1.71 +#endif
1.72
1.73 start = topy >> 16;
1.74 end = boty >> 16;
1.75 @@ -113,18 +140,23 @@
1.76 break;
1.77 }
1.78
1.79 + if(ldy) {
1.80 + ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
1.81 + } else {
1.82 + ldxdy = verts[lidx[1]].x - lx;
1.83 + }
1.84 +
1.85 +#ifdef USE_TEX
1.86 lu = texcoords[lidx[0]].x;
1.87 lv = texcoords[lidx[0]].y;
1.88 -
1.89 if(ldy) {
1.90 - ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
1.91 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
1.92 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
1.93 } else {
1.94 - ldxdy = verts[lidx[1]].x - lx;
1.95 ldudy = texcoords[lidx[1]].x - lu;
1.96 ldvdy = texcoords[lidx[1]].y - lv;
1.97 }
1.98 +#endif /* USE_TEX */
1.99 }
1.100 if(y >= verts[ridx[1]].y) {
1.101 rx = verts[ridx[1]].x;
1.102 @@ -135,18 +167,23 @@
1.103 break;
1.104 }
1.105
1.106 + if(rdy) {
1.107 + rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
1.108 + } else {
1.109 + rdxdy = verts[ridx[1]].x - rx;
1.110 + }
1.111 +
1.112 +#ifdef USE_TEX
1.113 ru = texcoords[ridx[0]].x;
1.114 rv = texcoords[ridx[0]].y;
1.115 -
1.116 if(rdy) {
1.117 - rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
1.118 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
1.119 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
1.120 } else {
1.121 - rdxdy = verts[ridx[1]].x - rx;
1.122 rdudy = texcoords[ridx[1]].x - ru;
1.123 rdvdy = texcoords[ridx[1]].y - rv;
1.124 }
1.125 +#endif /* USE_TEX */
1.126 }
1.127
1.128 if(i >= 0) {
1.129 @@ -161,10 +198,12 @@
1.130 rx += rdxdy;
1.131 y += 65536;
1.132
1.133 +#ifdef USE_TEX
1.134 lu += ldudy;
1.135 lv += ldvdy;
1.136 ru += rdudy;
1.137 rv += rdvdy;
1.138 +#endif
1.139 }
1.140 }
1.141
1.142 @@ -173,8 +212,10 @@
1.143 int32_t v0, int32_t v1, uint8_t color)
1.144 {
1.145 int ix0, ix1;
1.146 +#ifdef USE_TEX
1.147 int32_t dx;
1.148 int32_t u, v, dudx, dvdx;
1.149 +#endif
1.150
1.151 int i, num_pairs, num_pix;
1.152 uint16_t *pixels;
1.153 @@ -186,6 +227,7 @@
1.154 x1 = tmp;
1.155 }
1.156
1.157 +#ifdef USE_TEX
1.158 dx = x1 - x0;
1.159
1.160 u = u0;
1.161 @@ -197,6 +239,7 @@
1.162 dudx = u1 - u0;
1.163 dvdx = v1 - v0;
1.164 }
1.165 +#endif
1.166
1.167 ix0 = (x0 + 32768) >> 16;
1.168 ix1 = (x1 + 32768) >> 16;
1.169 @@ -211,16 +254,20 @@
1.170 uint16_t pix = *pixels & 0xff;
1.171 *pixels++ = pix | ((uint16_t)color << 8);
1.172 --num_pix;
1.173 +#ifdef USE_TEX
1.174 u += dudx;
1.175 v += dvdx;
1.176 +#endif
1.177 }
1.178
1.179 num_pairs = (num_pix & 0xfffe) / 2;
1.180
1.181 for(i=0; i<num_pairs; i++) {
1.182 *pixels++ = colpair;
1.183 +#ifdef USE_TEX
1.184 u += dudx * 2;
1.185 v += dvdx * 2;
1.186 +#endif
1.187 }
1.188
1.189 if(num_pix & 1) {
1.190 @@ -234,8 +281,10 @@
1.191 {
1.192 int i, ix0, ix1;
1.193 uint16_t *pixels;
1.194 +#ifdef USE_TEX
1.195 int32_t dx;
1.196 int32_t u, v, dudx, dvdx;
1.197 +#endif
1.198
1.199 if(x0 > x1) {
1.200 int32_t tmp = x0;
1.201 @@ -243,6 +292,7 @@
1.202 x1 = tmp;
1.203 }
1.204
1.205 +#ifdef USE_TEX
1.206 dx = x1 - x0;
1.207
1.208 u = u0;
1.209 @@ -254,6 +304,7 @@
1.210 dudx = u1 - u0;
1.211 dvdx = v1 - v0;
1.212 }
1.213 +#endif
1.214
1.215 ix0 = (x0 + 32768) >> 16;
1.216 ix1 = (x1 + 32768) >> 16;
1.217 @@ -262,24 +313,23 @@
1.218 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
1.219
1.220 pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
1.221 - for(i=ix0; i<ix1; i++) {
1.222 - /**pixels++ = color;*/
1.223 - int cr = u >> 8;
1.224 - int cg = v >> 8;
1.225 - if(cr > 255) cr = 255;
1.226 - if(cg > 255) cg = 255;
1.227 -
1.228 - if(tex) {
1.229 +#ifdef USE_TEX
1.230 + if(tex) {
1.231 + for(i=ix0; i<ix1; i++) {
1.232 int tx = (u >> (16 - tex->ushift)) & tex->umask;
1.233 int ty = (v >> (16 - tex->vshift)) & tex->vmask;
1.234 - uint16_t texel = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx];
1.235 - *pixels++ = texel;
1.236 - } else {
1.237 + *pixels++ = tex_lookup(tex, tx, ty);
1.238 + //*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + yx];
1.239 +
1.240 + u += dudx;
1.241 + v += dvdx;
1.242 + }
1.243 + } else
1.244 +#endif
1.245 + {
1.246 + for(i=ix0; i<ix1; i++) {
1.247 *pixels++ = color;
1.248 }
1.249 -
1.250 - u += dudx;
1.251 - v += dvdx;
1.252 }
1.253 }
1.254
1.255 @@ -310,3 +360,53 @@
1.256 {
1.257 return x16mul(x0, y1) - x16mul(y0, x1);
1.258 }
1.259 +
1.260 +#ifdef USE_TEX
1.261 +#define MIN(a, b) ((a) < (b) ? (a) : (b))
1.262 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty)
1.263 +{
1.264 + int x, y, i, j;
1.265 + int width, height;
1.266 + uint16_t *ptr;
1.267 + int cx = tx - tcache.orig_x;
1.268 + int cy = ty - tcache.orig_y;
1.269 +
1.270 + if(tcache.tex == tex && cx >= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) {
1.271 + return tcache.pixels[cy * TCACHE_XSZ + cx];
1.272 + }
1.273 +
1.274 + tcache.tex = tex;
1.275 + x = tx - TCACHE_XSZ / 2;
1.276 + y = ty - TCACHE_YSZ / 2;
1.277 +
1.278 + if(x + TCACHE_XSZ > tex->xsz) {
1.279 + x = tex->xsz - TCACHE_XSZ;
1.280 + }
1.281 + if(y + TCACHE_YSZ > tex->ysz) {
1.282 + y = tex->ysz - TCACHE_YSZ;
1.283 + }
1.284 +
1.285 + if(x < 0) x = 0;
1.286 + if(y < 0) y = 0;
1.287 +
1.288 + width = MIN(TCACHE_XSZ, tex->xsz - x);
1.289 + height = MIN(TCACHE_YSZ, tex->ysz - y);
1.290 +
1.291 + tcache.orig_x = x;
1.292 + tcache.orig_y = y;
1.293 +
1.294 + /*logmsg(LOG_DBG, "lookup(%d, %d): loading %dx%d+%d+%d into cache %p\n", tx, ty,
1.295 + width, height, x, y, tcache.pixels);
1.296 + */
1.297 +
1.298 + ptr = (uint16_t*)tex->pixels + y * tex->xsz + x;
1.299 + for(i=0; i<height; i++) {
1.300 + memcpy(tcache.pixels + i * TCACHE_XSZ, ptr, width * 2);
1.301 + ptr += tex->xsz;
1.302 + }
1.303 +
1.304 + cx = tx - tcache.orig_x;
1.305 + cy = ty - tcache.orig_y;
1.306 + return tcache.pixels[cy * TCACHE_XSZ + cx];
1.307 +}
1.308 +#endif /* USE_TEX */
```