gba-x3dtest
changeset 19:62390f9cc93e
texture cache optimization failed
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Mon, 30 Jun 2014 09:07:41 +0300 |
parents | f907b2c50a8b |
children | 2e903e27e35a |
files | Makefile src/game.c src/logger.c src/polyfill.c src/x3d.c |
diffstat | 5 files changed, 139 insertions(+), 27 deletions(-) |
line diff
1.1 --- a/Makefile Thu Jun 26 21:48:09 2014 +0300 1.2 +++ b/Makefile Mon Jun 30 09:07:41 2014 +0300 1.3 @@ -17,7 +17,7 @@ 1.4 EMU = vbam 1.5 1.6 opt = -O3 -fomit-frame-pointer -mcpu=arm7tdmi -mtune=arm7tdmi 1.7 -#dbg = -g 1.8 +dbg = -g 1.9 1.10 CFLAGS = $(opt) $(dbg) -pedantic -Wall -I. -I../gbasys/src 1.11 LDFLAGS = ../gbasys/libgbasys.a -lm
2.1 --- a/src/game.c Thu Jun 26 21:48:09 2014 +0300 2.2 +++ b/src/game.c Mon Jun 30 09:07:41 2014 +0300 2.3 @@ -23,6 +23,7 @@ 2.4 static struct mesh box; 2.5 static int tex; 2.6 static int fps; 2.7 +static short show_fps = 1; 2.8 2.9 int game_init(void) 2.10 { 2.11 @@ -95,7 +96,9 @@ 2.12 draw_mesh(&box); 2.13 x3d_disable_texture(); 2.14 2.15 - draw_fps_meter(msec); 2.16 + if(show_fps) { 2.17 + draw_fps_meter(msec); 2.18 + } 2.19 2.20 flip(); 2.21 } 2.22 @@ -109,7 +112,7 @@ 2.23 2.24 ++nframes; 2.25 2.26 - if(dt >= 1000) { 2.27 + if(dt >= 1500) { 2.28 last_msec = msec; 2.29 fps = 1000 * nframes / dt; 2.30 nframes = 0; 2.31 @@ -119,7 +122,7 @@ 2.32 bar_height = fps * 4; 2.33 if(bar_height > HEIGHT) bar_height = HEIGHT; 2.34 2.35 - draw_rect(0, HEIGHT - bar_height, 1, bar_height, RGB(0, 255, 0)); 2.36 + draw_rect(0, HEIGHT - bar_height - 1, 1, bar_height, RGB(0, 255, 0)); 2.37 } 2.38 2.39 void game_keyb(int key, int pressed) 2.40 @@ -135,6 +138,10 @@ 2.41 dbg_fill_dump = 1; 2.42 break; 2.43 2.44 + case KEY_START: 2.45 + show_fps = !show_fps; 2.46 + break; 2.47 + 2.48 default: 2.49 break; 2.50 }
3.1 --- a/src/logger.c Thu Jun 26 21:48:09 2014 +0300 3.2 +++ b/src/logger.c Mon Jun 30 09:07:41 2014 +0300 3.3 @@ -71,7 +71,7 @@ 3.4 while(*str) putchr(*str++); 3.5 } 3.6 3.7 -#ifdef __ARM__ 3.8 +#ifdef __arm__ 3.9 static void agbprint(const char *str) 3.10 { 3.11 asm volatile (
4.1 --- a/src/polyfill.c Thu Jun 26 21:48:09 2014 +0300 4.2 +++ b/src/polyfill.c Mon Jun 30 09:07:41 2014 +0300 4.3 @@ -4,6 +4,9 @@ 4.4 #include "polyfill.h" 4.5 #include "fixed.h" 4.6 #include "gbasys.h" 4.7 +#include "logger.h" 4.8 + 4.9 +#define USE_TEX 4.10 4.11 /* TODO: constant interpolant optimization */ 4.12 4.13 @@ -16,6 +19,21 @@ 4.14 int32_t v0, int32_t v1, uint16_t color, struct texture *tex); 4.15 static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1); 4.16 4.17 +#ifdef USE_TEX 4.18 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty); 4.19 + 4.20 +/* TODO currently the linkscript will put statics in iwram. make this explicit */ 4.21 +#define TCACHE_XSZ 16 4.22 +#define TCACHE_YSZ 16 4.23 + 4.24 +static struct { 4.25 + uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ]; 4.26 + struct texture *tex; 4.27 + int orig_x, orig_y; 4.28 +} tcache; 4.29 +#endif /* USE_TEX */ 4.30 + 4.31 + 4.32 void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color, 4.33 struct texture *tex) 4.34 { 4.35 @@ -27,7 +45,12 @@ 4.36 int start, end; 4.37 pvec3 v0, v1; 4.38 4.39 - int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */ 4.40 +#ifdef USE_TEX 4.41 + int32_t ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */ 4.42 +#else 4.43 + static /* to avoid lu,lv,ru,rv uninitialized warnings */ 4.44 +#endif 4.45 + int32_t lu, lv, ru, rv; 4.46 4.47 v0.x = verts[1].x - verts[0].x; 4.48 v0.y = verts[1].y - verts[0].y; 4.49 @@ -70,10 +93,12 @@ 4.50 } 4.51 lx = verts[lidx[0]].x; 4.52 ldxdy = x16div(verts[lidx[1]].x - lx, ldy); 4.53 +#ifdef USE_TEX 4.54 lu = texcoords[lidx[0]].x; 4.55 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy); 4.56 lv = texcoords[lidx[0]].y; 4.57 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy); 4.58 +#endif 4.59 4.60 /* find starting right edge */ 4.61 ridx[1] = VNEXT(ridx[0], num); 4.62 @@ -91,10 +116,12 @@ 4.63 } 4.64 rx = verts[ridx[0]].x; 4.65 rdxdy = x16div(verts[ridx[1]].x - rx, rdy); 4.66 +#ifdef 
USE_TEX 4.67 ru = texcoords[ridx[0]].x; 4.68 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy); 4.69 rv = texcoords[ridx[0]].y; 4.70 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy); 4.71 +#endif 4.72 4.73 start = topy >> 16; 4.74 end = boty >> 16; 4.75 @@ -113,18 +140,23 @@ 4.76 break; 4.77 } 4.78 4.79 + if(ldy) { 4.80 + ldxdy = x16div(verts[lidx[1]].x - lx, ldy); 4.81 + } else { 4.82 + ldxdy = verts[lidx[1]].x - lx; 4.83 + } 4.84 + 4.85 +#ifdef USE_TEX 4.86 lu = texcoords[lidx[0]].x; 4.87 lv = texcoords[lidx[0]].y; 4.88 - 4.89 if(ldy) { 4.90 - ldxdy = x16div(verts[lidx[1]].x - lx, ldy); 4.91 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy); 4.92 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy); 4.93 } else { 4.94 - ldxdy = verts[lidx[1]].x - lx; 4.95 ldudy = texcoords[lidx[1]].x - lu; 4.96 ldvdy = texcoords[lidx[1]].y - lv; 4.97 } 4.98 +#endif /* USE_TEX */ 4.99 } 4.100 if(y >= verts[ridx[1]].y) { 4.101 rx = verts[ridx[1]].x; 4.102 @@ -135,18 +167,23 @@ 4.103 break; 4.104 } 4.105 4.106 + if(rdy) { 4.107 + rdxdy = x16div(verts[ridx[1]].x - rx, rdy); 4.108 + } else { 4.109 + rdxdy = verts[ridx[1]].x - rx; 4.110 + } 4.111 + 4.112 +#ifdef USE_TEX 4.113 ru = texcoords[ridx[0]].x; 4.114 rv = texcoords[ridx[0]].y; 4.115 - 4.116 if(rdy) { 4.117 - rdxdy = x16div(verts[ridx[1]].x - rx, rdy); 4.118 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy); 4.119 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy); 4.120 } else { 4.121 - rdxdy = verts[ridx[1]].x - rx; 4.122 rdudy = texcoords[ridx[1]].x - ru; 4.123 rdvdy = texcoords[ridx[1]].y - rv; 4.124 } 4.125 +#endif /* USE_TEX */ 4.126 } 4.127 4.128 if(i >= 0) { 4.129 @@ -161,10 +198,12 @@ 4.130 rx += rdxdy; 4.131 y += 65536; 4.132 4.133 +#ifdef USE_TEX 4.134 lu += ldudy; 4.135 lv += ldvdy; 4.136 ru += rdudy; 4.137 rv += rdvdy; 4.138 +#endif 4.139 } 4.140 } 4.141 4.142 @@ -173,8 +212,10 @@ 4.143 int32_t v0, int32_t v1, uint8_t color) 4.144 { 4.145 int ix0, ix1; 4.146 +#ifdef USE_TEX 4.147 int32_t dx; 4.148 int32_t u, v, dudx, dvdx; 4.149 +#endif 
4.150 4.151 int i, num_pairs, num_pix; 4.152 uint16_t *pixels; 4.153 @@ -186,6 +227,7 @@ 4.154 x1 = tmp; 4.155 } 4.156 4.157 +#ifdef USE_TEX 4.158 dx = x1 - x0; 4.159 4.160 u = u0; 4.161 @@ -197,6 +239,7 @@ 4.162 dudx = u1 - u0; 4.163 dvdx = v1 - v0; 4.164 } 4.165 +#endif 4.166 4.167 ix0 = (x0 + 32768) >> 16; 4.168 ix1 = (x1 + 32768) >> 16; 4.169 @@ -211,16 +254,20 @@ 4.170 uint16_t pix = *pixels & 0xff; 4.171 *pixels++ = pix | ((uint16_t)color << 8); 4.172 --num_pix; 4.173 +#ifdef USE_TEX 4.174 u += dudx; 4.175 v += dvdx; 4.176 +#endif 4.177 } 4.178 4.179 num_pairs = (num_pix & 0xfffe) / 2; 4.180 4.181 for(i=0; i<num_pairs; i++) { 4.182 *pixels++ = colpair; 4.183 +#ifdef USE_TEX 4.184 u += dudx * 2; 4.185 v += dvdx * 2; 4.186 +#endif 4.187 } 4.188 4.189 if(num_pix & 1) { 4.190 @@ -234,8 +281,10 @@ 4.191 { 4.192 int i, ix0, ix1; 4.193 uint16_t *pixels; 4.194 +#ifdef USE_TEX 4.195 int32_t dx; 4.196 int32_t u, v, dudx, dvdx; 4.197 +#endif 4.198 4.199 if(x0 > x1) { 4.200 int32_t tmp = x0; 4.201 @@ -243,6 +292,7 @@ 4.202 x1 = tmp; 4.203 } 4.204 4.205 +#ifdef USE_TEX 4.206 dx = x1 - x0; 4.207 4.208 u = u0; 4.209 @@ -254,6 +304,7 @@ 4.210 dudx = u1 - u0; 4.211 dvdx = v1 - v0; 4.212 } 4.213 +#endif 4.214 4.215 ix0 = (x0 + 32768) >> 16; 4.216 ix1 = (x1 + 32768) >> 16; 4.217 @@ -262,24 +313,23 @@ 4.218 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1; 4.219 4.220 pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0; 4.221 - for(i=ix0; i<ix1; i++) { 4.222 - /**pixels++ = color;*/ 4.223 - int cr = u >> 8; 4.224 - int cg = v >> 8; 4.225 - if(cr > 255) cr = 255; 4.226 - if(cg > 255) cg = 255; 4.227 - 4.228 - if(tex) { 4.229 +#ifdef USE_TEX 4.230 + if(tex) { 4.231 + for(i=ix0; i<ix1; i++) { 4.232 int tx = (u >> (16 - tex->ushift)) & tex->umask; 4.233 int ty = (v >> (16 - tex->vshift)) & tex->vmask; 4.234 - uint16_t texel = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx]; 4.235 - *pixels++ = texel; 4.236 - } else { 4.237 + *pixels++ = tex_lookup(tex, tx, ty); 4.238 + //*pixels++ = 
((uint16_t*)tex->pixels)[ty * tex->xsz + yx]; 4.239 + 4.240 + u += dudx; 4.241 + v += dvdx; 4.242 + } 4.243 + } else 4.244 +#endif 4.245 + { 4.246 + for(i=ix0; i<ix1; i++) { 4.247 *pixels++ = color; 4.248 } 4.249 - 4.250 - u += dudx; 4.251 - v += dvdx; 4.252 } 4.253 } 4.254 4.255 @@ -310,3 +360,53 @@ 4.256 { 4.257 return x16mul(x0, y1) - x16mul(y0, x1); 4.258 } 4.259 + 4.260 +#ifdef USE_TEX 4.261 +#define MIN(a, b) ((a) < (b) ? (a) : (b)) 4.262 +static uint16_t tex_lookup(struct texture *tex, int tx, int ty) 4.263 +{ 4.264 + int x, y, i, j; 4.265 + int width, height; 4.266 + uint16_t *ptr; 4.267 + int cx = tx - tcache.orig_x; 4.268 + int cy = ty - tcache.orig_y; 4.269 + 4.270 + if(tcache.tex == tex && cx >= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) { 4.271 + return tcache.pixels[cy * TCACHE_XSZ + cx]; 4.272 + } 4.273 + 4.274 + tcache.tex = tex; 4.275 + x = tx - TCACHE_XSZ / 2; 4.276 + y = ty - TCACHE_YSZ / 2; 4.277 + 4.278 + if(x + TCACHE_XSZ > tex->xsz) { 4.279 + x = tex->xsz - TCACHE_XSZ; 4.280 + } 4.281 + if(y + TCACHE_YSZ > tex->ysz) { 4.282 + y = tex->ysz - TCACHE_YSZ; 4.283 + } 4.284 + 4.285 + if(x < 0) x = 0; 4.286 + if(y < 0) y = 0; 4.287 + 4.288 + width = MIN(TCACHE_XSZ, tex->xsz - x); 4.289 + height = MIN(TCACHE_YSZ, tex->ysz - y); 4.290 + 4.291 + tcache.orig_x = x; 4.292 + tcache.orig_y = y; 4.293 + 4.294 + /*logmsg(LOG_DBG, "lookup(%d, %d): loading %dx%d+%d+%d into cache %p\n", tx, ty, 4.295 + width, height, x, y, tcache.pixels); 4.296 + */ 4.297 + 4.298 + ptr = (uint16_t*)tex->pixels + y * tex->xsz + x; 4.299 + for(i=0; i<height; i++) { 4.300 + memcpy(tcache.pixels + i * TCACHE_XSZ, ptr, width * 2); 4.301 + ptr += tex->xsz; 4.302 + } 4.303 + 4.304 + cx = tx - tcache.orig_x; 4.305 + cy = ty - tcache.orig_y; 4.306 + return tcache.pixels[cy * TCACHE_XSZ + cx]; 4.307 +} 4.308 +#endif /* USE_TEX */
5.1 --- a/src/x3d.c Thu Jun 26 21:48:09 2014 +0300 5.2 +++ b/src/x3d.c Mon Jun 30 09:07:41 2014 +0300 5.3 @@ -1,5 +1,6 @@ 5.4 #include "config.h" 5.5 #include <stdio.h> 5.6 +#include <stdlib.h> 5.7 #include <string.h> 5.8 #include <math.h> 5.9 #include "x3d.h" 5.10 @@ -351,7 +352,9 @@ 5.11 5.12 for(i=0; i<MAX_TEXTURES; i++) { 5.13 if(!textures[i].pixels) { 5.14 - textures[i].pixels = pixels; 5.15 + /*textures[i].pixels = pixels;*/ 5.16 + textures[i].pixels = malloc(xsz * ysz * 2); 5.17 + memcpy(textures[i].pixels, pixels, xsz * ysz * 2); 5.18 textures[i].xsz = xsz; 5.19 textures[i].ysz = ysz; 5.20 textures[i].umask = xsz - 1; 5.21 @@ -366,6 +369,8 @@ 5.22 } 5.23 } 5.24 5.25 + logmsg(LOG_DBG, "create texture %dx%d: %p\n", xsz, ysz, pixels); 5.26 + 5.27 return i; 5.28 } 5.29 }