# HG changeset patch # User John Tsiombikas # Date 1404108461 -10800 # Node ID 62390f9cc93e75fdaf8e6af74240c68822f992af # Parent f907b2c50a8bef06f9307961d157a65c63bdbd5a texture cache optimization failed diff -r f907b2c50a8b -r 62390f9cc93e Makefile --- a/Makefile Thu Jun 26 21:48:09 2014 +0300 +++ b/Makefile Mon Jun 30 09:07:41 2014 +0300 @@ -17,7 +17,7 @@ EMU = vbam opt = -O3 -fomit-frame-pointer -mcpu=arm7tdmi -mtune=arm7tdmi -#dbg = -g +dbg = -g CFLAGS = $(opt) $(dbg) -pedantic -Wall -I. -I../gbasys/src LDFLAGS = ../gbasys/libgbasys.a -lm diff -r f907b2c50a8b -r 62390f9cc93e src/game.c --- a/src/game.c Thu Jun 26 21:48:09 2014 +0300 +++ b/src/game.c Mon Jun 30 09:07:41 2014 +0300 @@ -23,6 +23,7 @@ static struct mesh box; static int tex; static int fps; +static short show_fps = 1; int game_init(void) { @@ -95,7 +96,9 @@ draw_mesh(&box); x3d_disable_texture(); - draw_fps_meter(msec); + if(show_fps) { + draw_fps_meter(msec); + } flip(); } @@ -109,7 +112,7 @@ ++nframes; - if(dt >= 1000) { + if(dt >= 1500) { last_msec = msec; fps = 1000 * nframes / dt; nframes = 0; @@ -119,7 +122,7 @@ bar_height = fps * 4; if(bar_height > HEIGHT) bar_height = HEIGHT; - draw_rect(0, HEIGHT - bar_height, 1, bar_height, RGB(0, 255, 0)); + draw_rect(0, HEIGHT - bar_height - 1, 1, bar_height, RGB(0, 255, 0)); } void game_keyb(int key, int pressed) @@ -135,6 +138,10 @@ dbg_fill_dump = 1; break; + case KEY_START: + show_fps = !show_fps; + break; + default: break; } diff -r f907b2c50a8b -r 62390f9cc93e src/logger.c --- a/src/logger.c Thu Jun 26 21:48:09 2014 +0300 +++ b/src/logger.c Mon Jun 30 09:07:41 2014 +0300 @@ -71,7 +71,7 @@ while(*str) putchr(*str++); } -#ifdef __ARM__ +#ifdef __arm__ static void agbprint(const char *str) { asm volatile ( diff -r f907b2c50a8b -r 62390f9cc93e src/polyfill.c --- a/src/polyfill.c Thu Jun 26 21:48:09 2014 +0300 +++ b/src/polyfill.c Mon Jun 30 09:07:41 2014 +0300 @@ -4,6 +4,9 @@ #include "polyfill.h" #include "fixed.h" #include "gbasys.h" +#include "logger.h" + +#define USE_TEX /* TODO: constant interpolant optimization */ @@ -16,6 +19,21 @@ int32_t v0, int32_t v1, uint16_t color, struct texture *tex); static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1); +#ifdef USE_TEX +static uint16_t tex_lookup(struct texture *tex, int tx, int ty); + +/* TODO currently the linkscript will put statics in iwram. make this explicit */ +#define TCACHE_XSZ 16 +#define TCACHE_YSZ 16 + +static struct { + uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ]; + struct texture *tex; + int orig_x, orig_y; +} tcache; +#endif /* USE_TEX */ + + void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color, struct texture *tex) { @@ -27,7 +45,12 @@ int start, end; pvec3 v0, v1; - int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */ +#ifdef USE_TEX + int32_t ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */ +#else + static /* to avoid lu,lv,ru,rv uninitialized warnings */ +#endif + int32_t lu, lv, ru, rv; v0.x = verts[1].x - verts[0].x; v0.y = verts[1].y - verts[0].y; @@ -70,10 +93,12 @@ } lx = verts[lidx[0]].x; ldxdy = x16div(verts[lidx[1]].x - lx, ldy); +#ifdef USE_TEX lu = texcoords[lidx[0]].x; ldudy = x16div(texcoords[lidx[1]].x - lu, ldy); lv = texcoords[lidx[0]].y; ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy); +#endif /* find starting right edge */ ridx[1] = VNEXT(ridx[0], num); @@ -91,10 +116,12 @@ } rx = verts[ridx[0]].x; rdxdy = x16div(verts[ridx[1]].x - rx, rdy); +#ifdef USE_TEX ru = texcoords[ridx[0]].x; rdudy = x16div(texcoords[ridx[1]].x - ru, rdy); rv = texcoords[ridx[0]].y; rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy); +#endif start = topy >> 16; end = boty >> 16; @@ -113,18 +140,23 @@ break; } + if(ldy) { + ldxdy = x16div(verts[lidx[1]].x - lx, ldy); + } else { + ldxdy = verts[lidx[1]].x - lx; + } + +#ifdef USE_TEX lu = texcoords[lidx[0]].x; lv = texcoords[lidx[0]].y; - if(ldy) { - ldxdy = x16div(verts[lidx[1]].x - lx, ldy); ldudy = x16div(texcoords[lidx[1]].x - lu, ldy); ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy); } else { - ldxdy = verts[lidx[1]].x - lx; ldudy = texcoords[lidx[1]].x - lu; ldvdy = texcoords[lidx[1]].y - lv; } +#endif /* USE_TEX */ } if(y >= verts[ridx[1]].y) { rx = verts[ridx[1]].x; @@ -135,18 +167,23 @@ break; } + if(rdy) { + rdxdy = x16div(verts[ridx[1]].x - rx, rdy); + } else { + rdxdy = verts[ridx[1]].x - rx; + } + +#ifdef USE_TEX ru = texcoords[ridx[0]].x; rv = texcoords[ridx[0]].y; - if(rdy) { - rdxdy = x16div(verts[ridx[1]].x - rx, rdy); rdudy = x16div(texcoords[ridx[1]].x - ru, rdy); rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy); } else { - rdxdy = verts[ridx[1]].x - rx; rdudy = texcoords[ridx[1]].x - ru; rdvdy = texcoords[ridx[1]].y - rv; } +#endif /* USE_TEX */ } if(i >= 0) { @@ -161,10 +198,12 @@ rx += rdxdy; y += 65536; +#ifdef USE_TEX lu += ldudy; lv += ldvdy; ru += rdudy; rv += rdvdy; +#endif } } @@ -173,8 +212,10 @@ int32_t v0, int32_t v1, uint8_t color) { int ix0, ix1; +#ifdef USE_TEX int32_t dx; int32_t u, v, dudx, dvdx; +#endif int i, num_pairs, num_pix; uint16_t *pixels; @@ -186,6 +227,7 @@ x1 = tmp; } +#ifdef USE_TEX dx = x1 - x0; u = u0; @@ -197,6 +239,7 @@ dudx = u1 - u0; dvdx = v1 - v0; } +#endif ix0 = (x0 + 32768) >> 16; ix1 = (x1 + 32768) >> 16; @@ -211,16 +254,20 @@ uint16_t pix = *pixels & 0xff; *pixels++ = pix | ((uint16_t)color << 8); --num_pix; +#ifdef USE_TEX u += dudx; v += dvdx; +#endif } num_pairs = (num_pix & 0xfffe) / 2; for(i=0; i x1) { int32_t tmp = x0; @@ -243,6 +292,7 @@ x1 = tmp; } +#ifdef USE_TEX dx = x1 - x0; u = u0; @@ -254,6 +304,7 @@ dudx = u1 - u0; dvdx = v1 - v0; } +#endif ix0 = (x0 + 32768) >> 16; ix1 = (x1 + 32768) >> 16; @@ -262,24 +313,23 @@ if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1; pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0; - for(i=ix0; i> 8; - int cg = v >> 8; - if(cr > 255) cr = 255; - if(cg > 255) cg = 255; - - if(tex) { +#ifdef USE_TEX + if(tex) { + for(i=ix0; i> (16 - tex->ushift)) & tex->umask; int ty = (v >> (16 - tex->vshift)) & tex->vmask; - uint16_t texel = ((uint16_t*)tex->pixels)[ty * tex->xsz + tx]; - *pixels++ = texel; - } else { + *pixels++ = tex_lookup(tex, tx, ty); + //*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + yx]; + + u += dudx; + v += dvdx; + } + } else +#endif + { + for(i=ix0; i= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) { + return tcache.pixels[cy * TCACHE_XSZ + cx]; + } + + tcache.tex = tex; + x = tx - TCACHE_XSZ / 2; + y = ty - TCACHE_YSZ / 2; + + if(x + TCACHE_XSZ > tex->xsz) { + x = tex->xsz - TCACHE_XSZ; + } + if(y + TCACHE_YSZ > tex->ysz) { + y = tex->ysz - TCACHE_YSZ; + } + + if(x < 0) x = 0; + if(y < 0) y = 0; + + width = MIN(TCACHE_XSZ, tex->xsz - x); + height = MIN(TCACHE_YSZ, tex->ysz - y); + + tcache.orig_x = x; + tcache.orig_y = y; + + /*logmsg(LOG_DBG, "lookup(%d, %d): loading %dx%d+%d+%d into cache %p\n", tx, ty, + width, height, x, y, tcache.pixels); + */ + + ptr = (uint16_t*)tex->pixels + y * tex->xsz + x; + for(i=0; ixsz; + } + + cx = tx - tcache.orig_x; + cy = ty - tcache.orig_y; + return tcache.pixels[cy * TCACHE_XSZ + cx]; +} +#endif /* USE_TEX */ diff -r f907b2c50a8b -r 62390f9cc93e src/x3d.c --- a/src/x3d.c Thu Jun 26 21:48:09 2014 +0300 +++ b/src/x3d.c Mon Jun 30 09:07:41 2014 +0300 @@ -1,5 +1,6 @@ #include "config.h" #include +#include #include #include #include "x3d.h" @@ -351,7 +352,9 @@ for(i=0; i