gba-x3dtest

annotate src/polyfill.c @ 19:62390f9cc93e

texture cache optimization failed
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 30 Jun 2014 09:07:41 +0300
parents 0a7f402892b3
children 2e903e27e35a
rev   line source
nuclear@5 1 #include "config.h"
nuclear@5 2 #include <string.h>
nuclear@6 3 #include <assert.h>
nuclear@5 4 #include "polyfill.h"
nuclear@5 5 #include "fixed.h"
nuclear@5 6 #include "gbasys.h"
nuclear@19 7 #include "logger.h"
nuclear@19 8
nuclear@19 9 #define USE_TEX
nuclear@5 10
nuclear@15 11 /* TODO: constant interpolant optimization */
nuclear@15 12
/* Index of the vertex after x in a ring of n vertices (n-1 wraps to 0). */
#define VNEXT(x, n)	((1 + (x)) % (n))
/* Index of the vertex before x in a ring of n vertices (0 wraps to n-1). */
#define VPREV(x, n)	((x) <= 0 ? (n) - 1 : (x) - 1)
nuclear@9 15
/* Span filler used when PALMODE is defined: two 8-bit pixels per 16-bit word. */
static void fill_scanline_pal(int y,
		int32_t x0, int32_t x1,
		int32_t u0, int32_t u1,
		int32_t v0, int32_t v1,
		uint8_t color);

/* Span filler used otherwise: one 16-bit pixel per word, optionally textured. */
static void fill_scanline_rgb(int y,
		int32_t x0, int32_t x1,
		int32_t u0, int32_t u1,
		int32_t v0, int32_t v1,
		uint16_t color, struct texture *tex);

/* 2D cross product of (x0, y0) and (x1, y1); draw_poly culls on a negative result. */
static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);
nuclear@5 21
#ifdef USE_TEX
/* Fetch the texel at (tx, ty) of tex through the small cache below. */
static uint16_t tex_lookup(struct texture *tex, int tx, int ty);

/* TODO currently the linker script will put statics in iwram. make this explicit */
#define TCACHE_XSZ 16
#define TCACHE_YSZ 16

/*
 * Single-entry texel cache: a TCACHE_XSZ x TCACHE_YSZ window copied out of
 * one texture. tex_lookup() refills it whenever a request falls outside the
 * window or comes from a different texture.
 */
static struct {
	uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ];	/* cached texels, row-major */
	struct texture *tex;	/* texture the window was copied from */
	int orig_x;		/* texel x of the window's first column */
	int orig_y;		/* texel y of the window's first row */
} tcache;
#endif /* USE_TEX */
nuclear@19 35
nuclear@19 36
nuclear@17 37 void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color,
nuclear@17 38 struct texture *tex)
nuclear@5 39 {
nuclear@5 40 int i, topidx = 0, botidx = 0;
nuclear@9 41 int lidx[2] = {-1, -1}, ridx[2] = {-1, -1};
nuclear@5 42 int32_t y, topy, boty;
nuclear@9 43 int32_t ldy = 0, rdy = 0, ldxdy, rdxdy;
nuclear@5 44 int32_t lx, rx;
nuclear@5 45 int start, end;
nuclear@14 46 pvec3 v0, v1;
nuclear@14 47
nuclear@19 48 #ifdef USE_TEX
nuclear@19 49 int32_t ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */
nuclear@19 50 #else
nuclear@19 51 static /* to avoid lu,lv,ru,rv uninitialized warnings */
nuclear@19 52 #endif
nuclear@19 53 int32_t lu, lv, ru, rv;
nuclear@15 54
nuclear@14 55 v0.x = verts[1].x - verts[0].x;
nuclear@14 56 v0.y = verts[1].y - verts[0].y;
nuclear@14 57
nuclear@14 58 v1.x = verts[2].x - verts[0].x;
nuclear@14 59 v1.y = verts[2].y - verts[0].y;
nuclear@14 60
nuclear@14 61 if(winding(v0.x, v0.y, v1.x, v1.y) < 0) {
nuclear@14 62 return; /* backface */
nuclear@14 63 }
nuclear@5 64
nuclear@5 65 topy = boty = verts[0].y;
nuclear@5 66 for(i=1; i<num; i++) {
nuclear@5 67 int32_t y = verts[i].y;
nuclear@5 68 if(y < topy) {
nuclear@5 69 topy = y;
nuclear@5 70 topidx = i;
nuclear@5 71 }
nuclear@5 72 if(y > boty) {
nuclear@5 73 boty = y;
nuclear@5 74 botidx = i;
nuclear@5 75 }
nuclear@5 76 }
nuclear@5 77
nuclear@5 78 lidx[0] = ridx[0] = topidx;
nuclear@9 79
nuclear@9 80 /* find starting left edge */
nuclear@9 81 lidx[1] = VPREV(lidx[0], num);
nuclear@9 82 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
nuclear@9 83
nuclear@9 84 while(ldy == 0) {
nuclear@9 85 lidx[0] = lidx[1];
nuclear@9 86 lidx[1] = VPREV(lidx[1], num);
nuclear@9 87
nuclear@9 88 if(lidx[1] == topidx) {
nuclear@9 89 return; /* degenerate */
nuclear@9 90 }
nuclear@9 91
nuclear@9 92 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
nuclear@9 93 }
nuclear@9 94 lx = verts[lidx[0]].x;
nuclear@9 95 ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
nuclear@19 96 #ifdef USE_TEX
nuclear@17 97 lu = texcoords[lidx[0]].x;
nuclear@17 98 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
nuclear@17 99 lv = texcoords[lidx[0]].y;
nuclear@17 100 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
nuclear@19 101 #endif
nuclear@9 102
nuclear@9 103 /* find starting right edge */
nuclear@9 104 ridx[1] = VNEXT(ridx[0], num);
nuclear@9 105 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
nuclear@9 106
nuclear@9 107 while(rdy == 0) {
nuclear@9 108 ridx[0] = ridx[1];
nuclear@9 109 ridx[1] = VNEXT(ridx[1], num);
nuclear@9 110
nuclear@9 111 if(ridx[1] == topidx) {
nuclear@9 112 return; /* degenerate */
nuclear@9 113 }
nuclear@9 114
nuclear@9 115 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
nuclear@9 116 }
nuclear@9 117 rx = verts[ridx[0]].x;
nuclear@9 118 rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
nuclear@19 119 #ifdef USE_TEX
nuclear@17 120 ru = texcoords[ridx[0]].x;
nuclear@17 121 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
nuclear@17 122 rv = texcoords[ridx[0]].y;
nuclear@17 123 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
nuclear@19 124 #endif
nuclear@9 125
nuclear@5 126 start = topy >> 16;
nuclear@5 127 end = boty >> 16;
nuclear@5 128
nuclear@6 129 if(end >= HEIGHT) end = HEIGHT - 1;
nuclear@6 130
nuclear@5 131 y = topy;
nuclear@14 132 for(i=start; i<=end; i++) {
nuclear@5 133
nuclear@5 134 if(y >= verts[lidx[1]].y) {
nuclear@9 135 lx = verts[lidx[1]].x;
nuclear@5 136 lidx[0] = lidx[1];
nuclear@9 137 lidx[1] = VPREV(lidx[1], num);
nuclear@5 138 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
nuclear@5 139 if(ldy < 0) {
nuclear@5 140 break;
nuclear@5 141 }
nuclear@15 142
nuclear@19 143 if(ldy) {
nuclear@19 144 ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
nuclear@19 145 } else {
nuclear@19 146 ldxdy = verts[lidx[1]].x - lx;
nuclear@19 147 }
nuclear@19 148
nuclear@19 149 #ifdef USE_TEX
nuclear@17 150 lu = texcoords[lidx[0]].x;
nuclear@17 151 lv = texcoords[lidx[0]].y;
nuclear@14 152 if(ldy) {
nuclear@17 153 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
nuclear@17 154 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
nuclear@14 155 } else {
nuclear@17 156 ldudy = texcoords[lidx[1]].x - lu;
nuclear@17 157 ldvdy = texcoords[lidx[1]].y - lv;
nuclear@14 158 }
nuclear@19 159 #endif /* USE_TEX */
nuclear@5 160 }
nuclear@5 161 if(y >= verts[ridx[1]].y) {
nuclear@9 162 rx = verts[ridx[1]].x;
nuclear@5 163 ridx[0] = ridx[1];
nuclear@9 164 ridx[1] = VNEXT(ridx[1], num);
nuclear@5 165 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
nuclear@5 166 if(rdy < 0) {
nuclear@5 167 break;
nuclear@5 168 }
nuclear@15 169
nuclear@19 170 if(rdy) {
nuclear@19 171 rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
nuclear@19 172 } else {
nuclear@19 173 rdxdy = verts[ridx[1]].x - rx;
nuclear@19 174 }
nuclear@19 175
nuclear@19 176 #ifdef USE_TEX
nuclear@17 177 ru = texcoords[ridx[0]].x;
nuclear@17 178 rv = texcoords[ridx[0]].y;
nuclear@14 179 if(rdy) {
nuclear@17 180 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
nuclear@17 181 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
nuclear@14 182 } else {
nuclear@17 183 rdudy = texcoords[ridx[1]].x - ru;
nuclear@17 184 rdvdy = texcoords[ridx[1]].y - rv;
nuclear@14 185 }
nuclear@19 186 #endif /* USE_TEX */
nuclear@5 187 }
nuclear@5 188
nuclear@15 189 if(i >= 0) {
nuclear@9 190 #ifdef PALMODE
nuclear@15 191 fill_scanline_pal(i, lx, rx, lu, ru, lv, rv, (uint8_t)color);
nuclear@9 192 #else
nuclear@17 193 fill_scanline_rgb(i, lx, rx, lu, ru, lv, rv, color, tex);
nuclear@9 194 #endif
nuclear@9 195 }
nuclear@9 196
nuclear@5 197 lx += ldxdy;
nuclear@5 198 rx += rdxdy;
nuclear@6 199 y += 65536;
nuclear@15 200
nuclear@19 201 #ifdef USE_TEX
nuclear@15 202 lu += ldudy;
nuclear@15 203 lv += ldvdy;
nuclear@15 204 ru += rdudy;
nuclear@15 205 rv += rdvdy;
nuclear@19 206 #endif
nuclear@5 207 }
nuclear@5 208 }
nuclear@5 209
nuclear@9 210
nuclear@15 211 static void fill_scanline_pal(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
nuclear@15 212 int32_t v0, int32_t v1, uint8_t color)
nuclear@9 213 {
nuclear@15 214 int ix0, ix1;
nuclear@19 215 #ifdef USE_TEX
nuclear@15 216 int32_t dx;
nuclear@15 217 int32_t u, v, dudx, dvdx;
nuclear@19 218 #endif
nuclear@15 219
nuclear@15 220 int i, num_pairs, num_pix;
nuclear@15 221 uint16_t *pixels;
nuclear@9 222 uint16_t colpair = (uint16_t)color | ((uint16_t)color << 8);
nuclear@9 223
nuclear@15 224 if(x0 > x1) {
nuclear@15 225 int32_t tmp = x0;
nuclear@15 226 x0 = x1;
nuclear@15 227 x1 = tmp;
nuclear@15 228 }
nuclear@15 229
nuclear@19 230 #ifdef USE_TEX
nuclear@15 231 dx = x1 - x0;
nuclear@15 232
nuclear@15 233 u = u0;
nuclear@15 234 v = v0;
nuclear@15 235 if(dx) {
nuclear@15 236 dudx = x16div(u1 - u0, dx);
nuclear@15 237 dvdx = x16div(v1 - v0, dx);
nuclear@15 238 } else {
nuclear@15 239 dudx = u1 - u0;
nuclear@15 240 dvdx = v1 - v0;
nuclear@15 241 }
nuclear@19 242 #endif
nuclear@15 243
nuclear@15 244 ix0 = (x0 + 32768) >> 16;
nuclear@15 245 ix1 = (x1 + 32768) >> 16;
nuclear@15 246
nuclear@15 247 if(ix0 < 0) ix0 = 0;
nuclear@15 248 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
nuclear@15 249
nuclear@15 250 num_pix = ix1 - ix0;
nuclear@15 251 pixels = (uint16_t*)back_buffer->pixels + (y * WIDTH + ix0) / 2;
nuclear@15 252
nuclear@15 253 if(ix0 & 1) {
nuclear@9 254 uint16_t pix = *pixels & 0xff;
nuclear@9 255 *pixels++ = pix | ((uint16_t)color << 8);
nuclear@9 256 --num_pix;
nuclear@19 257 #ifdef USE_TEX
nuclear@15 258 u += dudx;
nuclear@15 259 v += dvdx;
nuclear@19 260 #endif
nuclear@9 261 }
nuclear@9 262
nuclear@9 263 num_pairs = (num_pix & 0xfffe) / 2;
nuclear@9 264
nuclear@9 265 for(i=0; i<num_pairs; i++) {
nuclear@9 266 *pixels++ = colpair;
nuclear@19 267 #ifdef USE_TEX
nuclear@15 268 u += dudx * 2;
nuclear@15 269 v += dvdx * 2;
nuclear@19 270 #endif
nuclear@9 271 }
nuclear@9 272
nuclear@9 273 if(num_pix & 1) {
nuclear@9 274 uint16_t pix = *pixels & 0xff00;
nuclear@9 275 *pixels = pix | color;
nuclear@9 276 }
nuclear@9 277 }
nuclear@9 278
nuclear@15 279 static void fill_scanline_rgb(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
nuclear@17 280 int32_t v0, int32_t v1, uint16_t color, struct texture *tex)
nuclear@5 281 {
nuclear@15 282 int i, ix0, ix1;
nuclear@14 283 uint16_t *pixels;
nuclear@19 284 #ifdef USE_TEX
nuclear@15 285 int32_t dx;
nuclear@15 286 int32_t u, v, dudx, dvdx;
nuclear@19 287 #endif
nuclear@5 288
nuclear@14 289 if(x0 > x1) {
nuclear@15 290 int32_t tmp = x0;
nuclear@14 291 x0 = x1;
nuclear@15 292 x1 = tmp;
nuclear@14 293 }
nuclear@14 294
nuclear@19 295 #ifdef USE_TEX
nuclear@15 296 dx = x1 - x0;
nuclear@14 297
nuclear@15 298 u = u0;
nuclear@15 299 v = v0;
nuclear@15 300 if(dx) {
nuclear@15 301 dudx = x16div(u1 - u0, dx);
nuclear@15 302 dvdx = x16div(v1 - v0, dx);
nuclear@15 303 } else {
nuclear@15 304 dudx = u1 - u0;
nuclear@15 305 dvdx = v1 - v0;
nuclear@15 306 }
nuclear@19 307 #endif
nuclear@15 308
nuclear@15 309 ix0 = (x0 + 32768) >> 16;
nuclear@15 310 ix1 = (x1 + 32768) >> 16;
nuclear@15 311
nuclear@15 312 if(ix0 < 0) ix0 = 0;
nuclear@15 313 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
nuclear@15 314
nuclear@15 315 pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
nuclear@19 316 #ifdef USE_TEX
nuclear@19 317 if(tex) {
nuclear@19 318 for(i=ix0; i<ix1; i++) {
nuclear@17 319 int tx = (u >> (16 - tex->ushift)) & tex->umask;
nuclear@17 320 int ty = (v >> (16 - tex->vshift)) & tex->vmask;
nuclear@19 321 *pixels++ = tex_lookup(tex, tx, ty);
nuclear@19 322 //*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + yx];
nuclear@19 323
nuclear@19 324 u += dudx;
nuclear@19 325 v += dvdx;
nuclear@19 326 }
nuclear@19 327 } else
nuclear@19 328 #endif
nuclear@19 329 {
nuclear@19 330 for(i=ix0; i<ix1; i++) {
nuclear@17 331 *pixels++ = color;
nuclear@17 332 }
nuclear@5 333 }
nuclear@5 334 }
nuclear@8 335
nuclear@8 336
nuclear@8 337 void draw_point(const pvec3 *v, uint16_t color)
nuclear@8 338 {
nuclear@8 339 int x = v->x >> 16;
nuclear@8 340 int y = v->y >> 16;
nuclear@8 341 uint16_t *pixels = (uint16_t*)back_buffer->pixels;
nuclear@8 342
nuclear@8 343 if(x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) {
nuclear@8 344 return;
nuclear@8 345 }
nuclear@8 346
nuclear@9 347 #ifdef PALMODE
nuclear@9 348 pixels += (y * WIDTH + x) / 2;
nuclear@9 349 if(x & 1) {
nuclear@9 350 *pixels = (*pixels & 0xff) | (color << 8);
nuclear@9 351 } else {
nuclear@9 352 *pixels = (*pixels & 0xff00) | color;
nuclear@9 353 }
nuclear@9 354 #else
nuclear@8 355 pixels[y * WIDTH + x] = color;
nuclear@9 356 #endif
nuclear@8 357 }
nuclear@14 358
/*
 * Sign of the 2D cross product (x0, y0) x (x1, y1).
 *
 * Returns 1, 0 or -1 for a positive, zero or negative cross product.
 * The products are formed in 64 bits: a 16.16 product overflows 32 bits as
 * soon as its magnitude reaches 32768 (e.g. a 240 x 160 pixel edge pair),
 * which would flip the sign and mis-cull large polygons.
 */
static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1)
{
	int64_t a = (int64_t)x0 * (int64_t)y1;
	int64_t b = (int64_t)y0 * (int64_t)x1;

	/* compare rather than subtract so nothing can overflow */
	return (a > b) - (a < b);
}
nuclear@19 363
#ifdef USE_TEX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
 * Return the texel at (tx, ty) of tex, going through tcache.
 *
 * A hit needs the same texture and a coordinate inside the cached window.
 * On a miss the window is re-centred on (tx, ty), pushed back inside the
 * texture where it would stick out, and refilled row by row before the
 * texel is read from it. Only the part of the window that overlaps the
 * texture is copied; the rest of the cache keeps its previous contents.
 */
static uint16_t tex_lookup(struct texture *tex, int tx, int ty)
{
	int left, top, ncols, nrows, row;
	uint16_t *src, *dst;
	int cx = tx - tcache.orig_x;
	int cy = ty - tcache.orig_y;

	/* hit: same texture and inside the window */
	if(tcache.tex == tex && cx >= 0 && cx < TCACHE_XSZ && cy >= 0 && cy < TCACHE_YSZ) {
		return tcache.pixels[cy * TCACHE_XSZ + cx];
	}

	/* miss: centre a new window on the requested texel */
	tcache.tex = tex;
	left = tx - TCACHE_XSZ / 2;
	top = ty - TCACHE_YSZ / 2;

	/* pull it back in at the right/bottom first, then at the left/top */
	if(left + TCACHE_XSZ > tex->xsz) {
		left = tex->xsz - TCACHE_XSZ;
	}
	if(top + TCACHE_YSZ > tex->ysz) {
		top = tex->ysz - TCACHE_YSZ;
	}
	if(left < 0) {
		left = 0;
	}
	if(top < 0) {
		top = 0;
	}

	/* textures smaller than the cache fill only part of it */
	ncols = MIN(TCACHE_XSZ, tex->xsz - left);
	nrows = MIN(TCACHE_YSZ, tex->ysz - top);

	tcache.orig_x = left;
	tcache.orig_y = top;

	src = (uint16_t*)tex->pixels + top * tex->xsz + left;
	dst = tcache.pixels;
	for(row=0; row<nrows; row++) {
		memcpy(dst, src, ncols * 2);
		dst += TCACHE_XSZ;
		src += tex->xsz;
	}

	return tcache.pixels[(ty - top) * TCACHE_XSZ + (tx - left)];
}
#endif	/* USE_TEX */