/*
 * gba-x3dtest
 *
 * Source listing of src/polyfill.c @ 19:62390f9cc93e
 *
 * Commit message: texture cache optimization failed
 * author:   John Tsiombikas <nuclear@member.fsf.org>
 * date:     Mon, 30 Jun 2014 09:07:41 +0300
 * parents:  0a7f402892b3
 * children: 2e903e27e35a
 */
1 #include "config.h"
2 #include <string.h>
3 #include <assert.h>
4 #include "polyfill.h"
5 #include "fixed.h"
6 #include "gbasys.h"
7 #include "logger.h"
/* compile in the texture-mapping code paths (interpolants, tex_lookup, tcache) */
#define USE_TEX

/* TODO: constant interpolant optimization */

/* index of the vertex following / preceding vertex x in an n-vertex polygon,
 * wrapping around at the ends of the vertex array
 */
#define VNEXT(x, n)	(((x) + 1) % (n))
#define VPREV(x, n)	((x) <= 0 ? (n) - 1 : (x) - 1)

/* span fillers: one for the 8bit paletted framebuffer, one for the 16bit one */
static void fill_scanline_pal(int y, int32_t x0, int32_t x1,
		int32_t u0, int32_t u1, int32_t v0, int32_t v1,
		uint8_t color);
static void fill_scanline_rgb(int y, int32_t x0, int32_t x1,
		int32_t u0, int32_t u1, int32_t v0, int32_t v1,
		uint16_t color, struct texture *tex);

/* sign of the 2D cross product of two edge vectors (used for backface culling) */
static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);

#ifdef USE_TEX
/* fetch a single texel through the texture cache below */
static uint16_t tex_lookup(struct texture *tex, int tx, int ty);

/* TODO currently the linkscript will put statics in iwram. make this explicit */
#define TCACHE_XSZ	16
#define TCACHE_YSZ	16

/* a TCACHE_XSZ x TCACHE_YSZ window of texels copied out of a texture */
static struct {
	uint16_t pixels[TCACHE_XSZ * TCACHE_YSZ];	/* cached texels, row-major */
	struct texture *tex;				/* texture the window was copied from */
	int orig_x, orig_y;				/* texel position of the window's corner */
} tcache;
#endif	/* USE_TEX */
37 void draw_poly(int num, const pvec3 *verts, const pvec2 *texcoords, uint16_t color,
38 struct texture *tex)
39 {
40 int i, topidx = 0, botidx = 0;
41 int lidx[2] = {-1, -1}, ridx[2] = {-1, -1};
42 int32_t y, topy, boty;
43 int32_t ldy = 0, rdy = 0, ldxdy, rdxdy;
44 int32_t lx, rx;
45 int start, end;
46 pvec3 v0, v1;
48 #ifdef USE_TEX
49 int32_t ldudy, ldvdy, rdudy, rdvdy; /* texture interpolants */
50 #else
51 static /* to avoid lu,lv,ru,rv uninitialized warnings */
52 #endif
53 int32_t lu, lv, ru, rv;
55 v0.x = verts[1].x - verts[0].x;
56 v0.y = verts[1].y - verts[0].y;
58 v1.x = verts[2].x - verts[0].x;
59 v1.y = verts[2].y - verts[0].y;
61 if(winding(v0.x, v0.y, v1.x, v1.y) < 0) {
62 return; /* backface */
63 }
65 topy = boty = verts[0].y;
66 for(i=1; i<num; i++) {
67 int32_t y = verts[i].y;
68 if(y < topy) {
69 topy = y;
70 topidx = i;
71 }
72 if(y > boty) {
73 boty = y;
74 botidx = i;
75 }
76 }
78 lidx[0] = ridx[0] = topidx;
80 /* find starting left edge */
81 lidx[1] = VPREV(lidx[0], num);
82 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
84 while(ldy == 0) {
85 lidx[0] = lidx[1];
86 lidx[1] = VPREV(lidx[1], num);
88 if(lidx[1] == topidx) {
89 return; /* degenerate */
90 }
92 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
93 }
94 lx = verts[lidx[0]].x;
95 ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
96 #ifdef USE_TEX
97 lu = texcoords[lidx[0]].x;
98 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
99 lv = texcoords[lidx[0]].y;
100 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
101 #endif
103 /* find starting right edge */
104 ridx[1] = VNEXT(ridx[0], num);
105 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
107 while(rdy == 0) {
108 ridx[0] = ridx[1];
109 ridx[1] = VNEXT(ridx[1], num);
111 if(ridx[1] == topidx) {
112 return; /* degenerate */
113 }
115 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
116 }
117 rx = verts[ridx[0]].x;
118 rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
119 #ifdef USE_TEX
120 ru = texcoords[ridx[0]].x;
121 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
122 rv = texcoords[ridx[0]].y;
123 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
124 #endif
126 start = topy >> 16;
127 end = boty >> 16;
129 if(end >= HEIGHT) end = HEIGHT - 1;
131 y = topy;
132 for(i=start; i<=end; i++) {
134 if(y >= verts[lidx[1]].y) {
135 lx = verts[lidx[1]].x;
136 lidx[0] = lidx[1];
137 lidx[1] = VPREV(lidx[1], num);
138 ldy = verts[lidx[1]].y - verts[lidx[0]].y;
139 if(ldy < 0) {
140 break;
141 }
143 if(ldy) {
144 ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
145 } else {
146 ldxdy = verts[lidx[1]].x - lx;
147 }
149 #ifdef USE_TEX
150 lu = texcoords[lidx[0]].x;
151 lv = texcoords[lidx[0]].y;
152 if(ldy) {
153 ldudy = x16div(texcoords[lidx[1]].x - lu, ldy);
154 ldvdy = x16div(texcoords[lidx[1]].y - lv, ldy);
155 } else {
156 ldudy = texcoords[lidx[1]].x - lu;
157 ldvdy = texcoords[lidx[1]].y - lv;
158 }
159 #endif /* USE_TEX */
160 }
161 if(y >= verts[ridx[1]].y) {
162 rx = verts[ridx[1]].x;
163 ridx[0] = ridx[1];
164 ridx[1] = VNEXT(ridx[1], num);
165 rdy = verts[ridx[1]].y - verts[ridx[0]].y;
166 if(rdy < 0) {
167 break;
168 }
170 if(rdy) {
171 rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
172 } else {
173 rdxdy = verts[ridx[1]].x - rx;
174 }
176 #ifdef USE_TEX
177 ru = texcoords[ridx[0]].x;
178 rv = texcoords[ridx[0]].y;
179 if(rdy) {
180 rdudy = x16div(texcoords[ridx[1]].x - ru, rdy);
181 rdvdy = x16div(texcoords[ridx[1]].y - rv, rdy);
182 } else {
183 rdudy = texcoords[ridx[1]].x - ru;
184 rdvdy = texcoords[ridx[1]].y - rv;
185 }
186 #endif /* USE_TEX */
187 }
189 if(i >= 0) {
190 #ifdef PALMODE
191 fill_scanline_pal(i, lx, rx, lu, ru, lv, rv, (uint8_t)color);
192 #else
193 fill_scanline_rgb(i, lx, rx, lu, ru, lv, rv, color, tex);
194 #endif
195 }
197 lx += ldxdy;
198 rx += rdxdy;
199 y += 65536;
201 #ifdef USE_TEX
202 lu += ldudy;
203 lv += ldvdy;
204 ru += rdudy;
205 rv += rdvdy;
206 #endif
207 }
208 }
211 static void fill_scanline_pal(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
212 int32_t v0, int32_t v1, uint8_t color)
213 {
214 int ix0, ix1;
215 #ifdef USE_TEX
216 int32_t dx;
217 int32_t u, v, dudx, dvdx;
218 #endif
220 int i, num_pairs, num_pix;
221 uint16_t *pixels;
222 uint16_t colpair = (uint16_t)color | ((uint16_t)color << 8);
224 if(x0 > x1) {
225 int32_t tmp = x0;
226 x0 = x1;
227 x1 = tmp;
228 }
230 #ifdef USE_TEX
231 dx = x1 - x0;
233 u = u0;
234 v = v0;
235 if(dx) {
236 dudx = x16div(u1 - u0, dx);
237 dvdx = x16div(v1 - v0, dx);
238 } else {
239 dudx = u1 - u0;
240 dvdx = v1 - v0;
241 }
242 #endif
244 ix0 = (x0 + 32768) >> 16;
245 ix1 = (x1 + 32768) >> 16;
247 if(ix0 < 0) ix0 = 0;
248 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
250 num_pix = ix1 - ix0;
251 pixels = (uint16_t*)back_buffer->pixels + (y * WIDTH + ix0) / 2;
253 if(ix0 & 1) {
254 uint16_t pix = *pixels & 0xff;
255 *pixels++ = pix | ((uint16_t)color << 8);
256 --num_pix;
257 #ifdef USE_TEX
258 u += dudx;
259 v += dvdx;
260 #endif
261 }
263 num_pairs = (num_pix & 0xfffe) / 2;
265 for(i=0; i<num_pairs; i++) {
266 *pixels++ = colpair;
267 #ifdef USE_TEX
268 u += dudx * 2;
269 v += dvdx * 2;
270 #endif
271 }
273 if(num_pix & 1) {
274 uint16_t pix = *pixels & 0xff00;
275 *pixels = pix | color;
276 }
277 }
279 static void fill_scanline_rgb(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
280 int32_t v0, int32_t v1, uint16_t color, struct texture *tex)
281 {
282 int i, ix0, ix1;
283 uint16_t *pixels;
284 #ifdef USE_TEX
285 int32_t dx;
286 int32_t u, v, dudx, dvdx;
287 #endif
289 if(x0 > x1) {
290 int32_t tmp = x0;
291 x0 = x1;
292 x1 = tmp;
293 }
295 #ifdef USE_TEX
296 dx = x1 - x0;
298 u = u0;
299 v = v0;
300 if(dx) {
301 dudx = x16div(u1 - u0, dx);
302 dvdx = x16div(v1 - v0, dx);
303 } else {
304 dudx = u1 - u0;
305 dvdx = v1 - v0;
306 }
307 #endif
309 ix0 = (x0 + 32768) >> 16;
310 ix1 = (x1 + 32768) >> 16;
312 if(ix0 < 0) ix0 = 0;
313 if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
315 pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
316 #ifdef USE_TEX
317 if(tex) {
318 for(i=ix0; i<ix1; i++) {
319 int tx = (u >> (16 - tex->ushift)) & tex->umask;
320 int ty = (v >> (16 - tex->vshift)) & tex->vmask;
321 *pixels++ = tex_lookup(tex, tx, ty);
322 //*pixels++ = ((uint16_t*)tex->pixels)[ty * tex->xsz + yx];
324 u += dudx;
325 v += dvdx;
326 }
327 } else
328 #endif
329 {
330 for(i=ix0; i<ix1; i++) {
331 *pixels++ = color;
332 }
333 }
334 }
337 void draw_point(const pvec3 *v, uint16_t color)
338 {
339 int x = v->x >> 16;
340 int y = v->y >> 16;
341 uint16_t *pixels = (uint16_t*)back_buffer->pixels;
343 if(x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) {
344 return;
345 }
347 #ifdef PALMODE
348 pixels += (y * WIDTH + x) / 2;
349 if(x & 1) {
350 *pixels = (*pixels & 0xff) | (color << 8);
351 } else {
352 *pixels = (*pixels & 0xff00) | color;
353 }
354 #else
355 pixels[y * WIDTH + x] = color;
356 #endif
357 }
/*
 * Returns the 2D cross product (x0, y0) x (x1, y1) of two vectors, computed
 * with x16mul. draw_poly treats a negative result as a back-facing polygon.
 */
static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1)
{
	int cross = x16mul(x0, y1) - x16mul(y0, x1);

	return cross;
}
#ifdef USE_TEX
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Fetch texel (tx, ty) of tex through the texture cache.
 *
 * If the texel lies inside the window currently held in tcache (and the
 * window was copied from the same texture), it is returned straight from
 * there. Otherwise a new window of up to TCACHE_XSZ x TCACHE_YSZ texels,
 * centered on the requested texel and pushed back inside the texture where
 * it would stick out, is copied into the cache first.
 *
 * NOTE(review): tx/ty are not range-checked here; the caller visible in this
 * file masks them with tex->umask / tex->vmask before calling.
 */
static uint16_t tex_lookup(struct texture *tex, int tx, int ty)
{
	int org_x, org_y, ncols, nrows, row;
	uint16_t *src, *dest;
	int offs_x = tx - tcache.orig_x;
	int offs_y = ty - tcache.orig_y;

	/* cache hit */
	if(tcache.tex == tex && offs_x >= 0 && offs_x < TCACHE_XSZ &&
			offs_y >= 0 && offs_y < TCACHE_YSZ) {
		return tcache.pixels[offs_y * TCACHE_XSZ + offs_x];
	}

	/* cache miss: pick a new window around the requested texel ... */
	tcache.tex = tex;
	org_x = tx - TCACHE_XSZ / 2;
	org_y = ty - TCACHE_YSZ / 2;

	/* ... slide it back if it extends past the right/bottom of the texture ... */
	if(org_x + TCACHE_XSZ > tex->xsz) {
		org_x = tex->xsz - TCACHE_XSZ;
	}
	if(org_y + TCACHE_YSZ > tex->ysz) {
		org_y = tex->ysz - TCACHE_YSZ;
	}

	/* ... and make sure it doesn't start before the left/top edge */
	if(org_x < 0) org_x = 0;
	if(org_y < 0) org_y = 0;

	/* textures smaller than the cache only fill part of it */
	ncols = MIN(TCACHE_XSZ, tex->xsz - org_x);
	nrows = MIN(TCACHE_YSZ, tex->ysz - org_y);

	tcache.orig_x = org_x;
	tcache.orig_y = org_y;

	/* copy the window row by row; 2 bytes per texel */
	src = (uint16_t*)tex->pixels + org_y * tex->xsz + org_x;
	dest = tcache.pixels;
	for(row=0; row<nrows; row++) {
		memcpy(dest, src, ncols * 2);
		dest += TCACHE_XSZ;
		src += tex->xsz;
	}

	return tcache.pixels[(ty - org_y) * TCACHE_XSZ + (tx - org_x)];
}
#endif	/* USE_TEX */