gba-x3dtest

diff src/polyfill.c @ 15:b755fb002f17

foo
author John Tsiombikas <nuclear@member.fsf.org>
date Wed, 25 Jun 2014 17:02:48 +0300
parents c398d834d64a
children 0a7f402892b3
line diff
     1.1 --- a/src/polyfill.c	Mon Jun 23 10:33:24 2014 +0300
     1.2 +++ b/src/polyfill.c	Wed Jun 25 17:02:48 2014 +0300
     1.3 @@ -5,14 +5,18 @@
     1.4  #include "fixed.h"
     1.5  #include "gbasys.h"
     1.6  
     1.7 +/* TODO: constant interpolant optimization */
     1.8 +
     1.9  #define VNEXT(x, n)		(((x) + 1) % (n))
    1.10  #define VPREV(x, n)		((x) > 0 ? (x) - 1 : (n) - 1)
    1.11  
    1.12 -static void fill_scanline_pal(int y, int x0, int x1, uint8_t color);
    1.13 -static void fill_scanline_rgb(int y, int x0, int x1, uint16_t color);
    1.14 +static void fill_scanline_pal(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
    1.15 +		int32_t v0, int32_t v1, uint8_t color);
    1.16 +static void fill_scanline_rgb(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
    1.17 +		int32_t v0, int32_t v1, uint16_t color);
    1.18  static int winding(int32_t x0, int32_t y0, int32_t x1, int32_t y1);
    1.19  
    1.20 -void draw_poly(int num, const pvec3 *verts, uint16_t color)
    1.21 +void draw_poly(int num, const pvec3 *verts, const pvec2 *tex, uint16_t color)
    1.22  {
    1.23  	int i, topidx = 0, botidx = 0;
    1.24  	int lidx[2] = {-1, -1}, ridx[2] = {-1, -1};
    1.25 @@ -22,6 +26,8 @@
    1.26  	int start, end;
    1.27  	pvec3 v0, v1;
    1.28  
    1.29 +	int32_t lu, lv, ru, rv, ldudy, ldvdy, rdudy, rdvdy;	/* texture interpolants */
    1.30 +
    1.31  	v0.x = verts[1].x - verts[0].x;
    1.32  	v0.y = verts[1].y - verts[0].y;
    1.33  
    1.34 @@ -63,6 +69,10 @@
    1.35  	}
    1.36  	lx = verts[lidx[0]].x;
    1.37  	ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    1.38 +	lu = tex[lidx[0]].x;
    1.39 +	ldudy = x16div(tex[lidx[1]].x - lu, ldy);
    1.40 +	lv = tex[lidx[0]].y;
    1.41 +	ldvdy = x16div(tex[lidx[1]].y - lv, ldy);
    1.42  
    1.43  	/* find starting right edge */
    1.44  	ridx[1] = VNEXT(ridx[0], num);
    1.45 @@ -80,6 +90,10 @@
    1.46  	}
    1.47  	rx = verts[ridx[0]].x;
    1.48  	rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
    1.49 +	ru = tex[ridx[0]].x;
    1.50 +	rdudy = x16div(tex[ridx[1]].x - ru, rdy);
    1.51 +	rv = tex[ridx[0]].y;
    1.52 +	rdvdy = x16div(tex[ridx[1]].y - rv, rdy);
    1.53  
    1.54  	start = topy >> 16;
    1.55  	end = boty >> 16;
    1.56 @@ -88,7 +102,6 @@
    1.57  
    1.58  	y = topy;
    1.59  	for(i=start; i<=end; i++) {
    1.60 -		short x0, x1;
    1.61  
    1.62  		if(y >= verts[lidx[1]].y) {
    1.63  			lx = verts[lidx[1]].x;
    1.64 @@ -98,10 +111,18 @@
    1.65  			if(ldy < 0) {
    1.66  				break;
    1.67  			}
    1.68 +
    1.69 +			lu = tex[lidx[0]].x;
    1.70 +			lv = tex[lidx[0]].y;
    1.71 +
    1.72  			if(ldy) {
    1.73  				ldxdy = x16div(verts[lidx[1]].x - lx, ldy);
    1.74 +				ldudy = x16div(tex[lidx[1]].x - lu, ldy);
    1.75 +				ldvdy = x16div(tex[lidx[1]].y - lv, ldy);
    1.76  			} else {
    1.77  				ldxdy = verts[lidx[1]].x - lx;
    1.78 +				ldudy = tex[lidx[1]].x - lu;
    1.79 +				ldvdy = tex[lidx[1]].y - lv;
    1.80  			}
    1.81  		}
    1.82  		if(y >= verts[ridx[1]].y) {
    1.83 @@ -112,81 +133,145 @@
    1.84  			if(rdy < 0) {
    1.85  				break;
    1.86  			}
    1.87 +
    1.88 +			ru = tex[ridx[0]].x;
    1.89 +			rv = tex[ridx[0]].y;
    1.90 +
    1.91  			if(rdy) {
    1.92  				rdxdy = x16div(verts[ridx[1]].x - rx, rdy);
    1.93 +				rdudy = x16div(tex[ridx[1]].x - ru, rdy);
    1.94 +				rdvdy = x16div(tex[ridx[1]].y - rv, rdy);
    1.95  			} else {
    1.96  				rdxdy = verts[ridx[1]].x - rx;
    1.97 +				rdudy = tex[ridx[1]].x - ru;
    1.98 +				rdvdy = tex[ridx[1]].y - rv;
    1.99  			}
   1.100  		}
   1.101  
   1.102 -		x0 = lx >> 16;
   1.103 -		x1 = rx >> 16;
   1.104 -
   1.105 -		if(i >= 0) {// && x0 < x1) {
   1.106 +		if(i >= 0) {
   1.107  #ifdef PALMODE
   1.108 -			fill_scanline_pal(i, x0, x1, (uint8_t)color);
   1.109 +			fill_scanline_pal(i, lx, rx, lu, ru, lv, rv, (uint8_t)color);
   1.110  #else
   1.111 -			fill_scanline_rgb(i, x0, x1, color);
   1.112 +			fill_scanline_rgb(i, lx, rx, lu, ru, lv, rv, color);
   1.113  #endif
   1.114  		}
   1.115  
   1.116  		lx += ldxdy;
   1.117  		rx += rdxdy;
   1.118  		y += 65536;
   1.119 +
   1.120 +		lu += ldudy;
   1.121 +		lv += ldvdy;
   1.122 +		ru += rdudy;
   1.123 +		rv += rdvdy;
   1.124  	}
   1.125  }
   1.126  
   1.127  
   1.128 -static void fill_scanline_pal(int y, int x0, int x1, uint8_t color)
   1.129 +static void fill_scanline_pal(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
   1.130 +		int32_t v0, int32_t v1, uint8_t color)
   1.131  {
   1.132 -#if 1
   1.133 -	int i, num_pairs, num_pix = x1 - x0;
   1.134 -	uint16_t *pixels = (uint16_t*)back_buffer->pixels + (y * WIDTH + x0) / 2;
   1.135 +	int ix0, ix1;
   1.136 +	int32_t dx;
   1.137 +	int32_t u, v, dudx, dvdx;
   1.138 +
   1.139 +	int i, num_pairs, num_pix;
   1.140 +	uint16_t *pixels;
   1.141  	uint16_t colpair = (uint16_t)color | ((uint16_t)color << 8);
   1.142  
   1.143 -	if(x0 & 1) {
   1.144 +	if(x0 > x1) {
   1.145 +		int32_t tmp = x0;
   1.146 +		x0 = x1;
   1.147 +		x1 = tmp;
   1.148 +	}
   1.149 +
   1.150 +	dx = x1 - x0;
   1.151 +
   1.152 +	u = u0;
   1.153 +	v = v0;
   1.154 +	if(dx) {
   1.155 +		dudx = x16div(u1 - u0, dx);
   1.156 +		dvdx = x16div(v1 - v0, dx);
   1.157 +	} else {
   1.158 +		dudx = u1 - u0;
   1.159 +		dvdx = v1 - v0;
   1.160 +	}
   1.161 +
   1.162 +	ix0 = (x0 + 32768) >> 16;
   1.163 +	ix1 = (x1 + 32768) >> 16;
   1.164 +
   1.165 +	if(ix0 < 0) ix0 = 0;
   1.166 +	if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
   1.167 +
   1.168 +	num_pix = ix1 - ix0;
   1.169 +	pixels = (uint16_t*)back_buffer->pixels + (y * WIDTH + ix0) / 2;
   1.170 +
   1.171 +	if(ix0 & 1) {
   1.172  		uint16_t pix = *pixels & 0xff;
   1.173  		*pixels++ = pix | ((uint16_t)color << 8);
   1.174  		--num_pix;
   1.175 +		u += dudx;
   1.176 +		v += dvdx;
   1.177  	}
   1.178  
   1.179  	num_pairs = (num_pix & 0xfffe) / 2;
   1.180  
   1.181  	for(i=0; i<num_pairs; i++) {
   1.182  		*pixels++ = colpair;
   1.183 +		u += dudx * 2;
   1.184 +		v += dvdx * 2;
   1.185  	}
   1.186  
   1.187  	if(num_pix & 1) {
   1.188  		uint16_t pix = *pixels & 0xff00;
   1.189  		*pixels = pix | color;
   1.190  	}
   1.191 -#else
   1.192 -	int i;
   1.193 -	uint8_t *pixels = (uint8_t*)back_buffer->pixels + y * WIDTH + x0;
   1.194 -
   1.195 -	for(i=x0; i<x1; i++) {
   1.196 -		*pixels++ = color;
   1.197 -	}
   1.198 -#endif
   1.199  }
   1.200  
   1.201 -static void fill_scanline_rgb(int y, int x0, int x1, uint16_t color)
   1.202 +static void fill_scanline_rgb(int y, int32_t x0, int32_t x1, int32_t u0, int32_t u1,
   1.203 +		int32_t v0, int32_t v1, uint16_t color)
   1.204  {
   1.205 -	int i;
   1.206 +	int i, ix0, ix1;
   1.207  	uint16_t *pixels;
   1.208 +	int32_t dx;
   1.209 +	int32_t u, v, dudx, dvdx;
   1.210  
   1.211  	if(x0 > x1) {
   1.212 -		i = x0;
   1.213 +		int32_t tmp = x0;
   1.214  		x0 = x1;
   1.215 -		x1 = i;
   1.216 +		x1 = tmp;
   1.217  	}
   1.218  
   1.219 -	if(x0 < 0) x0 = 0;
   1.220 -	if(x1 >= WIDTH - 1) x1 = WIDTH - 1;
   1.221 +	dx = x1 - x0;
   1.222  
   1.223 -	pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + x0;
   1.224 -	for(i=x0; i<x1; i++) {
   1.225 -		*pixels++ = color;
   1.226 +	u = u0;
   1.227 +	v = v0;
   1.228 +	if(dx) {
   1.229 +		dudx = x16div(u1 - u0, dx);
   1.230 +		dvdx = x16div(v1 - v0, dx);
   1.231 +	} else {
   1.232 +		dudx = u1 - u0;
   1.233 +		dvdx = v1 - v0;
   1.234 +	}
   1.235 +
   1.236 +	ix0 = (x0 + 32768) >> 16;
   1.237 +	ix1 = (x1 + 32768) >> 16;
   1.238 +
   1.239 +	if(ix0 < 0) ix0 = 0;
   1.240 +	if(ix1 >= WIDTH - 1) ix1 = WIDTH - 1;
   1.241 +
   1.242 +	pixels = (uint16_t*)back_buffer->pixels + y * WIDTH + ix0;
   1.243 +	for(i=ix0; i<ix1; i++) {
   1.244 +		/**pixels++ = color;*/
   1.245 +		int cr = u >> 8;
   1.246 +		int cg = v >> 8;
   1.247 +		if(cr > 255) cr = 255;
   1.248 +		if(cg > 255) cg = 255;
   1.249 +
   1.250 +		*pixels++ = RGB(cr, cg, 0);
   1.251 +
   1.252 +		u += dudx;
   1.253 +		v += dvdx;
   1.254  	}
   1.255  }
   1.256