clray

changeset 12:85fd61f374d9

fixed the bloody intersection bug
author John Tsiombikas <nuclear@member.fsf.org>
date Tue, 03 Aug 2010 13:06:59 +0100
parents d9a1bab1c3f5
children 407935b73af3
files Makefile rt.cl src/clray.cc src/mesh.h src/ocl.cc src/ocl.h src/rt.cc src/rt.h
diffstat 8 files changed, 244 insertions(+), 121 deletions(-) [+]
line diff
     1.1 --- a/Makefile	Sat Jul 31 22:23:57 2010 +0100
     1.2 +++ b/Makefile	Tue Aug 03 13:06:59 2010 +0100
     1.3 @@ -1,5 +1,6 @@
     1.4  src = $(wildcard src/*.cc)
     1.5  obj = $(src:.cc=.o)
     1.6 +dep = $(obj:.o=.d)
     1.7  bin = test
     1.8  
     1.9  CXX = g++
    1.10 @@ -17,6 +18,11 @@
    1.11  $(bin): $(obj)
    1.12  	$(CXX) -o $@ $(obj) $(LDFLAGS)
    1.13  
    1.14 +-include $(dep)
    1.15 +
    1.16 +%.d: %.cc
    1.17 +	@$(CPP) $(CXXFLAGS) -MM -MT $(@:.d=.o) $< >$@
    1.18 +
    1.19  .PHONY: clean
    1.20  clean:
    1.21 -	rm -f $(obj) $(bin)
    1.22 +	rm -f $(obj) $(bin) $(dep)
     2.1 --- a/rt.cl	Sat Jul 31 22:23:57 2010 +0100
     2.2 +++ b/rt.cl	Tue Aug 03 13:06:59 2010 +0100
     2.3 @@ -1,3 +1,5 @@
     2.4 +/* vim: set ft=opencl ts=4 sw=4: */
     2.5 +
     2.6  struct RendInfo {
     2.7  	int xsz, ysz;
     2.8  	int num_faces, num_lights;
     2.9 @@ -7,19 +9,22 @@
    2.10  struct Vertex {
    2.11  	float4 pos;
    2.12  	float4 normal;
    2.13 -	float2 tex;
    2.14 +	float4 tex;
    2.15 +	float4 padding;
    2.16  };
    2.17  
    2.18  struct Face {
    2.19  	struct Vertex v[3];
    2.20  	float4 normal;
    2.21  	int matid;
    2.22 +	int padding[3];
    2.23  };
    2.24  
    2.25  struct Material {
    2.26  	float4 kd, ks;
    2.27  	float kr, kt;
    2.28  	float spow;
    2.29 +	float padding;
    2.30  };
    2.31  
    2.32  struct Light {
    2.33 @@ -32,7 +37,7 @@
    2.34  
    2.35  struct SurfPoint {
    2.36  	float t;
    2.37 -	float4 pos, norm;
    2.38 +	float4 pos, norm, dbg;
    2.39  	global const struct Face *obj;
    2.40  	global const struct Material *mat;
    2.41  };
    2.42 @@ -44,9 +49,8 @@
    2.43  bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
    2.44  float4 reflect(float4 v, float4 n);
    2.45  float4 transform(float4 v, global const float *xform);
    2.46 -struct Ray transform_ray(global const struct Ray *ray, global const float *xform);
    2.47 -float4 calc_bary(float4 pt, global const struct Face *face);
    2.48 -
    2.49 +struct Ray transform_ray(global const struct Ray *ray, global const float *xform, global const float *invtrans);
    2.50 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
    2.51  
    2.52  kernel void render(global float4 *fb,
    2.53  		global const struct RendInfo *rinf,
    2.54 @@ -54,11 +58,12 @@
    2.55  		global const struct Material *matlib,
    2.56  		global const struct Light *lights,
    2.57  		global const struct Ray *primrays,
    2.58 -		global const float *xform)
    2.59 +		global const float *xform,
    2.60 +		global const float *invtrans)
    2.61  {
    2.62  	int idx = get_global_id(0);
    2.63  
    2.64 -	struct Ray ray = transform_ray(primrays + idx, xform);
    2.65 +	struct Ray ray = transform_ray(primrays + idx, xform, invtrans);
    2.66  
    2.67  	struct SurfPoint sp, sp0;
    2.68  	sp0.t = FLT_MAX;
    2.69 @@ -81,61 +86,86 @@
    2.70  float4 shade(struct Ray ray, struct SurfPoint sp,
    2.71  		global const struct Light *lights, int num_lights)
    2.72  {
    2.73 +	float4 norm = sp.norm;
    2.74 +	bool entering = true;
    2.75 +
    2.76 +	if(dot(ray.dir, norm) >= 0.0) {
    2.77 +		norm = -norm;
    2.78 +		entering = false;
    2.79 +	}
    2.80 +
    2.81  	float4 dcol = (float4)(0, 0, 0, 0);
    2.82  	float4 scol = (float4)(0, 0, 0, 0);
    2.83  
    2.84  	for(int i=0; i<num_lights; i++) {
    2.85  		float4 ldir = normalize(lights[i].pos - sp.pos);
    2.86  		float4 vdir = -normalize(ray.dir);
    2.87 -		float4 vref = reflect(vdir, sp.norm);
    2.88 +		float4 vref = reflect(vdir, norm);
    2.89  
    2.90 -		float diff = fmax(dot(ldir, sp.norm), 0.0f);
    2.91 +		float diff = fmax(dot(ldir, norm), 0.0f);
    2.92  		float spec = powr(fmax(dot(ldir, vref), 0.0f), sp.mat->spow);
    2.93  
    2.94  		dcol += sp.mat->kd * diff * lights[i].color;
    2.95 -		scol += sp.mat->ks * spec * lights[i].color;
    2.96 +		//scol += sp.mat->ks * spec * lights[i].color;
    2.97  	}
    2.98  
    2.99  	return dcol + scol;
   2.100  }
   2.101  
   2.102 +float dot3(float4 a, float4 b)
   2.103 +{
   2.104 +	return a.x * b.x + a.y * b.y + a.z * b.z;
   2.105 +}
   2.106 +
   2.107 +
   2.108  bool intersect(struct Ray ray,
   2.109  		global const struct Face *face,
   2.110  		struct SurfPoint *sp)
   2.111  {
   2.112 -	float ndotdir = dot(face->normal, ray.dir);
   2.113 +	float4 origin = ray.origin;
   2.114 +	float4 dir = ray.dir;
   2.115 +	float4 norm = face->normal;
   2.116 +
   2.117 +	float ndotdir = dot3(dir, norm);
   2.118 +
   2.119  	if(fabs(ndotdir) <= EPSILON) {
   2.120  		return false;
   2.121  	}
   2.122  
   2.123  	float4 pt = face->v[0].pos;
   2.124 -	float4 vec = pt - ray.origin;
   2.125 +	float4 vec = pt - origin;
   2.126  
   2.127 -	float ndotvec = dot(face->normal, vec);
   2.128 +	float ndotvec = dot3(norm, vec);
   2.129  	float t = ndotvec / ndotdir;
   2.130  
   2.131  	if(t < EPSILON || t > 1.0) {
   2.132  		return false;
   2.133  	}
   2.134 -	pt = ray.origin + ray.dir * t;
   2.135 +	pt = origin + dir * t;
   2.136  
   2.137 -	float4 bc = calc_bary(pt, face);
   2.138 +	if(pt.w < 0.0) return false;
   2.139 +
   2.140 +
   2.141 +	float4 bc = calc_bary(pt, face, norm);
   2.142  	float bc_sum = bc.x + bc.y + bc.z;
   2.143  
   2.144 -	if(bc_sum < -EPSILON || bc_sum > 1.0) {
   2.145 +	if(bc_sum < 0.0 || bc_sum > 1.0 + EPSILON) {
   2.146  		return false;
   2.147 +		bc *= 1.2;
   2.148  	}
   2.149  
   2.150  	sp->t = t;
   2.151  	sp->pos = pt;
   2.152 -	sp->norm = face->normal;
   2.153 +	sp->norm = norm;
   2.154  	sp->obj = face;
   2.155 +	sp->dbg = bc;
   2.156  	return true;
   2.157  }
   2.158  
   2.159  float4 reflect(float4 v, float4 n)
   2.160  {
   2.161 -	return 2.0f * dot(v, n) * n - v;
   2.162 +	float4 res = 2.0f * dot(v, n) * n - v;
   2.163 +	return res;
   2.164  }
   2.165  
   2.166  float4 transform(float4 v, global const float *xform)
   2.167 @@ -144,33 +174,28 @@
   2.168  	res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
   2.169  	res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
   2.170  	res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
   2.171 -	res.w = 1.0;
   2.172 +	res.w = 0.0;
   2.173  	return res;
   2.174  }
   2.175  
   2.176 -struct Ray transform_ray(global const struct Ray *ray, global const float *xform)
   2.177 +struct Ray transform_ray(global const struct Ray *ray, global const float *xform, global const float *invtrans)
   2.178  {
   2.179  	struct Ray res;
   2.180 -	float rot[16];
   2.181 -
   2.182 -	for(int i=0; i<16; i++) {
   2.183 -		rot[i] = xform[i];
   2.184 -	}
   2.185 -	rot[3] = rot[7] = rot[11] = rot[12] = rot[13] = rot[14] = 0.0f;
   2.186 -	rot[15] = 1.0f;
   2.187 -
   2.188  	res.origin = transform(ray->origin, xform);
   2.189 -	res.dir = transform(ray->dir, xform);
   2.190 +	res.dir = transform(ray->dir, invtrans);
   2.191  	return res;
   2.192  }
   2.193  
   2.194 -float4 calc_bary(float4 pt, global const struct Face *face)
   2.195 +float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
   2.196  {
   2.197 -	float4 bc = {0, 0, 0, 0};
   2.198 +	float4 bc = (float4)(0, 0, 0, 0);
   2.199  
   2.200 -	float4 vi = face->v[1].pos - face->v[0].pos;
   2.201 -	float4 vj = face->v[2].pos - face->v[0].pos;
   2.202 -	float area = fabs(dot(cross(vi, vj), face->normal) / 2.0);
   2.203 +	// calculate area of the whole triangle
   2.204 +	float4 v1 = face->v[1].pos - face->v[0].pos;
   2.205 +	float4 v2 = face->v[2].pos - face->v[0].pos;
   2.206 +	float4 xv1v2 = cross(v1, v2);
   2.207 +
   2.208 +	float area = fabs(dot3(xv1v2, norm)) * 0.5;
   2.209  	if(area < EPSILON) {
   2.210  		return bc;
   2.211  	}
   2.212 @@ -179,10 +204,14 @@
   2.213  	float4 pv1 = face->v[1].pos - pt;
   2.214  	float4 pv2 = face->v[2].pos - pt;
   2.215  
   2.216 -	// calculate the areas of each sub-triangle
   2.217 -	float a0 = fabs(dot(cross(pv1, pv2), face->normal) / 2.0);
   2.218 -	float a1 = fabs(dot(cross(pv2, pv0), face->normal) / 2.0);
   2.219 -	float a2 = fabs(dot(cross(pv0, pv1), face->normal) / 2.0);
   2.220 +	// calculate the area of each sub-triangle
   2.221 +	float4 x12 = cross(pv1, pv2);
   2.222 +	float4 x20 = cross(pv2, pv0);
   2.223 +	float4 x01 = cross(pv0, pv1);
   2.224 +
   2.225 +	float a0 = fabs(dot3(x12, norm)) * 0.5;
   2.226 +	float a1 = fabs(dot3(x20, norm)) * 0.5;
   2.227 +	float a2 = fabs(dot3(x01, norm)) * 0.5;
   2.228  
   2.229  	bc.x = a0 / area;
   2.230  	bc.y = a1 / area;
     3.1 --- a/src/clray.cc	Sat Jul 31 22:23:57 2010 +0100
     3.2 +++ b/src/clray.cc	Tue Aug 03 13:06:59 2010 +0100
     3.3 @@ -8,6 +8,7 @@
     3.4  #include <GLUT/glut.h>
     3.5  #endif
     3.6  #include "rt.h"
     3.7 +#include "matrix.h"
     3.8  
     3.9  void cleanup();
    3.10  void disp();
    3.11 @@ -17,13 +18,14 @@
    3.12  void motion(int x, int y);
    3.13  bool write_ppm(const char *fname, float *fb, int xsz, int ysz);
    3.14  
    3.15 -static float *fb;
    3.16  static int xsz, ysz;
    3.17  static bool need_update = true;
    3.18  
    3.19  static float cam_theta, cam_phi = 25.0;
    3.20  static float cam_dist = 10.0;
    3.21  
    3.22 +static bool dbg_glrender;
    3.23 +
    3.24  int main(int argc, char **argv)
    3.25  {
    3.26  	glutInitWindowSize(800, 600);
    3.27 @@ -40,8 +42,7 @@
    3.28  	glutMouseFunc(mouse);
    3.29  	glutMotionFunc(motion);
    3.30  
    3.31 -	fb = new float[xsz * ysz * 4];
    3.32 -	if(!init_renderer(xsz, ysz, fb)) {
    3.33 +	if(!init_renderer(xsz, ysz)) {
    3.34  		return 1;
    3.35  	}
    3.36  	atexit(cleanup);
    3.37 @@ -60,42 +61,60 @@
    3.38  
    3.39  void cleanup()
    3.40  {
    3.41 -	delete [] fb;
    3.42  	destroy_renderer();
    3.43  }
    3.44  
    3.45 +static Matrix4x4 mat, inv_mat, inv_trans;
    3.46 +
    3.47  void disp()
    3.48  {
    3.49  	glMatrixMode(GL_MODELVIEW);
    3.50  	glLoadIdentity();
    3.51  
    3.52  	if(need_update) {
    3.53 -		float mat[16];
    3.54 +		glPushMatrix();
    3.55 +		glRotatef(-cam_theta, 0, 1, 0);
    3.56 +		glRotatef(-cam_phi, 1, 0, 0);
    3.57 +		glTranslatef(0, 0, cam_dist);
    3.58  
    3.59 -		glPushMatrix();
    3.60 -		glRotatef(cam_theta, 0, 1, 0);
    3.61 -		glRotatef(cam_phi, 1, 0, 0);
    3.62 -		glTranslatef(0, 0, -cam_dist);
    3.63 +		glGetFloatv(GL_MODELVIEW_MATRIX, mat.m);
    3.64  
    3.65 -		glGetFloatv(GL_MODELVIEW_MATRIX, mat);
    3.66 -		set_xform(mat);
    3.67 +		inv_mat = mat;
    3.68 +		inv_mat.invert();
    3.69 +
    3.70 +		/*inv_trans = inv_mat;
    3.71 +		inv_trans.transpose();*/
    3.72 +		inv_trans = mat;
    3.73 +		inv_trans.m[3] = inv_trans.m[7] = inv_trans.m[11] = 0.0;
    3.74 +		inv_trans.m[12] = inv_trans.m[13] = inv_trans.m[14] = 0.0;
    3.75 +		inv_trans.m[15] = 1.0;
    3.76 +
    3.77 +		set_xform(mat.m, inv_trans.m);
    3.78  		glPopMatrix();
    3.79  
    3.80 -		render();
    3.81 -		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, xsz, ysz, GL_RGBA, GL_FLOAT, fb);
    3.82 +		if(!render()) {
    3.83 +			exit(1);
    3.84 +		}
    3.85  		need_update = false;
    3.86  	}
    3.87  
    3.88 -	glEnable(GL_TEXTURE_2D);
    3.89 +	if(dbg_glrender) {
    3.90 +		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    3.91 +		glLoadMatrixf(inv_mat.m);
    3.92 +		dbg_render_gl();
    3.93 +	} else {
    3.94 +		glEnable(GL_TEXTURE_2D);
    3.95  
    3.96 -	glBegin(GL_QUADS);
    3.97 -	glTexCoord2f(0, 1); glVertex2f(-1, -1);
    3.98 -	glTexCoord2f(1, 1); glVertex2f(1, -1);
    3.99 -	glTexCoord2f(1, 0); glVertex2f(1, 1);
   3.100 -	glTexCoord2f(0, 0); glVertex2f(-1, 1);
   3.101 -	glEnd();
   3.102 +		glBegin(GL_QUADS);
   3.103 +		glColor3f(1, 1, 1);
   3.104 +		glTexCoord2f(0, 1); glVertex2f(-1, -1);
   3.105 +		glTexCoord2f(1, 1); glVertex2f(1, -1);
   3.106 +		glTexCoord2f(1, 0); glVertex2f(1, 1);
   3.107 +		glTexCoord2f(0, 0); glVertex2f(-1, 1);
   3.108 +		glEnd();
   3.109  
   3.110 -	glDisable(GL_TEXTURE_2D);
   3.111 +		glDisable(GL_TEXTURE_2D);
   3.112 +	}
   3.113  
   3.114  	glutSwapBuffers();
   3.115  }
   3.116 @@ -116,14 +135,16 @@
   3.117  	case 27:
   3.118  		exit(0);
   3.119  
   3.120 -	case 's':
   3.121 -		if(write_ppm("shot.ppm", fb, xsz, ysz)) {
   3.122 -			printf("captured screenshot shot.ppm\n");
   3.123 -		}
   3.124 +	case 'r':
   3.125 +		need_update = true;
   3.126 +		glutPostRedisplay();
   3.127  		break;
   3.128  
   3.129 -	case 'r':
   3.130 -		need_update = true;
   3.131 +	case 'd':
   3.132 +		dbg_glrender = !dbg_glrender;
   3.133 +		if(dbg_glrender) {
   3.134 +			printf("DEBUG GL RENDER\n");
   3.135 +		}
   3.136  		glutPostRedisplay();
   3.137  		break;
   3.138  
   3.139 @@ -160,7 +181,7 @@
   3.140  		cam_theta += dx * ROT_SCALE;
   3.141  		cam_phi += dy * ROT_SCALE;
   3.142  
   3.143 -		if(cam_phi < -89) cam_phi = 89;
   3.144 +		if(cam_phi < -89) cam_phi = -89;
   3.145  		if(cam_phi > 89) cam_phi = 89;
   3.146  
   3.147  		need_update = true;
     4.1 --- a/src/mesh.h	Sat Jul 31 22:23:57 2010 +0100
     4.2 +++ b/src/mesh.h	Tue Aug 03 13:06:59 2010 +0100
     4.3 @@ -6,19 +6,22 @@
     4.4  struct Vertex {
     4.5  	float pos[4];
     4.6  	float normal[4];
     4.7 -	float tex[2];
     4.8 +	float tex[4];
     4.9 +	float padding[4];
    4.10  };
    4.11  
    4.12  struct Face {
    4.13  	Vertex v[3];
    4.14  	float normal[4];
    4.15  	int matid;
    4.16 +	int padding[3];
    4.17  };
    4.18  
    4.19  struct Material {
    4.20  	float kd[4], ks[4];
    4.21  	float kr, kt;
    4.22  	float spow;
    4.23 +	float padding;
    4.24  };
    4.25  
    4.26  struct Mesh {
     5.1 --- a/src/ocl.cc	Sat Jul 31 22:23:57 2010 +0100
     5.2 +++ b/src/ocl.cc	Tue Aug 03 13:06:59 2010 +0100
     5.3 @@ -78,9 +78,14 @@
     5.4  {
     5.5  	int err;
     5.6  	cl_mem mem;
     5.7 +	cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
     5.8  
     5.9 +	if(buf) {
    5.10 +		flags |= CL_MEM_COPY_HOST_PTR;
    5.11 +	}
    5.12  
    5.13 -	if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
    5.14 +
    5.15 +	if(!(mem = clCreateBuffer(ctx, flags, sz, buf, &err))) {
    5.16  		fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
    5.17  		return 0;
    5.18  	}
    5.19 @@ -88,13 +93,13 @@
    5.20  	CLMemBuffer *mbuf = new CLMemBuffer;
    5.21  	mbuf->mem = mem;
    5.22  	mbuf->size = sz;
    5.23 +	mbuf->ptr = 0;
    5.24  	return mbuf;
    5.25  }
    5.26  
    5.27  void destroy_mem_buffer(CLMemBuffer *mbuf)
    5.28  {
    5.29  	if(mbuf) {
    5.30 -
    5.31  		clReleaseMemObject(mbuf->mem);
    5.32  		delete mbuf;
    5.33  	}
    5.34 @@ -104,6 +109,12 @@
    5.35  {
    5.36  	if(!mbuf) return 0;
    5.37  
    5.38 +#ifndef NDEBUG
    5.39 +	if(mbuf->ptr) {
    5.40 +		fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
    5.41 +	}
    5.42 +#endif
    5.43 +
    5.44  	int err;
    5.45  	mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
    5.46  	if(!mbuf->ptr) {
    5.47 @@ -117,6 +128,7 @@
    5.48  {
    5.49  	if(!mbuf || !mbuf->ptr) return;
    5.50  	clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
    5.51 +	mbuf->ptr = 0;
    5.52  }
    5.53  
    5.54  bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
     6.1 --- a/src/ocl.h	Sat Jul 31 22:23:57 2010 +0100
     6.2 +++ b/src/ocl.h	Tue Aug 03 13:06:59 2010 +0100
     6.3 @@ -27,7 +27,7 @@
     6.4  	void *ptr;
     6.5  };
     6.6  
     6.7 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf);
     6.8 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf = 0);
     6.9  void destroy_mem_buffer(CLMemBuffer *mbuf);
    6.10  
    6.11  void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr);
    6.12 @@ -72,7 +72,7 @@
    6.13  
    6.14  	bool set_argi(int arg, int val);
    6.15  	bool set_argf(int arg, float val);
    6.16 -	bool set_arg_buffer(int arg, int rdwr, size_t sz, void *buf);
    6.17 +	bool set_arg_buffer(int arg, int rdwr, size_t sz, void *buf = 0);
    6.18  	CLMemBuffer *get_arg_buffer(int arg);
    6.19  
    6.20  	bool build();
     7.1 --- a/src/rt.cc	Sat Jul 31 22:23:57 2010 +0100
     7.2 +++ b/src/rt.cc	Tue Aug 03 13:06:59 2010 +0100
     7.3 @@ -2,36 +2,43 @@
     7.4  #include <string.h>
     7.5  #include <math.h>
     7.6  #include <assert.h>
     7.7 +
     7.8 +#ifndef __APPLE__
     7.9 +#include <GL/gl.h>
    7.10 +#include <GL/glu.h>
    7.11 +#else
    7.12 +#include <OpenGL/gl.h>
    7.13 +#include <OpenGL/glu.h>
    7.14 +#endif
    7.15 +
    7.16  #include "ocl.h"
    7.17  #include "mesh.h"
    7.18  
    7.19 -#ifdef __GNUC__
    7.20 -#define PACKED	__attribute__((packed))
    7.21 -#else
    7.22 -#define PACKED
    7.23 -#endif
    7.24 -
    7.25 -#ifdef _MSC_VER
    7.26 -#pragma push(pack, 1)
    7.27 -#endif
    7.28 +// kernel arguments
    7.29 +enum {
    7.30 +	KARG_FRAMEBUFFER,
    7.31 +	KARG_RENDER_INFO,
    7.32 +	KARG_FACES,
    7.33 +	KARG_MATLIB,
    7.34 +	KARG_LIGHTS,
    7.35 +	KARG_PRIM_RAYS,
    7.36 +	KARG_XFORM,
    7.37 +	KARG_INVTRANS_XFORM
    7.38 +};
    7.39  
    7.40  struct RendInfo {
    7.41  	int xsz, ysz;
    7.42  	int num_faces, num_lights;
    7.43  	int max_iter;
    7.44 -} PACKED;
    7.45 +};
    7.46  
    7.47  struct Ray {
    7.48  	float origin[4], dir[4];
    7.49 -} PACKED;
    7.50 +};
    7.51  
    7.52  struct Light {
    7.53  	float pos[4], color[4];
    7.54 -} PACKED;
    7.55 -
    7.56 -#ifdef _MSC_VER
    7.57 -#pragma pop(pack)
    7.58 -#endif
    7.59 +};
    7.60  
    7.61  static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg);
    7.62  
    7.63 @@ -42,39 +49,36 @@
    7.64  static Face faces[] = {
    7.65  	{/* face0 */
    7.66  		{
    7.67 -			{{-1, 0, 0, 1}, {0, 0, -1, 1}, {0, 0}},
    7.68 -			{{0, 1, 0, 1}, {0, 0, -1, 1}, {0, 0}},
    7.69 -			{{1, 0, 0, 1}, {0, 0, -1, 1}, {0, 0}}
    7.70 +			{{-1, 0, 0, 0}, {0, 0, -1, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}},
    7.71 +			{{0, 1, 0, 0}, {0, 0, -1, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}},
    7.72 +			{{1, 0, 0, 0}, {0, 0, -1, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}
    7.73  		},
    7.74 -		{0, 0, -1, 1}, 0
    7.75 +		{0, 0, -1, 0}, 0, {0, 0, 0}
    7.76  	},
    7.77  	{/* face1 */
    7.78  		{
    7.79 -			{{-5, 0, -3, 1}, {0, 0, -1, 1}, {0, 0}},
    7.80 -			{{0, 0, 3, 1}, {0, 0, -1, 1}, {0, 0}},
    7.81 -			{{5, 0, -3, 1}, {0, 0, -1, 1}, {0, 0}}
    7.82 +			{{-5, 0, -3, 0}, {0, 1, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}},
    7.83 +			{{0, 0, 3, 0}, {0, 1, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}},
    7.84 +			{{5, 0, -3, 0}, {0, 1, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}}
    7.85  		},
    7.86 -		{0, 0, -1, 1}, 1
    7.87 +		{0, 1, 0, 0}, 1, {0, 0, 0}
    7.88  	}
    7.89  };
    7.90  
    7.91  static Material matlib[] = {
    7.92 -	{{1, 0, 0, 1}, {1, 1, 1, 1}, 0, 0, 60.0},
    7.93 -	{{0.2, 0.8, 0.3, 1}, {0, 0, 0, 0}, 0, 0, 0}
    7.94 +	{{1, 0, 0, 1}, {1, 1, 1, 1}, 0, 0, 60.0, 0},
    7.95 +	{{0.2, 0.8, 0.3, 1}, {0, 0, 0, 0}, 0, 0, 0, 0}
    7.96  };
    7.97  
    7.98  static Light lightlist[] = {
    7.99 -	{{-10, 10, -20, 1}, {1, 1, 1, 1}}
   7.100 +	{{-10, 10, -20, 0}, {1, 1, 1, 1}}
   7.101  };
   7.102  
   7.103 -static float xform[16] = {
   7.104 -	1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1
   7.105 -};
   7.106  
   7.107  static RendInfo rinf;
   7.108  
   7.109  
   7.110 -bool init_renderer(int xsz, int ysz, float *fb)
   7.111 +bool init_renderer(int xsz, int ysz)
   7.112  {
   7.113  	// render info
   7.114  	rinf.xsz = xsz;
   7.115 @@ -99,13 +103,16 @@
   7.116  	}
   7.117  
   7.118  	/* setup argument buffers */
   7.119 -	prog->set_arg_buffer(0, ARG_WR, xsz * ysz * 4 * sizeof(float), fb);
   7.120 -	prog->set_arg_buffer(1, ARG_RD, sizeof rinf, &rinf);
   7.121 -	prog->set_arg_buffer(2, ARG_RD, sizeof faces, faces);
   7.122 -	prog->set_arg_buffer(3, ARG_RD, sizeof matlib, matlib);
   7.123 -	prog->set_arg_buffer(4, ARG_RD, sizeof lightlist, lightlist);
   7.124 -	prog->set_arg_buffer(5, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
   7.125 -	prog->set_arg_buffer(6, ARG_RD, sizeof xform, &xform);
   7.126 +	prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float));
   7.127 +	prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf);
   7.128 +	prog->set_arg_buffer(KARG_FACES, ARG_RD, sizeof faces, faces);
   7.129 +	prog->set_arg_buffer(KARG_MATLIB, ARG_RD, sizeof matlib, matlib);
   7.130 +	prog->set_arg_buffer(KARG_LIGHTS, ARG_RD, sizeof lightlist, lightlist);
   7.131 +	prog->set_arg_buffer(KARG_PRIM_RAYS, ARG_RD, xsz * ysz * sizeof *prim_rays, prim_rays);
   7.132 +	prog->set_arg_buffer(KARG_XFORM, ARG_RD, 16 * sizeof(float));
   7.133 +	prog->set_arg_buffer(KARG_INVTRANS_XFORM, ARG_RD, 16 * sizeof(float));
   7.134 +
   7.135 +	delete [] prim_rays;
   7.136  
   7.137  	global_size = xsz * ysz;
   7.138  	return true;
   7.139 @@ -113,7 +120,6 @@
   7.140  
   7.141  void destroy_renderer()
   7.142  {
   7.143 -	delete [] prim_rays;
   7.144  	delete prog;
   7.145  }
   7.146  
   7.147 @@ -124,19 +130,63 @@
   7.148  	}
   7.149  
   7.150  	CLMemBuffer *mbuf = prog->get_arg_buffer(0);
   7.151 -	map_mem_buffer(mbuf, MAP_RD);
   7.152 +	void *fb = map_mem_buffer(mbuf, MAP_RD);
   7.153 +	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rinf.xsz, rinf.ysz, GL_RGBA, GL_FLOAT, fb);
   7.154  	unmap_mem_buffer(mbuf);
   7.155  	return true;
   7.156  }
   7.157  
   7.158 -void set_xform(float *matrix)
   7.159 +void dbg_render_gl()
   7.160  {
   7.161 -	CLMemBuffer *mbuf = prog->get_arg_buffer(6);
   7.162 -	assert(mbuf);
   7.163 +	glPushAttrib(GL_ENABLE_BIT | GL_TRANSFORM_BIT);
   7.164  
   7.165 -	assert(map_mem_buffer(mbuf, MAP_WR) == xform);
   7.166 -	memcpy(xform, matrix, sizeof xform);
   7.167 -	unmap_mem_buffer(mbuf);
   7.168 +	glDisable(GL_TEXTURE_2D);
   7.169 +	glEnable(GL_DEPTH_TEST);
   7.170 +
   7.171 +	glMatrixMode(GL_PROJECTION);
   7.172 +	glPushMatrix();
   7.173 +	glLoadIdentity();
   7.174 +	gluPerspective(45.0, (float)rinf.xsz / (float)rinf.ysz, 0.5, 1000.0);
   7.175 +
   7.176 +	glBegin(GL_TRIANGLES);
   7.177 +	for(int i=0; i<rinf.num_faces; i++) {
   7.178 +		Material *mat = matlib + faces[i].matid;
   7.179 +		glColor3f(mat->kd[0], mat->kd[1], mat->kd[2]);
   7.180 +
   7.181 +		for(int j=0; j<3; j++) {
   7.182 +			float *pos = faces[i].v[j].pos;
   7.183 +			glVertex3f(pos[0], pos[1], pos[2]);
   7.184 +		}
   7.185 +	}
   7.186 +	glEnd();
   7.187 +
   7.188 +	glPopMatrix();
   7.189 +	glPopAttrib();
   7.190 +}
   7.191 +
   7.192 +void set_xform(float *matrix, float *invtrans)
   7.193 +{
   7.194 +	CLMemBuffer *mbuf_xform = prog->get_arg_buffer(KARG_XFORM);
   7.195 +	CLMemBuffer *mbuf_invtrans = prog->get_arg_buffer(KARG_INVTRANS_XFORM);
   7.196 +	assert(mbuf_xform && mbuf_invtrans);
   7.197 +
   7.198 +	float *mem = (float*)map_mem_buffer(mbuf_xform, MAP_WR);
   7.199 +	memcpy(mem, matrix, 16 * sizeof *mem);
   7.200 +	printf("-- xform:\n");
   7.201 +	for(int i=0; i<16; i++) {
   7.202 +		printf("%2.3f\t", mem[i]);
   7.203 +		if(i % 4 == 3) putchar('\n');
   7.204 +	}
   7.205 +	unmap_mem_buffer(mbuf_xform);
   7.206 +
   7.207 +	mem = (float*)map_mem_buffer(mbuf_invtrans, MAP_WR);
   7.208 +	memcpy(mem, invtrans, 16 * sizeof *mem);
   7.209 +	printf("-- inverse-transpose:\n");
   7.210 +	for(int i=0; i<16; i++) {
   7.211 +		printf("%2.3f\t", mem[i]);
   7.212 +		if(i % 4 == 3) putchar('\n');
   7.213 +	}
   7.214 +	unmap_mem_buffer(mbuf_invtrans);
   7.215  }
   7.216  
   7.217  static Ray get_primary_ray(int x, int y, int w, int h, float vfov_deg)
   7.218 @@ -155,6 +205,6 @@
   7.219  	py *= 100.0;
   7.220  	pz *= 100.0;
   7.221  
   7.222 -	Ray ray = {{0, 0, 0, 1}, {px, py, pz, 1}};
   7.223 +	Ray ray = {{0, 0, 0, 1}, {px, py, -pz, 1}};
   7.224  	return ray;
   7.225  }
     8.1 --- a/src/rt.h	Sat Jul 31 22:23:57 2010 +0100
     8.2 +++ b/src/rt.h	Tue Aug 03 13:06:59 2010 +0100
     8.3 @@ -1,9 +1,11 @@
     8.4  #ifndef RT_H_
     8.5  #define RT_H_
     8.6  
     8.7 -bool init_renderer(int xsz, int ysz, float *fb);
     8.8 +bool init_renderer(int xsz, int ysz);
     8.9  void destroy_renderer();
    8.10  bool render();
    8.11 -void set_xform(float *matrix);
    8.12 +void set_xform(float *matrix, float *invtrans);
    8.13 +
    8.14 +void dbg_render_gl();
    8.15  
    8.16  #endif	/* RT_H_ */