nuclear@12: /* vim: set ft=opencl:ts=4:sw=4 */ nuclear@45: #include "common.h" nuclear@12: nuclear@2: struct RendInfo { nuclear@22: float4 ambient; nuclear@2: int xsz, ysz; nuclear@9: int num_faces, num_lights; nuclear@2: int max_iter; nuclear@28: int kd_depth; nuclear@2: }; nuclear@2: nuclear@9: struct Vertex { nuclear@2: float4 pos; nuclear@9: float4 normal; nuclear@12: float4 tex; nuclear@12: float4 padding; nuclear@9: }; nuclear@9: nuclear@9: struct Face { nuclear@9: struct Vertex v[3]; nuclear@9: float4 normal; nuclear@9: int matid; nuclear@12: int padding[3]; nuclear@9: }; nuclear@9: nuclear@9: struct Material { nuclear@5: float4 kd, ks; nuclear@9: float kr, kt; nuclear@9: float spow; nuclear@12: float padding; nuclear@2: }; nuclear@2: nuclear@3: struct Light { nuclear@3: float4 pos, color; nuclear@3: }; nuclear@3: nuclear@2: struct Ray { nuclear@2: float4 origin, dir; nuclear@2: }; nuclear@2: nuclear@2: struct SurfPoint { nuclear@2: float t; nuclear@12: float4 pos, norm, dbg; nuclear@9: global const struct Face *obj; nuclear@19: struct Material mat; nuclear@2: }; nuclear@2: nuclear@16: struct Scene { nuclear@16: float4 ambient; nuclear@16: global const struct Face *faces; nuclear@16: int num_faces; nuclear@16: global const struct Light *lights; nuclear@16: int num_lights; nuclear@16: global const struct Material *matlib; nuclear@43: //global const struct KDNode *kdtree; nuclear@28: }; nuclear@28: nuclear@28: struct AABBox { nuclear@28: float4 min, max; nuclear@28: }; nuclear@28: nuclear@28: struct KDNode { nuclear@29: struct AABBox aabb; nuclear@43: int face_idx[MAX_NODE_FACES]; nuclear@28: int num_faces; nuclear@35: int left, right; nuclear@35: int padding; nuclear@16: }; nuclear@2: nuclear@16: #define MIN_ENERGY 0.001 nuclear@21: #define EPSILON 1e-5 nuclear@16: nuclear@43: float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg); nuclear@43: bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp, read_only image2d_t kdimg); nuclear@9: bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp); nuclear@28: bool intersect_aabb(struct Ray ray, struct AABBox aabb); nuclear@16: nuclear@8: float4 reflect(float4 v, float4 n); nuclear@8: float4 transform(float4 v, global const float *xform); nuclear@16: void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans); nuclear@12: float4 calc_bary(float4 pt, global const struct Face *face, float4 norm); nuclear@19: float mean(float4 v); nuclear@4: nuclear@43: void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg); nuclear@43: nuclear@39: nuclear@39: kernel void render(write_only image2d_t fb, nuclear@4: global const struct RendInfo *rinf, nuclear@9: global const struct Face *faces, nuclear@9: global const struct Material *matlib, nuclear@4: global const struct Light *lights, nuclear@7: global const struct Ray *primrays, nuclear@12: global const float *xform, nuclear@28: global const float *invtrans, nuclear@43: //global const struct KDNode *kdtree nuclear@43: read_only image2d_t kdtree_img) nuclear@2: { nuclear@2: int idx = get_global_id(0); nuclear@2: nuclear@16: struct Scene scn; nuclear@16: scn.ambient = rinf->ambient; nuclear@16: scn.faces = faces; nuclear@16: scn.num_faces = rinf->num_faces; nuclear@16: scn.lights = lights; nuclear@16: scn.num_lights = rinf->num_lights; nuclear@16: scn.matlib = matlib; nuclear@43: //scn.kdtree_img = kdtree_img; nuclear@8: nuclear@16: struct Ray ray = primrays[idx]; nuclear@16: transform_ray(&ray, xform, invtrans); nuclear@4: nuclear@19: float4 pixel = (float4)(0, 0, 0, 0); nuclear@22: float4 energy = (float4)(1.0, 1.0, 1.0, 0.0); nuclear@19: int iter = 0; nuclear@19: nuclear@19: while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) { nuclear@19: struct SurfPoint sp; nuclear@43: if(find_intersection(ray, &scn, &sp, kdtree_img)) { nuclear@43: pixel += shade(ray, &scn, &sp, kdtree_img) * energy; nuclear@19: nuclear@19: float4 refl_col = sp.mat.ks * sp.mat.kr; nuclear@19: nuclear@19: ray.origin = sp.pos; nuclear@19: ray.dir = reflect(-ray.dir, sp.norm); nuclear@19: nuclear@35: energy *= refl_col; nuclear@19: } else { nuclear@43: energy = (float4)(0.0, 0.0, 0.0, 0.0); nuclear@19: } nuclear@17: } nuclear@19: nuclear@39: int2 coord; nuclear@43: coord.x = idx % rinf->xsz; nuclear@43: coord.y = idx / rinf->xsz; nuclear@39: nuclear@39: write_imagef(fb, coord, pixel); nuclear@4: } nuclear@4: nuclear@43: float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp, read_only image2d_t kdimg) nuclear@16: { nuclear@16: float4 norm = sp->norm; nuclear@43: //bool entering = true; nuclear@12: nuclear@12: if(dot(ray.dir, norm) >= 0.0) { nuclear@12: norm = -norm; nuclear@43: //entering = false; nuclear@12: } nuclear@12: nuclear@19: float4 dcol = scn->ambient * sp->mat.kd; nuclear@8: float4 scol = (float4)(0, 0, 0, 0); nuclear@5: nuclear@16: for(int i=0; inum_lights; i++) { nuclear@16: float4 ldir = scn->lights[i].pos - sp->pos; nuclear@5: nuclear@16: struct Ray shadowray; nuclear@16: shadowray.origin = sp->pos; nuclear@16: shadowray.dir = ldir; nuclear@5: nuclear@43: if(!find_intersection(shadowray, scn, 0, kdimg)) { nuclear@16: ldir = normalize(ldir); nuclear@43: float4 vdir = -ray.dir; nuclear@43: vdir.x = native_divide(vdir.x, RAY_MAG); nuclear@43: vdir.y = native_divide(vdir.y, RAY_MAG); nuclear@43: vdir.z = native_divide(vdir.z, RAY_MAG); nuclear@16: float4 vref = reflect(vdir, norm); nuclear@16: nuclear@16: float diff = fmax(dot(ldir, norm), 0.0f); nuclear@43: dcol += sp->mat.kd /* scn->lights[i].color*/ * diff; nuclear@16: nuclear@43: float spec = native_powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow); nuclear@43: scol += sp->mat.ks /* scn->lights[i].color*/ * spec; nuclear@16: } nuclear@16: } nuclear@16: nuclear@8: return dcol + scol; nuclear@2: } nuclear@2: nuclear@45: #define STACK_SIZE MAX_TREE_DEPTH nuclear@43: bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres, read_only image2d_t kdimg) nuclear@28: { nuclear@29: struct SurfPoint sp0; nuclear@29: sp0.t = 1.0; nuclear@29: sp0.obj = 0; nuclear@29: nuclear@29: int idxstack[STACK_SIZE]; nuclear@31: int top = 0; // points after the topmost element of the stack nuclear@35: idxstack[top++] = 0; // root at tree[0] nuclear@29: nuclear@31: while(top > 0) { nuclear@31: int idx = idxstack[--top]; // remove this index from the stack and process it nuclear@30: nuclear@43: struct KDNode node; nuclear@43: read_kdnode(idx, &node, kdimg); nuclear@29: nuclear@43: if(intersect_aabb(ray, node.aabb)) { nuclear@43: if(node.left == -1) { nuclear@31: // leaf node... check each face in turn and update the nearest intersection as needed nuclear@43: for(int i=0; ifaces + fidx, &spt) && spt.t < sp0.t) { nuclear@31: sp0 = spt; nuclear@29: } nuclear@29: } nuclear@31: } else { nuclear@31: // internal node... recurse to the children nuclear@43: idxstack[top++] = node.left; nuclear@43: idxstack[top++] = node.right; nuclear@29: } nuclear@29: } nuclear@29: } nuclear@29: nuclear@29: if(!sp0.obj) { nuclear@29: return false; nuclear@29: } nuclear@29: nuclear@29: if(spres) { nuclear@29: *spres = sp0; nuclear@29: spres->mat = scn->matlib[sp0.obj->matid]; nuclear@29: } nuclear@29: return true; nuclear@28: } nuclear@16: nuclear@16: bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp) nuclear@2: { nuclear@12: float4 origin = ray.origin; nuclear@12: float4 dir = ray.dir; nuclear@12: float4 norm = face->normal; nuclear@12: nuclear@16: float ndotdir = dot(dir, norm); nuclear@12: nuclear@9: if(fabs(ndotdir) <= EPSILON) { nuclear@9: return false; nuclear@9: } nuclear@2: nuclear@9: float4 pt = face->v[0].pos; nuclear@12: float4 vec = pt - origin; nuclear@2: nuclear@16: float ndotvec = dot(norm, vec); nuclear@43: float t = native_divide(ndotvec, ndotdir); nuclear@2: nuclear@2: if(t < EPSILON || t > 1.0) { nuclear@2: return false; nuclear@2: } nuclear@12: pt = origin + dir * t; nuclear@9: nuclear@12: nuclear@12: float4 bc = calc_bary(pt, face, norm); nuclear@9: float bc_sum = bc.x + bc.y + bc.z; nuclear@9: nuclear@20: if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) { nuclear@9: return false; nuclear@12: bc *= 1.2; nuclear@9: } nuclear@2: nuclear@2: sp->t = t; nuclear@9: sp->pos = pt; nuclear@21: sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z); nuclear@9: sp->obj = face; nuclear@12: sp->dbg = bc; nuclear@2: return true; nuclear@2: } nuclear@5: nuclear@28: bool intersect_aabb(struct Ray ray, struct AABBox aabb) nuclear@28: { nuclear@28: if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z && nuclear@28: ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) { nuclear@28: return true; nuclear@28: } nuclear@28: nuclear@29: float4 bbox[2] = { nuclear@29: aabb.min.x, aabb.min.y, aabb.min.z, 0, nuclear@29: aabb.max.x, aabb.max.y, aabb.max.z, 0 nuclear@29: }; nuclear@28: nuclear@28: int xsign = (int)(ray.dir.x < 0.0); nuclear@43: float invdirx = native_recip(ray.dir.x); nuclear@28: float tmin = (bbox[xsign].x - ray.origin.x) * invdirx; nuclear@28: float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx; nuclear@28: nuclear@28: int ysign = (int)(ray.dir.y < 0.0); nuclear@43: float invdiry = native_recip(ray.dir.y); nuclear@28: float tymin = (bbox[ysign].y - ray.origin.y) * invdiry; nuclear@28: float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry; nuclear@28: nuclear@28: if(tmin > tymax || tymin > tmax) { nuclear@28: return false; nuclear@28: } nuclear@28: nuclear@28: if(tymin > tmin) tmin = tymin; nuclear@28: if(tymax < tmax) tmax = tymax; nuclear@28: nuclear@28: int zsign = (int)(ray.dir.z < 0.0); nuclear@43: float invdirz = native_recip(ray.dir.z); nuclear@28: float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz; nuclear@28: float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz; nuclear@28: nuclear@28: if(tmin > tzmax || tzmin > tmax) { nuclear@28: return false; nuclear@28: } nuclear@28: nuclear@29: return tmin < 1.0 && tmax > 0.0; nuclear@28: } nuclear@28: nuclear@8: float4 reflect(float4 v, float4 n) nuclear@5: { nuclear@23: return 2.0f * dot(v, n) * n - v; nuclear@5: } nuclear@8: nuclear@8: float4 transform(float4 v, global const float *xform) nuclear@8: { nuclear@8: float4 res; nuclear@8: res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12]; nuclear@8: res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13]; nuclear@8: res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14]; nuclear@12: res.w = 0.0; nuclear@8: return res; nuclear@8: } nuclear@8: nuclear@16: void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans) nuclear@8: { nuclear@16: ray->origin = transform(ray->origin, xform); nuclear@16: ray->dir = transform(ray->dir, invtrans); nuclear@8: } nuclear@9: nuclear@12: float4 calc_bary(float4 pt, global const struct Face *face, float4 norm) nuclear@9: { nuclear@12: float4 bc = (float4)(0, 0, 0, 0); nuclear@9: nuclear@12: // calculate area of the whole triangle nuclear@12: float4 v1 = face->v[1].pos - face->v[0].pos; nuclear@12: float4 v2 = face->v[2].pos - face->v[0].pos; nuclear@12: float4 xv1v2 = cross(v1, v2); nuclear@12: nuclear@16: float area = fabs(dot(xv1v2, norm)) * 0.5; nuclear@9: if(area < EPSILON) { nuclear@9: return bc; nuclear@9: } nuclear@9: nuclear@9: float4 pv0 = face->v[0].pos - pt; nuclear@9: float4 pv1 = face->v[1].pos - pt; nuclear@9: float4 pv2 = face->v[2].pos - pt; nuclear@9: nuclear@12: // calculate the area of each sub-triangle nuclear@12: float4 x12 = cross(pv1, pv2); nuclear@12: float4 x20 = cross(pv2, pv0); nuclear@12: float4 x01 = cross(pv0, pv1); nuclear@12: nuclear@16: float a0 = fabs(dot(x12, norm)) * 0.5; nuclear@16: float a1 = fabs(dot(x20, norm)) * 0.5; nuclear@16: float a2 = fabs(dot(x01, norm)) * 0.5; nuclear@9: nuclear@43: bc.x = native_divide(a0, area); nuclear@43: bc.y = native_divide(a1, area); nuclear@43: bc.z = native_divide(a2, area); nuclear@9: return bc; nuclear@9: } nuclear@19: nuclear@19: float mean(float4 v) nuclear@19: { nuclear@19: return native_divide(v.x + v.y + v.z, 3.0); nuclear@19: } nuclear@43: nuclear@43: nuclear@43: const sampler_t kdsampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; nuclear@43: nuclear@43: // read a KD-tree node from a texture scanline nuclear@43: void read_kdnode(int idx, struct KDNode *node, read_only image2d_t kdimg) nuclear@43: { nuclear@45: int startx = KDIMG_NODE_WIDTH * (idx / KDIMG_MAX_HEIGHT); nuclear@45: nuclear@43: int2 tc; nuclear@45: tc.x = startx; nuclear@45: tc.y = idx % KDIMG_MAX_HEIGHT; nuclear@43: nuclear@43: node->aabb.min = read_imagef(kdimg, kdsampler, tc); tc.x++; nuclear@43: node->aabb.max = read_imagef(kdimg, kdsampler, tc); nuclear@43: nuclear@45: tc.x = startx + 2 + MAX_NODE_FACES / 4; nuclear@43: float4 pix = read_imagef(kdimg, kdsampler, tc); nuclear@43: node->num_faces = (int)pix.x; nuclear@43: node->left = (int)pix.y; nuclear@43: node->right = (int)pix.z; nuclear@43: nuclear@45: tc.x = startx + 2; nuclear@43: for(int i=0; inum_faces; i+=4) { nuclear@43: float4 pix = read_imagef(kdimg, kdsampler, tc); tc.x++; nuclear@43: node->face_idx[i] = (int)pix.x; nuclear@43: node->face_idx[i + 1] = (int)pix.y; nuclear@43: node->face_idx[i + 2] = (int)pix.z; nuclear@43: node->face_idx[i + 3] = (int)pix.w; nuclear@43: } nuclear@43: }