/* vim: set ft=opencl:ts=4:sw=4 */

/*
 * Ray tracing kernel and its helpers.
 *
 * Rays are treated as segments: every intersection routine in this file only
 * accepts hits whose parameter t lies in (EPSILON, 1.0], i.e. between
 * ray.origin and ray.origin + ray.dir.
 */

/* Per-frame render parameters read by the kernel through `rinf`. */
struct RendInfo {
	float4 ambient;				/* ambient light colour, copied into Scene.ambient */
	int xsz, ysz;				/* not read in this file; presumably the image size - confirm against host */
	int num_faces, num_lights;	/* element counts of the `faces` and `lights` buffers */
	int max_iter;				/* upper bound on ray bounces per pixel (see render) */
	int kd_depth;				/* not read in this file; presumably the kd-tree depth - confirm against host */
};

struct Vertex {
	float4 pos;
	float4 normal;
	float4 tex;
	float4 padding;
};

struct Face {
	struct Vertex v[3];
	float4 normal;		/* face normal used for the plane test in intersect() */
	int matid;			/* index into the material library */
	int padding[3];
};

struct Material {
	float4 kd, ks;		/* diffuse and specular colour */
	float kr, kt;		/* kr scales the reflected energy in render(); kt is not read in this file */
	float spow;			/* specular exponent */
	float padding;
};

struct Light {
	float4 pos, color;
};

struct Ray {
	float4 origin, dir;
};

/* Result of a ray/face intersection. */
struct SurfPoint {
	float t;						/* ray parameter of the hit */
	float4 pos, norm, dbg;			/* hit position, interpolated normal, barycentric coords (debug) */
	global const struct Face *obj;	/* face that was hit, 0 if none */
	struct Material mat;			/* filled in by find_intersection() */
};

/* Bundle of the scene buffers handed to the kernel. */
struct Scene {
	float4 ambient;
	global const struct Face *faces;
	int num_faces;
	global const struct Light *lights;
	int num_lights;
	global const struct Material *matlib;
	global const struct KDNode *kdtree;
};

struct AABBox {
	float4 min, max;
};

/*
 * kd-tree node. Nodes are stored as an implicit heap: the root is at index 1
 * and the children of node i are at 2i and 2i+1 (see find_intersection).
 * A node with num_faces >= 0 is treated as a leaf.
 */
struct KDNode {
	struct AABBox aabb;
	int face_idx[32];
	int num_faces;
	int padding[3];
};

#define MIN_ENERGY	0.001
#define EPSILON		1e-5

float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp);
bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *sp);
bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp);
bool intersect_aabb(struct Ray ray, struct AABBox aabb);

float4 reflect(float4 v, float4 n);
float4 transform(float4 v, global const float *xform);
void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans);
float4 calc_bary(float4 pt, global const struct Face *face, float4 norm);
float mean(float4 v);

/*
 * Kernel entry point: traces the primary ray of work-item `idx` and writes
 * the accumulated colour to fb[idx].
 *
 * The primary ray is transformed by xform/invtrans, then followed through up
 * to rinf->max_iter reflections. Each bounce attenuates `energy` by ks * kr of
 * the surface that was hit; the loop stops early when the mean energy drops
 * to MIN_ENERGY or the ray hits nothing.
 */
kernel void render(global float4 *fb,
		global const struct RendInfo *rinf,
		global const struct Face *faces,
		global const struct Material *matlib,
		global const struct Light *lights,
		global const struct Ray *primrays,
		global const float *xform,
		global const float *invtrans,
		global const struct KDNode *kdtree)
{
	int idx = get_global_id(0);

	struct Scene scn;
	scn.ambient = rinf->ambient;
	scn.faces = faces;
	scn.num_faces = rinf->num_faces;
	scn.lights = lights;
	scn.num_lights = rinf->num_lights;
	scn.matlib = matlib;
	scn.kdtree = kdtree;

	struct Ray ray = primrays[idx];
	transform_ray(&ray, xform, invtrans);

	float4 pixel = (float4)(0, 0, 0, 0);
	float4 energy = (float4)(1.0, 1.0, 1.0, 0.0);
	int iter = 0;

	while(iter++ < rinf->max_iter && mean(energy) > MIN_ENERGY) {
		struct SurfPoint sp;
		if(!find_intersection(ray, &scn, &sp)) {
			break;	/* the ray left the scene, nothing more to accumulate */
		}

		pixel += shade(ray, &scn, &sp) * energy;

		float4 refl_col = sp.mat.ks * sp.mat.kr;

		/* continue with the mirror reflection from the hit point */
		ray.origin = sp.pos;
		ray.dir = reflect(-ray.dir, sp.norm);

		energy *= refl_col;
	}

	fb[idx] = pixel;
}

/*
 * Computes the direct illumination at surface point `sp` as seen along `ray`.
 *
 * Returns ambient * kd plus, for every light that is not occluded, a diffuse
 * term and a specular term (powr of the clamped dot product with the
 * reflected view vector, exponent mat.spow).
 */
float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp)
{
	float4 norm = sp->norm;

	/* make the normal face the incoming ray */
	if(dot(ray.dir, norm) >= 0.0) {
		norm = -norm;
	}

	float4 dcol = scn->ambient * sp->mat.kd;
	float4 scol = (float4)(0, 0, 0, 0);

	for(int i=0; i<scn->num_lights; i++) {
		float4 ldir = scn->lights[i].pos - sp->pos;

		/* ldir is left unnormalized so that t in (0, 1] spans exactly the
		 * segment from the surface point to the light */
		struct Ray shadowray;
		shadowray.origin = sp->pos;
		shadowray.dir = ldir;

		if(!find_intersection(shadowray, scn, 0)) {
			ldir = normalize(ldir);
			float4 vdir = -normalize(ray.dir);
			float4 vref = reflect(vdir, norm);

			float diff = fmax(dot(ldir, norm), 0.0f);
			dcol += sp->mat.kd * scn->lights[i].color * diff;

			float spec = powr(fmax(dot(ldir, vref), 0.0f), sp->mat.spow);
			scol += sp->mat.ks * scn->lights[i].color * spec;
		}
	}

	return dcol + scol;
}

#define STACK_SIZE	64
/*
 * Finds the nearest intersection of `ray` with the scene by walking the
 * kd-tree with an explicit index stack.
 *
 * Returns true if any face was hit with t in (EPSILON, 1.0]. When `spres` is
 * non-null it receives the nearest hit, including a copy of its material;
 * pass 0 when only the yes/no answer is needed (shadow rays).
 *
 * Define KDTREE_DEBUG to get the per-node traversal trace.
 */
bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
{
	struct SurfPoint sp0;
	sp0.t = 1.0;
	sp0.obj = 0;

	int idxstack[STACK_SIZE];
	int top = 0;			/* points at the topmost element of the stack */
	idxstack[top] = 1;		/* root at tree[1] (heap) */

#ifdef KDTREE_DEBUG
	printf("check intersection\n");
#endif

	while(top >= 0) {
		int idx = idxstack[top--];	/* remove this index from the stack and process it */

		global const struct KDNode *node = scn->kdtree + idx;
#ifdef KDTREE_DEBUG
		printf("idx: %d (%p) num_faces: %d\n", idx, node, node->num_faces);
#endif

		/* nothing below this node can be hit if its bounding box is missed */
		if(!intersect_aabb(ray, node->aabb)) {
			continue;
		}

		if(node->num_faces >= 0) {
			/* leaf node: check each face in turn and update the nearest
			 * intersection as needed */
			for(int i=0; i<node->num_faces; i++) {
				struct SurfPoint hit;
				int fidx = node->face_idx[i];

				if(intersect(ray, scn->faces + fidx, &hit) && hit.t < sp0.t) {
					sp0 = hit;
				}
			}
		} else if(top + 2 < STACK_SIZE) {
			/* internal node: descend into both children. The bound check
			 * keeps a deeper-than-expected tree from overrunning the stack. */
			idxstack[++top] = idx * 2;
			idxstack[++top] = idx * 2 + 1;
		}
	}

	if(!sp0.obj) {
		return false;
	}

	if(spres) {
		*spres = sp0;
		spres->mat = scn->matlib[sp0.obj->matid];
	}
	return true;
}

/* brute-force version that tests every face, kept for reference */
/*bool find_intersection(struct Ray ray, const struct Scene *scn, struct SurfPoint *spres)
{
	struct SurfPoint sp, sp0;
	sp0.t = 1.0;
	sp0.obj = 0;

	for(int i=0; i<scn->num_faces; i++) {
		if(intersect(ray, scn->faces + i, &sp) && sp.t < sp0.t) {
			sp0 = sp;
		}
	}

	if(!sp0.obj) {
		return false;
	}

	if(spres) {
		*spres = sp0;
		spres->mat = scn->matlib[sp0.obj->matid];
	}
	return true;
}*/

/*
 * Ray/triangle intersection test.
 *
 * Intersects the ray with the plane of `face`, rejects hits outside
 * (EPSILON, 1.0], then accepts the point only if its barycentric coordinates
 * sum to 1 within EPSILON. On success fills sp->t, pos, norm (interpolated
 * from the vertex normals), obj and dbg, and returns true. sp->mat is not
 * touched here.
 */
bool intersect(struct Ray ray, global const struct Face *face, struct SurfPoint *sp)
{
	float4 origin = ray.origin;
	float4 dir = ray.dir;
	float4 norm = face->normal;

	float ndotdir = dot(dir, norm);

	/* ray (nearly) parallel to the plane of the face */
	if(fabs(ndotdir) <= EPSILON) {
		return false;
	}

	float4 pt = face->v[0].pos;
	float4 vec = pt - origin;

	float ndotvec = dot(norm, vec);
	float t = ndotvec / ndotdir;

	if(t < EPSILON || t > 1.0) {
		return false;
	}
	pt = origin + dir * t;

	/* calc_bary() uses unsigned areas, so the coordinates sum to more than 1
	 * for any point outside the triangle */
	float4 bc = calc_bary(pt, face, norm);
	float bc_sum = bc.x + bc.y + bc.z;

	if(bc_sum < 1.0 - EPSILON || bc_sum > 1.0 + EPSILON) {
		return false;
	}

	sp->t = t;
	sp->pos = pt;
	sp->norm = normalize(face->v[0].normal * bc.x + face->v[1].normal * bc.y + face->v[2].normal * bc.z);
	sp->obj = face;
	sp->dbg = bc;
	return true;
}

/*
 * Slab test of the ray segment (t in (0, 1)) against an axis-aligned box.
 * Returns true immediately when the ray origin lies inside the box.
 */
bool intersect_aabb(struct Ray ray, struct AABBox aabb)
{
	if(ray.origin.x >= aabb.min.x && ray.origin.y >= aabb.min.y && ray.origin.z >= aabb.min.z &&
			ray.origin.x < aabb.max.x && ray.origin.y < aabb.max.y && ray.origin.z < aabb.max.z) {
		return true;
	}

	/* bbox[0] is the min corner, bbox[1] the max corner; indexing by the sign
	 * of the direction picks the near/far plane on each axis */
	float4 bbox[2] = {
		(float4)(aabb.min.x, aabb.min.y, aabb.min.z, 0),
		(float4)(aabb.max.x, aabb.max.y, aabb.max.z, 0)
	};

	int xsign = (int)(ray.dir.x < 0.0);
	float invdirx = 1.0 / ray.dir.x;
	float tmin = (bbox[xsign].x - ray.origin.x) * invdirx;
	float tmax = (bbox[1 - xsign].x - ray.origin.x) * invdirx;

	int ysign = (int)(ray.dir.y < 0.0);
	float invdiry = 1.0 / ray.dir.y;
	float tymin = (bbox[ysign].y - ray.origin.y) * invdiry;
	float tymax = (bbox[1 - ysign].y - ray.origin.y) * invdiry;

	if(tmin > tymax || tymin > tmax) {
		return false;
	}

	if(tymin > tmin) tmin = tymin;
	if(tymax < tmax) tmax = tymax;

	int zsign = (int)(ray.dir.z < 0.0);
	float invdirz = 1.0 / ray.dir.z;
	float tzmin = (bbox[zsign].z - ray.origin.z) * invdirz;
	float tzmax = (bbox[1 - zsign].z - ray.origin.z) * invdirz;

	if(tmin > tzmax || tzmin > tmax) {
		return false;
	}

	/* fold the z slab into the interval as well, otherwise the range test
	 * below ignores where the segment enters/leaves the box along z */
	if(tzmin > tmin) tmin = tzmin;
	if(tzmax < tmax) tmax = tzmax;

	return tmin < 1.0 && tmax > 0.0;
}

/* Reflects v about n: returns 2 * dot(v, n) * n - v. */
float4 reflect(float4 v, float4 n)
{
	return 2.0f * dot(v, n) * n - v;
}

/*
 * Transforms the xyz part of v by the 16-float matrix `xform`, including the
 * translation stored in elements 12..14. The w component of the result is 0.
 */
float4 transform(float4 v, global const float *xform)
{
	float4 res;
	res.x = v.x * xform[0] + v.y * xform[4] + v.z * xform[8] + xform[12];
	res.y = v.x * xform[1] + v.y * xform[5] + v.z * xform[9] + xform[13];
	res.z = v.x * xform[2] + v.y * xform[6] + v.z * xform[10] + xform[14];
	res.w = 0.0;
	return res;
}

/* Transforms the ray in place: origin by `xform`, direction by `invtrans`. */
void transform_ray(struct Ray *ray, global const float *xform, global const float *invtrans)
{
	ray->origin = transform(ray->origin, xform);
	ray->dir = transform(ray->dir, invtrans);
}

/*
 * Computes area-ratio barycentric coordinates of `pt` with respect to `face`.
 *
 * Each coordinate is the unsigned area of the sub-triangle opposite a vertex
 * divided by the area of the whole triangle, all projected onto `norm`.
 * Returns (0, 0, 0, 0) for a degenerate face (area below EPSILON).
 */
float4 calc_bary(float4 pt, global const struct Face *face, float4 norm)
{
	float4 bc = (float4)(0, 0, 0, 0);

	// calculate area of the whole triangle
	float4 v1 = face->v[1].pos - face->v[0].pos;
	float4 v2 = face->v[2].pos - face->v[0].pos;
	float4 xv1v2 = cross(v1, v2);

	float area = fabs(dot(xv1v2, norm)) * 0.5;
	if(area < EPSILON) {
		return bc;
	}

	float4 pv0 = face->v[0].pos - pt;
	float4 pv1 = face->v[1].pos - pt;
	float4 pv2 = face->v[2].pos - pt;

	// calculate the area of each sub-triangle
	float4 x12 = cross(pv1, pv2);
	float4 x20 = cross(pv2, pv0);
	float4 x01 = cross(pv0, pv1);

	float a0 = fabs(dot(x12, norm)) * 0.5;
	float a1 = fabs(dot(x20, norm)) * 0.5;
	float a2 = fabs(dot(x01, norm)) * 0.5;

	bc.x = a0 / area;
	bc.y = a1 / area;
	bc.z = a2 / area;
	return bc;
}

/* Arithmetic mean of the x, y and z components of v (w is ignored). */
float mean(float4 v)
{
	return native_divide(v.x + v.y + v.z, 3.0f);
}