# HG changeset patch # User John Tsiombikas # Date 1282930209 -3600 # Node ID 980bc07be868dbf36e80aa605a8677f94850e1a4 # Parent 4bcf78e572d6c1358209ff592d90383b91493c5e Implemented OpenGL/OpenCL interop, and removed the texture copy diff -r 4bcf78e572d6 -r 980bc07be868 rt.cl --- a/rt.cl Fri Aug 27 02:22:08 2010 +0100 +++ b/rt.cl Fri Aug 27 18:30:09 2010 +0100 @@ -80,7 +80,8 @@ float4 calc_bary(float4 pt, global const struct Face *face, float4 norm); float mean(float4 v); -kernel void render(global float4 *fb, + +kernel void render(write_only image2d_t fb, global const struct RendInfo *rinf, global const struct Face *faces, global const struct Material *matlib, @@ -124,7 +125,14 @@ } } - fb[idx] = pixel; + int img_x = get_image_width(fb); + + int2 coord; + coord.x = idx % img_x; + coord.y = idx / img_x; + + write_imagef(fb, coord, pixel); + //fb[idx] = pixel; } float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp) diff -r 4bcf78e572d6 -r 980bc07be868 src/clray.cc --- a/src/clray.cc Fri Aug 27 02:22:08 2010 +0100 +++ b/src/clray.cc Fri Aug 27 18:30:09 2010 +0100 @@ -11,6 +11,7 @@ #include "rt.h" #include "matrix.h" #include "scene.h" +#include "ocl.h" void cleanup(); void disp(); @@ -31,6 +32,7 @@ static bool dbg_show_obj = true; static Scene scn; +static unsigned int tex; int main(int argc, char **argv) { @@ -102,11 +104,6 @@ glutMouseFunc(mouse); glutMotionFunc(motion); - if(!init_renderer(xsz, ysz, &scn)) { - return 1; - } - atexit(cleanup); - unsigned int *test_pattern = new unsigned int[xsz * ysz]; for(int i=0; i #include #include +#include #ifndef _MSC_VER #include #else @@ -12,13 +13,14 @@ #endif #include #include "ocl.h" +#include "ogl.h" #include "ocl_errstr.h" +#if defined(unix) || defined(__unix__) +#include +#include +#endif -class InitCL { -public: - InitCL(); -}; struct device_info { cl_device_id id; @@ -33,7 +35,6 @@ unsigned long mem_size; }; -static bool init_opencl(void); static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); static int get_dev_info(cl_device_id dev, struct device_info *di); static int devcmp(struct device_info *a, struct device_info *b); @@ -42,26 +43,36 @@ static const char *clstrerror(int err); -static InitCL initcl; static cl_context ctx; static cl_command_queue cmdq; static device_info devinf; -InitCL::InitCL() -{ - if(!init_opencl()) { - exit(0); - } -} - -static bool init_opencl(void) +bool init_opencl() { if(select_device(&devinf, devcmp) == -1) { return false; } +#if defined(__APPLE__) +#error "CL/GL context sharing not implemented on MacOSX yet" +#elif defined(unix) || defined(__unix__) + Display *dpy = glXGetCurrentDisplay(); + GLXContext glctx = glXGetCurrentContext(); - if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { + assert(dpy && glctx); + + cl_context_properties prop[] = { + CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy, + CL_GL_CONTEXT_KHR, (cl_context_properties)glctx, + 0 + }; +#elif defined(WIN32) || defined(__WIN32__) +#error "CL/GL context sharing not implemented on windows yet" +#else +#error "unknown or unsupported platform" +#endif + + if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) { fprintf(stderr, "failed to create opencl context\n"); return false; } @@ -94,6 +105,25 @@ mbuf->mem = mem; mbuf->size = sz; mbuf->ptr = 0; + mbuf->tex = 0; + return mbuf; +} + +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex) +{ + int err; + cl_mem mem; + + if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) { + fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err)); + return 0; + } + + CLMemBuffer *mbuf = new CLMemBuffer; + mbuf->mem = mem; + mbuf->size = 0; + mbuf->ptr = 0; + mbuf->tex = tex; return mbuf; } @@ -105,7 +135,7 @@ } } -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev) { if(!mbuf) return 0; @@ -116,7 +146,7 @@ #endif int err; - mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err); if(!mbuf->ptr) { fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err)); return 0; @@ -124,31 +154,31 @@ return mbuf->ptr; } -void unmap_mem_buffer(CLMemBuffer *mbuf) +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev) { if(!mbuf || !mbuf->ptr) return; - clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev); mbuf->ptr = 0; } -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev) { if(!mbuf) return false; int err; - if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) { fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err)); return false; } return true; } -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev) { if(!mbuf) return false; int err; - if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) { fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err)); return false; } @@ -156,6 +186,35 @@ } +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev) +{ + if(!mbuf || !mbuf->tex) { + return false; + } + + int err; + if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { + fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err)); + return false; + } + return true; +} + +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev) +{ + if(!mbuf || !mbuf->tex) { + return false; + } + + int err; + if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { + fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err)); + return false; + } + return true; +} + + CLArg::CLArg() { memset(this, 0, sizeof *this); @@ -169,10 +228,19 @@ this->kname = kname; args.resize(16); built = false; + + wait_event = last_event = 0; } CLProgram::~CLProgram() { + if(wait_event) { + clReleaseEvent(wait_event); + } + if(last_event) { + clReleaseEvent(last_event); + } + if(prog) { clReleaseProgram(prog); @@ -248,8 +316,28 @@ printf("create argument %d buffer: %d bytes\n", idx, (int)sz); CLMemBuffer *buf; - if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, ptr))) { - fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz); + if(sz <= 0) { + fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz); + return false; + } + if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { + return false; + } + + if((int)args.size() <= idx) { + args.resize(idx + 1); + } + args[idx].type = ARGTYPE_MEM_BUF; + args[idx].v.mbuf = buf; + return true; +} + +bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex) +{ + printf("create argument %d from texture %u\n", idx, tex); + CLMemBuffer *buf; + + if(!(buf = create_mem_buffer(rdwr, tex))) { return false; } @@ -284,7 +372,7 @@ { int err; - if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) { + if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) { size_t sz; clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz); @@ -376,19 +464,37 @@ } va_end(ap); + if(last_event) { + clReleaseEvent(last_event); + } + int err; - cl_event event; - - if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, &event)) != 0) { + if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, + wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) { fprintf(stderr, "error executing kernel: %s\n", clstrerror(err)); return false; } - clWaitForEvents(1, &event); - clReleaseEvent(event); + if(wait_event) { + clReleaseEvent(wait_event); + wait_event = 0; + } return true; } +void CLProgram::set_wait_event(cl_event ev) +{ + if(wait_event) { + clReleaseEvent(wait_event); + } + wait_event = ev; +} + +cl_event CLProgram::get_last_event() const +{ + return last_event; +} + static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*)) { unsigned int i, j, num_dev, num_plat, sel, ret; diff -r 4bcf78e572d6 -r 980bc07be868 src/ocl.h --- a/src/ocl.h Fri Aug 27 02:22:08 2010 +0100 +++ b/src/ocl.h Fri Aug 27 18:30:09 2010 +0100 @@ -5,6 +5,7 @@ #include #ifndef __APPLE__ #include +#include #else #include #endif @@ -25,16 +26,24 @@ cl_mem mem; size_t size; void *ptr; + unsigned int tex; }; -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf = 0); + +bool init_opencl(); + +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf); +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex); void destroy_mem_buffer(CLMemBuffer *mbuf); -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); -void unmap_mem_buffer(CLMemBuffer *mbuf); +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev = 0); +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev = 0); -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src); -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev = 0); +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev = 0); + +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev = 0); +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev = 0); enum { ARGTYPE_NONE, @@ -65,6 +74,8 @@ cl_kernel kernel; std::vector args; bool built; + mutable cl_event wait_event; + mutable cl_event last_event; public: CLProgram(const char *kname); @@ -75,6 +86,7 @@ bool set_argi(int arg, int val); bool set_argf(int arg, float val); bool set_arg_buffer(int arg, int rdwr, size_t sz, const void *buf = 0); + bool set_arg_texture(int arg, int rdwr, unsigned int tex); CLMemBuffer *get_arg_buffer(int arg); int get_num_args() const; @@ -82,6 +94,12 @@ bool run() const; bool run(int dim, ...) const; + + // sets an event that has to be completed before running the kernel + void set_wait_event(cl_event ev); + + // gets the last event so that we can wait for it to finish + cl_event get_last_event() const; }; #endif /* OCL_H_ */ diff -r 4bcf78e572d6 -r 980bc07be868 src/rt.cc --- a/src/rt.cc Fri Aug 27 02:22:08 2010 +0100 +++ b/src/rt.cc Fri Aug 27 18:30:09 2010 +0100 @@ -53,7 +53,7 @@ static RendInfo rinf; -bool init_renderer(int xsz, int ysz, Scene *scn) +bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex) { // render info rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0; @@ -94,7 +94,7 @@ // XXX now we can actually destroy the original kdtree and keep only the GPU version /* setup argument buffers */ - prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float)); + prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex); prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces); prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials()); @@ -125,13 +125,32 @@ bool render() { + // XXX do we need to call glFinish ? + long tm0 = get_msec(); + cl_event ev; + CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); + + if(!acquire_gl_object(texbuf, &ev)) { + return false; + } + + // make sure that we will wait for the acquire to finish before running + prog->set_wait_event(ev); + if(!prog->run(1, global_size)) { return false; } - long tm_run = get_msec() - tm0; + if(!release_gl_object(texbuf, &ev)) { + return false; + } + clWaitForEvents(1, &ev); + + printf("rendered in %ld msec\n", get_msec() - tm0); + + /*long tm_run = get_msec() - tm0; CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); void *fb = map_mem_buffer(mbuf, MAP_RD); @@ -146,6 +165,7 @@ long tm_upd = get_msec() - tm0 - tm_run; printf("render %ld msec (%ld run, %ld upd)\n", tm_run + tm_upd, tm_run, tm_upd); + */ return true; } diff -r 4bcf78e572d6 -r 980bc07be868 src/rt.h --- a/src/rt.h Fri Aug 27 02:22:08 2010 +0100 +++ b/src/rt.h Fri Aug 27 18:30:09 2010 +0100 @@ -3,7 +3,7 @@ #include "scene.h" -bool init_renderer(int xsz, int ysz, Scene *scn); +bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex); void destroy_renderer(); bool render(); void set_xform(float *matrix, float *invtrans);