clray
changeset 39:980bc07be868
Implemented OpenGL/OpenCL interop, and removed the texture copy
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Fri, 27 Aug 2010 18:30:09 +0100 (2010-08-27) |
parents | 4bcf78e572d6 |
children | 1bcbb53b3505 |
files | rt.cl src/clray.cc src/ocl.cc src/ocl.h src/rt.cc src/rt.h |
diffstat | 6 files changed, 208 insertions(+), 50 deletions(-) [+] |
line diff
1.1 --- a/rt.cl Fri Aug 27 02:22:08 2010 +0100 1.2 +++ b/rt.cl Fri Aug 27 18:30:09 2010 +0100 1.3 @@ -80,7 +80,8 @@ 1.4 float4 calc_bary(float4 pt, global const struct Face *face, float4 norm); 1.5 float mean(float4 v); 1.6 1.7 -kernel void render(global float4 *fb, 1.8 + 1.9 +kernel void render(write_only image2d_t fb, 1.10 global const struct RendInfo *rinf, 1.11 global const struct Face *faces, 1.12 global const struct Material *matlib, 1.13 @@ -124,7 +125,14 @@ 1.14 } 1.15 } 1.16 1.17 - fb[idx] = pixel; 1.18 + int img_x = get_image_width(fb); 1.19 + 1.20 + int2 coord; 1.21 + coord.x = idx % img_x; 1.22 + coord.y = idx / img_x; 1.23 + 1.24 + write_imagef(fb, coord, pixel); 1.25 + //fb[idx] = pixel; 1.26 } 1.27 1.28 float4 shade(struct Ray ray, struct Scene *scn, const struct SurfPoint *sp)
2.1 --- a/src/clray.cc Fri Aug 27 02:22:08 2010 +0100 2.2 +++ b/src/clray.cc Fri Aug 27 18:30:09 2010 +0100 2.3 @@ -11,6 +11,7 @@ 2.4 #include "rt.h" 2.5 #include "matrix.h" 2.6 #include "scene.h" 2.7 +#include "ocl.h" 2.8 2.9 void cleanup(); 2.10 void disp(); 2.11 @@ -31,6 +32,7 @@ 2.12 static bool dbg_show_obj = true; 2.13 2.14 static Scene scn; 2.15 +static unsigned int tex; 2.16 2.17 int main(int argc, char **argv) 2.18 { 2.19 @@ -102,11 +104,6 @@ 2.20 glutMouseFunc(mouse); 2.21 glutMotionFunc(motion); 2.22 2.23 - if(!init_renderer(xsz, ysz, &scn)) { 2.24 - return 1; 2.25 - } 2.26 - atexit(cleanup); 2.27 - 2.28 unsigned int *test_pattern = new unsigned int[xsz * ysz]; 2.29 for(int i=0; i<ysz; i++) { 2.30 for(int j=0; j<xsz; j++) { 2.31 @@ -114,15 +111,24 @@ 2.32 } 2.33 } 2.34 2.35 - /*glGenTextures(1, &tex); 2.36 - glBindTexture(GL_TEXTURE_2D, tex);*/ 2.37 + glGenTextures(1, &tex); 2.38 + glBindTexture(GL_TEXTURE_2D, tex); 2.39 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP); 2.40 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP); 2.41 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 2.42 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 2.43 - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB32F_ARB, xsz, ysz, 0, GL_RGBA, GL_UNSIGNED_BYTE, test_pattern); 2.44 + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F_ARB, xsz, ysz, 0, GL_RGBA, GL_UNSIGNED_BYTE, test_pattern); 2.45 delete [] test_pattern; 2.46 2.47 + if(!init_opencl()) { 2.48 + return 1; 2.49 + } 2.50 + 2.51 + if(!init_renderer(xsz, ysz, &scn, tex)) { 2.52 + return 1; 2.53 + } 2.54 + atexit(cleanup); 2.55 + 2.56 glutMainLoop(); 2.57 return 0; 2.58 }
3.1 --- a/src/ocl.cc Fri Aug 27 02:22:08 2010 +0100 3.2 +++ b/src/ocl.cc Fri Aug 27 18:30:09 2010 +0100 3.3 @@ -5,6 +5,7 @@ 3.4 #include <string.h> 3.5 #include <stdarg.h> 3.6 #include <errno.h> 3.7 +#include <assert.h> 3.8 #ifndef _MSC_VER 3.9 #include <alloca.h> 3.10 #else 3.11 @@ -12,13 +13,14 @@ 3.12 #endif 3.13 #include <sys/stat.h> 3.14 #include "ocl.h" 3.15 +#include "ogl.h" 3.16 #include "ocl_errstr.h" 3.17 3.18 +#if defined(unix) || defined(__unix__) 3.19 +#include <X11/Xlib.h> 3.20 +#include <GL/glx.h> 3.21 +#endif 3.22 3.23 -class InitCL { 3.24 -public: 3.25 - InitCL(); 3.26 -}; 3.27 3.28 struct device_info { 3.29 cl_device_id id; 3.30 @@ -33,7 +35,6 @@ 3.31 unsigned long mem_size; 3.32 }; 3.33 3.34 -static bool init_opencl(void); 3.35 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); 3.36 static int get_dev_info(cl_device_id dev, struct device_info *di); 3.37 static int devcmp(struct device_info *a, struct device_info *b); 3.38 @@ -42,26 +43,36 @@ 3.39 static const char *clstrerror(int err); 3.40 3.41 3.42 -static InitCL initcl; 3.43 static cl_context ctx; 3.44 static cl_command_queue cmdq; 3.45 static device_info devinf; 3.46 3.47 -InitCL::InitCL() 3.48 -{ 3.49 - if(!init_opencl()) { 3.50 - exit(0); 3.51 - } 3.52 -} 3.53 - 3.54 -static bool init_opencl(void) 3.55 +bool init_opencl() 3.56 { 3.57 if(select_device(&devinf, devcmp) == -1) { 3.58 return false; 3.59 } 3.60 3.61 +#if defined(__APPLE__) 3.62 +#error "CL/GL context sharing not implemented on MacOSX yet" 3.63 +#elif defined(unix) || defined(__unix__) 3.64 + Display *dpy = glXGetCurrentDisplay(); 3.65 + GLXContext glctx = glXGetCurrentContext(); 3.66 3.67 - if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { 3.68 + assert(dpy && glctx); 3.69 + 3.70 + cl_context_properties prop[] = { 3.71 + CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy, 3.72 + CL_GL_CONTEXT_KHR, (cl_context_properties)glctx, 3.73 + 0 3.74 + }; 3.75 +#elif defined(WIN32) || defined(__WIN32__) 3.76 +#error "CL/GL context sharing not implemented on windows yet" 3.77 +#else 3.78 +#error "unknown or unsupported platform" 3.79 +#endif 3.80 + 3.81 + if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) { 3.82 fprintf(stderr, "failed to create opencl context\n"); 3.83 return false; 3.84 } 3.85 @@ -94,6 +105,25 @@ 3.86 mbuf->mem = mem; 3.87 mbuf->size = sz; 3.88 mbuf->ptr = 0; 3.89 + mbuf->tex = 0; 3.90 + return mbuf; 3.91 +} 3.92 + 3.93 +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex) 3.94 +{ 3.95 + int err; 3.96 + cl_mem mem; 3.97 + 3.98 + if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) { 3.99 + fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err)); 3.100 + return 0; 3.101 + } 3.102 + 3.103 + CLMemBuffer *mbuf = new CLMemBuffer; 3.104 + mbuf->mem = mem; 3.105 + mbuf->size = 0; 3.106 + mbuf->ptr = 0; 3.107 + mbuf->tex = tex; 3.108 return mbuf; 3.109 } 3.110 3.111 @@ -105,7 +135,7 @@ 3.112 } 3.113 } 3.114 3.115 -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) 3.116 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev) 3.117 { 3.118 if(!mbuf) return 0; 3.119 3.120 @@ -116,7 +146,7 @@ 3.121 #endif 3.122 3.123 int err; 3.124 - mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); 3.125 + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err); 3.126 if(!mbuf->ptr) { 3.127 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err)); 3.128 return 0; 3.129 @@ -124,31 +154,31 @@ 3.130 return mbuf->ptr; 3.131 } 3.132 3.133 -void unmap_mem_buffer(CLMemBuffer *mbuf) 3.134 +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev) 3.135 { 3.136 if(!mbuf || !mbuf->ptr) return; 3.137 - clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); 3.138 + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev); 3.139 mbuf->ptr = 0; 3.140 } 3.141 3.142 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) 3.143 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev) 3.144 { 3.145 if(!mbuf) return false; 3.146 3.147 int err; 3.148 - if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { 3.149 + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) { 3.150 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err)); 3.151 return false; 3.152 } 3.153 return true; 3.154 } 3.155 3.156 -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) 3.157 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev) 3.158 { 3.159 if(!mbuf) return false; 3.160 3.161 int err; 3.162 - if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { 3.163 + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) { 3.164 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err)); 3.165 return false; 3.166 } 3.167 @@ -156,6 +186,35 @@ 3.168 } 3.169 3.170 3.171 +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev) 3.172 +{ 3.173 + if(!mbuf || !mbuf->tex) { 3.174 + return false; 3.175 + } 3.176 + 3.177 + int err; 3.178 + if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { 3.179 + fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err)); 3.180 + return false; 3.181 + } 3.182 + return true; 3.183 +} 3.184 + 3.185 +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev) 3.186 +{ 3.187 + if(!mbuf || !mbuf->tex) { 3.188 + return false; 3.189 + } 3.190 + 3.191 + int err; 3.192 + if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { 3.193 + fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err)); 3.194 + return false; 3.195 + } 3.196 + return true; 3.197 +} 3.198 + 3.199 + 3.200 CLArg::CLArg() 3.201 { 3.202 memset(this, 0, sizeof *this); 3.203 @@ -169,10 +228,19 @@ 3.204 this->kname = kname; 3.205 args.resize(16); 3.206 built = false; 3.207 + 3.208 + wait_event = last_event = 0; 3.209 } 3.210 3.211 CLProgram::~CLProgram() 3.212 { 3.213 + if(wait_event) { 3.214 + clReleaseEvent(wait_event); 3.215 + } 3.216 + if(last_event) { 3.217 + clReleaseEvent(last_event); 3.218 + } 3.219 + 3.220 if(prog) { 3.221 3.222 clReleaseProgram(prog); 3.223 @@ -248,8 +316,28 @@ 3.224 printf("create argument %d buffer: %d bytes\n", idx, (int)sz); 3.225 CLMemBuffer *buf; 3.226 3.227 - if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, ptr))) { 3.228 - fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz); 3.229 + if(sz <= 0) { 3.230 + fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz); 3.231 + return false; 3.232 + } 3.233 + if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { 3.234 + return false; 3.235 + } 3.236 + 3.237 + if((int)args.size() <= idx) { 3.238 + args.resize(idx + 1); 3.239 + } 3.240 + args[idx].type = ARGTYPE_MEM_BUF; 3.241 + args[idx].v.mbuf = buf; 3.242 + return true; 3.243 +} 3.244 + 3.245 +bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex) 3.246 +{ 3.247 + printf("create argument %d from texture %u\n", idx, tex); 3.248 + CLMemBuffer *buf; 3.249 + 3.250 + if(!(buf = create_mem_buffer(rdwr, tex))) { 3.251 return false; 3.252 } 3.253 3.254 @@ -284,7 +372,7 @@ 3.255 { 3.256 int err; 3.257 3.258 - if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) { 3.259 + if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) { 3.260 size_t sz; 3.261 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz); 3.262 3.263 @@ -376,19 +464,37 @@ 3.264 } 3.265 va_end(ap); 3.266 3.267 + if(last_event) { 3.268 + clReleaseEvent(last_event); 3.269 + } 3.270 + 3.271 int err; 3.272 - cl_event event; 3.273 - 3.274 - if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, &event)) != 0) { 3.275 + if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 3.276 + wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) { 3.277 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err)); 3.278 return false; 3.279 } 3.280 3.281 - clWaitForEvents(1, &event); 3.282 - clReleaseEvent(event); 3.283 + if(wait_event) { 3.284 + clReleaseEvent(wait_event); 3.285 + wait_event = 0; 3.286 + } 3.287 return true; 3.288 } 3.289 3.290 +void CLProgram::set_wait_event(cl_event ev) 3.291 +{ 3.292 + if(wait_event) { 3.293 + clReleaseEvent(wait_event); 3.294 + } 3.295 + wait_event = ev; 3.296 +} 3.297 + 3.298 +cl_event CLProgram::get_last_event() const 3.299 +{ 3.300 + return last_event; 3.301 +} 3.302 + 3.303 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*)) 3.304 { 3.305 unsigned int i, j, num_dev, num_plat, sel, ret;
4.1 --- a/src/ocl.h Fri Aug 27 02:22:08 2010 +0100 4.2 +++ b/src/ocl.h Fri Aug 27 18:30:09 2010 +0100 4.3 @@ -5,6 +5,7 @@ 4.4 #include <string> 4.5 #ifndef __APPLE__ 4.6 #include <CL/cl.h> 4.7 +#include <CL/cl_gl.h> 4.8 #else 4.9 #include <OpenCL/opencl.h> 4.10 #endif 4.11 @@ -25,16 +26,24 @@ 4.12 cl_mem mem; 4.13 size_t size; 4.14 void *ptr; 4.15 + unsigned int tex; 4.16 }; 4.17 4.18 -CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf = 0); 4.19 + 4.20 +bool init_opencl(); 4.21 + 4.22 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf); 4.23 +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex); 4.24 void destroy_mem_buffer(CLMemBuffer *mbuf); 4.25 4.26 -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); 4.27 -void unmap_mem_buffer(CLMemBuffer *mbuf); 4.28 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev = 0); 4.29 +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev = 0); 4.30 4.31 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src); 4.32 -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); 4.33 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev = 0); 4.34 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev = 0); 4.35 + 4.36 +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev = 0); 4.37 +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev = 0); 4.38 4.39 enum { 4.40 ARGTYPE_NONE, 4.41 @@ -65,6 +74,8 @@ 4.42 cl_kernel kernel; 4.43 std::vector<CLArg> args; 4.44 bool built; 4.45 + mutable cl_event wait_event; 4.46 + mutable cl_event last_event; 4.47 4.48 public: 4.49 CLProgram(const char *kname); 4.50 @@ -75,6 +86,7 @@ 4.51 bool set_argi(int arg, int val); 4.52 bool set_argf(int arg, float val); 4.53 bool set_arg_buffer(int arg, int rdwr, size_t sz, const void *buf = 0); 4.54 + bool set_arg_texture(int arg, int rdwr, unsigned int tex); 4.55 CLMemBuffer *get_arg_buffer(int arg); 4.56 int get_num_args() const; 4.57 4.58 @@ -82,6 +94,12 @@ 4.59 4.60 bool run() const; 4.61 bool run(int dim, ...) const; 4.62 + 4.63 + // sets an event that has to be completed before running the kernel 4.64 + void set_wait_event(cl_event ev); 4.65 + 4.66 + // gets the last event so that we can wait for it to finish 4.67 + cl_event get_last_event() const; 4.68 }; 4.69 4.70 #endif /* OCL_H_ */
5.1 --- a/src/rt.cc Fri Aug 27 02:22:08 2010 +0100 5.2 +++ b/src/rt.cc Fri Aug 27 18:30:09 2010 +0100 5.3 @@ -53,7 +53,7 @@ 5.4 static RendInfo rinf; 5.5 5.6 5.7 -bool init_renderer(int xsz, int ysz, Scene *scn) 5.8 +bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex) 5.9 { 5.10 // render info 5.11 rinf.ambient[0] = rinf.ambient[1] = rinf.ambient[2] = 0.0; 5.12 @@ -94,7 +94,7 @@ 5.13 // XXX now we can actually destroy the original kdtree and keep only the GPU version 5.14 5.15 /* setup argument buffers */ 5.16 - prog->set_arg_buffer(KARG_FRAMEBUFFER, ARG_WR, xsz * ysz * 4 * sizeof(float)); 5.17 + prog->set_arg_texture(KARG_FRAMEBUFFER, ARG_WR, tex); 5.18 prog->set_arg_buffer(KARG_RENDER_INFO, ARG_RD, sizeof rinf, &rinf); 5.19 prog->set_arg_buffer(KARG_FACES, ARG_RD, rinf.num_faces * sizeof(Face), faces); 5.20 prog->set_arg_buffer(KARG_MATLIB, ARG_RD, scn->get_num_materials() * sizeof(Material), scn->get_materials()); 5.21 @@ -125,13 +125,32 @@ 5.22 5.23 bool render() 5.24 { 5.25 + // XXX do we need to call glFinish ? 5.26 + 5.27 long tm0 = get_msec(); 5.28 5.29 + cl_event ev; 5.30 + CLMemBuffer *texbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); 5.31 + 5.32 + if(!acquire_gl_object(texbuf, &ev)) { 5.33 + return false; 5.34 + } 5.35 + 5.36 + // make sure that we will wait for the acquire to finish before running 5.37 + prog->set_wait_event(ev); 5.38 + 5.39 if(!prog->run(1, global_size)) { 5.40 return false; 5.41 } 5.42 5.43 - long tm_run = get_msec() - tm0; 5.44 + if(!release_gl_object(texbuf, &ev)) { 5.45 + return false; 5.46 + } 5.47 + clWaitForEvents(1, &ev); 5.48 + 5.49 + printf("rendered in %ld msec\n", get_msec() - tm0); 5.50 + 5.51 + /*long tm_run = get_msec() - tm0; 5.52 5.53 CLMemBuffer *mbuf = prog->get_arg_buffer(KARG_FRAMEBUFFER); 5.54 void *fb = map_mem_buffer(mbuf, MAP_RD); 5.55 @@ -146,6 +165,7 @@ 5.56 long tm_upd = get_msec() - tm0 - tm_run; 5.57 5.58 printf("render %ld msec (%ld run, %ld upd)\n", tm_run + tm_upd, tm_run, tm_upd); 5.59 + */ 5.60 return true; 5.61 } 5.62
6.1 --- a/src/rt.h Fri Aug 27 02:22:08 2010 +0100 6.2 +++ b/src/rt.h Fri Aug 27 18:30:09 2010 +0100 6.3 @@ -3,7 +3,7 @@ 6.4 6.5 #include "scene.h" 6.6 6.7 -bool init_renderer(int xsz, int ysz, Scene *scn); 6.8 +bool init_renderer(int xsz, int ysz, Scene *scn, unsigned int tex); 6.9 void destroy_renderer(); 6.10 bool render(); 6.11 void set_xform(float *matrix, float *invtrans);