clray
diff src/ocl.cc @ 39:980bc07be868
Implemented OpenGL/OpenCL interop, and removed the texture copy
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Fri, 27 Aug 2010 18:30:09 +0100 |
parents | 4cf4919c3812 |
children | 1bcbb53b3505 |
line diff
1.1 --- a/src/ocl.cc Fri Aug 27 02:22:08 2010 +0100 1.2 +++ b/src/ocl.cc Fri Aug 27 18:30:09 2010 +0100 1.3 @@ -5,6 +5,7 @@ 1.4 #include <string.h> 1.5 #include <stdarg.h> 1.6 #include <errno.h> 1.7 +#include <assert.h> 1.8 #ifndef _MSC_VER 1.9 #include <alloca.h> 1.10 #else 1.11 @@ -12,13 +13,14 @@ 1.12 #endif 1.13 #include <sys/stat.h> 1.14 #include "ocl.h" 1.15 +#include "ogl.h" 1.16 #include "ocl_errstr.h" 1.17 1.18 +#if defined(unix) || defined(__unix__) 1.19 +#include <X11/Xlib.h> 1.20 +#include <GL/glx.h> 1.21 +#endif 1.22 1.23 -class InitCL { 1.24 -public: 1.25 - InitCL(); 1.26 -}; 1.27 1.28 struct device_info { 1.29 cl_device_id id; 1.30 @@ -33,7 +35,6 @@ 1.31 unsigned long mem_size; 1.32 }; 1.33 1.34 -static bool init_opencl(void); 1.35 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); 1.36 static int get_dev_info(cl_device_id dev, struct device_info *di); 1.37 static int devcmp(struct device_info *a, struct device_info *b); 1.38 @@ -42,26 +43,36 @@ 1.39 static const char *clstrerror(int err); 1.40 1.41 1.42 -static InitCL initcl; 1.43 static cl_context ctx; 1.44 static cl_command_queue cmdq; 1.45 static device_info devinf; 1.46 1.47 -InitCL::InitCL() 1.48 -{ 1.49 - if(!init_opencl()) { 1.50 - exit(0); 1.51 - } 1.52 -} 1.53 - 1.54 -static bool init_opencl(void) 1.55 +bool init_opencl() 1.56 { 1.57 if(select_device(&devinf, devcmp) == -1) { 1.58 return false; 1.59 } 1.60 1.61 +#if defined(__APPLE__) 1.62 +#error "CL/GL context sharing not implemented on MacOSX yet" 1.63 +#elif defined(unix) || defined(__unix__) 1.64 + Display *dpy = glXGetCurrentDisplay(); 1.65 + GLXContext glctx = glXGetCurrentContext(); 1.66 1.67 - if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { 1.68 + assert(dpy && glctx); 1.69 + 1.70 + cl_context_properties prop[] = { 1.71 + CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy, 1.72 + CL_GL_CONTEXT_KHR, (cl_context_properties)glctx, 1.73 + 0 1.74 + }; 1.75 +#elif defined(WIN32) || defined(__WIN32__) 1.76 +#error "CL/GL context sharing not implemented on windows yet" 1.77 +#else 1.78 +#error "unknown or unsupported platform" 1.79 +#endif 1.80 + 1.81 + if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) { 1.82 fprintf(stderr, "failed to create opencl context\n"); 1.83 return false; 1.84 } 1.85 @@ -94,6 +105,25 @@ 1.86 mbuf->mem = mem; 1.87 mbuf->size = sz; 1.88 mbuf->ptr = 0; 1.89 + mbuf->tex = 0; 1.90 + return mbuf; 1.91 +} 1.92 + 1.93 +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex) 1.94 +{ 1.95 + int err; 1.96 + cl_mem mem; 1.97 + 1.98 + if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) { 1.99 + fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err)); 1.100 + return 0; 1.101 + } 1.102 + 1.103 + CLMemBuffer *mbuf = new CLMemBuffer; 1.104 + mbuf->mem = mem; 1.105 + mbuf->size = 0; 1.106 + mbuf->ptr = 0; 1.107 + mbuf->tex = tex; 1.108 return mbuf; 1.109 } 1.110 1.111 @@ -105,7 +135,7 @@ 1.112 } 1.113 } 1.114 1.115 -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) 1.116 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev) 1.117 { 1.118 if(!mbuf) return 0; 1.119 1.120 @@ -116,7 +146,7 @@ 1.121 #endif 1.122 1.123 int err; 1.124 - mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); 1.125 + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err); 1.126 if(!mbuf->ptr) { 1.127 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err)); 1.128 return 0; 1.129 @@ -124,31 +154,31 @@ 1.130 return mbuf->ptr; 1.131 } 1.132 1.133 -void unmap_mem_buffer(CLMemBuffer *mbuf) 1.134 +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev) 1.135 { 1.136 if(!mbuf || !mbuf->ptr) return; 1.137 - clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); 1.138 + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev); 1.139 mbuf->ptr = 0; 1.140 } 1.141 1.142 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) 1.143 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev) 1.144 { 1.145 if(!mbuf) return false; 1.146 1.147 int err; 1.148 - if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { 1.149 + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) { 1.150 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err)); 1.151 return false; 1.152 } 1.153 return true; 1.154 } 1.155 1.156 -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) 1.157 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev) 1.158 { 1.159 if(!mbuf) return false; 1.160 1.161 int err; 1.162 - if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { 1.163 + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) { 1.164 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err)); 1.165 return false; 1.166 } 1.167 @@ -156,6 +186,35 @@ 1.168 } 1.169 1.170 1.171 +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev) 1.172 +{ 1.173 + if(!mbuf || !mbuf->tex) { 1.174 + return false; 1.175 + } 1.176 + 1.177 + int err; 1.178 + if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { 1.179 + fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err)); 1.180 + return false; 1.181 + } 1.182 + return true; 1.183 +} 1.184 + 1.185 +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev) 1.186 +{ 1.187 + if(!mbuf || !mbuf->tex) { 1.188 + return false; 1.189 + } 1.190 + 1.191 + int err; 1.192 + if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { 1.193 + fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err)); 1.194 + return false; 1.195 + } 1.196 + return true; 1.197 +} 1.198 + 1.199 + 1.200 CLArg::CLArg() 1.201 { 1.202 memset(this, 0, sizeof *this); 1.203 @@ -169,10 +228,19 @@ 1.204 this->kname = kname; 1.205 args.resize(16); 1.206 built = false; 1.207 + 1.208 + wait_event = last_event = 0; 1.209 } 1.210 1.211 CLProgram::~CLProgram() 1.212 { 1.213 + if(wait_event) { 1.214 + clReleaseEvent(wait_event); 1.215 + } 1.216 + if(last_event) { 1.217 + clReleaseEvent(last_event); 1.218 + } 1.219 + 1.220 if(prog) { 1.221 1.222 clReleaseProgram(prog); 1.223 @@ -248,8 +316,28 @@ 1.224 printf("create argument %d buffer: %d bytes\n", idx, (int)sz); 1.225 CLMemBuffer *buf; 1.226 1.227 - if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, ptr))) { 1.228 - fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz); 1.229 + if(sz <= 0) { 1.230 + fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz); 1.231 + return false; 1.232 + } 1.233 + if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { 1.234 + return false; 1.235 + } 1.236 + 1.237 + if((int)args.size() <= idx) { 1.238 + args.resize(idx + 1); 1.239 + } 1.240 + args[idx].type = ARGTYPE_MEM_BUF; 1.241 + args[idx].v.mbuf = buf; 1.242 + return true; 1.243 +} 1.244 + 1.245 +bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex) 1.246 +{ 1.247 + printf("create argument %d from texture %u\n", idx, tex); 1.248 + CLMemBuffer *buf; 1.249 + 1.250 + if(!(buf = create_mem_buffer(rdwr, tex))) { 1.251 return false; 1.252 } 1.253 1.254 @@ -284,7 +372,7 @@ 1.255 { 1.256 int err; 1.257 1.258 - if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) { 1.259 + if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) { 1.260 size_t sz; 1.261 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz); 1.262 1.263 @@ -376,19 +464,37 @@ 1.264 } 1.265 va_end(ap); 1.266 1.267 + if(last_event) { 1.268 + clReleaseEvent(last_event); 1.269 + } 1.270 + 1.271 int err; 1.272 - cl_event event; 1.273 - 1.274 - if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, &event)) != 0) { 1.275 + if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 1.276 + wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) { 1.277 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err)); 1.278 return false; 1.279 } 1.280 1.281 - clWaitForEvents(1, &event); 1.282 - clReleaseEvent(event); 1.283 + if(wait_event) { 1.284 + clReleaseEvent(wait_event); 1.285 + wait_event = 0; 1.286 + } 1.287 return true; 1.288 } 1.289 1.290 +void CLProgram::set_wait_event(cl_event ev) 1.291 +{ 1.292 + if(wait_event) { 1.293 + clReleaseEvent(wait_event); 1.294 + } 1.295 + wait_event = ev; 1.296 +} 1.297 + 1.298 +cl_event CLProgram::get_last_event() const 1.299 +{ 1.300 + return last_event; 1.301 +} 1.302 + 1.303 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*)) 1.304 { 1.305 unsigned int i, j, num_dev, num_plat, sel, ret;