clray

diff src/ocl.cc @ 39:980bc07be868

Implemented OpenGL/OpenCL interop, and removed the texture copy
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 27 Aug 2010 18:30:09 +0100
parents 4cf4919c3812
children 1bcbb53b3505
line diff
     1.1 --- a/src/ocl.cc	Fri Aug 27 02:22:08 2010 +0100
     1.2 +++ b/src/ocl.cc	Fri Aug 27 18:30:09 2010 +0100
     1.3 @@ -5,6 +5,7 @@
     1.4  #include <string.h>
     1.5  #include <stdarg.h>
     1.6  #include <errno.h>
     1.7 +#include <assert.h>
     1.8  #ifndef _MSC_VER
     1.9  #include <alloca.h>
    1.10  #else
    1.11 @@ -12,13 +13,14 @@
    1.12  #endif
    1.13  #include <sys/stat.h>
    1.14  #include "ocl.h"
    1.15 +#include "ogl.h"
    1.16  #include "ocl_errstr.h"
    1.17  
    1.18 +#if defined(unix) || defined(__unix__)
    1.19 +#include <X11/Xlib.h>
    1.20 +#include <GL/glx.h>
    1.21 +#endif
    1.22  
    1.23 -class InitCL {
    1.24 -public:
    1.25 -	InitCL();
    1.26 -};
    1.27  
    1.28  struct device_info {
    1.29  	cl_device_id id;
    1.30 @@ -33,7 +35,6 @@
    1.31  	unsigned long mem_size;
    1.32  };
    1.33  
    1.34 -static bool init_opencl(void);
    1.35  static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
    1.36  static int get_dev_info(cl_device_id dev, struct device_info *di);
    1.37  static int devcmp(struct device_info *a, struct device_info *b);
    1.38 @@ -42,26 +43,36 @@
    1.39  static const char *clstrerror(int err);
    1.40  
    1.41  
    1.42 -static InitCL initcl;
    1.43  static cl_context ctx;
    1.44  static cl_command_queue cmdq;
    1.45  static device_info devinf;
    1.46  
    1.47 -InitCL::InitCL()
    1.48 -{
    1.49 -	if(!init_opencl()) {
    1.50 -		exit(0);
    1.51 -	}
    1.52 -}
    1.53 -
    1.54 -static bool init_opencl(void)
    1.55 +bool init_opencl()
    1.56  {
    1.57  	if(select_device(&devinf, devcmp) == -1) {
    1.58  		return false;
    1.59  	}
    1.60  
    1.61 +#if defined(__APPLE__)
    1.62 +#error "CL/GL context sharing not implemented on MacOSX yet"
    1.63 +#elif defined(unix) || defined(__unix__)
    1.64 +	Display *dpy = glXGetCurrentDisplay();
    1.65 +	GLXContext glctx = glXGetCurrentContext();
    1.66  
    1.67 -	if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
    1.68 +	assert(dpy && glctx);
    1.69 +
    1.70 +	cl_context_properties prop[] = {
    1.71 +		CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
    1.72 +		CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
    1.73 +		0
    1.74 +	};
    1.75 +#elif defined(WIN32) || defined(__WIN32__)
    1.76 +#error "CL/GL context sharing not implemented on windows yet"
    1.77 +#else
    1.78 +#error "unknown or unsupported platform"
    1.79 +#endif
    1.80 +
    1.81 +	if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
    1.82  		fprintf(stderr, "failed to create opencl context\n");
    1.83  		return false;
    1.84  	}
    1.85 @@ -94,6 +105,25 @@
    1.86  	mbuf->mem = mem;
    1.87  	mbuf->size = sz;
    1.88  	mbuf->ptr = 0;
    1.89 +	mbuf->tex = 0;
    1.90 +	return mbuf;
    1.91 +}
    1.92 +
    1.93 +CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex)
    1.94 +{
    1.95 +	int err;
    1.96 +	cl_mem mem;
    1.97 +
    1.98 +	if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
    1.99 +		fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
   1.100 +		return 0;
   1.101 +	}
   1.102 +
   1.103 +	CLMemBuffer *mbuf = new CLMemBuffer;
   1.104 +	mbuf->mem = mem;
   1.105 +	mbuf->size = 0;
   1.106 +	mbuf->ptr = 0;
   1.107 +	mbuf->tex = tex;
   1.108  	return mbuf;
   1.109  }
   1.110  
   1.111 @@ -105,7 +135,7 @@
   1.112  	}
   1.113  }
   1.114  
   1.115 -void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
   1.116 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
   1.117  {
   1.118  	if(!mbuf) return 0;
   1.119  
   1.120 @@ -116,7 +146,7 @@
   1.121  #endif
   1.122  
   1.123  	int err;
   1.124 -	mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
   1.125 +	mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
   1.126  	if(!mbuf->ptr) {
   1.127  		fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
   1.128  		return 0;
   1.129 @@ -124,31 +154,31 @@
   1.130  	return mbuf->ptr;
   1.131  }
   1.132  
   1.133 -void unmap_mem_buffer(CLMemBuffer *mbuf)
   1.134 +void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
   1.135  {
   1.136  	if(!mbuf || !mbuf->ptr) return;
   1.137 -	clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
   1.138 +	clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
   1.139  	mbuf->ptr = 0;
   1.140  }
   1.141  
   1.142 -bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src)
   1.143 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
   1.144  {
   1.145  	if(!mbuf) return false;
   1.146  
   1.147  	int err;
   1.148 -	if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
   1.149 +	if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
   1.150  		fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
   1.151  		return false;
   1.152  	}
   1.153  	return true;
   1.154  }
   1.155  
   1.156 -bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
   1.157 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
   1.158  {
   1.159  	if(!mbuf) return false;
   1.160  
   1.161  	int err;
   1.162 -	if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
   1.163 +	if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
   1.164  		fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
   1.165  		return false;
   1.166  	}
   1.167 @@ -156,6 +186,35 @@
   1.168  }
   1.169  
   1.170  
   1.171 +bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
   1.172 +{
   1.173 +	if(!mbuf || !mbuf->tex) {
   1.174 +		return false;
   1.175 +	}
   1.176 +
   1.177 +	int err;
   1.178 +	if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
   1.179 +		fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
   1.180 +		return false;
   1.181 +	}
   1.182 +	return true;
   1.183 +}
   1.184 +
   1.185 +bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
   1.186 +{
   1.187 +	if(!mbuf || !mbuf->tex) {
   1.188 +		return false;
   1.189 +	}
   1.190 +
   1.191 +	int err;
   1.192 +	if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
   1.193 +		fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
   1.194 +		return false;
   1.195 +	}
   1.196 +	return true;
   1.197 +}
   1.198 +
   1.199 +
   1.200  CLArg::CLArg()
   1.201  {
   1.202  	memset(this, 0, sizeof *this);
   1.203 @@ -169,10 +228,19 @@
   1.204  	this->kname = kname;
   1.205  	args.resize(16);
   1.206  	built = false;
   1.207 +
   1.208 +	wait_event = last_event = 0;
   1.209  }
   1.210  
   1.211  CLProgram::~CLProgram()
   1.212  {
   1.213 +	if(wait_event) {
   1.214 +		clReleaseEvent(wait_event);
   1.215 +	}
   1.216 +	if(last_event) {
   1.217 +		clReleaseEvent(last_event);
   1.218 +	}
   1.219 +
   1.220  	if(prog) {
   1.221  
   1.222  		clReleaseProgram(prog);
   1.223 @@ -248,8 +316,28 @@
   1.224  	printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
   1.225  	CLMemBuffer *buf;
   1.226  
   1.227 -	if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, ptr))) {
   1.228 -		fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz);
   1.229 +	if(sz <= 0) {
   1.230 +		fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
   1.231 +		return false;
   1.232 +	}
   1.233 +	if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
   1.234 +		return false;
   1.235 +	}
   1.236 +
   1.237 +	if((int)args.size() <= idx) {
   1.238 +		args.resize(idx + 1);
   1.239 +	}
   1.240 +	args[idx].type = ARGTYPE_MEM_BUF;
   1.241 +	args[idx].v.mbuf = buf;
   1.242 +	return true;
   1.243 +}
   1.244 +
   1.245 +bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
   1.246 +{
   1.247 +	printf("create argument %d from texture %u\n", idx, tex);
   1.248 +	CLMemBuffer *buf;
   1.249 +
   1.250 +	if(!(buf = create_mem_buffer(rdwr, tex))) {
   1.251  		return false;
   1.252  	}
   1.253  
   1.254 @@ -284,7 +372,7 @@
   1.255  {
   1.256  	int err;
   1.257  
   1.258 -	if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
   1.259 +	if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
   1.260  		size_t sz;
   1.261  		clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
   1.262  
   1.263 @@ -376,19 +464,37 @@
   1.264  	}
   1.265  	va_end(ap);
   1.266  
   1.267 +	if(last_event) {
   1.268 +		clReleaseEvent(last_event);
   1.269 +	}
   1.270 +
   1.271  	int err;
   1.272 -	cl_event event;
   1.273 -
   1.274 -	if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, &event)) != 0) {
   1.275 +	if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
   1.276 +					wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
   1.277  		fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
   1.278  		return false;
   1.279  	}
   1.280  
   1.281 -	clWaitForEvents(1, &event);
   1.282 -	clReleaseEvent(event);
   1.283 +	if(wait_event) {
   1.284 +		clReleaseEvent(wait_event);
   1.285 +		wait_event = 0;
   1.286 +	}
   1.287  	return true;
   1.288  }
   1.289  
   1.290 +void CLProgram::set_wait_event(cl_event ev)
   1.291 +{
   1.292 +	if(wait_event) {
   1.293 +		clReleaseEvent(wait_event);
   1.294 +	}
   1.295 +	wait_event = ev;
   1.296 +}
   1.297 +
   1.298 +cl_event CLProgram::get_last_event() const
   1.299 +{
   1.300 +	return last_event;
   1.301 +}
   1.302 +
   1.303  static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
   1.304  {
   1.305  	unsigned int i, j, num_dev, num_plat, sel, ret;