clray

view src/ocl.cc @ 40:1bcbb53b3505

segfault on exit?
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 27 Aug 2010 19:00:14 +0100
parents 980bc07be868
children 057b8575a1c1
line source
1 #define OCL_CC_
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #include <assert.h>
9 #ifndef _MSC_VER
10 #include <alloca.h>
11 #else
12 #include <malloc.h>
13 #endif
14 #include <sys/stat.h>
15 #include "ocl.h"
16 #include "ogl.h"
17 #include "ocl_errstr.h"
19 #if defined(unix) || defined(__unix__)
20 #include <X11/Xlib.h>
21 #include <GL/glx.h>
22 #endif
25 struct device_info {
26 cl_device_id id;
27 cl_device_type type;
28 unsigned int units;
29 unsigned int clock;
31 unsigned int dim;
32 size_t *work_item_sizes;
33 size_t work_group_size;
35 unsigned long mem_size;
36 };
38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
39 static int get_dev_info(cl_device_id dev, struct device_info *di);
40 static int devcmp(struct device_info *a, struct device_info *b);
41 static const char *devtypestr(cl_device_type type);
42 static void print_memsize(FILE *out, unsigned long memsz);
43 static const char *clstrerror(int err);
46 static cl_context ctx;
47 static cl_command_queue cmdq;
48 static device_info devinf;
50 bool init_opencl()
51 {
52 if(select_device(&devinf, devcmp) == -1) {
53 return false;
54 }
56 #ifndef CLGL_INTEROP
57 cl_context_properties *prop = 0;
59 #else
61 #if defined(__APPLE__)
62 #error "CL/GL context sharing not implemented on MacOSX yet"
63 #elif defined(unix) || defined(__unix__)
64 Display *dpy = glXGetCurrentDisplay();
65 GLXContext glctx = glXGetCurrentContext();
67 assert(dpy && glctx);
69 cl_context_properties prop[] = {
70 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
71 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
72 0
73 };
74 #elif defined(WIN32) || defined(__WIN32__)
75 #error "CL/GL context sharing not implemented on windows yet"
76 #else
77 #error "unknown or unsupported platform"
78 #endif
80 #endif /* CLGL_INTEROP */
82 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
83 fprintf(stderr, "failed to create opencl context\n");
84 return false;
85 }
87 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
88 fprintf(stderr, "failed to create command queue\n");
89 return false;
90 }
91 return true;
92 }
94 void destroy_opencl()
95 {
96 if(cmdq) {
97 clReleaseCommandQueue(cmdq);
98 cmdq = 0;
99 }
101 if(ctx) {
102 clReleaseContext(ctx);
103 ctx = 0;
104 }
105 }
108 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
109 {
110 int err;
111 cl_mem mem;
112 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
114 if(buf) {
115 flags |= CL_MEM_COPY_HOST_PTR;
116 }
119 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
120 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
121 return 0;
122 }
124 CLMemBuffer *mbuf = new CLMemBuffer;
125 mbuf->mem = mem;
126 mbuf->size = sz;
127 mbuf->ptr = 0;
128 mbuf->tex = 0;
129 return mbuf;
130 }
132 CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex)
133 {
134 int err;
135 cl_mem mem;
137 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
138 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
139 return 0;
140 }
142 CLMemBuffer *mbuf = new CLMemBuffer;
143 mbuf->mem = mem;
144 mbuf->size = 0;
145 mbuf->ptr = 0;
146 mbuf->tex = tex;
147 return mbuf;
148 }
150 void destroy_mem_buffer(CLMemBuffer *mbuf)
151 {
152 if(mbuf) {
153 clReleaseMemObject(mbuf->mem);
154 delete mbuf;
155 }
156 }
158 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
159 {
160 if(!mbuf) return 0;
162 #ifndef NDEBUG
163 if(mbuf->ptr) {
164 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
165 }
166 #endif
168 int err;
169 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
170 if(!mbuf->ptr) {
171 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
172 return 0;
173 }
174 return mbuf->ptr;
175 }
177 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
178 {
179 if(!mbuf || !mbuf->ptr) return;
180 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
181 mbuf->ptr = 0;
182 }
184 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
185 {
186 if(!mbuf) return false;
188 int err;
189 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
190 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
191 return false;
192 }
193 return true;
194 }
196 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
197 {
198 if(!mbuf) return false;
200 int err;
201 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
202 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
203 return false;
204 }
205 return true;
206 }
209 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
210 {
211 if(!mbuf || !mbuf->tex) {
212 return false;
213 }
215 int err;
216 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
217 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
218 return false;
219 }
220 return true;
221 }
223 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
224 {
225 if(!mbuf || !mbuf->tex) {
226 return false;
227 }
229 int err;
230 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
231 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
232 return false;
233 }
234 return true;
235 }
238 CLArg::CLArg()
239 {
240 memset(this, 0, sizeof *this);
241 }
244 CLProgram::CLProgram(const char *kname)
245 {
246 prog = 0;
247 kernel = 0;
248 this->kname = kname;
249 args.resize(16);
250 built = false;
252 wait_event = last_event = 0;
253 }
255 CLProgram::~CLProgram()
256 {
257 if(wait_event) {
258 clReleaseEvent(wait_event);
259 }
260 if(last_event) {
261 clWaitForEvents(1, &last_event);
262 clReleaseEvent(last_event);
263 }
265 if(prog) {
266 clReleaseProgram(prog);
267 }
268 if(kernel) {
269 clReleaseKernel(kernel);
270 }
271 for(size_t i=0; i<args.size(); i++) {
272 if(args[i].type == ARGTYPE_MEM_BUF) {
273 destroy_mem_buffer(args[i].v.mbuf);
274 }
275 }
276 }
278 bool CLProgram::load(const char *fname)
279 {
280 FILE *fp;
281 char *src;
282 struct stat st;
284 printf("loading opencl program (%s)\n", fname);
286 if(!(fp = fopen(fname, "rb"))) {
287 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
288 return false;
289 }
291 fstat(fileno(fp), &st);
293 src = new char[st.st_size + 1];
295 fread(src, 1, st.st_size, fp);
296 src[st.st_size] = 0;
297 fclose(fp);
300 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
301 fprintf(stderr, "error creating program object: %s\n", fname);
302 delete [] src;
303 return false;
304 }
305 delete [] src;
306 return true;
307 }
309 bool CLProgram::set_argi(int idx, int val)
310 {
311 if((int)args.size() <= idx) {
312 args.resize(idx + 1);
313 }
315 CLArg *arg = &args[idx];
316 arg->type = ARGTYPE_INT;
317 arg->v.ival = val;
318 return true;
319 }
321 bool CLProgram::set_argf(int idx, float val)
322 {
323 if((int)args.size() <= idx) {
324 args.resize(idx + 1);
325 }
327 CLArg *arg = &args[idx];
328 arg->type = ARGTYPE_FLOAT;
329 arg->v.fval = val;
330 return true;
331 }
333 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
334 {
335 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
336 CLMemBuffer *buf;
338 if(sz <= 0) {
339 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
340 return false;
341 }
342 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
343 return false;
344 }
346 if((int)args.size() <= idx) {
347 args.resize(idx + 1);
348 }
349 args[idx].type = ARGTYPE_MEM_BUF;
350 args[idx].v.mbuf = buf;
351 return true;
352 }
354 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
355 {
356 printf("create argument %d from texture %u\n", idx, tex);
357 CLMemBuffer *buf;
359 if(!(buf = create_mem_buffer(rdwr, tex))) {
360 return false;
361 }
363 if((int)args.size() <= idx) {
364 args.resize(idx + 1);
365 }
366 args[idx].type = ARGTYPE_MEM_BUF;
367 args[idx].v.mbuf = buf;
368 return true;
369 }
371 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
372 {
373 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
374 return 0;
375 }
376 return args[arg].v.mbuf;
377 }
379 int CLProgram::get_num_args() const
380 {
381 int num_args = 0;
382 for(size_t i=0; i<args.size(); i++) {
383 if(args[i].type != ARGTYPE_NONE) {
384 num_args++;
385 }
386 }
387 return num_args;
388 }
390 bool CLProgram::build()
391 {
392 int err;
394 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
395 size_t sz;
396 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
398 char *errlog = (char*)alloca(sz + 1);
399 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
400 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
402 clReleaseProgram(prog);
403 prog = 0;
404 return false;
405 }
408 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
409 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
410 clReleaseProgram(prog);
411 prog = 0;
412 return false;
413 }
415 for(size_t i=0; i<args.size(); i++) {
416 int err;
418 if(args[i].type == ARGTYPE_NONE) {
419 break;
420 }
422 switch(args[i].type) {
423 case ARGTYPE_INT:
424 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
425 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
426 goto fail;
427 }
428 break;
430 case ARGTYPE_FLOAT:
431 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
432 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
433 goto fail;
434 }
435 break;
437 case ARGTYPE_MEM_BUF:
438 {
439 CLMemBuffer *mbuf = args[i].v.mbuf;
441 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
442 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
443 goto fail;
444 }
445 }
446 break;
448 default:
449 break;
450 }
451 }
453 built = true;
454 return true;
456 fail:
457 clReleaseProgram(prog);
458 clReleaseKernel(kernel);
459 prog = 0;
460 kernel = 0;
461 return false;
462 }
464 bool CLProgram::run() const
465 {
466 return run(1, 1);
467 }
469 bool CLProgram::run(int dim, ...) const
470 {
471 va_list ap;
472 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
474 va_start(ap, dim);
475 for(int i=0; i<dim; i++) {
476 global_size[i] = va_arg(ap, int);
477 }
478 va_end(ap);
480 if(last_event) {
481 clReleaseEvent(last_event);
482 }
484 int err;
485 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
486 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
487 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
488 return false;
489 }
491 if(wait_event) {
492 clReleaseEvent(wait_event);
493 wait_event = 0;
494 }
495 return true;
496 }
498 void CLProgram::set_wait_event(cl_event ev)
499 {
500 if(wait_event) {
501 clReleaseEvent(wait_event);
502 }
503 wait_event = ev;
504 }
506 cl_event CLProgram::get_last_event() const
507 {
508 return last_event;
509 }
511 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
512 {
513 unsigned int i, j, num_dev, num_plat, sel, ret;
514 cl_device_id dev[32];
515 cl_platform_id plat[32];
517 dev_inf->work_item_sizes = 0;
519 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
520 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
521 return -1;
522 }
523 if(!num_plat) {
524 fprintf(stderr, "OpenCL not available!\n");
525 return -1;
526 }
528 for(i=0; i<num_plat; i++) {
529 char buf[512];
531 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
532 printf("[%d]: %s", i, buf);
533 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
534 printf(", %s", buf);
535 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
536 printf(" (%s)\n", buf);
537 }
539 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
540 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
541 return -1;
542 }
543 printf("found %d cl devices.\n", num_dev);
545 for(i=0; i<num_dev; i++) {
546 struct device_info di;
548 if(get_dev_info(dev[i], &di) == -1) {
549 free(dev_inf->work_item_sizes);
550 return -1;
551 }
553 printf("--> device %u (%s)\n", i, devtypestr(di.type));
554 printf("max compute units: %u\n", di.units);
555 printf("max clock frequency: %u\n", di.clock);
556 printf("max work item dimensions: %u\n", di.dim);
558 printf("max work item sizes: ");
559 for(j=0; j<di.dim; j++) {
560 printf("%u", (unsigned int)di.work_item_sizes[j]);
561 if(di.dim - j > 1) {
562 printf(", ");
563 }
564 }
565 putchar('\n');
567 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
568 printf("max object allocation size: ");
569 print_memsize(stdout, di.mem_size);
570 putchar('\n');
572 if(devcmp(&di, dev_inf) > 0) {
573 free(dev_inf->work_item_sizes);
574 memcpy(dev_inf, &di, sizeof di);
575 sel = i;
576 }
577 }
579 if(num_dev) {
580 printf("\nusing device: %d\n", sel);
581 return 0;
582 }
584 return -1;
585 }
587 static int get_dev_info(cl_device_id dev, struct device_info *di)
588 {
589 di->id = dev;
592 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
593 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
594 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
595 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
597 di->work_item_sizes = new size_t[di->dim];
599 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
600 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
601 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
603 return 0;
604 }
606 static int devcmp(struct device_info *a, struct device_info *b)
607 {
608 unsigned int aval = a->units * a->clock;
609 unsigned int bval = b->units * b->clock;
611 return aval - bval;
612 }
614 static const char *devtypestr(cl_device_type type)
615 {
616 switch(type) {
617 case CL_DEVICE_TYPE_CPU:
618 return "cpu";
619 case CL_DEVICE_TYPE_GPU:
620 return "gpu";
621 case CL_DEVICE_TYPE_ACCELERATOR:
622 return "accelerator";
623 default:
624 break;
625 }
626 return "unknown";
627 }
629 static void print_memsize(FILE *out, unsigned long bytes)
630 {
631 int i;
632 unsigned long memsz = bytes;
633 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
635 for(i=0; suffix[i]; i++) {
636 if(memsz < 1024) {
637 fprintf(out, "%lu %s", memsz, suffix[i]);
638 if(i > 0) {
639 fprintf(out, " (%lu bytes)", bytes);
640 }
641 return;
642 }
644 memsz /= 1024;
645 }
646 }
648 static const char *clstrerror(int err)
649 {
650 if(err > 0) {
651 return "<invalid error code>";
652 }
653 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
654 return "<unknown error>";
655 }
656 return ocl_errstr[-err];
657 }