clray

view src/ocl.cc @ 41:057b8575a1c1

- changed the membuffer into an imagebuffer for the non-GL/CL-interop case - fixed the segfault
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 27 Aug 2010 20:39:55 +0100
parents 1bcbb53b3505
children 1169f3d04135
line source
1 #define OCL_CC_
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #include <assert.h>
9 #ifndef _MSC_VER
10 #include <alloca.h>
11 #else
12 #include <malloc.h>
13 #endif
14 #include <sys/stat.h>
15 #include "ocl.h"
16 #include "ogl.h"
17 #include "ocl_errstr.h"
19 #if defined(unix) || defined(__unix__)
20 #include <X11/Xlib.h>
21 #include <GL/glx.h>
22 #endif
25 struct device_info {
26 cl_device_id id;
27 cl_device_type type;
28 unsigned int units;
29 unsigned int clock;
31 unsigned int dim;
32 size_t *work_item_sizes;
33 size_t work_group_size;
35 unsigned long mem_size;
36 };
38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
39 static int get_dev_info(cl_device_id dev, struct device_info *di);
40 static int devcmp(struct device_info *a, struct device_info *b);
41 static const char *devtypestr(cl_device_type type);
42 static void print_memsize(FILE *out, unsigned long memsz);
43 static const char *clstrerror(int err);
46 static cl_context ctx;
47 static cl_command_queue cmdq;
48 static device_info devinf;
50 bool init_opencl()
51 {
52 if(select_device(&devinf, devcmp) == -1) {
53 return false;
54 }
56 #ifndef CLGL_INTEROP
57 cl_context_properties *prop = 0;
59 #else
61 #if defined(__APPLE__)
62 #error "CL/GL context sharing not implemented on MacOSX yet"
63 #elif defined(unix) || defined(__unix__)
64 Display *dpy = glXGetCurrentDisplay();
65 GLXContext glctx = glXGetCurrentContext();
67 assert(dpy && glctx);
69 cl_context_properties prop[] = {
70 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
71 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
72 0
73 };
74 #elif defined(WIN32) || defined(__WIN32__)
75 #error "CL/GL context sharing not implemented on windows yet"
76 #else
77 #error "unknown or unsupported platform"
78 #endif
80 #endif /* CLGL_INTEROP */
82 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
83 fprintf(stderr, "failed to create opencl context\n");
84 return false;
85 }
87 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
88 fprintf(stderr, "failed to create command queue\n");
89 return false;
90 }
91 return true;
92 }
94 void destroy_opencl()
95 {
96 if(cmdq) {
97 clReleaseCommandQueue(cmdq);
98 cmdq = 0;
99 }
101 if(ctx) {
102 clReleaseContext(ctx);
103 ctx = 0;
104 }
105 }
108 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
109 {
110 int err;
111 cl_mem mem;
112 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
114 if(buf) {
115 flags |= CL_MEM_COPY_HOST_PTR;
116 }
119 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
120 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
121 return 0;
122 }
124 CLMemBuffer *mbuf = new CLMemBuffer;
125 mbuf->type = MEM_BUFFER;
126 mbuf->mem = mem;
127 mbuf->size = sz;
128 mbuf->xsz = mbuf->ysz = 0;
129 mbuf->ptr = 0;
130 mbuf->tex = 0;
131 return mbuf;
132 }
134 CLMemBuffer *create_image_buffer(int rdwr, int xsz, int ysz, const void *pixels)
135 {
136 int err, pitch;
137 cl_mem mem;
138 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
140 if(pixels) {
141 flags |= CL_MEM_COPY_HOST_PTR;
142 pitch = xsz * 4 * sizeof(float);
143 } else {
144 pitch = 0;
145 }
147 cl_image_format fmt = {CL_RGBA, CL_FLOAT};
149 if(!(mem = clCreateImage2D(ctx, flags, &fmt, xsz, ysz, pitch, (void*)pixels, &err))) {
150 fprintf(stderr, "failed to create %dx%d image: %s\n", xsz, ysz, clstrerror(err));
151 return 0;
152 }
154 CLMemBuffer *mbuf = new CLMemBuffer;
155 mbuf->type = IMAGE_BUFFER;
156 mbuf->mem = mem;
157 mbuf->size = ysz * pitch;
158 mbuf->xsz = xsz;
159 mbuf->ysz = ysz;
160 mbuf->ptr = 0;
161 mbuf->tex = 0;
162 return mbuf;
163 }
165 CLMemBuffer *create_image_buffer(int rdwr, unsigned int tex)
166 {
167 int err, xsz, ysz;
168 cl_mem mem;
170 glGetError(); // clear previous OpenGL errors
172 glPushAttrib(GL_TEXTURE_BIT);
173 glBindTexture(GL_TEXTURE_2D, tex);
174 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xsz);
175 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &ysz);
176 glPopAttrib();
178 if(glGetError()) {
179 fprintf(stderr, "create_image_buffer: GL error while retreiving texture parameters for texture %u\n", tex);
180 return 0;
181 }
183 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
184 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
185 return 0;
186 }
188 CLMemBuffer *mbuf = new CLMemBuffer;
189 mbuf->type = IMAGE_BUFFER;
190 mbuf->mem = mem;
191 mbuf->size = 0;
192 mbuf->xsz = xsz;
193 mbuf->ysz = ysz;
194 mbuf->ptr = 0;
195 mbuf->tex = tex;
197 return mbuf;
198 }
200 void destroy_mem_buffer(CLMemBuffer *mbuf)
201 {
202 if(mbuf) {
203 clReleaseMemObject(mbuf->mem);
204 delete mbuf;
205 }
206 }
208 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
209 {
210 if(!mbuf) return 0;
212 #ifndef NDEBUG
213 if(mbuf->ptr) {
214 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
215 }
216 #endif
218 int err;
220 if(mbuf->type == MEM_BUFFER) {
221 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
222 if(!mbuf->ptr) {
223 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
224 return 0;
225 }
226 } else {
227 assert(mbuf->type == IMAGE_BUFFER);
229 size_t orig[] = {0, 0, 0};
230 size_t rgn[] = {mbuf->xsz, mbuf->ysz, 1};
231 size_t pitch;
233 mbuf->ptr = clEnqueueMapImage(cmdq, mbuf->mem, 1, rdwr, orig, rgn, &pitch, 0, 0, 0, ev, &err);
234 if(!mbuf->ptr) {
235 fprintf(stderr, "failed to map image: %s\n", clstrerror(err));
236 return 0;
237 }
239 assert(pitch == mbuf->xsz * 4 * sizeof(float));
240 }
241 return mbuf->ptr;
242 }
244 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
245 {
246 if(!mbuf || !mbuf->ptr) return;
248 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
249 mbuf->ptr = 0;
250 }
252 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
253 {
254 if(!mbuf) return false;
256 int err;
257 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
258 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
259 return false;
260 }
261 return true;
262 }
264 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
265 {
266 if(!mbuf) return false;
268 int err;
269 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
270 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
271 return false;
272 }
273 return true;
274 }
277 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
278 {
279 if(!mbuf || !mbuf->tex) {
280 return false;
281 }
283 int err;
284 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
285 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
286 return false;
287 }
288 return true;
289 }
291 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
292 {
293 if(!mbuf || !mbuf->tex) {
294 return false;
295 }
297 int err;
298 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
299 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
300 return false;
301 }
302 return true;
303 }
306 CLArg::CLArg()
307 {
308 memset(this, 0, sizeof *this);
309 }
312 CLProgram::CLProgram(const char *kname)
313 {
314 prog = 0;
315 kernel = 0;
316 this->kname = kname;
317 args.resize(16);
318 built = false;
320 wait_event = last_event = 0;
321 }
323 CLProgram::~CLProgram()
324 {
325 if(wait_event) {
326 clReleaseEvent(wait_event);
327 }
328 if(last_event) {
329 clWaitForEvents(1, &last_event);
330 clReleaseEvent(last_event);
331 }
333 if(prog) {
334 clReleaseProgram(prog);
335 }
336 if(kernel) {
337 clReleaseKernel(kernel);
338 }
339 for(size_t i=0; i<args.size(); i++) {
340 if(args[i].type == ARGTYPE_MEM_BUF) {
341 destroy_mem_buffer(args[i].v.mbuf);
342 }
343 }
344 }
346 bool CLProgram::load(const char *fname)
347 {
348 FILE *fp;
349 char *src;
350 struct stat st;
352 printf("loading opencl program (%s)\n", fname);
354 if(!(fp = fopen(fname, "rb"))) {
355 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
356 return false;
357 }
359 fstat(fileno(fp), &st);
361 src = new char[st.st_size + 1];
363 fread(src, 1, st.st_size, fp);
364 src[st.st_size] = 0;
365 fclose(fp);
368 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
369 fprintf(stderr, "error creating program object: %s\n", fname);
370 delete [] src;
371 return false;
372 }
373 delete [] src;
374 return true;
375 }
377 bool CLProgram::set_argi(int idx, int val)
378 {
379 if((int)args.size() <= idx) {
380 args.resize(idx + 1);
381 }
383 CLArg *arg = &args[idx];
384 arg->type = ARGTYPE_INT;
385 arg->v.ival = val;
386 return true;
387 }
389 bool CLProgram::set_argf(int idx, float val)
390 {
391 if((int)args.size() <= idx) {
392 args.resize(idx + 1);
393 }
395 CLArg *arg = &args[idx];
396 arg->type = ARGTYPE_FLOAT;
397 arg->v.fval = val;
398 return true;
399 }
401 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
402 {
403 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
404 CLMemBuffer *buf;
406 if(sz <= 0) {
407 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
408 return false;
409 }
410 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
411 return false;
412 }
414 if((int)args.size() <= idx) {
415 args.resize(idx + 1);
416 }
417 args[idx].type = ARGTYPE_MEM_BUF;
418 args[idx].v.mbuf = buf;
419 return true;
420 }
422 bool CLProgram::set_arg_image(int idx, int rdwr, int xsz, int ysz, const void *pix)
423 {
424 printf("create argument %d from %dx%d image\n", idx, xsz, ysz);
425 CLMemBuffer *buf;
427 if(!(buf = create_image_buffer(rdwr, xsz, ysz, pix))) {
428 return false;
429 }
431 if((int)args.size() <= idx) {
432 args.resize(idx + 1);
433 }
434 args[idx].type = ARGTYPE_MEM_BUF;
435 args[idx].v.mbuf = buf;
436 return true;
437 }
439 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
440 {
441 printf("create argument %d from texture %u\n", idx, tex);
442 CLMemBuffer *buf;
444 if(!(buf = create_image_buffer(rdwr, tex))) {
445 return false;
446 }
448 if((int)args.size() <= idx) {
449 args.resize(idx + 1);
450 }
451 args[idx].type = ARGTYPE_MEM_BUF;
452 args[idx].v.mbuf = buf;
453 return true;
454 }
456 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
457 {
458 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
459 return 0;
460 }
461 return args[arg].v.mbuf;
462 }
464 int CLProgram::get_num_args() const
465 {
466 int num_args = 0;
467 for(size_t i=0; i<args.size(); i++) {
468 if(args[i].type != ARGTYPE_NONE) {
469 num_args++;
470 }
471 }
472 return num_args;
473 }
475 bool CLProgram::build()
476 {
477 int err;
479 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
480 size_t sz;
481 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
483 char *errlog = (char*)alloca(sz + 1);
484 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
485 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
487 clReleaseProgram(prog);
488 prog = 0;
489 return false;
490 }
493 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
494 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
495 clReleaseProgram(prog);
496 prog = 0;
497 return false;
498 }
500 for(size_t i=0; i<args.size(); i++) {
501 int err;
503 if(args[i].type == ARGTYPE_NONE) {
504 break;
505 }
507 switch(args[i].type) {
508 case ARGTYPE_INT:
509 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
510 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
511 goto fail;
512 }
513 break;
515 case ARGTYPE_FLOAT:
516 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
517 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
518 goto fail;
519 }
520 break;
522 case ARGTYPE_MEM_BUF:
523 {
524 CLMemBuffer *mbuf = args[i].v.mbuf;
526 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
527 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
528 goto fail;
529 }
530 }
531 break;
533 default:
534 break;
535 }
536 }
538 built = true;
539 return true;
541 fail:
542 clReleaseProgram(prog);
543 clReleaseKernel(kernel);
544 prog = 0;
545 kernel = 0;
546 return false;
547 }
549 bool CLProgram::run() const
550 {
551 return run(1, 1);
552 }
554 bool CLProgram::run(int dim, ...) const
555 {
556 va_list ap;
557 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
559 va_start(ap, dim);
560 for(int i=0; i<dim; i++) {
561 global_size[i] = va_arg(ap, int);
562 }
563 va_end(ap);
565 if(last_event) {
566 clReleaseEvent(last_event);
567 }
569 int err;
570 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
571 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
572 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
573 return false;
574 }
576 if(wait_event) {
577 clReleaseEvent(wait_event);
578 wait_event = 0;
579 }
580 return true;
581 }
583 void CLProgram::set_wait_event(cl_event ev)
584 {
585 if(wait_event) {
586 clReleaseEvent(wait_event);
587 }
588 wait_event = ev;
589 }
591 cl_event CLProgram::get_last_event() const
592 {
593 return last_event;
594 }
596 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
597 {
598 unsigned int i, j, num_dev, num_plat, sel, ret;
599 cl_device_id dev[32];
600 cl_platform_id plat[32];
602 dev_inf->work_item_sizes = 0;
604 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
605 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
606 return -1;
607 }
608 if(!num_plat) {
609 fprintf(stderr, "OpenCL not available!\n");
610 return -1;
611 }
613 for(i=0; i<num_plat; i++) {
614 char buf[512];
616 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
617 printf("[%d]: %s", i, buf);
618 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
619 printf(", %s", buf);
620 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
621 printf(" (%s)\n", buf);
622 }
624 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
625 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
626 return -1;
627 }
628 printf("found %d cl devices.\n", num_dev);
630 for(i=0; i<num_dev; i++) {
631 struct device_info di;
633 if(get_dev_info(dev[i], &di) == -1) {
634 free(dev_inf->work_item_sizes);
635 return -1;
636 }
638 printf("--> device %u (%s)\n", i, devtypestr(di.type));
639 printf("max compute units: %u\n", di.units);
640 printf("max clock frequency: %u\n", di.clock);
641 printf("max work item dimensions: %u\n", di.dim);
643 printf("max work item sizes: ");
644 for(j=0; j<di.dim; j++) {
645 printf("%u", (unsigned int)di.work_item_sizes[j]);
646 if(di.dim - j > 1) {
647 printf(", ");
648 }
649 }
650 putchar('\n');
652 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
653 printf("max object allocation size: ");
654 print_memsize(stdout, di.mem_size);
655 putchar('\n');
657 if(devcmp(&di, dev_inf) > 0) {
658 free(dev_inf->work_item_sizes);
659 memcpy(dev_inf, &di, sizeof di);
660 sel = i;
661 }
662 }
664 if(num_dev) {
665 printf("\nusing device: %d\n", sel);
666 return 0;
667 }
669 return -1;
670 }
672 static int get_dev_info(cl_device_id dev, struct device_info *di)
673 {
674 di->id = dev;
677 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
678 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
679 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
680 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
682 di->work_item_sizes = new size_t[di->dim];
684 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
685 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
686 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
688 return 0;
689 }
691 static int devcmp(struct device_info *a, struct device_info *b)
692 {
693 unsigned int aval = a->units * a->clock;
694 unsigned int bval = b->units * b->clock;
696 return aval - bval;
697 }
699 static const char *devtypestr(cl_device_type type)
700 {
701 switch(type) {
702 case CL_DEVICE_TYPE_CPU:
703 return "cpu";
704 case CL_DEVICE_TYPE_GPU:
705 return "gpu";
706 case CL_DEVICE_TYPE_ACCELERATOR:
707 return "accelerator";
708 default:
709 break;
710 }
711 return "unknown";
712 }
714 static void print_memsize(FILE *out, unsigned long bytes)
715 {
716 int i;
717 unsigned long memsz = bytes;
718 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
720 for(i=0; suffix[i]; i++) {
721 if(memsz < 1024) {
722 fprintf(out, "%lu %s", memsz, suffix[i]);
723 if(i > 0) {
724 fprintf(out, " (%lu bytes)", bytes);
725 }
726 return;
727 }
729 memsz /= 1024;
730 }
731 }
733 static const char *clstrerror(int err)
734 {
735 if(err > 0) {
736 return "<invalid error code>";
737 }
738 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
739 return "<unknown error>";
740 }
741 return ocl_errstr[-err];
742 }