clray

view src/ocl.cc @ 43:f9eec11e5acc

shoehorned the kdtree into an opnecl image and improved performance slightly
author John Tsiombikas <nuclear@member.fsf.org>
date Sat, 28 Aug 2010 09:38:49 +0100
parents 1169f3d04135
children 8047637961a2
line source
1 #define OCL_CC_
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #include <assert.h>
9 #ifndef _MSC_VER
10 #include <alloca.h>
11 #else
12 #include <malloc.h>
13 #endif
14 #include <sys/stat.h>
15 #include "ocl.h"
16 #include "ogl.h"
17 #include "ocl_errstr.h"
19 #if defined(unix) || defined(__unix__)
20 #include <X11/Xlib.h>
21 #include <GL/glx.h>
22 #endif
24 #ifdef __APPLE__
25 #include <OpenGL/CGLCurrent.h>
26 #endif
29 struct device_info {
30 cl_device_id id;
31 cl_device_type type;
32 unsigned int units;
33 unsigned int clock;
35 unsigned int dim;
36 size_t *work_item_sizes;
37 size_t work_group_size;
39 unsigned long mem_size;
41 char *extensions;
42 bool gl_sharing;
43 };
45 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
46 static int get_dev_info(cl_device_id dev, struct device_info *di);
47 static void destroy_dev_info(struct device_info *di);
48 static int devcmp(struct device_info *a, struct device_info *b);
49 static const char *devtypestr(cl_device_type type);
50 static void print_memsize(FILE *out, unsigned long memsz);
51 static const char *clstrerror(int err);
54 static cl_context ctx;
55 static cl_command_queue cmdq;
56 static device_info devinf;
58 bool init_opencl()
59 {
60 if(select_device(&devinf, devcmp) == -1) {
61 return false;
62 }
66 #ifndef CLGL_INTEROP
67 cl_context_properties *prop = 0;
68 #else
70 #if defined(__APPLE__)
71 CGLContextObj glctx = CGLGetCurrentContext();
72 CGLShareGroupObj sgrp = CGLGetShareGroup(glctx);
74 cl_context_properties prop[] = {
75 #ifdef CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE
76 CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)sgrp,
77 #else
78 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
79 CL_CGL_SHAREGROUP_KHR, (cl_context_properties)sgrp,
80 #endif
81 0
82 };
83 #elif defined(unix) || defined(__unix__)
84 Display *dpy = glXGetCurrentDisplay();
85 GLXContext glctx = glXGetCurrentContext();
87 assert(dpy && glctx);
89 cl_context_properties prop[] = {
90 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
91 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
92 0
93 };
94 #elif defined(WIN32) || defined(__WIN32__)
95 HGLRC glctx = wglGetCurrentContext();
96 HDC dc = wglGetCurrentDC();
98 cl_context_properties prop[] = {
99 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
100 CL_WGL_HDC_KHR, (cl_context_properties)dc,
101 0
102 };
103 #else
104 #error "unknown or unsupported platform"
105 #endif
107 #endif /* CLGL_INTEROP */
109 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
110 fprintf(stderr, "failed to create opencl context\n");
111 return false;
112 }
114 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
115 fprintf(stderr, "failed to create command queue\n");
116 return false;
117 }
118 return true;
119 }
121 void destroy_opencl()
122 {
123 if(cmdq) {
124 clReleaseCommandQueue(cmdq);
125 cmdq = 0;
126 }
128 if(ctx) {
129 clReleaseContext(ctx);
130 ctx = 0;
131 }
132 }
135 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
136 {
137 int err;
138 cl_mem mem;
139 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
141 if(buf) {
142 flags |= CL_MEM_COPY_HOST_PTR;
143 }
146 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
147 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
148 return 0;
149 }
151 CLMemBuffer *mbuf = new CLMemBuffer;
152 mbuf->type = MEM_BUFFER;
153 mbuf->mem = mem;
154 mbuf->size = sz;
155 mbuf->xsz = mbuf->ysz = 0;
156 mbuf->ptr = 0;
157 mbuf->tex = 0;
158 return mbuf;
159 }
161 CLMemBuffer *create_image_buffer(int rdwr, int xsz, int ysz, const void *pixels)
162 {
163 int err, pitch;
164 cl_mem mem;
165 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
167 if(pixels) {
168 flags |= CL_MEM_COPY_HOST_PTR;
169 pitch = xsz * 4 * sizeof(float);
170 } else {
171 pitch = 0;
172 }
174 cl_image_format fmt = {CL_RGBA, CL_FLOAT};
176 if(!(mem = clCreateImage2D(ctx, flags, &fmt, xsz, ysz, pitch, (void*)pixels, &err))) {
177 fprintf(stderr, "failed to create %dx%d image: %s\n", xsz, ysz, clstrerror(err));
178 return 0;
179 }
181 CLMemBuffer *mbuf = new CLMemBuffer;
182 mbuf->type = IMAGE_BUFFER;
183 mbuf->mem = mem;
184 mbuf->size = ysz * pitch;
185 mbuf->xsz = xsz;
186 mbuf->ysz = ysz;
187 mbuf->ptr = 0;
188 mbuf->tex = 0;
189 return mbuf;
190 }
192 CLMemBuffer *create_image_buffer(int rdwr, unsigned int tex)
193 {
194 int err, xsz, ysz;
195 cl_mem mem;
197 glGetError(); // clear previous OpenGL errors
199 glPushAttrib(GL_TEXTURE_BIT);
200 glBindTexture(GL_TEXTURE_2D, tex);
201 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xsz);
202 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &ysz);
203 glPopAttrib();
205 if(glGetError()) {
206 fprintf(stderr, "create_image_buffer: GL error while retreiving texture parameters for texture %u\n", tex);
207 return 0;
208 }
210 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
211 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
212 return 0;
213 }
215 CLMemBuffer *mbuf = new CLMemBuffer;
216 mbuf->type = IMAGE_BUFFER;
217 mbuf->mem = mem;
218 mbuf->size = 0;
219 mbuf->xsz = xsz;
220 mbuf->ysz = ysz;
221 mbuf->ptr = 0;
222 mbuf->tex = tex;
224 return mbuf;
225 }
227 void destroy_mem_buffer(CLMemBuffer *mbuf)
228 {
229 if(mbuf) {
230 clReleaseMemObject(mbuf->mem);
231 delete mbuf;
232 }
233 }
235 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
236 {
237 if(!mbuf) return 0;
239 #ifndef NDEBUG
240 if(mbuf->ptr) {
241 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
242 }
243 #endif
245 int err;
247 if(mbuf->type == MEM_BUFFER) {
248 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
249 if(!mbuf->ptr) {
250 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
251 return 0;
252 }
253 } else {
254 assert(mbuf->type == IMAGE_BUFFER);
256 size_t orig[] = {0, 0, 0};
257 size_t rgn[] = {mbuf->xsz, mbuf->ysz, 1};
258 size_t pitch;
260 mbuf->ptr = clEnqueueMapImage(cmdq, mbuf->mem, 1, rdwr, orig, rgn, &pitch, 0, 0, 0, ev, &err);
261 if(!mbuf->ptr) {
262 fprintf(stderr, "failed to map image: %s\n", clstrerror(err));
263 return 0;
264 }
266 assert(pitch == mbuf->xsz * 4 * sizeof(float));
267 }
268 return mbuf->ptr;
269 }
271 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
272 {
273 if(!mbuf || !mbuf->ptr) return;
275 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
276 mbuf->ptr = 0;
277 }
279 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
280 {
281 if(!mbuf) return false;
283 int err;
284 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
285 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
286 return false;
287 }
288 return true;
289 }
291 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
292 {
293 if(!mbuf) return false;
295 int err;
296 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
297 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
298 return false;
299 }
300 return true;
301 }
304 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
305 {
306 if(!mbuf || !mbuf->tex) {
307 return false;
308 }
310 int err;
311 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
312 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
313 return false;
314 }
315 return true;
316 }
318 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
319 {
320 if(!mbuf || !mbuf->tex) {
321 return false;
322 }
324 int err;
325 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
326 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
327 return false;
328 }
329 return true;
330 }
333 CLArg::CLArg()
334 {
335 memset(this, 0, sizeof *this);
336 }
339 CLProgram::CLProgram(const char *kname)
340 {
341 prog = 0;
342 kernel = 0;
343 this->kname = kname;
344 args.resize(16);
345 built = false;
347 wait_event = last_event = 0;
348 }
350 CLProgram::~CLProgram()
351 {
352 if(wait_event) {
353 clReleaseEvent(wait_event);
354 }
355 if(last_event) {
356 clWaitForEvents(1, &last_event);
357 clReleaseEvent(last_event);
358 }
360 if(prog) {
361 clReleaseProgram(prog);
362 }
363 if(kernel) {
364 clReleaseKernel(kernel);
365 }
366 for(size_t i=0; i<args.size(); i++) {
367 if(args[i].type == ARGTYPE_MEM_BUF) {
368 destroy_mem_buffer(args[i].v.mbuf);
369 }
370 }
371 }
373 bool CLProgram::load(const char *fname)
374 {
375 FILE *fp;
376 char *src;
377 struct stat st;
379 printf("loading opencl program (%s)\n", fname);
381 if(!(fp = fopen(fname, "rb"))) {
382 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
383 return false;
384 }
386 fstat(fileno(fp), &st);
388 src = new char[st.st_size + 1];
390 fread(src, 1, st.st_size, fp);
391 src[st.st_size] = 0;
392 fclose(fp);
395 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
396 fprintf(stderr, "error creating program object: %s\n", fname);
397 delete [] src;
398 return false;
399 }
400 delete [] src;
401 return true;
402 }
404 bool CLProgram::set_argi(int idx, int val)
405 {
406 if((int)args.size() <= idx) {
407 args.resize(idx + 1);
408 }
410 CLArg *arg = &args[idx];
411 arg->type = ARGTYPE_INT;
412 arg->v.ival = val;
413 return true;
414 }
416 bool CLProgram::set_argf(int idx, float val)
417 {
418 if((int)args.size() <= idx) {
419 args.resize(idx + 1);
420 }
422 CLArg *arg = &args[idx];
423 arg->type = ARGTYPE_FLOAT;
424 arg->v.fval = val;
425 return true;
426 }
428 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
429 {
430 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
431 CLMemBuffer *buf;
433 if(sz <= 0) {
434 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
435 return false;
436 }
437 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
438 return false;
439 }
441 if((int)args.size() <= idx) {
442 args.resize(idx + 1);
443 }
444 args[idx].type = ARGTYPE_MEM_BUF;
445 args[idx].v.mbuf = buf;
446 return true;
447 }
449 bool CLProgram::set_arg_image(int idx, int rdwr, int xsz, int ysz, const void *pix)
450 {
451 printf("create argument %d from %dx%d image\n", idx, xsz, ysz);
452 CLMemBuffer *buf;
454 if(!(buf = create_image_buffer(rdwr, xsz, ysz, pix))) {
455 return false;
456 }
458 if((int)args.size() <= idx) {
459 args.resize(idx + 1);
460 }
461 args[idx].type = ARGTYPE_MEM_BUF;
462 args[idx].v.mbuf = buf;
463 return true;
464 }
466 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
467 {
468 printf("create argument %d from texture %u\n", idx, tex);
469 CLMemBuffer *buf;
471 if(!(buf = create_image_buffer(rdwr, tex))) {
472 return false;
473 }
475 if((int)args.size() <= idx) {
476 args.resize(idx + 1);
477 }
478 args[idx].type = ARGTYPE_MEM_BUF;
479 args[idx].v.mbuf = buf;
480 return true;
481 }
483 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
484 {
485 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
486 return 0;
487 }
488 return args[arg].v.mbuf;
489 }
491 int CLProgram::get_num_args() const
492 {
493 int num_args = 0;
494 for(size_t i=0; i<args.size(); i++) {
495 if(args[i].type != ARGTYPE_NONE) {
496 num_args++;
497 }
498 }
499 return num_args;
500 }
502 bool CLProgram::build()
503 {
504 int err;
506 const char *opt = "-cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
508 if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) {
509 size_t sz;
510 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
512 char *errlog = (char*)alloca(sz + 1);
513 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
514 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
516 clReleaseProgram(prog);
517 prog = 0;
518 return false;
519 }
522 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
523 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
524 clReleaseProgram(prog);
525 prog = 0;
526 return false;
527 }
529 for(size_t i=0; i<args.size(); i++) {
530 int err;
532 if(args[i].type == ARGTYPE_NONE) {
533 break;
534 }
536 switch(args[i].type) {
537 case ARGTYPE_INT:
538 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
539 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
540 goto fail;
541 }
542 break;
544 case ARGTYPE_FLOAT:
545 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
546 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
547 goto fail;
548 }
549 break;
551 case ARGTYPE_MEM_BUF:
552 {
553 CLMemBuffer *mbuf = args[i].v.mbuf;
555 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
556 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
557 goto fail;
558 }
559 }
560 break;
562 default:
563 break;
564 }
565 }
567 built = true;
568 return true;
570 fail:
571 clReleaseProgram(prog);
572 clReleaseKernel(kernel);
573 prog = 0;
574 kernel = 0;
575 return false;
576 }
578 bool CLProgram::run() const
579 {
580 return run(1, 1);
581 }
583 bool CLProgram::run(int dim, ...) const
584 {
585 va_list ap;
586 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
588 va_start(ap, dim);
589 for(int i=0; i<dim; i++) {
590 global_size[i] = va_arg(ap, int);
591 }
592 va_end(ap);
594 if(last_event) {
595 clReleaseEvent(last_event);
596 }
598 int err;
599 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
600 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
601 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
602 return false;
603 }
605 if(wait_event) {
606 clReleaseEvent(wait_event);
607 wait_event = 0;
608 }
609 return true;
610 }
612 void CLProgram::set_wait_event(cl_event ev)
613 {
614 if(wait_event) {
615 clReleaseEvent(wait_event);
616 }
617 wait_event = ev;
618 }
620 cl_event CLProgram::get_last_event() const
621 {
622 return last_event;
623 }
625 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
626 {
627 unsigned int i, j, num_dev, num_plat, sel, ret;
628 cl_device_id dev[32];
629 cl_platform_id plat[32];
631 dev_inf->work_item_sizes = 0;
633 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
634 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
635 return -1;
636 }
637 if(!num_plat) {
638 fprintf(stderr, "OpenCL not available!\n");
639 return -1;
640 }
642 for(i=0; i<num_plat; i++) {
643 char buf[512];
645 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
646 printf("[%d]: %s", i, buf);
647 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
648 printf(", %s", buf);
649 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
650 printf(" (%s)\n", buf);
651 }
653 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
654 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
655 return -1;
656 }
657 printf("found %d cl devices.\n", num_dev);
659 for(i=0; i<num_dev; i++) {
660 struct device_info di;
662 if(get_dev_info(dev[i], &di) == -1) {
663 destroy_dev_info(&di);
664 return -1;
665 }
667 printf("--> device %u (%s)\n", i, devtypestr(di.type));
668 printf("max compute units: %u\n", di.units);
669 printf("max clock frequency: %u\n", di.clock);
670 printf("max work item dimensions: %u\n", di.dim);
672 printf("max work item sizes: ");
673 for(j=0; j<di.dim; j++) {
674 printf("%u", (unsigned int)di.work_item_sizes[j]);
675 if(di.dim - j > 1) {
676 printf(", ");
677 }
678 }
679 putchar('\n');
681 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
682 printf("max object allocation size: ");
683 print_memsize(stdout, di.mem_size);
684 putchar('\n');
686 printf("extensions: %s\n", di.extensions);
688 if(devcmp(&di, dev_inf) > 0) {
689 free(dev_inf->work_item_sizes);
690 memcpy(dev_inf, &di, sizeof di);
691 sel = i;
692 }
693 }
695 if(num_dev) {
696 printf("\nusing device: %d\n", sel);
697 return 0;
698 }
700 return -1;
701 }
703 static int get_dev_info(cl_device_id dev, struct device_info *di)
704 {
705 di->id = dev;
707 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
708 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
709 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
710 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
712 di->work_item_sizes = new size_t[di->dim];
714 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
715 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
716 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
718 size_t ext_str_len;
719 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, 0, &ext_str_len);
721 di->extensions = new char[ext_str_len + 1];
722 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, ext_str_len, di->extensions, 0);
723 di->extensions[ext_str_len] = 0;
725 if(strstr(di->extensions, "cl_khr_gl_sharing") || strstr(di->extensions, "cl_APPLE_gl_sharing")) {
726 di->gl_sharing = true;
727 } else {
728 di->gl_sharing = false;
729 }
731 return 0;
732 }
734 static void destroy_dev_info(struct device_info *di)
735 {
736 delete [] di->work_item_sizes;
737 delete [] di->extensions;
738 }
740 static int devcmp(struct device_info *a, struct device_info *b)
741 {
742 unsigned int aval = a->units * a->clock;
743 unsigned int bval = b->units * b->clock;
745 return aval - bval;
746 }
748 static const char *devtypestr(cl_device_type type)
749 {
750 switch(type) {
751 case CL_DEVICE_TYPE_CPU:
752 return "cpu";
753 case CL_DEVICE_TYPE_GPU:
754 return "gpu";
755 case CL_DEVICE_TYPE_ACCELERATOR:
756 return "accelerator";
757 default:
758 break;
759 }
760 return "unknown";
761 }
763 static void print_memsize(FILE *out, unsigned long bytes)
764 {
765 int i;
766 unsigned long memsz = bytes;
767 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
769 for(i=0; suffix[i]; i++) {
770 if(memsz < 1024) {
771 fprintf(out, "%lu %s", memsz, suffix[i]);
772 if(i > 0) {
773 fprintf(out, " (%lu bytes)", bytes);
774 }
775 return;
776 }
778 memsz /= 1024;
779 }
780 }
782 static const char *clstrerror(int err)
783 {
784 if(err > 0) {
785 return "<invalid error code>";
786 }
787 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
788 return "<unknown error>";
789 }
790 return ocl_errstr[-err];
791 }