clray

annotate src/ocl.cc @ 40:1bcbb53b3505

segfault on exit?
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 27 Aug 2010 19:00:14 +0100
parents 980bc07be868
children 057b8575a1c1
rev   line source
nuclear@8 1 #define OCL_CC_
nuclear@8 2
nuclear@0 3 #include <stdio.h>
nuclear@0 4 #include <stdlib.h>
nuclear@0 5 #include <string.h>
nuclear@8 6 #include <stdarg.h>
nuclear@0 7 #include <errno.h>
nuclear@39 8 #include <assert.h>
John@11 9 #ifndef _MSC_VER
nuclear@0 10 #include <alloca.h>
John@11 11 #else
John@11 12 #include <malloc.h>
John@11 13 #endif
nuclear@0 14 #include <sys/stat.h>
nuclear@0 15 #include "ocl.h"
nuclear@39 16 #include "ogl.h"
nuclear@8 17 #include "ocl_errstr.h"
nuclear@0 18
nuclear@39 19 #if defined(unix) || defined(__unix__)
nuclear@39 20 #include <X11/Xlib.h>
nuclear@39 21 #include <GL/glx.h>
nuclear@39 22 #endif
nuclear@0 23
nuclear@0 24
nuclear@0 25 struct device_info {
nuclear@0 26 cl_device_id id;
nuclear@0 27 cl_device_type type;
nuclear@0 28 unsigned int units;
nuclear@0 29 unsigned int clock;
nuclear@0 30
nuclear@0 31 unsigned int dim;
nuclear@0 32 size_t *work_item_sizes;
nuclear@0 33 size_t work_group_size;
nuclear@0 34
nuclear@0 35 unsigned long mem_size;
nuclear@0 36 };
nuclear@0 37
nuclear@0 38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
nuclear@0 39 static int get_dev_info(cl_device_id dev, struct device_info *di);
nuclear@0 40 static int devcmp(struct device_info *a, struct device_info *b);
nuclear@0 41 static const char *devtypestr(cl_device_type type);
nuclear@0 42 static void print_memsize(FILE *out, unsigned long memsz);
nuclear@8 43 static const char *clstrerror(int err);
nuclear@0 44
nuclear@0 45
nuclear@0 46 static cl_context ctx;
nuclear@0 47 static cl_command_queue cmdq;
nuclear@0 48 static device_info devinf;
nuclear@0 49
nuclear@39 50 bool init_opencl()
nuclear@0 51 {
nuclear@0 52 if(select_device(&devinf, devcmp) == -1) {
nuclear@0 53 return false;
nuclear@0 54 }
nuclear@0 55
nuclear@40 56 #ifndef CLGL_INTEROP
nuclear@40 57 cl_context_properties *prop = 0;
nuclear@40 58
nuclear@40 59 #else
nuclear@40 60
nuclear@39 61 #if defined(__APPLE__)
nuclear@39 62 #error "CL/GL context sharing not implemented on MacOSX yet"
nuclear@39 63 #elif defined(unix) || defined(__unix__)
nuclear@39 64 Display *dpy = glXGetCurrentDisplay();
nuclear@39 65 GLXContext glctx = glXGetCurrentContext();
nuclear@0 66
nuclear@39 67 assert(dpy && glctx);
nuclear@39 68
nuclear@39 69 cl_context_properties prop[] = {
nuclear@39 70 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
nuclear@39 71 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
nuclear@39 72 0
nuclear@39 73 };
nuclear@39 74 #elif defined(WIN32) || defined(__WIN32__)
nuclear@39 75 #error "CL/GL context sharing not implemented on windows yet"
nuclear@39 76 #else
nuclear@39 77 #error "unknown or unsupported platform"
nuclear@39 78 #endif
nuclear@39 79
nuclear@40 80 #endif /* CLGL_INTEROP */
nuclear@40 81
nuclear@39 82 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
nuclear@0 83 fprintf(stderr, "failed to create opencl context\n");
nuclear@0 84 return false;
nuclear@0 85 }
nuclear@0 86
nuclear@0 87 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
nuclear@0 88 fprintf(stderr, "failed to create command queue\n");
nuclear@0 89 return false;
nuclear@0 90 }
nuclear@0 91 return true;
nuclear@0 92 }
nuclear@0 93
nuclear@40 94 void destroy_opencl()
nuclear@40 95 {
nuclear@40 96 if(cmdq) {
nuclear@40 97 clReleaseCommandQueue(cmdq);
nuclear@40 98 cmdq = 0;
nuclear@40 99 }
nuclear@40 100
nuclear@40 101 if(ctx) {
nuclear@40 102 clReleaseContext(ctx);
nuclear@40 103 ctx = 0;
nuclear@40 104 }
nuclear@40 105 }
nuclear@40 106
nuclear@0 107
nuclear@28 108 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
nuclear@0 109 {
nuclear@0 110 int err;
nuclear@0 111 cl_mem mem;
nuclear@12 112 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
nuclear@0 113
nuclear@12 114 if(buf) {
nuclear@12 115 flags |= CL_MEM_COPY_HOST_PTR;
nuclear@12 116 }
nuclear@0 117
nuclear@12 118
nuclear@28 119 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
nuclear@8 120 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
nuclear@0 121 return 0;
nuclear@0 122 }
nuclear@0 123
nuclear@0 124 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@0 125 mbuf->mem = mem;
nuclear@0 126 mbuf->size = sz;
nuclear@12 127 mbuf->ptr = 0;
nuclear@39 128 mbuf->tex = 0;
nuclear@39 129 return mbuf;
nuclear@39 130 }
nuclear@39 131
nuclear@39 132 CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex)
nuclear@39 133 {
nuclear@39 134 int err;
nuclear@39 135 cl_mem mem;
nuclear@39 136
nuclear@39 137 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
nuclear@39 138 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
nuclear@39 139 return 0;
nuclear@39 140 }
nuclear@39 141
nuclear@39 142 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@39 143 mbuf->mem = mem;
nuclear@39 144 mbuf->size = 0;
nuclear@39 145 mbuf->ptr = 0;
nuclear@39 146 mbuf->tex = tex;
nuclear@0 147 return mbuf;
nuclear@0 148 }
nuclear@0 149
nuclear@0 150 void destroy_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 151 {
nuclear@0 152 if(mbuf) {
nuclear@0 153 clReleaseMemObject(mbuf->mem);
nuclear@0 154 delete mbuf;
nuclear@0 155 }
nuclear@0 156 }
nuclear@0 157
nuclear@39 158 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
nuclear@0 159 {
nuclear@0 160 if(!mbuf) return 0;
nuclear@0 161
nuclear@12 162 #ifndef NDEBUG
nuclear@12 163 if(mbuf->ptr) {
nuclear@12 164 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
nuclear@12 165 }
nuclear@12 166 #endif
nuclear@12 167
nuclear@0 168 int err;
nuclear@39 169 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
nuclear@0 170 if(!mbuf->ptr) {
nuclear@8 171 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
nuclear@0 172 return 0;
nuclear@0 173 }
nuclear@0 174 return mbuf->ptr;
nuclear@0 175 }
nuclear@0 176
nuclear@39 177 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
nuclear@0 178 {
nuclear@0 179 if(!mbuf || !mbuf->ptr) return;
nuclear@39 180 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
nuclear@12 181 mbuf->ptr = 0;
nuclear@0 182 }
nuclear@0 183
nuclear@39 184 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
nuclear@0 185 {
nuclear@0 186 if(!mbuf) return false;
nuclear@0 187
nuclear@0 188 int err;
nuclear@39 189 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
nuclear@8 190 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
nuclear@0 191 return false;
nuclear@0 192 }
nuclear@0 193 return true;
nuclear@0 194 }
nuclear@0 195
nuclear@39 196 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
nuclear@0 197 {
nuclear@0 198 if(!mbuf) return false;
nuclear@0 199
nuclear@0 200 int err;
nuclear@39 201 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
nuclear@8 202 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
nuclear@0 203 return false;
nuclear@0 204 }
nuclear@0 205 return true;
nuclear@0 206 }
nuclear@0 207
nuclear@0 208
nuclear@39 209 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 210 {
nuclear@39 211 if(!mbuf || !mbuf->tex) {
nuclear@39 212 return false;
nuclear@39 213 }
nuclear@39 214
nuclear@39 215 int err;
nuclear@39 216 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 217 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
nuclear@39 218 return false;
nuclear@39 219 }
nuclear@39 220 return true;
nuclear@39 221 }
nuclear@39 222
nuclear@39 223 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 224 {
nuclear@39 225 if(!mbuf || !mbuf->tex) {
nuclear@39 226 return false;
nuclear@39 227 }
nuclear@39 228
nuclear@39 229 int err;
nuclear@39 230 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 231 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
nuclear@39 232 return false;
nuclear@39 233 }
nuclear@39 234 return true;
nuclear@39 235 }
nuclear@39 236
nuclear@39 237
John@14 238 CLArg::CLArg()
John@14 239 {
John@14 240 memset(this, 0, sizeof *this);
John@14 241 }
John@14 242
John@14 243
nuclear@0 244 CLProgram::CLProgram(const char *kname)
nuclear@0 245 {
nuclear@0 246 prog = 0;
nuclear@0 247 kernel = 0;
nuclear@0 248 this->kname = kname;
nuclear@1 249 args.resize(16);
nuclear@0 250 built = false;
nuclear@39 251
nuclear@39 252 wait_event = last_event = 0;
nuclear@0 253 }
nuclear@0 254
nuclear@0 255 CLProgram::~CLProgram()
nuclear@0 256 {
nuclear@39 257 if(wait_event) {
nuclear@39 258 clReleaseEvent(wait_event);
nuclear@39 259 }
nuclear@39 260 if(last_event) {
nuclear@40 261 clWaitForEvents(1, &last_event);
nuclear@39 262 clReleaseEvent(last_event);
nuclear@39 263 }
nuclear@39 264
nuclear@0 265 if(prog) {
nuclear@0 266 clReleaseProgram(prog);
nuclear@0 267 }
nuclear@0 268 if(kernel) {
nuclear@0 269 clReleaseKernel(kernel);
nuclear@0 270 }
nuclear@1 271 for(size_t i=0; i<args.size(); i++) {
nuclear@1 272 if(args[i].type == ARGTYPE_MEM_BUF) {
nuclear@1 273 destroy_mem_buffer(args[i].v.mbuf);
nuclear@0 274 }
nuclear@0 275 }
nuclear@0 276 }
nuclear@0 277
nuclear@0 278 bool CLProgram::load(const char *fname)
nuclear@0 279 {
nuclear@0 280 FILE *fp;
nuclear@0 281 char *src;
nuclear@0 282 struct stat st;
nuclear@0 283
nuclear@0 284 printf("loading opencl program (%s)\n", fname);
nuclear@0 285
nuclear@0 286 if(!(fp = fopen(fname, "rb"))) {
nuclear@0 287 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
nuclear@0 288 return false;
nuclear@0 289 }
nuclear@0 290
nuclear@0 291 fstat(fileno(fp), &st);
nuclear@0 292
nuclear@0 293 src = new char[st.st_size + 1];
nuclear@0 294
nuclear@0 295 fread(src, 1, st.st_size, fp);
nuclear@0 296 src[st.st_size] = 0;
nuclear@0 297 fclose(fp);
nuclear@0 298
nuclear@0 299
nuclear@0 300 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
nuclear@0 301 fprintf(stderr, "error creating program object: %s\n", fname);
nuclear@0 302 delete [] src;
nuclear@0 303 return false;
nuclear@0 304 }
nuclear@0 305 delete [] src;
nuclear@0 306 return true;
nuclear@0 307 }
nuclear@0 308
nuclear@1 309 bool CLProgram::set_argi(int idx, int val)
nuclear@1 310 {
nuclear@1 311 if((int)args.size() <= idx) {
nuclear@1 312 args.resize(idx + 1);
nuclear@1 313 }
nuclear@1 314
nuclear@1 315 CLArg *arg = &args[idx];
nuclear@1 316 arg->type = ARGTYPE_INT;
nuclear@1 317 arg->v.ival = val;
nuclear@1 318 return true;
nuclear@1 319 }
nuclear@1 320
nuclear@1 321 bool CLProgram::set_argf(int idx, float val)
nuclear@1 322 {
nuclear@1 323 if((int)args.size() <= idx) {
nuclear@1 324 args.resize(idx + 1);
nuclear@1 325 }
nuclear@1 326
nuclear@1 327 CLArg *arg = &args[idx];
nuclear@1 328 arg->type = ARGTYPE_FLOAT;
nuclear@1 329 arg->v.fval = val;
nuclear@1 330 return true;
nuclear@1 331 }
nuclear@1 332
nuclear@28 333 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
nuclear@0 334 {
nuclear@13 335 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
nuclear@0 336 CLMemBuffer *buf;
nuclear@0 337
nuclear@39 338 if(sz <= 0) {
nuclear@39 339 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
nuclear@39 340 return false;
nuclear@39 341 }
nuclear@39 342 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
nuclear@39 343 return false;
nuclear@39 344 }
nuclear@39 345
nuclear@39 346 if((int)args.size() <= idx) {
nuclear@39 347 args.resize(idx + 1);
nuclear@39 348 }
nuclear@39 349 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@39 350 args[idx].v.mbuf = buf;
nuclear@39 351 return true;
nuclear@39 352 }
nuclear@39 353
nuclear@39 354 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
nuclear@39 355 {
nuclear@39 356 printf("create argument %d from texture %u\n", idx, tex);
nuclear@39 357 CLMemBuffer *buf;
nuclear@39 358
nuclear@39 359 if(!(buf = create_mem_buffer(rdwr, tex))) {
nuclear@0 360 return false;
nuclear@0 361 }
nuclear@0 362
nuclear@1 363 if((int)args.size() <= idx) {
nuclear@1 364 args.resize(idx + 1);
nuclear@0 365 }
nuclear@1 366 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@1 367 args[idx].v.mbuf = buf;
nuclear@0 368 return true;
nuclear@0 369 }
nuclear@0 370
nuclear@0 371 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
nuclear@0 372 {
nuclear@1 373 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
nuclear@0 374 return 0;
nuclear@0 375 }
nuclear@1 376 return args[arg].v.mbuf;
nuclear@0 377 }
nuclear@0 378
John@14 379 int CLProgram::get_num_args() const
John@14 380 {
John@14 381 int num_args = 0;
John@14 382 for(size_t i=0; i<args.size(); i++) {
John@14 383 if(args[i].type != ARGTYPE_NONE) {
John@14 384 num_args++;
John@14 385 }
John@14 386 }
John@14 387 return num_args;
John@14 388 }
John@14 389
nuclear@0 390 bool CLProgram::build()
nuclear@0 391 {
nuclear@2 392 int err;
nuclear@0 393
nuclear@39 394 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
nuclear@2 395 size_t sz;
nuclear@2 396 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
nuclear@0 397
nuclear@2 398 char *errlog = (char*)alloca(sz + 1);
nuclear@2 399 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
nuclear@8 400 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
nuclear@2 401
nuclear@0 402 clReleaseProgram(prog);
nuclear@0 403 prog = 0;
nuclear@0 404 return false;
nuclear@0 405 }
nuclear@0 406
nuclear@0 407
nuclear@0 408 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
nuclear@0 409 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
nuclear@0 410 clReleaseProgram(prog);
nuclear@0 411 prog = 0;
nuclear@0 412 return false;
nuclear@0 413 }
nuclear@0 414
nuclear@1 415 for(size_t i=0; i<args.size(); i++) {
nuclear@1 416 int err;
nuclear@0 417
nuclear@1 418 if(args[i].type == ARGTYPE_NONE) {
nuclear@1 419 break;
nuclear@1 420 }
nuclear@1 421
nuclear@1 422 switch(args[i].type) {
nuclear@1 423 case ARGTYPE_INT:
nuclear@1 424 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
nuclear@8 425 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 426 goto fail;
nuclear@1 427 }
nuclear@1 428 break;
nuclear@1 429
nuclear@1 430 case ARGTYPE_FLOAT:
nuclear@1 431 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
nuclear@8 432 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 433 goto fail;
nuclear@1 434 }
nuclear@1 435 break;
nuclear@1 436
nuclear@1 437 case ARGTYPE_MEM_BUF:
nuclear@1 438 {
nuclear@1 439 CLMemBuffer *mbuf = args[i].v.mbuf;
nuclear@1 440
nuclear@1 441 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
nuclear@8 442 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 443 goto fail;
nuclear@1 444 }
nuclear@1 445 }
nuclear@1 446 break;
nuclear@1 447
nuclear@1 448 default:
nuclear@1 449 break;
nuclear@0 450 }
nuclear@0 451 }
nuclear@0 452
nuclear@0 453 built = true;
nuclear@0 454 return true;
nuclear@1 455
nuclear@1 456 fail:
nuclear@1 457 clReleaseProgram(prog);
nuclear@1 458 clReleaseKernel(kernel);
nuclear@1 459 prog = 0;
nuclear@1 460 kernel = 0;
nuclear@1 461 return false;
nuclear@0 462 }
nuclear@0 463
nuclear@0 464 bool CLProgram::run() const
nuclear@0 465 {
nuclear@0 466 return run(1, 1);
nuclear@0 467 }
nuclear@0 468
nuclear@0 469 bool CLProgram::run(int dim, ...) const
nuclear@0 470 {
nuclear@0 471 va_list ap;
nuclear@0 472 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
nuclear@0 473
nuclear@0 474 va_start(ap, dim);
nuclear@0 475 for(int i=0; i<dim; i++) {
nuclear@0 476 global_size[i] = va_arg(ap, int);
nuclear@0 477 }
nuclear@0 478 va_end(ap);
nuclear@0 479
nuclear@39 480 if(last_event) {
nuclear@39 481 clReleaseEvent(last_event);
nuclear@39 482 }
nuclear@39 483
nuclear@0 484 int err;
nuclear@39 485 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
nuclear@39 486 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
nuclear@8 487 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
nuclear@0 488 return false;
nuclear@0 489 }
nuclear@32 490
nuclear@39 491 if(wait_event) {
nuclear@39 492 clReleaseEvent(wait_event);
nuclear@39 493 wait_event = 0;
nuclear@39 494 }
nuclear@0 495 return true;
nuclear@0 496 }
nuclear@0 497
nuclear@39 498 void CLProgram::set_wait_event(cl_event ev)
nuclear@39 499 {
nuclear@39 500 if(wait_event) {
nuclear@39 501 clReleaseEvent(wait_event);
nuclear@39 502 }
nuclear@39 503 wait_event = ev;
nuclear@39 504 }
nuclear@39 505
nuclear@39 506 cl_event CLProgram::get_last_event() const
nuclear@39 507 {
nuclear@39 508 return last_event;
nuclear@39 509 }
nuclear@39 510
nuclear@0 511 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
nuclear@0 512 {
nuclear@8 513 unsigned int i, j, num_dev, num_plat, sel, ret;
nuclear@0 514 cl_device_id dev[32];
nuclear@8 515 cl_platform_id plat[32];
nuclear@0 516
nuclear@0 517 dev_inf->work_item_sizes = 0;
nuclear@0 518
nuclear@8 519 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
nuclear@8 520 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
nuclear@8 521 return -1;
nuclear@8 522 }
nuclear@8 523 if(!num_plat) {
nuclear@8 524 fprintf(stderr, "OpenCL not available!\n");
nuclear@8 525 return -1;
nuclear@8 526 }
nuclear@0 527
nuclear@8 528 for(i=0; i<num_plat; i++) {
nuclear@8 529 char buf[512];
nuclear@8 530
nuclear@8 531 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
nuclear@8 532 printf("[%d]: %s", i, buf);
nuclear@8 533 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
nuclear@8 534 printf(", %s", buf);
nuclear@8 535 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
nuclear@8 536 printf(" (%s)\n", buf);
nuclear@8 537 }
nuclear@8 538
nuclear@8 539 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
nuclear@8 540 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
nuclear@8 541 return -1;
nuclear@8 542 }
nuclear@0 543 printf("found %d cl devices.\n", num_dev);
nuclear@0 544
nuclear@0 545 for(i=0; i<num_dev; i++) {
nuclear@0 546 struct device_info di;
nuclear@0 547
nuclear@0 548 if(get_dev_info(dev[i], &di) == -1) {
nuclear@0 549 free(dev_inf->work_item_sizes);
nuclear@0 550 return -1;
nuclear@0 551 }
nuclear@0 552
nuclear@0 553 printf("--> device %u (%s)\n", i, devtypestr(di.type));
nuclear@0 554 printf("max compute units: %u\n", di.units);
nuclear@0 555 printf("max clock frequency: %u\n", di.clock);
nuclear@0 556 printf("max work item dimensions: %u\n", di.dim);
nuclear@0 557
nuclear@0 558 printf("max work item sizes: ");
nuclear@0 559 for(j=0; j<di.dim; j++) {
nuclear@0 560 printf("%u", (unsigned int)di.work_item_sizes[j]);
nuclear@0 561 if(di.dim - j > 1) {
nuclear@0 562 printf(", ");
nuclear@0 563 }
nuclear@0 564 }
nuclear@0 565 putchar('\n');
nuclear@0 566
nuclear@0 567 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
nuclear@0 568 printf("max object allocation size: ");
nuclear@0 569 print_memsize(stdout, di.mem_size);
nuclear@0 570 putchar('\n');
nuclear@0 571
nuclear@0 572 if(devcmp(&di, dev_inf) > 0) {
nuclear@0 573 free(dev_inf->work_item_sizes);
nuclear@0 574 memcpy(dev_inf, &di, sizeof di);
nuclear@0 575 sel = i;
nuclear@0 576 }
nuclear@0 577 }
nuclear@0 578
nuclear@0 579 if(num_dev) {
nuclear@0 580 printf("\nusing device: %d\n", sel);
nuclear@0 581 return 0;
nuclear@0 582 }
nuclear@0 583
nuclear@0 584 return -1;
nuclear@0 585 }
nuclear@0 586
nuclear@0 587 static int get_dev_info(cl_device_id dev, struct device_info *di)
nuclear@0 588 {
nuclear@0 589 di->id = dev;
nuclear@0 590
nuclear@0 591
nuclear@0 592 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
nuclear@0 593 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
nuclear@0 594 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
nuclear@0 595 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
nuclear@0 596
nuclear@0 597 di->work_item_sizes = new size_t[di->dim];
nuclear@0 598
nuclear@0 599 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
nuclear@0 600 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
nuclear@0 601 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
nuclear@0 602
nuclear@0 603 return 0;
nuclear@0 604 }
nuclear@0 605
nuclear@0 606 static int devcmp(struct device_info *a, struct device_info *b)
nuclear@0 607 {
nuclear@0 608 unsigned int aval = a->units * a->clock;
nuclear@0 609 unsigned int bval = b->units * b->clock;
nuclear@0 610
nuclear@0 611 return aval - bval;
nuclear@0 612 }
nuclear@0 613
nuclear@0 614 static const char *devtypestr(cl_device_type type)
nuclear@0 615 {
nuclear@0 616 switch(type) {
nuclear@0 617 case CL_DEVICE_TYPE_CPU:
nuclear@0 618 return "cpu";
nuclear@0 619 case CL_DEVICE_TYPE_GPU:
nuclear@0 620 return "gpu";
nuclear@0 621 case CL_DEVICE_TYPE_ACCELERATOR:
nuclear@0 622 return "accelerator";
nuclear@0 623 default:
nuclear@0 624 break;
nuclear@0 625 }
nuclear@0 626 return "unknown";
nuclear@0 627 }
nuclear@0 628
nuclear@0 629 static void print_memsize(FILE *out, unsigned long bytes)
nuclear@0 630 {
nuclear@0 631 int i;
nuclear@0 632 unsigned long memsz = bytes;
nuclear@0 633 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
nuclear@0 634
nuclear@0 635 for(i=0; suffix[i]; i++) {
nuclear@0 636 if(memsz < 1024) {
nuclear@0 637 fprintf(out, "%lu %s", memsz, suffix[i]);
nuclear@0 638 if(i > 0) {
nuclear@0 639 fprintf(out, " (%lu bytes)", bytes);
nuclear@0 640 }
nuclear@0 641 return;
nuclear@0 642 }
nuclear@0 643
nuclear@0 644 memsz /= 1024;
nuclear@0 645 }
nuclear@0 646 }
nuclear@8 647
nuclear@8 648 static const char *clstrerror(int err)
nuclear@8 649 {
nuclear@8 650 if(err > 0) {
nuclear@8 651 return "<invalid error code>";
nuclear@8 652 }
nuclear@8 653 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
nuclear@8 654 return "<unknown error>";
nuclear@8 655 }
nuclear@8 656 return ocl_errstr[-err];
nuclear@8 657 }