clray

annotate src/ocl.cc @ 41:057b8575a1c1

- changed the membuffer into an imagebuffer for the non-GL/CL-interop case - fixed the segfault
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 27 Aug 2010 20:39:55 +0100
parents 1bcbb53b3505
children 1169f3d04135
rev   line source
nuclear@8 1 #define OCL_CC_
nuclear@8 2
nuclear@0 3 #include <stdio.h>
nuclear@0 4 #include <stdlib.h>
nuclear@0 5 #include <string.h>
nuclear@8 6 #include <stdarg.h>
nuclear@0 7 #include <errno.h>
nuclear@39 8 #include <assert.h>
John@11 9 #ifndef _MSC_VER
nuclear@0 10 #include <alloca.h>
John@11 11 #else
John@11 12 #include <malloc.h>
John@11 13 #endif
nuclear@0 14 #include <sys/stat.h>
nuclear@0 15 #include "ocl.h"
nuclear@39 16 #include "ogl.h"
nuclear@8 17 #include "ocl_errstr.h"
nuclear@0 18
nuclear@39 19 #if defined(unix) || defined(__unix__)
nuclear@39 20 #include <X11/Xlib.h>
nuclear@39 21 #include <GL/glx.h>
nuclear@39 22 #endif
nuclear@0 23
nuclear@0 24
nuclear@0 25 struct device_info {
nuclear@0 26 cl_device_id id;
nuclear@0 27 cl_device_type type;
nuclear@0 28 unsigned int units;
nuclear@0 29 unsigned int clock;
nuclear@0 30
nuclear@0 31 unsigned int dim;
nuclear@0 32 size_t *work_item_sizes;
nuclear@0 33 size_t work_group_size;
nuclear@0 34
nuclear@0 35 unsigned long mem_size;
nuclear@0 36 };
nuclear@0 37
nuclear@0 38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
nuclear@0 39 static int get_dev_info(cl_device_id dev, struct device_info *di);
nuclear@0 40 static int devcmp(struct device_info *a, struct device_info *b);
nuclear@0 41 static const char *devtypestr(cl_device_type type);
nuclear@0 42 static void print_memsize(FILE *out, unsigned long memsz);
nuclear@8 43 static const char *clstrerror(int err);
nuclear@0 44
nuclear@0 45
nuclear@0 46 static cl_context ctx;
nuclear@0 47 static cl_command_queue cmdq;
nuclear@0 48 static device_info devinf;
nuclear@0 49
nuclear@39 50 bool init_opencl()
nuclear@0 51 {
nuclear@0 52 if(select_device(&devinf, devcmp) == -1) {
nuclear@0 53 return false;
nuclear@0 54 }
nuclear@0 55
nuclear@40 56 #ifndef CLGL_INTEROP
nuclear@40 57 cl_context_properties *prop = 0;
nuclear@40 58
nuclear@40 59 #else
nuclear@40 60
nuclear@39 61 #if defined(__APPLE__)
nuclear@39 62 #error "CL/GL context sharing not implemented on MacOSX yet"
nuclear@39 63 #elif defined(unix) || defined(__unix__)
nuclear@39 64 Display *dpy = glXGetCurrentDisplay();
nuclear@39 65 GLXContext glctx = glXGetCurrentContext();
nuclear@0 66
nuclear@39 67 assert(dpy && glctx);
nuclear@39 68
nuclear@39 69 cl_context_properties prop[] = {
nuclear@39 70 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
nuclear@39 71 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
nuclear@39 72 0
nuclear@39 73 };
nuclear@39 74 #elif defined(WIN32) || defined(__WIN32__)
nuclear@39 75 #error "CL/GL context sharing not implemented on windows yet"
nuclear@39 76 #else
nuclear@39 77 #error "unknown or unsupported platform"
nuclear@39 78 #endif
nuclear@39 79
nuclear@40 80 #endif /* CLGL_INTEROP */
nuclear@40 81
nuclear@39 82 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
nuclear@0 83 fprintf(stderr, "failed to create opencl context\n");
nuclear@0 84 return false;
nuclear@0 85 }
nuclear@0 86
nuclear@0 87 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
nuclear@0 88 fprintf(stderr, "failed to create command queue\n");
nuclear@0 89 return false;
nuclear@0 90 }
nuclear@0 91 return true;
nuclear@0 92 }
nuclear@0 93
nuclear@40 94 void destroy_opencl()
nuclear@40 95 {
nuclear@40 96 if(cmdq) {
nuclear@40 97 clReleaseCommandQueue(cmdq);
nuclear@40 98 cmdq = 0;
nuclear@40 99 }
nuclear@40 100
nuclear@40 101 if(ctx) {
nuclear@40 102 clReleaseContext(ctx);
nuclear@40 103 ctx = 0;
nuclear@40 104 }
nuclear@40 105 }
nuclear@40 106
nuclear@0 107
nuclear@28 108 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
nuclear@0 109 {
nuclear@0 110 int err;
nuclear@0 111 cl_mem mem;
nuclear@12 112 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
nuclear@0 113
nuclear@12 114 if(buf) {
nuclear@12 115 flags |= CL_MEM_COPY_HOST_PTR;
nuclear@12 116 }
nuclear@0 117
nuclear@12 118
nuclear@28 119 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
nuclear@8 120 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
nuclear@0 121 return 0;
nuclear@0 122 }
nuclear@0 123
nuclear@0 124 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 125 mbuf->type = MEM_BUFFER;
nuclear@0 126 mbuf->mem = mem;
nuclear@0 127 mbuf->size = sz;
nuclear@41 128 mbuf->xsz = mbuf->ysz = 0;
nuclear@12 129 mbuf->ptr = 0;
nuclear@39 130 mbuf->tex = 0;
nuclear@39 131 return mbuf;
nuclear@39 132 }
nuclear@39 133
nuclear@41 134 CLMemBuffer *create_image_buffer(int rdwr, int xsz, int ysz, const void *pixels)
nuclear@39 135 {
nuclear@41 136 int err, pitch;
nuclear@39 137 cl_mem mem;
nuclear@41 138 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
nuclear@41 139
nuclear@41 140 if(pixels) {
nuclear@41 141 flags |= CL_MEM_COPY_HOST_PTR;
nuclear@41 142 pitch = xsz * 4 * sizeof(float);
nuclear@41 143 } else {
nuclear@41 144 pitch = 0;
nuclear@41 145 }
nuclear@41 146
nuclear@41 147 cl_image_format fmt = {CL_RGBA, CL_FLOAT};
nuclear@41 148
nuclear@41 149 if(!(mem = clCreateImage2D(ctx, flags, &fmt, xsz, ysz, pitch, (void*)pixels, &err))) {
nuclear@41 150 fprintf(stderr, "failed to create %dx%d image: %s\n", xsz, ysz, clstrerror(err));
nuclear@41 151 return 0;
nuclear@41 152 }
nuclear@41 153
nuclear@41 154 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 155 mbuf->type = IMAGE_BUFFER;
nuclear@41 156 mbuf->mem = mem;
nuclear@41 157 mbuf->size = ysz * pitch;
nuclear@41 158 mbuf->xsz = xsz;
nuclear@41 159 mbuf->ysz = ysz;
nuclear@41 160 mbuf->ptr = 0;
nuclear@41 161 mbuf->tex = 0;
nuclear@41 162 return mbuf;
nuclear@41 163 }
nuclear@41 164
nuclear@41 165 CLMemBuffer *create_image_buffer(int rdwr, unsigned int tex)
nuclear@41 166 {
nuclear@41 167 int err, xsz, ysz;
nuclear@41 168 cl_mem mem;
nuclear@41 169
nuclear@41 170 glGetError(); // clear previous OpenGL errors
nuclear@41 171
nuclear@41 172 glPushAttrib(GL_TEXTURE_BIT);
nuclear@41 173 glBindTexture(GL_TEXTURE_2D, tex);
nuclear@41 174 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xsz);
nuclear@41 175 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &ysz);
nuclear@41 176 glPopAttrib();
nuclear@41 177
nuclear@41 178 if(glGetError()) {
nuclear@41 179 fprintf(stderr, "create_image_buffer: GL error while retreiving texture parameters for texture %u\n", tex);
nuclear@41 180 return 0;
nuclear@41 181 }
nuclear@39 182
nuclear@39 183 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
nuclear@39 184 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
nuclear@39 185 return 0;
nuclear@39 186 }
nuclear@39 187
nuclear@39 188 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 189 mbuf->type = IMAGE_BUFFER;
nuclear@39 190 mbuf->mem = mem;
nuclear@39 191 mbuf->size = 0;
nuclear@41 192 mbuf->xsz = xsz;
nuclear@41 193 mbuf->ysz = ysz;
nuclear@39 194 mbuf->ptr = 0;
nuclear@39 195 mbuf->tex = tex;
nuclear@41 196
nuclear@0 197 return mbuf;
nuclear@0 198 }
nuclear@0 199
nuclear@0 200 void destroy_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 201 {
nuclear@0 202 if(mbuf) {
nuclear@0 203 clReleaseMemObject(mbuf->mem);
nuclear@0 204 delete mbuf;
nuclear@0 205 }
nuclear@0 206 }
nuclear@0 207
nuclear@39 208 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
nuclear@0 209 {
nuclear@0 210 if(!mbuf) return 0;
nuclear@0 211
nuclear@12 212 #ifndef NDEBUG
nuclear@12 213 if(mbuf->ptr) {
nuclear@12 214 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
nuclear@12 215 }
nuclear@12 216 #endif
nuclear@12 217
nuclear@0 218 int err;
nuclear@41 219
nuclear@41 220 if(mbuf->type == MEM_BUFFER) {
nuclear@41 221 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
nuclear@41 222 if(!mbuf->ptr) {
nuclear@41 223 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
nuclear@41 224 return 0;
nuclear@41 225 }
nuclear@41 226 } else {
nuclear@41 227 assert(mbuf->type == IMAGE_BUFFER);
nuclear@41 228
nuclear@41 229 size_t orig[] = {0, 0, 0};
nuclear@41 230 size_t rgn[] = {mbuf->xsz, mbuf->ysz, 1};
nuclear@41 231 size_t pitch;
nuclear@41 232
nuclear@41 233 mbuf->ptr = clEnqueueMapImage(cmdq, mbuf->mem, 1, rdwr, orig, rgn, &pitch, 0, 0, 0, ev, &err);
nuclear@41 234 if(!mbuf->ptr) {
nuclear@41 235 fprintf(stderr, "failed to map image: %s\n", clstrerror(err));
nuclear@41 236 return 0;
nuclear@41 237 }
nuclear@41 238
nuclear@41 239 assert(pitch == mbuf->xsz * 4 * sizeof(float));
nuclear@0 240 }
nuclear@0 241 return mbuf->ptr;
nuclear@0 242 }
nuclear@0 243
nuclear@39 244 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
nuclear@0 245 {
nuclear@0 246 if(!mbuf || !mbuf->ptr) return;
nuclear@41 247
nuclear@39 248 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
nuclear@12 249 mbuf->ptr = 0;
nuclear@0 250 }
nuclear@0 251
nuclear@39 252 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
nuclear@0 253 {
nuclear@0 254 if(!mbuf) return false;
nuclear@0 255
nuclear@0 256 int err;
nuclear@39 257 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
nuclear@8 258 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
nuclear@0 259 return false;
nuclear@0 260 }
nuclear@0 261 return true;
nuclear@0 262 }
nuclear@0 263
nuclear@39 264 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
nuclear@0 265 {
nuclear@0 266 if(!mbuf) return false;
nuclear@0 267
nuclear@0 268 int err;
nuclear@39 269 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
nuclear@8 270 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
nuclear@0 271 return false;
nuclear@0 272 }
nuclear@0 273 return true;
nuclear@0 274 }
nuclear@0 275
nuclear@0 276
nuclear@39 277 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 278 {
nuclear@39 279 if(!mbuf || !mbuf->tex) {
nuclear@39 280 return false;
nuclear@39 281 }
nuclear@39 282
nuclear@39 283 int err;
nuclear@39 284 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 285 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
nuclear@39 286 return false;
nuclear@39 287 }
nuclear@39 288 return true;
nuclear@39 289 }
nuclear@39 290
nuclear@39 291 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 292 {
nuclear@39 293 if(!mbuf || !mbuf->tex) {
nuclear@39 294 return false;
nuclear@39 295 }
nuclear@39 296
nuclear@39 297 int err;
nuclear@39 298 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 299 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
nuclear@39 300 return false;
nuclear@39 301 }
nuclear@39 302 return true;
nuclear@39 303 }
nuclear@39 304
nuclear@39 305
John@14 306 CLArg::CLArg()
John@14 307 {
John@14 308 memset(this, 0, sizeof *this);
John@14 309 }
John@14 310
John@14 311
nuclear@0 312 CLProgram::CLProgram(const char *kname)
nuclear@0 313 {
nuclear@0 314 prog = 0;
nuclear@0 315 kernel = 0;
nuclear@0 316 this->kname = kname;
nuclear@1 317 args.resize(16);
nuclear@0 318 built = false;
nuclear@39 319
nuclear@39 320 wait_event = last_event = 0;
nuclear@0 321 }
nuclear@0 322
nuclear@0 323 CLProgram::~CLProgram()
nuclear@0 324 {
nuclear@39 325 if(wait_event) {
nuclear@39 326 clReleaseEvent(wait_event);
nuclear@39 327 }
nuclear@39 328 if(last_event) {
nuclear@40 329 clWaitForEvents(1, &last_event);
nuclear@39 330 clReleaseEvent(last_event);
nuclear@39 331 }
nuclear@39 332
nuclear@0 333 if(prog) {
nuclear@0 334 clReleaseProgram(prog);
nuclear@0 335 }
nuclear@0 336 if(kernel) {
nuclear@0 337 clReleaseKernel(kernel);
nuclear@0 338 }
nuclear@1 339 for(size_t i=0; i<args.size(); i++) {
nuclear@1 340 if(args[i].type == ARGTYPE_MEM_BUF) {
nuclear@1 341 destroy_mem_buffer(args[i].v.mbuf);
nuclear@0 342 }
nuclear@0 343 }
nuclear@0 344 }
nuclear@0 345
nuclear@0 346 bool CLProgram::load(const char *fname)
nuclear@0 347 {
nuclear@0 348 FILE *fp;
nuclear@0 349 char *src;
nuclear@0 350 struct stat st;
nuclear@0 351
nuclear@0 352 printf("loading opencl program (%s)\n", fname);
nuclear@0 353
nuclear@0 354 if(!(fp = fopen(fname, "rb"))) {
nuclear@0 355 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
nuclear@0 356 return false;
nuclear@0 357 }
nuclear@0 358
nuclear@0 359 fstat(fileno(fp), &st);
nuclear@0 360
nuclear@0 361 src = new char[st.st_size + 1];
nuclear@0 362
nuclear@0 363 fread(src, 1, st.st_size, fp);
nuclear@0 364 src[st.st_size] = 0;
nuclear@0 365 fclose(fp);
nuclear@0 366
nuclear@0 367
nuclear@0 368 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
nuclear@0 369 fprintf(stderr, "error creating program object: %s\n", fname);
nuclear@0 370 delete [] src;
nuclear@0 371 return false;
nuclear@0 372 }
nuclear@0 373 delete [] src;
nuclear@0 374 return true;
nuclear@0 375 }
nuclear@0 376
nuclear@1 377 bool CLProgram::set_argi(int idx, int val)
nuclear@1 378 {
nuclear@1 379 if((int)args.size() <= idx) {
nuclear@1 380 args.resize(idx + 1);
nuclear@1 381 }
nuclear@1 382
nuclear@1 383 CLArg *arg = &args[idx];
nuclear@1 384 arg->type = ARGTYPE_INT;
nuclear@1 385 arg->v.ival = val;
nuclear@1 386 return true;
nuclear@1 387 }
nuclear@1 388
nuclear@1 389 bool CLProgram::set_argf(int idx, float val)
nuclear@1 390 {
nuclear@1 391 if((int)args.size() <= idx) {
nuclear@1 392 args.resize(idx + 1);
nuclear@1 393 }
nuclear@1 394
nuclear@1 395 CLArg *arg = &args[idx];
nuclear@1 396 arg->type = ARGTYPE_FLOAT;
nuclear@1 397 arg->v.fval = val;
nuclear@1 398 return true;
nuclear@1 399 }
nuclear@1 400
nuclear@28 401 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
nuclear@0 402 {
nuclear@13 403 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
nuclear@0 404 CLMemBuffer *buf;
nuclear@0 405
nuclear@39 406 if(sz <= 0) {
nuclear@39 407 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
nuclear@39 408 return false;
nuclear@39 409 }
nuclear@39 410 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
nuclear@39 411 return false;
nuclear@39 412 }
nuclear@39 413
nuclear@39 414 if((int)args.size() <= idx) {
nuclear@39 415 args.resize(idx + 1);
nuclear@39 416 }
nuclear@39 417 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@39 418 args[idx].v.mbuf = buf;
nuclear@39 419 return true;
nuclear@39 420 }
nuclear@39 421
nuclear@41 422 bool CLProgram::set_arg_image(int idx, int rdwr, int xsz, int ysz, const void *pix)
nuclear@41 423 {
nuclear@41 424 printf("create argument %d from %dx%d image\n", idx, xsz, ysz);
nuclear@41 425 CLMemBuffer *buf;
nuclear@41 426
nuclear@41 427 if(!(buf = create_image_buffer(rdwr, xsz, ysz, pix))) {
nuclear@41 428 return false;
nuclear@41 429 }
nuclear@41 430
nuclear@41 431 if((int)args.size() <= idx) {
nuclear@41 432 args.resize(idx + 1);
nuclear@41 433 }
nuclear@41 434 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@41 435 args[idx].v.mbuf = buf;
nuclear@41 436 return true;
nuclear@41 437 }
nuclear@41 438
nuclear@39 439 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
nuclear@39 440 {
nuclear@39 441 printf("create argument %d from texture %u\n", idx, tex);
nuclear@39 442 CLMemBuffer *buf;
nuclear@39 443
nuclear@41 444 if(!(buf = create_image_buffer(rdwr, tex))) {
nuclear@0 445 return false;
nuclear@0 446 }
nuclear@0 447
nuclear@1 448 if((int)args.size() <= idx) {
nuclear@1 449 args.resize(idx + 1);
nuclear@0 450 }
nuclear@1 451 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@1 452 args[idx].v.mbuf = buf;
nuclear@0 453 return true;
nuclear@0 454 }
nuclear@0 455
nuclear@0 456 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
nuclear@0 457 {
nuclear@1 458 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
nuclear@0 459 return 0;
nuclear@0 460 }
nuclear@1 461 return args[arg].v.mbuf;
nuclear@0 462 }
nuclear@0 463
John@14 464 int CLProgram::get_num_args() const
John@14 465 {
John@14 466 int num_args = 0;
John@14 467 for(size_t i=0; i<args.size(); i++) {
John@14 468 if(args[i].type != ARGTYPE_NONE) {
John@14 469 num_args++;
John@14 470 }
John@14 471 }
John@14 472 return num_args;
John@14 473 }
John@14 474
nuclear@0 475 bool CLProgram::build()
nuclear@0 476 {
nuclear@2 477 int err;
nuclear@0 478
nuclear@39 479 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
nuclear@2 480 size_t sz;
nuclear@2 481 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
nuclear@0 482
nuclear@2 483 char *errlog = (char*)alloca(sz + 1);
nuclear@2 484 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
nuclear@8 485 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
nuclear@2 486
nuclear@0 487 clReleaseProgram(prog);
nuclear@0 488 prog = 0;
nuclear@0 489 return false;
nuclear@0 490 }
nuclear@0 491
nuclear@0 492
nuclear@0 493 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
nuclear@0 494 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
nuclear@0 495 clReleaseProgram(prog);
nuclear@0 496 prog = 0;
nuclear@0 497 return false;
nuclear@0 498 }
nuclear@0 499
nuclear@1 500 for(size_t i=0; i<args.size(); i++) {
nuclear@1 501 int err;
nuclear@0 502
nuclear@1 503 if(args[i].type == ARGTYPE_NONE) {
nuclear@1 504 break;
nuclear@1 505 }
nuclear@1 506
nuclear@1 507 switch(args[i].type) {
nuclear@1 508 case ARGTYPE_INT:
nuclear@1 509 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
nuclear@8 510 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 511 goto fail;
nuclear@1 512 }
nuclear@1 513 break;
nuclear@1 514
nuclear@1 515 case ARGTYPE_FLOAT:
nuclear@1 516 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
nuclear@8 517 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 518 goto fail;
nuclear@1 519 }
nuclear@1 520 break;
nuclear@1 521
nuclear@1 522 case ARGTYPE_MEM_BUF:
nuclear@1 523 {
nuclear@1 524 CLMemBuffer *mbuf = args[i].v.mbuf;
nuclear@1 525
nuclear@1 526 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
nuclear@8 527 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 528 goto fail;
nuclear@1 529 }
nuclear@1 530 }
nuclear@1 531 break;
nuclear@1 532
nuclear@1 533 default:
nuclear@1 534 break;
nuclear@0 535 }
nuclear@0 536 }
nuclear@0 537
nuclear@0 538 built = true;
nuclear@0 539 return true;
nuclear@1 540
nuclear@1 541 fail:
nuclear@1 542 clReleaseProgram(prog);
nuclear@1 543 clReleaseKernel(kernel);
nuclear@1 544 prog = 0;
nuclear@1 545 kernel = 0;
nuclear@1 546 return false;
nuclear@0 547 }
nuclear@0 548
nuclear@0 549 bool CLProgram::run() const
nuclear@0 550 {
nuclear@0 551 return run(1, 1);
nuclear@0 552 }
nuclear@0 553
nuclear@0 554 bool CLProgram::run(int dim, ...) const
nuclear@0 555 {
nuclear@0 556 va_list ap;
nuclear@0 557 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
nuclear@0 558
nuclear@0 559 va_start(ap, dim);
nuclear@0 560 for(int i=0; i<dim; i++) {
nuclear@0 561 global_size[i] = va_arg(ap, int);
nuclear@0 562 }
nuclear@0 563 va_end(ap);
nuclear@0 564
nuclear@39 565 if(last_event) {
nuclear@39 566 clReleaseEvent(last_event);
nuclear@39 567 }
nuclear@39 568
nuclear@0 569 int err;
nuclear@39 570 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
nuclear@39 571 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
nuclear@8 572 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
nuclear@0 573 return false;
nuclear@0 574 }
nuclear@32 575
nuclear@39 576 if(wait_event) {
nuclear@39 577 clReleaseEvent(wait_event);
nuclear@39 578 wait_event = 0;
nuclear@39 579 }
nuclear@0 580 return true;
nuclear@0 581 }
nuclear@0 582
nuclear@39 583 void CLProgram::set_wait_event(cl_event ev)
nuclear@39 584 {
nuclear@39 585 if(wait_event) {
nuclear@39 586 clReleaseEvent(wait_event);
nuclear@39 587 }
nuclear@39 588 wait_event = ev;
nuclear@39 589 }
nuclear@39 590
nuclear@39 591 cl_event CLProgram::get_last_event() const
nuclear@39 592 {
nuclear@39 593 return last_event;
nuclear@39 594 }
nuclear@39 595
nuclear@0 596 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
nuclear@0 597 {
nuclear@8 598 unsigned int i, j, num_dev, num_plat, sel, ret;
nuclear@0 599 cl_device_id dev[32];
nuclear@8 600 cl_platform_id plat[32];
nuclear@0 601
nuclear@0 602 dev_inf->work_item_sizes = 0;
nuclear@0 603
nuclear@8 604 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
nuclear@8 605 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
nuclear@8 606 return -1;
nuclear@8 607 }
nuclear@8 608 if(!num_plat) {
nuclear@8 609 fprintf(stderr, "OpenCL not available!\n");
nuclear@8 610 return -1;
nuclear@8 611 }
nuclear@0 612
nuclear@8 613 for(i=0; i<num_plat; i++) {
nuclear@8 614 char buf[512];
nuclear@8 615
nuclear@8 616 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
nuclear@8 617 printf("[%d]: %s", i, buf);
nuclear@8 618 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
nuclear@8 619 printf(", %s", buf);
nuclear@8 620 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
nuclear@8 621 printf(" (%s)\n", buf);
nuclear@8 622 }
nuclear@8 623
nuclear@8 624 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
nuclear@8 625 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
nuclear@8 626 return -1;
nuclear@8 627 }
nuclear@0 628 printf("found %d cl devices.\n", num_dev);
nuclear@0 629
nuclear@0 630 for(i=0; i<num_dev; i++) {
nuclear@0 631 struct device_info di;
nuclear@0 632
nuclear@0 633 if(get_dev_info(dev[i], &di) == -1) {
nuclear@0 634 free(dev_inf->work_item_sizes);
nuclear@0 635 return -1;
nuclear@0 636 }
nuclear@0 637
nuclear@0 638 printf("--> device %u (%s)\n", i, devtypestr(di.type));
nuclear@0 639 printf("max compute units: %u\n", di.units);
nuclear@0 640 printf("max clock frequency: %u\n", di.clock);
nuclear@0 641 printf("max work item dimensions: %u\n", di.dim);
nuclear@0 642
nuclear@0 643 printf("max work item sizes: ");
nuclear@0 644 for(j=0; j<di.dim; j++) {
nuclear@0 645 printf("%u", (unsigned int)di.work_item_sizes[j]);
nuclear@0 646 if(di.dim - j > 1) {
nuclear@0 647 printf(", ");
nuclear@0 648 }
nuclear@0 649 }
nuclear@0 650 putchar('\n');
nuclear@0 651
nuclear@0 652 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
nuclear@0 653 printf("max object allocation size: ");
nuclear@0 654 print_memsize(stdout, di.mem_size);
nuclear@0 655 putchar('\n');
nuclear@0 656
nuclear@0 657 if(devcmp(&di, dev_inf) > 0) {
nuclear@0 658 free(dev_inf->work_item_sizes);
nuclear@0 659 memcpy(dev_inf, &di, sizeof di);
nuclear@0 660 sel = i;
nuclear@0 661 }
nuclear@0 662 }
nuclear@0 663
nuclear@0 664 if(num_dev) {
nuclear@0 665 printf("\nusing device: %d\n", sel);
nuclear@0 666 return 0;
nuclear@0 667 }
nuclear@0 668
nuclear@0 669 return -1;
nuclear@0 670 }
nuclear@0 671
nuclear@0 672 static int get_dev_info(cl_device_id dev, struct device_info *di)
nuclear@0 673 {
nuclear@0 674 di->id = dev;
nuclear@0 675
nuclear@0 676
nuclear@0 677 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
nuclear@0 678 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
nuclear@0 679 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
nuclear@0 680 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
nuclear@0 681
nuclear@0 682 di->work_item_sizes = new size_t[di->dim];
nuclear@0 683
nuclear@0 684 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
nuclear@0 685 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
nuclear@0 686 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
nuclear@0 687
nuclear@0 688 return 0;
nuclear@0 689 }
nuclear@0 690
nuclear@0 691 static int devcmp(struct device_info *a, struct device_info *b)
nuclear@0 692 {
nuclear@0 693 unsigned int aval = a->units * a->clock;
nuclear@0 694 unsigned int bval = b->units * b->clock;
nuclear@0 695
nuclear@0 696 return aval - bval;
nuclear@0 697 }
nuclear@0 698
nuclear@0 699 static const char *devtypestr(cl_device_type type)
nuclear@0 700 {
nuclear@0 701 switch(type) {
nuclear@0 702 case CL_DEVICE_TYPE_CPU:
nuclear@0 703 return "cpu";
nuclear@0 704 case CL_DEVICE_TYPE_GPU:
nuclear@0 705 return "gpu";
nuclear@0 706 case CL_DEVICE_TYPE_ACCELERATOR:
nuclear@0 707 return "accelerator";
nuclear@0 708 default:
nuclear@0 709 break;
nuclear@0 710 }
nuclear@0 711 return "unknown";
nuclear@0 712 }
nuclear@0 713
nuclear@0 714 static void print_memsize(FILE *out, unsigned long bytes)
nuclear@0 715 {
nuclear@0 716 int i;
nuclear@0 717 unsigned long memsz = bytes;
nuclear@0 718 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
nuclear@0 719
nuclear@0 720 for(i=0; suffix[i]; i++) {
nuclear@0 721 if(memsz < 1024) {
nuclear@0 722 fprintf(out, "%lu %s", memsz, suffix[i]);
nuclear@0 723 if(i > 0) {
nuclear@0 724 fprintf(out, " (%lu bytes)", bytes);
nuclear@0 725 }
nuclear@0 726 return;
nuclear@0 727 }
nuclear@0 728
nuclear@0 729 memsz /= 1024;
nuclear@0 730 }
nuclear@0 731 }
nuclear@8 732
nuclear@8 733 static const char *clstrerror(int err)
nuclear@8 734 {
nuclear@8 735 if(err > 0) {
nuclear@8 736 return "<invalid error code>";
nuclear@8 737 }
nuclear@8 738 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
nuclear@8 739 return "<unknown error>";
nuclear@8 740 }
nuclear@8 741 return ocl_errstr[-err];
nuclear@8 742 }