clray

annotate src/ocl.cc @ 61:0f174fd60f19

added readme file and license
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 28 Dec 2015 10:27:23 +0200
parents 8047637961a2
children
rev   line source
nuclear@8 1 #define OCL_CC_
nuclear@8 2
nuclear@60 3 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
nuclear@60 4
nuclear@0 5 #include <stdio.h>
nuclear@0 6 #include <stdlib.h>
nuclear@0 7 #include <string.h>
nuclear@8 8 #include <stdarg.h>
nuclear@0 9 #include <errno.h>
nuclear@39 10 #include <assert.h>
John@11 11 #ifndef _MSC_VER
nuclear@0 12 #include <alloca.h>
John@11 13 #else
John@11 14 #include <malloc.h>
John@11 15 #endif
nuclear@0 16 #include <sys/stat.h>
nuclear@0 17 #include "ocl.h"
nuclear@39 18 #include "ogl.h"
nuclear@8 19 #include "ocl_errstr.h"
nuclear@0 20
nuclear@39 21 #if defined(unix) || defined(__unix__)
nuclear@39 22 #include <X11/Xlib.h>
nuclear@39 23 #include <GL/glx.h>
nuclear@39 24 #endif
nuclear@0 25
nuclear@42 26 #ifdef __APPLE__
nuclear@42 27 #include <OpenGL/CGLCurrent.h>
nuclear@42 28 #endif
nuclear@42 29
nuclear@0 30
nuclear@0 31 struct device_info {
nuclear@0 32 cl_device_id id;
nuclear@0 33 cl_device_type type;
nuclear@0 34 unsigned int units;
nuclear@0 35 unsigned int clock;
nuclear@0 36
nuclear@0 37 unsigned int dim;
nuclear@0 38 size_t *work_item_sizes;
nuclear@0 39 size_t work_group_size;
nuclear@0 40
nuclear@0 41 unsigned long mem_size;
nuclear@42 42
nuclear@42 43 char *extensions;
nuclear@42 44 bool gl_sharing;
nuclear@0 45 };
nuclear@0 46
nuclear@0 47 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
nuclear@0 48 static int get_dev_info(cl_device_id dev, struct device_info *di);
nuclear@42 49 static void destroy_dev_info(struct device_info *di);
nuclear@0 50 static int devcmp(struct device_info *a, struct device_info *b);
nuclear@0 51 static const char *devtypestr(cl_device_type type);
nuclear@0 52 static void print_memsize(FILE *out, unsigned long memsz);
nuclear@8 53 static const char *clstrerror(int err);
nuclear@0 54
nuclear@0 55
nuclear@0 56 static cl_context ctx;
nuclear@0 57 static cl_command_queue cmdq;
nuclear@0 58 static device_info devinf;
nuclear@0 59
nuclear@39 60 bool init_opencl()
nuclear@0 61 {
nuclear@0 62 if(select_device(&devinf, devcmp) == -1) {
nuclear@0 63 return false;
nuclear@0 64 }
nuclear@0 65
nuclear@40 66 #ifndef CLGL_INTEROP
nuclear@40 67 cl_context_properties *prop = 0;
nuclear@40 68 #else
nuclear@40 69
nuclear@39 70 #if defined(__APPLE__)
nuclear@42 71 CGLContextObj glctx = CGLGetCurrentContext();
nuclear@42 72 CGLShareGroupObj sgrp = CGLGetShareGroup(glctx);
nuclear@42 73
nuclear@42 74 cl_context_properties prop[] = {
nuclear@42 75 #ifdef CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE
nuclear@42 76 CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)sgrp,
nuclear@42 77 #else
nuclear@42 78 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
nuclear@42 79 CL_CGL_SHAREGROUP_KHR, (cl_context_properties)sgrp,
nuclear@42 80 #endif
nuclear@42 81 0
nuclear@42 82 };
nuclear@39 83 #elif defined(unix) || defined(__unix__)
nuclear@39 84 Display *dpy = glXGetCurrentDisplay();
nuclear@39 85 GLXContext glctx = glXGetCurrentContext();
nuclear@0 86
nuclear@39 87 assert(dpy && glctx);
nuclear@39 88
nuclear@39 89 cl_context_properties prop[] = {
nuclear@39 90 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
nuclear@39 91 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
nuclear@39 92 0
nuclear@39 93 };
nuclear@39 94 #elif defined(WIN32) || defined(__WIN32__)
nuclear@42 95 HGLRC glctx = wglGetCurrentContext();
nuclear@42 96 HDC dc = wglGetCurrentDC();
nuclear@42 97
nuclear@42 98 cl_context_properties prop[] = {
nuclear@42 99 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
nuclear@42 100 CL_WGL_HDC_KHR, (cl_context_properties)dc,
nuclear@42 101 0
nuclear@42 102 };
nuclear@39 103 #else
nuclear@39 104 #error "unknown or unsupported platform"
nuclear@39 105 #endif
nuclear@39 106
nuclear@40 107 #endif /* CLGL_INTEROP */
nuclear@40 108
nuclear@39 109 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
nuclear@0 110 fprintf(stderr, "failed to create opencl context\n");
nuclear@0 111 return false;
nuclear@0 112 }
nuclear@0 113
nuclear@0 114 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
nuclear@0 115 fprintf(stderr, "failed to create command queue\n");
nuclear@0 116 return false;
nuclear@0 117 }
nuclear@0 118 return true;
nuclear@0 119 }
nuclear@0 120
nuclear@40 121 void destroy_opencl()
nuclear@40 122 {
nuclear@40 123 if(cmdq) {
nuclear@40 124 clReleaseCommandQueue(cmdq);
nuclear@40 125 cmdq = 0;
nuclear@40 126 }
nuclear@40 127
nuclear@40 128 if(ctx) {
nuclear@40 129 clReleaseContext(ctx);
nuclear@40 130 ctx = 0;
nuclear@40 131 }
nuclear@40 132 }
nuclear@40 133
nuclear@60 134 void finish_opencl()
nuclear@60 135 {
nuclear@60 136 clFinish(cmdq);
nuclear@60 137 }
nuclear@60 138
nuclear@0 139
nuclear@28 140 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
nuclear@0 141 {
nuclear@0 142 int err;
nuclear@0 143 cl_mem mem;
nuclear@12 144 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
nuclear@0 145
nuclear@12 146 if(buf) {
nuclear@12 147 flags |= CL_MEM_COPY_HOST_PTR;
nuclear@12 148 }
nuclear@0 149
nuclear@12 150
nuclear@28 151 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
nuclear@8 152 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
nuclear@0 153 return 0;
nuclear@0 154 }
nuclear@0 155
nuclear@0 156 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 157 mbuf->type = MEM_BUFFER;
nuclear@0 158 mbuf->mem = mem;
nuclear@0 159 mbuf->size = sz;
nuclear@41 160 mbuf->xsz = mbuf->ysz = 0;
nuclear@12 161 mbuf->ptr = 0;
nuclear@39 162 mbuf->tex = 0;
nuclear@39 163 return mbuf;
nuclear@39 164 }
nuclear@39 165
nuclear@41 166 CLMemBuffer *create_image_buffer(int rdwr, int xsz, int ysz, const void *pixels)
nuclear@39 167 {
nuclear@41 168 int err, pitch;
nuclear@39 169 cl_mem mem;
nuclear@41 170 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
nuclear@41 171
nuclear@41 172 if(pixels) {
nuclear@41 173 flags |= CL_MEM_COPY_HOST_PTR;
nuclear@41 174 pitch = xsz * 4 * sizeof(float);
nuclear@41 175 } else {
nuclear@41 176 pitch = 0;
nuclear@41 177 }
nuclear@41 178
nuclear@41 179 cl_image_format fmt = {CL_RGBA, CL_FLOAT};
nuclear@41 180
nuclear@41 181 if(!(mem = clCreateImage2D(ctx, flags, &fmt, xsz, ysz, pitch, (void*)pixels, &err))) {
nuclear@41 182 fprintf(stderr, "failed to create %dx%d image: %s\n", xsz, ysz, clstrerror(err));
nuclear@41 183 return 0;
nuclear@41 184 }
nuclear@41 185
nuclear@41 186 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 187 mbuf->type = IMAGE_BUFFER;
nuclear@41 188 mbuf->mem = mem;
nuclear@41 189 mbuf->size = ysz * pitch;
nuclear@41 190 mbuf->xsz = xsz;
nuclear@41 191 mbuf->ysz = ysz;
nuclear@41 192 mbuf->ptr = 0;
nuclear@41 193 mbuf->tex = 0;
nuclear@41 194 return mbuf;
nuclear@41 195 }
nuclear@41 196
nuclear@41 197 CLMemBuffer *create_image_buffer(int rdwr, unsigned int tex)
nuclear@41 198 {
nuclear@41 199 int err, xsz, ysz;
nuclear@41 200 cl_mem mem;
nuclear@41 201
nuclear@41 202 glGetError(); // clear previous OpenGL errors
nuclear@41 203
nuclear@41 204 glPushAttrib(GL_TEXTURE_BIT);
nuclear@41 205 glBindTexture(GL_TEXTURE_2D, tex);
nuclear@41 206 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xsz);
nuclear@41 207 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &ysz);
nuclear@41 208 glPopAttrib();
nuclear@41 209
nuclear@41 210 if(glGetError()) {
nuclear@41 211 fprintf(stderr, "create_image_buffer: GL error while retreiving texture parameters for texture %u\n", tex);
nuclear@41 212 return 0;
nuclear@41 213 }
nuclear@39 214
nuclear@39 215 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
nuclear@39 216 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
nuclear@39 217 return 0;
nuclear@39 218 }
nuclear@39 219
nuclear@39 220 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@41 221 mbuf->type = IMAGE_BUFFER;
nuclear@39 222 mbuf->mem = mem;
nuclear@39 223 mbuf->size = 0;
nuclear@41 224 mbuf->xsz = xsz;
nuclear@41 225 mbuf->ysz = ysz;
nuclear@39 226 mbuf->ptr = 0;
nuclear@39 227 mbuf->tex = tex;
nuclear@41 228
nuclear@0 229 return mbuf;
nuclear@0 230 }
nuclear@0 231
nuclear@0 232 void destroy_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 233 {
nuclear@0 234 if(mbuf) {
nuclear@0 235 clReleaseMemObject(mbuf->mem);
nuclear@0 236 delete mbuf;
nuclear@0 237 }
nuclear@0 238 }
nuclear@0 239
nuclear@39 240 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
nuclear@0 241 {
nuclear@0 242 if(!mbuf) return 0;
nuclear@0 243
nuclear@12 244 #ifndef NDEBUG
nuclear@12 245 if(mbuf->ptr) {
nuclear@12 246 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
nuclear@12 247 }
nuclear@12 248 #endif
nuclear@12 249
nuclear@0 250 int err;
nuclear@41 251
nuclear@41 252 if(mbuf->type == MEM_BUFFER) {
nuclear@41 253 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
nuclear@41 254 if(!mbuf->ptr) {
nuclear@41 255 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
nuclear@41 256 return 0;
nuclear@41 257 }
nuclear@41 258 } else {
nuclear@41 259 assert(mbuf->type == IMAGE_BUFFER);
nuclear@41 260
nuclear@41 261 size_t orig[] = {0, 0, 0};
nuclear@41 262 size_t rgn[] = {mbuf->xsz, mbuf->ysz, 1};
nuclear@41 263 size_t pitch;
nuclear@41 264
nuclear@41 265 mbuf->ptr = clEnqueueMapImage(cmdq, mbuf->mem, 1, rdwr, orig, rgn, &pitch, 0, 0, 0, ev, &err);
nuclear@41 266 if(!mbuf->ptr) {
nuclear@41 267 fprintf(stderr, "failed to map image: %s\n", clstrerror(err));
nuclear@41 268 return 0;
nuclear@41 269 }
nuclear@41 270
nuclear@41 271 assert(pitch == mbuf->xsz * 4 * sizeof(float));
nuclear@0 272 }
nuclear@0 273 return mbuf->ptr;
nuclear@0 274 }
nuclear@0 275
nuclear@39 276 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
nuclear@0 277 {
nuclear@0 278 if(!mbuf || !mbuf->ptr) return;
nuclear@41 279
nuclear@39 280 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
nuclear@12 281 mbuf->ptr = 0;
nuclear@0 282 }
nuclear@0 283
nuclear@39 284 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
nuclear@0 285 {
nuclear@0 286 if(!mbuf) return false;
nuclear@0 287
nuclear@0 288 int err;
nuclear@39 289 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
nuclear@8 290 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
nuclear@0 291 return false;
nuclear@0 292 }
nuclear@0 293 return true;
nuclear@0 294 }
nuclear@0 295
nuclear@39 296 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
nuclear@0 297 {
nuclear@0 298 if(!mbuf) return false;
nuclear@0 299
nuclear@0 300 int err;
nuclear@39 301 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
nuclear@8 302 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
nuclear@0 303 return false;
nuclear@0 304 }
nuclear@0 305 return true;
nuclear@0 306 }
nuclear@0 307
nuclear@0 308
nuclear@39 309 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 310 {
nuclear@39 311 if(!mbuf || !mbuf->tex) {
nuclear@39 312 return false;
nuclear@39 313 }
nuclear@39 314
nuclear@39 315 int err;
nuclear@39 316 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 317 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
nuclear@39 318 return false;
nuclear@39 319 }
nuclear@39 320 return true;
nuclear@39 321 }
nuclear@39 322
nuclear@39 323 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
nuclear@39 324 {
nuclear@39 325 if(!mbuf || !mbuf->tex) {
nuclear@39 326 return false;
nuclear@39 327 }
nuclear@39 328
nuclear@39 329 int err;
nuclear@39 330 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
nuclear@39 331 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
nuclear@39 332 return false;
nuclear@39 333 }
nuclear@39 334 return true;
nuclear@39 335 }
nuclear@39 336
nuclear@39 337
John@14 338 CLArg::CLArg()
John@14 339 {
John@14 340 memset(this, 0, sizeof *this);
John@14 341 }
John@14 342
John@14 343
nuclear@0 344 CLProgram::CLProgram(const char *kname)
nuclear@0 345 {
nuclear@0 346 prog = 0;
nuclear@0 347 kernel = 0;
nuclear@0 348 this->kname = kname;
nuclear@1 349 args.resize(16);
nuclear@0 350 built = false;
nuclear@39 351
nuclear@39 352 wait_event = last_event = 0;
nuclear@0 353 }
nuclear@0 354
nuclear@0 355 CLProgram::~CLProgram()
nuclear@0 356 {
nuclear@39 357 if(wait_event) {
nuclear@39 358 clReleaseEvent(wait_event);
nuclear@39 359 }
nuclear@39 360 if(last_event) {
nuclear@40 361 clWaitForEvents(1, &last_event);
nuclear@39 362 clReleaseEvent(last_event);
nuclear@39 363 }
nuclear@39 364
nuclear@0 365 if(prog) {
nuclear@0 366 clReleaseProgram(prog);
nuclear@0 367 }
nuclear@0 368 if(kernel) {
nuclear@0 369 clReleaseKernel(kernel);
nuclear@0 370 }
nuclear@1 371 for(size_t i=0; i<args.size(); i++) {
nuclear@1 372 if(args[i].type == ARGTYPE_MEM_BUF) {
nuclear@1 373 destroy_mem_buffer(args[i].v.mbuf);
nuclear@0 374 }
nuclear@0 375 }
nuclear@0 376 }
nuclear@0 377
nuclear@0 378 bool CLProgram::load(const char *fname)
nuclear@0 379 {
nuclear@0 380 FILE *fp;
nuclear@0 381 char *src;
nuclear@0 382 struct stat st;
nuclear@0 383
nuclear@0 384 printf("loading opencl program (%s)\n", fname);
nuclear@0 385
nuclear@0 386 if(!(fp = fopen(fname, "rb"))) {
nuclear@0 387 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
nuclear@0 388 return false;
nuclear@0 389 }
nuclear@0 390
nuclear@0 391 fstat(fileno(fp), &st);
nuclear@0 392
nuclear@0 393 src = new char[st.st_size + 1];
nuclear@0 394
nuclear@0 395 fread(src, 1, st.st_size, fp);
nuclear@0 396 src[st.st_size] = 0;
nuclear@0 397 fclose(fp);
nuclear@0 398
nuclear@0 399
nuclear@0 400 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
nuclear@0 401 fprintf(stderr, "error creating program object: %s\n", fname);
nuclear@0 402 delete [] src;
nuclear@0 403 return false;
nuclear@0 404 }
nuclear@0 405 delete [] src;
nuclear@0 406 return true;
nuclear@0 407 }
nuclear@0 408
nuclear@1 409 bool CLProgram::set_argi(int idx, int val)
nuclear@1 410 {
nuclear@1 411 if((int)args.size() <= idx) {
nuclear@1 412 args.resize(idx + 1);
nuclear@1 413 }
nuclear@1 414
nuclear@1 415 CLArg *arg = &args[idx];
nuclear@1 416 arg->type = ARGTYPE_INT;
nuclear@1 417 arg->v.ival = val;
nuclear@1 418 return true;
nuclear@1 419 }
nuclear@1 420
nuclear@1 421 bool CLProgram::set_argf(int idx, float val)
nuclear@1 422 {
nuclear@1 423 if((int)args.size() <= idx) {
nuclear@1 424 args.resize(idx + 1);
nuclear@1 425 }
nuclear@1 426
nuclear@1 427 CLArg *arg = &args[idx];
nuclear@1 428 arg->type = ARGTYPE_FLOAT;
nuclear@1 429 arg->v.fval = val;
nuclear@1 430 return true;
nuclear@1 431 }
nuclear@1 432
nuclear@28 433 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
nuclear@0 434 {
nuclear@13 435 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
nuclear@0 436 CLMemBuffer *buf;
nuclear@0 437
nuclear@39 438 if(sz <= 0) {
nuclear@39 439 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
nuclear@39 440 return false;
nuclear@39 441 }
nuclear@39 442 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
nuclear@39 443 return false;
nuclear@39 444 }
nuclear@39 445
nuclear@39 446 if((int)args.size() <= idx) {
nuclear@39 447 args.resize(idx + 1);
nuclear@39 448 }
nuclear@39 449 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@39 450 args[idx].v.mbuf = buf;
nuclear@39 451 return true;
nuclear@39 452 }
nuclear@39 453
nuclear@41 454 bool CLProgram::set_arg_image(int idx, int rdwr, int xsz, int ysz, const void *pix)
nuclear@41 455 {
nuclear@41 456 printf("create argument %d from %dx%d image\n", idx, xsz, ysz);
nuclear@41 457 CLMemBuffer *buf;
nuclear@41 458
nuclear@41 459 if(!(buf = create_image_buffer(rdwr, xsz, ysz, pix))) {
nuclear@41 460 return false;
nuclear@41 461 }
nuclear@41 462
nuclear@41 463 if((int)args.size() <= idx) {
nuclear@41 464 args.resize(idx + 1);
nuclear@41 465 }
nuclear@41 466 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@41 467 args[idx].v.mbuf = buf;
nuclear@41 468 return true;
nuclear@41 469 }
nuclear@41 470
nuclear@39 471 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
nuclear@39 472 {
nuclear@39 473 printf("create argument %d from texture %u\n", idx, tex);
nuclear@39 474 CLMemBuffer *buf;
nuclear@39 475
nuclear@41 476 if(!(buf = create_image_buffer(rdwr, tex))) {
nuclear@0 477 return false;
nuclear@0 478 }
nuclear@0 479
nuclear@1 480 if((int)args.size() <= idx) {
nuclear@1 481 args.resize(idx + 1);
nuclear@0 482 }
nuclear@1 483 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@1 484 args[idx].v.mbuf = buf;
nuclear@0 485 return true;
nuclear@0 486 }
nuclear@0 487
nuclear@0 488 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
nuclear@0 489 {
nuclear@1 490 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
nuclear@0 491 return 0;
nuclear@0 492 }
nuclear@1 493 return args[arg].v.mbuf;
nuclear@0 494 }
nuclear@0 495
John@14 496 int CLProgram::get_num_args() const
John@14 497 {
John@14 498 int num_args = 0;
John@14 499 for(size_t i=0; i<args.size(); i++) {
John@14 500 if(args[i].type != ARGTYPE_NONE) {
John@14 501 num_args++;
John@14 502 }
John@14 503 }
John@14 504 return num_args;
John@14 505 }
John@14 506
nuclear@45 507 bool CLProgram::build(const char *opt)
nuclear@0 508 {
nuclear@2 509 int err;
nuclear@43 510 if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) {
nuclear@2 511 size_t sz;
nuclear@2 512 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
nuclear@0 513
nuclear@2 514 char *errlog = (char*)alloca(sz + 1);
nuclear@2 515 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
nuclear@8 516 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
nuclear@2 517
nuclear@0 518 clReleaseProgram(prog);
nuclear@0 519 prog = 0;
nuclear@0 520 return false;
nuclear@0 521 }
nuclear@0 522
nuclear@0 523
nuclear@0 524 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
nuclear@0 525 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
nuclear@0 526 clReleaseProgram(prog);
nuclear@0 527 prog = 0;
nuclear@0 528 return false;
nuclear@0 529 }
nuclear@0 530
nuclear@1 531 for(size_t i=0; i<args.size(); i++) {
nuclear@1 532 int err;
nuclear@0 533
nuclear@1 534 if(args[i].type == ARGTYPE_NONE) {
nuclear@1 535 break;
nuclear@1 536 }
nuclear@1 537
nuclear@1 538 switch(args[i].type) {
nuclear@1 539 case ARGTYPE_INT:
nuclear@1 540 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
nuclear@8 541 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 542 goto fail;
nuclear@1 543 }
nuclear@1 544 break;
nuclear@1 545
nuclear@1 546 case ARGTYPE_FLOAT:
nuclear@1 547 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
nuclear@8 548 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 549 goto fail;
nuclear@1 550 }
nuclear@1 551 break;
nuclear@1 552
nuclear@1 553 case ARGTYPE_MEM_BUF:
nuclear@1 554 {
nuclear@1 555 CLMemBuffer *mbuf = args[i].v.mbuf;
nuclear@1 556
nuclear@1 557 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
nuclear@8 558 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 559 goto fail;
nuclear@1 560 }
nuclear@1 561 }
nuclear@1 562 break;
nuclear@1 563
nuclear@1 564 default:
nuclear@1 565 break;
nuclear@0 566 }
nuclear@0 567 }
nuclear@0 568
nuclear@0 569 built = true;
nuclear@0 570 return true;
nuclear@1 571
nuclear@1 572 fail:
nuclear@1 573 clReleaseProgram(prog);
nuclear@1 574 clReleaseKernel(kernel);
nuclear@1 575 prog = 0;
nuclear@1 576 kernel = 0;
nuclear@1 577 return false;
nuclear@0 578 }
nuclear@0 579
nuclear@0 580 bool CLProgram::run() const
nuclear@0 581 {
nuclear@0 582 return run(1, 1);
nuclear@0 583 }
nuclear@0 584
nuclear@0 585 bool CLProgram::run(int dim, ...) const
nuclear@0 586 {
nuclear@0 587 va_list ap;
nuclear@0 588 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
nuclear@0 589
nuclear@0 590 va_start(ap, dim);
nuclear@0 591 for(int i=0; i<dim; i++) {
nuclear@0 592 global_size[i] = va_arg(ap, int);
nuclear@0 593 }
nuclear@0 594 va_end(ap);
nuclear@0 595
nuclear@39 596 if(last_event) {
nuclear@39 597 clReleaseEvent(last_event);
nuclear@39 598 }
nuclear@39 599
nuclear@0 600 int err;
nuclear@39 601 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
nuclear@39 602 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
nuclear@8 603 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
nuclear@0 604 return false;
nuclear@0 605 }
nuclear@32 606
nuclear@39 607 if(wait_event) {
nuclear@39 608 clReleaseEvent(wait_event);
nuclear@39 609 wait_event = 0;
nuclear@39 610 }
nuclear@0 611 return true;
nuclear@0 612 }
nuclear@0 613
nuclear@39 614 void CLProgram::set_wait_event(cl_event ev)
nuclear@39 615 {
nuclear@39 616 if(wait_event) {
nuclear@39 617 clReleaseEvent(wait_event);
nuclear@39 618 }
nuclear@39 619 wait_event = ev;
nuclear@39 620 }
nuclear@39 621
nuclear@39 622 cl_event CLProgram::get_last_event() const
nuclear@39 623 {
nuclear@39 624 return last_event;
nuclear@39 625 }
nuclear@39 626
nuclear@0 627 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
nuclear@0 628 {
nuclear@8 629 unsigned int i, j, num_dev, num_plat, sel, ret;
nuclear@0 630 cl_device_id dev[32];
nuclear@8 631 cl_platform_id plat[32];
nuclear@0 632
nuclear@0 633 dev_inf->work_item_sizes = 0;
nuclear@0 634
nuclear@8 635 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
nuclear@8 636 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
nuclear@8 637 return -1;
nuclear@8 638 }
nuclear@8 639 if(!num_plat) {
nuclear@8 640 fprintf(stderr, "OpenCL not available!\n");
nuclear@8 641 return -1;
nuclear@8 642 }
nuclear@0 643
nuclear@8 644 for(i=0; i<num_plat; i++) {
nuclear@8 645 char buf[512];
nuclear@8 646
nuclear@8 647 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
nuclear@8 648 printf("[%d]: %s", i, buf);
nuclear@8 649 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
nuclear@8 650 printf(", %s", buf);
nuclear@8 651 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
nuclear@8 652 printf(" (%s)\n", buf);
nuclear@8 653 }
nuclear@8 654
nuclear@8 655 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
nuclear@8 656 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
nuclear@8 657 return -1;
nuclear@8 658 }
nuclear@0 659 printf("found %d cl devices.\n", num_dev);
nuclear@0 660
nuclear@0 661 for(i=0; i<num_dev; i++) {
nuclear@0 662 struct device_info di;
nuclear@0 663
nuclear@0 664 if(get_dev_info(dev[i], &di) == -1) {
nuclear@42 665 destroy_dev_info(&di);
nuclear@0 666 return -1;
nuclear@0 667 }
nuclear@0 668
nuclear@0 669 printf("--> device %u (%s)\n", i, devtypestr(di.type));
nuclear@0 670 printf("max compute units: %u\n", di.units);
nuclear@0 671 printf("max clock frequency: %u\n", di.clock);
nuclear@0 672 printf("max work item dimensions: %u\n", di.dim);
nuclear@0 673
nuclear@0 674 printf("max work item sizes: ");
nuclear@0 675 for(j=0; j<di.dim; j++) {
nuclear@0 676 printf("%u", (unsigned int)di.work_item_sizes[j]);
nuclear@0 677 if(di.dim - j > 1) {
nuclear@0 678 printf(", ");
nuclear@0 679 }
nuclear@0 680 }
nuclear@0 681 putchar('\n');
nuclear@0 682
nuclear@0 683 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
nuclear@0 684 printf("max object allocation size: ");
nuclear@0 685 print_memsize(stdout, di.mem_size);
nuclear@0 686 putchar('\n');
nuclear@0 687
nuclear@42 688 printf("extensions: %s\n", di.extensions);
nuclear@42 689
nuclear@0 690 if(devcmp(&di, dev_inf) > 0) {
nuclear@0 691 free(dev_inf->work_item_sizes);
nuclear@0 692 memcpy(dev_inf, &di, sizeof di);
nuclear@0 693 sel = i;
nuclear@0 694 }
nuclear@0 695 }
nuclear@0 696
nuclear@0 697 if(num_dev) {
nuclear@0 698 printf("\nusing device: %d\n", sel);
nuclear@0 699 return 0;
nuclear@0 700 }
nuclear@0 701
nuclear@0 702 return -1;
nuclear@0 703 }
nuclear@0 704
nuclear@0 705 static int get_dev_info(cl_device_id dev, struct device_info *di)
nuclear@0 706 {
nuclear@0 707 di->id = dev;
nuclear@0 708
nuclear@0 709 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
nuclear@0 710 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
nuclear@0 711 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
nuclear@0 712 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
nuclear@0 713
nuclear@0 714 di->work_item_sizes = new size_t[di->dim];
nuclear@0 715
nuclear@0 716 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
nuclear@0 717 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
nuclear@0 718 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
nuclear@0 719
nuclear@42 720 size_t ext_str_len;
nuclear@42 721 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, 0, &ext_str_len);
nuclear@42 722
nuclear@42 723 di->extensions = new char[ext_str_len + 1];
nuclear@42 724 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, ext_str_len, di->extensions, 0);
nuclear@42 725 di->extensions[ext_str_len] = 0;
nuclear@42 726
nuclear@42 727 if(strstr(di->extensions, "cl_khr_gl_sharing") || strstr(di->extensions, "cl_APPLE_gl_sharing")) {
nuclear@42 728 di->gl_sharing = true;
nuclear@42 729 } else {
nuclear@42 730 di->gl_sharing = false;
nuclear@42 731 }
nuclear@42 732
nuclear@0 733 return 0;
nuclear@0 734 }
nuclear@0 735
nuclear@42 736 static void destroy_dev_info(struct device_info *di)
nuclear@42 737 {
nuclear@42 738 delete [] di->work_item_sizes;
nuclear@42 739 delete [] di->extensions;
nuclear@42 740 }
nuclear@42 741
nuclear@0 742 static int devcmp(struct device_info *a, struct device_info *b)
nuclear@0 743 {
nuclear@0 744 unsigned int aval = a->units * a->clock;
nuclear@0 745 unsigned int bval = b->units * b->clock;
nuclear@0 746
nuclear@0 747 return aval - bval;
nuclear@0 748 }
nuclear@0 749
nuclear@0 750 static const char *devtypestr(cl_device_type type)
nuclear@0 751 {
nuclear@0 752 switch(type) {
nuclear@0 753 case CL_DEVICE_TYPE_CPU:
nuclear@0 754 return "cpu";
nuclear@0 755 case CL_DEVICE_TYPE_GPU:
nuclear@0 756 return "gpu";
nuclear@0 757 case CL_DEVICE_TYPE_ACCELERATOR:
nuclear@0 758 return "accelerator";
nuclear@0 759 default:
nuclear@0 760 break;
nuclear@0 761 }
nuclear@0 762 return "unknown";
nuclear@0 763 }
nuclear@0 764
nuclear@0 765 static void print_memsize(FILE *out, unsigned long bytes)
nuclear@0 766 {
nuclear@0 767 int i;
nuclear@0 768 unsigned long memsz = bytes;
nuclear@0 769 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
nuclear@0 770
nuclear@0 771 for(i=0; suffix[i]; i++) {
nuclear@0 772 if(memsz < 1024) {
nuclear@0 773 fprintf(out, "%lu %s", memsz, suffix[i]);
nuclear@0 774 if(i > 0) {
nuclear@0 775 fprintf(out, " (%lu bytes)", bytes);
nuclear@0 776 }
nuclear@0 777 return;
nuclear@0 778 }
nuclear@0 779
nuclear@0 780 memsz /= 1024;
nuclear@0 781 }
nuclear@0 782 }
nuclear@8 783
nuclear@8 784 static const char *clstrerror(int err)
nuclear@8 785 {
nuclear@8 786 if(err > 0) {
nuclear@8 787 return "<invalid error code>";
nuclear@8 788 }
nuclear@8 789 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
nuclear@8 790 return "<unknown error>";
nuclear@8 791 }
nuclear@8 792 return ocl_errstr[-err];
nuclear@8 793 }