clray

annotate src/ocl.cc @ 8:deaf85acf6af

interactive spheres
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 23 Jul 2010 19:48:43 +0100
parents 41d6253492ad
children d9a1bab1c3f5
rev   line source
nuclear@8 1 #define OCL_CC_
nuclear@8 2
nuclear@0 3 #include <stdio.h>
nuclear@0 4 #include <stdlib.h>
nuclear@0 5 #include <string.h>
nuclear@8 6 #include <stdarg.h>
nuclear@0 7 #include <errno.h>
nuclear@0 8 #include <alloca.h>
nuclear@0 9 #include <sys/stat.h>
nuclear@0 10 #include "ocl.h"
nuclear@8 11 #include "ocl_errstr.h"
nuclear@0 12
nuclear@0 13
nuclear@0 14 class InitCL {
nuclear@0 15 public:
nuclear@0 16 InitCL();
nuclear@0 17 };
nuclear@0 18
nuclear@0 19 struct device_info {
nuclear@0 20 cl_device_id id;
nuclear@0 21 cl_device_type type;
nuclear@0 22 unsigned int units;
nuclear@0 23 unsigned int clock;
nuclear@0 24
nuclear@0 25 unsigned int dim;
nuclear@0 26 size_t *work_item_sizes;
nuclear@0 27 size_t work_group_size;
nuclear@0 28
nuclear@0 29 unsigned long mem_size;
nuclear@0 30 };
nuclear@0 31
nuclear@0 32 static bool init_opencl(void);
nuclear@0 33 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
nuclear@0 34 static int get_dev_info(cl_device_id dev, struct device_info *di);
nuclear@0 35 static int devcmp(struct device_info *a, struct device_info *b);
nuclear@0 36 static const char *devtypestr(cl_device_type type);
nuclear@0 37 static void print_memsize(FILE *out, unsigned long memsz);
nuclear@8 38 static const char *clstrerror(int err);
nuclear@0 39
nuclear@0 40
nuclear@0 41 static InitCL initcl;
nuclear@0 42 static cl_context ctx;
nuclear@0 43 static cl_command_queue cmdq;
nuclear@0 44 static device_info devinf;
nuclear@0 45
nuclear@0 46 InitCL::InitCL()
nuclear@0 47 {
nuclear@0 48 if(!init_opencl()) {
nuclear@0 49 exit(0);
nuclear@0 50 }
nuclear@0 51 }
nuclear@0 52
nuclear@0 53 static bool init_opencl(void)
nuclear@0 54 {
nuclear@0 55 if(select_device(&devinf, devcmp) == -1) {
nuclear@0 56 return false;
nuclear@0 57 }
nuclear@0 58
nuclear@0 59
nuclear@0 60 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
nuclear@0 61 fprintf(stderr, "failed to create opencl context\n");
nuclear@0 62 return false;
nuclear@0 63 }
nuclear@0 64
nuclear@0 65 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
nuclear@0 66 fprintf(stderr, "failed to create command queue\n");
nuclear@0 67 return false;
nuclear@0 68 }
nuclear@0 69 return true;
nuclear@0 70 }
nuclear@0 71
nuclear@0 72
nuclear@0 73 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
nuclear@0 74 {
nuclear@0 75 int err;
nuclear@0 76 cl_mem mem;
nuclear@0 77
nuclear@0 78
nuclear@0 79 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
nuclear@8 80 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
nuclear@0 81 return 0;
nuclear@0 82 }
nuclear@0 83
nuclear@0 84 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@0 85 mbuf->mem = mem;
nuclear@0 86 mbuf->size = sz;
nuclear@0 87 return mbuf;
nuclear@0 88 }
nuclear@0 89
nuclear@0 90 void destroy_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 91 {
nuclear@0 92 if(mbuf) {
nuclear@0 93
nuclear@0 94 clReleaseMemObject(mbuf->mem);
nuclear@0 95 delete mbuf;
nuclear@0 96 }
nuclear@0 97 }
nuclear@0 98
nuclear@0 99 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
nuclear@0 100 {
nuclear@0 101 if(!mbuf) return 0;
nuclear@0 102
nuclear@0 103 int err;
nuclear@0 104 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
nuclear@0 105 if(!mbuf->ptr) {
nuclear@8 106 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
nuclear@0 107 return 0;
nuclear@0 108 }
nuclear@0 109 return mbuf->ptr;
nuclear@0 110 }
nuclear@0 111
nuclear@0 112 void unmap_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 113 {
nuclear@0 114 if(!mbuf || !mbuf->ptr) return;
nuclear@0 115 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
nuclear@0 116 }
nuclear@0 117
nuclear@0 118 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
nuclear@0 119 {
nuclear@0 120 if(!mbuf) return false;
nuclear@0 121
nuclear@0 122 int err;
nuclear@0 123 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
nuclear@8 124 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
nuclear@0 125 return false;
nuclear@0 126 }
nuclear@0 127 return true;
nuclear@0 128 }
nuclear@0 129
nuclear@0 130 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
nuclear@0 131 {
nuclear@0 132 if(!mbuf) return false;
nuclear@0 133
nuclear@0 134 int err;
nuclear@0 135 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
nuclear@8 136 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
nuclear@0 137 return false;
nuclear@0 138 }
nuclear@0 139 return true;
nuclear@0 140 }
nuclear@0 141
nuclear@0 142
nuclear@0 143 CLProgram::CLProgram(const char *kname)
nuclear@0 144 {
nuclear@0 145 prog = 0;
nuclear@0 146 kernel = 0;
nuclear@0 147 this->kname = kname;
nuclear@1 148 args.resize(16);
nuclear@0 149 built = false;
nuclear@0 150 }
nuclear@0 151
nuclear@0 152 CLProgram::~CLProgram()
nuclear@0 153 {
nuclear@0 154 if(prog) {
nuclear@0 155
nuclear@0 156 clReleaseProgram(prog);
nuclear@0 157 }
nuclear@0 158 if(kernel) {
nuclear@0 159
nuclear@0 160 clReleaseKernel(kernel);
nuclear@0 161 }
nuclear@1 162 for(size_t i=0; i<args.size(); i++) {
nuclear@1 163 if(args[i].type == ARGTYPE_MEM_BUF) {
nuclear@1 164 destroy_mem_buffer(args[i].v.mbuf);
nuclear@0 165 }
nuclear@0 166 }
nuclear@0 167 }
nuclear@0 168
nuclear@0 169 bool CLProgram::load(const char *fname)
nuclear@0 170 {
nuclear@0 171 FILE *fp;
nuclear@0 172 char *src;
nuclear@0 173 struct stat st;
nuclear@0 174
nuclear@0 175 printf("loading opencl program (%s)\n", fname);
nuclear@0 176
nuclear@0 177 if(!(fp = fopen(fname, "rb"))) {
nuclear@0 178 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
nuclear@0 179 return false;
nuclear@0 180 }
nuclear@0 181
nuclear@0 182 fstat(fileno(fp), &st);
nuclear@0 183
nuclear@0 184 src = new char[st.st_size + 1];
nuclear@0 185
nuclear@0 186 fread(src, 1, st.st_size, fp);
nuclear@0 187 src[st.st_size] = 0;
nuclear@0 188 fclose(fp);
nuclear@0 189
nuclear@0 190
nuclear@0 191 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
nuclear@0 192 fprintf(stderr, "error creating program object: %s\n", fname);
nuclear@0 193 delete [] src;
nuclear@0 194 return false;
nuclear@0 195 }
nuclear@0 196 delete [] src;
nuclear@0 197 return true;
nuclear@0 198 }
nuclear@0 199
nuclear@1 200 bool CLProgram::set_argi(int idx, int val)
nuclear@1 201 {
nuclear@1 202 if((int)args.size() <= idx) {
nuclear@1 203 args.resize(idx + 1);
nuclear@1 204 }
nuclear@1 205
nuclear@1 206 CLArg *arg = &args[idx];
nuclear@1 207 arg->type = ARGTYPE_INT;
nuclear@1 208 arg->v.ival = val;
nuclear@1 209 return true;
nuclear@1 210 }
nuclear@1 211
nuclear@1 212 bool CLProgram::set_argf(int idx, float val)
nuclear@1 213 {
nuclear@1 214 if((int)args.size() <= idx) {
nuclear@1 215 args.resize(idx + 1);
nuclear@1 216 }
nuclear@1 217
nuclear@1 218 CLArg *arg = &args[idx];
nuclear@1 219 arg->type = ARGTYPE_FLOAT;
nuclear@1 220 arg->v.fval = val;
nuclear@1 221 return true;
nuclear@1 222 }
nuclear@1 223
nuclear@1 224 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
nuclear@0 225 {
nuclear@0 226 CLMemBuffer *buf;
nuclear@0 227
nuclear@0 228 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
nuclear@0 229 return false;
nuclear@0 230 }
nuclear@0 231
nuclear@1 232 if((int)args.size() <= idx) {
nuclear@1 233 args.resize(idx + 1);
nuclear@0 234 }
nuclear@1 235 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@1 236 args[idx].v.mbuf = buf;
nuclear@0 237 return true;
nuclear@0 238 }
nuclear@0 239
nuclear@0 240 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
nuclear@0 241 {
nuclear@1 242 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
nuclear@0 243 return 0;
nuclear@0 244 }
nuclear@1 245 return args[arg].v.mbuf;
nuclear@0 246 }
nuclear@0 247
nuclear@0 248 bool CLProgram::build()
nuclear@0 249 {
nuclear@2 250 int err;
nuclear@0 251
nuclear@2 252 if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
nuclear@2 253 size_t sz;
nuclear@2 254 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
nuclear@0 255
nuclear@2 256 char *errlog = (char*)alloca(sz + 1);
nuclear@2 257 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
nuclear@8 258 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
nuclear@2 259
nuclear@0 260 clReleaseProgram(prog);
nuclear@0 261 prog = 0;
nuclear@0 262 return false;
nuclear@0 263 }
nuclear@0 264
nuclear@0 265
nuclear@0 266 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
nuclear@0 267 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
nuclear@0 268 clReleaseProgram(prog);
nuclear@0 269 prog = 0;
nuclear@0 270 return false;
nuclear@0 271 }
nuclear@0 272
nuclear@1 273 for(size_t i=0; i<args.size(); i++) {
nuclear@1 274 int err;
nuclear@0 275
nuclear@1 276 if(args[i].type == ARGTYPE_NONE) {
nuclear@1 277 break;
nuclear@1 278 }
nuclear@1 279
nuclear@1 280 switch(args[i].type) {
nuclear@1 281 case ARGTYPE_INT:
nuclear@1 282 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
nuclear@8 283 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 284 goto fail;
nuclear@1 285 }
nuclear@1 286 break;
nuclear@1 287
nuclear@1 288 case ARGTYPE_FLOAT:
nuclear@1 289 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
nuclear@8 290 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 291 goto fail;
nuclear@1 292 }
nuclear@1 293 break;
nuclear@1 294
nuclear@1 295 case ARGTYPE_MEM_BUF:
nuclear@1 296 {
nuclear@1 297 CLMemBuffer *mbuf = args[i].v.mbuf;
nuclear@1 298
nuclear@1 299 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
nuclear@8 300 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
nuclear@1 301 goto fail;
nuclear@1 302 }
nuclear@1 303 }
nuclear@1 304 break;
nuclear@1 305
nuclear@1 306 default:
nuclear@1 307 break;
nuclear@0 308 }
nuclear@0 309 }
nuclear@0 310
nuclear@0 311 built = true;
nuclear@0 312 return true;
nuclear@1 313
nuclear@1 314 fail:
nuclear@1 315 clReleaseProgram(prog);
nuclear@1 316 clReleaseKernel(kernel);
nuclear@1 317 prog = 0;
nuclear@1 318 kernel = 0;
nuclear@1 319 return false;
nuclear@0 320 }
nuclear@0 321
nuclear@0 322 bool CLProgram::run() const
nuclear@0 323 {
nuclear@0 324 return run(1, 1);
nuclear@0 325 }
nuclear@0 326
nuclear@0 327 bool CLProgram::run(int dim, ...) const
nuclear@0 328 {
nuclear@0 329 if(!built) {
nuclear@0 330 if(!((CLProgram*)this)->build()) {
nuclear@0 331 return false;
nuclear@0 332 }
nuclear@0 333 }
nuclear@0 334
nuclear@0 335 va_list ap;
nuclear@0 336 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
nuclear@0 337
nuclear@0 338 va_start(ap, dim);
nuclear@0 339 for(int i=0; i<dim; i++) {
nuclear@0 340 global_size[i] = va_arg(ap, int);
nuclear@0 341 }
nuclear@0 342 va_end(ap);
nuclear@0 343
nuclear@0 344 int err;
nuclear@0 345 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
nuclear@8 346 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
nuclear@0 347 return false;
nuclear@0 348 }
nuclear@0 349 return true;
nuclear@0 350 }
nuclear@0 351
nuclear@0 352 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
nuclear@0 353 {
nuclear@8 354 unsigned int i, j, num_dev, num_plat, sel, ret;
nuclear@0 355 cl_device_id dev[32];
nuclear@8 356 cl_platform_id plat[32];
nuclear@0 357
nuclear@0 358 dev_inf->work_item_sizes = 0;
nuclear@0 359
nuclear@8 360 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
nuclear@8 361 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
nuclear@8 362 return -1;
nuclear@8 363 }
nuclear@8 364 if(!num_plat) {
nuclear@8 365 fprintf(stderr, "OpenCL not available!\n");
nuclear@8 366 return -1;
nuclear@8 367 }
nuclear@0 368
nuclear@8 369 for(i=0; i<num_plat; i++) {
nuclear@8 370 char buf[512];
nuclear@8 371
nuclear@8 372 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
nuclear@8 373 printf("[%d]: %s", i, buf);
nuclear@8 374 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
nuclear@8 375 printf(", %s", buf);
nuclear@8 376 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
nuclear@8 377 printf(" (%s)\n", buf);
nuclear@8 378 }
nuclear@8 379
nuclear@8 380 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
nuclear@8 381 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
nuclear@8 382 return -1;
nuclear@8 383 }
nuclear@0 384 printf("found %d cl devices.\n", num_dev);
nuclear@0 385
nuclear@0 386 for(i=0; i<num_dev; i++) {
nuclear@0 387 struct device_info di;
nuclear@0 388
nuclear@0 389 if(get_dev_info(dev[i], &di) == -1) {
nuclear@0 390 free(dev_inf->work_item_sizes);
nuclear@0 391 return -1;
nuclear@0 392 }
nuclear@0 393
nuclear@0 394 printf("--> device %u (%s)\n", i, devtypestr(di.type));
nuclear@0 395 printf("max compute units: %u\n", di.units);
nuclear@0 396 printf("max clock frequency: %u\n", di.clock);
nuclear@0 397 printf("max work item dimensions: %u\n", di.dim);
nuclear@0 398
nuclear@0 399 printf("max work item sizes: ");
nuclear@0 400 for(j=0; j<di.dim; j++) {
nuclear@0 401 printf("%u", (unsigned int)di.work_item_sizes[j]);
nuclear@0 402 if(di.dim - j > 1) {
nuclear@0 403 printf(", ");
nuclear@0 404 }
nuclear@0 405 }
nuclear@0 406 putchar('\n');
nuclear@0 407
nuclear@0 408 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
nuclear@0 409 printf("max object allocation size: ");
nuclear@0 410 print_memsize(stdout, di.mem_size);
nuclear@0 411 putchar('\n');
nuclear@0 412
nuclear@0 413 if(devcmp(&di, dev_inf) > 0) {
nuclear@0 414 free(dev_inf->work_item_sizes);
nuclear@0 415 memcpy(dev_inf, &di, sizeof di);
nuclear@0 416 sel = i;
nuclear@0 417 }
nuclear@0 418 }
nuclear@0 419
nuclear@0 420 if(num_dev) {
nuclear@0 421 printf("\nusing device: %d\n", sel);
nuclear@0 422 return 0;
nuclear@0 423 }
nuclear@0 424
nuclear@0 425 return -1;
nuclear@0 426 }
nuclear@0 427
nuclear@0 428 static int get_dev_info(cl_device_id dev, struct device_info *di)
nuclear@0 429 {
nuclear@0 430 di->id = dev;
nuclear@0 431
nuclear@0 432
nuclear@0 433 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
nuclear@0 434 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
nuclear@0 435 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
nuclear@0 436 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
nuclear@0 437
nuclear@0 438 di->work_item_sizes = new size_t[di->dim];
nuclear@0 439
nuclear@0 440 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
nuclear@0 441 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
nuclear@0 442 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
nuclear@0 443
nuclear@0 444 return 0;
nuclear@0 445 }
nuclear@0 446
nuclear@0 447 static int devcmp(struct device_info *a, struct device_info *b)
nuclear@0 448 {
nuclear@0 449 unsigned int aval = a->units * a->clock;
nuclear@0 450 unsigned int bval = b->units * b->clock;
nuclear@0 451
nuclear@0 452 return aval - bval;
nuclear@0 453 }
nuclear@0 454
nuclear@0 455 static const char *devtypestr(cl_device_type type)
nuclear@0 456 {
nuclear@0 457 switch(type) {
nuclear@0 458 case CL_DEVICE_TYPE_CPU:
nuclear@0 459 return "cpu";
nuclear@0 460 case CL_DEVICE_TYPE_GPU:
nuclear@0 461 return "gpu";
nuclear@0 462 case CL_DEVICE_TYPE_ACCELERATOR:
nuclear@0 463 return "accelerator";
nuclear@0 464 default:
nuclear@0 465 break;
nuclear@0 466 }
nuclear@0 467 return "unknown";
nuclear@0 468 }
nuclear@0 469
nuclear@0 470 static void print_memsize(FILE *out, unsigned long bytes)
nuclear@0 471 {
nuclear@0 472 int i;
nuclear@0 473 unsigned long memsz = bytes;
nuclear@0 474 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
nuclear@0 475
nuclear@0 476 for(i=0; suffix[i]; i++) {
nuclear@0 477 if(memsz < 1024) {
nuclear@0 478 fprintf(out, "%lu %s", memsz, suffix[i]);
nuclear@0 479 if(i > 0) {
nuclear@0 480 fprintf(out, " (%lu bytes)", bytes);
nuclear@0 481 }
nuclear@0 482 return;
nuclear@0 483 }
nuclear@0 484
nuclear@0 485 memsz /= 1024;
nuclear@0 486 }
nuclear@0 487 }
nuclear@8 488
nuclear@8 489 static const char *clstrerror(int err)
nuclear@8 490 {
nuclear@8 491 if(err > 0) {
nuclear@8 492 return "<invalid error code>";
nuclear@8 493 }
nuclear@8 494 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
nuclear@8 495 return "<unknown error>";
nuclear@8 496 }
nuclear@8 497 return ocl_errstr[-err];
nuclear@8 498 }