clray

annotate src/ocl.cc @ 1:0b0e4d18d53f

added non-buffer args
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 12 Jul 2010 07:00:19 +0300
parents 5767277e049f
children 41d6253492ad
rev   line source
nuclear@0 1 #include <stdio.h>
nuclear@0 2 #include <stdlib.h>
nuclear@0 3 #include <string.h>
nuclear@0 4 #include <errno.h>
nuclear@0 5 #include <alloca.h>
nuclear@0 6 #include <sys/stat.h>
nuclear@0 7 #include "ocl.h"
nuclear@0 8
nuclear@0 9
nuclear@0 10 class InitCL {
nuclear@0 11 public:
nuclear@0 12 InitCL();
nuclear@0 13 };
nuclear@0 14
nuclear@0 15 struct device_info {
nuclear@0 16 cl_device_id id;
nuclear@0 17 cl_device_type type;
nuclear@0 18 unsigned int units;
nuclear@0 19 unsigned int clock;
nuclear@0 20
nuclear@0 21 unsigned int dim;
nuclear@0 22 size_t *work_item_sizes;
nuclear@0 23 size_t work_group_size;
nuclear@0 24
nuclear@0 25 unsigned long mem_size;
nuclear@0 26 };
nuclear@0 27
nuclear@0 28 static bool init_opencl(void);
nuclear@0 29 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
nuclear@0 30 static int get_dev_info(cl_device_id dev, struct device_info *di);
nuclear@0 31 static int devcmp(struct device_info *a, struct device_info *b);
nuclear@0 32 static const char *devtypestr(cl_device_type type);
nuclear@0 33 static void print_memsize(FILE *out, unsigned long memsz);
nuclear@0 34
nuclear@0 35
nuclear@0 36 static InitCL initcl;
nuclear@0 37 static cl_context ctx;
nuclear@0 38 static cl_command_queue cmdq;
nuclear@0 39 static device_info devinf;
nuclear@0 40
nuclear@0 41 InitCL::InitCL()
nuclear@0 42 {
nuclear@0 43 if(!init_opencl()) {
nuclear@0 44 exit(0);
nuclear@0 45 }
nuclear@0 46 }
nuclear@0 47
nuclear@0 48 static bool init_opencl(void)
nuclear@0 49 {
nuclear@0 50 if(select_device(&devinf, devcmp) == -1) {
nuclear@0 51 return false;
nuclear@0 52 }
nuclear@0 53
nuclear@0 54
nuclear@0 55 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
nuclear@0 56 fprintf(stderr, "failed to create opencl context\n");
nuclear@0 57 return false;
nuclear@0 58 }
nuclear@0 59
nuclear@0 60 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
nuclear@0 61 fprintf(stderr, "failed to create command queue\n");
nuclear@0 62 return false;
nuclear@0 63 }
nuclear@0 64 return true;
nuclear@0 65 }
nuclear@0 66
nuclear@0 67
nuclear@0 68 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
nuclear@0 69 {
nuclear@0 70 int err;
nuclear@0 71 cl_mem mem;
nuclear@0 72
nuclear@0 73
nuclear@0 74 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
nuclear@0 75 fprintf(stderr, "failed to create memory buffer (%d)\n", err);
nuclear@0 76 return 0;
nuclear@0 77 }
nuclear@0 78
nuclear@0 79 CLMemBuffer *mbuf = new CLMemBuffer;
nuclear@0 80 mbuf->mem = mem;
nuclear@0 81 mbuf->size = sz;
nuclear@0 82 return mbuf;
nuclear@0 83 }
nuclear@0 84
nuclear@0 85 void destroy_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 86 {
nuclear@0 87 if(mbuf) {
nuclear@0 88
nuclear@0 89 clReleaseMemObject(mbuf->mem);
nuclear@0 90 delete mbuf;
nuclear@0 91 }
nuclear@0 92 }
nuclear@0 93
nuclear@0 94 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
nuclear@0 95 {
nuclear@0 96 if(!mbuf) return 0;
nuclear@0 97
nuclear@0 98 int err;
nuclear@0 99 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
nuclear@0 100 if(!mbuf->ptr) {
nuclear@0 101 fprintf(stderr, "failed to map buffer (%d)\n", err);
nuclear@0 102 return 0;
nuclear@0 103 }
nuclear@0 104 return mbuf->ptr;
nuclear@0 105 }
nuclear@0 106
nuclear@0 107 void unmap_mem_buffer(CLMemBuffer *mbuf)
nuclear@0 108 {
nuclear@0 109 if(!mbuf || !mbuf->ptr) return;
nuclear@0 110 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
nuclear@0 111 }
nuclear@0 112
nuclear@0 113 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
nuclear@0 114 {
nuclear@0 115 if(!mbuf) return false;
nuclear@0 116
nuclear@0 117 int err;
nuclear@0 118 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
nuclear@0 119 fprintf(stderr, "failed to write buffer (%d)\n", err);
nuclear@0 120 return false;
nuclear@0 121 }
nuclear@0 122 return true;
nuclear@0 123 }
nuclear@0 124
nuclear@0 125 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
nuclear@0 126 {
nuclear@0 127 if(!mbuf) return false;
nuclear@0 128
nuclear@0 129 int err;
nuclear@0 130 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
nuclear@0 131 fprintf(stderr, "failed to read buffer (%d)\n", err);
nuclear@0 132 return false;
nuclear@0 133 }
nuclear@0 134 return true;
nuclear@0 135 }
nuclear@0 136
nuclear@0 137
nuclear@0 138 CLProgram::CLProgram(const char *kname)
nuclear@0 139 {
nuclear@0 140 prog = 0;
nuclear@0 141 kernel = 0;
nuclear@0 142 this->kname = kname;
nuclear@1 143 args.resize(16);
nuclear@0 144 built = false;
nuclear@0 145 }
nuclear@0 146
nuclear@0 147 CLProgram::~CLProgram()
nuclear@0 148 {
nuclear@0 149 if(prog) {
nuclear@0 150
nuclear@0 151 clReleaseProgram(prog);
nuclear@0 152 }
nuclear@0 153 if(kernel) {
nuclear@0 154
nuclear@0 155 clReleaseKernel(kernel);
nuclear@0 156 }
nuclear@1 157 for(size_t i=0; i<args.size(); i++) {
nuclear@1 158 if(args[i].type == ARGTYPE_MEM_BUF) {
nuclear@1 159 destroy_mem_buffer(args[i].v.mbuf);
nuclear@0 160 }
nuclear@0 161 }
nuclear@0 162 }
nuclear@0 163
nuclear@0 164 bool CLProgram::load(const char *fname)
nuclear@0 165 {
nuclear@0 166 FILE *fp;
nuclear@0 167 char *src;
nuclear@0 168 struct stat st;
nuclear@0 169
nuclear@0 170 printf("loading opencl program (%s)\n", fname);
nuclear@0 171
nuclear@0 172 if(!(fp = fopen(fname, "rb"))) {
nuclear@0 173 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
nuclear@0 174 return false;
nuclear@0 175 }
nuclear@0 176
nuclear@0 177 fstat(fileno(fp), &st);
nuclear@0 178
nuclear@0 179 src = new char[st.st_size + 1];
nuclear@0 180
nuclear@0 181 fread(src, 1, st.st_size, fp);
nuclear@0 182 src[st.st_size] = 0;
nuclear@0 183 fclose(fp);
nuclear@0 184
nuclear@0 185
nuclear@0 186 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
nuclear@0 187 fprintf(stderr, "error creating program object: %s\n", fname);
nuclear@0 188 delete [] src;
nuclear@0 189 return false;
nuclear@0 190 }
nuclear@0 191 delete [] src;
nuclear@0 192 return true;
nuclear@0 193 }
nuclear@0 194
nuclear@1 195 bool CLProgram::set_argi(int idx, int val)
nuclear@1 196 {
nuclear@1 197 if((int)args.size() <= idx) {
nuclear@1 198 args.resize(idx + 1);
nuclear@1 199 }
nuclear@1 200
nuclear@1 201 CLArg *arg = &args[idx];
nuclear@1 202 arg->type = ARGTYPE_INT;
nuclear@1 203 arg->v.ival = val;
nuclear@1 204 return true;
nuclear@1 205 }
nuclear@1 206
nuclear@1 207 bool CLProgram::set_argf(int idx, float val)
nuclear@1 208 {
nuclear@1 209 if((int)args.size() <= idx) {
nuclear@1 210 args.resize(idx + 1);
nuclear@1 211 }
nuclear@1 212
nuclear@1 213 CLArg *arg = &args[idx];
nuclear@1 214 arg->type = ARGTYPE_FLOAT;
nuclear@1 215 arg->v.fval = val;
nuclear@1 216 return true;
nuclear@1 217 }
nuclear@1 218
nuclear@1 219 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
nuclear@0 220 {
nuclear@0 221 CLMemBuffer *buf;
nuclear@0 222
nuclear@0 223 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
nuclear@0 224 return false;
nuclear@0 225 }
nuclear@0 226
nuclear@1 227 if((int)args.size() <= idx) {
nuclear@1 228 args.resize(idx + 1);
nuclear@0 229 }
nuclear@1 230 args[idx].type = ARGTYPE_MEM_BUF;
nuclear@1 231 args[idx].v.mbuf = buf;
nuclear@0 232 return true;
nuclear@0 233 }
nuclear@0 234
nuclear@0 235 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
nuclear@0 236 {
nuclear@1 237 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
nuclear@0 238 return 0;
nuclear@0 239 }
nuclear@1 240 return args[arg].v.mbuf;
nuclear@0 241 }
nuclear@0 242
nuclear@0 243 bool CLProgram::build()
nuclear@0 244 {
nuclear@0 245 char errlog[512];
nuclear@0 246
nuclear@0 247
nuclear@0 248 if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) {
nuclear@0 249 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0);
nuclear@0 250 fprintf(stderr, "failed to build program:\n%s\n", errlog);
nuclear@0 251 clReleaseProgram(prog);
nuclear@0 252 prog = 0;
nuclear@0 253 return false;
nuclear@0 254 }
nuclear@0 255
nuclear@0 256
nuclear@0 257 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
nuclear@0 258 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
nuclear@0 259 clReleaseProgram(prog);
nuclear@0 260 prog = 0;
nuclear@0 261 return false;
nuclear@0 262 }
nuclear@0 263
nuclear@1 264 for(size_t i=0; i<args.size(); i++) {
nuclear@1 265 int err;
nuclear@0 266
nuclear@1 267 if(args[i].type == ARGTYPE_NONE) {
nuclear@1 268 break;
nuclear@1 269 }
nuclear@1 270
nuclear@1 271 switch(args[i].type) {
nuclear@1 272 case ARGTYPE_INT:
nuclear@1 273 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
nuclear@1 274 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
nuclear@1 275 goto fail;
nuclear@1 276 }
nuclear@1 277 break;
nuclear@1 278
nuclear@1 279 case ARGTYPE_FLOAT:
nuclear@1 280 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
nuclear@1 281 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
nuclear@1 282 goto fail;
nuclear@1 283 }
nuclear@1 284 break;
nuclear@1 285
nuclear@1 286 case ARGTYPE_MEM_BUF:
nuclear@1 287 {
nuclear@1 288 CLMemBuffer *mbuf = args[i].v.mbuf;
nuclear@1 289
nuclear@1 290 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
nuclear@1 291 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
nuclear@1 292 goto fail;
nuclear@1 293 }
nuclear@1 294 }
nuclear@1 295 break;
nuclear@1 296
nuclear@1 297 default:
nuclear@1 298 break;
nuclear@0 299 }
nuclear@0 300 }
nuclear@0 301
nuclear@0 302 built = true;
nuclear@0 303 return true;
nuclear@1 304
nuclear@1 305 fail:
nuclear@1 306 clReleaseProgram(prog);
nuclear@1 307 clReleaseKernel(kernel);
nuclear@1 308 prog = 0;
nuclear@1 309 kernel = 0;
nuclear@1 310 return false;
nuclear@0 311 }
nuclear@0 312
nuclear@0 313 bool CLProgram::run() const
nuclear@0 314 {
nuclear@0 315 return run(1, 1);
nuclear@0 316 }
nuclear@0 317
nuclear@0 318 bool CLProgram::run(int dim, ...) const
nuclear@0 319 {
nuclear@0 320 if(!built) {
nuclear@0 321 if(!((CLProgram*)this)->build()) {
nuclear@0 322 return false;
nuclear@0 323 }
nuclear@0 324 }
nuclear@0 325
nuclear@0 326 va_list ap;
nuclear@0 327 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
nuclear@0 328
nuclear@0 329 va_start(ap, dim);
nuclear@0 330 for(int i=0; i<dim; i++) {
nuclear@0 331 global_size[i] = va_arg(ap, int);
nuclear@0 332 }
nuclear@0 333 va_end(ap);
nuclear@0 334
nuclear@0 335 int err;
nuclear@0 336 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
nuclear@0 337 fprintf(stderr, "error executing kernel (%d)\n", err);
nuclear@0 338 return false;
nuclear@0 339 }
nuclear@0 340 return true;
nuclear@0 341 }
nuclear@0 342
nuclear@0 343 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
nuclear@0 344 {
nuclear@0 345 unsigned int i, j, num_dev, sel;
nuclear@0 346 cl_device_id dev[32];
nuclear@0 347
nuclear@0 348 dev_inf->work_item_sizes = 0;
nuclear@0 349
nuclear@0 350
nuclear@0 351 clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
nuclear@0 352 printf("found %d cl devices.\n", num_dev);
nuclear@0 353
nuclear@0 354 for(i=0; i<num_dev; i++) {
nuclear@0 355 struct device_info di;
nuclear@0 356
nuclear@0 357 if(get_dev_info(dev[i], &di) == -1) {
nuclear@0 358 free(dev_inf->work_item_sizes);
nuclear@0 359 return -1;
nuclear@0 360 }
nuclear@0 361
nuclear@0 362 printf("--> device %u (%s)\n", i, devtypestr(di.type));
nuclear@0 363 printf("max compute units: %u\n", di.units);
nuclear@0 364 printf("max clock frequency: %u\n", di.clock);
nuclear@0 365 printf("max work item dimensions: %u\n", di.dim);
nuclear@0 366
nuclear@0 367 printf("max work item sizes: ");
nuclear@0 368 for(j=0; j<di.dim; j++) {
nuclear@0 369 printf("%u", (unsigned int)di.work_item_sizes[j]);
nuclear@0 370 if(di.dim - j > 1) {
nuclear@0 371 printf(", ");
nuclear@0 372 }
nuclear@0 373 }
nuclear@0 374 putchar('\n');
nuclear@0 375
nuclear@0 376 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
nuclear@0 377 printf("max object allocation size: ");
nuclear@0 378 print_memsize(stdout, di.mem_size);
nuclear@0 379 putchar('\n');
nuclear@0 380
nuclear@0 381 if(devcmp(&di, dev_inf) > 0) {
nuclear@0 382 free(dev_inf->work_item_sizes);
nuclear@0 383 memcpy(dev_inf, &di, sizeof di);
nuclear@0 384 sel = i;
nuclear@0 385 }
nuclear@0 386 }
nuclear@0 387
nuclear@0 388 if(num_dev) {
nuclear@0 389 printf("\nusing device: %d\n", sel);
nuclear@0 390 return 0;
nuclear@0 391 }
nuclear@0 392
nuclear@0 393 return -1;
nuclear@0 394 }
nuclear@0 395
nuclear@0 396 static int get_dev_info(cl_device_id dev, struct device_info *di)
nuclear@0 397 {
nuclear@0 398 di->id = dev;
nuclear@0 399
nuclear@0 400
nuclear@0 401 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
nuclear@0 402 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
nuclear@0 403 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
nuclear@0 404 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
nuclear@0 405
nuclear@0 406 di->work_item_sizes = new size_t[di->dim];
nuclear@0 407
nuclear@0 408 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
nuclear@0 409 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
nuclear@0 410 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
nuclear@0 411
nuclear@0 412 return 0;
nuclear@0 413 }
nuclear@0 414
nuclear@0 415 static int devcmp(struct device_info *a, struct device_info *b)
nuclear@0 416 {
nuclear@0 417 unsigned int aval = a->units * a->clock;
nuclear@0 418 unsigned int bval = b->units * b->clock;
nuclear@0 419
nuclear@0 420 return aval - bval;
nuclear@0 421 }
nuclear@0 422
nuclear@0 423 static const char *devtypestr(cl_device_type type)
nuclear@0 424 {
nuclear@0 425 switch(type) {
nuclear@0 426 case CL_DEVICE_TYPE_CPU:
nuclear@0 427 return "cpu";
nuclear@0 428 case CL_DEVICE_TYPE_GPU:
nuclear@0 429 return "gpu";
nuclear@0 430 case CL_DEVICE_TYPE_ACCELERATOR:
nuclear@0 431 return "accelerator";
nuclear@0 432 default:
nuclear@0 433 break;
nuclear@0 434 }
nuclear@0 435 return "unknown";
nuclear@0 436 }
nuclear@0 437
nuclear@0 438 static void print_memsize(FILE *out, unsigned long bytes)
nuclear@0 439 {
nuclear@0 440 int i;
nuclear@0 441 unsigned long memsz = bytes;
nuclear@0 442 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
nuclear@0 443
nuclear@0 444 for(i=0; suffix[i]; i++) {
nuclear@0 445 if(memsz < 1024) {
nuclear@0 446 fprintf(out, "%lu %s", memsz, suffix[i]);
nuclear@0 447 if(i > 0) {
nuclear@0 448 fprintf(out, " (%lu bytes)", bytes);
nuclear@0 449 }
nuclear@0 450 return;
nuclear@0 451 }
nuclear@0 452
nuclear@0 453 memsz /= 1024;
nuclear@0 454 }
nuclear@0 455 }