clray

view src/ocl.cc @ 8:deaf85acf6af

interactive spheres
author John Tsiombikas <nuclear@member.fsf.org>
date Fri, 23 Jul 2010 19:48:43 +0100
parents 41d6253492ad
children d9a1bab1c3f5
line source
1 #define OCL_CC_
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #include <alloca.h>
9 #include <sys/stat.h>
10 #include "ocl.h"
11 #include "ocl_errstr.h"
14 class InitCL {
15 public:
16 InitCL();
17 };
19 struct device_info {
20 cl_device_id id;
21 cl_device_type type;
22 unsigned int units;
23 unsigned int clock;
25 unsigned int dim;
26 size_t *work_item_sizes;
27 size_t work_group_size;
29 unsigned long mem_size;
30 };
32 static bool init_opencl(void);
33 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
34 static int get_dev_info(cl_device_id dev, struct device_info *di);
35 static int devcmp(struct device_info *a, struct device_info *b);
36 static const char *devtypestr(cl_device_type type);
37 static void print_memsize(FILE *out, unsigned long memsz);
38 static const char *clstrerror(int err);
41 static InitCL initcl;
42 static cl_context ctx;
43 static cl_command_queue cmdq;
44 static device_info devinf;
46 InitCL::InitCL()
47 {
48 if(!init_opencl()) {
49 exit(0);
50 }
51 }
53 static bool init_opencl(void)
54 {
55 if(select_device(&devinf, devcmp) == -1) {
56 return false;
57 }
60 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
61 fprintf(stderr, "failed to create opencl context\n");
62 return false;
63 }
65 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
66 fprintf(stderr, "failed to create command queue\n");
67 return false;
68 }
69 return true;
70 }
73 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
74 {
75 int err;
76 cl_mem mem;
79 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
80 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
81 return 0;
82 }
84 CLMemBuffer *mbuf = new CLMemBuffer;
85 mbuf->mem = mem;
86 mbuf->size = sz;
87 return mbuf;
88 }
90 void destroy_mem_buffer(CLMemBuffer *mbuf)
91 {
92 if(mbuf) {
94 clReleaseMemObject(mbuf->mem);
95 delete mbuf;
96 }
97 }
99 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
100 {
101 if(!mbuf) return 0;
103 int err;
104 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
105 if(!mbuf->ptr) {
106 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
107 return 0;
108 }
109 return mbuf->ptr;
110 }
112 void unmap_mem_buffer(CLMemBuffer *mbuf)
113 {
114 if(!mbuf || !mbuf->ptr) return;
115 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
116 }
118 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
119 {
120 if(!mbuf) return false;
122 int err;
123 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
124 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
125 return false;
126 }
127 return true;
128 }
130 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
131 {
132 if(!mbuf) return false;
134 int err;
135 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
136 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
137 return false;
138 }
139 return true;
140 }
143 CLProgram::CLProgram(const char *kname)
144 {
145 prog = 0;
146 kernel = 0;
147 this->kname = kname;
148 args.resize(16);
149 built = false;
150 }
152 CLProgram::~CLProgram()
153 {
154 if(prog) {
156 clReleaseProgram(prog);
157 }
158 if(kernel) {
160 clReleaseKernel(kernel);
161 }
162 for(size_t i=0; i<args.size(); i++) {
163 if(args[i].type == ARGTYPE_MEM_BUF) {
164 destroy_mem_buffer(args[i].v.mbuf);
165 }
166 }
167 }
169 bool CLProgram::load(const char *fname)
170 {
171 FILE *fp;
172 char *src;
173 struct stat st;
175 printf("loading opencl program (%s)\n", fname);
177 if(!(fp = fopen(fname, "rb"))) {
178 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
179 return false;
180 }
182 fstat(fileno(fp), &st);
184 src = new char[st.st_size + 1];
186 fread(src, 1, st.st_size, fp);
187 src[st.st_size] = 0;
188 fclose(fp);
191 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
192 fprintf(stderr, "error creating program object: %s\n", fname);
193 delete [] src;
194 return false;
195 }
196 delete [] src;
197 return true;
198 }
200 bool CLProgram::set_argi(int idx, int val)
201 {
202 if((int)args.size() <= idx) {
203 args.resize(idx + 1);
204 }
206 CLArg *arg = &args[idx];
207 arg->type = ARGTYPE_INT;
208 arg->v.ival = val;
209 return true;
210 }
212 bool CLProgram::set_argf(int idx, float val)
213 {
214 if((int)args.size() <= idx) {
215 args.resize(idx + 1);
216 }
218 CLArg *arg = &args[idx];
219 arg->type = ARGTYPE_FLOAT;
220 arg->v.fval = val;
221 return true;
222 }
224 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
225 {
226 CLMemBuffer *buf;
228 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
229 return false;
230 }
232 if((int)args.size() <= idx) {
233 args.resize(idx + 1);
234 }
235 args[idx].type = ARGTYPE_MEM_BUF;
236 args[idx].v.mbuf = buf;
237 return true;
238 }
240 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
241 {
242 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
243 return 0;
244 }
245 return args[arg].v.mbuf;
246 }
248 bool CLProgram::build()
249 {
250 int err;
252 if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
253 size_t sz;
254 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
256 char *errlog = (char*)alloca(sz + 1);
257 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
258 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
260 clReleaseProgram(prog);
261 prog = 0;
262 return false;
263 }
266 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
267 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
268 clReleaseProgram(prog);
269 prog = 0;
270 return false;
271 }
273 for(size_t i=0; i<args.size(); i++) {
274 int err;
276 if(args[i].type == ARGTYPE_NONE) {
277 break;
278 }
280 switch(args[i].type) {
281 case ARGTYPE_INT:
282 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
283 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
284 goto fail;
285 }
286 break;
288 case ARGTYPE_FLOAT:
289 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
290 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
291 goto fail;
292 }
293 break;
295 case ARGTYPE_MEM_BUF:
296 {
297 CLMemBuffer *mbuf = args[i].v.mbuf;
299 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
300 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
301 goto fail;
302 }
303 }
304 break;
306 default:
307 break;
308 }
309 }
311 built = true;
312 return true;
314 fail:
315 clReleaseProgram(prog);
316 clReleaseKernel(kernel);
317 prog = 0;
318 kernel = 0;
319 return false;
320 }
322 bool CLProgram::run() const
323 {
324 return run(1, 1);
325 }
327 bool CLProgram::run(int dim, ...) const
328 {
329 if(!built) {
330 if(!((CLProgram*)this)->build()) {
331 return false;
332 }
333 }
335 va_list ap;
336 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
338 va_start(ap, dim);
339 for(int i=0; i<dim; i++) {
340 global_size[i] = va_arg(ap, int);
341 }
342 va_end(ap);
344 int err;
345 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
346 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
347 return false;
348 }
349 return true;
350 }
352 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
353 {
354 unsigned int i, j, num_dev, num_plat, sel, ret;
355 cl_device_id dev[32];
356 cl_platform_id plat[32];
358 dev_inf->work_item_sizes = 0;
360 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
361 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
362 return -1;
363 }
364 if(!num_plat) {
365 fprintf(stderr, "OpenCL not available!\n");
366 return -1;
367 }
369 for(i=0; i<num_plat; i++) {
370 char buf[512];
372 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
373 printf("[%d]: %s", i, buf);
374 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
375 printf(", %s", buf);
376 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
377 printf(" (%s)\n", buf);
378 }
380 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
381 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
382 return -1;
383 }
384 printf("found %d cl devices.\n", num_dev);
386 for(i=0; i<num_dev; i++) {
387 struct device_info di;
389 if(get_dev_info(dev[i], &di) == -1) {
390 free(dev_inf->work_item_sizes);
391 return -1;
392 }
394 printf("--> device %u (%s)\n", i, devtypestr(di.type));
395 printf("max compute units: %u\n", di.units);
396 printf("max clock frequency: %u\n", di.clock);
397 printf("max work item dimensions: %u\n", di.dim);
399 printf("max work item sizes: ");
400 for(j=0; j<di.dim; j++) {
401 printf("%u", (unsigned int)di.work_item_sizes[j]);
402 if(di.dim - j > 1) {
403 printf(", ");
404 }
405 }
406 putchar('\n');
408 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
409 printf("max object allocation size: ");
410 print_memsize(stdout, di.mem_size);
411 putchar('\n');
413 if(devcmp(&di, dev_inf) > 0) {
414 free(dev_inf->work_item_sizes);
415 memcpy(dev_inf, &di, sizeof di);
416 sel = i;
417 }
418 }
420 if(num_dev) {
421 printf("\nusing device: %d\n", sel);
422 return 0;
423 }
425 return -1;
426 }
428 static int get_dev_info(cl_device_id dev, struct device_info *di)
429 {
430 di->id = dev;
433 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
434 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
435 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
436 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
438 di->work_item_sizes = new size_t[di->dim];
440 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
441 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
442 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
444 return 0;
445 }
447 static int devcmp(struct device_info *a, struct device_info *b)
448 {
449 unsigned int aval = a->units * a->clock;
450 unsigned int bval = b->units * b->clock;
452 return aval - bval;
453 }
455 static const char *devtypestr(cl_device_type type)
456 {
457 switch(type) {
458 case CL_DEVICE_TYPE_CPU:
459 return "cpu";
460 case CL_DEVICE_TYPE_GPU:
461 return "gpu";
462 case CL_DEVICE_TYPE_ACCELERATOR:
463 return "accelerator";
464 default:
465 break;
466 }
467 return "unknown";
468 }
470 static void print_memsize(FILE *out, unsigned long bytes)
471 {
472 int i;
473 unsigned long memsz = bytes;
474 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
476 for(i=0; suffix[i]; i++) {
477 if(memsz < 1024) {
478 fprintf(out, "%lu %s", memsz, suffix[i]);
479 if(i > 0) {
480 fprintf(out, " (%lu bytes)", bytes);
481 }
482 return;
483 }
485 memsz /= 1024;
486 }
487 }
489 static const char *clstrerror(int err)
490 {
491 if(err > 0) {
492 return "<invalid error code>";
493 }
494 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
495 return "<unknown error>";
496 }
497 return ocl_errstr[-err];
498 }