clray

view src/ocl.cc @ 11:d9a1bab1c3f5

ported to windows
author John Tsiombikas
date Sat, 31 Jul 2010 22:23:57 +0100
parents deaf85acf6af
children 85fd61f374d9
line source
1 #define OCL_CC_
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #ifndef _MSC_VER
9 #include <alloca.h>
10 #else
11 #include <malloc.h>
12 #endif
13 #include <sys/stat.h>
14 #include "ocl.h"
15 #include "ocl_errstr.h"
18 class InitCL {
19 public:
20 InitCL();
21 };
23 struct device_info {
24 cl_device_id id;
25 cl_device_type type;
26 unsigned int units;
27 unsigned int clock;
29 unsigned int dim;
30 size_t *work_item_sizes;
31 size_t work_group_size;
33 unsigned long mem_size;
34 };
36 static bool init_opencl(void);
37 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
38 static int get_dev_info(cl_device_id dev, struct device_info *di);
39 static int devcmp(struct device_info *a, struct device_info *b);
40 static const char *devtypestr(cl_device_type type);
41 static void print_memsize(FILE *out, unsigned long memsz);
42 static const char *clstrerror(int err);
45 static InitCL initcl;
46 static cl_context ctx;
47 static cl_command_queue cmdq;
48 static device_info devinf;
50 InitCL::InitCL()
51 {
52 if(!init_opencl()) {
53 exit(0);
54 }
55 }
57 static bool init_opencl(void)
58 {
59 if(select_device(&devinf, devcmp) == -1) {
60 return false;
61 }
64 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
65 fprintf(stderr, "failed to create opencl context\n");
66 return false;
67 }
69 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
70 fprintf(stderr, "failed to create command queue\n");
71 return false;
72 }
73 return true;
74 }
77 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
78 {
79 int err;
80 cl_mem mem;
83 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
84 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
85 return 0;
86 }
88 CLMemBuffer *mbuf = new CLMemBuffer;
89 mbuf->mem = mem;
90 mbuf->size = sz;
91 return mbuf;
92 }
94 void destroy_mem_buffer(CLMemBuffer *mbuf)
95 {
96 if(mbuf) {
98 clReleaseMemObject(mbuf->mem);
99 delete mbuf;
100 }
101 }
103 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
104 {
105 if(!mbuf) return 0;
107 int err;
108 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
109 if(!mbuf->ptr) {
110 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
111 return 0;
112 }
113 return mbuf->ptr;
114 }
116 void unmap_mem_buffer(CLMemBuffer *mbuf)
117 {
118 if(!mbuf || !mbuf->ptr) return;
119 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
120 }
122 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
123 {
124 if(!mbuf) return false;
126 int err;
127 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
128 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
129 return false;
130 }
131 return true;
132 }
134 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
135 {
136 if(!mbuf) return false;
138 int err;
139 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
140 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
141 return false;
142 }
143 return true;
144 }
147 CLProgram::CLProgram(const char *kname)
148 {
149 prog = 0;
150 kernel = 0;
151 this->kname = kname;
152 args.resize(16);
153 built = false;
154 }
156 CLProgram::~CLProgram()
157 {
158 if(prog) {
160 clReleaseProgram(prog);
161 }
162 if(kernel) {
164 clReleaseKernel(kernel);
165 }
166 for(size_t i=0; i<args.size(); i++) {
167 if(args[i].type == ARGTYPE_MEM_BUF) {
168 destroy_mem_buffer(args[i].v.mbuf);
169 }
170 }
171 }
173 bool CLProgram::load(const char *fname)
174 {
175 FILE *fp;
176 char *src;
177 struct stat st;
179 printf("loading opencl program (%s)\n", fname);
181 if(!(fp = fopen(fname, "rb"))) {
182 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
183 return false;
184 }
186 fstat(fileno(fp), &st);
188 src = new char[st.st_size + 1];
190 fread(src, 1, st.st_size, fp);
191 src[st.st_size] = 0;
192 fclose(fp);
195 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
196 fprintf(stderr, "error creating program object: %s\n", fname);
197 delete [] src;
198 return false;
199 }
200 delete [] src;
201 return true;
202 }
204 bool CLProgram::set_argi(int idx, int val)
205 {
206 if((int)args.size() <= idx) {
207 args.resize(idx + 1);
208 }
210 CLArg *arg = &args[idx];
211 arg->type = ARGTYPE_INT;
212 arg->v.ival = val;
213 return true;
214 }
216 bool CLProgram::set_argf(int idx, float val)
217 {
218 if((int)args.size() <= idx) {
219 args.resize(idx + 1);
220 }
222 CLArg *arg = &args[idx];
223 arg->type = ARGTYPE_FLOAT;
224 arg->v.fval = val;
225 return true;
226 }
228 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
229 {
230 CLMemBuffer *buf;
232 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
233 return false;
234 }
236 if((int)args.size() <= idx) {
237 args.resize(idx + 1);
238 }
239 args[idx].type = ARGTYPE_MEM_BUF;
240 args[idx].v.mbuf = buf;
241 return true;
242 }
244 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
245 {
246 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
247 return 0;
248 }
249 return args[arg].v.mbuf;
250 }
252 bool CLProgram::build()
253 {
254 int err;
256 if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
257 size_t sz;
258 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
260 char *errlog = (char*)alloca(sz + 1);
261 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
262 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
264 clReleaseProgram(prog);
265 prog = 0;
266 return false;
267 }
270 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
271 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
272 clReleaseProgram(prog);
273 prog = 0;
274 return false;
275 }
277 for(size_t i=0; i<args.size(); i++) {
278 int err;
280 if(args[i].type == ARGTYPE_NONE) {
281 break;
282 }
284 switch(args[i].type) {
285 case ARGTYPE_INT:
286 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
287 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
288 goto fail;
289 }
290 break;
292 case ARGTYPE_FLOAT:
293 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
294 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
295 goto fail;
296 }
297 break;
299 case ARGTYPE_MEM_BUF:
300 {
301 CLMemBuffer *mbuf = args[i].v.mbuf;
303 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
304 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
305 goto fail;
306 }
307 }
308 break;
310 default:
311 break;
312 }
313 }
315 built = true;
316 return true;
318 fail:
319 clReleaseProgram(prog);
320 clReleaseKernel(kernel);
321 prog = 0;
322 kernel = 0;
323 return false;
324 }
326 bool CLProgram::run() const
327 {
328 return run(1, 1);
329 }
331 bool CLProgram::run(int dim, ...) const
332 {
333 if(!built) {
334 if(!((CLProgram*)this)->build()) {
335 return false;
336 }
337 }
339 va_list ap;
340 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
342 va_start(ap, dim);
343 for(int i=0; i<dim; i++) {
344 global_size[i] = va_arg(ap, int);
345 }
346 va_end(ap);
348 int err;
349 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
350 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
351 return false;
352 }
353 return true;
354 }
356 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
357 {
358 unsigned int i, j, num_dev, num_plat, sel, ret;
359 cl_device_id dev[32];
360 cl_platform_id plat[32];
362 dev_inf->work_item_sizes = 0;
364 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
365 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
366 return -1;
367 }
368 if(!num_plat) {
369 fprintf(stderr, "OpenCL not available!\n");
370 return -1;
371 }
373 for(i=0; i<num_plat; i++) {
374 char buf[512];
376 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
377 printf("[%d]: %s", i, buf);
378 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
379 printf(", %s", buf);
380 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
381 printf(" (%s)\n", buf);
382 }
384 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
385 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
386 return -1;
387 }
388 printf("found %d cl devices.\n", num_dev);
390 for(i=0; i<num_dev; i++) {
391 struct device_info di;
393 if(get_dev_info(dev[i], &di) == -1) {
394 free(dev_inf->work_item_sizes);
395 return -1;
396 }
398 printf("--> device %u (%s)\n", i, devtypestr(di.type));
399 printf("max compute units: %u\n", di.units);
400 printf("max clock frequency: %u\n", di.clock);
401 printf("max work item dimensions: %u\n", di.dim);
403 printf("max work item sizes: ");
404 for(j=0; j<di.dim; j++) {
405 printf("%u", (unsigned int)di.work_item_sizes[j]);
406 if(di.dim - j > 1) {
407 printf(", ");
408 }
409 }
410 putchar('\n');
412 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
413 printf("max object allocation size: ");
414 print_memsize(stdout, di.mem_size);
415 putchar('\n');
417 if(devcmp(&di, dev_inf) > 0) {
418 free(dev_inf->work_item_sizes);
419 memcpy(dev_inf, &di, sizeof di);
420 sel = i;
421 }
422 }
424 if(num_dev) {
425 printf("\nusing device: %d\n", sel);
426 return 0;
427 }
429 return -1;
430 }
432 static int get_dev_info(cl_device_id dev, struct device_info *di)
433 {
434 di->id = dev;
437 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
438 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
439 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
440 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
442 di->work_item_sizes = new size_t[di->dim];
444 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
445 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
446 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
448 return 0;
449 }
451 static int devcmp(struct device_info *a, struct device_info *b)
452 {
453 unsigned int aval = a->units * a->clock;
454 unsigned int bval = b->units * b->clock;
456 return aval - bval;
457 }
459 static const char *devtypestr(cl_device_type type)
460 {
461 switch(type) {
462 case CL_DEVICE_TYPE_CPU:
463 return "cpu";
464 case CL_DEVICE_TYPE_GPU:
465 return "gpu";
466 case CL_DEVICE_TYPE_ACCELERATOR:
467 return "accelerator";
468 default:
469 break;
470 }
471 return "unknown";
472 }
474 static void print_memsize(FILE *out, unsigned long bytes)
475 {
476 int i;
477 unsigned long memsz = bytes;
478 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
480 for(i=0; suffix[i]; i++) {
481 if(memsz < 1024) {
482 fprintf(out, "%lu %s", memsz, suffix[i]);
483 if(i > 0) {
484 fprintf(out, " (%lu bytes)", bytes);
485 }
486 return;
487 }
489 memsz /= 1024;
490 }
491 }
493 static const char *clstrerror(int err)
494 {
495 if(err > 0) {
496 return "<invalid error code>";
497 }
498 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
499 return "<unknown error>";
500 }
501 return ocl_errstr[-err];
502 }