clray

view src/ocl.cc @ 4:3c95d568d3c7

wow a ball
author John Tsiombikas <nuclear@member.fsf.org>
date Thu, 15 Jul 2010 07:37:05 +0300
parents 0b0e4d18d53f
children deaf85acf6af
line source
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <errno.h>
5 #include <alloca.h>
6 #include <sys/stat.h>
7 #include "ocl.h"
10 class InitCL {
11 public:
12 InitCL();
13 };
15 struct device_info {
16 cl_device_id id;
17 cl_device_type type;
18 unsigned int units;
19 unsigned int clock;
21 unsigned int dim;
22 size_t *work_item_sizes;
23 size_t work_group_size;
25 unsigned long mem_size;
26 };
28 static bool init_opencl(void);
29 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
30 static int get_dev_info(cl_device_id dev, struct device_info *di);
31 static int devcmp(struct device_info *a, struct device_info *b);
32 static const char *devtypestr(cl_device_type type);
33 static void print_memsize(FILE *out, unsigned long memsz);
36 static InitCL initcl;
37 static cl_context ctx;
38 static cl_command_queue cmdq;
39 static device_info devinf;
41 InitCL::InitCL()
42 {
43 if(!init_opencl()) {
44 exit(0);
45 }
46 }
48 static bool init_opencl(void)
49 {
50 if(select_device(&devinf, devcmp) == -1) {
51 return false;
52 }
55 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
56 fprintf(stderr, "failed to create opencl context\n");
57 return false;
58 }
60 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
61 fprintf(stderr, "failed to create command queue\n");
62 return false;
63 }
64 return true;
65 }
68 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
69 {
70 int err;
71 cl_mem mem;
74 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
75 fprintf(stderr, "failed to create memory buffer (%d)\n", err);
76 return 0;
77 }
79 CLMemBuffer *mbuf = new CLMemBuffer;
80 mbuf->mem = mem;
81 mbuf->size = sz;
82 return mbuf;
83 }
85 void destroy_mem_buffer(CLMemBuffer *mbuf)
86 {
87 if(mbuf) {
89 clReleaseMemObject(mbuf->mem);
90 delete mbuf;
91 }
92 }
94 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
95 {
96 if(!mbuf) return 0;
98 int err;
99 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
100 if(!mbuf->ptr) {
101 fprintf(stderr, "failed to map buffer (%d)\n", err);
102 return 0;
103 }
104 return mbuf->ptr;
105 }
107 void unmap_mem_buffer(CLMemBuffer *mbuf)
108 {
109 if(!mbuf || !mbuf->ptr) return;
110 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
111 }
113 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
114 {
115 if(!mbuf) return false;
117 int err;
118 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
119 fprintf(stderr, "failed to write buffer (%d)\n", err);
120 return false;
121 }
122 return true;
123 }
125 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
126 {
127 if(!mbuf) return false;
129 int err;
130 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
131 fprintf(stderr, "failed to read buffer (%d)\n", err);
132 return false;
133 }
134 return true;
135 }
138 CLProgram::CLProgram(const char *kname)
139 {
140 prog = 0;
141 kernel = 0;
142 this->kname = kname;
143 args.resize(16);
144 built = false;
145 }
147 CLProgram::~CLProgram()
148 {
149 if(prog) {
151 clReleaseProgram(prog);
152 }
153 if(kernel) {
155 clReleaseKernel(kernel);
156 }
157 for(size_t i=0; i<args.size(); i++) {
158 if(args[i].type == ARGTYPE_MEM_BUF) {
159 destroy_mem_buffer(args[i].v.mbuf);
160 }
161 }
162 }
164 bool CLProgram::load(const char *fname)
165 {
166 FILE *fp;
167 char *src;
168 struct stat st;
170 printf("loading opencl program (%s)\n", fname);
172 if(!(fp = fopen(fname, "rb"))) {
173 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
174 return false;
175 }
177 fstat(fileno(fp), &st);
179 src = new char[st.st_size + 1];
181 fread(src, 1, st.st_size, fp);
182 src[st.st_size] = 0;
183 fclose(fp);
186 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
187 fprintf(stderr, "error creating program object: %s\n", fname);
188 delete [] src;
189 return false;
190 }
191 delete [] src;
192 return true;
193 }
195 bool CLProgram::set_argi(int idx, int val)
196 {
197 if((int)args.size() <= idx) {
198 args.resize(idx + 1);
199 }
201 CLArg *arg = &args[idx];
202 arg->type = ARGTYPE_INT;
203 arg->v.ival = val;
204 return true;
205 }
207 bool CLProgram::set_argf(int idx, float val)
208 {
209 if((int)args.size() <= idx) {
210 args.resize(idx + 1);
211 }
213 CLArg *arg = &args[idx];
214 arg->type = ARGTYPE_FLOAT;
215 arg->v.fval = val;
216 return true;
217 }
219 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
220 {
221 CLMemBuffer *buf;
223 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
224 return false;
225 }
227 if((int)args.size() <= idx) {
228 args.resize(idx + 1);
229 }
230 args[idx].type = ARGTYPE_MEM_BUF;
231 args[idx].v.mbuf = buf;
232 return true;
233 }
235 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
236 {
237 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
238 return 0;
239 }
240 return args[arg].v.mbuf;
241 }
243 bool CLProgram::build()
244 {
245 int err;
247 if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
248 size_t sz;
249 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
251 char *errlog = (char*)alloca(sz + 1);
252 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
253 fprintf(stderr, "failed to build program: (%d)\n%s\n", err, errlog);
255 clReleaseProgram(prog);
256 prog = 0;
257 return false;
258 }
261 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
262 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
263 clReleaseProgram(prog);
264 prog = 0;
265 return false;
266 }
268 for(size_t i=0; i<args.size(); i++) {
269 int err;
271 if(args[i].type == ARGTYPE_NONE) {
272 break;
273 }
275 switch(args[i].type) {
276 case ARGTYPE_INT:
277 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
278 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
279 goto fail;
280 }
281 break;
283 case ARGTYPE_FLOAT:
284 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
285 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
286 goto fail;
287 }
288 break;
290 case ARGTYPE_MEM_BUF:
291 {
292 CLMemBuffer *mbuf = args[i].v.mbuf;
294 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
295 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
296 goto fail;
297 }
298 }
299 break;
301 default:
302 break;
303 }
304 }
306 built = true;
307 return true;
309 fail:
310 clReleaseProgram(prog);
311 clReleaseKernel(kernel);
312 prog = 0;
313 kernel = 0;
314 return false;
315 }
317 bool CLProgram::run() const
318 {
319 return run(1, 1);
320 }
322 bool CLProgram::run(int dim, ...) const
323 {
324 if(!built) {
325 if(!((CLProgram*)this)->build()) {
326 return false;
327 }
328 }
330 va_list ap;
331 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
333 va_start(ap, dim);
334 for(int i=0; i<dim; i++) {
335 global_size[i] = va_arg(ap, int);
336 }
337 va_end(ap);
339 int err;
340 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
341 fprintf(stderr, "error executing kernel (%d)\n", err);
342 return false;
343 }
344 return true;
345 }
347 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
348 {
349 unsigned int i, j, num_dev, sel;
350 cl_device_id dev[32];
352 dev_inf->work_item_sizes = 0;
355 clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
356 printf("found %d cl devices.\n", num_dev);
358 for(i=0; i<num_dev; i++) {
359 struct device_info di;
361 if(get_dev_info(dev[i], &di) == -1) {
362 free(dev_inf->work_item_sizes);
363 return -1;
364 }
366 printf("--> device %u (%s)\n", i, devtypestr(di.type));
367 printf("max compute units: %u\n", di.units);
368 printf("max clock frequency: %u\n", di.clock);
369 printf("max work item dimensions: %u\n", di.dim);
371 printf("max work item sizes: ");
372 for(j=0; j<di.dim; j++) {
373 printf("%u", (unsigned int)di.work_item_sizes[j]);
374 if(di.dim - j > 1) {
375 printf(", ");
376 }
377 }
378 putchar('\n');
380 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
381 printf("max object allocation size: ");
382 print_memsize(stdout, di.mem_size);
383 putchar('\n');
385 if(devcmp(&di, dev_inf) > 0) {
386 free(dev_inf->work_item_sizes);
387 memcpy(dev_inf, &di, sizeof di);
388 sel = i;
389 }
390 }
392 if(num_dev) {
393 printf("\nusing device: %d\n", sel);
394 return 0;
395 }
397 return -1;
398 }
400 static int get_dev_info(cl_device_id dev, struct device_info *di)
401 {
402 di->id = dev;
405 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
406 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
407 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
408 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
410 di->work_item_sizes = new size_t[di->dim];
412 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
413 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
414 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
416 return 0;
417 }
419 static int devcmp(struct device_info *a, struct device_info *b)
420 {
421 unsigned int aval = a->units * a->clock;
422 unsigned int bval = b->units * b->clock;
424 return aval - bval;
425 }
427 static const char *devtypestr(cl_device_type type)
428 {
429 switch(type) {
430 case CL_DEVICE_TYPE_CPU:
431 return "cpu";
432 case CL_DEVICE_TYPE_GPU:
433 return "gpu";
434 case CL_DEVICE_TYPE_ACCELERATOR:
435 return "accelerator";
436 default:
437 break;
438 }
439 return "unknown";
440 }
442 static void print_memsize(FILE *out, unsigned long bytes)
443 {
444 int i;
445 unsigned long memsz = bytes;
446 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
448 for(i=0; suffix[i]; i++) {
449 if(memsz < 1024) {
450 fprintf(out, "%lu %s", memsz, suffix[i]);
451 if(i > 0) {
452 fprintf(out, " (%lu bytes)", bytes);
453 }
454 return;
455 }
457 memsz /= 1024;
458 }
459 }