clray

view src/ocl.cc @ 1:0b0e4d18d53f

added non-buffer args
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 12 Jul 2010 07:00:19 +0300
parents 5767277e049f
children 41d6253492ad
line source
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <errno.h>
5 #include <alloca.h>
6 #include <sys/stat.h>
7 #include "ocl.h"
10 class InitCL {
11 public:
12 InitCL();
13 };
15 struct device_info {
16 cl_device_id id;
17 cl_device_type type;
18 unsigned int units;
19 unsigned int clock;
21 unsigned int dim;
22 size_t *work_item_sizes;
23 size_t work_group_size;
25 unsigned long mem_size;
26 };
28 static bool init_opencl(void);
29 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
30 static int get_dev_info(cl_device_id dev, struct device_info *di);
31 static int devcmp(struct device_info *a, struct device_info *b);
32 static const char *devtypestr(cl_device_type type);
33 static void print_memsize(FILE *out, unsigned long memsz);
36 static InitCL initcl;
37 static cl_context ctx;
38 static cl_command_queue cmdq;
39 static device_info devinf;
41 InitCL::InitCL()
42 {
43 if(!init_opencl()) {
44 exit(0);
45 }
46 }
48 static bool init_opencl(void)
49 {
50 if(select_device(&devinf, devcmp) == -1) {
51 return false;
52 }
55 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
56 fprintf(stderr, "failed to create opencl context\n");
57 return false;
58 }
60 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
61 fprintf(stderr, "failed to create command queue\n");
62 return false;
63 }
64 return true;
65 }
68 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
69 {
70 int err;
71 cl_mem mem;
74 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
75 fprintf(stderr, "failed to create memory buffer (%d)\n", err);
76 return 0;
77 }
79 CLMemBuffer *mbuf = new CLMemBuffer;
80 mbuf->mem = mem;
81 mbuf->size = sz;
82 return mbuf;
83 }
85 void destroy_mem_buffer(CLMemBuffer *mbuf)
86 {
87 if(mbuf) {
89 clReleaseMemObject(mbuf->mem);
90 delete mbuf;
91 }
92 }
94 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
95 {
96 if(!mbuf) return 0;
98 int err;
99 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
100 if(!mbuf->ptr) {
101 fprintf(stderr, "failed to map buffer (%d)\n", err);
102 return 0;
103 }
104 return mbuf->ptr;
105 }
107 void unmap_mem_buffer(CLMemBuffer *mbuf)
108 {
109 if(!mbuf || !mbuf->ptr) return;
110 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
111 }
113 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
114 {
115 if(!mbuf) return false;
117 int err;
118 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
119 fprintf(stderr, "failed to write buffer (%d)\n", err);
120 return false;
121 }
122 return true;
123 }
125 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
126 {
127 if(!mbuf) return false;
129 int err;
130 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
131 fprintf(stderr, "failed to read buffer (%d)\n", err);
132 return false;
133 }
134 return true;
135 }
138 CLProgram::CLProgram(const char *kname)
139 {
140 prog = 0;
141 kernel = 0;
142 this->kname = kname;
143 args.resize(16);
144 built = false;
145 }
147 CLProgram::~CLProgram()
148 {
149 if(prog) {
151 clReleaseProgram(prog);
152 }
153 if(kernel) {
155 clReleaseKernel(kernel);
156 }
157 for(size_t i=0; i<args.size(); i++) {
158 if(args[i].type == ARGTYPE_MEM_BUF) {
159 destroy_mem_buffer(args[i].v.mbuf);
160 }
161 }
162 }
164 bool CLProgram::load(const char *fname)
165 {
166 FILE *fp;
167 char *src;
168 struct stat st;
170 printf("loading opencl program (%s)\n", fname);
172 if(!(fp = fopen(fname, "rb"))) {
173 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
174 return false;
175 }
177 fstat(fileno(fp), &st);
179 src = new char[st.st_size + 1];
181 fread(src, 1, st.st_size, fp);
182 src[st.st_size] = 0;
183 fclose(fp);
186 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
187 fprintf(stderr, "error creating program object: %s\n", fname);
188 delete [] src;
189 return false;
190 }
191 delete [] src;
192 return true;
193 }
195 bool CLProgram::set_argi(int idx, int val)
196 {
197 if((int)args.size() <= idx) {
198 args.resize(idx + 1);
199 }
201 CLArg *arg = &args[idx];
202 arg->type = ARGTYPE_INT;
203 arg->v.ival = val;
204 return true;
205 }
207 bool CLProgram::set_argf(int idx, float val)
208 {
209 if((int)args.size() <= idx) {
210 args.resize(idx + 1);
211 }
213 CLArg *arg = &args[idx];
214 arg->type = ARGTYPE_FLOAT;
215 arg->v.fval = val;
216 return true;
217 }
219 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
220 {
221 CLMemBuffer *buf;
223 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
224 return false;
225 }
227 if((int)args.size() <= idx) {
228 args.resize(idx + 1);
229 }
230 args[idx].type = ARGTYPE_MEM_BUF;
231 args[idx].v.mbuf = buf;
232 return true;
233 }
235 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
236 {
237 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
238 return 0;
239 }
240 return args[arg].v.mbuf;
241 }
243 bool CLProgram::build()
244 {
245 char errlog[512];
248 if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) {
249 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0);
250 fprintf(stderr, "failed to build program:\n%s\n", errlog);
251 clReleaseProgram(prog);
252 prog = 0;
253 return false;
254 }
257 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
258 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
259 clReleaseProgram(prog);
260 prog = 0;
261 return false;
262 }
264 for(size_t i=0; i<args.size(); i++) {
265 int err;
267 if(args[i].type == ARGTYPE_NONE) {
268 break;
269 }
271 switch(args[i].type) {
272 case ARGTYPE_INT:
273 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
274 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
275 goto fail;
276 }
277 break;
279 case ARGTYPE_FLOAT:
280 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
281 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
282 goto fail;
283 }
284 break;
286 case ARGTYPE_MEM_BUF:
287 {
288 CLMemBuffer *mbuf = args[i].v.mbuf;
290 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
291 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
292 goto fail;
293 }
294 }
295 break;
297 default:
298 break;
299 }
300 }
302 built = true;
303 return true;
305 fail:
306 clReleaseProgram(prog);
307 clReleaseKernel(kernel);
308 prog = 0;
309 kernel = 0;
310 return false;
311 }
313 bool CLProgram::run() const
314 {
315 return run(1, 1);
316 }
318 bool CLProgram::run(int dim, ...) const
319 {
320 if(!built) {
321 if(!((CLProgram*)this)->build()) {
322 return false;
323 }
324 }
326 va_list ap;
327 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
329 va_start(ap, dim);
330 for(int i=0; i<dim; i++) {
331 global_size[i] = va_arg(ap, int);
332 }
333 va_end(ap);
335 int err;
336 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
337 fprintf(stderr, "error executing kernel (%d)\n", err);
338 return false;
339 }
340 return true;
341 }
343 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
344 {
345 unsigned int i, j, num_dev, sel;
346 cl_device_id dev[32];
348 dev_inf->work_item_sizes = 0;
351 clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
352 printf("found %d cl devices.\n", num_dev);
354 for(i=0; i<num_dev; i++) {
355 struct device_info di;
357 if(get_dev_info(dev[i], &di) == -1) {
358 free(dev_inf->work_item_sizes);
359 return -1;
360 }
362 printf("--> device %u (%s)\n", i, devtypestr(di.type));
363 printf("max compute units: %u\n", di.units);
364 printf("max clock frequency: %u\n", di.clock);
365 printf("max work item dimensions: %u\n", di.dim);
367 printf("max work item sizes: ");
368 for(j=0; j<di.dim; j++) {
369 printf("%u", (unsigned int)di.work_item_sizes[j]);
370 if(di.dim - j > 1) {
371 printf(", ");
372 }
373 }
374 putchar('\n');
376 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
377 printf("max object allocation size: ");
378 print_memsize(stdout, di.mem_size);
379 putchar('\n');
381 if(devcmp(&di, dev_inf) > 0) {
382 free(dev_inf->work_item_sizes);
383 memcpy(dev_inf, &di, sizeof di);
384 sel = i;
385 }
386 }
388 if(num_dev) {
389 printf("\nusing device: %d\n", sel);
390 return 0;
391 }
393 return -1;
394 }
396 static int get_dev_info(cl_device_id dev, struct device_info *di)
397 {
398 di->id = dev;
401 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
402 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
403 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
404 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
406 di->work_item_sizes = new size_t[di->dim];
408 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
409 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
410 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
412 return 0;
413 }
415 static int devcmp(struct device_info *a, struct device_info *b)
416 {
417 unsigned int aval = a->units * a->clock;
418 unsigned int bval = b->units * b->clock;
420 return aval - bval;
421 }
423 static const char *devtypestr(cl_device_type type)
424 {
425 switch(type) {
426 case CL_DEVICE_TYPE_CPU:
427 return "cpu";
428 case CL_DEVICE_TYPE_GPU:
429 return "gpu";
430 case CL_DEVICE_TYPE_ACCELERATOR:
431 return "accelerator";
432 default:
433 break;
434 }
435 return "unknown";
436 }
438 static void print_memsize(FILE *out, unsigned long bytes)
439 {
440 int i;
441 unsigned long memsz = bytes;
442 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
444 for(i=0; suffix[i]; i++) {
445 if(memsz < 1024) {
446 fprintf(out, "%lu %s", memsz, suffix[i]);
447 if(i > 0) {
448 fprintf(out, " (%lu bytes)", bytes);
449 }
450 return;
451 }
453 memsz /= 1024;
454 }
455 }