rev |
line source |
nuclear@8
|
1 #define OCL_CC_
|
nuclear@8
|
2
|
nuclear@0
|
3 #include <stdio.h>
|
nuclear@0
|
4 #include <stdlib.h>
|
nuclear@0
|
5 #include <string.h>
|
nuclear@8
|
6 #include <stdarg.h>
|
nuclear@0
|
7 #include <errno.h>
|
nuclear@39
|
8 #include <assert.h>
|
John@11
|
9 #ifndef _MSC_VER
|
nuclear@0
|
10 #include <alloca.h>
|
John@11
|
11 #else
|
John@11
|
12 #include <malloc.h>
|
John@11
|
13 #endif
|
nuclear@0
|
14 #include <sys/stat.h>
|
nuclear@0
|
15 #include "ocl.h"
|
nuclear@39
|
16 #include "ogl.h"
|
nuclear@8
|
17 #include "ocl_errstr.h"
|
nuclear@0
|
18
|
nuclear@39
|
19 #if defined(unix) || defined(__unix__)
|
nuclear@39
|
20 #include <X11/Xlib.h>
|
nuclear@39
|
21 #include <GL/glx.h>
|
nuclear@39
|
22 #endif
|
nuclear@0
|
23
|
nuclear@0
|
24
|
nuclear@0
|
25 struct device_info {
|
nuclear@0
|
26 cl_device_id id;
|
nuclear@0
|
27 cl_device_type type;
|
nuclear@0
|
28 unsigned int units;
|
nuclear@0
|
29 unsigned int clock;
|
nuclear@0
|
30
|
nuclear@0
|
31 unsigned int dim;
|
nuclear@0
|
32 size_t *work_item_sizes;
|
nuclear@0
|
33 size_t work_group_size;
|
nuclear@0
|
34
|
nuclear@0
|
35 unsigned long mem_size;
|
nuclear@0
|
36 };
|
nuclear@0
|
37
|
nuclear@0
|
38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
|
nuclear@0
|
39 static int get_dev_info(cl_device_id dev, struct device_info *di);
|
nuclear@0
|
40 static int devcmp(struct device_info *a, struct device_info *b);
|
nuclear@0
|
41 static const char *devtypestr(cl_device_type type);
|
nuclear@0
|
42 static void print_memsize(FILE *out, unsigned long memsz);
|
nuclear@8
|
43 static const char *clstrerror(int err);
|
nuclear@0
|
44
|
nuclear@0
|
45
|
nuclear@0
|
46 static cl_context ctx;
|
nuclear@0
|
47 static cl_command_queue cmdq;
|
nuclear@0
|
48 static device_info devinf;
|
nuclear@0
|
49
|
nuclear@39
|
50 bool init_opencl()
|
nuclear@0
|
51 {
|
nuclear@0
|
52 if(select_device(&devinf, devcmp) == -1) {
|
nuclear@0
|
53 return false;
|
nuclear@0
|
54 }
|
nuclear@0
|
55
|
nuclear@39
|
56 #if defined(__APPLE__)
|
nuclear@39
|
57 #error "CL/GL context sharing not implemented on MacOSX yet"
|
nuclear@39
|
58 #elif defined(unix) || defined(__unix__)
|
nuclear@39
|
59 Display *dpy = glXGetCurrentDisplay();
|
nuclear@39
|
60 GLXContext glctx = glXGetCurrentContext();
|
nuclear@0
|
61
|
nuclear@39
|
62 assert(dpy && glctx);
|
nuclear@39
|
63
|
nuclear@39
|
64 cl_context_properties prop[] = {
|
nuclear@39
|
65 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
|
nuclear@39
|
66 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
|
nuclear@39
|
67 0
|
nuclear@39
|
68 };
|
nuclear@39
|
69 #elif defined(WIN32) || defined(__WIN32__)
|
nuclear@39
|
70 #error "CL/GL context sharing not implemented on windows yet"
|
nuclear@39
|
71 #else
|
nuclear@39
|
72 #error "unknown or unsupported platform"
|
nuclear@39
|
73 #endif
|
nuclear@39
|
74
|
nuclear@39
|
75 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
|
nuclear@0
|
76 fprintf(stderr, "failed to create opencl context\n");
|
nuclear@0
|
77 return false;
|
nuclear@0
|
78 }
|
nuclear@0
|
79
|
nuclear@0
|
80 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
|
nuclear@0
|
81 fprintf(stderr, "failed to create command queue\n");
|
nuclear@0
|
82 return false;
|
nuclear@0
|
83 }
|
nuclear@0
|
84 return true;
|
nuclear@0
|
85 }
|
nuclear@0
|
86
|
nuclear@0
|
87
|
nuclear@28
|
88 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
|
nuclear@0
|
89 {
|
nuclear@0
|
90 int err;
|
nuclear@0
|
91 cl_mem mem;
|
nuclear@12
|
92 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
|
nuclear@0
|
93
|
nuclear@12
|
94 if(buf) {
|
nuclear@12
|
95 flags |= CL_MEM_COPY_HOST_PTR;
|
nuclear@12
|
96 }
|
nuclear@0
|
97
|
nuclear@12
|
98
|
nuclear@28
|
99 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
|
nuclear@8
|
100 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
|
nuclear@0
|
101 return 0;
|
nuclear@0
|
102 }
|
nuclear@0
|
103
|
nuclear@0
|
104 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@0
|
105 mbuf->mem = mem;
|
nuclear@0
|
106 mbuf->size = sz;
|
nuclear@12
|
107 mbuf->ptr = 0;
|
nuclear@39
|
108 mbuf->tex = 0;
|
nuclear@39
|
109 return mbuf;
|
nuclear@39
|
110 }
|
nuclear@39
|
111
|
nuclear@39
|
112 CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex)
|
nuclear@39
|
113 {
|
nuclear@39
|
114 int err;
|
nuclear@39
|
115 cl_mem mem;
|
nuclear@39
|
116
|
nuclear@39
|
117 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
|
nuclear@39
|
118 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
|
nuclear@39
|
119 return 0;
|
nuclear@39
|
120 }
|
nuclear@39
|
121
|
nuclear@39
|
122 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@39
|
123 mbuf->mem = mem;
|
nuclear@39
|
124 mbuf->size = 0;
|
nuclear@39
|
125 mbuf->ptr = 0;
|
nuclear@39
|
126 mbuf->tex = tex;
|
nuclear@0
|
127 return mbuf;
|
nuclear@0
|
128 }
|
nuclear@0
|
129
|
nuclear@0
|
130 void destroy_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
131 {
|
nuclear@0
|
132 if(mbuf) {
|
nuclear@0
|
133 clReleaseMemObject(mbuf->mem);
|
nuclear@0
|
134 delete mbuf;
|
nuclear@0
|
135 }
|
nuclear@0
|
136 }
|
nuclear@0
|
137
|
nuclear@39
|
138 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
|
nuclear@0
|
139 {
|
nuclear@0
|
140 if(!mbuf) return 0;
|
nuclear@0
|
141
|
nuclear@12
|
142 #ifndef NDEBUG
|
nuclear@12
|
143 if(mbuf->ptr) {
|
nuclear@12
|
144 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
|
nuclear@12
|
145 }
|
nuclear@12
|
146 #endif
|
nuclear@12
|
147
|
nuclear@0
|
148 int err;
|
nuclear@39
|
149 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
|
nuclear@0
|
150 if(!mbuf->ptr) {
|
nuclear@8
|
151 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
|
nuclear@0
|
152 return 0;
|
nuclear@0
|
153 }
|
nuclear@0
|
154 return mbuf->ptr;
|
nuclear@0
|
155 }
|
nuclear@0
|
156
|
nuclear@39
|
157 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@0
|
158 {
|
nuclear@0
|
159 if(!mbuf || !mbuf->ptr) return;
|
nuclear@39
|
160 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
|
nuclear@12
|
161 mbuf->ptr = 0;
|
nuclear@0
|
162 }
|
nuclear@0
|
163
|
nuclear@39
|
164 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
|
nuclear@0
|
165 {
|
nuclear@0
|
166 if(!mbuf) return false;
|
nuclear@0
|
167
|
nuclear@0
|
168 int err;
|
nuclear@39
|
169 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
|
nuclear@8
|
170 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
|
nuclear@0
|
171 return false;
|
nuclear@0
|
172 }
|
nuclear@0
|
173 return true;
|
nuclear@0
|
174 }
|
nuclear@0
|
175
|
nuclear@39
|
176 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
|
nuclear@0
|
177 {
|
nuclear@0
|
178 if(!mbuf) return false;
|
nuclear@0
|
179
|
nuclear@0
|
180 int err;
|
nuclear@39
|
181 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
|
nuclear@8
|
182 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
|
nuclear@0
|
183 return false;
|
nuclear@0
|
184 }
|
nuclear@0
|
185 return true;
|
nuclear@0
|
186 }
|
nuclear@0
|
187
|
nuclear@0
|
188
|
nuclear@39
|
189 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
190 {
|
nuclear@39
|
191 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
192 return false;
|
nuclear@39
|
193 }
|
nuclear@39
|
194
|
nuclear@39
|
195 int err;
|
nuclear@39
|
196 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
197 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
|
nuclear@39
|
198 return false;
|
nuclear@39
|
199 }
|
nuclear@39
|
200 return true;
|
nuclear@39
|
201 }
|
nuclear@39
|
202
|
nuclear@39
|
203 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
204 {
|
nuclear@39
|
205 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
206 return false;
|
nuclear@39
|
207 }
|
nuclear@39
|
208
|
nuclear@39
|
209 int err;
|
nuclear@39
|
210 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
211 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
|
nuclear@39
|
212 return false;
|
nuclear@39
|
213 }
|
nuclear@39
|
214 return true;
|
nuclear@39
|
215 }
|
nuclear@39
|
216
|
nuclear@39
|
217
|
John@14
|
218 CLArg::CLArg()
|
John@14
|
219 {
|
John@14
|
220 memset(this, 0, sizeof *this);
|
John@14
|
221 }
|
John@14
|
222
|
John@14
|
223
|
nuclear@0
|
224 CLProgram::CLProgram(const char *kname)
|
nuclear@0
|
225 {
|
nuclear@0
|
226 prog = 0;
|
nuclear@0
|
227 kernel = 0;
|
nuclear@0
|
228 this->kname = kname;
|
nuclear@1
|
229 args.resize(16);
|
nuclear@0
|
230 built = false;
|
nuclear@39
|
231
|
nuclear@39
|
232 wait_event = last_event = 0;
|
nuclear@0
|
233 }
|
nuclear@0
|
234
|
nuclear@0
|
235 CLProgram::~CLProgram()
|
nuclear@0
|
236 {
|
nuclear@39
|
237 if(wait_event) {
|
nuclear@39
|
238 clReleaseEvent(wait_event);
|
nuclear@39
|
239 }
|
nuclear@39
|
240 if(last_event) {
|
nuclear@39
|
241 clReleaseEvent(last_event);
|
nuclear@39
|
242 }
|
nuclear@39
|
243
|
nuclear@0
|
244 if(prog) {
|
nuclear@0
|
245
|
nuclear@0
|
246 clReleaseProgram(prog);
|
nuclear@0
|
247 }
|
nuclear@0
|
248 if(kernel) {
|
nuclear@0
|
249
|
nuclear@0
|
250 clReleaseKernel(kernel);
|
nuclear@0
|
251 }
|
nuclear@1
|
252 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
253 if(args[i].type == ARGTYPE_MEM_BUF) {
|
nuclear@1
|
254 destroy_mem_buffer(args[i].v.mbuf);
|
nuclear@0
|
255 }
|
nuclear@0
|
256 }
|
nuclear@0
|
257 }
|
nuclear@0
|
258
|
nuclear@0
|
259 bool CLProgram::load(const char *fname)
|
nuclear@0
|
260 {
|
nuclear@0
|
261 FILE *fp;
|
nuclear@0
|
262 char *src;
|
nuclear@0
|
263 struct stat st;
|
nuclear@0
|
264
|
nuclear@0
|
265 printf("loading opencl program (%s)\n", fname);
|
nuclear@0
|
266
|
nuclear@0
|
267 if(!(fp = fopen(fname, "rb"))) {
|
nuclear@0
|
268 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
|
nuclear@0
|
269 return false;
|
nuclear@0
|
270 }
|
nuclear@0
|
271
|
nuclear@0
|
272 fstat(fileno(fp), &st);
|
nuclear@0
|
273
|
nuclear@0
|
274 src = new char[st.st_size + 1];
|
nuclear@0
|
275
|
nuclear@0
|
276 fread(src, 1, st.st_size, fp);
|
nuclear@0
|
277 src[st.st_size] = 0;
|
nuclear@0
|
278 fclose(fp);
|
nuclear@0
|
279
|
nuclear@0
|
280
|
nuclear@0
|
281 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
|
nuclear@0
|
282 fprintf(stderr, "error creating program object: %s\n", fname);
|
nuclear@0
|
283 delete [] src;
|
nuclear@0
|
284 return false;
|
nuclear@0
|
285 }
|
nuclear@0
|
286 delete [] src;
|
nuclear@0
|
287 return true;
|
nuclear@0
|
288 }
|
nuclear@0
|
289
|
nuclear@1
|
290 bool CLProgram::set_argi(int idx, int val)
|
nuclear@1
|
291 {
|
nuclear@1
|
292 if((int)args.size() <= idx) {
|
nuclear@1
|
293 args.resize(idx + 1);
|
nuclear@1
|
294 }
|
nuclear@1
|
295
|
nuclear@1
|
296 CLArg *arg = &args[idx];
|
nuclear@1
|
297 arg->type = ARGTYPE_INT;
|
nuclear@1
|
298 arg->v.ival = val;
|
nuclear@1
|
299 return true;
|
nuclear@1
|
300 }
|
nuclear@1
|
301
|
nuclear@1
|
302 bool CLProgram::set_argf(int idx, float val)
|
nuclear@1
|
303 {
|
nuclear@1
|
304 if((int)args.size() <= idx) {
|
nuclear@1
|
305 args.resize(idx + 1);
|
nuclear@1
|
306 }
|
nuclear@1
|
307
|
nuclear@1
|
308 CLArg *arg = &args[idx];
|
nuclear@1
|
309 arg->type = ARGTYPE_FLOAT;
|
nuclear@1
|
310 arg->v.fval = val;
|
nuclear@1
|
311 return true;
|
nuclear@1
|
312 }
|
nuclear@1
|
313
|
nuclear@28
|
314 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
|
nuclear@0
|
315 {
|
nuclear@13
|
316 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
|
nuclear@0
|
317 CLMemBuffer *buf;
|
nuclear@0
|
318
|
nuclear@39
|
319 if(sz <= 0) {
|
nuclear@39
|
320 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
|
nuclear@39
|
321 return false;
|
nuclear@39
|
322 }
|
nuclear@39
|
323 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
|
nuclear@39
|
324 return false;
|
nuclear@39
|
325 }
|
nuclear@39
|
326
|
nuclear@39
|
327 if((int)args.size() <= idx) {
|
nuclear@39
|
328 args.resize(idx + 1);
|
nuclear@39
|
329 }
|
nuclear@39
|
330 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@39
|
331 args[idx].v.mbuf = buf;
|
nuclear@39
|
332 return true;
|
nuclear@39
|
333 }
|
nuclear@39
|
334
|
nuclear@39
|
335 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
|
nuclear@39
|
336 {
|
nuclear@39
|
337 printf("create argument %d from texture %u\n", idx, tex);
|
nuclear@39
|
338 CLMemBuffer *buf;
|
nuclear@39
|
339
|
nuclear@39
|
340 if(!(buf = create_mem_buffer(rdwr, tex))) {
|
nuclear@0
|
341 return false;
|
nuclear@0
|
342 }
|
nuclear@0
|
343
|
nuclear@1
|
344 if((int)args.size() <= idx) {
|
nuclear@1
|
345 args.resize(idx + 1);
|
nuclear@0
|
346 }
|
nuclear@1
|
347 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@1
|
348 args[idx].v.mbuf = buf;
|
nuclear@0
|
349 return true;
|
nuclear@0
|
350 }
|
nuclear@0
|
351
|
nuclear@0
|
352 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
|
nuclear@0
|
353 {
|
nuclear@1
|
354 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
|
nuclear@0
|
355 return 0;
|
nuclear@0
|
356 }
|
nuclear@1
|
357 return args[arg].v.mbuf;
|
nuclear@0
|
358 }
|
nuclear@0
|
359
|
John@14
|
360 int CLProgram::get_num_args() const
|
John@14
|
361 {
|
John@14
|
362 int num_args = 0;
|
John@14
|
363 for(size_t i=0; i<args.size(); i++) {
|
John@14
|
364 if(args[i].type != ARGTYPE_NONE) {
|
John@14
|
365 num_args++;
|
John@14
|
366 }
|
John@14
|
367 }
|
John@14
|
368 return num_args;
|
John@14
|
369 }
|
John@14
|
370
|
nuclear@0
|
371 bool CLProgram::build()
|
nuclear@0
|
372 {
|
nuclear@2
|
373 int err;
|
nuclear@0
|
374
|
nuclear@39
|
375 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
|
nuclear@2
|
376 size_t sz;
|
nuclear@2
|
377 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
|
nuclear@0
|
378
|
nuclear@2
|
379 char *errlog = (char*)alloca(sz + 1);
|
nuclear@2
|
380 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
|
nuclear@8
|
381 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
|
nuclear@2
|
382
|
nuclear@0
|
383 clReleaseProgram(prog);
|
nuclear@0
|
384 prog = 0;
|
nuclear@0
|
385 return false;
|
nuclear@0
|
386 }
|
nuclear@0
|
387
|
nuclear@0
|
388
|
nuclear@0
|
389 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
|
nuclear@0
|
390 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
|
nuclear@0
|
391 clReleaseProgram(prog);
|
nuclear@0
|
392 prog = 0;
|
nuclear@0
|
393 return false;
|
nuclear@0
|
394 }
|
nuclear@0
|
395
|
nuclear@1
|
396 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
397 int err;
|
nuclear@0
|
398
|
nuclear@1
|
399 if(args[i].type == ARGTYPE_NONE) {
|
nuclear@1
|
400 break;
|
nuclear@1
|
401 }
|
nuclear@1
|
402
|
nuclear@1
|
403 switch(args[i].type) {
|
nuclear@1
|
404 case ARGTYPE_INT:
|
nuclear@1
|
405 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
|
nuclear@8
|
406 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
407 goto fail;
|
nuclear@1
|
408 }
|
nuclear@1
|
409 break;
|
nuclear@1
|
410
|
nuclear@1
|
411 case ARGTYPE_FLOAT:
|
nuclear@1
|
412 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
|
nuclear@8
|
413 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
414 goto fail;
|
nuclear@1
|
415 }
|
nuclear@1
|
416 break;
|
nuclear@1
|
417
|
nuclear@1
|
418 case ARGTYPE_MEM_BUF:
|
nuclear@1
|
419 {
|
nuclear@1
|
420 CLMemBuffer *mbuf = args[i].v.mbuf;
|
nuclear@1
|
421
|
nuclear@1
|
422 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
|
nuclear@8
|
423 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
424 goto fail;
|
nuclear@1
|
425 }
|
nuclear@1
|
426 }
|
nuclear@1
|
427 break;
|
nuclear@1
|
428
|
nuclear@1
|
429 default:
|
nuclear@1
|
430 break;
|
nuclear@0
|
431 }
|
nuclear@0
|
432 }
|
nuclear@0
|
433
|
nuclear@0
|
434 built = true;
|
nuclear@0
|
435 return true;
|
nuclear@1
|
436
|
nuclear@1
|
437 fail:
|
nuclear@1
|
438 clReleaseProgram(prog);
|
nuclear@1
|
439 clReleaseKernel(kernel);
|
nuclear@1
|
440 prog = 0;
|
nuclear@1
|
441 kernel = 0;
|
nuclear@1
|
442 return false;
|
nuclear@0
|
443 }
|
nuclear@0
|
444
|
nuclear@0
|
445 bool CLProgram::run() const
|
nuclear@0
|
446 {
|
nuclear@0
|
447 return run(1, 1);
|
nuclear@0
|
448 }
|
nuclear@0
|
449
|
nuclear@0
|
450 bool CLProgram::run(int dim, ...) const
|
nuclear@0
|
451 {
|
nuclear@0
|
452 if(!built) {
|
nuclear@0
|
453 if(!((CLProgram*)this)->build()) {
|
nuclear@0
|
454 return false;
|
nuclear@0
|
455 }
|
nuclear@0
|
456 }
|
nuclear@0
|
457
|
nuclear@0
|
458 va_list ap;
|
nuclear@0
|
459 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
|
nuclear@0
|
460
|
nuclear@0
|
461 va_start(ap, dim);
|
nuclear@0
|
462 for(int i=0; i<dim; i++) {
|
nuclear@0
|
463 global_size[i] = va_arg(ap, int);
|
nuclear@0
|
464 }
|
nuclear@0
|
465 va_end(ap);
|
nuclear@0
|
466
|
nuclear@39
|
467 if(last_event) {
|
nuclear@39
|
468 clReleaseEvent(last_event);
|
nuclear@39
|
469 }
|
nuclear@39
|
470
|
nuclear@0
|
471 int err;
|
nuclear@39
|
472 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
|
nuclear@39
|
473 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
|
nuclear@8
|
474 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
|
nuclear@0
|
475 return false;
|
nuclear@0
|
476 }
|
nuclear@32
|
477
|
nuclear@39
|
478 if(wait_event) {
|
nuclear@39
|
479 clReleaseEvent(wait_event);
|
nuclear@39
|
480 wait_event = 0;
|
nuclear@39
|
481 }
|
nuclear@0
|
482 return true;
|
nuclear@0
|
483 }
|
nuclear@0
|
484
|
nuclear@39
|
485 void CLProgram::set_wait_event(cl_event ev)
|
nuclear@39
|
486 {
|
nuclear@39
|
487 if(wait_event) {
|
nuclear@39
|
488 clReleaseEvent(wait_event);
|
nuclear@39
|
489 }
|
nuclear@39
|
490 wait_event = ev;
|
nuclear@39
|
491 }
|
nuclear@39
|
492
|
nuclear@39
|
493 cl_event CLProgram::get_last_event() const
|
nuclear@39
|
494 {
|
nuclear@39
|
495 return last_event;
|
nuclear@39
|
496 }
|
nuclear@39
|
497
|
nuclear@0
|
498 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
|
nuclear@0
|
499 {
|
nuclear@8
|
500 unsigned int i, j, num_dev, num_plat, sel, ret;
|
nuclear@0
|
501 cl_device_id dev[32];
|
nuclear@8
|
502 cl_platform_id plat[32];
|
nuclear@0
|
503
|
nuclear@0
|
504 dev_inf->work_item_sizes = 0;
|
nuclear@0
|
505
|
nuclear@8
|
506 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
|
nuclear@8
|
507 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
508 return -1;
|
nuclear@8
|
509 }
|
nuclear@8
|
510 if(!num_plat) {
|
nuclear@8
|
511 fprintf(stderr, "OpenCL not available!\n");
|
nuclear@8
|
512 return -1;
|
nuclear@8
|
513 }
|
nuclear@0
|
514
|
nuclear@8
|
515 for(i=0; i<num_plat; i++) {
|
nuclear@8
|
516 char buf[512];
|
nuclear@8
|
517
|
nuclear@8
|
518 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
|
nuclear@8
|
519 printf("[%d]: %s", i, buf);
|
nuclear@8
|
520 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
|
nuclear@8
|
521 printf(", %s", buf);
|
nuclear@8
|
522 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
|
nuclear@8
|
523 printf(" (%s)\n", buf);
|
nuclear@8
|
524 }
|
nuclear@8
|
525
|
nuclear@8
|
526 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
|
nuclear@8
|
527 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
528 return -1;
|
nuclear@8
|
529 }
|
nuclear@0
|
530 printf("found %d cl devices.\n", num_dev);
|
nuclear@0
|
531
|
nuclear@0
|
532 for(i=0; i<num_dev; i++) {
|
nuclear@0
|
533 struct device_info di;
|
nuclear@0
|
534
|
nuclear@0
|
535 if(get_dev_info(dev[i], &di) == -1) {
|
nuclear@0
|
536 free(dev_inf->work_item_sizes);
|
nuclear@0
|
537 return -1;
|
nuclear@0
|
538 }
|
nuclear@0
|
539
|
nuclear@0
|
540 printf("--> device %u (%s)\n", i, devtypestr(di.type));
|
nuclear@0
|
541 printf("max compute units: %u\n", di.units);
|
nuclear@0
|
542 printf("max clock frequency: %u\n", di.clock);
|
nuclear@0
|
543 printf("max work item dimensions: %u\n", di.dim);
|
nuclear@0
|
544
|
nuclear@0
|
545 printf("max work item sizes: ");
|
nuclear@0
|
546 for(j=0; j<di.dim; j++) {
|
nuclear@0
|
547 printf("%u", (unsigned int)di.work_item_sizes[j]);
|
nuclear@0
|
548 if(di.dim - j > 1) {
|
nuclear@0
|
549 printf(", ");
|
nuclear@0
|
550 }
|
nuclear@0
|
551 }
|
nuclear@0
|
552 putchar('\n');
|
nuclear@0
|
553
|
nuclear@0
|
554 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
|
nuclear@0
|
555 printf("max object allocation size: ");
|
nuclear@0
|
556 print_memsize(stdout, di.mem_size);
|
nuclear@0
|
557 putchar('\n');
|
nuclear@0
|
558
|
nuclear@0
|
559 if(devcmp(&di, dev_inf) > 0) {
|
nuclear@0
|
560 free(dev_inf->work_item_sizes);
|
nuclear@0
|
561 memcpy(dev_inf, &di, sizeof di);
|
nuclear@0
|
562 sel = i;
|
nuclear@0
|
563 }
|
nuclear@0
|
564 }
|
nuclear@0
|
565
|
nuclear@0
|
566 if(num_dev) {
|
nuclear@0
|
567 printf("\nusing device: %d\n", sel);
|
nuclear@0
|
568 return 0;
|
nuclear@0
|
569 }
|
nuclear@0
|
570
|
nuclear@0
|
571 return -1;
|
nuclear@0
|
572 }
|
nuclear@0
|
573
|
nuclear@0
|
574 static int get_dev_info(cl_device_id dev, struct device_info *di)
|
nuclear@0
|
575 {
|
nuclear@0
|
576 di->id = dev;
|
nuclear@0
|
577
|
nuclear@0
|
578
|
nuclear@0
|
579 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
|
nuclear@0
|
580 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
|
nuclear@0
|
581 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
|
nuclear@0
|
582 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
|
nuclear@0
|
583
|
nuclear@0
|
584 di->work_item_sizes = new size_t[di->dim];
|
nuclear@0
|
585
|
nuclear@0
|
586 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
|
nuclear@0
|
587 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
|
nuclear@0
|
588 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
|
nuclear@0
|
589
|
nuclear@0
|
590 return 0;
|
nuclear@0
|
591 }
|
nuclear@0
|
592
|
nuclear@0
|
593 static int devcmp(struct device_info *a, struct device_info *b)
|
nuclear@0
|
594 {
|
nuclear@0
|
595 unsigned int aval = a->units * a->clock;
|
nuclear@0
|
596 unsigned int bval = b->units * b->clock;
|
nuclear@0
|
597
|
nuclear@0
|
598 return aval - bval;
|
nuclear@0
|
599 }
|
nuclear@0
|
600
|
nuclear@0
|
601 static const char *devtypestr(cl_device_type type)
|
nuclear@0
|
602 {
|
nuclear@0
|
603 switch(type) {
|
nuclear@0
|
604 case CL_DEVICE_TYPE_CPU:
|
nuclear@0
|
605 return "cpu";
|
nuclear@0
|
606 case CL_DEVICE_TYPE_GPU:
|
nuclear@0
|
607 return "gpu";
|
nuclear@0
|
608 case CL_DEVICE_TYPE_ACCELERATOR:
|
nuclear@0
|
609 return "accelerator";
|
nuclear@0
|
610 default:
|
nuclear@0
|
611 break;
|
nuclear@0
|
612 }
|
nuclear@0
|
613 return "unknown";
|
nuclear@0
|
614 }
|
nuclear@0
|
615
|
nuclear@0
|
616 static void print_memsize(FILE *out, unsigned long bytes)
|
nuclear@0
|
617 {
|
nuclear@0
|
618 int i;
|
nuclear@0
|
619 unsigned long memsz = bytes;
|
nuclear@0
|
620 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
|
nuclear@0
|
621
|
nuclear@0
|
622 for(i=0; suffix[i]; i++) {
|
nuclear@0
|
623 if(memsz < 1024) {
|
nuclear@0
|
624 fprintf(out, "%lu %s", memsz, suffix[i]);
|
nuclear@0
|
625 if(i > 0) {
|
nuclear@0
|
626 fprintf(out, " (%lu bytes)", bytes);
|
nuclear@0
|
627 }
|
nuclear@0
|
628 return;
|
nuclear@0
|
629 }
|
nuclear@0
|
630
|
nuclear@0
|
631 memsz /= 1024;
|
nuclear@0
|
632 }
|
nuclear@0
|
633 }
|
nuclear@8
|
634
|
nuclear@8
|
635 static const char *clstrerror(int err)
|
nuclear@8
|
636 {
|
nuclear@8
|
637 if(err > 0) {
|
nuclear@8
|
638 return "<invalid error code>";
|
nuclear@8
|
639 }
|
nuclear@8
|
640 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
|
nuclear@8
|
641 return "<unknown error>";
|
nuclear@8
|
642 }
|
nuclear@8
|
643 return ocl_errstr[-err];
|
nuclear@8
|
644 }
|