rev |
line source |
nuclear@8
|
1 #define OCL_CC_
|
nuclear@8
|
2
|
nuclear@0
|
3 #include <stdio.h>
|
nuclear@0
|
4 #include <stdlib.h>
|
nuclear@0
|
5 #include <string.h>
|
nuclear@8
|
6 #include <stdarg.h>
|
nuclear@0
|
7 #include <errno.h>
|
nuclear@39
|
8 #include <assert.h>
|
John@11
|
9 #ifndef _MSC_VER
|
nuclear@0
|
10 #include <alloca.h>
|
John@11
|
11 #else
|
John@11
|
12 #include <malloc.h>
|
John@11
|
13 #endif
|
nuclear@0
|
14 #include <sys/stat.h>
|
nuclear@0
|
15 #include "ocl.h"
|
nuclear@39
|
16 #include "ogl.h"
|
nuclear@8
|
17 #include "ocl_errstr.h"
|
nuclear@0
|
18
|
nuclear@39
|
19 #if defined(unix) || defined(__unix__)
|
nuclear@39
|
20 #include <X11/Xlib.h>
|
nuclear@39
|
21 #include <GL/glx.h>
|
nuclear@39
|
22 #endif
|
nuclear@0
|
23
|
nuclear@0
|
24
|
nuclear@0
|
25 struct device_info {
|
nuclear@0
|
26 cl_device_id id;
|
nuclear@0
|
27 cl_device_type type;
|
nuclear@0
|
28 unsigned int units;
|
nuclear@0
|
29 unsigned int clock;
|
nuclear@0
|
30
|
nuclear@0
|
31 unsigned int dim;
|
nuclear@0
|
32 size_t *work_item_sizes;
|
nuclear@0
|
33 size_t work_group_size;
|
nuclear@0
|
34
|
nuclear@0
|
35 unsigned long mem_size;
|
nuclear@0
|
36 };
|
nuclear@0
|
37
|
nuclear@0
|
38 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
|
nuclear@0
|
39 static int get_dev_info(cl_device_id dev, struct device_info *di);
|
nuclear@0
|
40 static int devcmp(struct device_info *a, struct device_info *b);
|
nuclear@0
|
41 static const char *devtypestr(cl_device_type type);
|
nuclear@0
|
42 static void print_memsize(FILE *out, unsigned long memsz);
|
nuclear@8
|
43 static const char *clstrerror(int err);
|
nuclear@0
|
44
|
nuclear@0
|
45
|
nuclear@0
|
46 static cl_context ctx;
|
nuclear@0
|
47 static cl_command_queue cmdq;
|
nuclear@0
|
48 static device_info devinf;
|
nuclear@0
|
49
|
nuclear@39
|
50 bool init_opencl()
|
nuclear@0
|
51 {
|
nuclear@0
|
52 if(select_device(&devinf, devcmp) == -1) {
|
nuclear@0
|
53 return false;
|
nuclear@0
|
54 }
|
nuclear@0
|
55
|
nuclear@40
|
56 #ifndef CLGL_INTEROP
|
nuclear@40
|
57 cl_context_properties *prop = 0;
|
nuclear@40
|
58
|
nuclear@40
|
59 #else
|
nuclear@40
|
60
|
nuclear@39
|
61 #if defined(__APPLE__)
|
nuclear@39
|
62 #error "CL/GL context sharing not implemented on MacOSX yet"
|
nuclear@39
|
63 #elif defined(unix) || defined(__unix__)
|
nuclear@39
|
64 Display *dpy = glXGetCurrentDisplay();
|
nuclear@39
|
65 GLXContext glctx = glXGetCurrentContext();
|
nuclear@0
|
66
|
nuclear@39
|
67 assert(dpy && glctx);
|
nuclear@39
|
68
|
nuclear@39
|
69 cl_context_properties prop[] = {
|
nuclear@39
|
70 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
|
nuclear@39
|
71 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
|
nuclear@39
|
72 0
|
nuclear@39
|
73 };
|
nuclear@39
|
74 #elif defined(WIN32) || defined(__WIN32__)
|
nuclear@39
|
75 #error "CL/GL context sharing not implemented on windows yet"
|
nuclear@39
|
76 #else
|
nuclear@39
|
77 #error "unknown or unsupported platform"
|
nuclear@39
|
78 #endif
|
nuclear@39
|
79
|
nuclear@40
|
80 #endif /* CLGL_INTEROP */
|
nuclear@40
|
81
|
nuclear@39
|
82 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
|
nuclear@0
|
83 fprintf(stderr, "failed to create opencl context\n");
|
nuclear@0
|
84 return false;
|
nuclear@0
|
85 }
|
nuclear@0
|
86
|
nuclear@0
|
87 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
|
nuclear@0
|
88 fprintf(stderr, "failed to create command queue\n");
|
nuclear@0
|
89 return false;
|
nuclear@0
|
90 }
|
nuclear@0
|
91 return true;
|
nuclear@0
|
92 }
|
nuclear@0
|
93
|
nuclear@40
|
94 void destroy_opencl()
|
nuclear@40
|
95 {
|
nuclear@40
|
96 if(cmdq) {
|
nuclear@40
|
97 clReleaseCommandQueue(cmdq);
|
nuclear@40
|
98 cmdq = 0;
|
nuclear@40
|
99 }
|
nuclear@40
|
100
|
nuclear@40
|
101 if(ctx) {
|
nuclear@40
|
102 clReleaseContext(ctx);
|
nuclear@40
|
103 ctx = 0;
|
nuclear@40
|
104 }
|
nuclear@40
|
105 }
|
nuclear@40
|
106
|
nuclear@0
|
107
|
nuclear@28
|
108 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
|
nuclear@0
|
109 {
|
nuclear@0
|
110 int err;
|
nuclear@0
|
111 cl_mem mem;
|
nuclear@12
|
112 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
|
nuclear@0
|
113
|
nuclear@12
|
114 if(buf) {
|
nuclear@12
|
115 flags |= CL_MEM_COPY_HOST_PTR;
|
nuclear@12
|
116 }
|
nuclear@0
|
117
|
nuclear@12
|
118
|
nuclear@28
|
119 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
|
nuclear@8
|
120 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
|
nuclear@0
|
121 return 0;
|
nuclear@0
|
122 }
|
nuclear@0
|
123
|
nuclear@0
|
124 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@0
|
125 mbuf->mem = mem;
|
nuclear@0
|
126 mbuf->size = sz;
|
nuclear@12
|
127 mbuf->ptr = 0;
|
nuclear@39
|
128 mbuf->tex = 0;
|
nuclear@39
|
129 return mbuf;
|
nuclear@39
|
130 }
|
nuclear@39
|
131
|
nuclear@39
|
132 CLMemBuffer *create_mem_buffer(int rdwr, unsigned int tex)
|
nuclear@39
|
133 {
|
nuclear@39
|
134 int err;
|
nuclear@39
|
135 cl_mem mem;
|
nuclear@39
|
136
|
nuclear@39
|
137 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
|
nuclear@39
|
138 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
|
nuclear@39
|
139 return 0;
|
nuclear@39
|
140 }
|
nuclear@39
|
141
|
nuclear@39
|
142 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@39
|
143 mbuf->mem = mem;
|
nuclear@39
|
144 mbuf->size = 0;
|
nuclear@39
|
145 mbuf->ptr = 0;
|
nuclear@39
|
146 mbuf->tex = tex;
|
nuclear@0
|
147 return mbuf;
|
nuclear@0
|
148 }
|
nuclear@0
|
149
|
nuclear@0
|
150 void destroy_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
151 {
|
nuclear@0
|
152 if(mbuf) {
|
nuclear@0
|
153 clReleaseMemObject(mbuf->mem);
|
nuclear@0
|
154 delete mbuf;
|
nuclear@0
|
155 }
|
nuclear@0
|
156 }
|
nuclear@0
|
157
|
nuclear@39
|
158 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
|
nuclear@0
|
159 {
|
nuclear@0
|
160 if(!mbuf) return 0;
|
nuclear@0
|
161
|
nuclear@12
|
162 #ifndef NDEBUG
|
nuclear@12
|
163 if(mbuf->ptr) {
|
nuclear@12
|
164 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
|
nuclear@12
|
165 }
|
nuclear@12
|
166 #endif
|
nuclear@12
|
167
|
nuclear@0
|
168 int err;
|
nuclear@39
|
169 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
|
nuclear@0
|
170 if(!mbuf->ptr) {
|
nuclear@8
|
171 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
|
nuclear@0
|
172 return 0;
|
nuclear@0
|
173 }
|
nuclear@0
|
174 return mbuf->ptr;
|
nuclear@0
|
175 }
|
nuclear@0
|
176
|
nuclear@39
|
177 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@0
|
178 {
|
nuclear@0
|
179 if(!mbuf || !mbuf->ptr) return;
|
nuclear@39
|
180 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
|
nuclear@12
|
181 mbuf->ptr = 0;
|
nuclear@0
|
182 }
|
nuclear@0
|
183
|
nuclear@39
|
184 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
|
nuclear@0
|
185 {
|
nuclear@0
|
186 if(!mbuf) return false;
|
nuclear@0
|
187
|
nuclear@0
|
188 int err;
|
nuclear@39
|
189 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
|
nuclear@8
|
190 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
|
nuclear@0
|
191 return false;
|
nuclear@0
|
192 }
|
nuclear@0
|
193 return true;
|
nuclear@0
|
194 }
|
nuclear@0
|
195
|
nuclear@39
|
196 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
|
nuclear@0
|
197 {
|
nuclear@0
|
198 if(!mbuf) return false;
|
nuclear@0
|
199
|
nuclear@0
|
200 int err;
|
nuclear@39
|
201 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
|
nuclear@8
|
202 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
|
nuclear@0
|
203 return false;
|
nuclear@0
|
204 }
|
nuclear@0
|
205 return true;
|
nuclear@0
|
206 }
|
nuclear@0
|
207
|
nuclear@0
|
208
|
nuclear@39
|
209 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
210 {
|
nuclear@39
|
211 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
212 return false;
|
nuclear@39
|
213 }
|
nuclear@39
|
214
|
nuclear@39
|
215 int err;
|
nuclear@39
|
216 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
217 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
|
nuclear@39
|
218 return false;
|
nuclear@39
|
219 }
|
nuclear@39
|
220 return true;
|
nuclear@39
|
221 }
|
nuclear@39
|
222
|
nuclear@39
|
223 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
224 {
|
nuclear@39
|
225 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
226 return false;
|
nuclear@39
|
227 }
|
nuclear@39
|
228
|
nuclear@39
|
229 int err;
|
nuclear@39
|
230 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
231 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
|
nuclear@39
|
232 return false;
|
nuclear@39
|
233 }
|
nuclear@39
|
234 return true;
|
nuclear@39
|
235 }
|
nuclear@39
|
236
|
nuclear@39
|
237
|
John@14
|
238 CLArg::CLArg()
|
John@14
|
239 {
|
John@14
|
240 memset(this, 0, sizeof *this);
|
John@14
|
241 }
|
John@14
|
242
|
John@14
|
243
|
nuclear@0
|
244 CLProgram::CLProgram(const char *kname)
|
nuclear@0
|
245 {
|
nuclear@0
|
246 prog = 0;
|
nuclear@0
|
247 kernel = 0;
|
nuclear@0
|
248 this->kname = kname;
|
nuclear@1
|
249 args.resize(16);
|
nuclear@0
|
250 built = false;
|
nuclear@39
|
251
|
nuclear@39
|
252 wait_event = last_event = 0;
|
nuclear@0
|
253 }
|
nuclear@0
|
254
|
nuclear@0
|
255 CLProgram::~CLProgram()
|
nuclear@0
|
256 {
|
nuclear@39
|
257 if(wait_event) {
|
nuclear@39
|
258 clReleaseEvent(wait_event);
|
nuclear@39
|
259 }
|
nuclear@39
|
260 if(last_event) {
|
nuclear@40
|
261 clWaitForEvents(1, &last_event);
|
nuclear@39
|
262 clReleaseEvent(last_event);
|
nuclear@39
|
263 }
|
nuclear@39
|
264
|
nuclear@0
|
265 if(prog) {
|
nuclear@0
|
266 clReleaseProgram(prog);
|
nuclear@0
|
267 }
|
nuclear@0
|
268 if(kernel) {
|
nuclear@0
|
269 clReleaseKernel(kernel);
|
nuclear@0
|
270 }
|
nuclear@1
|
271 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
272 if(args[i].type == ARGTYPE_MEM_BUF) {
|
nuclear@1
|
273 destroy_mem_buffer(args[i].v.mbuf);
|
nuclear@0
|
274 }
|
nuclear@0
|
275 }
|
nuclear@0
|
276 }
|
nuclear@0
|
277
|
nuclear@0
|
278 bool CLProgram::load(const char *fname)
|
nuclear@0
|
279 {
|
nuclear@0
|
280 FILE *fp;
|
nuclear@0
|
281 char *src;
|
nuclear@0
|
282 struct stat st;
|
nuclear@0
|
283
|
nuclear@0
|
284 printf("loading opencl program (%s)\n", fname);
|
nuclear@0
|
285
|
nuclear@0
|
286 if(!(fp = fopen(fname, "rb"))) {
|
nuclear@0
|
287 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
|
nuclear@0
|
288 return false;
|
nuclear@0
|
289 }
|
nuclear@0
|
290
|
nuclear@0
|
291 fstat(fileno(fp), &st);
|
nuclear@0
|
292
|
nuclear@0
|
293 src = new char[st.st_size + 1];
|
nuclear@0
|
294
|
nuclear@0
|
295 fread(src, 1, st.st_size, fp);
|
nuclear@0
|
296 src[st.st_size] = 0;
|
nuclear@0
|
297 fclose(fp);
|
nuclear@0
|
298
|
nuclear@0
|
299
|
nuclear@0
|
300 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
|
nuclear@0
|
301 fprintf(stderr, "error creating program object: %s\n", fname);
|
nuclear@0
|
302 delete [] src;
|
nuclear@0
|
303 return false;
|
nuclear@0
|
304 }
|
nuclear@0
|
305 delete [] src;
|
nuclear@0
|
306 return true;
|
nuclear@0
|
307 }
|
nuclear@0
|
308
|
nuclear@1
|
309 bool CLProgram::set_argi(int idx, int val)
|
nuclear@1
|
310 {
|
nuclear@1
|
311 if((int)args.size() <= idx) {
|
nuclear@1
|
312 args.resize(idx + 1);
|
nuclear@1
|
313 }
|
nuclear@1
|
314
|
nuclear@1
|
315 CLArg *arg = &args[idx];
|
nuclear@1
|
316 arg->type = ARGTYPE_INT;
|
nuclear@1
|
317 arg->v.ival = val;
|
nuclear@1
|
318 return true;
|
nuclear@1
|
319 }
|
nuclear@1
|
320
|
nuclear@1
|
321 bool CLProgram::set_argf(int idx, float val)
|
nuclear@1
|
322 {
|
nuclear@1
|
323 if((int)args.size() <= idx) {
|
nuclear@1
|
324 args.resize(idx + 1);
|
nuclear@1
|
325 }
|
nuclear@1
|
326
|
nuclear@1
|
327 CLArg *arg = &args[idx];
|
nuclear@1
|
328 arg->type = ARGTYPE_FLOAT;
|
nuclear@1
|
329 arg->v.fval = val;
|
nuclear@1
|
330 return true;
|
nuclear@1
|
331 }
|
nuclear@1
|
332
|
nuclear@28
|
333 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
|
nuclear@0
|
334 {
|
nuclear@13
|
335 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
|
nuclear@0
|
336 CLMemBuffer *buf;
|
nuclear@0
|
337
|
nuclear@39
|
338 if(sz <= 0) {
|
nuclear@39
|
339 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
|
nuclear@39
|
340 return false;
|
nuclear@39
|
341 }
|
nuclear@39
|
342 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
|
nuclear@39
|
343 return false;
|
nuclear@39
|
344 }
|
nuclear@39
|
345
|
nuclear@39
|
346 if((int)args.size() <= idx) {
|
nuclear@39
|
347 args.resize(idx + 1);
|
nuclear@39
|
348 }
|
nuclear@39
|
349 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@39
|
350 args[idx].v.mbuf = buf;
|
nuclear@39
|
351 return true;
|
nuclear@39
|
352 }
|
nuclear@39
|
353
|
nuclear@39
|
354 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
|
nuclear@39
|
355 {
|
nuclear@39
|
356 printf("create argument %d from texture %u\n", idx, tex);
|
nuclear@39
|
357 CLMemBuffer *buf;
|
nuclear@39
|
358
|
nuclear@39
|
359 if(!(buf = create_mem_buffer(rdwr, tex))) {
|
nuclear@0
|
360 return false;
|
nuclear@0
|
361 }
|
nuclear@0
|
362
|
nuclear@1
|
363 if((int)args.size() <= idx) {
|
nuclear@1
|
364 args.resize(idx + 1);
|
nuclear@0
|
365 }
|
nuclear@1
|
366 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@1
|
367 args[idx].v.mbuf = buf;
|
nuclear@0
|
368 return true;
|
nuclear@0
|
369 }
|
nuclear@0
|
370
|
nuclear@0
|
371 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
|
nuclear@0
|
372 {
|
nuclear@1
|
373 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
|
nuclear@0
|
374 return 0;
|
nuclear@0
|
375 }
|
nuclear@1
|
376 return args[arg].v.mbuf;
|
nuclear@0
|
377 }
|
nuclear@0
|
378
|
John@14
|
379 int CLProgram::get_num_args() const
|
John@14
|
380 {
|
John@14
|
381 int num_args = 0;
|
John@14
|
382 for(size_t i=0; i<args.size(); i++) {
|
John@14
|
383 if(args[i].type != ARGTYPE_NONE) {
|
John@14
|
384 num_args++;
|
John@14
|
385 }
|
John@14
|
386 }
|
John@14
|
387 return num_args;
|
John@14
|
388 }
|
John@14
|
389
|
nuclear@0
|
390 bool CLProgram::build()
|
nuclear@0
|
391 {
|
nuclear@2
|
392 int err;
|
nuclear@0
|
393
|
nuclear@39
|
394 if((err = clBuildProgram(prog, 0, 0, "-cl-mad-enable", 0, 0)) != 0) {
|
nuclear@2
|
395 size_t sz;
|
nuclear@2
|
396 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
|
nuclear@0
|
397
|
nuclear@2
|
398 char *errlog = (char*)alloca(sz + 1);
|
nuclear@2
|
399 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
|
nuclear@8
|
400 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
|
nuclear@2
|
401
|
nuclear@0
|
402 clReleaseProgram(prog);
|
nuclear@0
|
403 prog = 0;
|
nuclear@0
|
404 return false;
|
nuclear@0
|
405 }
|
nuclear@0
|
406
|
nuclear@0
|
407
|
nuclear@0
|
408 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
|
nuclear@0
|
409 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
|
nuclear@0
|
410 clReleaseProgram(prog);
|
nuclear@0
|
411 prog = 0;
|
nuclear@0
|
412 return false;
|
nuclear@0
|
413 }
|
nuclear@0
|
414
|
nuclear@1
|
415 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
416 int err;
|
nuclear@0
|
417
|
nuclear@1
|
418 if(args[i].type == ARGTYPE_NONE) {
|
nuclear@1
|
419 break;
|
nuclear@1
|
420 }
|
nuclear@1
|
421
|
nuclear@1
|
422 switch(args[i].type) {
|
nuclear@1
|
423 case ARGTYPE_INT:
|
nuclear@1
|
424 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
|
nuclear@8
|
425 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
426 goto fail;
|
nuclear@1
|
427 }
|
nuclear@1
|
428 break;
|
nuclear@1
|
429
|
nuclear@1
|
430 case ARGTYPE_FLOAT:
|
nuclear@1
|
431 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
|
nuclear@8
|
432 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
433 goto fail;
|
nuclear@1
|
434 }
|
nuclear@1
|
435 break;
|
nuclear@1
|
436
|
nuclear@1
|
437 case ARGTYPE_MEM_BUF:
|
nuclear@1
|
438 {
|
nuclear@1
|
439 CLMemBuffer *mbuf = args[i].v.mbuf;
|
nuclear@1
|
440
|
nuclear@1
|
441 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
|
nuclear@8
|
442 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
443 goto fail;
|
nuclear@1
|
444 }
|
nuclear@1
|
445 }
|
nuclear@1
|
446 break;
|
nuclear@1
|
447
|
nuclear@1
|
448 default:
|
nuclear@1
|
449 break;
|
nuclear@0
|
450 }
|
nuclear@0
|
451 }
|
nuclear@0
|
452
|
nuclear@0
|
453 built = true;
|
nuclear@0
|
454 return true;
|
nuclear@1
|
455
|
nuclear@1
|
456 fail:
|
nuclear@1
|
457 clReleaseProgram(prog);
|
nuclear@1
|
458 clReleaseKernel(kernel);
|
nuclear@1
|
459 prog = 0;
|
nuclear@1
|
460 kernel = 0;
|
nuclear@1
|
461 return false;
|
nuclear@0
|
462 }
|
nuclear@0
|
463
|
nuclear@0
|
464 bool CLProgram::run() const
|
nuclear@0
|
465 {
|
nuclear@0
|
466 return run(1, 1);
|
nuclear@0
|
467 }
|
nuclear@0
|
468
|
nuclear@0
|
469 bool CLProgram::run(int dim, ...) const
|
nuclear@0
|
470 {
|
nuclear@0
|
471 va_list ap;
|
nuclear@0
|
472 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
|
nuclear@0
|
473
|
nuclear@0
|
474 va_start(ap, dim);
|
nuclear@0
|
475 for(int i=0; i<dim; i++) {
|
nuclear@0
|
476 global_size[i] = va_arg(ap, int);
|
nuclear@0
|
477 }
|
nuclear@0
|
478 va_end(ap);
|
nuclear@0
|
479
|
nuclear@39
|
480 if(last_event) {
|
nuclear@39
|
481 clReleaseEvent(last_event);
|
nuclear@39
|
482 }
|
nuclear@39
|
483
|
nuclear@0
|
484 int err;
|
nuclear@39
|
485 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
|
nuclear@39
|
486 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
|
nuclear@8
|
487 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
|
nuclear@0
|
488 return false;
|
nuclear@0
|
489 }
|
nuclear@32
|
490
|
nuclear@39
|
491 if(wait_event) {
|
nuclear@39
|
492 clReleaseEvent(wait_event);
|
nuclear@39
|
493 wait_event = 0;
|
nuclear@39
|
494 }
|
nuclear@0
|
495 return true;
|
nuclear@0
|
496 }
|
nuclear@0
|
497
|
nuclear@39
|
498 void CLProgram::set_wait_event(cl_event ev)
|
nuclear@39
|
499 {
|
nuclear@39
|
500 if(wait_event) {
|
nuclear@39
|
501 clReleaseEvent(wait_event);
|
nuclear@39
|
502 }
|
nuclear@39
|
503 wait_event = ev;
|
nuclear@39
|
504 }
|
nuclear@39
|
505
|
nuclear@39
|
506 cl_event CLProgram::get_last_event() const
|
nuclear@39
|
507 {
|
nuclear@39
|
508 return last_event;
|
nuclear@39
|
509 }
|
nuclear@39
|
510
|
nuclear@0
|
511 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
|
nuclear@0
|
512 {
|
nuclear@8
|
513 unsigned int i, j, num_dev, num_plat, sel, ret;
|
nuclear@0
|
514 cl_device_id dev[32];
|
nuclear@8
|
515 cl_platform_id plat[32];
|
nuclear@0
|
516
|
nuclear@0
|
517 dev_inf->work_item_sizes = 0;
|
nuclear@0
|
518
|
nuclear@8
|
519 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
|
nuclear@8
|
520 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
521 return -1;
|
nuclear@8
|
522 }
|
nuclear@8
|
523 if(!num_plat) {
|
nuclear@8
|
524 fprintf(stderr, "OpenCL not available!\n");
|
nuclear@8
|
525 return -1;
|
nuclear@8
|
526 }
|
nuclear@0
|
527
|
nuclear@8
|
528 for(i=0; i<num_plat; i++) {
|
nuclear@8
|
529 char buf[512];
|
nuclear@8
|
530
|
nuclear@8
|
531 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
|
nuclear@8
|
532 printf("[%d]: %s", i, buf);
|
nuclear@8
|
533 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
|
nuclear@8
|
534 printf(", %s", buf);
|
nuclear@8
|
535 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
|
nuclear@8
|
536 printf(" (%s)\n", buf);
|
nuclear@8
|
537 }
|
nuclear@8
|
538
|
nuclear@8
|
539 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
|
nuclear@8
|
540 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
541 return -1;
|
nuclear@8
|
542 }
|
nuclear@0
|
543 printf("found %d cl devices.\n", num_dev);
|
nuclear@0
|
544
|
nuclear@0
|
545 for(i=0; i<num_dev; i++) {
|
nuclear@0
|
546 struct device_info di;
|
nuclear@0
|
547
|
nuclear@0
|
548 if(get_dev_info(dev[i], &di) == -1) {
|
nuclear@0
|
549 free(dev_inf->work_item_sizes);
|
nuclear@0
|
550 return -1;
|
nuclear@0
|
551 }
|
nuclear@0
|
552
|
nuclear@0
|
553 printf("--> device %u (%s)\n", i, devtypestr(di.type));
|
nuclear@0
|
554 printf("max compute units: %u\n", di.units);
|
nuclear@0
|
555 printf("max clock frequency: %u\n", di.clock);
|
nuclear@0
|
556 printf("max work item dimensions: %u\n", di.dim);
|
nuclear@0
|
557
|
nuclear@0
|
558 printf("max work item sizes: ");
|
nuclear@0
|
559 for(j=0; j<di.dim; j++) {
|
nuclear@0
|
560 printf("%u", (unsigned int)di.work_item_sizes[j]);
|
nuclear@0
|
561 if(di.dim - j > 1) {
|
nuclear@0
|
562 printf(", ");
|
nuclear@0
|
563 }
|
nuclear@0
|
564 }
|
nuclear@0
|
565 putchar('\n');
|
nuclear@0
|
566
|
nuclear@0
|
567 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
|
nuclear@0
|
568 printf("max object allocation size: ");
|
nuclear@0
|
569 print_memsize(stdout, di.mem_size);
|
nuclear@0
|
570 putchar('\n');
|
nuclear@0
|
571
|
nuclear@0
|
572 if(devcmp(&di, dev_inf) > 0) {
|
nuclear@0
|
573 free(dev_inf->work_item_sizes);
|
nuclear@0
|
574 memcpy(dev_inf, &di, sizeof di);
|
nuclear@0
|
575 sel = i;
|
nuclear@0
|
576 }
|
nuclear@0
|
577 }
|
nuclear@0
|
578
|
nuclear@0
|
579 if(num_dev) {
|
nuclear@0
|
580 printf("\nusing device: %d\n", sel);
|
nuclear@0
|
581 return 0;
|
nuclear@0
|
582 }
|
nuclear@0
|
583
|
nuclear@0
|
584 return -1;
|
nuclear@0
|
585 }
|
nuclear@0
|
586
|
nuclear@0
|
587 static int get_dev_info(cl_device_id dev, struct device_info *di)
|
nuclear@0
|
588 {
|
nuclear@0
|
589 di->id = dev;
|
nuclear@0
|
590
|
nuclear@0
|
591
|
nuclear@0
|
592 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
|
nuclear@0
|
593 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
|
nuclear@0
|
594 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
|
nuclear@0
|
595 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
|
nuclear@0
|
596
|
nuclear@0
|
597 di->work_item_sizes = new size_t[di->dim];
|
nuclear@0
|
598
|
nuclear@0
|
599 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
|
nuclear@0
|
600 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
|
nuclear@0
|
601 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
|
nuclear@0
|
602
|
nuclear@0
|
603 return 0;
|
nuclear@0
|
604 }
|
nuclear@0
|
605
|
nuclear@0
|
606 static int devcmp(struct device_info *a, struct device_info *b)
|
nuclear@0
|
607 {
|
nuclear@0
|
608 unsigned int aval = a->units * a->clock;
|
nuclear@0
|
609 unsigned int bval = b->units * b->clock;
|
nuclear@0
|
610
|
nuclear@0
|
611 return aval - bval;
|
nuclear@0
|
612 }
|
nuclear@0
|
613
|
nuclear@0
|
614 static const char *devtypestr(cl_device_type type)
|
nuclear@0
|
615 {
|
nuclear@0
|
616 switch(type) {
|
nuclear@0
|
617 case CL_DEVICE_TYPE_CPU:
|
nuclear@0
|
618 return "cpu";
|
nuclear@0
|
619 case CL_DEVICE_TYPE_GPU:
|
nuclear@0
|
620 return "gpu";
|
nuclear@0
|
621 case CL_DEVICE_TYPE_ACCELERATOR:
|
nuclear@0
|
622 return "accelerator";
|
nuclear@0
|
623 default:
|
nuclear@0
|
624 break;
|
nuclear@0
|
625 }
|
nuclear@0
|
626 return "unknown";
|
nuclear@0
|
627 }
|
nuclear@0
|
628
|
nuclear@0
|
629 static void print_memsize(FILE *out, unsigned long bytes)
|
nuclear@0
|
630 {
|
nuclear@0
|
631 int i;
|
nuclear@0
|
632 unsigned long memsz = bytes;
|
nuclear@0
|
633 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
|
nuclear@0
|
634
|
nuclear@0
|
635 for(i=0; suffix[i]; i++) {
|
nuclear@0
|
636 if(memsz < 1024) {
|
nuclear@0
|
637 fprintf(out, "%lu %s", memsz, suffix[i]);
|
nuclear@0
|
638 if(i > 0) {
|
nuclear@0
|
639 fprintf(out, " (%lu bytes)", bytes);
|
nuclear@0
|
640 }
|
nuclear@0
|
641 return;
|
nuclear@0
|
642 }
|
nuclear@0
|
643
|
nuclear@0
|
644 memsz /= 1024;
|
nuclear@0
|
645 }
|
nuclear@0
|
646 }
|
nuclear@8
|
647
|
nuclear@8
|
648 static const char *clstrerror(int err)
|
nuclear@8
|
649 {
|
nuclear@8
|
650 if(err > 0) {
|
nuclear@8
|
651 return "<invalid error code>";
|
nuclear@8
|
652 }
|
nuclear@8
|
653 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
|
nuclear@8
|
654 return "<unknown error>";
|
nuclear@8
|
655 }
|
nuclear@8
|
656 return ocl_errstr[-err];
|
nuclear@8
|
657 }
|