rev |
line source |
nuclear@8
|
1 #define OCL_CC_
|
nuclear@8
|
2
|
nuclear@0
|
3 #include <stdio.h>
|
nuclear@0
|
4 #include <stdlib.h>
|
nuclear@0
|
5 #include <string.h>
|
nuclear@8
|
6 #include <stdarg.h>
|
nuclear@0
|
7 #include <errno.h>
|
nuclear@39
|
8 #include <assert.h>
|
John@11
|
9 #ifndef _MSC_VER
|
nuclear@0
|
10 #include <alloca.h>
|
John@11
|
11 #else
|
John@11
|
12 #include <malloc.h>
|
John@11
|
13 #endif
|
nuclear@0
|
14 #include <sys/stat.h>
|
nuclear@0
|
15 #include "ocl.h"
|
nuclear@39
|
16 #include "ogl.h"
|
nuclear@8
|
17 #include "ocl_errstr.h"
|
nuclear@0
|
18
|
nuclear@39
|
19 #if defined(unix) || defined(__unix__)
|
nuclear@39
|
20 #include <X11/Xlib.h>
|
nuclear@39
|
21 #include <GL/glx.h>
|
nuclear@39
|
22 #endif
|
nuclear@0
|
23
|
nuclear@42
|
24 #ifdef __APPLE__
|
nuclear@42
|
25 #include <OpenGL/CGLCurrent.h>
|
nuclear@42
|
26 #endif
|
nuclear@42
|
27
|
nuclear@0
|
28
|
nuclear@0
|
29 struct device_info {
|
nuclear@0
|
30 cl_device_id id;
|
nuclear@0
|
31 cl_device_type type;
|
nuclear@0
|
32 unsigned int units;
|
nuclear@0
|
33 unsigned int clock;
|
nuclear@0
|
34
|
nuclear@0
|
35 unsigned int dim;
|
nuclear@0
|
36 size_t *work_item_sizes;
|
nuclear@0
|
37 size_t work_group_size;
|
nuclear@0
|
38
|
nuclear@0
|
39 unsigned long mem_size;
|
nuclear@42
|
40
|
nuclear@42
|
41 char *extensions;
|
nuclear@42
|
42 bool gl_sharing;
|
nuclear@0
|
43 };
|
nuclear@0
|
44
|
nuclear@0
|
45 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
|
nuclear@0
|
46 static int get_dev_info(cl_device_id dev, struct device_info *di);
|
nuclear@42
|
47 static void destroy_dev_info(struct device_info *di);
|
nuclear@0
|
48 static int devcmp(struct device_info *a, struct device_info *b);
|
nuclear@0
|
49 static const char *devtypestr(cl_device_type type);
|
nuclear@0
|
50 static void print_memsize(FILE *out, unsigned long memsz);
|
nuclear@8
|
51 static const char *clstrerror(int err);
|
nuclear@0
|
52
|
nuclear@0
|
53
|
nuclear@0
|
54 static cl_context ctx;
|
nuclear@0
|
55 static cl_command_queue cmdq;
|
nuclear@0
|
56 static device_info devinf;
|
nuclear@0
|
57
|
nuclear@39
|
58 bool init_opencl()
|
nuclear@0
|
59 {
|
nuclear@0
|
60 if(select_device(&devinf, devcmp) == -1) {
|
nuclear@0
|
61 return false;
|
nuclear@0
|
62 }
|
nuclear@0
|
63
|
nuclear@42
|
64
|
nuclear@42
|
65
|
nuclear@40
|
66 #ifndef CLGL_INTEROP
|
nuclear@40
|
67 cl_context_properties *prop = 0;
|
nuclear@40
|
68 #else
|
nuclear@40
|
69
|
nuclear@39
|
70 #if defined(__APPLE__)
|
nuclear@42
|
71 CGLContextObj glctx = CGLGetCurrentContext();
|
nuclear@42
|
72 CGLShareGroupObj sgrp = CGLGetShareGroup(glctx);
|
nuclear@42
|
73
|
nuclear@42
|
74 cl_context_properties prop[] = {
|
nuclear@42
|
75 #ifdef CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE
|
nuclear@42
|
76 CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)sgrp,
|
nuclear@42
|
77 #else
|
nuclear@42
|
78 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
|
nuclear@42
|
79 CL_CGL_SHAREGROUP_KHR, (cl_context_properties)sgrp,
|
nuclear@42
|
80 #endif
|
nuclear@42
|
81 0
|
nuclear@42
|
82 };
|
nuclear@39
|
83 #elif defined(unix) || defined(__unix__)
|
nuclear@39
|
84 Display *dpy = glXGetCurrentDisplay();
|
nuclear@39
|
85 GLXContext glctx = glXGetCurrentContext();
|
nuclear@0
|
86
|
nuclear@39
|
87 assert(dpy && glctx);
|
nuclear@39
|
88
|
nuclear@39
|
89 cl_context_properties prop[] = {
|
nuclear@39
|
90 CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy,
|
nuclear@39
|
91 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
|
nuclear@39
|
92 0
|
nuclear@39
|
93 };
|
nuclear@39
|
94 #elif defined(WIN32) || defined(__WIN32__)
|
nuclear@42
|
95 HGLRC glctx = wglGetCurrentContext();
|
nuclear@42
|
96 HDC dc = wglGetCurrentDC();
|
nuclear@42
|
97
|
nuclear@42
|
98 cl_context_properties prop[] = {
|
nuclear@42
|
99 CL_GL_CONTEXT_KHR, (cl_context_properties)glctx,
|
nuclear@42
|
100 CL_WGL_HDC_KHR, (cl_context_properties)dc,
|
nuclear@42
|
101 0
|
nuclear@42
|
102 };
|
nuclear@39
|
103 #else
|
nuclear@39
|
104 #error "unknown or unsupported platform"
|
nuclear@39
|
105 #endif
|
nuclear@39
|
106
|
nuclear@40
|
107 #endif /* CLGL_INTEROP */
|
nuclear@40
|
108
|
nuclear@39
|
109 if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) {
|
nuclear@0
|
110 fprintf(stderr, "failed to create opencl context\n");
|
nuclear@0
|
111 return false;
|
nuclear@0
|
112 }
|
nuclear@0
|
113
|
nuclear@0
|
114 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
|
nuclear@0
|
115 fprintf(stderr, "failed to create command queue\n");
|
nuclear@0
|
116 return false;
|
nuclear@0
|
117 }
|
nuclear@0
|
118 return true;
|
nuclear@0
|
119 }
|
nuclear@0
|
120
|
nuclear@40
|
121 void destroy_opencl()
|
nuclear@40
|
122 {
|
nuclear@40
|
123 if(cmdq) {
|
nuclear@40
|
124 clReleaseCommandQueue(cmdq);
|
nuclear@40
|
125 cmdq = 0;
|
nuclear@40
|
126 }
|
nuclear@40
|
127
|
nuclear@40
|
128 if(ctx) {
|
nuclear@40
|
129 clReleaseContext(ctx);
|
nuclear@40
|
130 ctx = 0;
|
nuclear@40
|
131 }
|
nuclear@40
|
132 }
|
nuclear@40
|
133
|
nuclear@0
|
134
|
nuclear@28
|
135 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf)
|
nuclear@0
|
136 {
|
nuclear@0
|
137 int err;
|
nuclear@0
|
138 cl_mem mem;
|
nuclear@12
|
139 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
|
nuclear@0
|
140
|
nuclear@12
|
141 if(buf) {
|
nuclear@12
|
142 flags |= CL_MEM_COPY_HOST_PTR;
|
nuclear@12
|
143 }
|
nuclear@0
|
144
|
nuclear@12
|
145
|
nuclear@28
|
146 if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) {
|
nuclear@8
|
147 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
|
nuclear@0
|
148 return 0;
|
nuclear@0
|
149 }
|
nuclear@0
|
150
|
nuclear@0
|
151 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@41
|
152 mbuf->type = MEM_BUFFER;
|
nuclear@0
|
153 mbuf->mem = mem;
|
nuclear@0
|
154 mbuf->size = sz;
|
nuclear@41
|
155 mbuf->xsz = mbuf->ysz = 0;
|
nuclear@12
|
156 mbuf->ptr = 0;
|
nuclear@39
|
157 mbuf->tex = 0;
|
nuclear@39
|
158 return mbuf;
|
nuclear@39
|
159 }
|
nuclear@39
|
160
|
nuclear@41
|
161 CLMemBuffer *create_image_buffer(int rdwr, int xsz, int ysz, const void *pixels)
|
nuclear@39
|
162 {
|
nuclear@41
|
163 int err, pitch;
|
nuclear@39
|
164 cl_mem mem;
|
nuclear@41
|
165 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
|
nuclear@41
|
166
|
nuclear@41
|
167 if(pixels) {
|
nuclear@41
|
168 flags |= CL_MEM_COPY_HOST_PTR;
|
nuclear@41
|
169 pitch = xsz * 4 * sizeof(float);
|
nuclear@41
|
170 } else {
|
nuclear@41
|
171 pitch = 0;
|
nuclear@41
|
172 }
|
nuclear@41
|
173
|
nuclear@41
|
174 cl_image_format fmt = {CL_RGBA, CL_FLOAT};
|
nuclear@41
|
175
|
nuclear@41
|
176 if(!(mem = clCreateImage2D(ctx, flags, &fmt, xsz, ysz, pitch, (void*)pixels, &err))) {
|
nuclear@41
|
177 fprintf(stderr, "failed to create %dx%d image: %s\n", xsz, ysz, clstrerror(err));
|
nuclear@41
|
178 return 0;
|
nuclear@41
|
179 }
|
nuclear@41
|
180
|
nuclear@41
|
181 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@41
|
182 mbuf->type = IMAGE_BUFFER;
|
nuclear@41
|
183 mbuf->mem = mem;
|
nuclear@41
|
184 mbuf->size = ysz * pitch;
|
nuclear@41
|
185 mbuf->xsz = xsz;
|
nuclear@41
|
186 mbuf->ysz = ysz;
|
nuclear@41
|
187 mbuf->ptr = 0;
|
nuclear@41
|
188 mbuf->tex = 0;
|
nuclear@41
|
189 return mbuf;
|
nuclear@41
|
190 }
|
nuclear@41
|
191
|
nuclear@41
|
192 CLMemBuffer *create_image_buffer(int rdwr, unsigned int tex)
|
nuclear@41
|
193 {
|
nuclear@41
|
194 int err, xsz, ysz;
|
nuclear@41
|
195 cl_mem mem;
|
nuclear@41
|
196
|
nuclear@41
|
197 glGetError(); // clear previous OpenGL errors
|
nuclear@41
|
198
|
nuclear@41
|
199 glPushAttrib(GL_TEXTURE_BIT);
|
nuclear@41
|
200 glBindTexture(GL_TEXTURE_2D, tex);
|
nuclear@41
|
201 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &xsz);
|
nuclear@41
|
202 glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_HEIGHT, &ysz);
|
nuclear@41
|
203 glPopAttrib();
|
nuclear@41
|
204
|
nuclear@41
|
205 if(glGetError()) {
|
nuclear@41
|
206 fprintf(stderr, "create_image_buffer: GL error while retreiving texture parameters for texture %u\n", tex);
|
nuclear@41
|
207 return 0;
|
nuclear@41
|
208 }
|
nuclear@39
|
209
|
nuclear@39
|
210 if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) {
|
nuclear@39
|
211 fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err));
|
nuclear@39
|
212 return 0;
|
nuclear@39
|
213 }
|
nuclear@39
|
214
|
nuclear@39
|
215 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@41
|
216 mbuf->type = IMAGE_BUFFER;
|
nuclear@39
|
217 mbuf->mem = mem;
|
nuclear@39
|
218 mbuf->size = 0;
|
nuclear@41
|
219 mbuf->xsz = xsz;
|
nuclear@41
|
220 mbuf->ysz = ysz;
|
nuclear@39
|
221 mbuf->ptr = 0;
|
nuclear@39
|
222 mbuf->tex = tex;
|
nuclear@41
|
223
|
nuclear@0
|
224 return mbuf;
|
nuclear@0
|
225 }
|
nuclear@0
|
226
|
nuclear@0
|
227 void destroy_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
228 {
|
nuclear@0
|
229 if(mbuf) {
|
nuclear@0
|
230 clReleaseMemObject(mbuf->mem);
|
nuclear@0
|
231 delete mbuf;
|
nuclear@0
|
232 }
|
nuclear@0
|
233 }
|
nuclear@0
|
234
|
nuclear@39
|
235 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev)
|
nuclear@0
|
236 {
|
nuclear@0
|
237 if(!mbuf) return 0;
|
nuclear@0
|
238
|
nuclear@12
|
239 #ifndef NDEBUG
|
nuclear@12
|
240 if(mbuf->ptr) {
|
nuclear@12
|
241 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
|
nuclear@12
|
242 }
|
nuclear@12
|
243 #endif
|
nuclear@12
|
244
|
nuclear@0
|
245 int err;
|
nuclear@41
|
246
|
nuclear@41
|
247 if(mbuf->type == MEM_BUFFER) {
|
nuclear@41
|
248 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err);
|
nuclear@41
|
249 if(!mbuf->ptr) {
|
nuclear@41
|
250 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
|
nuclear@41
|
251 return 0;
|
nuclear@41
|
252 }
|
nuclear@41
|
253 } else {
|
nuclear@41
|
254 assert(mbuf->type == IMAGE_BUFFER);
|
nuclear@41
|
255
|
nuclear@41
|
256 size_t orig[] = {0, 0, 0};
|
nuclear@41
|
257 size_t rgn[] = {mbuf->xsz, mbuf->ysz, 1};
|
nuclear@41
|
258 size_t pitch;
|
nuclear@41
|
259
|
nuclear@41
|
260 mbuf->ptr = clEnqueueMapImage(cmdq, mbuf->mem, 1, rdwr, orig, rgn, &pitch, 0, 0, 0, ev, &err);
|
nuclear@41
|
261 if(!mbuf->ptr) {
|
nuclear@41
|
262 fprintf(stderr, "failed to map image: %s\n", clstrerror(err));
|
nuclear@41
|
263 return 0;
|
nuclear@41
|
264 }
|
nuclear@41
|
265
|
nuclear@41
|
266 assert(pitch == mbuf->xsz * 4 * sizeof(float));
|
nuclear@0
|
267 }
|
nuclear@0
|
268 return mbuf->ptr;
|
nuclear@0
|
269 }
|
nuclear@0
|
270
|
nuclear@39
|
271 void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@0
|
272 {
|
nuclear@0
|
273 if(!mbuf || !mbuf->ptr) return;
|
nuclear@41
|
274
|
nuclear@39
|
275 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev);
|
nuclear@12
|
276 mbuf->ptr = 0;
|
nuclear@0
|
277 }
|
nuclear@0
|
278
|
nuclear@39
|
279 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev)
|
nuclear@0
|
280 {
|
nuclear@0
|
281 if(!mbuf) return false;
|
nuclear@0
|
282
|
nuclear@0
|
283 int err;
|
nuclear@39
|
284 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) {
|
nuclear@8
|
285 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
|
nuclear@0
|
286 return false;
|
nuclear@0
|
287 }
|
nuclear@0
|
288 return true;
|
nuclear@0
|
289 }
|
nuclear@0
|
290
|
nuclear@39
|
291 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev)
|
nuclear@0
|
292 {
|
nuclear@0
|
293 if(!mbuf) return false;
|
nuclear@0
|
294
|
nuclear@0
|
295 int err;
|
nuclear@39
|
296 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) {
|
nuclear@8
|
297 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
|
nuclear@0
|
298 return false;
|
nuclear@0
|
299 }
|
nuclear@0
|
300 return true;
|
nuclear@0
|
301 }
|
nuclear@0
|
302
|
nuclear@0
|
303
|
nuclear@39
|
304 bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
305 {
|
nuclear@39
|
306 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
307 return false;
|
nuclear@39
|
308 }
|
nuclear@39
|
309
|
nuclear@39
|
310 int err;
|
nuclear@39
|
311 if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
312 fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err));
|
nuclear@39
|
313 return false;
|
nuclear@39
|
314 }
|
nuclear@39
|
315 return true;
|
nuclear@39
|
316 }
|
nuclear@39
|
317
|
nuclear@39
|
318 bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev)
|
nuclear@39
|
319 {
|
nuclear@39
|
320 if(!mbuf || !mbuf->tex) {
|
nuclear@39
|
321 return false;
|
nuclear@39
|
322 }
|
nuclear@39
|
323
|
nuclear@39
|
324 int err;
|
nuclear@39
|
325 if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) {
|
nuclear@39
|
326 fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err));
|
nuclear@39
|
327 return false;
|
nuclear@39
|
328 }
|
nuclear@39
|
329 return true;
|
nuclear@39
|
330 }
|
nuclear@39
|
331
|
nuclear@39
|
332
|
John@14
|
333 CLArg::CLArg()
|
John@14
|
334 {
|
John@14
|
335 memset(this, 0, sizeof *this);
|
John@14
|
336 }
|
John@14
|
337
|
John@14
|
338
|
nuclear@0
|
339 CLProgram::CLProgram(const char *kname)
|
nuclear@0
|
340 {
|
nuclear@0
|
341 prog = 0;
|
nuclear@0
|
342 kernel = 0;
|
nuclear@0
|
343 this->kname = kname;
|
nuclear@1
|
344 args.resize(16);
|
nuclear@0
|
345 built = false;
|
nuclear@39
|
346
|
nuclear@39
|
347 wait_event = last_event = 0;
|
nuclear@0
|
348 }
|
nuclear@0
|
349
|
nuclear@0
|
350 CLProgram::~CLProgram()
|
nuclear@0
|
351 {
|
nuclear@39
|
352 if(wait_event) {
|
nuclear@39
|
353 clReleaseEvent(wait_event);
|
nuclear@39
|
354 }
|
nuclear@39
|
355 if(last_event) {
|
nuclear@40
|
356 clWaitForEvents(1, &last_event);
|
nuclear@39
|
357 clReleaseEvent(last_event);
|
nuclear@39
|
358 }
|
nuclear@39
|
359
|
nuclear@0
|
360 if(prog) {
|
nuclear@0
|
361 clReleaseProgram(prog);
|
nuclear@0
|
362 }
|
nuclear@0
|
363 if(kernel) {
|
nuclear@0
|
364 clReleaseKernel(kernel);
|
nuclear@0
|
365 }
|
nuclear@1
|
366 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
367 if(args[i].type == ARGTYPE_MEM_BUF) {
|
nuclear@1
|
368 destroy_mem_buffer(args[i].v.mbuf);
|
nuclear@0
|
369 }
|
nuclear@0
|
370 }
|
nuclear@0
|
371 }
|
nuclear@0
|
372
|
nuclear@0
|
373 bool CLProgram::load(const char *fname)
|
nuclear@0
|
374 {
|
nuclear@0
|
375 FILE *fp;
|
nuclear@0
|
376 char *src;
|
nuclear@0
|
377 struct stat st;
|
nuclear@0
|
378
|
nuclear@0
|
379 printf("loading opencl program (%s)\n", fname);
|
nuclear@0
|
380
|
nuclear@0
|
381 if(!(fp = fopen(fname, "rb"))) {
|
nuclear@0
|
382 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
|
nuclear@0
|
383 return false;
|
nuclear@0
|
384 }
|
nuclear@0
|
385
|
nuclear@0
|
386 fstat(fileno(fp), &st);
|
nuclear@0
|
387
|
nuclear@0
|
388 src = new char[st.st_size + 1];
|
nuclear@0
|
389
|
nuclear@0
|
390 fread(src, 1, st.st_size, fp);
|
nuclear@0
|
391 src[st.st_size] = 0;
|
nuclear@0
|
392 fclose(fp);
|
nuclear@0
|
393
|
nuclear@0
|
394
|
nuclear@0
|
395 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
|
nuclear@0
|
396 fprintf(stderr, "error creating program object: %s\n", fname);
|
nuclear@0
|
397 delete [] src;
|
nuclear@0
|
398 return false;
|
nuclear@0
|
399 }
|
nuclear@0
|
400 delete [] src;
|
nuclear@0
|
401 return true;
|
nuclear@0
|
402 }
|
nuclear@0
|
403
|
nuclear@1
|
404 bool CLProgram::set_argi(int idx, int val)
|
nuclear@1
|
405 {
|
nuclear@1
|
406 if((int)args.size() <= idx) {
|
nuclear@1
|
407 args.resize(idx + 1);
|
nuclear@1
|
408 }
|
nuclear@1
|
409
|
nuclear@1
|
410 CLArg *arg = &args[idx];
|
nuclear@1
|
411 arg->type = ARGTYPE_INT;
|
nuclear@1
|
412 arg->v.ival = val;
|
nuclear@1
|
413 return true;
|
nuclear@1
|
414 }
|
nuclear@1
|
415
|
nuclear@1
|
416 bool CLProgram::set_argf(int idx, float val)
|
nuclear@1
|
417 {
|
nuclear@1
|
418 if((int)args.size() <= idx) {
|
nuclear@1
|
419 args.resize(idx + 1);
|
nuclear@1
|
420 }
|
nuclear@1
|
421
|
nuclear@1
|
422 CLArg *arg = &args[idx];
|
nuclear@1
|
423 arg->type = ARGTYPE_FLOAT;
|
nuclear@1
|
424 arg->v.fval = val;
|
nuclear@1
|
425 return true;
|
nuclear@1
|
426 }
|
nuclear@1
|
427
|
nuclear@28
|
428 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr)
|
nuclear@0
|
429 {
|
nuclear@13
|
430 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
|
nuclear@0
|
431 CLMemBuffer *buf;
|
nuclear@0
|
432
|
nuclear@39
|
433 if(sz <= 0) {
|
nuclear@39
|
434 fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz);
|
nuclear@39
|
435 return false;
|
nuclear@39
|
436 }
|
nuclear@39
|
437 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
|
nuclear@39
|
438 return false;
|
nuclear@39
|
439 }
|
nuclear@39
|
440
|
nuclear@39
|
441 if((int)args.size() <= idx) {
|
nuclear@39
|
442 args.resize(idx + 1);
|
nuclear@39
|
443 }
|
nuclear@39
|
444 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@39
|
445 args[idx].v.mbuf = buf;
|
nuclear@39
|
446 return true;
|
nuclear@39
|
447 }
|
nuclear@39
|
448
|
nuclear@41
|
449 bool CLProgram::set_arg_image(int idx, int rdwr, int xsz, int ysz, const void *pix)
|
nuclear@41
|
450 {
|
nuclear@41
|
451 printf("create argument %d from %dx%d image\n", idx, xsz, ysz);
|
nuclear@41
|
452 CLMemBuffer *buf;
|
nuclear@41
|
453
|
nuclear@41
|
454 if(!(buf = create_image_buffer(rdwr, xsz, ysz, pix))) {
|
nuclear@41
|
455 return false;
|
nuclear@41
|
456 }
|
nuclear@41
|
457
|
nuclear@41
|
458 if((int)args.size() <= idx) {
|
nuclear@41
|
459 args.resize(idx + 1);
|
nuclear@41
|
460 }
|
nuclear@41
|
461 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@41
|
462 args[idx].v.mbuf = buf;
|
nuclear@41
|
463 return true;
|
nuclear@41
|
464 }
|
nuclear@41
|
465
|
nuclear@39
|
466 bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex)
|
nuclear@39
|
467 {
|
nuclear@39
|
468 printf("create argument %d from texture %u\n", idx, tex);
|
nuclear@39
|
469 CLMemBuffer *buf;
|
nuclear@39
|
470
|
nuclear@41
|
471 if(!(buf = create_image_buffer(rdwr, tex))) {
|
nuclear@0
|
472 return false;
|
nuclear@0
|
473 }
|
nuclear@0
|
474
|
nuclear@1
|
475 if((int)args.size() <= idx) {
|
nuclear@1
|
476 args.resize(idx + 1);
|
nuclear@0
|
477 }
|
nuclear@1
|
478 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@1
|
479 args[idx].v.mbuf = buf;
|
nuclear@0
|
480 return true;
|
nuclear@0
|
481 }
|
nuclear@0
|
482
|
nuclear@0
|
483 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
|
nuclear@0
|
484 {
|
nuclear@1
|
485 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
|
nuclear@0
|
486 return 0;
|
nuclear@0
|
487 }
|
nuclear@1
|
488 return args[arg].v.mbuf;
|
nuclear@0
|
489 }
|
nuclear@0
|
490
|
John@14
|
491 int CLProgram::get_num_args() const
|
John@14
|
492 {
|
John@14
|
493 int num_args = 0;
|
John@14
|
494 for(size_t i=0; i<args.size(); i++) {
|
John@14
|
495 if(args[i].type != ARGTYPE_NONE) {
|
John@14
|
496 num_args++;
|
John@14
|
497 }
|
John@14
|
498 }
|
John@14
|
499 return num_args;
|
John@14
|
500 }
|
John@14
|
501
|
nuclear@0
|
502 bool CLProgram::build()
|
nuclear@0
|
503 {
|
nuclear@2
|
504 int err;
|
nuclear@0
|
505
|
nuclear@43
|
506 const char *opt = "-cl-mad-enable -cl-single-precision-constant -cl-fast-relaxed-math";
|
nuclear@43
|
507
|
nuclear@43
|
508 if((err = clBuildProgram(prog, 0, 0, opt, 0, 0)) != 0) {
|
nuclear@2
|
509 size_t sz;
|
nuclear@2
|
510 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
|
nuclear@0
|
511
|
nuclear@2
|
512 char *errlog = (char*)alloca(sz + 1);
|
nuclear@2
|
513 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
|
nuclear@8
|
514 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
|
nuclear@2
|
515
|
nuclear@0
|
516 clReleaseProgram(prog);
|
nuclear@0
|
517 prog = 0;
|
nuclear@0
|
518 return false;
|
nuclear@0
|
519 }
|
nuclear@0
|
520
|
nuclear@0
|
521
|
nuclear@0
|
522 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
|
nuclear@0
|
523 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
|
nuclear@0
|
524 clReleaseProgram(prog);
|
nuclear@0
|
525 prog = 0;
|
nuclear@0
|
526 return false;
|
nuclear@0
|
527 }
|
nuclear@0
|
528
|
nuclear@1
|
529 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
530 int err;
|
nuclear@0
|
531
|
nuclear@1
|
532 if(args[i].type == ARGTYPE_NONE) {
|
nuclear@1
|
533 break;
|
nuclear@1
|
534 }
|
nuclear@1
|
535
|
nuclear@1
|
536 switch(args[i].type) {
|
nuclear@1
|
537 case ARGTYPE_INT:
|
nuclear@1
|
538 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
|
nuclear@8
|
539 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
540 goto fail;
|
nuclear@1
|
541 }
|
nuclear@1
|
542 break;
|
nuclear@1
|
543
|
nuclear@1
|
544 case ARGTYPE_FLOAT:
|
nuclear@1
|
545 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
|
nuclear@8
|
546 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
547 goto fail;
|
nuclear@1
|
548 }
|
nuclear@1
|
549 break;
|
nuclear@1
|
550
|
nuclear@1
|
551 case ARGTYPE_MEM_BUF:
|
nuclear@1
|
552 {
|
nuclear@1
|
553 CLMemBuffer *mbuf = args[i].v.mbuf;
|
nuclear@1
|
554
|
nuclear@1
|
555 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
|
nuclear@8
|
556 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
557 goto fail;
|
nuclear@1
|
558 }
|
nuclear@1
|
559 }
|
nuclear@1
|
560 break;
|
nuclear@1
|
561
|
nuclear@1
|
562 default:
|
nuclear@1
|
563 break;
|
nuclear@0
|
564 }
|
nuclear@0
|
565 }
|
nuclear@0
|
566
|
nuclear@0
|
567 built = true;
|
nuclear@0
|
568 return true;
|
nuclear@1
|
569
|
nuclear@1
|
570 fail:
|
nuclear@1
|
571 clReleaseProgram(prog);
|
nuclear@1
|
572 clReleaseKernel(kernel);
|
nuclear@1
|
573 prog = 0;
|
nuclear@1
|
574 kernel = 0;
|
nuclear@1
|
575 return false;
|
nuclear@0
|
576 }
|
nuclear@0
|
577
|
nuclear@0
|
578 bool CLProgram::run() const
|
nuclear@0
|
579 {
|
nuclear@0
|
580 return run(1, 1);
|
nuclear@0
|
581 }
|
nuclear@0
|
582
|
nuclear@0
|
583 bool CLProgram::run(int dim, ...) const
|
nuclear@0
|
584 {
|
nuclear@0
|
585 va_list ap;
|
nuclear@0
|
586 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
|
nuclear@0
|
587
|
nuclear@0
|
588 va_start(ap, dim);
|
nuclear@0
|
589 for(int i=0; i<dim; i++) {
|
nuclear@0
|
590 global_size[i] = va_arg(ap, int);
|
nuclear@0
|
591 }
|
nuclear@0
|
592 va_end(ap);
|
nuclear@0
|
593
|
nuclear@39
|
594 if(last_event) {
|
nuclear@39
|
595 clReleaseEvent(last_event);
|
nuclear@39
|
596 }
|
nuclear@39
|
597
|
nuclear@0
|
598 int err;
|
nuclear@39
|
599 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0,
|
nuclear@39
|
600 wait_event ? 1 : 0, wait_event ? &wait_event : 0, &last_event)) != 0) {
|
nuclear@8
|
601 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
|
nuclear@0
|
602 return false;
|
nuclear@0
|
603 }
|
nuclear@32
|
604
|
nuclear@39
|
605 if(wait_event) {
|
nuclear@39
|
606 clReleaseEvent(wait_event);
|
nuclear@39
|
607 wait_event = 0;
|
nuclear@39
|
608 }
|
nuclear@0
|
609 return true;
|
nuclear@0
|
610 }
|
nuclear@0
|
611
|
nuclear@39
|
612 void CLProgram::set_wait_event(cl_event ev)
|
nuclear@39
|
613 {
|
nuclear@39
|
614 if(wait_event) {
|
nuclear@39
|
615 clReleaseEvent(wait_event);
|
nuclear@39
|
616 }
|
nuclear@39
|
617 wait_event = ev;
|
nuclear@39
|
618 }
|
nuclear@39
|
619
|
nuclear@39
|
620 cl_event CLProgram::get_last_event() const
|
nuclear@39
|
621 {
|
nuclear@39
|
622 return last_event;
|
nuclear@39
|
623 }
|
nuclear@39
|
624
|
nuclear@0
|
625 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
|
nuclear@0
|
626 {
|
nuclear@8
|
627 unsigned int i, j, num_dev, num_plat, sel, ret;
|
nuclear@0
|
628 cl_device_id dev[32];
|
nuclear@8
|
629 cl_platform_id plat[32];
|
nuclear@0
|
630
|
nuclear@0
|
631 dev_inf->work_item_sizes = 0;
|
nuclear@0
|
632
|
nuclear@8
|
633 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
|
nuclear@8
|
634 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
635 return -1;
|
nuclear@8
|
636 }
|
nuclear@8
|
637 if(!num_plat) {
|
nuclear@8
|
638 fprintf(stderr, "OpenCL not available!\n");
|
nuclear@8
|
639 return -1;
|
nuclear@8
|
640 }
|
nuclear@0
|
641
|
nuclear@8
|
642 for(i=0; i<num_plat; i++) {
|
nuclear@8
|
643 char buf[512];
|
nuclear@8
|
644
|
nuclear@8
|
645 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
|
nuclear@8
|
646 printf("[%d]: %s", i, buf);
|
nuclear@8
|
647 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
|
nuclear@8
|
648 printf(", %s", buf);
|
nuclear@8
|
649 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
|
nuclear@8
|
650 printf(" (%s)\n", buf);
|
nuclear@8
|
651 }
|
nuclear@8
|
652
|
nuclear@8
|
653 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
|
nuclear@8
|
654 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
655 return -1;
|
nuclear@8
|
656 }
|
nuclear@0
|
657 printf("found %d cl devices.\n", num_dev);
|
nuclear@0
|
658
|
nuclear@0
|
659 for(i=0; i<num_dev; i++) {
|
nuclear@0
|
660 struct device_info di;
|
nuclear@0
|
661
|
nuclear@0
|
662 if(get_dev_info(dev[i], &di) == -1) {
|
nuclear@42
|
663 destroy_dev_info(&di);
|
nuclear@0
|
664 return -1;
|
nuclear@0
|
665 }
|
nuclear@0
|
666
|
nuclear@0
|
667 printf("--> device %u (%s)\n", i, devtypestr(di.type));
|
nuclear@0
|
668 printf("max compute units: %u\n", di.units);
|
nuclear@0
|
669 printf("max clock frequency: %u\n", di.clock);
|
nuclear@0
|
670 printf("max work item dimensions: %u\n", di.dim);
|
nuclear@0
|
671
|
nuclear@0
|
672 printf("max work item sizes: ");
|
nuclear@0
|
673 for(j=0; j<di.dim; j++) {
|
nuclear@0
|
674 printf("%u", (unsigned int)di.work_item_sizes[j]);
|
nuclear@0
|
675 if(di.dim - j > 1) {
|
nuclear@0
|
676 printf(", ");
|
nuclear@0
|
677 }
|
nuclear@0
|
678 }
|
nuclear@0
|
679 putchar('\n');
|
nuclear@0
|
680
|
nuclear@0
|
681 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
|
nuclear@0
|
682 printf("max object allocation size: ");
|
nuclear@0
|
683 print_memsize(stdout, di.mem_size);
|
nuclear@0
|
684 putchar('\n');
|
nuclear@0
|
685
|
nuclear@42
|
686 printf("extensions: %s\n", di.extensions);
|
nuclear@42
|
687
|
nuclear@0
|
688 if(devcmp(&di, dev_inf) > 0) {
|
nuclear@0
|
689 free(dev_inf->work_item_sizes);
|
nuclear@0
|
690 memcpy(dev_inf, &di, sizeof di);
|
nuclear@0
|
691 sel = i;
|
nuclear@0
|
692 }
|
nuclear@0
|
693 }
|
nuclear@0
|
694
|
nuclear@0
|
695 if(num_dev) {
|
nuclear@0
|
696 printf("\nusing device: %d\n", sel);
|
nuclear@0
|
697 return 0;
|
nuclear@0
|
698 }
|
nuclear@0
|
699
|
nuclear@0
|
700 return -1;
|
nuclear@0
|
701 }
|
nuclear@0
|
702
|
nuclear@0
|
703 static int get_dev_info(cl_device_id dev, struct device_info *di)
|
nuclear@0
|
704 {
|
nuclear@0
|
705 di->id = dev;
|
nuclear@0
|
706
|
nuclear@0
|
707 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
|
nuclear@0
|
708 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
|
nuclear@0
|
709 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
|
nuclear@0
|
710 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
|
nuclear@0
|
711
|
nuclear@0
|
712 di->work_item_sizes = new size_t[di->dim];
|
nuclear@0
|
713
|
nuclear@0
|
714 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
|
nuclear@0
|
715 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
|
nuclear@0
|
716 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
|
nuclear@0
|
717
|
nuclear@42
|
718 size_t ext_str_len;
|
nuclear@42
|
719 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 0, 0, &ext_str_len);
|
nuclear@42
|
720
|
nuclear@42
|
721 di->extensions = new char[ext_str_len + 1];
|
nuclear@42
|
722 clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, ext_str_len, di->extensions, 0);
|
nuclear@42
|
723 di->extensions[ext_str_len] = 0;
|
nuclear@42
|
724
|
nuclear@42
|
725 if(strstr(di->extensions, "cl_khr_gl_sharing") || strstr(di->extensions, "cl_APPLE_gl_sharing")) {
|
nuclear@42
|
726 di->gl_sharing = true;
|
nuclear@42
|
727 } else {
|
nuclear@42
|
728 di->gl_sharing = false;
|
nuclear@42
|
729 }
|
nuclear@42
|
730
|
nuclear@0
|
731 return 0;
|
nuclear@0
|
732 }
|
nuclear@0
|
733
|
nuclear@42
|
734 static void destroy_dev_info(struct device_info *di)
|
nuclear@42
|
735 {
|
nuclear@42
|
736 delete [] di->work_item_sizes;
|
nuclear@42
|
737 delete [] di->extensions;
|
nuclear@42
|
738 }
|
nuclear@42
|
739
|
nuclear@0
|
740 static int devcmp(struct device_info *a, struct device_info *b)
|
nuclear@0
|
741 {
|
nuclear@0
|
742 unsigned int aval = a->units * a->clock;
|
nuclear@0
|
743 unsigned int bval = b->units * b->clock;
|
nuclear@0
|
744
|
nuclear@0
|
745 return aval - bval;
|
nuclear@0
|
746 }
|
nuclear@0
|
747
|
nuclear@0
|
748 static const char *devtypestr(cl_device_type type)
|
nuclear@0
|
749 {
|
nuclear@0
|
750 switch(type) {
|
nuclear@0
|
751 case CL_DEVICE_TYPE_CPU:
|
nuclear@0
|
752 return "cpu";
|
nuclear@0
|
753 case CL_DEVICE_TYPE_GPU:
|
nuclear@0
|
754 return "gpu";
|
nuclear@0
|
755 case CL_DEVICE_TYPE_ACCELERATOR:
|
nuclear@0
|
756 return "accelerator";
|
nuclear@0
|
757 default:
|
nuclear@0
|
758 break;
|
nuclear@0
|
759 }
|
nuclear@0
|
760 return "unknown";
|
nuclear@0
|
761 }
|
nuclear@0
|
762
|
nuclear@0
|
763 static void print_memsize(FILE *out, unsigned long bytes)
|
nuclear@0
|
764 {
|
nuclear@0
|
765 int i;
|
nuclear@0
|
766 unsigned long memsz = bytes;
|
nuclear@0
|
767 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
|
nuclear@0
|
768
|
nuclear@0
|
769 for(i=0; suffix[i]; i++) {
|
nuclear@0
|
770 if(memsz < 1024) {
|
nuclear@0
|
771 fprintf(out, "%lu %s", memsz, suffix[i]);
|
nuclear@0
|
772 if(i > 0) {
|
nuclear@0
|
773 fprintf(out, " (%lu bytes)", bytes);
|
nuclear@0
|
774 }
|
nuclear@0
|
775 return;
|
nuclear@0
|
776 }
|
nuclear@0
|
777
|
nuclear@0
|
778 memsz /= 1024;
|
nuclear@0
|
779 }
|
nuclear@0
|
780 }
|
nuclear@8
|
781
|
nuclear@8
|
782 static const char *clstrerror(int err)
|
nuclear@8
|
783 {
|
nuclear@8
|
784 if(err > 0) {
|
nuclear@8
|
785 return "<invalid error code>";
|
nuclear@8
|
786 }
|
nuclear@8
|
787 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
|
nuclear@8
|
788 return "<unknown error>";
|
nuclear@8
|
789 }
|
nuclear@8
|
790 return ocl_errstr[-err];
|
nuclear@8
|
791 }
|