rev |
line source |
nuclear@8
|
1 #define OCL_CC_
|
nuclear@8
|
2
|
nuclear@0
|
3 #include <stdio.h>
|
nuclear@0
|
4 #include <stdlib.h>
|
nuclear@0
|
5 #include <string.h>
|
nuclear@8
|
6 #include <stdarg.h>
|
nuclear@0
|
7 #include <errno.h>
|
John@11
|
8 #ifndef _MSC_VER
|
nuclear@0
|
9 #include <alloca.h>
|
John@11
|
10 #else
|
John@11
|
11 #include <malloc.h>
|
John@11
|
12 #endif
|
nuclear@0
|
13 #include <sys/stat.h>
|
nuclear@0
|
14 #include "ocl.h"
|
nuclear@8
|
15 #include "ocl_errstr.h"
|
nuclear@0
|
16
|
nuclear@0
|
17
|
nuclear@0
|
18 class InitCL {
|
nuclear@0
|
19 public:
|
nuclear@0
|
20 InitCL();
|
nuclear@0
|
21 };
|
nuclear@0
|
22
|
nuclear@0
|
23 struct device_info {
|
nuclear@0
|
24 cl_device_id id;
|
nuclear@0
|
25 cl_device_type type;
|
nuclear@0
|
26 unsigned int units;
|
nuclear@0
|
27 unsigned int clock;
|
nuclear@0
|
28
|
nuclear@0
|
29 unsigned int dim;
|
nuclear@0
|
30 size_t *work_item_sizes;
|
nuclear@0
|
31 size_t work_group_size;
|
nuclear@0
|
32
|
nuclear@0
|
33 unsigned long mem_size;
|
nuclear@0
|
34 };
|
nuclear@0
|
35
|
nuclear@0
|
36 static bool init_opencl(void);
|
nuclear@0
|
37 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
|
nuclear@0
|
38 static int get_dev_info(cl_device_id dev, struct device_info *di);
|
nuclear@0
|
39 static int devcmp(struct device_info *a, struct device_info *b);
|
nuclear@0
|
40 static const char *devtypestr(cl_device_type type);
|
nuclear@0
|
41 static void print_memsize(FILE *out, unsigned long memsz);
|
nuclear@8
|
42 static const char *clstrerror(int err);
|
nuclear@0
|
43
|
nuclear@0
|
44
|
nuclear@0
|
45 static InitCL initcl;
|
nuclear@0
|
46 static cl_context ctx;
|
nuclear@0
|
47 static cl_command_queue cmdq;
|
nuclear@0
|
48 static device_info devinf;
|
nuclear@0
|
49
|
nuclear@0
|
50 InitCL::InitCL()
|
nuclear@0
|
51 {
|
nuclear@0
|
52 if(!init_opencl()) {
|
nuclear@0
|
53 exit(0);
|
nuclear@0
|
54 }
|
nuclear@0
|
55 }
|
nuclear@0
|
56
|
nuclear@0
|
57 static bool init_opencl(void)
|
nuclear@0
|
58 {
|
nuclear@0
|
59 if(select_device(&devinf, devcmp) == -1) {
|
nuclear@0
|
60 return false;
|
nuclear@0
|
61 }
|
nuclear@0
|
62
|
nuclear@0
|
63
|
nuclear@0
|
64 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
|
nuclear@0
|
65 fprintf(stderr, "failed to create opencl context\n");
|
nuclear@0
|
66 return false;
|
nuclear@0
|
67 }
|
nuclear@0
|
68
|
nuclear@0
|
69 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
|
nuclear@0
|
70 fprintf(stderr, "failed to create command queue\n");
|
nuclear@0
|
71 return false;
|
nuclear@0
|
72 }
|
nuclear@0
|
73 return true;
|
nuclear@0
|
74 }
|
nuclear@0
|
75
|
nuclear@0
|
76
|
nuclear@0
|
77 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
|
nuclear@0
|
78 {
|
nuclear@0
|
79 int err;
|
nuclear@0
|
80 cl_mem mem;
|
nuclear@12
|
81 cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR;
|
nuclear@0
|
82
|
nuclear@12
|
83 if(buf) {
|
nuclear@12
|
84 flags |= CL_MEM_COPY_HOST_PTR;
|
nuclear@12
|
85 }
|
nuclear@0
|
86
|
nuclear@12
|
87
|
nuclear@12
|
88 if(!(mem = clCreateBuffer(ctx, flags, sz, buf, &err))) {
|
nuclear@8
|
89 fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err));
|
nuclear@0
|
90 return 0;
|
nuclear@0
|
91 }
|
nuclear@0
|
92
|
nuclear@0
|
93 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@0
|
94 mbuf->mem = mem;
|
nuclear@0
|
95 mbuf->size = sz;
|
nuclear@12
|
96 mbuf->ptr = 0;
|
nuclear@0
|
97 return mbuf;
|
nuclear@0
|
98 }
|
nuclear@0
|
99
|
nuclear@0
|
100 void destroy_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
101 {
|
nuclear@0
|
102 if(mbuf) {
|
nuclear@0
|
103 clReleaseMemObject(mbuf->mem);
|
nuclear@0
|
104 delete mbuf;
|
nuclear@0
|
105 }
|
nuclear@0
|
106 }
|
nuclear@0
|
107
|
nuclear@0
|
108 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
|
nuclear@0
|
109 {
|
nuclear@0
|
110 if(!mbuf) return 0;
|
nuclear@0
|
111
|
nuclear@12
|
112 #ifndef NDEBUG
|
nuclear@12
|
113 if(mbuf->ptr) {
|
nuclear@12
|
114 fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n");
|
nuclear@12
|
115 }
|
nuclear@12
|
116 #endif
|
nuclear@12
|
117
|
nuclear@0
|
118 int err;
|
nuclear@0
|
119 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
|
nuclear@0
|
120 if(!mbuf->ptr) {
|
nuclear@8
|
121 fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err));
|
nuclear@0
|
122 return 0;
|
nuclear@0
|
123 }
|
nuclear@0
|
124 return mbuf->ptr;
|
nuclear@0
|
125 }
|
nuclear@0
|
126
|
nuclear@0
|
127 void unmap_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
128 {
|
nuclear@0
|
129 if(!mbuf || !mbuf->ptr) return;
|
nuclear@0
|
130 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
|
nuclear@12
|
131 mbuf->ptr = 0;
|
nuclear@0
|
132 }
|
nuclear@0
|
133
|
nuclear@0
|
134 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
|
nuclear@0
|
135 {
|
nuclear@0
|
136 if(!mbuf) return false;
|
nuclear@0
|
137
|
nuclear@0
|
138 int err;
|
nuclear@0
|
139 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
|
nuclear@8
|
140 fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err));
|
nuclear@0
|
141 return false;
|
nuclear@0
|
142 }
|
nuclear@0
|
143 return true;
|
nuclear@0
|
144 }
|
nuclear@0
|
145
|
nuclear@0
|
146 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
|
nuclear@0
|
147 {
|
nuclear@0
|
148 if(!mbuf) return false;
|
nuclear@0
|
149
|
nuclear@0
|
150 int err;
|
nuclear@0
|
151 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
|
nuclear@8
|
152 fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err));
|
nuclear@0
|
153 return false;
|
nuclear@0
|
154 }
|
nuclear@0
|
155 return true;
|
nuclear@0
|
156 }
|
nuclear@0
|
157
|
nuclear@0
|
158
|
John@14
|
159 CLArg::CLArg()
|
John@14
|
160 {
|
John@14
|
161 memset(this, 0, sizeof *this);
|
John@14
|
162 }
|
John@14
|
163
|
John@14
|
164
|
nuclear@0
|
165 CLProgram::CLProgram(const char *kname)
|
nuclear@0
|
166 {
|
nuclear@0
|
167 prog = 0;
|
nuclear@0
|
168 kernel = 0;
|
nuclear@0
|
169 this->kname = kname;
|
nuclear@1
|
170 args.resize(16);
|
nuclear@0
|
171 built = false;
|
nuclear@0
|
172 }
|
nuclear@0
|
173
|
nuclear@0
|
174 CLProgram::~CLProgram()
|
nuclear@0
|
175 {
|
nuclear@0
|
176 if(prog) {
|
nuclear@0
|
177
|
nuclear@0
|
178 clReleaseProgram(prog);
|
nuclear@0
|
179 }
|
nuclear@0
|
180 if(kernel) {
|
nuclear@0
|
181
|
nuclear@0
|
182 clReleaseKernel(kernel);
|
nuclear@0
|
183 }
|
nuclear@1
|
184 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
185 if(args[i].type == ARGTYPE_MEM_BUF) {
|
nuclear@1
|
186 destroy_mem_buffer(args[i].v.mbuf);
|
nuclear@0
|
187 }
|
nuclear@0
|
188 }
|
nuclear@0
|
189 }
|
nuclear@0
|
190
|
nuclear@0
|
191 bool CLProgram::load(const char *fname)
|
nuclear@0
|
192 {
|
nuclear@0
|
193 FILE *fp;
|
nuclear@0
|
194 char *src;
|
nuclear@0
|
195 struct stat st;
|
nuclear@0
|
196
|
nuclear@0
|
197 printf("loading opencl program (%s)\n", fname);
|
nuclear@0
|
198
|
nuclear@0
|
199 if(!(fp = fopen(fname, "rb"))) {
|
nuclear@0
|
200 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
|
nuclear@0
|
201 return false;
|
nuclear@0
|
202 }
|
nuclear@0
|
203
|
nuclear@0
|
204 fstat(fileno(fp), &st);
|
nuclear@0
|
205
|
nuclear@0
|
206 src = new char[st.st_size + 1];
|
nuclear@0
|
207
|
nuclear@0
|
208 fread(src, 1, st.st_size, fp);
|
nuclear@0
|
209 src[st.st_size] = 0;
|
nuclear@0
|
210 fclose(fp);
|
nuclear@0
|
211
|
nuclear@0
|
212
|
nuclear@0
|
213 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
|
nuclear@0
|
214 fprintf(stderr, "error creating program object: %s\n", fname);
|
nuclear@0
|
215 delete [] src;
|
nuclear@0
|
216 return false;
|
nuclear@0
|
217 }
|
nuclear@0
|
218 delete [] src;
|
nuclear@0
|
219 return true;
|
nuclear@0
|
220 }
|
nuclear@0
|
221
|
nuclear@1
|
222 bool CLProgram::set_argi(int idx, int val)
|
nuclear@1
|
223 {
|
nuclear@1
|
224 if((int)args.size() <= idx) {
|
nuclear@1
|
225 args.resize(idx + 1);
|
nuclear@1
|
226 }
|
nuclear@1
|
227
|
nuclear@1
|
228 CLArg *arg = &args[idx];
|
nuclear@1
|
229 arg->type = ARGTYPE_INT;
|
nuclear@1
|
230 arg->v.ival = val;
|
nuclear@1
|
231 return true;
|
nuclear@1
|
232 }
|
nuclear@1
|
233
|
nuclear@1
|
234 bool CLProgram::set_argf(int idx, float val)
|
nuclear@1
|
235 {
|
nuclear@1
|
236 if((int)args.size() <= idx) {
|
nuclear@1
|
237 args.resize(idx + 1);
|
nuclear@1
|
238 }
|
nuclear@1
|
239
|
nuclear@1
|
240 CLArg *arg = &args[idx];
|
nuclear@1
|
241 arg->type = ARGTYPE_FLOAT;
|
nuclear@1
|
242 arg->v.fval = val;
|
nuclear@1
|
243 return true;
|
nuclear@1
|
244 }
|
nuclear@1
|
245
|
nuclear@1
|
246 bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, void *ptr)
|
nuclear@0
|
247 {
|
nuclear@13
|
248 printf("create argument %d buffer: %d bytes\n", idx, (int)sz);
|
nuclear@0
|
249 CLMemBuffer *buf;
|
nuclear@0
|
250
|
John@14
|
251 if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, ptr))) {
|
John@14
|
252 fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz);
|
nuclear@0
|
253 return false;
|
nuclear@0
|
254 }
|
nuclear@0
|
255
|
nuclear@1
|
256 if((int)args.size() <= idx) {
|
nuclear@1
|
257 args.resize(idx + 1);
|
nuclear@0
|
258 }
|
nuclear@1
|
259 args[idx].type = ARGTYPE_MEM_BUF;
|
nuclear@1
|
260 args[idx].v.mbuf = buf;
|
nuclear@0
|
261 return true;
|
nuclear@0
|
262 }
|
nuclear@0
|
263
|
nuclear@0
|
264 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
|
nuclear@0
|
265 {
|
nuclear@1
|
266 if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) {
|
nuclear@0
|
267 return 0;
|
nuclear@0
|
268 }
|
nuclear@1
|
269 return args[arg].v.mbuf;
|
nuclear@0
|
270 }
|
nuclear@0
|
271
|
John@14
|
272 int CLProgram::get_num_args() const
|
John@14
|
273 {
|
John@14
|
274 int num_args = 0;
|
John@14
|
275 for(size_t i=0; i<args.size(); i++) {
|
John@14
|
276 if(args[i].type != ARGTYPE_NONE) {
|
John@14
|
277 num_args++;
|
John@14
|
278 }
|
John@14
|
279 }
|
John@14
|
280 return num_args;
|
John@14
|
281 }
|
John@14
|
282
|
nuclear@0
|
283 bool CLProgram::build()
|
nuclear@0
|
284 {
|
nuclear@2
|
285 int err;
|
nuclear@0
|
286
|
nuclear@2
|
287 if((err = clBuildProgram(prog, 0, 0, 0, 0, 0)) != 0) {
|
nuclear@2
|
288 size_t sz;
|
nuclear@2
|
289 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, 0, 0, &sz);
|
nuclear@0
|
290
|
nuclear@2
|
291 char *errlog = (char*)alloca(sz + 1);
|
nuclear@2
|
292 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sz, errlog, 0);
|
nuclear@8
|
293 fprintf(stderr, "failed to build program: %s\n%s\n", clstrerror(err), errlog);
|
nuclear@2
|
294
|
nuclear@0
|
295 clReleaseProgram(prog);
|
nuclear@0
|
296 prog = 0;
|
nuclear@0
|
297 return false;
|
nuclear@0
|
298 }
|
nuclear@0
|
299
|
nuclear@0
|
300
|
nuclear@0
|
301 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
|
nuclear@0
|
302 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
|
nuclear@0
|
303 clReleaseProgram(prog);
|
nuclear@0
|
304 prog = 0;
|
nuclear@0
|
305 return false;
|
nuclear@0
|
306 }
|
nuclear@0
|
307
|
nuclear@1
|
308 for(size_t i=0; i<args.size(); i++) {
|
nuclear@1
|
309 int err;
|
nuclear@0
|
310
|
nuclear@1
|
311 if(args[i].type == ARGTYPE_NONE) {
|
nuclear@1
|
312 break;
|
nuclear@1
|
313 }
|
nuclear@1
|
314
|
nuclear@1
|
315 switch(args[i].type) {
|
nuclear@1
|
316 case ARGTYPE_INT:
|
nuclear@1
|
317 if((err = clSetKernelArg(kernel, i, sizeof(int), &args[i].v.ival)) != 0) {
|
nuclear@8
|
318 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
319 goto fail;
|
nuclear@1
|
320 }
|
nuclear@1
|
321 break;
|
nuclear@1
|
322
|
nuclear@1
|
323 case ARGTYPE_FLOAT:
|
nuclear@1
|
324 if((err = clSetKernelArg(kernel, i, sizeof(float), &args[i].v.fval)) != 0) {
|
nuclear@8
|
325 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
326 goto fail;
|
nuclear@1
|
327 }
|
nuclear@1
|
328 break;
|
nuclear@1
|
329
|
nuclear@1
|
330 case ARGTYPE_MEM_BUF:
|
nuclear@1
|
331 {
|
nuclear@1
|
332 CLMemBuffer *mbuf = args[i].v.mbuf;
|
nuclear@1
|
333
|
nuclear@1
|
334 if((err = clSetKernelArg(kernel, i, sizeof mbuf->mem, &mbuf->mem)) != 0) {
|
nuclear@8
|
335 fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err));
|
nuclear@1
|
336 goto fail;
|
nuclear@1
|
337 }
|
nuclear@1
|
338 }
|
nuclear@1
|
339 break;
|
nuclear@1
|
340
|
nuclear@1
|
341 default:
|
nuclear@1
|
342 break;
|
nuclear@0
|
343 }
|
nuclear@0
|
344 }
|
nuclear@0
|
345
|
nuclear@0
|
346 built = true;
|
nuclear@0
|
347 return true;
|
nuclear@1
|
348
|
nuclear@1
|
349 fail:
|
nuclear@1
|
350 clReleaseProgram(prog);
|
nuclear@1
|
351 clReleaseKernel(kernel);
|
nuclear@1
|
352 prog = 0;
|
nuclear@1
|
353 kernel = 0;
|
nuclear@1
|
354 return false;
|
nuclear@0
|
355 }
|
nuclear@0
|
356
|
nuclear@0
|
357 bool CLProgram::run() const
|
nuclear@0
|
358 {
|
nuclear@0
|
359 return run(1, 1);
|
nuclear@0
|
360 }
|
nuclear@0
|
361
|
nuclear@0
|
362 bool CLProgram::run(int dim, ...) const
|
nuclear@0
|
363 {
|
nuclear@0
|
364 if(!built) {
|
nuclear@0
|
365 if(!((CLProgram*)this)->build()) {
|
nuclear@0
|
366 return false;
|
nuclear@0
|
367 }
|
nuclear@0
|
368 }
|
nuclear@0
|
369
|
nuclear@0
|
370 va_list ap;
|
nuclear@0
|
371 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
|
nuclear@0
|
372
|
nuclear@0
|
373 va_start(ap, dim);
|
nuclear@0
|
374 for(int i=0; i<dim; i++) {
|
nuclear@0
|
375 global_size[i] = va_arg(ap, int);
|
nuclear@0
|
376 }
|
nuclear@0
|
377 va_end(ap);
|
nuclear@0
|
378
|
nuclear@0
|
379 int err;
|
nuclear@0
|
380 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
|
nuclear@8
|
381 fprintf(stderr, "error executing kernel: %s\n", clstrerror(err));
|
nuclear@0
|
382 return false;
|
nuclear@0
|
383 }
|
nuclear@0
|
384 return true;
|
nuclear@0
|
385 }
|
nuclear@0
|
386
|
nuclear@0
|
387 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
|
nuclear@0
|
388 {
|
nuclear@8
|
389 unsigned int i, j, num_dev, num_plat, sel, ret;
|
nuclear@0
|
390 cl_device_id dev[32];
|
nuclear@8
|
391 cl_platform_id plat[32];
|
nuclear@0
|
392
|
nuclear@0
|
393 dev_inf->work_item_sizes = 0;
|
nuclear@0
|
394
|
nuclear@8
|
395 if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) {
|
nuclear@8
|
396 fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
397 return -1;
|
nuclear@8
|
398 }
|
nuclear@8
|
399 if(!num_plat) {
|
nuclear@8
|
400 fprintf(stderr, "OpenCL not available!\n");
|
nuclear@8
|
401 return -1;
|
nuclear@8
|
402 }
|
nuclear@0
|
403
|
nuclear@8
|
404 for(i=0; i<num_plat; i++) {
|
nuclear@8
|
405 char buf[512];
|
nuclear@8
|
406
|
nuclear@8
|
407 clGetPlatformInfo(plat[i], CL_PLATFORM_NAME, sizeof buf, buf, 0);
|
nuclear@8
|
408 printf("[%d]: %s", i, buf);
|
nuclear@8
|
409 clGetPlatformInfo(plat[i], CL_PLATFORM_VENDOR, sizeof buf, buf, 0);
|
nuclear@8
|
410 printf(", %s", buf);
|
nuclear@8
|
411 clGetPlatformInfo(plat[i], CL_PLATFORM_VERSION, sizeof buf, buf, 0);
|
nuclear@8
|
412 printf(" (%s)\n", buf);
|
nuclear@8
|
413 }
|
nuclear@8
|
414
|
nuclear@8
|
415 if((ret = clGetDeviceIDs(plat[0], CL_DEVICE_TYPE_ALL, 32, dev, &num_dev)) != 0) {
|
nuclear@8
|
416 fprintf(stderr, "clGetDeviceIDs failed: %s\n", clstrerror(ret));
|
nuclear@8
|
417 return -1;
|
nuclear@8
|
418 }
|
nuclear@0
|
419 printf("found %d cl devices.\n", num_dev);
|
nuclear@0
|
420
|
nuclear@0
|
421 for(i=0; i<num_dev; i++) {
|
nuclear@0
|
422 struct device_info di;
|
nuclear@0
|
423
|
nuclear@0
|
424 if(get_dev_info(dev[i], &di) == -1) {
|
nuclear@0
|
425 free(dev_inf->work_item_sizes);
|
nuclear@0
|
426 return -1;
|
nuclear@0
|
427 }
|
nuclear@0
|
428
|
nuclear@0
|
429 printf("--> device %u (%s)\n", i, devtypestr(di.type));
|
nuclear@0
|
430 printf("max compute units: %u\n", di.units);
|
nuclear@0
|
431 printf("max clock frequency: %u\n", di.clock);
|
nuclear@0
|
432 printf("max work item dimensions: %u\n", di.dim);
|
nuclear@0
|
433
|
nuclear@0
|
434 printf("max work item sizes: ");
|
nuclear@0
|
435 for(j=0; j<di.dim; j++) {
|
nuclear@0
|
436 printf("%u", (unsigned int)di.work_item_sizes[j]);
|
nuclear@0
|
437 if(di.dim - j > 1) {
|
nuclear@0
|
438 printf(", ");
|
nuclear@0
|
439 }
|
nuclear@0
|
440 }
|
nuclear@0
|
441 putchar('\n');
|
nuclear@0
|
442
|
nuclear@0
|
443 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
|
nuclear@0
|
444 printf("max object allocation size: ");
|
nuclear@0
|
445 print_memsize(stdout, di.mem_size);
|
nuclear@0
|
446 putchar('\n');
|
nuclear@0
|
447
|
nuclear@0
|
448 if(devcmp(&di, dev_inf) > 0) {
|
nuclear@0
|
449 free(dev_inf->work_item_sizes);
|
nuclear@0
|
450 memcpy(dev_inf, &di, sizeof di);
|
nuclear@0
|
451 sel = i;
|
nuclear@0
|
452 }
|
nuclear@0
|
453 }
|
nuclear@0
|
454
|
nuclear@0
|
455 if(num_dev) {
|
nuclear@0
|
456 printf("\nusing device: %d\n", sel);
|
nuclear@0
|
457 return 0;
|
nuclear@0
|
458 }
|
nuclear@0
|
459
|
nuclear@0
|
460 return -1;
|
nuclear@0
|
461 }
|
nuclear@0
|
462
|
nuclear@0
|
463 static int get_dev_info(cl_device_id dev, struct device_info *di)
|
nuclear@0
|
464 {
|
nuclear@0
|
465 di->id = dev;
|
nuclear@0
|
466
|
nuclear@0
|
467
|
nuclear@0
|
468 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
|
nuclear@0
|
469 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
|
nuclear@0
|
470 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
|
nuclear@0
|
471 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
|
nuclear@0
|
472
|
nuclear@0
|
473 di->work_item_sizes = new size_t[di->dim];
|
nuclear@0
|
474
|
nuclear@0
|
475 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
|
nuclear@0
|
476 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
|
nuclear@0
|
477 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
|
nuclear@0
|
478
|
nuclear@0
|
479 return 0;
|
nuclear@0
|
480 }
|
nuclear@0
|
481
|
nuclear@0
|
482 static int devcmp(struct device_info *a, struct device_info *b)
|
nuclear@0
|
483 {
|
nuclear@0
|
484 unsigned int aval = a->units * a->clock;
|
nuclear@0
|
485 unsigned int bval = b->units * b->clock;
|
nuclear@0
|
486
|
nuclear@0
|
487 return aval - bval;
|
nuclear@0
|
488 }
|
nuclear@0
|
489
|
nuclear@0
|
490 static const char *devtypestr(cl_device_type type)
|
nuclear@0
|
491 {
|
nuclear@0
|
492 switch(type) {
|
nuclear@0
|
493 case CL_DEVICE_TYPE_CPU:
|
nuclear@0
|
494 return "cpu";
|
nuclear@0
|
495 case CL_DEVICE_TYPE_GPU:
|
nuclear@0
|
496 return "gpu";
|
nuclear@0
|
497 case CL_DEVICE_TYPE_ACCELERATOR:
|
nuclear@0
|
498 return "accelerator";
|
nuclear@0
|
499 default:
|
nuclear@0
|
500 break;
|
nuclear@0
|
501 }
|
nuclear@0
|
502 return "unknown";
|
nuclear@0
|
503 }
|
nuclear@0
|
504
|
nuclear@0
|
505 static void print_memsize(FILE *out, unsigned long bytes)
|
nuclear@0
|
506 {
|
nuclear@0
|
507 int i;
|
nuclear@0
|
508 unsigned long memsz = bytes;
|
nuclear@0
|
509 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
|
nuclear@0
|
510
|
nuclear@0
|
511 for(i=0; suffix[i]; i++) {
|
nuclear@0
|
512 if(memsz < 1024) {
|
nuclear@0
|
513 fprintf(out, "%lu %s", memsz, suffix[i]);
|
nuclear@0
|
514 if(i > 0) {
|
nuclear@0
|
515 fprintf(out, " (%lu bytes)", bytes);
|
nuclear@0
|
516 }
|
nuclear@0
|
517 return;
|
nuclear@0
|
518 }
|
nuclear@0
|
519
|
nuclear@0
|
520 memsz /= 1024;
|
nuclear@0
|
521 }
|
nuclear@0
|
522 }
|
nuclear@8
|
523
|
nuclear@8
|
524 static const char *clstrerror(int err)
|
nuclear@8
|
525 {
|
nuclear@8
|
526 if(err > 0) {
|
nuclear@8
|
527 return "<invalid error code>";
|
nuclear@8
|
528 }
|
nuclear@8
|
529 if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) {
|
nuclear@8
|
530 return "<unknown error>";
|
nuclear@8
|
531 }
|
nuclear@8
|
532 return ocl_errstr[-err];
|
nuclear@8
|
533 }
|