rev |
line source |
nuclear@0
|
1 #include <stdio.h>
|
nuclear@0
|
2 #include <stdlib.h>
|
nuclear@0
|
3 #include <string.h>
|
nuclear@0
|
4 #include <errno.h>
|
nuclear@0
|
5 #include <alloca.h>
|
nuclear@0
|
6 #include <sys/stat.h>
|
nuclear@0
|
7 #include "ocl.h"
|
nuclear@0
|
8
|
nuclear@0
|
9
|
nuclear@0
|
10 class InitCL {
|
nuclear@0
|
11 public:
|
nuclear@0
|
12 InitCL();
|
nuclear@0
|
13 };
|
nuclear@0
|
14
|
nuclear@0
|
15 struct device_info {
|
nuclear@0
|
16 cl_device_id id;
|
nuclear@0
|
17 cl_device_type type;
|
nuclear@0
|
18 unsigned int units;
|
nuclear@0
|
19 unsigned int clock;
|
nuclear@0
|
20
|
nuclear@0
|
21 unsigned int dim;
|
nuclear@0
|
22 size_t *work_item_sizes;
|
nuclear@0
|
23 size_t work_group_size;
|
nuclear@0
|
24
|
nuclear@0
|
25 unsigned long mem_size;
|
nuclear@0
|
26 };
|
nuclear@0
|
27
|
nuclear@0
|
28 static bool init_opencl(void);
|
nuclear@0
|
29 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
|
nuclear@0
|
30 static int get_dev_info(cl_device_id dev, struct device_info *di);
|
nuclear@0
|
31 static int devcmp(struct device_info *a, struct device_info *b);
|
nuclear@0
|
32 static const char *devtypestr(cl_device_type type);
|
nuclear@0
|
33 static void print_memsize(FILE *out, unsigned long memsz);
|
nuclear@0
|
34
|
nuclear@0
|
35
|
nuclear@0
|
36 static InitCL initcl;
|
nuclear@0
|
37 static cl_context ctx;
|
nuclear@0
|
38 static cl_command_queue cmdq;
|
nuclear@0
|
39 static device_info devinf;
|
nuclear@0
|
40
|
nuclear@0
|
41 InitCL::InitCL()
|
nuclear@0
|
42 {
|
nuclear@0
|
43 if(!init_opencl()) {
|
nuclear@0
|
44 exit(0);
|
nuclear@0
|
45 }
|
nuclear@0
|
46 }
|
nuclear@0
|
47
|
nuclear@0
|
48 static bool init_opencl(void)
|
nuclear@0
|
49 {
|
nuclear@0
|
50 if(select_device(&devinf, devcmp) == -1) {
|
nuclear@0
|
51 return false;
|
nuclear@0
|
52 }
|
nuclear@0
|
53
|
nuclear@0
|
54
|
nuclear@0
|
55 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
|
nuclear@0
|
56 fprintf(stderr, "failed to create opencl context\n");
|
nuclear@0
|
57 return false;
|
nuclear@0
|
58 }
|
nuclear@0
|
59
|
nuclear@0
|
60 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
|
nuclear@0
|
61 fprintf(stderr, "failed to create command queue\n");
|
nuclear@0
|
62 return false;
|
nuclear@0
|
63 }
|
nuclear@0
|
64 return true;
|
nuclear@0
|
65 }
|
nuclear@0
|
66
|
nuclear@0
|
67
|
nuclear@0
|
68 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
|
nuclear@0
|
69 {
|
nuclear@0
|
70 int err;
|
nuclear@0
|
71 cl_mem mem;
|
nuclear@0
|
72
|
nuclear@0
|
73
|
nuclear@0
|
74 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
|
nuclear@0
|
75 fprintf(stderr, "failed to create memory buffer (%d)\n", err);
|
nuclear@0
|
76 return 0;
|
nuclear@0
|
77 }
|
nuclear@0
|
78
|
nuclear@0
|
79 CLMemBuffer *mbuf = new CLMemBuffer;
|
nuclear@0
|
80 mbuf->mem = mem;
|
nuclear@0
|
81 mbuf->size = sz;
|
nuclear@0
|
82 return mbuf;
|
nuclear@0
|
83 }
|
nuclear@0
|
84
|
nuclear@0
|
85 void destroy_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
86 {
|
nuclear@0
|
87 if(mbuf) {
|
nuclear@0
|
88
|
nuclear@0
|
89 clReleaseMemObject(mbuf->mem);
|
nuclear@0
|
90 delete mbuf;
|
nuclear@0
|
91 }
|
nuclear@0
|
92 }
|
nuclear@0
|
93
|
nuclear@0
|
94 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
|
nuclear@0
|
95 {
|
nuclear@0
|
96 if(!mbuf) return 0;
|
nuclear@0
|
97
|
nuclear@0
|
98 int err;
|
nuclear@0
|
99 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
|
nuclear@0
|
100 if(!mbuf->ptr) {
|
nuclear@0
|
101 fprintf(stderr, "failed to map buffer (%d)\n", err);
|
nuclear@0
|
102 return 0;
|
nuclear@0
|
103 }
|
nuclear@0
|
104 return mbuf->ptr;
|
nuclear@0
|
105 }
|
nuclear@0
|
106
|
nuclear@0
|
107 void unmap_mem_buffer(CLMemBuffer *mbuf)
|
nuclear@0
|
108 {
|
nuclear@0
|
109 if(!mbuf || !mbuf->ptr) return;
|
nuclear@0
|
110 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
|
nuclear@0
|
111 }
|
nuclear@0
|
112
|
nuclear@0
|
113 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
|
nuclear@0
|
114 {
|
nuclear@0
|
115 if(!mbuf) return false;
|
nuclear@0
|
116
|
nuclear@0
|
117 int err;
|
nuclear@0
|
118 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
|
nuclear@0
|
119 fprintf(stderr, "failed to write buffer (%d)\n", err);
|
nuclear@0
|
120 return false;
|
nuclear@0
|
121 }
|
nuclear@0
|
122 return true;
|
nuclear@0
|
123 }
|
nuclear@0
|
124
|
nuclear@0
|
125 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
|
nuclear@0
|
126 {
|
nuclear@0
|
127 if(!mbuf) return false;
|
nuclear@0
|
128
|
nuclear@0
|
129 int err;
|
nuclear@0
|
130 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
|
nuclear@0
|
131 fprintf(stderr, "failed to read buffer (%d)\n", err);
|
nuclear@0
|
132 return false;
|
nuclear@0
|
133 }
|
nuclear@0
|
134 return true;
|
nuclear@0
|
135 }
|
nuclear@0
|
136
|
nuclear@0
|
137
|
nuclear@0
|
138 CLProgram::CLProgram(const char *kname)
|
nuclear@0
|
139 {
|
nuclear@0
|
140 prog = 0;
|
nuclear@0
|
141 kernel = 0;
|
nuclear@0
|
142 this->kname = kname;
|
nuclear@0
|
143 mbuf.resize(16);
|
nuclear@0
|
144 built = false;
|
nuclear@0
|
145 }
|
nuclear@0
|
146
|
nuclear@0
|
147 CLProgram::~CLProgram()
|
nuclear@0
|
148 {
|
nuclear@0
|
149 if(prog) {
|
nuclear@0
|
150
|
nuclear@0
|
151 clReleaseProgram(prog);
|
nuclear@0
|
152 }
|
nuclear@0
|
153 if(kernel) {
|
nuclear@0
|
154
|
nuclear@0
|
155 clReleaseKernel(kernel);
|
nuclear@0
|
156 }
|
nuclear@0
|
157 for(size_t i=0; i<mbuf.size(); i++) {
|
nuclear@0
|
158 if(mbuf[i]) {
|
nuclear@0
|
159 destroy_mem_buffer(mbuf[i]);
|
nuclear@0
|
160 }
|
nuclear@0
|
161 }
|
nuclear@0
|
162 }
|
nuclear@0
|
163
|
nuclear@0
|
164 bool CLProgram::load(const char *fname)
|
nuclear@0
|
165 {
|
nuclear@0
|
166 FILE *fp;
|
nuclear@0
|
167 char *src;
|
nuclear@0
|
168 struct stat st;
|
nuclear@0
|
169
|
nuclear@0
|
170 printf("loading opencl program (%s)\n", fname);
|
nuclear@0
|
171
|
nuclear@0
|
172 if(!(fp = fopen(fname, "rb"))) {
|
nuclear@0
|
173 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
|
nuclear@0
|
174 return false;
|
nuclear@0
|
175 }
|
nuclear@0
|
176
|
nuclear@0
|
177 fstat(fileno(fp), &st);
|
nuclear@0
|
178
|
nuclear@0
|
179 src = new char[st.st_size + 1];
|
nuclear@0
|
180
|
nuclear@0
|
181 fread(src, 1, st.st_size, fp);
|
nuclear@0
|
182 src[st.st_size] = 0;
|
nuclear@0
|
183 fclose(fp);
|
nuclear@0
|
184
|
nuclear@0
|
185
|
nuclear@0
|
186 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
|
nuclear@0
|
187 fprintf(stderr, "error creating program object: %s\n", fname);
|
nuclear@0
|
188 delete [] src;
|
nuclear@0
|
189 return false;
|
nuclear@0
|
190 }
|
nuclear@0
|
191 delete [] src;
|
nuclear@0
|
192 return true;
|
nuclear@0
|
193 }
|
nuclear@0
|
194
|
nuclear@0
|
195 bool CLProgram::set_arg(int arg, int rdwr, size_t sz, void *ptr)
|
nuclear@0
|
196 {
|
nuclear@0
|
197 CLMemBuffer *buf;
|
nuclear@0
|
198
|
nuclear@0
|
199 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
|
nuclear@0
|
200 return false;
|
nuclear@0
|
201 }
|
nuclear@0
|
202
|
nuclear@0
|
203 if((int)mbuf.size() <= arg) {
|
nuclear@0
|
204 mbuf.resize(arg + 1);
|
nuclear@0
|
205 }
|
nuclear@0
|
206 mbuf[arg] = buf;
|
nuclear@0
|
207 return true;
|
nuclear@0
|
208 }
|
nuclear@0
|
209
|
nuclear@0
|
210 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
|
nuclear@0
|
211 {
|
nuclear@0
|
212 if(arg < 0 || arg >= (int)mbuf.size()) {
|
nuclear@0
|
213 return 0;
|
nuclear@0
|
214 }
|
nuclear@0
|
215 return mbuf[arg];
|
nuclear@0
|
216 }
|
nuclear@0
|
217
|
nuclear@0
|
218 bool CLProgram::build()
|
nuclear@0
|
219 {
|
nuclear@0
|
220 char errlog[512];
|
nuclear@0
|
221
|
nuclear@0
|
222
|
nuclear@0
|
223 if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) {
|
nuclear@0
|
224 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0);
|
nuclear@0
|
225 fprintf(stderr, "failed to build program:\n%s\n", errlog);
|
nuclear@0
|
226 clReleaseProgram(prog);
|
nuclear@0
|
227 prog = 0;
|
nuclear@0
|
228 return false;
|
nuclear@0
|
229 }
|
nuclear@0
|
230
|
nuclear@0
|
231
|
nuclear@0
|
232 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
|
nuclear@0
|
233 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
|
nuclear@0
|
234 clReleaseProgram(prog);
|
nuclear@0
|
235 prog = 0;
|
nuclear@0
|
236 return false;
|
nuclear@0
|
237 }
|
nuclear@0
|
238
|
nuclear@0
|
239 for(size_t i=0; i<mbuf.size(); i++) {
|
nuclear@0
|
240 if(!mbuf[i]) break;
|
nuclear@0
|
241
|
nuclear@0
|
242 int err;
|
nuclear@0
|
243 if((err = clSetKernelArg(kernel, i, sizeof mbuf[i]->mem, &mbuf[i]->mem)) != 0) {
|
nuclear@0
|
244 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
|
nuclear@0
|
245 clReleaseProgram(prog);
|
nuclear@0
|
246 clReleaseKernel(kernel);
|
nuclear@0
|
247 prog = 0;
|
nuclear@0
|
248 kernel = 0;
|
nuclear@0
|
249 return false;
|
nuclear@0
|
250 }
|
nuclear@0
|
251 }
|
nuclear@0
|
252
|
nuclear@0
|
253 built = true;
|
nuclear@0
|
254 return true;
|
nuclear@0
|
255 }
|
nuclear@0
|
256
|
nuclear@0
|
257 bool CLProgram::run() const
|
nuclear@0
|
258 {
|
nuclear@0
|
259 return run(1, 1);
|
nuclear@0
|
260 }
|
nuclear@0
|
261
|
nuclear@0
|
262 bool CLProgram::run(int dim, ...) const
|
nuclear@0
|
263 {
|
nuclear@0
|
264 if(!built) {
|
nuclear@0
|
265 if(!((CLProgram*)this)->build()) {
|
nuclear@0
|
266 return false;
|
nuclear@0
|
267 }
|
nuclear@0
|
268 }
|
nuclear@0
|
269
|
nuclear@0
|
270 va_list ap;
|
nuclear@0
|
271 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
|
nuclear@0
|
272
|
nuclear@0
|
273 va_start(ap, dim);
|
nuclear@0
|
274 for(int i=0; i<dim; i++) {
|
nuclear@0
|
275 global_size[i] = va_arg(ap, int);
|
nuclear@0
|
276 }
|
nuclear@0
|
277 va_end(ap);
|
nuclear@0
|
278
|
nuclear@0
|
279 int err;
|
nuclear@0
|
280 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
|
nuclear@0
|
281 fprintf(stderr, "error executing kernel (%d)\n", err);
|
nuclear@0
|
282 return false;
|
nuclear@0
|
283 }
|
nuclear@0
|
284 return true;
|
nuclear@0
|
285 }
|
nuclear@0
|
286
|
nuclear@0
|
287 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
|
nuclear@0
|
288 {
|
nuclear@0
|
289 unsigned int i, j, num_dev, sel;
|
nuclear@0
|
290 cl_device_id dev[32];
|
nuclear@0
|
291
|
nuclear@0
|
292 dev_inf->work_item_sizes = 0;
|
nuclear@0
|
293
|
nuclear@0
|
294
|
nuclear@0
|
295 clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
|
nuclear@0
|
296 printf("found %d cl devices.\n", num_dev);
|
nuclear@0
|
297
|
nuclear@0
|
298 for(i=0; i<num_dev; i++) {
|
nuclear@0
|
299 struct device_info di;
|
nuclear@0
|
300
|
nuclear@0
|
301 if(get_dev_info(dev[i], &di) == -1) {
|
nuclear@0
|
302 free(dev_inf->work_item_sizes);
|
nuclear@0
|
303 return -1;
|
nuclear@0
|
304 }
|
nuclear@0
|
305
|
nuclear@0
|
306 printf("--> device %u (%s)\n", i, devtypestr(di.type));
|
nuclear@0
|
307 printf("max compute units: %u\n", di.units);
|
nuclear@0
|
308 printf("max clock frequency: %u\n", di.clock);
|
nuclear@0
|
309 printf("max work item dimensions: %u\n", di.dim);
|
nuclear@0
|
310
|
nuclear@0
|
311 printf("max work item sizes: ");
|
nuclear@0
|
312 for(j=0; j<di.dim; j++) {
|
nuclear@0
|
313 printf("%u", (unsigned int)di.work_item_sizes[j]);
|
nuclear@0
|
314 if(di.dim - j > 1) {
|
nuclear@0
|
315 printf(", ");
|
nuclear@0
|
316 }
|
nuclear@0
|
317 }
|
nuclear@0
|
318 putchar('\n');
|
nuclear@0
|
319
|
nuclear@0
|
320 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
|
nuclear@0
|
321 printf("max object allocation size: ");
|
nuclear@0
|
322 print_memsize(stdout, di.mem_size);
|
nuclear@0
|
323 putchar('\n');
|
nuclear@0
|
324
|
nuclear@0
|
325 if(devcmp(&di, dev_inf) > 0) {
|
nuclear@0
|
326 free(dev_inf->work_item_sizes);
|
nuclear@0
|
327 memcpy(dev_inf, &di, sizeof di);
|
nuclear@0
|
328 sel = i;
|
nuclear@0
|
329 }
|
nuclear@0
|
330 }
|
nuclear@0
|
331
|
nuclear@0
|
332 if(num_dev) {
|
nuclear@0
|
333 printf("\nusing device: %d\n", sel);
|
nuclear@0
|
334 return 0;
|
nuclear@0
|
335 }
|
nuclear@0
|
336
|
nuclear@0
|
337 return -1;
|
nuclear@0
|
338 }
|
nuclear@0
|
339
|
nuclear@0
|
340 static int get_dev_info(cl_device_id dev, struct device_info *di)
|
nuclear@0
|
341 {
|
nuclear@0
|
342 di->id = dev;
|
nuclear@0
|
343
|
nuclear@0
|
344
|
nuclear@0
|
345 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
|
nuclear@0
|
346 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
|
nuclear@0
|
347 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
|
nuclear@0
|
348 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
|
nuclear@0
|
349
|
nuclear@0
|
350 di->work_item_sizes = new size_t[di->dim];
|
nuclear@0
|
351
|
nuclear@0
|
352 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
|
nuclear@0
|
353 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
|
nuclear@0
|
354 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
|
nuclear@0
|
355
|
nuclear@0
|
356 return 0;
|
nuclear@0
|
357 }
|
nuclear@0
|
358
|
nuclear@0
|
359 static int devcmp(struct device_info *a, struct device_info *b)
|
nuclear@0
|
360 {
|
nuclear@0
|
361 unsigned int aval = a->units * a->clock;
|
nuclear@0
|
362 unsigned int bval = b->units * b->clock;
|
nuclear@0
|
363
|
nuclear@0
|
364 return aval - bval;
|
nuclear@0
|
365 }
|
nuclear@0
|
366
|
nuclear@0
|
367 static const char *devtypestr(cl_device_type type)
|
nuclear@0
|
368 {
|
nuclear@0
|
369 switch(type) {
|
nuclear@0
|
370 case CL_DEVICE_TYPE_CPU:
|
nuclear@0
|
371 return "cpu";
|
nuclear@0
|
372 case CL_DEVICE_TYPE_GPU:
|
nuclear@0
|
373 return "gpu";
|
nuclear@0
|
374 case CL_DEVICE_TYPE_ACCELERATOR:
|
nuclear@0
|
375 return "accelerator";
|
nuclear@0
|
376 default:
|
nuclear@0
|
377 break;
|
nuclear@0
|
378 }
|
nuclear@0
|
379 return "unknown";
|
nuclear@0
|
380 }
|
nuclear@0
|
381
|
nuclear@0
|
382 static void print_memsize(FILE *out, unsigned long bytes)
|
nuclear@0
|
383 {
|
nuclear@0
|
384 int i;
|
nuclear@0
|
385 unsigned long memsz = bytes;
|
nuclear@0
|
386 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
|
nuclear@0
|
387
|
nuclear@0
|
388 for(i=0; suffix[i]; i++) {
|
nuclear@0
|
389 if(memsz < 1024) {
|
nuclear@0
|
390 fprintf(out, "%lu %s", memsz, suffix[i]);
|
nuclear@0
|
391 if(i > 0) {
|
nuclear@0
|
392 fprintf(out, " (%lu bytes)", bytes);
|
nuclear@0
|
393 }
|
nuclear@0
|
394 return;
|
nuclear@0
|
395 }
|
nuclear@0
|
396
|
nuclear@0
|
397 memsz /= 1024;
|
nuclear@0
|
398 }
|
nuclear@0
|
399 }
|