clray
view src/ocl.cc @ 0:5767277e049f
first test works, let's try to make a raytracer now...
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Mon, 12 Jul 2010 05:56:47 +0300 |
parents | |
children | 0b0e4d18d53f |
line source
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <errno.h>
5 #include <alloca.h>
6 #include <sys/stat.h>
7 #include "ocl.h"
10 class InitCL {
11 public:
12 InitCL();
13 };
15 struct device_info {
16 cl_device_id id;
17 cl_device_type type;
18 unsigned int units;
19 unsigned int clock;
21 unsigned int dim;
22 size_t *work_item_sizes;
23 size_t work_group_size;
25 unsigned long mem_size;
26 };
28 static bool init_opencl(void);
29 static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
30 static int get_dev_info(cl_device_id dev, struct device_info *di);
31 static int devcmp(struct device_info *a, struct device_info *b);
32 static const char *devtypestr(cl_device_type type);
33 static void print_memsize(FILE *out, unsigned long memsz);
36 static InitCL initcl;
37 static cl_context ctx;
38 static cl_command_queue cmdq;
39 static device_info devinf;
41 InitCL::InitCL()
42 {
43 if(!init_opencl()) {
44 exit(0);
45 }
46 }
48 static bool init_opencl(void)
49 {
50 if(select_device(&devinf, devcmp) == -1) {
51 return false;
52 }
55 if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
56 fprintf(stderr, "failed to create opencl context\n");
57 return false;
58 }
60 if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
61 fprintf(stderr, "failed to create command queue\n");
62 return false;
63 }
64 return true;
65 }
68 CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
69 {
70 int err;
71 cl_mem mem;
74 if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
75 fprintf(stderr, "failed to create memory buffer (%d)\n", err);
76 return 0;
77 }
79 CLMemBuffer *mbuf = new CLMemBuffer;
80 mbuf->mem = mem;
81 mbuf->size = sz;
82 return mbuf;
83 }
85 void destroy_mem_buffer(CLMemBuffer *mbuf)
86 {
87 if(mbuf) {
89 clReleaseMemObject(mbuf->mem);
90 delete mbuf;
91 }
92 }
94 void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
95 {
96 if(!mbuf) return 0;
98 int err;
99 mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
100 if(!mbuf->ptr) {
101 fprintf(stderr, "failed to map buffer (%d)\n", err);
102 return 0;
103 }
104 return mbuf->ptr;
105 }
107 void unmap_mem_buffer(CLMemBuffer *mbuf)
108 {
109 if(!mbuf || !mbuf->ptr) return;
110 clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
111 }
113 bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
114 {
115 if(!mbuf) return false;
117 int err;
118 if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
119 fprintf(stderr, "failed to write buffer (%d)\n", err);
120 return false;
121 }
122 return true;
123 }
125 bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
126 {
127 if(!mbuf) return false;
129 int err;
130 if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
131 fprintf(stderr, "failed to read buffer (%d)\n", err);
132 return false;
133 }
134 return true;
135 }
138 CLProgram::CLProgram(const char *kname)
139 {
140 prog = 0;
141 kernel = 0;
142 this->kname = kname;
143 mbuf.resize(16);
144 built = false;
145 }
147 CLProgram::~CLProgram()
148 {
149 if(prog) {
151 clReleaseProgram(prog);
152 }
153 if(kernel) {
155 clReleaseKernel(kernel);
156 }
157 for(size_t i=0; i<mbuf.size(); i++) {
158 if(mbuf[i]) {
159 destroy_mem_buffer(mbuf[i]);
160 }
161 }
162 }
164 bool CLProgram::load(const char *fname)
165 {
166 FILE *fp;
167 char *src;
168 struct stat st;
170 printf("loading opencl program (%s)\n", fname);
172 if(!(fp = fopen(fname, "rb"))) {
173 fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
174 return false;
175 }
177 fstat(fileno(fp), &st);
179 src = new char[st.st_size + 1];
181 fread(src, 1, st.st_size, fp);
182 src[st.st_size] = 0;
183 fclose(fp);
186 if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
187 fprintf(stderr, "error creating program object: %s\n", fname);
188 delete [] src;
189 return false;
190 }
191 delete [] src;
192 return true;
193 }
195 bool CLProgram::set_arg(int arg, int rdwr, size_t sz, void *ptr)
196 {
197 CLMemBuffer *buf;
199 if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
200 return false;
201 }
203 if((int)mbuf.size() <= arg) {
204 mbuf.resize(arg + 1);
205 }
206 mbuf[arg] = buf;
207 return true;
208 }
210 CLMemBuffer *CLProgram::get_arg_buffer(int arg)
211 {
212 if(arg < 0 || arg >= (int)mbuf.size()) {
213 return 0;
214 }
215 return mbuf[arg];
216 }
218 bool CLProgram::build()
219 {
220 char errlog[512];
223 if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) {
224 clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0);
225 fprintf(stderr, "failed to build program:\n%s\n", errlog);
226 clReleaseProgram(prog);
227 prog = 0;
228 return false;
229 }
232 if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
233 fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
234 clReleaseProgram(prog);
235 prog = 0;
236 return false;
237 }
239 for(size_t i=0; i<mbuf.size(); i++) {
240 if(!mbuf[i]) break;
242 int err;
243 if((err = clSetKernelArg(kernel, i, sizeof mbuf[i]->mem, &mbuf[i]->mem)) != 0) {
244 fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
245 clReleaseProgram(prog);
246 clReleaseKernel(kernel);
247 prog = 0;
248 kernel = 0;
249 return false;
250 }
251 }
253 built = true;
254 return true;
255 }
257 bool CLProgram::run() const
258 {
259 return run(1, 1);
260 }
262 bool CLProgram::run(int dim, ...) const
263 {
264 if(!built) {
265 if(!((CLProgram*)this)->build()) {
266 return false;
267 }
268 }
270 va_list ap;
271 size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
273 va_start(ap, dim);
274 for(int i=0; i<dim; i++) {
275 global_size[i] = va_arg(ap, int);
276 }
277 va_end(ap);
279 int err;
280 if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
281 fprintf(stderr, "error executing kernel (%d)\n", err);
282 return false;
283 }
284 return true;
285 }
287 static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
288 {
289 unsigned int i, j, num_dev, sel;
290 cl_device_id dev[32];
292 dev_inf->work_item_sizes = 0;
295 clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
296 printf("found %d cl devices.\n", num_dev);
298 for(i=0; i<num_dev; i++) {
299 struct device_info di;
301 if(get_dev_info(dev[i], &di) == -1) {
302 free(dev_inf->work_item_sizes);
303 return -1;
304 }
306 printf("--> device %u (%s)\n", i, devtypestr(di.type));
307 printf("max compute units: %u\n", di.units);
308 printf("max clock frequency: %u\n", di.clock);
309 printf("max work item dimensions: %u\n", di.dim);
311 printf("max work item sizes: ");
312 for(j=0; j<di.dim; j++) {
313 printf("%u", (unsigned int)di.work_item_sizes[j]);
314 if(di.dim - j > 1) {
315 printf(", ");
316 }
317 }
318 putchar('\n');
320 printf("max work group size: %u\n", (unsigned int)di.work_group_size);
321 printf("max object allocation size: ");
322 print_memsize(stdout, di.mem_size);
323 putchar('\n');
325 if(devcmp(&di, dev_inf) > 0) {
326 free(dev_inf->work_item_sizes);
327 memcpy(dev_inf, &di, sizeof di);
328 sel = i;
329 }
330 }
332 if(num_dev) {
333 printf("\nusing device: %d\n", sel);
334 return 0;
335 }
337 return -1;
338 }
340 static int get_dev_info(cl_device_id dev, struct device_info *di)
341 {
342 di->id = dev;
345 clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
346 clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
347 clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
348 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
350 di->work_item_sizes = new size_t[di->dim];
352 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
353 clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
354 clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
356 return 0;
357 }
359 static int devcmp(struct device_info *a, struct device_info *b)
360 {
361 unsigned int aval = a->units * a->clock;
362 unsigned int bval = b->units * b->clock;
364 return aval - bval;
365 }
367 static const char *devtypestr(cl_device_type type)
368 {
369 switch(type) {
370 case CL_DEVICE_TYPE_CPU:
371 return "cpu";
372 case CL_DEVICE_TYPE_GPU:
373 return "gpu";
374 case CL_DEVICE_TYPE_ACCELERATOR:
375 return "accelerator";
376 default:
377 break;
378 }
379 return "unknown";
380 }
382 static void print_memsize(FILE *out, unsigned long bytes)
383 {
384 int i;
385 unsigned long memsz = bytes;
386 const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
388 for(i=0; suffix[i]; i++) {
389 if(memsz < 1024) {
390 fprintf(out, "%lu %s", memsz, suffix[i]);
391 if(i > 0) {
392 fprintf(out, " (%lu bytes)", bytes);
393 }
394 return;
395 }
397 memsz /= 1024;
398 }
399 }