clray
diff src/ocl.cc @ 0:5767277e049f
first test works, let's try to make a raytracer now...
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Mon, 12 Jul 2010 05:56:47 +0300 |
parents | |
children | 0b0e4d18d53f |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/ocl.cc Mon Jul 12 05:56:47 2010 +0300 1.3 @@ -0,0 +1,399 @@ 1.4 +#include <stdio.h> 1.5 +#include <stdlib.h> 1.6 +#include <string.h> 1.7 +#include <errno.h> 1.8 +#include <alloca.h> 1.9 +#include <sys/stat.h> 1.10 +#include "ocl.h" 1.11 + 1.12 + 1.13 +class InitCL { 1.14 +public: 1.15 + InitCL(); 1.16 +}; 1.17 + 1.18 +struct device_info { 1.19 + cl_device_id id; 1.20 + cl_device_type type; 1.21 + unsigned int units; 1.22 + unsigned int clock; 1.23 + 1.24 + unsigned int dim; 1.25 + size_t *work_item_sizes; 1.26 + size_t work_group_size; 1.27 + 1.28 + unsigned long mem_size; 1.29 +}; 1.30 + 1.31 +static bool init_opencl(void); 1.32 +static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); 1.33 +static int get_dev_info(cl_device_id dev, struct device_info *di); 1.34 +static int devcmp(struct device_info *a, struct device_info *b); 1.35 +static const char *devtypestr(cl_device_type type); 1.36 +static void print_memsize(FILE *out, unsigned long memsz); 1.37 + 1.38 + 1.39 +static InitCL initcl; 1.40 +static cl_context ctx; 1.41 +static cl_command_queue cmdq; 1.42 +static device_info devinf; 1.43 + 1.44 +InitCL::InitCL() 1.45 +{ 1.46 + if(!init_opencl()) { 1.47 + exit(0); 1.48 + } 1.49 +} 1.50 + 1.51 +static bool init_opencl(void) 1.52 +{ 1.53 + if(select_device(&devinf, devcmp) == -1) { 1.54 + return false; 1.55 + } 1.56 + 1.57 + 1.58 + if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { 1.59 + fprintf(stderr, "failed to create opencl context\n"); 1.60 + return false; 1.61 + } 1.62 + 1.63 + if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) { 1.64 + fprintf(stderr, "failed to create command queue\n"); 1.65 + return false; 1.66 + } 1.67 + return true; 1.68 +} 1.69 + 1.70 + 1.71 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf) 1.72 +{ 1.73 + int err; 1.74 + cl_mem mem; 1.75 + 1.76 + 1.77 + if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) { 1.78 + fprintf(stderr, "failed to create memory buffer (%d)\n", err); 1.79 + return 0; 1.80 + } 1.81 + 1.82 + CLMemBuffer *mbuf = new CLMemBuffer; 1.83 + mbuf->mem = mem; 1.84 + mbuf->size = sz; 1.85 + return mbuf; 1.86 +} 1.87 + 1.88 +void destroy_mem_buffer(CLMemBuffer *mbuf) 1.89 +{ 1.90 + if(mbuf) { 1.91 + 1.92 + clReleaseMemObject(mbuf->mem); 1.93 + delete mbuf; 1.94 + } 1.95 +} 1.96 + 1.97 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) 1.98 +{ 1.99 + if(!mbuf) return 0; 1.100 + 1.101 + int err; 1.102 + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); 1.103 + if(!mbuf->ptr) { 1.104 + fprintf(stderr, "failed to map buffer (%d)\n", err); 1.105 + return 0; 1.106 + } 1.107 + return mbuf->ptr; 1.108 +} 1.109 + 1.110 +void unmap_mem_buffer(CLMemBuffer *mbuf) 1.111 +{ 1.112 + if(!mbuf || !mbuf->ptr) return; 1.113 + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); 1.114 +} 1.115 + 1.116 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src) 1.117 +{ 1.118 + if(!mbuf) return false; 1.119 + 1.120 + int err; 1.121 + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { 1.122 + fprintf(stderr, "failed to write buffer (%d)\n", err); 1.123 + return false; 1.124 + } 1.125 + return true; 1.126 +} 1.127 + 1.128 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) 1.129 +{ 1.130 + if(!mbuf) return false; 1.131 + 1.132 + int err; 1.133 + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { 1.134 + fprintf(stderr, "failed to read buffer (%d)\n", err); 1.135 + return false; 1.136 + } 1.137 + return true; 1.138 +} 1.139 + 1.140 + 1.141 +CLProgram::CLProgram(const char *kname) 1.142 +{ 1.143 + prog = 0; 1.144 + kernel = 0; 1.145 + this->kname = kname; 1.146 + mbuf.resize(16); 1.147 + built = false; 1.148 +} 1.149 + 1.150 +CLProgram::~CLProgram() 1.151 +{ 1.152 + if(prog) { 1.153 + 1.154 + clReleaseProgram(prog); 1.155 + } 1.156 + if(kernel) { 1.157 + 1.158 + clReleaseKernel(kernel); 1.159 + } 1.160 + for(size_t i=0; i<mbuf.size(); i++) { 1.161 + if(mbuf[i]) { 1.162 + destroy_mem_buffer(mbuf[i]); 1.163 + } 1.164 + } 1.165 +} 1.166 + 1.167 +bool CLProgram::load(const char *fname) 1.168 +{ 1.169 + FILE *fp; 1.170 + char *src; 1.171 + struct stat st; 1.172 + 1.173 + printf("loading opencl program (%s)\n", fname); 1.174 + 1.175 + if(!(fp = fopen(fname, "rb"))) { 1.176 + fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno)); 1.177 + return false; 1.178 + } 1.179 + 1.180 + fstat(fileno(fp), &st); 1.181 + 1.182 + src = new char[st.st_size + 1]; 1.183 + 1.184 + fread(src, 1, st.st_size, fp); 1.185 + src[st.st_size] = 0; 1.186 + fclose(fp); 1.187 + 1.188 + 1.189 + if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) { 1.190 + fprintf(stderr, "error creating program object: %s\n", fname); 1.191 + delete [] src; 1.192 + return false; 1.193 + } 1.194 + delete [] src; 1.195 + return true; 1.196 +} 1.197 + 1.198 +bool CLProgram::set_arg(int arg, int rdwr, size_t sz, void *ptr) 1.199 +{ 1.200 + CLMemBuffer *buf; 1.201 + 1.202 + if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { 1.203 + return false; 1.204 + } 1.205 + 1.206 + if((int)mbuf.size() <= arg) { 1.207 + mbuf.resize(arg + 1); 1.208 + } 1.209 + mbuf[arg] = buf; 1.210 + return true; 1.211 +} 1.212 + 1.213 +CLMemBuffer *CLProgram::get_arg_buffer(int arg) 1.214 +{ 1.215 + if(arg < 0 || arg >= (int)mbuf.size()) { 1.216 + return 0; 1.217 + } 1.218 + return mbuf[arg]; 1.219 +} 1.220 + 1.221 +bool CLProgram::build() 1.222 +{ 1.223 + char errlog[512]; 1.224 + 1.225 + 1.226 + if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) { 1.227 + clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0); 1.228 + fprintf(stderr, "failed to build program:\n%s\n", errlog); 1.229 + clReleaseProgram(prog); 1.230 + prog = 0; 1.231 + return false; 1.232 + } 1.233 + 1.234 + 1.235 + if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) { 1.236 + fprintf(stderr, "failed to create kernel: %s\n", kname.c_str()); 1.237 + clReleaseProgram(prog); 1.238 + prog = 0; 1.239 + return false; 1.240 + } 1.241 + 1.242 + for(size_t i=0; i<mbuf.size(); i++) { 1.243 + if(!mbuf[i]) break; 1.244 + 1.245 + int err; 1.246 + if((err = clSetKernelArg(kernel, i, sizeof mbuf[i]->mem, &mbuf[i]->mem)) != 0) { 1.247 + fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err); 1.248 + clReleaseProgram(prog); 1.249 + clReleaseKernel(kernel); 1.250 + prog = 0; 1.251 + kernel = 0; 1.252 + return false; 1.253 + } 1.254 + } 1.255 + 1.256 + built = true; 1.257 + return true; 1.258 +} 1.259 + 1.260 +bool CLProgram::run() const 1.261 +{ 1.262 + return run(1, 1); 1.263 +} 1.264 + 1.265 +bool CLProgram::run(int dim, ...) const 1.266 +{ 1.267 + if(!built) { 1.268 + if(!((CLProgram*)this)->build()) { 1.269 + return false; 1.270 + } 1.271 + } 1.272 + 1.273 + va_list ap; 1.274 + size_t *global_size = (size_t*)alloca(dim * sizeof *global_size); 1.275 + 1.276 + va_start(ap, dim); 1.277 + for(int i=0; i<dim; i++) { 1.278 + global_size[i] = va_arg(ap, int); 1.279 + } 1.280 + va_end(ap); 1.281 + 1.282 + int err; 1.283 + if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) { 1.284 + fprintf(stderr, "error executing kernel (%d)\n", err); 1.285 + return false; 1.286 + } 1.287 + return true; 1.288 +} 1.289 + 1.290 +static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*)) 1.291 +{ 1.292 + unsigned int i, j, num_dev, sel; 1.293 + cl_device_id dev[32]; 1.294 + 1.295 + dev_inf->work_item_sizes = 0; 1.296 + 1.297 + 1.298 + clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev); 1.299 + printf("found %d cl devices.\n", num_dev); 1.300 + 1.301 + for(i=0; i<num_dev; i++) { 1.302 + struct device_info di; 1.303 + 1.304 + if(get_dev_info(dev[i], &di) == -1) { 1.305 + free(dev_inf->work_item_sizes); 1.306 + return -1; 1.307 + } 1.308 + 1.309 + printf("--> device %u (%s)\n", i, devtypestr(di.type)); 1.310 + printf("max compute units: %u\n", di.units); 1.311 + printf("max clock frequency: %u\n", di.clock); 1.312 + printf("max work item dimensions: %u\n", di.dim); 1.313 + 1.314 + printf("max work item sizes: "); 1.315 + for(j=0; j<di.dim; j++) { 1.316 + printf("%u", (unsigned int)di.work_item_sizes[j]); 1.317 + if(di.dim - j > 1) { 1.318 + printf(", "); 1.319 + } 1.320 + } 1.321 + putchar('\n'); 1.322 + 1.323 + printf("max work group size: %u\n", (unsigned int)di.work_group_size); 1.324 + printf("max object allocation size: "); 1.325 + print_memsize(stdout, di.mem_size); 1.326 + putchar('\n'); 1.327 + 1.328 + if(devcmp(&di, dev_inf) > 0) { 1.329 + free(dev_inf->work_item_sizes); 1.330 + memcpy(dev_inf, &di, sizeof di); 1.331 + sel = i; 1.332 + } 1.333 + } 1.334 + 1.335 + if(num_dev) { 1.336 + printf("\nusing device: %d\n", sel); 1.337 + return 0; 1.338 + } 1.339 + 1.340 + return -1; 1.341 +} 1.342 + 1.343 +static int get_dev_info(cl_device_id dev, struct device_info *di) 1.344 +{ 1.345 + di->id = dev; 1.346 + 1.347 + 1.348 + clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0); 1.349 + clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0); 1.350 + clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0); 1.351 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0); 1.352 + 1.353 + di->work_item_sizes = new size_t[di->dim]; 1.354 + 1.355 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0); 1.356 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0); 1.357 + clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0); 1.358 + 1.359 + return 0; 1.360 +} 1.361 + 1.362 +static int devcmp(struct device_info *a, struct device_info *b) 1.363 +{ 1.364 + unsigned int aval = a->units * a->clock; 1.365 + unsigned int bval = b->units * b->clock; 1.366 + 1.367 + return aval - bval; 1.368 +} 1.369 + 1.370 +static const char *devtypestr(cl_device_type type) 1.371 +{ 1.372 + switch(type) { 1.373 + case CL_DEVICE_TYPE_CPU: 1.374 + return "cpu"; 1.375 + case CL_DEVICE_TYPE_GPU: 1.376 + return "gpu"; 1.377 + case CL_DEVICE_TYPE_ACCELERATOR: 1.378 + return "accelerator"; 1.379 + default: 1.380 + break; 1.381 + } 1.382 + return "unknown"; 1.383 +} 1.384 + 1.385 +static void print_memsize(FILE *out, unsigned long bytes) 1.386 +{ 1.387 + int i; 1.388 + unsigned long memsz = bytes; 1.389 + const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0}; 1.390 + 1.391 + for(i=0; suffix[i]; i++) { 1.392 + if(memsz < 1024) { 1.393 + fprintf(out, "%lu %s", memsz, suffix[i]); 1.394 + if(i > 0) { 1.395 + fprintf(out, " (%lu bytes)", bytes); 1.396 + } 1.397 + return; 1.398 + } 1.399 + 1.400 + memsz /= 1024; 1.401 + } 1.402 +}