clray
changeset 0:5767277e049f
first test works, let's try to make a raytracer now...
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Mon, 12 Jul 2010 05:56:47 +0300 |
parents | |
children | 0b0e4d18d53f |
files | Makefile src/clray.cc src/ocl.cc src/ocl.h test.cl |
diffstat | 5 files changed, 523 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/Makefile Mon Jul 12 05:56:47 2010 +0300 1.3 @@ -0,0 +1,14 @@ 1.4 +src = $(wildcard src/*.cc) 1.5 +obj = $(src:.cc=.o) 1.6 +bin = test 1.7 + 1.8 +CXX = g++ 1.9 +CXXFLAGS = -pedantic -Wall -g 1.10 +LDFLAGS = -framework OpenCL 1.11 + 1.12 +$(bin): $(obj) 1.13 + $(CXX) -o $@ $(obj) $(LDFLAGS) 1.14 + 1.15 +.PHONY: clean 1.16 +clean: 1.17 + rm -f $(obj) $(bin)
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/src/clray.cc Mon Jul 12 05:56:47 2010 +0300 2.3 @@ -0,0 +1,41 @@ 2.4 +#include <stdio.h> 2.5 +#include <assert.h> 2.6 +#include "ocl.h" 2.7 + 2.8 +int main() 2.9 +{ 2.10 + int data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; 2.11 + int res[16]; 2.12 + int count = sizeof data / sizeof *data; 2.13 + 2.14 + for(int i=0; i<count; i++) { 2.15 + printf("%d ", data[i]); 2.16 + } 2.17 + putchar('\n'); 2.18 + 2.19 + CLProgram prog("test"); 2.20 + if(!prog.load("test.cl")) { 2.21 + return 1; 2.22 + } 2.23 + if(!prog.set_arg(0, ARG_RD, sizeof data, data)) { 2.24 + return 1; 2.25 + } 2.26 + if(!prog.set_arg(1, ARG_WR, sizeof res, res)) { 2.27 + return 1; 2.28 + } 2.29 + 2.30 + if(!prog.run(1, 16)) { 2.31 + return 1; 2.32 + } 2.33 + 2.34 + CLMemBuffer *mbuf = prog.get_arg_buffer(1); 2.35 + map_mem_buffer(mbuf, MAP_RD); 2.36 + 2.37 + for(int i=0; i<count; i++) { 2.38 + printf("%d ", res[i]); 2.39 + } 2.40 + putchar('\n'); 2.41 + unmap_mem_buffer(mbuf); 2.42 + 2.43 + return 0; 2.44 +}
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/ocl.cc Mon Jul 12 05:56:47 2010 +0300 3.3 @@ -0,0 +1,399 @@ 3.4 +#include <stdio.h> 3.5 +#include <stdlib.h> 3.6 +#include <string.h> 3.7 +#include <errno.h> 3.8 +#include <alloca.h> 3.9 +#include <sys/stat.h> 3.10 +#include "ocl.h" 3.11 + 3.12 + 3.13 +class InitCL { 3.14 +public: 3.15 + InitCL(); 3.16 +}; 3.17 + 3.18 +struct device_info { 3.19 + cl_device_id id; 3.20 + cl_device_type type; 3.21 + unsigned int units; 3.22 + unsigned int clock; 3.23 + 3.24 + unsigned int dim; 3.25 + size_t *work_item_sizes; 3.26 + size_t work_group_size; 3.27 + 3.28 + unsigned long mem_size; 3.29 +}; 3.30 + 3.31 +static bool init_opencl(void); 3.32 +static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); 3.33 +static int get_dev_info(cl_device_id dev, struct device_info *di); 3.34 +static int devcmp(struct device_info *a, struct device_info *b); 3.35 +static const char *devtypestr(cl_device_type type); 3.36 +static void print_memsize(FILE *out, unsigned long memsz); 3.37 + 3.38 + 3.39 +static InitCL initcl; 3.40 +static cl_context ctx; 3.41 +static cl_command_queue cmdq; 3.42 +static device_info devinf; 3.43 + 3.44 +InitCL::InitCL() 3.45 +{ 3.46 + if(!init_opencl()) { 3.47 + exit(0); 3.48 + } 3.49 +} 3.50 + 3.51 +static bool init_opencl(void) 3.52 +{ 3.53 + if(select_device(&devinf, devcmp) == -1) { 3.54 + return false; 3.55 + } 3.56 + 3.57 + 3.58 + if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { 3.59 + fprintf(stderr, "failed to create opencl context\n"); 3.60 + return false; 3.61 + } 3.62 + 3.63 + if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) { 3.64 + fprintf(stderr, "failed to create command queue\n"); 3.65 + return false; 3.66 + } 3.67 + return true; 3.68 +} 3.69 + 3.70 + 3.71 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf) 3.72 +{ 3.73 + int err; 3.74 + cl_mem mem; 3.75 + 3.76 + 3.77 + if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) { 3.78 + fprintf(stderr, "failed to create memory buffer (%d)\n", err); 3.79 + return 0; 3.80 + } 3.81 + 3.82 + CLMemBuffer *mbuf = new CLMemBuffer; 3.83 + mbuf->mem = mem; 3.84 + mbuf->size = sz; 3.85 + return mbuf; 3.86 +} 3.87 + 3.88 +void destroy_mem_buffer(CLMemBuffer *mbuf) 3.89 +{ 3.90 + if(mbuf) { 3.91 + 3.92 + clReleaseMemObject(mbuf->mem); 3.93 + delete mbuf; 3.94 + } 3.95 +} 3.96 + 3.97 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) 3.98 +{ 3.99 + if(!mbuf) return 0; 3.100 + 3.101 + int err; 3.102 + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); 3.103 + if(!mbuf->ptr) { 3.104 + fprintf(stderr, "failed to map buffer (%d)\n", err); 3.105 + return 0; 3.106 + } 3.107 + return mbuf->ptr; 3.108 +} 3.109 + 3.110 +void unmap_mem_buffer(CLMemBuffer *mbuf) 3.111 +{ 3.112 + if(!mbuf || !mbuf->ptr) return; 3.113 + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); 3.114 +} 3.115 + 3.116 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src) 3.117 +{ 3.118 + if(!mbuf) return false; 3.119 + 3.120 + int err; 3.121 + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { 3.122 + fprintf(stderr, "failed to write buffer (%d)\n", err); 3.123 + return false; 3.124 + } 3.125 + return true; 3.126 +} 3.127 + 3.128 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) 3.129 +{ 3.130 + if(!mbuf) return false; 3.131 + 3.132 + int err; 3.133 + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { 3.134 + fprintf(stderr, "failed to read buffer (%d)\n", err); 3.135 + return false; 3.136 + } 3.137 + return true; 3.138 +} 3.139 + 3.140 + 3.141 +CLProgram::CLProgram(const char *kname) 3.142 +{ 3.143 + prog = 0; 3.144 + kernel = 0; 3.145 + this->kname = kname; 3.146 + mbuf.resize(16); 3.147 + built = false; 3.148 +} 3.149 + 3.150 +CLProgram::~CLProgram() 3.151 +{ 3.152 + if(prog) { 3.153 + 3.154 + clReleaseProgram(prog); 3.155 + } 3.156 + if(kernel) { 3.157 + 3.158 + clReleaseKernel(kernel); 3.159 + } 3.160 + for(size_t i=0; i<mbuf.size(); i++) { 3.161 + if(mbuf[i]) { 3.162 + destroy_mem_buffer(mbuf[i]); 3.163 + } 3.164 + } 3.165 +} 3.166 + 3.167 +bool CLProgram::load(const char *fname) 3.168 +{ 3.169 + FILE *fp; 3.170 + char *src; 3.171 + struct stat st; 3.172 + 3.173 + printf("loading opencl program (%s)\n", fname); 3.174 + 3.175 + if(!(fp = fopen(fname, "rb"))) { 3.176 + fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno)); 3.177 + return false; 3.178 + } 3.179 + 3.180 + fstat(fileno(fp), &st); 3.181 + 3.182 + src = new char[st.st_size + 1]; 3.183 + 3.184 + fread(src, 1, st.st_size, fp); 3.185 + src[st.st_size] = 0; 3.186 + fclose(fp); 3.187 + 3.188 + 3.189 + if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) { 3.190 + fprintf(stderr, "error creating program object: %s\n", fname); 3.191 + delete [] src; 3.192 + return false; 3.193 + } 3.194 + delete [] src; 3.195 + return true; 3.196 +} 3.197 + 3.198 +bool CLProgram::set_arg(int arg, int rdwr, size_t sz, void *ptr) 3.199 +{ 3.200 + CLMemBuffer *buf; 3.201 + 3.202 + if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { 3.203 + return false; 3.204 + } 3.205 + 3.206 + if((int)mbuf.size() <= arg) { 3.207 + mbuf.resize(arg + 1); 3.208 + } 3.209 + mbuf[arg] = buf; 3.210 + return true; 3.211 +} 3.212 + 3.213 +CLMemBuffer *CLProgram::get_arg_buffer(int arg) 3.214 +{ 3.215 + if(arg < 0 || arg >= (int)mbuf.size()) { 3.216 + return 0; 3.217 + } 3.218 + return mbuf[arg]; 3.219 +} 3.220 + 3.221 +bool CLProgram::build() 3.222 +{ 3.223 + char errlog[512]; 3.224 + 3.225 + 3.226 + if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) { 3.227 + clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0); 3.228 + fprintf(stderr, "failed to build program:\n%s\n", errlog); 3.229 + clReleaseProgram(prog); 3.230 + prog = 0; 3.231 + return false; 3.232 + } 3.233 + 3.234 + 3.235 + if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) { 3.236 + fprintf(stderr, "failed to create kernel: %s\n", kname.c_str()); 3.237 + clReleaseProgram(prog); 3.238 + prog = 0; 3.239 + return false; 3.240 + } 3.241 + 3.242 + for(size_t i=0; i<mbuf.size(); i++) { 3.243 + if(!mbuf[i]) break; 3.244 + 3.245 + int err; 3.246 + if((err = clSetKernelArg(kernel, i, sizeof mbuf[i]->mem, &mbuf[i]->mem)) != 0) { 3.247 + fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err); 3.248 + clReleaseProgram(prog); 3.249 + clReleaseKernel(kernel); 3.250 + prog = 0; 3.251 + kernel = 0; 3.252 + return false; 3.253 + } 3.254 + } 3.255 + 3.256 + built = true; 3.257 + return true; 3.258 +} 3.259 + 3.260 +bool CLProgram::run() const 3.261 +{ 3.262 + return run(1, 1); 3.263 +} 3.264 + 3.265 +bool CLProgram::run(int dim, ...) const 3.266 +{ 3.267 + if(!built) { 3.268 + if(!((CLProgram*)this)->build()) { 3.269 + return false; 3.270 + } 3.271 + } 3.272 + 3.273 + va_list ap; 3.274 + size_t *global_size = (size_t*)alloca(dim * sizeof *global_size); 3.275 + 3.276 + va_start(ap, dim); 3.277 + for(int i=0; i<dim; i++) { 3.278 + global_size[i] = va_arg(ap, int); 3.279 + } 3.280 + va_end(ap); 3.281 + 3.282 + int err; 3.283 + if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) { 3.284 + fprintf(stderr, "error executing kernel (%d)\n", err); 3.285 + return false; 3.286 + } 3.287 + return true; 3.288 +} 3.289 + 3.290 +static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*)) 3.291 +{ 3.292 + unsigned int i, j, num_dev, sel; 3.293 + cl_device_id dev[32]; 3.294 + 3.295 + dev_inf->work_item_sizes = 0; 3.296 + 3.297 + 3.298 + clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev); 3.299 + printf("found %d cl devices.\n", num_dev); 3.300 + 3.301 + for(i=0; i<num_dev; i++) { 3.302 + struct device_info di; 3.303 + 3.304 + if(get_dev_info(dev[i], &di) == -1) { 3.305 + free(dev_inf->work_item_sizes); 3.306 + return -1; 3.307 + } 3.308 + 3.309 + printf("--> device %u (%s)\n", i, devtypestr(di.type)); 3.310 + printf("max compute units: %u\n", di.units); 3.311 + printf("max clock frequency: %u\n", di.clock); 3.312 + printf("max work item dimensions: %u\n", di.dim); 3.313 + 3.314 + printf("max work item sizes: "); 3.315 + for(j=0; j<di.dim; j++) { 3.316 + printf("%u", (unsigned int)di.work_item_sizes[j]); 3.317 + if(di.dim - j > 1) { 3.318 + printf(", "); 3.319 + } 3.320 + } 3.321 + putchar('\n'); 3.322 + 3.323 + printf("max work group size: %u\n", (unsigned int)di.work_group_size); 3.324 + printf("max object allocation size: "); 3.325 + print_memsize(stdout, di.mem_size); 3.326 + putchar('\n'); 3.327 + 3.328 + if(devcmp(&di, dev_inf) > 0) { 3.329 + free(dev_inf->work_item_sizes); 3.330 + memcpy(dev_inf, &di, sizeof di); 3.331 + sel = i; 3.332 + } 3.333 + } 3.334 + 3.335 + if(num_dev) { 3.336 + printf("\nusing device: %d\n", sel); 3.337 + return 0; 3.338 + } 3.339 + 3.340 + return -1; 3.341 +} 3.342 + 3.343 +static int get_dev_info(cl_device_id dev, struct device_info *di) 3.344 +{ 3.345 + di->id = dev; 3.346 + 3.347 + 3.348 + clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0); 3.349 + clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0); 3.350 + clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0); 3.351 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0); 3.352 + 3.353 + di->work_item_sizes = new size_t[di->dim]; 3.354 + 3.355 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0); 3.356 + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0); 3.357 + clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0); 3.358 + 3.359 + return 0; 3.360 +} 3.361 + 3.362 +static int devcmp(struct device_info *a, struct device_info *b) 3.363 +{ 3.364 + unsigned int aval = a->units * a->clock; 3.365 + unsigned int bval = b->units * b->clock; 3.366 + 3.367 + return aval - bval; 3.368 +} 3.369 + 3.370 +static const char *devtypestr(cl_device_type type) 3.371 +{ 3.372 + switch(type) { 3.373 + case CL_DEVICE_TYPE_CPU: 3.374 + return "cpu"; 3.375 + case CL_DEVICE_TYPE_GPU: 3.376 + return "gpu"; 3.377 + case CL_DEVICE_TYPE_ACCELERATOR: 3.378 + return "accelerator"; 3.379 + default: 3.380 + break; 3.381 + } 3.382 + return "unknown"; 3.383 +} 3.384 + 3.385 +static void print_memsize(FILE *out, unsigned long bytes) 3.386 +{ 3.387 + int i; 3.388 + unsigned long memsz = bytes; 3.389 + const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0}; 3.390 + 3.391 + for(i=0; suffix[i]; i++) { 3.392 + if(memsz < 1024) { 3.393 + fprintf(out, "%lu %s", memsz, suffix[i]); 3.394 + if(i > 0) { 3.395 + fprintf(out, " (%lu bytes)", bytes); 3.396 + } 3.397 + return; 3.398 + } 3.399 + 3.400 + memsz /= 1024; 3.401 + } 3.402 +}
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/src/ocl.h Mon Jul 12 05:56:47 2010 +0300 4.3 @@ -0,0 +1,63 @@ 4.4 +#ifndef OCL_H_ 4.5 +#define OCL_H_ 4.6 + 4.7 +#include <vector> 4.8 +#include <string> 4.9 +#ifndef __APPLE__ 4.10 +#include <CL/opencl.h> 4.11 +#else 4.12 +#include <OpenCL/opencl.h> 4.13 +#endif 4.14 + 4.15 +enum { 4.16 + ARG_RD = CL_MEM_READ_ONLY, 4.17 + ARG_WR = CL_MEM_WRITE_ONLY, 4.18 + ARG_RDWR = CL_MEM_READ_WRITE 4.19 +}; 4.20 + 4.21 +enum { 4.22 + MAP_RD = CL_MAP_READ, 4.23 + MAP_WR = CL_MAP_WRITE, 4.24 + MAP_RDWR = CL_MAP_READ | CL_MAP_WRITE 4.25 +}; 4.26 + 4.27 +struct CLMemBuffer { 4.28 + cl_mem mem; 4.29 + size_t size; 4.30 + void *ptr; 4.31 +}; 4.32 + 4.33 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf); 4.34 +void destroy_mem_buffer(CLMemBuffer *mbuf); 4.35 + 4.36 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); 4.37 +void unmap_mem_buffer(CLMemBuffer *mbuf); 4.38 + 4.39 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src); 4.40 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); 4.41 + 4.42 + 4.43 +class CLProgram { 4.44 +private: 4.45 + std::string kname; 4.46 + cl_program prog; 4.47 + cl_kernel kernel; 4.48 + std::vector<CLMemBuffer*> mbuf; 4.49 + bool built; 4.50 + 4.51 +public: 4.52 + CLProgram(const char *kname); 4.53 + ~CLProgram(); 4.54 + 4.55 + bool load(const char *fname); 4.56 + 4.57 + bool set_arg(int arg, int rdwr, size_t sz, void *buf); 4.58 + CLMemBuffer *get_arg_buffer(int arg); 4.59 + 4.60 + bool build(); 4.61 + 4.62 + bool run() const; 4.63 + bool run(int dim, ...) const; 4.64 +}; 4.65 + 4.66 +#endif /* OCL_H_ */