clray

changeset 0:5767277e049f

first test works, let's try to make a raytracer now...
author John Tsiombikas <nuclear@member.fsf.org>
date Mon, 12 Jul 2010 05:56:47 +0300 (2010-07-12)
parents
children 0b0e4d18d53f
files Makefile src/clray.cc src/ocl.cc src/ocl.h test.cl
diffstat 5 files changed, 523 insertions(+), 0 deletions(-) [+]
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/Makefile	Mon Jul 12 05:56:47 2010 +0300
     1.3 @@ -0,0 +1,14 @@
     1.4 +src = $(wildcard src/*.cc)
     1.5 +obj = $(src:.cc=.o)
     1.6 +bin = test
     1.7 +
     1.8 +CXX = g++
     1.9 +CXXFLAGS = -pedantic -Wall -g
    1.10 +LDFLAGS = -framework OpenCL
    1.11 +
    1.12 +$(bin): $(obj)
    1.13 +	$(CXX) -o $@ $(obj) $(LDFLAGS)
    1.14 +
    1.15 +.PHONY: clean
    1.16 +clean:
    1.17 +	rm -f $(obj) $(bin)
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/src/clray.cc	Mon Jul 12 05:56:47 2010 +0300
     2.3 @@ -0,0 +1,41 @@
     2.4 +#include <stdio.h>
     2.5 +#include <assert.h>
     2.6 +#include "ocl.h"
     2.7 +
     2.8 +int main()
     2.9 +{
    2.10 +	int data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    2.11 +	int res[16];
    2.12 +	int count = sizeof data / sizeof *data;
    2.13 +
    2.14 +	for(int i=0; i<count; i++) {
    2.15 +		printf("%d ", data[i]);
    2.16 +	}
    2.17 +	putchar('\n');
    2.18 +
    2.19 +	CLProgram prog("test");
    2.20 +	if(!prog.load("test.cl")) {
    2.21 +		return 1;
    2.22 +	}
    2.23 +	if(!prog.set_arg(0, ARG_RD, sizeof data, data)) {
    2.24 +		return 1;
    2.25 +	}
    2.26 +	if(!prog.set_arg(1, ARG_WR, sizeof res, res)) {
    2.27 +		return 1;
    2.28 +	}
    2.29 +
    2.30 +	if(!prog.run(1, 16)) {
    2.31 +		return 1;
    2.32 +	}
    2.33 +
    2.34 +	CLMemBuffer *mbuf = prog.get_arg_buffer(1);
    2.35 +	map_mem_buffer(mbuf, MAP_RD);
    2.36 +
    2.37 +	for(int i=0; i<count; i++) {
    2.38 +		printf("%d ", res[i]);
    2.39 +	}
    2.40 +	putchar('\n');
    2.41 +	unmap_mem_buffer(mbuf);
    2.42 +
    2.43 +	return 0;
    2.44 +}
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/ocl.cc	Mon Jul 12 05:56:47 2010 +0300
     3.3 @@ -0,0 +1,399 @@
     3.4 +#include <stdio.h>
     3.5 +#include <stdlib.h>
     3.6 +#include <string.h>
     3.7 +#include <errno.h>
     3.8 +#include <alloca.h>
     3.9 +#include <sys/stat.h>
    3.10 +#include "ocl.h"
    3.11 +
    3.12 +
    3.13 +class InitCL {
    3.14 +public:
    3.15 +	InitCL();
    3.16 +};
    3.17 +
    3.18 +struct device_info {
    3.19 +	cl_device_id id;
    3.20 +	cl_device_type type;
    3.21 +	unsigned int units;
    3.22 +	unsigned int clock;
    3.23 +
    3.24 +	unsigned int dim;
    3.25 +	size_t *work_item_sizes;
    3.26 +	size_t work_group_size;
    3.27 +
    3.28 +	unsigned long mem_size;
    3.29 +};
    3.30 +
    3.31 +static bool init_opencl(void);
    3.32 +static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*));
    3.33 +static int get_dev_info(cl_device_id dev, struct device_info *di);
    3.34 +static int devcmp(struct device_info *a, struct device_info *b);
    3.35 +static const char *devtypestr(cl_device_type type);
    3.36 +static void print_memsize(FILE *out, unsigned long memsz);
    3.37 +
    3.38 +
    3.39 +static InitCL initcl;
    3.40 +static cl_context ctx;
    3.41 +static cl_command_queue cmdq;
    3.42 +static device_info devinf;
    3.43 +
    3.44 +InitCL::InitCL()
    3.45 +{
    3.46 +	if(!init_opencl()) {
    3.47 +		exit(0);
    3.48 +	}
    3.49 +}
    3.50 +
    3.51 +static bool init_opencl(void)
    3.52 +{
    3.53 +	if(select_device(&devinf, devcmp) == -1) {
    3.54 +		return false;
    3.55 +	}
    3.56 +
    3.57 +
    3.58 +	if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) {
    3.59 +		fprintf(stderr, "failed to create opencl context\n");
    3.60 +		return false;
    3.61 +	}
    3.62 +
    3.63 +	if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) {
    3.64 +		fprintf(stderr, "failed to create command queue\n");
    3.65 +		return false;
    3.66 +	}
    3.67 +	return true;
    3.68 +}
    3.69 +
    3.70 +
    3.71 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf)
    3.72 +{
    3.73 +	int err;
    3.74 +	cl_mem mem;
    3.75 +
    3.76 +
    3.77 +	if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) {
    3.78 +		fprintf(stderr, "failed to create memory buffer (%d)\n", err);
    3.79 +		return 0;
    3.80 +	}
    3.81 +
    3.82 +	CLMemBuffer *mbuf = new CLMemBuffer;
    3.83 +	mbuf->mem = mem;
    3.84 +	mbuf->size = sz;
    3.85 +	return mbuf;
    3.86 +}
    3.87 +
    3.88 +void destroy_mem_buffer(CLMemBuffer *mbuf)
    3.89 +{
    3.90 +	if(mbuf) {
    3.91 +
    3.92 +		clReleaseMemObject(mbuf->mem);
    3.93 +		delete mbuf;
    3.94 +	}
    3.95 +}
    3.96 +
    3.97 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr)
    3.98 +{
    3.99 +	if(!mbuf) return 0;
   3.100 +
   3.101 +	int err;
   3.102 +	mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err);
   3.103 +	if(!mbuf->ptr) {
   3.104 +		fprintf(stderr, "failed to map buffer (%d)\n", err);
   3.105 +		return 0;
   3.106 +	}
   3.107 +	return mbuf->ptr;
   3.108 +}
   3.109 +
   3.110 +void unmap_mem_buffer(CLMemBuffer *mbuf)
   3.111 +{
   3.112 +	if(!mbuf || !mbuf->ptr) return;
   3.113 +	clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0);
   3.114 +}
   3.115 +
   3.116 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src)
   3.117 +{
   3.118 +	if(!mbuf) return false;
   3.119 +
   3.120 +	int err;
   3.121 +	if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) {
   3.122 +		fprintf(stderr, "failed to write buffer (%d)\n", err);
   3.123 +		return false;
   3.124 +	}
   3.125 +	return true;
   3.126 +}
   3.127 +
   3.128 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest)
   3.129 +{
   3.130 +	if(!mbuf) return false;
   3.131 +
   3.132 +	int err;
   3.133 +	if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) {
   3.134 +		fprintf(stderr, "failed to read buffer (%d)\n", err);
   3.135 +		return false;
   3.136 +	}
   3.137 +	return true;
   3.138 +}
   3.139 +
   3.140 +
   3.141 +CLProgram::CLProgram(const char *kname)
   3.142 +{
   3.143 +	prog = 0;
   3.144 +	kernel = 0;
   3.145 +	this->kname = kname;
   3.146 +	mbuf.resize(16);
   3.147 +	built = false;
   3.148 +}
   3.149 +
   3.150 +CLProgram::~CLProgram()
   3.151 +{
   3.152 +	if(prog) {
   3.153 +
   3.154 +		clReleaseProgram(prog);
   3.155 +	}
   3.156 +	if(kernel) {
   3.157 +
   3.158 +		clReleaseKernel(kernel);
   3.159 +	}
   3.160 +	for(size_t i=0; i<mbuf.size(); i++) {
   3.161 +		if(mbuf[i]) {
   3.162 +			destroy_mem_buffer(mbuf[i]);
   3.163 +		}
   3.164 +	}
   3.165 +}
   3.166 +
   3.167 +bool CLProgram::load(const char *fname)
   3.168 +{
   3.169 +	FILE *fp;
   3.170 +	char *src;
   3.171 +	struct stat st;
   3.172 +
   3.173 +	printf("loading opencl program (%s)\n", fname);
   3.174 +
   3.175 +	if(!(fp = fopen(fname, "rb"))) {
   3.176 +		fprintf(stderr, "failed to open %s: %s\n", fname, strerror(errno));
   3.177 +		return false;
   3.178 +	}
   3.179 +
   3.180 +	fstat(fileno(fp), &st);
   3.181 +
   3.182 +	src = new char[st.st_size + 1];
   3.183 +
   3.184 +	fread(src, 1, st.st_size, fp);
   3.185 +	src[st.st_size] = 0;
   3.186 +	fclose(fp);
   3.187 +
   3.188 +
   3.189 +	if(!(prog = clCreateProgramWithSource(ctx, 1, (const char**)&src, 0, 0))) {
   3.190 +		fprintf(stderr, "error creating program object: %s\n", fname);
   3.191 +		delete [] src;
   3.192 +		return false;
   3.193 +	}
   3.194 +	delete [] src;
   3.195 +	return true;
   3.196 +}
   3.197 +
   3.198 +bool CLProgram::set_arg(int arg, int rdwr, size_t sz, void *ptr)
   3.199 +{
   3.200 +	CLMemBuffer *buf;
   3.201 +
   3.202 +	if(!(buf = create_mem_buffer(rdwr, sz, ptr))) {
   3.203 +		return false;
   3.204 +	}
   3.205 +
   3.206 +	if((int)mbuf.size() <= arg) {
   3.207 +		mbuf.resize(arg + 1);
   3.208 +	}
   3.209 +	mbuf[arg] = buf;
   3.210 +	return true;
   3.211 +}
   3.212 +
   3.213 +CLMemBuffer *CLProgram::get_arg_buffer(int arg)
   3.214 +{
   3.215 +	if(arg < 0 || arg >= (int)mbuf.size()) {
   3.216 +		return 0;
   3.217 +	}
   3.218 +	return mbuf[arg];
   3.219 +}
   3.220 +
   3.221 +bool CLProgram::build()
   3.222 +{
   3.223 +	char errlog[512];
   3.224 +
   3.225 +
   3.226 +	if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) {
   3.227 +		clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0);
   3.228 +		fprintf(stderr, "failed to build program:\n%s\n", errlog);
   3.229 +		clReleaseProgram(prog);
   3.230 +		prog = 0;
   3.231 +		return false;
   3.232 +	}
   3.233 +
   3.234 +
   3.235 +	if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) {
   3.236 +		fprintf(stderr, "failed to create kernel: %s\n", kname.c_str());
   3.237 +		clReleaseProgram(prog);
   3.238 +		prog = 0;
   3.239 +		return false;
   3.240 +	}
   3.241 +
   3.242 +	for(size_t i=0; i<mbuf.size(); i++) {
   3.243 +		if(!mbuf[i]) break;
   3.244 +
   3.245 +		int err;
   3.246 +		if((err = clSetKernelArg(kernel, i, sizeof mbuf[i]->mem, &mbuf[i]->mem)) != 0) {
   3.247 +			fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err);
   3.248 +			clReleaseProgram(prog);
   3.249 +			clReleaseKernel(kernel);
   3.250 +			prog = 0;
   3.251 +			kernel = 0;
   3.252 +			return false;
   3.253 +		}
   3.254 +	}
   3.255 +
   3.256 +	built = true;
   3.257 +	return true;
   3.258 +}
   3.259 +
   3.260 +bool CLProgram::run() const
   3.261 +{
   3.262 +	return run(1, 1);
   3.263 +}
   3.264 +
   3.265 +bool CLProgram::run(int dim, ...) const
   3.266 +{
   3.267 +	if(!built) {
   3.268 +		if(!((CLProgram*)this)->build()) {
   3.269 +			return false;
   3.270 +		}
   3.271 +	}
   3.272 +
   3.273 +	va_list ap;
   3.274 +	size_t *global_size = (size_t*)alloca(dim * sizeof *global_size);
   3.275 +
   3.276 +	va_start(ap, dim);
   3.277 +	for(int i=0; i<dim; i++) {
   3.278 +		global_size[i] = va_arg(ap, int);
   3.279 +	}
   3.280 +	va_end(ap);
   3.281 +
   3.282 +	int err;
   3.283 +	if((err = clEnqueueNDRangeKernel(cmdq, kernel, dim, 0, global_size, 0, 0, 0, 0)) != 0) {
   3.284 +		fprintf(stderr, "error executing kernel (%d)\n", err);
   3.285 +		return false;
   3.286 +	}
   3.287 +	return true;
   3.288 +}
   3.289 +
   3.290 +static int select_device(struct device_info *dev_inf, int (*devcmp)(struct device_info*, struct device_info*))
   3.291 +{
   3.292 +	unsigned int i, j, num_dev, sel;
   3.293 +	cl_device_id dev[32];
   3.294 +
   3.295 +	dev_inf->work_item_sizes = 0;
   3.296 +
   3.297 +
   3.298 +	clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev);
   3.299 +	printf("found %d cl devices.\n", num_dev);
   3.300 +
   3.301 +	for(i=0; i<num_dev; i++) {
   3.302 +		struct device_info di;
   3.303 +
   3.304 +		if(get_dev_info(dev[i], &di) == -1) {
   3.305 +			free(dev_inf->work_item_sizes);
   3.306 +			return -1;
   3.307 +		}
   3.308 +
   3.309 +		printf("--> device %u (%s)\n", i, devtypestr(di.type));
   3.310 +		printf("max compute units: %u\n", di.units);
   3.311 +		printf("max clock frequency: %u\n", di.clock);
   3.312 +		printf("max work item dimensions: %u\n", di.dim);
   3.313 +
   3.314 +		printf("max work item sizes: ");
   3.315 +		for(j=0; j<di.dim; j++) {
   3.316 +			printf("%u", (unsigned int)di.work_item_sizes[j]);
   3.317 +			if(di.dim - j > 1) {
   3.318 +				printf(", ");
   3.319 +			}
   3.320 +		}
   3.321 +		putchar('\n');
   3.322 +
   3.323 +		printf("max work group size: %u\n", (unsigned int)di.work_group_size);
   3.324 +		printf("max object allocation size: ");
   3.325 +		print_memsize(stdout, di.mem_size);
   3.326 +		putchar('\n');
   3.327 +
   3.328 +		if(devcmp(&di, dev_inf) > 0) {
   3.329 +			free(dev_inf->work_item_sizes);
   3.330 +			memcpy(dev_inf, &di, sizeof di);
   3.331 +			sel = i;
   3.332 +		}
   3.333 +	}
   3.334 +
   3.335 +	if(num_dev) {
   3.336 +		printf("\nusing device: %d\n", sel);
   3.337 +		return 0;
   3.338 +	}
   3.339 +
   3.340 +	return -1;
   3.341 +}
   3.342 +
   3.343 +static int get_dev_info(cl_device_id dev, struct device_info *di)
   3.344 +{
   3.345 +	di->id = dev;
   3.346 +
   3.347 +
   3.348 +	clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0);
   3.349 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0);
   3.350 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0);
   3.351 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0);
   3.352 +
   3.353 +	di->work_item_sizes = new size_t[di->dim];
   3.354 +
   3.355 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0);
   3.356 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0);
   3.357 +	clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0);
   3.358 +
   3.359 +	return 0;
   3.360 +}
   3.361 +
   3.362 +static int devcmp(struct device_info *a, struct device_info *b)
   3.363 +{
   3.364 +	unsigned int aval = a->units * a->clock;
   3.365 +	unsigned int bval = b->units * b->clock;
   3.366 +
   3.367 +	return aval - bval;
   3.368 +}
   3.369 +
   3.370 +static const char *devtypestr(cl_device_type type)
   3.371 +{
   3.372 +	switch(type) {
   3.373 +	case CL_DEVICE_TYPE_CPU:
   3.374 +		return "cpu";
   3.375 +	case CL_DEVICE_TYPE_GPU:
   3.376 +		return "gpu";
   3.377 +	case CL_DEVICE_TYPE_ACCELERATOR:
   3.378 +		return "accelerator";
   3.379 +	default:
   3.380 +		break;
   3.381 +	}
   3.382 +	return "unknown";
   3.383 +}
   3.384 +
   3.385 +static void print_memsize(FILE *out, unsigned long bytes)
   3.386 +{
   3.387 +	int i;
   3.388 +	unsigned long memsz = bytes;
   3.389 +	const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0};
   3.390 +
   3.391 +	for(i=0; suffix[i]; i++) {
   3.392 +		if(memsz < 1024) {
   3.393 +			fprintf(out, "%lu %s", memsz, suffix[i]);
   3.394 +			if(i > 0) {
   3.395 +				fprintf(out, " (%lu bytes)", bytes);
   3.396 +			}
   3.397 +			return;
   3.398 +		}
   3.399 +
   3.400 +		memsz /= 1024;
   3.401 +	}
   3.402 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/ocl.h	Mon Jul 12 05:56:47 2010 +0300
     4.3 @@ -0,0 +1,63 @@
     4.4 +#ifndef OCL_H_
     4.5 +#define OCL_H_
     4.6 +
     4.7 +#include <vector>
     4.8 +#include <string>
     4.9 +#ifndef __APPLE__
    4.10 +#include <CL/opencl.h>
    4.11 +#else
    4.12 +#include <OpenCL/opencl.h>
    4.13 +#endif
    4.14 +
    4.15 +enum {
    4.16 +	ARG_RD		= CL_MEM_READ_ONLY,
    4.17 +	ARG_WR		= CL_MEM_WRITE_ONLY,
    4.18 +	ARG_RDWR	= CL_MEM_READ_WRITE
    4.19 +};
    4.20 +
    4.21 +enum {
    4.22 +	MAP_RD		= CL_MAP_READ,
    4.23 +	MAP_WR		= CL_MAP_WRITE,
    4.24 +	MAP_RDWR	= CL_MAP_READ | CL_MAP_WRITE
    4.25 +};
    4.26 +
    4.27 +struct CLMemBuffer {
    4.28 +	cl_mem mem;
    4.29 +	size_t size;
    4.30 +	void *ptr;
    4.31 +};
    4.32 +
    4.33 +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf);
    4.34 +void destroy_mem_buffer(CLMemBuffer *mbuf);
    4.35 +
    4.36 +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr);
    4.37 +void unmap_mem_buffer(CLMemBuffer *mbuf);
    4.38 +
    4.39 +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src);
    4.40 +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest);
    4.41 +
    4.42 +
    4.43 +class CLProgram {
    4.44 +private:
    4.45 +	std::string kname;
    4.46 +	cl_program prog;
    4.47 +	cl_kernel kernel;
    4.48 +	std::vector<CLMemBuffer*> mbuf;
    4.49 +	bool built;
    4.50 +
    4.51 +public:
    4.52 +	CLProgram(const char *kname);
    4.53 +	~CLProgram();
    4.54 +
    4.55 +	bool load(const char *fname);
    4.56 +
    4.57 +	bool set_arg(int arg, int rdwr, size_t sz, void *buf);
    4.58 +	CLMemBuffer *get_arg_buffer(int arg);
    4.59 +
    4.60 +	bool build();
    4.61 +
    4.62 +	bool run() const;
    4.63 +	bool run(int dim, ...) const;
    4.64 +};
    4.65 +
    4.66 +#endif	/* OCL_H_ */
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/test.cl	Mon Jul 12 05:56:47 2010 +0300
     5.3 @@ -0,0 +1,6 @@
     5.4 +__kernel void test(__global const int *src, __global int *dst)
     5.5 +{
     5.6 +	int idx = get_global_id(0);
     5.7 +
     5.8 +	dst[idx] = src[idx] * 2.0;
     5.9 +}