# HG changeset patch # User John Tsiombikas # Date 1278903407 -10800 # Node ID 5767277e049fb59990b2104a124032bfdb5b2ccd first test works, let's try to make a raytracer now... diff -r 000000000000 -r 5767277e049f Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Mon Jul 12 05:56:47 2010 +0300 @@ -0,0 +1,14 @@ +src = $(wildcard src/*.cc) +obj = $(src:.cc=.o) +bin = test + +CXX = g++ +CXXFLAGS = -pedantic -Wall -g +LDFLAGS = -framework OpenCL + +$(bin): $(obj) + $(CXX) -o $@ $(obj) $(LDFLAGS) + +.PHONY: clean +clean: + rm -f $(obj) $(bin) diff -r 000000000000 -r 5767277e049f src/clray.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/clray.cc Mon Jul 12 05:56:47 2010 +0300 @@ -0,0 +1,41 @@ +#include +#include +#include "ocl.h" + +int main() +{ + int data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int res[16]; + int count = sizeof data / sizeof *data; + + for(int i=0; i +#include +#include +#include +#include +#include +#include "ocl.h" + + +class InitCL { +public: + InitCL(); +}; + +struct device_info { + cl_device_id id; + cl_device_type type; + unsigned int units; + unsigned int clock; + + unsigned int dim; + size_t *work_item_sizes; + size_t work_group_size; + + unsigned long mem_size; +}; + +static bool init_opencl(void); +static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); +static int get_dev_info(cl_device_id dev, struct device_info *di); +static int devcmp(struct device_info *a, struct device_info *b); +static const char *devtypestr(cl_device_type type); +static void print_memsize(FILE *out, unsigned long memsz); + + +static InitCL initcl; +static cl_context ctx; +static cl_command_queue cmdq; +static device_info devinf; + +InitCL::InitCL() +{ + if(!init_opencl()) { + exit(0); + } +} + +static bool init_opencl(void) +{ + if(select_device(&devinf, devcmp) == -1) { + return false; + } + + + if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { + fprintf(stderr, "failed to create opencl context\n"); + return false; + } + + if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) { + fprintf(stderr, "failed to create command queue\n"); + return false; + } + return true; +} + + +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf) +{ + int err; + cl_mem mem; + + + if(!(mem = clCreateBuffer(ctx, rdwr | CL_MEM_USE_HOST_PTR, sz, buf, &err))) { + fprintf(stderr, "failed to create memory buffer (%d)\n", err); + return 0; + } + + CLMemBuffer *mbuf = new CLMemBuffer; + mbuf->mem = mem; + mbuf->size = sz; + return mbuf; +} + +void destroy_mem_buffer(CLMemBuffer *mbuf) +{ + if(mbuf) { + + clReleaseMemObject(mbuf->mem); + delete mbuf; + } +} + +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) +{ + if(!mbuf) return 0; + + int err; + mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); + if(!mbuf->ptr) { + fprintf(stderr, "failed to map buffer (%d)\n", err); + return 0; + } + return mbuf->ptr; +} + +void unmap_mem_buffer(CLMemBuffer *mbuf) +{ + if(!mbuf || !mbuf->ptr) return; + clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); +} + +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src) +{ + if(!mbuf) return false; + + int err; + if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { + fprintf(stderr, "failed to write buffer (%d)\n", err); + return false; + } + return true; +} + +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) +{ + if(!mbuf) return false; + + int err; + if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { + fprintf(stderr, "failed to read buffer (%d)\n", err); + return false; + } + return true; +} + + +CLProgram::CLProgram(const char *kname) +{ + prog = 0; + kernel = 0; + this->kname = kname; + mbuf.resize(16); + built = false; +} + +CLProgram::~CLProgram() +{ + if(prog) { + + clReleaseProgram(prog); + } + if(kernel) { + + clReleaseKernel(kernel); + } + for(size_t i=0; i= (int)mbuf.size()) { + return 0; + } + return mbuf[arg]; +} + +bool CLProgram::build() +{ + char errlog[512]; + + + if(clBuildProgram(prog, 0, 0, 0, 0, 0) != 0) { + clGetProgramBuildInfo(prog, devinf.id, CL_PROGRAM_BUILD_LOG, sizeof errlog, errlog, 0); + fprintf(stderr, "failed to build program:\n%s\n", errlog); + clReleaseProgram(prog); + prog = 0; + return false; + } + + + if(!(kernel = clCreateKernel(prog, kname.c_str(), 0))) { + fprintf(stderr, "failed to create kernel: %s\n", kname.c_str()); + clReleaseProgram(prog); + prog = 0; + return false; + } + + for(size_t i=0; imem, &mbuf[i]->mem)) != 0) { + fprintf(stderr, "failed to bind kernel argument: %d (%d)\n", (int)i, err); + clReleaseProgram(prog); + clReleaseKernel(kernel); + prog = 0; + kernel = 0; + return false; + } + } + + built = true; + return true; +} + +bool CLProgram::run() const +{ + return run(1, 1); +} + +bool CLProgram::run(int dim, ...) const +{ + if(!built) { + if(!((CLProgram*)this)->build()) { + return false; + } + } + + va_list ap; + size_t *global_size = (size_t*)alloca(dim * sizeof *global_size); + + va_start(ap, dim); + for(int i=0; iwork_item_sizes = 0; + + + clGetDeviceIDs(0, CL_DEVICE_TYPE_ALL, 32, dev, &num_dev); + printf("found %d cl devices.\n", num_dev); + + for(i=0; iwork_item_sizes); + return -1; + } + + printf("--> device %u (%s)\n", i, devtypestr(di.type)); + printf("max compute units: %u\n", di.units); + printf("max clock frequency: %u\n", di.clock); + printf("max work item dimensions: %u\n", di.dim); + + printf("max work item sizes: "); + for(j=0; j 1) { + printf(", "); + } + } + putchar('\n'); + + printf("max work group size: %u\n", (unsigned int)di.work_group_size); + printf("max object allocation size: "); + print_memsize(stdout, di.mem_size); + putchar('\n'); + + if(devcmp(&di, dev_inf) > 0) { + free(dev_inf->work_item_sizes); + memcpy(dev_inf, &di, sizeof di); + sel = i; + } + } + + if(num_dev) { + printf("\nusing device: %d\n", sel); + return 0; + } + + return -1; +} + +static int get_dev_info(cl_device_id dev, struct device_info *di) +{ + di->id = dev; + + + clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0); + clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0); + clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0); + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0); + + di->work_item_sizes = new size_t[di->dim]; + + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0); + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0); + clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0); + + return 0; +} + +static int devcmp(struct device_info *a, struct device_info *b) +{ + unsigned int aval = a->units * a->clock; + unsigned int bval = b->units * b->clock; + + return aval - bval; +} + +static const char *devtypestr(cl_device_type type) +{ + switch(type) { + case CL_DEVICE_TYPE_CPU: + return "cpu"; + case CL_DEVICE_TYPE_GPU: + return "gpu"; + case CL_DEVICE_TYPE_ACCELERATOR: + return "accelerator"; + default: + break; + } + return "unknown"; +} + +static void print_memsize(FILE *out, unsigned long bytes) +{ + int i; + unsigned long memsz = bytes; + const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0}; + + for(i=0; suffix[i]; i++) { + if(memsz < 1024) { + fprintf(out, "%lu %s", memsz, suffix[i]); + if(i > 0) { + fprintf(out, " (%lu bytes)", bytes); + } + return; + } + + memsz /= 1024; + } +} diff -r 000000000000 -r 5767277e049f src/ocl.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ocl.h Mon Jul 12 05:56:47 2010 +0300 @@ -0,0 +1,63 @@ +#ifndef OCL_H_ +#define OCL_H_ + +#include +#include +#ifndef __APPLE__ +#include +#else +#include +#endif + +enum { + ARG_RD = CL_MEM_READ_ONLY, + ARG_WR = CL_MEM_WRITE_ONLY, + ARG_RDWR = CL_MEM_READ_WRITE +}; + +enum { + MAP_RD = CL_MAP_READ, + MAP_WR = CL_MAP_WRITE, + MAP_RDWR = CL_MAP_READ | CL_MAP_WRITE +}; + +struct CLMemBuffer { + cl_mem mem; + size_t size; + void *ptr; +}; + +CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, void *buf); +void destroy_mem_buffer(CLMemBuffer *mbuf); + +void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr); +void unmap_mem_buffer(CLMemBuffer *mbuf); + +bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *src); +bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest); + + +class CLProgram { +private: + std::string kname; + cl_program prog; + cl_kernel kernel; + std::vector mbuf; + bool built; + +public: + CLProgram(const char *kname); + ~CLProgram(); + + bool load(const char *fname); + + bool set_arg(int arg, int rdwr, size_t sz, void *buf); + CLMemBuffer *get_arg_buffer(int arg); + + bool build(); + + bool run() const; + bool run(int dim, ...) const; +}; + +#endif /* OCL_H_ */ diff -r 000000000000 -r 5767277e049f test.cl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test.cl Mon Jul 12 05:56:47 2010 +0300 @@ -0,0 +1,6 @@ +__kernel void test(__global const int *src, __global int *dst) +{ + int idx = get_global_id(0); + + dst[idx] = src[idx] * 2.0; +}