/* NOTE(review): this text reads like revision-annotated output: every original line carries a user@rev: marker, and whatever stood between a less-than and a greater-than sign appears to be missing (header names, several function bodies). Only comments are added below; all other text is left untouched. */
nuclear@8: #define OCL_CC_ nuclear@8: nuclear@0: #include nuclear@0: #include nuclear@0: #include nuclear@8: #include nuclear@0: #include John@11: #ifndef _MSC_VER nuclear@0: #include John@11: #else John@11: #include John@11: #endif nuclear@0: #include nuclear@0: #include "ocl.h" nuclear@8: #include "ocl_errstr.h" nuclear@0: nuclear@0: nuclear@0: /* InitCL: helper whose constructor runs init_opencl(); one file-scope instance (initcl) is defined further down. */ class InitCL { nuclear@0: public: nuclear@0: InitCL(); nuclear@0: }; nuclear@0: nuclear@0: /* device_info: properties of one OpenCL device, filled in by get_dev_info(). */ struct device_info { nuclear@0: cl_device_id id; nuclear@0: cl_device_type type; nuclear@0: unsigned int units; nuclear@0: unsigned int clock; nuclear@0: nuclear@0: unsigned int dim; nuclear@0: /* array of dim entries, allocated with new[] in get_dev_info() */ size_t *work_item_sizes; nuclear@0: size_t work_group_size; nuclear@0: nuclear@0: unsigned long mem_size; nuclear@0: }; nuclear@0: nuclear@0: static bool init_opencl(void); nuclear@0: static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); nuclear@0: static int get_dev_info(cl_device_id dev, struct device_info *di); nuclear@0: static int devcmp(struct device_info *a, struct device_info *b); nuclear@0: static const char *devtypestr(cl_device_type type); nuclear@0: static void print_memsize(FILE *out, unsigned long memsz); nuclear@8: static const char *clstrerror(int err); nuclear@0: nuclear@0: nuclear@0: /* file-scope state: constructing initcl runs init_opencl(), which fills in ctx, cmdq and devinf */ static InitCL initcl; nuclear@0: static cl_context ctx; nuclear@0: static cl_command_queue cmdq; nuclear@0: static device_info devinf; nuclear@0: nuclear@0: /* Runs init_opencl() and terminates the process with exit(0) if it fails. NOTE(review): exit status 0 on a failure path looks unintended - confirm. */ InitCL::InitCL() nuclear@0: { nuclear@0: if(!init_opencl()) { nuclear@0: exit(0); nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: /* Picks a device with select_device(), then creates the context and the command queue for it. Returns false as soon as one step fails, true otherwise. */ static bool init_opencl(void) nuclear@0: { nuclear@0: if(select_device(&devinf, devcmp) == -1) { nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: nuclear@0: /* the last argument (errcode_ret in the OpenCL API) is 0 in both create calls, so no error code is retrieved */ if(!(ctx = clCreateContext(0, 1, &devinf.id, 0, 0, 0))) { nuclear@0: fprintf(stderr, "failed to create opencl context\n"); nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) { 
nuclear@0: fprintf(stderr, "failed to create command queue\n"); nuclear@0: return false; nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: /* Creates an OpenCL buffer of sz bytes in ctx. rdwr is OR-ed into the buffer flags together with CL_MEM_ALLOC_HOST_PTR; when buf is non-null CL_MEM_COPY_HOST_PTR is added and buf is handed over as the host pointer. Returns a new CLMemBuffer (mem and size set, ptr cleared) or 0 when clCreateBuffer fails. */ nuclear@28: CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf) nuclear@0: { nuclear@0: int err; nuclear@0: cl_mem mem; nuclear@12: cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR; nuclear@0: nuclear@12: if(buf) { nuclear@12: flags |= CL_MEM_COPY_HOST_PTR; nuclear@12: } nuclear@0: nuclear@12: nuclear@28: if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) { nuclear@8: fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err)); nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: CLMemBuffer *mbuf = new CLMemBuffer; nuclear@0: mbuf->mem = mem; nuclear@0: mbuf->size = sz; nuclear@12: mbuf->ptr = 0; nuclear@0: return mbuf; nuclear@0: } nuclear@0: nuclear@0: /* Releases the cl_mem object and deletes the wrapper; a null mbuf is ignored. */ void destroy_mem_buffer(CLMemBuffer *mbuf) nuclear@0: { nuclear@0: if(mbuf) { nuclear@0: clReleaseMemObject(mbuf->mem); nuclear@0: delete mbuf; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: /* Maps the whole buffer (offset 0, mbuf->size bytes) through cmdq, passing 1 as the blocking flag and rdwr as the map flags, and stores the result in mbuf->ptr. Returns that pointer, or 0 for a null mbuf or a failed map. */ void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr) nuclear@0: { nuclear@0: if(!mbuf) return 0; nuclear@0: nuclear@12: #ifndef NDEBUG nuclear@12: /* only warns; the mapping below is still attempted and overwrites mbuf->ptr */ if(mbuf->ptr) { nuclear@12: fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n"); nuclear@12: } nuclear@12: #endif nuclear@12: nuclear@0: int err; nuclear@0: mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, 0, &err); nuclear@0: if(!mbuf->ptr) { nuclear@8: fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err)); nuclear@0: return 0; nuclear@0: } nuclear@0: return mbuf->ptr; nuclear@0: } nuclear@0: nuclear@0: /* Enqueues an unmap of mbuf->ptr and clears the pointer; does nothing for a null or unmapped buffer. The result of clEnqueueUnmapMemObject is not checked. */ void unmap_mem_buffer(CLMemBuffer *mbuf) nuclear@0: { nuclear@0: if(!mbuf || !mbuf->ptr) return; nuclear@0: clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, 0); nuclear@12: mbuf->ptr = 0; nuclear@0: } nuclear@0: /* Copies sz bytes from src to offset 0 of the buffer, passing 1 as the blocking flag. Returns false for a null mbuf or when the OpenCL call reports an error. NOTE(review): sz is not compared against mbuf->size here. */ nuclear@28: bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src) nuclear@0: { 
nuclear@0: if(!mbuf) return false; nuclear@0: nuclear@0: int err; nuclear@0: if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, 0)) != 0) { nuclear@8: fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err)); nuclear@0: return false; nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: /* Copies sz bytes from offset 0 of the buffer into dest, passing 1 as the blocking flag. Returns false for a null mbuf or when the OpenCL call reports an error. NOTE(review): sz is not compared against mbuf->size here. */ bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest) nuclear@0: { nuclear@0: if(!mbuf) return false; nuclear@0: nuclear@0: int err; nuclear@0: if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, 0)) != 0) { nuclear@8: fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err)); nuclear@0: return false; nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: /* Zero-fills the whole object with memset. NOTE(review): this presumes CLArg holds only plain data - confirm against its declaration. */ John@14: CLArg::CLArg() John@14: { John@14: memset(this, 0, sizeof *this); John@14: } John@14: John@14: /* Starts with no program and no kernel, stores the kernel name, sizes args to 16 entries and marks the program as not built. */ nuclear@0: CLProgram::CLProgram(const char *kname) nuclear@0: { nuclear@0: prog = 0; nuclear@0: kernel = 0; nuclear@0: this->kname = kname; nuclear@1: args.resize(16); nuclear@0: built = false; nuclear@0: } nuclear@0: nuclear@0: /* Releases the program and the kernel when they are set. NOTE(review): the text is cut inside the loop that follows; what comes after the cut looks like the tail of an integer-argument setter - verify against the real file. */ CLProgram::~CLProgram() nuclear@0: { nuclear@0: if(prog) { nuclear@0: nuclear@0: clReleaseProgram(prog); nuclear@0: } nuclear@0: if(kernel) { nuclear@0: nuclear@0: clReleaseKernel(kernel); nuclear@0: } nuclear@1: for(size_t i=0; itype = ARGTYPE_INT; nuclear@1: arg->v.ival = val; nuclear@1: return true; nuclear@1: } nuclear@1: nuclear@1: /* Stores val as a float argument at index idx, growing args when idx is past the end. Always returns true. NOTE(review): a negative idx is not rejected before args[idx] is used. */ bool CLProgram::set_argf(int idx, float val) nuclear@1: { nuclear@1: if((int)args.size() <= idx) { nuclear@1: args.resize(idx + 1); nuclear@1: } nuclear@1: nuclear@1: CLArg *arg = &args[idx]; nuclear@1: arg->type = ARGTYPE_FLOAT; nuclear@1: arg->v.fval = val; nuclear@1: return true; nuclear@1: } nuclear@1: /* Creates a buffer of sz bytes through create_mem_buffer(rdwr, sz, ptr) and stores it as a buffer argument at index idx, growing args when needed. Returns false when sz is 0 or the buffer cannot be created. NOTE(review): the failure message mentions an invalid size even when create_mem_buffer was the step that failed, and a buffer already stored at idx is not released here. */ nuclear@28: bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr) nuclear@0: { nuclear@13: printf("create argument %d buffer: %d bytes\n", idx, (int)sz); nuclear@0: CLMemBuffer *buf; nuclear@0: John@14: if(sz <= 0 || !(buf = create_mem_buffer(rdwr, sz, 
ptr))) { John@14: fprintf(stderr, "invalid size while creating argument buffer %d: %d\n", idx, (int)sz); nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@1: if((int)args.size() <= idx) { nuclear@1: args.resize(idx + 1); nuclear@0: } nuclear@1: args[idx].type = ARGTYPE_MEM_BUF; nuclear@1: args[idx].v.mbuf = buf; nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: /* Returns the buffer stored at argument index arg, or 0 when the index is out of range or the argument is not a buffer. */ CLMemBuffer *CLProgram::get_arg_buffer(int arg) nuclear@0: { nuclear@1: if(arg < 0 || arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) { nuclear@0: return 0; nuclear@0: } nuclear@1: return args[arg].v.mbuf; nuclear@0: } nuclear@0: /* NOTE(review): get_num_args is cut inside its loop; the text resumes in code that binds kernel arguments and ends by setting built to true, with a fail label that releases prog and kernel and resets both to 0 - presumably the build function; verify against the real file. */ John@14: int CLProgram::get_num_args() const John@14: { John@14: int num_args = 0; John@14: for(size_t i=0; imem, &mbuf->mem)) != 0) { nuclear@8: fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err)); nuclear@1: goto fail; nuclear@1: } nuclear@1: } nuclear@1: break; nuclear@1: nuclear@1: default: nuclear@1: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: built = true; nuclear@0: return true; nuclear@1: nuclear@1: fail: nuclear@1: clReleaseProgram(prog); nuclear@1: clReleaseKernel(kernel); nuclear@1: prog = 0; nuclear@1: kernel = 0; nuclear@1: return false; nuclear@0: } nuclear@0: nuclear@0: /* Shorthand for run(1, 1). */ bool CLProgram::run() const nuclear@0: { nuclear@0: return run(1, 1); nuclear@0: } nuclear@0: nuclear@0: /* Calls build() first when the program is not built yet (casting away const) and returns false if that fails; allocates dim entries for global_size with alloca. NOTE(review): the rest of this function is missing from the text. */ bool CLProgram::run(int dim, ...) 
const nuclear@0: { nuclear@0: if(!built) { nuclear@0: if(!((CLProgram*)this)->build()) { nuclear@0: return false; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: va_list ap; nuclear@0: size_t *global_size = (size_t*)alloca(dim * sizeof *global_size); nuclear@0: nuclear@0: va_start(ap, dim); nuclear@0: /* NOTE(review): the text is cut inside the next loop and resumes in what is presumably select_device (platform and device enumeration) - verify against the real file. */ for(int i=0; iwork_item_sizes = 0; nuclear@0: nuclear@8: /* asks for at most 32 platform ids */ if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) { nuclear@8: fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret)); nuclear@8: return -1; nuclear@8: } nuclear@8: if(!num_plat) { nuclear@8: fprintf(stderr, "OpenCL not available!\n"); nuclear@8: return -1; nuclear@8: } nuclear@0: nuclear@8: for(i=0; iwork_item_sizes); nuclear@0: return -1; nuclear@0: } nuclear@0: nuclear@0: printf("--> device %u (%s)\n", i, devtypestr(di.type)); nuclear@0: printf("max compute units: %u\n", di.units); nuclear@0: printf("max clock frequency: %u\n", di.clock); nuclear@0: printf("max work item dimensions: %u\n", di.dim); nuclear@0: nuclear@0: printf("max work item sizes: "); nuclear@0: for(j=0; j 1) { nuclear@0: printf(", "); nuclear@0: } nuclear@0: } nuclear@0: putchar('\n'); nuclear@0: nuclear@0: printf("max work group size: %u\n", (unsigned int)di.work_group_size); nuclear@0: printf("max object allocation size: "); nuclear@0: print_memsize(stdout, di.mem_size); nuclear@0: putchar('\n'); nuclear@0: nuclear@0: /* keeps di when devcmp ranks it above the current choice. NOTE(review): work_item_sizes is released with free here but allocated with new[] in get_dev_info - confirm and align. */ if(devcmp(&di, dev_inf) > 0) { nuclear@0: free(dev_inf->work_item_sizes); nuclear@0: memcpy(dev_inf, &di, sizeof di); nuclear@0: sel = i; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if(num_dev) { nuclear@0: printf("\nusing device: %d\n", sel); nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: return -1; nuclear@0: } nuclear@0: nuclear@0: /* Fills *di with properties of dev queried through clGetDeviceInfo: type, compute units, clock, work item dimensions and sizes, work group size and maximum allocation size. The results of the queries are not checked and the function always returns 0. NOTE(review): the OpenCL reference gives CL_DEVICE_MAX_MEM_ALLOC_SIZE a cl_ulong result; confirm unsigned long is wide enough on every target. */ static int get_dev_info(cl_device_id dev, struct device_info *di) nuclear@0: { nuclear@0: di->id = dev; nuclear@0: nuclear@0: nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, 
sizeof di->units, &di->units, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0); nuclear@0: nuclear@0: /* one entry per work item dimension reported by the previous query */ di->work_item_sizes = new size_t[di->dim]; nuclear@0: nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0); nuclear@0: nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: /* Scores each device as units * clock and returns the difference a minus b. NOTE(review): the subtraction is unsigned and then converted to int, so a large gap can wrap and flip the sign - confirm. */ static int devcmp(struct device_info *a, struct device_info *b) nuclear@0: { nuclear@0: unsigned int aval = a->units * a->clock; nuclear@0: unsigned int bval = b->units * b->clock; nuclear@0: nuclear@0: return aval - bval; nuclear@0: } nuclear@0: nuclear@0: /* Returns a short lowercase name for the cpu, gpu and accelerator device types and the string unknown for any other value. */ static const char *devtypestr(cl_device_type type) nuclear@0: { nuclear@0: switch(type) { nuclear@0: case CL_DEVICE_TYPE_CPU: nuclear@0: return "cpu"; nuclear@0: case CL_DEVICE_TYPE_GPU: nuclear@0: return "gpu"; nuclear@0: case CL_DEVICE_TYPE_ACCELERATOR: nuclear@0: return "accelerator"; nuclear@0: default: nuclear@0: break; nuclear@0: } nuclear@0: return "unknown"; nuclear@0: } nuclear@0: nuclear@0: /* Writes bytes to out scaled by 1024 to the first unit in which the value is below 1024, followed by the exact byte count in parentheses when that unit is not bytes. Nothing is written if the value is still 1024 or more after the last suffix. */ static void print_memsize(FILE *out, unsigned long bytes) nuclear@0: { nuclear@0: int i; nuclear@0: unsigned long memsz = bytes; nuclear@0: /* the trailing 0 terminates the loop below */ const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0}; nuclear@0: nuclear@0: for(i=0; suffix[i]; i++) { nuclear@0: if(memsz < 1024) { nuclear@0: fprintf(out, "%lu %s", memsz, suffix[i]); nuclear@0: if(i > 0) { nuclear@0: fprintf(out, " (%lu bytes)", bytes); nuclear@0: } nuclear@0: return; nuclear@0: } nuclear@0: nuclear@0: memsz /= 1024; nuclear@0: } nuclear@0: } nuclear@8: nuclear@8: /* Looks up the message for an OpenCL error code in ocl_errstr, indexed by the negated code. Positive codes and codes past the end of the table return the empty string shown in this text. */ static const char *clstrerror(int err) nuclear@8: { nuclear@8: if(err > 0) { 
nuclear@8: return ""; nuclear@8: } nuclear@8: /* rejects codes whose negation would index past the end of ocl_errstr */ if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) { nuclear@8: return ""; nuclear@8: } nuclear@8: return ocl_errstr[-err]; nuclear@8: }