/*
 * OpenCL helper layer (C++: it uses new/delete and CLProgram::/CLArg:: members)
 * with optional OpenCL/OpenGL context sharing, enabled by CLGL_INTEROP.
 *
 * NOTE(review): this copy of the file is an annotate listing. Every statement
 * is prefixed with an "author@revision:" tag and the text is collapsed onto a
 * few long lines. Text between '<' and '>' appears to be missing (the #include
 * targets are blank and several for-loops break off right after "i=0; i"), so
 * ~CLProgram/set_argi, get_num_args/build and run/select_device are only
 * partially visible here. Recover the pristine file before changing any code.
 *
 * What the visible code does:
 *   init_opencl()
 *       Picks a device through select_device(&devinf, devcmp), then creates
 *       the context (ctx) and the command queue (cmdq) for it. Returns false
 *       if device selection, context creation or queue creation fails.
 *       Without CLGL_INTEROP the context properties pointer is null; with it,
 *       on unix, the current GLX display and GLX context are passed as
 *       properties (both are assert()ed non-null). Apple, Windows and any
 *       other platform stop at an #error in that configuration.
 *   destroy_opencl()
 *       Releases cmdq and ctx when they are set and resets them to 0.
 *   create_mem_buffer(rdwr, sz, buf)
 *       Calls clCreateBuffer with rdwr | CL_MEM_ALLOC_HOST_PTR, adding
 *       CL_MEM_COPY_HOST_PTR when buf is non-null. Returns a new CLMemBuffer
 *       (size = sz, ptr = 0, tex = 0), or 0 after logging the error.
 *   create_mem_buffer(rdwr, tex)
 *       Wraps a GL texture with clCreateFromGLTexture2D (GL_TEXTURE_2D, fourth
 *       argument 0). The wrapper records size 0 and remembers tex.
 *   destroy_mem_buffer(mbuf)
 *       Releases the cl_mem and deletes the wrapper; a null mbuf is ignored.
 *   map_mem_buffer() / unmap_mem_buffer()
 *       Map the range [0, mbuf->size) with 1 passed as the blocking argument
 *       and keep the pointer in mbuf->ptr; debug builds warn when the buffer
 *       is already mapped. Unmapping does nothing when mbuf->ptr is 0 and
 *       does not check the result of clEnqueueUnmapMemObject.
 *   write_mem_buffer() / read_mem_buffer()
 *       Transfer sz bytes at offset 0 with 1 passed as the blocking argument;
 *       log and return false on a non-zero OpenCL status.
 *   acquire_gl_object() / release_gl_object()
 *       Return false unless mbuf is non-null and mbuf->tex is non-zero, then
 *       enqueue the acquire/release of that single object.
 *   CLArg::CLArg()
 *       Zero-fills the whole object with memset.
 *   CLProgram
 *       The constructor stores kname, clears prog/kernel/built and both
 *       events, and resizes args to 16. set_argf, set_arg_buffer and
 *       set_arg_texture grow args to idx + 1 when needed and store the typed
 *       value; set_arg_buffer rejects sz == 0. get_arg_buffer returns the
 *       stored buffer, or 0 for an out-of-range index or a non-buffer
 *       argument. run() forwards to run(1, 1).
 *   select_device() / get_dev_info() / devcmp()
 *       Print the properties of each device and keep the one for which
 *       devcmp(&di, dev_inf) > 0; devcmp ranks devices by units * clock.
 *   devtypestr(), print_memsize(), clstrerror()
 *       Formatting helpers for device type, byte counts and OpenCL errors.
 *
 * NOTE(review) - findings to confirm against the pristine source:
 *   - get_dev_info() allocates work_item_sizes with new[] while
 *     select_device() releases it with free(); the two should match.
 *   - devcmp() subtracts two unsigned products and returns the result as
 *     int, so a large difference can come back with the wrong sign.
 *   - get_dev_info() never checks a clGetDeviceInfo() result and always
 *     returns 0. It also reads CL_DEVICE_MAX_MEM_ALLOC_SIZE into an unsigned
 *     long; OpenCL documents that value as cl_ulong, so the sizes presumably
 *     differ where unsigned long is 32 bits wide.
 *   - print_memsize() prints nothing when the value is still >= 1024 after
 *     the "pb" suffix has been reached.
 *   - clstrerror() returns "" for out-of-range codes in this copy; the
 *     original messages may have been lost with the other '<'...'>' text.
 */
nuclear@8: #define OCL_CC_ nuclear@8: nuclear@0: #include nuclear@0: #include nuclear@0: #include nuclear@8: #include nuclear@0: #include nuclear@39: #include John@11: #ifndef _MSC_VER nuclear@0: #include John@11: #else John@11: #include John@11: #endif nuclear@0: #include nuclear@0: #include "ocl.h" nuclear@39: #include "ogl.h" nuclear@8: #include "ocl_errstr.h" nuclear@0: nuclear@39: #if defined(unix) || defined(__unix__) nuclear@39: #include nuclear@39: #include nuclear@39: #endif nuclear@0: nuclear@0: nuclear@0: struct device_info { nuclear@0: cl_device_id id; nuclear@0: cl_device_type type; nuclear@0: unsigned int units; nuclear@0: unsigned int clock; nuclear@0: nuclear@0: unsigned int dim; nuclear@0: size_t *work_item_sizes; nuclear@0: size_t work_group_size; nuclear@0: nuclear@0: unsigned long mem_size; nuclear@0: }; nuclear@0: nuclear@0: static int select_device(struct device_info *di, int (*devcmp)(struct device_info*, struct device_info*)); nuclear@0: static int get_dev_info(cl_device_id dev, struct device_info *di); nuclear@0: static int devcmp(struct device_info *a, struct device_info *b); nuclear@0: static const char *devtypestr(cl_device_type type); nuclear@0: static void print_memsize(FILE *out, unsigned long memsz); nuclear@8: static const char *clstrerror(int err); nuclear@0: nuclear@0: nuclear@0: static cl_context ctx; nuclear@0: static cl_command_queue cmdq; nuclear@0: static device_info devinf; nuclear@0: nuclear@39: bool init_opencl() nuclear@0: { nuclear@0: if(select_device(&devinf, devcmp) == -1) { nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@40: #ifndef CLGL_INTEROP nuclear@40: cl_context_properties *prop = 0; nuclear@40: nuclear@40: #else nuclear@40: nuclear@39: #if defined(__APPLE__) nuclear@39: #error "CL/GL context sharing not implemented on MacOSX yet" nuclear@39: #elif defined(unix) || defined(__unix__) nuclear@39: Display *dpy = glXGetCurrentDisplay(); nuclear@39: GLXContext glctx = glXGetCurrentContext(); nuclear@0: 
nuclear@39: assert(dpy && glctx); nuclear@39: nuclear@39: cl_context_properties prop[] = { nuclear@39: CL_GLX_DISPLAY_KHR, (cl_context_properties)dpy, nuclear@39: CL_GL_CONTEXT_KHR, (cl_context_properties)glctx, nuclear@39: 0 nuclear@39: }; nuclear@39: #elif defined(WIN32) || defined(__WIN32__) nuclear@39: #error "CL/GL context sharing not implemented on windows yet" nuclear@39: #else nuclear@39: #error "unknown or unsupported platform" nuclear@39: #endif nuclear@39: nuclear@40: #endif /* CLGL_INTEROP */ nuclear@40: nuclear@39: if(!(ctx = clCreateContext(prop, 1, &devinf.id, 0, 0, 0))) { nuclear@0: fprintf(stderr, "failed to create opencl context\n"); nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@0: if(!(cmdq = clCreateCommandQueue(ctx, devinf.id, 0, 0))) { nuclear@0: fprintf(stderr, "failed to create command queue\n"); nuclear@0: return false; nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@40: void destroy_opencl() nuclear@40: { nuclear@40: if(cmdq) { nuclear@40: clReleaseCommandQueue(cmdq); nuclear@40: cmdq = 0; nuclear@40: } nuclear@40: nuclear@40: if(ctx) { nuclear@40: clReleaseContext(ctx); nuclear@40: ctx = 0; nuclear@40: } nuclear@40: } nuclear@40: nuclear@0: nuclear@28: CLMemBuffer *create_mem_buffer(int rdwr, size_t sz, const void *buf) nuclear@0: { nuclear@0: int err; nuclear@0: cl_mem mem; nuclear@12: cl_mem_flags flags = rdwr | CL_MEM_ALLOC_HOST_PTR; nuclear@0: nuclear@12: if(buf) { nuclear@12: flags |= CL_MEM_COPY_HOST_PTR; nuclear@12: } nuclear@0: nuclear@12: nuclear@28: if(!(mem = clCreateBuffer(ctx, flags, sz, (void*)buf, &err))) { nuclear@8: fprintf(stderr, "failed to create memory buffer: %s\n", clstrerror(err)); nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: CLMemBuffer *mbuf = new CLMemBuffer; nuclear@0: mbuf->mem = mem; nuclear@0: mbuf->size = sz; nuclear@12: mbuf->ptr = 0; nuclear@39: mbuf->tex = 0; nuclear@39: return mbuf; nuclear@39: } nuclear@39: nuclear@39: CLMemBuffer *create_mem_buffer(int rdwr, 
unsigned int tex) nuclear@39: { nuclear@39: int err; nuclear@39: cl_mem mem; nuclear@39: nuclear@39: if(!(mem = clCreateFromGLTexture2D(ctx, rdwr, GL_TEXTURE_2D, 0, tex, &err))) { nuclear@39: fprintf(stderr, "failed to create memory buffer from GL texture %u: %s\n", tex, clstrerror(err)); nuclear@39: return 0; nuclear@39: } nuclear@39: nuclear@39: CLMemBuffer *mbuf = new CLMemBuffer; nuclear@39: mbuf->mem = mem; nuclear@39: mbuf->size = 0; nuclear@39: mbuf->ptr = 0; nuclear@39: mbuf->tex = tex; nuclear@0: return mbuf; nuclear@0: } nuclear@0: nuclear@0: void destroy_mem_buffer(CLMemBuffer *mbuf) nuclear@0: { nuclear@0: if(mbuf) { nuclear@0: clReleaseMemObject(mbuf->mem); nuclear@0: delete mbuf; nuclear@0: } nuclear@0: } nuclear@0: nuclear@39: void *map_mem_buffer(CLMemBuffer *mbuf, int rdwr, cl_event *ev) nuclear@0: { nuclear@0: if(!mbuf) return 0; nuclear@0: nuclear@12: #ifndef NDEBUG nuclear@12: if(mbuf->ptr) { nuclear@12: fprintf(stderr, "WARNING: map_mem_buffer called on already mapped buffer\n"); nuclear@12: } nuclear@12: #endif nuclear@12: nuclear@0: int err; nuclear@39: mbuf->ptr = clEnqueueMapBuffer(cmdq, mbuf->mem, 1, rdwr, 0, mbuf->size, 0, 0, ev, &err); nuclear@0: if(!mbuf->ptr) { nuclear@8: fprintf(stderr, "failed to map buffer: %s\n", clstrerror(err)); nuclear@0: return 0; nuclear@0: } nuclear@0: return mbuf->ptr; nuclear@0: } nuclear@0: nuclear@39: void unmap_mem_buffer(CLMemBuffer *mbuf, cl_event *ev) nuclear@0: { nuclear@0: if(!mbuf || !mbuf->ptr) return; nuclear@39: clEnqueueUnmapMemObject(cmdq, mbuf->mem, mbuf->ptr, 0, 0, ev); nuclear@12: mbuf->ptr = 0; nuclear@0: } nuclear@0: nuclear@39: bool write_mem_buffer(CLMemBuffer *mbuf, size_t sz, const void *src, cl_event *ev) nuclear@0: { nuclear@0: if(!mbuf) return false; nuclear@0: nuclear@0: int err; nuclear@39: if((err = clEnqueueWriteBuffer(cmdq, mbuf->mem, 1, 0, sz, src, 0, 0, ev)) != 0) { nuclear@8: fprintf(stderr, "failed to write buffer: %s\n", clstrerror(err)); nuclear@0: return false; 
nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@39: bool read_mem_buffer(CLMemBuffer *mbuf, size_t sz, void *dest, cl_event *ev) nuclear@0: { nuclear@0: if(!mbuf) return false; nuclear@0: nuclear@0: int err; nuclear@39: if((err = clEnqueueReadBuffer(cmdq, mbuf->mem, 1, 0, sz, dest, 0, 0, ev)) != 0) { nuclear@8: fprintf(stderr, "failed to read buffer: %s\n", clstrerror(err)); nuclear@0: return false; nuclear@0: } nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: nuclear@39: bool acquire_gl_object(CLMemBuffer *mbuf, cl_event *ev) nuclear@39: { nuclear@39: if(!mbuf || !mbuf->tex) { nuclear@39: return false; nuclear@39: } nuclear@39: nuclear@39: int err; nuclear@39: if((err = clEnqueueAcquireGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { nuclear@39: fprintf(stderr, "failed to acquire gl object: %s\n", clstrerror(err)); nuclear@39: return false; nuclear@39: } nuclear@39: return true; nuclear@39: } nuclear@39: nuclear@39: bool release_gl_object(CLMemBuffer *mbuf, cl_event *ev) nuclear@39: { nuclear@39: if(!mbuf || !mbuf->tex) { nuclear@39: return false; nuclear@39: } nuclear@39: nuclear@39: int err; nuclear@39: if((err = clEnqueueReleaseGLObjects(cmdq, 1, &mbuf->mem, 0, 0, ev)) != 0) { nuclear@39: fprintf(stderr, "failed to release gl object: %s\n", clstrerror(err)); nuclear@39: return false; nuclear@39: } nuclear@39: return true; nuclear@39: } nuclear@39: nuclear@39: John@14: CLArg::CLArg() John@14: { John@14: memset(this, 0, sizeof *this); John@14: } John@14: John@14: nuclear@0: CLProgram::CLProgram(const char *kname) nuclear@0: { nuclear@0: prog = 0; nuclear@0: kernel = 0; nuclear@0: this->kname = kname; nuclear@1: args.resize(16); nuclear@0: built = false; nuclear@39: nuclear@39: wait_event = last_event = 0; nuclear@0: } nuclear@0: nuclear@0: CLProgram::~CLProgram() nuclear@0: { nuclear@39: if(wait_event) { nuclear@39: clReleaseEvent(wait_event); nuclear@39: } nuclear@39: if(last_event) { nuclear@40: clWaitForEvents(1, &last_event); 
nuclear@39: clReleaseEvent(last_event); nuclear@39: } nuclear@39: nuclear@0: if(prog) { nuclear@0: clReleaseProgram(prog); nuclear@0: } nuclear@0: if(kernel) { nuclear@0: clReleaseKernel(kernel); nuclear@0: } nuclear@1: for(size_t i=0; itype = ARGTYPE_INT; nuclear@1: arg->v.ival = val; nuclear@1: return true; nuclear@1: } nuclear@1: nuclear@1: bool CLProgram::set_argf(int idx, float val) nuclear@1: { nuclear@1: if((int)args.size() <= idx) { nuclear@1: args.resize(idx + 1); nuclear@1: } nuclear@1: nuclear@1: CLArg *arg = &args[idx]; nuclear@1: arg->type = ARGTYPE_FLOAT; nuclear@1: arg->v.fval = val; nuclear@1: return true; nuclear@1: } nuclear@1: nuclear@28: bool CLProgram::set_arg_buffer(int idx, int rdwr, size_t sz, const void *ptr) nuclear@0: { nuclear@13: printf("create argument %d buffer: %d bytes\n", idx, (int)sz); nuclear@0: CLMemBuffer *buf; nuclear@0: nuclear@39: if(sz <= 0) { nuclear@39: fprintf(stderr, "invalid size while creating argument buffer %d: %d bytes\n", idx, (int)sz); nuclear@39: return false; nuclear@39: } nuclear@39: if(!(buf = create_mem_buffer(rdwr, sz, ptr))) { nuclear@39: return false; nuclear@39: } nuclear@39: nuclear@39: if((int)args.size() <= idx) { nuclear@39: args.resize(idx + 1); nuclear@39: } nuclear@39: args[idx].type = ARGTYPE_MEM_BUF; nuclear@39: args[idx].v.mbuf = buf; nuclear@39: return true; nuclear@39: } nuclear@39: nuclear@39: bool CLProgram::set_arg_texture(int idx, int rdwr, unsigned int tex) nuclear@39: { nuclear@39: printf("create argument %d from texture %u\n", idx, tex); nuclear@39: CLMemBuffer *buf; nuclear@39: nuclear@39: if(!(buf = create_mem_buffer(rdwr, tex))) { nuclear@0: return false; nuclear@0: } nuclear@0: nuclear@1: if((int)args.size() <= idx) { nuclear@1: args.resize(idx + 1); nuclear@0: } nuclear@1: args[idx].type = ARGTYPE_MEM_BUF; nuclear@1: args[idx].v.mbuf = buf; nuclear@0: return true; nuclear@0: } nuclear@0: nuclear@0: CLMemBuffer *CLProgram::get_arg_buffer(int arg) nuclear@0: { nuclear@1: if(arg < 0 
|| arg >= (int)args.size() || args[arg].type != ARGTYPE_MEM_BUF) { nuclear@0: return 0; nuclear@0: } nuclear@1: return args[arg].v.mbuf; nuclear@0: } nuclear@0: John@14: int CLProgram::get_num_args() const John@14: { John@14: int num_args = 0; John@14: for(size_t i=0; imem, &mbuf->mem)) != 0) { nuclear@8: fprintf(stderr, "failed to bind kernel argument %d: %s\n", (int)i, clstrerror(err)); nuclear@1: goto fail; nuclear@1: } nuclear@1: } nuclear@1: break; nuclear@1: nuclear@1: default: nuclear@1: break; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: built = true; nuclear@0: return true; nuclear@1: nuclear@1: fail: nuclear@1: clReleaseProgram(prog); nuclear@1: clReleaseKernel(kernel); nuclear@1: prog = 0; nuclear@1: kernel = 0; nuclear@1: return false; nuclear@0: } nuclear@0: nuclear@0: bool CLProgram::run() const nuclear@0: { nuclear@0: return run(1, 1); nuclear@0: } nuclear@0: nuclear@0: bool CLProgram::run(int dim, ...) const nuclear@0: { nuclear@0: va_list ap; nuclear@0: size_t *global_size = (size_t*)alloca(dim * sizeof *global_size); nuclear@0: nuclear@0: va_start(ap, dim); nuclear@0: for(int i=0; iwork_item_sizes = 0; nuclear@0: nuclear@8: if((ret = clGetPlatformIDs(32, plat, &num_plat)) != 0) { nuclear@8: fprintf(stderr, "clGetPlatformIDs failed: %s\n", clstrerror(ret)); nuclear@8: return -1; nuclear@8: } nuclear@8: if(!num_plat) { nuclear@8: fprintf(stderr, "OpenCL not available!\n"); nuclear@8: return -1; nuclear@8: } nuclear@0: nuclear@8: for(i=0; iwork_item_sizes); nuclear@0: return -1; nuclear@0: } nuclear@0: nuclear@0: printf("--> device %u (%s)\n", i, devtypestr(di.type)); nuclear@0: printf("max compute units: %u\n", di.units); nuclear@0: printf("max clock frequency: %u\n", di.clock); nuclear@0: printf("max work item dimensions: %u\n", di.dim); nuclear@0: nuclear@0: printf("max work item sizes: "); nuclear@0: for(j=0; j 1) { nuclear@0: printf(", "); nuclear@0: } nuclear@0: } nuclear@0: putchar('\n'); nuclear@0: nuclear@0: printf("max work group size: 
%u\n", (unsigned int)di.work_group_size); nuclear@0: printf("max object allocation size: "); nuclear@0: print_memsize(stdout, di.mem_size); nuclear@0: putchar('\n'); nuclear@0: nuclear@0: if(devcmp(&di, dev_inf) > 0) { nuclear@0: free(dev_inf->work_item_sizes); nuclear@0: memcpy(dev_inf, &di, sizeof di); nuclear@0: sel = i; nuclear@0: } nuclear@0: } nuclear@0: nuclear@0: if(num_dev) { nuclear@0: printf("\nusing device: %d\n", sel); nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: return -1; nuclear@0: } nuclear@0: nuclear@0: static int get_dev_info(cl_device_id dev, struct device_info *di) nuclear@0: { nuclear@0: di->id = dev; nuclear@0: nuclear@0: nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof di->type, &di->type, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof di->units, &di->units, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof di->clock, &di->clock, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof di->dim, &di->dim, 0); nuclear@0: nuclear@0: di->work_item_sizes = new size_t[di->dim]; nuclear@0: nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, di->dim * sizeof *di->work_item_sizes, di->work_item_sizes, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof di->work_group_size, &di->work_group_size, 0); nuclear@0: clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof di->mem_size, &di->mem_size, 0); nuclear@0: nuclear@0: return 0; nuclear@0: } nuclear@0: nuclear@0: static int devcmp(struct device_info *a, struct device_info *b) nuclear@0: { nuclear@0: unsigned int aval = a->units * a->clock; nuclear@0: unsigned int bval = b->units * b->clock; nuclear@0: nuclear@0: return aval - bval; nuclear@0: } nuclear@0: nuclear@0: static const char *devtypestr(cl_device_type type) nuclear@0: { nuclear@0: switch(type) { nuclear@0: case CL_DEVICE_TYPE_CPU: nuclear@0: return "cpu"; nuclear@0: case CL_DEVICE_TYPE_GPU: nuclear@0: return "gpu"; 
nuclear@0: case CL_DEVICE_TYPE_ACCELERATOR: nuclear@0: return "accelerator"; nuclear@0: default: nuclear@0: break; nuclear@0: } nuclear@0: return "unknown"; nuclear@0: } nuclear@0: nuclear@0: static void print_memsize(FILE *out, unsigned long bytes) nuclear@0: { nuclear@0: int i; nuclear@0: unsigned long memsz = bytes; nuclear@0: const char *suffix[] = {"bytes", "kb", "mb", "gb", "tb", "pb", 0}; nuclear@0: nuclear@0: for(i=0; suffix[i]; i++) { nuclear@0: if(memsz < 1024) { nuclear@0: fprintf(out, "%lu %s", memsz, suffix[i]); nuclear@0: if(i > 0) { nuclear@0: fprintf(out, " (%lu bytes)", bytes); nuclear@0: } nuclear@0: return; nuclear@0: } nuclear@0: nuclear@0: memsz /= 1024; nuclear@0: } nuclear@0: } nuclear@8: nuclear@8: static const char *clstrerror(int err) nuclear@8: { nuclear@8: if(err > 0) { nuclear@8: return ""; nuclear@8: } nuclear@8: if(err <= -(int)(sizeof ocl_errstr / sizeof *ocl_errstr)) { nuclear@8: return ""; nuclear@8: } nuclear@8: return ocl_errstr[-err]; nuclear@8: }