# HG changeset patch # User John Tsiombikas # Date 1401639580 -10800 # Node ID c8a6fb04fefa33164f6fda66e03f6801c6cb302b # Parent 6ef4b10fa46899cc59fda8957ad913a9a6bf6bf9 multithreadededit diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/erebus.cc --- a/liberebus/src/erebus.cc Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/erebus.cc Sun Jun 01 19:19:40 2014 +0300 @@ -12,8 +12,7 @@ using namespace std::chrono; -#define INVALID_RECT Rect{0, 0, 0, 0} - +static void render_block(struct erebus *ctx, Block blk); static void render_pixel(struct erebus *ctx, int x, int y, int sample); static std::mt19937 rnd_gen; @@ -34,7 +33,7 @@ ctx->scn = 0; ctx->cur_time = 0; - ctx->cur_rect = INVALID_RECT; + ctx->cur_frame = 0; erb_setoptf(ctx, ERB_OPT_GAMMA, 2.2); erb_setopti(ctx, ERB_OPT_MAX_ITER, 6); @@ -125,16 +124,24 @@ void erb_begin_frame(struct erebus *ctx, long ms) { printf("starting new frame...\n"); + ++ctx->cur_frame; + ctx->cur_sample = 0; ctx->cur_time = ms; int xsz = erb_getopti(ctx, ERB_OPT_WIDTH); int ysz = erb_getopti(ctx, ERB_OPT_HEIGHT); - ctx->fbimg.create(xsz, ysz); - ctx->accum.create(xsz, ysz); + if(!ctx->fbimg.get_pixels() || ctx->fbimg.get_width() != xsz || ctx->fbimg.get_height() < ysz) { + ctx->fbimg.create(xsz, ysz); + ctx->accum.create(xsz, ysz); + } else { + ctx->fbimg.clear(); + ctx->accum.clear(); + } - ctx->cur_rect = INVALID_RECT; ctx->inv_gamma = 1.0f / erb_getoptf(ctx, ERB_OPT_GAMMA); + + ctx->scn->update(ctx->cur_time); } int erb_render(struct erebus *ctx, long timeout) @@ -142,56 +149,48 @@ return erb_render_rect(ctx, 0, 0, ctx->fbimg.get_width(), ctx->fbimg.get_height(), timeout); } +#define BLKSZ 32 + int erb_render_rect(struct erebus *ctx, int x, int y, int width, int height, long timeout) { + while(ctx->tpool.pending()) { + if(timeout > 0) { + long wait_interval = ctx->tpool.wait(timeout); + timeout -= wait_interval; + } else { + return 1; + } + } + if(!width || !height) return -1; - Rect rect{x, y, width, height}; - if(ctx->cur_rect != rect) { - // starting a new rendering apparently - ctx->cur_rect = rect; - ctx->cur_pixel_x = x; - ctx->cur_pixel_y = y; - ctx->cur_sample = 0; + int startx = x; + int endx = x + width; + int endy = y + height; + + while(y < endy) { + x = startx; + while(x < endx) { + Block blk; + blk.x = x; + blk.y = y; + blk.width = std::min(BLKSZ, endx - x); + blk.height = std::min(BLKSZ, endy - y); + blk.sample = ctx->cur_sample; + blk.frame = ctx->cur_frame; + + ctx->tpool.add_work(std::bind(render_block, ctx, blk)); + + x += BLKSZ; + } + y += BLKSZ; } - ctx->scn->update(); + ++ctx->cur_sample; + ctx->tpool.wait(timeout); // wait for completion + return ctx->cur_sample > erb_getopti(ctx, ERB_OPT_MAX_SAMPLES) ? 0 : 1; +} - int max_samples = erb_getopti(ctx, ERB_OPT_MAX_SAMPLES); - - if(timeout > 0) { - auto start_time = steady_clock::now(); - while(duration_cast(steady_clock::now() - start_time).count() < timeout) { - render_pixel(ctx, ctx->cur_pixel_x, ctx->cur_pixel_y, ctx->cur_sample); - - if(++ctx->cur_pixel_x >= ctx->cur_rect.width) { - ctx->cur_pixel_x = ctx->cur_rect.x; - if(++ctx->cur_pixel_y >= ctx->cur_rect.height) { - ctx->cur_pixel_y = ctx->cur_rect.y; - if(++ctx->cur_sample >= max_samples) { - ctx->cur_rect = INVALID_RECT; - return 0; - } - } - } - } - return 1; - } - - if(max_samples == INF_SAMPLES) { - // don't allow infinite samples when rendering non-progressively - max_samples = 128; - } - - for(int i=0; icur_frame) { + return; // skip stale blocks + } + + for(int i=0; iscn->get_active_camera(); @@ -360,14 +372,3 @@ pix[2] = pow(accum[2] * inv_samples, ctx->inv_gamma); pix[3] = accum[3] * inv_samples; } - -bool Rect::operator ==(const Rect &r) const -{ - return memcmp(this, &r, sizeof r) == 0; -} - -bool Rect::operator !=(const Rect &r) const -{ - return memcmp(this, &r, sizeof r) != 0; -} - diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/erebus_impl.h --- a/liberebus/src/erebus_impl.h Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/erebus_impl.h Sun Jun 01 19:19:40 2014 +0300 @@ -15,11 +15,10 @@ Vector4 vval; }; -struct Rect { +struct Block { int x, y, width, height; - - bool operator ==(const Rect &r) const; - bool operator !=(const Rect &r) const; + int sample; + int frame; }; struct erebus { @@ -34,9 +33,8 @@ // render state float inv_gamma; long cur_time; - int cur_pixel_x, cur_pixel_y; - Rect cur_rect; int cur_sample; + int cur_frame; // interactive input std::vector keystate; diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/image.h --- a/liberebus/src/image.h Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/image.h Sun Jun 01 19:19:40 2014 +0300 @@ -27,6 +27,8 @@ void set_pixels(int xsz, int ysz, T *pix); T *get_pixels() const; + void clear(); + bool load(const char *fname); }; diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/image.inl --- a/liberebus/src/image.inl Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/image.inl Sun Jun 01 19:19:40 2014 +0300 @@ -119,6 +119,12 @@ } template +void Image::clear() +{ + memset(pixels, 0, width * height * 4 * sizeof(T)); +} + +template inline bool load_image(Image *img, const char *fname) { return false; diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/rt.cc --- a/liberebus/src/rt.cc Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/rt.cc Sun Jun 01 19:19:40 2014 +0300 @@ -28,11 +28,11 @@ const Material *mtl = &obj->mtl; const Reflectance *brdf = obj->brdf; const Ray &ray = hit.world_ray; - bool entering = true; + //bool entering = true; Vector3 norm = hit.calc_normal(); if(dot_product(ray.dir, norm) > 0.0) { - entering = false; + //entering = false; norm = -norm; } diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/threadpool.cc --- a/liberebus/src/threadpool.cc Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/threadpool.cc Sun Jun 01 19:19:40 2014 +0300 @@ -1,59 +1,128 @@ -#include "threadpool.h" - -ThreadPool::ThreadPool(int num_threads) -{ - quit = false; - - if(num_threads == -1) { - num_threads = std::thread::hardware_concurrency(); - } - - printf("creating thread pool with %d threads\n", num_threads); - - thread = new std::thread[num_threads]; - for(int i=0; inum_threads = num_threads; -} - -ThreadPool::~ThreadPool() -{ - quit = true; - condvar.notify_all(); - - printf("ThreadPool: waiting for %d worker threads to stop ", num_threads); - fflush(stdout); - for(int i=0; i func) -{ - std::unique_lock lock(workq_mutex); - workq.push_back(func); -} - -void ThreadPool::thread_func() -{ - std::unique_lock lock(workq_mutex); - for(;;) { - if(quit) break; - - condvar.wait(lock); - - if(!quit && !workq.empty()) { - std::function work = workq.front(); - workq.pop_front(); - lock.unlock(); - - work(); - - lock.lock(); - } - } -} +#include +#include +#include "threadpool.h" + +using namespace std::chrono; + +ThreadPool::ThreadPool(int num_threads) +{ + quit = false; + qsize = 0; + nactive = 0; + + if(num_threads == -1) { + num_threads = std::thread::hardware_concurrency(); + } + + printf("creating thread pool with %d threads\n", num_threads); + + thread = new std::thread[num_threads]; + for(int i=0; inum_threads = num_threads; +} + +ThreadPool::~ThreadPool() +{ + quit = true; + workq_condvar.notify_all(); + + printf("ThreadPool: waiting for %d worker threads to stop ", num_threads); + fflush(stdout); + for(int i=0; i func) +{ + add_work(func, std::function{}); +} + +void ThreadPool::add_work(std::function work_func, std::function done_func) +{ + std::unique_lock lock(workq_mutex); + workq.push_back(WorkItem{work_func, done_func}); + ++qsize; + workq_condvar.notify_all(); +} + +int ThreadPool::queued() const +{ + std::unique_lock lock(workq_mutex); + return qsize; +} + +int ThreadPool::active() const +{ + std::unique_lock lock(workq_mutex); + return nactive; +} + +int ThreadPool::pending() const +{ + std::unique_lock lock(workq_mutex); + return nactive + qsize; +} + +long ThreadPool::wait() +{ + auto start_time = steady_clock::now(); + + std::unique_lock lock(workq_mutex); + done_condvar.wait(lock, [this](){ return nactive == 0 && workq.empty(); }); + + auto dur = steady_clock::now() - start_time; + return duration_cast(dur).count(); +} + +long ThreadPool::wait(long timeout) +{ + auto start_time = steady_clock::now(); + duration dur, timeout_dur(std::max(timeout, 5L)); + + std::unique_lock lock(workq_mutex); + while(timeout_dur.count() > 0 && (nactive > 0 || !workq.empty())) { + if(done_condvar.wait_for(lock, timeout_dur) == std::cv_status::timeout) { + break; + } + dur = duration_cast(steady_clock::now() - start_time); + timeout_dur = milliseconds(std::max(timeout, 5L)) - dur; + } + + /*printf("waited for: %ld ms (%ld req) (na %d,qs %d,em %s)\n", dur.count(), timeout, + nactive, qsize, workq.empty() ? "true" : "false");*/ + return dur.count(); +} + +void ThreadPool::thread_func() +{ + std::unique_lock lock(workq_mutex); + for(;;) { + if(quit) break; + + workq_condvar.wait(lock); + + while(!quit && !workq.empty()) { + WorkItem witem = workq.front(); + workq.pop_front(); + ++nactive; + --qsize; + lock.unlock(); + + witem.work(); + if(witem.done) { + witem.done(); + } + + lock.lock(); + --nactive; + done_condvar.notify_all(); + } + } +} + diff -r 6ef4b10fa468 -r c8a6fb04fefa liberebus/src/threadpool.h --- a/liberebus/src/threadpool.h Sat May 31 06:21:09 2014 +0300 +++ b/liberebus/src/threadpool.h Sun Jun 01 19:19:40 2014 +0300 @@ -1,30 +1,53 @@ -#ifndef THREAD_POOL_H_ -#define THREAD_POOL_H_ - -#include -#include -#include -#include -#include - -class ThreadPool { -private: - int num_threads; - std::thread *thread; // array of threads - - std::list> workq; - std::mutex workq_mutex; - std::condition_variable condvar; - - bool quit; - - void thread_func(); - -public: - ThreadPool(int num_threads = -1); - ~ThreadPool(); - - void add_work(std::function func); -}; - -#endif // THREAD_POOL_H_ \ No newline at end of file +#ifndef THREAD_POOL_H_ +#define THREAD_POOL_H_ + +#include +#include +#include +#include +#include + +class ThreadPool { +private: + int num_threads; + std::thread *thread; // array of threads + + struct WorkItem { + std::function work; + std::function done; + }; + + int qsize; + std::list workq; + mutable std::mutex workq_mutex; + std::condition_variable workq_condvar; + + int nactive; // number of active workers (not sleeping) + + // condvar used by wait + std::condition_variable done_condvar; + + bool quit; + + void thread_func(); + +public: + ThreadPool(int num_threads = -1); + ~ThreadPool(); + + void add_work(std::function func); + void add_work(std::function work_func, std::function done_func); + + // returns the number of queued work items + int queued() const; + // returns the number of active threads + int active() const; + // returns number of pending work items (both in the queue and active) + int pending() const; + + // waits for all work to be completed + long wait(); + long wait(long timeout); +}; + +#endif // THREAD_POOL_H_ diff -r 6ef4b10fa468 -r c8a6fb04fefa src/main.cc --- a/src/main.cc Sat May 31 06:21:09 2014 +0300 +++ b/src/main.cc Sun Jun 01 19:19:40 2014 +0300 @@ -2,9 +2,12 @@ #include #include #include +#include #include "opengl.h" #include "erebus.h" +using namespace std::chrono; + #define SCALE 2 static bool init();