erebus
changeset 26:c8a6fb04fefa
multithreaded edit
author | John Tsiombikas <nuclear@member.fsf.org> |
---|---|
date | Sun, 01 Jun 2014 19:19:40 +0300 |
parents | 6ef4b10fa468 |
children | 0ced900e15a7 |
files | liberebus/src/erebus.cc liberebus/src/erebus_impl.h liberebus/src/image.h liberebus/src/image.inl liberebus/src/rt.cc liberebus/src/threadpool.cc liberebus/src/threadpool.h src/main.cc |
diffstat | 8 files changed, 259 insertions(+), 157 deletions(-) [+] |
line diff
1.1 --- a/liberebus/src/erebus.cc Sat May 31 06:21:09 2014 +0300 1.2 +++ b/liberebus/src/erebus.cc Sun Jun 01 19:19:40 2014 +0300 1.3 @@ -12,8 +12,7 @@ 1.4 1.5 using namespace std::chrono; 1.6 1.7 -#define INVALID_RECT Rect{0, 0, 0, 0} 1.8 - 1.9 +static void render_block(struct erebus *ctx, Block blk); 1.10 static void render_pixel(struct erebus *ctx, int x, int y, int sample); 1.11 1.12 static std::mt19937 rnd_gen; 1.13 @@ -34,7 +33,7 @@ 1.14 1.15 ctx->scn = 0; 1.16 ctx->cur_time = 0; 1.17 - ctx->cur_rect = INVALID_RECT; 1.18 + ctx->cur_frame = 0; 1.19 1.20 erb_setoptf(ctx, ERB_OPT_GAMMA, 2.2); 1.21 erb_setopti(ctx, ERB_OPT_MAX_ITER, 6); 1.22 @@ -125,16 +124,24 @@ 1.23 void erb_begin_frame(struct erebus *ctx, long ms) 1.24 { 1.25 printf("starting new frame...\n"); 1.26 + ++ctx->cur_frame; 1.27 + ctx->cur_sample = 0; 1.28 ctx->cur_time = ms; 1.29 1.30 int xsz = erb_getopti(ctx, ERB_OPT_WIDTH); 1.31 int ysz = erb_getopti(ctx, ERB_OPT_HEIGHT); 1.32 1.33 - ctx->fbimg.create(xsz, ysz); 1.34 - ctx->accum.create(xsz, ysz); 1.35 + if(!ctx->fbimg.get_pixels() || ctx->fbimg.get_width() != xsz || ctx->fbimg.get_height() < ysz) { 1.36 + ctx->fbimg.create(xsz, ysz); 1.37 + ctx->accum.create(xsz, ysz); 1.38 + } else { 1.39 + ctx->fbimg.clear(); 1.40 + ctx->accum.clear(); 1.41 + } 1.42 1.43 - ctx->cur_rect = INVALID_RECT; 1.44 ctx->inv_gamma = 1.0f / erb_getoptf(ctx, ERB_OPT_GAMMA); 1.45 + 1.46 + ctx->scn->update(ctx->cur_time); 1.47 } 1.48 1.49 int erb_render(struct erebus *ctx, long timeout) 1.50 @@ -142,56 +149,48 @@ 1.51 return erb_render_rect(ctx, 0, 0, ctx->fbimg.get_width(), ctx->fbimg.get_height(), timeout); 1.52 } 1.53 1.54 +#define BLKSZ 32 1.55 + 1.56 int erb_render_rect(struct erebus *ctx, int x, int y, int width, int height, long timeout) 1.57 { 1.58 + while(ctx->tpool.pending()) { 1.59 + if(timeout > 0) { 1.60 + long wait_interval = ctx->tpool.wait(timeout); 1.61 + timeout -= wait_interval; 1.62 + } else { 1.63 + return 1; 1.64 + } 1.65 + } 1.66 + 1.67 if(!width || 
!height) return -1; 1.68 1.69 - Rect rect{x, y, width, height}; 1.70 - if(ctx->cur_rect != rect) { 1.71 - // starting a new rendering apparently 1.72 - ctx->cur_rect = rect; 1.73 - ctx->cur_pixel_x = x; 1.74 - ctx->cur_pixel_y = y; 1.75 - ctx->cur_sample = 0; 1.76 + int startx = x; 1.77 + int endx = x + width; 1.78 + int endy = y + height; 1.79 + 1.80 + while(y < endy) { 1.81 + x = startx; 1.82 + while(x < endx) { 1.83 + Block blk; 1.84 + blk.x = x; 1.85 + blk.y = y; 1.86 + blk.width = std::min(BLKSZ, endx - x); 1.87 + blk.height = std::min(BLKSZ, endy - y); 1.88 + blk.sample = ctx->cur_sample; 1.89 + blk.frame = ctx->cur_frame; 1.90 + 1.91 + ctx->tpool.add_work(std::bind(render_block, ctx, blk)); 1.92 + 1.93 + x += BLKSZ; 1.94 + } 1.95 + y += BLKSZ; 1.96 } 1.97 1.98 - ctx->scn->update(); 1.99 + ++ctx->cur_sample; 1.100 + ctx->tpool.wait(timeout); // wait for completion 1.101 + return ctx->cur_sample > erb_getopti(ctx, ERB_OPT_MAX_SAMPLES) ? 0 : 1; 1.102 +} 1.103 1.104 - int max_samples = erb_getopti(ctx, ERB_OPT_MAX_SAMPLES); 1.105 - 1.106 - if(timeout > 0) { 1.107 - auto start_time = steady_clock::now(); 1.108 - while(duration_cast<milliseconds>(steady_clock::now() - start_time).count() < timeout) { 1.109 - render_pixel(ctx, ctx->cur_pixel_x, ctx->cur_pixel_y, ctx->cur_sample); 1.110 - 1.111 - if(++ctx->cur_pixel_x >= ctx->cur_rect.width) { 1.112 - ctx->cur_pixel_x = ctx->cur_rect.x; 1.113 - if(++ctx->cur_pixel_y >= ctx->cur_rect.height) { 1.114 - ctx->cur_pixel_y = ctx->cur_rect.y; 1.115 - if(++ctx->cur_sample >= max_samples) { 1.116 - ctx->cur_rect = INVALID_RECT; 1.117 - return 0; 1.118 - } 1.119 - } 1.120 - } 1.121 - } 1.122 - return 1; 1.123 - } 1.124 - 1.125 - if(max_samples == INF_SAMPLES) { 1.126 - // don't allow infinite samples when rendering non-progressively 1.127 - max_samples = 128; 1.128 - } 1.129 - 1.130 - for(int i=0; i<height; i++) { 1.131 - for(int j=0; j<width; j++) { 1.132 - for(int k=0; k<max_samples; k++) { 1.133 - render_pixel(ctx, j, i, 
k); 1.134 - } 1.135 - } 1.136 - } 1.137 - return 0; 1.138 -} 1.139 1.140 int erb_get_progress(struct erebus *ctx) 1.141 { 1.142 @@ -335,6 +334,19 @@ 1.143 return unirnd(rnd_gen); 1.144 } 1.145 1.146 +static void render_block(struct erebus *ctx, Block blk) 1.147 +{ 1.148 + if(blk.frame < ctx->cur_frame) { 1.149 + return; // skip stale blocks 1.150 + } 1.151 + 1.152 + for(int i=0; i<blk.height; i++) { 1.153 + for(int j=0; j<blk.width; j++) { 1.154 + render_pixel(ctx, blk.x + j, blk.y + i, blk.sample); 1.155 + } 1.156 + } 1.157 +} 1.158 + 1.159 static void render_pixel(struct erebus *ctx, int x, int y, int sample) 1.160 { 1.161 Camera *cam = ctx->scn->get_active_camera(); 1.162 @@ -360,14 +372,3 @@ 1.163 pix[2] = pow(accum[2] * inv_samples, ctx->inv_gamma); 1.164 pix[3] = accum[3] * inv_samples; 1.165 } 1.166 - 1.167 -bool Rect::operator ==(const Rect &r) const 1.168 -{ 1.169 - return memcmp(this, &r, sizeof r) == 0; 1.170 -} 1.171 - 1.172 -bool Rect::operator !=(const Rect &r) const 1.173 -{ 1.174 - return memcmp(this, &r, sizeof r) != 0; 1.175 -} 1.176 -
2.1 --- a/liberebus/src/erebus_impl.h Sat May 31 06:21:09 2014 +0300 2.2 +++ b/liberebus/src/erebus_impl.h Sun Jun 01 19:19:40 2014 +0300 2.3 @@ -15,11 +15,10 @@ 2.4 Vector4 vval; 2.5 }; 2.6 2.7 -struct Rect { 2.8 +struct Block { 2.9 int x, y, width, height; 2.10 - 2.11 - bool operator ==(const Rect &r) const; 2.12 - bool operator !=(const Rect &r) const; 2.13 + int sample; 2.14 + int frame; 2.15 }; 2.16 2.17 struct erebus { 2.18 @@ -34,9 +33,8 @@ 2.19 // render state 2.20 float inv_gamma; 2.21 long cur_time; 2.22 - int cur_pixel_x, cur_pixel_y; 2.23 - Rect cur_rect; 2.24 int cur_sample; 2.25 + int cur_frame; 2.26 2.27 // interactive input 2.28 std::vector<bool> keystate;
3.1 --- a/liberebus/src/image.h Sat May 31 06:21:09 2014 +0300 3.2 +++ b/liberebus/src/image.h Sun Jun 01 19:19:40 2014 +0300 3.3 @@ -27,6 +27,8 @@ 3.4 void set_pixels(int xsz, int ysz, T *pix); 3.5 T *get_pixels() const; 3.6 3.7 + void clear(); 3.8 + 3.9 bool load(const char *fname); 3.10 }; 3.11
4.1 --- a/liberebus/src/image.inl Sat May 31 06:21:09 2014 +0300 4.2 +++ b/liberebus/src/image.inl Sun Jun 01 19:19:40 2014 +0300 4.3 @@ -119,6 +119,12 @@ 4.4 } 4.5 4.6 template <typename T> 4.7 +void Image<T>::clear() 4.8 +{ 4.9 + memset(pixels, 0, width * height * 4 * sizeof(T)); 4.10 +} 4.11 + 4.12 +template <typename T> 4.13 inline bool load_image(Image<T> *img, const char *fname) 4.14 { 4.15 return false;
5.1 --- a/liberebus/src/rt.cc Sat May 31 06:21:09 2014 +0300 5.2 +++ b/liberebus/src/rt.cc Sun Jun 01 19:19:40 2014 +0300 5.3 @@ -28,11 +28,11 @@ 5.4 const Material *mtl = &obj->mtl; 5.5 const Reflectance *brdf = obj->brdf; 5.6 const Ray &ray = hit.world_ray; 5.7 - bool entering = true; 5.8 + //bool entering = true; 5.9 5.10 Vector3 norm = hit.calc_normal(); 5.11 if(dot_product(ray.dir, norm) > 0.0) { 5.12 - entering = false; 5.13 + //entering = false; 5.14 norm = -norm; 5.15 } 5.16
6.1 --- a/liberebus/src/threadpool.cc Sat May 31 06:21:09 2014 +0300 6.2 +++ b/liberebus/src/threadpool.cc Sun Jun 01 19:19:40 2014 +0300 6.3 @@ -1,59 +1,128 @@ 6.4 -#include "threadpool.h" 6.5 - 6.6 -ThreadPool::ThreadPool(int num_threads) 6.7 -{ 6.8 - quit = false; 6.9 - 6.10 - if(num_threads == -1) { 6.11 - num_threads = std::thread::hardware_concurrency(); 6.12 - } 6.13 - 6.14 - printf("creating thread pool with %d threads\n", num_threads); 6.15 - 6.16 - thread = new std::thread[num_threads]; 6.17 - for(int i=0; i<num_threads; i++) { 6.18 - thread[i] = std::thread(&ThreadPool::thread_func, this); 6.19 - } 6.20 - this->num_threads = num_threads; 6.21 -} 6.22 - 6.23 -ThreadPool::~ThreadPool() 6.24 -{ 6.25 - quit = true; 6.26 - condvar.notify_all(); 6.27 - 6.28 - printf("ThreadPool: waiting for %d worker threads to stop ", num_threads); 6.29 - fflush(stdout); 6.30 - for(int i=0; i<num_threads; i++) { 6.31 - thread[i].join(); 6.32 - putchar('.'); 6.33 - fflush(stdout); 6.34 - } 6.35 - putchar('\n'); 6.36 -} 6.37 - 6.38 -void ThreadPool::add_work(std::function<void ()> func) 6.39 -{ 6.40 - std::unique_lock<std::mutex> lock(workq_mutex); 6.41 - workq.push_back(func); 6.42 -} 6.43 - 6.44 -void ThreadPool::thread_func() 6.45 -{ 6.46 - std::unique_lock<std::mutex> lock(workq_mutex); 6.47 - for(;;) { 6.48 - if(quit) break; 6.49 - 6.50 - condvar.wait(lock); 6.51 - 6.52 - if(!quit && !workq.empty()) { 6.53 - std::function<void ()> work = workq.front(); 6.54 - workq.pop_front(); 6.55 - lock.unlock(); 6.56 - 6.57 - work(); 6.58 - 6.59 - lock.lock(); 6.60 - } 6.61 - } 6.62 -} 6.63 +#include <algorithm> 6.64 +#include <chrono> 6.65 +#include "threadpool.h" 6.66 + 6.67 +using namespace std::chrono; 6.68 + 6.69 +ThreadPool::ThreadPool(int num_threads) 6.70 +{ 6.71 + quit = false; 6.72 + qsize = 0; 6.73 + nactive = 0; 6.74 + 6.75 + if(num_threads == -1) { 6.76 + num_threads = std::thread::hardware_concurrency(); 6.77 + } 6.78 + 6.79 + printf("creating thread pool with %d 
threads\n", num_threads); 6.80 + 6.81 + thread = new std::thread[num_threads]; 6.82 + for(int i=0; i<num_threads; i++) { 6.83 + thread[i] = std::thread(&ThreadPool::thread_func, this); 6.84 + } 6.85 + this->num_threads = num_threads; 6.86 +} 6.87 + 6.88 +ThreadPool::~ThreadPool() 6.89 +{ 6.90 + quit = true; 6.91 + workq_condvar.notify_all(); 6.92 + 6.93 + printf("ThreadPool: waiting for %d worker threads to stop ", num_threads); 6.94 + fflush(stdout); 6.95 + for(int i=0; i<num_threads; i++) { 6.96 + thread[i].join(); 6.97 + putchar('.'); 6.98 + fflush(stdout); 6.99 + } 6.100 + putchar('\n'); 6.101 +} 6.102 + 6.103 +void ThreadPool::add_work(std::function<void ()> func) 6.104 +{ 6.105 + add_work(func, std::function<void ()>{}); 6.106 +} 6.107 + 6.108 +void ThreadPool::add_work(std::function<void ()> work_func, std::function<void ()> done_func) 6.109 +{ 6.110 + std::unique_lock<std::mutex> lock(workq_mutex); 6.111 + workq.push_back(WorkItem{work_func, done_func}); 6.112 + ++qsize; 6.113 + workq_condvar.notify_all(); 6.114 +} 6.115 + 6.116 +int ThreadPool::queued() const 6.117 +{ 6.118 + std::unique_lock<std::mutex> lock(workq_mutex); 6.119 + return qsize; 6.120 +} 6.121 + 6.122 +int ThreadPool::active() const 6.123 +{ 6.124 + std::unique_lock<std::mutex> lock(workq_mutex); 6.125 + return nactive; 6.126 +} 6.127 + 6.128 +int ThreadPool::pending() const 6.129 +{ 6.130 + std::unique_lock<std::mutex> lock(workq_mutex); 6.131 + return nactive + qsize; 6.132 +} 6.133 + 6.134 +long ThreadPool::wait() 6.135 +{ 6.136 + auto start_time = steady_clock::now(); 6.137 + 6.138 + std::unique_lock<std::mutex> lock(workq_mutex); 6.139 + done_condvar.wait(lock, [this](){ return nactive == 0 && workq.empty(); }); 6.140 + 6.141 + auto dur = steady_clock::now() - start_time; 6.142 + return duration_cast<milliseconds>(dur).count(); 6.143 +} 6.144 + 6.145 +long ThreadPool::wait(long timeout) 6.146 +{ 6.147 + auto start_time = steady_clock::now(); 6.148 + duration<long, std::milli> dur, 
timeout_dur(std::max(timeout, 5L)); 6.149 + 6.150 + std::unique_lock<std::mutex> lock(workq_mutex); 6.151 + while(timeout_dur.count() > 0 && (nactive > 0 || !workq.empty())) { 6.152 + if(done_condvar.wait_for(lock, timeout_dur) == std::cv_status::timeout) { 6.153 + break; 6.154 + } 6.155 + dur = duration_cast<milliseconds>(steady_clock::now() - start_time); 6.156 + timeout_dur = milliseconds(std::max(timeout, 5L)) - dur; 6.157 + } 6.158 + 6.159 + /*printf("waited for: %ld ms (%ld req) (na %d,qs %d,em %s)\n", dur.count(), timeout, 6.160 + nactive, qsize, workq.empty() ? "true" : "false");*/ 6.161 + return dur.count(); 6.162 +} 6.163 + 6.164 +void ThreadPool::thread_func() 6.165 +{ 6.166 + std::unique_lock<std::mutex> lock(workq_mutex); 6.167 + for(;;) { 6.168 + if(quit) break; 6.169 + 6.170 + workq_condvar.wait(lock); 6.171 + 6.172 + while(!quit && !workq.empty()) { 6.173 + WorkItem witem = workq.front(); 6.174 + workq.pop_front(); 6.175 + ++nactive; 6.176 + --qsize; 6.177 + lock.unlock(); 6.178 + 6.179 + witem.work(); 6.180 + if(witem.done) { 6.181 + witem.done(); 6.182 + } 6.183 + 6.184 + lock.lock(); 6.185 + --nactive; 6.186 + done_condvar.notify_all(); 6.187 + } 6.188 + } 6.189 +} 6.190 +
7.1 --- a/liberebus/src/threadpool.h Sat May 31 06:21:09 2014 +0300 7.2 +++ b/liberebus/src/threadpool.h Sun Jun 01 19:19:40 2014 +0300 7.3 @@ -1,30 +1,53 @@ 7.4 -#ifndef THREAD_POOL_H_ 7.5 -#define THREAD_POOL_H_ 7.6 - 7.7 -#include <list> 7.8 -#include <functional> 7.9 -#include <thread> 7.10 -#include <mutex> 7.11 -#include <condition_variable> 7.12 - 7.13 -class ThreadPool { 7.14 -private: 7.15 - int num_threads; 7.16 - std::thread *thread; // array of threads 7.17 - 7.18 - std::list<std::function<void ()>> workq; 7.19 - std::mutex workq_mutex; 7.20 - std::condition_variable condvar; 7.21 - 7.22 - bool quit; 7.23 - 7.24 - void thread_func(); 7.25 - 7.26 -public: 7.27 - ThreadPool(int num_threads = -1); 7.28 - ~ThreadPool(); 7.29 - 7.30 - void add_work(std::function<void ()> func); 7.31 -}; 7.32 - 7.33 -#endif // THREAD_POOL_H_ 7.34 \ No newline at end of file 7.35 +#ifndef THREAD_POOL_H_ 7.36 +#define THREAD_POOL_H_ 7.37 + 7.38 +#include <list> 7.39 +#include <functional> 7.40 +#include <thread> 7.41 +#include <mutex> 7.42 +#include <condition_variable> 7.43 + 7.44 +class ThreadPool { 7.45 +private: 7.46 + int num_threads; 7.47 + std::thread *thread; // array of threads 7.48 + 7.49 + struct WorkItem { 7.50 + std::function<void ()> work; 7.51 + std::function<void ()> done; 7.52 + }; 7.53 + 7.54 + int qsize; 7.55 + std::list<WorkItem> workq; 7.56 + mutable std::mutex workq_mutex; 7.57 + std::condition_variable workq_condvar; 7.58 + 7.59 + int nactive; // number of active workers (not sleeping) 7.60 + 7.61 + // condvar used by wait 7.62 + std::condition_variable done_condvar; 7.63 + 7.64 + bool quit; 7.65 + 7.66 + void thread_func(); 7.67 + 7.68 +public: 7.69 + ThreadPool(int num_threads = -1); 7.70 + ~ThreadPool(); 7.71 + 7.72 + void add_work(std::function<void ()> func); 7.73 + void add_work(std::function<void ()> work_func, std::function<void ()> done_func); 7.74 + 7.75 + // returns the number of queued work items 7.76 + int queued() const; 7.77 + // returns 
the number of active threads 7.78 + int active() const; 7.79 + // returns number of pending work items (both in the queue and active) 7.80 + int pending() const; 7.81 + 7.82 + // waits for all work to be completed 7.83 + long wait(); 7.84 + long wait(long timeout); 7.85 +}; 7.86 + 7.87 +#endif // THREAD_POOL_H_
8.1 --- a/src/main.cc Sat May 31 06:21:09 2014 +0300 8.2 +++ b/src/main.cc Sun Jun 01 19:19:40 2014 +0300 8.3 @@ -2,9 +2,12 @@ 8.4 #include <stdlib.h> 8.5 #include <assert.h> 8.6 #include <vector> 8.7 +#include <chrono> 8.8 #include "opengl.h" 8.9 #include "erebus.h" 8.10 8.11 +using namespace std::chrono; 8.12 + 8.13 #define SCALE 2 8.14 8.15 static bool init();