erebus

changeset 26:c8a6fb04fefa

multithreadededit
author John Tsiombikas <nuclear@member.fsf.org>
date Sun, 01 Jun 2014 19:19:40 +0300
parents 6ef4b10fa468
children 0ced900e15a7
files liberebus/src/erebus.cc liberebus/src/erebus_impl.h liberebus/src/image.h liberebus/src/image.inl liberebus/src/rt.cc liberebus/src/threadpool.cc liberebus/src/threadpool.h src/main.cc
diffstat 8 files changed, 259 insertions(+), 157 deletions(-) [+]
line diff
     1.1 --- a/liberebus/src/erebus.cc	Sat May 31 06:21:09 2014 +0300
     1.2 +++ b/liberebus/src/erebus.cc	Sun Jun 01 19:19:40 2014 +0300
     1.3 @@ -12,8 +12,7 @@
     1.4  
     1.5  using namespace std::chrono;
     1.6  
     1.7 -#define INVALID_RECT	Rect{0, 0, 0, 0}
     1.8 -
     1.9 +static void render_block(struct erebus *ctx, Block blk);
    1.10  static void render_pixel(struct erebus *ctx, int x, int y, int sample);
    1.11  
    1.12  static std::mt19937 rnd_gen;
    1.13 @@ -34,7 +33,7 @@
    1.14  
    1.15  	ctx->scn = 0;
    1.16  	ctx->cur_time = 0;
    1.17 -	ctx->cur_rect = INVALID_RECT;
    1.18 +	ctx->cur_frame = 0;
    1.19  
    1.20  	erb_setoptf(ctx, ERB_OPT_GAMMA, 2.2);
    1.21  	erb_setopti(ctx, ERB_OPT_MAX_ITER, 6);
    1.22 @@ -125,16 +124,24 @@
    1.23  void erb_begin_frame(struct erebus *ctx, long ms)
    1.24  {
    1.25  	printf("starting new frame...\n");
    1.26 +	++ctx->cur_frame;
    1.27 +	ctx->cur_sample = 0;
    1.28  	ctx->cur_time = ms;
    1.29  
    1.30  	int xsz = erb_getopti(ctx, ERB_OPT_WIDTH);
    1.31  	int ysz = erb_getopti(ctx, ERB_OPT_HEIGHT);
    1.32  
    1.33 -	ctx->fbimg.create(xsz, ysz);
    1.34 -	ctx->accum.create(xsz, ysz);
    1.35 +	if(!ctx->fbimg.get_pixels() || ctx->fbimg.get_width() != xsz || ctx->fbimg.get_height() < ysz) {
    1.36 +		ctx->fbimg.create(xsz, ysz);
    1.37 +		ctx->accum.create(xsz, ysz);
    1.38 +	} else {
    1.39 +		ctx->fbimg.clear();
    1.40 +		ctx->accum.clear();
    1.41 +	}
    1.42  
    1.43 -	ctx->cur_rect = INVALID_RECT;
    1.44  	ctx->inv_gamma = 1.0f / erb_getoptf(ctx, ERB_OPT_GAMMA);
    1.45 +
    1.46 +	ctx->scn->update(ctx->cur_time);
    1.47  }
    1.48  
    1.49  int erb_render(struct erebus *ctx, long timeout)
    1.50 @@ -142,56 +149,48 @@
    1.51  	return erb_render_rect(ctx, 0, 0, ctx->fbimg.get_width(), ctx->fbimg.get_height(), timeout);
    1.52  }
    1.53  
    1.54 +#define BLKSZ	32
    1.55 +
    1.56  int erb_render_rect(struct erebus *ctx, int x, int y, int width, int height, long timeout)
    1.57  {
    1.58 +	while(ctx->tpool.pending()) {
    1.59 +		if(timeout > 0) {
    1.60 +			long wait_interval = ctx->tpool.wait(timeout);
    1.61 +			timeout -= wait_interval;
    1.62 +		} else {
    1.63 +			return 1;
    1.64 +		}
    1.65 +	}
    1.66 +
    1.67  	if(!width || !height) return -1;
    1.68  
    1.69 -	Rect rect{x, y, width, height};
    1.70 -	if(ctx->cur_rect != rect) {
    1.71 -		// starting a new rendering apparently
    1.72 -		ctx->cur_rect = rect;
    1.73 -		ctx->cur_pixel_x = x;
    1.74 -		ctx->cur_pixel_y = y;
    1.75 -		ctx->cur_sample = 0;
    1.76 +	int startx = x;
    1.77 +	int endx = x + width;
    1.78 +	int endy = y + height;
    1.79 +
    1.80 +	while(y < endy) {
    1.81 +		x = startx;
    1.82 +		while(x < endx) {
    1.83 +			Block blk;
    1.84 +			blk.x = x;
    1.85 +			blk.y = y;
    1.86 +			blk.width = std::min(BLKSZ, endx - x);
    1.87 +			blk.height = std::min(BLKSZ, endy - y);
    1.88 +			blk.sample = ctx->cur_sample;
    1.89 +			blk.frame = ctx->cur_frame;
    1.90 +
    1.91 +			ctx->tpool.add_work(std::bind(render_block, ctx, blk));
    1.92 +
    1.93 +			x += BLKSZ;
    1.94 +		}
    1.95 +		y += BLKSZ;
    1.96  	}
    1.97  
    1.98 -	ctx->scn->update();
    1.99 +	++ctx->cur_sample;
   1.100 +	ctx->tpool.wait(timeout);	// wait for completion
   1.101 +	return ctx->cur_sample > erb_getopti(ctx, ERB_OPT_MAX_SAMPLES) ? 0 : 1;
   1.102 +}
   1.103  
   1.104 -	int max_samples = erb_getopti(ctx, ERB_OPT_MAX_SAMPLES);
   1.105 -
   1.106 -	if(timeout > 0) {
   1.107 -		auto start_time = steady_clock::now();
   1.108 -		while(duration_cast<milliseconds>(steady_clock::now() - start_time).count() < timeout) {
   1.109 -			render_pixel(ctx, ctx->cur_pixel_x, ctx->cur_pixel_y, ctx->cur_sample);
   1.110 -
   1.111 -			if(++ctx->cur_pixel_x >= ctx->cur_rect.width) {
   1.112 -				ctx->cur_pixel_x = ctx->cur_rect.x;
   1.113 -				if(++ctx->cur_pixel_y >= ctx->cur_rect.height) {
   1.114 -					ctx->cur_pixel_y = ctx->cur_rect.y;
   1.115 -					if(++ctx->cur_sample >= max_samples) {
   1.116 -						ctx->cur_rect = INVALID_RECT;
   1.117 -						return 0;
   1.118 -					}
   1.119 -				}
   1.120 -			}
   1.121 -		}
   1.122 -		return 1;
   1.123 -	}
   1.124 -
   1.125 -	if(max_samples == INF_SAMPLES) {
   1.126 -		// don't allow infinite samples when rendering non-progressively
   1.127 -		max_samples = 128;
   1.128 -	}
   1.129 -
   1.130 -	for(int i=0; i<height; i++) {
   1.131 -		for(int j=0; j<width; j++) {
   1.132 -			for(int k=0; k<max_samples; k++) {
   1.133 -				render_pixel(ctx, j, i, k);
   1.134 -			}
   1.135 -		}
   1.136 -	}
   1.137 -	return 0;
   1.138 -}
   1.139  
   1.140  int erb_get_progress(struct erebus *ctx)
   1.141  {
   1.142 @@ -335,6 +334,19 @@
   1.143  	return unirnd(rnd_gen);
   1.144  }
   1.145  
   1.146 +static void render_block(struct erebus *ctx, Block blk)	// work item body: renders sample blk.sample for every pixel of the rectangle described by blk
   1.147 +{
   1.148 +	if(blk.frame < ctx->cur_frame) {	// the block was queued for an earlier frame than the one ctx is on now
   1.149 +		return;	// skip stale blocks
   1.150 +	}
   1.151 +
   1.152 +	for(int i=0; i<blk.height; i++) {	// i: row offset inside the block
   1.153 +		for(int j=0; j<blk.width; j++) {	// j: column offset inside the block
   1.154 +			render_pixel(ctx, blk.x + j, blk.y + i, blk.sample);	// block-relative offsets are added to the block origin (blk.x, blk.y)
   1.155 +		}
   1.156 +	}
   1.157 +}
   1.158 +
   1.159  static void render_pixel(struct erebus *ctx, int x, int y, int sample)
   1.160  {
   1.161  	Camera *cam = ctx->scn->get_active_camera();
   1.162 @@ -360,14 +372,3 @@
   1.163  	pix[2] = pow(accum[2] * inv_samples, ctx->inv_gamma);
   1.164  	pix[3] = accum[3] * inv_samples;
   1.165  }
   1.166 -
   1.167 -bool Rect::operator ==(const Rect &r) const
   1.168 -{
   1.169 -	return memcmp(this, &r, sizeof r) == 0;
   1.170 -}
   1.171 -
   1.172 -bool Rect::operator !=(const Rect &r) const
   1.173 -{
   1.174 -	return memcmp(this, &r, sizeof r) != 0;
   1.175 -}
   1.176 -
     2.1 --- a/liberebus/src/erebus_impl.h	Sat May 31 06:21:09 2014 +0300
     2.2 +++ b/liberebus/src/erebus_impl.h	Sun Jun 01 19:19:40 2014 +0300
     2.3 @@ -15,11 +15,10 @@
     2.4  	Vector4 vval;
     2.5  };
     2.6  
     2.7 -struct Rect {
     2.8 +struct Block {
     2.9  	int x, y, width, height;
    2.10 -
    2.11 -	bool operator ==(const Rect &r) const;
    2.12 -	bool operator !=(const Rect &r) const;
    2.13 +	int sample;
    2.14 +	int frame;
    2.15  };
    2.16  
    2.17  struct erebus {
    2.18 @@ -34,9 +33,8 @@
    2.19  	// render state
    2.20  	float inv_gamma;
    2.21  	long cur_time;
    2.22 -	int cur_pixel_x, cur_pixel_y;
    2.23 -	Rect cur_rect;
    2.24  	int cur_sample;
    2.25 +	int cur_frame;
    2.26  
    2.27  	// interactive input
    2.28  	std::vector<bool> keystate;
     3.1 --- a/liberebus/src/image.h	Sat May 31 06:21:09 2014 +0300
     3.2 +++ b/liberebus/src/image.h	Sun Jun 01 19:19:40 2014 +0300
     3.3 @@ -27,6 +27,8 @@
     3.4  	void set_pixels(int xsz, int ysz, T *pix);
     3.5  	T *get_pixels() const;
     3.6  
     3.7 +	void clear();
     3.8 +
     3.9  	bool load(const char *fname);
    3.10  };
    3.11  
     4.1 --- a/liberebus/src/image.inl	Sat May 31 06:21:09 2014 +0300
     4.2 +++ b/liberebus/src/image.inl	Sun Jun 01 19:19:40 2014 +0300
     4.3 @@ -119,6 +119,12 @@
     4.4  }
     4.5  
     4.6  template <typename T>
     4.7 +void Image<T>::clear()	// overwrites the pixel buffer with zero bytes; the buffer itself is neither freed nor resized
     4.8 +{
     4.9 +	memset(pixels, 0, width * height * 4 * sizeof(T));	// byte count is width * height * 4 elements of T; presumably 4 components per pixel -- TODO confirm against create()
    4.10 +}
    4.11 +
    4.12 +template <typename T>
    4.13  inline bool load_image(Image<T> *img, const char *fname)
    4.14  {
    4.15  	return false;
     5.1 --- a/liberebus/src/rt.cc	Sat May 31 06:21:09 2014 +0300
     5.2 +++ b/liberebus/src/rt.cc	Sun Jun 01 19:19:40 2014 +0300
     5.3 @@ -28,11 +28,11 @@
     5.4  	const Material *mtl = &obj->mtl;
     5.5  	const Reflectance *brdf = obj->brdf;
     5.6  	const Ray &ray = hit.world_ray;
     5.7 -	bool entering = true;
     5.8 +	//bool entering = true;
     5.9  
    5.10  	Vector3 norm = hit.calc_normal();
    5.11  	if(dot_product(ray.dir, norm) > 0.0) {
    5.12 -		entering = false;
    5.13 +		//entering = false;
    5.14  		norm = -norm;
    5.15  	}
    5.16  
     6.1 --- a/liberebus/src/threadpool.cc	Sat May 31 06:21:09 2014 +0300
     6.2 +++ b/liberebus/src/threadpool.cc	Sun Jun 01 19:19:40 2014 +0300
     6.3 @@ -1,59 +1,128 @@
     6.4 -#include "threadpool.h"
     6.5 -
     6.6 -ThreadPool::ThreadPool(int num_threads)
     6.7 -{
     6.8 -	quit = false;
     6.9 -
    6.10 -	if(num_threads == -1) {
    6.11 -		num_threads = std::thread::hardware_concurrency();
    6.12 -	}
    6.13 -
    6.14 -	printf("creating thread pool with %d threads\n", num_threads);
    6.15 -
    6.16 -	thread = new std::thread[num_threads];
    6.17 -	for(int i=0; i<num_threads; i++) {
    6.18 -		thread[i] = std::thread(&ThreadPool::thread_func, this);
    6.19 -	}
    6.20 -	this->num_threads = num_threads;
    6.21 -}
    6.22 -
    6.23 -ThreadPool::~ThreadPool()
    6.24 -{
    6.25 -	quit = true;
    6.26 -	condvar.notify_all();
    6.27 -
    6.28 -	printf("ThreadPool: waiting for %d worker threads to stop ", num_threads);
    6.29 -	fflush(stdout);
    6.30 -	for(int i=0; i<num_threads; i++) {
    6.31 -		thread[i].join();
    6.32 -		putchar('.');
    6.33 -		fflush(stdout);
    6.34 -	}
    6.35 -	putchar('\n');
    6.36 -}
    6.37 -
    6.38 -void ThreadPool::add_work(std::function<void ()> func)
    6.39 -{
    6.40 -	std::unique_lock<std::mutex> lock(workq_mutex);
    6.41 -	workq.push_back(func);
    6.42 -}
    6.43 -
    6.44 -void ThreadPool::thread_func()
    6.45 -{
    6.46 -	std::unique_lock<std::mutex> lock(workq_mutex);
    6.47 -	for(;;) {
    6.48 -		if(quit) break;
    6.49 -
    6.50 -		condvar.wait(lock);
    6.51 -
    6.52 -		if(!quit && !workq.empty()) {
    6.53 -			std::function<void ()> work = workq.front();
    6.54 -			workq.pop_front();
    6.55 -			lock.unlock();
    6.56 -
    6.57 -			work();
    6.58 -
    6.59 -			lock.lock();
    6.60 -		}
    6.61 -	}
    6.62 -}
    6.63 +#include <algorithm>
    6.64 +#include <chrono>
    6.65 +#include "threadpool.h"
    6.66 +
    6.67 +using namespace std::chrono;
    6.68 +
    6.69 +ThreadPool::ThreadPool(int num_threads)	// starts num_threads worker threads; any value below 1 (the default is -1) asks for one worker per hardware thread
    6.70 +{
    6.71 +	quit = false;
    6.72 +	qsize = 0;
    6.73 +	nactive = 0;
    6.74 +
    6.75 +	if(num_threads < 1) {
    6.76 +		num_threads = std::max(1, static_cast<int>(std::thread::hardware_concurrency()));	// hardware_concurrency() may return 0; a pool without workers would never run queued work
    6.77 +	}
    6.78 +
    6.79 +	printf("creating thread pool with %d threads\n", num_threads);
    6.80 +
    6.81 +	thread = new std::thread[num_threads];	// released in the destructor once every worker has been joined
    6.82 +	for(int i=0; i<num_threads; i++) {
    6.83 +		thread[i] = std::thread(&ThreadPool::thread_func, this);	// every worker runs thread_func on this pool
    6.84 +	}
    6.85 +	this->num_threads = num_threads;
    6.86 +}
    6.87 +
    6.88 +ThreadPool::~ThreadPool()	// asks the workers to stop, joins all of them and frees the thread array; items still in the queue are not run
    6.89 +{
    6.90 +	{ std::unique_lock<std::mutex> lock(workq_mutex); quit = true; }	// set under the mutex: a worker between its quit check and its wait holds the lock, so it cannot miss the notification below
    6.91 +	workq_condvar.notify_all();
    6.92 +	printf("ThreadPool: waiting for %d worker threads to stop ", num_threads);
    6.93 +	fflush(stdout);
    6.94 +	for(int i=0; i<num_threads; i++) {
    6.95 +		thread[i].join();	// a worker that is busy finishes its current item first
    6.96 +		putchar('.');
    6.97 +		fflush(stdout);
    6.98 +	}
    6.99 +	putchar('\n');
   6.100 +	delete [] thread;	// allocated with new[] in the constructor; every element has been joined above
   6.101 +}
   6.102 +
   6.103 +void ThreadPool::add_work(std::function<void ()> func)	// queues func without a completion callback
   6.104 +{
   6.105 +	add_work(func, std::function<void ()>{});	// an empty std::function tests false, so the worker skips the done call
   6.106 +}
   6.107 +
   6.108 +void ThreadPool::add_work(std::function<void ()> work_func, std::function<void ()> done_func)	// queues work_func; a worker calls done_func right after it if done_func is not empty
   6.109 +{
   6.110 +	std::unique_lock<std::mutex> lock(workq_mutex);	// held until return, covering the queue and its counter
   6.111 +	workq.push_back(WorkItem{work_func, done_func});	// workers pop from the front, so items are started in submission order
   6.112 +	++qsize;
   6.113 +	workq_condvar.notify_all();	// wakes every worker currently waiting for work
   6.114 +}
   6.115 +
   6.116 +int ThreadPool::queued() const	// number of work items added but not yet picked up by a worker
   6.117 +{
   6.118 +	std::unique_lock<std::mutex> lock(workq_mutex);	// workq_mutex is declared mutable so it can be locked in a const method
   6.119 +	return qsize;
   6.120 +}
   6.121 +
   6.122 +int ThreadPool::active() const	// number of workers currently running a work item
   6.123 +{
   6.124 +	std::unique_lock<std::mutex> lock(workq_mutex);	// nactive is modified by the workers while they hold this mutex
   6.125 +	return nactive;
   6.126 +}
   6.127 +
   6.128 +int ThreadPool::pending() const	// work items not finished yet: those being executed plus those still queued
   6.129 +{
   6.130 +	std::unique_lock<std::mutex> lock(workq_mutex);	// both counters are read under one lock, so the sum is consistent
   6.131 +	return nactive + qsize;
   6.132 +}
   6.133 +
   6.134 +long ThreadPool::wait()	// blocks until no worker is active and the queue is empty; returns the milliseconds spent in this call
   6.135 +{
   6.136 +	auto start_time = steady_clock::now();
   6.137 +
   6.138 +	std::unique_lock<std::mutex> lock(workq_mutex);
   6.139 +	done_condvar.wait(lock, [this](){ return nactive == 0 && workq.empty(); });	// workers notify done_condvar after each finished item; the predicate is re-checked on every wakeup
   6.140 +
   6.141 +	auto dur = steady_clock::now() - start_time;
   6.142 +	return duration_cast<milliseconds>(dur).count();	// truncated to whole milliseconds
   6.143 +}
   6.144 +
   6.145 +long ThreadPool::wait(long timeout)	// waits for all work to finish, for at most max(timeout, 5) milliseconds; returns the milliseconds actually spent waiting
   6.146 +{
   6.147 +	auto start_time = steady_clock::now();
   6.148 +	duration<long, std::milli> dur(0), timeout_dur(std::max(timeout, 5L));	// dur starts at 0: a default-constructed duration holds an indeterminate value
   6.149 +
   6.150 +	std::unique_lock<std::mutex> lock(workq_mutex);
   6.151 +	while(timeout_dur.count() > 0 && (nactive > 0 || !workq.empty())) {	// stop when the time budget is used up or nothing is pending
   6.152 +		if(done_condvar.wait_for(lock, timeout_dur) == std::cv_status::timeout) {
   6.153 +			break;
   6.154 +		}
   6.155 +		dur = duration_cast<milliseconds>(steady_clock::now() - start_time);
   6.156 +		timeout_dur = milliseconds(std::max(timeout, 5L)) - dur;	// time left after a wakeup that did not finish the work
   6.157 +	}
   6.158 +
   6.159 +	// measured after the loop so that a skipped loop or an immediate timeout still reports a valid elapsed time
   6.160 +	dur = duration_cast<milliseconds>(steady_clock::now() - start_time);
   6.161 +	return dur.count();
   6.162 +}
   6.163 +
   6.164 +void ThreadPool::thread_func()	// worker main loop: sleeps until work arrives, drains the queue, exits once quit is set
   6.165 +{
   6.166 +	std::unique_lock<std::mutex> lock(workq_mutex);	// held whenever the worker is not executing an item
   6.167 +	for(;;) {
   6.168 +		if(quit) break;
   6.169 +
   6.170 +		workq_condvar.wait(lock, [this]() { return quit || !workq.empty(); });	// the predicate is checked before sleeping, so work queued before this thread got here is not missed
   6.171 +
   6.172 +		while(!quit && !workq.empty()) {
   6.173 +			WorkItem witem = workq.front();
   6.174 +			workq.pop_front();
   6.175 +			++nactive;	// the item moves from "queued" to "active" under a single lock, so pending() never undercounts
   6.176 +			--qsize;
   6.177 +			lock.unlock();	// run the item without the lock so other workers and add_work can proceed
   6.178 +
   6.179 +			witem.work();
   6.180 +			if(witem.done) {	// the completion callback is optional
   6.181 +				witem.done();
   6.182 +			}
   6.183 +
   6.184 +			lock.lock();
   6.185 +			--nactive;
   6.186 +			done_condvar.notify_all();	// lets wait() re-check whether all work has finished
   6.187 +		}
   6.188 +	}
   6.189 +}
   6.190 +
     7.1 --- a/liberebus/src/threadpool.h	Sat May 31 06:21:09 2014 +0300
     7.2 +++ b/liberebus/src/threadpool.h	Sun Jun 01 19:19:40 2014 +0300
     7.3 @@ -1,30 +1,53 @@
     7.4 -#ifndef THREAD_POOL_H_
     7.5 -#define THREAD_POOL_H_
     7.6 -
     7.7 -#include <list>
     7.8 -#include <functional>
     7.9 -#include <thread>
    7.10 -#include <mutex>
    7.11 -#include <condition_variable>
    7.12 -
    7.13 -class ThreadPool {
    7.14 -private:
    7.15 -	int num_threads;
    7.16 -	std::thread *thread;	// array of threads
    7.17 -
    7.18 -	std::list<std::function<void ()>> workq;
    7.19 -	std::mutex workq_mutex;
    7.20 -	std::condition_variable condvar;
    7.21 -
    7.22 -	bool quit;
    7.23 -
    7.24 -	void thread_func();
    7.25 -
    7.26 -public:
    7.27 -	ThreadPool(int num_threads = -1);
    7.28 -	~ThreadPool();
    7.29 -
    7.30 -	void add_work(std::function<void ()> func);
    7.31 -};
    7.32 -
    7.33 -#endif	// THREAD_POOL_H_
    7.34 \ No newline at end of file
    7.35 +#ifndef THREAD_POOL_H_
    7.36 +#define THREAD_POOL_H_
    7.37 +
    7.38 +#include <list>
    7.39 +#include <functional>
    7.40 +#include <thread>
    7.41 +#include <mutex>
    7.42 +#include <condition_variable>
    7.43 +
    7.44 +class ThreadPool {	// fixed set of worker threads that execute queued std::function jobs
    7.45 +private:
    7.46 +	int num_threads;	// number of elements in the thread array
    7.47 +	std::thread *thread;	// array of threads
    7.48 +
    7.49 +	struct WorkItem {
    7.50 +		std::function<void ()> work;	// the job itself
    7.51 +		std::function<void ()> done;	// optional callback, run by the same worker right after work
    7.52 +	};
    7.53 +
    7.54 +	int qsize;	// number of items in workq, updated together with the list
    7.55 +	std::list<WorkItem> workq;	// queued items; added at the back, taken from the front
    7.56 +	mutable std::mutex workq_mutex;	// locked around accesses to workq, qsize and nactive; mutable so the const getters can lock it
    7.57 +	std::condition_variable workq_condvar;	// notified when work is added and when the pool is destroyed
    7.58 +
    7.59 +	int nactive;	// number of active workers (not sleeping)
    7.60 +
    7.61 +	// condvar used by wait
    7.62 +	std::condition_variable done_condvar;
    7.63 +
    7.64 +	bool quit;	// set by the destructor to make the workers leave their loop
    7.65 +
    7.66 +	void thread_func();	// main loop executed by every worker thread
    7.67 +
    7.68 +public:
    7.69 +	ThreadPool(int num_threads = -1);	// -1: use std::thread::hardware_concurrency() as the thread count
    7.70 +	~ThreadPool();	// signals the workers to quit and joins them
    7.71 +
    7.72 +	void add_work(std::function<void ()> func);	// queues func with no completion callback
    7.73 +	void add_work(std::function<void ()> work_func, std::function<void ()> done_func);	// queues work_func; done_func, if not empty, runs after it on the worker thread
    7.74 +
    7.75 +	// returns the number of queued work items
    7.76 +	int queued() const;
    7.77 +	// returns the number of active threads
    7.78 +	int active() const;
    7.79 +	// returns number of pending work items (both in the queue and active)
    7.80 +	int pending() const;
    7.81 +
    7.82 +	// waits for all work to be completed
    7.83 +	long wait();	// no time limit; returns the milliseconds spent waiting
    7.84 +	long wait(long timeout);	// timeout is in milliseconds and is raised to at least 5; the return value is a millisecond count
    7.85 +};
    7.86 +
    7.87 +#endif	// THREAD_POOL_H_
     8.1 --- a/src/main.cc	Sat May 31 06:21:09 2014 +0300
     8.2 +++ b/src/main.cc	Sun Jun 01 19:19:40 2014 +0300
     8.3 @@ -2,9 +2,12 @@
     8.4  #include <stdlib.h>
     8.5  #include <assert.h>
     8.6  #include <vector>
     8.7 +#include <chrono>
     8.8  #include "opengl.h"
     8.9  #include "erebus.h"
    8.10  
    8.11 +using namespace std::chrono;
    8.12 +
    8.13  #define SCALE 2
    8.14  
    8.15  static bool init();