Improve default for number of parallel frames

This commit is contained in:
Ari Koivula 2016-09-27 23:14:57 +03:00
parent 19d423ab29
commit d7391a9593

View file

@ -36,6 +36,7 @@ static int size_of_wpp_ends(int threads)
static int select_owf_auto(const kvz_config *const cfg) static int select_owf_auto(const kvz_config *const cfg)
{ {
if (cfg->intra_period == 1) {
if (cfg->wpp) { if (cfg->wpp) {
// If wpp is on, select owf such that less than 15% of the // If wpp is on, select owf such that less than 15% of the
// frame is covered by the area where threads can not work at the same time. // frame is covered by the area where threads can not work at the same time.
@ -49,8 +50,7 @@ static int select_owf_auto(const kvz_config *const cfg)
const int wpp_treshold = lcu_width * lcu_height * 15 / 100; const int wpp_treshold = lcu_width * lcu_height * 15 / 100;
while ((threads_per_frame + 1) * 2 < lcu_width && while ((threads_per_frame + 1) * 2 < lcu_width &&
threads_per_frame + 1 < lcu_height && threads_per_frame + 1 < lcu_height &&
size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) {
{
++threads_per_frame; ++threads_per_frame;
} }
@ -72,6 +72,30 @@ static int select_owf_auto(const kvz_config *const cfg)
frames = CLIP(1, threads * 4 / 3, frames); frames = CLIP(1, threads * 4 / 3, frames);
return frames - 1; return frames - 1;
} }
} else {
// Try to estimate a good number of parallel frames for inter.
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
int threads_per_frame = MIN(lcu_width / 2, lcu_height);
int threads = cfg->threads;
// If all threads fit into one frame, at least two parallel frames should
// be used to reduce the effect of WPP spin-up and wind-down.
int frames = 1;
while (threads > 0 && threads_per_frame > 0) {
frames += 1;
threads -= threads_per_frame;
threads_per_frame -= 2;
}
if (cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) {
// Temporal skipping makes every other frame very fast to encode so
// more parallel frames should be used.
frames *= 2;
}
return CLIP(0, cfg->threads * 2 - 1, frames - 1);
}
} }
/** /**