mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Improve default for number of parallel frames
This commit is contained in:
parent
19d423ab29
commit
d7391a9593
|
@ -36,41 +36,65 @@ static int size_of_wpp_ends(int threads)
|
||||||
|
|
||||||
static int select_owf_auto(const kvz_config *const cfg)
|
static int select_owf_auto(const kvz_config *const cfg)
|
||||||
{
|
{
|
||||||
if (cfg->wpp) {
|
if (cfg->intra_period == 1) {
|
||||||
// If wpp is on, select owf such that less than 15% of the
|
if (cfg->wpp) {
|
||||||
// frame is covered by the area where threads can not work at the same time.
|
// If wpp is on, select owf such that less than 15% of the
|
||||||
|
// frame is covered by the area where threads can not work at the same time.
|
||||||
|
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
|
||||||
|
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
|
||||||
|
|
||||||
|
// Find the largest number of threads per frame that satisfies
|
||||||
|
// the condition: wpp start/stop inefficiency takes up less than 15%
|
||||||
|
// of frame area.
|
||||||
|
int threads_per_frame = 1;
|
||||||
|
const int wpp_treshold = lcu_width * lcu_height * 15 / 100;
|
||||||
|
while ((threads_per_frame + 1) * 2 < lcu_width &&
|
||||||
|
threads_per_frame + 1 < lcu_height &&
|
||||||
|
size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) {
|
||||||
|
++threads_per_frame;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int threads = MAX(cfg->threads, 1);
|
||||||
|
const int frames = CEILDIV(threads, threads_per_frame);
|
||||||
|
|
||||||
|
// Convert from number of parallel frames to number of additional frames.
|
||||||
|
return CLIP(0, threads - 1, frames - 1);
|
||||||
|
} else {
|
||||||
|
// If wpp is not on, select owf such that there is enough
|
||||||
|
// tiles for twice the number of threads.
|
||||||
|
|
||||||
|
int tiles_per_frame = cfg->tiles_width_count * cfg->tiles_height_count;
|
||||||
|
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||||
|
int frames = CEILDIV(threads * 4, tiles_per_frame);
|
||||||
|
|
||||||
|
// Limit number of frames to 1.25x the number of threads for the case
|
||||||
|
// where there is only 1 tile per frame.
|
||||||
|
frames = CLIP(1, threads * 4 / 3, frames);
|
||||||
|
return frames - 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Try and estimate a good number of parallel frames for inter.
|
||||||
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
|
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
|
||||||
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
|
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
|
||||||
|
int threads_per_frame = MIN(lcu_width / 2, lcu_height);
|
||||||
|
int threads = cfg->threads;
|
||||||
|
|
||||||
// Find the largest number of threads per frame that satisfies
|
// If all threads fit into one frame, at least two parallel frames should
|
||||||
// the condition: wpp start/stop inefficiency takes up less than 15%
|
// be used to reduce the effect of WPP spin-up and wind-down.
|
||||||
// of frame area.
|
int frames = 1;
|
||||||
int threads_per_frame = 1;
|
|
||||||
const int wpp_treshold = lcu_width * lcu_height * 15 / 100;
|
while (threads > 0 && threads_per_frame > 0) {
|
||||||
while ((threads_per_frame + 1) * 2 < lcu_width &&
|
frames += 1;
|
||||||
threads_per_frame + 1 < lcu_height &&
|
threads -= threads_per_frame;
|
||||||
size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold)
|
threads_per_frame -= 2;
|
||||||
{
|
|
||||||
++threads_per_frame;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const int threads = MAX(cfg->threads, 1);
|
if (cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) {
|
||||||
const int frames = CEILDIV(threads, threads_per_frame);
|
// Temporal skipping makes every other frame very fast to encode so
|
||||||
|
// more parallel frames should be used.
|
||||||
// Convert from number of parallel frames to number of additional frames.
|
frames *= 2;
|
||||||
return CLIP(0, threads - 1, frames - 1);
|
}
|
||||||
} else {
|
return CLIP(0, cfg->threads * 2 - 1, frames - 1);
|
||||||
// If wpp is not on, select owf such that there is enough
|
|
||||||
// tiles for twice the number of threads.
|
|
||||||
|
|
||||||
int tiles_per_frame= cfg->tiles_width_count * cfg->tiles_height_count;
|
|
||||||
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
|
||||||
int frames = CEILDIV(threads * 4, tiles_per_frame);
|
|
||||||
|
|
||||||
// Limit number of frames to 1.25x the number of threads for the case
|
|
||||||
// where there is only 1 tile per frame.
|
|
||||||
frames = CLIP(1, threads * 4 / 3, frames);
|
|
||||||
return frames - 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue