mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Rewrite owf=auto code to be more general.
- Change the definition to be a bit more general. The mapping from resolution to owf frames stays mostly the same, but weird resolutions should now be handled better. - Move everything to the config module. - Fix handling of tiles. It had a bug where owf for tiles was always threads * 4/3 - 1. Works as intended now.
This commit is contained in:
parent
33b32de6c9
commit
51b5692121
56
src/config.c
56
src/config.c
|
@ -602,3 +602,59 @@ int config_validate(config *cfg)
|
|||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Estimate the size of the wpp start/stop regions of a frame, i.e. the
// area in which not all of the given threads are able to run in parallel.
//
// threads: number of wpp threads working on a single frame.
// Returns 4 * threads^2 - 2 * threads.
int size_of_wpp_ends(int threads)
{
  const int doubled = 2 * threads;

  // (2t)^2 - 2t == 4t^2 - 2t
  return doubled * doubled - doubled;
}
|
||||
|
||||
// Pick a value for cfg->owf (the number of additional frames processed in
// parallel) on behalf of --owf=auto and report it on stderr.
//
// With wpp enabled, the largest number of threads per frame is searched
// for which the wpp start/stop regions stay below 15% of the frame area
// measured in LCUs; the threads are then spread over as many frames as
// that requires. Without wpp, enough frames are selected to have tiles
// for twice the number of threads.
//
// cfg: configuration to read wpp, width, height, threads and tile counts
//      from; cfg->owf is overwritten.
// Always returns 1.
int config_set_owf_auto(config *cfg)
{
  // A thread count below one is treated as a single thread.
  const int num_threads = (cfg->threads > 1 ? cfg->threads : 1);

  if (!cfg->wpp) {
    // No wpp: parallelism comes from tiles, so count the tiles in a frame.
    // NOTE(review): the "+ 1" suggests the *_count fields hold the number
    // of splits rather than the number of tiles -- confirm against the
    // code that fills them in.
    int tiles = 1;
    if (cfg->tiles_width_count > 0) {
      tiles *= cfg->tiles_width_count + 1;
    }
    if (cfg->tiles_height_count > 0) {
      tiles *= cfg->tiles_height_count + 1;
    }

    // Frames needed to offer two tiles per thread.
    const int wanted_frames = CEILDIV(num_threads * 2, tiles);

    // Use at least one frame and at most threads * 4 / 3 frames (integer
    // division). The upper bound matters when there are few tiles per
    // frame, e.g. only one.
    const int parallel_frames = CLIP(1, num_threads * 4 / 3, wanted_frames);

    // Convert from number of parallel frames to number of additional frames.
    cfg->owf = parallel_frames - 1;
  } else {
    // Frame dimensions in LCUs, rounded up to cover partial LCUs.
    const int width_in_lcu = CEILDIV(cfg->width, LCU_WIDTH);
    const int height_in_lcu = CEILDIV(cfg->height, LCU_WIDTH);

    // 15% of the frame area in LCUs.
    const int area_limit = width_in_lcu * height_in_lcu * 15 / 100;

    // Grow the number of threads per frame one at a time for as long as
    // the next value still fits the frame dimensions and keeps the wpp
    // start/stop regions below the area limit.
    int per_frame = 1;
    for (;;) {
      const int candidate = per_frame + 1;
      if (candidate * 2 >= width_in_lcu) {
        break;
      }
      if (candidate >= height_in_lcu) {
        break;
      }
      if (size_of_wpp_ends(candidate) >= area_limit) {
        break;
      }
      per_frame = candidate;
    }

    const int parallel_frames = CEILDIV(num_threads, per_frame);

    // Convert from number of parallel frames to number of additional
    // frames, keeping the result within [0, threads - 1].
    cfg->owf = CLIP(0, num_threads - 1, parallel_frames - 1);
  }

  fprintf(stderr, "--owf=auto value set to %d.\n", cfg->owf);

  return 1;
}
|
||||
|
|
|
@ -89,5 +89,6 @@ int config_init(config *cfg);
|
|||
int config_destroy(config *cfg);
|
||||
int config_read(config *cfg,int argc, char *argv[]);
|
||||
int config_validate(config *cfg);
|
||||
int config_set_owf_auto(config *cfg);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -195,49 +195,9 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
if (cfg->owf == -1) {
|
||||
if (cfg->wpp) {
|
||||
// If --owf=auto and wpp is on, select owf according to the lesser dimension.
|
||||
// An ok rule for all intra seems to be to always have at least 4 wpp
|
||||
// streams per thread. For a single frame that would mean that all threads
|
||||
// are working for at least half of the frame.
|
||||
int lcu_width = (cfg->width + LCU_WIDTH - 1) / LCU_WIDTH;
|
||||
int lcu_height = (cfg->height + LCU_WIDTH - 1) / LCU_WIDTH;
|
||||
int min_dimension = MIN(lcu_width, lcu_height);
|
||||
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||
|
||||
// Find the largest number of threads per frame that satifies the
|
||||
// the condition that there are 4 wpp streams per thread.
|
||||
int threads_per_frame = 1;
|
||||
while (min_dimension / (threads_per_frame + 1) >= 4) {
|
||||
++threads_per_frame;
|
||||
if (!config_set_owf_auto(cfg)) {
|
||||
goto exit_failure;
|
||||
}
|
||||
|
||||
// Get ceil(threads / threads_per_frame).
|
||||
int frames = (threads + threads_per_frame - 1) / threads_per_frame;
|
||||
cfg->owf = CLIP(0, threads - 1, frames - 1);
|
||||
} else {
|
||||
// If --owf=auto and wpp is not on, select owf such that there is enough
|
||||
// tiles for twice the number of threads. That should make sure there are
|
||||
// always some tiles to work on.
|
||||
|
||||
int tiles_per_frame = 1;
|
||||
if (cfg->tiles_width_split != NULL) {
|
||||
tiles_per_frame *= cfg->tiles_width_count;
|
||||
}
|
||||
if (cfg->tiles_height_split != NULL) {
|
||||
tiles_per_frame *= cfg->tiles_height_count;
|
||||
}
|
||||
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||
// Get ceil(threads * 2 / tiles_per_frame).
|
||||
int frames = (threads * 2 + tiles_per_frame - 1) / tiles_per_frame;
|
||||
|
||||
// Limit number of frames to 1.25x the number of threads for the case
|
||||
// where there is only 1 tile per frame.
|
||||
frames = CLIP(1, threads * 4 / 3, frames);
|
||||
cfg->owf = frames - 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "--owf=auto value set to %d.\n", cfg->owf);
|
||||
}
|
||||
|
||||
// Do more validation to make sure the parameters we have make sense.
|
||||
|
|
|
@ -124,6 +124,7 @@ typedef int16_t coefficient;
|
|||
// Width of a CU at the given depth: LCU_WIDTH shifted right by depth.
// The argument is parenthesized so that expressions containing operators
// with lower precedence than >> (e.g. "d & 1" or "c ? a : b") are shifted
// as a whole instead of being split by the expansion.
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> (depth))
|
||||
// True when min_val <= val <= max_val, both bounds inclusive.
// val appears twice in the expansion, so avoid arguments with side effects.
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
|
||||
// Combine x_pu % 2 and y_pu % 2 into one value: x_pu % 2 + 2 * (y_pu % 2).
// For non-negative arguments the result is in the range 0..3.
#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2) + 2 * ((y_pu) % 2))
|
||||
// Divide x by y, rounding the result up. With integer operands this gives
// the ceiling of x / y for x >= 0 and y > 0. y appears twice in the
// expansion, so avoid arguments with side effects.
#define CEILDIV(x,y) (((x) + (y) - 1) / (y))
|
||||
|
||||
#define LOG2_LCU_WIDTH 6
|
||||
// CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width
|
||||
|
|
Loading…
Reference in a new issue