mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Add --owf=auto option.
- The optimal value for Overlapping Wave Front (OWF) depends on several variables (resolution, thread count, WPP and tile settings). With --owf=auto, attempt to pick a good OWF value automatically, at least for all-intra encoding.
This commit is contained in:
parent
5a946f24ea
commit
3ef88dfda5
|
@ -433,6 +433,8 @@ static int config_parse(config *cfg, const char *name, const char *value)
|
||||||
if (cfg->owf < 0) {
|
if (cfg->owf < 0) {
|
||||||
fprintf(stderr, "--owf parameter smaller than 0, set to 0\n");
|
fprintf(stderr, "--owf parameter smaller than 0, set to 0\n");
|
||||||
cfg->owf = 0;
|
cfg->owf = 0;
|
||||||
|
} else if (cfg->owf == 0 && !strcmp(value, "auto")) {
|
||||||
|
cfg->owf = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if OPT("slice-addresses")
|
else if OPT("slice-addresses")
|
||||||
|
|
|
@ -159,7 +159,7 @@ int main(int argc, char *argv[])
|
||||||
"\n"
|
"\n"
|
||||||
" Wpp:\n"
|
" Wpp:\n"
|
||||||
" --wpp : Enable wavefront parallel processing\n"
|
" --wpp : Enable wavefront parallel processing\n"
|
||||||
" --owf <integer> : Enable parallel processing of multiple frames\n"
|
" --owf <integer>|auto : Number of parallel frames to process. 0 to disable.\n"
|
||||||
"\n"
|
"\n"
|
||||||
" Slices:\n"
|
" Slices:\n"
|
||||||
" --slice-addresses <string>|u<int>: \n"
|
" --slice-addresses <string>|u<int>: \n"
|
||||||
|
@ -191,6 +191,52 @@ int main(int argc, char *argv[])
|
||||||
strcpy(cfg->debug + left_len, dim_str);
|
strcpy(cfg->debug + left_len, dim_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cfg->owf == -1) {
|
||||||
|
if (cfg->wpp) {
|
||||||
|
// If --owf=auto and wpp is on, select owf according to the lesser dimension.
|
||||||
|
// An ok rule for all intra seems to be to always have at least 4 wpp
|
||||||
|
// streams per thread. For a single frame that would mean that all threads
|
||||||
|
// are working for at least half of the frame.
|
||||||
|
int lcu_width = (cfg->width + LCU_WIDTH - 1) / LCU_WIDTH;
|
||||||
|
int lcu_height = (cfg->height + LCU_WIDTH - 1) / LCU_WIDTH;
|
||||||
|
int min_dimension = MIN(lcu_width, lcu_height);
|
||||||
|
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||||
|
|
||||||
|
// Find the largest number of threads per frame that satisfies the
|
||||||
|
// condition that there are 4 wpp streams per thread.
|
||||||
|
int threads_per_frame = 1;
|
||||||
|
while (min_dimension / (threads_per_frame + 1) >= 4) {
|
||||||
|
++threads_per_frame;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get ceil(threads / threads_per_frame).
|
||||||
|
int frames = (threads + threads_per_frame - 1) / threads_per_frame;
|
||||||
|
cfg->owf = CLIP(0, threads - 1, frames - 1);
|
||||||
|
} else {
|
||||||
|
// If --owf=auto and wpp is not on, select owf such that there are enough
|
||||||
|
// tiles for twice the number of threads. That should make sure there are
|
||||||
|
// always some tiles to work on.
|
||||||
|
|
||||||
|
int tiles_per_frame = 1;
|
||||||
|
if (cfg->tiles_width_split != NULL) {
|
||||||
|
tiles_per_frame *= cfg->tiles_width_count;
|
||||||
|
}
|
||||||
|
if (cfg->tiles_height_split != NULL) {
|
||||||
|
tiles_per_frame *= cfg->tiles_height_count;
|
||||||
|
}
|
||||||
|
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||||
|
// Get ceil(threads * 2 / tiles_per_frame).
|
||||||
|
int frames = (threads * 2 + tiles_per_frame - 1) / tiles_per_frame;
|
||||||
|
|
||||||
|
// Limit number of frames to 4/3 (~1.33x) the number of threads for the case
|
||||||
|
// where there is only 1 tile per frame.
|
||||||
|
frames = CLIP(1, threads * 4 / 3, frames);
|
||||||
|
cfg->owf = frames - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "--owf=auto value set to %d.\n", cfg->owf);
|
||||||
|
}
|
||||||
|
|
||||||
// Do more validation to make sure the parameters we have make sense.
|
// Do more validation to make sure the parameters we have make sense.
|
||||||
if (!config_validate(cfg)) {
|
if (!config_validate(cfg)) {
|
||||||
goto exit_failure;
|
goto exit_failure;
|
||||||
|
|
Loading…
Reference in a new issue