mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Merge branch 'new_presets'
Significant boost to either BDRate, speed or both for every preset.
This commit is contained in:
commit
5f5fffb8b5
|
@ -89,8 +89,8 @@ matrix:
|
|||
- env: VALGRIND_TEST="-p4 -r1 --owf=0 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
- env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3"
|
||||
|
||||
# Tests for rdoq, sao, deblock and signhide and subme.
|
||||
- env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3"
|
||||
|
|
499
src/cfg.c
499
src/cfg.c
|
@ -45,21 +45,22 @@ int kvz_config_init(kvz_config *cfg)
|
|||
cfg->framerate = 25; // deprecated and will be removed.
|
||||
cfg->framerate_num = 0;
|
||||
cfg->framerate_denom = 1;
|
||||
cfg->qp = 32;
|
||||
cfg->intra_period = 0;
|
||||
cfg->qp = 22;
|
||||
cfg->intra_period = 64;
|
||||
cfg->vps_period = 0;
|
||||
cfg->deblock_enable = 1;
|
||||
cfg->deblock_beta = 0;
|
||||
cfg->deblock_tc = 0;
|
||||
cfg->sao_enable = 1;
|
||||
cfg->rdoq_enable = 1;
|
||||
cfg->rdoq_skip = 1;
|
||||
cfg->signhide_enable = true;
|
||||
cfg->smp_enable = false;
|
||||
cfg->amp_enable = false;
|
||||
cfg->rdo = 1;
|
||||
cfg->mv_rdo = 0;
|
||||
cfg->full_intra_search = 0;
|
||||
cfg->trskip_enable = 1;
|
||||
cfg->trskip_enable = 0;
|
||||
cfg->tr_depth_intra = 0;
|
||||
cfg->ime_algorithm = 0; /* hexbs */
|
||||
cfg->fme_level = 4;
|
||||
|
@ -76,7 +77,8 @@ int kvz_config_init(kvz_config *cfg)
|
|||
cfg->aud_enable = 0;
|
||||
cfg->cqmfile = NULL;
|
||||
cfg->ref_frames = DEFAULT_REF_PIC_COUNT;
|
||||
cfg->gop_len = 0;
|
||||
cfg->gop_len = 4;
|
||||
cfg->gop_lowdelay = true;
|
||||
cfg->bipred = 0;
|
||||
cfg->target_bitrate = 0;
|
||||
cfg->hash = KVZ_HASH_CHECKSUM;
|
||||
|
@ -90,20 +92,20 @@ int kvz_config_init(kvz_config *cfg)
|
|||
cfg->tiles_width_split = NULL;
|
||||
cfg->tiles_height_split = NULL;
|
||||
|
||||
cfg->wpp = 0;
|
||||
cfg->wpp = 1;
|
||||
cfg->owf = -1;
|
||||
cfg->slice_count = 1;
|
||||
cfg->slice_addresses_in_ts = MALLOC(int32_t, 1);
|
||||
cfg->slice_addresses_in_ts[0] = 0;
|
||||
|
||||
cfg->threads = 0;
|
||||
cfg->threads = -1;
|
||||
cfg->cpuid = 1;
|
||||
|
||||
// Defaults for what sizes of PUs are tried.
|
||||
cfg->pu_depth_inter.min = 0; // 0-3
|
||||
cfg->pu_depth_inter.min = 2; // 0-3
|
||||
cfg->pu_depth_inter.max = 3; // 0-3
|
||||
cfg->pu_depth_intra.min = 1; // 0-4
|
||||
cfg->pu_depth_intra.max = 4; // 0-4
|
||||
cfg->pu_depth_intra.min = 2; // 0-4
|
||||
cfg->pu_depth_intra.max = 3; // 0-4
|
||||
|
||||
cfg->add_encoder_info = true;
|
||||
cfg->calc_psnr = true;
|
||||
|
@ -111,13 +113,14 @@ int kvz_config_init(kvz_config *cfg)
|
|||
cfg->mv_constraint = KVZ_MV_CONSTRAIN_NONE;
|
||||
cfg->crypto_features = KVZ_CRYPTO_OFF;
|
||||
|
||||
cfg->me_early_termination = 0;
|
||||
|
||||
cfg->rdoq_skip = 0;
|
||||
cfg->me_early_termination = 1;
|
||||
|
||||
cfg->input_format = KVZ_FORMAT_P420;
|
||||
cfg->input_bitdepth = 8;
|
||||
|
||||
cfg->gop_lp_definition.d = 3;
|
||||
cfg->gop_lp_definition.t = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -309,195 +312,235 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
|
||||
static const char * const me_early_termination_names[] = { "off", "on", "sensitive", NULL };
|
||||
|
||||
static const char * const preset_values[11][32] = {
|
||||
static const char * const preset_values[11][20*2] = {
|
||||
{
|
||||
"ultrafast",
|
||||
"pu-depth-intra", "2-3",
|
||||
"pu-depth-inter", "1-3",
|
||||
"pu-depth-inter", "2-3",
|
||||
"rd", "0",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "0",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "sensitive",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"superfast",
|
||||
"pu-depth-intra", "2-3",
|
||||
"pu-depth-inter", "2-3",
|
||||
"rd", "0",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "0",
|
||||
"sao", "1",
|
||||
"rdoq", "0",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "sensitive",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"veryfast",
|
||||
"pu-depth-intra", "2-3",
|
||||
"pu-depth-inter", "2-3",
|
||||
"rd", "0",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "2",
|
||||
"sao", "1",
|
||||
"rdoq", "0",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "sensitive",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"faster",
|
||||
"pu-depth-intra", "2-3",
|
||||
"pu-depth-inter", "1-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "2",
|
||||
"sao", "1",
|
||||
"rdoq", "0",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "sensitive",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"fast",
|
||||
"pu-depth-intra", "2-3",
|
||||
"pu-depth-inter", "1-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "0",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "on",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"medium",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "1-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "1",
|
||||
"deblock", "1",
|
||||
"deblock", "0:0",
|
||||
"signhide", "0",
|
||||
"subme", "0",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"veryfast",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "2",
|
||||
"deblock", "1",
|
||||
"signhide", "0",
|
||||
"subme", "0",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"faster",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "2",
|
||||
"deblock", "1",
|
||||
"signhide", "1",
|
||||
"subme", "0",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"fast",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "2",
|
||||
"deblock", "1",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"medium",
|
||||
"pu-depth-intra", "1-4",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "3",
|
||||
"deblock", "1",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "0",
|
||||
"rdoq", "0",
|
||||
"sao", "1",
|
||||
"rdoq", "1",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "on",
|
||||
"gop", "lp-g4d3t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"slow",
|
||||
"pu-depth-intra", "1-4",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "2",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "1-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "3",
|
||||
"deblock", "1",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "0",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"slower",
|
||||
"pu-depth-intra", "1-4",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "2",
|
||||
"me", "tz",
|
||||
"ref", "4",
|
||||
"deblock", "1",
|
||||
"ref", "2",
|
||||
"deblock", "0:0",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "1",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "on",
|
||||
"gop", "lp-g4d2t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"slower",
|
||||
"pu-depth-intra", "1-3",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "2",
|
||||
"deblock", "0:0",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "1",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "on",
|
||||
"gop", "lp-g4d2t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"veryslow",
|
||||
"pu-depth-intra", "1-4",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "2",
|
||||
"me", "tz",
|
||||
"ref", "4",
|
||||
"deblock", "1",
|
||||
"rd", "1",
|
||||
"me", "hexbs",
|
||||
"ref", "3",
|
||||
"deblock", "0:0",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "1",
|
||||
"transform-skip", "1",
|
||||
"rdoq-skip", "1",
|
||||
"transform-skip", "0",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "1",
|
||||
"mv-rdo", "0",
|
||||
"smp", "0",
|
||||
"amp", "0",
|
||||
"cu-split-termination", "zero",
|
||||
"me-early-termination", "on",
|
||||
"gop", "lp-g4d2t1",
|
||||
NULL
|
||||
},
|
||||
{
|
||||
"placebo",
|
||||
"pu-depth-intra", "0-4",
|
||||
"pu-depth-intra", "1-4",
|
||||
"pu-depth-inter", "0-3",
|
||||
"rd", "3",
|
||||
"rd", "1",
|
||||
"me", "tz",
|
||||
"ref", "6",
|
||||
"deblock", "1",
|
||||
"ref", "4",
|
||||
"deblock", "0:0",
|
||||
"signhide", "1",
|
||||
"subme", "4",
|
||||
"sao", "1",
|
||||
"rdoq", "1",
|
||||
"rdoq-skip", "0",
|
||||
"transform-skip", "1",
|
||||
"full-intra-search", "1",
|
||||
"full-intra-search", "0",
|
||||
"mv-rdo", "1",
|
||||
"smp", "1",
|
||||
"amp", "1",
|
||||
"cu-split-termination", "off",
|
||||
"me-early-termination", "off",
|
||||
"gop", "lp-g4d2t1",
|
||||
NULL
|
||||
},
|
||||
{ NULL }
|
||||
|
@ -683,105 +726,32 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
struct {
|
||||
unsigned g; // length
|
||||
unsigned d; // depth
|
||||
unsigned r; // references
|
||||
unsigned t; // temporal
|
||||
} gop = { 0, 0, 0, 0 };
|
||||
} gop = { 0, 0, 0 };
|
||||
|
||||
if (sscanf(value, "lp-g%ud%ur%ut%u", &gop.g, &gop.d, &gop.r, &gop.t) != 4) {
|
||||
fprintf(stderr, "Error in GOP syntax. Example: lp-g8d4r2t2\n");
|
||||
// Parse --gop=lp-g#d#t#
|
||||
if (sscanf(value, "lp-g%ud%ut%u", &gop.g, &gop.d, &gop.t) != 3) {
|
||||
fprintf(stderr, "Error in GOP syntax. Example: lp-g8d4t2\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (gop.g < 1 || gop.g > 32) {
|
||||
fprintf(stderr, "gop.g must be between 1 and 32.\n");
|
||||
return 0;
|
||||
}
|
||||
if (gop.d < 1 || gop.d > 8) {
|
||||
fprintf(stderr, "gop.d must be between 1 and 8.\n");
|
||||
}
|
||||
if (gop.r < 1 || gop.r > 15) {
|
||||
fprintf(stderr, "gop.d must be between 1 and 15.\n");
|
||||
return 0;
|
||||
}
|
||||
if (gop.t < 1 || gop.t > 15) {
|
||||
fprintf(stderr, "gop.t must be between 1 and 32.\n");
|
||||
fprintf(stderr, "gop.t must be between 1 and 15.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialize modulos for testing depth.
|
||||
// The picture belong to the lowest depth in which (poc % modulo) == 0.
|
||||
unsigned depth_modulos[8] = { 0 };
|
||||
for (int d = 0; d < gop.d; ++d) {
|
||||
depth_modulos[gop.d - 1 - d] = 1 << d;
|
||||
}
|
||||
depth_modulos[0] = gop.g;
|
||||
|
||||
cfg->gop_lowdelay = 1;
|
||||
cfg->gop_lowdelay = true;
|
||||
cfg->gop_len = gop.g;
|
||||
for (int g = 1; g <= gop.g; ++g) {
|
||||
kvz_gop_config *gop_pic = &cfg->gop[g - 1];
|
||||
|
||||
// Find gop depth for picture.
|
||||
int gop_layer = 0;
|
||||
while (gop_layer < gop.d && (g % depth_modulos[gop_layer])) {
|
||||
++gop_layer;
|
||||
}
|
||||
|
||||
gop_pic->poc_offset = g;
|
||||
gop_pic->layer = gop_layer + 1;
|
||||
gop_pic->qp_offset = gop_layer + 1;
|
||||
gop_pic->ref_pos_count = 0;
|
||||
gop_pic->ref_neg_count = gop.r;
|
||||
gop_pic->is_ref = 0;
|
||||
|
||||
// Set first ref to point to previous frame, and the rest to previous
|
||||
// key-frames.
|
||||
// If gop.t > 1, have (poc % gop.t) == 0 point gop.t frames away,
|
||||
// instead of the previous frame. Set the frames in between to
|
||||
// point to the nearest frame with a lower gop-depth.
|
||||
if (gop.t > 1) {
|
||||
if (gop_pic->poc_offset % gop.t == 0) {
|
||||
gop_pic->ref_neg[0] = gop.t;
|
||||
} else {
|
||||
int r = gop_pic->poc_offset - 1;
|
||||
while (r > 0) {
|
||||
if (cfg->gop[r].layer < gop_pic->layer) break;
|
||||
--r;
|
||||
}
|
||||
// Var r is now 0 or index of the pic with layer < depth.
|
||||
if (cfg->gop[r].layer < gop_pic->layer) {
|
||||
gop_pic->ref_neg[0] = gop_pic->poc_offset - cfg->gop[r].poc_offset;
|
||||
cfg->gop[r].is_ref = 1;
|
||||
} else {
|
||||
// No ref was found, just refer to the previous key-frame.
|
||||
gop_pic->ref_neg[0] = gop_pic->poc_offset % gop.g;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gop_pic->ref_neg[0] = 1;
|
||||
if (gop_pic->poc_offset >= 2) {
|
||||
cfg->gop[gop_pic->poc_offset - 2].is_ref = 1;
|
||||
}
|
||||
}
|
||||
|
||||
int keyframe = gop_pic->poc_offset;
|
||||
for (int i = 1; i < gop_pic->ref_neg_count; ++i) {
|
||||
while (keyframe == gop_pic->ref_neg[i - 1]) {
|
||||
keyframe += gop.g;
|
||||
}
|
||||
gop_pic->ref_neg[i] = keyframe;
|
||||
}
|
||||
|
||||
gop_pic->qp_factor = 0.4624; // from HM
|
||||
}
|
||||
|
||||
for (int g = 0; g < gop.g; ++g) {
|
||||
kvz_gop_config *gop_pic = &cfg->gop[g];
|
||||
if (!gop_pic->is_ref) {
|
||||
gop_pic->qp_factor = 0.68 * 1.31; // derived from HM
|
||||
}
|
||||
}
|
||||
|
||||
// Key-frame is always a reference.
|
||||
cfg->gop[gop.g - 1].is_ref = 1;
|
||||
cfg->gop[gop.g - 1].qp_factor = 0.578; // from HM
|
||||
cfg->gop_lp_definition.d = gop.d;
|
||||
cfg->gop_lp_definition.t = gop.t;
|
||||
} else if (atoi(value) == 8) {
|
||||
cfg->gop_lowdelay = 0;
|
||||
// GOP
|
||||
|
@ -821,10 +791,6 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
fprintf(stderr, "Input error: unsupported gop length, must be 0 or 8\n");
|
||||
return 0;
|
||||
}
|
||||
if (cfg->gop_len && cfg->tmvp_enable) {
|
||||
cfg->tmvp_enable = false;
|
||||
fprintf(stderr, "Disabling TMVP because GOP is used.\n");
|
||||
}
|
||||
}
|
||||
else if OPT("bipred")
|
||||
cfg->bipred = atobool(value);
|
||||
|
@ -985,6 +951,97 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
 * \brief Fill in the low-delay GOP structure of a configuration.
 *
 * Reads cfg->gop_len, cfg->gop_lp_definition.d, cfg->gop_lp_definition.t and
 * cfg->ref_frames, and writes one kvz_gop_config entry for each picture in
 * cfg->gop[0 .. gop_len - 1]. Also sets cfg->gop_lowdelay.
 *
 * \param cfg  configuration to update in place
 */
void kvz_config_process_lp_gop(kvz_config *cfg)
{
  const unsigned length   = cfg->gop_len;
  const unsigned depth    = cfg->gop_lp_definition.d;
  const unsigned temporal = cfg->gop_lp_definition.t;

  // Modulos used for finding the layer of a picture: a picture is placed in
  // the lowest layer for which (poc % modulo) == 0. The first entry is
  // replaced by the GOP length.
  unsigned layer_modulo[8] = { 0 };
  for (int i = 0; i < depth; ++i) {
    layer_modulo[depth - 1 - i] = 1 << i;
  }
  layer_modulo[0] = length;

  cfg->gop_lowdelay = 1;
  cfg->gop_len = length;

  for (int poc = 1; poc <= length; ++poc) {
    kvz_gop_config *pic = &cfg->gop[poc - 1];

    // Find the layer of this picture.
    int layer = 1;
    while (layer < depth && (poc % layer_modulo[layer - 1]) != 0) {
      layer += 1;
    }

    pic->poc_offset    = poc;
    pic->layer         = layer;
    pic->qp_offset     = layer;
    pic->ref_pos_count = 0;
    pic->ref_neg_count = cfg->ref_frames;
    pic->is_ref        = 0;

    // The first reference is chosen here; the remaining ones are filled in
    // further below.
    if (temporal <= 1) {
      // Refer to the previous picture and mark it as a reference.
      pic->ref_neg[0] = 1;
      if (pic->poc_offset >= 2) {
        cfg->gop[pic->poc_offset - 2].is_ref = 1;
      }
    } else if (pic->poc_offset % temporal == 0) {
      // Pictures with (poc % t) == 0 refer to the picture t frames back.
      pic->ref_neg[0] = temporal;
    } else {
      // Walk backwards from this picture until an entry with a lower layer
      // is found or index 0 is reached.
      int idx = pic->poc_offset - 1;
      while (idx > 0 && cfg->gop[idx].layer >= pic->layer) {
        --idx;
      }

      if (cfg->gop[idx].layer < pic->layer) {
        pic->ref_neg[0] = pic->poc_offset - cfg->gop[idx].poc_offset;
        cfg->gop[idx].is_ref = 1;
      } else {
        // No ref was found, just refer to the previous key-frame.
        pic->ref_neg[0] = pic->poc_offset % length;
      }
    }

    // Remaining references: start from the distance poc_offset and step by
    // the GOP length whenever the candidate equals the previous reference.
    int keyframe = pic->poc_offset;
    for (int i = 1; i < pic->ref_neg_count; ++i) {
      while (keyframe == pic->ref_neg[i - 1]) {
        keyframe += length;
      }
      pic->ref_neg[i] = keyframe;
    }

    pic->qp_factor = 0.4624; // from HM
  }

  // Pictures that nothing refers to get a different QP factor.
  for (int i = 0; i < length; ++i) {
    if (!cfg->gop[i].is_ref) {
      cfg->gop[i].qp_factor = 0.68 * 1.31; // derived from HM
    }
  }

  // Key-frame is always a reference.
  kvz_gop_config *key_pic = &cfg->gop[length - 1];
  key_pic->is_ref = 1;
  key_pic->qp_factor = 0.578; // from HM
}
|
||||
|
||||
/**
|
||||
* \brief Check that configuration is sensible.
|
||||
*
|
||||
|
@ -1028,11 +1085,11 @@ int kvz_config_validate(const kvz_config *const cfg)
|
|||
error = 1;
|
||||
}
|
||||
|
||||
if (cfg->gop_len &&
|
||||
cfg->intra_period &&
|
||||
cfg->intra_period % cfg->gop_len != 0) {
|
||||
if (cfg->gop_len && cfg->intra_period && !cfg->gop_lowdelay &&
|
||||
cfg->intra_period % cfg->gop_len != 0)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Input error: intra period (%d) not a multiple of gop length (%d)\n",
|
||||
"Input error: intra period (%d) not a multiple of B-gop length (%d)\n",
|
||||
cfg->intra_period,
|
||||
cfg->gop_len);
|
||||
error = 1;
|
||||
|
|
|
@ -36,6 +36,7 @@ kvz_config *kvz_config_alloc(void);
|
|||
int kvz_config_init(kvz_config *cfg);
|
||||
int kvz_config_destroy(kvz_config *cfg);
|
||||
int kvz_config_parse(kvz_config *cfg, const char *name, const char *value);
|
||||
void kvz_config_process_lp_gop(kvz_config *cfg);
|
||||
int kvz_config_validate(const kvz_config *cfg);
|
||||
|
||||
#endif
|
||||
|
|
116
src/encoder.c
116
src/encoder.c
|
@ -24,6 +24,7 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
#include "cfg.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
|
||||
static int encoder_control_init_gop_layer_weights(encoder_control_t * const);
|
||||
|
@ -36,51 +37,90 @@ static int size_of_wpp_ends(int threads)
|
|||
|
||||
static int select_owf_auto(const kvz_config *const cfg)
|
||||
{
|
||||
if (cfg->wpp) {
|
||||
// If wpp is on, select owf such that less than 15% of the
|
||||
// frame is covered by the area where threads cannot work at the same time.
|
||||
if (cfg->intra_period == 1) {
|
||||
if (cfg->wpp) {
|
||||
// If wpp is on, select owf such that less than 15% of the
|
||||
// frame is covered by the area where threads cannot work at the same time.
|
||||
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
|
||||
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
|
||||
|
||||
// Find the largest number of threads per frame that satisfies
|
||||
// the condition: wpp start/stop inefficiency takes up less than 15%
|
||||
// of frame area.
|
||||
int threads_per_frame = 1;
|
||||
const int wpp_treshold = lcu_width * lcu_height * 15 / 100;
|
||||
while ((threads_per_frame + 1) * 2 < lcu_width &&
|
||||
threads_per_frame + 1 < lcu_height &&
|
||||
size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) {
|
||||
++threads_per_frame;
|
||||
}
|
||||
|
||||
const int threads = MAX(cfg->threads, 1);
|
||||
const int frames = CEILDIV(threads, threads_per_frame);
|
||||
|
||||
// Convert from number of parallel frames to number of additional frames.
|
||||
return CLIP(0, threads - 1, frames - 1);
|
||||
} else {
|
||||
// If wpp is not on, select owf such that there is enough
|
||||
// tiles for twice the number of threads.
|
||||
|
||||
int tiles_per_frame = cfg->tiles_width_count * cfg->tiles_height_count;
|
||||
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||
int frames = CEILDIV(threads * 4, tiles_per_frame);
|
||||
|
||||
// Limit number of frames to 1.25x the number of threads for the case
|
||||
// where there is only 1 tile per frame.
|
||||
frames = CLIP(1, threads * 4 / 3, frames);
|
||||
return frames - 1;
|
||||
}
|
||||
} else {
|
||||
// Try and estimate a good number of parallel frames for inter.
|
||||
const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH);
|
||||
const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH);
|
||||
int threads_per_frame = MIN(lcu_width / 2, lcu_height);
|
||||
int threads = cfg->threads;
|
||||
|
||||
// Find the largest number of threads per frame that satisfies
|
||||
// the condition: wpp start/stop inefficiency takes up less than 15%
|
||||
// of frame area.
|
||||
int threads_per_frame = 1;
|
||||
const int wpp_treshold = lcu_width * lcu_height * 15 / 100;
|
||||
while ((threads_per_frame + 1) * 2 < lcu_width &&
|
||||
threads_per_frame + 1 < lcu_height &&
|
||||
size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold)
|
||||
{
|
||||
++threads_per_frame;
|
||||
// If all threads fit into one frame, at least two parallel frames should
|
||||
// be used to reduce the effect of WPP spin-up and wind-down.
|
||||
int frames = 1;
|
||||
|
||||
while (threads > 0 && threads_per_frame > 0) {
|
||||
frames += 1;
|
||||
threads -= threads_per_frame;
|
||||
threads_per_frame -= 2;
|
||||
}
|
||||
|
||||
const int threads = MAX(cfg->threads, 1);
|
||||
const int frames = CEILDIV(threads, threads_per_frame);
|
||||
|
||||
// Convert from number of parallel frames to number of additional frames.
|
||||
return CLIP(0, threads - 1, frames - 1);
|
||||
} else {
|
||||
// If wpp is not on, select owf such that there is enough
|
||||
// tiles for twice the number of threads.
|
||||
|
||||
int tiles_per_frame= cfg->tiles_width_count * cfg->tiles_height_count;
|
||||
int threads = (cfg->threads > 1 ? cfg->threads : 1);
|
||||
int frames = CEILDIV(threads * 4, tiles_per_frame);
|
||||
|
||||
// Limit number of frames to 1.25x the number of threads for the case
|
||||
// where there is only 1 tile per frame.
|
||||
frames = CLIP(1, threads * 4 / 3, frames);
|
||||
return frames - 1;
|
||||
if (cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) {
|
||||
// Temporal skipping makes every other frame very fast to encode so
|
||||
// more parallel frames should be used.
|
||||
frames *= 2;
|
||||
}
|
||||
return CLIP(0, cfg->threads * 2 - 1, frames - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static unsigned cfg_num_threads(void)
|
||||
{
|
||||
unsigned cpus = kvz_g_hardware_flags.physical_cpu_count;
|
||||
unsigned fake_cpus = kvz_g_hardware_flags.logical_cpu_count - cpus;
|
||||
|
||||
// Default to 4 if we don't know the number of CPUs.
|
||||
if (cpus == 0) return 4;
|
||||
|
||||
// 1.5 times the number of physical cores seems to be a good compromise
|
||||
// when hyperthreading is available on Haswell.
|
||||
return cpus + fake_cpus / 2;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Allocate and initialize an encoder control structure.
|
||||
*
|
||||
* \param cfg encoder configuration
|
||||
* \return initialized encoder control or NULL on failure
|
||||
*/
|
||||
encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {
|
||||
encoder_control_t* kvz_encoder_control_init(kvz_config *const cfg) {
|
||||
encoder_control_t *encoder = NULL;
|
||||
|
||||
if (!cfg) {
|
||||
|
@ -88,6 +128,20 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {
|
|||
goto init_failed;
|
||||
}
|
||||
|
||||
if (cfg->threads == -1) {
|
||||
cfg->threads = cfg_num_threads();
|
||||
}
|
||||
|
||||
if (cfg->gop_len > 0) {
|
||||
if (cfg->tmvp_enable) {
|
||||
cfg->tmvp_enable = false;
|
||||
fprintf(stderr, "Disabling TMVP because GOP is used.\n");
|
||||
}
|
||||
if (cfg->gop_lowdelay) {
|
||||
kvz_config_process_lp_gop(cfg);
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that the parameters make sense.
|
||||
if (!kvz_config_validate(cfg)) {
|
||||
goto init_failed;
|
||||
|
|
|
@ -155,7 +155,7 @@ typedef struct encoder_control_t
|
|||
|
||||
} encoder_control_t;
|
||||
|
||||
encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
|
||||
encoder_control_t* kvz_encoder_control_init(kvz_config *cfg);
|
||||
void kvz_encoder_control_free(encoder_control_t *encoder);
|
||||
|
||||
void kvz_encoder_control_input_init(encoder_control_t *encoder, int32_t width, int32_t height);
|
||||
|
|
|
@ -75,7 +75,9 @@ static kvz_encoder * kvazaar_open(const kvz_config *cfg)
|
|||
goto kvazaar_open_failure;
|
||||
}
|
||||
|
||||
encoder->control = kvz_encoder_control_init(cfg);
|
||||
// FIXME: const qualifier discarded. I don't want to change kvazaar_open
|
||||
// but I really need to change cfg.
|
||||
encoder->control = kvz_encoder_control_init((kvz_config*)cfg);
|
||||
if (!encoder->control) {
|
||||
goto kvazaar_open_failure;
|
||||
}
|
||||
|
|
|
@ -312,6 +312,11 @@ typedef struct kvz_config
|
|||
|
||||
enum kvz_input_format input_format; /*!< \brief Use Temporal Motion Vector Predictors. */
|
||||
int32_t input_bitdepth; /*!< \brief Use Temporal Motion Vector Predictors. */
|
||||
|
||||
struct {
|
||||
unsigned d; // depth
|
||||
unsigned t; // temporal
|
||||
} gop_lp_definition;
|
||||
} kvz_config;
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,15 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif MACOS
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
hardware_flags_t kvz_g_hardware_flags;
|
||||
hardware_flags_t kvz_g_strategies_in_use;
|
||||
hardware_flags_t kvz_g_strategies_available;
|
||||
|
@ -410,6 +419,7 @@ static void set_hardware_flags(int32_t cpuid) {
|
|||
CPUID1_EDX_MMX = 1 << 23,
|
||||
CPUID1_EDX_SSE = 1 << 25,
|
||||
CPUID1_EDX_SSE2 = 1 << 26,
|
||||
CPUID1_EDX_HYPER_THREADING = 1 << 28,
|
||||
};
|
||||
enum {
|
||||
CPUID1_ECX_SSE3 = 1 << 0,
|
||||
|
@ -431,6 +441,21 @@ static void set_hardware_flags(int32_t cpuid) {
|
|||
// Dig CPU features with cpuid
|
||||
get_cpuid(1, 0, &cpuid1);
|
||||
|
||||
#ifdef _WIN32
|
||||
SYSTEM_INFO systeminfo;
|
||||
GetSystemInfo(&systeminfo);
|
||||
|
||||
kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors;
|
||||
#else
|
||||
kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#endif
|
||||
|
||||
kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count;
|
||||
kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING;
|
||||
if (kvz_g_hardware_flags.intel_flags.hyper_threading) {
|
||||
kvz_g_hardware_flags.physical_cpu_count /= 2;
|
||||
}
|
||||
|
||||
// EDX
|
||||
if (cpuid1.edx & CPUID1_EDX_MMX) kvz_g_hardware_flags.intel_flags.mmx = 1;
|
||||
if (cpuid1.edx & CPUID1_EDX_SSE) kvz_g_hardware_flags.intel_flags.sse = 1;
|
||||
|
|
|
@ -63,6 +63,8 @@ typedef struct {
|
|||
int sse42;
|
||||
int avx;
|
||||
int avx2;
|
||||
|
||||
bool hyper_threading;
|
||||
} intel_flags;
|
||||
|
||||
struct {
|
||||
|
@ -72,6 +74,9 @@ typedef struct {
|
|||
struct {
|
||||
int neon;
|
||||
} arm_flags;
|
||||
|
||||
int logical_cpu_count;
|
||||
int physical_cpu_count;
|
||||
} hardware_flags_t;
|
||||
|
||||
extern hardware_flags_t kvz_g_hardware_flags;
|
||||
|
|
Loading…
Reference in a new issue