From 16790c9f151e0d48aa7f960489f19f82f6d54c77 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 27 Sep 2016 22:12:02 +0300 Subject: [PATCH 01/11] Remove number of references from --gop=lp syntax The number of references should be part of the presets, so gop should be defined separately. --- src/cfg.c | 193 +++++++++++++++++++++++++++----------------------- src/cfg.h | 1 + src/encoder.c | 12 +++- src/encoder.h | 2 +- src/kvazaar.c | 4 +- src/kvazaar.h | 5 ++ 6 files changed, 126 insertions(+), 91 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 865e650e..36e04cb5 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -118,6 +118,9 @@ int kvz_config_init(kvz_config *cfg) cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; + cfg->gop_lp_definition.d = 0; + cfg->gop_lp_definition.t = 0; + return 1; } @@ -683,105 +686,32 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) struct { unsigned g; // length unsigned d; // depth - unsigned r; // references unsigned t; // temporal - } gop = { 0, 0, 0, 0 }; + } gop = { 0, 0, 0 }; - if (sscanf(value, "lp-g%ud%ur%ut%u", &gop.g, &gop.d, &gop.r, &gop.t) != 4) { - fprintf(stderr, "Error in GOP syntax. Example: lp-g8d4r2t2\n"); + // Parse --gop=lp-g#d#t# + if (sscanf(value, "lp-g%ud%ut%u", &gop.g, &gop.d, &gop.t) != 3) { + fprintf(stderr, "Error in GOP syntax. Example: lp-g8d4t2\n"); return 0; } if (gop.g < 1 || gop.g > 32) { fprintf(stderr, "gop.g must be between 1 and 32.\n"); + return 0; } if (gop.d < 1 || gop.d > 8) { fprintf(stderr, "gop.d must be between 1 and 8.\n"); - } - if (gop.r < 1 || gop.r > 15) { - fprintf(stderr, "gop.d must be between 1 and 15.\n"); + return 0; } if (gop.t < 1 || gop.t > 15) { - fprintf(stderr, "gop.t must be between 1 and 32.\n"); + fprintf(stderr, "gop.t must be between 1 and 15.\n"); + return 0; } - - // Initialize modulos for testing depth. - // The picture belong to the lowest depth in which (poc % modulo) == 0. 
- unsigned depth_modulos[8] = { 0 }; - for (int d = 0; d < gop.d; ++d) { - depth_modulos[gop.d - 1 - d] = 1 << d; - } - depth_modulos[0] = gop.g; - cfg->gop_lowdelay = 1; + cfg->gop_lowdelay = true; cfg->gop_len = gop.g; - for (int g = 1; g <= gop.g; ++g) { - kvz_gop_config *gop_pic = &cfg->gop[g - 1]; - - // Find gop depth for picture. - int gop_layer = 0; - while (gop_layer < gop.d && (g % depth_modulos[gop_layer])) { - ++gop_layer; - } - - gop_pic->poc_offset = g; - gop_pic->layer = gop_layer + 1; - gop_pic->qp_offset = gop_layer + 1; - gop_pic->ref_pos_count = 0; - gop_pic->ref_neg_count = gop.r; - gop_pic->is_ref = 0; - - // Set first ref to point to previous frame, and the rest to previous - // key-frames. - // If gop.t > 1, have (poc % gop.t) == 0 point gop.t frames away, - // instead of the previous frame. Set the frames in between to - // point to the nearest frame with a lower gop-depth. - if (gop.t > 1) { - if (gop_pic->poc_offset % gop.t == 0) { - gop_pic->ref_neg[0] = gop.t; - } else { - int r = gop_pic->poc_offset - 1; - while (r > 0) { - if (cfg->gop[r].layer < gop_pic->layer) break; - --r; - } - // Var r is now 0 or index of the pic with layer < depth. - if (cfg->gop[r].layer < gop_pic->layer) { - gop_pic->ref_neg[0] = gop_pic->poc_offset - cfg->gop[r].poc_offset; - cfg->gop[r].is_ref = 1; - } else { - // No ref was found, just refer to the previous key-frame. 
- gop_pic->ref_neg[0] = gop_pic->poc_offset % gop.g; - } - } - } else { - gop_pic->ref_neg[0] = 1; - if (gop_pic->poc_offset >= 2) { - cfg->gop[gop_pic->poc_offset - 2].is_ref = 1; - } - } - - int keyframe = gop_pic->poc_offset; - for (int i = 1; i < gop_pic->ref_neg_count; ++i) { - while (keyframe == gop_pic->ref_neg[i - 1]) { - keyframe += gop.g; - } - gop_pic->ref_neg[i] = keyframe; - } - - gop_pic->qp_factor = 0.4624; // from HM - } - - for (int g = 0; g < gop.g; ++g) { - kvz_gop_config *gop_pic = &cfg->gop[g]; - if (!gop_pic->is_ref) { - gop_pic->qp_factor = 0.68 * 1.31; // derived from HM - } - } - - // Key-frame is always a reference. - cfg->gop[gop.g - 1].is_ref = 1; - cfg->gop[gop.g - 1].qp_factor = 0.578; // from HM + cfg->gop_lp_definition.d = gop.d; + cfg->gop_lp_definition.t = gop.t; } else if (atoi(value) == 8) { cfg->gop_lowdelay = 0; // GOP @@ -821,10 +751,6 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) fprintf(stderr, "Input error: unsupported gop length, must be 0 or 8\n"); return 0; } - if (cfg->gop_len && cfg->tmvp_enable) { - cfg->tmvp_enable = false; - fprintf(stderr, "Disabling TMVP because GOP is used.\n"); - } } else if OPT("bipred") cfg->bipred = atobool(value); @@ -985,6 +911,97 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) return 1; } +void kvz_config_process_lp_gop(kvz_config *cfg) +{ + struct { + unsigned g; + unsigned d; + unsigned t; + } gop; + + gop.g = cfg->gop_len; + gop.d = cfg->gop_lp_definition.d; + gop.t = cfg->gop_lp_definition.t; + + // Initialize modulos for testing depth. + // The picture belong to the lowest depth in which (poc % modulo) == 0. + unsigned depth_modulos[8] = { 0 }; + for (int d = 0; d < gop.d; ++d) { + depth_modulos[gop.d - 1 - d] = 1 << d; + } + depth_modulos[0] = gop.g; + + cfg->gop_lowdelay = 1; + cfg->gop_len = gop.g; + for (int g = 1; g <= gop.g; ++g) { + kvz_gop_config *gop_pic = &cfg->gop[g - 1]; + + // Find gop depth for picture. 
+ int gop_layer = 0; + while (gop_layer < gop.d && (g % depth_modulos[gop_layer])) { + ++gop_layer; + } + + gop_pic->poc_offset = g; + gop_pic->layer = gop_layer + 1; + gop_pic->qp_offset = gop_layer + 1; + gop_pic->ref_pos_count = 0; + gop_pic->ref_neg_count = cfg->ref_frames; + gop_pic->is_ref = 0; + + // Set first ref to point to previous frame, and the rest to previous + // key-frames. + // If gop.t > 1, have (poc % gop.t) == 0 point gop.t frames away, + // instead of the previous frame. Set the frames in between to + // point to the nearest frame with a lower gop-depth. + if (gop.t > 1) { + if (gop_pic->poc_offset % gop.t == 0) { + gop_pic->ref_neg[0] = gop.t; + } else { + int r = gop_pic->poc_offset - 1; + while (r > 0) { + if (cfg->gop[r].layer < gop_pic->layer) break; + --r; + } + // Var r is now 0 or index of the pic with layer < depth. + if (cfg->gop[r].layer < gop_pic->layer) { + gop_pic->ref_neg[0] = gop_pic->poc_offset - cfg->gop[r].poc_offset; + cfg->gop[r].is_ref = 1; + } else { + // No ref was found, just refer to the previous key-frame. + gop_pic->ref_neg[0] = gop_pic->poc_offset % gop.g; + } + } + } else { + gop_pic->ref_neg[0] = 1; + if (gop_pic->poc_offset >= 2) { + cfg->gop[gop_pic->poc_offset - 2].is_ref = 1; + } + } + + int keyframe = gop_pic->poc_offset; + for (int i = 1; i < gop_pic->ref_neg_count; ++i) { + while (keyframe == gop_pic->ref_neg[i - 1]) { + keyframe += gop.g; + } + gop_pic->ref_neg[i] = keyframe; + } + + gop_pic->qp_factor = 0.4624; // from HM + } + + for (int g = 0; g < gop.g; ++g) { + kvz_gop_config *gop_pic = &cfg->gop[g]; + if (!gop_pic->is_ref) { + gop_pic->qp_factor = 0.68 * 1.31; // derived from HM + } + } + + // Key-frame is always a reference. + cfg->gop[gop.g - 1].is_ref = 1; + cfg->gop[gop.g - 1].qp_factor = 0.578; // from HM +} + /** * \brief Check that configuration is sensible. 
* diff --git a/src/cfg.h b/src/cfg.h index 6f0cf2d5..c719ea4a 100644 --- a/src/cfg.h +++ b/src/cfg.h @@ -36,6 +36,7 @@ kvz_config *kvz_config_alloc(void); int kvz_config_init(kvz_config *cfg); int kvz_config_destroy(kvz_config *cfg); int kvz_config_parse(kvz_config *cfg, const char *name, const char *value); +void kvz_config_process_lp_gop(kvz_config *cfg); int kvz_config_validate(const kvz_config *cfg); #endif diff --git a/src/encoder.c b/src/encoder.c index 46d7eaa0..45e2a2cc 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -80,7 +80,7 @@ static int select_owf_auto(const kvz_config *const cfg) * \param cfg encoder configuration * \return initialized encoder control or NULL on failure */ -encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) { +encoder_control_t* kvz_encoder_control_init(kvz_config *const cfg) { encoder_control_t *encoder = NULL; if (!cfg) { @@ -88,6 +88,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) { goto init_failed; } + if (cfg->gop_len > 0) { + if (cfg->tmvp_enable) { + cfg->tmvp_enable = false; + fprintf(stderr, "Disabling TMVP because GOP is used.\n"); + } + if (cfg->gop_lowdelay) { + kvz_config_process_lp_gop(cfg); + } + } + // Make sure that the parameters make sense. 
if (!kvz_config_validate(cfg)) { goto init_failed; diff --git a/src/encoder.h b/src/encoder.h index ee9909bd..f3f41e76 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -155,7 +155,7 @@ typedef struct encoder_control_t } encoder_control_t; -encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg); +encoder_control_t* kvz_encoder_control_init(kvz_config *cfg); void kvz_encoder_control_free(encoder_control_t *encoder); void kvz_encoder_control_input_init(encoder_control_t *encoder, int32_t width, int32_t height); diff --git a/src/kvazaar.c b/src/kvazaar.c index f0ae08cf..d5d3dcac 100644 --- a/src/kvazaar.c +++ b/src/kvazaar.c @@ -75,7 +75,9 @@ static kvz_encoder * kvazaar_open(const kvz_config *cfg) goto kvazaar_open_failure; } - encoder->control = kvz_encoder_control_init(cfg); + // FIXME: const qualifier discarded. I don't want to change kvazaar_open + // but I really need to change cfg. + encoder->control = kvz_encoder_control_init((kvz_config*)cfg); if (!encoder->control) { goto kvazaar_open_failure; } diff --git a/src/kvazaar.h b/src/kvazaar.h index 50a26b25..ef7cb101 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -312,6 +312,11 @@ typedef struct kvz_config enum kvz_input_format input_format; /*!< \brief Use Temporal Motion Vector Predictors. */ int32_t input_bitdepth; /*!< \brief Use Temporal Motion Vector Predictors. 
*/ + + struct { + unsigned d; // depth + unsigned t; // temporal + } gop_lp_definition; } kvz_config; /** From 3f138f087a02fcc88c0bec3b6d792daceca38576 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 27 Sep 2016 22:25:11 +0300 Subject: [PATCH 02/11] Allow non-gop-length --period for lp-gop --- src/cfg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 36e04cb5..31161a92 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -1045,11 +1045,11 @@ int kvz_config_validate(const kvz_config *const cfg) error = 1; } - if (cfg->gop_len && - cfg->intra_period && - cfg->intra_period % cfg->gop_len != 0) { + if (cfg->gop_len && cfg->intra_period && !cfg->gop_lowdelay && + cfg->intra_period % cfg->gop_len != 0) + { fprintf(stderr, - "Input error: intra period (%d) not a multiple of gop length (%d)\n", + "Input error: intra period (%d) not a multiple of B-gop length (%d)\n", cfg->intra_period, cfg->gop_len); error = 1; From 19d423ab295bb53270035bc8a5c59f0298346a11 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 27 Sep 2016 22:55:12 +0300 Subject: [PATCH 03/11] Use all available cores by default --- src/cfg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 31161a92..b4df47d8 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -23,6 +23,7 @@ #include #include #include +#include kvz_config *kvz_config_alloc(void) @@ -90,13 +91,14 @@ int kvz_config_init(kvz_config *cfg) cfg->tiles_width_split = NULL; cfg->tiles_height_split = NULL; - cfg->wpp = 0; + cfg->wpp = 1; cfg->owf = -1; cfg->slice_count = 1; cfg->slice_addresses_in_ts = MALLOC(int32_t, 1); cfg->slice_addresses_in_ts[0] = 0; - cfg->threads = 0; + // Set number of threads to equal number of processors available. + cfg->threads = pthread_num_processors_np(); cfg->cpuid = 1; // Defaults for what sizes of PUs are tried. 
From d7391a9593e7f0b6623a51a6135a3798914773b5 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 27 Sep 2016 23:14:57 +0300 Subject: [PATCH 04/11] Improve default for number of parallel frames --- src/encoder.c | 84 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 30 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 45e2a2cc..b686e158 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -36,41 +36,65 @@ static int size_of_wpp_ends(int threads) static int select_owf_auto(const kvz_config *const cfg) { - if (cfg->wpp) { - // If wpp is on, select owf such that less than 15% of the - // frame is covered by the are threads can not work at the same time. + if (cfg->intra_period == 1) { + if (cfg->wpp) { + // If wpp is on, select owf such that less than 15% of the + // frame is covered by the are threads can not work at the same time. + const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); + const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); + + // Find the largest number of threads per frame that satifies the + // the condition: wpp start/stop inefficiency takes up less than 15% + // of frame area. + int threads_per_frame = 1; + const int wpp_treshold = lcu_width * lcu_height * 15 / 100; + while ((threads_per_frame + 1) * 2 < lcu_width && + threads_per_frame + 1 < lcu_height && + size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) { + ++threads_per_frame; + } + + const int threads = MAX(cfg->threads, 1); + const int frames = CEILDIV(threads, threads_per_frame); + + // Convert from number of parallel frames to number of additional frames. + return CLIP(0, threads - 1, frames - 1); + } else { + // If wpp is not on, select owf such that there is enough + // tiles for twice the number of threads. + + int tiles_per_frame = cfg->tiles_width_count * cfg->tiles_height_count; + int threads = (cfg->threads > 1 ? 
cfg->threads : 1); + int frames = CEILDIV(threads * 4, tiles_per_frame); + + // Limit number of frames to 1.25x the number of threads for the case + // where there is only 1 tile per frame. + frames = CLIP(1, threads * 4 / 3, frames); + return frames - 1; + } + } else { + // Try and estimate a good number of parallel frames for inter. const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); + int threads_per_frame = MIN(lcu_width / 2, lcu_height); + int threads = cfg->threads; - // Find the largest number of threads per frame that satifies the - // the condition: wpp start/stop inefficiency takes up less than 15% - // of frame area. - int threads_per_frame = 1; - const int wpp_treshold = lcu_width * lcu_height * 15 / 100; - while ((threads_per_frame + 1) * 2 < lcu_width && - threads_per_frame + 1 < lcu_height && - size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) - { - ++threads_per_frame; + // If all threads fit into one frame, at least two parallel frames should + // be used to reduce the effect of WPP spin-up and wind-down. + int frames = 1; + + while (threads > 0 && threads_per_frame > 0) { + frames += 1; + threads -= threads_per_frame; + threads_per_frame -= 2; } - const int threads = MAX(cfg->threads, 1); - const int frames = CEILDIV(threads, threads_per_frame); - - // Convert from number of parallel frames to number of additional frames. - return CLIP(0, threads - 1, frames - 1); - } else { - // If wpp is not on, select owf such that there is enough - // tiles for twice the number of threads. - - int tiles_per_frame= cfg->tiles_width_count * cfg->tiles_height_count; - int threads = (cfg->threads > 1 ? cfg->threads : 1); - int frames = CEILDIV(threads * 4, tiles_per_frame); - - // Limit number of frames to 1.25x the number of threads for the case - // where there is only 1 tile per frame. 
- frames = CLIP(1, threads * 4 / 3, frames); - return frames - 1; + if (cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) { + // Temporal skipping makes every other frame very fast to encode so + // more parallel frames should be used. + frames *= 2; + } + return CLIP(0, cfg->threads * 2 - 1, frames - 1); } } From facb1e16dfad190d4a1049e1ab56d96d255f4c3a Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 00:15:46 +0300 Subject: [PATCH 05/11] Use -p64 -q22 and --gop=lp-g4d3t1 by default Coding inter without GOP of any kind really isn't a very sensible default. Defaulting to B-GOP of some kind would be better, but lp-gop is more robust for now. --- src/cfg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index b4df47d8..8757d8f9 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -46,8 +46,8 @@ int kvz_config_init(kvz_config *cfg) cfg->framerate = 25; // deprecated and will be removed. cfg->framerate_num = 0; cfg->framerate_denom = 1; - cfg->qp = 32; - cfg->intra_period = 0; + cfg->qp = 22; + cfg->intra_period = 64; cfg->vps_period = 0; cfg->deblock_enable = 1; cfg->deblock_beta = 0; @@ -77,7 +77,8 @@ int kvz_config_init(kvz_config *cfg) cfg->aud_enable = 0; cfg->cqmfile = NULL; cfg->ref_frames = DEFAULT_REF_PIC_COUNT; - cfg->gop_len = 0; + cfg->gop_len = 4; + cfg->gop_lowdelay = true; cfg->bipred = 0; cfg->target_bitrate = 0; cfg->hash = KVZ_HASH_CHECKSUM; @@ -102,10 +103,10 @@ int kvz_config_init(kvz_config *cfg) cfg->cpuid = 1; // Defaults for what sizes of PUs are tried. 
- cfg->pu_depth_inter.min = 0; // 0-3 + cfg->pu_depth_inter.min = 1; // 0-3 cfg->pu_depth_inter.max = 3; // 0-3 cfg->pu_depth_intra.min = 1; // 0-4 - cfg->pu_depth_intra.max = 4; // 0-4 + cfg->pu_depth_intra.max = 3; // 0-4 cfg->add_encoder_info = true; cfg->calc_psnr = true; @@ -120,8 +121,8 @@ int kvz_config_init(kvz_config *cfg) cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; - cfg->gop_lp_definition.d = 0; - cfg->gop_lp_definition.t = 0; + cfg->gop_lp_definition.d = 3; + cfg->gop_lp_definition.t = 1; return 1; } From 4388fe0d30dd33dc3cf5a355ad8b06c3a63a444a Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 00:02:27 +0300 Subject: [PATCH 06/11] Set presets to ratedistortion-complexity optimized versions --- src/cfg.c | 228 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 129 insertions(+), 99 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 8757d8f9..8d8e27a2 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -315,195 +315,225 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) static const char * const me_early_termination_names[] = { "off", "on", "sensitive", NULL }; - static const char * const preset_values[11][32] = { + static const char * const preset_values[11][19*2] = { { "ultrafast", "pu-depth-intra", "2-3", - "pu-depth-inter", "1-3", + "pu-depth-inter", "2-3", "rd", "0", "me", "hexbs", "ref", "1", - "deblock", "1", + "deblock", "0:0", "signhide", "0", "subme", "0", "sao", "0", "rdoq", "0", + "rdoq-skip", "1", "transform-skip", "0", "full-intra-search", "0", "mv-rdo", "0", "smp", "0", "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "sensitive", NULL }, { "superfast", + "pu-depth-intra", "2-3", + "pu-depth-inter", "2-3", + "rd", "0", + "me", "hexbs", + "ref", "1", + "deblock", "0:0", + "signhide", "0", + "subme", "0", + "sao", "1", + "rdoq", "0", + "rdoq-skip", "1", + "transform-skip", "0", + "full-intra-search", "0", + "mv-rdo", "0", + "smp", "0", + "amp", 
"0", + "cu-split-termination", "zero", + "me-early-termination", "sensitive", + NULL + }, + { + "veryfast", + "pu-depth-intra", "2-3", + "pu-depth-inter", "2-3", + "rd", "0", + "me", "hexbs", + "ref", "1", + "deblock", "0:0", + "signhide", "0", + "subme", "2", + "sao", "1", + "rdoq", "0", + "rdoq-skip", "1", + "transform-skip", "0", + "full-intra-search", "0", + "mv-rdo", "0", + "smp", "0", + "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "sensitive", + NULL + }, + { + "faster", + "pu-depth-intra", "2-3", + "pu-depth-inter", "1-3", + "rd", "1", + "me", "hexbs", + "ref", "1", + "deblock", "0:0", + "signhide", "0", + "subme", "2", + "sao", "1", + "rdoq", "0", + "rdoq-skip", "1", + "transform-skip", "0", + "full-intra-search", "0", + "mv-rdo", "0", + "smp", "0", + "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "sensitive", + NULL + }, + { + "fast", + "pu-depth-intra", "2-3", + "pu-depth-inter", "1-3", + "rd", "1", + "me", "hexbs", + "ref", "1", + "deblock", "0:0", + "signhide", "0", + "subme", "4", + "sao", "1", + "rdoq", "1", + "rdoq-skip", "1", + "transform-skip", "0", + "full-intra-search", "0", + "mv-rdo", "0", + "smp", "0", + "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "on", + NULL + }, + { + "medium", "pu-depth-intra", "1-3", "pu-depth-inter", "1-3", "rd", "1", "me", "hexbs", "ref", "1", - "deblock", "1", - "signhide", "0", - "subme", "0", - "sao", "0", - "rdoq", "0", - "transform-skip", "0", - "full-intra-search", "0", - "mv-rdo", "0", - "smp", "0", - "amp", "0", - NULL - }, - { - "veryfast", - "pu-depth-intra", "1-3", - "pu-depth-inter", "0-3", - "rd", "1", - "me", "hexbs", - "ref", "2", - "deblock", "1", - "signhide", "0", - "subme", "0", - "sao", "0", - "rdoq", "0", - "transform-skip", "0", - "full-intra-search", "0", - "mv-rdo", "0", - "smp", "0", - "amp", "0", - NULL - }, - { - "faster", - "pu-depth-intra", "1-3", - "pu-depth-inter", "0-3", - "rd", "1", - "me", "hexbs", - "ref", 
"2", - "deblock", "1", - "signhide", "1", - "subme", "0", - "sao", "0", - "rdoq", "0", - "transform-skip", "0", - "full-intra-search", "0", - "mv-rdo", "0", - "smp", "0", - "amp", "0", - NULL - }, - { - "fast", - "pu-depth-intra", "1-3", - "pu-depth-inter", "0-3", - "rd", "1", - "me", "hexbs", - "ref", "2", - "deblock", "1", + "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "0", - "rdoq", "0", - "transform-skip", "0", - "full-intra-search", "0", - "mv-rdo", "0", - "smp", "0", - "amp", "0", - NULL - }, - { - "medium", - "pu-depth-intra", "1-4", - "pu-depth-inter", "0-3", - "rd", "1", - "me", "hexbs", - "ref", "3", - "deblock", "1", - "signhide", "1", - "subme", "4", - "sao", "0", - "rdoq", "0", + "sao", "1", + "rdoq", "1", + "rdoq-skip", "1", "transform-skip", "0", "full-intra-search", "0", "mv-rdo", "0", "smp", "0", "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "on", NULL }, { "slow", - "pu-depth-intra", "1-4", + "pu-depth-intra", "1-3", "pu-depth-inter", "0-3", - "rd", "2", + "rd", "1", "me", "hexbs", - "ref", "3", - "deblock", "1", + "ref", "1", + "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "1", - "rdoq", "0", + "rdoq", "1", + "rdoq-skip", "1", "transform-skip", "0", "full-intra-search", "0", "mv-rdo", "0", "smp", "0", "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "on", NULL }, { "slower", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "2", - "me", "tz", - "ref", "4", - "deblock", "1", + "rd", "1", + "me", "hexbs", + "ref", "3", + "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "1", "rdoq", "1", + "rdoq-skip", "1", "transform-skip", "0", "full-intra-search", "0", "mv-rdo", "0", "smp", "0", "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "on", NULL }, { "veryslow", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "2", + "rd", "1", "me", "tz", - "ref", "4", - "deblock", "1", + "ref", "3", + "deblock", "0:0", "signhide", "1", "subme", "4", 
"sao", "1", "rdoq", "1", - "transform-skip", "1", + "rdoq-skip", "0", + "transform-skip", "0", "full-intra-search", "0", - "mv-rdo", "1", + "mv-rdo", "0", "smp", "0", "amp", "0", + "cu-split-termination", "off", + "me-early-termination", "off", NULL }, { "placebo", "pu-depth-intra", "0-4", "pu-depth-inter", "0-3", - "rd", "3", + "rd", "2", "me", "tz", - "ref", "6", - "deblock", "1", + "ref", "4", + "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "1", "rdoq", "1", + "rdoq-skip", "0", "transform-skip", "1", "full-intra-search", "1", "mv-rdo", "1", "smp", "1", "amp", "1", + "cu-split-termination", "off", + "me-early-termination", "off", NULL }, { NULL } From a395aeaac9fa320967cafae2a1b5f2e2768c21c4 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 00:19:30 +0300 Subject: [PATCH 07/11] Set default settings to those of --preset=medium --- src/cfg.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 8d8e27a2..338a198b 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -54,13 +54,14 @@ int kvz_config_init(kvz_config *cfg) cfg->deblock_tc = 0; cfg->sao_enable = 1; cfg->rdoq_enable = 1; + cfg->rdoq_skip = 1; cfg->signhide_enable = true; cfg->smp_enable = false; cfg->amp_enable = false; cfg->rdo = 1; cfg->mv_rdo = 0; cfg->full_intra_search = 0; - cfg->trskip_enable = 1; + cfg->trskip_enable = 0; cfg->tr_depth_intra = 0; cfg->ime_algorithm = 0; /* hexbs */ cfg->fme_level = 4; @@ -103,9 +104,9 @@ int kvz_config_init(kvz_config *cfg) cfg->cpuid = 1; // Defaults for what sizes of PUs are tried. 
- cfg->pu_depth_inter.min = 1; // 0-3 + cfg->pu_depth_inter.min = 2; // 0-3 cfg->pu_depth_inter.max = 3; // 0-3 - cfg->pu_depth_intra.min = 1; // 0-4 + cfg->pu_depth_intra.min = 2; // 0-4 cfg->pu_depth_intra.max = 3; // 0-4 cfg->add_encoder_info = true; @@ -114,9 +115,7 @@ int kvz_config_init(kvz_config *cfg) cfg->mv_constraint = KVZ_MV_CONSTRAIN_NONE; cfg->crypto_features = KVZ_CRYPTO_OFF; - cfg->me_early_termination = 0; - - cfg->rdoq_skip = 0; + cfg->me_early_termination = 1; cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; From 8c7351eac8b9deb88f9578e30dad95eabf1ac15b Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 22:47:57 +0300 Subject: [PATCH 08/11] Fix lp-gop with depth 1 GOPs with depth 1 had the same structure as those with depth 2: g4d3t1 = 3 2 3 1 g4d2t1 = 2 2 2 1 g4d1t1 = 2 2 2 1 It now results in the correct: g4d1t1 = 1 1 1 1 --- src/cfg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 338a198b..5a02fadb 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -969,14 +969,14 @@ void kvz_config_process_lp_gop(kvz_config *cfg) kvz_gop_config *gop_pic = &cfg->gop[g - 1]; // Find gop depth for picture. - int gop_layer = 0; - while (gop_layer < gop.d && (g % depth_modulos[gop_layer])) { + int gop_layer = 1; + while (gop_layer < gop.d && (g % depth_modulos[gop_layer - 1])) { ++gop_layer; } gop_pic->poc_offset = g; - gop_pic->layer = gop_layer + 1; - gop_pic->qp_offset = gop_layer + 1; + gop_pic->layer = gop_layer; + gop_pic->qp_offset = gop_layer; gop_pic->ref_pos_count = 0; gop_pic->ref_neg_count = cfg->ref_frames; gop_pic->is_ref = 0; From 31c5ff0f160eed9fa13093bc56f445fe5cf8c78b Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 28 Sep 2016 23:05:21 +0300 Subject: [PATCH 09/11] Add cross-platform core number detection Well, turns out pthread_num_processors_np isn't standard so we need to do this crap. Threw in hyper threading detection as a bonus. 
--- src/cfg.c | 4 +--- src/encoder.c | 20 ++++++++++++++++++++ src/strategyselector.c | 25 +++++++++++++++++++++++++ src/strategyselector.h | 5 +++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 5a02fadb..82dc6135 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -23,7 +23,6 @@ #include #include #include -#include kvz_config *kvz_config_alloc(void) @@ -99,8 +98,7 @@ int kvz_config_init(kvz_config *cfg) cfg->slice_addresses_in_ts = MALLOC(int32_t, 1); cfg->slice_addresses_in_ts[0] = 0; - // Set number of threads to equal number of processors available. - cfg->threads = pthread_num_processors_np(); + cfg->threads = -1; cfg->cpuid = 1; // Defaults for what sizes of PUs are tried. diff --git a/src/encoder.c b/src/encoder.c index b686e158..0f5cb9fa 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -24,6 +24,7 @@ #include #include "cfg.h" +#include "strategyselector.h" static int encoder_control_init_gop_layer_weights(encoder_control_t * const); @@ -98,6 +99,21 @@ static int select_owf_auto(const kvz_config *const cfg) } } + +static unsigned cfg_num_threads(void) +{ + unsigned cpus = kvz_g_hardware_flags.physical_cpu_count; + unsigned fake_cpus = kvz_g_hardware_flags.logical_cpu_count - cpus; + + // Default to 4 if we don't know the number of CPUs. + if (cpus == 0) return 4; + + // 1.5 times the number of physical cores seems to be a good compromise + // when hyperthreading is available on Haswell. + return cpus + fake_cpus / 2; +} + + /** * \brief Allocate and initialize an encoder control structure. 
* @@ -112,6 +128,10 @@ encoder_control_t* kvz_encoder_control_init(kvz_config *const cfg) { goto init_failed; } + if (cfg->threads == -1) { + cfg->threads = cfg_num_threads(); + } + if (cfg->gop_len > 0) { if (cfg->tmvp_enable) { cfg->tmvp_enable = false; diff --git a/src/strategyselector.c b/src/strategyselector.c index 60819ca9..e3a49e5f 100644 --- a/src/strategyselector.c +++ b/src/strategyselector.c @@ -24,6 +24,15 @@ #include #include +#ifdef _WIN32 +#include +#elif MACOS +#include +#include +#else +#include +#endif + hardware_flags_t kvz_g_hardware_flags; hardware_flags_t kvz_g_strategies_in_use; hardware_flags_t kvz_g_strategies_available; @@ -410,6 +419,7 @@ static void set_hardware_flags(int32_t cpuid) { CPUID1_EDX_MMX = 1 << 23, CPUID1_EDX_SSE = 1 << 25, CPUID1_EDX_SSE2 = 1 << 26, + CPUID1_EDX_HYPER_THREADING = 1 << 28, }; enum { CPUID1_ECX_SSE3 = 1 << 0, @@ -430,6 +440,21 @@ static void set_hardware_flags(int32_t cpuid) { // Dig CPU features with cpuid get_cpuid(1, 0, &cpuid1); + +#ifdef _WIN32 + SYSTEM_INFO systeminfo; + GetSystemInfo(&systeminfo); + + kvz_g_hardware_flags.logical_cpu_count = systeminfo.dwNumberOfProcessors; +#else + kvz_g_hardware_flags.logical_cpu_count = sysconf(_SC_NPROCESSORS_ONLN); +#endif + + kvz_g_hardware_flags.physical_cpu_count = kvz_g_hardware_flags.logical_cpu_count; + kvz_g_hardware_flags.intel_flags.hyper_threading = cpuid1.edx & CPUID1_EDX_HYPER_THREADING; + if (kvz_g_hardware_flags.intel_flags.hyper_threading) { + kvz_g_hardware_flags.physical_cpu_count /= 2; + } // EDX if (cpuid1.edx & CPUID1_EDX_MMX) kvz_g_hardware_flags.intel_flags.mmx = 1; diff --git a/src/strategyselector.h b/src/strategyselector.h index 1eae4d5a..4c6819ea 100644 --- a/src/strategyselector.h +++ b/src/strategyselector.h @@ -63,6 +63,8 @@ typedef struct { int sse42; int avx; int avx2; + + bool hyper_threading; } intel_flags; struct { @@ -72,6 +74,9 @@ typedef struct { struct { int neon; } arm_flags; + + int logical_cpu_count; + int 
physical_cpu_count; } hardware_flags_t; extern hardware_flags_t kvz_g_hardware_flags; From 278cd4da9b4a26a30a140ce19b21b74c281c8872 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Thu, 29 Sep 2016 00:21:03 +0300 Subject: [PATCH 10/11] Disable WPP in Travis tile tests Now that WPP is on by default, Valgrind is finding memory leaks on these tests. It's not a priority so I'll just disable it for now. ==8120== Memcheck, a memory error detector ==8120== Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al. ==8120== Using Valgrind-3.7.0 and LibVEX; rerun with -h for copyright info ==8120== Command: /home/travis/build/Venti-/kvazaar/src/.libs/lt-kvazaar -i mandelbrot_264x130.yuv --input-res=264x130 -o test.265 -p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3 ==8120== Disabling TMVP because tiles are used. Compiled: INTEL, flags: MMX SSE SSE2 Detected: INTEL, flags: MMX SSE SSE2 SSE3 SSSE3 SSE41 SSE42 Available: sse2(2) sse41(1) In use: sse2(1) sse41(1) Input: mandelbrot_264x130.yuv, output: test.265 Video size: 264x136 (input=264x130) ==8120== Conditional jump or move depends on uninitialised value(s) ==8120== at 0x4E5FEE5: kvz_threadqueue_job_dep_add (threadqueue.c:616) ==8120== by 0x4E3DEAB: encoder_state_worker_encode_children (encoderstate.c:432) ==8120== by 0x4E3E219: encoder_state_encode (encoderstate.c:649) ==8120== by 0x4E3DE35: encoder_state_worker_encode_children (encoderstate.c:417) ==8120== by 0x4E3E219: encoder_state_encode (encoderstate.c:649) ==8120== by 0x4E3DE35: encoder_state_worker_encode_children (encoderstate.c:417) ==8120== by 0x4E3E219: encoder_state_encode (encoderstate.c:649) ==8120== by 0x4E3ECBD: kvz_encode_one_frame (encoderstate.c:941) ==8120== by 0x4E4DA22: kvazaar_encode (kvazaar.c:229) ==8120== by 0x4E4E228: kvazaar_field_encoding_adapter (kvazaar.c:280) ==8120== by 0x40137F: main (encmain.c:436) ==8120== lt-kvazaar: 
threadqueue.c:618: kvz_threadqueue_job_dep_add: Assertion `job && depends_on' failed. ==8120== ==8120== HEAP SUMMARY: ==8120== in use at exit: 1,320,764 bytes in 568 blocks ==8120== total heap usage: 584 allocs, 16 frees, 1,330,691 bytes allocated ==8120== ==8120== 112 bytes in 1 blocks are definitely lost in loss record 27 of 88 ==8120== at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8120== by 0x4E46BA5: kvz_image_alloc (image.c:49) ==8120== by 0x401E12: input_read_thread (encmain.c:183) ==8120== by 0x55EDE99: start_thread (pthread_create.c:308) ==8120== ==8120== 272 bytes in 1 blocks are possibly lost in loss record 41 of 88 ==8120== at 0x4C29DB4: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8120== by 0x4012034: _dl_allocate_tls (dl-tls.c:297) ==8120== by 0x55EEABC: pthread_create@@GLIBC_2.2.5 (allocatestack.c:571) ==8120== by 0x4012B9: main (encmain.c:404) ==8120== ==8120== 544 bytes in 2 blocks are possibly lost in loss record 45 of 88 ==8120== at 0x4C29DB4: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8120== by 0x4012034: _dl_allocate_tls (dl-tls.c:297) ==8120== by 0x55EEABC: pthread_create@@GLIBC_2.2.5 (allocatestack.c:571) ==8120== by 0x4E5EF65: kvz_threadqueue_init (threadqueue.c:308) ==8120== by 0x4E3BD2F: kvz_encoder_control_init (encoder.c:173) ==8120== by 0x4E4DD7E: kvazaar_open (kvazaar.c:80) ==8120== by 0x401112: main (encmain.c:346) ==8120== ==8120== 53,856 bytes in 1 blocks are possibly lost in loss record 81 of 88 ==8120== at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8120== by 0x4E46BEC: kvz_image_alloc (image.c:59) ==8120== by 0x401E12: input_read_thread (encmain.c:183) ==8120== by 0x55EDE99: start_thread (pthread_create.c:308) ==8120== ==8120== LEAK SUMMARY: ==8120== definitely lost: 112 bytes in 1 blocks ==8120== indirectly lost: 0 bytes in 0 blocks ==8120== possibly lost: 54,672 bytes in 4 blocks ==8120== still reachable: 1,265,980 
bytes in 563 blocks ==8120== suppressed: 0 bytes in 0 blocks ==8120== Reachable blocks (those to which a pointer was found) are not shown. ==8120== To see them, rerun with: --leak-check=full --show-reachable=yes ==8120== ==8120== For counts of detected and suppressed errors, rerun with: -v ==8120== Use --track-origins=yes to see where uninitialised values come from ==8120== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 2 from 2) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index efb9fbaa..73ba96ae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -89,8 +89,8 @@ matrix: - env: VALGRIND_TEST="-p4 -r1 --owf=0 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" + - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" + - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" # Tests for rdoq, sao, deblock and signhide and subme. 
- env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3" From 016dbe0894558bd5fcc74cbf668390c1b5dfb616 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Thu, 29 Sep 2016 15:44:12 +0300 Subject: [PATCH 11/11] Further refine presets The rd-complexity of slow presets is better with a less aggressive GOP. Adding the GOP as part of the preset improved BDRate enough that it didn't make sense anymore to have veryslow target the best BDRate. Instead, push that responsibility to placebo by making it a little bit faster. --- src/cfg.c | 78 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/src/cfg.c b/src/cfg.c index 82dc6135..e36670ce 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -312,7 +312,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) static const char * const me_early_termination_names[] = { "off", "on", "sensitive", NULL }; - static const char * const preset_values[11][19*2] = { + static const char * const preset_values[11][20*2] = { { "ultrafast", "pu-depth-intra", "2-3", @@ -333,6 +333,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", + "gop", "lp-g4d3t1", NULL }, { @@ -355,6 +356,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", + "gop", "lp-g4d3t1", NULL }, { @@ -377,6 +379,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", + "gop", "lp-g4d3t1", NULL }, { @@ -399,6 +402,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", + "gop", "lp-g4d3t1", NULL }, {
@@ -412,6 +416,29 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "signhide", "0", "subme", "4", "sao", "1", + "rdoq", "0", + "rdoq-skip", "1", + "transform-skip", "0", + "full-intra-search", "0", + "mv-rdo", "0", + "smp", "0", + "amp", "0", + "cu-split-termination", "zero", + "me-early-termination", "on", + "gop", "lp-g4d3t1", + NULL + }, + { + "medium", + "pu-depth-intra", "1-3", + "pu-depth-inter", "1-3", + "rd", "1", + "me", "hexbs", + "ref", "1", + "deblock", "0:0", + "signhide", "0", + "subme", "4", + "sao", "1", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -421,37 +448,16 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "on", - NULL - }, - { - "medium", - "pu-depth-intra", "1-3", - "pu-depth-inter", "1-3", - "rd", "1", - "me", "hexbs", - "ref", "1", - "deblock", "0:0", - "signhide", "1", - "subme", "4", - "sao", "1", - "rdoq", "1", - "rdoq-skip", "1", - "transform-skip", "0", - "full-intra-search", "0", - "mv-rdo", "0", - "smp", "0", - "amp", "0", - "cu-split-termination", "zero", - "me-early-termination", "on", + "gop", "lp-g4d3t1", NULL }, { "slow", "pu-depth-intra", "1-3", - "pu-depth-inter", "0-3", + "pu-depth-inter", "1-3", "rd", "1", "me", "hexbs", - "ref", "1", + "ref", "2", "deblock", "0:0", "signhide", "1", "subme", "4", @@ -465,15 +471,16 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "on", + "gop", "lp-g4d2t1", NULL }, { "slower", - "pu-depth-intra", "1-4", + "pu-depth-intra", "1-3", "pu-depth-inter", "0-3", "rd", "1", "me", "hexbs", - "ref", "3", + "ref", "2", "deblock", "0:0", "signhide", "1", "subme", "4", @@ -487,6 +494,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "amp", "0", "cu-split-termination", "zero", "me-early-termination", "on", + "gop", "lp-g4d2t1", NULL }, { @@ 
-494,28 +502,29 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", "rd", "1", - "me", "tz", + "me", "hexbs", "ref", "3", "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "1", "rdoq", "1", - "rdoq-skip", "0", + "rdoq-skip", "1", "transform-skip", "0", "full-intra-search", "0", "mv-rdo", "0", "smp", "0", "amp", "0", - "cu-split-termination", "off", - "me-early-termination", "off", + "cu-split-termination", "zero", + "me-early-termination", "on", + "gop", "lp-g4d2t1", NULL }, { "placebo", - "pu-depth-intra", "0-4", + "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "2", + "rd", "1", "me", "tz", "ref", "4", "deblock", "0:0", @@ -525,12 +534,13 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "rdoq", "1", "rdoq-skip", "0", "transform-skip", "1", - "full-intra-search", "1", + "full-intra-search", "0", "mv-rdo", "1", "smp", "1", "amp", "1", "cu-split-termination", "off", "me-early-termination", "off", + "gop", "lp-g4d2t1", NULL }, { NULL }