diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj b/build/kvazaar_lib/kvazaar_lib.vcxproj index 67ee5ac4..de6bd428 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj @@ -267,6 +267,7 @@ + diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters index 06ee72a1..45e196d1 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters @@ -498,6 +498,9 @@ Optimization\strategies\avx2 + + Reconstruction + diff --git a/src/Makefile.am b/src/Makefile.am index 57e50e41..20db2fc0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -90,6 +90,7 @@ libkvazaar_la_SOURCES = \ kvazaar.c \ kvazaar_internal.h \ kvz_math.h \ + mip_data.h \ ml_intra_cu_depth_pred.c \ ml_intra_cu_depth_pred.h \ nal.c \ diff --git a/src/cabac.h b/src/cabac.h index 77d6251b..8489333c 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -107,6 +107,7 @@ typedef struct cabac_ctx_t sig_coeff_group_model[4]; cabac_ctx_t luma_planar_model[2]; cabac_ctx_t multi_ref_line[2]; + cabac_ctx_t mip_flag[4]; cabac_ctx_t bdpcm_mode[4]; cabac_ctx_t joint_cb_cr[3]; cabac_ctx_t transform_skip_model_luma; diff --git a/src/cfg.c b/src/cfg.c index 10946d28..2be8c8c6 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -203,6 +203,8 @@ int kvz_config_init(kvz_config *cfg) cfg->chroma_scale_out[2][0] = cfg->chroma_scale_in[2][0] = -1; cfg->mrl = false; + + cfg->mip = false; parse_qp_map(cfg, 0); @@ -1488,6 +1490,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) else if OPT("mrl") { cfg->mrl = atobool(value); } + else if OPT("mip") { + cfg->mip = atobool(value); + } else if OPT("jccr") { cfg->jccr = (bool)atobool(value); } diff --git a/src/cli.c b/src/cli.c index 9bacb135..baa5a07a 100644 --- a/src/cli.c +++ b/src/cli.c @@ -175,6 +175,8 @@ static const struct option long_options[] = { { "chroma-qp-out", required_argument, NULL, 0 }, { "mrl", no_argument, NULL, 0 }, { 
"no-mrl", no_argument, NULL, 0 }, + { "mip", no_argument, NULL, 0 }, + { "no-mip", no_argument, NULL, 0 }, { "jccr", no_argument, NULL, 0 }, { "no-jccr", no_argument, NULL, 0 }, { "amvr", no_argument, NULL, 0 }, @@ -626,6 +628,7 @@ void print_help(void) " --(no-)tmvp : Temporal motion vector prediction [enabled]\n" " --(no-)mrl : Enable use of multiple reference lines in intra\n" " predictions.\n" + " --(no-)mip : Enable matrix weighted intra prediction." " --mts : Multiple Transform Selection [off].\n" " (Currently only implemented for intra\n" " and has effect only when rd >= 2)\n" diff --git a/src/context.c b/src/context.c index b7b1142e..3e834744 100644 --- a/src/context.c +++ b/src/context.c @@ -93,6 +93,13 @@ static const uint8_t MULTI_REF_LINE_MODE[4][2] = { { 5, 8, }, }; +static const uint8_t MIP_FLAG[4][4] = { + { 56, 57, 50, 26, }, + { 41, 57, 58, 26, }, + { 33, 49, 50, 25, }, + { 9, 10, 9, 6, }, +}; + static const uint8_t INIT_INTRA_LUMA_MPM_FLAG[4] = { 44, 36, 45, 6 }; @@ -483,6 +490,10 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) kvz_ctx_init(&cabac->ctx.multi_ref_line[0], QP, MULTI_REF_LINE_MODE[slice][0], MULTI_REF_LINE_MODE[3][0]); kvz_ctx_init(&cabac->ctx.multi_ref_line[1], QP, MULTI_REF_LINE_MODE[slice][1], MULTI_REF_LINE_MODE[3][1]); + for (i = 0; i < 4; i++) { + kvz_ctx_init(&cabac->ctx.mip_flag[i], QP, MIP_FLAG[slice][i], MIP_FLAG[3][i]); + } + kvz_ctx_init(&cabac->ctx.chroma_pred_model, QP, INIT_CHROMA_PRED_MODE[slice], INIT_CHROMA_PRED_MODE[3]); kvz_ctx_init(&cabac->ctx.cclm_flag, QP, INIT_CCLM_FLAG[slice], INIT_CCLM_FLAG[3]); diff --git a/src/cu.h b/src/cu.h index be081f61..4be18926 100644 --- a/src/cu.h +++ b/src/cu.h @@ -169,6 +169,8 @@ typedef struct int8_t mode; int8_t mode_chroma; uint8_t multi_ref_idx; + int8_t mip_flag; + int8_t mip_is_transposed; } intra; struct { mv_t mv[2][2]; // \brief Motion vectors for L0 and L1 diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 
26f65f06..21c368e0 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -854,6 +854,43 @@ static void encode_intra_coding_unit(encoder_state_t * const state, //isp_mode += ((height > TR_MAX_WIDTH) || !enough_samples) ? 2 : 0; bool allow_isp = enough_samples; + // Code MIP related bits + bool enable_mip = state->encoder_control->cfg.mip; + int8_t mip_flag = enable_mip ? cur_cu->intra.mip_flag : false; + bool mip_transpose = enable_mip ? cur_cu->intra.mip_is_transposed : false; + int8_t mip_mode = enable_mip ? cur_cu->intra.mode : 0; + uint8_t num_mip_modes; + + // Number of MIP modes for this block + if (width == 4 && height == 4) { + num_mip_modes = 16; + } + else if (width == 4 || height == 4 || (width == 8 && height == 8)) { + num_mip_modes = 8; + } + else { + num_mip_modes = 6; + } + + if (mip_flag) { + assert(mip_mode >= 0 && mip_mode < num_mip_modes && "Invalid MIP mode."); + } + + if (cur_cu->type == CU_INTRA && !cur_cu->bdpcmMode && enable_mip) { + const int cu_width = LCU_WIDTH >> depth; + const int cu_height = cu_width; // TODO: height for non-square blocks + uint8_t ctx_id = kvz_get_mip_flag_context(x, y, cu_width, cu_height, NULL, frame->cu_array); + + // Write MIP flag + cabac->cur_ctx = &(cabac->ctx.mip_flag[ctx_id]); + CABAC_BIN(cabac, mip_flag, "mip_flag"); + if (mip_flag) { + // Write MIP transpose flag & mode + CABAC_BIN_EP(cabac, mip_transpose, "mip_transposed"); + kvz_cabac_encode_trunc_bin(cabac, mip_mode, num_mip_modes); + } + } + // Code MRL related bits bool enable_mrl = state->encoder_control->cfg.mrl; int multi_ref_idx = enable_mrl ? 
cur_cu->intra.multi_ref_idx : 0; @@ -862,7 +899,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state, if(multi_ref_idx) DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_MRL, x, y, width, width, multi_ref_idx); #endif - if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl) { + if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl && !mip_flag) { if (MAX_REF_LINE_IDX > 1) { cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]); CABAC_BIN(cabac, multi_ref_idx != 0, "multi_ref_line"); @@ -875,7 +912,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state, // ToDo: update real usage, these if clauses as such don't make any sense - if (isp_mode != 0 && multi_ref_idx == 0) { + if (isp_mode != 0 && multi_ref_idx == 0 && !mip_flag) { if (isp_mode) { cabac->cur_ctx = &(cabac->ctx.intra_subpart_model[0]); CABAC_BIN(cabac, 0, "intra_subPartitions"); @@ -890,121 +927,129 @@ static void encode_intra_coding_unit(encoder_state_t * const state, } } - // PREDINFO CODING - // If intra prediction mode is found from the predictors, - // it can be signaled with two EP's. Otherwise we can send - // 5 EP bins with the full predmode - // ToDo: fix comments for VVC const int cu_width = LCU_WIDTH >> depth; - - cabac->cur_ctx = &(cabac->ctx.intra_luma_mpm_flag_model); - for (int j = 0; j < num_pred_units; ++j) { - const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, j); - const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j); - const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); - - const cu_info_t *left_pu = NULL; - const cu_info_t *above_pu = NULL; - - if (pu_x > 0) { - assert(pu_x >> 2 > 0); - left_pu = kvz_cu_array_at_const(frame->cu_array, pu_x - 1, pu_y + cu_width - 1); - } - // Don't take the above PU across the LCU boundary. 
- if (pu_y % LCU_WIDTH > 0 && pu_y > 0) { - assert(pu_y >> 2 > 0); - above_pu = kvz_cu_array_at_const(frame->cu_array, pu_x + cu_width - 1, pu_y - 1); - } - - - kvz_intra_get_dir_luma_predictor(pu_x, pu_y, - intra_preds[j], - cur_pu, - left_pu, above_pu); - - - intra_pred_mode_actual[j] = cur_pu->intra.mode; - - for (int i = 0; i < INTRA_MPM_COUNT; i++) { - if (intra_preds[j][i] == intra_pred_mode[j]) { - mpm_preds[j] = (int8_t)i; - break; - } - } - // Is the mode in the MPM array or not - flag[j] = (mpm_preds[j] == -1) ? 0 : 1; - if (!(cur_pu->intra.multi_ref_idx || (isp_mode))) { - CABAC_BIN(cabac, flag[j], "prev_intra_luma_pred_flag"); - } - } - - for (int j = 0; j < num_pred_units; ++j) { - // Signal index of the prediction mode in the prediction list, if it is there - if (flag[j]) { - + // If MIP is used, skip writing normal intra modes + if (!mip_flag) { + // PREDINFO CODING + // If intra prediction mode is found from the predictors, + // it can be signaled with two EP's. Otherwise we can send + // 5 EP bins with the full predmode + // ToDo: fix comments for VVC + + cabac->cur_ctx = &(cabac->ctx.intra_luma_mpm_flag_model); + for (int j = 0; j < num_pred_units; ++j) { const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, j); const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j); - const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); - cabac->cur_ctx = &(cabac->ctx.luma_planar_model[(isp_mode ? 0 : 1)]); - if (cur_pu->intra.multi_ref_idx == 0) { - CABAC_BIN(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx_luma_planar"); - } - //CABAC_BIN_EP(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx"); - if (mpm_preds[j] > 0) { - CABAC_BIN_EP(cabac, (mpm_preds[j] > 1 ? 1 : 0), "mpm_idx"); - } - if (mpm_preds[j] > 1) { - CABAC_BIN_EP(cabac, (mpm_preds[j] > 2 ? 1 : 0), "mpm_idx"); - } - if (mpm_preds[j] > 2) { - CABAC_BIN_EP(cabac, (mpm_preds[j] > 3 ? 1 : 0), "mpm_idx"); - } - if (mpm_preds[j] > 3) { - CABAC_BIN_EP(cabac, (mpm_preds[j] > 4 ? 
1 : 0), "mpm_idx"); - } - } else { - // Signal the actual prediction mode. - int32_t tmp_pred = intra_pred_mode[j]; + const cu_info_t* cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); - uint8_t intra_preds_temp[INTRA_MPM_COUNT+2]; - memcpy(intra_preds_temp, intra_preds[j], sizeof(int8_t)*3); - memcpy(intra_preds_temp+4, &intra_preds[j][3], sizeof(int8_t)*3); - intra_preds_temp[3] = 255; - intra_preds_temp[7] = 255; + const cu_info_t* left_pu = NULL; + const cu_info_t* above_pu = NULL; - // Improvised merge sort - // Sort prediction list from lowest to highest. - if (intra_preds_temp[0] > intra_preds_temp[1]) SWAP(intra_preds_temp[0], intra_preds_temp[1], uint8_t); - if (intra_preds_temp[0] > intra_preds_temp[2]) SWAP(intra_preds_temp[0], intra_preds_temp[2], uint8_t); - if (intra_preds_temp[1] > intra_preds_temp[2]) SWAP(intra_preds_temp[1], intra_preds_temp[2], uint8_t); + if (pu_x > 0) { + assert(pu_x >> 2 > 0); + left_pu = kvz_cu_array_at_const(frame->cu_array, pu_x - 1, pu_y + cu_width - 1); + } + // Don't take the above PU across the LCU boundary. 
+ if (pu_y % LCU_WIDTH > 0 && pu_y > 0) { + assert(pu_y >> 2 > 0); + above_pu = kvz_cu_array_at_const(frame->cu_array, pu_x + cu_width - 1, pu_y - 1); + } - if (intra_preds_temp[4] > intra_preds_temp[5]) SWAP(intra_preds_temp[4], intra_preds_temp[5], uint8_t); - if (intra_preds_temp[4] > intra_preds_temp[6]) SWAP(intra_preds_temp[4], intra_preds_temp[6], uint8_t); - if (intra_preds_temp[5] > intra_preds_temp[6]) SWAP(intra_preds_temp[5], intra_preds_temp[6], uint8_t); - // Merge two subarrays - int32_t array1 = 0; - int32_t array2 = 4; - for (int item = 0; item < INTRA_MPM_COUNT; item++) { - if (intra_preds_temp[array1] < intra_preds_temp[array2]) { - intra_preds[j][item] = intra_preds_temp[array1]; - array1++; - } else { - intra_preds[j][item] = intra_preds_temp[array2]; - array2++; + kvz_intra_get_dir_luma_predictor(pu_x, pu_y, + intra_preds[j], + cur_pu, + left_pu, above_pu); + + + intra_pred_mode_actual[j] = cur_pu->intra.mode; + + for (int i = 0; i < INTRA_MPM_COUNT; i++) { + if (intra_preds[j][i] == intra_pred_mode[j]) { + mpm_preds[j] = (int8_t)i; + break; } } + // Is the mode in the MPM array or not + flag[j] = (mpm_preds[j] == -1) ? 0 : 1; + if (!(cur_pu->intra.multi_ref_idx || (isp_mode))) { + CABAC_BIN(cabac, flag[j], "prev_intra_luma_pred_flag"); + } + } - // Reduce the index of the signaled prediction mode according to the - // prediction list, as it has been already signaled that it's not one - // of the prediction modes. - for (int i = INTRA_MPM_COUNT-1; i >= 0; i--) { - if (tmp_pred > intra_preds[j][i]) { - tmp_pred--; + for (int j = 0; j < num_pred_units; ++j) { + // TODO: this loop is unnecessary in VVC. 
Remove in future + assert(j == 0 && "In VVC this loop should be run only once."); + + // Signal index of the prediction mode in the prediction list, if it is there + if (flag[j]) { + + const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, j); + const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j); + const cu_info_t* cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); + cabac->cur_ctx = &(cabac->ctx.luma_planar_model[(isp_mode ? 0 : 1)]); + if (cur_pu->intra.multi_ref_idx == 0) { + CABAC_BIN(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx_luma_planar"); + } + //CABAC_BIN_EP(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx"); + if (mpm_preds[j] > 0) { + CABAC_BIN_EP(cabac, (mpm_preds[j] > 1 ? 1 : 0), "mpm_idx"); + } + if (mpm_preds[j] > 1) { + CABAC_BIN_EP(cabac, (mpm_preds[j] > 2 ? 1 : 0), "mpm_idx"); + } + if (mpm_preds[j] > 2) { + CABAC_BIN_EP(cabac, (mpm_preds[j] > 3 ? 1 : 0), "mpm_idx"); + } + if (mpm_preds[j] > 3) { + CABAC_BIN_EP(cabac, (mpm_preds[j] > 4 ? 1 : 0), "mpm_idx"); } } - - kvz_cabac_encode_trunc_bin(cabac, tmp_pred, 67 - INTRA_MPM_COUNT); + else { + // Signal the actual prediction mode. + int32_t tmp_pred = intra_pred_mode[j]; + + uint8_t intra_preds_temp[INTRA_MPM_COUNT + 2]; + memcpy(intra_preds_temp, intra_preds[j], sizeof(int8_t) * 3); + memcpy(intra_preds_temp + 4, &intra_preds[j][3], sizeof(int8_t) * 3); + intra_preds_temp[3] = 255; + intra_preds_temp[7] = 255; + + // Improvised merge sort + // Sort prediction list from lowest to highest. 
+ if (intra_preds_temp[0] > intra_preds_temp[1]) SWAP(intra_preds_temp[0], intra_preds_temp[1], uint8_t); + if (intra_preds_temp[0] > intra_preds_temp[2]) SWAP(intra_preds_temp[0], intra_preds_temp[2], uint8_t); + if (intra_preds_temp[1] > intra_preds_temp[2]) SWAP(intra_preds_temp[1], intra_preds_temp[2], uint8_t); + + if (intra_preds_temp[4] > intra_preds_temp[5]) SWAP(intra_preds_temp[4], intra_preds_temp[5], uint8_t); + if (intra_preds_temp[4] > intra_preds_temp[6]) SWAP(intra_preds_temp[4], intra_preds_temp[6], uint8_t); + if (intra_preds_temp[5] > intra_preds_temp[6]) SWAP(intra_preds_temp[5], intra_preds_temp[6], uint8_t); + + // Merge two subarrays + int32_t array1 = 0; + int32_t array2 = 4; + for (int item = 0; item < INTRA_MPM_COUNT; item++) { + if (intra_preds_temp[array1] < intra_preds_temp[array2]) { + intra_preds[j][item] = intra_preds_temp[array1]; + array1++; + } + else { + intra_preds[j][item] = intra_preds_temp[array2]; + array2++; + } + } + + // Reduce the index of the signaled prediction mode according to the + // prediction list, as it has been already signaled that it's not one + // of the prediction modes. 
+ for (int i = INTRA_MPM_COUNT - 1; i >= 0; i--) { + if (tmp_pred > intra_preds[j][i]) { + tmp_pred--; + } + } + + kvz_cabac_encode_trunc_bin(cabac, tmp_pred, 67 - INTRA_MPM_COUNT); + } } } diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 20ff50b5..0f84b512 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -668,7 +668,11 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, WRITE_U(stream, 0, 1, "sps_mrl_enabled_flag"); } - WRITE_U(stream, 0, 1, "sps_mip_enabled_flag"); + if (state->encoder_control->cfg.mip) { + WRITE_U(stream, 1, 1, "sps_mip_enabled_flag"); + } else { + WRITE_U(stream, 0, 1, "sps_mip_enabled_flag"); + } // if(!no_cclm_constraint_flag) if(encoder->chroma_format != KVZ_CSP_400) { WRITE_U(stream, encoder->cfg.cclm, 1, "sps_cclm_enabled_flag"); diff --git a/src/global.h b/src/global.h index 7d8507b8..8b959f99 100644 --- a/src/global.h +++ b/src/global.h @@ -216,6 +216,11 @@ typedef int16_t mv_t; */ #define MAX_REF_LINE_IDX 3 +#define MIP_MAX_INPUT_SIZE 8 +#define MIP_MAX_REDUCED_OUTPUT_SAMPLES 64 +#define MIP_MAX_WIDTH 64 +#define MIP_MAX_HEIGHT 64 + /** * \brief Number of pixels to delay deblocking. * diff --git a/src/intra.c b/src/intra.c index 01f0a767..4c6e204b 100644 --- a/src/intra.c +++ b/src/intra.c @@ -36,6 +36,7 @@ #include "image.h" #include "kvz_math.h" +#include "mip_data.h" #include "strategies/strategies-intra.h" #include "tables.h" #include "transform.h" @@ -99,14 +100,23 @@ int8_t kvz_intra_get_dir_luma_predictor( int8_t number_of_candidates = 0; // The default mode if block is not coded yet is INTRA_PLANAR. + // If the neighboring blocks were MIP blocks, intra mode is set to planar. 
int8_t left_intra_dir = 0; if (left_pu && left_pu->type == CU_INTRA) { - left_intra_dir = left_pu->intra.mode; + if (left_pu->intra.mip_flag) { + left_intra_dir = PLANAR_IDX; + } else { + left_intra_dir = left_pu->intra.mode; + } } int8_t above_intra_dir = 0; if (above_pu && above_pu->type == CU_INTRA && y % LCU_WIDTH != 0) { - above_intra_dir = above_pu->intra.mode; + if (above_pu->intra.mip_flag) { + above_intra_dir = PLANAR_IDX; + } else { + above_intra_dir = above_pu->intra.mode; + } } const int offset = 61; @@ -544,6 +554,327 @@ void kvz_predict_cclm( linear_transform_cclm(cclm_params, sampled_luma, dst, width, height); } + +int kvz_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a) { + assert(!(lcu && cu_a)); + if (width > 2 * height || height > 2 * width) { + return 3; + } + + int context = 0; + const cu_info_t* left = NULL; + const cu_info_t* top = NULL; + if (lcu) { + int x_local = SUB_SCU(x); + int y_local = SUB_SCU(y); + if (x) { + left = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); + } + if (y) { + top = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); + } + } + else { + if (x > 0) { + left = kvz_cu_array_at_const(cu_a, x - 1, y); + } + if (y > 0) { + top = kvz_cu_array_at_const(cu_a, x, y - 1); + } + } + context += left && left->type == CU_INTRA ? left->intra.mip_flag : 0; + context += top && top->type == CU_INTRA ? 
top->intra.mip_flag : 0; + return context; +} + + +void kvz_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len) +{ + if (dst_len < src_len) + { + // Create reduced boundary by downsampling + uint16_t down_smp_factor = src_len / dst_len; + const int log2_factor = kvz_math_floor_log2(down_smp_factor); + const int rounding_offset = (1 << (log2_factor - 1)); + + uint16_t src_idx = 0; + for (uint16_t dst_idx = 0; dst_idx < dst_len; dst_idx++) + { + int sum = 0; + for (int k = 0; k < down_smp_factor; k++) + { + sum += ref_src[src_idx++]; + } + reduced_dst[dst_idx] = (sum + rounding_offset) >> log2_factor; + } + } + else + { + // Copy boundary if no downsampling is needed + for (uint16_t i = 0; i < dst_len; ++i) + { + reduced_dst[i] = ref_src[i]; + } + } +} + + +void kvz_mip_reduced_pred(int* const output, + const int* const input, + const uint8_t* matrix, + const bool transpose, + const int red_bdry_size, + const int red_pred_size, + const int size_id, + const int in_offset, + const int in_offset_tr) +{ + const int input_size = 2 * red_bdry_size; + + // Use local buffer for transposed result + int out_buf_transposed[LCU_WIDTH * LCU_WIDTH]; + int* const out_ptr = transpose ? out_buf_transposed : output; + + int sum = 0; + for (int i = 0; i < input_size; i++) { + sum += input[i]; + } + const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; + assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four"); + + const uint8_t* weight = matrix; + const int input_offset = transpose ? in_offset_tr : in_offset; + + const bool red_size = (size_id == 2); + int pos_res = 0; + for (int y = 0; y < red_pred_size; y++) { + for (int x = 0; x < red_pred_size; x++) { + if (red_size) { + weight -= 1; + } + int tmp0 = red_size ? 
0 : (input[0] * weight[0]); + int tmp1 = input[1] * weight[1]; + int tmp2 = input[2] * weight[2]; + int tmp3 = input[3] * weight[3]; + for (int i = 4; i < input_size; i += 4) { + tmp0 += input[i] * weight[i]; + tmp1 += input[i + 1] * weight[i + 1]; + tmp2 += input[i + 2] * weight[i + 2]; + tmp3 += input[i + 3] * weight[i + 3]; + } + out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset); + pos_res++; + weight += input_size; + } + } + + if (transpose) { + for (int y = 0; y < red_pred_size; y++) { + for (int x = 0; x < red_pred_size; x++) { + output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y]; + } + } + } +} + + +void kvz_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary, + const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim, + const uint16_t src_step, const uint16_t src_stride, + const uint16_t dst_step, const uint16_t dst_stride, + const uint16_t boundary_step, + const uint16_t ups_factor) +{ + const int log2_factor = kvz_math_floor_log2(ups_factor); + assert(ups_factor >= 2 && "Upsampling factor must be at least 2."); + const int rounding_offset = 1 << (log2_factor - 1); + + uint16_t idx_orth_dim = 0; + const int* src_line = src; + int* dst_line = dst; + const int* boundary_line = boundary + boundary_step - 1; + while (idx_orth_dim < src_size_orth_dim) + { + uint16_t idx_upsample_dim = 0; + const int* before = boundary_line; + const int* behind = src_line; + int* cur_dst = dst_line; + while (idx_upsample_dim < src_size_ups_dim) + { + uint16_t pos = 1; + int scaled_before = (*before) << log2_factor; + int scaled_behind = 0; + while (pos <= ups_factor) + { + scaled_before -= *before; + scaled_behind += *behind; + *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor; + + pos++; + cur_dst += dst_step; + } + + idx_upsample_dim++; + before = behind; + behind += src_step; + } + + idx_orth_dim++; + src_line += src_stride; + dst_line += 
dst_stride; + boundary_line += boundary_step; + } +} + + +/** \brief Matrix weighted intra prediction. +*/ +void kvz_mip_predict(encoder_state_t const* const state, kvz_intra_references* const refs, + const uint16_t pred_block_width, const uint16_t pred_block_height, + kvz_pixel* dst, + const int mip_mode, const bool mip_transp) +{ + // MIP prediction uses int values instead of kvz_pixel as some temp values may be negative + + kvz_pixel* out = dst; + int result[32*32] = {0}; + const int mode_idx = mip_mode; + + // *** INPUT PREP *** + + // Initialize prediction parameters START + uint16_t width = pred_block_width; + uint16_t height = pred_block_height; + + int size_id; // Prediction block type + if (width == 4 && height == 4) { + size_id = 0; + } + else if (width == 4 || height == 4 || (width == 8 && height == 8)) { + size_id = 1; + } + else { + size_id = 2; + } + + // Reduced boundary and prediction sizes + int red_bdry_size = (size_id == 0) ? 2 : 4; + int red_pred_size = (size_id < 2) ? 
4 : 8; + + // Upsampling factors + uint16_t ups_hor_factor = width / red_pred_size; + uint16_t ups_ver_factor = height / red_pred_size; + + // Upsampling factors must be powers of two + assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two."); + assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two."); + + // Initialize prediction parameters END + + int ref_samples_top[INTRA_REF_LENGTH]; + int ref_samples_left[INTRA_REF_LENGTH]; + + for (int i = 1; i < INTRA_REF_LENGTH; i++) { + ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init + ref_samples_left[i-1] = (int)refs->ref.left[i]; + } + + // Compute reduced boundary with Haar-downsampling + const int input_size = 2 * red_bdry_size; + + int red_bdry[MIP_MAX_INPUT_SIZE]; + int red_bdry_trans[MIP_MAX_INPUT_SIZE]; + + int* const top_reduced = &red_bdry[0]; + int* const left_reduced = &red_bdry[red_bdry_size]; + + kvz_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size); + kvz_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size); + + // Transposed reduced boundaries + int* const left_reduced_trans = &red_bdry_trans[0]; + int* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; + + for (int x = 0; x < red_bdry_size; x++) { + top_reduced_trans[x] = top_reduced[x]; + } + for (int y = 0; y < red_bdry_size; y++) { + left_reduced_trans[y] = left_reduced[y]; + } + + int input_offset = red_bdry[0]; + int input_offset_trans = red_bdry_trans[0]; + + const bool has_first_col = (size_id < 2); + // First column of matrix not needed for large blocks + red_bdry[0] = has_first_col ? ((1 << (KVZ_BIT_DEPTH - 1)) - input_offset) : 0; + red_bdry_trans[0] = has_first_col ? 
((1 << (KVZ_BIT_DEPTH - 1)) - input_offset_trans) : 0; + + for (int i = 1; i < input_size; ++i) { + red_bdry[i] -= input_offset; + red_bdry_trans[i] -= input_offset_trans; + } + + // *** INPUT PREP *** END + + // *** BLOCK PREDICT *** + + const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); + const bool transpose = mip_transp; + + const uint8_t* matrix; + switch (size_id) { + case 0: + matrix = &kvz_mip_matrix_4x4[mode_idx][0][0]; + break; + case 1: + matrix = &kvz_mip_matrix_8x8[mode_idx][0][0]; + break; + case 2: + matrix = &kvz_mip_matrix_16x16[mode_idx][0][0]; + break; + default: + assert(false && "Invalid MIP size id."); + } + + // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 + int red_pred_buffer[8*8]; + int* const reduced_pred = need_upsampling ? red_pred_buffer : result; + + const int* const reduced_bdry = transpose ? red_bdry_trans : red_bdry; + + kvz_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans); + if (need_upsampling) { + const int* ver_src = reduced_pred; + uint16_t ver_src_step = width; + + if (ups_hor_factor > 1) { + int* const hor_dst = result + (ups_ver_factor - 1) * width; + ver_src = hor_dst; + ver_src_step *= ups_ver_factor; + + kvz_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left, + red_pred_size, red_pred_size, + 1, red_pred_size, 1, ver_src_step, + ups_ver_factor, ups_hor_factor); + } + + if (ups_ver_factor > 1) { + kvz_mip_pred_upsampling_1D(result, ver_src, ref_samples_top, + red_pred_size, width, + ver_src_step, 1, width, 1, + 1, ups_ver_factor); + } + } + + // Assign and cast values from temp array to output + for (int i = 0; i < 32 * 32; i++) { + out[i] = (kvz_pixel)result[i]; + } + // *** BLOCK PREDICT *** END +} + + void kvz_intra_predict( encoder_state_t *const state, kvz_intra_references *refs, @@ -1028,7 +1359,9 @@ static void intra_recon_tb_leaf( cclm_parameters_t 
*cclm_params, lcu_t *lcu, color_t color, - uint8_t multi_ref_idx) + uint8_t multi_ref_idx, + bool mip_flag, + bool mip_transp) { const kvz_config *cfg = &state->encoder_control->cfg; const int shift = color == COLOR_Y ? 0 : 1; @@ -1039,6 +1372,7 @@ static void intra_recon_tb_leaf( log2width -= 1; } const int width = 1 << log2width; + const int height = width; // TODO: proper height for non-square blocks const int lcu_width = LCU_WIDTH >> shift; const vector2d_t luma_px = { x, y }; @@ -1074,8 +1408,29 @@ static void intra_recon_tb_leaf( kvz_pixel pred[32 * 32]; int stride = state->tile->frame->source->stride; const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); + bool use_mip = false; + if (mip_flag) { + if (color == COLOR_Y) { + use_mip = true; + } else { + // MIP can be used for chroma if the chroma scheme is 444 + if (state->encoder_control->chroma_format == KVZ_CSP_444) { + use_mip = true; + } else { + // If MIP cannot be used for chroma, set mode to planar + intra_mode = 0; + } + } + } + if(intra_mode < 68) { - kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index); + if (use_mip) { + assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]"); + kvz_mip_predict(state, &refs, width, height, pred, intra_mode, mip_transp); + } + else { + kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index); + } } else { kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width); if(cclm_params == NULL) { @@ -1123,6 +1478,8 @@ static void intra_recon_tb_leaf( * \param mode_chroma intra mode for chroma, or -1 to skip chroma recon * \param cur_cu pointer to the CU, or NULL to fetch CU from LCU * \param cclm_params pointer for the cclm_parameters, can be NULL if the mode is not cclm mode + * \param mip_flag indicates whether the passed mode_luma is a MIP mode + * \param 
mip_transp indicates whether the used MIP mode is transposed * \param lcu containing LCU */ void kvz_intra_recon_cu( @@ -1135,6 +1492,8 @@ void kvz_intra_recon_cu( cu_info_t *cur_cu, cclm_parameters_t *cclm_params, uint8_t multi_ref_idx, + bool mip_flag, + bool mip_transp, lcu_t *lcu) { const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; @@ -1143,6 +1502,14 @@ void kvz_intra_recon_cu( cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } uint8_t multi_ref_index = multi_ref_idx; + bool use_mip = mip_flag; + bool mip_transposed = mip_transp; + + if (mode_luma != -1 && mode_chroma != -1) { + if (use_mip) { + assert(mode_luma == mode_chroma && "Chroma mode must be derived from luma mode if block uses MIP."); + } + } // Reset CBFs because CBFs might have been set // for depth earlier @@ -1160,10 +1527,10 @@ void kvz_intra_recon_cu( const int32_t x2 = x + offset; const int32_t y2 = y + offset; - kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu); - kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu); - kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu); - kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu); + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu); + kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu); // Propagate coded block flags from child CUs to parent CU. 
uint16_t child_cbfs[3] = { @@ -1182,13 +1549,14 @@ void kvz_intra_recon_cu( } else { const bool has_luma = mode_luma != -1; const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0); + // Process a leaf TU. if (has_luma) { - intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index); + intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index, use_mip, mip_transposed); } if (has_chroma) { - intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0); - intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0); + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0, use_mip, mip_transposed); + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0, use_mip, mip_transposed); } kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false); diff --git a/src/intra.h b/src/intra.h index 1d05fea0..4e3542c3 100644 --- a/src/intra.h +++ b/src/intra.h @@ -42,9 +42,12 @@ #include "global.h" // IWYU pragma: keep #include "kvazaar.h" +// Maximum possible reference line length for intra blocks +#define INTRA_REF_LENGTH (2 * 128 + 3 + 33 * MAX_REF_LINE_IDX) + typedef struct { - kvz_pixel left[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX]; - kvz_pixel top[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX]; + kvz_pixel left[INTRA_REF_LENGTH]; + kvz_pixel top[INTRA_REF_LENGTH]; } kvz_intra_ref; typedef struct { @@ -130,6 +133,8 @@ void kvz_intra_recon_cu( cu_info_t *cur_cu, cclm_parameters_t* cclm_params, uint8_t multi_ref_idx, + bool mip_flag, + bool mip_transp, lcu_t *lcu); @@ -146,4 +151,16 @@ void kvz_predict_cclm( kvz_intra_references* chroma_ref, kvz_pixel* dst, cclm_parameters_t* cclm_params +); + +int kvz_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a); + +void kvz_mip_predict( + encoder_state_t const * const state, + 
kvz_intra_references * refs, + const uint16_t width, + const uint16_t height, + kvz_pixel* dst, + const int mip_mode, + const bool mip_transp ); \ No newline at end of file diff --git a/src/kvazaar.h b/src/kvazaar.h index 9b4da0cb..00052f83 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -517,6 +517,9 @@ typedef struct kvz_config /** \brief enable use of multiple reference lines in intra prediction */ int8_t mrl; + /** \brief enable matrix weighted intra prediction */ + int8_t mip; + int8_t jccr; diff --git a/src/mip_data.h b/src/mip_data.h new file mode 100644 index 00000000..2ace73ab --- /dev/null +++ b/src/mip_data.h @@ -0,0 +1,885 @@ +/***************************************************************************** + * This file is part of uvg266 VVC encoder. + * + * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ****************************************************************************/ + + /** +* \ingroup Reconstruction +* \file +* MIP weight matrix data. +*/ + +/** \file mip_data.h +\brief weight and bias data for matrix-based intra prediction (MIP) +*/ + +#define MIP_SHIFT_MATRIX 6 +#define MIP_OFFSET_MATRIX 32 + +// NOTE: these matrices need to be aligned if used with avx2 +const uint8_t kvz_mip_matrix_4x4[16][16][4] = +{ + { + { 32, 30, 90, 28}, + { 32, 32, 72, 28}, + { 34, 77, 53, 30}, + { 51, 124, 36, 37}, + { 31, 31, 95, 37}, + { 33, 31, 70, 50}, + { 52, 80, 25, 60}, + { 78, 107, 1, 65}, + { 31, 29, 37, 95}, + { 38, 34, 19, 101}, + { 73, 85, 0, 81}, + { 92, 99, 0, 65}, + { 34, 29, 14, 111}, + { 48, 48, 7, 100}, + { 80, 91, 0, 74}, + { 89, 97, 0, 64} + }, + { + { 31, 23, 34, 29}, + { 31, 43, 34, 31}, + { 30, 95, 34, 32}, + { 29, 100, 35, 33}, + { 31, 23, 34, 29}, + { 31, 43, 34, 31}, + { 30, 95, 34, 32}, + { 29, 99, 35, 33}, + { 31, 24, 35, 29}, + { 31, 44, 34, 31}, + { 30, 95, 35, 32}, + { 29, 99, 35, 33}, + { 31, 24, 35, 30}, + { 31, 44, 35, 31}, + { 30, 95, 35, 32}, + { 29, 99, 35, 33} + }, + { + { 32, 32, 36, 58}, + { 32, 29, 26, 66}, + { 36, 37, 23, 61}, + { 79, 84, 3, 37}, + { 32, 32, 30, 69}, + { 33, 29, 24, 71}, + { 44, 16, 21, 70}, + { 96, 18, 0, 57}, + { 32, 31, 24, 74}, + { 33, 30, 23, 71}, + { 36, 24, 24, 71}, + { 59, 9, 16, 68}, + { 32, 32, 23, 75}, + { 33, 30, 24, 70}, + { 32, 30, 25, 71}, + { 36, 26, 25, 70} + }, + { + { 
32, 33, 34, 32}, + { 32, 30, 22, 38}, + { 29, 46, 25, 38}, + { 53, 123, 28, 22}, + { 32, 33, 30, 37}, + { 32, 30, 21, 38}, + { 32, 40, 24, 38}, + { 64, 116, 26, 17}, + { 32, 32, 23, 49}, + { 32, 30, 21, 39}, + { 34, 39, 24, 37}, + { 72, 109, 23, 16}, + { 33, 31, 17, 60}, + { 32, 31, 21, 39}, + { 35, 41, 24, 37}, + { 72, 106, 22, 18} + }, + { + { 34, 25, 89, 20}, + { 38, 32, 47, 24}, + { 40, 86, 29, 27}, + { 38, 98, 32, 29}, + { 34, 31, 94, 40}, + { 44, 25, 83, 27}, + { 54, 72, 43, 16}, + { 47, 94, 33, 22}, + { 33, 31, 36, 94}, + { 43, 23, 51, 76}, + { 62, 55, 64, 25}, + { 57, 89, 38, 15}, + { 32, 32, 28, 101}, + { 38, 26, 33, 94}, + { 55, 38, 68, 47}, + { 59, 80, 52, 16} + }, + { + { 28, 30, 68, 29}, + { 23, 48, 23, 48}, + { 39, 98, 16, 42}, + { 84, 86, 20, 17}, + { 25, 31, 52, 74}, + { 38, 68, 5, 70}, + { 95, 78, 7, 21}, + { 127, 54, 12, 0}, + { 30, 47, 14, 107}, + { 79, 76, 0, 53}, + { 127, 59, 7, 1}, + { 127, 51, 9, 0}, + { 50, 71, 1, 96}, + { 109, 69, 7, 25}, + { 127, 56, 9, 0}, + { 123, 53, 13, 0} + }, + { + { 40, 20, 72, 18}, + { 48, 29, 44, 18}, + { 53, 81, 35, 18}, + { 48, 96, 33, 22}, + { 45, 23, 79, 49}, + { 61, 21, 56, 49}, + { 72, 52, 32, 48}, + { 65, 69, 20, 50}, + { 41, 27, 29, 96}, + { 49, 22, 28, 94}, + { 52, 22, 28, 93}, + { 49, 27, 27, 92}, + { 37, 29, 26, 98}, + { 39, 28, 28, 97}, + { 38, 28, 30, 97}, + { 38, 29, 30, 95} + }, + { + { 33, 27, 43, 27}, + { 32, 29, 31, 31}, + { 31, 73, 33, 31}, + { 35, 104, 34, 28}, + { 32, 30, 63, 22}, + { 33, 26, 33, 29}, + { 33, 57, 33, 30}, + { 37, 100, 35, 27}, + { 32, 31, 85, 25}, + { 34, 25, 39, 25}, + { 35, 39, 32, 28}, + { 40, 91, 35, 25}, + { 32, 30, 77, 50}, + { 34, 26, 54, 22}, + { 37, 31, 34, 27}, + { 45, 75, 34, 23} + }, + { + { 34, 25, 77, 19}, + { 36, 34, 56, 24}, + { 41, 83, 39, 30}, + { 47, 96, 28, 35}, + { 34, 31, 70, 65}, + { 38, 29, 53, 77}, + { 43, 36, 37, 83}, + { 48, 39, 28, 83}, + { 33, 31, 31, 98}, + { 33, 31, 30, 99}, + { 34, 30, 31, 98}, + { 36, 29, 31, 96}, + { 32, 32, 30, 97}, + { 32, 
32, 31, 96}, + { 31, 33, 33, 96}, + { 32, 33, 34, 94} + }, + { + { 30, 30, 93, 19}, + { 31, 59, 67, 34}, + { 31, 79, 36, 59}, + { 30, 67, 17, 79}, + { 30, 38, 68, 69}, + { 29, 40, 43, 91}, + { 26, 35, 32, 101}, + { 23, 32, 30, 101}, + { 26, 34, 30, 101}, + { 23, 33, 30, 102}, + { 20, 32, 31, 102}, + { 18, 33, 32, 102}, + { 23, 33, 31, 100}, + { 20, 34, 32, 100}, + { 18, 35, 33, 100}, + { 18, 35, 33, 100} + }, + { + { 31, 54, 90, 26}, + { 32, 60, 53, 61}, + { 34, 49, 37, 84}, + { 34, 39, 35, 89}, + { 35, 38, 41, 88}, + { 35, 35, 32, 96}, + { 35, 31, 33, 96}, + { 35, 32, 35, 94}, + { 34, 34, 30, 97}, + { 35, 32, 33, 95}, + { 35, 32, 34, 94}, + { 35, 34, 34, 93}, + { 34, 34, 34, 93}, + { 35, 34, 34, 93}, + { 35, 34, 34, 92}, + { 36, 34, 35, 91} + }, + { + { 32, 29, 54, 24}, + { 31, 32, 34, 29}, + { 31, 43, 34, 29}, + { 32, 67, 36, 28}, + { 31, 34, 69, 37}, + { 31, 35, 46, 33}, + { 30, 35, 39, 33}, + { 30, 42, 39, 36}, + { 31, 35, 39, 88}, + { 30, 38, 41, 84}, + { 30, 39, 40, 81}, + { 39, 46, 38, 78}, + { 31, 36, 34, 96}, + { 34, 38, 37, 93}, + { 55, 42, 38, 82}, + { 89, 53, 38, 65} + }, + { + { 32, 33, 43, 29}, + { 32, 30, 29, 33}, + { 31, 47, 31, 33}, + { 33, 100, 31, 31}, + { 32, 33, 74, 25}, + { 32, 32, 34, 31}, + { 32, 33, 30, 33}, + { 32, 68, 30, 32}, + { 32, 31, 91, 40}, + { 32, 32, 58, 26}, + { 31, 31, 30, 32}, + { 31, 42, 30, 33}, + { 32, 31, 49, 85}, + { 32, 31, 83, 35}, + { 31, 33, 48, 29}, + { 31, 36, 32, 33} + }, + { + { 31, 29, 81, 35}, + { 32, 28, 34, 50}, + { 31, 75, 16, 43}, + { 34, 103, 29, 32}, + { 32, 32, 53, 78}, + { 31, 28, 36, 88}, + { 30, 52, 18, 73}, + { 52, 88, 17, 35}, + { 32, 32, 35, 94}, + { 30, 31, 35, 95}, + { 36, 29, 31, 92}, + { 100, 43, 16, 40}, + { 32, 32, 35, 93}, + { 30, 32, 38, 93}, + { 55, 18, 37, 83}, + { 127, 0, 30, 40} + }, + { + { 31, 22, 47, 30}, + { 31, 48, 25, 34}, + { 30, 95, 31, 32}, + { 32, 103, 33, 32}, + { 30, 24, 57, 31}, + { 30, 47, 26, 34}, + { 31, 95, 31, 32}, + { 43, 97, 35, 25}, + { 29, 26, 44, 63}, + { 37, 38, 
24, 47}, + { 74, 63, 28, 20}, + { 110, 58, 34, 3}, + { 46, 22, 5, 108}, + { 93, 5, 9, 77}, + { 127, 0, 17, 52}, + { 127, 0, 15, 50} + }, + { + { 32, 27, 68, 24}, + { 35, 23, 35, 28}, + { 35, 64, 29, 29}, + { 37, 104, 33, 28}, + { 32, 32, 91, 40}, + { 36, 23, 67, 36}, + { 49, 23, 39, 28}, + { 60, 67, 30, 20}, + { 32, 32, 36, 95}, + { 35, 29, 38, 93}, + { 50, 16, 30, 84}, + { 72, 16, 15, 65}, + { 32, 32, 27, 100}, + { 33, 32, 29, 100}, + { 37, 29, 30, 98}, + { 48, 21, 29, 90} + } +}; + +const uint8_t kvz_mip_matrix_8x8[8][16][8] = +{ + { + { 30, 63, 46, 37, 25, 33, 33, 34}, + { 30, 60, 66, 38, 32, 31, 32, 33}, + { 29, 45, 74, 42, 32, 32, 32, 33}, + { 30, 39, 62, 58, 32, 33, 32, 33}, + { 30, 66, 55, 39, 32, 30, 30, 36}, + { 29, 54, 69, 40, 33, 31, 31, 33}, + { 28, 48, 71, 43, 32, 33, 32, 33}, + { 28, 41, 72, 46, 32, 34, 32, 33}, + { 30, 66, 56, 40, 32, 33, 28, 33}, + { 29, 55, 69, 39, 33, 33, 30, 32}, + { 27, 46, 72, 43, 33, 33, 32, 33}, + { 27, 42, 69, 48, 32, 34, 32, 33}, + { 30, 63, 55, 40, 32, 33, 35, 30}, + { 29, 56, 66, 40, 33, 33, 33, 30}, + { 27, 47, 69, 44, 33, 33, 33, 32}, + { 27, 42, 65, 50, 32, 34, 32, 33} + }, + { + { 32, 33, 30, 31, 74, 30, 31, 32}, + { 33, 56, 28, 30, 41, 29, 32, 32}, + { 33, 77, 52, 26, 29, 34, 30, 32}, + { 33, 37, 80, 41, 31, 34, 30, 32}, + { 32, 32, 33, 31, 59, 76, 28, 31}, + { 33, 31, 31, 30, 78, 40, 28, 32}, + { 33, 47, 28, 29, 53, 27, 31, 31}, + { 33, 61, 44, 28, 34, 32, 31, 31}, + { 32, 31, 34, 30, 26, 64, 76, 27}, + { 32, 31, 34, 29, 45, 86, 36, 29}, + { 33, 27, 34, 29, 73, 55, 25, 32}, + { 33, 33, 34, 30, 62, 33, 30, 31}, + { 32, 31, 34, 30, 30, 29, 58, 74}, + { 32, 31, 35, 29, 27, 53, 77, 35}, + { 32, 30, 36, 29, 40, 80, 44, 31}, + { 33, 28, 37, 30, 58, 60, 31, 33} + }, + { + { 32, 51, 27, 32, 27, 50, 29, 32}, + { 32, 95, 42, 29, 29, 42, 30, 32}, + { 32, 27, 99, 34, 31, 41, 29, 32}, + { 32, 34, 21, 104, 31, 42, 30, 32}, + { 32, 45, 30, 32, 9, 88, 40, 30}, + { 32, 77, 38, 30, 9, 76, 38, 30}, + { 32, 38, 78, 33, 14, 67, 37, 30}, 
+ { 32, 30, 30, 87, 20, 59, 38, 31}, + { 33, 37, 32, 32, 27, 18, 106, 34}, + { 34, 44, 34, 31, 25, 17, 108, 31}, + { 36, 39, 45, 31, 24, 15, 108, 30}, + { 37, 31, 31, 54, 25, 14, 101, 32}, + { 36, 33, 32, 30, 29, 37, 13, 110}, + { 39, 32, 32, 29, 27, 37, 15, 108}, + { 44, 33, 31, 27, 25, 37, 16, 106}, + { 47, 30, 31, 32, 25, 34, 19, 102} + }, + { + { 32, 48, 35, 35, 47, 68, 31, 31}, + { 32, 33, 59, 40, 27, 71, 33, 30}, + { 32, 29, 47, 65, 24, 62, 37, 30}, + { 33, 33, 31, 81, 26, 50, 42, 32}, + { 32, 30, 40, 38, 30, 70, 55, 31}, + { 32, 20, 46, 50, 26, 55, 64, 31}, + { 33, 30, 29, 66, 25, 41, 72, 33}, + { 36, 34, 27, 69, 26, 31, 67, 39}, + { 33, 28, 36, 40, 30, 26, 85, 47}, + { 36, 27, 33, 50, 31, 20, 79, 53}, + { 43, 30, 26, 57, 28, 17, 67, 62}, + { 51, 27, 28, 55, 22, 23, 49, 70}, + { 38, 29, 32, 39, 28, 30, 22, 104}, + { 51, 31, 28, 43, 24, 31, 17, 102}, + { 69, 23, 30, 40, 15, 38, 10, 95}, + { 77, 13, 35, 38, 8, 43, 8, 90} + }, + { + { 32, 38, 32, 33, 101, 40, 29, 32}, + { 32, 40, 37, 32, 100, 36, 30, 32}, + { 32, 37, 46, 35, 94, 33, 30, 31}, + { 33, 34, 30, 62, 81, 35, 30, 31}, + { 32, 32, 33, 32, 22, 102, 39, 29}, + { 32, 31, 33, 33, 26, 104, 34, 28}, + { 33, 33, 33, 33, 31, 103, 32, 28}, + { 33, 32, 34, 36, 37, 94, 33, 28}, + { 32, 33, 32, 32, 34, 24, 99, 36}, + { 32, 34, 33, 33, 33, 30, 98, 32}, + { 33, 33, 34, 33, 31, 37, 95, 29}, + { 33, 33, 33, 36, 30, 46, 85, 31}, + { 32, 33, 32, 33, 30, 34, 23, 104}, + { 32, 34, 33, 33, 31, 32, 30, 98}, + { 32, 33, 34, 34, 31, 29, 39, 91}, + { 33, 33, 32, 37, 32, 30, 47, 82} + }, + { + { 32, 52, 48, 31, 38, 76, 26, 32}, + { 33, 19, 62, 50, 25, 50, 51, 31}, + { 33, 30, 20, 74, 29, 29, 54, 51}, + { 34, 35, 23, 56, 31, 25, 41, 76}, + { 33, 25, 38, 39, 28, 39, 83, 35}, + { 35, 28, 25, 47, 31, 23, 57, 74}, + { 37, 35, 22, 38, 31, 27, 30, 101}, + { 38, 32, 33, 29, 30, 31, 27, 103}, + { 34, 32, 27, 37, 32, 25, 41, 92}, + { 38, 33, 28, 32, 30, 31, 18, 111}, + { 40, 32, 33, 27, 29, 33, 18, 111}, + { 40, 32, 34, 27, 28, 33, 23, 
105}, + { 35, 32, 30, 33, 31, 33, 20, 107}, + { 38, 31, 33, 30, 29, 33, 21, 106}, + { 40, 32, 33, 29, 29, 34, 22, 105}, + { 40, 32, 33, 30, 29, 34, 24, 101} + }, + { + { 32, 28, 31, 33, 92, 33, 30, 31}, + { 33, 30, 28, 33, 71, 26, 32, 30}, + { 33, 60, 26, 33, 47, 28, 33, 30}, + { 33, 63, 44, 36, 37, 31, 33, 30}, + { 33, 30, 31, 33, 43, 90, 33, 29}, + { 33, 28, 29, 34, 71, 71, 26, 30}, + { 33, 30, 26, 33, 86, 45, 28, 30}, + { 33, 38, 29, 32, 74, 32, 33, 29}, + { 33, 32, 30, 32, 29, 41, 95, 27}, + { 34, 31, 29, 33, 26, 71, 73, 22}, + { 34, 31, 29, 33, 37, 88, 46, 25}, + { 33, 32, 28, 34, 55, 75, 36, 28}, + { 34, 31, 30, 32, 33, 27, 43, 89}, + { 35, 32, 28, 33, 33, 23, 77, 59}, + { 34, 33, 28, 33, 30, 35, 91, 37}, + { 34, 34, 28, 34, 33, 53, 74, 31} + }, + { + { 33, 49, 26, 32, 26, 52, 28, 31}, + { 33, 71, 72, 24, 30, 32, 34, 31}, + { 32, 23, 70, 68, 32, 32, 32, 32}, + { 31, 33, 21, 106, 33, 32, 32, 33}, + { 34, 47, 32, 29, 5, 86, 44, 26}, + { 34, 44, 89, 28, 28, 37, 33, 30}, + { 32, 27, 46, 89, 33, 31, 31, 32}, + { 30, 33, 20, 107, 33, 33, 32, 33}, + { 35, 39, 42, 27, 26, 24, 92, 35}, + { 34, 27, 87, 43, 30, 34, 38, 31}, + { 31, 31, 32, 100, 32, 33, 30, 32}, + { 29, 32, 22, 106, 33, 33, 32, 33}, + { 35, 29, 47, 32, 32, 32, 17, 100}, + { 34, 24, 69, 60, 34, 33, 28, 44}, + { 31, 33, 31, 99, 32, 33, 32, 31}, + { 29, 33, 25, 103, 33, 33, 32, 35} + } +}; + +const uint8_t kvz_mip_matrix_16x16[6][64][7] = +{ + { + { 42, 37, 33, 27, 44, 33, 35}, + { 71, 39, 34, 24, 36, 35, 36}, + { 77, 46, 35, 33, 30, 34, 36}, + { 64, 60, 35, 33, 31, 32, 36}, + { 49, 71, 38, 32, 32, 31, 36}, + { 42, 66, 50, 33, 31, 32, 36}, + { 40, 52, 67, 33, 31, 32, 35}, + { 38, 43, 75, 33, 32, 32, 35}, + { 56, 40, 33, 26, 43, 38, 36}, + { 70, 49, 34, 30, 28, 38, 38}, + { 65, 57, 36, 34, 28, 33, 39}, + { 59, 60, 39, 33, 30, 31, 38}, + { 55, 60, 43, 33, 30, 31, 38}, + { 51, 61, 47, 33, 30, 32, 37}, + { 46, 62, 51, 34, 30, 32, 37}, + { 42, 60, 55, 33, 31, 32, 37}, + { 60, 42, 34, 30, 37, 43, 38}, + { 68, 52, 
35, 35, 22, 37, 40}, + { 62, 58, 37, 34, 28, 31, 40}, + { 58, 59, 41, 33, 30, 30, 39}, + { 56, 59, 44, 34, 30, 31, 38}, + { 53, 60, 45, 33, 30, 31, 38}, + { 49, 65, 45, 33, 30, 31, 38}, + { 45, 64, 47, 33, 31, 32, 38}, + { 59, 44, 35, 31, 34, 43, 41}, + { 66, 53, 36, 35, 25, 31, 43}, + { 61, 58, 38, 34, 29, 30, 40}, + { 59, 57, 41, 33, 30, 31, 39}, + { 57, 58, 43, 33, 30, 31, 39}, + { 54, 61, 43, 33, 31, 31, 39}, + { 51, 64, 43, 33, 31, 31, 39}, + { 48, 64, 45, 33, 32, 31, 39}, + { 57, 45, 35, 30, 35, 40, 44}, + { 65, 54, 37, 33, 33, 24, 44}, + { 63, 56, 38, 34, 30, 29, 39}, + { 61, 56, 41, 34, 30, 32, 39}, + { 58, 58, 42, 33, 31, 31, 39}, + { 54, 62, 41, 33, 31, 31, 39}, + { 51, 65, 42, 33, 31, 31, 39}, + { 48, 63, 43, 33, 32, 31, 39}, + { 55, 46, 35, 30, 36, 38, 47}, + { 65, 53, 37, 32, 36, 26, 40}, + { 65, 54, 38, 33, 31, 30, 38}, + { 63, 55, 39, 33, 30, 32, 38}, + { 59, 58, 40, 33, 31, 31, 39}, + { 54, 64, 40, 33, 31, 30, 40}, + { 49, 66, 40, 32, 32, 30, 41}, + { 48, 64, 42, 32, 32, 30, 41}, + { 54, 46, 35, 30, 34, 39, 49}, + { 64, 52, 36, 32, 34, 34, 35}, + { 65, 53, 37, 33, 32, 32, 37}, + { 63, 55, 38, 33, 31, 31, 39}, + { 59, 60, 38, 33, 31, 31, 40}, + { 54, 64, 38, 33, 32, 30, 40}, + { 49, 66, 39, 33, 32, 29, 41}, + { 47, 64, 42, 32, 33, 29, 42}, + { 51, 46, 35, 31, 33, 37, 54}, + { 61, 51, 36, 32, 33, 38, 36}, + { 63, 53, 37, 32, 32, 34, 37}, + { 62, 55, 37, 33, 32, 32, 39}, + { 58, 59, 37, 33, 32, 31, 40}, + { 53, 63, 38, 33, 32, 31, 40}, + { 49, 64, 40, 33, 33, 30, 41}, + { 46, 62, 42, 33, 33, 30, 42} + }, + { + { 39, 34, 33, 58, 44, 31, 32}, + { 60, 38, 32, 40, 51, 30, 31}, + { 73, 49, 31, 39, 48, 32, 31}, + { 60, 73, 30, 39, 46, 33, 32}, + { 43, 87, 35, 38, 45, 33, 32}, + { 35, 78, 54, 36, 45, 33, 32}, + { 33, 47, 86, 35, 44, 33, 32}, + { 31, 17, 114, 34, 44, 34, 33}, + { 43, 37, 32, 53, 70, 30, 31}, + { 53, 50, 30, 42, 72, 31, 30}, + { 52, 66, 30, 39, 70, 32, 30}, + { 46, 78, 35, 37, 68, 34, 30}, + { 43, 75, 48, 37, 66, 34, 30}, + { 40, 62, 68, 35, 
65, 35, 30}, + { 33, 37, 97, 33, 62, 37, 31}, + { 26, 14, 122, 32, 59, 38, 33}, + { 40, 39, 33, 34, 87, 37, 30}, + { 45, 54, 32, 34, 84, 41, 29}, + { 41, 70, 35, 33, 83, 40, 29}, + { 37, 73, 44, 32, 82, 40, 30}, + { 37, 65, 60, 31, 81, 41, 29}, + { 35, 48, 82, 30, 79, 43, 29}, + { 28, 27, 108, 28, 76, 45, 30}, + { 19, 11, 127, 27, 70, 46, 32}, + { 38, 40, 34, 27, 73, 62, 28}, + { 39, 54, 35, 30, 73, 62, 28}, + { 33, 65, 41, 29, 75, 59, 28}, + { 30, 65, 53, 27, 76, 58, 29}, + { 29, 53, 72, 26, 77, 58, 29}, + { 27, 35, 95, 24, 77, 60, 28}, + { 19, 19, 117, 23, 74, 61, 30}, + { 9, 16, 127, 23, 68, 60, 34}, + { 35, 40, 35, 29, 44, 89, 30}, + { 33, 51, 39, 29, 49, 86, 30}, + { 28, 57, 49, 28, 53, 83, 30}, + { 24, 52, 65, 26, 56, 82, 30}, + { 22, 39, 86, 24, 58, 82, 30}, + { 18, 22, 108, 23, 59, 82, 31}, + { 10, 13, 125, 22, 58, 80, 33}, + { 0, 19, 127, 22, 56, 74, 40}, + { 33, 40, 36, 31, 28, 90, 45}, + { 29, 46, 44, 29, 31, 92, 43}, + { 24, 45, 58, 28, 34, 91, 43}, + { 19, 37, 78, 26, 37, 91, 43}, + { 15, 22, 99, 25, 38, 91, 42}, + { 11, 11, 118, 24, 39, 90, 44}, + { 2, 11, 127, 23, 41, 85, 48}, + { 0, 17, 127, 23, 43, 75, 55}, + { 31, 37, 39, 30, 28, 54, 82}, + { 27, 37, 52, 28, 30, 58, 79}, + { 22, 30, 70, 27, 32, 58, 79}, + { 15, 19, 91, 26, 33, 58, 79}, + { 10, 8, 111, 25, 34, 58, 79}, + { 5, 2, 125, 25, 35, 57, 80}, + { 0, 9, 127, 25, 36, 53, 84}, + { 0, 13, 127, 25, 39, 47, 88}, + { 28, 29, 46, 28, 39, 2, 123}, + { 24, 24, 62, 27, 41, 1, 125}, + { 19, 14, 81, 25, 43, 0, 126}, + { 13, 4, 101, 24, 44, 0, 127}, + { 6, 0, 116, 23, 45, 0, 127}, + { 0, 0, 126, 23, 45, 1, 127}, + { 0, 4, 127, 25, 44, 2, 127}, + { 0, 9, 127, 25, 44, 3, 127} + }, + { + { 30, 32, 32, 42, 34, 32, 32}, + { 63, 26, 34, 16, 38, 32, 32}, + { 98, 26, 34, 25, 34, 33, 32}, + { 75, 61, 30, 31, 32, 33, 32}, + { 36, 94, 32, 30, 33, 32, 32}, + { 26, 76, 58, 30, 33, 32, 32}, + { 30, 39, 91, 31, 32, 33, 31}, + { 32, 23, 105, 32, 32, 32, 32}, + { 34, 30, 33, 31, 52, 29, 32}, + { 66, 24, 34, 11, 41, 33, 
32}, + { 97, 28, 34, 24, 34, 33, 32}, + { 71, 65, 30, 30, 32, 33, 32}, + { 34, 92, 35, 30, 33, 32, 32}, + { 26, 70, 64, 29, 34, 32, 32}, + { 30, 37, 94, 30, 33, 32, 31}, + { 32, 23, 105, 31, 33, 33, 31}, + { 37, 29, 33, 8, 79, 27, 32}, + { 71, 22, 35, 5, 50, 32, 32}, + { 98, 29, 34, 23, 34, 34, 32}, + { 66, 70, 30, 31, 31, 33, 32}, + { 31, 92, 38, 30, 33, 32, 32}, + { 26, 66, 68, 29, 34, 32, 31}, + { 30, 34, 97, 30, 34, 33, 31}, + { 31, 22, 106, 30, 34, 33, 31}, + { 40, 28, 34, 0, 76, 46, 28}, + { 76, 21, 35, 0, 55, 35, 32}, + { 97, 32, 34, 21, 37, 33, 33}, + { 61, 75, 29, 30, 32, 32, 32}, + { 29, 92, 40, 29, 33, 32, 32}, + { 26, 62, 73, 29, 34, 32, 31}, + { 29, 32, 99, 30, 34, 33, 30}, + { 31, 22, 107, 30, 34, 33, 31}, + { 42, 27, 34, 1, 48, 79, 25}, + { 80, 20, 35, 0, 48, 47, 31}, + { 94, 36, 32, 17, 40, 33, 33}, + { 55, 80, 29, 27, 35, 31, 32}, + { 27, 90, 43, 28, 34, 32, 31}, + { 26, 58, 76, 29, 33, 33, 30}, + { 29, 30, 101, 29, 34, 34, 30}, + { 31, 21, 108, 29, 35, 34, 30}, + { 44, 26, 34, 6, 30, 80, 40}, + { 81, 21, 35, 0, 41, 52, 35}, + { 90, 41, 31, 14, 41, 35, 33}, + { 51, 82, 29, 24, 37, 32, 32}, + { 27, 87, 47, 27, 35, 32, 31}, + { 26, 54, 79, 29, 34, 33, 30}, + { 29, 29, 102, 28, 34, 33, 30}, + { 31, 21, 108, 28, 35, 33, 31}, + { 47, 26, 34, 7, 34, 44, 75}, + { 80, 24, 34, 0, 41, 41, 50}, + { 84, 45, 31, 12, 40, 36, 36}, + { 49, 81, 31, 22, 37, 33, 32}, + { 28, 81, 51, 26, 35, 33, 31}, + { 28, 51, 81, 28, 34, 33, 30}, + { 29, 30, 101, 28, 35, 33, 31}, + { 31, 22, 107, 28, 35, 33, 32}, + { 48, 27, 34, 10, 40, 16, 97}, + { 75, 27, 34, 3, 42, 26, 66}, + { 77, 47, 33, 12, 40, 32, 43}, + { 49, 75, 36, 21, 37, 33, 35}, + { 32, 72, 55, 25, 36, 33, 32}, + { 30, 49, 81, 27, 35, 33, 31}, + { 30, 32, 98, 28, 35, 32, 32}, + { 31, 24, 104, 28, 35, 32, 33} + }, + { + { 36, 29, 33, 43, 47, 29, 31}, + { 74, 20, 35, 19, 47, 34, 32}, + { 92, 35, 32, 29, 31, 40, 34}, + { 53, 80, 26, 33, 28, 36, 37}, + { 24, 91, 41, 31, 31, 31, 38}, + { 25, 57, 74, 31, 32, 30, 37}, + { 32, 
28, 99, 32, 32, 29, 36}, + { 34, 20, 105, 33, 32, 30, 35}, + { 50, 26, 34, 33, 74, 30, 31}, + { 75, 28, 33, 23, 46, 47, 33}, + { 64, 58, 29, 30, 26, 46, 40}, + { 31, 85, 37, 31, 27, 33, 44}, + { 22, 67, 64, 30, 31, 28, 42}, + { 29, 35, 93, 31, 32, 27, 40}, + { 33, 20, 105, 32, 33, 27, 37}, + { 34, 19, 106, 33, 32, 29, 36}, + { 51, 29, 33, 25, 72, 51, 30}, + { 61, 42, 31, 30, 31, 60, 39}, + { 40, 70, 34, 32, 24, 41, 50}, + { 22, 72, 54, 30, 31, 27, 50}, + { 25, 44, 83, 30, 33, 25, 44}, + { 32, 23, 102, 32, 33, 26, 40}, + { 34, 18, 107, 32, 33, 28, 37}, + { 34, 19, 105, 33, 32, 30, 35}, + { 45, 35, 32, 30, 39, 79, 33}, + { 43, 53, 33, 35, 24, 53, 55}, + { 27, 67, 45, 32, 29, 27, 61}, + { 22, 53, 72, 30, 33, 22, 52}, + { 28, 31, 95, 31, 33, 25, 43}, + { 32, 20, 105, 32, 33, 27, 38}, + { 34, 18, 107, 32, 32, 29, 36}, + { 34, 20, 105, 33, 31, 31, 35}, + { 38, 40, 32, 35, 23, 72, 54}, + { 31, 55, 39, 34, 29, 32, 73}, + { 22, 57, 60, 31, 35, 18, 64}, + { 25, 39, 86, 31, 35, 22, 49}, + { 30, 24, 101, 32, 33, 27, 40}, + { 33, 19, 106, 32, 32, 30, 36}, + { 34, 18, 107, 33, 31, 31, 35}, + { 34, 20, 104, 33, 31, 32, 34}, + { 33, 42, 35, 34, 28, 39, 82}, + { 26, 51, 50, 33, 34, 18, 80}, + { 23, 46, 74, 31, 35, 20, 59}, + { 27, 32, 93, 32, 34, 26, 44}, + { 31, 22, 103, 32, 32, 30, 37}, + { 33, 19, 106, 33, 31, 31, 35}, + { 34, 19, 106, 33, 31, 32, 34}, + { 35, 21, 103, 34, 31, 32, 34}, + { 29, 41, 41, 33, 34, 20, 92}, + { 24, 44, 62, 34, 35, 18, 73}, + { 24, 37, 83, 34, 33, 25, 52}, + { 28, 28, 97, 33, 32, 30, 40}, + { 32, 23, 103, 33, 31, 32, 36}, + { 34, 20, 105, 34, 30, 33, 34}, + { 35, 20, 104, 34, 30, 33, 33}, + { 35, 22, 102, 34, 30, 33, 34}, + { 27, 38, 51, 34, 34, 20, 86}, + { 26, 37, 71, 35, 34, 24, 64}, + { 27, 33, 87, 35, 32, 30, 47}, + { 30, 28, 96, 34, 31, 32, 39}, + { 32, 24, 100, 35, 30, 32, 36}, + { 34, 23, 101, 34, 30, 33, 34}, + { 35, 23, 101, 34, 30, 32, 34}, + { 34, 24, 99, 35, 30, 33, 34} + }, + { + { 39, 30, 31, 67, 33, 34, 31}, + { 72, 21, 32, 43, 39, 33, 
31}, + { 100, 23, 32, 35, 39, 34, 31}, + { 75, 63, 24, 32, 38, 34, 32}, + { 32, 98, 26, 29, 37, 35, 32}, + { 22, 77, 55, 29, 36, 35, 31}, + { 31, 37, 90, 31, 35, 35, 32}, + { 35, 22, 100, 33, 33, 36, 33}, + { 47, 29, 32, 74, 54, 32, 31}, + { 71, 24, 32, 60, 50, 36, 30}, + { 86, 31, 30, 46, 48, 37, 30}, + { 65, 63, 25, 34, 46, 39, 30}, + { 33, 85, 32, 28, 43, 40, 30}, + { 26, 64, 60, 27, 39, 41, 30}, + { 33, 33, 87, 29, 35, 41, 31}, + { 37, 23, 93, 32, 33, 41, 32}, + { 41, 32, 32, 45, 84, 32, 32}, + { 55, 31, 32, 50, 70, 40, 30}, + { 62, 37, 31, 45, 61, 45, 29}, + { 53, 55, 31, 36, 55, 48, 29}, + { 38, 63, 40, 29, 48, 50, 28}, + { 34, 49, 60, 27, 43, 51, 29}, + { 38, 30, 78, 28, 38, 50, 31}, + { 40, 24, 83, 30, 36, 48, 33}, + { 35, 33, 33, 29, 75, 58, 29}, + { 39, 35, 33, 34, 68, 59, 29}, + { 41, 39, 34, 36, 61, 62, 29}, + { 41, 43, 37, 33, 54, 64, 28}, + { 41, 43, 45, 30, 48, 65, 29}, + { 42, 36, 56, 27, 44, 63, 30}, + { 42, 30, 65, 27, 41, 60, 33}, + { 42, 28, 68, 28, 37, 56, 36}, + { 33, 34, 33, 31, 42, 88, 30}, + { 31, 36, 34, 31, 44, 84, 31}, + { 31, 37, 35, 32, 43, 83, 31}, + { 35, 35, 39, 32, 40, 82, 31}, + { 40, 32, 44, 31, 38, 81, 31}, + { 44, 30, 48, 30, 37, 78, 33}, + { 44, 30, 52, 28, 37, 72, 36}, + { 43, 30, 55, 29, 35, 66, 40}, + { 32, 33, 33, 34, 25, 85, 48}, + { 30, 34, 34, 33, 25, 88, 44}, + { 30, 34, 36, 34, 25, 90, 41}, + { 33, 32, 38, 34, 25, 90, 40}, + { 38, 29, 41, 34, 26, 88, 40}, + { 42, 29, 41, 33, 27, 85, 41}, + { 43, 30, 42, 31, 28, 80, 43}, + { 42, 31, 45, 31, 30, 72, 47}, + { 32, 33, 33, 33, 26, 54, 79}, + { 31, 32, 34, 35, 20, 68, 68}, + { 32, 32, 35, 36, 17, 76, 62}, + { 34, 31, 36, 36, 17, 79, 59}, + { 37, 29, 37, 36, 18, 78, 58}, + { 39, 29, 37, 35, 20, 77, 58}, + { 41, 30, 37, 34, 22, 74, 58}, + { 40, 31, 40, 32, 26, 68, 59}, + { 33, 31, 34, 33, 29, 31, 98}, + { 34, 30, 34, 35, 23, 45, 88}, + { 34, 31, 34, 36, 20, 54, 82}, + { 35, 31, 34, 36, 18, 59, 78}, + { 36, 31, 34, 37, 19, 60, 76}, + { 38, 30, 34, 36, 20, 61, 74}, + { 39, 31, 
35, 35, 22, 60, 73}, + { 39, 31, 37, 34, 24, 59, 71} + }, + { + { 30, 33, 32, 55, 32, 32, 32}, + { 47, 30, 31, 29, 36, 32, 32}, + { 81, 28, 32, 28, 34, 32, 32}, + { 85, 46, 29, 32, 32, 33, 32}, + { 54, 82, 26, 32, 32, 33, 32}, + { 30, 90, 38, 31, 32, 33, 32}, + { 30, 56, 73, 31, 33, 32, 32}, + { 37, 21, 102, 32, 32, 32, 32}, + { 33, 32, 31, 68, 39, 31, 31}, + { 38, 32, 31, 43, 34, 33, 31}, + { 63, 30, 31, 29, 34, 32, 32}, + { 82, 37, 30, 29, 33, 32, 32}, + { 71, 63, 27, 31, 32, 33, 32}, + { 44, 86, 30, 30, 33, 33, 32}, + { 33, 72, 55, 30, 32, 32, 31}, + { 37, 37, 86, 31, 32, 33, 31}, + { 34, 33, 32, 60, 61, 29, 32}, + { 36, 33, 31, 56, 38, 32, 31}, + { 51, 30, 31, 38, 33, 33, 32}, + { 75, 31, 31, 30, 33, 33, 32}, + { 80, 47, 29, 30, 32, 33, 31}, + { 60, 73, 27, 30, 33, 33, 31}, + { 41, 78, 41, 30, 33, 32, 31}, + { 38, 53, 68, 30, 32, 33, 31}, + { 33, 33, 32, 43, 77, 35, 30}, + { 35, 33, 31, 55, 54, 29, 32}, + { 43, 32, 31, 46, 39, 31, 32}, + { 64, 30, 31, 35, 34, 33, 32}, + { 79, 37, 30, 31, 32, 33, 31}, + { 73, 57, 28, 30, 32, 33, 31}, + { 54, 73, 33, 30, 32, 33, 31}, + { 43, 64, 52, 30, 32, 33, 31}, + { 33, 33, 32, 34, 68, 58, 28}, + { 34, 33, 31, 45, 70, 33, 31}, + { 38, 33, 31, 48, 52, 29, 32}, + { 54, 31, 31, 40, 39, 31, 32}, + { 73, 32, 31, 34, 34, 33, 31}, + { 77, 45, 29, 31, 32, 32, 32}, + { 65, 63, 30, 31, 31, 33, 31}, + { 51, 66, 42, 30, 32, 33, 31}, + { 33, 32, 32, 34, 44, 81, 31}, + { 34, 33, 31, 38, 66, 52, 28}, + { 36, 33, 30, 44, 62, 34, 31}, + { 47, 31, 31, 43, 48, 30, 32}, + { 64, 31, 31, 38, 38, 32, 32}, + { 75, 38, 30, 33, 34, 32, 32}, + { 71, 53, 30, 31, 32, 33, 32}, + { 59, 61, 37, 30, 32, 33, 32}, + { 33, 32, 31, 35, 31, 71, 54}, + { 34, 33, 31, 37, 49, 70, 33}, + { 36, 33, 31, 41, 60, 48, 30}, + { 43, 32, 31, 43, 54, 35, 31}, + { 56, 31, 31, 40, 44, 32, 32}, + { 68, 35, 30, 36, 37, 32, 32}, + { 70, 45, 30, 33, 34, 33, 32}, + { 63, 55, 35, 31, 33, 33, 32}, + { 33, 32, 31, 33, 34, 36, 87}, + { 34, 32, 31, 36, 38, 62, 52}, + { 36, 33, 31, 39, 
50, 57, 36}, + { 41, 33, 31, 41, 53, 43, 33}, + { 50, 33, 31, 41, 48, 36, 32}, + { 59, 35, 31, 37, 41, 34, 32}, + { 65, 42, 31, 35, 36, 33, 32}, + { 62, 49, 35, 33, 34, 34, 33} + } +}; diff --git a/src/search.c b/src/search.c index 5c8be359..1bdc67d5 100644 --- a/src/search.c +++ b/src/search.c @@ -161,6 +161,8 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in to->intra.mode = cu->intra.mode; to->intra.mode_chroma = cu->intra.mode_chroma; to->intra.multi_ref_idx = cu->intra.multi_ref_idx; + to->intra.mip_flag = cu->intra.mip_flag; + to->intra.mip_is_transposed = cu->intra.mip_is_transposed; } else { to->skipped = cu->skipped; to->merged = cu->merged; @@ -502,7 +504,11 @@ static double calc_mode_bits(const encoder_state_t *state, kvz_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu); } - double mode_bits = kvz_luma_mode_bits(state, cur_cu->intra.mode, candidate_modes, cur_cu->intra.multi_ref_idx); + int width = LCU_WIDTH >> depth; + int height = width; // TODO: height for non-square blocks + int num_mip_modes_half = NUM_MIP_MODES_HALF(width, height); + int mip_flag_ctx_id = kvz_get_mip_flag_context(x, y, width, height, lcu, NULL); + double mode_bits = kvz_luma_mode_bits(state, cur_cu->intra.mode, candidate_modes, cur_cu->intra.multi_ref_idx, num_mip_modes_half, mip_flag_ctx_id); if (((depth == 4 && x % 8 && y % 8) || (depth != 4)) && state->encoder_control->chroma_format != KVZ_CSP_400) { mode_bits += kvz_chroma_mode_bits(state, cur_cu->intra.mode_chroma, cur_cu->intra.mode); @@ -727,14 +733,18 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, int8_t intra_trafo; double intra_cost; uint8_t multi_ref_index = 0; + bool mip_flag = false; + bool mip_transposed = false; kvz_search_cu_intra(state, x, y, depth, lcu, - &intra_mode, &intra_trafo, &intra_cost, &multi_ref_index); + &intra_mode, &intra_trafo, &intra_cost, &multi_ref_index, &mip_flag, &mip_transposed); if (intra_cost 
< cost) { cost = intra_cost; cur_cu->type = CU_INTRA; cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N; cur_cu->intra.mode = intra_mode; cur_cu->intra.multi_ref_idx = multi_ref_index; + cur_cu->intra.mip_flag = mip_flag; + cur_cu->intra.mip_is_transposed = mip_transposed; //If the CU is not split from 64x64 block, the MTS is disabled for that CU. cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0; @@ -746,12 +756,15 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (cur_cu->type == CU_INTRA) { assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN); cur_cu->intra.mode_chroma = cur_cu->intra.mode; + lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); kvz_intra_recon_cu(state, x, y, depth, cur_cu->intra.mode, -1, // skip chroma - NULL, NULL, cur_cu->intra.multi_ref_idx, lcu); + NULL, NULL, cur_cu->intra.multi_ref_idx, + cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed, + lcu); downsample_cclm_rec( state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] @@ -764,7 +777,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // into account, so there is less of a chanse of luma mode being // really bad for chroma. 
cclm_parameters_t cclm_params[2]; - if (ctrl->cfg.rdo >= 3) { + if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) { cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); } @@ -773,7 +786,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, x & ~7, y & ~7, // TODO: as does this depth, -1, cur_cu->intra.mode_chroma, // skip luma - NULL, cclm_params, 0, lcu); + NULL, cclm_params, 0, + cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed, + lcu); } } else if (cur_cu->type == CU_INTER) { @@ -908,6 +923,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // of the top left CU from the next depth. This should ensure that 64x64 // gets used, at least in the most obvious cases, while avoiding any // searching. + if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH && x + cu_width <= frame->width && y + cu_width <= frame->height && 0) { @@ -933,7 +949,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, x, y, depth, cur_cu->intra.mode, mode_chroma, - NULL,NULL, 0, lcu); + NULL,NULL, 0, cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed, + lcu); cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu); if (has_chroma) { diff --git a/src/search.h b/src/search.h index 3694a2ff..4eb5943f 100644 --- a/src/search.h +++ b/src/search.h @@ -44,6 +44,10 @@ #include "image.h" #include "constraint.h" +// Number of MIP modes for a block: 32 for 4x4, 16 for 4xN, Nx4 and 8x8, 12 otherwise. HALF is FULL >> 1. Both expansions are fully parenthesized so they are safe inside larger expressions. +#define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 
16 : 12)) +#define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1) + void kvz_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length); void kvz_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length); diff --git a/src/search_intra.c b/src/search_intra.c index 8615565a..87139b93 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -328,12 +328,14 @@ static double search_intra_trdepth(encoder_state_t * const state, continue; } } - + kvz_intra_recon_cu(state, x_px, y_px, depth, intra_mode, -1, - pred_cu, cclm_params, pred_cu->intra.multi_ref_idx, lcu); + pred_cu, cclm_params, pred_cu->intra.multi_ref_idx, + pred_cu->intra.mip_flag, pred_cu->intra.mip_is_transposed, + lcu); // TODO: Not sure if this should be 0 or 1 but at least seems to work with 1 if (pred_cu->tr_idx > 1) @@ -361,7 +363,9 @@ static double search_intra_trdepth(encoder_state_t * const state, x_px, y_px, depth, -1, chroma_mode, - pred_cu, cclm_params, 0, lcu); + pred_cu, cclm_params, 0, + pred_cu->intra.mip_flag, pred_cu->intra.mip_is_transposed, + lcu); best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); } pred_cu->tr_skip = best_tr_idx == MTS_SKIP; @@ -675,7 +679,7 @@ static int8_t search_intra_rough(encoder_state_t * const state, // affecting the halving search. 
int lambda_cost = (int)(state->lambda_sqrt + 0.5); for (int mode_i = 0; mode_i < modes_selected; ++mode_i) { - costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds, 0); + costs[mode_i] += lambda_cost * kvz_luma_mode_bits(state, modes[mode_i], intra_preds, 0, 0, 0); } #undef PARALLEL_BLKS @@ -715,16 +719,18 @@ static int8_t search_intra_rdo(encoder_state_t * const state, int8_t *intra_preds, int modes_to_check, int8_t modes[67], int8_t trafo[67], double costs[67], + int num_mip_modes_full, + int8_t mip_modes[32], int8_t mip_trafo[32], double mip_costs[32], lcu_t *lcu, uint8_t multi_ref_idx) { const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra); const int width = LCU_WIDTH >> depth; + const int height = width; // TODO: proper height for non-square blocks kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - // TODO: height for non-square blocks - kvz_pixels_blit(orig, orig_block, width, width, origstride, width); + kvz_pixels_blit(orig, orig_block, width, height, origstride, width); // Check that the predicted modes are in the RDO mode list if (modes_to_check < 67) { @@ -749,38 +755,68 @@ static int8_t search_intra_rdo(encoder_state_t * const state, } } - for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { - int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds, multi_ref_idx); + // MIP_TODO: implement this inside the standard intra for loop. Code duplication is bad. + // MIP_TODO: loop through normal intra modes first + + for (int mip = 0; mip <= 1; mip++) { + const int transp_off = mip ? num_mip_modes_full >> 1 : 0; + uint8_t ctx_id = mip ? kvz_get_mip_flag_context(x_px, y_px, width, height, lcu, NULL) : 0; + uint8_t multi_ref_index = mip ? 0 : multi_ref_idx; + int *num_modes = mip ? &num_mip_modes_full : &modes_to_check; - costs[rdo_mode] = rdo_bitcost * (int)(state->lambda + 0.5); + for (uint8_t i = 0; i < *num_modes; i++) { + int8_t mode = mip ? 
mip_modes[i] : modes[i]; + double *mode_cost_p = mip ? &mip_costs[i] : &costs[i]; + int8_t *mode_trafo_p = mip ? &mip_trafo[i] : &trafo[i]; + int rdo_bitcost = kvz_luma_mode_bits(state, mode, intra_preds, multi_ref_index, transp_off, ctx_id); - // Perform transform split search and save mode RD cost for the best one. - cu_info_t pred_cu; - pred_cu.depth = depth; - pred_cu.type = CU_INTRA; - pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); - pred_cu.intra.mode = modes[rdo_mode]; - pred_cu.intra.mode_chroma = modes[rdo_mode]; - pred_cu.intra.multi_ref_idx = multi_ref_idx; - pred_cu.joint_cb_cr = 0; - FILL(pred_cu.cbf, 0); + *mode_cost_p = rdo_bitcost * (int)(state->lambda + 0.5); - // Reset transform split data in lcu.cu for this area. - kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); + // Mip related stuff + // There can be 32 MIP modes, but only mode numbers [0, 15] are ever written to bitstream. + // Half of the modes [16, 31] are indicated with the separate transpose flag. + // Number of possible modes is less for larger blocks. + const bool is_transposed = mip ? (mode >= transp_off ? true : false) : 0; + int8_t pred_mode = (is_transposed ? mode - transp_off : mode); - double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, NULL, -1); - costs[rdo_mode] += mode_cost; - trafo[rdo_mode] = pred_cu.tr_idx; + // Perform transform split search and save mode RD cost for the best one. + cu_info_t pred_cu; + pred_cu.depth = depth; + pred_cu.type = CU_INTRA; + pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); // TODO: non-square blocks + pred_cu.intra.mode = pred_mode; + pred_cu.intra.mode_chroma = pred_mode; + pred_cu.intra.multi_ref_idx = multi_ref_index; + pred_cu.intra.mip_is_transposed = is_transposed; + pred_cu.intra.mip_flag = mip ? 
true : false; + pred_cu.joint_cb_cr = 0; + FILL(pred_cu.cbf, 0); - // Early termination if no coefficients has to be coded - if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(pred_cu.cbf, depth)) { - modes_to_check = rdo_mode + 1; - break; + // Reset transform split data in lcu.cu for this area. + kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); + + double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, pred_mode, MAX_INT, &pred_cu, lcu, NULL, -1); + *mode_cost_p += mode_cost; + *mode_trafo_p = pred_cu.tr_idx; + + // Early termination if no coefficients has to be coded + if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(pred_cu.cbf, depth)) { + *num_modes = i + 1; + break; + } } } // Update order according to new costs kvz_sort_modes_intra_luma(modes, trafo, costs, modes_to_check); + bool use_mip = false; + if (num_mip_modes_full) { + kvz_sort_modes_intra_luma(mip_modes, mip_trafo, mip_costs, num_mip_modes_full); + if (costs[0] > mip_costs[0]) { + use_mip = true; + } + } + // The best transform split hierarchy is not saved anywhere, so to get the // transform split hierarchy the search has to be performed again with the @@ -790,57 +826,110 @@ static int8_t search_intra_rdo(encoder_state_t * const state, pred_cu.depth = depth; pred_cu.type = CU_INTRA; pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); - pred_cu.intra.mode = modes[0]; - pred_cu.intra.mode_chroma = modes[0]; - pred_cu.intra.multi_ref_idx = multi_ref_idx; + if (use_mip) { + int transp_off = num_mip_modes_full >> 1; + bool is_transposed = (mip_modes[0] >= transp_off ? true : false); + int8_t pred_mode = (is_transposed ? 
mip_modes[0] - transp_off : mip_modes[0]); + pred_cu.intra.mode = pred_mode; + pred_cu.intra.mode_chroma = pred_mode; + pred_cu.intra.multi_ref_idx = 0; + pred_cu.intra.mip_flag = true; + pred_cu.intra.mip_is_transposed = is_transposed; + } + else { + pred_cu.intra.mode = modes[0]; + pred_cu.intra.mode_chroma = modes[0]; + pred_cu.intra.multi_ref_idx = multi_ref_idx; + pred_cu.intra.mip_flag = false; + pred_cu.intra.mip_is_transposed = false; + } FILL(pred_cu.cbf, 0); - search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]); + search_intra_trdepth(state, x_px, y_px, depth, tr_depth, pred_cu.intra.mode, MAX_INT, &pred_cu, lcu, NULL, trafo[0]); } + // TODO: modes to check does not consider mip modes. Maybe replace with array when mip search is optimized? return modes_to_check; } -double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds, const uint8_t multi_ref_idx) +double kvz_luma_mode_bits(const encoder_state_t *state, int8_t luma_mode, const int8_t *intra_preds, const uint8_t multi_ref_idx, const uint8_t num_mip_modes_half, int mip_flag_ctx_id) { double mode_bits = 0.0; - int8_t mode_in_preds = -1; - for (int i = 0; i < INTRA_MPM_COUNT; ++i) { - if (luma_mode == intra_preds[i]) { - mode_in_preds = i; - break; + bool enable_mip = state->encoder_control->cfg.mip; + bool mip_flag = enable_mip ? (num_mip_modes_half > 0 ? true : false) : false; + + // Mip flag cost must be calculated even if mip is not used in this block + if (enable_mip) { + // Make a copy of state->cabac for bit cost estimation. 
+ cabac_data_t state_cabac_copy; + cabac_data_t* cabac; + memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t)); + // Clear data and set mode to count only + state_cabac_copy.only_count = 1; + state_cabac_copy.num_buffered_bytes = 0; + state_cabac_copy.bits_left = 23; + + cabac = &state_cabac_copy; + + // Do cabac writes as normal + const int transp_off = num_mip_modes_half; + const bool is_transposed = luma_mode >= transp_off ? true : false; + int8_t mip_mode = is_transposed ? luma_mode - transp_off : luma_mode; + + // Write MIP flag + cabac->cur_ctx = &(cabac->ctx.mip_flag[mip_flag_ctx_id]); + CABAC_BIN(cabac, mip_flag, "mip_flag"); + + if (mip_flag) { + // Write MIP transpose flag & mode + CABAC_BIN_EP(cabac, is_transposed, "mip_transposed"); + kvz_cabac_encode_trunc_bin(cabac, mip_mode, transp_off); } + + // Write is done. Get bit cost out of cabac + mode_bits += (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3); } - bool enable_mrl = state->encoder_control->cfg.mrl; - uint8_t multi_ref_index = enable_mrl ? multi_ref_idx : 0; - - const cabac_ctx_t* ctx = &(state->cabac.ctx.intra_luma_mpm_flag_model); - - if (multi_ref_index == 0) { - mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds != -1); - } - - // Add MRL bits. - if (enable_mrl && MAX_REF_LINE_IDX > 1) { - ctx = &(state->cabac.ctx.multi_ref_line[0]); - mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 0); - - if (multi_ref_index != 0 && MAX_REF_LINE_IDX > 2) { - ctx = &(state->cabac.ctx.multi_ref_line[1]); - mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 1); + if (!mip_flag) { + int8_t mode_in_preds = -1; + for (int i = 0; i < INTRA_MPM_COUNT; ++i) { + if (luma_mode == intra_preds[i]) { + mode_in_preds = i; + break; + } } - } - if (mode_in_preds != -1 || multi_ref_index != 0) { - ctx = &(state->cabac.ctx.luma_planar_model[0]); + bool enable_mrl = state->encoder_control->cfg.mrl; + uint8_t multi_ref_index = enable_mrl ? 
multi_ref_idx : 0; + + const cabac_ctx_t* ctx = &(state->cabac.ctx.intra_luma_mpm_flag_model); + if (multi_ref_index == 0) { - mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds>0); + mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds != -1); + } + + // Add MRL bits. + if (enable_mrl && MAX_REF_LINE_IDX > 1) { + ctx = &(state->cabac.ctx.multi_ref_line[0]); + mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 0); + + if (multi_ref_index != 0 && MAX_REF_LINE_IDX > 2) { + ctx = &(state->cabac.ctx.multi_ref_line[1]); + mode_bits += CTX_ENTROPY_FBITS(ctx, multi_ref_index != 1); + } + } + + if (mode_in_preds != -1 || multi_ref_index != 0) { + ctx = &(state->cabac.ctx.luma_planar_model[0]); + if (multi_ref_index == 0) { + mode_bits += CTX_ENTROPY_FBITS(ctx, mode_in_preds > 0); + } + mode_bits += MIN(4.0, mode_in_preds); + } + else { + mode_bits += 6.0; } - mode_bits += MIN(4.0,mode_in_preds); - } else { - mode_bits += 6.0; } return mode_bits; @@ -921,7 +1010,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, x_px, y_px, depth, -1, chroma.mode, // skip luma - NULL, NULL, 0, lcu); + NULL, NULL, 0, false, false, lcu); } else { @@ -954,7 +1043,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, x_px, y_px, depth, -1, chroma.mode, // skip luma - NULL, cclm_params, 0, lcu); + NULL, cclm_params, 0, false, false, lcu); } chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); @@ -1044,7 +1133,9 @@ void kvz_search_cu_intra(encoder_state_t * const state, int8_t *mode_out, int8_t *trafo_out, double *cost_out, - uint8_t *multi_ref_idx_out) + uint8_t *multi_ref_idx_out, + bool *mip_flag_out, + bool * mip_transposed_out) { const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const int8_t cu_width = LCU_WIDTH >> depth; @@ -1081,6 +1172,29 @@ void kvz_search_cu_intra(encoder_state_t * const state, int8_t trafo[MAX_REF_LINE_IDX][67] = { 0 }; double costs[MAX_REF_LINE_IDX][67]; + bool enable_mip = 
state->encoder_control->cfg.mip; + // The maximum number of mip modes is 32. Max modes can be less depending on block size. + // Half of the possible modes are transposed, which is indicated by a separate transpose flag + int8_t mip_modes[32]; + int8_t mip_trafo[32]; + double mip_costs[32]; + + // The maximum number of possible MIP modes depend on block size & shape + int width = LCU_WIDTH >> depth; + int height = width; // TODO: proper height for non-square blocks. + int num_mip_modes = 0; + + if (enable_mip) { + for (int i = 0; i < 32; ++i) { + mip_modes[i] = i; + mip_costs[i] = MAX_INT; + } + // MIP is not allowed for 64 x 4 or 4 x 64 blocks + if (!((width == 64 && height == 4) || (width == 4 && height == 64))) { + num_mip_modes = NUM_MIP_MODES_FULL(width, height); + } + } + // Find best intra mode for 2Nx2N. kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; @@ -1132,24 +1246,37 @@ void kvz_search_cu_intra(encoder_state_t * const state, } for(int8_t line = 0; line < lines; ++line) { - // For extra reference lines, only check predicted modes + // For extra reference lines, only check predicted modes & no MIP search. if (line != 0) { number_of_modes_to_search = 0; + num_mip_modes = 0; } int num_modes_to_check = MIN(number_of_modes[line], number_of_modes_to_search); kvz_sort_modes(modes[line], costs[line], number_of_modes[line]); + // TODO: if rough search is implemented for MIP, sort mip_modes here. 
number_of_modes[line] = search_intra_rdo(state, x_px, y_px, depth, ref_pixels, LCU_WIDTH, candidate_modes, num_modes_to_check, - modes[line], trafo[line], costs[line], lcu, line); + modes[line], trafo[line], costs[line], + num_mip_modes, + mip_modes, mip_trafo, mip_costs, + lcu, line); } } uint8_t best_line = 0; double best_line_mode_cost = costs[0][0]; + uint8_t best_mip_mode_idx = 0; uint8_t best_mode_indices[MAX_REF_LINE_IDX]; + + int8_t tmp_best_mode; + int8_t tmp_best_trafo; + double tmp_best_cost; + bool tmp_mip_flag = false; + bool tmp_mip_transp = false; + for (int line = 0; line < lines; ++line) { best_mode_indices[line] = select_best_mode_index(modes[line], costs[line], number_of_modes[line]); if (best_line_mode_cost > costs[line][best_mode_indices[line]]) { @@ -1158,8 +1285,31 @@ void kvz_search_cu_intra(encoder_state_t * const state, } } - *mode_out = modes[best_line][best_mode_indices[best_line]]; - *trafo_out = trafo[best_line][best_mode_indices[best_line]]; - *cost_out = costs[best_line][best_mode_indices[best_line]]; - *multi_ref_idx_out = best_line; + tmp_best_mode = modes[best_line][best_mode_indices[best_line]]; + tmp_best_trafo = trafo[best_line][best_mode_indices[best_line]]; + tmp_best_cost = costs[best_line][best_mode_indices[best_line]]; + + if (num_mip_modes) { + best_mip_mode_idx = select_best_mode_index(mip_modes, mip_costs, num_mip_modes); + if (tmp_best_cost > mip_costs[best_mip_mode_idx]) { + tmp_best_mode = mip_modes[best_mip_mode_idx]; + tmp_best_trafo = mip_trafo[best_mip_mode_idx]; + tmp_best_cost = mip_costs[best_mip_mode_idx]; + tmp_mip_flag = true; + tmp_mip_transp = (tmp_best_mode >= (num_mip_modes >> 1)) ? 1 : 0; + } + } + + if (tmp_mip_flag) { + // Transform best mode index to proper form. + // Max mode index is half of max number of modes - 1 (i. e. for size id 2, max mode id is 5) + tmp_best_mode = (tmp_mip_transp ? 
tmp_best_mode - (num_mip_modes >> 1) : tmp_best_mode); + } + + *mode_out = tmp_best_mode; + *trafo_out = tmp_best_trafo; + *cost_out = tmp_best_cost; + *mip_flag_out = tmp_mip_flag; + *mip_transposed_out = tmp_mip_transp; + *multi_ref_idx_out = tmp_mip_flag ? 0 : best_line; } diff --git a/src/search_intra.h b/src/search_intra.h index 4fc7210d..659695b3 100644 --- a/src/search_intra.h +++ b/src/search_intra.h @@ -43,9 +43,8 @@ #include "global.h" // IWYU pragma: keep #include "intra.h" - double kvz_luma_mode_bits(const encoder_state_t *state, - int8_t luma_mode, const int8_t *intra_preds, uint8_t multi_ref_idx); + int8_t luma_mode, const int8_t *intra_preds, uint8_t multi_ref_idx, const uint8_t num_mip_modes, int mip_flag_ctx_id); double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode); @@ -60,6 +59,8 @@ void kvz_search_cu_intra(encoder_state_t * const state, int8_t *mode_out, int8_t *trafo_out, double *cost_out, - uint8_t *multi_ref_idx_out); + uint8_t *multi_ref_idx_out, + bool *mip_flag, + bool *mip_transp); #endif // SEARCH_INTRA_H_ diff --git a/tests/test_intra.sh b/tests/test_intra.sh index 1d417223..3a560002 100755 --- a/tests/test_intra.sh +++ b/tests/test_intra.sh @@ -11,6 +11,7 @@ valgrind_test $common_args --rd=1 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq valgrind_test $common_args --rd=2 --mrl +valgrind_test $common_args --rd=2 --mip valgrind_test $common_args --rd=3 valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0 valgrind_test $common_args --alf=full --wpp --threads=1