diff --git a/src/search.c b/src/search.c index f26a0f0a..ebd550d1 100644 --- a/src/search.c +++ b/src/search.c @@ -393,6 +393,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, // block and return 0 cost for all others. return 0; } + int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, depth, COLOR_U); + int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, depth, COLOR_V); // See luma for why the second condition if (!skip_residual_coding) { @@ -401,13 +403,10 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_cb[0]); cabac->cur_ctx = ctx; if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { - int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search"); } - int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); - ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]); + ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]); if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { - int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search"); } } @@ -426,7 +425,7 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, } if (state->encoder_control->cfg.jccr) { - int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1; + int cbf_mask = u_is_set * 2 + v_is_set - 1; cabac_ctx_t* ctx = NULL; if (cbf_mask != -1) { cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; @@ -448,13 +447,18 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, ssd = ssd_u + ssd_v; } - if (!skip_residual_coding) - { + if (!skip_residual_coding) { int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, 2, scan_order, 0); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, 2, scan_order, 0); + if((pred_cu->joint_cb_cr & 3) == 0){ + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, 2, scan_order, 0); + } + else { + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, 2, scan_order, 0); + + } } @@ -620,10 +624,10 @@ static double cu_rd_cost_tr_split_accurate( else { { int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; - int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index], + int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], LCU_WIDTH_C, LCU_WIDTH_C, chroma_width); - int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index], + int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index], LCU_WIDTH_C, LCU_WIDTH_C, chroma_width); chroma_ssd = ssd_u_joint + ssd_v_joint; @@ -1150,45 +1154,10 @@ static double search_cu( memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac)); - state->search_cabac.update = 1; - double split_bits = 0; if (depth < MAX_DEPTH) { - // Add cost of cu_split_flag. - const cu_info_t* left_cu = NULL, * above_cu = NULL; - if (x) { - if (x_local || tree_type != UVG_CHROMA_T) { - left_cu = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); - } - else { - left_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, (x >> 1) - 1, y >> 1); - } - } - if (y) { - if (y_local || tree_type != UVG_CHROMA_T) { - above_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1); - } - else { - above_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, x >> 1, (y >> 1) - 1); - } - } - uvg_write_split_flag( - state, - &state->search_cabac, - left_cu, - above_cu, - 1, - depth, - cu_width, - x >> (tree_type == UVG_CHROMA_T), - y >> (tree_type == UVG_CHROMA_T), - tree_type, - &split_bits); - } - state->search_cabac.update = 0; - split_cost += split_bits * state->lambda; // If skip mode was selected for the block, skip further search. // Skip mode means there's no coefficients in the block, so splitting @@ -1197,14 +1166,49 @@ static double search_cu( // the split costs at least as much as not splitting. if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { if (split_cost < cost) split_cost += search_cu(state, x, y, depth + 1, work_tree, tree_type); - if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y, depth + 1, work_tree, tree_type); - if (split_cost < cost) split_cost += search_cu(state, x, y + half_cu, depth + 1, work_tree, tree_type); - if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type); + if (split_cost < cost || 1) split_cost += search_cu(state, x + half_cu, y, depth + 1, work_tree, tree_type); + if (split_cost < cost || 1) split_cost += search_cu(state, x, y + half_cu, depth + 1, work_tree, tree_type); + if (split_cost < cost || 1) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type); } else { split_cost = INT_MAX; } + state->search_cabac.update = 1; + // Add cost of cu_split_flag. + const cu_info_t* left_cu = NULL, * above_cu = NULL; + if (x) { + if (x_local || tree_type != UVG_CHROMA_T) { + left_cu = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); + } + else { + left_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, (x >> 1) - 1, y >> 1); + } + } + if (y) { + if (y_local || tree_type != UVG_CHROMA_T) { + above_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1); + } + else { + above_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, x >> 1, (y >> 1) - 1); + } + } + uvg_write_split_flag( + state, + &state->search_cabac, + left_cu, + above_cu, + 1, + depth, + cu_width, + x >> (tree_type == UVG_CHROMA_T), + y >> (tree_type == UVG_CHROMA_T), + tree_type, + &split_bits); + } + + state->search_cabac.update = 0; + split_cost += split_bits * state->lambda; // If no search is not performed for this depth, try just the best mode // of the top left CU from the next depth. This should ensure that 64x64 // gets used, at least in the most obvious cases, while avoiding any diff --git a/src/search_intra.c b/src/search_intra.c index 215e64ab..d1b5a3b7 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -1844,6 +1844,7 @@ void uvg_search_cu_intra( search_data[i].cost = MAX_INT; } number_of_modes = UVG_NUM_INTRA_MODES; + num_regular_modes = UVG_NUM_INTRA_MODES; } uint8_t num_mrl_modes = 0;