diff --git a/src/cu.h b/src/cu.h index cf2a4e9a..6b5bdf38 100644 --- a/src/cu.h +++ b/src/cu.h @@ -126,6 +126,13 @@ typedef struct uint16_t cbf; + /** + * \brief QP used for the CU. + * + * This is required for deblocking when per-LCU QPs are enabled. + */ + uint8_t qp; + union { struct { int8_t mode; diff --git a/src/encoderstate.c b/src/encoderstate.c index 967cfa8c..825a7373 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -196,7 +196,76 @@ static void encode_sao(encoder_state_t * const state, } -static void encoder_state_worker_encode_lcu(void * opaque) { +/** + * \brief Sets the QP for each CU in state->tile->frame->cu_array. + * + * The QPs are used in deblocking. + * + * The delta QP for an LCU is coded when the first CU with coded block flag + * set is encountered. Hence, for the purposes of deblocking, all CUs + * before the first one with cbf set use state->ref_qp and all CUs after + * that use state->qp. + * + * \param state encoder state + * \param x x-coordinate of the left edge of the root CU + * \param y y-coordinate of the top edge of the root CU + * \param depth depth in the CU quadtree + * \param coeffs_coded Used for tracking whether a CU with a residual + * has been encountered. Should be set to false at + * the top level. + * \return Whether there were any CUs with residual or not. + */ +static bool set_cu_qps(encoder_state_t *state, int x, int y, int depth, bool coeffs_coded) +{ + if (state->qp == state->ref_qp) { + // If the QPs are equal there is no need to care about the residuals. + coeffs_coded = true; + } + + cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y); + const int cu_width = LCU_WIDTH >> depth; + coeffs_coded = coeffs_coded || cbf_is_set_any(cu->cbf, cu->depth); + + if (!coeffs_coded && cu->depth > depth) { + // Recursively process sub-CUs. + const int d = cu_width >> 1; + coeffs_coded = set_cu_qps(state, x, y, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x + d, y, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x, y + d, depth + 1, coeffs_coded); + coeffs_coded = set_cu_qps(state, x + d, y + d, depth + 1, coeffs_coded); + + } else { + if (!coeffs_coded && cu->tr_depth > depth) { + // The CU is split into smaller transform units. Check whether coded + // block flag is set for any of the TUs. + const int tu_width = LCU_WIDTH >> cu->tr_depth; + for (int y_scu = y; y_scu < y + cu_width; y_scu += tu_width) { + for (int x_scu = x; x_scu < x + cu_width; x_scu += tu_width) { + cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu); + if (cbf_is_set_any(tu->cbf, cu->depth)) { + coeffs_coded = true; + } + } + } + } + + // Set the correct QP for all state->tile->frame->cu_array elements in + // the area covered by the CU. + const int8_t qp = coeffs_coded ? state->qp : state->ref_qp; + + for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) { + for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) { + kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp; + } + } + } + + return coeffs_coded; +} + + +static void encoder_state_worker_encode_lcu(void * opaque) +{ const lcu_order_element_t * const lcu = opaque; encoder_state_t *state = lcu->encoder_state; const encoder_control_t * const encoder = state->encoder_control; @@ -211,6 +280,10 @@ static void encoder_state_worker_encode_lcu(void * opaque) { encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); if (encoder->deblock_enable) { + if (encoder->cfg->target_bitrate > 0) { + set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false); + } + kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } diff --git a/src/filter.c b/src/filter.c index dce73199..d96db710 100644 --- a/src/filter.c +++ b/src/filter.c @@ -247,6 +247,27 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir) } } +static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) +{ + if (state->encoder_control->cfg->target_bitrate <= 0) { + return state->qp; + } + + int32_t qp_p; + if (dir == EDGE_HOR && y > 0) { + qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x, y - 1)->qp; + } else if (dir == EDGE_VER && x > 0) { + qp_p = kvz_cu_array_at_const(state->tile->frame->cu_array, x - 1, y)->qp; + } else { + qp_p = state->frame->QP; + } + + const int32_t qp_q = + kvz_cu_array_at_const(state->tile->frame->cu_array, x, y)->qp; + + return (qp_p + qp_q + 1) >> 1; +} + /** * \brief Apply the deblocking filter to luma pixels on a single edge. * @@ -290,8 +311,9 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, kvz_pixel *orig_src = &frame->rec->y[x + y*stride]; kvz_pixel *src = orig_src; + const int32_t qp = get_qp_y_pred(state, x, y, dir); + int8_t strength = 0; - int32_t qp = state->qp; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale; @@ -490,7 +512,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, }; int8_t strength = 2; - int32_t QP = kvz_g_chroma_scale[state->qp]; + const int32_t luma_qp = get_qp_y_pred(state, x << 1, y << 1, dir); + int32_t QP = kvz_g_chroma_scale[luma_qp]; int32_t bitdepth_scale = 1 << (encoder->bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale;