mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[mtt] Actually remove the last width dependency to depth
This commit is contained in:
parent
dcf879e5ed
commit
6a0864839c
|
@ -47,12 +47,13 @@
|
|||
#include "tables.h"
|
||||
#include "videoframe.h"
|
||||
|
||||
bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pred_cu)
|
||||
bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pred_cu, const cu_loc_t*
|
||||
const cu_loc)
|
||||
{
|
||||
uint32_t ts_max_size = 1 << state->encoder_control->cfg.trskip_max_size;
|
||||
const uint32_t max_size = 32; // CU::isIntra(cu) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE;
|
||||
const uint32_t cu_width = LCU_WIDTH >> pred_cu->depth;
|
||||
const uint32_t cu_height = LCU_WIDTH >> pred_cu->depth;
|
||||
const uint32_t cu_width = cu_loc->width;
|
||||
const uint32_t cu_height = cu_loc->height;
|
||||
//bool mts_allowed = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y;
|
||||
|
||||
uint8_t mts_type = state->encoder_control->cfg.mts;
|
||||
|
@ -66,14 +67,16 @@ bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pr
|
|||
return mts_allowed;
|
||||
}
|
||||
|
||||
static void encode_mts_idx(encoder_state_t * const state,
|
||||
static void encode_mts_idx(
|
||||
encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const cu_info_t *const pred_cu)
|
||||
const cu_info_t *const pred_cu,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
//TransformUnit &tu = *cu.firstTU;
|
||||
int mts_idx = pred_cu->tr_idx;
|
||||
|
||||
if (uvg_is_mts_allowed(state, (cu_info_t* const )pred_cu) && mts_idx != MTS_SKIP
|
||||
if (uvg_is_mts_allowed(state, (cu_info_t* const )pred_cu, cu_loc) && mts_idx != MTS_SKIP
|
||||
&& !pred_cu->violates_mts_coeff_constraint
|
||||
&& pred_cu->mts_last_scan_pos
|
||||
)
|
||||
|
@ -498,7 +501,7 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
|
|||
|
||||
static void encode_chroma_tu(
|
||||
encoder_state_t* const state,
|
||||
const cu_loc_t *cu_loc,
|
||||
const cu_loc_t * const cu_loc,
|
||||
int depth,
|
||||
cu_info_t* cur_pu,
|
||||
int8_t* scan_idx,
|
||||
|
@ -541,8 +544,7 @@ static void encode_chroma_tu(
|
|||
}
|
||||
}
|
||||
else {
|
||||
// const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
|
||||
const coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
uvg_get_sub_coeff(coeff_uv, coeff->joint_uv, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
|
||||
if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) {
|
||||
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
|
||||
|
@ -700,7 +702,7 @@ static void encode_transform_coeff(
|
|||
}
|
||||
*/
|
||||
|
||||
int8_t split = (LCU_WIDTH >> depth > TR_MAX_WIDTH);
|
||||
int8_t split = (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH);
|
||||
|
||||
const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_pu->cbf, depth, COLOR_Y) : 0;
|
||||
const int cb_flag_u = tree_type != UVG_LUMA_T ?( cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U)) : 0;
|
||||
|
@ -1290,15 +1292,13 @@ bool uvg_write_split_flag(
|
|||
const cu_info_t * left_cu,
|
||||
const cu_info_t * above_cu,
|
||||
uint8_t split_flag,
|
||||
const cu_loc_t* const cu_loc,
|
||||
int depth,
|
||||
int cu_width,
|
||||
int x,
|
||||
int y,
|
||||
enum uvg_tree_type tree_type,
|
||||
double* bits_out)
|
||||
{
|
||||
uint16_t abs_x = x + (state->tile->offset_x >> (tree_type == UVG_CHROMA_T));
|
||||
uint16_t abs_y = y + (state->tile->offset_y >> (tree_type == UVG_CHROMA_T));
|
||||
uint16_t abs_x = (cu_loc->x + state->tile->offset_x) >> (tree_type == UVG_CHROMA_T);
|
||||
uint16_t abs_y = (cu_loc->y + state->tile->offset_y) >> (tree_type == UVG_CHROMA_T);
|
||||
double bits = 0;
|
||||
const encoder_control_t* const ctrl = state->encoder_control;
|
||||
// Implisit split flag when on border
|
||||
|
@ -1311,10 +1311,12 @@ bool uvg_write_split_flag(
|
|||
// ToDo: update this when btt is actually used
|
||||
bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH
|
||||
|
||||
const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
|
||||
const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
|
||||
|
||||
uint8_t implicit_split_mode = UVG_NO_SPLIT;
|
||||
//bool implicit_split = border;
|
||||
bool bottom_left_available = ((abs_y + cu_width - 1) < (ctrl->in.height >> (tree_type == UVG_CHROMA_T)));
|
||||
bool bottom_left_available = ((abs_y + cu_height - 1) < (ctrl->in.height >> (tree_type == UVG_CHROMA_T)));
|
||||
bool top_right_available = ((abs_x + cu_width - 1) < (ctrl->in.width >> (tree_type == UVG_CHROMA_T)));
|
||||
|
||||
if (!bottom_left_available && !top_right_available && allow_qt) {
|
||||
|
@ -1349,11 +1351,11 @@ bool uvg_write_split_flag(
|
|||
if (no_split && allow_split) {
|
||||
// Get left and top block split_flags and if they are present and true, increase model number
|
||||
// ToDo: should use height and width to increase model, PU_GET_W() ?
|
||||
if (left_cu && LCU_WIDTH >> left_cu->depth < LCU_WIDTH >> depth) {
|
||||
if (left_cu && left_cu->depth > depth) {
|
||||
split_model++;
|
||||
}
|
||||
|
||||
if (above_cu && LCU_WIDTH >> above_cu->depth < LCU_WIDTH >> depth) {
|
||||
if (above_cu && above_cu->depth > depth) {
|
||||
split_model++;
|
||||
}
|
||||
|
||||
|
@ -1457,7 +1459,16 @@ void uvg_encode_coding_tree(
|
|||
// When not in MAX_DEPTH, insert split flag and split the blocks if needed
|
||||
if (depth != MAX_DEPTH && !(tree_type == UVG_CHROMA_T && depth == MAX_DEPTH -1)) {
|
||||
|
||||
const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, (cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7, depth, cu_width, x, y, tree_type,NULL);
|
||||
const int split_flag = uvg_write_split_flag(
|
||||
state,
|
||||
cabac,
|
||||
left_cu,
|
||||
above_cu,
|
||||
(cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7,
|
||||
cu_loc,
|
||||
depth,
|
||||
tree_type,
|
||||
NULL);
|
||||
|
||||
if (split_flag || border) {
|
||||
const int half_luma = cu_loc->width / 2;
|
||||
|
@ -1597,8 +1608,8 @@ void uvg_encode_coding_tree(
|
|||
uvg_pixel *rec_base_v = &frame->rec->v[x / 2 + y / 2 * ctrl->in.width / 2];
|
||||
|
||||
// Luma
|
||||
for (unsigned y_px = 0; y_px < LCU_WIDTH >> depth; y_px++) {
|
||||
for (unsigned x_px = 0; x_px < LCU_WIDTH >> depth; x_px++) {
|
||||
for (unsigned y_px = 0; y_px < cu_height; y_px++) {
|
||||
for (unsigned x_px = 0; x_px < cu_width; x_px++) {
|
||||
uvg_bitstream_put(cabac->stream, base_y[x_px + y_px * ctrl->in.width], 8);
|
||||
rec_base_y[x_px + y_px * ctrl->in.width] = base_y[x_px + y_px * ctrl->in.width];
|
||||
}
|
||||
|
@ -1606,14 +1617,14 @@ void uvg_encode_coding_tree(
|
|||
|
||||
// Chroma
|
||||
if (ctrl->chroma_format != UVG_CSP_400) {
|
||||
for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) {
|
||||
for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) {
|
||||
for (unsigned y_px = 0; y_px < cu_loc->chroma_height; y_px++) {
|
||||
for (unsigned x_px = 0; x_px < cu_loc->chroma_width; x_px++) {
|
||||
uvg_bitstream_put(cabac->stream, base_u[x_px + y_px * (ctrl->in.width >> 1)], 8);
|
||||
rec_base_u[x_px + y_px * (ctrl->in.width >> 1)] = base_u[x_px + y_px * (ctrl->in.width >> 1)];
|
||||
}
|
||||
}
|
||||
for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) {
|
||||
for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) {
|
||||
for (unsigned y_px = 0; y_px < cu_loc->chroma_height; y_px++) {
|
||||
for (unsigned x_px = 0; x_px < cu_loc->chroma_width; x_px++) {
|
||||
uvg_bitstream_put(cabac->stream, base_v[x_px + y_px * (ctrl->in.width >> 1)], 8);
|
||||
rec_base_v[x_px + y_px * (ctrl->in.width >> 1)] = base_v[x_px + y_px * (ctrl->in.width >> 1)];
|
||||
}
|
||||
|
@ -1664,7 +1675,7 @@ void uvg_encode_coding_tree(
|
|||
encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc);
|
||||
}
|
||||
|
||||
encode_mts_idx(state, cabac, cur_cu);
|
||||
encode_mts_idx(state, cabac, cur_cu, cu_loc);
|
||||
|
||||
}
|
||||
} else if (cur_cu->type == CU_INTRA) {
|
||||
|
@ -1701,7 +1712,7 @@ void uvg_encode_coding_tree(
|
|||
if (tree_type != UVG_CHROMA_T) {
|
||||
bool lfnst_written = encode_lfnst_idx(state, cabac, cur_cu, x, y, depth, cu_width, cu_height, tree_type, COLOR_Y);
|
||||
}
|
||||
encode_mts_idx(state, cabac, cur_cu);
|
||||
encode_mts_idx(state, cabac, cur_cu, cu_loc);
|
||||
|
||||
// For 4x4 the chroma PU/TU is coded after the last
|
||||
if (state->encoder_control->chroma_format != UVG_CSP_400 &&
|
||||
|
@ -1731,7 +1742,7 @@ void uvg_encode_coding_tree(
|
|||
|
||||
end:
|
||||
|
||||
if (is_last_cu_in_qg(state, x, y, depth)) {
|
||||
if (is_last_cu_in_qg(state, cu_loc)) {
|
||||
state->last_qp = cur_cu->qp;
|
||||
}
|
||||
|
||||
|
@ -1752,10 +1763,8 @@ double uvg_mock_encode_coding_unit(
|
|||
|
||||
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
|
||||
int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
|
||||
int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
|
||||
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T);
|
||||
int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T);
|
||||
|
||||
const cu_info_t* left_cu = NULL, *above_cu = NULL;
|
||||
if (x) {
|
||||
|
@ -1787,16 +1796,14 @@ double uvg_mock_encode_coding_unit(
|
|||
left_cu,
|
||||
above_cu,
|
||||
0,
|
||||
cu_loc,
|
||||
depth,
|
||||
cu_width >> (tree_type == UVG_CHROMA_T),
|
||||
x >> (tree_type == UVG_CHROMA_T),
|
||||
y >> (tree_type == UVG_CHROMA_T),
|
||||
tree_type,
|
||||
&bits);
|
||||
}
|
||||
|
||||
// Encode skip flag
|
||||
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
|
||||
if (state->frame->slicetype != UVG_SLICE_I && (cu_loc->width != 4 || cu_loc->height != 4)) {
|
||||
int8_t ctx_skip = 0;
|
||||
|
||||
if (left_cu && left_cu->skipped) {
|
||||
|
@ -1829,7 +1836,7 @@ double uvg_mock_encode_coding_unit(
|
|||
}
|
||||
}
|
||||
// Prediction mode
|
||||
if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) {
|
||||
if (state->frame->slicetype != UVG_SLICE_I && (cu_loc->width != 4 || cu_loc->height != 4)) {
|
||||
|
||||
int8_t ctx_predmode = 0;
|
||||
|
||||
|
|
|
@ -40,7 +40,8 @@
|
|||
#include "encoderstate.h"
|
||||
#include "global.h"
|
||||
|
||||
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu);
|
||||
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu, const cu_loc_t*
|
||||
const cu_loc);
|
||||
bool uvg_is_lfnst_allowed(
|
||||
const encoder_state_t* const state,
|
||||
const cu_info_t* const pred_cu,
|
||||
|
@ -105,10 +106,8 @@ bool uvg_write_split_flag(
|
|||
const cu_info_t* left_cu,
|
||||
const cu_info_t* above_cu,
|
||||
uint8_t split_flag,
|
||||
const cu_loc_t* const cu_loc,
|
||||
int depth,
|
||||
int cu_width,
|
||||
int x,
|
||||
int y,
|
||||
enum uvg_tree_type tree_type,
|
||||
double* bits_out);
|
||||
|
||||
|
|
|
@ -627,36 +627,45 @@ static void encode_sao(encoder_state_t * const state,
|
|||
* \param prev_qp -1 if QP delta has not been coded in current QG,
|
||||
* otherwise the QP of the current QG
|
||||
*/
|
||||
static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp)
|
||||
static void set_cu_qps(encoder_state_t *state, const cu_loc_t* const cu_loc, int *last_qp, int *prev_qp, const
|
||||
int depth)
|
||||
{
|
||||
|
||||
// Stop recursion if the CU is completely outside the frame.
|
||||
if (x >= state->tile->frame->width || y >= state->tile->frame->height) return;
|
||||
if (cu_loc->x >= state->tile->frame->width || cu_loc->y >= state->tile->frame->height) return;
|
||||
|
||||
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, x, y);
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, cu_loc->x, cu_loc->y);
|
||||
const int width = LCU_WIDTH >> cu->depth;
|
||||
|
||||
if (depth <= state->frame->max_qp_delta_depth) {
|
||||
*prev_qp = -1;
|
||||
}
|
||||
|
||||
if (cu->depth > depth) {
|
||||
if (cu_loc->width > width) {
|
||||
// Recursively process sub-CUs.
|
||||
const int d = cu_width >> 1;
|
||||
set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp);
|
||||
set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp);
|
||||
const int half_width = cu_loc->width >> 1;
|
||||
const int half_height = cu_loc->height >> 1;
|
||||
cu_loc_t split_cu_loc;
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
|
||||
set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1);
|
||||
|
||||
} else {
|
||||
bool cbf_found = *prev_qp >= 0;
|
||||
|
||||
int y_limit = cu_loc->y + cu_loc->height;
|
||||
int x_limit = cu_loc->x + cu_loc->width;
|
||||
if (cu->tr_depth > depth) {
|
||||
// The CU is split into smaller transform units. Check whether coded
|
||||
// block flag is set for any of the TUs.
|
||||
const int tu_width = LCU_WIDTH >> cu->tr_depth;
|
||||
for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) {
|
||||
for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) {
|
||||
for (int y_scu = cu_loc->y; !cbf_found && y_scu < y_limit; y_scu += tu_width) {
|
||||
for (int x_scu = cu_loc->x; !cbf_found && x_scu < x_limit; x_scu += tu_width) {
|
||||
cu_info_t *tu = uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
|
||||
if (cbf_is_set_any(tu->cbf, cu->depth)) {
|
||||
cbf_found = true;
|
||||
|
@ -671,18 +680,18 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
|||
if (cbf_found) {
|
||||
*prev_qp = qp = cu->qp;
|
||||
} else {
|
||||
qp = uvg_get_cu_ref_qp(state, x, y, *last_qp);
|
||||
qp = uvg_get_cu_ref_qp(state, cu_loc->x, cu_loc->y, *last_qp);
|
||||
}
|
||||
|
||||
// Set the correct QP for all state->tile->frame->cu_array elements in
|
||||
// the area covered by the CU.
|
||||
for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
|
||||
for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
|
||||
for (int y_scu = cu_loc->y; y_scu < y_limit; y_scu += SCU_WIDTH) {
|
||||
for (int x_scu = cu_loc->x; x_scu < x_limit; x_scu += SCU_WIDTH) {
|
||||
uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_last_cu_in_qg(state, x, y, depth)) {
|
||||
if (is_last_cu_in_qg(state, cu_loc)) {
|
||||
*last_qp = cu->qp;
|
||||
}
|
||||
}
|
||||
|
@ -812,7 +821,9 @@ static void encoder_state_worker_encode_lcu_search(void * opaque)
|
|||
if (state->frame->max_qp_delta_depth >= 0) {
|
||||
int last_qp = state->last_qp;
|
||||
int prev_qp = -1;
|
||||
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
|
||||
cu_loc_t cu_loc;
|
||||
uvg_cu_loc_ctor(&cu_loc, lcu->position_px.x, lcu->position_px.y, LCU_WIDTH, LCU_WIDTH);
|
||||
set_cu_qps(state, &cu_loc, &last_qp, &prev_qp, 0);
|
||||
}
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.sliceReshaperEnableFlag) {
|
||||
|
|
|
@ -401,14 +401,13 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
|
|||
* \param depth depth in the CU tree
|
||||
* \return true, if it's the last CU in its QG, otherwise false
|
||||
*/
|
||||
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
|
||||
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, const cu_loc_t* const cu_loc)
|
||||
{
|
||||
if (state->frame->max_qp_delta_depth < 0) return false;
|
||||
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth;
|
||||
const int right = x + cu_width;
|
||||
const int bottom = y + cu_width;
|
||||
const int right = cu_loc->x + cu_loc->width;
|
||||
const int bottom = cu_loc->y + cu_loc->height;
|
||||
return (right % qg_width == 0 || right >= state->tile->frame->width) &&
|
||||
(bottom % qg_width == 0 || bottom >= state->tile->frame->height);
|
||||
}
|
||||
|
|
|
@ -856,8 +856,7 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
uint8_t max_filter_length_Q = 0;
|
||||
const int cu_size = LCU_WIDTH >> cu_q->depth;
|
||||
// TODO: NON square
|
||||
const int pu_size = dir == EDGE_HOR ? cu_size
|
||||
: cu_size;
|
||||
const int pu_size = dir == EDGE_HOR ? cu_size : cu_size;
|
||||
const int pu_pos = dir == EDGE_HOR ? y_coord
|
||||
: x_coord;
|
||||
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
|
||||
|
|
|
@ -273,7 +273,6 @@ typedef int32_t mv_t;
|
|||
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
|
||||
#define CLIP_TO_QP(value) CLIP(0, 51, (value))
|
||||
#define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; }
|
||||
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth)
|
||||
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
|
||||
#define CEILDIV(x,y) (((x) + (y) - 1) / (y))
|
||||
|
||||
|
|
|
@ -1555,7 +1555,7 @@ void uvg_intra_predict(
|
|||
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);
|
||||
if (data->pred_cu.depth != data->pred_cu.tr_depth || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
|
||||
predict_cclm(
|
||||
state, color, width, width, x, y, stride, intra_mode, lcu, refs, dst,
|
||||
state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
|
||||
(cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1],
|
||||
tree_type);
|
||||
}
|
||||
|
|
|
@ -297,7 +297,7 @@ out:
|
|||
static INLINE double get_coeff_cabac_cost(
|
||||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
cu_loc_t *cu_loc,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
|
@ -415,7 +415,7 @@ double uvg_get_coeff_cost(
|
|||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
cu_loc_t *cu_loc,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
|
@ -1409,7 +1409,6 @@ void uvg_rdoq(
|
|||
int8_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t block_type,
|
||||
int8_t tr_depth,
|
||||
uint16_t cbf,
|
||||
uint8_t lfnst_idx)
|
||||
{
|
||||
|
|
|
@ -60,7 +60,6 @@ void uvg_rdoq(
|
|||
int8_t type,
|
||||
int8_t scan_mode,
|
||||
int8_t block_type,
|
||||
int8_t tr_depth,
|
||||
uint16_t cbf,
|
||||
uint8_t lfnst_idx);
|
||||
|
||||
|
@ -73,7 +72,7 @@ double uvg_get_coeff_cost(
|
|||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
cu_loc_t *cu_loc,
|
||||
const cu_loc_t* const cu_loc,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
|
|
329
src/search.c
329
src/search.c
|
@ -63,30 +63,39 @@
|
|||
static const int INTRA_THRESHOLD = 8;
|
||||
|
||||
|
||||
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||
static INLINE void copy_cu_info(lcu_t *from, lcu_t *to, const cu_loc_t* const cu_loc, enum uvg_tree_type
|
||||
tree_type)
|
||||
{
|
||||
for (int y = y_local; y < y_local + width; y += SCU_WIDTH) {
|
||||
for (int x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||
const int y_limit = (cu_loc->local_y + cu_loc->height) >> (tree_type == UVG_CHROMA_T);
|
||||
const int x_limit = (cu_loc->local_x + cu_loc->width) >> (tree_type == UVG_CHROMA_T);
|
||||
for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) {
|
||||
for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) {
|
||||
*LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *from, lcu_t *to, enum uvg_tree_type
|
||||
static INLINE void copy_cu_pixels(
|
||||
lcu_t *from,
|
||||
lcu_t *to,
|
||||
const cu_loc_t* const cu_loc,
|
||||
enum uvg_tree_type
|
||||
tree_type)
|
||||
{
|
||||
const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T);
|
||||
const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T);
|
||||
const int luma_index = x_local + y_local * LCU_WIDTH;
|
||||
const int chroma_index = tree_type == UVG_CHROMA_T ? x_local + y_local * LCU_WIDTH_C : (x_local / 2) + (y_local / 2) * LCU_WIDTH_C;
|
||||
|
||||
if(tree_type != UVG_CHROMA_T) {
|
||||
uvg_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index],
|
||||
width, width, LCU_WIDTH, LCU_WIDTH);
|
||||
cu_loc->width, cu_loc->height, LCU_WIDTH, LCU_WIDTH);
|
||||
}
|
||||
if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
|
||||
uvg_pixels_blit(&from->rec.u[chroma_index], &to->rec.u[chroma_index],
|
||||
width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
||||
cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
uvg_pixels_blit(&from->rec.v[chroma_index], &to->rec.v[chroma_index],
|
||||
width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
||||
cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -103,8 +112,8 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to
|
|||
|
||||
if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
|
||||
//const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T));
|
||||
const int chroma_x = cu_loc->x >> (tree_type != UVG_CHROMA_T);
|
||||
const int chroma_y = cu_loc->y >> (tree_type != UVG_CHROMA_T);
|
||||
const int chroma_x = (cu_loc->x >> 1) & ~3;
|
||||
const int chroma_y = (cu_loc->y >> 1) & ~3;
|
||||
|
||||
const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C);
|
||||
copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
|
||||
|
@ -118,15 +127,17 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to
|
|||
/**
|
||||
* Copy all non-reference CU data from next level to current level.
|
||||
*/
|
||||
static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree, bool joint, enum
|
||||
uvg_tree_type tree_type)
|
||||
static void work_tree_copy_up(
|
||||
lcu_t *work_tree,
|
||||
bool joint,
|
||||
enum
|
||||
uvg_tree_type tree_type,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int depth)
|
||||
{
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, x_local, y_local, width, width);
|
||||
copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
|
||||
copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], tree_type);
|
||||
copy_cu_coeffs(&loc, &work_tree[depth + 1], &work_tree[depth], joint, tree_type);
|
||||
copy_cu_info (&work_tree[depth + 1], &work_tree[depth], cu_loc, tree_type);
|
||||
copy_cu_pixels(&work_tree[depth + 1], &work_tree[depth], cu_loc, tree_type);
|
||||
copy_cu_coeffs(cu_loc, &work_tree[depth + 1], &work_tree[depth], joint, tree_type);
|
||||
|
||||
}
|
||||
|
||||
|
@ -134,24 +145,32 @@ static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_t
|
|||
/**
|
||||
* Copy all non-reference CU data from current level to all lower levels.
|
||||
*/
|
||||
static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work_tree, enum uvg_tree_type
|
||||
tree_type)
|
||||
static void work_tree_copy_down(
|
||||
int depth,
|
||||
lcu_t *work_tree,
|
||||
enum uvg_tree_type
|
||||
tree_type,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
const int width = tree_type != UVG_CHROMA_T ? LCU_WIDTH >> depth : LCU_WIDTH_C >> 1;
|
||||
for (int i = depth + 1; i <= MAX_PU_DEPTH; i++) {
|
||||
copy_cu_info (x_local, y_local, width, &work_tree[depth], &work_tree[i]);
|
||||
copy_cu_pixels(x_local, y_local, LCU_WIDTH >> depth, &work_tree[depth], &work_tree[i], tree_type);
|
||||
copy_cu_info (&work_tree[depth], &work_tree[i], cu_loc, tree_type);
|
||||
copy_cu_pixels(&work_tree[depth], &work_tree[i], cu_loc, tree_type);
|
||||
}
|
||||
}
|
||||
|
||||
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type
|
||||
void uvg_lcu_fill_trdepth(
|
||||
lcu_t *lcu,
|
||||
const cu_loc_t* const cu_loc,
|
||||
uint8_t tr_depth,
|
||||
enum uvg_tree_type
|
||||
tree_type)
|
||||
{
|
||||
const int x_local = SUB_SCU(x_px);
|
||||
const int y_local = SUB_SCU(y_px);
|
||||
const unsigned width = (tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C) >> depth;
|
||||
const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T);
|
||||
const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T);
|
||||
const unsigned width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
|
||||
const unsigned height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
|
||||
|
||||
for (unsigned y = 0; y < width; y += SCU_WIDTH) {
|
||||
for (unsigned y = 0; y < height; y += SCU_WIDTH) {
|
||||
for (unsigned x = 0; x < width; x += SCU_WIDTH) {
|
||||
LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y)->tr_depth = tr_depth;
|
||||
}
|
||||
|
@ -167,6 +186,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
|||
to->type = cu->type;
|
||||
to->depth = cu->depth;
|
||||
to->qp = cu->qp;
|
||||
to->split_tree = cu->split_tree;
|
||||
//to->tr_idx = cu->tr_idx;
|
||||
to->lfnst_idx = cu->lfnst_idx;
|
||||
to->lfnst_last_scan_pos = cu->lfnst_last_scan_pos;
|
||||
|
@ -214,34 +234,37 @@ static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned wid
|
|||
|
||||
|
||||
//Calculates cost for all zero coeffs
|
||||
static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, const int x, const int y,
|
||||
static double cu_zero_coeff_cost(
|
||||
const encoder_state_t *state,
|
||||
lcu_t *work_tree,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int depth)
|
||||
{
|
||||
int x_local = SUB_SCU(x);
|
||||
int y_local = SUB_SCU(y);
|
||||
int cu_width = LCU_WIDTH >> depth;
|
||||
lcu_t *const lcu = &work_tree[depth];
|
||||
|
||||
const int y_local = cu_loc->local_y;
|
||||
const int x_local = cu_loc->local_x;
|
||||
|
||||
const int luma_index = y_local * LCU_WIDTH + x_local;
|
||||
const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2);
|
||||
|
||||
double ssd = 0.0;
|
||||
ssd += UVG_LUMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
|
||||
LCU_WIDTH, LCU_WIDTH, cu_width
|
||||
LCU_WIDTH, LCU_WIDTH, cu_loc->width
|
||||
);
|
||||
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
if (y_local % 8 == 0 && x_local % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) {
|
||||
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_loc->chroma_width
|
||||
);
|
||||
ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd(
|
||||
&lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2
|
||||
LCU_WIDTH_C, LCU_WIDTH_C, cu_loc->chroma_width
|
||||
);
|
||||
}
|
||||
// Save the pixels at a lower level of the working tree.
|
||||
copy_cu_pixels(x_local, y_local, cu_width, lcu, &work_tree[depth + 1], UVG_BOTH_T);
|
||||
copy_cu_pixels(lcu, &work_tree[depth + 1], cu_loc, UVG_BOTH_T);
|
||||
|
||||
return ssd;
|
||||
}
|
||||
|
@ -295,46 +318,45 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
|
|||
* Takes into account SSD of reconstruction and the cost of encoding whatever
|
||||
* prediction unit data needs to be coded.
|
||||
*/
|
||||
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
double uvg_cu_rd_cost_luma(
|
||||
const encoder_state_t *const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu,
|
||||
uint8_t isp_cbf)
|
||||
{
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
|
||||
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, x_px, y_px, width, height);
|
||||
|
||||
// cur_cu is used for TU parameters.
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y);
|
||||
|
||||
double coeff_bits = 0;
|
||||
double tr_tree_bits = 0;
|
||||
|
||||
// Check that lcu is not in
|
||||
assert(x_px >= 0 && x_px < LCU_WIDTH);
|
||||
assert(y_px >= 0 && y_px < LCU_WIDTH);
|
||||
|
||||
const uint8_t tr_depth = tr_cu->tr_depth - depth;
|
||||
|
||||
if (tr_depth > 0) {
|
||||
int offset = width / 2;
|
||||
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
|
||||
double sum = 0;
|
||||
const int half_width = cu_loc->width >> 1;
|
||||
const int half_height = cu_loc->height >> 1;
|
||||
cu_loc_t split_cu_loc;
|
||||
|
||||
sum += uvg_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu, isp_cbf);
|
||||
sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, isp_cbf);
|
||||
sum += uvg_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf);
|
||||
sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y+ half_height, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf);
|
||||
|
||||
return sum + tr_tree_bits * state->lambda;
|
||||
}
|
||||
|
||||
// Add transform_tree cbf_luma bit cost.
|
||||
if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
|
||||
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
const int is_tr_split = tr_cu->tr_depth - tr_cu->depth;
|
||||
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y);
|
||||
if (pred_cu->type == CU_INTRA ||
|
||||
|
@ -347,7 +369,9 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search");
|
||||
}
|
||||
|
||||
if (is_set && state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size)) {
|
||||
if (is_set && state->encoder_control->cfg.trskip_enable
|
||||
&& cu_loc->width <= (1 << state->encoder_control->cfg.trskip_max_size)
|
||||
&& cu_loc->height <= (1 << state->encoder_control->cfg.trskip_max_size)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, pred_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag");
|
||||
}
|
||||
}
|
||||
|
@ -367,28 +391,28 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
// SSD between reconstruction and original
|
||||
int ssd = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = y_px * LCU_WIDTH + x_px;
|
||||
int index = cu_loc->local_y * LCU_WIDTH + cu_loc->local_x;
|
||||
ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
width);
|
||||
cu_loc->width);
|
||||
}
|
||||
|
||||
|
||||
if (!skip_residual_coding) {
|
||||
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
int8_t luma_scan_mode = SCAN_DIAG;
|
||||
if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
|
||||
//const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
const coeff_t* coeffs = lcu->coeff.y;
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, cu_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU);
|
||||
}
|
||||
else {
|
||||
int split_type = pred_cu->intra.isp_mode;
|
||||
int split_limit = uvg_get_isp_split_num(width, height, split_type, true);
|
||||
int split_limit = uvg_get_isp_split_num(cu_loc->width, cu_loc->height, split_type, true);
|
||||
|
||||
for (int i = 0; i < split_limit; ++i) {
|
||||
cu_loc_t split_loc;
|
||||
uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true);
|
||||
uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height, i, split_type, true);
|
||||
const int part_x = split_loc.x;
|
||||
const int part_y = split_loc.y;
|
||||
|
||||
|
@ -406,34 +430,32 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
|
||||
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
double uvg_cu_rd_cost_chroma(
|
||||
const encoder_state_t *const state,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu)
|
||||
lcu_t *const lcu,
|
||||
const cu_loc_t * const cu_loc)
|
||||
{
|
||||
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
|
||||
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 };
|
||||
const int width = cu_loc->chroma_width;
|
||||
const int height = cu_loc->chroma_height;
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, x_px, y_px, width, height);
|
||||
|
||||
double tr_tree_bits = 0;
|
||||
double coeff_bits = 0;
|
||||
|
||||
assert(x_px >= 0 && x_px < LCU_WIDTH);
|
||||
assert(y_px >= 0 && y_px < LCU_WIDTH);
|
||||
|
||||
if (depth == 4 && (x_px % 8 == 0 || y_px % 8 == 0)) {
|
||||
if (cu_loc->width == 4 && cu_loc->height == 4 && (cu_loc->x % 8 == 0 || cu_loc->y % 8 == 0)) {
|
||||
// For MAX_PU_DEPTH calculate chroma for previous depth for the first
|
||||
// block and return 0 cost for all others.
|
||||
return 0;
|
||||
}
|
||||
|
||||
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
|
||||
|
||||
// See luma for why the second condition
|
||||
if (!skip_residual_coding) {
|
||||
const int tr_depth = depth - pred_cu->depth;
|
||||
|
@ -450,14 +472,21 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
|
||||
if (tr_cu->tr_depth > depth) {
|
||||
int offset = LCU_WIDTH >> (depth + 1);
|
||||
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
|
||||
double sum = 0;
|
||||
// Recursively process sub-CUs.
|
||||
const int half_width = cu_loc->width >> 1;
|
||||
const int half_height = cu_loc->height >> 1;
|
||||
cu_loc_t split_cu_loc;
|
||||
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
|
||||
sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
|
||||
sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc);
|
||||
|
||||
return sum + tr_tree_bits * state->lambda;
|
||||
}
|
||||
|
@ -487,14 +516,17 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
|
||||
if (!skip_residual_coding) {
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
//const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
|
||||
// We need the rounded & shifted coordinates for the chroma coeff calculation
|
||||
cu_loc_t chroma_loc;
|
||||
uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, cu_loc->width, cu_loc->height);
|
||||
|
||||
if((pred_cu->joint_cb_cr & 3) == 0){
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
}
|
||||
else {
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -507,31 +539,22 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
|
||||
static double cu_rd_cost_tr_split_accurate(
|
||||
const encoder_state_t* const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
const cu_info_t* const pred_cu,
|
||||
lcu_t* const lcu,
|
||||
enum uvg_tree_type tree_type,
|
||||
uint8_t isp_cbf) {
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, x_px, y_px, width, height);
|
||||
uint8_t isp_cbf,
|
||||
const cu_loc_t* const cu_loc) {
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height; // TODO: height for non-square blocks
|
||||
|
||||
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
|
||||
// cur_cu is used for TU parameters.
|
||||
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T));
|
||||
|
||||
double coeff_bits = 0;
|
||||
double tr_tree_bits = 0;
|
||||
|
||||
// Check that lcu is not in
|
||||
assert(x_px >= 0 && x_px < LCU_WIDTH);
|
||||
assert(y_px >= 0 && y_px < LCU_WIDTH);
|
||||
|
||||
const uint8_t tr_depth = tr_cu->tr_depth - depth;
|
||||
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
|
||||
const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_V);
|
||||
|
@ -539,7 +562,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
|
||||
{
|
||||
int cbf = cbf_is_set_any(pred_cu->cbf, depth);
|
||||
int cbf = cbf_is_set_any(tr_cu->cbf, depth);
|
||||
// Only need to signal coded block flag if not skipped or merged
|
||||
// skip = no coded residual, merge = coded residual
|
||||
if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) {
|
||||
|
@ -548,24 +571,30 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
|
||||
}
|
||||
|
||||
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 && y_px % 8)) && tree_type != UVG_LUMA_T;
|
||||
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (cu_loc->x % 8 && cu_loc->y % 8)) && tree_type != UVG_LUMA_T;
|
||||
if( !skip_residual_coding && has_chroma) {
|
||||
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
|
||||
if(tr_cu->tr_depth == depth) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
|
||||
}
|
||||
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
|
||||
if(tr_cu->tr_depth == depth) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr");
|
||||
}
|
||||
}
|
||||
|
||||
if (tr_depth > 0) {
|
||||
int offset = LCU_WIDTH >> (depth + 1);
|
||||
if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) {
|
||||
double sum = 0;
|
||||
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf);
|
||||
const int half_width = cu_loc->width >> 1;
|
||||
const int half_height = cu_loc->height >> 1;
|
||||
cu_loc_t split_cu_loc;
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
|
||||
uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height);
|
||||
sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc);
|
||||
return sum + tr_tree_bits * state->lambda;
|
||||
}
|
||||
const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) && tree_type != UVG_CHROMA_T;
|
||||
|
@ -573,7 +602,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
const bool is_isp = !(pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP);
|
||||
// Add transform_tree cbf_luma bit cost.
|
||||
if (!is_isp) {
|
||||
const int is_tr_split = depth - tr_cu->depth;
|
||||
const int is_tr_split = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH;
|
||||
if ((pred_cu->type == CU_INTRA ||
|
||||
is_tr_split ||
|
||||
cb_flag_u ||
|
||||
|
@ -610,7 +639,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
// SSD between reconstruction and original
|
||||
unsigned luma_ssd = 0;
|
||||
if (!state->encoder_control->cfg.lossless && tree_type != UVG_CHROMA_T) {
|
||||
int index = y_px * LCU_WIDTH + x_px;
|
||||
int index = cu_loc->local_x + LCU_WIDTH * cu_loc->local_y;
|
||||
luma_ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
|
||||
LCU_WIDTH, LCU_WIDTH,
|
||||
width);
|
||||
|
@ -623,12 +652,12 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
if (can_use_tr_skip) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag");
|
||||
}
|
||||
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
int8_t luma_scan_mode = SCAN_DIAG;
|
||||
if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) {
|
||||
//const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
const coeff_t* coeffs = lcu->coeff.y;
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, cu_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU);
|
||||
}
|
||||
else {
|
||||
int split_type = pred_cu->intra.isp_mode;
|
||||
|
@ -636,7 +665,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
|
||||
for (int i = 0; i < split_limit; ++i) {
|
||||
cu_loc_t split_loc;
|
||||
uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true);
|
||||
uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true);
|
||||
const int part_x = split_loc.x;
|
||||
const int part_y = split_loc.y;
|
||||
|
||||
|
@ -649,8 +678,8 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
}
|
||||
}
|
||||
|
||||
if(depth == 4 || tree_type == UVG_LUMA_T) {
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, COLOR_Y, lcu)) {
|
||||
if(cu_loc->width == 4 || tree_type == UVG_LUMA_T) {
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, COLOR_Y, lcu)) {
|
||||
const int lfnst_idx = tr_cu->lfnst_idx;
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
|
@ -672,14 +701,17 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
|
||||
unsigned chroma_ssd = 0;
|
||||
if(has_chroma) {
|
||||
const vector2d_t lcu_px = { (x_px >> (tree_type != UVG_CHROMA_T)) & ~3, (y_px >> (tree_type != UVG_CHROMA_T)) &~3 };
|
||||
uvg_cu_loc_ctor(&loc, lcu_px.x, lcu_px.y, width, height);
|
||||
const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1));
|
||||
const int chroma_height = chroma_width; // TODO: height for non-square blocks
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
cu_loc_t chroma_loc;
|
||||
const vector2d_t lcu_px = { (cu_loc->local_x >> 1) & ~3, (cu_loc->local_y >> 1) &~3 };
|
||||
uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, width, height);
|
||||
const int chroma_width = cu_loc->chroma_width;
|
||||
const int chroma_height = cu_loc->chroma_height; // TODO: height for non-square blocks
|
||||
int8_t scan_order = SCAN_DIAG;
|
||||
//const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
|
||||
const bool chroma_can_use_tr_skip = state->encoder_control->cfg.trskip_enable && chroma_width <= (1 << state->encoder_control->cfg.trskip_max_size);
|
||||
const bool chroma_can_use_tr_skip = state->encoder_control->cfg.trskip_enable
|
||||
&& chroma_width <= (1 << state->encoder_control->cfg.trskip_max_size)
|
||||
&& chroma_height <= (1 << state->encoder_control->cfg.trskip_max_size);
|
||||
if(pred_cu->joint_cb_cr == 0) {
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
|
@ -697,8 +729,8 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
if(chroma_can_use_tr_skip && cb_flag_v) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
|
||||
}
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU);
|
||||
|
||||
}
|
||||
else {
|
||||
|
@ -715,12 +747,12 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
if (chroma_can_use_tr_skip) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
|
||||
}
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
|
||||
coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU);
|
||||
}
|
||||
}
|
||||
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, height, x_px, y_px, tree_type, depth == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) {
|
||||
const int lfnst_idx = (depth != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx;
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, cu_loc->width == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) {
|
||||
const int lfnst_idx = (cu_loc->width != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx;
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
&cabac->ctx.lfnst_idx_model[tr_cu->depth == 4 || tree_type != UVG_BOTH_T],
|
||||
|
@ -739,7 +771,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
tr_cu->lfnst_last_scan_pos = false;
|
||||
tr_cu->violates_lfnst_constrained_luma = false;
|
||||
tr_cu->violates_lfnst_constrained_chroma = false;
|
||||
if (uvg_is_mts_allowed(state, tr_cu) && tree_type != UVG_CHROMA_T) {
|
||||
if (uvg_is_mts_allowed(state, tr_cu, cu_loc) && tree_type != UVG_CHROMA_T) {
|
||||
|
||||
bool symbol = tr_cu->tr_idx != 0;
|
||||
int ctx_idx = 0;
|
||||
|
@ -1035,10 +1067,6 @@ static double search_cu(
|
|||
if ((split_tree.current_depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
|
||||
|
||||
intra_search.pred_cu.joint_cb_cr = 0;
|
||||
// There is almost no benefit to doing the chroma mode search for
|
||||
// rd2. Possibly because the luma mode search already takes chroma
|
||||
// into account, so there is less of a chanse of luma mode being
|
||||
// really bad for chroma.
|
||||
if(tree_type == UVG_CHROMA_T) {
|
||||
intra_search.pred_cu.intra = uvg_get_co_located_luma_cu(x, y, luma_width, luma_width, NULL, state->tile->frame->cu_array, UVG_CHROMA_T)->intra;
|
||||
intra_mode = intra_search.pred_cu.intra.mode;
|
||||
|
@ -1046,7 +1074,7 @@ static double search_cu(
|
|||
}
|
||||
intra_search.pred_cu.intra.mode_chroma = intra_search.pred_cu.intra.mode;
|
||||
if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) {
|
||||
uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search, tree_type);
|
||||
uvg_search_cu_intra_chroma(state, cu_loc, lcu, &intra_search, tree_type);
|
||||
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) {
|
||||
intra_search.pred_cu.joint_cb_cr = 4;
|
||||
|
@ -1066,7 +1094,7 @@ static double search_cu(
|
|||
false,
|
||||
true);
|
||||
if(tree_type != UVG_CHROMA_T) {
|
||||
intra_cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, &intra_search.pred_cu, lcu);
|
||||
intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, cu_loc);
|
||||
}
|
||||
else {
|
||||
intra_cost = intra_search.cost;
|
||||
|
@ -1080,7 +1108,7 @@ static double search_cu(
|
|||
}
|
||||
intra_search.pred_cu.intra.mode = intra_mode;
|
||||
if(tree_type == UVG_CHROMA_T) {
|
||||
uvg_lcu_fill_trdepth(lcu, x_local, y_local, split_tree.current_depth, split_tree.current_depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, split_tree.current_depth, tree_type);
|
||||
}
|
||||
}
|
||||
if (intra_cost < cost) {
|
||||
|
@ -1187,14 +1215,14 @@ static double search_cu(
|
|||
// This will no longer be necessary if the transform depths are not shared.
|
||||
int tr_depth = MAX(1, split_tree.current_depth);
|
||||
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, tr_depth, tree_type);
|
||||
|
||||
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
|
||||
uvg_inter_recon_cu(state, lcu, true, has_chroma, cu_loc);
|
||||
|
||||
if (ctrl->cfg.zero_coeff_rdo && !ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
|
||||
//Calculate cost for zero coeffs
|
||||
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, split_tree.current_depth) + inter_bitcost * state->lambda;
|
||||
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, cu_loc, split_tree.current_depth) + inter_bitcost * state->lambda;
|
||||
|
||||
}
|
||||
cu_loc_t loc;
|
||||
|
@ -1239,13 +1267,13 @@ static double search_cu(
|
|||
|
||||
cost = bits * state->lambda;
|
||||
|
||||
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0);
|
||||
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc);
|
||||
|
||||
if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) {
|
||||
cost = inter_zero_coeff_cost;
|
||||
|
||||
// Restore saved pixels from lower level of the working tree.
|
||||
copy_cu_pixels(x_local, y_local, cu_width, &work_tree[split_tree.current_depth + 1], lcu, tree_type);
|
||||
copy_cu_pixels(&work_tree[split_tree.current_depth + 1], lcu, cu_loc, tree_type);
|
||||
|
||||
if (cur_cu->merged) {
|
||||
cur_cu->merged = 0;
|
||||
|
@ -1256,7 +1284,7 @@ static double search_cu(
|
|||
if (cur_cu->tr_depth != 0) {
|
||||
// Reset transform depth since there are no coefficients. This
|
||||
// ensures that CBF is cleared for the whole area of the CU.
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type);
|
||||
}
|
||||
|
||||
cur_cu->cbf = 0;
|
||||
|
@ -1317,10 +1345,8 @@ static double search_cu(
|
|||
left_cu,
|
||||
above_cu,
|
||||
1,
|
||||
cu_loc,
|
||||
depth,
|
||||
cu_width,
|
||||
x >> (tree_type == UVG_CHROMA_T),
|
||||
y >> (tree_type == UVG_CHROMA_T),
|
||||
tree_type,
|
||||
&split_bits);
|
||||
}
|
||||
|
@ -1380,8 +1406,7 @@ static double search_cu(
|
|||
uvg_write_split_flag(state, &state->search_cabac,
|
||||
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
|
||||
0, depth, cu_width, x, y, tree_type,
|
||||
&bits);
|
||||
0, cu_loc, depth, tree_type, &bits);
|
||||
|
||||
cur_cu->intra = cu_d1->intra;
|
||||
cur_cu->type = CU_INTRA;
|
||||
|
@ -1391,7 +1416,7 @@ static double search_cu(
|
|||
cur_cu->lfnst_idx = 0;
|
||||
cur_cu->cr_lfnst_idx = 0;
|
||||
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, cur_cu->tr_depth, tree_type);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
|
||||
intra_search_data_t proxy;
|
||||
|
@ -1404,12 +1429,12 @@ static double search_cu(
|
|||
lcu,
|
||||
tree_type,
|
||||
true,
|
||||
state->encoder_control->chroma_format == UVG_CSP_400);
|
||||
state->encoder_control->chroma_format != UVG_CSP_400);
|
||||
|
||||
double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits;
|
||||
cost += mode_bits * state->lambda;
|
||||
|
||||
cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0);
|
||||
cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc);
|
||||
|
||||
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac));
|
||||
|
@ -1419,7 +1444,7 @@ static double search_cu(
|
|||
if (split_cost < cost) {
|
||||
// Copy split modes to this depth.
|
||||
cost = split_cost;
|
||||
work_tree_copy_up(x_local, y_local, depth, work_tree, state->encoder_control->cfg.jccr, tree_type);
|
||||
work_tree_copy_up(work_tree, state->encoder_control->cfg.jccr, tree_type, cu_loc, depth);
|
||||
#if UVG_DEBUG
|
||||
//debug_split = 1;
|
||||
#endif
|
||||
|
@ -1427,7 +1452,7 @@ static double search_cu(
|
|||
// Copy this CU's mode all the way down for use in adjacent CUs mode
|
||||
// search.
|
||||
memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac));
|
||||
work_tree_copy_down(x_local, y_local, depth, work_tree, tree_type);
|
||||
work_tree_copy_down(depth, work_tree, tree_type, cu_loc);
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
|
@ -1454,7 +1479,7 @@ static double search_cu(
|
|||
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {
|
||||
// Need to copy modes down since the lower level of the work tree is used
|
||||
// when searching SMP and AMP blocks.
|
||||
work_tree_copy_down(x_local, y_local, depth, work_tree, tree_type);
|
||||
work_tree_copy_down(depth, work_tree, tree_type, cu_loc);
|
||||
if(tree_type != UVG_CHROMA_T) {
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
|
|
18
src/search.h
18
src/search.h
|
@ -84,17 +84,23 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map);
|
|||
|
||||
void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff);
|
||||
|
||||
double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
double uvg_cu_rd_cost_luma(
|
||||
const encoder_state_t *const state,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu,
|
||||
uint8_t isp_cbf);
|
||||
double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
||||
const int x_px, const int y_px, const int depth,
|
||||
double uvg_cu_rd_cost_chroma(
|
||||
const encoder_state_t *const state,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu);
|
||||
lcu_t *const lcu,
|
||||
const cu_loc_t * const);
|
||||
|
||||
void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type
|
||||
void uvg_lcu_fill_trdepth(
|
||||
lcu_t *lcu,
|
||||
const cu_loc_t* const cu_loc,
|
||||
uint8_t tr_depth,
|
||||
enum uvg_tree_type
|
||||
tree_type);
|
||||
|
||||
void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
|
||||
|
|
|
@ -1811,7 +1811,7 @@ static void search_pu_inter(
|
|||
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
|
||||
cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0];
|
||||
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, MAX(1, depth), UVG_BOTH_T);
|
||||
uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
|
||||
|
||||
uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
|
||||
|
@ -2129,12 +2129,12 @@ void uvg_cu_cost_inter_rd2(
|
|||
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
int tr_depth = MAX(1, depth);
|
||||
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, tr_depth, UVG_BOTH_T);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, tr_depth, UVG_BOTH_T);
|
||||
|
||||
const int x_px = SUB_SCU(cu_loc->x);
|
||||
const int y_px = SUB_SCU(cu_loc->y);
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
const int height = width; // TODO: non-square blocks
|
||||
const int width = cu_loc->width;
|
||||
const int height = cu_loc->height;
|
||||
|
||||
cabac_data_t cabac_copy;
|
||||
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
|
||||
|
@ -2155,10 +2155,10 @@ void uvg_cu_cost_inter_rd2(
|
|||
int index = y_px / 2 * LCU_WIDTH_C + x_px / 2;
|
||||
double ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width / 2);
|
||||
cu_loc->chroma_width);
|
||||
double ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width / 2);
|
||||
cu_loc->chroma_width);
|
||||
ssd += (ssd_u + ssd_v) * UVG_CHROMA_MULT;
|
||||
}
|
||||
double no_cbf_bits;
|
||||
|
@ -2217,12 +2217,10 @@ void uvg_cu_cost_inter_rd2(
|
|||
uvg_chorma_ts_out_t chorma_ts_out;
|
||||
uvg_chroma_transform_search(
|
||||
state,
|
||||
depth,
|
||||
lcu,
|
||||
&cabac_copy,
|
||||
cu_loc,
|
||||
index,
|
||||
0,
|
||||
cur_cu,
|
||||
u_pred,
|
||||
v_pred,
|
||||
|
@ -2262,10 +2260,10 @@ void uvg_cu_cost_inter_rd2(
|
|||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||
|
||||
if(cbf) {
|
||||
*inter_cost = uvg_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu, 0);
|
||||
*inter_cost = uvg_cu_rd_cost_luma(state, cu_loc, cur_cu, lcu, 0);
|
||||
if (reconstruct_chroma) {
|
||||
if (cur_cu->depth != cur_cu->tr_depth || !state->encoder_control->cfg.jccr) {
|
||||
*inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
|
||||
*inter_cost += uvg_cu_rd_cost_chroma(state, cur_cu, lcu, cu_loc);
|
||||
}
|
||||
else {
|
||||
*inter_cost += chroma_cost;
|
||||
|
|
|
@ -431,9 +431,7 @@ static double search_intra_trdepth(
|
|||
}
|
||||
double rd_cost = uvg_cu_rd_cost_luma(
|
||||
state,
|
||||
lcu_px.x,
|
||||
lcu_px.y,
|
||||
depth,
|
||||
cu_loc,
|
||||
pred_cu,
|
||||
lcu,
|
||||
search_data->best_isp_cbfs);
|
||||
|
@ -502,11 +500,9 @@ static double search_intra_trdepth(
|
|||
);
|
||||
best_rd_cost += uvg_cu_rd_cost_chroma(
|
||||
state,
|
||||
lcu_px.x,
|
||||
lcu_px.y,
|
||||
depth,
|
||||
pred_cu,
|
||||
lcu);
|
||||
lcu,
|
||||
cu_loc);
|
||||
pred_cu->intra.mode = luma_mode;
|
||||
|
||||
// Check lfnst constraints for chroma
|
||||
|
@ -552,7 +548,7 @@ static double search_intra_trdepth(
|
|||
UVG_BOTH_T,
|
||||
false,
|
||||
true);
|
||||
best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
|
||||
best_rd_cost += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, cu_loc);
|
||||
pred_cu->intra.mode = luma_mode;
|
||||
}
|
||||
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
|
||||
|
@ -655,7 +651,7 @@ static double search_intra_trdepth(
|
|||
if (depth == 0 || split_cost < nosplit_cost) {
|
||||
return split_cost;
|
||||
} else {
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type);
|
||||
|
||||
pred_cu->cbf = nosplit_cbf;
|
||||
|
||||
|
@ -690,19 +686,15 @@ static void sort_modes(intra_search_data_t* __restrict modes, uint8_t length)
|
|||
|
||||
static int search_intra_chroma_rough(
|
||||
encoder_state_t * const state,
|
||||
int x_px,
|
||||
int y_px,
|
||||
int depth,
|
||||
const vector2d_t* const lcu_px,
|
||||
intra_search_data_t* chroma_data,
|
||||
lcu_t* lcu,
|
||||
int8_t luma_mode,
|
||||
enum uvg_tree_type tree_type)
|
||||
enum uvg_tree_type tree_type,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
assert(depth != 4 || (x_px & 4 && y_px & 4));
|
||||
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
|
||||
const int_fast8_t log2_width_c = uvg_g_convert_to_log2[cu_loc->chroma_width];
|
||||
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
|
||||
const vector2d_t luma_px = { x_px & ~7, y_px & ~7 };
|
||||
const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 };
|
||||
const int width = 1 << log2_width_c;
|
||||
const int height = width; // TODO: height for non-square blocks
|
||||
|
||||
|
@ -714,7 +706,7 @@ static int search_intra_chroma_rough(
|
|||
uvg_intra_references refs_v;
|
||||
uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0);
|
||||
|
||||
vector2d_t lcu_cpx = { (lcu_px->x & ~7) / 2, (lcu_px->y & ~7) / 2 };
|
||||
vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 };
|
||||
uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
|
||||
uvg_pixel* orig_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
|
||||
|
||||
|
@ -1494,29 +1486,19 @@ double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
|
|||
|
||||
int8_t uvg_search_intra_chroma_rdo(
|
||||
encoder_state_t * const state,
|
||||
int x_px,
|
||||
int y_px,
|
||||
int depth,
|
||||
int8_t num_modes,
|
||||
lcu_t *const lcu,
|
||||
intra_search_data_t* chroma_data,
|
||||
int8_t luma_mode,
|
||||
enum uvg_tree_type tree_type)
|
||||
enum uvg_tree_type tree_type,
|
||||
const cu_loc_t* const cu_loc)
|
||||
{
|
||||
const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4);
|
||||
const bool reconstruct_chroma = true;
|
||||
|
||||
const int luma_width = LCU_WIDTH >> depth;
|
||||
const int luma_height = LCU_WIDTH >> depth; // TODO: height
|
||||
|
||||
int log2_width = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
|
||||
|
||||
cu_loc_t loc;
|
||||
uvg_cu_loc_ctor(&loc, x_px & ~7, y_px & ~7, luma_width, luma_height);
|
||||
|
||||
const int chroma_width = loc.chroma_width;
|
||||
const int chroma_height = loc.chroma_height;
|
||||
const int chroma_width = cu_loc->chroma_width;
|
||||
const int chroma_height = cu_loc->chroma_height;
|
||||
uvg_intra_references refs[2];
|
||||
const vector2d_t luma_px = { x_px & ~7, y_px & ~7 };
|
||||
const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 };
|
||||
const vector2d_t pic_px = {
|
||||
state->tile->frame->width,
|
||||
state->tile->frame->height,
|
||||
|
@ -1524,17 +1506,17 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
|
||||
|
||||
if (reconstruct_chroma) {
|
||||
uvg_intra_build_reference(&loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0);
|
||||
uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0);
|
||||
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0);
|
||||
uvg_intra_build_reference(cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0);
|
||||
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y };
|
||||
cabac_data_t temp_cabac;
|
||||
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
|
||||
|
||||
const int offset = ((lcu_px.x & ~7) >> 1) + ((lcu_px.y & ~7) >> 1)* LCU_WIDTH_C;
|
||||
const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C;
|
||||
|
||||
int lfnst_modes_to_check[3];
|
||||
if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) {
|
||||
if((cu_loc->width == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
lfnst_modes_to_check[i] = i;
|
||||
}
|
||||
|
@ -1572,7 +1554,7 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
uvg_intra_predict(
|
||||
state,
|
||||
&refs[COLOR_U - 1],
|
||||
&loc,
|
||||
cu_loc,
|
||||
COLOR_U,
|
||||
u_pred,
|
||||
&chroma_data[mode_i],
|
||||
|
@ -1581,7 +1563,7 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
uvg_intra_predict(
|
||||
state,
|
||||
&refs[COLOR_V - 1],
|
||||
&loc,
|
||||
cu_loc,
|
||||
COLOR_V,
|
||||
v_pred,
|
||||
&chroma_data[mode_i],
|
||||
|
@ -1606,12 +1588,10 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
uvg_chorma_ts_out_t chorma_ts_out;
|
||||
uvg_chroma_transform_search(
|
||||
state,
|
||||
depth,
|
||||
lcu,
|
||||
&temp_cabac,
|
||||
&loc,
|
||||
cu_loc,
|
||||
offset,
|
||||
mode,
|
||||
pred_cu,
|
||||
u_pred,
|
||||
v_pred,
|
||||
|
@ -1653,12 +1633,12 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
state->search_cabac.update = 1;
|
||||
chroma_data[mode_i].cost = mode_bits * state->lambda;
|
||||
uvg_intra_recon_cu(state,
|
||||
&chroma_data[mode_i], &loc,
|
||||
&chroma_data[mode_i], cu_loc,
|
||||
pred_cu, lcu,
|
||||
tree_type,
|
||||
false,
|
||||
true);
|
||||
chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
|
||||
chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, cu_loc);
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
}
|
||||
|
@ -1677,14 +1657,11 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
|
||||
int8_t uvg_search_cu_intra_chroma(
|
||||
encoder_state_t * const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
intra_search_data_t *search_data,
|
||||
enum uvg_tree_type tree_type)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
|
||||
const cu_info_t *cur_pu = &search_data->pred_cu;
|
||||
int8_t intra_mode = !cur_pu->intra.mip_flag ? cur_pu->intra.mode : 0;
|
||||
|
@ -1698,6 +1675,9 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
}
|
||||
}
|
||||
|
||||
cu_loc_t chroma_loc;
|
||||
uvg_cu_loc_ctor(&chroma_loc, cu_loc->x & ~7, cu_loc->y & ~7, cu_loc->width, cu_loc->height);
|
||||
|
||||
// The number of modes to select for slower chroma search. Luma mode
|
||||
// is always one of the modes, so 2 means the final decision is made
|
||||
// between luma mode and one other mode that looks the best
|
||||
|
@ -1715,7 +1695,7 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
chroma_data[i].pred_cu = *cur_pu;
|
||||
chroma_data[i].pred_cu.intra.mode_chroma = num_modes == 1 ? intra_mode : modes[i];
|
||||
chroma_data[i].cost = 0;
|
||||
if(depth != 4 && tree_type == UVG_BOTH_T) {
|
||||
if(cu_loc->width != 4 && tree_type == UVG_BOTH_T) {
|
||||
memcpy(chroma_data[i].lfnst_costs, search_data->lfnst_costs, sizeof(double) * 3);
|
||||
}
|
||||
}
|
||||
|
@ -1726,16 +1706,13 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
if(state->encoder_control->cfg.cclm && 0){
|
||||
|
||||
|
||||
num_modes = search_intra_chroma_rough(state, x_px, y_px, depth,
|
||||
&lcu_px,
|
||||
chroma_data,
|
||||
lcu,
|
||||
intra_mode,
|
||||
tree_type);
|
||||
num_modes = search_intra_chroma_rough(state, chroma_data, lcu, intra_mode,
|
||||
tree_type,
|
||||
&chroma_loc);
|
||||
}
|
||||
|
||||
if (num_modes > 1 || state->encoder_control->cfg.jccr) {
|
||||
uvg_search_intra_chroma_rdo(state, x_px, y_px, depth, num_modes, lcu, chroma_data, intra_mode, tree_type);
|
||||
uvg_search_intra_chroma_rdo(state, num_modes, lcu, chroma_data, intra_mode, tree_type, &chroma_loc);
|
||||
}
|
||||
else if(cur_pu->lfnst_idx) {
|
||||
chroma_data[0].pred_cu.cr_lfnst_idx = cur_pu->lfnst_idx;
|
||||
|
@ -1983,7 +1960,7 @@ void uvg_search_cu_intra(
|
|||
// Set transform depth to current depth, meaning no transform splits.
|
||||
{
|
||||
const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type);
|
||||
uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type);
|
||||
}
|
||||
// Refine results with slower search or get some results if rough search was skipped.
|
||||
const int32_t rdo_level = state->encoder_control->cfg.rdo;
|
||||
|
|
|
@ -52,9 +52,7 @@ double uvg_chroma_mode_bits(const encoder_state_t *state,
|
|||
|
||||
int8_t uvg_search_cu_intra_chroma(
|
||||
encoder_state_t * const state,
|
||||
const int x_px,
|
||||
const int y_px,
|
||||
const int depth,
|
||||
const cu_loc_t* const cu_loc,
|
||||
lcu_t *lcu,
|
||||
intra_search_data_t* best_cclm,
|
||||
enum uvg_tree_type tree_type);
|
||||
|
|
|
@ -709,7 +709,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index);
|
||||
scan_order, cur_cu->type, cur_cu->cbf, lfnst_index);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && use_trskip) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
|
|
|
@ -54,7 +54,7 @@
|
|||
void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
const cu_loc_t *cu_loc,
|
||||
const cu_loc_t * const cu_loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
|
@ -80,8 +80,8 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
|||
|
||||
// CONSTANTS
|
||||
|
||||
const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
|
||||
const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
|
||||
const uint8_t log2_block_width = uvg_g_convert_to_log2[width];
|
||||
const uint8_t log2_block_height = uvg_g_convert_to_log2[height];
|
||||
|
||||
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1];
|
||||
const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height);
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
const cu_loc_t *loc,
|
||||
const cu_loc_t * const loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
|
|
|
@ -317,8 +317,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
|
||||
cur_cu->cr_lfnst_idx);
|
||||
scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
|
@ -499,8 +498,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
uvg_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
|
||||
lfnst_index);
|
||||
scan_order, cur_cu->type, cur_cu->cbf, lfnst_index);
|
||||
} else if(state->encoder_control->cfg.rdoq_enable && use_trskip) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, height, color,
|
||||
scan_order);
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
typedef unsigned (encode_coeff_nxn_func)(encoder_state_t * const state,
|
||||
cabac_data_t * const cabac,
|
||||
const coeff_t *coeff,
|
||||
const cu_loc_t *loc,
|
||||
const cu_loc_t * const loc,
|
||||
uint8_t color,
|
||||
int8_t scan_mode,
|
||||
cu_info_t* cur_cu,
|
||||
|
|
|
@ -434,8 +434,7 @@ static void quantize_chroma(
|
|||
(transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
scan_order, CU_INTRA, depth, 0,
|
||||
lfnst_idx);
|
||||
scan_order, CU_INTRA, 0, lfnst_idx);
|
||||
|
||||
int j;
|
||||
for (j = 0; j < width * height; ++j) {
|
||||
|
@ -449,8 +448,7 @@ static void quantize_chroma(
|
|||
uint16_t temp_cbf = 0;
|
||||
if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U);
|
||||
uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
|
||||
scan_order, CU_INTRA, depth, temp_cbf,
|
||||
lfnst_idx);
|
||||
scan_order, CU_INTRA, temp_cbf, lfnst_idx);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -486,12 +484,10 @@ static void quantize_chroma(
|
|||
|
||||
void uvg_chroma_transform_search(
|
||||
encoder_state_t* const state,
|
||||
int depth,
|
||||
lcu_t* const lcu,
|
||||
cabac_data_t* temp_cabac,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int offset,
|
||||
const uint8_t mode,
|
||||
cu_info_t* pred_cu,
|
||||
uvg_pixel u_pred[1024],
|
||||
uvg_pixel v_pred[1024],
|
||||
|
@ -507,6 +503,8 @@ void uvg_chroma_transform_search(
|
|||
const int width = cu_loc->chroma_width;
|
||||
const int height = cu_loc->chroma_height;
|
||||
|
||||
const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
|
||||
|
||||
uvg_transform2d(
|
||||
state->encoder_control, u_resi, u_coeff, width, height, COLOR_U, pred_cu
|
||||
);
|
||||
|
@ -553,8 +551,6 @@ void uvg_chroma_transform_search(
|
|||
coeff_t v_quant_coeff[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
int16_t v_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
const coeff_scan_order_t scan_order =
|
||||
uvg_get_scan_order(pred_cu->type, mode, depth);
|
||||
bool u_has_coeffs = false;
|
||||
bool v_has_coeffs = false;
|
||||
if(pred_cu->cr_lfnst_idx) {
|
||||
|
@ -575,13 +571,13 @@ void uvg_chroma_transform_search(
|
|||
i,
|
||||
u_quant_coeff,
|
||||
v_quant_coeff,
|
||||
scan_order,
|
||||
SCAN_DIAG,
|
||||
&u_has_coeffs,
|
||||
&v_has_coeffs,
|
||||
pred_cu->cr_lfnst_idx);
|
||||
if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue;
|
||||
|
||||
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (depth == 4 || tree_type == UVG_CHROMA_T)) {
|
||||
if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (cu_loc->width == 4 || tree_type == UVG_CHROMA_T)) {
|
||||
bool constraints[2] = { false, false };
|
||||
uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U);
|
||||
if(!IS_JCCR_MODE(transforms[i])) {
|
||||
|
@ -593,9 +589,9 @@ void uvg_chroma_transform_search(
|
|||
if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue;
|
||||
|
||||
if (u_has_coeffs) {
|
||||
|
||||
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
pred_cu->type, transforms[i] == CHROMA_TS);
|
||||
|
||||
if (transforms[i] != CHROMA_TS) {
|
||||
if (pred_cu->cr_lfnst_idx) {
|
||||
uvg_inv_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type);
|
||||
|
@ -606,6 +602,7 @@ void uvg_chroma_transform_search(
|
|||
else {
|
||||
uvg_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width, height);
|
||||
}
|
||||
|
||||
if (transforms[i] != JCCR_1) {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((uvg_pixel)(u_pred[j] + u_recon_resi[j]));
|
||||
|
@ -620,9 +617,12 @@ void uvg_chroma_transform_search(
|
|||
else {
|
||||
uvg_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width);
|
||||
}
|
||||
|
||||
|
||||
if (v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) {
|
||||
uvg_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V,
|
||||
pred_cu->type, transforms[i] == CHROMA_TS);
|
||||
|
||||
if (transforms[i] != CHROMA_TS) {
|
||||
if (pred_cu->cr_lfnst_idx) {
|
||||
uvg_inv_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type);
|
||||
|
@ -633,6 +633,7 @@ void uvg_chroma_transform_search(
|
|||
else {
|
||||
uvg_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width, height);
|
||||
}
|
||||
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]);
|
||||
}
|
||||
|
@ -700,7 +701,7 @@ void uvg_chroma_transform_search(
|
|||
pred_cu,
|
||||
cu_loc,
|
||||
COLOR_U,
|
||||
scan_order,
|
||||
SCAN_DIAG,
|
||||
transforms[i] == CHROMA_TS,
|
||||
COEFF_ORDER_LINEAR);
|
||||
u_bits += coeff_cost;
|
||||
|
@ -717,7 +718,7 @@ void uvg_chroma_transform_search(
|
|||
pred_cu,
|
||||
cu_loc,
|
||||
COLOR_V,
|
||||
scan_order,
|
||||
SCAN_DIAG,
|
||||
transforms[i] == CHROMA_TS,
|
||||
COEFF_ORDER_LINEAR);
|
||||
}
|
||||
|
|
|
@ -104,12 +104,10 @@ void uvg_quantize_lcu_residual(
|
|||
|
||||
void uvg_chroma_transform_search(
|
||||
encoder_state_t* const state,
|
||||
int depth,
|
||||
lcu_t* const lcu,
|
||||
cabac_data_t* temp_cabac,
|
||||
const cu_loc_t* const cu_loc,
|
||||
const int offset,
|
||||
const uint8_t mode,
|
||||
cu_info_t* pred_cu,
|
||||
uvg_pixel u_pred[1024],
|
||||
uvg_pixel v_pred[1024],
|
||||
|
|
|
@ -6,10 +6,10 @@ set -eu
|
|||
|
||||
cabacfile="$(mktemp)"
|
||||
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --pu-depth-intra 0-4 --cclm --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
python3 check_cabac_state_consistency.py "${cabacfile}"
|
||||
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
valgrind_test 256x128 10 yuv420p --preset veryslow --pu-depth-intra 0-4 --cclm --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}"
|
||||
python3 check_cabac_state_consistency.py "${cabacfile}"
|
||||
|
||||
rm -rf "${cabacfile}"
|
||||
|
|
Loading…
Reference in a new issue