mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Merge branch 'intra-fix-up' into 'master'
Intra fix up. See merge request cs/ultravideo/vvc/uvg266!12
This commit is contained in:
commit
98322c943e
|
@ -166,7 +166,8 @@ static bool can_use_lfnst_with_isp(const int width, const int height, const int
|
|||
const int x,
|
||||
const int y,
|
||||
enum uvg_tree_type tree_type,
|
||||
const color_t color)
|
||||
const color_t color,
|
||||
const lcu_t* lcu)
|
||||
{
|
||||
if (state->encoder_control->cfg.lfnst && pred_cu->type == CU_INTRA) {
|
||||
const int isp_mode = 0; // ISP_TODO: assign proper ISP mode when ISP is implemented
|
||||
|
@ -201,12 +202,12 @@ static bool can_use_lfnst_with_isp(const int width, const int height, const int
|
|||
const int tu_height = tu_width; // TODO: height for non-square blocks
|
||||
|
||||
// TODO: chroma transform skip
|
||||
if (tree_type != UVG_BOTH_T) {
|
||||
if (color == COLOR_Y) {
|
||||
for (int i = 0; i < num_transform_units; i++) {
|
||||
// TODO: this works only for square blocks
|
||||
const int pu_x = x + ((i % tu_row_length) * tu_width);
|
||||
const int pu_y = y + ((i / tu_row_length) * tu_height);
|
||||
const cu_info_t* cur_tu = uvg_cu_array_at_const(frame->cu_array, pu_x, pu_y);
|
||||
const int tu_x = x + ((i % tu_row_length) * tu_width);
|
||||
const int tu_y = y + ((i / tu_row_length) * tu_height);
|
||||
const cu_info_t* cur_tu = lcu ? LCU_GET_CU_AT_PX(lcu, tu_x, tu_y) : uvg_cu_array_at_const(frame->cu_array, tu_x, tu_y);
|
||||
assert(cur_tu != NULL && "NULL transform unit.");
|
||||
bool cbf_set = cbf_is_set(cur_tu->cbf, tr_depth, COLOR_Y);
|
||||
|
||||
|
@ -239,7 +240,7 @@ static bool encode_lfnst_idx(
|
|||
const color_t color)
|
||||
{
|
||||
|
||||
if (uvg_is_lfnst_allowed(state, pred_cu, width, height, x, y, tree_type, color)) {
|
||||
if (uvg_is_lfnst_allowed(state, pred_cu, width, height, x, y, tree_type, color, NULL)) {
|
||||
// Getting separate tree bool from block size is a temporary fix until a proper dual tree check is possible (there is no dual tree structure at time of writing this).
|
||||
// VTM seems to force explicit dual tree structure for small 4x4 blocks
|
||||
bool is_separate_tree = depth == 4 || tree_type != UVG_BOTH_T;
|
||||
|
@ -399,7 +400,7 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
|
|||
belowPixel = pos_y > 0 ? coeff[pos_x + (pos_y - 1) * width] : 0;
|
||||
absLevel = uvg_derive_mod_coeff(rightPixel, belowPixel, abs(coeff[blk_pos]), 0);
|
||||
cutoffVal = 2;
|
||||
for (i = 0; i < numGtBins; i++)
|
||||
for (int j = 0; j < numGtBins; j++)
|
||||
{
|
||||
if (absLevel >= cutoffVal)
|
||||
{
|
||||
|
@ -1738,7 +1739,6 @@ void uvg_encode_coding_tree(
|
|||
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff, tree_type);
|
||||
// Write LFNST only once for single tree structure
|
||||
encode_lfnst_idx(state, cabac, tmp, x, y, depth, cu_width, cu_height, tree_type, COLOR_UV);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,8 @@ bool uvg_is_lfnst_allowed(
|
|||
const int x,
|
||||
const int y,
|
||||
enum uvg_tree_type tree_type,
|
||||
const color_t color);
|
||||
const color_t color,
|
||||
const lcu_t* lcu);
|
||||
|
||||
void uvg_encode_coding_tree(
|
||||
encoder_state_t * const state,
|
||||
|
|
43
src/search.c
43
src/search.c
|
@ -393,6 +393,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
// block and return 0 cost for all others.
|
||||
return 0;
|
||||
}
|
||||
int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
|
||||
// See luma for why the second condition
|
||||
if (!skip_residual_coding) {
|
||||
|
@ -401,13 +403,10 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
cabac_ctx_t *ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
cabac->cur_ctx = ctx;
|
||||
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
|
||||
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
}
|
||||
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]);
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[u_is_set]);
|
||||
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
|
||||
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
|
||||
}
|
||||
}
|
||||
|
@ -426,7 +425,7 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
}
|
||||
|
||||
if (state->encoder_control->cfg.jccr) {
|
||||
int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1;
|
||||
int cbf_mask = u_is_set * 2 + v_is_set - 1;
|
||||
cabac_ctx_t* ctx = NULL;
|
||||
if (cbf_mask != -1) {
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
|
@ -448,14 +447,19 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
ssd = ssd_u + ssd_v;
|
||||
}
|
||||
|
||||
if (!skip_residual_coding)
|
||||
{
|
||||
if (!skip_residual_coding) {
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
|
||||
if((pred_cu->joint_cb_cr & 3) == 0){
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, 2, scan_order, 0);
|
||||
}
|
||||
else {
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, 2, scan_order, 0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
double bits = tr_tree_bits + coeff_bits;
|
||||
|
@ -568,7 +572,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
}
|
||||
|
||||
if(depth == 4 || tree_type == UVG_LUMA_T) {
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, COLOR_Y)) {
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, COLOR_Y, lcu)) {
|
||||
const int lfnst_idx = tr_cu->lfnst_idx;
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
|
@ -620,10 +624,10 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
else {
|
||||
{
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u_joint + ssd_v_joint;
|
||||
|
@ -635,7 +639,7 @@ static double cu_rd_cost_tr_split_accurate(
|
|||
}
|
||||
}
|
||||
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, depth == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y)) {
|
||||
if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, depth == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) {
|
||||
const int lfnst_idx = (depth != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx;
|
||||
CABAC_FBITS_UPDATE(
|
||||
cabac,
|
||||
|
@ -956,7 +960,7 @@ static double search_cu(
|
|||
intra_search.pred_cu.type = CU_INTRA;
|
||||
}
|
||||
intra_search.pred_cu.intra.mode_chroma = intra_search.pred_cu.intra.mode;
|
||||
if (ctrl->cfg.rdo >= 3 || ctrl->cfg.jccr || ctrl->cfg.lfnst) {
|
||||
if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) {
|
||||
uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search, tree_type);
|
||||
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) {
|
||||
|
@ -1150,11 +1154,11 @@ static double search_cu(
|
|||
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
|
||||
|
||||
|
||||
state->search_cabac.update = 1;
|
||||
|
||||
double split_bits = 0;
|
||||
|
||||
if (depth < MAX_DEPTH) {
|
||||
|
||||
state->search_cabac.update = 1;
|
||||
// Add cost of cu_split_flag.
|
||||
const cu_info_t* left_cu = NULL, * above_cu = NULL;
|
||||
if (x) {
|
||||
|
@ -1197,14 +1201,13 @@ static double search_cu(
|
|||
// the split costs at least as much as not splitting.
|
||||
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
|
||||
if (split_cost < cost) split_cost += search_cu(state, x, y, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost) split_cost += search_cu(state, x, y + half_cu, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost || 1) split_cost += search_cu(state, x + half_cu, y, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost || 1) split_cost += search_cu(state, x, y + half_cu, depth + 1, work_tree, tree_type);
|
||||
if (split_cost < cost || 1) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type);
|
||||
} else {
|
||||
split_cost = INT_MAX;
|
||||
}
|
||||
|
||||
|
||||
// If no search is performed for this depth, try just the best mode
|
||||
// of the top left CU from the next depth. This should ensure that 64x64
|
||||
// gets used, at least in the most obvious cases, while avoiding any
|
||||
|
@ -1230,7 +1233,7 @@ static double search_cu(
|
|||
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
|
||||
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
|
||||
0, depth, cu_width, x, y, tree_type,
|
||||
& split_bits);
|
||||
&bits);
|
||||
|
||||
cur_cu->intra = cu_d1->intra;
|
||||
cur_cu->type = CU_INTRA;
|
||||
|
@ -1238,6 +1241,8 @@ static double search_cu(
|
|||
|
||||
// Disable MRL in this case
|
||||
cur_cu->intra.multi_ref_idx = 0;
|
||||
cur_cu->lfnst_idx = 0;
|
||||
cur_cu->cr_lfnst_idx = 0;
|
||||
|
||||
uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth, tree_type);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
|
|
|
@ -314,7 +314,7 @@ static double search_intra_trdepth(
|
|||
int best_tr_idx = 0;
|
||||
int best_lfnst_idx = 0;
|
||||
|
||||
int trafo;
|
||||
uint8_t trafo;
|
||||
int num_transforms = 1;
|
||||
if (pred_cu->tr_idx != MTS_TR_NUM)
|
||||
{
|
||||
|
@ -362,16 +362,14 @@ static double search_intra_trdepth(
|
|||
|
||||
for (trafo = mts_start; trafo < num_transforms; trafo++) {
|
||||
pred_cu->tr_idx = trafo;
|
||||
bool constraints[2] = { pred_cu->violates_lfnst_constrained_luma,
|
||||
pred_cu->lfnst_last_scan_pos };
|
||||
pred_cu->tr_skip = trafo == MTS_SKIP;
|
||||
bool constraints[2] = { false, false};
|
||||
if (mts_enabled) {
|
||||
pred_cu->mts_last_scan_pos = 0;
|
||||
pred_cu->violates_mts_coeff_constraint = 0;
|
||||
|
||||
if (trafo == MTS_SKIP && width > (
|
||||
1 << state->encoder_control->cfg.trskip_max_size)) {
|
||||
//TODO: parametrize that this is not hardcoded
|
||||
// TODO: this probably should currently trip for chroma?
|
||||
if ((trafo == MTS_SKIP && width > (1 << state->encoder_control->cfg.trskip_max_size))
|
||||
|| !state->encoder_control->cfg.trskip_enable) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -380,16 +378,20 @@ static double search_intra_trdepth(
|
|||
continue;
|
||||
}
|
||||
|
||||
|
||||
uvg_intra_recon_cu(state,
|
||||
x_px, y_px,
|
||||
depth, search_data,
|
||||
uvg_intra_recon_cu(
|
||||
state,
|
||||
x_px,
|
||||
y_px,
|
||||
depth,
|
||||
search_data,
|
||||
pred_cu,
|
||||
lcu,
|
||||
UVG_LUMA_T,true,false);
|
||||
UVG_LUMA_T,
|
||||
true,
|
||||
false);
|
||||
|
||||
if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue;
|
||||
|
||||
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
|
||||
derive_mts_constraints(pred_cu, lcu, depth, lcu_px);
|
||||
if (pred_cu->tr_idx > 1) {
|
||||
if (pred_cu->violates_mts_coeff_constraint || !pred_cu->
|
||||
|
@ -398,8 +400,12 @@ static double search_intra_trdepth(
|
|||
}
|
||||
}
|
||||
|
||||
const unsigned scan_offset = xy_to_zorder(LCU_WIDTH, lcu_px.x, lcu_px.y);
|
||||
const unsigned scan_offset = xy_to_zorder(
|
||||
LCU_WIDTH,
|
||||
lcu_px.x,
|
||||
lcu_px.y);
|
||||
|
||||
if (trafo != MTS_SKIP && end_idx != 0) {
|
||||
uvg_derive_lfnst_constraints(
|
||||
pred_cu,
|
||||
depth,
|
||||
|
@ -408,7 +414,7 @@ static double search_intra_trdepth(
|
|||
width,
|
||||
height
|
||||
);
|
||||
// Temp constraints. Updating the actual pred_cu constraints here will break things later
|
||||
}
|
||||
|
||||
if (!constraints[1] && cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) {
|
||||
//end_idx = 0;
|
||||
|
@ -424,15 +430,21 @@ static double search_intra_trdepth(
|
|||
pred_cu,
|
||||
lcu);
|
||||
double transform_bits = 0;
|
||||
if(state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth) {
|
||||
if(!constraints[0] && constraints[1]) {
|
||||
transform_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.lfnst_idx_model[tr_cu->depth == 4 || tree_type == UVG_LUMA_T], lfnst_idx != 0);
|
||||
if(lfnst_idx > 0) {
|
||||
transform_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.lfnst_idx_model[2], lfnst_idx == 2);
|
||||
if (state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth &&
|
||||
trafo != MTS_SKIP) {
|
||||
if (!constraints[0] && constraints[1]) {
|
||||
transform_bits += CTX_ENTROPY_FBITS(
|
||||
&state->search_cabac.ctx.lfnst_idx_model[tr_cu->depth == 4 ||
|
||||
tree_type == UVG_LUMA_T],
|
||||
lfnst_idx != 0);
|
||||
if (lfnst_idx > 0) {
|
||||
transform_bits += CTX_ENTROPY_FBITS(
|
||||
&state->search_cabac.ctx.lfnst_idx_model[2],
|
||||
lfnst_idx == 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (num_transforms > 1 && trafo != MTS_SKIP && width <= 32
|
||||
if (num_transforms > 2 && trafo != MTS_SKIP && width <= 32
|
||||
/*&& height <= 32*/
|
||||
&& !pred_cu->violates_mts_coeff_constraint && pred_cu->
|
||||
mts_last_scan_pos && lfnst_idx == 0) {
|
||||
|
@ -454,7 +466,9 @@ static double search_intra_trdepth(
|
|||
}
|
||||
rd_cost += transform_bits * state->frame->lambda;
|
||||
|
||||
search_data->lfnst_costs[lfnst_idx] = MIN(search_data->lfnst_costs[lfnst_idx], rd_cost);
|
||||
search_data->lfnst_costs[lfnst_idx] = MIN(
|
||||
search_data->lfnst_costs[lfnst_idx],
|
||||
rd_cost);
|
||||
if (rd_cost < best_rd_cost) {
|
||||
best_rd_cost = rd_cost;
|
||||
best_lfnst_idx = pred_cu->lfnst_idx;
|
||||
|
@ -468,7 +482,10 @@ static double search_intra_trdepth(
|
|||
pred_cu->intra.mode_chroma = chroma_mode;
|
||||
pred_cu->joint_cb_cr = 4;
|
||||
// TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
|
||||
const unsigned scan_offset = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
|
||||
const unsigned scan_offset = xy_to_zorder(
|
||||
LCU_WIDTH_C,
|
||||
lcu_px.x,
|
||||
lcu_px.y);
|
||||
uvg_intra_recon_cu(
|
||||
state,
|
||||
x_px,
|
||||
|
@ -477,7 +494,9 @@ static double search_intra_trdepth(
|
|||
search_data,
|
||||
pred_cu,
|
||||
lcu,
|
||||
UVG_BOTH_T,false,true);
|
||||
UVG_BOTH_T,
|
||||
false,
|
||||
true);
|
||||
best_rd_cost += uvg_cu_rd_cost_chroma(
|
||||
state,
|
||||
lcu_px.x,
|
||||
|
@ -497,8 +516,8 @@ static double search_intra_trdepth(
|
|||
depth,
|
||||
constraints,
|
||||
&lcu->coeff.u[scan_offset],
|
||||
width_c,
|
||||
width_c
|
||||
, width_c
|
||||
);
|
||||
if (constraints[0] || !constraints[1]) {
|
||||
best_lfnst_idx = 0;
|
||||
|
@ -509,8 +528,8 @@ static double search_intra_trdepth(
|
|||
depth,
|
||||
constraints,
|
||||
&lcu->coeff.u[scan_offset],
|
||||
width_c,
|
||||
width_c
|
||||
, width_c
|
||||
);
|
||||
if (constraints[0] || !constraints[1]) {
|
||||
best_lfnst_idx = 0;
|
||||
|
@ -1621,7 +1640,7 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
// const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 };
|
||||
int num_modes = 1;
|
||||
|
||||
if (state->encoder_control->cfg.rdo >= 3) {
|
||||
if (state->encoder_control->cfg.rdo >= 2 || tree_type == UVG_CHROMA_T) {
|
||||
num_modes = total_modes;
|
||||
}
|
||||
|
||||
|
@ -1666,9 +1685,12 @@ int8_t uvg_search_cu_intra_chroma(
|
|||
tree_type);
|
||||
}
|
||||
|
||||
if (num_modes > 1) {
|
||||
if (num_modes > 1 || state->encoder_control->cfg.jccr) {
|
||||
uvg_search_intra_chroma_rdo(state, x_px, y_px, depth, num_modes, lcu, chroma_data, intra_mode, tree_type);
|
||||
}
|
||||
else if(cur_pu->lfnst_idx) {
|
||||
chroma_data[0].pred_cu.cr_lfnst_idx = cur_pu->lfnst_idx;
|
||||
}
|
||||
*search_data = chroma_data[0];
|
||||
return chroma_data[0].pred_cu.intra.mode_chroma;
|
||||
}
|
||||
|
@ -1844,6 +1866,7 @@ void uvg_search_cu_intra(
|
|||
search_data[i].cost = MAX_INT;
|
||||
}
|
||||
number_of_modes = UVG_NUM_INTRA_MODES;
|
||||
num_regular_modes = UVG_NUM_INTRA_MODES;
|
||||
}
|
||||
|
||||
uint8_t num_mrl_modes = 0;
|
||||
|
@ -1969,7 +1992,6 @@ void uvg_search_cu_intra(
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: if rough search is implemented for MIP, sort mip_modes here.
|
||||
search_intra_rdo(
|
||||
state,
|
||||
x_px,
|
||||
|
|
|
@ -233,7 +233,10 @@ static unsigned satd_4x4_8bit_avx2(const uint8_t *org, const uint8_t *cur)
|
|||
row3 = _mm_add_epi16(row3, _mm_shuffle_epi32(row3, _MM_SHUFFLE(0, 1, 0, 1) ));
|
||||
row3 = _mm_add_epi16(row3, _mm_shufflelo_epi16(row3, _MM_SHUFFLE(0, 1, 0, 1) ));
|
||||
|
||||
const int16_t temp1 = _mm_extract_epi16(row2, 0);
|
||||
const int dc1 = abs(temp1);
|
||||
unsigned sum = _mm_extract_epi16(row3, 0);
|
||||
sum -= dc1 - (dc1 >> 2);
|
||||
unsigned satd = (sum + 1) >> 1;
|
||||
|
||||
return satd;
|
||||
|
@ -280,10 +283,16 @@ static void satd_8bit_4x4_dual_avx2(
|
|||
row3 = _mm256_add_epi16(row3, _mm256_shuffle_epi32(row3, _MM_SHUFFLE(0, 1, 0, 1) ));
|
||||
row3 = _mm256_add_epi16(row3, _mm256_shufflelo_epi16(row3, _MM_SHUFFLE(0, 1, 0, 1) ));
|
||||
|
||||
const int16_t temp2 = _mm256_extract_epi16(row2, 0);
|
||||
const int dc1 = abs(temp2);
|
||||
unsigned sum1 = _mm_extract_epi16(_mm256_castsi256_si128(row3), 0);
|
||||
sum1 -= dc1 - (dc1 >> 2);
|
||||
sum1 = (sum1 + 1) >> 1;
|
||||
|
||||
const int16_t temp3 = _mm256_extract_epi16(row2, 8);
|
||||
const int dc2 = abs(temp3);
|
||||
unsigned sum2 = _mm_extract_epi16(_mm256_extracti128_si256(row3, 1), 0);
|
||||
sum2 -= dc2 - (dc2 >> 2);
|
||||
sum2 = (sum2 + 1) >> 1;
|
||||
|
||||
satds_out[0] = sum1;
|
||||
|
@ -522,6 +531,13 @@ static void uvg_satd_8bit_8x8_general_dual_avx2(const uint8_t * buf1, unsigned s
|
|||
|
||||
sum_block_dual_avx2(temp, sum0, sum1);
|
||||
|
||||
const int16_t temp2 = _mm256_extract_epi16(temp[0], 0);
|
||||
const int dc1 = abs(temp2);
|
||||
const int16_t temp3 = _mm256_extract_epi16(temp[0], 8);
|
||||
const int dc2 = abs(temp3);
|
||||
*sum0 -= dc1 - (dc1 >> 2);
|
||||
*sum1 -= dc2 - (dc2 >> 2);
|
||||
|
||||
*sum0 = (*sum0 + 2) >> 2;
|
||||
*sum1 = (*sum1 + 2) >> 2;
|
||||
}
|
||||
|
@ -558,6 +574,10 @@ static unsigned satd_8x8_subblock_8bit_avx2(const uint8_t * buf1, unsigned strid
|
|||
|
||||
unsigned sad = sum_block_avx2(temp);
|
||||
|
||||
const int16_t temp1 = _mm_extract_epi16(temp[0], 0);
|
||||
const int dc1 = abs(temp1);
|
||||
sad -= dc1 - (dc1 >> 2);
|
||||
|
||||
unsigned result = (sad + 2) >> 2;
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -703,7 +703,7 @@ void uvg_chroma_transform_search(
|
|||
transforms[i] == CHROMA_TS);
|
||||
}
|
||||
if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst && 0) {
|
||||
if(uvg_is_lfnst_allowed(state, pred_cu, width, height, 0, 0 , UVG_CHROMA_T, COLOR_UV)) {
|
||||
if(uvg_is_lfnst_allowed(state, pred_cu, width, height, 0, 0 , UVG_CHROMA_T, COLOR_UV, lcu)) {
|
||||
const int lfnst_idx = pred_cu->cr_lfnst_idx;
|
||||
CABAC_FBITS_UPDATE(
|
||||
&state->search_cabac,
|
||||
|
|
|
@ -119,7 +119,7 @@ static void satd_tear_down_tests()
|
|||
|
||||
TEST satd_test_black_and_white(void)
|
||||
{
|
||||
const int satd_results[5] = {2040, 4080, 16320, 65280, 261120};
|
||||
const int satd_results[5] = {510, 1020, 4080, 16320, 65280};
|
||||
|
||||
const int test = 0;
|
||||
|
||||
|
@ -137,7 +137,7 @@ TEST satd_test_black_and_white(void)
|
|||
|
||||
TEST satd_test_checkers(void)
|
||||
{
|
||||
const int satd_checkers_results[5] = { 2040, 4080, 16320, 65280, 261120 };
|
||||
const int satd_checkers_results[5] = { 1278, 2556, 10224, 40896, 163584 };
|
||||
|
||||
const int test = 1;
|
||||
|
||||
|
@ -156,7 +156,7 @@ TEST satd_test_checkers(void)
|
|||
|
||||
TEST satd_test_gradient(void)
|
||||
{
|
||||
const int satd_gradient_results[5] = {3140,9004,20481,67262,258672};
|
||||
const int satd_gradient_results[5] = {2728,7158,10775,23399,72780};
|
||||
|
||||
const int test = 2;
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ GREATEST_MAIN_DEFS();
|
|||
#if UVG_BIT_DEPTH == 8
|
||||
extern SUITE(sad_tests);
|
||||
extern SUITE(intra_sad_tests);
|
||||
// extern SUITE(satd_tests);
|
||||
extern SUITE(satd_tests);
|
||||
extern SUITE(speed_tests);
|
||||
extern SUITE(dct_tests);
|
||||
extern SUITE(mts_tests);
|
||||
|
@ -56,7 +56,7 @@ int main(int argc, char **argv)
|
|||
#if UVG_BIT_DEPTH == 8
|
||||
RUN_SUITE(sad_tests);
|
||||
RUN_SUITE(intra_sad_tests);
|
||||
// RUN_SUITE(satd_tests);
|
||||
RUN_SUITE(satd_tests);
|
||||
RUN_SUITE(dct_tests);
|
||||
RUN_SUITE(mts_tests);
|
||||
|
||||
|
|
Loading…
Reference in a new issue