[isp] Add non-square block handling to functions.

This commit is contained in:
siivonek 2022-08-18 15:07:22 +03:00 committed by Marko Viitanen
parent 031a758d6c
commit ae0336fdfc
15 changed files with 65 additions and 42 deletions

View file

@ -657,7 +657,7 @@ uint32_t uvg_context_get_sig_coeff_group_ts(uint32_t* sig_coeff_group_flag,
* \returns context index for current scan position * \returns context index for current scan position
*/ */
uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
uint32_t width, uint32_t height, int8_t type, uint32_t width, uint32_t height, int8_t color,
int32_t* temp_diag, int32_t* temp_sum) int32_t* temp_diag, int32_t* temp_sum)
{ {
const coeff_t* data = coeff + pos_x + pos_y * width; const coeff_t* data = coeff + pos_x + pos_y * width;
@ -687,7 +687,7 @@ uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, u
} }
#undef UPDATE #undef UPDATE
int ctx_ofs = MIN((sum_abs+1)>>1, 3) + (diag < 2 ? 4 : 0); int ctx_ofs = MIN((sum_abs+1)>>1, 3) + (diag < 2 ? 4 : 0);
if (type == 0 /* Luma */) if (color == COLOR_Y)
{ {
ctx_ofs += diag < 5 ? 4 : 0; ctx_ofs += diag < 5 ? 4 : 0;
} }
@ -815,7 +815,7 @@ unsigned uvg_lrg1_ctx_id_abs_ts(const coeff_t* coeff, int32_t pos_x, int32_t pos
* \returns context go rice parameter * \returns context go rice parameter
*/ */
uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
uint32_t height, uint32_t width, uint32_t baselevel) uint32_t width, uint32_t height, uint32_t baselevel)
{ {
#define UPDATE(x) sum+=abs(x)/*-(x?1:0)*/ #define UPDATE(x) sum+=abs(x)/*-(x?1:0)*/
@ -857,8 +857,8 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
* \returns context go rice parameter * \returns context go rice parameter
*/ */
uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
uint32_t height, uint32_t width, uint32_t baselevel) uint32_t width, uint32_t height, uint32_t baselevel)
{ {
uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, height, width, baselevel); uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, width, height, baselevel);
return g_go_rice_pars[check]; return g_go_rice_pars[check];
} }

View file

@ -66,7 +66,7 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
uint32_t height, uint32_t width, uint32_t baselevel); uint32_t height, uint32_t width, uint32_t baselevel);
uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y,
uint32_t height, uint32_t width, uint32_t baselevel); uint32_t width, uint32_t height, uint32_t baselevel);
#define CNU 35 #define CNU 35
#define DWS 8 #define DWS 8

View file

@ -213,6 +213,7 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
cabac_data_t* const cabac, cabac_data_t* const cabac,
const coeff_t* coeff, const coeff_t* coeff,
uint32_t width, uint32_t width,
uint32_t height,
uint8_t type, uint8_t type,
int8_t scan_mode, int8_t scan_mode,
double* bits_out) double* bits_out)
@ -227,8 +228,9 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
// CONSTANTS // CONSTANTS
const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = log2_block_width; // TODO: height const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
// TODO: log2_cg_size is wrong if width != height
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1];
const uint32_t* old_scan = uvg_g_sig_last_scan[scan_mode][log2_block_width - 1]; const uint32_t* old_scan = uvg_g_sig_last_scan[scan_mode][log2_block_width - 1];
const uint32_t* old_scan_cg = g_sig_last_scan_cg[log2_block_width - 1][scan_mode]; const uint32_t* old_scan_cg = g_sig_last_scan_cg[log2_block_width - 1][scan_mode];
@ -243,13 +245,11 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
cabac->cur_ctx = base_coeff_group_ctx; cabac->cur_ctx = base_coeff_group_ctx;
// ISP_TODO: height int maxCtxBins = (width * height * 7) >> 2;
int maxCtxBins = (width * width * 7) >> 2;
unsigned scan_cg_last = (unsigned )-1; unsigned scan_cg_last = (unsigned )-1;
//unsigned scan_pos_last = (unsigned )-1; //unsigned scan_pos_last = (unsigned )-1;
// ISP_TODO: height for (i = 0; i < width * height; i++) {
for (i = 0; i < width * width; i++) {
if (coeff[scan[i]]) { if (coeff[scan[i]]) {
// ISP_DEBUG // ISP_DEBUG
assert(old_scan[i] == scan[i] && "Old scan_cg differs from the new one."); assert(old_scan[i] == scan[i] && "Old scan_cg differs from the new one.");
@ -258,7 +258,8 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1; sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1;
} }
} }
scan_cg_last = (width * width - 1) >> log2_cg_size; // TODO: this won't work with non-square blocks
scan_cg_last = (width * height - 1) >> log2_cg_size;
const uint32_t cg_width = (MIN((uint8_t)32, width) >> (log2_cg_size / 2)); const uint32_t cg_width = (MIN((uint8_t)32, width) >> (log2_cg_size / 2));
bool no_sig_group_before_last = true; bool no_sig_group_before_last = true;
@ -481,6 +482,7 @@ static void encode_chroma_tu(
enum enum
uvg_tree_type tree_type) uvg_tree_type tree_type)
{ {
int height_c = width_c; // TODO: height for non-square blocks
int x_local = ((x >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; int x_local = ((x >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C;
int y_local = ((y >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; int y_local = ((y >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C;
cabac_data_t* const cabac = &state->cabac; cabac_data_t* const cabac = &state->cabac;
@ -496,7 +498,7 @@ static void encode_chroma_tu(
// TODO: transform skip for chroma blocks // TODO: transform skip for chroma blocks
CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_U) & 1, "transform_skip_flag"); CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_U) & 1, "transform_skip_flag");
} }
uvg_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, COLOR_U, *scan_idx, cur_pu, NULL); uvg_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, height_c, COLOR_U, *scan_idx, cur_pu, NULL);
} }
if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) {
@ -504,7 +506,7 @@ static void encode_chroma_tu(
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_V) & 1, "transform_skip_flag"); CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_V) & 1, "transform_skip_flag");
} }
uvg_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, COLOR_V, *scan_idx, cur_pu, NULL); uvg_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, height_c, COLOR_V, *scan_idx, cur_pu, NULL);
} }
} }
else { else {
@ -513,7 +515,7 @@ static void encode_chroma_tu(
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
CABAC_BIN(cabac, 0, "transform_skip_flag"); CABAC_BIN(cabac, 0, "transform_skip_flag");
} }
uvg_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, COLOR_V, *scan_idx, cur_pu, NULL); uvg_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, height_c, COLOR_V, *scan_idx, cur_pu, NULL);
} }
} }
@ -534,6 +536,9 @@ static void encode_transform_unit(
cabac_data_t* const cabac = &state->cabac; cabac_data_t* const cabac = &state->cabac;
const uint8_t width = LCU_WIDTH >> depth; const uint8_t width = LCU_WIDTH >> depth;
const uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); const uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
// TODO: height for non-square blocks
const uint8_t height = width;
const uint8_t height_c = width_c;
cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array;
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, x, y); const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, x, y);
@ -556,13 +561,14 @@ static void encode_transform_unit(
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0); DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0);
} }
if(cur_pu->tr_idx == MTS_SKIP) { if(cur_pu->tr_idx == MTS_SKIP) {
uvg_encode_ts_residual(state, cabac, coeff_y, width, 0, scan_idx, NULL); uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL);
} }
else { else {
uvg_encode_coeff_nxn(state, uvg_encode_coeff_nxn(state,
cabac, cabac,
coeff_y, coeff_y,
width, width,
height,
0, 0,
scan_idx, scan_idx,
(cu_info_t * )cur_pu, (cu_info_t * )cur_pu,

View file

@ -64,6 +64,7 @@ void uvg_encode_ts_residual(encoder_state_t* const state,
cabac_data_t* const cabac, cabac_data_t* const cabac,
const coeff_t* coeff, const coeff_t* coeff,
uint32_t width, uint32_t width,
uint32_t height,
uint8_t type, uint8_t type,
int8_t scan_mode, int8_t scan_mode,
double* bits); double* bits);

View file

@ -298,6 +298,7 @@ static INLINE double get_coeff_cabac_cost(
const encoder_state_t * const state, const encoder_state_t * const state,
const coeff_t *coeff, const coeff_t *coeff,
int32_t width, int32_t width,
int32_t height,
color_t color, color_t color,
int8_t scan_mode, int8_t scan_mode,
int8_t tr_skip, int8_t tr_skip,
@ -305,7 +306,7 @@ static INLINE double get_coeff_cabac_cost(
{ {
// Make sure there are coeffs present // Make sure there are coeffs present
bool found = false; bool found = false;
for (int i = 0; i < width*width; i++) { for (int i = 0; i < width * height; i++) {
if (coeff[i] != 0) { if (coeff[i] != 0) {
found = 1; found = 1;
break; break;
@ -331,6 +332,7 @@ static INLINE double get_coeff_cabac_cost(
&cabac_copy, &cabac_copy,
coeff, coeff,
width, width,
height,
color, color,
scan_mode, scan_mode,
cur_tu, cur_tu,
@ -341,6 +343,7 @@ static INLINE double get_coeff_cabac_cost(
&cabac_copy, &cabac_copy,
coeff, coeff,
width, width,
height,
color, color,
scan_mode, scan_mode,
&bits); &bits);
@ -392,6 +395,7 @@ double uvg_get_coeff_cost(
const coeff_t *coeff, const coeff_t *coeff,
cu_info_t* cur_tu, cu_info_t* cur_tu,
int32_t width, int32_t width,
int32_t height,
color_t color, color_t color,
int8_t scan_mode, int8_t scan_mode,
int8_t tr_skip) int8_t tr_skip)
@ -409,15 +413,15 @@ double uvg_get_coeff_cost(
return UINT32_MAX; // Hush little compiler don't you cry, not really gonna return anything after assert(0) return UINT32_MAX; // Hush little compiler don't you cry, not really gonna return anything after assert(0)
} else { } else {
uint64_t weights = uvg_fast_coeff_get_weights(state); uint64_t weights = uvg_fast_coeff_get_weights(state);
uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, weights); uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, height, weights);
if (check_accuracy) { if (check_accuracy) {
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu); double ccc = get_coeff_cabac_cost(state, coeff, width, height, color, scan_mode, tr_skip, cur_tu);
save_accuracy(state->qp, ccc, fast_cost); save_accuracy(state->qp, ccc, fast_cost);
} }
return fast_cost; return fast_cost;
} }
} else { } else {
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu); double ccc = get_coeff_cabac_cost(state, coeff, width, height, color, scan_mode, tr_skip, cur_tu);
if (save_cccs) { if (save_cccs) {
save_ccc(state->qp, coeff, width * width, ccc); save_ccc(state->qp, coeff, width * width, ccc);
} }

View file

@ -74,6 +74,7 @@ double uvg_get_coeff_cost(
const coeff_t *coeff, const coeff_t *coeff,
cu_info_t* cur_tu, cu_info_t* cur_tu,
int32_t width, int32_t width,
int32_t height,
color_t color, color_t color,
int8_t scan_mode, int8_t scan_mode,
int8_t tr_skip); int8_t tr_skip);

View file

@ -310,7 +310,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
lcu_t *const lcu, lcu_t *const lcu,
uint8_t isp_cbf) uint8_t isp_cbf)
{ {
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac;
@ -380,7 +381,7 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state,
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, width, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, width, height, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP);
} }
double bits = tr_tree_bits + coeff_bits; double bits = tr_tree_bits + coeff_bits;
@ -394,7 +395,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
lcu_t *const lcu) lcu_t *const lcu)
{ {
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 }; const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
@ -468,11 +470,11 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
if((pred_cu->joint_cb_cr & 3) == 0){ if((pred_cu->joint_cb_cr & 3) == 0){
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, 2, scan_order, 0); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, height, 2, scan_order, 0);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, 2, scan_order, 0); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, height, 2, scan_order, 0);
} }
else { else {
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, 2, scan_order, 0); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, height, 2, scan_order, 0);
} }
} }
@ -493,6 +495,7 @@ static double cu_rd_cost_tr_split_accurate(
enum uvg_tree_type tree_type, enum uvg_tree_type tree_type,
uint8_t isp_cbf) { uint8_t isp_cbf) {
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0);
// cur_cu is used for TU parameters. // cur_cu is used for TU parameters.
@ -597,7 +600,7 @@ static double cu_rd_cost_tr_split_accurate(
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, 0, luma_scan_mode, tr_cu->tr_skip & 1); coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, height, 0, luma_scan_mode, tr_cu->tr_skip & 1);
} }
if(depth == 4 || tree_type == UVG_LUMA_T) { if(depth == 4 || tree_type == UVG_LUMA_T) {
@ -624,7 +627,8 @@ static double cu_rd_cost_tr_split_accurate(
unsigned chroma_ssd = 0; unsigned chroma_ssd = 0;
if(has_chroma) { if(has_chroma) {
const vector2d_t lcu_px = { (x_px >> (tree_type != UVG_CHROMA_T)) & ~3, (y_px >> (tree_type != UVG_CHROMA_T)) &~3 }; const vector2d_t lcu_px = { (x_px >> (tree_type != UVG_CHROMA_T)) & ~3, (y_px >> (tree_type != UVG_CHROMA_T)) &~3 };
const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1)); const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1));
const int chroma_height = chroma_width; // TODO: height for non-square blocks
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
@ -646,8 +650,8 @@ static double cu_rd_cost_tr_split_accurate(
if(chroma_can_use_tr_skip && cb_flag_v) { if(chroma_can_use_tr_skip && cb_flag_v) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
} }
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], tr_cu, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], tr_cu, chroma_width, chroma_height, COLOR_U, scan_order, tr_cu->tr_skip & 2);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], tr_cu, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], tr_cu, chroma_width, chroma_height, COLOR_V, scan_order, tr_cu->tr_skip & 4);
} }
else { else {
@ -664,7 +668,7 @@ static double cu_rd_cost_tr_split_accurate(
if (chroma_can_use_tr_skip) { if (chroma_can_use_tr_skip) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag");
} }
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], tr_cu, chroma_width, COLOR_U, scan_order, 0); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], tr_cu, chroma_width, chroma_height, COLOR_U, scan_order, 0);
} }
} }

View file

@ -45,6 +45,7 @@ void uvg_encode_coeff_nxn_avx2(encoder_state_t * const state,
cabac_data_t * const cabac, cabac_data_t * const cabac,
const coeff_t *coeff, const coeff_t *coeff,
uint8_t width, uint8_t width,
uint8_t height,
uint8_t type, uint8_t type,
int8_t scan_mode, int8_t scan_mode,
int8_t tr_skip, int8_t tr_skip,

View file

@ -875,8 +875,9 @@ static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length)
return parts[0] + parts[1] + parts[2] + parts[3]; return parts[0] + parts[1] + parts[2] + parts[3];
} }
static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64_t weights) static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights)
{ {
assert((width == height) && "Non-square block handling not implemented for this function.");
const __m256i zero = _mm256_setzero_si256(); const __m256i zero = _mm256_setzero_si256();
const __m256i threes = _mm256_set1_epi16(3); const __m256i threes = _mm256_set1_epi16(3);
const __m256i negate_hibytes = _mm256_set1_epi16(0xff00); const __m256i negate_hibytes = _mm256_set1_epi16(0xff00);
@ -893,7 +894,7 @@ static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64
__m256i wts_lo = _mm256_broadcastsi128_si256(wts_lo_128); __m256i wts_lo = _mm256_broadcastsi128_si256(wts_lo_128);
__m256i wts_hi = _mm256_broadcastsi128_si256(wts_hi_128); __m256i wts_hi = _mm256_broadcastsi128_si256(wts_hi_128);
for (int i = 0; i < width * width; i += 32) { for (int i = 0; i < width * height; i += 32) {
__m256i curr_lo = _mm256_loadu_si256 ((const __m256i *)(coeff + i)); __m256i curr_lo = _mm256_loadu_si256 ((const __m256i *)(coeff + i));
__m256i curr_abs_lo = _mm256_abs_epi16 (curr_lo); __m256i curr_abs_lo = _mm256_abs_epi16 (curr_lo);
__m256i curr_max3_lo = _mm256_min_epu16 (curr_abs_lo, threes); __m256i curr_max3_lo = _mm256_min_epu16 (curr_abs_lo, threes);

View file

@ -55,6 +55,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
cabac_data_t * const cabac, cabac_data_t * const cabac,
const coeff_t *coeff, const coeff_t *coeff,
uint8_t width, uint8_t width,
uint8_t height,
uint8_t color, uint8_t color,
int8_t scan_mode, int8_t scan_mode,
cu_info_t* cur_cu, cu_info_t* cur_cu,
@ -75,7 +76,6 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
// CONSTANTS // CONSTANTS
const int height = width; // TODO: height for non-square blocks.
const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; const uint32_t log2_block_width = uvg_g_convert_to_log2[width];
const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; const uint32_t log2_block_height = uvg_g_convert_to_log2[height];
const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1];
@ -192,7 +192,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
sig = (coeff[blk_pos] != 0) ? 1 : 0; sig = (coeff[blk_pos] != 0) ? 1 : 0;
if (num_non_zero || next_sig_pos != infer_sig_pos) { if (num_non_zero || next_sig_pos != infer_sig_pos) {
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum); ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum);
cabac_ctx_t* sig_ctx_luma = &(cabac->ctx.cu_sig_model_luma[MAX(0, (quant_state - 1))][ctx_sig]); cabac_ctx_t* sig_ctx_luma = &(cabac->ctx.cu_sig_model_luma[MAX(0, (quant_state - 1))][ctx_sig]);
cabac_ctx_t* sig_ctx_chroma = &(cabac->ctx.cu_sig_model_chroma[MAX(0, (quant_state - 1))][MIN(ctx_sig,7)]); cabac_ctx_t* sig_ctx_chroma = &(cabac->ctx.cu_sig_model_chroma[MAX(0, (quant_state - 1))][MIN(ctx_sig,7)]);
@ -200,7 +200,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
reg_bins--; reg_bins--;
} else if (next_sig_pos != scan_pos_last) { } else if (next_sig_pos != scan_pos_last) {
ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum); ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum);
} }
@ -266,7 +266,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
blk_pos = scan[scan_pos]; blk_pos = scan[scan_pos];
pos_y = blk_pos / width; pos_y = blk_pos / width;
pos_x = blk_pos - (pos_y * width); pos_x = blk_pos - (pos_y * width);
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 4); int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 4);
rice_param = g_go_rice_pars[abs_sum]; rice_param = g_go_rice_pars[abs_sum];
uint32_t second_pass_abs_coeff = abs(coeff[blk_pos]); uint32_t second_pass_abs_coeff = abs(coeff[blk_pos]);
@ -284,7 +284,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
pos_y = blk_pos / width; pos_y = blk_pos / width;
pos_x = blk_pos - (pos_y * width); pos_x = blk_pos - (pos_y * width);
uint32_t coeff_abs = abs(coeff[blk_pos]); uint32_t coeff_abs = abs(coeff[blk_pos]);
int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 0); int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 0);
rice_param = g_go_rice_pars[abs_sum]; rice_param = g_go_rice_pars[abs_sum];
pos0 = ((quant_state<2)?1:2) << rice_param; pos0 = ((quant_state<2)?1:2) << rice_param;
uint32_t remainder = (coeff_abs == 0 ? pos0 : coeff_abs <= pos0 ? coeff_abs - 1 : coeff_abs); uint32_t remainder = (coeff_abs == 0 ? pos0 : coeff_abs <= pos0 ? coeff_abs - 1 : coeff_abs);

View file

@ -45,6 +45,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
cabac_data_t * const cabac, cabac_data_t * const cabac,
const coeff_t *coeff, const coeff_t *coeff,
uint8_t width, uint8_t width,
uint8_t height,
uint8_t color, uint8_t color,
int8_t scan_mode, int8_t scan_mode,
cu_info_t* cur_cu, cu_info_t* cur_cu,

View file

@ -653,14 +653,15 @@ static INLINE void get_coeff_weights(uint64_t wts_packed, uint16_t *weights)
weights[3] = (wts_packed >> 48) & 0xffff; weights[3] = (wts_packed >> 48) & 0xffff;
} }
static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uint64_t weights) static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights)
{ {
assert((width == height) && "Non-square block handling not implemented for this function.");
uint32_t sum = 0; uint32_t sum = 0;
uint16_t weights_unpacked[4]; uint16_t weights_unpacked[4];
get_coeff_weights(weights, weights_unpacked); get_coeff_weights(weights, weights_unpacked);
for (int32_t i = 0; i < width * width; i++) { for (int32_t i = 0; i < width * height; i++) {
int16_t curr = coeff[i]; int16_t curr = coeff[i];
uint32_t curr_abs = abs(curr); uint32_t curr_abs = abs(curr);
if (curr_abs > 3) { if (curr_abs > 3) {

View file

@ -50,6 +50,7 @@ typedef unsigned (encode_coeff_nxn_func)(encoder_state_t * const state,
cabac_data_t * const cabac, cabac_data_t * const cabac,
const coeff_t *coeff, const coeff_t *coeff,
uint8_t width, uint8_t width,
uint8_t height,
uint8_t color, uint8_t color,
int8_t scan_mode, int8_t scan_mode,
cu_info_t* cur_cu, cu_info_t* cur_cu,

View file

@ -86,7 +86,7 @@ typedef unsigned (quant_residual_func)(encoder_state_t *const state,
typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width,
int32_t height, color_t color, int8_t block_type, int8_t transform_skip); int32_t height, color_t color, int8_t block_type, int8_t transform_skip);
typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights); typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights);
typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length); typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);

View file

@ -690,6 +690,7 @@ void uvg_chroma_transform_search(
u_quant_coeff, u_quant_coeff,
pred_cu, pred_cu,
width, width,
height,
COLOR_U, COLOR_U,
scan_order, scan_order,
transforms[i] == CHROMA_TS); transforms[i] == CHROMA_TS);
@ -706,6 +707,7 @@ void uvg_chroma_transform_search(
v_quant_coeff, v_quant_coeff,
pred_cu, pred_cu,
width, width,
height,
COLOR_V, COLOR_V,
scan_order, scan_order,
transforms[i] == CHROMA_TS); transforms[i] == CHROMA_TS);