diff --git a/src/context.c b/src/context.c index 31124b02..708b9da4 100644 --- a/src/context.c +++ b/src/context.c @@ -657,7 +657,7 @@ uint32_t uvg_context_get_sig_coeff_group_ts(uint32_t* sig_coeff_group_flag, * \returns context index for current scan position */ uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, - uint32_t width, uint32_t height, int8_t type, + uint32_t width, uint32_t height, int8_t color, int32_t* temp_diag, int32_t* temp_sum) { const coeff_t* data = coeff + pos_x + pos_y * width; @@ -687,7 +687,7 @@ uint32_t uvg_context_get_sig_ctx_idx_abs(const coeff_t* coeff, uint32_t pos_x, u } #undef UPDATE int ctx_ofs = MIN((sum_abs+1)>>1, 3) + (diag < 2 ? 4 : 0); - if (type == 0 /* Luma */) + if (color == COLOR_Y) { ctx_ofs += diag < 5 ? 4 : 0; } @@ -815,7 +815,7 @@ unsigned uvg_lrg1_ctx_id_abs_ts(const coeff_t* coeff, int32_t pos_x, int32_t pos * \returns context go rice parameter */ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, - uint32_t height, uint32_t width, uint32_t baselevel) + uint32_t width, uint32_t height, uint32_t baselevel) { #define UPDATE(x) sum+=abs(x)/*-(x?1:0)*/ @@ -857,8 +857,8 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, * \returns context go rice parameter */ uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, - uint32_t height, uint32_t width, uint32_t baselevel) + uint32_t width, uint32_t height, uint32_t baselevel) { - uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, height, width, baselevel); + uint32_t check = uvg_abs_sum(coeff, pos_x, pos_y, width, height, baselevel); return g_go_rice_pars[check]; } \ No newline at end of file diff --git a/src/context.h b/src/context.h index 3f342409..f083e44c 100644 --- a/src/context.h +++ b/src/context.h @@ -66,7 +66,7 @@ uint32_t uvg_abs_sum(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, uint32_t height, uint32_t width, uint32_t baselevel); 
uint32_t uvg_go_rice_par_abs(const coeff_t* coeff, uint32_t pos_x, uint32_t pos_y, - uint32_t height, uint32_t width, uint32_t baselevel); + uint32_t width, uint32_t height, uint32_t baselevel); #define CNU 35 #define DWS 8 diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 019c1d03..f917b31d 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -213,6 +213,7 @@ void uvg_encode_ts_residual(encoder_state_t* const state, cabac_data_t* const cabac, const coeff_t* coeff, uint32_t width, + uint32_t height, uint8_t type, int8_t scan_mode, double* bits_out) @@ -227,8 +228,9 @@ void uvg_encode_ts_residual(encoder_state_t* const state, // CONSTANTS - const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; - const uint32_t log2_block_height = log2_block_width; // TODO: height + const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; + const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; + // TODO: log2_cg_size is wrong if width != height const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; const uint32_t* old_scan = uvg_g_sig_last_scan[scan_mode][log2_block_width - 1]; const uint32_t* old_scan_cg = g_sig_last_scan_cg[log2_block_width - 1][scan_mode]; @@ -243,13 +245,11 @@ void uvg_encode_ts_residual(encoder_state_t* const state, cabac->cur_ctx = base_coeff_group_ctx; - // ISP_TODO: height - int maxCtxBins = (width * width * 7) >> 2; + int maxCtxBins = (width * height * 7) >> 2; unsigned scan_cg_last = (unsigned )-1; //unsigned scan_pos_last = (unsigned )-1; - // ISP_TODO: height - for (i = 0; i < width * width; i++) { + for (i = 0; i < width * height; i++) { if (coeff[scan[i]]) { // ISP_DEBUG assert(old_scan[i] == scan[i] && "Old scan_cg differs from the new one."); @@ -258,7 +258,8 @@ void uvg_encode_ts_residual(encoder_state_t* const state, sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1; } } - scan_cg_last 
= (width * width - 1) >> log2_cg_size; + // TODO: this won't work with non-square blocks + scan_cg_last = (width * height - 1) >> log2_cg_size; const uint32_t cg_width = (MIN((uint8_t)32, width) >> (log2_cg_size / 2)); bool no_sig_group_before_last = true; @@ -481,6 +482,7 @@ static void encode_chroma_tu( enum uvg_tree_type tree_type) { + int height_c = width_c; // TODO: height for non-square blocks int x_local = ((x >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; int y_local = ((y >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; cabac_data_t* const cabac = &state->cabac; @@ -496,7 +498,7 @@ static void encode_chroma_tu( // TODO: transform skip for chroma blocks CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_U) & 1, "transform_skip_flag"); } - uvg_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, COLOR_U, *scan_idx, cur_pu, NULL); + uvg_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, height_c, COLOR_U, *scan_idx, cur_pu, NULL); } if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { @@ -504,7 +506,7 @@ static void encode_chroma_tu( cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_V) & 1, "transform_skip_flag"); } - uvg_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, COLOR_V, *scan_idx, cur_pu, NULL); + uvg_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, height_c, COLOR_V, *scan_idx, cur_pu, NULL); } } else { @@ -513,7 +515,7 @@ static void encode_chroma_tu( cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; CABAC_BIN(cabac, 0, "transform_skip_flag"); } - uvg_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, COLOR_V, *scan_idx, cur_pu, NULL); + uvg_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, height_c, COLOR_V, *scan_idx, cur_pu, NULL); } } @@ -534,6 +536,9 @@ static void encode_transform_unit( cabac_data_t* const cabac = &state->cabac; const uint8_t width = LCU_WIDTH >> depth; const uint8_t width_c = (depth == MAX_PU_DEPTH ? 
width : width / 2); + // TODO: height for non-square blocks + const uint8_t height = width; + const uint8_t height_c = width_c; cu_array_t* used_cu_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; const cu_info_t *cur_pu = uvg_cu_array_at_const(used_cu_array, x, y); @@ -556,13 +561,14 @@ static void encode_transform_unit( DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_TR_SKIP, x, y, width, width, (cur_pu->tr_idx == MTS_SKIP) ? 1 : 0); } if(cur_pu->tr_idx == MTS_SKIP) { - uvg_encode_ts_residual(state, cabac, coeff_y, width, 0, scan_idx, NULL); + uvg_encode_ts_residual(state, cabac, coeff_y, width, height, 0, scan_idx, NULL); } else { uvg_encode_coeff_nxn(state, cabac, coeff_y, width, + height, 0, scan_idx, (cu_info_t * )cur_pu, diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index c2cd39da..9757a327 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -64,6 +64,7 @@ void uvg_encode_ts_residual(encoder_state_t* const state, cabac_data_t* const cabac, const coeff_t* coeff, uint32_t width, + uint32_t height, uint8_t type, int8_t scan_mode, double* bits); diff --git a/src/rdo.c b/src/rdo.c index fc4052c4..9f5abd21 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -298,6 +298,7 @@ static INLINE double get_coeff_cabac_cost( const encoder_state_t * const state, const coeff_t *coeff, int32_t width, + int32_t height, color_t color, int8_t scan_mode, int8_t tr_skip, @@ -305,7 +306,7 @@ static INLINE double get_coeff_cabac_cost( { // Make sure there are coeffs present bool found = false; - for (int i = 0; i < width*width; i++) { + for (int i = 0; i < width * height; i++) { if (coeff[i] != 0) { found = 1; break; @@ -331,6 +332,7 @@ static INLINE double get_coeff_cabac_cost( &cabac_copy, coeff, width, + height, color, scan_mode, cur_tu, @@ -341,6 +343,7 @@ static INLINE double get_coeff_cabac_cost( &cabac_copy, coeff, width, + height, color, scan_mode, &bits); @@ -392,6 +395,7 @@ double uvg_get_coeff_cost( const coeff_t 
*coeff, cu_info_t* cur_tu, int32_t width, + int32_t height, color_t color, int8_t scan_mode, int8_t tr_skip) @@ -409,15 +413,15 @@ double uvg_get_coeff_cost( return UINT32_MAX; // Hush little compiler don't you cry, not really gonna return anything after assert(0) } else { uint64_t weights = uvg_fast_coeff_get_weights(state); - uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, weights); + uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, height, weights); if (check_accuracy) { - double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu); + double ccc = get_coeff_cabac_cost(state, coeff, width, height, color, scan_mode, tr_skip, cur_tu); save_accuracy(state->qp, ccc, fast_cost); } return fast_cost; } } else { - double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu); + double ccc = get_coeff_cabac_cost(state, coeff, width, height, color, scan_mode, tr_skip, cur_tu); if (save_cccs) { save_ccc(state->qp, coeff, width * width, ccc); } diff --git a/src/rdo.h b/src/rdo.h index 7f325cfd..88a6548b 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -74,6 +74,7 @@ double uvg_get_coeff_cost( const coeff_t *coeff, cu_info_t* cur_tu, int32_t width, + int32_t height, color_t color, int8_t scan_mode, int8_t tr_skip); diff --git a/src/search.c b/src/search.c index 64dd263b..ba2f79c9 100644 --- a/src/search.c +++ b/src/search.c @@ -310,7 +310,8 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, lcu_t *const lcu, uint8_t isp_cbf) { - const int width = LCU_WIDTH >> depth; + const int width = LCU_WIDTH >> depth; + const int height = width; // TODO: height for non-square blocks const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; @@ -380,7 +381,7 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); 
const coeff_t *coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, width, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); + coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, width, height, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP); } double bits = tr_tree_bits + coeff_bits; @@ -394,7 +395,8 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, lcu_t *const lcu) { const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 }; - const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; + const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; + const int height = width; // TODO: height for non-square blocks cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); @@ -468,11 +470,11 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); if((pred_cu->joint_cb_cr & 3) == 0){ - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, 2, scan_order, 0); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, width, height, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, width, height, 2, scan_order, 0); } else { - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, 2, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, height, 2, scan_order, 0); } } @@ -493,6 +495,7 @@ static double cu_rd_cost_tr_split_accurate( enum uvg_tree_type tree_type, uint8_t isp_cbf) { const int width = LCU_WIDTH >> depth; + const int height = width; // TODO: height for non-square blocks const int skip_residual_coding = 
pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); // cur_cu is used for TU parameters. @@ -597,7 +600,7 @@ static double cu_rd_cost_tr_split_accurate( int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; - coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, 0, luma_scan_mode, tr_cu->tr_skip & 1); + coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, height, 0, luma_scan_mode, tr_cu->tr_skip & 1); } if(depth == 4 || tree_type == UVG_LUMA_T) { @@ -624,7 +627,8 @@ static double cu_rd_cost_tr_split_accurate( unsigned chroma_ssd = 0; if(has_chroma) { const vector2d_t lcu_px = { (x_px >> (tree_type != UVG_CHROMA_T)) & ~3, (y_px >> (tree_type != UVG_CHROMA_T)) &~3 }; - const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1)); + const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1)); + const int chroma_height = chroma_width; // TODO: height for non-square blocks int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); @@ -646,8 +650,8 @@ static double cu_rd_cost_tr_split_accurate( if(chroma_can_use_tr_skip && cb_flag_v) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], tr_cu, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2); - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], tr_cu, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], tr_cu, chroma_width, chroma_height, COLOR_U, scan_order, tr_cu->tr_skip & 2); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], tr_cu, chroma_width, chroma_height, COLOR_V, scan_order, tr_cu->tr_skip & 4); } else { @@ -664,7 +668,7 @@ static double 
cu_rd_cost_tr_split_accurate( if (chroma_can_use_tr_skip) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], tr_cu, chroma_width, COLOR_U, scan_order, 0); + coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], tr_cu, chroma_width, chroma_height, COLOR_U, scan_order, 0); } } diff --git a/src/strategies/avx2/encode_coding_tree-avx2.h b/src/strategies/avx2/encode_coding_tree-avx2.h index ae1845c8..9fc75c8a 100644 --- a/src/strategies/avx2/encode_coding_tree-avx2.h +++ b/src/strategies/avx2/encode_coding_tree-avx2.h @@ -45,6 +45,7 @@ void uvg_encode_coeff_nxn_avx2(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, uint8_t width, + uint8_t height, uint8_t type, int8_t scan_mode, int8_t tr_skip, diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index 078df533..962a671a 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -875,8 +875,9 @@ static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length) return parts[0] + parts[1] + parts[2] + parts[3]; } -static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64_t weights) +static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights) { + assert((width == height) && "Non-square block handling not implemented for this function."); const __m256i zero = _mm256_setzero_si256(); const __m256i threes = _mm256_set1_epi16(3); const __m256i negate_hibytes = _mm256_set1_epi16(0xff00); @@ -893,7 +894,7 @@ static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, uint64 __m256i wts_lo = _mm256_broadcastsi128_si256(wts_lo_128); __m256i wts_hi = _mm256_broadcastsi128_si256(wts_hi_128); - for (int i = 0; i < width * width; i += 32) { + for (int i = 0; i < width * height; i += 32) { 
__m256i curr_lo = _mm256_loadu_si256 ((const __m256i *)(coeff + i)); __m256i curr_abs_lo = _mm256_abs_epi16 (curr_lo); __m256i curr_max3_lo = _mm256_min_epu16 (curr_abs_lo, threes); diff --git a/src/strategies/generic/encode_coding_tree-generic.c b/src/strategies/generic/encode_coding_tree-generic.c index 189334b5..21785501 100644 --- a/src/strategies/generic/encode_coding_tree-generic.c +++ b/src/strategies/generic/encode_coding_tree-generic.c @@ -55,6 +55,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, uint8_t width, + uint8_t height, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, @@ -75,7 +76,6 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, // CONSTANTS - const int height = width; // TODO: height for non-square blocks. const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_width][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_width][1]; @@ -192,7 +192,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, sig = (coeff[blk_pos] != 0) ? 
1 : 0; if (num_non_zero || next_sig_pos != infer_sig_pos) { - ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum); + ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum); cabac_ctx_t* sig_ctx_luma = &(cabac->ctx.cu_sig_model_luma[MAX(0, (quant_state - 1))][ctx_sig]); cabac_ctx_t* sig_ctx_chroma = &(cabac->ctx.cu_sig_model_chroma[MAX(0, (quant_state - 1))][MIN(ctx_sig,7)]); @@ -200,7 +200,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, reg_bins--; } else if (next_sig_pos != scan_pos_last) { - ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, width, color, &temp_diag, &temp_sum); + ctx_sig = uvg_context_get_sig_ctx_idx_abs(coeff, pos_x, pos_y, width, height, color, &temp_diag, &temp_sum); } @@ -266,7 +266,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, blk_pos = scan[scan_pos]; pos_y = blk_pos / width; pos_x = blk_pos - (pos_y * width); - int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 4); + int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 4); rice_param = g_go_rice_pars[abs_sum]; uint32_t second_pass_abs_coeff = abs(coeff[blk_pos]); @@ -284,7 +284,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, pos_y = blk_pos / width; pos_x = blk_pos - (pos_y * width); uint32_t coeff_abs = abs(coeff[blk_pos]); - int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, width, 0); + int32_t abs_sum = uvg_abs_sum(coeff, pos_x, pos_y, width, height, 0); rice_param = g_go_rice_pars[abs_sum]; pos0 = ((quant_state<2)?1:2) << rice_param; uint32_t remainder = (coeff_abs == 0 ? pos0 : coeff_abs <= pos0 ? 
coeff_abs - 1 : coeff_abs); diff --git a/src/strategies/generic/encode_coding_tree-generic.h b/src/strategies/generic/encode_coding_tree-generic.h index 8cfe497d..bcf51f15 100644 --- a/src/strategies/generic/encode_coding_tree-generic.h +++ b/src/strategies/generic/encode_coding_tree-generic.h @@ -45,6 +45,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, uint8_t width, + uint8_t height, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index deb5c962..16fbce38 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -653,14 +653,15 @@ static INLINE void get_coeff_weights(uint64_t wts_packed, uint16_t *weights) weights[3] = (wts_packed >> 48) & 0xffff; } -static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, uint64_t weights) +static uint32_t fast_coeff_cost_generic(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights) { + assert((width == height) && "Non-square block handling not implemented for this function."); uint32_t sum = 0; uint16_t weights_unpacked[4]; get_coeff_weights(weights, weights_unpacked); - for (int32_t i = 0; i < width * width; i++) { + for (int32_t i = 0; i < width * height; i++) { int16_t curr = coeff[i]; uint32_t curr_abs = abs(curr); if (curr_abs > 3) { diff --git a/src/strategies/strategies-encode.h b/src/strategies/strategies-encode.h index 8743a6ed..f503eb73 100644 --- a/src/strategies/strategies-encode.h +++ b/src/strategies/strategies-encode.h @@ -50,6 +50,7 @@ typedef unsigned (encode_coeff_nxn_func)(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, uint8_t width, + uint8_t height, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, diff --git a/src/strategies/strategies-quant.h b/src/strategies/strategies-quant.h index 2920ed82..b0e75046 100644 --- 
a/src/strategies/strategies-quant.h +++ b/src/strategies/strategies-quant.h @@ -86,7 +86,7 @@ typedef unsigned (quant_residual_func)(encoder_state_t *const state, typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, color_t color, int8_t block_type, int8_t transform_skip); -typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights); +typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, int32_t height, uint64_t weights); typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length); diff --git a/src/transform.c b/src/transform.c index 01f6289f..4738f942 100644 --- a/src/transform.c +++ b/src/transform.c @@ -690,6 +690,7 @@ void uvg_chroma_transform_search( u_quant_coeff, pred_cu, width, + height, COLOR_U, scan_order, transforms[i] == CHROMA_TS); @@ -706,6 +707,7 @@ void uvg_chroma_transform_search( v_quant_coeff, pred_cu, width, + height, COLOR_V, scan_order, transforms[i] == CHROMA_TS);