[isp] Modify transform and quantization functions to handle non-square blocks. Add strategy headers to CMakeLists.txt.

This commit is contained in:
siivonek 2022-08-03 13:23:27 +03:00 committed by Marko Viitanen
parent 06532dce02
commit 626c9b02ea
20 changed files with 153 additions and 96 deletions

View file

@ -105,7 +105,7 @@ file(GLOB LIB_SOURCES RELATIVE ${PROJECT_SOURCE_DIR} "src/*.h" "src/*.c")
list(REMOVE_ITEM LIB_SOURCES "src/encmain.c" "src/cli.c" "src/cli.h" "src/yuv_io.c" "src/yuv_io.h")
# Add also all the strategies
file(GLOB_RECURSE LIB_SOURCES_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.c")
file(GLOB_RECURSE LIB_SOURCES_STRATEGIES RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/*.h" "src/strategies/*.c")
# ToDo: do something with encode_coding_tree-avx2, currently not converted to VVC
list(REMOVE_ITEM LIB_SOURCES_STRATEGIES "src/strategies/avx2/encode_coding_tree-avx2.c")

View file

@ -415,9 +415,9 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
*/
static INLINE void copy_coeffs(const coeff_t *__restrict src,
coeff_t *__restrict dest,
size_t width)
size_t width, size_t height)
{
memcpy(dest, src, width * width * sizeof(coeff_t));
memcpy(dest, src, width * height * sizeof(coeff_t));
}

View file

@ -89,20 +89,20 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr
}
}
static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to, bool joint, enum
static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to, bool joint, enum
uvg_tree_type tree_type)
{
if (tree_type != UVG_CHROMA_T) {
const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local);
copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width);
const int luma_z = xy_to_zorder(LCU_WIDTH, cu_loc->x, cu_loc->y);
copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], cu_loc->width, cu_loc->height);
}
if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> (tree_type != UVG_CHROMA_T), y_local >> (tree_type != UVG_CHROMA_T));
copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1);
copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1);
const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T));
copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
if (joint) {
copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], width >> 1);
copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
}
}
}
@ -114,9 +114,11 @@ static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_t
uvg_tree_type tree_type)
{
const int width = LCU_WIDTH >> depth;
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x_local, y_local, width, width);
copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], tree_type);
copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], joint, tree_type);
copy_cu_coeffs(&loc, &work_tree[depth + 1], &work_tree[depth], joint, tree_type);
}
@ -1093,7 +1095,7 @@ static double search_cu(
}
cu_loc_t loc;
const int width = LCU_WIDTH << depth;
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
uvg_cu_loc_ctor(&loc, x, y, width, height);
uvg_quantize_lcu_residual(state,
@ -1579,7 +1581,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
copy_lcu_to_cu_data(state, x, y, &work_tree[0], tree_type);
// Copy coeffs to encoder state.
copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH);
copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH);
if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
cost = search_cu(
@ -1596,9 +1598,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
copy_lcu_to_cu_data(state, x, y, &work_tree[0], UVG_CHROMA_T);
}
copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C);
if (state->encoder_control->cfg.jccr) {
copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C);
}
}

View file

@ -2225,6 +2225,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
u_pred,
u_resi,
width,
height,
LCU_WIDTH_C,
width);
uvg_generate_residual(
@ -2232,6 +2233,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
v_pred,
v_resi,
width,
height,
LCU_WIDTH_C,
width);

View file

@ -249,8 +249,11 @@ static void derive_mts_constraints(cu_info_t *const pred_cu,
// ISP_TODO: move this function if it is used elsewhere
bool can_use_isp(const int width, const int height, const int max_tr_size)
static INLINE bool can_use_isp(const int width, const int height, const int max_tr_size)
{
assert(!(width > LCU_WIDTH || height > LCU_WIDTH) && "Block size larger than max LCU size.");
assert(!(width < TR_MIN_WIDTH || height < TR_MIN_WIDTH) && "Block size smaller than min TR_WIDTH.");
const int log2_width = uvg_g_convert_to_bit[width] + 2;
const int log2_height = uvg_g_convert_to_bit[height] + 2;
@ -300,16 +303,14 @@ int uvg_get_isp_split_dim(const int width, const int height, const int split_typ
// ISP_TODO: move this function if it is used elsewhere
bool can_use_isp_with_lfnst(const int width, const int height, const int isp_mode)
static INLINE bool can_use_isp_with_lfnst(const int width, const int height, const int isp_mode)
{
if (isp_mode == ISP_MODE_NO_ISP) {
return false;
}
const int tu_width = isp_mode == ISP_MODE_HOR ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER);
const int tu_height = isp_mode == ISP_MODE_HOR ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR) : height;
// ISP_TODO: make a define for this or use existing
const int min_tb_size = 4;
const int min_tb_size = TR_MIN_WIDTH;
if (!(tu_width >= min_tb_size && tu_height >= min_tb_size)) {
return false;
@ -1449,7 +1450,7 @@ static int8_t search_intra_rdo(
enum uvg_tree_type tree_type)
{
const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
const int width = LCU_WIDTH << depth;
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
for (int mode = 0; mode < modes_to_check; mode++) {
@ -1633,6 +1634,7 @@ int8_t uvg_search_intra_chroma_rdo(
u_pred,
u_resi,
width,
height,
LCU_WIDTH_C,
width);
uvg_generate_residual(
@ -1640,6 +1642,7 @@ int8_t uvg_search_intra_chroma_rdo(
v_pred,
v_resi,
width,
height,
LCU_WIDTH_C,
width);
uvg_chorma_ts_out_t chorma_ts_out;

View file

@ -1590,18 +1590,20 @@ static void mts_dct_avx2(
const color_t color,
const cu_info_t* tu,
const int8_t width,
const int8_t height,
const int16_t* input,
int16_t* output,
const int8_t mts_idx)
{
tr_type_t type_hor;
tr_type_t type_ver;
// ISP_TODO: height passed but not used
uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx)
{
dct_func* dct_func = uvg_get_dct_func(width, color, tu->type);
dct_func* dct_func = uvg_get_dct_func(width, height, color, tu->type);
dct_func(bitdepth, input, output);
}
else

View file

@ -61,6 +61,7 @@ static void uvg_angular_pred_avx2(
uvg_pixel *const dst,
const uint8_t multi_ref_idx)
{
// ISP_TODO: non-square block implementation, height is passed but not used
const int width = channel_type == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = channel_type == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int log2_width = uvg_g_convert_to_bit[width] + 2;
@ -512,6 +513,7 @@ static void uvg_intra_pred_planar_avx2(
const uint8_t *const ref_left,
uint8_t *const dst)
{
// ISP_TODO: non-square block implementation, height is passed but not used
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int log2_width = uvg_g_convert_to_bit[width] + 2;
@ -977,6 +979,7 @@ static void uvg_pdpc_planar_dc_avx2(
const uvg_intra_ref *const used_ref,
uvg_pixel *const dst)
{
// ISP_TODO: non-square block implementation, height is passed but not used
assert(mode == 0 || mode == 1); // planar or DC
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;

View file

@ -1743,8 +1743,8 @@ static INLINE __m128i get_residual_8x1_avx2(const uint8_t* a_in, const uint8_t*
return diff;
}
static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride) {
static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int height, int ref_stride, int pred_stride) {
// ISP_TODO: non-square block implementation, height is passed but not used
__m128i diff = _mm_setzero_si128();
switch (width) {
case 4:

View file

@ -626,7 +626,7 @@ static void get_quantized_recon_avx2(int16_t *residual, const uint8_t *pred_in,
* \returns Whether coeff_out contains any non-zero coefficients.
*/
int uvg_quantize_residual_avx2(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
const coeff_scan_order_t scan_order, const int use_trskip,
const int in_stride, const int out_stride,
const uint8_t *const ref_in, const uint8_t *const pred_in,
@ -637,15 +637,15 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
// Temporary arrays to pass data to and from uvg_quant and transform functions.
ALIGNED(64) int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
const int height = width; // TODO: height for non-square blocks
// ISP_TODO: non-square block implementation, height is passed but not used
int has_coeffs = 0;
assert(width <= TR_MAX_WIDTH);
assert(width >= TR_MIN_WIDTH);
// Get residual. (ref_in - pred_in -> residual)
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
uvg_generate_residual(ref_in, pred_in, residual, width, height, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x;
@ -662,10 +662,10 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
// Transform residual. (residual -> coeff)
if (use_trskip) {
uvg_transformskip(state->encoder_control, residual, coeff, width);
uvg_transformskip(state->encoder_control, residual, coeff, width, height);
}
else {
uvg_transform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
}
const uint16_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;

View file

@ -739,6 +739,11 @@ static void idct_ ## n ## x ## n ## _generic(int8_t bitdepth, const int16_t *inp
partial_butterfly_inverse_ ## n ## _generic(tmp, output, shift_2nd); \
}
static void dct_non_square_generic(int8_t bitdepth, const int16_t* input, int16_t* output)
{
// ISP_TODO: non-square transform here
}
DCT_NXN_GENERIC(4);
DCT_NXN_GENERIC(8);
DCT_NXN_GENERIC(16);
@ -2487,26 +2492,28 @@ static void mts_dct_generic(
const color_t color,
const cu_info_t* tu,
const int8_t width,
const int8_t height,
const int16_t* input,
int16_t* output,
const int8_t mts_idx)
{
tr_type_t type_hor;
tr_type_t type_ver;
// ISP_TODO: height passed but not used
uvg_get_tr_type(width, color, tu, &type_hor, &type_ver, mts_idx);
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx && !tu->cr_lfnst_idx)
if (type_hor == DCT2 && type_ver == DCT2 && !tu->lfnst_idx && !tu->cr_lfnst_idx || width != height)
{
dct_func *dct_func = uvg_get_dct_func(width, color, tu->type);
dct_func *dct_func = uvg_get_dct_func(width, height, color, tu->type);
dct_func(bitdepth, input, output);
}
else
{
const int height = width;
int skip_width = (type_hor != DCT2 && width == 32) ? 16 : (width > 32 ? width - 32 : 0);
int skip_height = (type_ver != DCT2 && height == 32) ? 16 : (height > 32 ? height - 32 : 0);
const int log2_width_minus2 = uvg_g_convert_to_bit[width];
const int log2_height_minus2 = uvg_g_convert_to_bit[height];
if(tu->lfnst_idx || tu->cr_lfnst_idx) {
if ((width == 4 && height > 4) || (width > 4 && height == 4))
{
@ -2521,11 +2528,11 @@ static void mts_dct_generic(
}
partial_tr_func* dct_hor = dct_table[type_hor][log2_width_minus2];
partial_tr_func* dct_ver = dct_table[type_ver][log2_width_minus2];
partial_tr_func* dct_ver = dct_table[type_ver][log2_height_minus2];
int16_t tmp[32 * 32];
const int32_t shift_1st = log2_width_minus2 + bitdepth - 7;
const int32_t shift_2nd = log2_width_minus2 + 8;
const int32_t shift_2nd = log2_height_minus2 + 8;
dct_hor(input, tmp, shift_1st, height, 0, skip_width);
dct_ver(tmp, output, shift_2nd, width, skip_width, skip_height);
@ -2582,6 +2589,7 @@ int uvg_strategy_register_dct_generic(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "dct_8x8", "generic", 0, &dct_8x8_generic);
success &= uvg_strategyselector_register(opaque, "dct_16x16", "generic", 0, &dct_16x16_generic);
success &= uvg_strategyselector_register(opaque, "dct_32x32", "generic", 0, &dct_32x32_generic);
success &= uvg_strategyselector_register(opaque, "dct_non_square", "generic", 0, &dct_non_square_generic);
success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "generic", 0, &fast_inverse_dst_4x4_generic);

View file

@ -783,10 +783,10 @@ static double pixel_var_generic(const uvg_pixel *arr, const uint32_t len)
static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual,
int width, int ref_stride, int pred_stride)
int width, int height, int ref_stride, int pred_stride)
{
int y, x;
for (y = 0; y < width; ++y) {
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
residual[x + y * width] = (int16_t)(ref_in[x + y * ref_stride] - pred_in[x + y * pred_stride]);
}

View file

@ -237,6 +237,7 @@ int uvg_quant_cbcr_residual_generic(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const int height,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const uvg_pixel* const u_ref_in,
@ -247,28 +248,28 @@ int uvg_quant_cbcr_residual_generic(
uvg_pixel* v_rec_out,
coeff_t* coeff_out,
bool early_skip,
int lmcs_chroma_adj, enum uvg_tree_type tree_type
) {
int lmcs_chroma_adj, enum uvg_tree_type tree_type)
{
ALIGNED(64) int16_t u_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t combined_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
// ISP_TODO: this function is not fully converted to handle non-square blocks
{
int y, x;
for (y = 0; y < width; ++y) {
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
u_residual[x + y * width] = (int16_t)(u_ref_in[x + y * in_stride] - u_pred_in[x + y * in_stride]);
v_residual[x + y * width] = (int16_t)(v_ref_in[x + y * in_stride] - v_pred_in[x + y * in_stride]);
}
}
}
uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, height, in_stride, in_stride);
uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, height, in_stride, in_stride);
const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
for (int y = 0; y < width; y++)
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
@ -305,9 +306,9 @@ int uvg_quant_cbcr_residual_generic(
}
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, height, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
if(cur_cu->cr_lfnst_idx) {
uvg_fwd_lfnst(cur_cu, width, width, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
uvg_fwd_lfnst(cur_cu, width, height, COLOR_UV, cur_cu->cr_lfnst_idx, coeff, tree_type);
}
if (state->encoder_control->cfg.rdoq_enable &&
@ -441,7 +442,7 @@ int uvg_quant_cbcr_residual_generic(
* \returns Whether coeff_out contains any non-zero coefficients.
*/
int uvg_quantize_residual_generic(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
const coeff_scan_order_t scan_order, const int use_trskip,
const int in_stride, const int out_stride,
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
@ -454,19 +455,17 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
int has_coeffs = 0;
assert(width <= TR_MAX_WIDTH);
assert(width >= TR_MIN_WIDTH);
const int height = width; // TODO: height for non-square blocks
assert(width <= TR_MAX_WIDTH && height <= TR_MAX_WIDTH);
assert(width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH);
// Get residual. (ref_in - pred_in -> residual)
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
uvg_generate_residual(ref_in, pred_in, residual, width, height, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x;
int sign, absval;
int maxAbsclipBD = (1 << UVG_BIT_DEPTH) - 1;
for (y = 0; y < width; ++y) {
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
sign = residual[x + y * width] >= 0 ? 1 : -1;
absval = sign * residual[x + y * width];
@ -477,10 +476,10 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
// Transform residual. (residual -> coeff)
if (use_trskip) {
uvg_transformskip(state->encoder_control, residual, coeff, width);
uvg_transformskip(state->encoder_control, residual, coeff, width, height);
}
else {
uvg_transform2d(state->encoder_control, residual, coeff, width, color, cur_cu);
uvg_transform2d(state->encoder_control, residual, coeff, width, height, color, cur_cu);
}
const uint8_t lfnst_index = color == COLOR_Y ? cur_cu->lfnst_idx : cur_cu->cr_lfnst_idx;

View file

@ -60,7 +60,7 @@ void uvg_quant_generic(
uint8_t lfnst_idx);
int uvg_quantize_residual_generic(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
const coeff_scan_order_t scan_order, const int use_trskip,
const int in_stride, const int out_stride,
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
@ -71,6 +71,7 @@ int uvg_quant_cbcr_residual_generic(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const int height,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const uvg_pixel* const u_ref_in,

View file

@ -44,6 +44,7 @@ dct_func * uvg_dct_4x4 = 0;
dct_func * uvg_dct_8x8 = 0;
dct_func * uvg_dct_16x16 = 0;
dct_func * uvg_dct_32x32 = 0;
dct_func * uvg_dct_non_square = 0;
dct_func * uvg_fast_inverse_dst_4x4 = 0;
@ -56,9 +57,11 @@ void(*uvg_mts_dct)(int8_t bitdepth,
color_t color,
const cu_info_t *tu,
int8_t width,
int8_t height,
const int16_t *input,
int16_t *output,
const int8_t mts_idx);
void(*uvg_mts_idct)(int8_t bitdepth,
color_t color,
const cu_info_t *tu,
@ -90,8 +93,13 @@ int uvg_strategy_register_dct(void* opaque, uint8_t bitdepth) {
*
* \returns Pointer to the function.
*/
dct_func * uvg_get_dct_func(int8_t width, color_t color, cu_type_t type)
dct_func * uvg_get_dct_func(int8_t width, int8_t height, color_t color, cu_type_t type)
{
if (width != height) {
// Non-square block. Return generic dct for non-square blocks.
assert(false && "This should never be called at this point. Non-square stuff is done inside mts_dct function.");
return uvg_dct_non_square;
}
switch (width) {
case 4:
//if (color == COLOR_Y && type == CU_INTRA) {

View file

@ -51,6 +51,7 @@ extern dct_func * uvg_dct_4x4;
extern dct_func * uvg_dct_8x8;
extern dct_func * uvg_dct_16x16;
extern dct_func * uvg_dct_32x32;
extern dct_func * uvg_dct_non_square;
extern dct_func * uvg_fast_inverse_dst_4x4;
@ -64,6 +65,7 @@ typedef void (mts_dct_func)(
color_t color,
const cu_info_t* tu,
int8_t width,
int8_t height,
const int16_t* input,
int16_t* output,
const int8_t mts_idx);
@ -82,7 +84,7 @@ typedef void (mts_idct_func)(
extern mts_idct_func* uvg_mts_idct;
int uvg_strategy_register_dct(void* opaque, uint8_t bitdepth);
dct_func * uvg_get_dct_func(int8_t width, color_t color, cu_type_t type);
dct_func * uvg_get_dct_func(int8_t width, int8_t height, color_t color, cu_type_t type);
dct_func * uvg_get_idct_func(int8_t width, color_t color, cu_type_t type);

View file

@ -149,7 +149,7 @@ typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int height, int ref_stride, int pred_stride);
extern const uint32_t uvg_crc_table[256];

View file

@ -45,12 +45,23 @@
#include "tables.h"
// Declare function pointers.
typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip, uint8_t lfnst_idx);
typedef unsigned (quant_func)(
const encoder_state_t * const state,
coeff_t *coef,
coeff_t *q_coef,
int32_t width,
int32_t height,
color_t color,
int8_t scan_idx,
int8_t block_type,
int8_t transform_skip,
uint8_t lfnst_idx);
typedef unsigned (quant_cbcr_func)(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const int height,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const uvg_pixel* const u_ref_in,
@ -63,15 +74,18 @@ typedef unsigned (quant_cbcr_func)(
bool early_skip,
int lmcs_chroma_adj,
enum uvg_tree_type tree_type);
typedef unsigned (quant_residual_func)(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
const coeff_scan_order_t scan_order, const int use_trskip,
const int in_stride, const int out_stride,
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
uvg_pixel *rec_out, coeff_t *coeff_out,
bool early_skip, int lmcs_chroma_adj, enum uvg_tree_type tree_type);
typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width,
int32_t height, color_t color, int8_t block_type, int8_t transform_skip);
typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights);
typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);

View file

@ -77,6 +77,7 @@ const uint8_t uvg_g_chroma_scale[58]=
* Parameters pred_in and rec_out may be aliased.
*
* \param width Transform width.
* \param height Transform height.
* \param in_stride Stride for ref_in and pred_in
* \param out_stride Stride for rec_out.
* \param ref_in Reference pixels.
@ -87,6 +88,7 @@ const uint8_t uvg_g_chroma_scale[58]=
* \returns Whether coeff_out contains any non-zero coefficients.
*/
static bool bypass_transquant(const int width,
const int height,
const int in_stride,
const int out_stride,
const uvg_pixel *const ref_in,
@ -96,7 +98,7 @@ static bool bypass_transquant(const int width,
{
bool nonzero_coeffs = false;
for (int y = 0; y < width; ++y) {
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
int32_t in_idx = x + y * in_stride;
int32_t out_idx = x + y * out_stride;
@ -123,6 +125,7 @@ static bool bypass_transquant(const int width,
* \param coeff coefficients (residual) to filter
*/
static void rdpcm(const int width,
const int height,
const rdpcm_dir dir,
coeff_t *coeff)
{
@ -130,7 +133,7 @@ static void rdpcm(const int width,
const int min_x = (dir == RDPCM_HOR) ? 1 : 0;
const int min_y = (dir == RDPCM_HOR) ? 0 : 1;
for (int y = width - 1; y >= min_y; y--) {
for (int y = height - 1; y >= min_y; y--) {
for (int x = width - 1; x >= min_x; x--) {
const int index = x + y * width;
coeff[index] -= coeff[index - offset];
@ -203,17 +206,18 @@ void uvg_derive_lfnst_constraints(
/**
* \brief NxN inverse transform (2D)
* \param coeff input data (transform coefficients)
* \param block output data (residual)
* \param block_size input data (width of transform)
* \param coeff input data (transform coefficients)
* \param block output data (residual)
* \param width transform width
* \param height transform height
*/
void uvg_transformskip(const encoder_control_t * const encoder, int16_t *block,int16_t *coeff, int8_t block_size)
void uvg_transformskip(const encoder_control_t * const encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height)
{
int32_t j,k;
for (j = 0; j < block_size; j++) {
for(k = 0; k < block_size; k ++) {
int32_t j, k;
for (j = 0; j < height; j++) {
for(k = 0; k < width; k ++) {
// Casting back and forth to make UBSan not trigger due to left-shifting negatives
coeff[j * block_size + k] = (int16_t)((uint16_t)(block[j * block_size + k]));
coeff[j * width + k] = (int16_t)((uint16_t)(block[j * width + k]));
}
}
}
@ -243,17 +247,18 @@ void uvg_itransformskip(const encoder_control_t * const encoder, int16_t *block,
void uvg_transform2d(const encoder_control_t * const encoder,
int16_t *block,
int16_t *coeff,
int8_t block_size,
int8_t block_width,
int8_t block_height,
color_t color,
const cu_info_t *tu)
{
if (encoder->cfg.mts || tu->lfnst_idx || tu->cr_lfnst_idx)
if (encoder->cfg.mts || tu->lfnst_idx || tu->cr_lfnst_idx || block_width != block_height)
{
uvg_mts_dct(encoder->bitdepth, color, tu, block_size, block, coeff, encoder->cfg.mts);
uvg_mts_dct(encoder->bitdepth, color, tu, block_width, block_height, block, coeff, encoder->cfg.mts);
}
else
{
dct_func *dct_func = uvg_get_dct_func(block_size, color, tu->type);
dct_func *dct_func = uvg_get_dct_func(block_width, block_height, color, tu->type);
dct_func(encoder->bitdepth, block, coeff);
}
}
@ -373,6 +378,7 @@ static void generate_jccr_transforms(
&temp_resi[(cbf_mask1 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset],
width,
height,
COLOR_U,
pred_cu
);
@ -386,6 +392,7 @@ static void generate_jccr_transforms(
&temp_resi[(cbf_mask2 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset],
width,
height,
COLOR_U,
pred_cu
);
@ -492,10 +499,10 @@ void uvg_chroma_transform_search(
ALIGNED(64) coeff_t v_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 2];
ALIGNED(64) uint8_t v_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
uvg_transform2d(
state->encoder_control, u_resi, u_coeff, width, COLOR_U, pred_cu
state->encoder_control, u_resi, u_coeff, width, height, COLOR_U, pred_cu
);
uvg_transform2d(
state->encoder_control, v_resi, v_coeff, width, COLOR_V, pred_cu
state->encoder_control, v_resi, v_coeff, width, height, COLOR_V, pred_cu
);
enum uvg_chroma_transforms transforms[5];
transforms[0] = DCT7_CHROMA;
@ -508,8 +515,8 @@ void uvg_chroma_transform_search(
pred_cu->cr_lfnst_idx == 0 ;
if (can_use_tr_skip) {
uvg_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width);
uvg_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width);
uvg_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width, height);
uvg_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width, height);
transforms[num_transforms] = CHROMA_TS;
num_transforms++;
}
@ -1053,7 +1060,7 @@ void uvg_inv_lfnst(
*/
int uvg_quantize_residual_trskip(
encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const cu_info_t *const cur_cu, const int width, const int height, const color_t color,
const coeff_scan_order_t scan_order, int8_t *trskip_out,
const int in_stride, const int out_stride,
const uvg_pixel *const ref_in, const uvg_pixel *const pred_in,
@ -1074,7 +1081,7 @@ int uvg_quantize_residual_trskip(
//noskip.cost += uvg_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost;
skip.has_coeffs = uvg_quantize_residual(
state, cur_cu, width, color, scan_order,
state, cur_cu, width, height, color, scan_order,
1, in_stride, width,
ref_in, pred_in, skip.rec, skip.coeff, false, lmcs_chroma_adj,
UVG_BOTH_T /* tree type doesn't matter for transformskip*/);
@ -1090,9 +1097,9 @@ int uvg_quantize_residual_trskip(
if (best->has_coeffs || rec_out != pred_in) {
// If there is no residual and reconstruction is already in rec_out,
// we can skip this.
uvg_pixels_blit(best->rec, rec_out, width, width, width, out_stride);
uvg_pixels_blit(best->rec, rec_out, width, height, width, out_stride);
}
copy_coeffs(best->coeff, coeff_out, width);
copy_coeffs(best->coeff, coeff_out, width, height);
return best->has_coeffs;
}
@ -1131,8 +1138,8 @@ static void quantize_tr_residual(
// This should ensure that the CBF data doesn't get corrupted if this function
// is called more than once.
int32_t tr_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
int32_t tr_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int32_t tr_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int32_t tr_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int32_t lcu_width = LCU_WIDTH >> shift;
const int8_t mode =
@ -1183,7 +1190,9 @@ static void quantize_tr_residual(
}
if (cfg->lossless) {
// ISP_TODO: is there any sensible case where in and out strides would be different?
has_coeffs = bypass_transquant(tr_width,
tr_height,
lcu_width, // in stride
lcu_width, // out stride
ref,
@ -1193,9 +1202,9 @@ static void quantize_tr_residual(
if (cfg->implicit_rdpcm && cur_pu->type == CU_INTRA) {
// implicit rdpcm for horizontal and vertical intra modes
if (mode == 18) {
rdpcm(tr_width, RDPCM_HOR, coeff);
rdpcm(tr_width, tr_height, RDPCM_HOR, coeff);
} else if (mode == 50) {
rdpcm(tr_width, RDPCM_VER, coeff);
rdpcm(tr_width, tr_height, RDPCM_VER, coeff);
}
}
@ -1206,6 +1215,7 @@ static void quantize_tr_residual(
has_coeffs = uvg_quantize_residual_trskip(state,
cur_pu,
tr_width,
tr_height,
color,
scan_idx,
&tr_skip,
@ -1222,6 +1232,7 @@ static void quantize_tr_residual(
state,
cur_pu,
tr_width,
tr_height,
scan_idx,
lcu_width,
lcu_width,
@ -1240,6 +1251,7 @@ static void quantize_tr_residual(
has_coeffs = uvg_quantize_residual(state,
cur_pu,
tr_width,
tr_height,
color,
scan_idx,
false, // tr skip
@ -1326,8 +1338,8 @@ void uvg_quantize_lcu_residual(
const int offset = width / 2;
for (int j = 0; j < 2; ++j) {
for (int i = 0; i < 2; ++i) {
const cu_loc_t loc;
uvg_cu_loc_ctor(&loc, (x + i * offset), (y + j * offset), width, height);
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, (x + i * offset), (y + j * offset), width >> 1, height >> 1);
// jccr is currently not supported if transform is split
uvg_quantize_lcu_residual(state, luma, chroma, 0, &loc, depth + 1, NULL, lcu, early_skip, tree_type);
}

View file

@ -47,13 +47,14 @@ extern const uint8_t uvg_g_chroma_scale[58];
extern const int16_t uvg_g_inv_quant_scales[6];
extern const int16_t uvg_g_quant_scales[6];
void uvg_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
void uvg_transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t width, int8_t height);
void uvg_itransformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
void uvg_transform2d(const encoder_control_t * const encoder,
int16_t *block,
int16_t *coeff,
int8_t block_size,
int8_t block_width,
int8_t block_height,
color_t color,
const cu_info_t *tu);

View file

@ -111,7 +111,7 @@ static void setup_tests()
tu.tr_idx = MTS_DST7_DST7 + trafo;
tu.lfnst_idx = 0;
tu.cr_lfnst_idx = 0;
mts_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo*NUM_SIZES+block], dct_result[trafo][block], UVG_MTS_BOTH);
mts_generic(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + block), 1 << (LCU_MIN_LOG_W + block), dct_bufs[trafo*NUM_SIZES+block], dct_result[trafo][block], UVG_MTS_BOTH);
}
}
}
@ -167,7 +167,7 @@ TEST dct(void)
int16_t* buf = dct_bufs[trafo * NUM_SIZES + blocksize];
ALIGNED(32) int16_t test_result[LCU_WIDTH * LCU_WIDTH] = { 0 };
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; ++i) {
ASSERT_EQm(testname, test_result[i], dct_result[trafo][blocksize][i]);
@ -192,7 +192,7 @@ TEST idct(void)
int16_t* buf = dct_bufs[trafo * NUM_SIZES + blocksize];
ALIGNED(32) int16_t test_result[LCU_WIDTH * LCU_WIDTH] = { 0 };
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
test_env.tested_func(UVG_BIT_DEPTH, COLOR_Y, &tu, 1 << (LCU_MIN_LOG_W + blocksize), 1 << (LCU_MIN_LOG_W + blocksize), buf, test_result, UVG_MTS_BOTH);
for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; ++i) {
ASSERT_EQm(testname, test_result[i], idct_result[trafo][blocksize][i]);