[isp] Convert functions to handle new coeff array order. Add function for getting coeff array subset. Fix assert.

This commit is contained in:
siivonek 2022-08-25 14:54:42 +03:00 committed by Marko Viitanen
parent 69dcb04c99
commit b8506c757c
8 changed files with 92 additions and 58 deletions

View file

@ -415,9 +415,11 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
*/
static INLINE void copy_coeffs(const coeff_t *__restrict src,
                               coeff_t *__restrict dest,
                               size_t width, size_t height, const int lcu_width)
{
  // Both buffers are walked with the same row stride (lcu_width elements).
  // Only the first `width` coefficients of each of the `height` rows are
  // copied; the rest of every row is left untouched in dest.
  for (size_t j = 0; j < height; ++j) {
    memcpy(dest + j * lcu_width, src + j * lcu_width, width * sizeof(coeff_t));
  }
}

View file

@ -417,7 +417,7 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
//ToDo: own ctx_offset and shift for X and Y
uint8_t ctx_offset_x = type ? 0 : prefix_ctx[index_x];
uint8_t ctx_offset_y = type ? 0 : prefix_ctx[index_y];
uint8_t shift_x = type ? CLIP(0, 2, width>>3) : (index_x+1)>>2;
uint8_t shift_x = type ? CLIP(0, 2, width >> 3) : (index_x + 1) >> 2;
uint8_t shift_y = type ? CLIP(0, 2, width >> 3) : (index_y + 1) >> 2;
double bits = 0;
@ -481,11 +481,15 @@ static void encode_chroma_tu(
cabac_data_t* const cabac = &state->cabac;
*scan_idx = uvg_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth);
if(!joint_chroma){
const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
// const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
// const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
coeff_t coeff_u[TR_MAX_WIDTH * TR_MAX_WIDTH];
coeff_t coeff_v[TR_MAX_WIDTH * TR_MAX_WIDTH];
uvg_get_sub_coeff(coeff_u, coeff->u, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
uvg_get_sub_coeff(coeff_v, coeff->v, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
// ISP_TODO: do these checks need height?
// TODO: height for this check and the others below
if(state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)){
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
// HEVC only supports transform_skip for Luma
@ -504,7 +508,9 @@ static void encode_chroma_tu(
}
}
else {
const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
// const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
const coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH];
uvg_get_sub_coeff(coeff_uv, coeff->joint_uv, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
CABAC_BIN(cabac, 0, "transform_skip_flag");
@ -544,7 +550,9 @@ static void encode_transform_unit(
if (cbf_y && !only_chroma) {
int x_local = x % LCU_WIDTH;
int y_local = y % LCU_WIDTH;
const coeff_t *coeff_y = &coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)];
// const coeff_t *coeff_y = &coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)];
coeff_t coeff_y[TR_MAX_WIDTH * TR_MAX_WIDTH];
uvg_get_sub_coeff(coeff_y, coeff->y, x_local, y_local, width, height, LCU_WIDTH);
// CoeffNxN
// Residual Coding
@ -1849,3 +1857,26 @@ void uvg_encode_mvd(encoder_state_t * const state,
if(bits_out) *bits_out = temp_bits_out;
}
/**
 * \brief Extract a rectangular block from an LCU coeff array into a packed buffer.
 *
 * Row j of the block is read from src starting at
 * lcu_x + (lcu_y + j) * lcu_width and stored in dst starting at j * block_w,
 * so dst ends up densely packed with a row stride of block_w.
 *
 * \param dst        Destination array. Should be coeff_t [32*32].
 * \param src        Coeff LCU array.
 * \param lcu_x      Local LCU x coordinate.
 * \param lcu_y      Local LCU y coordinate.
 * \param block_w    Block width.
 * \param block_h    Block height.
 * \param lcu_width  LCU_WIDTH for luma, LCU_WIDTH_C for chroma.
 */
void uvg_get_sub_coeff(coeff_t *dst, const coeff_t * const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width)
{
  // First coefficient of the block inside the LCU array.
  const coeff_t *const block_origin = src + (lcu_x + lcu_y * lcu_width);
  const size_t row_bytes = block_w * sizeof(coeff_t);

  for (int row = 0; row < block_h; ++row) {
    memcpy(dst + row * block_w, block_origin + row * lcu_width, row_bytes);
  }
}

View file

@ -116,3 +116,5 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
uint8_t lastpos_x, uint8_t lastpos_y,
uint8_t width, uint8_t height,
uint8_t type, uint8_t scan, double* bits_out);
// Copies a block_w x block_h block starting at (lcu_x, lcu_y) from src into dst.
// lcu_width is the row stride of src; the parameter list must match the definition.
void uvg_get_sub_coeff(coeff_t* dst, const coeff_t* const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width);

View file

@ -1520,7 +1520,7 @@ int uvg_get_isp_split_num(const int width, const int height, const int split_typ
void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type)
{
assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3].");
assert((split_type == ISP_MODE_NO_ISP && split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
int part_dim = block_w;
if (split_type != ISP_MODE_NO_ISP) {
part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type);

View file

@ -305,11 +305,21 @@ static INLINE double get_coeff_cabac_cost(
{
const int width = cu_loc->width;
const int height = cu_loc->height;
const int sub_coeff_w = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int sub_coeff_h = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int lcu_width = color == COLOR_Y ? LCU_WIDTH : LCU_WIDTH_C;
int x_local = cu_loc->x % LCU_WIDTH;
int y_local = cu_loc->y % LCU_WIDTH;
// Make sure there are coeffs present
bool found = false;
// ISP_TODO: this needs to be two separate x, y loops?
for (int i = 0; i < width * height; i++) {
if (coeff[i] != 0) {
coeff_t sub_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width);
for (int i = 0; i < sub_coeff_w * sub_coeff_h; i++) {
if (sub_coeff[i] != 0) {
found = 1;
break;
}
@ -332,7 +342,7 @@ static INLINE double get_coeff_cabac_cost(
if(!tr_skip) {
uvg_encode_coeff_nxn((encoder_state_t*) state,
&cabac_copy,
coeff,
sub_coeff,
cu_loc,
color,
scan_mode,
@ -342,7 +352,7 @@ static INLINE double get_coeff_cabac_cost(
else {
uvg_encode_ts_residual((encoder_state_t* const)state,
&cabac_copy,
coeff,
sub_coeff,
width,
height,
color,

View file

@ -90,20 +90,27 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr
}
}
// ISP_TODO: this needs to work with the new coeff cu order
/**
 * \brief Copy the coefficients covered by one CU from one LCU to another.
 *
 * Coefficients are addressed in raster order with a row stride of LCU_WIDTH
 * for luma and LCU_WIDTH_C for chroma. The same index is used in both LCUs.
 *
 * \param cu_loc     Position and dimensions of the CU.
 * \param from       LCU whose coefficients are read.
 * \param to         LCU whose coefficients are written.
 * \param joint      When true, the joint_uv coefficients are copied as well.
 * \param tree_type  Luma is skipped for UVG_CHROMA_T, chroma for UVG_LUMA_T.
 */
static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to, bool joint, enum uvg_tree_type tree_type)
{
  if (tree_type != UVG_CHROMA_T) {
    const int idx = (cu_loc->x % LCU_WIDTH) + ((cu_loc->y % LCU_WIDTH) * LCU_WIDTH);
    copy_coeffs(&from->coeff.y[idx], &to->coeff.y[idx], cu_loc->width, cu_loc->height, LCU_WIDTH);
  }

  // Chroma is also skipped entirely when the source LCU is 4:0:0.
  if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
    // The coordinates are halved unless the tree type is UVG_CHROMA_T.
    const int chroma_shift = (tree_type != UVG_CHROMA_T);
    const int chroma_x = cu_loc->x >> chroma_shift;
    const int chroma_y = cu_loc->y >> chroma_shift;
    const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C);

    copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    copy_coeffs(&from->coeff.v[idx], &to->coeff.v[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    if (joint) {
      copy_coeffs(&from->coeff.joint_uv[idx], &to->coeff.joint_uv[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    }
  }
}
@ -1672,7 +1679,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
copy_lcu_to_cu_data(state, x, y, &work_tree[0], tree_type);
// Copy coeffs to encoder state.
copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH);
copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);
if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
cost = search_cu(
@ -1689,9 +1696,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
copy_lcu_to_cu_data(state, x, y, &work_tree[0], UVG_CHROMA_T);
}
copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
if (state->encoder_control->cfg.jccr) {
copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
}
}

View file

@ -64,6 +64,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
const int y = cu_loc->y;
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
//const encoder_control_t * const encoder = state->encoder_control;
//int c1 = 1;
uint8_t last_coeff_x = 0;
@ -94,14 +95,13 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
unsigned scan_cg_last = (unsigned)-1;
unsigned scan_pos_last = (unsigned)-1;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
if (coeff[scan[i + j * width]]) {
scan_pos_last = i + j * width;
sig_coeffgroup_flag[scan_cg[(i + j * width) >> log2_cg_size]] = 1;
}
for (int i = 0; i < (width * height); ++i) {
if (coeff[scan[i]]) {
scan_pos_last = i;
sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1;
}
}
scan_cg_last = scan_pos_last >> log2_cg_size;
int pos_last = scan[scan_pos_last];
@ -139,7 +139,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
int32_t temp_diag = -1;
int32_t temp_sum = -1;
int32_t reg_bins = (width*width * 28) >> 4; //8 for 2x2
int32_t reg_bins = (width * height * 28) >> 4; //8 for 2x2
// significant_coeff_flag
for (i = scan_cg_last; i >= 0; i--) {

View file

@ -1102,24 +1102,13 @@ int uvg_quantize_residual_trskip(
// we can skip this.
uvg_pixels_blit(best->rec, rec_out, width, height, width, out_stride);
}
copy_coeffs(best->coeff, coeff_out, width, height);
// TODO: copying coeffs here is very suspect
copy_coeffs(best->coeff, coeff_out, width, height, width);
return best->has_coeffs;
}
/**
 * \brief Translate an index inside a block to an index in an LCU-wide array.
 *
 * linear_idx is treated as a row-major index with a row length of block_w
 * (NOTE(review): this matches how the column offset is derived; confirm
 * against the caller). The result uses a row stride of LCU_WIDTH.
 *
 * \param lcu_x       Local LCU x coordinate of the block's top-left corner.
 * \param lcu_y       Local LCU y coordinate of the block's top-left corner.
 * \param block_w     Block width.
 * \param block_h     Block height. Not needed for the computation.
 * \param linear_idx  Index inside the block.
 * \return Index of the same coefficient in the LCU-wide array.
 */
static INLINE int translate_to_cu_order_idx(const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int linear_idx)
{
  // Column and row must both be derived from the row length (block_w).
  // Dividing by block_h gives the wrong row for non-square blocks.
  const int offset_x = linear_idx % block_w;
  const int offset_y = linear_idx / block_w;
  (void)block_h;

  return (lcu_x + offset_x) + (lcu_y + offset_y) * LCU_WIDTH;
}
/**
* Calculate the residual coefficients for a single TU.
*
@ -1176,23 +1165,21 @@ static void quantize_tr_residual(
coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
coeff_t *dst_coeff = NULL;
// ISP_TODO: use temp coeff array size MAX_TR_WIDTH^2 instead of coeff pointers
// ISP_TODO: inside temp coeff array, entries are in the old linear order. PÖTKÖ
switch (color) {
case COLOR_Y:
pred = &lcu->rec.y[offset];
ref = &lcu->ref.y[offset];
dst_coeff = &lcu->coeff.y;
dst_coeff = &lcu->coeff.y[lcu_px.x + lcu_px.y * lcu_width];
break;
case COLOR_U:
pred = &lcu->rec.u[offset];
ref = &lcu->ref.u[offset];
dst_coeff = &lcu->coeff.u;
dst_coeff = &lcu->coeff.u[lcu_px.x + lcu_px.y * lcu_width];
break;
case COLOR_V:
pred = &lcu->rec.v[offset];
ref = &lcu->ref.v[offset];
dst_coeff = &lcu->coeff.v;
dst_coeff = &lcu->coeff.v[lcu_px.x + lcu_px.y * lcu_width];
break;
default:
break;
@ -1248,6 +1235,7 @@ static void quantize_tr_residual(
lmcs_chroma_adj);
} else {
if(color == COLOR_UV) {
// ISP_TODO: fix this
has_coeffs = uvg_quant_cbcr_residual(
state,
cur_pu,
@ -1289,20 +1277,14 @@ static void quantize_tr_residual(
cbf_clear(&cur_pu->cbf, depth, color);
if (has_coeffs) {
const int coeffs_to_copy = tr_width * tr_height;
for (int i = 0; i < coeffs_to_copy; ++i) {
const coeff_t c = coeff[i];
const idx = translate_to_cu_order_idx(lcu_px.x, lcu_px.y, tr_width, tr_height, i);
dst_coeff[idx] = c;
for (int j = 0; j < tr_height; ++j) {
memcpy(&dst_coeff[j * lcu_width], &coeff[j * tr_width], tr_width * sizeof(coeff_t));
}
cbf_set(&cur_pu->cbf, depth, color);
}
else {
// ISP_TODO: if no coeffs, mem set width * height amount of coeffs to zero
int idx = lcu_px.x + lcu_px.y * LCU_WIDTH;
for (int j = 0; j < tr_height; ++j) {
memset(dst_coeff[idx], 0, (sizeof(coeff_t) * tr_width));
idx += LCU_WIDTH;
memset(&dst_coeff[j * lcu_width], 0, (sizeof(coeff_t) * tr_width));
}
}
}