diff --git a/src/cu.h b/src/cu.h
index f5eeb5e6..dae446c4 100644
--- a/src/cu.h
+++ b/src/cu.h
@@ -415,9 +415,11 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
  */
 static INLINE void copy_coeffs(const coeff_t *__restrict src,
                                coeff_t *__restrict dest,
-                               size_t width, size_t height)
+                               size_t width, size_t height, const int lcu_width)
 {
-  memcpy(dest, src, width * height * sizeof(coeff_t));
+  for (size_t j = 0; j < height; ++j) { // size_t index matches the type of height; one row per iteration
+    memcpy(dest + j * lcu_width, src + j * lcu_width, width * sizeof(coeff_t)); // lcu_width is the row stride of both buffers
+  }
 }
 
 
diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c
index 3c2d1947..2421f7a0 100644
--- a/src/encode_coding_tree.c
+++ b/src/encode_coding_tree.c
@@ -417,7 +417,7 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
   //ToDo: own ctx_offset and shift for X and Y
   uint8_t ctx_offset_x = type ? 0 : prefix_ctx[index_x];
   uint8_t ctx_offset_y = type ? 0 : prefix_ctx[index_y];
-  uint8_t shift_x = type ? CLIP(0, 2, width>>3) : (index_x+1)>>2;
+  uint8_t shift_x = type ? CLIP(0, 2, width >> 3) : (index_x + 1) >> 2;
   uint8_t shift_y = type ? CLIP(0, 2, width >> 3) : (index_y + 1) >> 2;
 
   double bits = 0;
@@ -481,11 +481,15 @@ static void encode_chroma_tu(
   cabac_data_t* const cabac = &state->cabac;
   *scan_idx = uvg_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth);
   if(!joint_chroma){
-    const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
-    const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    // The chroma coeff arrays are addressed row by row with stride LCU_WIDTH_C,
+    // so gather this TU's rows into dense local buffers before coding them.
+    coeff_t coeff_u[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    coeff_t coeff_v[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    uvg_get_sub_coeff(coeff_u, coeff->u, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
+    uvg_get_sub_coeff(coeff_v, coeff->v, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
 
     if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
-      // ISP_TODO: do these checks need height?
+      // TODO: height for this check and the others below
       if(state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)){
         cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
         // HEVC only supports transform_skip for Luma
@@ -504,7 +508,9 @@ static void encode_chroma_tu(
     }
   }
   else {
-    const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    // Gather the joint CbCr TU rows (row stride LCU_WIDTH_C) into a dense local buffer.
+    coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH]; // filled by uvg_get_sub_coeff below, so it must not be const
+    uvg_get_sub_coeff(coeff_uv, coeff->joint_uv, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
     if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) {
       cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
       CABAC_BIN(cabac, 0, "transform_skip_flag");
@@ -544,7 +550,9 @@ static void encode_transform_unit(
   if (cbf_y && !only_chroma) {
     int x_local = x % LCU_WIDTH;
     int y_local = y % LCU_WIDTH;
-    const coeff_t *coeff_y = &coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)];
+    // Gather the luma TU rows (row stride LCU_WIDTH) into a dense local buffer.
+    coeff_t coeff_y[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    uvg_get_sub_coeff(coeff_y, coeff->y, x_local, y_local, width, height, LCU_WIDTH);
 
     // CoeffNxN
     // Residual Coding
@@ -1849,3 +1857,26 @@ void uvg_encode_mvd(encoder_state_t * const state,
 
   if(bits_out) *bits_out = temp_bits_out;
 }
+
+
+/**
+ * \brief Get a subset of LCU coeff array.
+ *
+ * \param dst       Destination array, filled densely with row stride block_w. Callers pass coeff_t [TR_MAX_WIDTH * TR_MAX_WIDTH].
+ * \param src       Coeff LCU array, read with row stride lcu_width.
+ * \param lcu_x     Local LCU x coordinate.
+ * \param lcu_y     Local LCU y coordinate.
+ * \param block_w   Block width.
+ * \param block_h   Block height.
+ * \param lcu_width LCU_WIDTH for luma, LCU_WIDTH_C for chroma.
+ *
+ */
+void uvg_get_sub_coeff(coeff_t *dst, const coeff_t * const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width)
+{
+  // Take subset of coeff array
+  const coeff_t* coeff_ptr = &src[lcu_x + lcu_y * lcu_width];
+  for (int j = 0; j < block_h; ++j) {
+    // Row j of the block: dense in dst (stride block_w), strided in src (stride lcu_width).
+    memcpy(&dst[j * block_w], &coeff_ptr[j * lcu_width], block_w * sizeof(coeff_t));
+  }
+}
diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h
index 9757a327..7410a073 100644
--- a/src/encode_coding_tree.h
+++ b/src/encode_coding_tree.h
@@ -116,3 +116,5 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
   uint8_t lastpos_x, uint8_t lastpos_y,
   uint8_t width, uint8_t height,
   uint8_t type, uint8_t scan, double* bits_out);
+
+void uvg_get_sub_coeff(coeff_t* dst, const coeff_t* const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width);
diff --git a/src/intra.c b/src/intra.c
index 0c8150ea..41a119d2 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -1520,7 +1520,7 @@ int uvg_get_isp_split_num(const int width, const int height, const int split_typ
 void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type)
 {
   assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3].");
-  assert((split_type == ISP_MODE_NO_ISP && split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
+  assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
   int part_dim = block_w;
   if (split_type != ISP_MODE_NO_ISP) {
     part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type);
diff --git a/src/rdo.c b/src/rdo.c
index c467dc94..bad372a9 100644
--- a/src/rdo.c
+++ b/src/rdo.c
@@ -305,11 +305,21 @@ static INLINE double get_coeff_cabac_cost(
 {
   const int width = cu_loc->width;
   const int height = cu_loc->height;
+  const int sub_coeff_w = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
+  const int sub_coeff_h = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+  const int lcu_width = color == COLOR_Y ? LCU_WIDTH : LCU_WIDTH_C;
+  // NOTE(review): x/y are reduced modulo LCU_WIDTH even when color is chroma and lcu_width is LCU_WIDTH_C; confirm whether chroma needs chroma-space coordinates here.
+  int x_local = cu_loc->x % LCU_WIDTH;
+  int y_local = cu_loc->y % LCU_WIDTH;
+
   // Make sure there are coeffs present
   bool found = false;
-  // ISP_TODO: this needs to be two separate x, y loops?
-  for (int i = 0; i < width * height; i++) {
-    if (coeff[i] != 0) {
+
+  coeff_t sub_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
+  uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width);
+
+  for (int i = 0; i < sub_coeff_w * sub_coeff_h; i++) {
+    if (sub_coeff[i] != 0) {
       found = 1;
       break;
     }
@@ -332,7 +342,7 @@ static INLINE double get_coeff_cabac_cost(
   if(!tr_skip) {
     uvg_encode_coeff_nxn((encoder_state_t*) state,
                          &cabac_copy,
-                         coeff,
+                         sub_coeff,
                          cu_loc,
                          color,
                          scan_mode,
@@ -342,7 +352,7 @@ static INLINE double get_coeff_cabac_cost(
   else {
     uvg_encode_ts_residual((encoder_state_t* const)state,
       &cabac_copy,
-      coeff,
+      sub_coeff,
       width,
       height,
       color,
diff --git a/src/search.c b/src/search.c
index 293a807f..ddbdfd33 100644
--- a/src/search.c
+++ b/src/search.c
@@ -90,20 +90,27 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr
   }
 }
 
+// ISP_TODO: this needs to work with the new coeff cu order
 static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to, bool joint, enum uvg_tree_type tree_type) {
   if (tree_type != UVG_CHROMA_T) {
-    const int luma_z = xy_to_zorder(LCU_WIDTH, cu_loc->x, cu_loc->y);
-    copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], cu_loc->width, cu_loc->height);
+    // Coeffs are addressed in raster order inside the LCU (row stride LCU_WIDTH), not z-order.
+    const int idx = (cu_loc->x % LCU_WIDTH) + ((cu_loc->y % LCU_WIDTH) * LCU_WIDTH);
+    copy_coeffs(&from->coeff.y[idx], &to->coeff.y[idx], cu_loc->width, cu_loc->height, LCU_WIDTH);
+
   }
 
   if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
-    const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T));
-    copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
-    copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
+    // Same raster addressing for chroma, with the coordinates shifted as below and row stride LCU_WIDTH_C.
+    const int chroma_x = cu_loc->x >> (tree_type != UVG_CHROMA_T);
+    const int chroma_y = cu_loc->y >> (tree_type != UVG_CHROMA_T);
+
+    const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C);
+    copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
+    copy_coeffs(&from->coeff.v[idx], &to->coeff.v[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
     if (joint) {
-      copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
+      copy_coeffs(&from->coeff.joint_uv[idx], &to->coeff.joint_uv[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
     }
   }
 }
 
 
@@ -1672,7 +1679,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
   copy_lcu_to_cu_data(state, x, y, &work_tree[0], tree_type);
 
   // Copy coeffs to encoder state.
-  copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH);
+  copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); // full LCU_WIDTH x LCU_WIDTH block, row stride LCU_WIDTH
 
   if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
     cost = search_cu(
@@ -1689,9 +1696,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
     copy_lcu_to_cu_data(state, x, y, &work_tree[0], UVG_CHROMA_T);
   }
 
-  copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C);
-  copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C);
+  copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); // full chroma block, row stride LCU_WIDTH_C
+  copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); // full chroma block, row stride LCU_WIDTH_C
   if (state->encoder_control->cfg.jccr) {
-    copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C);
+    copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C); // joint CbCr coeffs, copied only when jccr is enabled
   }
 }
diff --git a/src/strategies/generic/encode_coding_tree-generic.c b/src/strategies/generic/encode_coding_tree-generic.c
index 756cd6d6..acdfab94 100644
--- a/src/strategies/generic/encode_coding_tree-generic.c
+++ b/src/strategies/generic/encode_coding_tree-generic.c
@@ -64,6 +64,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
   const int y = cu_loc->y;
   const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
   const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+
   //const encoder_control_t * const encoder = state->encoder_control;
   //int c1 = 1;
   uint8_t last_coeff_x = 0;
@@ -94,14 +95,13 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
   unsigned scan_cg_last = (unsigned)-1;
   unsigned scan_pos_last = (unsigned)-1;
 
-  for (int j = 0; j < height; j++) {
-    for (int i = 0; i < width; i++) {
-      if (coeff[scan[i + j * width]]) {
-        scan_pos_last = i + j * width;
-        sig_coeffgroup_flag[scan_cg[(i + j * width) >> log2_cg_size]] = 1;
-      }
+  for (int i = 0; i < (width * height); ++i) { // i is a position in the scan table, not a raster index
+    if (coeff[scan[i]]) {
+      scan_pos_last = i; // ends up as the highest scan position holding a nonzero coeff
+      sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1; // mark the coefficient group containing position i
     }
   }
+  // NOTE(review): if no coeff is nonzero, scan_pos_last stays (unsigned)-1 and is used as an index below; presumably callers guarantee a coded coeff - confirm.
   scan_cg_last = scan_pos_last >> log2_cg_size;
 
   int pos_last = scan[scan_pos_last];
@@ -139,7 +139,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
   int32_t temp_diag = -1;
   int32_t temp_sum = -1;
 
-  int32_t reg_bins = (width*width * 28) >> 4; //8 for 2x2
+  int32_t reg_bins = (width * height * 28) >> 4; //8 for 2x2
 
   // significant_coeff_flag
   for (i = scan_cg_last; i >= 0; i--) {
diff --git a/src/transform.c b/src/transform.c
index c5a38475..e5a3dc82 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -1102,24 +1102,13 @@ int uvg_quantize_residual_trskip(
     // we can skip this.
     uvg_pixels_blit(best->rec, rec_out, width, height, width, out_stride);
   }
-  copy_coeffs(best->coeff, coeff_out, width, height);
+  // TODO: copying coeffs here is very suspect
+  copy_coeffs(best->coeff, coeff_out, width, height, width); // row stride == width, so both buffers are treated as dense width x height blocks
 
   return best->has_coeffs;
 }
 
 
-static INLINE int translate_to_cu_order_idx(const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int linear_idx)
-{
-  // ISP_TODO: get rid of all there temp variables after making sure this works
-  const int start_idx = lcu_x + lcu_y * LCU_WIDTH;
-  const int offset_x = linear_idx % block_w;
-  const int local_y = linear_idx / block_h;
-  const int offset_y = local_y * LCU_WIDTH;
-
-  return (start_idx + offset_x + offset_y);
-}
-
-
 /**
  * Calculate the residual coefficients for a single TU.
  *
@@ -1176,23 +1165,21 @@ static void quantize_tr_residual(
   coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
   coeff_t *dst_coeff = NULL;
 
-  // ISP_TODO: use temp coeff array size MAX_TR_WIDTH^2 instead of coeff pointers
-  // ISP_TODO: inside temp coeff array, entries are in the old linear order. PÖTKÖ
   switch (color) {
     case COLOR_Y:
       pred = &lcu->rec.y[offset];
       ref = &lcu->ref.y[offset];
-      dst_coeff = &lcu->coeff.y;
+      dst_coeff = &lcu->coeff.y[lcu_px.x + lcu_px.y * lcu_width]; // NOTE(review): presumably lcu_px is the TU origin inside the LCU and lcu_width the plane's row stride - confirm
       break;
     case COLOR_U:
       pred = &lcu->rec.u[offset];
       ref = &lcu->ref.u[offset];
-      dst_coeff = &lcu->coeff.u;
+      dst_coeff = &lcu->coeff.u[lcu_px.x + lcu_px.y * lcu_width]; // same addressing as the luma case, on the U plane
       break;
     case COLOR_V:
       pred = &lcu->rec.v[offset];
       ref = &lcu->ref.v[offset];
-      dst_coeff = &lcu->coeff.v;
+      dst_coeff = &lcu->coeff.v[lcu_px.x + lcu_px.y * lcu_width]; // same addressing as the luma case, on the V plane
       break;
     default:
       break;
@@ -1248,6 +1235,7 @@ static void quantize_tr_residual(
       lmcs_chroma_adj);
   } else {
     if(color == COLOR_UV) {
+      // ISP_TODO: fix this. NOTE(review): dst_coeff is left NULL for COLOR_UV by the switch on color; presumably this path must handle joint_uv itself - confirm.
       has_coeffs = uvg_quant_cbcr_residual(
         state,
         cur_pu,
@@ -1289,20 +1277,14 @@ static void quantize_tr_residual(
   cbf_clear(&cur_pu->cbf, depth, color);
 
   if (has_coeffs) {
-    const int coeffs_to_copy = tr_width * tr_height;
-    for (int i = 0; i < coeffs_to_copy; ++i) {
-      const coeff_t c = coeff[i];
-      const idx = translate_to_cu_order_idx(lcu_px.x, lcu_px.y, tr_width, tr_height, i);
-      dst_coeff[idx] = c;
+    for (int j = 0; j < tr_height; ++j) {
+      memcpy(&dst_coeff[j * lcu_width], &coeff[j * tr_width], tr_width * sizeof(coeff_t)); // dense TU row j -> row j of the lcu_width-strided destination
     }
     cbf_set(&cur_pu->cbf, depth, color);
   }
   else {
-    // ISP_TODO: if no coeffs, mem set width * height amount of coeffs to zero
-    int idx = lcu_px.x + lcu_px.y * LCU_WIDTH;
     for (int j = 0; j < tr_height; ++j) {
-      memset(dst_coeff[idx], 0, (sizeof(coeff_t) * tr_width));
-      idx += LCU_WIDTH;
+      memset(&dst_coeff[j * lcu_width], 0, (sizeof(coeff_t) * tr_width)); // zero tr_width coeffs of row j in the strided destination
     }
   }
 }