diff --git a/src/cu.h b/src/cu.h index 4b68bca4..585f823b 100644 --- a/src/cu.h +++ b/src/cu.h @@ -106,10 +106,15 @@ typedef struct { cu_array_t * kvz_cu_array_alloc(int width_in_scu, int height_in_scu); int kvz_cu_array_free(cu_array_t *cua); - -#define SUB_SCU_BIT_MASK (64 - 1) -#define SUB_SCU(xy) (xy & SUB_SCU_BIT_MASK) +/** + * \brief Return the 6 lowest-order bits of the pixel coordinate. + * + * The 6 lower-order bits correspond to the distance from the left or top edge + * of the containing LCU. + */ +#define SUB_SCU(xy) ((xy) & (LCU_WIDTH - 1)) + #define LCU_CU_WIDTH 8 #define LCU_T_CU_WIDTH 9 #define LCU_CU_OFFSET 10 @@ -153,10 +158,66 @@ typedef struct { * - Left reference CUs on column 0. * - All of LCUs CUs on 1:9, 1:9. * - Top right reference CU on the last slot. + * + \verbatim + + .-- left reference CUs + v + 0 | 1 2 3 4 5 6 7 8 | 81 <-- top reference CUs + ----+-------------------------+---- + 9 | 10 11 12 13 14 15 16 17 | + 18 | 19 20 21 22 23 24 25 26 <-- this LCU + 27 | 28 29 30 31 32 33 34 35 | + 36 | 37 38 39 40 41 42 43 44 | + 45 | 46 47 48 49 50 51 52 53 | + 54 | 55 56 57 58 59 60 61 62 | + 63 | 64 65 66 67 68 69 70 71 | + 72 | 73 74 75 76 77 78 79 80 | + ----+-------------------------+---- + + \endverbatim */ cu_info_t cu[9*9+1]; } lcu_t; +/** + * \brief Return pointer to a given CU. + * + * \param lcu pointer to the containing LCU + * \param x_cu x-index of the CU + * \param y_cu y-index of the CU + * \return pointer to the CU + */ +#define LCU_GET_CU(lcu, x_cu, y_cu) \ + (&(lcu)->cu[LCU_CU_OFFSET + (x_cu) + (y_cu) * LCU_T_CU_WIDTH]) + +/** + * \brief Return pointer to the top right reference CU. + */ +#define LCU_GET_TOP_RIGHT_CU(lcu) \ + (&(lcu)->cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH]) + +/** + * \brief Return pointer to the CU containing a given pixel.
+ * + * \param lcu pointer to the containing LCU + * \param x_px x-coordinate relative to the upper left corner of the LCU + * \param y_px y-coordinate relative to the upper left corner of the LCU + * \return pointer to the CU at coordinates (x_px, y_px) + */ +#define LCU_GET_CU_AT_PX(lcu, x_px, y_px) LCU_GET_CU(lcu, (x_px) >> 3, (y_px) >> 3) + +/** + * \brief Return pointer to a CU relative to the given CU. + * + * \param cu_array pointer to a CU in the array at some location (x, y) + * \param x_offs x-offset + * \param y_offs y-offset + * \return pointer to the CU at (x + x_offs, y + y_offs) + */ +#define CU_GET_CU(cu_array, x_offs, y_offs) \ + (&cu_array[(x_offs) + (y_offs) * LCU_T_CU_WIDTH]) + #define CHECKPOINT_LCU(prefix_str, lcu) do { \ CHECKPOINT_CU(prefix_str " cu[0]", (lcu).cu[0]); \ CHECKPOINT_CU(prefix_str " cu[1]", (lcu).cu[1]); \ diff --git a/src/filter.c b/src/filter.c index 465e3cce..c223e8b6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -83,11 +83,16 @@ const int8_t kvz_g_chroma_filter[8][4] = /** * \brief */ -INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, - int32_t tc, int8_t sw, - int8_t part_P_nofilter, int8_t part_Q_nofilter, - int32_t thr_cut, - int8_t filter_second_P, int8_t filter_second_Q) +static INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, + kvz_pixel *src, + int32_t offset, + int32_t tc, + int8_t sw, + int8_t part_P_nofilter, + int8_t part_Q_nofilter, + int32_t thr_cut, + int8_t filter_second_P, + int8_t filter_second_Q) { int32_t delta; @@ -143,8 +148,12 @@ INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz /** * \brief */ -INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc, - int8_t part_P_nofilter, int8_t part_Q_nofilter) +static INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, + kvz_pixel *src, + int32_t offset, +
int32_t tc, + int8_t part_P_nofilter, + int8_t part_Q_nofilter) { int32_t delta; int16_t m2 = src[-offset * 2]; @@ -161,72 +170,119 @@ INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, k } } + /** - * \brief + * \brief Check whether an edge is a TU boundary. + * + * \param state encoder state + * \param x x-coordinate of the scu in pixels + * \param y y-coordinate of the scu in pixels + * \param dir direction of the edge to check + * \return true, if the edge is a TU boundary, otherwise false */ -void kvz_filter_deblock_edge_luma(encoder_state_t * const state, - int32_t xpos, int32_t ypos, - int8_t depth, int8_t dir) +static bool is_tu_boundary(const encoder_state_t *const state, + int32_t x, + int32_t y, + edge_dir dir) +{ + const cu_info_t *const scu = kvz_videoframe_get_cu(state->tile->frame, + x >> MIN_SIZE, + y >> MIN_SIZE); + const int tu_width = LCU_WIDTH >> scu->tr_depth; + + if (dir == EDGE_HOR) { + return (y & (tu_width - 1)) == 0; + } else { + return (x & (tu_width - 1)) == 0; + } +} + + +/** + * \brief Check whether an edge is aligned on an 8x8 grid. + * + * \param x x-coordinate of the edge + * \param y y-coordinate of the edge + * \param dir direction of the edge + * \return true, if the edge is aligned on an 8x8 grid, otherwise false + */ +static bool is_on_8x8_grid(int x, int y, edge_dir dir) +{ + if (dir == EDGE_HOR) { + return (y & 7) == 0; + } else { + return (x & 7) == 0; + } +} + +/** + * \brief Apply the deblocking filter to luma pixels on a single edge. + * + * The caller should check that the edge is a TU boundary or a PU boundary.
+ * + \verbatim + + .-- filter this edge if dir == EDGE_HOR + v + +--------+ + |o <-- pixel at (x, y) + | | + |<-- filter this edge if dir == EDGE_VER + | | + +--------+ + + \endverbatim + * + * \param state encoder state + * \param x x-coordinate in pixels (see above) + * \param y y-coordinate in pixels (see above) + * \param length length of the edge in pixels + * \param dir direction of the edge to filter + */ +static void filter_deblock_edge_luma(encoder_state_t * const state, + int32_t x, + int32_t y, + int32_t length, + edge_dir dir) { videoframe_t * const frame = state->tile->frame; const encoder_control_t * const encoder = state->encoder_control; - cu_info_t *cu_q = kvz_videoframe_get_cu(frame, xpos >> MIN_SIZE, ypos >> MIN_SIZE); - - { - // Return if called with a coordinate which is not at CU or TU boundary. - // TODO: Add handling for asymmetric inter CU boundaries which do not coincide - // with transform boundaries. - const int tu_width = LCU_WIDTH >> cu_q->tr_depth; - if (dir == EDGE_HOR && (ypos & (tu_width - 1))) return; - if (dir == EDGE_VER && (xpos & (tu_width - 1))) return; - } + cu_info_t *cu_q = kvz_videoframe_get_cu(frame, x >> MIN_SIZE, y >> MIN_SIZE); { int32_t stride = frame->rec->stride; - int32_t offset = stride; int32_t beta_offset_div2 = encoder->beta_offset_div2; int32_t tc_offset_div2 = encoder->tc_offset_div2; // TODO: support 10+bits - kvz_pixel *orig_src = &frame->rec->y[xpos + ypos*stride]; + kvz_pixel *orig_src = &frame->rec->y[x + y*stride]; kvz_pixel *src = orig_src; - int32_t step = 1; cu_info_t *cu_p = NULL; - int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; - int8_t strength = 0; + int16_t x_cu = x >> MIN_SIZE; + int16_t y_cu = y >> MIN_SIZE; + int8_t strength = 0; int32_t qp = state->global->QP; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale; int32_t side_threshold = (beta + (beta >>1 )) >> 
3; - uint32_t blocks_in_part = (LCU_WIDTH >> depth) / 4; - uint32_t block_idx; - int32_t tc_index,tc,thr_cut; + int32_t tc_index; + int32_t tc; + int32_t thr_cut; - if (dir == EDGE_VER) { - offset = 1; - step = stride; - } + uint32_t num_4px_parts = length / 4; + + const int32_t offset = (dir == EDGE_HOR) ? stride : 1; + const int32_t step = (dir == EDGE_HOR) ? 1 : stride; // TODO: add CU based QP calculation // For each 4-pixel part in the edge - for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) { + for (uint32_t block_idx = 0; block_idx < num_4px_parts; ++block_idx) { int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d; { - vector2d_t px = { - (dir == EDGE_HOR ? xpos + block_idx * 4 : xpos), - (dir == EDGE_VER ? ypos + block_idx * 4 : ypos) - }; - - // Don't deblock the last 4x4 block of the LCU. This will be deblocked - // when processing the next LCU. - if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != frame->width)) { - continue; - } - // CU in the side we are filtering, update every 8-pixels cu_p = kvz_videoframe_get_cu(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? 
block_idx>>1 : 0)); @@ -341,53 +397,60 @@ void kvz_filter_deblock_edge_luma(encoder_state_t * const state, useStrongFiltering(offset, 2*d3, (src+step*(block_idx*4+3))); // Filter four rows/columns - kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 0), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q); - kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 1), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q); - kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 2), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q); - kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 3), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q); + for (int i = 0; i < 4; i++) { + kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + i), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q); + } } } } } /** - * \brief + * \brief Apply the deblocking filter to chroma pixels on a single edge. + * + * The caller should check that the edge is a TU boundary or a PU boundary. + * + \verbatim + + .-- filter this edge if dir == EDGE_HOR + v + +--------+ + |o <-- pixel at (x, y) + | | + |<-- filter this edge if dir == EDGE_VER + | | + +--------+ + + \endverbatim + * + * \param state encoder state + * \param x x-coordinate in chroma pixels (see above) + * \param y y-coordinate in chroma pixels (see above) + * \param length length of the edge in chroma pixels + * \param dir direction of the edge to filter */ -void kvz_filter_deblock_edge_chroma(encoder_state_t * const state, - int32_t x, int32_t y, - int8_t depth, int8_t dir) +static void filter_deblock_edge_chroma(encoder_state_t * const state, + int32_t x, + int32_t y, + int32_t length, + edge_dir dir) { const encoder_control_t * const encoder = state->encoder_control; const videoframe_t * const frame = state->tile->frame; const cu_info_t *cu_q = kvz_videoframe_get_cu_const(frame, x >> (MIN_SIZE - 1), y >> (MIN_SIZE - 1)); - - // Chroma edges that do not lay on a 8x8 grid are not deblocked. 
- if (depth >= MAX_DEPTH) { - if (dir == EDGE_HOR && (y & (8 - 1))) return; - if (dir == EDGE_VER && (x & (8 - 1))) return; - } - - { - // Return if called with a coordinate which is not at CU or TU boundary. - // TODO: Add handling for asymmetric inter CU boundaries which do not coincide - // with transform boundaries. - const int tu_width = (LCU_WIDTH / 2) >> cu_q->tr_depth; - if (dir == EDGE_HOR && (y & (tu_width - 1))) return; - if (dir == EDGE_VER && (x & (tu_width - 1))) return; - } // For each subpart { int32_t stride = frame->rec->stride >> 1; int32_t tc_offset_div2 = encoder->tc_offset_div2; // TODO: support 10+bits - kvz_pixel *src_u = &frame->rec->u[x + y*stride]; - kvz_pixel *src_v = &frame->rec->v[x + y*stride]; - // Init offset and step to EDGE_HOR - int32_t offset = stride; - int32_t step = 1; + kvz_pixel *src[] = { + &frame->rec->u[x + y*stride], + &frame->rec->v[x + y*stride], + }; const cu_info_t *cu_p = NULL; - int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1); + int16_t x_cu = x >> (MIN_SIZE-1); + int16_t y_cu = y >> (MIN_SIZE-1); int8_t strength = 2; int32_t QP = kvz_g_chroma_scale[state->global->QP]; @@ -395,42 +458,22 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state, int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale; - // Special handling for depth 4. It's meaning is that we want to bypass - // last block in LCU check in order to deblock just that block. - uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4; - uint32_t blk_idx; + const uint32_t num_4px_parts = length / 4; - if(dir == EDGE_VER) { - offset = 1; - step = stride; - } + const int32_t offset = (dir == EDGE_HOR) ? stride : 1; + const int32_t step = (dir == EDGE_HOR) ? 1 : stride; - for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx) + for (uint32_t blk_idx = 0; blk_idx < num_4px_parts; ++blk_idx) { - vector2d_t px = { - (dir == EDGE_HOR ? 
x + blk_idx * 4 : x), - (dir == EDGE_VER ? y + blk_idx * 4 : y) - }; cu_p = kvz_videoframe_get_cu_const(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0)); - // Don't deblock the last 4x4 block of the LCU. This will be deblocked - // when processing the next LCU. - if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != frame->width / 2)) { - continue; - } - // Only filter when strenght == 2 (one of the blocks is intra coded) if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { - // Chroma U - kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 0), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 1), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 2), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 3), offset, Tc, 0, 0); - // Chroma V - kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 0), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 1), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 2), offset, Tc, 0, 0); - kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 3), offset, Tc, 0, 0); + for (int component = 0; component < 2; component++) { + for (int i = 0; i < 4; i++) { + kvz_filter_deblock_chroma(encoder, src[component] + step * (4*blk_idx + i), offset, Tc, 0, 0); + } + } } } } @@ -438,88 +481,154 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state, /** * \brief function to split LCU into smaller CU blocks - * \param encoder the encoder info structure - * \param xCtb block x-position (as SCU) - * \param yCtb block y-position (as SCU) - * \param depth block depth - * \param edge which edge we are filtering * - * This function takes (SCU) block position as input and splits the block - * until the coded block size has been achived. 
Calls luma and chroma filtering - * functions for each coded CU size. + * \param state encoder state + * \param x block x-position in pixels + * \param y block y-position in pixels + * \param depth block depth + * \param dir direction of the edges to filter + * + * Recursively traverse the CU/TU quadtree. At the lowest level, apply the + * deblocking filter to the left edge (when dir == EDGE_VER) or the top edge + * (when dir == EDGE_HOR) as needed. Both luma and chroma are filtered. */ -void kvz_filter_deblock_cu(encoder_state_t * const state, int32_t x, int32_t y, int8_t depth, int32_t edge) +static void filter_deblock_cu(encoder_state_t * const state, + int32_t x, + int32_t y, + int8_t depth, + edge_dir dir) { const videoframe_t * const frame = state->tile->frame; - const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x, y); - uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0; - uint8_t tr_split = (cur_cu->tr_depth > depth) ? 1 : 0; - uint8_t border_x = (frame->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0; - uint8_t border_y = (frame->height < y*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0; - uint8_t border_split_x = (frame->width < ((x + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1; - uint8_t border_split_y = (frame->height < ((y + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1; + const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, + x >> MAX_DEPTH, + y >> MAX_DEPTH); - uint8_t border = border_x | border_y; // are we in any border CU?
+ const int cu_width = LCU_WIDTH >> depth; + const int half_cu_width = cu_width >> 1; + const int scu_width = LCU_WIDTH >> MAX_DEPTH; + const bool split_flag = cur_cu->depth > depth; + const bool tr_split = cur_cu->tr_depth > depth; + const bool border_x = frame->width < x + cu_width; + const bool border_y = frame->height < y + cu_width; + const bool border_split_x = frame->width >= x + scu_width + half_cu_width; + const bool border_split_y = frame->height >= y + scu_width + half_cu_width; + const bool border = border_x || border_y; // are we in any border CU? // split 64x64, on split flag and on border if (depth < MAX_DEPTH && (depth == 0 || split_flag || border || tr_split)) { // Split the four sub-blocks of this block recursively. - uint8_t change; - assert(depth >= 0); // for clang-analyzer - change = 1 << (MAX_DEPTH - 1 - depth); + const int32_t x2 = x + half_cu_width; + const int32_t y2 = y + half_cu_width; - kvz_filter_deblock_cu(state, x, y, depth + 1, edge); - if(!border_x || border_split_x) { - kvz_filter_deblock_cu(state, x + change, y, depth + 1, edge); + filter_deblock_cu(state, x, y, depth + 1, dir); + if (!border_x || border_split_x) { + filter_deblock_cu(state, x2, y, depth + 1, dir); } - if(!border_y || border_split_y) { - kvz_filter_deblock_cu(state, x , y + change, depth + 1, edge); + if (!border_y || border_split_y) { + filter_deblock_cu(state, x, y2, depth + 1, dir); } - if((!border_x && !border_y) || (border_split_x && border_split_y)) { - kvz_filter_deblock_cu(state, x + change, y + change, depth + 1, edge); + if (!border || (border_split_x && border_split_y)) { + filter_deblock_cu(state, x2, y2, depth + 1, dir); } return; } // no filtering on borders (where filter would use pixels outside the picture) - if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return; + if ((x == 0 && dir == EDGE_VER) || (y == 0 && dir == EDGE_HOR)) return; // do the filtering for block edge - kvz_filter_deblock_edge_luma(state, x*(LCU_WIDTH >> 
MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge); - kvz_filter_deblock_edge_chroma(state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge); + if (is_tu_boundary(state, x, y, dir)) { + // Length of luma and chroma edges. + int32_t length; + int32_t length_c; + + const int32_t x_right = x + cu_width; + const bool rightmost_4px_of_lcu = x_right % LCU_WIDTH == 0; + const bool rightmost_4px_of_frame = x_right == frame->width; + + if (dir == EDGE_HOR && + rightmost_4px_of_lcu && + !rightmost_4px_of_frame) { + // The last 4 pixels will be deblocked when processing the next LCU. + length = cu_width - 4; + length_c = half_cu_width - 4; + + } else { + length = cu_width; + length_c = half_cu_width; + } + + filter_deblock_edge_luma(state, x, y, length, dir); + + // Chroma pixel coordinates. + const int32_t x_c = x >> 1; + const int32_t y_c = y >> 1; + if (is_on_8x8_grid(x_c, y_c, dir)) { + filter_deblock_edge_chroma(state, x_c, y_c, length_c, dir); + } + } } /** * \brief Deblock a single LCU without using data from right or down. * - * Filter all the following edges: - * - All edges within the LCU, except for the last 4 pixels on the right when - * using horizontal filtering. - * - Left edge and top edge. - * - After vertical filtering the left edge, filter the last 4 pixels of - * horizontal edges in the LCU to the left. + * Filter the following vertical edges (horizontal filtering): + * 1. The left edge of the LCU. + * 2. All vertical edges within the LCU. + * + * Filter the following horizontal edges (vertical filtering): + * 1. The rightmost 4 pixels of the top edge of the LCU to the left. + * 2. The rightmost 4 pixels of all horizontal edges within the LCU to the + * left. + * 3. The top edge and all horizontal edges within the LCU, excluding the + * rightmost 4 pixels. If the LCU is the rightmost LCU of the frame, the + * last 4 pixels are also filtered. 
+ * + * What is not filtered: + * - The rightmost 4 pixels of the top edge and all horizontal edges within + * the LCU, unless the LCU is the rightmost LCU of the frame. + * - The bottom edge of the LCU. + * - The right edge of the LCU. + * + * \param state encoder state + * \param x_px x-coordinate of the left edge of the LCU in pixels + * \param y_px y-coordinate of the top edge of the LCU in pixels */ void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px) { const vector2d_t lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH }; - kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER); + filter_deblock_cu(state, x_px, y_px, 0, EDGE_VER); + + assert(x_px == lcu.x * LCU_WIDTH); + assert(y_px == lcu.y * LCU_WIDTH); // Filter rightmost 4 pixels from last LCU now that they have been // finally deblocked vertically. - if (lcu.x > 0) { - int y; - for (y = 0; y < 64; y += 8) { - if (lcu.y + y == 0) continue; - kvz_filter_deblock_edge_luma(state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR); + if (x_px > 0) { + // Luma + const int x = x_px - 4; + const int end = MIN(y_px + LCU_WIDTH, state->tile->frame->height); + for (int y = y_px; y < end; y += 8) { + // The top edge of the whole frame is not filtered. + if (y > 0 && is_tu_boundary(state, x, y, EDGE_HOR)) { + filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR); + } } - for (y = 0; y < 32; y += 8) { - if (lcu.y + y == 0) continue; - kvz_filter_deblock_edge_chroma(state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR); + + // Chroma + const int x_px_c = x_px >> 1; + const int y_px_c = y_px >> 1; + const int x_c = x_px_c - 4; + const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1); + for (int y_c = y_px_c; y_c < end_c; y_c += 8) { + // The top edge of the whole frame is not filtered. 
+ if (y_c > 0 && is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR)) { + filter_deblock_edge_chroma(state, x_c, y_c, 4, EDGE_HOR); + } } } - kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR); + filter_deblock_cu(state, x_px, y_px, 0, EDGE_HOR); } - diff --git a/src/filter.h b/src/filter.h index fdf30356..50906441 100644 --- a/src/filter.h +++ b/src/filter.h @@ -31,30 +31,15 @@ #include "encoderstate.h" -////////////////////////////////////////////////////////////////////////// -// FUNCTIONS -// Deblocking -void kvz_filter_deblock_cu(encoder_state_t *state, int32_t x_px, int32_t y_px, - int8_t depth, int32_t edge); -void kvz_filter_deblock_edge_luma(encoder_state_t *state, - int32_t x_pos, int32_t y_pos, - int8_t depth, int8_t dir); -void kvz_filter_deblock_edge_chroma(encoder_state_t *state, - int32_t xpos, int32_t ypos, - int8_t depth, int8_t dir); +/** + * \brief Edge direction. + */ +typedef enum edge_dir { + EDGE_VER = 0, // vertical + EDGE_HOR = 1, // horizontal +} edge_dir; + + void kvz_filter_deblock_lcu(encoder_state_t *state, int x_px, int y_px); -void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc , int8_t sw, - int8_t part_p_nofilter, int8_t part_q_nofilter, - int32_t thr_cut, - int8_t filter_second_p, int8_t filter_second_q); -void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc, - int8_t part_p_nofilter, int8_t part_q_nofilter); - -// SAO - -////////////////////////////////////////////////////////////////////////// -// MACROS -#define EDGE_VER 0 -#define EDGE_HOR 1 #endif diff --git a/src/inter.c b/src/inter.c index cd26486f..e9c2d62f 100644 --- a/src/inter.c +++ b/src/inter.c @@ -395,15 +395,12 @@ void kvz_inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_p * \param cu coding unit to clear */ static void inter_clear_cu_unused(cu_info_t* cu) { - if(!(cu->inter.mv_dir & 1)) { - 
cu->inter.mv[0][0] = 0; - cu->inter.mv[0][1] = 0; - cu->inter.mv_ref[0] = 255; - } - if(!(cu->inter.mv_dir & 2)) { - cu->inter.mv[1][0] = 0; - cu->inter.mv[1][1] = 0; - cu->inter.mv_ref[1] = 255; + for (unsigned i = 0; i < 2; ++i) { + if (cu->inter.mv_dir & (1 << i)) continue; + + cu->inter.mv[i][0] = 0; + cu->inter.mv[i][1] = 0; + cu->inter.mv_ref[i] = 255; } } @@ -433,17 +430,16 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, |A1|_________| |A0| */ - int32_t x_cu = (x & (LCU_WIDTH - 1)) >> MAX_DEPTH; //!< coordinates from top-left of this LCU - int32_t y_cu = (y & (LCU_WIDTH - 1)) >> MAX_DEPTH; - cu_info_t* cu = &lcu->cu[LCU_CU_OFFSET]; + int32_t x_cu = SUB_SCU(x) >> MAX_DEPTH; //!< coordinates from top-left of this LCU + int32_t y_cu = SUB_SCU(y) >> MAX_DEPTH; // A0 and A1 availability testing if (x != 0) { - *a1 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu - 1) * LCU_T_CU_WIDTH]; + *a1 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu - 1); if (!(*a1)->coded) *a1 = NULL; if(*a1) inter_clear_cu_unused(*a1); if (y_cu + cur_block_in_scu < LCU_WIDTH>>3) { - *a0 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu) * LCU_T_CU_WIDTH]; + *a0 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu); if (!(*a0)->coded) *a0 = NULL; } if(*a0) inter_clear_cu_unused(*a0); @@ -452,21 +448,21 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, // B0, B1 and B2 availability testing if (y != 0) { if (x_cu + cur_block_in_scu < LCU_WIDTH>>3) { - *b0 = &cu[x_cu + cur_block_in_scu + (y_cu - 1) * LCU_T_CU_WIDTH]; + *b0 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu, y_cu - 1); if (!(*b0)->coded) *b0 = NULL; } else if(y_cu == 0) { - // Special case, top-right cu from LCU is the last in lcu->cu array - *b0 = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH]; + // Special case, top-right CU + *b0 = LCU_GET_TOP_RIGHT_CU(lcu); if (!(*b0)->coded) *b0 = NULL; } if(*b0) inter_clear_cu_unused(*b0); - *b1 = &cu[x_cu + cur_block_in_scu - 1 + (y_cu - 1) * 
LCU_T_CU_WIDTH]; + *b1 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu - 1, y_cu - 1); if (!(*b1)->coded) *b1 = NULL; if(*b1) inter_clear_cu_unused(*b1); if (x != 0) { - *b2 = &cu[x_cu - 1 + (y_cu - 1) * LCU_T_CU_WIDTH]; + *b2 = LCU_GET_CU(lcu, x_cu - 1, y_cu - 1); if(!(*b2)->coded) *b2 = NULL; } if(*b2) inter_clear_cu_unused(*b2); diff --git a/src/intra.c b/src/intra.c index 0e542890..87a6f52f 100644 --- a/src/intra.c +++ b/src/intra.c @@ -485,9 +485,9 @@ void kvz_intra_recon_lcu_luma( cu_info_t *cur_cu, lcu_t *lcu) { - const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; + const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; if (cur_cu == NULL) { - cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } const int8_t width = LCU_WIDTH >> depth; @@ -500,9 +500,9 @@ void kvz_intra_recon_lcu_luma( kvz_intra_recon_lcu_luma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); if (depth < MAX_DEPTH) { - cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH]; - cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; - cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; + cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y); + cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset); + cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset); if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { cbf_set(&cur_cu->cbf.y, depth); } @@ -537,12 +537,12 @@ void kvz_intra_recon_lcu_chroma( cu_info_t *cur_cu, lcu_t *lcu) { - const vector2d_t lcu_px = { x & 0x3f, y & 0x3f }; + const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int8_t width = LCU_WIDTH >> depth; const int8_t width_c = (depth == MAX_PU_DEPTH ? 
width : width / 2); if (cur_cu == NULL) { - cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } if (depth == 0 || cur_cu->tr_depth > depth) { @@ -554,9 +554,9 @@ void kvz_intra_recon_lcu_chroma( kvz_intra_recon_lcu_chroma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); if (depth < MAX_DEPTH) { - cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH]; - cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; - cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; + cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y); + cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset); + cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset); if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { cbf_set(&cur_cu->cbf.u, depth); } diff --git a/src/search.c b/src/search.c index ac349cb3..f21dd8ab 100644 --- a/src/search.c +++ b/src/search.c @@ -82,8 +82,8 @@ static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX int x, y; for (y = y_cu; y < y_cu + width_cu; ++y) { for (x = x_cu; x < x_cu + width_cu; ++x) { - const cu_info_t *from_cu = &work_tree[depth + 1].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH]; - cu_info_t *to_cu = &work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH]; + const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth + 1], x, y); + cu_info_t *to_cu = LCU_GET_CU(&work_tree[depth], x, y); memcpy(to_cu, from_cu, sizeof(*to_cu)); } } @@ -142,8 +142,8 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M int x, y; for (y = y_cu; y < y_cu + width_cu; ++y) { for (x = x_cu; x < x_cu + width_cu; ++x) { - const cu_info_t *from_cu = 
&work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH]; - cu_info_t *to_cu = &work_tree[d].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH]; + const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth], x, y); + cu_info_t *to_cu = LCU_GET_CU(&work_tree[d], x, y); memcpy(to_cu, from_cu, sizeof(*to_cu)); } } @@ -173,16 +173,15 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth) { const int width_cu = LCU_CU_WIDTH >> depth; - const vector2d_t lcu_cu = { (x_px & (LCU_WIDTH - 1)) / 8, (y_px & (LCU_WIDTH - 1)) / 8 }; - cu_info_t *const cur_cu = &lcu->cu[lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH + LCU_CU_OFFSET]; + const vector2d_t lcu_cu = { SUB_SCU(x_px) / 8, SUB_SCU(y_px) / 8 }; int x, y; // Depth 4 doesn't go inside the loop. Set the top-left CU. - cur_cu->tr_depth = tr_depth; + LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth; for (y = 0; y < width_cu; ++y) { for (x = 0; x < width_cu; ++x) { - cu_info_t *cu = &cur_cu[x + y * LCU_T_CU_WIDTH]; + cu_info_t *cu = LCU_GET_CU(lcu, lcu_cu.x + x, lcu_cu.y + y); cu->tr_depth = tr_depth; } } @@ -194,12 +193,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr const int width_cu = LCU_CU_WIDTH >> depth; const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH; const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH; - cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET]; int x, y; // NxN can only be applied to a single CU at a time. if (part_mode == SIZE_NxN) { - cu_info_t *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH]; + cu_info_t *cu = LCU_GET_CU(lcu, x_cu, y_cu); cu->depth = MAX_DEPTH; cu->type = CU_INTRA; cu->intra[PU_INDEX(x_px / 4, y_px / 4)].mode = pred_mode; @@ -211,7 +209,7 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr // Set mode in every CU covered by part_mode in this depth. 
for (y = y_cu; y < y_cu + width_cu; ++y) { for (x = x_cu; x < x_cu + width_cu; ++x) { - cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH]; + cu_info_t *cu = LCU_GET_CU(lcu, x, y); cu->depth = depth; cu->type = CU_INTRA; cu->intra[0].mode = pred_mode; @@ -231,12 +229,11 @@ static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t * const int width_cu = LCU_CU_WIDTH >> depth; const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH; const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH; - cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET]; int x, y; // Set mode in every CU covered by part_mode in this depth. for (y = y_cu; y < y_cu + width_cu; ++y) { for (x = x_cu; x < x_cu + width_cu; ++x) { - cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH]; + cu_info_t *cu = LCU_GET_CU(lcu, x, y); //Check if this could be moved inside the if cu->coded = 1; if (cu != cur_cu) { @@ -257,17 +254,16 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t * const int width_cu = LCU_CU_WIDTH >> depth; const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH; const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH; - cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET]; int x, y; int tr_split = cur_cu->tr_depth-cur_cu->depth; // Set coeff flags in every CU covered by part_mode in this depth. for (y = y_cu; y < y_cu + width_cu; ++y) { for (x = x_cu; x < x_cu + width_cu; ++x) { - cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH]; + cu_info_t *cu = LCU_GET_CU(lcu, x, y); // Use TU top-left CU to propagate coeff flags uint32_t mask = ~((width_cu>>tr_split)-1); - cu_info_t *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH]; + cu_info_t *cu_from = LCU_GET_CU(lcu, x & mask, y & mask); if (cu != cu_from) { // Chroma coeff data is not used, luma is needed for deblocking cu->cbf.y = cu_from->cbf.y; @@ -295,7 +291,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, const uint8_t pu_index = PU_INDEX(x_px / 4, y_px / 4); // cur_cu is used for TU parameters. 
- cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (x_px / 8) + (y_px / 8) * LCU_T_CU_WIDTH]; + cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); double coeff_bits = 0; double tr_tree_bits = 0; @@ -368,7 +364,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, { const vector2d_t lcu_px = { x_px / 2, y_px / 2 }; const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; - cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x / 4) + (lcu_px.y / 4)*LCU_T_CU_WIDTH]; + cu_info_t *const tr_cu = LCU_GET_CU(lcu, lcu_px.x / 4, lcu_px.y / 4); double tr_tree_bits = 0; double coeff_bits = 0; @@ -450,8 +446,8 @@ static double calc_mode_bits(const encoder_state_t *state, } else { int8_t candidate_modes[3]; { - const cu_info_t *left_cu = ((x > 8) ? &cur_cu[-1] : NULL); - const cu_info_t *above_cu = ((y > 8) ? &cur_cu[-LCU_T_CU_WIDTH] : NULL); + const cu_info_t *left_cu = ((x > 8) ? CU_GET_CU(cur_cu, -1, 0) : NULL); + const cu_info_t *above_cu = ((y > 8) ? 
CU_GET_CU(cur_cu, 0, -1) : NULL); kvz_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu); } @@ -467,10 +463,9 @@ static double calc_mode_bits(const encoder_state_t *state, static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth) { - vector2d_t lcu_cu = { (x & 0x3f) / 8, (y & 0x3f) / 8 }; - const cu_info_t *cu_array = &(lcu)->cu[LCU_CU_OFFSET]; - bool condA = x >= 8 && cu_array[(lcu_cu.x - 1) + lcu_cu.y * LCU_T_CU_WIDTH].depth > depth; - bool condL = y >= 8 && cu_array[lcu_cu.x + (lcu_cu.y - 1) * LCU_T_CU_WIDTH].depth > depth; + vector2d_t lcu_cu = { SUB_SCU(x) / 8, SUB_SCU(y) / 8 }; + bool condA = x >= 8 && LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y )->depth > depth; + bool condL = y >= 8 && LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1)->depth > depth; return condA + condL; } @@ -494,7 +489,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, lcu_t *const lcu = &work_tree[depth]; - int x_local = (x&0x3f), y_local = (y&0x3f); + int x_local = SUB_SCU(x); + int y_local = SUB_SCU(y); #ifdef KVZ_DEBUG int debug_split = 0; #endif @@ -506,7 +502,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, return 0; } - cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; + cur_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x_local, y_local); // Assign correct depth cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth; cur_cu->tr_depth = depth > 0 ? 
depth : 1; @@ -647,8 +643,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, && x + cu_width <= frame->width && y + cu_width <= frame->height) { vector2d_t lcu_cu = { x_local / 8, y_local / 8 }; - cu_info_t *cu_array_d1 = &(&work_tree[depth + 1])->cu[LCU_CU_OFFSET]; - cu_info_t *cu_d1 = &cu_array_d1[(lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH)]; + cu_info_t *cu_d1 = LCU_GET_CU(&work_tree[depth + 1], lcu_cu.x, lcu_cu.y); // If the best CU in depth+1 is intra and the biggest it can be, try it. if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) { @@ -715,16 +710,12 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i const int x_cu = x >> MAX_DEPTH; const int y_cu = y >> MAX_DEPTH; - // Use top-left sub-cu of LCU as pointer to lcu->cu array to make things - // simpler. - cu_info_t *lcu_cu = &lcu->cu[LCU_CU_OFFSET]; - // Copy top CU row. if (y_cu > 0) { int i; for (i = 0; i < LCU_CU_WIDTH; ++i) { const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + i, y_cu - 1); - cu_info_t *to_cu = &lcu_cu[i - LCU_T_CU_WIDTH]; + cu_info_t *to_cu = LCU_GET_CU(lcu, i, -1); memcpy(to_cu, from_cu, sizeof(*to_cu)); } } @@ -733,21 +724,21 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i int i; for (i = 0; i < LCU_CU_WIDTH; ++i) { const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu + i); - cu_info_t *to_cu = &lcu_cu[-1 + i * LCU_T_CU_WIDTH]; + cu_info_t *to_cu = LCU_GET_CU(lcu, -1, i); memcpy(to_cu, from_cu, sizeof(*to_cu)); } } // Copy top-left CU. if (x_cu > 0 && y_cu > 0) { const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu - 1); - cu_info_t *to_cu = &lcu_cu[-1 - LCU_T_CU_WIDTH]; + cu_info_t *to_cu = LCU_GET_CU(lcu, -1, -1); memcpy(to_cu, from_cu, sizeof(*to_cu)); } // Copy top-right CU. 
if (y_cu > 0 && x + LCU_WIDTH < frame->width) { const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + LCU_CU_WIDTH, y_cu - 1); - cu_info_t *to_cu = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH]; + cu_info_t *to_cu = LCU_GET_TOP_RIGHT_CU(lcu); memcpy(to_cu, from_cu, sizeof(*to_cu)); } } @@ -806,14 +797,10 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i const int y_cu = y_px >> MAX_DEPTH; videoframe_t * const frame = state->tile->frame; - // Use top-left sub-cu of LCU as pointer to lcu->cu array to make things - // simpler. - const cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET]; - int x, y; for (y = 0; y < LCU_CU_WIDTH; ++y) { for (x = 0; x < LCU_CU_WIDTH; ++x) { - const cu_info_t *from_cu = &lcu_cu[x + y * LCU_T_CU_WIDTH]; + const cu_info_t *from_cu = LCU_GET_CU(lcu, x, y); cu_info_t *to_cu = kvz_videoframe_get_cu(frame, x_cu + x, y_cu + y); memcpy(to_cu, from_cu, sizeof(*to_cu)); } diff --git a/src/search_inter.c b/src/search_inter.c index dd04259e..92ec6f13 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -951,6 +951,127 @@ static unsigned search_frac(const encoder_state_t * const state, } +/** + * \brief Perform inter search for a single reference frame. 
+ */ +static void search_cu_inter_ref(const encoder_state_t * const state, + int x, int y, int depth, + lcu_t *lcu, cu_info_t *cur_cu, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + unsigned ref_idx, + uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*)) +{ + const int x_cu = x >> 3; + const int y_cu = y >> 3; + const videoframe_t * const frame = state->tile->frame; + kvz_picture *ref_image = state->global->ref->images[ref_idx]; + uint32_t temp_bitcost = 0; + uint32_t temp_cost = 0; + vector2d_t orig, mvd; + int32_t merged = 0; + uint8_t cu_mv_cand = 0; + int8_t merge_idx = 0; + int8_t ref_list = state->global->refmap[ref_idx].list-1; + int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list]; + orig.x = x_cu * CU_MIN_SIZE_PIXELS; + orig.y = y_cu * CU_MIN_SIZE_PIXELS; + // Get MV candidates + cur_cu->inter.mv_ref[ref_list] = ref_idx; + kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list); + cur_cu->inter.mv_ref[ref_list] = temp_ref_idx; + + + vector2d_t mv = { 0, 0 }; + { + // Take starting point for MV search from previous frame. + // When temporal motion vector candidates are added, there is probably + // no point to this anymore, but for now it helps. 
+ int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8; + int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8; + cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)]; + if (ref_cu->type == CU_INTER) { + if (ref_cu->inter.mv_dir & 1) { + mv.x = ref_cu->inter.mv[0][0]; + mv.y = ref_cu->inter.mv[0][1]; + } else { + mv.x = ref_cu->inter.mv[1][0]; + mv.y = ref_cu->inter.mv[1][1]; + } + } + } + +#if SEARCH_MV_FULL_RADIUS + temp_cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); +#else + switch (state->encoder_control->cfg->ime_algorithm) { + case KVZ_IME_TZ: + temp_cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); + break; + + default: + temp_cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); + break; + } +#endif + if (state->encoder_control->cfg->fme_level > 0) { + temp_cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); + } + + merged = 0; + // Check every candidate to find a match + for(merge_idx = 0; merge_idx < num_cand; merge_idx++) { + if (merge_cand[merge_idx].dir != 3 && + merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x && + merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y && + (uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { + merged = 1; + break; + } + } + + // Only check when candidates are different + if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) { + vector2d_t mvd_temp1, mvd_temp2; + int cand1_cost,cand2_cost; + + mvd_temp1.x = mv.x - mv_cand[0][0]; + mvd_temp1.y = mv.y - mv_cand[0][1]; + cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac); + + mvd_temp2.x = mv.x - mv_cand[1][0]; + 
mvd_temp2.y = mv.y - mv_cand[1][1]; + cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac); + + // Select candidate 1 if it has lower cost + if (cand2_cost < cand1_cost) { + cu_mv_cand = 1; + } + } + mvd.x = mv.x - mv_cand[cu_mv_cand][0]; + mvd.y = mv.y - mv_cand[cu_mv_cand][1]; + + if(temp_cost < cur_cu->inter.cost) { + + // Map reference index to L0/L1 pictures + cur_cu->inter.mv_dir = ref_list+1; + cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx; + + cur_cu->merged = merged; + cur_cu->merge_idx = merge_idx; + cur_cu->inter.mv_ref[ref_list] = ref_idx; + cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x; + cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y; + cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x; + cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y; + cur_cu->inter.cost = temp_cost; + cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list]; + cur_cu->inter.mv_cand[ref_list] = cu_mv_cand; + } +} + + /** * Update lcu to have best modes at this depth. * \return Cost of best mode. 
@@ -959,12 +1080,9 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d { const videoframe_t * const frame = state->tile->frame; uint32_t ref_idx = 0; - int x_local = (x&0x3f), y_local = (y&0x3f); - int x_cu = x>>3; - int y_cu = y>>3; - int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH; - - cu_info_t *cur_cu = &lcu->cu[cu_pos]; + int x_local = SUB_SCU(x); + int y_local = SUB_SCU(y); + cu_info_t *cur_cu = LCU_GET_CU(lcu, x_local >> 3, y_local >> 3); int16_t mv_cand[2][2]; // Search for merge mode candidate @@ -991,108 +1109,12 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d cur_cu->inter.cost = UINT_MAX; for (ref_idx = 0; ref_idx < state->global->ref->used_size; ref_idx++) { - kvz_picture *ref_image = state->global->ref->images[ref_idx]; - uint32_t temp_bitcost = 0; - uint32_t temp_cost = 0; - vector2d_t orig, mvd; - int32_t merged = 0; - uint8_t cu_mv_cand = 0; - int8_t merge_idx = 0; - int8_t ref_list = state->global->refmap[ref_idx].list-1; - int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list]; - orig.x = x_cu * CU_MIN_SIZE_PIXELS; - orig.y = y_cu * CU_MIN_SIZE_PIXELS; - // Get MV candidates - cur_cu->inter.mv_ref[ref_list] = ref_idx; - kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list); - cur_cu->inter.mv_ref[ref_list] = temp_ref_idx; - - vector2d_t mv = { 0, 0 }; - { - // Take starting point for MV search from previous frame. - // When temporal motion vector candidates are added, there is probably - // no point to this anymore, but for now it helps. 
- int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8; - int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8; - cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)]; - if (ref_cu->type == CU_INTER) { - if (ref_cu->inter.mv_dir & 1) { - mv.x = ref_cu->inter.mv[0][0]; - mv.y = ref_cu->inter.mv[0][1]; - } else { - mv.x = ref_cu->inter.mv[1][0]; - mv.y = ref_cu->inter.mv[1][1]; - } - } - } - -#if SEARCH_MV_FULL_RADIUS - temp_cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); -#else - switch (state->encoder_control->cfg->ime_algorithm) { - case KVZ_IME_TZ: - temp_cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); - break; - - default: - temp_cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); - break; - } -#endif - if (state->encoder_control->cfg->fme_level > 0) { - temp_cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); - } - - merged = 0; - // Check every candidate to find a match - for(merge_idx = 0; merge_idx < num_cand; merge_idx++) { - if (merge_cand[merge_idx].dir != 3 && - merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x && - merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y && - (uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) { - merged = 1; - break; - } - } - - // Only check when candidates are different - if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) { - vector2d_t mvd_temp1, mvd_temp2; - int cand1_cost,cand2_cost; - - mvd_temp1.x = mv.x - mv_cand[0][0]; - mvd_temp1.y = mv.y - mv_cand[0][1]; - cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac); - - mvd_temp2.x = mv.x - mv_cand[1][0]; - 
mvd_temp2.y = mv.y - mv_cand[1][1]; - cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac); - - // Select candidate 1 if it has lower cost - if (cand2_cost < cand1_cost) { - cu_mv_cand = 1; - } - } - mvd.x = mv.x - mv_cand[cu_mv_cand][0]; - mvd.y = mv.y - mv_cand[cu_mv_cand][1]; - - if(temp_cost < cur_cu->inter.cost) { - - // Map reference index to L0/L1 pictures - cur_cu->inter.mv_dir = ref_list+1; - cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx; - - cur_cu->merged = merged; - cur_cu->merge_idx = merge_idx; - cur_cu->inter.mv_ref[ref_list] = ref_idx; - cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x; - cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y; - cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x; - cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y; - cur_cu->inter.cost = temp_cost; - cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list]; - cur_cu->inter.mv_cand[ref_list] = cu_mv_cand; - } + search_cu_inter_ref(state, + x, y, depth, + lcu, cur_cu, + mv_cand, merge_cand, num_cand, + ref_idx, + get_mvd_cost); } // Search bi-pred positions @@ -1155,7 +1177,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d for (int ypos = 0; ypos < LCU_WIDTH >> depth; ++ypos) { int dst_y = ypos*(LCU_WIDTH >> depth); for (int xpos = 0; xpos < (LCU_WIDTH >> depth); ++xpos) { - tmp_block[dst_y + xpos] = templcu->rec.y[((y + ypos)&(LCU_WIDTH - 1))*LCU_WIDTH + ((x + xpos)&(LCU_WIDTH - 1))]; + tmp_block[dst_y + xpos] = templcu->rec.y[ + SUB_SCU(y + ypos) * LCU_WIDTH + SUB_SCU(x + xpos)]; tmp_pic[dst_y + xpos] = frame->source->y[x + xpos + (y + ypos)*frame->source->width]; } } diff --git a/src/search_intra.c b/src/search_intra.c index 09efa9b2..45790ef9 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -145,8 +145,8 @@ static double search_intra_trdepth(encoder_state_t * const state, const int width_c = width > TR_MIN_WIDTH ? 
width / 2 : width; const int offset = width / 2; - const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; - cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; + cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); @@ -609,8 +609,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); if (reconstruct_chroma) { - const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; - cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; + cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); struct { double cost; @@ -645,11 +645,10 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; - const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; - cu_info_t *cur_cu = &lcu->cu[cu_index]; + cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y); int8_t intra_mode = cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].mode; double costs[5]; @@ -710,13 +709,12 @@ double kvz_search_cu_intra(encoder_state_t * const state, const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int8_t cu_width = (LCU_WIDTH >> (depth)); - const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth; - cu_info_t 
*cur_cu = &lcu->cu[cu_index]; + cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y); kvz_intra_references refs; @@ -728,10 +726,10 @@ double kvz_search_cu_intra(encoder_state_t * const state, // Select left and top CUs if they are available. // Top CU is not available across LCU boundary. if ((x_px >> 3) > 0) { - left_cu = &lcu->cu[cu_index - 1]; + left_cu = LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y); } if ((y_px >> 3) > 0 && lcu_cu.y != 0) { - above_cu = &lcu->cu[cu_index - LCU_T_CU_WIDTH]; + above_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1); } kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); diff --git a/src/transform.c b/src/transform.c index 954ead17..d5c62b88 100644 --- a/src/transform.c +++ b/src/transform.c @@ -220,10 +220,10 @@ int kvz_quantize_residual_trskip( void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu) { // we have 64>>depth transform size - const vector2d_t lcu_px = {x & 0x3f, y & 0x3f}; + const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4); if (cur_cu == NULL) { - cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } const int8_t width = LCU_WIDTH>>depth; @@ -241,9 +241,9 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in // Propagate coded block flags from child CUs to parent CU. 
if (depth < MAX_DEPTH) { - cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH]; - cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; - cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; + cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y); + cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset); + cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset); if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { cbf_set(&cur_cu->cbf.y, depth); } @@ -304,11 +304,11 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu) { // we have 64>>depth transform size - const vector2d_t lcu_px = {x & 0x3f, y & 0x3f}; + const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4); const int8_t width = LCU_WIDTH>>depth; if (cur_cu == NULL) { - cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH]; + cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } // Tell clang-analyzer what is up. For some reason it can't figure out from @@ -325,9 +325,9 @@ void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, // Propagate coded block flags from child CUs to parent CU. 
if (depth < MAX_DEPTH) { - cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH]; - cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; - cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH]; + cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y); + cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset); + cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset); if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { cbf_set(&cur_cu->cbf.u, depth); }