Merge branch 'refactor-stuff'

This commit is contained in:
Arttu Ylä-Outinen 2015-11-23 09:47:43 +02:00
commit ec7c84f7e0
9 changed files with 537 additions and 378 deletions

View file

@ -106,10 +106,15 @@ typedef struct {
cu_array_t * kvz_cu_array_alloc(int width_in_scu, int height_in_scu);
int kvz_cu_array_free(cu_array_t *cua);
#define SUB_SCU_BIT_MASK (64 - 1)
#define SUB_SCU(xy) (xy & SUB_SCU_BIT_MASK)
/**
 * \brief Return the low-order bits of a pixel coordinate that lie inside an LCU.
 *
 * The coordinate is masked with (LCU_WIDTH - 1), which gives the distance
 * from the left or top edge of the containing LCU. For a 64-pixel LCU this
 * keeps the 6 lowest-order bits (values 0..63). The mask is only meaningful
 * when LCU_WIDTH is a power of two.
 *
 * \param xy  x- or y-coordinate in pixels
 */
#define SUB_SCU(xy) ((xy) & (LCU_WIDTH - 1))
#define LCU_CU_WIDTH 8
#define LCU_T_CU_WIDTH 9
#define LCU_CU_OFFSET 10
@ -153,10 +158,66 @@ typedef struct {
* - Left reference CUs on column 0.
* - All of LCUs CUs on 1:9, 1:9.
* - Top right reference CU on the last slot.
*
\verbatim
.-- left reference CUs
v
0 | 1 2 3 4 5 6 7 8 | 81 <-- top reference CUs
----+-------------------------+----
9 | 10 11 12 13 14 15 16 17 |
18 | 19 20 21 22 23 24 25 26 <-- this LCU
27 | 28 29 30 31 32 33 34 35 |
36 | 37 38 39 40 41 42 43 44 |
45 | 46 47 48 49 50 51 52 53 |
54 | 55 56 57 58 59 60 61 62 |
63 | 64 65 66 67 68 69 70 71 |
72 | 73 74 75 76 77 78 79 80 |
----+-------------------------+----
\endverbatim
*/
cu_info_t cu[9*9+1];
} lcu_t;
/**
 * \brief Return pointer to a given CU.
 *
 * Indexes lcu->cu with a row stride of LCU_T_CU_WIDTH, starting from
 * LCU_CU_OFFSET. With the layout documented on lcu_t, indices 0..7 address
 * the CUs of the LCU itself and index -1 addresses the left (x_cu == -1) or
 * top (y_cu == -1) reference CUs. The top right reference CU is stored in
 * the last slot of the array and is not addressed by in-range indices; use
 * LCU_GET_TOP_RIGHT_CU for it. No bounds checking is done.
 *
 * \param lcu   pointer to the containing LCU
 * \param x_cu  x-index of the CU
 * \param y_cu  y-index of the CU
 * \return      pointer to the CU
 */
#define LCU_GET_CU(lcu, x_cu, y_cu) \
(&(lcu)->cu[LCU_CU_OFFSET + (x_cu) + (y_cu) * LCU_T_CU_WIDTH])
/**
 * \brief Return pointer to the top right reference CU.
 *
 * The top right reference CU lives at index LCU_T_CU_WIDTH * LCU_T_CU_WIDTH
 * of lcu->cu, i.e. the extra slot after the 9x9 grid of CUs.
 *
 * \param lcu   pointer to the containing LCU
 * \return      pointer to the top right reference CU
 */
#define LCU_GET_TOP_RIGHT_CU(lcu) \
(&(lcu)->cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH])
/**
 * \brief Return pointer to the CU containing a given pixel.
 *
 * The pixel coordinates are converted to CU indices by shifting right by 3,
 * i.e. CUs are addressed at 8-pixel granularity, and then passed to
 * LCU_GET_CU.
 *
 * NOTE(review): right-shifting a negative coordinate is implementation-defined
 * in C; confirm the target compilers use an arithmetic shift before relying on
 * negative coordinates to reach the reference CUs.
 *
 * \param lcu   pointer to the containing LCU
 * \param x_px  x-coordinate relative to the upper left corner of the LCU
 * \param y_px  y-coordinate relative to the upper left corner of the LCU
 * \return      pointer to the CU at coordinates (x_px, y_px)
 */
#define LCU_GET_CU_AT_PX(lcu, x_px, y_px) LCU_GET_CU(lcu, (x_px) >> 3, (y_px) >> 3)
/**
 * \brief Return pointer to a CU relative to the given CU.
 *
 * The offset is computed with a row stride of LCU_T_CU_WIDTH, so the given
 * pointer must point into an array that uses that row stride. No bounds
 * checking is done.
 *
 * \param cu_array  pointer to a CU in the array at some location (x, y);
 *                  may be any pointer-valued expression
 * \param x_offs    x-offset in CUs
 * \param y_offs    y-offset in CUs
 * \return          pointer to the CU at (x + x_offs, y + y_offs)
 */
#define CU_GET_CU(cu_array, x_offs, y_offs) \
  (&(cu_array)[(x_offs) + (y_offs) * LCU_T_CU_WIDTH])
#define CHECKPOINT_LCU(prefix_str, lcu) do { \
CHECKPOINT_CU(prefix_str " cu[0]", (lcu).cu[0]); \
CHECKPOINT_CU(prefix_str " cu[1]", (lcu).cu[1]); \

View file

@ -83,11 +83,16 @@ const int8_t kvz_g_chroma_filter[8][4] =
/**
* \brief
*/
INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset,
int32_t tc, int8_t sw,
int8_t part_P_nofilter, int8_t part_Q_nofilter,
int32_t thr_cut,
int8_t filter_second_P, int8_t filter_second_Q)
static INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder,
kvz_pixel *src,
int32_t offset,
int32_t tc,
int8_t sw,
int8_t part_P_nofilter,
int8_t part_Q_nofilter,
int32_t thr_cut,
int8_t filter_second_P,
int8_t filter_second_Q)
{
int32_t delta;
@ -143,8 +148,12 @@ INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz
/**
* \brief
*/
INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
int8_t part_P_nofilter, int8_t part_Q_nofilter)
static INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder,
kvz_pixel *src,
int32_t offset,
int32_t tc,
int8_t part_P_nofilter,
int8_t part_Q_nofilter)
{
int32_t delta;
int16_t m2 = src[-offset * 2];
@ -161,72 +170,119 @@ INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, k
}
}
/**
* \brief
* \brief Check whether an edge is a TU boundary.
*
* \param state encoder state
* \param x x-coordinate of the scu in pixels
* \param y y-coordinate of the scu in pixels
* \param dir direction of the edge to check
* \return true, if the edge is a TU boundary, otherwise false
*/
void kvz_filter_deblock_edge_luma(encoder_state_t * const state,
int32_t xpos, int32_t ypos,
int8_t depth, int8_t dir)
static bool is_tu_boundary(const encoder_state_t *const state,
                           int32_t x,
                           int32_t y,
                           edge_dir dir)
{
  // Look up the CU info stored for the SCU that contains pixel (x, y).
  const cu_info_t *const scu = kvz_videoframe_get_cu(state->tile->frame,
                                                     x >> MIN_SIZE,
                                                     y >> MIN_SIZE);

  // TU width at this transform depth; subtracting one gives the alignment mask.
  const int tu_mask = (LCU_WIDTH >> scu->tr_depth) - 1;

  // A horizontal edge is tested on its y-coordinate, any other edge on x.
  const int32_t coord = (dir == EDGE_HOR) ? y : x;

  return (coord & tu_mask) == 0;
}
/**
 * \brief Check whether an edge is aligned on an 8x8 grid.
 *
 * Only the coordinate perpendicular to the edge is tested: y for a
 * horizontal edge (EDGE_HOR), x otherwise.
 *
 * \param x     x-coordinate of the edge
 * \param y     y-coordinate of the edge
 * \param dir   direction of the edge
 * \return      true, if the edge is aligned on an 8x8 grid, otherwise false
 */
static bool is_on_8x8_grid(int x, int y, edge_dir dir)
{
  const int pos = (dir == EDGE_HOR) ? y : x;
  return (pos & 7) == 0;
}
/**
* \brief Apply the deblocking filter to luma pixels on a single edge.
*
* The caller should check that the edge is a TU boundary or a PU boundary.
*
\verbatim
.-- filter this edge if dir == EDGE_HOR
v
+--------+
|o <-- pixel at (x, y)
| |
|<-- filter this edge if dir == EDGE_VER
| |
+--------+
\endverbatim
*
* \param state encoder state
* \param x x-coordinate in pixels (see above)
* \param y y-coordinate in pixels (see above)
* \param length length of the edge in pixels
* \param dir direction of the edge to filter
*/
static void filter_deblock_edge_luma(encoder_state_t * const state,
int32_t x,
int32_t y,
int32_t length,
edge_dir dir)
{
videoframe_t * const frame = state->tile->frame;
const encoder_control_t * const encoder = state->encoder_control;
cu_info_t *cu_q = kvz_videoframe_get_cu(frame, xpos >> MIN_SIZE, ypos >> MIN_SIZE);
{
// Return if called with a coordinate which is not at CU or TU boundary.
// TODO: Add handling for asymmetric inter CU boundaries which do not coincide
// with transform boundaries.
const int tu_width = LCU_WIDTH >> cu_q->tr_depth;
if (dir == EDGE_HOR && (ypos & (tu_width - 1))) return;
if (dir == EDGE_VER && (xpos & (tu_width - 1))) return;
}
cu_info_t *cu_q = kvz_videoframe_get_cu(frame, x >> MIN_SIZE, y >> MIN_SIZE);
{
int32_t stride = frame->rec->stride;
int32_t offset = stride;
int32_t beta_offset_div2 = encoder->beta_offset_div2;
int32_t tc_offset_div2 = encoder->tc_offset_div2;
// TODO: support 10+bits
kvz_pixel *orig_src = &frame->rec->y[xpos + ypos*stride];
kvz_pixel *orig_src = &frame->rec->y[x + y*stride];
kvz_pixel *src = orig_src;
int32_t step = 1;
cu_info_t *cu_p = NULL;
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
int8_t strength = 0;
int16_t x_cu = x >> MIN_SIZE;
int16_t y_cu = y >> MIN_SIZE;
int8_t strength = 0;
int32_t qp = state->global->QP;
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale;
int32_t side_threshold = (beta + (beta >>1 )) >> 3;
uint32_t blocks_in_part = (LCU_WIDTH >> depth) / 4;
uint32_t block_idx;
int32_t tc_index,tc,thr_cut;
int32_t tc_index;
int32_t tc;
int32_t thr_cut;
if (dir == EDGE_VER) {
offset = 1;
step = stride;
}
uint32_t num_4px_parts = length / 4;
const int32_t offset = (dir == EDGE_HOR) ? stride : 1;
const int32_t step = (dir == EDGE_HOR) ? 1 : stride;
// TODO: add CU based QP calculation
// For each 4-pixel part in the edge
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
for (uint32_t block_idx = 0; block_idx < num_4px_parts; ++block_idx) {
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
{
vector2d_t px = {
(dir == EDGE_HOR ? xpos + block_idx * 4 : xpos),
(dir == EDGE_VER ? ypos + block_idx * 4 : ypos)
};
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
// when processing the next LCU.
if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != frame->width)) {
continue;
}
// CU in the side we are filtering, update every 8-pixels
cu_p = kvz_videoframe_get_cu(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0));
@ -341,53 +397,60 @@ void kvz_filter_deblock_edge_luma(encoder_state_t * const state,
useStrongFiltering(offset, 2*d3, (src+step*(block_idx*4+3)));
// Filter four rows/columns
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 0), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 1), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 2), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 3), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
for (int i = 0; i < 4; i++) {
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + i), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
}
}
}
}
}
/**
* \brief
* \brief Apply the deblocking filter to chroma pixels on a single edge.
*
* The caller should check that the edge is a TU boundary or a PU boundary.
*
\verbatim
.-- filter this edge if dir == EDGE_HOR
v
+--------+
|o <-- pixel at (x, y)
| |
|<-- filter this edge if dir == EDGE_VER
| |
+--------+
\endverbatim
*
* \param state encoder state
* \param x x-coordinate in chroma pixels (see above)
* \param y y-coordinate in chroma pixels (see above)
* \param length length of the edge in chroma pixels
* \param dir direction of the edge to filter
*/
void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
int32_t x, int32_t y,
int8_t depth, int8_t dir)
static void filter_deblock_edge_chroma(encoder_state_t * const state,
int32_t x,
int32_t y,
int32_t length,
edge_dir dir)
{
const encoder_control_t * const encoder = state->encoder_control;
const videoframe_t * const frame = state->tile->frame;
const cu_info_t *cu_q = kvz_videoframe_get_cu_const(frame, x >> (MIN_SIZE - 1), y >> (MIN_SIZE - 1));
// Chroma edges that do not lay on a 8x8 grid are not deblocked.
if (depth >= MAX_DEPTH) {
if (dir == EDGE_HOR && (y & (8 - 1))) return;
if (dir == EDGE_VER && (x & (8 - 1))) return;
}
{
// Return if called with a coordinate which is not at CU or TU boundary.
// TODO: Add handling for asymmetric inter CU boundaries which do not coincide
// with transform boundaries.
const int tu_width = (LCU_WIDTH / 2) >> cu_q->tr_depth;
if (dir == EDGE_HOR && (y & (tu_width - 1))) return;
if (dir == EDGE_VER && (x & (tu_width - 1))) return;
}
// For each subpart
{
int32_t stride = frame->rec->stride >> 1;
int32_t tc_offset_div2 = encoder->tc_offset_div2;
// TODO: support 10+bits
kvz_pixel *src_u = &frame->rec->u[x + y*stride];
kvz_pixel *src_v = &frame->rec->v[x + y*stride];
// Init offset and step to EDGE_HOR
int32_t offset = stride;
int32_t step = 1;
kvz_pixel *src[] = {
&frame->rec->u[x + y*stride],
&frame->rec->v[x + y*stride],
};
const cu_info_t *cu_p = NULL;
int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
int16_t x_cu = x >> (MIN_SIZE-1);
int16_t y_cu = y >> (MIN_SIZE-1);
int8_t strength = 2;
int32_t QP = kvz_g_chroma_scale[state->global->QP];
@ -395,42 +458,22 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale;
// Special handling for depth 4. It's meaning is that we want to bypass
// last block in LCU check in order to deblock just that block.
uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4;
uint32_t blk_idx;
const uint32_t num_4px_parts = length / 4;
if(dir == EDGE_VER) {
offset = 1;
step = stride;
}
const int32_t offset = (dir == EDGE_HOR) ? stride : 1;
const int32_t step = (dir == EDGE_HOR) ? 1 : stride;
for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx)
for (uint32_t blk_idx = 0; blk_idx < num_4px_parts; ++blk_idx)
{
vector2d_t px = {
(dir == EDGE_HOR ? x + blk_idx * 4 : x),
(dir == EDGE_VER ? y + blk_idx * 4 : y)
};
cu_p = kvz_videoframe_get_cu_const(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0));
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
// when processing the next LCU.
if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != frame->width / 2)) {
continue;
}
// Only filter when strength == 2 (one of the blocks is intra coded)
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
// Chroma U
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 0), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 1), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 2), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 3), offset, Tc, 0, 0);
// Chroma V
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 0), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 1), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 2), offset, Tc, 0, 0);
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 3), offset, Tc, 0, 0);
for (int component = 0; component < 2; component++) {
for (int i = 0; i < 4; i++) {
kvz_filter_deblock_chroma(encoder, src[component] + step * (4*blk_idx + i), offset, Tc, 0, 0);
}
}
}
}
}
@ -438,88 +481,154 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
/**
* \brief function to split LCU into smaller CU blocks
* \param encoder the encoder info structure
* \param xCtb block x-position (as SCU)
* \param yCtb block y-position (as SCU)
* \param depth block depth
* \param edge which edge we are filtering
*
* This function takes (SCU) block position as input and splits the block
* until the coded block size has been achived. Calls luma and chroma filtering
* functions for each coded CU size.
* \param encoder the encoder info structure
* \param x_px block x-position in pixels
* \param y_px block y-position in pixels
* \param depth block depth
* \param dir direction of the edges to filter
*
* Recursively traverse the CU/TU quadtree. At the lowest level, apply the
* deblocking filter to the left edge (when dir == EDGE_VER) or the top edge
* (when dir == EDGE_HOR) as needed. Both luma and chroma are filtered.
*/
void kvz_filter_deblock_cu(encoder_state_t * const state, int32_t x, int32_t y, int8_t depth, int32_t edge)
static void filter_deblock_cu(encoder_state_t * const state,
int32_t x,
int32_t y,
int8_t depth,
edge_dir dir)
{
const videoframe_t * const frame = state->tile->frame;
const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x, y);
uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
uint8_t tr_split = (cur_cu->tr_depth > depth) ? 1 : 0;
uint8_t border_x = (frame->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
uint8_t border_y = (frame->height < y*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
uint8_t border_split_x = (frame->width < ((x + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1;
uint8_t border_split_y = (frame->height < ((y + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1;
const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame,
x >> MAX_DEPTH,
y >> MAX_DEPTH);
uint8_t border = border_x | border_y; // are we in any border CU?
const int cu_width = LCU_WIDTH >> depth;
const int half_cu_width = cu_width >> 1;
const int scu_width = LCU_WIDTH >> MAX_DEPTH;
const bool split_flag = cur_cu->depth > depth;
const bool tr_split = cur_cu->tr_depth > depth;
const bool border_x = frame->width < x + cu_width;
const bool border_y = frame->height < y + cu_width;
const bool border_split_x = frame->width >= x + scu_width + half_cu_width;
const bool border_split_y = frame->height >= y + scu_width + half_cu_width;
const bool border = border_x || border_y; // are we in any border CU?
// split 64x64, on split flag and on border
if (depth < MAX_DEPTH && (depth == 0 || split_flag || border || tr_split)) {
// Split the four sub-blocks of this block recursively.
uint8_t change;
assert(depth >= 0); // for clang-analyzer
change = 1 << (MAX_DEPTH - 1 - depth);
const int32_t x2 = x + half_cu_width;
const int32_t y2 = y + half_cu_width;
kvz_filter_deblock_cu(state, x, y, depth + 1, edge);
if(!border_x || border_split_x) {
kvz_filter_deblock_cu(state, x + change, y, depth + 1, edge);
filter_deblock_cu(state, x, y, depth + 1, dir);
if (!border_x || border_split_x) {
filter_deblock_cu(state, x2, y, depth + 1, dir);
}
if(!border_y || border_split_y) {
kvz_filter_deblock_cu(state, x , y + change, depth + 1, edge);
if (!border_y || border_split_y) {
filter_deblock_cu(state, x, y2, depth + 1, dir);
}
if((!border_x && !border_y) || (border_split_x && border_split_y)) {
kvz_filter_deblock_cu(state, x + change, y + change, depth + 1, edge);
if (!border || (border_split_x && border_split_y)) {
filter_deblock_cu(state, x2, y2, depth + 1, dir);
}
return;
}
// no filtering on borders (where filter would use pixels outside the picture)
if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return;
if ((x == 0 && dir == EDGE_VER) || (y == 0 && dir == EDGE_HOR)) return;
// do the filtering for block edge
kvz_filter_deblock_edge_luma(state, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge);
kvz_filter_deblock_edge_chroma(state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
if (is_tu_boundary(state, x, y, dir)) {
// Length of luma and chroma edges.
int32_t length;
int32_t length_c;
const int32_t x_right = x + cu_width;
const bool rightmost_4px_of_lcu = x_right % LCU_WIDTH == 0;
const bool rightmost_4px_of_frame = x_right == frame->width;
if (dir == EDGE_HOR &&
rightmost_4px_of_lcu &&
!rightmost_4px_of_frame) {
// The last 4 pixels will be deblocked when processing the next LCU.
length = cu_width - 4;
length_c = half_cu_width - 4;
} else {
length = cu_width;
length_c = half_cu_width;
}
filter_deblock_edge_luma(state, x, y, length, dir);
// Chroma pixel coordinates.
const int32_t x_c = x >> 1;
const int32_t y_c = y >> 1;
if (is_on_8x8_grid(x_c, y_c, dir)) {
filter_deblock_edge_chroma(state, x_c, y_c, length_c, dir);
}
}
}
/**
* \brief Deblock a single LCU without using data from right or down.
*
* Filter all the following edges:
* - All edges within the LCU, except for the last 4 pixels on the right when
* using horizontal filtering.
* - Left edge and top edge.
* - After vertical filtering the left edge, filter the last 4 pixels of
* horizontal edges in the LCU to the left.
* Filter the following vertical edges (horizontal filtering):
* 1. The left edge of the LCU.
* 2. All vertical edges within the LCU.
*
* Filter the following horizontal edges (vertical filtering):
* 1. The rightmost 4 pixels of the top edge of the LCU to the left.
* 2. The rightmost 4 pixels of all horizontal edges within the LCU to the
* left.
* 3. The top edge and all horizontal edges within the LCU, excluding the
* rightmost 4 pixels. If the LCU is the rightmost LCU of the frame, the
* last 4 pixels are also filtered.
*
* What is not filtered:
* - The rightmost 4 pixels of the top edge and all horizontal edges within
* the LCU, unless the LCU is the rightmost LCU of the frame.
* - The bottom edge of the LCU.
* - The right edge of the LCU.
*
* \param state encoder state
* \param x_px x-coordinate of the left edge of the LCU in pixels
* \param y_px y-coordinate of the top edge of the LCU in pixels
*/
void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px)
{
const vector2d_t lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH };
kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
filter_deblock_cu(state, x_px, y_px, 0, EDGE_VER);
assert(x_px == lcu.x * LCU_WIDTH);
assert(y_px == lcu.y * LCU_WIDTH);
// Filter rightmost 4 pixels from last LCU now that they have been
// finally deblocked vertically.
if (lcu.x > 0) {
int y;
for (y = 0; y < 64; y += 8) {
if (lcu.y + y == 0) continue;
kvz_filter_deblock_edge_luma(state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
if (x_px > 0) {
// Luma
const int x = x_px - 4;
const int end = MIN(y_px + LCU_WIDTH, state->tile->frame->height);
for (int y = y_px; y < end; y += 8) {
// The top edge of the whole frame is not filtered.
if (y > 0 && is_tu_boundary(state, x, y, EDGE_HOR)) {
filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR);
}
}
for (y = 0; y < 32; y += 8) {
if (lcu.y + y == 0) continue;
kvz_filter_deblock_edge_chroma(state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
// Chroma
const int x_px_c = x_px >> 1;
const int y_px_c = y_px >> 1;
const int x_c = x_px_c - 4;
const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
for (int y_c = y_px_c; y_c < end_c; y_c += 8) {
// The top edge of the whole frame is not filtered.
if (y_c > 0 && is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR)) {
filter_deblock_edge_chroma(state, x_c, y_c, 4, EDGE_HOR);
}
}
}
kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
filter_deblock_cu(state, x_px, y_px, 0, EDGE_HOR);
}

View file

@ -31,30 +31,15 @@
#include "encoderstate.h"
//////////////////////////////////////////////////////////////////////////
// FUNCTIONS
// Deblocking
void kvz_filter_deblock_cu(encoder_state_t *state, int32_t x_px, int32_t y_px,
int8_t depth, int32_t edge);
void kvz_filter_deblock_edge_luma(encoder_state_t *state,
int32_t x_pos, int32_t y_pos,
int8_t depth, int8_t dir);
void kvz_filter_deblock_edge_chroma(encoder_state_t *state,
int32_t xpos, int32_t ypos,
int8_t depth, int8_t dir);
/**
 * \brief Edge direction.
 *
 * Selects which edges of a block the deblocking functions operate on.
 * In filter_deblock_cu, EDGE_VER filters the left edge of a block and
 * EDGE_HOR filters the top edge.
 */
typedef enum edge_dir {
EDGE_VER = 0, // vertical edge (left edge of a block)
EDGE_HOR = 1, // horizontal edge (top edge of a block)
} edge_dir;
void kvz_filter_deblock_lcu(encoder_state_t *state, int x_px, int y_px);
void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc , int8_t sw,
int8_t part_p_nofilter, int8_t part_q_nofilter,
int32_t thr_cut,
int8_t filter_second_p, int8_t filter_second_q);
void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
int8_t part_p_nofilter, int8_t part_q_nofilter);
// SAO
//////////////////////////////////////////////////////////////////////////
// MACROS
#define EDGE_VER 0
#define EDGE_HOR 1
#endif

View file

@ -395,15 +395,12 @@ void kvz_inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_p
* \param cu coding unit to clear
*/
static void inter_clear_cu_unused(cu_info_t* cu) {
if(!(cu->inter.mv_dir & 1)) {
cu->inter.mv[0][0] = 0;
cu->inter.mv[0][1] = 0;
cu->inter.mv_ref[0] = 255;
}
if(!(cu->inter.mv_dir & 2)) {
cu->inter.mv[1][0] = 0;
cu->inter.mv[1][1] = 0;
cu->inter.mv_ref[1] = 255;
for (unsigned i = 0; i < 2; ++i) {
if (cu->inter.mv_dir & (1 << i)) continue;
cu->inter.mv[i][0] = 0;
cu->inter.mv[i][1] = 0;
cu->inter.mv_ref[i] = 255;
}
}
@ -433,17 +430,16 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth,
|A1|_________|
|A0|
*/
int32_t x_cu = (x & (LCU_WIDTH - 1)) >> MAX_DEPTH; //!< coordinates from top-left of this LCU
int32_t y_cu = (y & (LCU_WIDTH - 1)) >> MAX_DEPTH;
cu_info_t* cu = &lcu->cu[LCU_CU_OFFSET];
int32_t x_cu = SUB_SCU(x) >> MAX_DEPTH; //!< coordinates from top-left of this LCU
int32_t y_cu = SUB_SCU(y) >> MAX_DEPTH;
// A0 and A1 availability testing
if (x != 0) {
*a1 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu - 1) * LCU_T_CU_WIDTH];
*a1 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu - 1);
if (!(*a1)->coded) *a1 = NULL;
if(*a1) inter_clear_cu_unused(*a1);
if (y_cu + cur_block_in_scu < LCU_WIDTH>>3) {
*a0 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu) * LCU_T_CU_WIDTH];
*a0 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu);
if (!(*a0)->coded) *a0 = NULL;
}
if(*a0) inter_clear_cu_unused(*a0);
@ -452,21 +448,21 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth,
// B0, B1 and B2 availability testing
if (y != 0) {
if (x_cu + cur_block_in_scu < LCU_WIDTH>>3) {
*b0 = &cu[x_cu + cur_block_in_scu + (y_cu - 1) * LCU_T_CU_WIDTH];
*b0 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu, y_cu - 1);
if (!(*b0)->coded) *b0 = NULL;
} else if(y_cu == 0) {
// Special case, top-right cu from LCU is the last in lcu->cu array
*b0 = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH];
// Special case, top-right CU
*b0 = LCU_GET_TOP_RIGHT_CU(lcu);
if (!(*b0)->coded) *b0 = NULL;
}
if(*b0) inter_clear_cu_unused(*b0);
*b1 = &cu[x_cu + cur_block_in_scu - 1 + (y_cu - 1) * LCU_T_CU_WIDTH];
*b1 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu - 1, y_cu - 1);
if (!(*b1)->coded) *b1 = NULL;
if(*b1) inter_clear_cu_unused(*b1);
if (x != 0) {
*b2 = &cu[x_cu - 1 + (y_cu - 1) * LCU_T_CU_WIDTH];
*b2 = LCU_GET_CU(lcu, x_cu - 1, y_cu - 1);
if(!(*b2)->coded) *b2 = NULL;
}
if(*b2) inter_clear_cu_unused(*b2);

View file

@ -485,9 +485,9 @@ void kvz_intra_recon_lcu_luma(
cu_info_t *cur_cu,
lcu_t *lcu)
{
const vector2d_t lcu_px = { x & 0x3f, y & 0x3f };
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
if (cur_cu == NULL) {
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
const int8_t width = LCU_WIDTH >> depth;
@ -500,9 +500,9 @@ void kvz_intra_recon_lcu_luma(
kvz_intra_recon_lcu_luma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu);
if (depth < MAX_DEPTH) {
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
cbf_set(&cur_cu->cbf.y, depth);
}
@ -537,12 +537,12 @@ void kvz_intra_recon_lcu_chroma(
cu_info_t *cur_cu,
lcu_t *lcu)
{
const vector2d_t lcu_px = { x & 0x3f, y & 0x3f };
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
const int8_t width = LCU_WIDTH >> depth;
const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
if (cur_cu == NULL) {
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
if (depth == 0 || cur_cu->tr_depth > depth) {
@ -554,9 +554,9 @@ void kvz_intra_recon_lcu_chroma(
kvz_intra_recon_lcu_chroma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu);
if (depth < MAX_DEPTH) {
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
cbf_set(&cur_cu->cbf.u, depth);
}

View file

@ -82,8 +82,8 @@ static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX
int x, y;
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
const cu_info_t *from_cu = &work_tree[depth + 1].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
cu_info_t *to_cu = &work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth + 1], x, y);
cu_info_t *to_cu = LCU_GET_CU(&work_tree[depth], x, y);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
}
@ -142,8 +142,8 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M
int x, y;
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
const cu_info_t *from_cu = &work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
cu_info_t *to_cu = &work_tree[d].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth], x, y);
cu_info_t *to_cu = LCU_GET_CU(&work_tree[d], x, y);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
}
@ -173,16 +173,15 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
{
const int width_cu = LCU_CU_WIDTH >> depth;
const vector2d_t lcu_cu = { (x_px & (LCU_WIDTH - 1)) / 8, (y_px & (LCU_WIDTH - 1)) / 8 };
cu_info_t *const cur_cu = &lcu->cu[lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH + LCU_CU_OFFSET];
const vector2d_t lcu_cu = { SUB_SCU(x_px) / 8, SUB_SCU(y_px) / 8 };
int x, y;
// Depth 4 doesn't go inside the loop. Set the top-left CU.
cur_cu->tr_depth = tr_depth;
LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth;
for (y = 0; y < width_cu; ++y) {
for (x = 0; x < width_cu; ++x) {
cu_info_t *cu = &cur_cu[x + y * LCU_T_CU_WIDTH];
cu_info_t *cu = LCU_GET_CU(lcu, lcu_cu.x + x, lcu_cu.y + y);
cu->tr_depth = tr_depth;
}
}
@ -194,12 +193,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
const int width_cu = LCU_CU_WIDTH >> depth;
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
int x, y;
// NxN can only be applied to a single CU at a time.
if (part_mode == SIZE_NxN) {
cu_info_t *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH];
cu_info_t *cu = LCU_GET_CU(lcu, x_cu, y_cu);
cu->depth = MAX_DEPTH;
cu->type = CU_INTRA;
cu->intra[PU_INDEX(x_px / 4, y_px / 4)].mode = pred_mode;
@ -211,7 +209,7 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
// Set mode in every CU covered by part_mode in this depth.
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
cu->depth = depth;
cu->type = CU_INTRA;
cu->intra[0].mode = pred_mode;
@ -231,12 +229,11 @@ static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *
const int width_cu = LCU_CU_WIDTH >> depth;
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
int x, y;
// Set mode in every CU covered by part_mode in this depth.
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
//Check if this could be moved inside the if
cu->coded = 1;
if (cu != cur_cu) {
@ -257,17 +254,16 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *
const int width_cu = LCU_CU_WIDTH >> depth;
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
int x, y;
int tr_split = cur_cu->tr_depth-cur_cu->depth;
// Set coeff flags in every CU covered by part_mode in this depth.
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
// Use TU top-left CU to propagate coeff flags
uint32_t mask = ~((width_cu>>tr_split)-1);
cu_info_t *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH];
cu_info_t *cu_from = LCU_GET_CU(lcu, x & mask, y & mask);
if (cu != cu_from) {
// Chroma coeff data is not used, luma is needed for deblocking
cu->cbf.y = cu_from->cbf.y;
@ -295,7 +291,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
const uint8_t pu_index = PU_INDEX(x_px / 4, y_px / 4);
// cur_cu is used for TU parameters.
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (x_px / 8) + (y_px / 8) * LCU_T_CU_WIDTH];
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
double coeff_bits = 0;
double tr_tree_bits = 0;
@ -368,7 +364,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
{
const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x / 4) + (lcu_px.y / 4)*LCU_T_CU_WIDTH];
cu_info_t *const tr_cu = LCU_GET_CU(lcu, lcu_px.x / 4, lcu_px.y / 4);
double tr_tree_bits = 0;
double coeff_bits = 0;
@ -450,8 +446,8 @@ static double calc_mode_bits(const encoder_state_t *state,
} else {
int8_t candidate_modes[3];
{
const cu_info_t *left_cu = ((x > 8) ? &cur_cu[-1] : NULL);
const cu_info_t *above_cu = ((y > 8) ? &cur_cu[-LCU_T_CU_WIDTH] : NULL);
const cu_info_t *left_cu = ((x > 8) ? CU_GET_CU(cur_cu, -1, 0) : NULL);
const cu_info_t *above_cu = ((y > 8) ? CU_GET_CU(cur_cu, 0, -1) : NULL);
kvz_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu);
}
@ -467,10 +463,9 @@ static double calc_mode_bits(const encoder_state_t *state,
/**
 * \brief Count the neighbouring CUs that are deeper than the given depth.
 *
 * The CU one column to the left and the CU one row above the CU containing
 * (x, y) are looked up in the LCU. A neighbour is only consulted when the
 * corresponding coordinate is at least 8; otherwise it contributes nothing.
 *
 * \param lcu    LCU holding the CU containing (x, y) and its reference CUs
 * \param x      x-coordinate in pixels; only the part inside the LCU is used
 *               for indexing
 * \param y      y-coordinate in pixels; only the part inside the LCU is used
 *               for indexing
 * \param depth  depth that the neighbours' depths are compared against
 * \return number of consulted neighbours with depth greater than \p depth,
 *         i.e. 0, 1 or 2
 */
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
{
vector2d_t lcu_cu = { (x & 0x3f) / 8, (y & 0x3f) / 8 };
const cu_info_t *cu_array = &(lcu)->cu[LCU_CU_OFFSET];
bool condA = x >= 8 && cu_array[(lcu_cu.x - 1) + lcu_cu.y * LCU_T_CU_WIDTH].depth > depth;
bool condL = y >= 8 && cu_array[lcu_cu.x + (lcu_cu.y - 1) * LCU_T_CU_WIDTH].depth > depth;
vector2d_t lcu_cu = { SUB_SCU(x) / 8, SUB_SCU(y) / 8 };
// condA looks at the CU at (lcu_cu.x - 1, lcu_cu.y) and condL at the CU at
// (lcu_cu.x, lcu_cu.y - 1). Only their sum is returned, so the naming does
// not affect the result.
bool condA = x >= 8 && LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y )->depth > depth;
bool condL = y >= 8 && LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1)->depth > depth;
return condA + condL;
}
@ -494,7 +489,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
lcu_t *const lcu = &work_tree[depth];
int x_local = (x&0x3f), y_local = (y&0x3f);
int x_local = SUB_SCU(x);
int y_local = SUB_SCU(y);
#ifdef KVZ_DEBUG
int debug_split = 0;
#endif
@ -506,7 +502,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
return 0;
}
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
cur_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x_local, y_local);
// Assign correct depth
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
cur_cu->tr_depth = depth > 0 ? depth : 1;
@ -647,8 +643,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
&& x + cu_width <= frame->width && y + cu_width <= frame->height)
{
vector2d_t lcu_cu = { x_local / 8, y_local / 8 };
cu_info_t *cu_array_d1 = &(&work_tree[depth + 1])->cu[LCU_CU_OFFSET];
cu_info_t *cu_d1 = &cu_array_d1[(lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH)];
cu_info_t *cu_d1 = LCU_GET_CU(&work_tree[depth + 1], lcu_cu.x, lcu_cu.y);
// If the best CU in depth+1 is intra and the biggest it can be, try it.
if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) {
@ -715,16 +710,12 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i
const int x_cu = x >> MAX_DEPTH;
const int y_cu = y >> MAX_DEPTH;
// Use top-left sub-cu of LCU as pointer to lcu->cu array to make things
// simpler.
cu_info_t *lcu_cu = &lcu->cu[LCU_CU_OFFSET];
// Copy top CU row.
if (y_cu > 0) {
int i;
for (i = 0; i < LCU_CU_WIDTH; ++i) {
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + i, y_cu - 1);
cu_info_t *to_cu = &lcu_cu[i - LCU_T_CU_WIDTH];
cu_info_t *to_cu = LCU_GET_CU(lcu, i, -1);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
}
@ -733,21 +724,21 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i
int i;
for (i = 0; i < LCU_CU_WIDTH; ++i) {
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu + i);
cu_info_t *to_cu = &lcu_cu[-1 + i * LCU_T_CU_WIDTH];
cu_info_t *to_cu = LCU_GET_CU(lcu, -1, i);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
}
// Copy top-left CU.
if (x_cu > 0 && y_cu > 0) {
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu - 1);
cu_info_t *to_cu = &lcu_cu[-1 - LCU_T_CU_WIDTH];
cu_info_t *to_cu = LCU_GET_CU(lcu, -1, -1);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
// Copy top-right CU.
if (y_cu > 0 && x + LCU_WIDTH < frame->width) {
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + LCU_CU_WIDTH, y_cu - 1);
cu_info_t *to_cu = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH];
cu_info_t *to_cu = LCU_GET_TOP_RIGHT_CU(lcu);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}
}
@ -806,14 +797,10 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
const int y_cu = y_px >> MAX_DEPTH;
videoframe_t * const frame = state->tile->frame;
// Use top-left sub-cu of LCU as pointer to lcu->cu array to make things
// simpler.
const cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
int x, y;
for (y = 0; y < LCU_CU_WIDTH; ++y) {
for (x = 0; x < LCU_CU_WIDTH; ++x) {
const cu_info_t *from_cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
const cu_info_t *from_cu = LCU_GET_CU(lcu, x, y);
cu_info_t *to_cu = kvz_videoframe_get_cu(frame, x_cu + x, y_cu + y);
memcpy(to_cu, from_cu, sizeof(*to_cu));
}

View file

@ -951,6 +951,127 @@ static unsigned search_frac(const encoder_state_t * const state,
}
/**
 * \brief Search the best motion vector for one reference picture.
 *
 * Runs the integer motion search selected by cfg->ime_algorithm, followed by
 * the fractional search when cfg->fme_level is greater than zero. The result
 * is then compared against the merge candidates and the two motion vector
 * predictors in mv_cand. If the resulting cost is lower than
 * cur_cu->inter.cost, the motion information of cur_cu is overwritten with
 * the result.
 *
 * \param state         encoder state
 * \param x             x-coordinate of the CU (presumably luma pixels in the
 *                      frame -- TODO confirm against caller)
 * \param y             y-coordinate of the CU (same unit as x)
 * \param depth         depth of the CU; the CU width is LCU_WIDTH >> depth
 * \param lcu           LCU passed on to kvz_inter_get_mv_cand
 * \param cur_cu        CU whose inter data is updated when a cheaper motion
 *                      vector is found
 * \param mv_cand       filled by kvz_inter_get_mv_cand for this reference
 * \param merge_cand    merge candidates to match the found vector against
 * \param num_cand      number of valid entries in merge_cand
 * \param ref_idx       index of the reference picture to search
 * \param get_mvd_cost  callback returning the cost of a motion vector
 *                      difference
 */
static void search_cu_inter_ref(const encoder_state_t * const state,
                                int x, int y, int depth,
                                lcu_t *lcu, cu_info_t *cur_cu,
                                int16_t mv_cand[2][2],
                                inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
                                int16_t num_cand,
                                unsigned ref_idx,
                                uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*))
{
  const videoframe_t * const frame = state->tile->frame;
  kvz_picture *ref_image = state->global->ref->images[ref_idx];
  const int8_t ref_list = state->global->refmap[ref_idx].list-1;

  uint32_t bitcost = 0;
  uint32_t cost = 0;

  // Origin of the search, aligned down to the minimum CU size.
  vector2d_t orig = {
    (x >> 3) * CU_MIN_SIZE_PIXELS,
    (y >> 3) * CU_MIN_SIZE_PIXELS
  };

  // The candidate derivation is run with ref_idx stored in the CU, after
  // which the previous reference index is put back.
  {
    const int8_t saved_ref_idx = cur_cu->inter.mv_ref[ref_list];
    cur_cu->inter.mv_ref[ref_list] = ref_idx;
    kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list);
    cur_cu->inter.mv_ref[ref_list] = saved_ref_idx;
  }

  vector2d_t mv = { 0, 0 };
  {
    // Seed the search with the motion vector that the reference frame used
    // at the centre of this CU. Temporal motion vector candidates would
    // probably make this unnecessary, but for now it helps.
    int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8;
    int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8;
    const cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)];
    if (ref_cu->type == CU_INTER) {
      // Take the first list when bit 0 of mv_dir is set, the second otherwise.
      const int src_list = (ref_cu->inter.mv_dir & 1) ? 0 : 1;
      mv.x = ref_cu->inter.mv[src_list][0];
      mv.y = ref_cu->inter.mv[src_list][1];
    }
  }

#if SEARCH_MV_FULL_RADIUS
  // NOTE(review): `ref_pic` is not declared in this function, so this branch
  // presumably does not compile when SEARCH_MV_FULL_RADIUS is enabled --
  // confirm against the signature of search_mv_full before enabling it.
  cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
#else
  if (state->encoder_control->cfg->ime_algorithm == KVZ_IME_TZ) {
    cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
  } else {
    cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
  }
#endif

  // The fractional search replaces the cost of the integer search.
  if (state->encoder_control->cfg->fme_level > 0) {
    cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
  }

  // Look for a merge candidate that has exactly the found motion vector and
  // reference. Candidates with dir == 3 are never considered. When no match
  // is found, merge_idx is left equal to num_cand.
  int32_t merged = 0;
  int8_t merge_idx;
  for (merge_idx = 0; merge_idx < num_cand; merge_idx++) {
    const inter_merge_cand_t *cand = &merge_cand[merge_idx];
    if (cand->dir == 3) {
      continue;
    }
    if (cand->mv[cand->dir - 1][0] == mv.x &&
        cand->mv[cand->dir - 1][1] == mv.y &&
        (uint32_t)cand->ref[cand->dir - 1] == ref_idx) {
      merged = 1;
      break;
    }
  }

  // Choose between the two predictors only when they differ and the vector
  // was not matched to a merge candidate; predictor 0 is the default.
  uint8_t cu_mv_cand = 0;
  const int cands_differ = mv_cand[0][0] != mv_cand[1][0] ||
                           mv_cand[0][1] != mv_cand[1][1];
  if (!merged && cands_differ) {
    vector2d_t mvd_cand0 = { mv.x - mv_cand[0][0], mv.y - mv_cand[0][1] };
    int cost_cand0 = get_mvd_cost(&mvd_cand0, (cabac_data_t*)&state->cabac);

    vector2d_t mvd_cand1 = { mv.x - mv_cand[1][0], mv.y - mv_cand[1][1] };
    int cost_cand1 = get_mvd_cost(&mvd_cand1, (cabac_data_t*)&state->cabac);

    // Predictor 1 wins only when it is strictly cheaper.
    if (cost_cand1 < cost_cand0) {
      cu_mv_cand = 1;
    }
  }

  vector2d_t mvd = {
    mv.x - mv_cand[cu_mv_cand][0],
    mv.y - mv_cand[cu_mv_cand][1]
  };

  // Keep the existing motion information unless this reference is cheaper.
  if (!(cost < cur_cu->inter.cost)) {
    return;
  }

  // Map reference index to L0/L1 pictures
  cur_cu->inter.mv_dir = ref_list+1;
  cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx;

  cur_cu->merged = merged;
  cur_cu->merge_idx = merge_idx;

  cur_cu->inter.mv_ref[ref_list] = ref_idx;
  cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x;
  cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y;
  cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x;
  cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y;
  cur_cu->inter.mv_cand[ref_list] = cu_mv_cand;

  cur_cu->inter.cost = cost;
  cur_cu->inter.bitcost = bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list];
}
/**
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
@ -959,12 +1080,9 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
{
const videoframe_t * const frame = state->tile->frame;
uint32_t ref_idx = 0;
int x_local = (x&0x3f), y_local = (y&0x3f);
int x_cu = x>>3;
int y_cu = y>>3;
int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
cu_info_t *cur_cu = &lcu->cu[cu_pos];
int x_local = SUB_SCU(x);
int y_local = SUB_SCU(y);
cu_info_t *cur_cu = LCU_GET_CU(lcu, x_local >> 3, y_local >> 3);
int16_t mv_cand[2][2];
// Search for merge mode candidate
@ -991,108 +1109,12 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
cur_cu->inter.cost = UINT_MAX;
for (ref_idx = 0; ref_idx < state->global->ref->used_size; ref_idx++) {
kvz_picture *ref_image = state->global->ref->images[ref_idx];
uint32_t temp_bitcost = 0;
uint32_t temp_cost = 0;
vector2d_t orig, mvd;
int32_t merged = 0;
uint8_t cu_mv_cand = 0;
int8_t merge_idx = 0;
int8_t ref_list = state->global->refmap[ref_idx].list-1;
int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list];
orig.x = x_cu * CU_MIN_SIZE_PIXELS;
orig.y = y_cu * CU_MIN_SIZE_PIXELS;
// Get MV candidates
cur_cu->inter.mv_ref[ref_list] = ref_idx;
kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list);
cur_cu->inter.mv_ref[ref_list] = temp_ref_idx;
vector2d_t mv = { 0, 0 };
{
// Take starting point for MV search from previous frame.
// When temporal motion vector candidates are added, there is probably
// no point to this anymore, but for now it helps.
int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8;
int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8;
cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)];
if (ref_cu->type == CU_INTER) {
if (ref_cu->inter.mv_dir & 1) {
mv.x = ref_cu->inter.mv[0][0];
mv.y = ref_cu->inter.mv[0][1];
} else {
mv.x = ref_cu->inter.mv[1][0];
mv.y = ref_cu->inter.mv[1][1];
}
}
}
#if SEARCH_MV_FULL_RADIUS
temp_cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
#else
switch (state->encoder_control->cfg->ime_algorithm) {
case KVZ_IME_TZ:
temp_cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
break;
default:
temp_cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
break;
}
#endif
if (state->encoder_control->cfg->fme_level > 0) {
temp_cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
}
merged = 0;
// Check every candidate to find a match
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
if (merge_cand[merge_idx].dir != 3 &&
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x &&
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y &&
(uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
merged = 1;
break;
}
}
// Only check when candidates are different
if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) {
vector2d_t mvd_temp1, mvd_temp2;
int cand1_cost,cand2_cost;
mvd_temp1.x = mv.x - mv_cand[0][0];
mvd_temp1.y = mv.y - mv_cand[0][1];
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
mvd_temp2.x = mv.x - mv_cand[1][0];
mvd_temp2.y = mv.y - mv_cand[1][1];
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {
cu_mv_cand = 1;
}
}
mvd.x = mv.x - mv_cand[cu_mv_cand][0];
mvd.y = mv.y - mv_cand[cu_mv_cand][1];
if(temp_cost < cur_cu->inter.cost) {
// Map reference index to L0/L1 pictures
cur_cu->inter.mv_dir = ref_list+1;
cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx;
cur_cu->merged = merged;
cur_cu->merge_idx = merge_idx;
cur_cu->inter.mv_ref[ref_list] = ref_idx;
cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x;
cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y;
cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x;
cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y;
cur_cu->inter.cost = temp_cost;
cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list];
cur_cu->inter.mv_cand[ref_list] = cu_mv_cand;
}
search_cu_inter_ref(state,
x, y, depth,
lcu, cur_cu,
mv_cand, merge_cand, num_cand,
ref_idx,
get_mvd_cost);
}
// Search bi-pred positions
@ -1155,7 +1177,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
for (int ypos = 0; ypos < LCU_WIDTH >> depth; ++ypos) {
int dst_y = ypos*(LCU_WIDTH >> depth);
for (int xpos = 0; xpos < (LCU_WIDTH >> depth); ++xpos) {
tmp_block[dst_y + xpos] = templcu->rec.y[((y + ypos)&(LCU_WIDTH - 1))*LCU_WIDTH + ((x + xpos)&(LCU_WIDTH - 1))];
tmp_block[dst_y + xpos] = templcu->rec.y[
SUB_SCU(y + ypos) * LCU_WIDTH + SUB_SCU(x + xpos)];
tmp_pic[dst_y + xpos] = frame->source->y[x + xpos + (y + ypos)*frame->source->width];
}
}

View file

@ -145,8 +145,8 @@ static double search_intra_trdepth(encoder_state_t * const state,
const int width_c = width > TR_MIN_WIDTH ? width / 2 : width;
const int offset = width / 2;
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
const bool reconstruct_chroma = !(x_px & 4 || y_px & 4);
@ -609,8 +609,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
const bool reconstruct_chroma = !(x_px & 4 || y_px & 4);
if (reconstruct_chroma) {
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
struct {
double cost;
@ -645,11 +645,10 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu)
{
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
cu_info_t *cur_cu = &lcu->cu[cu_index];
cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y);
int8_t intra_mode = cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].mode;
double costs[5];
@ -710,13 +709,12 @@ double kvz_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu)
{
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
const int8_t cu_width = (LCU_WIDTH >> (depth));
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth;
cu_info_t *cur_cu = &lcu->cu[cu_index];
cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y);
kvz_intra_references refs;
@ -728,10 +726,10 @@ double kvz_search_cu_intra(encoder_state_t * const state,
// Select left and top CUs if they are available.
// Top CU is not available across LCU boundary.
if ((x_px >> 3) > 0) {
left_cu = &lcu->cu[cu_index - 1];
left_cu = LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y);
}
if ((y_px >> 3) > 0 && lcu_cu.y != 0) {
above_cu = &lcu->cu[cu_index - LCU_T_CU_WIDTH];
above_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1);
}
kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);

View file

@ -220,10 +220,10 @@ int kvz_quantize_residual_trskip(
void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
{
// we have 64>>depth transform size
const vector2d_t lcu_px = {x & 0x3f, y & 0x3f};
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4);
if (cur_cu == NULL) {
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
const int8_t width = LCU_WIDTH>>depth;
@ -241,9 +241,9 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in
// Propagate coded block flags from child CUs to parent CU.
if (depth < MAX_DEPTH) {
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
cbf_set(&cur_cu->cbf.y, depth);
}
@ -304,11 +304,11 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in
void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
{
// we have 64>>depth transform size
const vector2d_t lcu_px = {x & 0x3f, y & 0x3f};
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4);
const int8_t width = LCU_WIDTH>>depth;
if (cur_cu == NULL) {
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
// Tell clang-analyzer what is up. For some reason it can't figure out from
@ -325,9 +325,9 @@ void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x,
// Propagate coded block flags from child CUs to parent CU.
if (depth < MAX_DEPTH) {
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
cbf_set(&cur_cu->cbf.u, depth);
}