mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Merge branch 'refactor-stuff'
This commit is contained in:
commit
ec7c84f7e0
67
src/cu.h
67
src/cu.h
|
@ -106,10 +106,15 @@ typedef struct {
|
|||
|
||||
cu_array_t * kvz_cu_array_alloc(int width_in_scu, int height_in_scu);
|
||||
int kvz_cu_array_free(cu_array_t *cua);
|
||||
|
||||
|
||||
#define SUB_SCU_BIT_MASK (64 - 1)
|
||||
#define SUB_SCU(xy) (xy & SUB_SCU_BIT_MASK)
|
||||
/**
|
||||
* \brief Return the 6 lowest-order bits of the pixel coordinate.
|
||||
*
|
||||
* The 6 lower-order bits correspond to the distance from the left or top edge
|
||||
* of the containing LCU.
|
||||
*/
|
||||
#define SUB_SCU(xy) ((xy) & (LCU_WIDTH - 1))
|
||||
|
||||
#define LCU_CU_WIDTH 8
|
||||
#define LCU_T_CU_WIDTH 9
|
||||
#define LCU_CU_OFFSET 10
|
||||
|
@ -153,10 +158,66 @@ typedef struct {
|
|||
* - Left reference CUs on column 0.
|
||||
* - All of LCUs CUs on 1:9, 1:9.
|
||||
* - Top right reference CU on the last slot.
|
||||
*
|
||||
\verbatim
|
||||
|
||||
.-- left reference CUs
|
||||
v
|
||||
0 | 1 2 3 4 5 6 7 8 | 81 <-- top reference CUs
|
||||
----+-------------------------+----
|
||||
9 | 10 11 12 13 14 15 16 17 |
|
||||
18 | 19 20 21 22 23 24 25 26 <-- this LCU
|
||||
27 | 28 29 30 31 32 33 34 35 |
|
||||
36 | 37 38 39 40 41 42 43 44 |
|
||||
45 | 46 47 48 49 50 51 52 53 |
|
||||
54 | 55 56 57 58 59 60 61 62 |
|
||||
63 | 64 65 66 67 68 69 70 71 |
|
||||
72 | 73 74 75 76 77 78 79 80 |
|
||||
----+-------------------------+----
|
||||
|
||||
\endverbatim
|
||||
*/
|
||||
cu_info_t cu[9*9+1];
|
||||
} lcu_t;
|
||||
|
||||
/**
|
||||
* \brief Return pointer to a given CU.
|
||||
*
|
||||
* \param lcu pointer to the containing LCU
|
||||
* \param x_cu x-index of the CU
|
||||
* \param y_cu y-index of the CU
|
||||
* \return pointer to the CU
|
||||
*/
|
||||
#define LCU_GET_CU(lcu, x_cu, y_cu) \
|
||||
(&(lcu)->cu[LCU_CU_OFFSET + (x_cu) + (y_cu) * LCU_T_CU_WIDTH])
|
||||
|
||||
/**
|
||||
* \brief Return pointer to the top right reference CU.
|
||||
*/
|
||||
#define LCU_GET_TOP_RIGHT_CU(lcu) \
|
||||
(&(lcu)->cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH])
|
||||
|
||||
/**
|
||||
* \brief Return pointer to the CU containing a given pixel.
|
||||
*
|
||||
* \param lcu pointer to the containing LCU
|
||||
* \param x_px x-coordinate relative to the upper left corner of the LCU
|
||||
* \param y_px y-coordinate relative to the upper left corner of the LCU
|
||||
* \return pointer to the CU at coordinates (x_px, y_px)
|
||||
*/
|
||||
#define LCU_GET_CU_AT_PX(lcu, x_px, y_px) LCU_GET_CU(lcu, (x_px) >> 3, (y_px) >> 3)
|
||||
|
||||
/**
|
||||
* \brief Return pointer to a CU relative to the given CU.
|
||||
*
|
||||
* \param cu pointer to a CU in the array at some location (x, y)
|
||||
* \param x_offs x-offset
|
||||
* \param y_offs y-offset
|
||||
* \return pointer to the CU at (x + x_offs, y + y_offs)
|
||||
*/
|
||||
#define CU_GET_CU(cu_array, x_offs, y_offs) \
|
||||
(&cu_array[(x_offs) + (y_offs) * LCU_T_CU_WIDTH])
|
||||
|
||||
#define CHECKPOINT_LCU(prefix_str, lcu) do { \
|
||||
CHECKPOINT_CU(prefix_str " cu[0]", (lcu).cu[0]); \
|
||||
CHECKPOINT_CU(prefix_str " cu[1]", (lcu).cu[1]); \
|
||||
|
|
409
src/filter.c
409
src/filter.c
|
@ -83,11 +83,16 @@ const int8_t kvz_g_chroma_filter[8][4] =
|
|||
/**
|
||||
* \brief
|
||||
*/
|
||||
INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset,
|
||||
int32_t tc, int8_t sw,
|
||||
int8_t part_P_nofilter, int8_t part_Q_nofilter,
|
||||
int32_t thr_cut,
|
||||
int8_t filter_second_P, int8_t filter_second_Q)
|
||||
static INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder,
|
||||
kvz_pixel *src,
|
||||
int32_t offset,
|
||||
int32_t tc,
|
||||
int8_t sw,
|
||||
int8_t part_P_nofilter,
|
||||
int8_t part_Q_nofilter,
|
||||
int32_t thr_cut,
|
||||
int8_t filter_second_P,
|
||||
int8_t filter_second_Q)
|
||||
{
|
||||
int32_t delta;
|
||||
|
||||
|
@ -143,8 +148,12 @@ INLINE void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz
|
|||
/**
|
||||
* \brief
|
||||
*/
|
||||
INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
|
||||
int8_t part_P_nofilter, int8_t part_Q_nofilter)
|
||||
static INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder,
|
||||
kvz_pixel *src,
|
||||
int32_t offset,
|
||||
int32_t tc,
|
||||
int8_t part_P_nofilter,
|
||||
int8_t part_Q_nofilter)
|
||||
{
|
||||
int32_t delta;
|
||||
int16_t m2 = src[-offset * 2];
|
||||
|
@ -161,72 +170,119 @@ INLINE void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, k
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief
|
||||
* \brief Check whether an edge is a TU boundary.
|
||||
*
|
||||
* \param state encoder state
|
||||
* \param x x-coordinate of the scu in pixels
|
||||
* \param y y-coordinate of the scu in pixels
|
||||
* \param dir direction of the edge to check
|
||||
* \return true, if the edge is a TU boundary, otherwise false
|
||||
*/
|
||||
void kvz_filter_deblock_edge_luma(encoder_state_t * const state,
|
||||
int32_t xpos, int32_t ypos,
|
||||
int8_t depth, int8_t dir)
|
||||
static bool is_tu_boundary(const encoder_state_t *const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
edge_dir dir)
|
||||
{
|
||||
const cu_info_t *const scu = kvz_videoframe_get_cu(state->tile->frame,
|
||||
x >> MIN_SIZE,
|
||||
y >> MIN_SIZE);
|
||||
const int tu_width = LCU_WIDTH >> scu->tr_depth;
|
||||
|
||||
if (dir == EDGE_HOR) {
|
||||
return (y & (tu_width - 1)) == 0;
|
||||
} else {
|
||||
return (x & (tu_width - 1)) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Check whether an edge is aligned on an 8x8 grid.
|
||||
*
|
||||
* \param x x-coordinate of the edge
|
||||
* \param y y-coordinate of the edge
|
||||
* \param dir direction of the edge
|
||||
* \return true, if the edge is aligned on an 8x8 grid, otherwise false
|
||||
*/
|
||||
static bool is_on_8x8_grid(int x, int y, edge_dir dir)
|
||||
{
|
||||
if (dir == EDGE_HOR) {
|
||||
return (y & 7) == 0;
|
||||
} else {
|
||||
return (x & 7) == 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Apply the deblocking filter to luma pixels on a single edge.
|
||||
*
|
||||
* The caller should check that the edge is a TU boundary or a PU boundary.
|
||||
*
|
||||
\verbatim
|
||||
|
||||
.-- filter this edge if dir == EDGE_HOR
|
||||
v
|
||||
+--------+
|
||||
|o <-- pixel at (x, y)
|
||||
| |
|
||||
|<-- filter this edge if dir == EDGE_VER
|
||||
| |
|
||||
+--------+
|
||||
|
||||
\endverbatim
|
||||
*
|
||||
* \param state encoder state
|
||||
* \param x x-coordinate in pixels (see above)
|
||||
* \param y y-coordinate in pixels (see above)
|
||||
* \param length length of the edge in pixels
|
||||
* \param dir direction of the edge to filter
|
||||
*/
|
||||
static void filter_deblock_edge_luma(encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t length,
|
||||
edge_dir dir)
|
||||
{
|
||||
videoframe_t * const frame = state->tile->frame;
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
|
||||
cu_info_t *cu_q = kvz_videoframe_get_cu(frame, xpos >> MIN_SIZE, ypos >> MIN_SIZE);
|
||||
|
||||
{
|
||||
// Return if called with a coordinate which is not at CU or TU boundary.
|
||||
// TODO: Add handling for asymmetric inter CU boundaries which do not coincide
|
||||
// with transform boundaries.
|
||||
const int tu_width = LCU_WIDTH >> cu_q->tr_depth;
|
||||
if (dir == EDGE_HOR && (ypos & (tu_width - 1))) return;
|
||||
if (dir == EDGE_VER && (xpos & (tu_width - 1))) return;
|
||||
}
|
||||
cu_info_t *cu_q = kvz_videoframe_get_cu(frame, x >> MIN_SIZE, y >> MIN_SIZE);
|
||||
|
||||
{
|
||||
int32_t stride = frame->rec->stride;
|
||||
int32_t offset = stride;
|
||||
int32_t beta_offset_div2 = encoder->beta_offset_div2;
|
||||
int32_t tc_offset_div2 = encoder->tc_offset_div2;
|
||||
// TODO: support 10+bits
|
||||
kvz_pixel *orig_src = &frame->rec->y[xpos + ypos*stride];
|
||||
kvz_pixel *orig_src = &frame->rec->y[x + y*stride];
|
||||
kvz_pixel *src = orig_src;
|
||||
int32_t step = 1;
|
||||
cu_info_t *cu_p = NULL;
|
||||
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
|
||||
int8_t strength = 0;
|
||||
int16_t x_cu = x >> MIN_SIZE;
|
||||
int16_t y_cu = y >> MIN_SIZE;
|
||||
|
||||
int8_t strength = 0;
|
||||
int32_t qp = state->global->QP;
|
||||
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
|
||||
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
|
||||
int32_t beta = kvz_g_beta_table_8x8[b_index] * bitdepth_scale;
|
||||
int32_t side_threshold = (beta + (beta >>1 )) >> 3;
|
||||
uint32_t blocks_in_part = (LCU_WIDTH >> depth) / 4;
|
||||
uint32_t block_idx;
|
||||
int32_t tc_index,tc,thr_cut;
|
||||
int32_t tc_index;
|
||||
int32_t tc;
|
||||
int32_t thr_cut;
|
||||
|
||||
if (dir == EDGE_VER) {
|
||||
offset = 1;
|
||||
step = stride;
|
||||
}
|
||||
uint32_t num_4px_parts = length / 4;
|
||||
|
||||
const int32_t offset = (dir == EDGE_HOR) ? stride : 1;
|
||||
const int32_t step = (dir == EDGE_HOR) ? 1 : stride;
|
||||
|
||||
// TODO: add CU based QP calculation
|
||||
|
||||
// For each 4-pixel part in the edge
|
||||
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
|
||||
for (uint32_t block_idx = 0; block_idx < num_4px_parts; ++block_idx) {
|
||||
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
|
||||
|
||||
{
|
||||
vector2d_t px = {
|
||||
(dir == EDGE_HOR ? xpos + block_idx * 4 : xpos),
|
||||
(dir == EDGE_VER ? ypos + block_idx * 4 : ypos)
|
||||
};
|
||||
|
||||
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
|
||||
// when processing the next LCU.
|
||||
if (block_idx > 0 && dir == EDGE_HOR && (px.x + 4) % 64 == 0 && (px.x + 4 != frame->width)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// CU in the side we are filtering, update every 8-pixels
|
||||
cu_p = kvz_videoframe_get_cu(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? block_idx>>1 : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? block_idx>>1 : 0));
|
||||
|
||||
|
@ -341,53 +397,60 @@ void kvz_filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
useStrongFiltering(offset, 2*d3, (src+step*(block_idx*4+3)));
|
||||
|
||||
// Filter four rows/columns
|
||||
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 0), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
|
||||
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 1), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
|
||||
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 2), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
|
||||
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + 3), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
kvz_filter_deblock_luma(encoder, src + step * (4*block_idx + i), offset, tc, sw, 0, 0, thr_cut, filter_P, filter_Q);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief
|
||||
* \brief Apply the deblocking filter to chroma pixels on a single edge.
|
||||
*
|
||||
* The caller should check that the edge is a TU boundary or a PU boundary.
|
||||
*
|
||||
\verbatim
|
||||
|
||||
.-- filter this edge if dir == EDGE_HOR
|
||||
v
|
||||
+--------+
|
||||
|o <-- pixel at (x, y)
|
||||
| |
|
||||
|<-- filter this edge if dir == EDGE_VER
|
||||
| |
|
||||
+--------+
|
||||
|
||||
\endverbatim
|
||||
*
|
||||
* \param state encoder state
|
||||
* \param x x-coordinate in chroma pixels (see above)
|
||||
* \param y y-coordinate in chroma pixels (see above)
|
||||
* \param length length of the edge in chroma pixels
|
||||
* \param dir direction of the edge to filter
|
||||
*/
|
||||
void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
|
||||
int32_t x, int32_t y,
|
||||
int8_t depth, int8_t dir)
|
||||
static void filter_deblock_edge_chroma(encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int32_t length,
|
||||
edge_dir dir)
|
||||
{
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
const cu_info_t *cu_q = kvz_videoframe_get_cu_const(frame, x >> (MIN_SIZE - 1), y >> (MIN_SIZE - 1));
|
||||
|
||||
// Chroma edges that do not lay on a 8x8 grid are not deblocked.
|
||||
if (depth >= MAX_DEPTH) {
|
||||
if (dir == EDGE_HOR && (y & (8 - 1))) return;
|
||||
if (dir == EDGE_VER && (x & (8 - 1))) return;
|
||||
}
|
||||
|
||||
{
|
||||
// Return if called with a coordinate which is not at CU or TU boundary.
|
||||
// TODO: Add handling for asymmetric inter CU boundaries which do not coincide
|
||||
// with transform boundaries.
|
||||
const int tu_width = (LCU_WIDTH / 2) >> cu_q->tr_depth;
|
||||
if (dir == EDGE_HOR && (y & (tu_width - 1))) return;
|
||||
if (dir == EDGE_VER && (x & (tu_width - 1))) return;
|
||||
}
|
||||
|
||||
// For each subpart
|
||||
{
|
||||
int32_t stride = frame->rec->stride >> 1;
|
||||
int32_t tc_offset_div2 = encoder->tc_offset_div2;
|
||||
// TODO: support 10+bits
|
||||
kvz_pixel *src_u = &frame->rec->u[x + y*stride];
|
||||
kvz_pixel *src_v = &frame->rec->v[x + y*stride];
|
||||
// Init offset and step to EDGE_HOR
|
||||
int32_t offset = stride;
|
||||
int32_t step = 1;
|
||||
kvz_pixel *src[] = {
|
||||
&frame->rec->u[x + y*stride],
|
||||
&frame->rec->v[x + y*stride],
|
||||
};
|
||||
const cu_info_t *cu_p = NULL;
|
||||
int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
|
||||
int16_t x_cu = x >> (MIN_SIZE-1);
|
||||
int16_t y_cu = y >> (MIN_SIZE-1);
|
||||
int8_t strength = 2;
|
||||
|
||||
int32_t QP = kvz_g_chroma_scale[state->global->QP];
|
||||
|
@ -395,42 +458,22 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
|
|||
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
|
||||
int32_t Tc = kvz_g_tc_table_8x8[TC_index]*bitdepth_scale;
|
||||
|
||||
// Special handling for depth 4. It's meaning is that we want to bypass
|
||||
// last block in LCU check in order to deblock just that block.
|
||||
uint32_t blocks_in_part= (LCU_WIDTH>>(depth == 4 ? depth : depth + 1)) / 4;
|
||||
uint32_t blk_idx;
|
||||
const uint32_t num_4px_parts = length / 4;
|
||||
|
||||
if(dir == EDGE_VER) {
|
||||
offset = 1;
|
||||
step = stride;
|
||||
}
|
||||
const int32_t offset = (dir == EDGE_HOR) ? stride : 1;
|
||||
const int32_t step = (dir == EDGE_HOR) ? 1 : stride;
|
||||
|
||||
for (blk_idx = 0; blk_idx < blocks_in_part; ++blk_idx)
|
||||
for (uint32_t blk_idx = 0; blk_idx < num_4px_parts; ++blk_idx)
|
||||
{
|
||||
vector2d_t px = {
|
||||
(dir == EDGE_HOR ? x + blk_idx * 4 : x),
|
||||
(dir == EDGE_VER ? y + blk_idx * 4 : y)
|
||||
};
|
||||
cu_p = kvz_videoframe_get_cu_const(frame, x_cu - (dir == EDGE_VER) + (dir == EDGE_HOR ? blk_idx : 0), y_cu - (dir == EDGE_HOR) + (dir == EDGE_VER ? blk_idx : 0));
|
||||
|
||||
// Don't deblock the last 4x4 block of the LCU. This will be deblocked
|
||||
// when processing the next LCU.
|
||||
if (depth != 4 && dir == EDGE_HOR && (px.x + 4) % 32 == 0 && (px.x + 4 != frame->width / 2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only filter when strength == 2 (one of the blocks is intra coded)
|
||||
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
|
||||
// Chroma U
|
||||
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 0), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 1), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 2), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_u + step * (4*blk_idx + 3), offset, Tc, 0, 0);
|
||||
// Chroma V
|
||||
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 0), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 1), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 2), offset, Tc, 0, 0);
|
||||
kvz_filter_deblock_chroma(encoder, src_v + step * (4*blk_idx + 3), offset, Tc, 0, 0);
|
||||
for (int component = 0; component < 2; component++) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
kvz_filter_deblock_chroma(encoder, src[component] + step * (4*blk_idx + i), offset, Tc, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -438,88 +481,154 @@ void kvz_filter_deblock_edge_chroma(encoder_state_t * const state,
|
|||
|
||||
/**
|
||||
* \brief function to split LCU into smaller CU blocks
|
||||
* \param encoder the encoder info structure
|
||||
* \param xCtb block x-position (as SCU)
|
||||
* \param yCtb block y-position (as SCU)
|
||||
* \param depth block depth
|
||||
* \param edge which edge we are filtering
|
||||
*
|
||||
* This function takes (SCU) block position as input and splits the block
|
||||
* until the coded block size has been achived. Calls luma and chroma filtering
|
||||
* functions for each coded CU size.
|
||||
* \param state encoder state
|
||||
* \param x block x-position in pixels
|
||||
* \param y block y-position in pixels
|
||||
* \param depth block depth
|
||||
* \param dir direction of the edges to filter
|
||||
*
|
||||
* Recursively traverse the CU/TU quadtree. At the lowest level, apply the
|
||||
* deblocking filter to the left edge (when dir == EDGE_VER) or the top edge
|
||||
* (when dir == EDGE_HOR) as needed. Both luma and chroma are filtered.
|
||||
*/
|
||||
void kvz_filter_deblock_cu(encoder_state_t * const state, int32_t x, int32_t y, int8_t depth, int32_t edge)
|
||||
static void filter_deblock_cu(encoder_state_t * const state,
|
||||
int32_t x,
|
||||
int32_t y,
|
||||
int8_t depth,
|
||||
edge_dir dir)
|
||||
{
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x, y);
|
||||
uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
|
||||
uint8_t tr_split = (cur_cu->tr_depth > depth) ? 1 : 0;
|
||||
uint8_t border_x = (frame->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
|
||||
uint8_t border_y = (frame->height < y*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
|
||||
uint8_t border_split_x = (frame->width < ((x + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1;
|
||||
uint8_t border_split_y = (frame->height < ((y + 1) * (LCU_WIDTH >> MAX_DEPTH)) + (LCU_WIDTH >> (depth + 1))) ? 0 : 1;
|
||||
const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame,
|
||||
x >> MAX_DEPTH,
|
||||
y >> MAX_DEPTH);
|
||||
|
||||
uint8_t border = border_x | border_y; // are we in any border CU?
|
||||
const int cu_width = LCU_WIDTH >> depth;
|
||||
const int half_cu_width = cu_width >> 1;
|
||||
const int scu_width = LCU_WIDTH >> MAX_DEPTH;
|
||||
const bool split_flag = cur_cu->depth > depth;
|
||||
const bool tr_split = cur_cu->tr_depth > depth;
|
||||
const bool border_x = frame->width < x + cu_width;
|
||||
const bool border_y = frame->height < y + cu_width;
|
||||
const bool border_split_x = frame->width >= x + scu_width + half_cu_width;
|
||||
const bool border_split_y = frame->height >= y + scu_width + half_cu_width;
|
||||
const bool border = border_x || border_y; // are we in any border CU?
|
||||
|
||||
// split 64x64, on split flag and on border
|
||||
if (depth < MAX_DEPTH && (depth == 0 || split_flag || border || tr_split)) {
|
||||
// Split the four sub-blocks of this block recursively.
|
||||
uint8_t change;
|
||||
assert(depth >= 0); // for clang-analyzer
|
||||
change = 1 << (MAX_DEPTH - 1 - depth);
|
||||
const int32_t x2 = x + half_cu_width;
|
||||
const int32_t y2 = y + half_cu_width;
|
||||
|
||||
kvz_filter_deblock_cu(state, x, y, depth + 1, edge);
|
||||
if(!border_x || border_split_x) {
|
||||
kvz_filter_deblock_cu(state, x + change, y, depth + 1, edge);
|
||||
filter_deblock_cu(state, x, y, depth + 1, dir);
|
||||
if (!border_x || border_split_x) {
|
||||
filter_deblock_cu(state, x2, y, depth + 1, dir);
|
||||
}
|
||||
if(!border_y || border_split_y) {
|
||||
kvz_filter_deblock_cu(state, x , y + change, depth + 1, edge);
|
||||
if (!border_y || border_split_y) {
|
||||
filter_deblock_cu(state, x, y2, depth + 1, dir);
|
||||
}
|
||||
if((!border_x && !border_y) || (border_split_x && border_split_y)) {
|
||||
kvz_filter_deblock_cu(state, x + change, y + change, depth + 1, edge);
|
||||
if (!border || (border_split_x && border_split_y)) {
|
||||
filter_deblock_cu(state, x2, y2, depth + 1, dir);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// no filtering on borders (where filter would use pixels outside the picture)
|
||||
if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return;
|
||||
if ((x == 0 && dir == EDGE_VER) || (y == 0 && dir == EDGE_HOR)) return;
|
||||
|
||||
// do the filtering for block edge
|
||||
kvz_filter_deblock_edge_luma(state, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge);
|
||||
kvz_filter_deblock_edge_chroma(state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
|
||||
if (is_tu_boundary(state, x, y, dir)) {
|
||||
// Length of luma and chroma edges.
|
||||
int32_t length;
|
||||
int32_t length_c;
|
||||
|
||||
const int32_t x_right = x + cu_width;
|
||||
const bool rightmost_4px_of_lcu = x_right % LCU_WIDTH == 0;
|
||||
const bool rightmost_4px_of_frame = x_right == frame->width;
|
||||
|
||||
if (dir == EDGE_HOR &&
|
||||
rightmost_4px_of_lcu &&
|
||||
!rightmost_4px_of_frame) {
|
||||
// The last 4 pixels will be deblocked when processing the next LCU.
|
||||
length = cu_width - 4;
|
||||
length_c = half_cu_width - 4;
|
||||
|
||||
} else {
|
||||
length = cu_width;
|
||||
length_c = half_cu_width;
|
||||
}
|
||||
|
||||
filter_deblock_edge_luma(state, x, y, length, dir);
|
||||
|
||||
// Chroma pixel coordinates.
|
||||
const int32_t x_c = x >> 1;
|
||||
const int32_t y_c = y >> 1;
|
||||
if (is_on_8x8_grid(x_c, y_c, dir)) {
|
||||
filter_deblock_edge_chroma(state, x_c, y_c, length_c, dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Deblock a single LCU without using data from right or down.
|
||||
*
|
||||
* Filter all the following edges:
|
||||
* - All edges within the LCU, except for the last 4 pixels on the right when
|
||||
* using horizontal filtering.
|
||||
* - Left edge and top edge.
|
||||
* - After vertical filtering the left edge, filter the last 4 pixels of
|
||||
* horizontal edges in the LCU to the left.
|
||||
* Filter the following vertical edges (horizontal filtering):
|
||||
* 1. The left edge of the LCU.
|
||||
* 2. All vertical edges within the LCU.
|
||||
*
|
||||
* Filter the following horizontal edges (vertical filtering):
|
||||
* 1. The rightmost 4 pixels of the top edge of the LCU to the left.
|
||||
* 2. The rightmost 4 pixels of all horizontal edges within the LCU to the
|
||||
* left.
|
||||
* 3. The top edge and all horizontal edges within the LCU, excluding the
|
||||
* rightmost 4 pixels. If the LCU is the rightmost LCU of the frame, the
|
||||
* last 4 pixels are also filtered.
|
||||
*
|
||||
* What is not filtered:
|
||||
* - The rightmost 4 pixels of the top edge and all horizontal edges within
|
||||
* the LCU, unless the LCU is the rightmost LCU of the frame.
|
||||
* - The bottom edge of the LCU.
|
||||
* - The right edge of the LCU.
|
||||
*
|
||||
* \param state encoder state
|
||||
* \param x_px x-coordinate of the left edge of the LCU in pixels
|
||||
* \param y_px y-coordinate of the top edge of the LCU in pixels
|
||||
*/
|
||||
void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px)
|
||||
{
|
||||
const vector2d_t lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH };
|
||||
|
||||
kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
|
||||
filter_deblock_cu(state, x_px, y_px, 0, EDGE_VER);
|
||||
|
||||
assert(x_px == lcu.x * LCU_WIDTH);
|
||||
assert(y_px == lcu.y * LCU_WIDTH);
|
||||
|
||||
// Filter rightmost 4 pixels from last LCU now that they have been
|
||||
// finally deblocked vertically.
|
||||
if (lcu.x > 0) {
|
||||
int y;
|
||||
for (y = 0; y < 64; y += 8) {
|
||||
if (lcu.y + y == 0) continue;
|
||||
kvz_filter_deblock_edge_luma(state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
|
||||
if (x_px > 0) {
|
||||
// Luma
|
||||
const int x = x_px - 4;
|
||||
const int end = MIN(y_px + LCU_WIDTH, state->tile->frame->height);
|
||||
for (int y = y_px; y < end; y += 8) {
|
||||
// The top edge of the whole frame is not filtered.
|
||||
if (y > 0 && is_tu_boundary(state, x, y, EDGE_HOR)) {
|
||||
filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR);
|
||||
}
|
||||
}
|
||||
for (y = 0; y < 32; y += 8) {
|
||||
if (lcu.y + y == 0) continue;
|
||||
kvz_filter_deblock_edge_chroma(state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
|
||||
|
||||
// Chroma
|
||||
const int x_px_c = x_px >> 1;
|
||||
const int y_px_c = y_px >> 1;
|
||||
const int x_c = x_px_c - 4;
|
||||
const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
|
||||
for (int y_c = y_px_c; y_c < end_c; y_c += 8) {
|
||||
// The top edge of the whole frame is not filtered.
|
||||
if (y_c > 0 && is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR)) {
|
||||
filter_deblock_edge_chroma(state, x_c, y_c, 4, EDGE_HOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kvz_filter_deblock_cu(state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
|
||||
filter_deblock_cu(state, x_px, y_px, 0, EDGE_HOR);
|
||||
}
|
||||
|
||||
|
|
33
src/filter.h
33
src/filter.h
|
@ -31,30 +31,15 @@
|
|||
#include "encoderstate.h"
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// FUNCTIONS
|
||||
// Deblocking
|
||||
void kvz_filter_deblock_cu(encoder_state_t *state, int32_t x_px, int32_t y_px,
|
||||
int8_t depth, int32_t edge);
|
||||
void kvz_filter_deblock_edge_luma(encoder_state_t *state,
|
||||
int32_t x_pos, int32_t y_pos,
|
||||
int8_t depth, int8_t dir);
|
||||
void kvz_filter_deblock_edge_chroma(encoder_state_t *state,
|
||||
int32_t xpos, int32_t ypos,
|
||||
int8_t depth, int8_t dir);
|
||||
/**
|
||||
* \brief Edge direction.
|
||||
*/
|
||||
typedef enum edge_dir {
|
||||
EDGE_VER = 0, // vertical
|
||||
EDGE_HOR = 1, // horizontal
|
||||
} edge_dir;
|
||||
|
||||
|
||||
void kvz_filter_deblock_lcu(encoder_state_t *state, int x_px, int y_px);
|
||||
void kvz_filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc , int8_t sw,
|
||||
int8_t part_p_nofilter, int8_t part_q_nofilter,
|
||||
int32_t thr_cut,
|
||||
int8_t filter_second_p, int8_t filter_second_q);
|
||||
void kvz_filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
|
||||
int8_t part_p_nofilter, int8_t part_q_nofilter);
|
||||
|
||||
// SAO
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// MACROS
|
||||
#define EDGE_VER 0
|
||||
#define EDGE_HOR 1
|
||||
|
||||
#endif
|
||||
|
|
34
src/inter.c
34
src/inter.c
|
@ -395,15 +395,12 @@ void kvz_inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_p
|
|||
* \param cu coding unit to clear
|
||||
*/
|
||||
static void inter_clear_cu_unused(cu_info_t* cu) {
|
||||
if(!(cu->inter.mv_dir & 1)) {
|
||||
cu->inter.mv[0][0] = 0;
|
||||
cu->inter.mv[0][1] = 0;
|
||||
cu->inter.mv_ref[0] = 255;
|
||||
}
|
||||
if(!(cu->inter.mv_dir & 2)) {
|
||||
cu->inter.mv[1][0] = 0;
|
||||
cu->inter.mv[1][1] = 0;
|
||||
cu->inter.mv_ref[1] = 255;
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
if (cu->inter.mv_dir & (1 << i)) continue;
|
||||
|
||||
cu->inter.mv[i][0] = 0;
|
||||
cu->inter.mv[i][1] = 0;
|
||||
cu->inter.mv_ref[i] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -433,17 +430,16 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth,
|
|||
|A1|_________|
|
||||
|A0|
|
||||
*/
|
||||
int32_t x_cu = (x & (LCU_WIDTH - 1)) >> MAX_DEPTH; //!< coordinates from top-left of this LCU
|
||||
int32_t y_cu = (y & (LCU_WIDTH - 1)) >> MAX_DEPTH;
|
||||
cu_info_t* cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
int32_t x_cu = SUB_SCU(x) >> MAX_DEPTH; //!< coordinates from top-left of this LCU
|
||||
int32_t y_cu = SUB_SCU(y) >> MAX_DEPTH;
|
||||
// A0 and A1 availability testing
|
||||
if (x != 0) {
|
||||
*a1 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu - 1) * LCU_T_CU_WIDTH];
|
||||
*a1 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu - 1);
|
||||
if (!(*a1)->coded) *a1 = NULL;
|
||||
if(*a1) inter_clear_cu_unused(*a1);
|
||||
|
||||
if (y_cu + cur_block_in_scu < LCU_WIDTH>>3) {
|
||||
*a0 = &cu[x_cu - 1 + (y_cu + cur_block_in_scu) * LCU_T_CU_WIDTH];
|
||||
*a0 = LCU_GET_CU(lcu, x_cu - 1, y_cu + cur_block_in_scu);
|
||||
if (!(*a0)->coded) *a0 = NULL;
|
||||
}
|
||||
if(*a0) inter_clear_cu_unused(*a0);
|
||||
|
@ -452,21 +448,21 @@ void kvz_inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth,
|
|||
// B0, B1 and B2 availability testing
|
||||
if (y != 0) {
|
||||
if (x_cu + cur_block_in_scu < LCU_WIDTH>>3) {
|
||||
*b0 = &cu[x_cu + cur_block_in_scu + (y_cu - 1) * LCU_T_CU_WIDTH];
|
||||
*b0 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu, y_cu - 1);
|
||||
if (!(*b0)->coded) *b0 = NULL;
|
||||
} else if(y_cu == 0) {
|
||||
// Special case, top-right cu from LCU is the last in lcu->cu array
|
||||
*b0 = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH];
|
||||
// Special case, top-right CU
|
||||
*b0 = LCU_GET_TOP_RIGHT_CU(lcu);
|
||||
if (!(*b0)->coded) *b0 = NULL;
|
||||
}
|
||||
if(*b0) inter_clear_cu_unused(*b0);
|
||||
|
||||
*b1 = &cu[x_cu + cur_block_in_scu - 1 + (y_cu - 1) * LCU_T_CU_WIDTH];
|
||||
*b1 = LCU_GET_CU(lcu, x_cu + cur_block_in_scu - 1, y_cu - 1);
|
||||
if (!(*b1)->coded) *b1 = NULL;
|
||||
if(*b1) inter_clear_cu_unused(*b1);
|
||||
|
||||
if (x != 0) {
|
||||
*b2 = &cu[x_cu - 1 + (y_cu - 1) * LCU_T_CU_WIDTH];
|
||||
*b2 = LCU_GET_CU(lcu, x_cu - 1, y_cu - 1);
|
||||
if(!(*b2)->coded) *b2 = NULL;
|
||||
}
|
||||
if(*b2) inter_clear_cu_unused(*b2);
|
||||
|
|
20
src/intra.c
20
src/intra.c
|
@ -485,9 +485,9 @@ void kvz_intra_recon_lcu_luma(
|
|||
cu_info_t *cur_cu,
|
||||
lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { x & 0x3f, y & 0x3f };
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
if (cur_cu == NULL) {
|
||||
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
const int8_t width = LCU_WIDTH >> depth;
|
||||
|
||||
|
@ -500,9 +500,9 @@ void kvz_intra_recon_lcu_luma(
|
|||
kvz_intra_recon_lcu_luma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu);
|
||||
|
||||
if (depth < MAX_DEPTH) {
|
||||
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
|
||||
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
|
||||
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
|
||||
if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
|
||||
cbf_set(&cur_cu->cbf.y, depth);
|
||||
}
|
||||
|
@ -537,12 +537,12 @@ void kvz_intra_recon_lcu_chroma(
|
|||
cu_info_t *cur_cu,
|
||||
lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { x & 0x3f, y & 0x3f };
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
const int8_t width = LCU_WIDTH >> depth;
|
||||
const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
|
||||
|
||||
if (cur_cu == NULL) {
|
||||
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
|
||||
if (depth == 0 || cur_cu->tr_depth > depth) {
|
||||
|
@ -554,9 +554,9 @@ void kvz_intra_recon_lcu_chroma(
|
|||
kvz_intra_recon_lcu_chroma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu);
|
||||
|
||||
if (depth < MAX_DEPTH) {
|
||||
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
|
||||
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
|
||||
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
|
||||
if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
|
||||
cbf_set(&cur_cu->cbf.u, depth);
|
||||
}
|
||||
|
|
69
src/search.c
69
src/search.c
|
@ -82,8 +82,8 @@ static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX
|
|||
int x, y;
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
const cu_info_t *from_cu = &work_tree[depth + 1].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = &work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
|
||||
const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth + 1], x, y);
|
||||
cu_info_t *to_cu = LCU_GET_CU(&work_tree[depth], x, y);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
}
|
||||
|
@ -142,8 +142,8 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M
|
|||
int x, y;
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
const cu_info_t *from_cu = &work_tree[depth].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = &work_tree[d].cu[LCU_CU_OFFSET + x + y * LCU_T_CU_WIDTH];
|
||||
const cu_info_t *from_cu = LCU_GET_CU(&work_tree[depth], x, y);
|
||||
cu_info_t *to_cu = LCU_GET_CU(&work_tree[d], x, y);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
}
|
||||
|
@ -173,16 +173,15 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M
|
|||
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
|
||||
{
|
||||
const int width_cu = LCU_CU_WIDTH >> depth;
|
||||
const vector2d_t lcu_cu = { (x_px & (LCU_WIDTH - 1)) / 8, (y_px & (LCU_WIDTH - 1)) / 8 };
|
||||
cu_info_t *const cur_cu = &lcu->cu[lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH + LCU_CU_OFFSET];
|
||||
const vector2d_t lcu_cu = { SUB_SCU(x_px) / 8, SUB_SCU(y_px) / 8 };
|
||||
int x, y;
|
||||
|
||||
// Depth 4 doesn't go inside the loop. Set the top-left CU.
|
||||
cur_cu->tr_depth = tr_depth;
|
||||
LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth;
|
||||
|
||||
for (y = 0; y < width_cu; ++y) {
|
||||
for (x = 0; x < width_cu; ++x) {
|
||||
cu_info_t *cu = &cur_cu[x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu = LCU_GET_CU(lcu, lcu_cu.x + x, lcu_cu.y + y);
|
||||
cu->tr_depth = tr_depth;
|
||||
}
|
||||
}
|
||||
|
@ -194,12 +193,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
|
|||
const int width_cu = LCU_CU_WIDTH >> depth;
|
||||
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
|
||||
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
|
||||
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
int x, y;
|
||||
|
||||
// NxN can only be applied to a single CU at a time.
|
||||
if (part_mode == SIZE_NxN) {
|
||||
cu_info_t *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu = LCU_GET_CU(lcu, x_cu, y_cu);
|
||||
cu->depth = MAX_DEPTH;
|
||||
cu->type = CU_INTRA;
|
||||
cu->intra[PU_INDEX(x_px / 4, y_px / 4)].mode = pred_mode;
|
||||
|
@ -211,7 +209,7 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
|
|||
// Set mode in every CU covered by part_mode in this depth.
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
|
||||
cu->depth = depth;
|
||||
cu->type = CU_INTRA;
|
||||
cu->intra[0].mode = pred_mode;
|
||||
|
@ -231,12 +229,11 @@ static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *
|
|||
const int width_cu = LCU_CU_WIDTH >> depth;
|
||||
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
|
||||
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
|
||||
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
int x, y;
|
||||
// Set mode in every CU covered by part_mode in this depth.
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
|
||||
//Check if this could be moved inside the if
|
||||
cu->coded = 1;
|
||||
if (cu != cur_cu) {
|
||||
|
@ -257,17 +254,16 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *
|
|||
const int width_cu = LCU_CU_WIDTH >> depth;
|
||||
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
|
||||
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
|
||||
cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
int x, y;
|
||||
int tr_split = cur_cu->tr_depth-cur_cu->depth;
|
||||
|
||||
// Set coeff flags in every CU covered by part_mode in this depth.
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
cu_info_t *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu = LCU_GET_CU(lcu, x, y);
|
||||
// Use TU top-left CU to propagate coeff flags
|
||||
uint32_t mask = ~((width_cu>>tr_split)-1);
|
||||
cu_info_t *cu_from = &lcu_cu[(x & mask) + (y & mask) * LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_from = LCU_GET_CU(lcu, x & mask, y & mask);
|
||||
if (cu != cu_from) {
|
||||
// Chroma coeff data is not used, luma is needed for deblocking
|
||||
cu->cbf.y = cu_from->cbf.y;
|
||||
|
@ -295,7 +291,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
|
|||
const uint8_t pu_index = PU_INDEX(x_px / 4, y_px / 4);
|
||||
|
||||
// cur_cu is used for TU parameters.
|
||||
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (x_px / 8) + (y_px / 8) * LCU_T_CU_WIDTH];
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
|
||||
|
||||
double coeff_bits = 0;
|
||||
double tr_tree_bits = 0;
|
||||
|
@ -368,7 +364,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
|
|||
{
|
||||
const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
|
||||
const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
|
||||
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x / 4) + (lcu_px.y / 4)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *const tr_cu = LCU_GET_CU(lcu, lcu_px.x / 4, lcu_px.y / 4);
|
||||
|
||||
double tr_tree_bits = 0;
|
||||
double coeff_bits = 0;
|
||||
|
@ -450,8 +446,8 @@ static double calc_mode_bits(const encoder_state_t *state,
|
|||
} else {
|
||||
int8_t candidate_modes[3];
|
||||
{
|
||||
const cu_info_t *left_cu = ((x > 8) ? &cur_cu[-1] : NULL);
|
||||
const cu_info_t *above_cu = ((y > 8) ? &cur_cu[-LCU_T_CU_WIDTH] : NULL);
|
||||
const cu_info_t *left_cu = ((x > 8) ? CU_GET_CU(cur_cu, -1, 0) : NULL);
|
||||
const cu_info_t *above_cu = ((y > 8) ? CU_GET_CU(cur_cu, 0, -1) : NULL);
|
||||
kvz_intra_get_dir_luma_predictor(x, y, candidate_modes, cur_cu, left_cu, above_cu);
|
||||
}
|
||||
|
||||
|
@ -467,10 +463,9 @@ static double calc_mode_bits(const encoder_state_t *state,
|
|||
|
||||
/**
* \brief Count the left and above neighbour CUs whose depth exceeds `depth`.
*
* The position inside the LCU is derived from the low bits of x and y and
* divided by 8 to get CU indices. The left neighbour is only examined when
* x >= 8 and the above neighbour only when y >= 8; note that these checks use
* x and y as passed in, not the LCU-local position.
*
* Presumably the result is used as the context index for the CU split flag
* (going by the function name) -- confirm against the caller.
*
* NOTE(review): this listing carries two variants of the same statements, one
* indexing lcu->cu explicitly with a 0x3f mask and one using SUB_SCU and
* LCU_GET_CU. They address the same elements provided LCU_WIDTH is 64.
*
* \param lcu    LCU whose cu array is read
* \param x      x-coordinate of the block
* \param y      y-coordinate of the block
* \param depth  depth the neighbours are compared against
* \return 0, 1 or 2: the number of examined neighbours with depth > `depth`
*/
static uint8_t get_ctx_cu_split_model(const lcu_t *lcu, int x, int y, int depth)
|
||||
{
|
||||
vector2d_t lcu_cu = { (x & 0x3f) / 8, (y & 0x3f) / 8 };
|
||||
const cu_info_t *cu_array = &(lcu)->cu[LCU_CU_OFFSET];
|
||||
bool condA = x >= 8 && cu_array[(lcu_cu.x - 1) + lcu_cu.y * LCU_T_CU_WIDTH].depth > depth;
|
||||
bool condL = y >= 8 && cu_array[lcu_cu.x + (lcu_cu.y - 1) * LCU_T_CU_WIDTH].depth > depth;
|
||||
vector2d_t lcu_cu = { SUB_SCU(x) / 8, SUB_SCU(y) / 8 };
|
||||
// condA looks at the CU one column to the left, condL at the CU one row above.
bool condA = x >= 8 && LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y )->depth > depth;
|
||||
bool condL = y >= 8 && LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1)->depth > depth;
|
||||
// Each bool contributes 0 or 1 to the sum.
return condA + condL;
|
||||
}
|
||||
|
||||
|
@ -494,7 +489,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
|
||||
lcu_t *const lcu = &work_tree[depth];
|
||||
|
||||
int x_local = (x&0x3f), y_local = (y&0x3f);
|
||||
int x_local = SUB_SCU(x);
|
||||
int y_local = SUB_SCU(y);
|
||||
#ifdef KVZ_DEBUG
|
||||
int debug_split = 0;
|
||||
#endif
|
||||
|
@ -506,7 +502,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
return 0;
|
||||
}
|
||||
|
||||
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
|
||||
cur_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x_local, y_local);
|
||||
// Assign correct depth
|
||||
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
|
||||
cur_cu->tr_depth = depth > 0 ? depth : 1;
|
||||
|
@ -647,8 +643,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
&& x + cu_width <= frame->width && y + cu_width <= frame->height)
|
||||
{
|
||||
vector2d_t lcu_cu = { x_local / 8, y_local / 8 };
|
||||
cu_info_t *cu_array_d1 = &(&work_tree[depth + 1])->cu[LCU_CU_OFFSET];
|
||||
cu_info_t *cu_d1 = &cu_array_d1[(lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH)];
|
||||
cu_info_t *cu_d1 = LCU_GET_CU(&work_tree[depth + 1], lcu_cu.x, lcu_cu.y);
|
||||
|
||||
// If the best CU in depth+1 is intra and the biggest it can be, try it.
|
||||
if (cu_d1->type == CU_INTRA && cu_d1->depth == depth + 1) {
|
||||
|
@ -715,16 +710,12 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i
|
|||
const int x_cu = x >> MAX_DEPTH;
|
||||
const int y_cu = y >> MAX_DEPTH;
|
||||
|
||||
// Use top-left sub-cu of LCU as pointer to lcu->cu array to make things
|
||||
// simpler.
|
||||
cu_info_t *lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
|
||||
// Copy top CU row.
|
||||
if (y_cu > 0) {
|
||||
int i;
|
||||
for (i = 0; i < LCU_CU_WIDTH; ++i) {
|
||||
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + i, y_cu - 1);
|
||||
cu_info_t *to_cu = &lcu_cu[i - LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = LCU_GET_CU(lcu, i, -1);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
}
|
||||
|
@ -733,21 +724,21 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i
|
|||
int i;
|
||||
for (i = 0; i < LCU_CU_WIDTH; ++i) {
|
||||
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu + i);
|
||||
cu_info_t *to_cu = &lcu_cu[-1 + i * LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = LCU_GET_CU(lcu, -1, i);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
}
|
||||
// Copy top-left CU.
|
||||
if (x_cu > 0 && y_cu > 0) {
|
||||
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu - 1, y_cu - 1);
|
||||
cu_info_t *to_cu = &lcu_cu[-1 - LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = LCU_GET_CU(lcu, -1, -1);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
|
||||
// Copy top-right CU.
|
||||
if (y_cu > 0 && x + LCU_WIDTH < frame->width) {
|
||||
const cu_info_t *from_cu = kvz_videoframe_get_cu_const(frame, x_cu + LCU_CU_WIDTH, y_cu - 1);
|
||||
cu_info_t *to_cu = &lcu->cu[LCU_T_CU_WIDTH*LCU_T_CU_WIDTH];
|
||||
cu_info_t *to_cu = LCU_GET_TOP_RIGHT_CU(lcu);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
}
|
||||
|
@ -806,14 +797,10 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i
|
|||
const int y_cu = y_px >> MAX_DEPTH;
|
||||
videoframe_t * const frame = state->tile->frame;
|
||||
|
||||
// Use top-left sub-cu of LCU as pointer to lcu->cu array to make things
|
||||
// simpler.
|
||||
const cu_info_t *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
|
||||
int x, y;
|
||||
for (y = 0; y < LCU_CU_WIDTH; ++y) {
|
||||
for (x = 0; x < LCU_CU_WIDTH; ++x) {
|
||||
const cu_info_t *from_cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
|
||||
const cu_info_t *from_cu = LCU_GET_CU(lcu, x, y);
|
||||
cu_info_t *to_cu = kvz_videoframe_get_cu(frame, x_cu + x, y_cu + y);
|
||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
||||
}
|
||||
|
|
|
@ -951,6 +951,127 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Perform inter search for a single reference frame.
*
* Runs the integer motion search selected by cfg->ime_algorithm (followed by
* the fractional search when cfg->fme_level > 0) against the reference
* picture ref_idx. The result is written to cur_cu (merged, merge_idx and the
* inter fields) only when its cost is lower than the current
* cur_cu->inter.cost; otherwise cur_cu is left as it was.
*
* \param state         encoder state; supplies the frame, reference list, refmap and config
* \param x             x-coordinate of the block (presumably in pixels; x >> 3 is the CU index)
* \param y             y-coordinate of the block (presumably in pixels; y >> 3 is the CU index)
* \param depth         depth passed on to the candidate derivation and search functions
* \param lcu           LCU passed on to kvz_inter_get_mv_cand
* \param cur_cu        CU whose inter data is compared against and updated on improvement
* \param mv_cand       MV predictor candidates; passed to kvz_inter_get_mv_cand
*                      (presumably filled there) and read afterwards
* \param merge_cand    merge candidates that the found MV is matched against
* \param num_cand      number of entries in merge_cand to examine
* \param ref_idx       index of the reference picture in state->global->ref
* \param get_mvd_cost  callback returning the cost of a motion vector difference
|
||||
*/
|
||||
static void search_cu_inter_ref(const encoder_state_t * const state,
|
||||
int x, int y, int depth,
|
||||
lcu_t *lcu, cu_info_t *cur_cu,
|
||||
int16_t mv_cand[2][2],
|
||||
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||
int16_t num_cand,
|
||||
unsigned ref_idx,
|
||||
uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*))
|
||||
{
|
||||
const int x_cu = x >> 3;
|
||||
const int y_cu = y >> 3;
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
kvz_picture *ref_image = state->global->ref->images[ref_idx];
|
||||
uint32_t temp_bitcost = 0;
|
||||
uint32_t temp_cost = 0;
|
||||
vector2d_t orig, mvd;
|
||||
int32_t merged = 0;
|
||||
uint8_t cu_mv_cand = 0;
|
||||
int8_t merge_idx = 0;
|
||||
// ref_list (refmap list value minus one) indexes the per-list arrays below.
int8_t ref_list = state->global->refmap[ref_idx].list-1;
|
||||
// Saved so that mv_ref can be restored after candidate derivation.
int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list];
|
||||
// Search origin: the CU index scaled back by CU_MIN_SIZE_PIXELS.
orig.x = x_cu * CU_MIN_SIZE_PIXELS;
|
||||
orig.y = y_cu * CU_MIN_SIZE_PIXELS;
|
||||
// Get MV candidates
// mv_ref is overwritten with ref_idx only for the duration of the call and
// restored immediately afterwards.
|
||||
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
||||
kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list);
|
||||
cur_cu->inter.mv_ref[ref_list] = temp_ref_idx;
|
||||
|
||||
// Start from a zero vector unless the reference CU below provides one.
vector2d_t mv = { 0, 0 };
|
||||
{
|
||||
// Take starting point for MV search from previous frame.
|
||||
// When temporal motion vector candidates are added, there is probably
|
||||
// no point to this anymore, but for now it helps.
|
||||
// (LCU_WIDTH >> (depth+1)) is half of LCU_WIDTH >> depth, so these are the
// CU indices of the point half a block right of and below (x, y).
int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8;
|
||||
int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8;
|
||||
cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)];
|
||||
if (ref_cu->type == CU_INTER) {
|
||||
// Use the list-0 vector when bit 0 of mv_dir is set, otherwise list 1.
if (ref_cu->inter.mv_dir & 1) {
|
||||
mv.x = ref_cu->inter.mv[0][0];
|
||||
mv.y = ref_cu->inter.mv[0][1];
|
||||
} else {
|
||||
mv.x = ref_cu->inter.mv[1][0];
|
||||
mv.y = ref_cu->inter.mv[1][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if SEARCH_MV_FULL_RADIUS
|
||||
// NOTE(review): ref_pic is not declared in this function (the local is
// ref_image), and frame is passed here where the other searches pass
// frame->source. This branch presumably does not build when
// SEARCH_MV_FULL_RADIUS is enabled -- confirm against search_mv_full.
temp_cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
#else
|
||||
// Integer search: TZ when configured, hexagon search for every other value.
switch (state->encoder_control->cfg->ime_algorithm) {
|
||||
case KVZ_IME_TZ:
|
||||
temp_cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
break;
|
||||
|
||||
default:
|
||||
temp_cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (state->encoder_control->cfg->fme_level > 0) {
|
||||
// NOTE(review): plain assignment, so the cost accumulated by the integer
// search above is replaced -- presumably search_frac returns a complete
// cost for the refined mv; confirm.
temp_cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
}
|
||||
|
||||
merged = 0;
|
||||
// Check every candidate to find a match
// If nothing matches, the loop ends with merge_idx == num_cand and merged == 0.
|
||||
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
||||
// Candidates with dir == 3 are skipped (presumably bi-prediction); for the
// others dir - 1 selects which mv/ref entry is compared.
if (merge_cand[merge_idx].dir != 3 &&
|
||||
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x &&
|
||||
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y &&
|
||||
(uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
|
||||
merged = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Only check when candidates are different
|
||||
if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) {
|
||||
vector2d_t mvd_temp1, mvd_temp2;
|
||||
int cand1_cost,cand2_cost;
|
||||
|
||||
mvd_temp1.x = mv.x - mv_cand[0][0];
|
||||
mvd_temp1.y = mv.y - mv_cand[0][1];
|
||||
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
|
||||
|
||||
mvd_temp2.x = mv.x - mv_cand[1][0];
|
||||
mvd_temp2.y = mv.y - mv_cand[1][1];
|
||||
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
|
||||
|
||||
// Select candidate index 1 only if it is strictly cheaper; ties keep index 0
|
||||
if (cand2_cost < cand1_cost) {
|
||||
cu_mv_cand = 1;
|
||||
}
|
||||
}
|
||||
// MVD relative to the chosen predictor (index 0 unless index 1 was picked above).
mvd.x = mv.x - mv_cand[cu_mv_cand][0];
|
||||
mvd.y = mv.y - mv_cand[cu_mv_cand][1];
|
||||
|
||||
// Keep this reference only if it beats the cost already stored in cur_cu.
if(temp_cost < cur_cu->inter.cost) {
|
||||
|
||||
// Map reference index to L0/L1 pictures
|
||||
cur_cu->inter.mv_dir = ref_list+1;
|
||||
cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx;
|
||||
|
||||
cur_cu->merged = merged;
|
||||
cur_cu->merge_idx = merge_idx;
|
||||
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
||||
cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x;
|
||||
cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y;
|
||||
cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x;
|
||||
cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y;
|
||||
cur_cu->inter.cost = temp_cost;
|
||||
// Bit cost from the search plus (mv_dir - 1) plus the coded reference index.
cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list];
|
||||
cur_cu->inter.mv_cand[ref_list] = cu_mv_cand;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Update lcu to have best modes at this depth.
|
||||
* \return Cost of best mode.
|
||||
|
@ -959,12 +1080,9 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
|
|||
{
|
||||
const videoframe_t * const frame = state->tile->frame;
|
||||
uint32_t ref_idx = 0;
|
||||
int x_local = (x&0x3f), y_local = (y&0x3f);
|
||||
int x_cu = x>>3;
|
||||
int y_cu = y>>3;
|
||||
int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
|
||||
|
||||
cu_info_t *cur_cu = &lcu->cu[cu_pos];
|
||||
int x_local = SUB_SCU(x);
|
||||
int y_local = SUB_SCU(y);
|
||||
cu_info_t *cur_cu = LCU_GET_CU(lcu, x_local >> 3, y_local >> 3);
|
||||
|
||||
int16_t mv_cand[2][2];
|
||||
// Search for merge mode candidate
|
||||
|
@ -991,108 +1109,12 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
|
|||
cur_cu->inter.cost = UINT_MAX;
|
||||
|
||||
for (ref_idx = 0; ref_idx < state->global->ref->used_size; ref_idx++) {
|
||||
kvz_picture *ref_image = state->global->ref->images[ref_idx];
|
||||
uint32_t temp_bitcost = 0;
|
||||
uint32_t temp_cost = 0;
|
||||
vector2d_t orig, mvd;
|
||||
int32_t merged = 0;
|
||||
uint8_t cu_mv_cand = 0;
|
||||
int8_t merge_idx = 0;
|
||||
int8_t ref_list = state->global->refmap[ref_idx].list-1;
|
||||
int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list];
|
||||
orig.x = x_cu * CU_MIN_SIZE_PIXELS;
|
||||
orig.y = y_cu * CU_MIN_SIZE_PIXELS;
|
||||
// Get MV candidates
|
||||
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
||||
kvz_inter_get_mv_cand(state, x, y, depth, mv_cand, cur_cu, lcu, ref_list);
|
||||
cur_cu->inter.mv_ref[ref_list] = temp_ref_idx;
|
||||
|
||||
vector2d_t mv = { 0, 0 };
|
||||
{
|
||||
// Take starting point for MV search from previous frame.
|
||||
// When temporal motion vector candidates are added, there is probably
|
||||
// no point to this anymore, but for now it helps.
|
||||
int mid_x_cu = (x + (LCU_WIDTH >> (depth+1))) / 8;
|
||||
int mid_y_cu = (y + (LCU_WIDTH >> (depth+1))) / 8;
|
||||
cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)];
|
||||
if (ref_cu->type == CU_INTER) {
|
||||
if (ref_cu->inter.mv_dir & 1) {
|
||||
mv.x = ref_cu->inter.mv[0][0];
|
||||
mv.y = ref_cu->inter.mv[0][1];
|
||||
} else {
|
||||
mv.x = ref_cu->inter.mv[1][0];
|
||||
mv.y = ref_cu->inter.mv[1][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if SEARCH_MV_FULL_RADIUS
|
||||
temp_cost += search_mv_full(depth, frame, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
#else
|
||||
switch (state->encoder_control->cfg->ime_algorithm) {
|
||||
case KVZ_IME_TZ:
|
||||
temp_cost += tz_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
break;
|
||||
|
||||
default:
|
||||
temp_cost += hexagon_search(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
if (state->encoder_control->cfg->fme_level > 0) {
|
||||
temp_cost = search_frac(state, depth, frame->source, ref_image, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||
}
|
||||
|
||||
merged = 0;
|
||||
// Check every candidate to find a match
|
||||
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
||||
if (merge_cand[merge_idx].dir != 3 &&
|
||||
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x &&
|
||||
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y &&
|
||||
(uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
|
||||
merged = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Only check when candidates are different
|
||||
if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) {
|
||||
vector2d_t mvd_temp1, mvd_temp2;
|
||||
int cand1_cost,cand2_cost;
|
||||
|
||||
mvd_temp1.x = mv.x - mv_cand[0][0];
|
||||
mvd_temp1.y = mv.y - mv_cand[0][1];
|
||||
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
|
||||
|
||||
mvd_temp2.x = mv.x - mv_cand[1][0];
|
||||
mvd_temp2.y = mv.y - mv_cand[1][1];
|
||||
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
|
||||
|
||||
// Select candidate 1 if it has lower cost
|
||||
if (cand2_cost < cand1_cost) {
|
||||
cu_mv_cand = 1;
|
||||
}
|
||||
}
|
||||
mvd.x = mv.x - mv_cand[cu_mv_cand][0];
|
||||
mvd.y = mv.y - mv_cand[cu_mv_cand][1];
|
||||
|
||||
if(temp_cost < cur_cu->inter.cost) {
|
||||
|
||||
// Map reference index to L0/L1 pictures
|
||||
cur_cu->inter.mv_dir = ref_list+1;
|
||||
cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx;
|
||||
|
||||
cur_cu->merged = merged;
|
||||
cur_cu->merge_idx = merge_idx;
|
||||
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
||||
cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x;
|
||||
cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y;
|
||||
cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x;
|
||||
cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y;
|
||||
cur_cu->inter.cost = temp_cost;
|
||||
cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list];
|
||||
cur_cu->inter.mv_cand[ref_list] = cu_mv_cand;
|
||||
}
|
||||
search_cu_inter_ref(state,
|
||||
x, y, depth,
|
||||
lcu, cur_cu,
|
||||
mv_cand, merge_cand, num_cand,
|
||||
ref_idx,
|
||||
get_mvd_cost);
|
||||
}
|
||||
|
||||
// Search bi-pred positions
|
||||
|
@ -1155,7 +1177,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
|
|||
for (int ypos = 0; ypos < LCU_WIDTH >> depth; ++ypos) {
|
||||
int dst_y = ypos*(LCU_WIDTH >> depth);
|
||||
for (int xpos = 0; xpos < (LCU_WIDTH >> depth); ++xpos) {
|
||||
tmp_block[dst_y + xpos] = templcu->rec.y[((y + ypos)&(LCU_WIDTH - 1))*LCU_WIDTH + ((x + xpos)&(LCU_WIDTH - 1))];
|
||||
tmp_block[dst_y + xpos] = templcu->rec.y[
|
||||
SUB_SCU(y + ypos) * LCU_WIDTH + SUB_SCU(x + xpos)];
|
||||
tmp_pic[dst_y + xpos] = frame->source->y[x + xpos + (y + ypos)*frame->source->width];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -145,8 +145,8 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
const int width_c = width > TR_MIN_WIDTH ? width / 2 : width;
|
||||
|
||||
const int offset = width / 2;
|
||||
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
||||
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
|
||||
const bool reconstruct_chroma = !(x_px & 4 || y_px & 4);
|
||||
|
||||
|
@ -609,8 +609,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
const bool reconstruct_chroma = !(x_px & 4 || y_px & 4);
|
||||
|
||||
if (reconstruct_chroma) {
|
||||
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
||||
cu_info_t *const tr_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
|
||||
struct {
|
||||
double cost;
|
||||
|
@ -645,11 +645,10 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
|||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
|
||||
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
|
||||
|
||||
cu_info_t *cur_cu = &lcu->cu[cu_index];
|
||||
cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y);
|
||||
int8_t intra_mode = cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].mode;
|
||||
|
||||
double costs[5];
|
||||
|
@ -710,13 +709,12 @@ double kvz_search_cu_intra(encoder_state_t * const state,
|
|||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
|
||||
const int8_t cu_width = (LCU_WIDTH >> (depth));
|
||||
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
|
||||
const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth;
|
||||
|
||||
cu_info_t *cur_cu = &lcu->cu[cu_index];
|
||||
cu_info_t *cur_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y);
|
||||
|
||||
kvz_intra_references refs;
|
||||
|
||||
|
@ -728,10 +726,10 @@ double kvz_search_cu_intra(encoder_state_t * const state,
|
|||
// Select left and top CUs if they are available.
|
||||
// Top CU is not available across LCU boundary.
|
||||
if ((x_px >> 3) > 0) {
|
||||
left_cu = &lcu->cu[cu_index - 1];
|
||||
left_cu = LCU_GET_CU(lcu, lcu_cu.x - 1, lcu_cu.y);
|
||||
}
|
||||
if ((y_px >> 3) > 0 && lcu_cu.y != 0) {
|
||||
above_cu = &lcu->cu[cu_index - LCU_T_CU_WIDTH];
|
||||
above_cu = LCU_GET_CU(lcu, lcu_cu.x, lcu_cu.y - 1);
|
||||
}
|
||||
kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
|
||||
|
||||
|
|
|
@ -220,10 +220,10 @@ int kvz_quantize_residual_trskip(
|
|||
void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
|
||||
{
|
||||
// we have 64>>depth transform size
|
||||
const vector2d_t lcu_px = {x & 0x3f, y & 0x3f};
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4);
|
||||
if (cur_cu == NULL) {
|
||||
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
const int8_t width = LCU_WIDTH>>depth;
|
||||
|
||||
|
@ -241,9 +241,9 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in
|
|||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
if (depth < MAX_DEPTH) {
|
||||
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
|
||||
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
|
||||
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
|
||||
if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) {
|
||||
cbf_set(&cur_cu->cbf.y, depth);
|
||||
}
|
||||
|
@ -304,11 +304,11 @@ void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, in
|
|||
void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
|
||||
{
|
||||
// we have 64>>depth transform size
|
||||
const vector2d_t lcu_px = {x & 0x3f, y & 0x3f};
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4);
|
||||
const int8_t width = LCU_WIDTH>>depth;
|
||||
if (cur_cu == NULL) {
|
||||
cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + (lcu_px.y >> 3)*LCU_T_CU_WIDTH];
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
|
||||
// Tell clang-analyzer what is up. For some reason it can't figure out from
|
||||
|
@ -325,9 +325,9 @@ void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x,
|
|||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
if (depth < MAX_DEPTH) {
|
||||
cu_info_t *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + (lcu_px.y >> 3) *LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset) >> 3) + ((lcu_px.y + offset) >> 3)*LCU_T_CU_WIDTH];
|
||||
cu_info_t *cu_a = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y);
|
||||
cu_info_t *cu_b = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset);
|
||||
cu_info_t *cu_c = LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset);
|
||||
if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) {
|
||||
cbf_set(&cur_cu->cbf.u, depth);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue