mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Merge branch 'search-refac'
This commit is contained in:
commit
2c734760bc
322
src/search.c
322
src/search.c
|
@ -58,216 +58,133 @@
|
||||||
# define CHROMA_MULT 1.5
|
# define CHROMA_MULT 1.5
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||||
/**
|
|
||||||
* Copy all non-reference CU data from depth+1 to depth.
|
|
||||||
*/
|
|
||||||
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
|
|
||||||
{
|
{
|
||||||
assert(depth >= 0 && depth < MAX_PU_DEPTH);
|
for (int y = y_local; y < y_local + width; y += SCU_WIDTH) {
|
||||||
|
for (int x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||||
// Copy non-reference CUs.
|
*LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y);
|
||||||
{
|
|
||||||
const int x_orig = SUB_SCU(x_px);
|
|
||||||
const int y_orig = SUB_SCU(y_px);
|
|
||||||
const int width_cu = LCU_WIDTH >> depth;
|
|
||||||
for (int y = y_orig; y < y_orig + width_cu; y += SCU_WIDTH) {
|
|
||||||
for (int x = x_orig; x < x_orig + width_cu; x += SCU_WIDTH) {
|
|
||||||
const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x, y);
|
|
||||||
cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y);
|
|
||||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy reconstructed pixels.
|
|
||||||
{
|
|
||||||
const int x = SUB_SCU(x_px);
|
|
||||||
const int y = SUB_SCU(y_px);
|
|
||||||
const int width_px = LCU_WIDTH >> depth;
|
|
||||||
const int luma_index = x + y * LCU_WIDTH;
|
|
||||||
const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2);
|
|
||||||
|
|
||||||
const lcu_yuv_t *from = &work_tree[depth + 1].rec;
|
|
||||||
lcu_yuv_t *to = &work_tree[depth].rec;
|
|
||||||
|
|
||||||
const lcu_coeff_t *from_coeff = &work_tree[depth + 1].coeff;
|
|
||||||
lcu_coeff_t *to_coeff = &work_tree[depth].coeff;
|
|
||||||
|
|
||||||
kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index],
|
|
||||||
width_px, width_px, LCU_WIDTH, LCU_WIDTH);
|
|
||||||
if (from->chroma_format != KVZ_CSP_400) {
|
|
||||||
kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index],
|
|
||||||
width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
|
||||||
kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index],
|
|
||||||
width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy coefficients up. They do not have to be copied down because they
|
|
||||||
// are not used for the search.
|
|
||||||
const int luma_z = xy_to_zorder(LCU_WIDTH, x, y);
|
|
||||||
copy_coeffs(&from_coeff->y[luma_z], &to_coeff->y[luma_z], width_px);
|
|
||||||
|
|
||||||
if (from->chroma_format != KVZ_CSP_400) {
|
|
||||||
const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x >> 1, y >> 1);
|
|
||||||
copy_coeffs(&from_coeff->u[chroma_z], &to_coeff->u[chroma_z], width_px >> 1);
|
|
||||||
copy_coeffs(&from_coeff->v[chroma_z], &to_coeff->v[chroma_z], width_px >> 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||||
/**
|
|
||||||
* Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH.
|
|
||||||
*/
|
|
||||||
static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
|
|
||||||
{
|
{
|
||||||
assert(depth >= 0 && depth < MAX_PU_DEPTH);
|
const int luma_index = x_local + y_local * LCU_WIDTH;
|
||||||
|
const int chroma_index = (x_local / 2) + (y_local / 2) * (LCU_WIDTH / 2);
|
||||||
|
|
||||||
// TODO: clean up to remove the copy pasta
|
kvz_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index],
|
||||||
const int width_px = LCU_WIDTH >> depth;
|
width, width, LCU_WIDTH, LCU_WIDTH);
|
||||||
|
if (from->rec.chroma_format != KVZ_CSP_400) {
|
||||||
int d;
|
kvz_pixels_blit(&from->rec.u[chroma_index], &to->rec.u[chroma_index],
|
||||||
|
width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
||||||
for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) {
|
kvz_pixels_blit(&from->rec.v[chroma_index], &to->rec.v[chroma_index],
|
||||||
const int x_orig = SUB_SCU(x_px);
|
width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
||||||
const int y_orig = SUB_SCU(y_px);
|
|
||||||
|
|
||||||
for (int y = y_orig; y < y_orig + width_px; y += SCU_WIDTH) {
|
|
||||||
for (int x = x_orig; x < x_orig + width_px; x += SCU_WIDTH) {
|
|
||||||
const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y);
|
|
||||||
cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[d], x, y);
|
|
||||||
memcpy(to_cu, from_cu, sizeof(*to_cu));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy reconstructed pixels.
|
|
||||||
for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) {
|
|
||||||
const int x = SUB_SCU(x_px);
|
|
||||||
const int y = SUB_SCU(y_px);
|
|
||||||
|
|
||||||
const int luma_index = x + y * LCU_WIDTH;
|
|
||||||
const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2);
|
|
||||||
|
|
||||||
lcu_yuv_t *from = &work_tree[depth].rec;
|
|
||||||
lcu_yuv_t *to = &work_tree[d].rec;
|
|
||||||
|
|
||||||
kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index],
|
|
||||||
width_px, width_px, LCU_WIDTH, LCU_WIDTH);
|
|
||||||
if (from->chroma_format != KVZ_CSP_400) {
|
|
||||||
kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index],
|
|
||||||
width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
|
||||||
kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index],
|
|
||||||
width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
|
||||||
|
{
|
||||||
|
const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local);
|
||||||
|
copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width);
|
||||||
|
|
||||||
|
if (from->rec.chroma_format != KVZ_CSP_400) {
|
||||||
|
const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> 1, y_local >> 1);
|
||||||
|
copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1);
|
||||||
|
copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy all non-reference CU data from next level to current level.
|
||||||
|
*/
|
||||||
|
static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
|
||||||
|
{
|
||||||
|
const int width = LCU_WIDTH >> depth;
|
||||||
|
copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
|
||||||
|
copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
|
||||||
|
copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy all non-reference CU data from current level to all lower levels.
|
||||||
|
*/
|
||||||
|
static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
|
||||||
|
{
|
||||||
|
const int width = LCU_WIDTH >> depth;
|
||||||
|
for (int i = depth + 1; i <= MAX_PU_DEPTH; i++) {
|
||||||
|
copy_cu_info (x_local, y_local, width, &work_tree[depth], &work_tree[i]);
|
||||||
|
copy_cu_pixels(x_local, y_local, width, &work_tree[depth], &work_tree[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
|
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
|
||||||
{
|
{
|
||||||
|
const int x_local = SUB_SCU(x_px);
|
||||||
|
const int y_local = SUB_SCU(y_px);
|
||||||
const int width = LCU_WIDTH >> depth;
|
const int width = LCU_WIDTH >> depth;
|
||||||
const vector2d_t lcu_cu = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
|
||||||
|
|
||||||
// Depth 4 doesn't go inside the loop. Set the top-left CU.
|
|
||||||
LCU_GET_CU_AT_PX(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth;
|
|
||||||
|
|
||||||
for (unsigned y = 0; y < width; y += SCU_WIDTH) {
|
for (unsigned y = 0; y < width; y += SCU_WIDTH) {
|
||||||
for (unsigned x = 0; x < width; x += SCU_WIDTH) {
|
for (unsigned x = 0; x < width; x += SCU_WIDTH) {
|
||||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, lcu_cu.x + x, lcu_cu.y + y);
|
LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y)->tr_depth = tr_depth;
|
||||||
cu->tr_depth = tr_depth;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, int height, cu_info_t *cu)
|
||||||
static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pred_mode, int chroma_mode, int part_mode)
|
|
||||||
{
|
|
||||||
const int width = LCU_WIDTH >> depth;
|
|
||||||
const int x_cu = SUB_SCU(x_px);
|
|
||||||
const int y_cu = SUB_SCU(y_px);
|
|
||||||
|
|
||||||
if (part_mode == SIZE_NxN) {
|
|
||||||
assert(depth == MAX_DEPTH + 1);
|
|
||||||
assert(width == SCU_WIDTH);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (depth > MAX_DEPTH) {
|
|
||||||
depth = MAX_DEPTH;
|
|
||||||
assert(part_mode == SIZE_NxN);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set mode in every CU covered by part_mode in this depth.
|
|
||||||
for (int y = y_cu; y < y_cu + width; y += SCU_WIDTH) {
|
|
||||||
for (int x = x_cu; x < x_cu + width; x += SCU_WIDTH) {
|
|
||||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y);
|
|
||||||
cu->depth = depth;
|
|
||||||
cu->type = CU_INTRA;
|
|
||||||
cu->intra.mode = pred_mode;
|
|
||||||
cu->intra.mode_chroma = chroma_mode;
|
|
||||||
cu->part_size = part_mode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void lcu_set_inter_pu(lcu_t *lcu, int x_px, int y_px, int width, int height, cu_info_t *cur_pu)
|
|
||||||
{
|
{
|
||||||
// Set mode in every CU covered by part_mode in this depth.
|
// Set mode in every CU covered by part_mode in this depth.
|
||||||
for (int y = y_px; y < y_px + height; y += SCU_WIDTH) {
|
for (int y = y_local; y < y_local + height; y += SCU_WIDTH) {
|
||||||
for (int x = x_px; x < x_px + width; x += SCU_WIDTH) {
|
for (int x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y);
|
cu_info_t *to = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||||
//Check if this could be moved inside the if
|
to->type = cu->type;
|
||||||
if (cu != cur_pu) {
|
to->depth = cu->depth;
|
||||||
cu->depth = cur_pu->depth;
|
to->part_size = cu->part_size;
|
||||||
cu->part_size = cur_pu->part_size;
|
|
||||||
cu->type = CU_INTER;
|
if (cu->type == CU_INTRA) {
|
||||||
cu->tr_depth = cur_pu->tr_depth;
|
to->intra.mode = cu->intra.mode;
|
||||||
cu->merged = cur_pu->merged;
|
to->intra.mode_chroma = cu->intra.mode_chroma;
|
||||||
cu->skipped = cur_pu->skipped;
|
} else {
|
||||||
memcpy(&cu->inter, &cur_pu->inter, sizeof(cur_pu->inter));
|
to->skipped = cu->skipped;
|
||||||
|
to->merged = cu->merged;
|
||||||
|
to->merge_idx = cu->merge_idx;
|
||||||
|
to->inter = cu->inter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lcu_set_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
|
||||||
static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
|
|
||||||
{
|
{
|
||||||
const int width = LCU_WIDTH >> depth;
|
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
|
||||||
const int x_local = SUB_SCU(x_px);
|
const int num_pu = kvz_part_mode_num_parts[part_mode];
|
||||||
const int y_local = SUB_SCU(y_px);
|
|
||||||
const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size];
|
|
||||||
|
|
||||||
for (int i = 0; i < num_pu; ++i) {
|
for (int i = 0; i < num_pu; ++i) {
|
||||||
const int x_pu = PU_GET_X(cur_cu->part_size, width, x_local, i);
|
const int x_pu = PU_GET_X(part_mode, cu_width, x_local, i);
|
||||||
const int y_pu = PU_GET_Y(cur_cu->part_size, width, y_local, i);
|
const int y_pu = PU_GET_Y(part_mode, cu_width, y_local, i);
|
||||||
const int width_pu = PU_GET_W(cur_cu->part_size, width, i);
|
const int width_pu = PU_GET_W(part_mode, cu_width, i);
|
||||||
const int height_pu = PU_GET_H(cur_cu->part_size, width, i);
|
const int height_pu = PU_GET_H(part_mode, cu_width, i);
|
||||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
|
|
||||||
lcu_set_inter_pu(lcu, x_pu, y_pu, width_pu, height_pu, cur_pu);
|
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
|
||||||
|
pu->type = CU_INTER;
|
||||||
|
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void lcu_set_coeff(lcu_t *lcu, int x_local, int y_local, int width, cu_info_t *cur_cu)
|
||||||
static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
|
|
||||||
{
|
{
|
||||||
const uint32_t width = LCU_WIDTH >> depth;
|
const uint32_t tr_split = cur_cu->tr_depth - cur_cu->depth;
|
||||||
const uint32_t x_local = SUB_SCU(x_px);
|
|
||||||
const uint32_t y_local = SUB_SCU(y_px);
|
|
||||||
const uint32_t tr_split = cur_cu->tr_depth-cur_cu->depth;
|
|
||||||
const uint32_t mask = ~((width >> tr_split)-1);
|
const uint32_t mask = ~((width >> tr_split)-1);
|
||||||
|
|
||||||
// Set coeff flags in every CU covered by part_mode in this depth.
|
// Set coeff flags in every CU covered by part_mode in this depth.
|
||||||
for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) {
|
for (uint32_t y = y_local; y < y_local + width; y += SCU_WIDTH) {
|
||||||
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
for (uint32_t x = x_local; x < x_local + width; x += SCU_WIDTH) {
|
||||||
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x, y);
|
|
||||||
// Use TU top-left CU to propagate coeff flags
|
// Use TU top-left CU to propagate coeff flags
|
||||||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask);
|
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask);
|
||||||
if (cu != cu_from) {
|
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||||
|
if (cu_from != cu_to) {
|
||||||
// Chroma coeff data is not used, luma is needed for deblocking
|
// Chroma coeff data is not used, luma is needed for deblocking
|
||||||
cbf_copy(&cu->cbf, cu_from->cbf, COLOR_Y);
|
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -498,7 +415,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
cur_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x_local, y_local);
|
cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
||||||
// Assign correct depth
|
// Assign correct depth
|
||||||
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
|
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
|
||||||
cur_cu->tr_depth = depth > 0 ? depth : 1;
|
cur_cu->tr_depth = depth > 0 ? depth : 1;
|
||||||
|
@ -526,7 +443,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
kvz_search_cu_inter(state,
|
kvz_search_cu_inter(state,
|
||||||
x, y,
|
x, y,
|
||||||
depth,
|
depth,
|
||||||
&work_tree[depth],
|
lcu,
|
||||||
&mode_cost, &mode_bitcost);
|
&mode_cost, &mode_bitcost);
|
||||||
if (mode_cost < cost) {
|
if (mode_cost < cost) {
|
||||||
cost = mode_cost;
|
cost = mode_cost;
|
||||||
|
@ -557,7 +474,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
cost = mode_cost;
|
cost = mode_cost;
|
||||||
inter_bitcost = mode_bitcost;
|
inter_bitcost = mode_bitcost;
|
||||||
// TODO: only copy inter prediction info, not pixels
|
// TODO: only copy inter prediction info, not pixels
|
||||||
work_tree_copy_up(x, y, depth, work_tree);
|
work_tree_copy_up(x_local, y_local, depth, work_tree);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -581,7 +498,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
if (can_use_intra && !skip_intra) {
|
if (can_use_intra && !skip_intra) {
|
||||||
int8_t intra_mode;
|
int8_t intra_mode;
|
||||||
double intra_cost;
|
double intra_cost;
|
||||||
kvz_search_cu_intra(state, x, y, depth, &work_tree[depth],
|
kvz_search_cu_intra(state, x, y, depth, lcu,
|
||||||
&intra_mode, &intra_cost);
|
&intra_mode, &intra_cost);
|
||||||
if (intra_cost < cost) {
|
if (intra_cost < cost) {
|
||||||
cost = intra_cost;
|
cost = intra_cost;
|
||||||
|
@ -595,46 +512,37 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
// mode search of adjacent CUs.
|
// mode search of adjacent CUs.
|
||||||
if (cur_cu->type == CU_INTRA) {
|
if (cur_cu->type == CU_INTRA) {
|
||||||
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
|
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
|
||||||
int8_t intra_mode = cur_cu->intra.mode;
|
cur_cu->intra.mode_chroma = cur_cu->intra.mode;
|
||||||
lcu_set_intra_mode(&work_tree[depth], x, y, depth,
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
intra_mode,
|
|
||||||
intra_mode,
|
|
||||||
cur_cu->part_size);
|
|
||||||
kvz_intra_recon_cu(state,
|
kvz_intra_recon_cu(state,
|
||||||
x, y,
|
x, y,
|
||||||
depth,
|
depth,
|
||||||
intra_mode, -1, // skip chroma
|
cur_cu->intra.mode, -1, // skip chroma
|
||||||
NULL, &work_tree[depth]);
|
NULL, lcu);
|
||||||
|
|
||||||
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != KVZ_CSP_400) {
|
if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||||
int8_t intra_mode_chroma = intra_mode;
|
|
||||||
|
|
||||||
// There is almost no benefit to doing the chroma mode search for
|
// There is almost no benefit to doing the chroma mode search for
|
||||||
// rd2. Possibly because the luma mode search already takes chroma
|
// rd2. Possibly because the luma mode search already takes chroma
|
||||||
// into account, so there is less of a chance of luma mode being
|
// into account, so there is less of a chance of luma mode being
|
||||||
// really bad for chroma.
|
// really bad for chroma.
|
||||||
if (state->encoder_control->cfg.rdo == 3) {
|
if (state->encoder_control->cfg.rdo == 3) {
|
||||||
intra_mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, &work_tree[depth]);
|
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu);
|
||||||
lcu_set_intra_mode(&work_tree[depth], x, y, depth,
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
intra_mode, intra_mode_chroma,
|
|
||||||
cur_cu->part_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kvz_intra_recon_cu(state,
|
kvz_intra_recon_cu(state,
|
||||||
x, y,
|
x, y,
|
||||||
depth,
|
depth,
|
||||||
-1, intra_mode_chroma, // skip luma
|
-1, cur_cu->intra.mode_chroma, // skip luma
|
||||||
NULL, &work_tree[depth]);
|
NULL, lcu);
|
||||||
}
|
}
|
||||||
} else if (cur_cu->type == CU_INTER) {
|
} else if (cur_cu->type == CU_INTER) {
|
||||||
// Reset transform depth because intra messes with them.
|
// Reset transform depth because intra messes with them.
|
||||||
// This will no longer be necessary if the transform depths are not shared.
|
// This will no longer be necessary if the transform depths are not shared.
|
||||||
int tr_depth = depth > 0 ? depth : 1;
|
int tr_depth = depth > 0 ? depth : 1;
|
||||||
kvz_lcu_set_trdepth(&work_tree[depth], x, y, depth, tr_depth);
|
kvz_lcu_set_trdepth(lcu, x, y, depth, tr_depth);
|
||||||
|
|
||||||
const int cu_width = LCU_WIDTH >> depth;
|
|
||||||
const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size];
|
const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size];
|
||||||
|
|
||||||
for (int i = 0; i < num_pu; ++i) {
|
for (int i = 0; i < num_pu; ++i) {
|
||||||
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i);
|
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i);
|
||||||
const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i);
|
const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i);
|
||||||
|
@ -653,7 +561,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
pu_x, pu_y,
|
pu_x, pu_y,
|
||||||
pu_w, pu_h,
|
pu_w, pu_h,
|
||||||
cur_pu->inter.mv,
|
cur_pu->inter.mv,
|
||||||
&work_tree[depth]);
|
lcu);
|
||||||
} else {
|
} else {
|
||||||
const int mv_idx = cur_pu->inter.mv_dir - 1;
|
const int mv_idx = cur_pu->inter.mv_dir - 1;
|
||||||
const kvz_picture *const ref =
|
const kvz_picture *const ref =
|
||||||
|
@ -663,7 +571,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
pu_x, pu_y,
|
pu_x, pu_y,
|
||||||
pu_w, pu_h,
|
pu_w, pu_h,
|
||||||
cur_pu->inter.mv[mv_idx],
|
cur_pu->inter.mv[mv_idx],
|
||||||
&work_tree[depth],
|
lcu,
|
||||||
0);
|
0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -673,7 +581,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
true, has_chroma,
|
true, has_chroma,
|
||||||
x, y, depth,
|
x, y, depth,
|
||||||
NULL,
|
NULL,
|
||||||
&work_tree[depth]);
|
lcu);
|
||||||
|
|
||||||
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
|
||||||
|
|
||||||
|
@ -685,19 +593,19 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
inter_bitcost -= 1;
|
inter_bitcost -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lcu_set_inter(&work_tree[depth], x, y, depth, cur_cu);
|
lcu_set_inter(lcu, x_local, y_local, cu_width);
|
||||||
lcu_set_coeff(&work_tree[depth], x, y, depth, cur_cu);
|
lcu_set_coeff(lcu, x_local, y_local, cu_width, cur_cu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
||||||
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]);
|
cost = kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||||
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||||
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]);
|
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
double mode_bits;
|
double mode_bits;
|
||||||
if (cur_cu->type == CU_INTRA) {
|
if (cur_cu->type == CU_INTRA) {
|
||||||
mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y);
|
mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y);
|
||||||
} else {
|
} else {
|
||||||
mode_bits = inter_bitcost;
|
mode_bits = inter_bitcost;
|
||||||
}
|
}
|
||||||
|
@ -765,10 +673,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
cur_cu->type = CU_INTRA;
|
cur_cu->type = CU_INTRA;
|
||||||
cur_cu->part_size = SIZE_2Nx2N;
|
cur_cu->part_size = SIZE_2Nx2N;
|
||||||
|
|
||||||
kvz_lcu_set_trdepth(&work_tree[depth], x, y, depth, cur_cu->tr_depth);
|
kvz_lcu_set_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
|
||||||
lcu_set_intra_mode(&work_tree[depth], x, y, depth,
|
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||||
cur_cu->intra.mode, cur_cu->intra.mode_chroma,
|
|
||||||
cur_cu->part_size);
|
|
||||||
|
|
||||||
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
|
||||||
const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1;
|
const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1;
|
||||||
|
@ -776,11 +682,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
x, y,
|
x, y,
|
||||||
depth,
|
depth,
|
||||||
cur_cu->intra.mode, mode_chroma,
|
cur_cu->intra.mode, mode_chroma,
|
||||||
NULL, &work_tree[depth]);
|
NULL, lcu);
|
||||||
|
|
||||||
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]);
|
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||||
if (has_chroma) {
|
if (has_chroma) {
|
||||||
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, &work_tree[depth]);
|
cost += kvz_cu_rd_cost_chroma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the cost of coding no-split.
|
// Add the cost of coding no-split.
|
||||||
|
@ -789,7 +695,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
|
cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda;
|
||||||
|
|
||||||
// Add the cost of coding intra mode only once.
|
// Add the cost of coding intra mode only once.
|
||||||
double mode_bits = calc_mode_bits(state, &work_tree[depth], cur_cu, x, y);
|
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y);
|
||||||
cost += mode_bits * state->lambda;
|
cost += mode_bits * state->lambda;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -797,19 +703,19 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
||||||
if (split_cost < cost) {
|
if (split_cost < cost) {
|
||||||
// Copy split modes to this depth.
|
// Copy split modes to this depth.
|
||||||
cost = split_cost;
|
cost = split_cost;
|
||||||
work_tree_copy_up(x, y, depth, work_tree);
|
work_tree_copy_up(x_local, y_local, depth, work_tree);
|
||||||
#if KVZ_DEBUG
|
#if KVZ_DEBUG
|
||||||
debug_split = 1;
|
debug_split = 1;
|
||||||
#endif
|
#endif
|
||||||
} else if (depth > 0) {
|
} else if (depth > 0) {
|
||||||
// Copy this CU's mode all the way down for use in adjacent CUs mode
|
// Copy this CU's mode all the way down for use in adjacent CUs mode
|
||||||
// search.
|
// search.
|
||||||
work_tree_copy_down(x, y, depth, work_tree);
|
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||||
}
|
}
|
||||||
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {
|
} else if (depth >= 0 && depth < MAX_PU_DEPTH) {
|
||||||
// Need to copy modes down since the lower level of the work tree is used
|
// Need to copy modes down since the lower level of the work tree is used
|
||||||
// when searching SMP and AMP blocks.
|
// when searching SMP and AMP blocks.
|
||||||
work_tree_copy_down(x, y, depth, work_tree);
|
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->frame->num, state->tile->id, state->slice->id,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHCU, state->encoder_control->threadqueue, "type=search_cu,frame=%d,tile=%d,slice=%d,px_x=%d-%d,px_y=%d-%d,depth=%d,split=%d,cur_cu_is_intra=%d", state->frame->num, state->tile->id, state->slice->id,
|
||||||
|
|
Loading…
Reference in a new issue