mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Combine tile mv constraint with OWF mv constraint
This also fixes movement vectors in tiles when OWF is on. The OWF mv constraint assumed WPP, so it didn't work with tiles.
This commit is contained in:
parent
7981609cd0
commit
f4ebff12b0
|
@ -28,20 +28,21 @@
|
||||||
#include "rdo.h"
|
#include "rdo.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \return True if referred block is within current tile.
|
* \return True if referred block is within current tile.
|
||||||
*/
|
*/
|
||||||
static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width)
|
static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
|
||||||
{
|
{
|
||||||
if (state->encoder_control->cfg->mv_constraint != KVZ_MV_CONSTRAIN_FRAME_AND_TILE) return true;
|
if (state->encoder_control->cfg->mv_constraint != KVZ_MV_CONSTRAIN_FRAME_AND_TILE) {
|
||||||
|
return (wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2));
|
||||||
|
};
|
||||||
|
|
||||||
// TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE.
|
// TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE.
|
||||||
|
const vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y };
|
||||||
vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y };
|
|
||||||
|
|
||||||
if (abs_mv.x >= 0 && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) &&
|
if (abs_mv.x >= 0 && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) &&
|
||||||
abs_mv.y >= 0 && abs_mv.y + (width << 2) <= (state->tile->frame->height << 2))
|
abs_mv.y >= 0 && abs_mv.y + (width << 2) <= (state->tile->frame->height << 2) &&
|
||||||
|
(wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2)))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -53,9 +54,9 @@ static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector
|
||||||
/**
|
/**
|
||||||
* \return True if referred block is within current tile.
|
* \return True if referred block is within current tile.
|
||||||
*/
|
*/
|
||||||
static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width)
|
static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
|
||||||
{
|
{
|
||||||
return fracmv_within_tile(state, orig, x << 2, y << 2, width);
|
return fracmv_within_tile(state, orig, x << 2, y << 2, width, wpp_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -156,7 +157,7 @@ static int calc_mvd_cost(const encoder_state_t * const state, int x, int y, int
|
||||||
unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, unsigned pattern_type,
|
unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, unsigned pattern_type,
|
||||||
const vector2d_t *orig, const int iDist, vector2d_t *mv, unsigned best_cost, int *best_dist,
|
const vector2d_t *orig, const int iDist, vector2d_t *mv, unsigned best_cost, int *best_dist,
|
||||||
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
||||||
int width, int height, int max_px_below_lcu)
|
int width, int height, int wpp_limit)
|
||||||
{
|
{
|
||||||
int n_points;
|
int n_points;
|
||||||
int best_index = -1;
|
int best_index = -1;
|
||||||
|
@ -274,7 +275,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
||||||
for (i = 0; i < n_points; i++)
|
for (i = 0; i < n_points; i++)
|
||||||
{
|
{
|
||||||
vector2d_t *current = &pattern[pattern_type][i];
|
vector2d_t *current = &pattern[pattern_type][i];
|
||||||
if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width)) {
|
if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,7 +287,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -322,7 +323,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
||||||
unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref,
|
unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref,
|
||||||
const vector2d_t *orig, vector2d_t *mv, unsigned best_cost,
|
const vector2d_t *orig, vector2d_t *mv, unsigned best_cost,
|
||||||
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
||||||
int width, int height, int iSearchRange, int iRaster, int max_px_below_lcu)
|
int width, int height, int iSearchRange, int iRaster, int wpp_limit)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int k;
|
int k;
|
||||||
|
@ -342,7 +343,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
|
||||||
for (k = -iSearchRange; k <= iSearchRange; k += iRaster)
|
for (k = -iSearchRange; k <= iSearchRange; k += iRaster)
|
||||||
{
|
{
|
||||||
vector2d_t current = { k, i };
|
vector2d_t current = { k, i };
|
||||||
if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width)) {
|
if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,7 +355,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -405,7 +406,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
int iDist;
|
int iDist;
|
||||||
int best_dist = 0;
|
int best_dist = 0;
|
||||||
unsigned best_index = num_cand;
|
unsigned best_index = num_cand;
|
||||||
int max_px_below_lcu = -1;
|
int wpp_limit = -1;
|
||||||
|
|
||||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||||
|
@ -415,28 +416,28 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state->encoder_control->owf) {
|
if (state->encoder_control->owf) {
|
||||||
max_px_below_lcu = LCU_WIDTH;
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
||||||
if (state->encoder_control->fme_level > 0) {
|
if (state->encoder_control->fme_level > 0) {
|
||||||
// Fractional motion estimation can change the mv by at most 1 pixel.
|
// Fractional motion estimation can change the mv by at most 1 pixel.
|
||||||
max_px_below_lcu -= 1;
|
wpp_limit -= 1;
|
||||||
}
|
}
|
||||||
if (state->encoder_control->deblock_enable) {
|
if (state->encoder_control->deblock_enable) {
|
||||||
// Strong deblock filter modifies 3 pixels.
|
// Strong deblock filter modifies 3 pixels.
|
||||||
max_px_below_lcu -= 3;
|
wpp_limit -= 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//step 1, compare (0,0) vector to predicted vectors
|
//step 1, compare (0,0) vector to predicted vectors
|
||||||
|
|
||||||
// Check whatever input vector we got, unless its (0, 0) which will be checked later.
|
// Check whatever input vector we got, unless its (0, 0) which will be checked later.
|
||||||
if ((mv.x || mv.y) && intmv_within_tile(state, orig, mv.x, mv.y, width))
|
if ((mv.x || mv.y) && intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -454,7 +455,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
if (merge_cand[i].dir == 3) continue;
|
if (merge_cand[i].dir == 3) continue;
|
||||||
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
||||||
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
||||||
if (!intmv_within_tile(state, orig, mv.x, mv.y, width)) {
|
if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -464,7 +465,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -492,7 +493,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
||||||
{
|
{
|
||||||
best_cost = kvz_tz_pattern_search(state, pic, ref, step2_type, orig, iDist, &mv, best_cost, &best_dist,
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step2_type, orig, iDist, &mv, best_cost, &best_dist,
|
||||||
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//step 3, raster scan
|
//step 3, raster scan
|
||||||
|
@ -501,7 +502,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
best_dist = iRaster;
|
best_dist = iRaster;
|
||||||
|
|
||||||
best_cost = kvz_tz_raster_search(state, pic, ref, orig, &mv, best_cost, mv_cand, merge_cand,
|
best_cost = kvz_tz_raster_search(state, pic, ref, orig, &mv, best_cost, mv_cand, merge_cand,
|
||||||
num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, max_px_below_lcu);
|
num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, wpp_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
//step 4
|
//step 4
|
||||||
|
@ -513,7 +514,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
while (iDist > 0)
|
while (iDist > 0)
|
||||||
{
|
{
|
||||||
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
||||||
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
||||||
|
|
||||||
iDist = iDist >> 1;
|
iDist = iDist >> 1;
|
||||||
}
|
}
|
||||||
|
@ -525,7 +526,7 @@ static unsigned tz_search(const encoder_state_t * const state,
|
||||||
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
||||||
{
|
{
|
||||||
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
||||||
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, max_px_below_lcu);
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -594,7 +595,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
uint32_t best_bitcost = 0, bitcost;
|
uint32_t best_bitcost = 0, bitcost;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
|
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
|
||||||
int max_px_below_lcu = -1;
|
int wpp_limit = -1;
|
||||||
|
|
||||||
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
|
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||||
|
@ -603,16 +604,15 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (state->encoder_control->owf) {
|
if (state->encoder_control->owf) {
|
||||||
max_px_below_lcu = LCU_WIDTH;
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
||||||
if (state->encoder_control->fme_level > 0) {
|
if (state->encoder_control->fme_level > 0) {
|
||||||
// Fractional motion estimation can change the mv by at most 1 pixel.
|
// Fractional motion estimation can change the mv by at most 1 pixel.
|
||||||
max_px_below_lcu -= 1;
|
wpp_limit -= 1;
|
||||||
}
|
}
|
||||||
if (state->encoder_control->deblock_enable) {
|
if (state->encoder_control->deblock_enable) {
|
||||||
// Strong deblock filter modifies 3 pixels.
|
// Strong deblock filter modifies 3 pixels.
|
||||||
max_px_below_lcu -= 3;
|
wpp_limit -= 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -628,14 +628,14 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mv_in_merge_cand &&
|
if (!mv_in_merge_cand &&
|
||||||
intmv_within_tile(state, orig, mv.x, mv.y, width))
|
intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
|
||||||
{
|
{
|
||||||
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
||||||
|
|
||||||
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
best_bitcost = bitcost;
|
best_bitcost = bitcost;
|
||||||
best_index = num_cand;
|
best_index = num_cand;
|
||||||
|
@ -653,7 +653,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
if (merge_cand[i].dir == 3) continue;
|
if (merge_cand[i].dir == 3) continue;
|
||||||
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
||||||
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
||||||
if (!intmv_within_tile(state, orig, mv.x, mv.y, width)) {
|
if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -662,7 +662,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -689,7 +689,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
best_index = 0;
|
best_index = 0;
|
||||||
for (i = 0; i < 7; ++i) {
|
for (i = 0; i < 7; ++i) {
|
||||||
const vector2d_t *pattern = &large_hexbs[i];
|
const vector2d_t *pattern = &large_hexbs[i];
|
||||||
if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width)) {
|
if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -699,7 +699,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
|
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
|
@ -736,7 +736,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
// Iterate through the next 3 points.
|
// Iterate through the next 3 points.
|
||||||
for (i = 0; i < 3; ++i) {
|
for (i = 0; i < 3; ++i) {
|
||||||
const vector2d_t *offset = &large_hexbs[start + i];
|
const vector2d_t *offset = &large_hexbs[start + i];
|
||||||
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width)) {
|
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -746,7 +746,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
|
@ -772,7 +772,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
// Do the final step of the search with a small pattern.
|
// Do the final step of the search with a small pattern.
|
||||||
for (i = 1; i < 5; ++i) {
|
for (i = 1; i < 5; ++i) {
|
||||||
const vector2d_t *offset = &small_hexbs[i];
|
const vector2d_t *offset = &small_hexbs[i];
|
||||||
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width)) {
|
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -782,7 +782,7 @@ static unsigned hexagon_search(const encoder_state_t * const state,
|
||||||
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
width, height, max_px_below_lcu);
|
width, height, -1);
|
||||||
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
||||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
|
@ -906,6 +906,15 @@ static unsigned search_frac(const encoder_state_t * const state,
|
||||||
{ -1, -1 }, { 0, -1 }, { 1, -1 }
|
{ -1, -1 }, { 0, -1 }, { 1, -1 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
int wpp_limit = -1;
|
||||||
|
if (state->encoder_control->owf) {
|
||||||
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
||||||
|
if (state->encoder_control->deblock_enable) {
|
||||||
|
// Strong deblock filter modifies 3 pixels.
|
||||||
|
wpp_limit -= 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Set mv to halfpel precision
|
//Set mv to halfpel precision
|
||||||
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
||||||
unsigned best_cost = UINT32_MAX;
|
unsigned best_cost = UINT32_MAX;
|
||||||
|
@ -955,7 +964,7 @@ static unsigned search_frac(const encoder_state_t * const state,
|
||||||
// Search halfpel positions around best integer mv
|
// Search halfpel positions around best integer mv
|
||||||
for (i = 0; i < 9; ++i) {
|
for (i = 0; i < 9; ++i) {
|
||||||
const vector2d_t *pattern = &square[i];
|
const vector2d_t *pattern = &square[i];
|
||||||
if (!fracmv_within_tile(state, orig, (mv.x + pattern->x) << 1, (mv.y + pattern->y) << 1, width)) {
|
if (!fracmv_within_tile(state, orig, (mv.x + pattern->x) << 1, (mv.y + pattern->y) << 1, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -995,7 +1004,7 @@ static unsigned search_frac(const encoder_state_t * const state,
|
||||||
//Search quarterpel points around best halfpel mv
|
//Search quarterpel points around best halfpel mv
|
||||||
for (i = 0; i < 9; ++i) {
|
for (i = 0; i < 9; ++i) {
|
||||||
const vector2d_t *pattern = &square[i];
|
const vector2d_t *pattern = &square[i];
|
||||||
if (!fracmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width)) {
|
if (!fracmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1132,7 +1141,7 @@ static void search_pu_inter_ref(const encoder_state_t * const state,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width));
|
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
|
||||||
|
|
||||||
if (state->encoder_control->cfg->fme_level > 0) {
|
if (state->encoder_control->cfg->fme_level > 0) {
|
||||||
temp_cost = search_frac(state,
|
temp_cost = search_frac(state,
|
||||||
|
@ -1146,7 +1155,7 @@ static void search_pu_inter_ref(const encoder_state_t * const state,
|
||||||
num_cand,
|
num_cand,
|
||||||
ref_idx,
|
ref_idx,
|
||||||
&temp_bitcost);
|
&temp_bitcost);
|
||||||
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width));
|
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
|
||||||
}
|
}
|
||||||
|
|
||||||
merged = 0;
|
merged = 0;
|
||||||
|
|
Loading…
Reference in a new issue