2015-07-21 08:49:04 +00:00
|
|
|
/*****************************************************************************
|
|
|
|
* This file is part of Kvazaar HEVC encoder.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
|
|
|
* COPYING file).
|
|
|
|
*
|
|
|
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
|
|
|
* the terms of the GNU Lesser General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
|
|
|
* option) any later version.
|
|
|
|
*
|
|
|
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
|
|
|
* more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
#include "search_inter.h"
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include "inter.h"
|
|
|
|
#include "strategies/strategies-picture.h"
|
|
|
|
#include "strategies/strategies-ipol.h"
|
2015-08-21 06:11:30 +00:00
|
|
|
#include "rdo.h"
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
|
2016-02-24 10:10:51 +00:00
|
|
|
/**
|
|
|
|
* \return True if referred block is within current tile.
|
|
|
|
*/
|
2016-02-24 16:10:32 +00:00
|
|
|
static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
|
2016-02-24 10:10:51 +00:00
|
|
|
{
|
2016-02-29 16:39:21 +00:00
|
|
|
if (state->encoder_control->cfg->mv_constraint == KVZ_MV_CONSTRAIN_NONE) {
|
2016-02-24 16:10:32 +00:00
|
|
|
return (wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2));
|
|
|
|
};
|
2016-02-24 10:10:51 +00:00
|
|
|
|
2016-02-29 16:39:21 +00:00
|
|
|
int lt_margin = 0;
|
|
|
|
int rb_margin = 0;
|
|
|
|
if (KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) {
|
|
|
|
// Enforce a distance of 8 from any tile boundary.
|
|
|
|
lt_margin = 8;
|
|
|
|
rb_margin = 16;
|
|
|
|
}
|
|
|
|
|
2016-02-24 10:10:51 +00:00
|
|
|
// TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE.
|
2016-02-24 16:10:32 +00:00
|
|
|
const vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y };
|
2016-02-24 10:10:51 +00:00
|
|
|
|
2016-02-29 16:39:21 +00:00
|
|
|
if (abs_mv.x >= lt_margin && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) - rb_margin &&
|
|
|
|
abs_mv.y >= lt_margin && abs_mv.y + (width << 2) <= (state->tile->frame->height << 2) - rb_margin &&
|
2016-02-24 16:10:32 +00:00
|
|
|
(wpp_limit == -1 || y + (width << 2) <= (wpp_limit << 2)))
|
2016-02-24 10:10:51 +00:00
|
|
|
{
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \return True if referred block is within current tile.
|
|
|
|
*/
|
2016-02-24 16:10:32 +00:00
|
|
|
static INLINE bool intmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int wpp_limit)
|
2016-02-24 10:10:51 +00:00
|
|
|
{
|
2016-02-24 16:10:32 +00:00
|
|
|
return fracmv_within_tile(state, orig, x << 2, y << 2, width, wpp_limit);
|
2016-02-24 10:10:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
static uint32_t get_ep_ex_golomb_bitcost(uint32_t symbol, uint32_t count)
|
|
|
|
{
|
|
|
|
int32_t num_bins = 0;
|
|
|
|
while (symbol >= (uint32_t)(1 << count)) {
|
|
|
|
++num_bins;
|
|
|
|
symbol -= 1 << count;
|
|
|
|
++count;
|
|
|
|
}
|
|
|
|
num_bins ++;
|
|
|
|
|
|
|
|
return num_bins;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-11-05 12:31:37 +00:00
|
|
|
static uint32_t get_mvd_coding_cost(vector2d_t *mvd, cabac_data_t* cabac)
|
2015-07-21 09:02:54 +00:00
|
|
|
{
|
|
|
|
uint32_t bitcost = 0;
|
|
|
|
const int32_t mvd_hor = mvd->x;
|
|
|
|
const int32_t mvd_ver = mvd->y;
|
|
|
|
const int8_t hor_abs_gr0 = mvd_hor != 0;
|
|
|
|
const int8_t ver_abs_gr0 = mvd_ver != 0;
|
|
|
|
const uint32_t mvd_hor_abs = abs(mvd_hor);
|
|
|
|
const uint32_t mvd_ver_abs = abs(mvd_ver);
|
|
|
|
|
|
|
|
// Greater than 0 for x/y
|
|
|
|
bitcost += 2;
|
|
|
|
|
|
|
|
if (hor_abs_gr0) {
|
|
|
|
if (mvd_hor_abs > 1) {
|
|
|
|
bitcost += get_ep_ex_golomb_bitcost(mvd_hor_abs-2, 1) - 2; // TODO: tune the costs
|
|
|
|
}
|
|
|
|
// Greater than 1 + sign
|
|
|
|
bitcost += 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ver_abs_gr0) {
|
|
|
|
if (mvd_ver_abs > 1) {
|
|
|
|
bitcost += get_ep_ex_golomb_bitcost(mvd_ver_abs-2, 1) - 2; // TODO: tune the costs
|
|
|
|
}
|
|
|
|
// Greater than 1 + sign
|
|
|
|
bitcost += 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bitcost;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int calc_mvd_cost(const encoder_state_t * const state, int x, int y, int mv_shift,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
|
|
|
|
{
|
|
|
|
uint32_t temp_bitcost = 0;
|
|
|
|
uint32_t merge_idx;
|
|
|
|
int cand1_cost,cand2_cost;
|
|
|
|
vector2d_t mvd_temp1, mvd_temp2;
|
|
|
|
int8_t merged = 0;
|
|
|
|
int8_t cur_mv_cand = 0;
|
|
|
|
|
|
|
|
x <<= mv_shift;
|
|
|
|
y <<= mv_shift;
|
|
|
|
|
|
|
|
// Check every candidate to find a match
|
|
|
|
for(merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
|
|
|
|
if (merge_cand[merge_idx].dir == 3) continue;
|
|
|
|
if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
|
|
|
|
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y &&
|
|
|
|
merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
|
|
|
|
temp_bitcost += merge_idx;
|
|
|
|
merged = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check mvd cost only if mv is not merged
|
|
|
|
if(!merged) {
|
|
|
|
mvd_temp1.x = x - mv_cand[0][0];
|
|
|
|
mvd_temp1.y = y - mv_cand[0][1];
|
2015-11-05 12:31:37 +00:00
|
|
|
cand1_cost = get_mvd_coding_cost(&mvd_temp1, NULL);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
mvd_temp2.x = x - mv_cand[1][0];
|
|
|
|
mvd_temp2.y = y - mv_cand[1][1];
|
2015-11-05 12:31:37 +00:00
|
|
|
cand2_cost = get_mvd_coding_cost(&mvd_temp2, NULL);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
// Select candidate 1 if it has lower cost
|
|
|
|
if (cand2_cost < cand1_cost) {
|
|
|
|
cur_mv_cand = 1;
|
|
|
|
}
|
|
|
|
temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
|
|
|
|
}
|
|
|
|
*bitcost = temp_bitcost;
|
|
|
|
return temp_bitcost*(int32_t)(state->global->cur_lambda_cost_sqrt+0.5);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref, unsigned pattern_type,
|
2015-07-21 09:02:54 +00:00
|
|
|
const vector2d_t *orig, const int iDist, vector2d_t *mv, unsigned best_cost, int *best_dist,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
2016-02-24 16:10:32 +00:00
|
|
|
int width, int height, int wpp_limit)
|
2015-07-21 09:02:54 +00:00
|
|
|
{
|
|
|
|
int n_points;
|
|
|
|
int best_index = -1;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
vector2d_t mv_best = { 0, 0 };
|
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
|
|
|
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
assert(pattern_type < 4);
|
|
|
|
|
|
|
|
//implemented search patterns
|
|
|
|
vector2d_t pattern[4][8] = {
|
|
|
|
//diamond (8 points)
|
|
|
|
//[ ][ ][ ][ ][1][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][8][ ][ ][ ][5][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[4][ ][ ][ ][o][ ][ ][ ][2]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][7][ ][ ][ ][6][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][3][ ][ ][ ][ ]
|
|
|
|
{
|
|
|
|
{ 0, iDist }, { iDist, 0 }, { 0, -iDist }, { -iDist, 0 },
|
|
|
|
{ iDist / 2, iDist / 2 }, { iDist / 2, -iDist / 2 }, { -iDist / 2, -iDist / 2 }, { -iDist / 2, iDist / 2 }
|
|
|
|
},
|
|
|
|
|
|
|
|
//square (8 points)
|
|
|
|
//[8][ ][ ][ ][1][ ][ ][ ][2]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[7][ ][ ][ ][o][ ][ ][ ][3]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[6][ ][ ][ ][5][ ][ ][ ][4]
|
|
|
|
{
|
|
|
|
{ 0, iDist }, { iDist, iDist }, { iDist, 0 }, { iDist, -iDist }, { 0, -iDist },
|
|
|
|
{ -iDist, -iDist }, { -iDist, 0 }, { -iDist, iDist }
|
|
|
|
},
|
|
|
|
|
|
|
|
//octagon (8 points)
|
|
|
|
//[ ][ ][5][ ][ ][ ][1][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][2]
|
|
|
|
//[4][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][o][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[8][ ][ ][ ][ ][ ][ ][ ][6]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][7][ ][ ][ ][3][ ][ ]
|
|
|
|
{
|
|
|
|
{ iDist / 2, iDist }, { iDist, iDist / 2 }, { iDist / 2, -iDist }, { -iDist, iDist / 2 },
|
|
|
|
{ -iDist / 2, iDist }, { iDist, -iDist / 2 }, { -iDist / 2, -iDist }, { -iDist, -iDist / 2 }
|
|
|
|
},
|
|
|
|
|
|
|
|
//hexagon (6 points)
|
|
|
|
//[ ][ ][5][ ][ ][ ][1][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[4][ ][ ][ ][o][ ][ ][ ][2]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][ ][ ][ ][ ][ ][ ][ ]
|
|
|
|
//[ ][ ][6][ ][ ][ ][3][ ][ ]
|
|
|
|
{
|
|
|
|
{ iDist / 2, iDist }, { iDist, 0 }, { iDist / 2, -iDist }, { -iDist, 0 },
|
|
|
|
{ iDist / 2, iDist }, { -iDist / 2, -iDist }, { 0, 0 }, { 0, 0 }
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
//set the number of points to be checked
|
|
|
|
if (iDist == 1)
|
|
|
|
{
|
|
|
|
switch (pattern_type)
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
n_points = 4;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
n_points = 4;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
n_points = 4;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
n_points = 8;
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
switch (pattern_type)
|
|
|
|
{
|
|
|
|
case 3:
|
|
|
|
n_points = 6;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
n_points = 8;
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
//compute SAD values for all chosen points
|
|
|
|
for (i = 0; i < n_points; i++)
|
|
|
|
{
|
|
|
|
vector2d_t *current = &pattern[pattern_type][i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv->x + current->x, mv->y + current->y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned cost;
|
|
|
|
uint32_t bitcost;
|
|
|
|
|
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-08-26 08:50:27 +00:00
|
|
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (cost < best_cost)
|
|
|
|
{
|
|
|
|
best_cost = cost;
|
|
|
|
*best_bitcost = bitcost;
|
|
|
|
best_index = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (best_index >= 0)
|
|
|
|
{
|
|
|
|
mv_best = pattern[pattern_type][best_index];
|
|
|
|
*best_dist = iDist;
|
|
|
|
}
|
|
|
|
|
|
|
|
mv->x += mv_best.x;
|
|
|
|
mv->y += mv_best.y;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_picture *pic, const kvz_picture *ref,
|
2015-07-21 09:02:54 +00:00
|
|
|
const vector2d_t *orig, vector2d_t *mv, unsigned best_cost,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, uint32_t *best_bitcost,
|
2016-02-24 16:10:32 +00:00
|
|
|
int width, int height, int iSearchRange, int iRaster, int wpp_limit)
|
2015-07-21 09:02:54 +00:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int k;
|
|
|
|
|
|
|
|
vector2d_t mv_best = { 0, 0 };
|
2015-11-05 10:24:03 +00:00
|
|
|
|
|
|
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
//compute SAD values for every point in the iRaster downsampled version of the current search area
|
|
|
|
for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
|
|
|
|
{
|
|
|
|
for (k = -iSearchRange; k <= iSearchRange; k += iRaster)
|
|
|
|
{
|
|
|
|
vector2d_t current = { k, i };
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv->x + current.x, mv->y + current.y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned cost;
|
|
|
|
uint32_t bitcost;
|
|
|
|
|
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-08-26 08:50:27 +00:00
|
|
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (cost < best_cost)
|
|
|
|
{
|
|
|
|
best_cost = cost;
|
|
|
|
*best_bitcost = bitcost;
|
|
|
|
mv_best = current;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mv->x += mv_best.x;
|
|
|
|
mv->y += mv_best.y;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
static unsigned tz_search(const encoder_state_t * const state,
|
|
|
|
unsigned width, unsigned height,
|
2015-07-21 09:02:54 +00:00
|
|
|
const kvz_picture *pic, const kvz_picture *ref,
|
|
|
|
const vector2d_t *orig, vector2d_t *mv_in_out,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out)
|
|
|
|
{
|
|
|
|
|
|
|
|
//TZ parameters
|
|
|
|
const int iSearchRange = 96; // search range for each stage
|
|
|
|
const int iRaster = 5; // search distance limit and downsampling factor for step 3
|
|
|
|
const unsigned step2_type = 0; // search patterns for steps 2 and 4
|
|
|
|
const unsigned step4_type = 0;
|
|
|
|
const bool bRasterRefinementEnable = true; // enable step 4 mode 1
|
|
|
|
const bool bStarRefinementEnable = false; // enable step 4 mode 2 (only one mode will be executed)
|
|
|
|
|
|
|
|
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
|
|
|
|
|
|
|
unsigned best_cost = UINT32_MAX;
|
|
|
|
uint32_t best_bitcost = 0;
|
|
|
|
int iDist;
|
|
|
|
int best_dist = 0;
|
|
|
|
unsigned best_index = num_cand;
|
2016-02-24 16:10:32 +00:00
|
|
|
int wpp_limit = -1;
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
if (state->encoder_control->owf) {
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
2015-12-04 10:28:26 +00:00
|
|
|
if (state->encoder_control->fme_level > 0) {
|
|
|
|
// Fractional motion estimation can change the mv by at most 1 pixel.
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit -= 1;
|
2015-12-04 10:28:26 +00:00
|
|
|
}
|
|
|
|
if (state->encoder_control->deblock_enable) {
|
|
|
|
// Strong deblock filter modifies 3 pixels.
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit -= 3;
|
2015-12-04 10:28:26 +00:00
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//step 1, compare (0,0) vector to predicted vectors
|
|
|
|
|
|
|
|
// Check whatever input vector we got, unless its (0, 0) which will be checked later.
|
2016-02-24 16:10:32 +00:00
|
|
|
if ((mv.x || mv.y) && intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
|
2015-07-21 09:02:54 +00:00
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int i;
|
|
|
|
// Select starting point from among merge candidates. These should include
|
|
|
|
// both mv_cand vectors and (0, 0).
|
|
|
|
for (i = 0; i < num_cand; ++i)
|
|
|
|
{
|
|
|
|
if (merge_cand[i].dir == 3) continue;
|
|
|
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
|
|
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
uint32_t bitcost;
|
2015-08-26 08:50:27 +00:00
|
|
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2015-11-20 07:06:28 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (best_index < (unsigned)num_cand) {
|
|
|
|
mv.x = merge_cand[best_index].mv[merge_cand[best_index].dir - 1][0] >> 2;
|
|
|
|
mv.y = merge_cand[best_index].mv[merge_cand[best_index].dir - 1][1] >> 2;
|
|
|
|
} else {
|
|
|
|
mv.x = mv_in_out->x >> 2;
|
|
|
|
mv.y = mv_in_out->y >> 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
//step 2, grid search
|
|
|
|
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
|
|
|
{
|
2015-08-26 08:50:27 +00:00
|
|
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step2_type, orig, iDist, &mv, best_cost, &best_dist,
|
2016-02-24 16:10:32 +00:00
|
|
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//step 3, raster scan
|
|
|
|
if (best_dist > iRaster)
|
|
|
|
{
|
|
|
|
best_dist = iRaster;
|
|
|
|
|
2015-11-20 07:06:28 +00:00
|
|
|
best_cost = kvz_tz_raster_search(state, pic, ref, orig, &mv, best_cost, mv_cand, merge_cand,
|
2016-02-24 16:10:32 +00:00
|
|
|
num_cand, ref_idx, &best_bitcost, width, height, iSearchRange, iRaster, wpp_limit);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
//step 4
|
|
|
|
|
|
|
|
//raster refinement
|
|
|
|
if (bRasterRefinementEnable && best_dist > 0)
|
|
|
|
{
|
|
|
|
iDist = best_dist >> 1;
|
|
|
|
while (iDist > 0)
|
|
|
|
{
|
2015-08-26 08:50:27 +00:00
|
|
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
2016-02-24 16:10:32 +00:00
|
|
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
iDist = iDist >> 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//star refinement (repeat step 2 for the current starting point)
|
|
|
|
if (bStarRefinementEnable && best_dist > 0)
|
|
|
|
{
|
|
|
|
for (iDist = 1; iDist <= iSearchRange; iDist *= 2)
|
|
|
|
{
|
2015-08-26 08:50:27 +00:00
|
|
|
best_cost = kvz_tz_pattern_search(state, pic, ref, step4_type, orig, iDist, &mv, best_cost, &best_dist,
|
2016-02-24 16:10:32 +00:00
|
|
|
mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost, width, height, wpp_limit);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mv.x = mv.x << 2;
|
|
|
|
mv.y = mv.y << 2;
|
|
|
|
|
|
|
|
*mv_in_out = mv;
|
|
|
|
*bitcost_out = best_bitcost;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Do motion search using the HEXBS algorithm.
|
|
|
|
*
|
2015-10-12 11:56:59 +00:00
|
|
|
* \param width width of the block to search
|
|
|
|
* \param height height of the block to search
|
2015-07-21 09:02:54 +00:00
|
|
|
* \param pic Picture motion vector is searched for.
|
|
|
|
* \param ref Picture motion vector is searched from.
|
|
|
|
* \param orig Top left corner of the searched for block.
|
|
|
|
* \param mv_in_out Predicted mv in and best out. Quarter pixel precision.
|
|
|
|
*
|
|
|
|
* \returns Cost of the motion vector.
|
|
|
|
*
|
|
|
|
* Motion vector is searched by first searching iteratively with the large
|
|
|
|
* hexagon pattern until the best match is at the center of the hexagon.
|
|
|
|
* As a final step a smaller hexagon is used to check the adjacent pixels.
|
|
|
|
*
|
|
|
|
* If a non 0,0 predicted motion vector predictor is given as mv_in_out,
|
|
|
|
* the 0,0 vector is also tried. This is hoped to help in the case where
|
|
|
|
* the predicted motion vector is way off. In the future even more additional
|
|
|
|
* points like 0,0 might be used, such as vectors from top or left.
|
|
|
|
*/
|
2015-10-12 11:56:59 +00:00
|
|
|
static unsigned hexagon_search(const encoder_state_t * const state,
|
|
|
|
unsigned width, unsigned height,
|
2015-07-21 09:02:54 +00:00
|
|
|
const kvz_picture *pic, const kvz_picture *ref,
|
|
|
|
const vector2d_t *orig, vector2d_t *mv_in_out,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out)
|
|
|
|
{
|
2015-07-21 12:45:57 +00:00
|
|
|
// The start of the hexagonal pattern has been repeated at the end so that
|
|
|
|
// the indices between 1-6 can be used as the start of a 3-point list of new
|
|
|
|
// points to search.
|
|
|
|
// 6--1,7
|
|
|
|
// / \ =)
|
|
|
|
// 5 0 2,8
|
|
|
|
// \ /
|
|
|
|
// 4---3
|
|
|
|
static const vector2d_t large_hexbs[10] = {
|
|
|
|
{ 0, 0 },
|
|
|
|
{ 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }, { -1, -2 },
|
|
|
|
{ 1, -2 }, { 2, 0 }
|
|
|
|
};
|
|
|
|
// This is used as the last step of the hexagon search.
|
|
|
|
// 1
|
|
|
|
// 2 0 3
|
|
|
|
// 4
|
|
|
|
static const vector2d_t small_hexbs[5] = {
|
|
|
|
{ 0, 0 },
|
2015-07-21 12:52:08 +00:00
|
|
|
{ 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 }
|
2015-07-21 12:45:57 +00:00
|
|
|
};
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
|
|
|
unsigned best_cost = UINT32_MAX;
|
|
|
|
uint32_t best_bitcost = 0, bitcost;
|
|
|
|
unsigned i;
|
|
|
|
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
|
2016-02-24 16:10:32 +00:00
|
|
|
int wpp_limit = -1;
|
2015-11-05 10:24:03 +00:00
|
|
|
|
|
|
|
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
if (state->encoder_control->owf) {
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
2015-12-04 10:28:26 +00:00
|
|
|
if (state->encoder_control->fme_level > 0) {
|
|
|
|
// Fractional motion estimation can change the mv by at most 1 pixel.
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit -= 1;
|
2015-12-04 10:28:26 +00:00
|
|
|
}
|
|
|
|
if (state->encoder_control->deblock_enable) {
|
|
|
|
// Strong deblock filter modifies 3 pixels.
|
2016-02-24 16:10:32 +00:00
|
|
|
wpp_limit -= 3;
|
2015-12-04 10:28:26 +00:00
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check mv_in, if it's not in merge candidates.
|
|
|
|
bool mv_in_merge_cand = false;
|
|
|
|
for (int i = 0; i < num_cand; ++i) {
|
|
|
|
if (merge_cand[i].dir == 3) continue;
|
|
|
|
if (merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2 == mv.x &&
|
|
|
|
merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2 == mv.y) {
|
|
|
|
mv_in_merge_cand = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-24 10:10:51 +00:00
|
|
|
if (!mv_in_merge_cand &&
|
2016-02-24 16:10:32 +00:00
|
|
|
intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit))
|
2016-02-24 10:10:51 +00:00
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
best_cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
best_bitcost = bitcost;
|
|
|
|
best_index = num_cand;
|
|
|
|
|
2015-10-12 11:56:59 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Select starting point from among merge candidates. These should include
|
|
|
|
// both mv_cand vectors and (0, 0).
|
|
|
|
for (i = 0; i < num_cand; ++i) {
|
|
|
|
if (merge_cand[i].dir == 3) continue;
|
|
|
|
mv.x = merge_cand[i].mv[merge_cand[i].dir - 1][0] >> 2;
|
|
|
|
mv.y = merge_cand[i].mv[merge_cand[i].dir - 1][1] >> 2;
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv.x, mv.y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-10-12 11:56:59 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (best_index < num_cand) {
|
|
|
|
mv.x = merge_cand[best_index].mv[merge_cand[best_index].dir - 1][0] >> 2;
|
|
|
|
mv.y = merge_cand[best_index].mv[merge_cand[best_index].dir - 1][1] >> 2;
|
|
|
|
} else {
|
|
|
|
mv.x = mv_in_out->x >> 2;
|
|
|
|
mv.y = mv_in_out->y >> 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search the initial 7 points of the hexagon.
|
|
|
|
best_index = 0;
|
|
|
|
for (i = 0; i < 7; ++i) {
|
|
|
|
const vector2d_t *pattern = &large_hexbs[i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned cost;
|
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-08-26 08:50:27 +00:00
|
|
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-10-12 11:56:59 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iteratively search the 3 new points around the best match, until the best
|
|
|
|
// match is in the center.
|
|
|
|
while (best_index != 0) {
|
|
|
|
unsigned start; // Starting point of the 3 offsets to be searched.
|
|
|
|
if (best_index == 1) {
|
|
|
|
start = 6;
|
|
|
|
} else if (best_index == 8) {
|
|
|
|
start = 1;
|
|
|
|
} else {
|
|
|
|
start = best_index - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move the center to the best match.
|
|
|
|
mv.x += large_hexbs[best_index].x;
|
|
|
|
mv.y += large_hexbs[best_index].y;
|
|
|
|
best_index = 0;
|
|
|
|
|
|
|
|
// Iterate through the next 3 points.
|
|
|
|
for (i = 0; i < 3; ++i) {
|
|
|
|
const vector2d_t *offset = &large_hexbs[start + i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned cost;
|
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-08-26 08:50:27 +00:00
|
|
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-10-12 11:56:59 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = start + i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
++offset;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Move the center to the best match.
|
|
|
|
mv.x += large_hexbs[best_index].x;
|
|
|
|
mv.y += large_hexbs[best_index].y;
|
|
|
|
best_index = 0;
|
|
|
|
|
|
|
|
// Do the final step of the search with a small pattern.
|
|
|
|
for (i = 1; i < 5; ++i) {
|
|
|
|
const vector2d_t *offset = &small_hexbs[i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!intmv_within_tile(state, orig, mv.x + offset->x, mv.y + offset->y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned cost;
|
|
|
|
{
|
2015-09-14 09:34:41 +00:00
|
|
|
PERFORMANCE_MEASURE_START(KVZ_PERF_SEARCHPX);
|
2015-08-26 08:50:27 +00:00
|
|
|
cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
2016-02-24 16:10:32 +00:00
|
|
|
width, height, -1);
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-10-12 11:56:59 +00:00
|
|
|
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + width, orig->y, orig->y + height,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + width,
|
2015-07-21 09:02:54 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
2015-10-12 11:56:59 +00:00
|
|
|
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y + height);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (cost > 0 && cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Adjust the movement vector according to the final best match.
|
|
|
|
mv.x += small_hexbs[best_index].x;
|
|
|
|
mv.y += small_hexbs[best_index].y;
|
|
|
|
|
|
|
|
// Return final movement vector in quarter-pixel precision.
|
|
|
|
mv_in_out->x = mv.x << 2;
|
|
|
|
mv_in_out->y = mv.y << 2;
|
|
|
|
|
|
|
|
*bitcost_out = best_bitcost;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-11-20 07:50:01 +00:00
|
|
|
#define IME_FULL_SEARCH_RADIUS 32
|
|
|
|
static unsigned search_mv_full(const encoder_state_t * const state,
|
|
|
|
unsigned width, unsigned height,
|
|
|
|
const kvz_picture *pic, const kvz_picture *ref,
|
|
|
|
const vector2d_t *orig, vector2d_t *mv_in_out,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
2015-07-21 09:02:54 +00:00
|
|
|
int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out)
|
|
|
|
{
|
2015-11-20 07:50:01 +00:00
|
|
|
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
2015-07-21 09:02:54 +00:00
|
|
|
unsigned best_cost = UINT32_MAX;
|
|
|
|
uint32_t best_bitcost = 0, bitcost;
|
2015-11-20 07:50:01 +00:00
|
|
|
const int max_lcu_below = state->encoder_control->owf ? 1 : -1;
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-20 07:50:01 +00:00
|
|
|
int (*calc_mvd)(const encoder_state_t * const,
|
|
|
|
int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) =
|
|
|
|
state->encoder_control->cfg->mv_rdo ? kvz_calc_mvd_cost_cabac : calc_mvd_cost;
|
|
|
|
|
|
|
|
/*if (abs(mv.x) > IME_FULL_SEARCH_RADIUS || abs(mv.y) > IME_FULL_SEARCH_RADIUS) {
|
2015-07-21 09:02:54 +00:00
|
|
|
best_cost = calc_sad(pic, ref, orig->x, orig->y,
|
|
|
|
orig->x, orig->y,
|
|
|
|
block_width, block_width);
|
|
|
|
mv.x = 0;
|
|
|
|
mv.y = 0;
|
|
|
|
}*/
|
|
|
|
|
2015-11-20 07:50:01 +00:00
|
|
|
vector2d_t min_mv = {
|
|
|
|
mv.x - IME_FULL_SEARCH_RADIUS,
|
|
|
|
mv.y - IME_FULL_SEARCH_RADIUS,
|
|
|
|
};
|
|
|
|
vector2d_t max_mv = {
|
|
|
|
mv.x + IME_FULL_SEARCH_RADIUS,
|
|
|
|
mv.y + IME_FULL_SEARCH_RADIUS,
|
|
|
|
};
|
|
|
|
|
|
|
|
for (int y = min_mv.y; y < max_mv.y; ++y) {
|
|
|
|
for (int x = min_mv.x; x < max_mv.x; ++x) {
|
|
|
|
unsigned cost = kvz_image_calc_sad(pic, ref, orig->x, orig->y,
|
|
|
|
orig->x + x,
|
|
|
|
orig->y + y,
|
|
|
|
width, height,
|
|
|
|
max_lcu_below);
|
|
|
|
cost += calc_mvd(state, x, y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
mv.x = x;
|
|
|
|
mv.y = y;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mv_in_out->x = mv.x << 2;
|
|
|
|
mv_in_out->y = mv.y << 2;
|
|
|
|
|
|
|
|
*bitcost_out = best_bitcost;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Do fractional motion estimation
|
|
|
|
*
|
2015-12-08 11:21:01 +00:00
|
|
|
* \param width width of the block
|
|
|
|
* \param height height of the block
|
2015-07-21 09:02:54 +00:00
|
|
|
* \param pic Picture motion vector is searched for.
|
|
|
|
* \param ref Picture motion vector is searched from.
|
|
|
|
* \param orig Top left corner of the searched for block.
|
|
|
|
* \param mv_in_out Predicted mv in and best out. Quarter pixel precision.
|
|
|
|
*
|
|
|
|
* \returns Cost of the motion vector.
|
|
|
|
*
|
|
|
|
* Algoritm first searches 1/2-pel positions around integer mv and after best match is found,
|
|
|
|
* refines the search by searching best 1/4-pel postion around best 1/2-pel position.
|
|
|
|
*/
|
|
|
|
static unsigned search_frac(const encoder_state_t * const state,
|
2015-12-08 11:21:01 +00:00
|
|
|
unsigned width, unsigned height,
|
2015-07-21 09:02:54 +00:00
|
|
|
const kvz_picture *pic, const kvz_picture *ref,
|
|
|
|
const vector2d_t *orig, vector2d_t *mv_in_out,
|
|
|
|
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out)
|
|
|
|
{
|
2015-07-21 12:45:57 +00:00
|
|
|
// Map indexes to relative coordinates in the following way:
|
|
|
|
// 6 7 8
|
|
|
|
// 3 4 5
|
|
|
|
// 0 1 2
|
|
|
|
static const vector2d_t square[9] = {
|
|
|
|
{ -1, 1 }, { 0, 1 }, { 1, 1 },
|
|
|
|
{ -1, 0 }, { 0, 0 }, { 1, 0 },
|
|
|
|
{ -1, -1 }, { 0, -1 }, { 1, -1 }
|
|
|
|
};
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2016-02-24 16:10:32 +00:00
|
|
|
int wpp_limit = -1;
|
|
|
|
if (state->encoder_control->owf) {
|
|
|
|
wpp_limit = 2 * LCU_WIDTH - orig->x % LCU_WIDTH;
|
|
|
|
if (state->encoder_control->deblock_enable) {
|
|
|
|
// Strong deblock filter modifies 3 pixels.
|
|
|
|
wpp_limit -= 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
//Set mv to halfpel precision
|
|
|
|
vector2d_t mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
|
|
|
|
unsigned best_cost = UINT32_MAX;
|
|
|
|
uint32_t best_bitcost = 0, bitcost;
|
|
|
|
unsigned i;
|
2016-02-24 10:10:51 +00:00
|
|
|
unsigned best_index = 4;
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
unsigned cost = 0;
|
|
|
|
|
|
|
|
vector2d_t halfpel_offset;
|
|
|
|
|
|
|
|
#define FILTER_SIZE 8
|
|
|
|
#define HALF_FILTER (FILTER_SIZE>>1)
|
|
|
|
|
2015-08-17 08:12:49 +00:00
|
|
|
kvz_extended_block src = { 0, 0, 0 };
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
//destination buffer for interpolation
|
2015-12-08 11:21:01 +00:00
|
|
|
int dst_stride = (width + 1) * 4;
|
2015-07-21 09:02:54 +00:00
|
|
|
kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
|
|
|
|
kvz_pixel* dst_off = &dst[dst_stride*4+4];
|
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
|
2015-07-21 09:02:54 +00:00
|
|
|
state->tile->lcu_offset_x * LCU_WIDTH,
|
|
|
|
state->tile->lcu_offset_y * LCU_WIDTH,
|
2015-12-08 11:21:01 +00:00
|
|
|
ref->y, ref->width, ref->height, FILTER_SIZE, width+1, height+1, &src);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-12-08 11:21:01 +00:00
|
|
|
kvz_filter_inter_quarterpel_luma(state->encoder_control, src.orig_topleft, src.stride, width+1,
|
|
|
|
height+1, dst, dst_stride, 1, 1);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-08-14 15:39:39 +00:00
|
|
|
if (src.malloc_used) free(src.buffer);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
//Set mv to half-pixel precision
|
|
|
|
mv.x <<= 1;
|
|
|
|
mv.y <<= 1;
|
|
|
|
|
2015-08-10 14:19:31 +00:00
|
|
|
kvz_pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH];
|
|
|
|
kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH];
|
2015-12-08 11:21:01 +00:00
|
|
|
kvz_pixels_blit(pic->y + orig->y*pic->width + orig->x, tmp_pic, width, height, pic->stride, width);
|
2015-08-10 14:19:31 +00:00
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
// Search halfpel positions around best integer mv
|
|
|
|
for (i = 0; i < 9; ++i) {
|
|
|
|
const vector2d_t *pattern = &square[i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!fracmv_within_tile(state, orig, (mv.x + pattern->x) << 1, (mv.y + pattern->y) << 1, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
int y,x;
|
2015-12-08 11:21:01 +00:00
|
|
|
for(y = 0; y < height; ++y) {
|
2015-07-21 09:02:54 +00:00
|
|
|
int dst_y = y*4+pattern->y*2;
|
2015-12-08 11:21:01 +00:00
|
|
|
for(x = 0; x < width; ++x) {
|
2015-07-21 09:02:54 +00:00
|
|
|
int dst_x = x*4+pattern->x*2;
|
2015-12-08 11:21:01 +00:00
|
|
|
tmp_filtered[y*width+x] = dst_off[dst_y*dst_stride+dst_x];
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-08 11:21:01 +00:00
|
|
|
cost = kvz_satd_any_size(width, height,
|
|
|
|
tmp_pic, width,
|
|
|
|
tmp_filtered, width);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-24 10:10:51 +00:00
|
|
|
// Move search to best_index
|
2015-07-21 09:02:54 +00:00
|
|
|
mv.x += square[best_index].x;
|
|
|
|
mv.y += square[best_index].y;
|
|
|
|
halfpel_offset.x = square[best_index].x*2;
|
|
|
|
halfpel_offset.y = square[best_index].y*2;
|
2016-02-24 10:10:51 +00:00
|
|
|
best_index = 4;
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
//Set mv to quarterpel precision
|
|
|
|
mv.x <<= 1;
|
|
|
|
mv.y <<= 1;
|
|
|
|
|
|
|
|
//Search quarterpel points around best halfpel mv
|
|
|
|
for (i = 0; i < 9; ++i) {
|
|
|
|
const vector2d_t *pattern = &square[i];
|
2016-02-24 16:10:32 +00:00
|
|
|
if (!fracmv_within_tile(state, orig, mv.x + pattern->x, mv.y + pattern->y, width, wpp_limit)) {
|
2016-02-24 10:10:51 +00:00
|
|
|
continue;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
int y,x;
|
2015-12-08 11:21:01 +00:00
|
|
|
for(y = 0; y < height; ++y) {
|
2015-07-21 09:02:54 +00:00
|
|
|
int dst_y = y*4+halfpel_offset.y+pattern->y;
|
2015-12-08 11:21:01 +00:00
|
|
|
for(x = 0; x < width; ++x) {
|
2015-07-21 09:02:54 +00:00
|
|
|
int dst_x = x*4+halfpel_offset.x+pattern->x;
|
2015-12-08 11:21:01 +00:00
|
|
|
tmp_filtered[y*width+x] = dst_off[dst_y*dst_stride+dst_x];
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-08 11:21:01 +00:00
|
|
|
cost = kvz_satd_any_size(width, height,
|
|
|
|
tmp_pic, width,
|
|
|
|
tmp_filtered, width);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
if (cost < best_cost) {
|
|
|
|
best_cost = cost;
|
|
|
|
best_index = i;
|
|
|
|
best_bitcost = bitcost;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//Set mv to best final best match
|
|
|
|
mv.x += square[best_index].x;
|
|
|
|
mv.y += square[best_index].y;
|
|
|
|
|
|
|
|
mv_in_out->x = mv.x;
|
|
|
|
mv_in_out->y = mv.y;
|
|
|
|
|
|
|
|
*bitcost_out = best_bitcost;
|
|
|
|
|
|
|
|
return best_cost;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-10-07 10:44:19 +00:00
|
|
|
/**
|
|
|
|
* \brief Perform inter search for a single reference frame.
|
|
|
|
*/
|
2015-11-03 08:32:37 +00:00
|
|
|
static void search_pu_inter_ref(const encoder_state_t * const state,
|
|
|
|
int x, int y,
|
|
|
|
int width, int height,
|
|
|
|
int depth,
|
2015-10-07 10:44:19 +00:00
|
|
|
lcu_t *lcu, cu_info_t *cur_cu,
|
|
|
|
int16_t mv_cand[2][2],
|
|
|
|
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t num_cand,
|
|
|
|
unsigned ref_idx,
|
|
|
|
uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*))
|
|
|
|
{
|
|
|
|
const int x_cu = x >> 3;
|
|
|
|
const int y_cu = y >> 3;
|
|
|
|
const videoframe_t * const frame = state->tile->frame;
|
|
|
|
kvz_picture *ref_image = state->global->ref->images[ref_idx];
|
|
|
|
uint32_t temp_bitcost = 0;
|
|
|
|
uint32_t temp_cost = 0;
|
|
|
|
vector2d_t orig, mvd;
|
|
|
|
int32_t merged = 0;
|
|
|
|
uint8_t cu_mv_cand = 0;
|
|
|
|
int8_t merge_idx = 0;
|
|
|
|
int8_t ref_list = state->global->refmap[ref_idx].list-1;
|
|
|
|
int8_t temp_ref_idx = cur_cu->inter.mv_ref[ref_list];
|
|
|
|
orig.x = x_cu * CU_MIN_SIZE_PIXELS;
|
|
|
|
orig.y = y_cu * CU_MIN_SIZE_PIXELS;
|
|
|
|
// Get MV candidates
|
|
|
|
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
2015-11-03 08:32:37 +00:00
|
|
|
kvz_inter_get_mv_cand(state, x, y, width, height, mv_cand, cur_cu, lcu, ref_list);
|
2015-10-07 10:44:19 +00:00
|
|
|
cur_cu->inter.mv_ref[ref_list] = temp_ref_idx;
|
|
|
|
|
|
|
|
|
|
|
|
vector2d_t mv = { 0, 0 };
|
|
|
|
{
|
|
|
|
// Take starting point for MV search from previous frame.
|
|
|
|
// When temporal motion vector candidates are added, there is probably
|
|
|
|
// no point to this anymore, but for now it helps.
|
2015-11-03 08:32:37 +00:00
|
|
|
// TODO: Update this to work with SMP/AMP blocks.
|
|
|
|
int mid_x_cu = (x + (width >> 1)) / 8;
|
|
|
|
int mid_y_cu = (y + (height >> 1)) / 8;
|
2015-10-07 10:44:19 +00:00
|
|
|
cu_info_t *ref_cu = &state->global->ref->cu_arrays[ref_idx]->data[mid_x_cu + mid_y_cu * (frame->width_in_lcu << MAX_DEPTH)];
|
|
|
|
if (ref_cu->type == CU_INTER) {
|
|
|
|
if (ref_cu->inter.mv_dir & 1) {
|
|
|
|
mv.x = ref_cu->inter.mv[0][0];
|
|
|
|
mv.y = ref_cu->inter.mv[0][1];
|
|
|
|
} else {
|
|
|
|
mv.x = ref_cu->inter.mv[1][0];
|
|
|
|
mv.y = ref_cu->inter.mv[1][1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (state->encoder_control->cfg->ime_algorithm) {
|
|
|
|
case KVZ_IME_TZ:
|
2015-11-20 07:06:28 +00:00
|
|
|
temp_cost += tz_search(state,
|
|
|
|
width, height,
|
|
|
|
frame->source,
|
|
|
|
ref_image,
|
|
|
|
&orig,
|
|
|
|
&mv,
|
|
|
|
mv_cand,
|
|
|
|
merge_cand,
|
|
|
|
num_cand,
|
|
|
|
ref_idx,
|
|
|
|
&temp_bitcost);
|
2015-10-07 10:44:19 +00:00
|
|
|
break;
|
|
|
|
|
2015-11-20 07:50:01 +00:00
|
|
|
case KVZ_IME_FULL:
|
|
|
|
temp_cost += search_mv_full(state,
|
|
|
|
width, height,
|
|
|
|
frame->source,
|
|
|
|
ref_image,
|
|
|
|
&orig,
|
|
|
|
&mv,
|
|
|
|
mv_cand,
|
|
|
|
merge_cand,
|
|
|
|
num_cand,
|
|
|
|
ref_idx,
|
|
|
|
&temp_bitcost);
|
|
|
|
break;
|
|
|
|
|
2015-10-07 10:44:19 +00:00
|
|
|
default:
|
2015-10-12 11:56:59 +00:00
|
|
|
temp_cost += hexagon_search(state,
|
2015-11-03 08:32:37 +00:00
|
|
|
width, height,
|
2015-10-12 11:56:59 +00:00
|
|
|
frame->source,
|
|
|
|
ref_image,
|
|
|
|
&orig,
|
|
|
|
&mv,
|
|
|
|
mv_cand,
|
|
|
|
merge_cand,
|
|
|
|
num_cand,
|
|
|
|
ref_idx,
|
|
|
|
&temp_bitcost);
|
2015-10-07 10:44:19 +00:00
|
|
|
break;
|
2015-11-20 07:50:01 +00:00
|
|
|
}
|
|
|
|
|
2016-02-24 16:10:32 +00:00
|
|
|
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
|
|
|
|
|
2015-10-07 10:44:19 +00:00
|
|
|
if (state->encoder_control->cfg->fme_level > 0) {
|
2015-12-08 11:21:01 +00:00
|
|
|
temp_cost = search_frac(state,
|
|
|
|
width, height,
|
|
|
|
frame->source,
|
|
|
|
ref_image,
|
|
|
|
&orig,
|
|
|
|
&mv,
|
|
|
|
mv_cand,
|
|
|
|
merge_cand,
|
|
|
|
num_cand,
|
|
|
|
ref_idx,
|
|
|
|
&temp_bitcost);
|
2016-02-24 16:10:32 +00:00
|
|
|
assert(fracmv_within_tile(state, &orig, mv.x, mv.y, width, -1));
|
2015-10-07 10:44:19 +00:00
|
|
|
}
|
2016-02-24 10:10:51 +00:00
|
|
|
|
2015-10-07 10:44:19 +00:00
|
|
|
merged = 0;
|
|
|
|
// Check every candidate to find a match
|
|
|
|
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
|
|
|
if (merge_cand[merge_idx].dir != 3 &&
|
|
|
|
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == mv.x &&
|
|
|
|
merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == mv.y &&
|
|
|
|
(uint32_t)merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
|
|
|
|
merged = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Only check when candidates are different
|
|
|
|
if (!merged && (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) {
|
|
|
|
vector2d_t mvd_temp1, mvd_temp2;
|
|
|
|
int cand1_cost,cand2_cost;
|
|
|
|
|
|
|
|
mvd_temp1.x = mv.x - mv_cand[0][0];
|
|
|
|
mvd_temp1.y = mv.y - mv_cand[0][1];
|
|
|
|
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
|
|
|
|
|
|
|
|
mvd_temp2.x = mv.x - mv_cand[1][0];
|
|
|
|
mvd_temp2.y = mv.y - mv_cand[1][1];
|
|
|
|
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
|
|
|
|
|
|
|
|
// Select candidate 1 if it has lower cost
|
|
|
|
if (cand2_cost < cand1_cost) {
|
|
|
|
cu_mv_cand = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mvd.x = mv.x - mv_cand[cu_mv_cand][0];
|
|
|
|
mvd.y = mv.y - mv_cand[cu_mv_cand][1];
|
|
|
|
|
|
|
|
if(temp_cost < cur_cu->inter.cost) {
|
|
|
|
|
|
|
|
// Map reference index to L0/L1 pictures
|
|
|
|
cur_cu->inter.mv_dir = ref_list+1;
|
|
|
|
cur_cu->inter.mv_ref_coded[ref_list] = state->global->refmap[ref_idx].idx;
|
|
|
|
|
|
|
|
cur_cu->merged = merged;
|
|
|
|
cur_cu->merge_idx = merge_idx;
|
|
|
|
cur_cu->inter.mv_ref[ref_list] = ref_idx;
|
|
|
|
cur_cu->inter.mv[ref_list][0] = (int16_t)mv.x;
|
|
|
|
cur_cu->inter.mv[ref_list][1] = (int16_t)mv.y;
|
|
|
|
cur_cu->inter.mvd[ref_list][0] = (int16_t)mvd.x;
|
|
|
|
cur_cu->inter.mvd[ref_list][1] = (int16_t)mvd.y;
|
|
|
|
cur_cu->inter.cost = temp_cost;
|
|
|
|
cur_cu->inter.bitcost = temp_bitcost + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[ref_list];
|
|
|
|
cur_cu->inter.mv_cand[ref_list] = cu_mv_cand;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
/**
|
2015-11-03 08:32:37 +00:00
|
|
|
* \brief Update PU to have best modes at this depth.
|
|
|
|
*
|
|
|
|
* \param state encoder state
|
|
|
|
* \param x_cu x-coordinate of the containing CU
|
|
|
|
* \param y_cu y-coordinate of the containing CU
|
|
|
|
* \param depth depth of the CU in the quadtree
|
|
|
|
* \param part_mode partition mode of the CU
|
|
|
|
* \param i_pu index of the PU in the CU
|
|
|
|
* \param lcu containing LCU
|
|
|
|
*
|
|
|
|
* \return cost of the best mode
|
2015-07-21 09:02:54 +00:00
|
|
|
*/
|
2015-11-03 08:32:37 +00:00
|
|
|
static int search_pu_inter(const encoder_state_t * const state,
|
|
|
|
int x_cu, int y_cu,
|
|
|
|
int depth,
|
|
|
|
part_mode_t part_mode,
|
|
|
|
int i_pu,
|
|
|
|
lcu_t *lcu)
|
2015-07-21 09:02:54 +00:00
|
|
|
{
|
|
|
|
const videoframe_t * const frame = state->tile->frame;
|
2015-11-03 08:32:37 +00:00
|
|
|
const int width_cu = LCU_WIDTH >> depth;
|
|
|
|
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
|
|
|
|
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
|
|
|
|
const int width = PU_GET_W(part_mode, width_cu, i_pu);
|
|
|
|
const int height = PU_GET_H(part_mode, width_cu, i_pu);
|
|
|
|
|
|
|
|
// Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
|
|
|
|
// nRx2N partitions.
|
|
|
|
const bool merge_a1 = i_pu == 0 || width >= height;
|
|
|
|
// Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
|
|
|
|
// 2NxnD partitions.
|
|
|
|
const bool merge_b1 = i_pu == 0 || width <= height;
|
|
|
|
|
|
|
|
const int x_local = SUB_SCU(x);
|
|
|
|
const int y_local = SUB_SCU(y);
|
|
|
|
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
int16_t mv_cand[2][2];
|
|
|
|
// Search for merge mode candidate
|
|
|
|
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS];
|
|
|
|
// Get list of candidates
|
2015-10-12 11:44:03 +00:00
|
|
|
int16_t num_cand = kvz_inter_get_merge_cand(state,
|
|
|
|
x, y,
|
2015-11-03 08:32:37 +00:00
|
|
|
width, height,
|
|
|
|
merge_a1, merge_b1,
|
2015-10-12 11:44:03 +00:00
|
|
|
merge_cand,
|
|
|
|
lcu);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-05 12:31:37 +00:00
|
|
|
uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*) = get_mvd_coding_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
get_mvd_cost = kvz_get_mvd_coding_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-12-04 10:28:26 +00:00
|
|
|
int max_px_below_lcu = -1;
|
2015-07-21 09:02:54 +00:00
|
|
|
if (state->encoder_control->owf) {
|
2015-12-04 10:28:26 +00:00
|
|
|
max_px_below_lcu = LCU_WIDTH;
|
|
|
|
if (state->encoder_control->fme_level > 0) {
|
|
|
|
// Fractional motion estimation can change the mv by at most 1 pixel.
|
|
|
|
max_px_below_lcu -= 1;
|
|
|
|
}
|
|
|
|
if (state->encoder_control->deblock_enable) {
|
|
|
|
// Strong deblock filter modifies 3 pixels.
|
|
|
|
max_px_below_lcu -= 3;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Default to candidate 0
|
|
|
|
cur_cu->inter.mv_cand[0] = 0;
|
|
|
|
cur_cu->inter.mv_cand[1] = 0;
|
|
|
|
|
|
|
|
cur_cu->inter.cost = UINT_MAX;
|
|
|
|
|
2015-11-03 08:32:37 +00:00
|
|
|
uint32_t ref_idx;
|
2015-07-21 09:02:54 +00:00
|
|
|
for (ref_idx = 0; ref_idx < state->global->ref->used_size; ref_idx++) {
|
2015-11-03 08:32:37 +00:00
|
|
|
search_pu_inter_ref(state,
|
|
|
|
x, y,
|
|
|
|
width, height,
|
|
|
|
depth,
|
2015-10-07 10:44:19 +00:00
|
|
|
lcu, cur_cu,
|
|
|
|
mv_cand, merge_cand, num_cand,
|
|
|
|
ref_idx,
|
|
|
|
get_mvd_cost);
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Search bi-pred positions
|
2015-09-08 10:57:15 +00:00
|
|
|
if (state->global->slicetype == KVZ_SLICE_B && state->encoder_control->cfg->bipred) {
|
2015-07-21 09:02:54 +00:00
|
|
|
lcu_t *templcu = MALLOC(lcu_t, 1);
|
2015-12-08 10:43:10 +00:00
|
|
|
unsigned cu_width = LCU_WIDTH >> depth;
|
2015-07-21 09:02:54 +00:00
|
|
|
#define NUM_PRIORITY_LIST 12;
|
|
|
|
static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
|
|
|
|
static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
|
|
|
|
uint8_t cutoff = num_cand;
|
2015-11-05 10:24:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
|
|
|
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
|
|
|
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
|
|
|
if (state->encoder_control->cfg->mv_rdo) {
|
|
|
|
calc_mvd = kvz_calc_mvd_cost_cabac;
|
|
|
|
}
|
|
|
|
|
2015-07-21 09:02:54 +00:00
|
|
|
for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
|
|
|
|
uint8_t i = priorityList0[idx];
|
|
|
|
uint8_t j = priorityList1[idx];
|
|
|
|
if (i >= num_cand || j >= num_cand) break;
|
|
|
|
|
|
|
|
// Find one L0 and L1 candidate according to the priority list
|
|
|
|
if ((merge_cand[i].dir & 0x1) && (merge_cand[j].dir & 0x2)) {
|
|
|
|
if (merge_cand[i].ref[0] != merge_cand[j].ref[1] ||
|
|
|
|
merge_cand[i].mv[0][0] != merge_cand[j].mv[1][0] ||
|
|
|
|
merge_cand[i].mv[0][1] != merge_cand[j].mv[1][1]) {
|
|
|
|
uint32_t bitcost[2];
|
|
|
|
uint32_t cost = 0;
|
|
|
|
int8_t cu_mv_cand = 0;
|
|
|
|
int16_t mv[2][2];
|
|
|
|
kvz_pixel tmp_block[64 * 64];
|
|
|
|
kvz_pixel tmp_pic[64 * 64];
|
|
|
|
// Force L0 and L1 references
|
|
|
|
if (state->global->refmap[merge_cand[i].ref[0]].list == 2 || state->global->refmap[merge_cand[j].ref[1]].list == 1) continue;
|
|
|
|
|
2015-08-10 15:49:12 +00:00
|
|
|
mv[0][0] = merge_cand[i].mv[0][0];
|
|
|
|
mv[0][1] = merge_cand[i].mv[0][1];
|
|
|
|
mv[1][0] = merge_cand[j].mv[1][0];
|
|
|
|
mv[1][1] = merge_cand[j].mv[1][1];
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
// Check boundaries when using owf to process multiple frames at the same time
|
2015-12-04 10:28:26 +00:00
|
|
|
if (max_px_below_lcu >= 0) {
|
2015-11-03 08:32:37 +00:00
|
|
|
// When SAO is off, row is considered reconstructed when the last LCU
|
|
|
|
// is done, although the bottom 2 pixels might still need deblocking.
|
|
|
|
// To work around this, add 2 luma pixels to the reach of the mv
|
|
|
|
// in order to avoid referencing those possibly non-deblocked pixels.
|
|
|
|
int mv_lcu_row_reach_1 = ((y+(mv[0][1]>>2)) + (LCU_WIDTH >> depth) - 1 + 2) / LCU_WIDTH;
|
|
|
|
int mv_lcu_row_reach_2 = ((y+(mv[1][1]>>2)) + (LCU_WIDTH >> depth) - 1 + 2) / LCU_WIDTH;
|
|
|
|
int cur_lcu_row = y / LCU_WIDTH;
|
|
|
|
if (mv_lcu_row_reach_1 > cur_lcu_row + max_px_below_lcu || mv_lcu_row_reach_2 > cur_lcu_row + max_px_below_lcu) {
|
|
|
|
continue;
|
|
|
|
}
|
2015-07-21 09:02:54 +00:00
|
|
|
}
|
|
|
|
|
2015-09-16 07:37:45 +00:00
|
|
|
kvz_inter_recon_lcu_bipred(state,
|
|
|
|
state->global->ref->images[merge_cand[i].ref[0]],
|
|
|
|
state->global->ref->images[merge_cand[j].ref[1]],
|
|
|
|
x, y,
|
2015-11-03 08:32:37 +00:00
|
|
|
width,
|
|
|
|
height,
|
2015-09-16 07:37:45 +00:00
|
|
|
mv,
|
|
|
|
templcu);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-03 08:32:37 +00:00
|
|
|
for (int ypos = 0; ypos < height; ++ypos) {
|
|
|
|
int dst_y = ypos * width;
|
|
|
|
for (int xpos = 0; xpos < width; ++xpos) {
|
2015-09-02 07:55:19 +00:00
|
|
|
tmp_block[dst_y + xpos] = templcu->rec.y[
|
|
|
|
SUB_SCU(y + ypos) * LCU_WIDTH + SUB_SCU(x + xpos)];
|
2015-07-21 09:02:54 +00:00
|
|
|
tmp_pic[dst_y + xpos] = frame->source->y[x + xpos + (y + ypos)*frame->source->width];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-08 10:43:10 +00:00
|
|
|
cost = kvz_satd_any_size(cu_width, cu_width, tmp_pic, cu_width, tmp_block, cu_width);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
2015-11-05 10:24:03 +00:00
|
|
|
cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
|
|
|
|
cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
if (cost < cur_cu->inter.cost) {
|
|
|
|
|
|
|
|
cur_cu->inter.mv_dir = 3;
|
|
|
|
cur_cu->inter.mv_ref_coded[0] = state->global->refmap[merge_cand[i].ref[0]].idx;
|
|
|
|
cur_cu->inter.mv_ref_coded[1] = state->global->refmap[merge_cand[j].ref[1]].idx;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cur_cu->inter.mv_ref[0] = merge_cand[i].ref[0];
|
|
|
|
cur_cu->inter.mv_ref[1] = merge_cand[j].ref[1];
|
|
|
|
|
2015-08-10 15:49:12 +00:00
|
|
|
cur_cu->inter.mv[0][0] = merge_cand[i].mv[0][0];
|
|
|
|
cur_cu->inter.mv[0][1] = merge_cand[i].mv[0][1];
|
|
|
|
cur_cu->inter.mv[1][0] = merge_cand[j].mv[1][0];
|
|
|
|
cur_cu->inter.mv[1][1] = merge_cand[j].mv[1][1];
|
2015-07-21 09:02:54 +00:00
|
|
|
cur_cu->merged = 0;
|
|
|
|
|
|
|
|
// Check every candidate to find a match
|
|
|
|
for(int merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
|
|
|
if (
|
|
|
|
merge_cand[merge_idx].mv[0][0] == cur_cu->inter.mv[0][0] &&
|
|
|
|
merge_cand[merge_idx].mv[0][1] == cur_cu->inter.mv[0][1] &&
|
|
|
|
merge_cand[merge_idx].mv[1][0] == cur_cu->inter.mv[1][0] &&
|
|
|
|
merge_cand[merge_idx].mv[1][1] == cur_cu->inter.mv[1][1] &&
|
|
|
|
merge_cand[merge_idx].ref[0] == cur_cu->inter.mv_ref[0] &&
|
|
|
|
merge_cand[merge_idx].ref[1] == cur_cu->inter.mv_ref[1]) {
|
|
|
|
cur_cu->merged = 1;
|
|
|
|
cur_cu->merge_idx = merge_idx;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Each motion vector has its own candidate
|
|
|
|
for (int reflist = 0; reflist < 2; reflist++) {
|
|
|
|
cu_mv_cand = 0;
|
2015-11-03 08:32:37 +00:00
|
|
|
kvz_inter_get_mv_cand(state, x, y, width, height, mv_cand, cur_cu, lcu, reflist);
|
2015-07-21 09:02:54 +00:00
|
|
|
if ((mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1])) {
|
|
|
|
vector2d_t mvd_temp1, mvd_temp2;
|
|
|
|
int cand1_cost, cand2_cost;
|
|
|
|
|
|
|
|
mvd_temp1.x = cur_cu->inter.mv[reflist][0] - mv_cand[0][0];
|
|
|
|
mvd_temp1.y = cur_cu->inter.mv[reflist][1] - mv_cand[0][1];
|
2015-11-05 12:31:37 +00:00
|
|
|
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
mvd_temp2.x = cur_cu->inter.mv[reflist][0] - mv_cand[1][0];
|
|
|
|
mvd_temp2.y = cur_cu->inter.mv[reflist][1] - mv_cand[1][1];
|
2015-11-05 12:31:37 +00:00
|
|
|
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
|
2015-07-21 09:02:54 +00:00
|
|
|
|
|
|
|
// Select candidate 1 if it has lower cost
|
|
|
|
if (cand2_cost < cand1_cost) {
|
|
|
|
cu_mv_cand = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cur_cu->inter.mvd[reflist][0] = cur_cu->inter.mv[reflist][0] - mv_cand[cu_mv_cand][0];
|
|
|
|
cur_cu->inter.mvd[reflist][1] = cur_cu->inter.mv[reflist][1] - mv_cand[cu_mv_cand][1];
|
|
|
|
cur_cu->inter.mv_cand[reflist] = cu_mv_cand;
|
|
|
|
}
|
|
|
|
cur_cu->inter.cost = cost;
|
|
|
|
cur_cu->inter.bitcost = bitcost[0] + bitcost[1] + cur_cu->inter.mv_dir - 1 + cur_cu->inter.mv_ref_coded[0] + cur_cu->inter.mv_ref_coded[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
FREE_POINTER(templcu);
|
|
|
|
}
|
|
|
|
|
|
|
|
return cur_cu->inter.cost;
|
|
|
|
}
|
2015-11-03 08:32:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Update CU to have best modes at this depth.
|
|
|
|
*
|
|
|
|
* Only searches the 2Nx2N partition mode.
|
|
|
|
*
|
|
|
|
* \param state encoder state
|
|
|
|
* \param x x-coordinate of the CU
|
|
|
|
* \param y y-coordinate of the CU
|
|
|
|
* \param depth depth of the CU in the quadtree
|
|
|
|
* \param lcu containing LCU
|
|
|
|
*
|
|
|
|
* \return cost of the best mode
|
|
|
|
*/
|
|
|
|
int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int depth, lcu_t *lcu)
|
|
|
|
{
|
|
|
|
return search_pu_inter(state, x, y, depth, SIZE_2Nx2N, 0, lcu);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* \brief Update CU to have best modes at this depth.
|
|
|
|
*
|
|
|
|
* Only searches the given partition mode.
|
|
|
|
*
|
|
|
|
* \param state encoder state
|
|
|
|
* \param x x-coordinate of the CU
|
|
|
|
* \param y y-coordinate of the CU
|
|
|
|
* \param depth depth of the CU in the quadtree
|
|
|
|
* \param part_mode partition mode to search
|
|
|
|
* \param lcu containing LCU
|
|
|
|
*
|
|
|
|
* \return cost of the best mode
|
|
|
|
*/
|
|
|
|
int kvz_search_cu_smp(const encoder_state_t * const state,
|
|
|
|
int x, int y,
|
|
|
|
int depth,
|
|
|
|
part_mode_t part_mode,
|
|
|
|
lcu_t *lcu)
|
|
|
|
{
|
|
|
|
const int num_pu = kvz_part_mode_num_parts[part_mode];
|
|
|
|
const int width_scu = (LCU_WIDTH >> depth) >> MAX_DEPTH;
|
|
|
|
const int y_scu = SUB_SCU(y) >> MAX_DEPTH;
|
|
|
|
const int x_scu = SUB_SCU(x) >> MAX_DEPTH;
|
|
|
|
|
|
|
|
int cost = 0;
|
|
|
|
for (int i = 0; i < num_pu; ++i) {
|
|
|
|
const int x_pu = PU_GET_X(part_mode, width_scu, x_scu, i);
|
|
|
|
const int y_pu = PU_GET_Y(part_mode, width_scu, y_scu, i);
|
|
|
|
const int width_pu = PU_GET_W(part_mode, width_scu, i);
|
|
|
|
const int height_pu = PU_GET_H(part_mode, width_scu, i);
|
|
|
|
cu_info_t *cur_pu = LCU_GET_CU(lcu, x_pu, y_pu);
|
|
|
|
|
|
|
|
cur_pu->type = CU_INTER;
|
|
|
|
cur_pu->part_size = part_mode;
|
|
|
|
cur_pu->depth = depth;
|
|
|
|
|
|
|
|
cost += search_pu_inter(state, x, y, depth, part_mode, i, lcu);
|
|
|
|
|
|
|
|
for (int y = y_pu; y < y_pu + height_pu; ++y) {
|
|
|
|
for (int x = x_pu; x < x_pu + width_pu; ++x) {
|
|
|
|
cu_info_t *scu = LCU_GET_CU(lcu, x, y);
|
|
|
|
scu->type = CU_INTER;
|
|
|
|
memcpy(&scu->inter, &cur_pu->inter, sizeof(cur_pu->inter));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return cost;
|
|
|
|
}
|