mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Implemented inter search and reconstruction with lcu structure
This commit is contained in:
parent
b5fca8b206
commit
1f82239367
37
src/inter.c
37
src/inter.c
|
@ -71,15 +71,15 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
|
|||
* \param ypos block y position
|
||||
* \param width block width
|
||||
* \param mv[2] motion vector
|
||||
* \param dst destination picture
|
||||
* \param lcu destination lcu
|
||||
* \returns Void
|
||||
*/
|
||||
void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], picture *dst)
|
||||
void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu)
|
||||
{
|
||||
int x,y,coord_x,coord_y;
|
||||
int16_t mv[2] = { mv_param[0], mv_param[1] };
|
||||
|
||||
int32_t dst_width_c = dst->width>>1; //!< Destination picture width in chroma pixels
|
||||
int32_t dst_width_c = LCU_WIDTH>>1; //!< Destination LCU width in chroma pixels
|
||||
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
|
||||
|
||||
// negative overflow flag
|
||||
|
@ -146,8 +146,10 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
// Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
|
||||
for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
|
||||
for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) {
|
||||
dst->u_recdata[y*dst_width_c + x] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
|
||||
dst->v_recdata[y*dst_width_c + x] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
|
||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
|
||||
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -157,6 +159,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
// Copy Luma with boundary checking
|
||||
for (y = ypos; y < ypos + width; y++) {
|
||||
for (x = xpos; x < xpos + width; x++) {
|
||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||
|
||||
coord_x = x + mv[0];
|
||||
coord_y = y + mv[1];
|
||||
overflow_neg_x = (coord_x < 0)?1:0;
|
||||
|
@ -178,9 +183,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
} else if (overflow_pos_y) {
|
||||
coord_y = ref->height - 1;
|
||||
}
|
||||
|
||||
|
||||
// set destination to (corrected) pixel value from the reference
|
||||
dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y*ref->width + coord_x];
|
||||
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y*ref->width + coord_x];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,6 +194,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
// TODO: chroma fractional pixel interpolation
|
||||
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
||||
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||
|
||||
coord_x = x + (mv[0]>>1);
|
||||
coord_y = y + (mv[1]>>1);
|
||||
|
||||
|
@ -213,8 +221,8 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
}
|
||||
|
||||
// set destinations to (corrected) pixel value from the reference
|
||||
dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y*ref_width_c + coord_x];
|
||||
dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y*ref_width_c + coord_x];
|
||||
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y*ref_width_c + coord_x];
|
||||
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y*ref_width_c + coord_x];
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -222,9 +230,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
} else { //If no overflow, we can copy without checking boundaries
|
||||
// Copy Luma
|
||||
for (y = ypos; y < ypos + width; y++) {
|
||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||
coord_y = (y + mv[1]) * ref->width; // pre-calculate
|
||||
for (x = xpos; x < xpos + width; x++) {
|
||||
dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y + x + mv[0]];
|
||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||
|
||||
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + x + mv[0]];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -232,10 +243,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
|
|||
// Copy Chroma
|
||||
// TODO: chroma fractional pixel interpolation
|
||||
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||
coord_y = (y + (mv[1]>>1)) * ref_width_c; // pre-calculate
|
||||
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
||||
dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
|
||||
dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
|
||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
|
||||
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
|
||||
void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
|
||||
void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv[2], picture* dst);
|
||||
void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu);
|
||||
|
||||
void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth,
|
||||
cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1);
|
||||
|
|
61
src/search.c
61
src/search.c
|
@ -279,22 +279,33 @@ static unsigned search_mv_full(unsigned depth,
|
|||
}
|
||||
#endif
|
||||
|
||||
static void search_inter(encoder_control *encoder, uint16_t x_ctb,
|
||||
uint16_t y_ctb, uint8_t depth)
|
||||
|
||||
|
||||
/**
|
||||
* Update lcu to have best modes at this depth.
|
||||
* \return Cost of best mode.
|
||||
*/
|
||||
static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t *lcu)
|
||||
{
|
||||
picture *cur_pic = encoder->in.cur_pic;
|
||||
int32_t ref_idx = 0;
|
||||
cu_info *cur_cu = &cur_pic->cu_array[depth][x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)];
|
||||
int x_local = (x&0x3f), y_local = (y&0x3f);
|
||||
int x_cu = x>>3;
|
||||
int y_cu = y>>3;
|
||||
int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
|
||||
|
||||
cu_info *cur_cu = &lcu->cu[cu_pos];
|
||||
|
||||
cur_cu->inter.cost = UINT_MAX;
|
||||
|
||||
for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
|
||||
picture *ref_pic = encoder->ref->pics[ref_idx];
|
||||
unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
|
||||
cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_ctb * width_in_scu + x_ctb];
|
||||
cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
|
||||
uint32_t temp_cost = (int)(g_lambda_cost[encoder->QP] * ref_idx);
|
||||
vector2d orig, mv;
|
||||
orig.x = x_ctb * CU_MIN_SIZE_PIXELS;
|
||||
orig.y = y_ctb * CU_MIN_SIZE_PIXELS;
|
||||
orig.x = x_cu * CU_MIN_SIZE_PIXELS;
|
||||
orig.y = y_cu * CU_MIN_SIZE_PIXELS;
|
||||
mv.x = 0;
|
||||
mv.y = 0;
|
||||
if (ref_cu->type == CU_INTER) {
|
||||
|
@ -303,7 +314,7 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
|
|||
}
|
||||
|
||||
#if SEARCH_MV_FULL_RADIUS
|
||||
cur_cu->inter.cost = search_mv_full(depth, cur_pic, ref_pic, &orig, &mv);
|
||||
temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv);
|
||||
#else
|
||||
temp_cost += hexagon_search(depth, cur_pic, ref_pic, &orig, &mv);
|
||||
#endif
|
||||
|
@ -316,9 +327,9 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
|
|||
}
|
||||
}
|
||||
|
||||
return cur_cu->inter.cost;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Copy all non-reference CU data from depth+1 to depth.
|
||||
*/
|
||||
|
@ -453,15 +464,25 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Update lcu to have best modes at this depth.
|
||||
* \return Cost of best mode.
|
||||
*/
|
||||
static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t lcu)
|
||||
static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cur_cu)
|
||||
{
|
||||
int cost = MAX_INT;
|
||||
return cost;
|
||||
const int width_cu = LCU_CU_WIDTH >> depth;
|
||||
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
|
||||
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
|
||||
cu_info *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
|
||||
int x, y;
|
||||
|
||||
// Copy depth, type, merge/skip flags and inter data from cur_cu to every CU covered by this block at this depth.
|
||||
for (y = y_cu; y < y_cu + width_cu; ++y) {
|
||||
for (x = x_cu; x < x_cu + width_cu; ++x) {
|
||||
cu_info *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
|
||||
cu->depth = depth;
|
||||
cu->type = CU_INTER;
|
||||
cu->merged = cur_cu->merged;
|
||||
cu->skipped = cur_cu->skipped;
|
||||
memcpy(&cu->inter, &cur_cu->inter, sizeof(cu_info_inter));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -575,6 +596,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
|
|||
|
||||
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
|
||||
// Assign correct depth
|
||||
cur_cu->skipped = 0; cur_cu->merged = 0;
|
||||
cur_cu->depth = depth; cur_cu->tr_depth = depth;
|
||||
cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N;
|
||||
// If the CU is completely inside the frame at this depth, search for
|
||||
|
@ -588,8 +610,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
|
|||
depth >= MIN_INTER_SEARCH_DEPTH &&
|
||||
depth <= MAX_INTER_SEARCH_DEPTH)
|
||||
{
|
||||
int mode_cost = search_cu_inter(encoder, x, y, depth, work_tree[depth]);
|
||||
if (mode_cost < cost) {
|
||||
int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
|
||||
if (0&&mode_cost < cost) {
|
||||
cost = mode_cost;
|
||||
cur_cu->type = CU_INTER;
|
||||
}
|
||||
|
@ -611,7 +633,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
|
|||
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size);
|
||||
intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height);
|
||||
} else if (cur_cu->type == CU_INTER) {
|
||||
// TODO
|
||||
lcu_set_inter(&work_tree[depth], x, y, depth, cur_cu);
|
||||
inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue