Implemented inter search and reconstruction using the LCU structure

This commit is contained in:
Marko Viitanen 2014-03-03 14:51:36 +02:00
parent b5fca8b206
commit 1f82239367
3 changed files with 68 additions and 32 deletions

View file

@ -71,15 +71,15 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
* \param ypos block y position * \param ypos block y position
* \param width block width * \param width block width
* \param mv[2] motion vector * \param mv[2] motion vector
* \param dst destination picture * \param lcu destination lcu
* \returns Void * \returns Void
*/ */
void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], picture *dst) void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu)
{ {
int x,y,coord_x,coord_y; int x,y,coord_x,coord_y;
int16_t mv[2] = { mv_param[0], mv_param[1] }; int16_t mv[2] = { mv_param[0], mv_param[1] };
int32_t dst_width_c = dst->width>>1; //!< Destination picture width in chroma pixels int32_t dst_width_c = LCU_WIDTH>>1; //!< Destination picture width in chroma pixels
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
// negative overflow flag // negative overflow flag
@ -146,8 +146,10 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
// Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x // Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) { for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) { for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) {
dst->u_recdata[y*dst_width_c + x] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x]; int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
dst->v_recdata[y*dst_width_c + x] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x]; int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (uint8_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
} }
} }
} }
@ -157,6 +159,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
// Copy Luma with boundary checking // Copy Luma with boundary checking
for (y = ypos; y < ypos + width; y++) { for (y = ypos; y < ypos + width; y++) {
for (x = xpos; x < xpos + width; x++) { for (x = xpos; x < xpos + width; x++) {
int x_in_lcu = (x & ((LCU_WIDTH)-1));
int y_in_lcu = (y & ((LCU_WIDTH)-1));
coord_x = x + mv[0]; coord_x = x + mv[0];
coord_y = y + mv[1]; coord_y = y + mv[1];
overflow_neg_x = (coord_x < 0)?1:0; overflow_neg_x = (coord_x < 0)?1:0;
@ -180,7 +185,7 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
} }
// set destination to (corrected) pixel value from the reference // set destination to (corrected) pixel value from the reference
dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y*ref->width + coord_x]; lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y*ref->width + coord_x];
} }
} }
@ -189,6 +194,9 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
// TODO: chroma fractional pixel interpolation // TODO: chroma fractional pixel interpolation
for (y = ypos>>1; y < (ypos + width)>>1; y++) { for (y = ypos>>1; y < (ypos + width)>>1; y++) {
for (x = xpos>>1; x < (xpos + width)>>1; x++) { for (x = xpos>>1; x < (xpos + width)>>1; x++) {
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
coord_x = x + (mv[0]>>1); coord_x = x + (mv[0]>>1);
coord_y = y + (mv[1]>>1); coord_y = y + (mv[1]>>1);
@ -213,8 +221,8 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
} }
// set destinations to (corrected) pixel value from the reference // set destinations to (corrected) pixel value from the reference
dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y*ref_width_c + coord_x]; lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y*ref_width_c + coord_x];
dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y*ref_width_c + coord_x]; lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y*ref_width_c + coord_x];
} }
} }
@ -222,9 +230,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
} else { //If no overflow, we can copy without checking boundaries } else { //If no overflow, we can copy without checking boundaries
// Copy Luma // Copy Luma
for (y = ypos; y < ypos + width; y++) { for (y = ypos; y < ypos + width; y++) {
int y_in_lcu = (y & ((LCU_WIDTH)-1));
coord_y = (y + mv[1]) * ref->width; // pre-calculate coord_y = (y + mv[1]) * ref->width; // pre-calculate
for (x = xpos; x < xpos + width; x++) { for (x = xpos; x < xpos + width; x++) {
dst->y_recdata[y * dst->width + x] = ref->y_recdata[coord_y + x + mv[0]]; int x_in_lcu = (x & ((LCU_WIDTH)-1));
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + x + mv[0]];
} }
} }
@ -232,10 +243,12 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in
// Copy Chroma // Copy Chroma
// TODO: chroma fractional pixel interpolation // TODO: chroma fractional pixel interpolation
for (y = ypos>>1; y < (ypos + width)>>1; y++) { for (y = ypos>>1; y < (ypos + width)>>1; y++) {
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
coord_y = (y + (mv[1]>>1)) * ref_width_c; // pre-calculate coord_y = (y + (mv[1]>>1)) * ref_width_c; // pre-calculate
for (x = xpos>>1; x < (xpos + width)>>1; x++) { for (x = xpos>>1; x < (xpos + width)>>1; x++) {
dst->u_recdata[y*dst_width_c + x] = ref->u_recdata[coord_y + x + (mv[0]>>1)]; int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
dst->v_recdata[y*dst_width_c + x] = ref->v_recdata[coord_y + x + (mv[0]>>1)]; lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
} }
} }
} }

View file

@ -31,7 +31,7 @@
void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu); void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv[2], picture* dst); void inter_recon_lcu(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu);
void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth,
cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1); cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1);

View file

@ -279,22 +279,33 @@ static unsigned search_mv_full(unsigned depth,
} }
#endif #endif
static void search_inter(encoder_control *encoder, uint16_t x_ctb,
uint16_t y_ctb, uint8_t depth)
/**
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
*/
static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t *lcu)
{ {
picture *cur_pic = encoder->in.cur_pic; picture *cur_pic = encoder->in.cur_pic;
int32_t ref_idx = 0; int32_t ref_idx = 0;
cu_info *cur_cu = &cur_pic->cu_array[depth][x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)]; int x_local = (x&0x3f), y_local = (y&0x3f);
int x_cu = x>>3;
int y_cu = y>>3;
int cu_pos = LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH;
cu_info *cur_cu = &lcu->cu[cu_pos];
cur_cu->inter.cost = UINT_MAX; cur_cu->inter.cost = UINT_MAX;
for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) { for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
picture *ref_pic = encoder->ref->pics[ref_idx]; picture *ref_pic = encoder->ref->pics[ref_idx];
unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu); unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_ctb * width_in_scu + x_ctb]; cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
uint32_t temp_cost = (int)(g_lambda_cost[encoder->QP] * ref_idx); uint32_t temp_cost = (int)(g_lambda_cost[encoder->QP] * ref_idx);
vector2d orig, mv; vector2d orig, mv;
orig.x = x_ctb * CU_MIN_SIZE_PIXELS; orig.x = x_cu * CU_MIN_SIZE_PIXELS;
orig.y = y_ctb * CU_MIN_SIZE_PIXELS; orig.y = y_cu * CU_MIN_SIZE_PIXELS;
mv.x = 0; mv.x = 0;
mv.y = 0; mv.y = 0;
if (ref_cu->type == CU_INTER) { if (ref_cu->type == CU_INTER) {
@ -303,7 +314,7 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
} }
#if SEARCH_MV_FULL_RADIUS #if SEARCH_MV_FULL_RADIUS
cur_cu->inter.cost = search_mv_full(depth, cur_pic, ref_pic, &orig, &mv); temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv);
#else #else
temp_cost += hexagon_search(depth, cur_pic, ref_pic, &orig, &mv); temp_cost += hexagon_search(depth, cur_pic, ref_pic, &orig, &mv);
#endif #endif
@ -316,9 +327,9 @@ static void search_inter(encoder_control *encoder, uint16_t x_ctb,
} }
} }
return cur_cu->inter.cost;
} }
/** /**
* Copy all non-reference CU data from depth+1 to depth. * Copy all non-reference CU data from depth+1 to depth.
*/ */
@ -453,15 +464,25 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
} }
} }
static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cur_cu)
/**
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
*/
static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lcu_t lcu)
{ {
int cost = MAX_INT; const int width_cu = LCU_CU_WIDTH >> depth;
return cost; const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
cu_info *const lcu_cu = &lcu->cu[LCU_CU_OFFSET];
int x, y;
// Set mode in every CU covered by part_mode in this depth.
for (y = y_cu; y < y_cu + width_cu; ++y) {
for (x = x_cu; x < x_cu + width_cu; ++x) {
cu_info *cu = &lcu_cu[x + y * LCU_T_CU_WIDTH];
cu->depth = depth;
cu->type = CU_INTER;
cu->merged = cur_cu->merged;
cu->skipped = cur_cu->skipped;
memcpy(&cu->inter, &cur_cu->inter, sizeof(cu_info_inter));
}
}
} }
@ -575,6 +596,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
// Assign correct depth // Assign correct depth
cur_cu->skipped = 0; cur_cu->merged = 0;
cur_cu->depth = depth; cur_cu->tr_depth = depth; cur_cu->depth = depth; cur_cu->tr_depth = depth;
cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N; cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N;
// If the CU is completely inside the frame at this depth, search for // If the CU is completely inside the frame at this depth, search for
@ -588,8 +610,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
depth >= MIN_INTER_SEARCH_DEPTH && depth >= MIN_INTER_SEARCH_DEPTH &&
depth <= MAX_INTER_SEARCH_DEPTH) depth <= MAX_INTER_SEARCH_DEPTH)
{ {
int mode_cost = search_cu_inter(encoder, x, y, depth, work_tree[depth]); int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
if (mode_cost < cost) { if (0&&mode_cost < cost) {
cost = mode_cost; cost = mode_cost;
cur_cu->type = CU_INTER; cur_cu->type = CU_INTER;
} }
@ -611,7 +633,8 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size); lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size);
intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height); intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height);
} else if (cur_cu->type == CU_INTER) { } else if (cur_cu->type == CU_INTER) {
// TODO lcu_set_inter(&work_tree[depth], x, y, depth, cur_cu);
inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
} }
} }