diff --git a/src/image.c b/src/image.c
index fd23f3ee..2528b3c4 100644
--- a/src/image.c
+++ b/src/image.c
@@ -451,8 +451,15 @@ static unsigned image_interpolated_sad(const kvz_picture *pic, const kvz_picture
 *
 * \returns
 */
-unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y,
-                            int block_width, int block_height, int max_px_below_lcu) {
+unsigned kvz_image_calc_sad(const kvz_picture *pic,
+                            const kvz_picture *ref,
+                            int pic_x,
+                            int pic_y,
+                            int ref_x,
+                            int ref_y,
+                            int block_width,
+                            int block_height,
+                            int max_px_below_lcu) {
   assert(pic_x >= 0 && pic_x <= pic->width - block_width);
   assert(pic_y >= 0 && pic_y <= pic->height - block_height);
 
@@ -480,6 +487,72 @@ unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int
 }
 
 
+/**
+* \brief Calculate SATD between a block in pic and a block in ref.
+*
+* When the reference block lies completely inside ref, the SATD is computed
+* directly with kvz_satd_any_size(). Otherwise no interpolated SATD exists
+* yet, so the interpolated SAD scaled by 2.4 is returned instead.
+*
+* The block must lie completely inside pic; this is checked with assert.
+*
+* \param pic           Image for the block we are trying to find.
+* \param ref           Image where we are trying to find the block.
+* \param pic_x         X coordinate of the block in pic.
+* \param pic_y         Y coordinate of the block in pic.
+* \param ref_x         X coordinate of the block in ref; may be outside ref.
+* \param ref_y         Y coordinate of the block in ref; may be outside ref.
+* \param block_width   Width of the block.
+* \param block_height  Height of the block.
+*
+* \returns  SATD, or 2.4 times the interpolated SAD; in both cases the raw
+*           cost is shifted right by (KVZ_BIT_DEPTH - 8) first.
+*/
+unsigned kvz_image_calc_satd(const kvz_picture *pic,
+                             const kvz_picture *ref,
+                             int pic_x,
+                             int pic_y,
+                             int ref_x,
+                             int ref_y,
+                             int block_width,
+                             int block_height)
+{
+  assert(pic_x >= 0 && pic_x <= pic->width - block_width);
+  assert(pic_y >= 0 && pic_y <= pic->height - block_height);
+
+  if (ref_x >= 0 && ref_x <= ref->width - block_width &&
+      ref_y >= 0 && ref_y <= ref->height - block_height)
+  {
+    // Reference block is completely inside the frame, so just calculate the
+    // SATD directly. This is the most common case, which is why it's first.
+    const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
+    const kvz_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x];
+    return kvz_satd_any_size(block_width,
+                             block_height,
+                             pic_data,
+                             pic->stride,
+                             ref_data,
+                             ref->stride) >> (KVZ_BIT_DEPTH - 8);
+  } else {
+    // Call a routine that knows how to interpolate pixels outside the frame.
+    // TODO: write interpolated SATD
+    const unsigned sad = image_interpolated_sad(pic,
+                                                ref,
+                                                pic_x,
+                                                pic_y,
+                                                ref_x,
+                                                ref_y,
+                                                block_width,
+                                                block_height) >> (KVZ_BIT_DEPTH - 8);
+    // NOTE(review): 2.4 looks like an empirical SAD-to-SATD scale factor;
+    // confirm. The cast makes the double-to-unsigned truncation explicit.
+    return (unsigned)(2.4 * sad);
+  }
+}
+
+
+
+
 /**
  * \brief BLock Image Transfer from one buffer to another.
  *
diff --git a/src/image.h b/src/image.h
index 2475537c..01e670a4 100644
--- a/src/image.h
+++ b/src/image.h
@@ -78,6 +78,16 @@ unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int
                     int block_width, int block_height, int max_lcu_below);
 
 
+unsigned kvz_image_calc_satd(const kvz_picture *pic,
+                             const kvz_picture *ref,
+                             int pic_x,
+                             int pic_y,
+                             int ref_x,
+                             int ref_y,
+                             int block_width,
+                             int block_height);
+
+
 void kvz_pixels_blit(const kvz_pixel* orig, kvz_pixel *dst,
                          unsigned width, unsigned height,
                          unsigned orig_stride, unsigned dst_stride);
diff --git a/src/search_inter.c b/src/search_inter.c
index d10f4abc..b3a5eaef 100644
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -1245,8 +1245,6 @@ static void search_pu_inter_ref(encoder_state_t * const state,
   const videoframe_t * const frame = state->tile->frame;
   kvz_picture *ref_image = state->frame->ref->images[ref_idx];
   const vector2d_t orig = { x, y };
-  uint32_t temp_bitcost = 0;
-  uint32_t temp_cost = 0;
   int32_t merged = 0;
   uint8_t cu_mv_cand = 0;
   int8_t merge_idx = 0;
@@ -1287,19 +1285,21 @@ static void search_pu_inter_ref(encoder_state_t * const state,
     default:
       break;
   }
+  uint32_t temp_cost = 0;
+  uint32_t temp_bitcost = 0;
   switch (state->encoder_control->cfg.ime_algorithm) {
     case KVZ_IME_TZ:
-      temp_cost += tz_search(state,
-                             width, height,
-                             frame->source,
-                             ref_image,
-                             &orig,
-                             &mv,
-                             mv_cand,
-                             merge_cand,
-                             num_cand,
-                             ref_idx,
-                             &temp_bitcost);
+      temp_cost = tz_search(state,
+                            width, height,
+                            frame->source,
+                            ref_image,
+                            &orig,
+                            &mv,
+                            mv_cand,
+                            merge_cand,
+                            num_cand,
+                            ref_idx,
+                            &temp_bitcost);
       break;
 
 
@@ -1308,32 +1308,32 @@ static void search_pu_inter_ref(encoder_state_t * const state,
     case KVZ_IME_FULL16:
     case KVZ_IME_FULL8:
     case KVZ_IME_FULL:
-      temp_cost += search_mv_full(state,
-                                  width, height,
-                                  frame->source,
-                                  ref_image,
-                                  &orig,
-                                  &mv,
-                                  mv_cand,
-                                  merge_cand,
-                                  num_cand,
-                                  ref_idx,
-                                  search_range,
-                                  &temp_bitcost);
+      temp_cost = search_mv_full(state,
+                                 width, height,
+                                 frame->source,
+                                 ref_image,
+                                 &orig,
+                                 &mv,
+                                 mv_cand,
+                                 merge_cand,
+                                 num_cand,
+                                 ref_idx,
+                                 search_range,
+                                 &temp_bitcost);
       break;
 
     default:
-      temp_cost += hexagon_search(state,
-                                  width, height,
-                                  frame->source,
-                                  ref_image,
-                                  &orig,
-                                  &mv,
-                                  mv_cand,
-                                  merge_cand,
-                                  num_cand,
-                                  ref_idx,
-                                  &temp_bitcost);
+      temp_cost = hexagon_search(state,
+                                 width, height,
+                                 frame->source,
+                                 ref_image,
+                                 &orig,
+                                 &mv,
+                                 mv_cand,
+                                 merge_cand,
+                                 num_cand,
+                                 ref_idx,
+                                 &temp_bitcost);
       break;
   }
 
@@ -1349,8 +1349,20 @@ static void search_pu_inter_ref(encoder_state_t * const state,
                             num_cand,
                             ref_idx,
                             &temp_bitcost);
+  } else if (temp_cost < INT_MAX) {
+    // Recalculate inter cost with SATD.
+    temp_cost = kvz_image_calc_satd(
+        frame->source,
+        ref_image,
+        orig.x,
+        orig.y,
+        state->tile->offset_x + orig.x + (mv.x >> 2),
+        state->tile->offset_y + orig.y + (mv.y >> 2),
+        width,
+        height);
+    temp_cost += temp_bitcost * (int)(state->lambda_sqrt + 0.5);
   }
-  
+
   merged = 0;
   // Check every candidate to find a match
   for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
diff --git a/src/search_inter.h b/src/search_inter.h
index 3486ec6e..e42ac667 100644
--- a/src/search_inter.h
+++ b/src/search_inter.h
@@ -73,4 +73,10 @@ void kvz_search_cu_smp(encoder_state_t * const state,
                        double *inter_cost,
                        uint32_t *inter_bitcost);
 
+
+unsigned kvz_inter_satd_cost(const encoder_state_t* state,
+                             const lcu_t *lcu,
+                             int x,
+                             int y);
+
 #endif // SEARCH_INTER_H_