From 91cb0fbd45f820ea5c97efb37021a650d67d40af Mon Sep 17 00:00:00 2001
From: Pauli Oikkonen
Date: Fri, 18 Jan 2019 17:32:52 +0200
Subject: [PATCH] Create strategy for directly obtaining pointer to constant-width SAD function

---
 src/search_inter.c                            |  9 ++++++-
 src/strategies/avx2/picture-avx2.c            | 24 ++++++++++++++++++-
 .../sse41/reg_sad_pow2_widths-sse41.h         |  7 ++++++
 src/strategies/strategies-picture.c           |  2 ++
 src/strategies/strategies-picture.h           | 17 +++++++++++++
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/src/search_inter.c b/src/search_inter.c
index 3910bcdd..31c2c525 100644
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -78,6 +78,13 @@ typedef struct {
    * \brief Bit cost of best_mv
    */
   uint32_t best_bitcost;
+
+  /**
+   * \brief Possible optimized SAD implementation for the width, leave as
+   *        NULL for arbitrary-width blocks
+   */
+  optimized_sad_func_ptr_t optimized_sad;
+
 } inter_search_info_t;


@@ -1433,7 +1440,6 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
   }
 }

-
 /**
  * \brief Update PU to have best modes at this depth.
  *
@@ -1486,6 +1492,7 @@ static void search_pu_inter(encoder_state_t * const state,
     .width = width,
     .height = height,
     .mvd_cost_func = cfg->mv_rdo ? kvz_calc_mvd_cost_cabac : calc_mvd_cost,
+    .optimized_sad = kvz_get_optimized_sad(width),
   };

   // Search for merge mode candidates
diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c
index d953fbb4..5aa7fe23 100644
--- a/src/strategies/avx2/picture-avx2.c
+++ b/src/strategies/avx2/picture-avx2.c
@@ -1255,8 +1255,29 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
     }
   }
 }
-#endif //COMPILE_INTEL_AVX2

+static optimized_sad_func_ptr_t get_optimized_sad_avx2(int32_t width)
+{
+  if (width == 0)
+    return reg_sad_w0;
+  if (width == 4)
+    return reg_sad_w4;
+  if (width == 8)
+    return reg_sad_w8;
+  if (width == 12)
+    return reg_sad_w12;
+  if (width == 16)
+    return reg_sad_w16;
+  if (width == 24)
+    return reg_sad_w24;
+  if (width == 32)
+    return reg_sad_w32;
+  if (width == 64)
+    return reg_sad_w64;
+  else
+    return NULL;
+}
+#endif //COMPILE_INTEL_AVX2

 int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
 {
@@ -1290,6 +1311,7 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)

     success &= kvz_strategyselector_register(opaque, "pixels_calc_ssd", "avx2", 40, &pixels_calc_ssd_avx2);
     success &= kvz_strategyselector_register(opaque, "inter_recon_bipred", "avx2", 40, &inter_recon_bipred_avx2);
+    success &= kvz_strategyselector_register(opaque, "get_optimized_sad", "avx2", 40, &get_optimized_sad_avx2);
   }
 #endif

diff --git a/src/strategies/sse41/reg_sad_pow2_widths-sse41.h b/src/strategies/sse41/reg_sad_pow2_widths-sse41.h
index a2b68239..a072b16b 100644
--- a/src/strategies/sse41/reg_sad_pow2_widths-sse41.h
+++ b/src/strategies/sse41/reg_sad_pow2_widths-sse41.h
@@ -4,6 +4,13 @@
 #include
 #include "kvazaar.h"

+static INLINE uint32_t reg_sad_w0(const kvz_pixel * const data1, const kvz_pixel * const data2,
+                                  const int32_t height, const uint32_t stride1,
+                                  const uint32_t stride2)
+{
+  return 0;
+}
+
 static INLINE uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * const data2,
                                   const int32_t height, const uint32_t stride1,
                                   const uint32_t stride2)
diff --git a/src/strategies/strategies-picture.c b/src/strategies/strategies-picture.c
index 1c6e26b1..58814376 100644
--- a/src/strategies/strategies-picture.c
+++ b/src/strategies/strategies-picture.c
@@ -63,6 +63,8 @@ pixels_calc_ssd_func * kvz_pixels_calc_ssd = 0;

 inter_recon_bipred_func * kvz_inter_recon_bipred_blend = 0;

+get_optimized_sad_func *kvz_get_optimized_sad = 0;
+
 int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth)
 {
   bool success = true;
diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h
index 78cbfaf8..37731a0e 100644
--- a/src/strategies/strategies-picture.h
+++ b/src/strategies/strategies-picture.h
@@ -33,6 +33,19 @@

 typedef kvz_pixel (*pred_buffer)[32 * 32];

+/**
+ * \param data1: Picture block pointer
+ * \param data2: Reference block pointer
+ * \param height: Scan block height
+ * \param stride1: Picture block stride
+ * \param stride2: Reference block stride
+ */
+typedef uint32_t (*optimized_sad_func_ptr_t)(const kvz_pixel * const,
+                                             const kvz_pixel * const,
+                                             const int32_t,
+                                             const uint32_t,
+                                             const uint32_t);
+
 // Function macro for defining hadamard calculating functions
 // for fixed size blocks. They calculate hadamard for integer
@@ -112,6 +125,7 @@ typedef void (cost_pixel_nxn_multi_func)(const pred_buffer preds, const kvz_pixe
 typedef void (cost_pixel_any_size_multi_func)(int width, int height, const kvz_pixel **preds, const int stride, const kvz_pixel *orig, const int orig_stride, unsigned num_modes, unsigned *costs_out, int8_t *valid);

 typedef unsigned (pixels_calc_ssd_func)(const kvz_pixel *const ref, const kvz_pixel *const rec, const int ref_stride, const int rec_stride, const int width);
+typedef optimized_sad_func_ptr_t (get_optimized_sad_func)(int32_t);


 typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
@@ -165,6 +179,8 @@ extern pixels_calc_ssd_func *kvz_pixels_calc_ssd;

 extern inter_recon_bipred_func * kvz_inter_recon_bipred_blend;

+extern get_optimized_sad_func *kvz_get_optimized_sad;
+
 int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth);
 cost_pixel_nxn_func * kvz_pixels_get_satd_func(unsigned n);
 cost_pixel_nxn_func * kvz_pixels_get_sad_func(unsigned n);
@@ -197,6 +213,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
   {"satd_any_size_quad", (void**) &kvz_satd_any_size_quad}, \
   {"pixels_calc_ssd", (void**) &kvz_pixels_calc_ssd}, \
   {"inter_recon_bipred", (void**) &kvz_inter_recon_bipred_blend}, \
+  {"get_optimized_sad", (void**) &kvz_get_optimized_sad}, \