Create strategy for directly obtaining pointer to constant-width SAD function

This commit is contained in:
Pauli Oikkonen 2019-01-18 17:32:52 +02:00
parent 94035be342
commit 91cb0fbd45
5 changed files with 57 additions and 2 deletions

View file

@ -78,6 +78,13 @@ typedef struct {
* \brief Bit cost of best_mv
*/
uint32_t best_bitcost;
/**
* \brief Optimized SAD implementation for this block width, if one exists;
* leave as NULL for arbitrary-width blocks
*/
optimized_sad_func_ptr_t optimized_sad;
} inter_search_info_t;
@ -1433,7 +1440,6 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
}
}
/**
* \brief Update PU to have best modes at this depth.
*
@ -1486,6 +1492,7 @@ static void search_pu_inter(encoder_state_t * const state,
.width = width,
.height = height,
.mvd_cost_func = cfg->mv_rdo ? kvz_calc_mvd_cost_cabac : calc_mvd_cost,
.optimized_sad = kvz_get_optimized_sad(width),
};
// Search for merge mode candidates

View file

@ -1255,8 +1255,29 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
}
}
}
#endif //COMPILE_INTEL_AVX2
/**
 * \brief Look up the fixed-width SAD routine matching a block width.
 *
 * \param width  Block width to find a specialized routine for.
 * \return Pointer to the matching reg_sad_w* function when width is one of
 *         0, 4, 8, 12, 16, 24, 32 or 64; NULL for every other width.
 */
static optimized_sad_func_ptr_t get_optimized_sad_avx2(int32_t width)
{
  optimized_sad_func_ptr_t chosen;

  switch (width) {
    case 0:
      chosen = reg_sad_w0;
      break;
    case 4:
      chosen = reg_sad_w4;
      break;
    case 8:
      chosen = reg_sad_w8;
      break;
    case 12:
      chosen = reg_sad_w12;
      break;
    case 16:
      chosen = reg_sad_w16;
      break;
    case 24:
      chosen = reg_sad_w24;
      break;
    case 32:
      chosen = reg_sad_w32;
      break;
    case 64:
      chosen = reg_sad_w64;
      break;
    default:
      // No specialized routine exists for this width.
      chosen = NULL;
      break;
  }

  return chosen;
}
#endif //COMPILE_INTEL_AVX2
int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
{
@ -1290,6 +1311,7 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
success &= kvz_strategyselector_register(opaque, "pixels_calc_ssd", "avx2", 40, &pixels_calc_ssd_avx2);
success &= kvz_strategyselector_register(opaque, "inter_recon_bipred", "avx2", 40, &inter_recon_bipred_avx2);
success &= kvz_strategyselector_register(opaque, "get_optimized_sad", "avx2", 40, &get_optimized_sad_avx2);
}
#endif

View file

@ -4,6 +4,13 @@
#include <immintrin.h>
#include "kvazaar.h"
/**
 * \brief SAD routine for width 0; the result is always 0.
 *
 * None of the arguments are read. The parameter list is the same as that of
 * reg_sad_w4, so the function can be handed out through the same function
 * pointer type.
 *
 * \return Always 0.
 */
static INLINE uint32_t reg_sad_w0(const kvz_pixel * const data1, const kvz_pixel * const data2,
                                  const int32_t height, const uint32_t stride1,
                                  const uint32_t stride2)
{
  // Mark every parameter as intentionally unused.
  (void)data1;
  (void)data2;
  (void)height;
  (void)stride1;
  (void)stride2;

  return 0;
}
static INLINE uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t height, const uint32_t stride1,
const uint32_t stride2)

View file

@ -63,6 +63,8 @@ pixels_calc_ssd_func * kvz_pixels_calc_ssd = 0;
inter_recon_bipred_func * kvz_inter_recon_bipred_blend = 0;
get_optimized_sad_func *kvz_get_optimized_sad = 0;
int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth) {
bool success = true;

View file

@ -33,6 +33,19 @@
typedef kvz_pixel (*pred_buffer)[32 * 32];
/**
 * \brief Pointer to a SAD function specialized for one block width.
 *
 * \param data1    Picture block pointer
 * \param data2    Reference block pointer
 * \param height   Scan block height
 * \param stride1  Picture block stride
 * \param stride2  Reference block stride
 */
typedef uint32_t (*optimized_sad_func_ptr_t)(const kvz_pixel * const data1,
                                             const kvz_pixel * const data2,
                                             const int32_t height,
                                             const uint32_t stride1,
                                             const uint32_t stride2);
// Function macro for defining hadamard calculating functions
// for fixed size blocks. They calculate hadamard for integer
@ -112,6 +125,7 @@ typedef void (cost_pixel_nxn_multi_func)(const pred_buffer preds, const kvz_pixe
typedef void (cost_pixel_any_size_multi_func)(int width, int height, const kvz_pixel **preds, const int stride, const kvz_pixel *orig, const int orig_stride, unsigned num_modes, unsigned *costs_out, int8_t *valid);
typedef unsigned (pixels_calc_ssd_func)(const kvz_pixel *const ref, const kvz_pixel *const rec, const int ref_stride, const int rec_stride, const int width);
typedef optimized_sad_func_ptr_t (get_optimized_sad_func)(int32_t);
typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
@ -165,6 +179,8 @@ extern pixels_calc_ssd_func *kvz_pixels_calc_ssd;
extern inter_recon_bipred_func * kvz_inter_recon_bipred_blend;
extern get_optimized_sad_func *kvz_get_optimized_sad;
int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth);
cost_pixel_nxn_func * kvz_pixels_get_satd_func(unsigned n);
cost_pixel_nxn_func * kvz_pixels_get_sad_func(unsigned n);
@ -197,6 +213,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
{"satd_any_size_quad", (void**) &kvz_satd_any_size_quad}, \
{"pixels_calc_ssd", (void**) &kvz_pixels_calc_ssd}, \
{"inter_recon_bipred", (void**) &kvz_inter_recon_bipred_blend}, \
{"get_optimized_sad", (void**) &kvz_get_optimized_sad}, \