mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Create strategy for directly obtaining pointer to constant-width SAD function
This commit is contained in:
parent
94035be342
commit
91cb0fbd45
|
@ -78,6 +78,13 @@ typedef struct {
|
|||
* \brief Bit cost of best_mv
|
||||
*/
|
||||
uint32_t best_bitcost;
|
||||
|
||||
/**
|
||||
* \brief Possible optimized SAD implementation for the width, leave as
|
||||
* NULL for arbitrary-width blocks
|
||||
*/
|
||||
optimized_sad_func_ptr_t optimized_sad;
|
||||
|
||||
} inter_search_info_t;
|
||||
|
||||
|
||||
|
@ -1433,7 +1440,6 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Update PU to have best modes at this depth.
|
||||
*
|
||||
|
@ -1486,6 +1492,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
.width = width,
|
||||
.height = height,
|
||||
.mvd_cost_func = cfg->mv_rdo ? kvz_calc_mvd_cost_cabac : calc_mvd_cost,
|
||||
.optimized_sad = kvz_get_optimized_sad(width),
|
||||
};
|
||||
|
||||
// Search for merge mode candidates
|
||||
|
|
|
@ -1255,8 +1255,29 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif //COMPILE_INTEL_AVX2
|
||||
|
||||
static optimized_sad_func_ptr_t get_optimized_sad_avx2(int32_t width)
|
||||
{
|
||||
if (width == 0)
|
||||
return reg_sad_w0;
|
||||
if (width == 4)
|
||||
return reg_sad_w4;
|
||||
if (width == 8)
|
||||
return reg_sad_w8;
|
||||
if (width == 12)
|
||||
return reg_sad_w12;
|
||||
if (width == 16)
|
||||
return reg_sad_w16;
|
||||
if (width == 24)
|
||||
return reg_sad_w24;
|
||||
if (width == 32)
|
||||
return reg_sad_w32;
|
||||
if (width == 64)
|
||||
return reg_sad_w64;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
#endif //COMPILE_INTEL_AVX2
|
||||
|
||||
int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
|
||||
{
|
||||
|
@ -1290,6 +1311,7 @@ int kvz_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
|
|||
|
||||
success &= kvz_strategyselector_register(opaque, "pixels_calc_ssd", "avx2", 40, &pixels_calc_ssd_avx2);
|
||||
success &= kvz_strategyselector_register(opaque, "inter_recon_bipred", "avx2", 40, &inter_recon_bipred_avx2);
|
||||
success &= kvz_strategyselector_register(opaque, "get_optimized_sad", "avx2", 40, &get_optimized_sad_avx2);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -4,6 +4,13 @@
|
|||
#include <immintrin.h>
|
||||
#include "kvazaar.h"
|
||||
|
||||
/**
 * \brief Fixed-width SAD routine for width 0.
 *
 * The result does not depend on any argument; the parameter list exists so
 * that this function has the same signature as the other reg_sad_w*
 * routines.
 *
 * \param data1    Unused.
 * \param data2    Unused.
 * \param height   Unused.
 * \param stride1  Unused.
 * \param stride2  Unused.
 *
 * \return Always 0.
 */
static INLINE uint32_t reg_sad_w0(const kvz_pixel * const data1,
                                  const kvz_pixel * const data2,
                                  const int32_t height,
                                  const uint32_t stride1,
                                  const uint32_t stride2)
{
  // Mark every parameter as intentionally unused.
  (void)data1;
  (void)data2;
  (void)height;
  (void)stride1;
  (void)stride2;

  return 0;
}
|
||||
|
||||
static INLINE uint32_t reg_sad_w4(const kvz_pixel * const data1, const kvz_pixel * const data2,
|
||||
const int32_t height, const uint32_t stride1,
|
||||
const uint32_t stride2)
|
||||
|
|
|
@ -63,6 +63,8 @@ pixels_calc_ssd_func * kvz_pixels_calc_ssd = 0;
|
|||
|
||||
inter_recon_bipred_func * kvz_inter_recon_bipred_blend = 0;
|
||||
|
||||
get_optimized_sad_func *kvz_get_optimized_sad = 0;
|
||||
|
||||
|
||||
int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
||||
bool success = true;
|
||||
|
|
|
@ -33,6 +33,19 @@
|
|||
|
||||
typedef kvz_pixel (*pred_buffer)[32 * 32];
|
||||
|
||||
/**
|
||||
* \param data1: Picture block pointer
|
||||
* \param data2: Reference block pointer
|
||||
* \param height: Scan block height
|
||||
* \param stride1: Picture block stride
|
||||
* \param stride2: Reference block stride
|
||||
*/
|
||||
typedef uint32_t (*optimized_sad_func_ptr_t)(const kvz_pixel * const,
|
||||
const kvz_pixel * const,
|
||||
const int32_t,
|
||||
const uint32_t,
|
||||
const uint32_t);
|
||||
|
||||
|
||||
// Function macro for defining hadamard calculating functions
|
||||
// for fixed size blocks. They calculate hadamard for integer
|
||||
|
@ -112,6 +125,7 @@ typedef void (cost_pixel_nxn_multi_func)(const pred_buffer preds, const kvz_pixe
|
|||
typedef void (cost_pixel_any_size_multi_func)(int width, int height, const kvz_pixel **preds, const int stride, const kvz_pixel *orig, const int orig_stride, unsigned num_modes, unsigned *costs_out, int8_t *valid);
|
||||
|
||||
typedef unsigned (pixels_calc_ssd_func)(const kvz_pixel *const ref, const kvz_pixel *const rec, const int ref_stride, const int rec_stride, const int width);
|
||||
typedef optimized_sad_func_ptr_t (get_optimized_sad_func)(int32_t);
|
||||
|
||||
|
||||
typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
|
||||
|
@ -165,6 +179,8 @@ extern pixels_calc_ssd_func *kvz_pixels_calc_ssd;
|
|||
|
||||
extern inter_recon_bipred_func * kvz_inter_recon_bipred_blend;
|
||||
|
||||
extern get_optimized_sad_func *kvz_get_optimized_sad;
|
||||
|
||||
int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth);
|
||||
cost_pixel_nxn_func * kvz_pixels_get_satd_func(unsigned n);
|
||||
cost_pixel_nxn_func * kvz_pixels_get_sad_func(unsigned n);
|
||||
|
@ -197,6 +213,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
|
|||
{"satd_any_size_quad", (void**) &kvz_satd_any_size_quad}, \
|
||||
{"pixels_calc_ssd", (void**) &kvz_pixels_calc_ssd}, \
|
||||
{"inter_recon_bipred", (void**) &kvz_inter_recon_bipred_blend}, \
|
||||
{"get_optimized_sad", (void**) &kvz_get_optimized_sad}, \
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue