mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Merge branch 'ibc_fixes'
This commit is contained in:
commit
6d1ffd48cb
|
@ -226,7 +226,30 @@ static bool check_mv_cost_satd(ibc_search_info_t *info,
|
||||||
double* best_bits,
|
double* best_bits,
|
||||||
vector2d_t *best_mv)
|
vector2d_t *best_mv)
|
||||||
{
|
{
|
||||||
|
if (!intmv_within_ibc_range(info, x, y)) return false;
|
||||||
|
|
||||||
|
double cost = calculate_ibc_cost_satd(info->state, info->lcu, &info->origin, x, y);
|
||||||
|
|
||||||
|
if (cost >= *best_cost) return false;
|
||||||
|
|
||||||
|
cost += info->mvd_cost_func(
|
||||||
|
info->state,
|
||||||
|
x, y, INTERNAL_MV_PREC,
|
||||||
|
info->mv_cand,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
best_bits
|
||||||
|
);
|
||||||
|
|
||||||
|
if (cost >= *best_cost) return false;
|
||||||
|
|
||||||
|
// Set to motion vector in internal pixel precision.
|
||||||
|
best_mv->x = x * (1 << INTERNAL_MV_PREC);
|
||||||
|
best_mv->y = y * (1 << INTERNAL_MV_PREC);
|
||||||
|
*best_cost = cost;
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* \brief Calculate cost for an integer motion vector.
|
* \brief Calculate cost for an integer motion vector.
|
||||||
|
|
|
@ -1374,17 +1374,29 @@ INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
return crc;
|
return crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Fold the 8 bytes starting at buf into a running CRC, one byte at a time.
 *
 * The pixel pointer is reinterpreted as a byte pointer and the bytes are
 * consumed in memory order through the uvg_crc_table lookup.
 * NOTE(review): 8 bytes presumably corresponds to four 16-bit samples; the
 * width of uvg_pixel is not visible here -- confirm.
 *
 * \param crc  Running CRC state. No initial value or final inversion is
 *             applied inside this function.
 * \param buf  Address of the first sample; exactly 8 bytes are read from it.
 * \return     The CRC state after the 8 bytes have been folded in.
 */
INLINE static uint32_t uvg_crc32c_4_16bit_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
|
{
|
||||||
|
const uint8_t *buf8 = (const uint8_t *)buf;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf8[i]) & 0xFF];
|
||||||
|
}
|
||||||
|
return crc;
|
||||||
|
}
|
||||||
|
|
||||||
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
|
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
{
|
{
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF];
|
for (int i = 0; i < 8; i++) {
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF];
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[i]) & 0xFF];
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF];
|
}
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF];
|
return crc;
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF];
|
}
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF];
|
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF];
|
INLINE static uint32_t uvg_crc32c_8_16bit_generic(uint32_t crc, const uvg_pixel *buf)
|
||||||
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF];
|
{
|
||||||
|
const uint8_t* buf8 = (const uint8_t*)buf;
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf8[i]) & 0xFF];
|
||||||
|
}
|
||||||
return crc;
|
return crc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1401,34 +1413,29 @@ static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_s
|
||||||
static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
{
|
{
|
||||||
uint32_t crc = 0xFFFFFFFF;
|
uint32_t crc = 0xFFFFFFFF;
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]);
|
crc = uvg_crc32c_4_16bit_generic(crc, &buf[0 * pic_stride]);
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4);
|
crc = uvg_crc32c_4_16bit_generic(crc, &buf[1 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_16bit_generic(crc, &buf[2 * pic_stride]);
|
||||||
|
crc = uvg_crc32c_4_16bit_generic(crc, &buf[3 * pic_stride]);
|
||||||
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]);
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4);
|
|
||||||
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]);
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4);
|
|
||||||
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]);
|
|
||||||
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4);
|
|
||||||
return crc ^ 0xFFFFFFFF;
|
return crc ^ 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
{
|
{
|
||||||
uint32_t crc = 0xFFFFFFFF;
|
uint32_t crc = 0xFFFFFFFF;
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[0 * pic_stride]);
|
for (int i = 0; i < 8; i++) {
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[1 * pic_stride]);
|
crc = uvg_crc32c_8_generic(crc, &buf[i * pic_stride]);
|
||||||
|
}
|
||||||
|
return crc ^ 0xFFFFFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[2 * pic_stride]);
|
static uint32_t uvg_crc32c_8x8_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[3 * pic_stride]);
|
{
|
||||||
|
uint32_t crc = 0xFFFFFFFF;
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[4 * pic_stride]);
|
for (int i = 0; i < 8; i++) {
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[5 * pic_stride]);
|
crc = uvg_crc32c_8_16bit_generic(crc, &buf[i * pic_stride]);
|
||||||
|
}
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[6 * pic_stride]);
|
|
||||||
crc = uvg_crc32c_8_generic(crc, &buf[7 * pic_stride]);
|
|
||||||
return crc ^ 0xFFFFFFFF;
|
return crc ^ 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1440,6 +1447,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
|
||||||
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
|
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
|
||||||
} else {
|
} else {
|
||||||
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
|
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
|
||||||
|
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_16bit_generic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -67,14 +67,20 @@ static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_st
|
||||||
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
|
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
{
|
{
|
||||||
uint64_t crc = 0xFFFFFFFF;
|
uint64_t crc = 0xFFFFFFFF;
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[0 * pic_stride]));
|
for (int i = 0; i < 8; i++) {
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[1 * pic_stride]));
|
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride]));
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[2 * pic_stride]));
|
}
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[3 * pic_stride]));
|
return (uint32_t)(crc ^ 0xFFFFFFFF);
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[4 * pic_stride]));
|
}
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[5 * pic_stride]));
|
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[6 * pic_stride]));
|
static uint32_t uvg_crc32c_8x8_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
|
||||||
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[7 * pic_stride]));
|
{
|
||||||
|
uint64_t crc = 0xFFFFFFFF;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride]));
|
||||||
|
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride + 4]));
|
||||||
|
}
|
||||||
|
|
||||||
return (uint32_t)(crc ^ 0xFFFFFFFF);
|
return (uint32_t)(crc ^ 0xFFFFFFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,6 +95,7 @@ int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) {
|
||||||
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
|
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
|
||||||
} else {
|
} else {
|
||||||
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
|
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
|
||||||
|
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_16bit_sse42);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return success;
|
return success;
|
||||||
|
|
|
@ -207,7 +207,6 @@ int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
|
||||||
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
|
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned width, unsigned height);
|
||||||
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
|
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigned height);
|
||||||
|
|
||||||
#if UVG_BIT_DEPTH == 8
|
|
||||||
#define STRATEGIES_PICTURE_EXPORTS \
|
#define STRATEGIES_PICTURE_EXPORTS \
|
||||||
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
|
{"crc32c_4x4", (void**) &uvg_crc32c_4x4}, \
|
||||||
{"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \
|
{"crc32c_8x8", (void **)&uvg_crc32c_8x8}, \
|
||||||
|
@ -243,42 +242,5 @@ cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned width, unsigne
|
||||||
{"pixel_var", (void**) &uvg_pixel_var}, \
|
{"pixel_var", (void**) &uvg_pixel_var}, \
|
||||||
{"generate_residual", (void**) &uvg_generate_residual}, \
|
{"generate_residual", (void**) &uvg_generate_residual}, \
|
||||||
|
|
||||||
#else
|
|
||||||
#define STRATEGIES_PICTURE_EXPORTS \
|
|
||||||
{"reg_sad", (void**) &uvg_reg_sad}, \
|
|
||||||
{"sad_4x4", (void**) &uvg_sad_4x4}, \
|
|
||||||
{"sad_8x8", (void**) &uvg_sad_8x8}, \
|
|
||||||
{"sad_16x16", (void**) &uvg_sad_16x16}, \
|
|
||||||
{"sad_32x32", (void**) &uvg_sad_32x32}, \
|
|
||||||
{"sad_64x64", (void**) &uvg_sad_64x64}, \
|
|
||||||
{"satd_4x4", (void**) &uvg_satd_4x4}, \
|
|
||||||
{"satd_8x8", (void**) &uvg_satd_8x8}, \
|
|
||||||
{"satd_16x16", (void**) &uvg_satd_16x16}, \
|
|
||||||
{"satd_32x32", (void**) &uvg_satd_32x32}, \
|
|
||||||
{"satd_64x64", (void**) &uvg_satd_64x64}, \
|
|
||||||
{"satd_any_size", (void**) &uvg_satd_any_size}, \
|
|
||||||
{"satd_any_size_vtm", (void**) &uvg_satd_any_size_vtm}, \
|
|
||||||
{"sad_4x4_dual", (void**) &uvg_sad_4x4_dual}, \
|
|
||||||
{"sad_8x8_dual", (void**) &uvg_sad_8x8_dual}, \
|
|
||||||
{"sad_16x16_dual", (void**) &uvg_sad_16x16_dual}, \
|
|
||||||
{"sad_32x32_dual", (void**) &uvg_sad_32x32_dual}, \
|
|
||||||
{"sad_64x64_dual", (void**) &uvg_sad_64x64_dual}, \
|
|
||||||
{"satd_4x4_dual", (void**) &uvg_satd_4x4_dual}, \
|
|
||||||
{"satd_8x8_dual", (void**) &uvg_satd_8x8_dual}, \
|
|
||||||
{"satd_16x16_dual", (void**) &uvg_satd_16x16_dual}, \
|
|
||||||
{"satd_32x32_dual", (void**) &uvg_satd_32x32_dual}, \
|
|
||||||
{"satd_64x64_dual", (void**) &uvg_satd_64x64_dual}, \
|
|
||||||
{"satd_any_size_quad", (void**) &uvg_satd_any_size_quad}, \
|
|
||||||
{"pixels_calc_ssd", (void**) &uvg_pixels_calc_ssd}, \
|
|
||||||
{"bipred_average", (void**) &uvg_bipred_average}, \
|
|
||||||
{"get_optimized_sad", (void**) &uvg_get_optimized_sad}, \
|
|
||||||
{"ver_sad", (void**) &uvg_ver_sad}, \
|
|
||||||
{"hor_sad", (void**) &uvg_hor_sad}, \
|
|
||||||
{"pixel_var", (void**) &uvg_pixel_var}, \
|
|
||||||
{"generate_residual", (void**) &uvg_generate_residual}, \
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif //STRATEGIES_PICTURE_H_
|
#endif //STRATEGIES_PICTURE_H_
|
||||||
|
|
Loading…
Reference in a new issue