[IBC] Add the 16-bit 8x8 CRC32C and implement (somehow) the empty check_mv_cost_satd()

This commit is contained in:
Marko Viitanen 2024-08-05 08:53:03 +03:00
parent 7c7a56bb45
commit 3951e89923
3 changed files with 74 additions and 36 deletions

View file

@ -226,7 +226,30 @@ static bool check_mv_cost_satd(ibc_search_info_t *info,
double* best_bits, double* best_bits,
vector2d_t *best_mv) vector2d_t *best_mv)
{ {
if (!intmv_within_ibc_range(info, x, y)) return false;
double cost = calculate_ibc_cost_satd(info->state, info->lcu, &info->origin, x, y);
if (cost >= *best_cost) return false;
cost += info->mvd_cost_func(
info->state,
x, y, INTERNAL_MV_PREC,
info->mv_cand,
NULL,
0,
0,
best_bits
);
if (cost >= *best_cost) return false;
// Set to motion vector in internal pixel precision.
best_mv->x = x * (1 << INTERNAL_MV_PREC);
best_mv->y = y * (1 << INTERNAL_MV_PREC);
*best_cost = cost;
return true;
} }
/** /**
* \brief Calculate cost for an integer motion vector. * \brief Calculate cost for an integer motion vector.

View file

@ -1374,17 +1374,29 @@ INLINE static uint32_t uvg_crc32c_4_generic(uint32_t crc, const uvg_pixel *buf)
return crc; return crc;
} }
/**
 * \brief Advance a running CRC over the first 8 bytes found at \p buf.
 *
 * The pixel pointer is reinterpreted as raw bytes and each byte is folded
 * into the CRC with a lookup in uvg_crc_table, one byte per step.
 * Presumably this is one row of four samples with a 16-bit uvg_pixel
 * (4 * 2 bytes), as the name suggests -- confirm against the uvg_pixel typedef.
 *
 * No initial or final XOR is applied here; the value is passed through as-is.
 *
 * \param crc  CRC state to continue from.
 * \param buf  Start of the data; 8 bytes are read from it.
 * \return     CRC state after the 8 bytes have been consumed.
 */
INLINE static uint32_t uvg_crc32c_4_16bit_generic(uint32_t crc, const uvg_pixel *buf)
{
  const uint8_t *cursor = (const uint8_t *)buf;
  const uint8_t *const stop = cursor + 8;

  while (cursor != stop) {
    // The low byte of (state XOR input byte) selects the table entry.
    const uint32_t slot = (crc ^ *cursor++) & 0xFF;
    crc = uvg_crc_table[slot] ^ (crc >> 8);
  }

  return crc;
}
INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf) INLINE static uint32_t uvg_crc32c_8_generic(uint32_t crc, const uvg_pixel *buf)
{ {
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[0]) & 0xFF]; for (int i = 0; i < 8; i++) {
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[1]) & 0xFF]; crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[i]) & 0xFF];
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[2]) & 0xFF]; }
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[3]) & 0xFF]; return crc;
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[4]) & 0xFF]; }
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[5]) & 0xFF];
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[6]) & 0xFF]; INLINE static uint32_t uvg_crc32c_8_16bit_generic(uint32_t crc, const uvg_pixel *buf)
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf[7]) & 0xFF]; {
const uint8_t* buf8 = (const uint8_t*)buf;
for (int i = 0; i < 16; i++) {
crc = (crc >> 8) ^ uvg_crc_table[(crc ^ buf8[i]) & 0xFF];
}
return crc; return crc;
} }
@ -1401,34 +1413,29 @@ static uint32_t uvg_crc32c_4x4_8bit_generic(const uvg_pixel *buf, uint32_t pic_s
static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride) static uint32_t uvg_crc32c_4x4_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{ {
uint32_t crc = 0xFFFFFFFF; uint32_t crc = 0xFFFFFFFF;
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride]); crc = uvg_crc32c_4_16bit_generic(crc, &buf[0 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[0 * pic_stride] + 4); crc = uvg_crc32c_4_16bit_generic(crc, &buf[1 * pic_stride]);
crc = uvg_crc32c_4_16bit_generic(crc, &buf[2 * pic_stride]);
crc = uvg_crc32c_4_16bit_generic(crc, &buf[3 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[1 * pic_stride] + 4);
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[2 * pic_stride] + 4);
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride]);
crc = uvg_crc32c_4_generic(crc, &buf[3 * pic_stride] + 4);
return crc ^ 0xFFFFFFFF; return crc ^ 0xFFFFFFFF;
} }
static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride) static uint32_t uvg_crc32c_8x8_8bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
{ {
uint32_t crc = 0xFFFFFFFF; uint32_t crc = 0xFFFFFFFF;
crc = uvg_crc32c_8_generic(crc, &buf[0 * pic_stride]); for (int i = 0; i < 8; i++) {
crc = uvg_crc32c_8_generic(crc, &buf[1 * pic_stride]); crc = uvg_crc32c_8_generic(crc, &buf[i * pic_stride]);
}
return crc ^ 0xFFFFFFFF;
}
crc = uvg_crc32c_8_generic(crc, &buf[2 * pic_stride]); static uint32_t uvg_crc32c_8x8_16bit_generic(const uvg_pixel *buf, uint32_t pic_stride)
crc = uvg_crc32c_8_generic(crc, &buf[3 * pic_stride]); {
uint32_t crc = 0xFFFFFFFF;
crc = uvg_crc32c_8_generic(crc, &buf[4 * pic_stride]); for (int i = 0; i < 8; i++) {
crc = uvg_crc32c_8_generic(crc, &buf[5 * pic_stride]); crc = uvg_crc32c_8_16bit_generic(crc, &buf[i * pic_stride]);
}
crc = uvg_crc32c_8_generic(crc, &buf[6 * pic_stride]);
crc = uvg_crc32c_8_generic(crc, &buf[7 * pic_stride]);
return crc ^ 0xFFFFFFFF; return crc ^ 0xFFFFFFFF;
} }
@ -1440,6 +1447,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic); success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_8bit_generic);
} else { } else {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic); success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "generic", 0, &uvg_crc32c_4x4_16bit_generic);
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "generic", 0, &uvg_crc32c_8x8_16bit_generic);
} }

View file

@ -67,14 +67,20 @@ static uint32_t uvg_crc32c_4x4_16bit_sse42(const uvg_pixel *buf, uint32_t pic_st
static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride) static uint32_t uvg_crc32c_8x8_8bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
{ {
uint64_t crc = 0xFFFFFFFF; uint64_t crc = 0xFFFFFFFF;
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[0 * pic_stride])); for (int i = 0; i < 8; i++) {
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[1 * pic_stride])); crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride]));
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[2 * pic_stride])); }
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[3 * pic_stride])); return (uint32_t)(crc ^ 0xFFFFFFFF);
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[4 * pic_stride])); }
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[5 * pic_stride]));
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[6 * pic_stride])); static uint32_t uvg_crc32c_8x8_16bit_sse42(const uvg_pixel *buf, uint32_t pic_stride)
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[7 * pic_stride])); {
uint64_t crc = 0xFFFFFFFF;
for (int i = 0; i < 8; i++) {
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride]));
crc = _mm_crc32_u64(crc, *((uint64_t *)&buf[i * pic_stride + 4]));
}
return (uint32_t)(crc ^ 0xFFFFFFFF); return (uint32_t)(crc ^ 0xFFFFFFFF);
} }
@ -89,6 +95,7 @@ int uvg_strategy_register_picture_sse42(void* opaque, uint8_t bitdepth) {
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42); success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_8bit_sse42);
} else { } else {
success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42); success &= uvg_strategyselector_register(opaque, "crc32c_4x4", "sse42", 0, &uvg_crc32c_4x4_16bit_sse42);
success &= uvg_strategyselector_register(opaque, "crc32c_8x8", "sse42", 0, &uvg_crc32c_8x8_16bit_sse42);
} }
#endif #endif
return success; return success;