???? ??? ????

This commit is contained in:
Joose Sainio 2023-04-28 15:55:55 +03:00
parent ab4f63425d
commit bbe96a3217
5 changed files with 103 additions and 11 deletions

View file

@ -804,7 +804,6 @@ void uvg_dep_quant_update_state(
} }
} }
static bool same[13];
int uvg_dep_quant( int uvg_dep_quant(
const encoder_state_t* const state, const encoder_state_t* const state,
@ -889,14 +888,15 @@ int uvg_dep_quant(
height >= 4) { height >= 4) {
firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15; firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
} }
const int32_t default_quant_coeff = dep_quant_context.m_quant->m_QScale; uvg_find_first_non_zero_coeff(
const int32_t thres = dep_quant_context.m_quant->m_thresLast; srcCoeff,
for (; firstTestPos >= 0; firstTestPos--) { enableScalingLists,
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) : (thres / (4 * default_quant_coeff)); dep_quant_context,
if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) { scan,
break; q_coeff,
} &firstTestPos,
} width,
height);
if (firstTestPos < 0) { if (firstTestPos < 0) {
return 0; return 0;
} }
@ -962,6 +962,7 @@ int uvg_dep_quant(
const uint32_t height_in_sbb = MAX(height >> 2, 1); const uint32_t height_in_sbb = MAX(height >> 2, 1);
const uint32_t width_in_sbb = MAX(width >> 2, 1); const uint32_t width_in_sbb = MAX(width >> 2, 1);
const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
//===== populate trellis ===== //===== populate trellis =====
for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) { for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) {
uint32_t blkpos = scan[scanIdx]; uint32_t blkpos = scan[scanIdx];

View file

@ -37,6 +37,8 @@
#include "strategies/avx2/depquant-avx2.h" #include "strategies/avx2/depquant-avx2.h"
#include "strategyselector.h" #include "strategyselector.h"
// NOTE(review): this defines COMPILE_INTEL_AVX2 unconditionally in the source
// file, so the `#if COMPILE_INTEL_AVX2 && defined X86_64` guard below no longer
// depends on this macro being supplied from elsewhere. Presumably a development
// aid; confirm it is intentional before merging (it may also redefine a value
// already provided by the build configuration).
#define COMPILE_INTEL_AVX2 1
#if COMPILE_INTEL_AVX2 && defined X86_64 #if COMPILE_INTEL_AVX2 && defined X86_64
#include "dep_quant.h" #include "dep_quant.h"
@ -1359,6 +1361,68 @@ void uvg_dep_quant_decide_and_update_avx2(
} }
/**
 * \brief AVX2 search for the highest scan position whose coefficient exceeds
 *        the "last position" threshold.
 *
 * Walks the scan order downwards from *firstTestPos and stops at the first
 * position where abs(srcCoeff[scan[pos]]) is greater than the threshold
 * derived from m_thresLast. The result matches the generic implementation.
 *
 * With scaling lists the threshold differs per position, so the search is
 * scalar. Without them the threshold is constant and whole groups of 16 scan
 * positions are rejected with one vector compare; the exact position inside
 * the first group containing a hit is then located with a scalar loop.
 *
 * NOTE(review): the vector path assumes that scan positions [16k, 16k + 15]
 * cover 16 contiguous coefficients when width <= 4, and otherwise a 4x4 block
 * whose top-left coefficient is scan[16k] with a row stride of `width`, and
 * that coeff_t is a 16-bit type. Confirm against the scan tables in use.
 *
 * \param srcCoeff            Coefficients, indexed through scan[].
 * \param enableScalingLists  True: threshold is derived per position from
 *                            q_coeff[]. False: derived once from m_QScale.
 * \param dep_quant_context   Supplies m_quant->m_QScale and m_quant->m_thresLast.
 * \param scan                Maps a scan position to an index into srcCoeff
 *                            and q_coeff.
 * \param q_coeff             Per-position scales; only read when
 *                            enableScalingLists is true.
 * \param firstTestPos        In: position to start from. Out: position of the
 *                            first hit, or -1 when no coefficient exceeds the
 *                            threshold (a negative input is left unchanged).
 * \param width               Block width; selects how a group is gathered.
 * \param height              Block height; the vector path needs height >= 4.
 */
void uvg_find_first_non_zero_avx2(const coeff_t* srcCoeff, const bool enableScalingLists, context_store dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, const int width, const int height)
{
  const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
  const int32_t thres = dep_quant_context.m_quant->m_thresLast;
  int temp = *firstTestPos;

  if (enableScalingLists) {
    // The threshold depends on the position, so there is nothing to vectorize.
    for (; temp >= 0; temp--) {
      const uint32_t blkpos = scan[temp];
      const coeff_t thresTmp = thres / (4 * q_coeff[blkpos]);
      if (abs(srcCoeff[blkpos]) > thresTmp) {
        break;
      }
    }
    *firstTestPos = temp;
    return;
  }

  const coeff_t thresTmp = thres / (4 * default_quant_coeff);

  if (temp >= 16 && height >= 4) {
    // The vector loop consumes whole groups of 16 scan positions. If the start
    // position is not the last position of a group, handle the partial group
    // one coefficient at a time. temp >= 16 here, so this stops at 15 or above.
    while ((temp & 15) != 15 && abs(srcCoeff[scan[temp]]) <= thresTmp) {
      temp--;
    }

    // If the loop above stopped on a hit, temp is not group-aligned and the
    // vector search is skipped; the scalar loop below confirms it immediately.
    if ((temp & 15) == 15) {
      const __m256i th = _mm256_set1_epi16(thresTmp);
      const __m256i zero = _mm256_setzero_si256();
      int group_start = temp - 15;
      bool hit = false;

      for (; group_start >= 0; group_start -= 16) {
        const uint32_t i = scan[group_start];
        __m256i sbb_data;

        if (width <= 4) {
          // The 16 coefficients of the group are contiguous in memory.
          sbb_data = _mm256_loadu_si256((__m256i const*)&srcCoeff[i]);
        } else if (width == 8) {
          // Rows 0 and 1 of the group sit at i and i + 8, rows 2 and 3 at
          // i + 16 and i + 24. Two overlapping loads and a 32-bit blend
          // (mask 0xCC) pick exactly those four runs of four coefficients.
          const __m256i first = _mm256_loadu_si256((__m256i const*)&srcCoeff[i]);
          const __m256i second = _mm256_loadu_si256((__m256i const*)&srcCoeff[i + 12]);
          sbb_data = _mm256_blend_epi32(first, second, 204);
        } else {
          // Wider blocks: gather four coefficients (8 bytes) from each of the
          // four rows of the group into a contiguous scratch buffer.
          int16_t rows[16];
          memcpy(rows,      &srcCoeff[i],             8);
          memcpy(rows + 4,  &srcCoeff[i + width],     8);
          memcpy(rows + 8,  &srcCoeff[i + 2 * width], 8);
          memcpy(rows + 12, &srcCoeff[i + 3 * width], 8);
          sbb_data = _mm256_loadu_si256((__m256i const*)rows);
        }

        const __m256i magnitude = _mm256_abs_epi16(sbb_data);
        const __m256i above = _mm256_cmpgt_epi16(magnitude, th);
        // _mm256_abs_epi16 leaves INT16_MIN unchanged (still negative), while
        // the scalar abs() comparison treats it as 32768, which exceeds any
        // 16-bit threshold. Flag those lanes explicitly to match.
        const __m256i wrapped = _mm256_cmpgt_epi16(zero, magnitude);
        const __m256i a = _mm256_or_si256(above, wrapped);

        if (!_mm256_testz_si256(a, a)) {
          hit = true;
          break;
        }
      }

      // On a hit, restart the scalar search at the last position of that
      // group. Otherwise report "not found" as -1, like the generic version.
      temp = hit ? group_start + 15 : -1;
    }
  }

  for (; temp >= 0; temp--) {
    if (abs(srcCoeff[scan[temp]]) > thresTmp) {
      break;
    }
  }
  *firstTestPos = temp;
}
#endif //COMPILE_INTEL_AVX2 && defined X86_64 #endif //COMPILE_INTEL_AVX2 && defined X86_64
int uvg_strategy_register_depquant_avx2(void* opaque, uint8_t bitdepth) int uvg_strategy_register_depquant_avx2(void* opaque, uint8_t bitdepth)
@ -1367,6 +1431,7 @@ int uvg_strategy_register_depquant_avx2(void* opaque, uint8_t bitdepth)
#if COMPILE_INTEL_AVX2 && defined X86_64 #if COMPILE_INTEL_AVX2 && defined X86_64
success &= uvg_strategyselector_register(opaque, "dep_quant_decide_and_update", "avx2", 40, &uvg_dep_quant_decide_and_update_avx2); success &= uvg_strategyselector_register(opaque, "dep_quant_decide_and_update", "avx2", 40, &uvg_dep_quant_decide_and_update_avx2);
success &= uvg_strategyselector_register(opaque, "find_first_non_zero_coeff", "avx2", 40, &uvg_find_first_non_zero_avx2);
#endif //COMPILE_INTEL_AVX2 && defined X86_64 #endif //COMPILE_INTEL_AVX2 && defined X86_64
return success; return success;

View file

@ -227,12 +227,26 @@ static void uvg_dep_quant_decide_and_update_generic(
} }
/**
 * \brief Scalar search for the highest scan position whose coefficient exceeds
 *        the "last position" threshold.
 *
 * Starting at *firstTestPos and moving towards position 0, compares
 * abs(srcCoeff[scan[pos]]) against m_thresLast / (4 * scale). The scale is
 * q_coeff[scan[pos]] when scaling lists are enabled and m_QScale otherwise.
 * The search stops at the first position that exceeds its threshold.
 *
 * \param srcCoeff            Coefficients, indexed through scan[].
 * \param enableScalingLists  Selects the per-position scale from q_coeff[]
 *                            instead of the flat m_QScale.
 * \param dep_quant_context   Supplies m_quant->m_QScale and m_quant->m_thresLast.
 * \param scan                Maps a scan position to an index into srcCoeff
 *                            and q_coeff.
 * \param q_coeff             Per-position scales; only read when
 *                            enableScalingLists is true.
 * \param firstTestPos        In: position to start from. Out: position of the
 *                            first hit, or -1 if the search ran past position
 *                            0 (a negative input is left unchanged).
 * \param width               Unused by this implementation.
 * \param height              Unused by this implementation.
 */
void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, context_store dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height)
{
  const int32_t last_threshold = dep_quant_context.m_quant->m_thresLast;
  const int flat_scale = dep_quant_context.m_quant->m_QScale;
  int pos = *firstTestPos;

  while (pos >= 0) {
    const uint32_t blk_pos = scan[pos];

    // The divisor is evaluated per position so that nothing is divided when
    // the loop body never runs.
    coeff_t limit;
    if (enableScalingLists) {
      limit = last_threshold / (4 * q_coeff[blk_pos]);
    } else {
      limit = last_threshold / (4 * flat_scale);
    }

    if (abs(srcCoeff[blk_pos]) > limit) {
      break;
    }
    pos--;
  }

  *firstTestPos = pos;
}
int uvg_strategy_register_depquant_generic(void* opaque, uint8_t bitdepth) int uvg_strategy_register_depquant_generic(void* opaque, uint8_t bitdepth)
{ {
bool success = true; bool success = true;
success &= uvg_strategyselector_register(opaque, "dep_quant_decide_and_update", "generic", 40, &uvg_dep_quant_decide_and_update_generic); success &= uvg_strategyselector_register(opaque, "dep_quant_decide_and_update", "generic", 0, &uvg_dep_quant_decide_and_update_generic);
success &= uvg_strategyselector_register(opaque, "find_first_non_zero_coeff", "generic", 0, &uvg_find_first_non_zero_generic);
return success; return success;
} }

View file

@ -39,6 +39,7 @@
// Define function pointers. // Define function pointers.
dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update; dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update;
find_first_non_zero_coeff_func* uvg_find_first_non_zero_coeff;
int uvg_strategy_register_depquant(void *opaque, uint8_t bitdepth) int uvg_strategy_register_depquant(void *opaque, uint8_t bitdepth)

View file

@ -61,16 +61,27 @@ typedef int(dep_quant_decide_and_update_func)(
const uint32_t effHeight, const uint32_t effHeight,
bool is_chroma); bool is_chroma);
typedef void(find_first_non_zero_coeff_func)(
const coeff_t* srcCoeff,
const bool enableScalingLists,
context_store dep_quant_context,
const uint32_t* const scan,
const int32_t* q_coeff,
int* firstTestPos,
int width,
int height);
// Declare function pointers. // Declare function pointers.
extern dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update; extern dep_quant_decide_and_update_func* uvg_dep_quant_decide_and_update;
extern find_first_non_zero_coeff_func* uvg_find_first_non_zero_coeff;
int uvg_strategy_register_depquant(void* opaque, uint8_t bitdepth); int uvg_strategy_register_depquant(void* opaque, uint8_t bitdepth);
#define STRATEGIES_DEPQUANT_EXPORTS \ #define STRATEGIES_DEPQUANT_EXPORTS \
{"dep_quant_decide_and_update", (void**)&uvg_dep_quant_decide_and_update}, \ {"dep_quant_decide_and_update", (void**)&uvg_dep_quant_decide_and_update}, \
{"find_first_non_zero_coeff", (void**)&uvg_find_first_non_zero_coeff}, \