diff --git a/CMakeLists.txt b/CMakeLists.txt index cafb8fd8..d8c37bbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,7 +143,7 @@ target_include_directories(uvg266 PUBLIC src) target_include_directories(uvg266 PUBLIC src/extras) target_include_directories(uvg266 PUBLIC src/strategies) -file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c" "src/dep_quant.c") +file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c") file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c") file(GLOB LIB_SOURCES_STRATEGIES_SSE42 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse42/*.c") diff --git a/src/dep_quant.c b/src/dep_quant.c index 2656f9aa..8513cf77 100644 --- a/src/dep_quant.c +++ b/src/dep_quant.c @@ -32,8 +32,6 @@ #include "dep_quant.h" -#include - #include "cu.h" #include "encoderstate.h" #include "intra.h" @@ -923,63 +921,15 @@ int uvg_dep_quant( height >= 4) { firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15; } - //uvg_find_first_non_zero_coeff(srcCoeff, enableScalingLists, dep_quant_context, scan, q_coeff, &firstTestPos, width, height); - const int default_quant_coeff = dep_quant_context.m_quant->m_QScale; - const int32_t thres = dep_quant_context.m_quant->m_thresLast; - int temp = firstTestPos; - if (enableScalingLists) { - for (; temp >= 0; (temp)--) { - coeff_t thresTmp = thres / (4 * q_coeff[scan[(temp)]]); - if (abs(srcCoeff[scan[(temp)]]) > thresTmp) { - break; - } - } - } else { - coeff_t thresTmp = thres / (4 * default_quant_coeff); - if (temp >= 16 && height >= 4) { - __m256i th = _mm256_set1_epi16(thresTmp); - temp -= 15; - for (; temp >= 0; temp -= 16) { - __m256i sbb_data; - if (width <= 4) { - sbb_data = _mm256_loadu_si256((__m256i const*)&srcCoeff[scan[temp]]); - } else if (width == 8) { - uint32_t i = scan[temp]; - __m256i first = _mm256_loadu_si256((__m256i const*)&srcCoeff[i]); - __m256i second = _mm256_loadu_si256((__m256i const*)&srcCoeff[i + 12]); - sbb_data = _mm256_blend_epi32(first, second, 204); - } else { - int16_t temp_d[16]; - uint32_t i = scan[temp]; - memcpy(temp_d, &srcCoeff[i], 8); - i += width; - memcpy(temp_d + 4, &srcCoeff[i], 8); - i += width; - memcpy(temp_d + 8, &srcCoeff[i], 8); - i += width; - memcpy(temp_d + 12, &srcCoeff[i], 8); - - sbb_data = _mm256_loadu_si256((__m256i const*)temp_d); - } - sbb_data = _mm256_abs_epi16(sbb_data); - - __m256i a = _mm256_cmpgt_epi16(sbb_data, th); - if (!_mm256_testz_si256(a, a)) { - if (temp >= 0) { - temp += 15; - } - break; - } - } - } - for (; temp >= 0; temp--) { - if (abs(srcCoeff[scan[(temp)]]) > thresTmp) { - break; - } - } - } - - firstTestPos = temp; + uvg_find_first_non_zero_coeff( + srcCoeff, + enableScalingLists, + &dep_quant_context, + scan, + q_coeff, + &firstTestPos, + width, + height); if (firstTestPos < 0) { return 0; } @@ -1044,7 +994,8 @@ int uvg_dep_quant( const uint32_t height_in_sbb = MAX(height >> 2, 1); const uint32_t width_in_sbb = MAX(width >> 2, 1); - + + const int default_quant_coeff = dep_quant_context.m_quant->m_QScale; //===== populate trellis ===== for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) { uint32_t blkpos = scan[scanIdx]; diff --git a/src/strategies/avx2/depquant-avx2.c b/src/strategies/avx2/depquant-avx2.c index 357932f9..5ef1936e 100644 --- a/src/strategies/avx2/depquant-avx2.c +++ b/src/strategies/avx2/depquant-avx2.c @@ -1482,18 +1482,10 @@ void uvg_dep_quant_decide_and_update_avx2( } -void uvg_find_first_non_zero_avx2( - const coeff_t* srcCoeff, - const bool enableScalingLists, - context_store dep_quant_context, - const uint32_t* const scan, - const int32_t* q_coeff, - int* firstTestPos, - const int width, - const int height) +void uvg_find_first_non_zero_avx2(const coeff_t* srcCoeff, const bool enableScalingLists, const context_store * const dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, const int width, const int height) { - const int default_quant_coeff = dep_quant_context.m_quant->m_QScale; - const int32_t thres = dep_quant_context.m_quant->m_thresLast; + const int default_quant_coeff = dep_quant_context->m_quant->m_QScale; + const int32_t thres = dep_quant_context->m_quant->m_thresLast; int temp = *firstTestPos; if (enableScalingLists) { for (; temp >= 0; (temp)--) { diff --git a/src/strategies/generic/depquant-generic.c b/src/strategies/generic/depquant-generic.c index f1103054..b15ef52b 100644 --- a/src/strategies/generic/depquant-generic.c +++ b/src/strategies/generic/depquant-generic.c @@ -227,10 +227,10 @@ static void uvg_dep_quant_decide_and_update_generic( } -void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, context_store dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height) +void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, const context_store * const dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height) { - const int default_quant_coeff = dep_quant_context.m_quant->m_QScale; - const int32_t thres = dep_quant_context.m_quant->m_thresLast; + const int default_quant_coeff = dep_quant_context->m_quant->m_QScale; + const int32_t thres = dep_quant_context->m_quant->m_thresLast; int temp = *firstTestPos; for (; temp >= 0; (temp)--) { coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[(temp)]])) : (thres / (4 * default_quant_coeff)); diff --git a/src/strategies/strategies-depquant.h b/src/strategies/strategies-depquant.h index 6a49dc35..5a58a3c7 100644 --- a/src/strategies/strategies-depquant.h +++ b/src/strategies/strategies-depquant.h @@ -61,15 +61,15 @@ typedef int(dep_quant_decide_and_update_func)( const uint32_t effHeight, bool is_chroma); -typedef void(find_first_non_zero_coeff_func)( - const coeff_t* srcCoeff, - const bool enableScalingLists, - context_store dep_quant_context, - const uint32_t* const scan, - const int32_t* q_coeff, - int* firstTestPos, - int width, - int height); +typedef void (find_first_non_zero_coeff_func)( + const coeff_t* srcCoeff, + const bool enableScalingLists, + const context_store* const dep_quant_context, + const uint32_t* const scan, + const int32_t* q_coeff, + int* firstTestPos, + int width, + int height); // Declare function pointers.