pass context_store as pointer

This reverts commit 47c5ea3d5c.
This commit is contained in:
Joose Sainio 2023-07-21 10:49:46 +03:00
parent 2caf077cff
commit 1c293b8253
5 changed files with 27 additions and 84 deletions

View file

@@ -143,7 +143,7 @@ target_include_directories(uvg266 PUBLIC src)
target_include_directories(uvg266 PUBLIC src/extras)
target_include_directories(uvg266 PUBLIC src/strategies)
file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c" "src/dep_quant.c")
file(GLOB LIB_SOURCES_STRATEGIES_AVX2 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/avx2/*.c")
file(GLOB LIB_SOURCES_STRATEGIES_SSE41 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse41/*.c")
file(GLOB LIB_SOURCES_STRATEGIES_SSE42 RELATIVE ${PROJECT_SOURCE_DIR} "src/strategies/sse42/*.c")

View file

@@ -32,8 +32,6 @@
#include "dep_quant.h"
#include <immintrin.h>
#include "cu.h"
#include "encoderstate.h"
#include "intra.h"
@@ -923,63 +921,15 @@ int uvg_dep_quant(
height >= 4) {
firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
}
//uvg_find_first_non_zero_coeff(srcCoeff, enableScalingLists, dep_quant_context, scan, q_coeff, &firstTestPos, width, height);
const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
const int32_t thres = dep_quant_context.m_quant->m_thresLast;
int temp = firstTestPos;
if (enableScalingLists) {
for (; temp >= 0; (temp)--) {
coeff_t thresTmp = thres / (4 * q_coeff[scan[(temp)]]);
if (abs(srcCoeff[scan[(temp)]]) > thresTmp) {
break;
}
}
} else {
coeff_t thresTmp = thres / (4 * default_quant_coeff);
if (temp >= 16 && height >= 4) {
__m256i th = _mm256_set1_epi16(thresTmp);
temp -= 15;
for (; temp >= 0; temp -= 16) {
__m256i sbb_data;
if (width <= 4) {
sbb_data = _mm256_loadu_si256((__m256i const*)&srcCoeff[scan[temp]]);
} else if (width == 8) {
uint32_t i = scan[temp];
__m256i first = _mm256_loadu_si256((__m256i const*)&srcCoeff[i]);
__m256i second = _mm256_loadu_si256((__m256i const*)&srcCoeff[i + 12]);
sbb_data = _mm256_blend_epi32(first, second, 204);
} else {
int16_t temp_d[16];
uint32_t i = scan[temp];
memcpy(temp_d, &srcCoeff[i], 8);
i += width;
memcpy(temp_d + 4, &srcCoeff[i], 8);
i += width;
memcpy(temp_d + 8, &srcCoeff[i], 8);
i += width;
memcpy(temp_d + 12, &srcCoeff[i], 8);
sbb_data = _mm256_loadu_si256((__m256i const*)temp_d);
}
sbb_data = _mm256_abs_epi16(sbb_data);
__m256i a = _mm256_cmpgt_epi16(sbb_data, th);
if (!_mm256_testz_si256(a, a)) {
if (temp >= 0) {
temp += 15;
}
break;
}
}
}
for (; temp >= 0; temp--) {
if (abs(srcCoeff[scan[(temp)]]) > thresTmp) {
break;
}
}
}
firstTestPos = temp;
uvg_find_first_non_zero_coeff(
srcCoeff,
enableScalingLists,
&dep_quant_context,
scan,
q_coeff,
&firstTestPos,
width,
height);
if (firstTestPos < 0) {
return 0;
}
@@ -1045,6 +995,7 @@ int uvg_dep_quant(
const uint32_t height_in_sbb = MAX(height >> 2, 1);
const uint32_t width_in_sbb = MAX(width >> 2, 1);
const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
//===== populate trellis =====
for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) {
uint32_t blkpos = scan[scanIdx];

View file

@@ -1482,18 +1482,10 @@ void uvg_dep_quant_decide_and_update_avx2(
}
void uvg_find_first_non_zero_avx2(
const coeff_t* srcCoeff,
const bool enableScalingLists,
context_store dep_quant_context,
const uint32_t* const scan,
const int32_t* q_coeff,
int* firstTestPos,
const int width,
const int height)
void uvg_find_first_non_zero_avx2(const coeff_t* srcCoeff, const bool enableScalingLists, const context_store * const dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, const int width, const int height)
{
const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
const int32_t thres = dep_quant_context.m_quant->m_thresLast;
const int default_quant_coeff = dep_quant_context->m_quant->m_QScale;
const int32_t thres = dep_quant_context->m_quant->m_thresLast;
int temp = *firstTestPos;
if (enableScalingLists) {
for (; temp >= 0; (temp)--) {

View file

@@ -227,10 +227,10 @@ static void uvg_dep_quant_decide_and_update_generic(
}
void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, context_store dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height)
void uvg_find_first_non_zero_generic(const coeff_t* srcCoeff, const bool enableScalingLists, const context_store * const dep_quant_context, const uint32_t* const scan, const int32_t* q_coeff, int* firstTestPos, int width, int height)
{
const int default_quant_coeff = dep_quant_context.m_quant->m_QScale;
const int32_t thres = dep_quant_context.m_quant->m_thresLast;
const int default_quant_coeff = dep_quant_context->m_quant->m_QScale;
const int32_t thres = dep_quant_context->m_quant->m_thresLast;
int temp = *firstTestPos;
for (; temp >= 0; (temp)--) {
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[(temp)]])) : (thres / (4 * default_quant_coeff));

View file

@@ -64,7 +64,7 @@ typedef int(dep_quant_decide_and_update_func)(
typedef void (find_first_non_zero_coeff_func)(
const coeff_t* srcCoeff,
const bool enableScalingLists,
context_store dep_quant_context,
const context_store* const dep_quant_context,
const uint32_t* const scan,
const int32_t* q_coeff,
int* firstTestPos,