mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[avx2] AVX2 version of depquant now exactly matches scalar version
This commit is contained in:
parent
48ea4bff4d
commit
2811ce58f4
|
@ -664,7 +664,7 @@ void uvg_dep_quant_update_state_eos(
|
|||
memset(state->m_absLevelsAndCtxInit[curr_state_offset], 0, 16 * sizeof(uint8_t));
|
||||
}
|
||||
uint8_t* temp = (uint8_t*)(&state->m_absLevelsAndCtxInit[curr_state_offset][scan_pos & 15]);
|
||||
*temp = (uint8_t)MIN(32, decisions->absLevel[decision_id]);
|
||||
*temp = (uint8_t)MIN(51, decisions->absLevel[decision_id]);
|
||||
|
||||
update_common_context(ctxs, state->m_commonCtx, scan_pos, cg_pos, width_in_sbb, height_in_sbb, next_sbb_right,
|
||||
next_sbb_below, prvState, ctxs->m_curr_state_offset + decision_id);
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
*/
|
||||
|
||||
#include "strategies/avx2/depquant-avx2.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
#if COMPILE_INTEL_AVX2 && defined X86_64
|
||||
#include "dep_quant.h"
|
||||
|
@ -352,13 +353,13 @@ static void check_rd_costs_avx2(const all_depquant_states* const state, const en
|
|||
__m256i cheaper_first = _mm256_blendv_epi8(rd_cost_a, rd_cost_b, a_vs_b);
|
||||
__m256i cheaper_first_data = _mm256_blendv_epi8(a_data, b_data, a_vs_b);
|
||||
|
||||
__m256i z_vs_decision = _mm256_cmpgt_epi64(rd_cost_z, rd_cost_decision);
|
||||
__m256i cheaper_second = _mm256_blendv_epi8(rd_cost_z, rd_cost_decision, z_vs_decision);
|
||||
__m256i cheaper_second_data = _mm256_blendv_epi8(z_data, decision_data, z_vs_decision);
|
||||
__m256i z_vs_decision = _mm256_cmpgt_epi64(rd_cost_decision, rd_cost_z);
|
||||
__m256i cheaper_second = _mm256_blendv_epi8(rd_cost_decision, rd_cost_z, z_vs_decision);
|
||||
__m256i cheaper_second_data = _mm256_blendv_epi8(decision_data, z_data, z_vs_decision);
|
||||
|
||||
__m256i final_decision = _mm256_cmpgt_epi64(cheaper_first, cheaper_second);
|
||||
__m256i final_rd_cost = _mm256_blendv_epi8(cheaper_first, cheaper_second, final_decision);
|
||||
__m256i final_data = _mm256_blendv_epi8(cheaper_first_data, cheaper_second_data, final_decision);
|
||||
__m256i final_decision = _mm256_cmpgt_epi64(cheaper_second, cheaper_first);
|
||||
__m256i final_rd_cost = _mm256_blendv_epi8(cheaper_second, cheaper_first, final_decision);
|
||||
__m256i final_data = _mm256_blendv_epi8(cheaper_second_data, cheaper_first_data, final_decision);
|
||||
|
||||
_mm256_store_si256((__m256i*)decisions->rdCost, final_rd_cost);
|
||||
final_data = _mm256_permutevar8x32_epi32(final_data, _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0));
|
||||
|
@ -952,7 +953,7 @@ static INLINE void update_states_avx2(
|
|||
}
|
||||
}
|
||||
uint32_t level_offset = scan_pos & 15;
|
||||
__m128i max_abs = _mm_min_epi32(abs_level, _mm_set1_epi32(32));
|
||||
__m128i max_abs = _mm_min_epi32(abs_level, _mm_set1_epi32(51));
|
||||
uint32_t max_abs_s[4];
|
||||
_mm_storeu_si128((__m128i*)max_abs_s, max_abs);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
|
@ -1094,7 +1095,7 @@ static INLINE void update_states_avx2(
|
|||
}
|
||||
|
||||
__m128i sum_abs = _mm_srli_epi32(tinit, 8);
|
||||
sum_abs = _mm_min_epi32(sum_abs, _mm_set1_epi32(32));
|
||||
sum_abs = _mm_min_epi32(sum_abs, _mm_set1_epi32(51));
|
||||
switch (numIPos) {
|
||||
case 5:
|
||||
{
|
||||
|
@ -1103,6 +1104,9 @@ static INLINE void update_states_avx2(
|
|||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[4])),
|
||||
1);
|
||||
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||
// Need this to make sure we don't go beyond 255
|
||||
sum_abs = _mm_and_si128(sum_abs, first_byte);
|
||||
sum_abs = _mm_min_epi32(sum_abs, _mm_set1_epi32(51));
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
|
|
|
@ -437,7 +437,7 @@ static void quantize_chroma(
|
|||
int8_t height = cu_loc->chroma_height;
|
||||
if(state->encoder_control->cfg.dep_quant && transform != CHROMA_TS) {
|
||||
int abs_sum = 0;
|
||||
state->quant_blocks[2].needs_init = state->encoder_control->cfg.jccr;
|
||||
state->quant_blocks[2].needs_init |= state->encoder_control->cfg.jccr;
|
||||
uvg_dep_quant(
|
||||
state,
|
||||
cur_tu,
|
||||
|
|
Loading…
Reference in a new issue