Mirror of https://github.com/ultravideo/uvg266.git (synced 2024-11-27 19:24:06 +00:00)
[avx2] and last
This commit is contained in:
Parent: 12fea6f8b1
Commit: 963db5a407
|
@ -1385,7 +1385,7 @@ static INLINE void update_states_avx2(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint32_t level_offset = scan_pos & 15;
|
uint32_t level_offset = scan_pos & 15;
|
||||||
__m128i max_abs = _mm_min_epi32(abs_level, _mm_set1_epi32(255));
|
__m128i max_abs = _mm_min_epi32(abs_level, _mm_set1_epi32(32));
|
||||||
uint32_t max_abs_s[4];
|
uint32_t max_abs_s[4];
|
||||||
_mm_storeu_epi32(max_abs_s, max_abs);
|
_mm_storeu_epi32(max_abs_s, max_abs);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
@ -1527,6 +1527,7 @@ static INLINE void update_states_avx2(
|
||||||
}
|
}
|
||||||
|
|
||||||
__m128i sum_abs = _mm_srli_epi32(tinit, 8);
|
__m128i sum_abs = _mm_srli_epi32(tinit, 8);
|
||||||
|
sum_abs = _mm_min_epi32(sum_abs, _mm_set1_epi32(32));
|
||||||
switch (numIPos) {
|
switch (numIPos) {
|
||||||
case 5:
|
case 5:
|
||||||
{
|
{
|
||||||
|
@ -1534,8 +1535,7 @@ static INLINE void update_states_avx2(
|
||||||
levels,
|
levels,
|
||||||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[4])),
|
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[4])),
|
||||||
1);
|
1);
|
||||||
t = _mm_and_epi32(t, first_byte);
|
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||||
sum_abs = _mm_add_epi32(sum_abs, t);
|
|
||||||
}
|
}
|
||||||
case 4:
|
case 4:
|
||||||
{
|
{
|
||||||
|
@ -1543,8 +1543,7 @@ static INLINE void update_states_avx2(
|
||||||
levels,
|
levels,
|
||||||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[3])),
|
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[3])),
|
||||||
1);
|
1);
|
||||||
t = _mm_and_epi32(t, first_byte);
|
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||||
sum_abs = _mm_add_epi32(sum_abs, t);
|
|
||||||
}
|
}
|
||||||
case 3:
|
case 3:
|
||||||
{
|
{
|
||||||
|
@ -1552,8 +1551,7 @@ static INLINE void update_states_avx2(
|
||||||
levels,
|
levels,
|
||||||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[2])),
|
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[2])),
|
||||||
1);
|
1);
|
||||||
t = _mm_and_epi32(t, first_byte);
|
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||||
sum_abs = _mm_add_epi32(sum_abs, t);
|
|
||||||
}
|
}
|
||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
|
@ -1561,8 +1559,7 @@ static INLINE void update_states_avx2(
|
||||||
levels,
|
levels,
|
||||||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[1])),
|
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[1])),
|
||||||
1);
|
1);
|
||||||
t = _mm_and_epi32(t, first_byte);
|
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||||
sum_abs = _mm_add_epi32(sum_abs, t);
|
|
||||||
}
|
}
|
||||||
case 1:
|
case 1:
|
||||||
{
|
{
|
||||||
|
@ -1570,12 +1567,12 @@ static INLINE void update_states_avx2(
|
||||||
levels,
|
levels,
|
||||||
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[0])),
|
_mm_add_epi32(levels_start_offsets, _mm_set1_epi32(next_nb_info_ssb.inPos[0])),
|
||||||
1);
|
1);
|
||||||
t = _mm_and_epi32(t, first_byte);
|
sum_abs = _mm_add_epi32(t, sum_abs);
|
||||||
sum_abs = _mm_add_epi32(sum_abs, t);
|
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
sum_abs = _mm_and_epi32(sum_abs, first_byte);
|
||||||
if (extRiceFlag) {
|
if (extRiceFlag) {
|
||||||
assert(0 && "Not implemented for avx2");
|
assert(0 && "Not implemented for avx2");
|
||||||
} else {
|
} else {
|
||||||
|
@ -1815,7 +1812,7 @@ static INLINE void updateState(
|
||||||
state->all_gte_four &= state->m_remRegBins[state_id] >= 4;
|
state->all_gte_four &= state->m_remRegBins[state_id] >= 4;
|
||||||
state->all_lt_four &= state->m_remRegBins[state_id] < 4;
|
state->all_lt_four &= state->m_remRegBins[state_id] < 4;
|
||||||
uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit[state_id]);
|
uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit[state_id]);
|
||||||
levels[scan_pos & 15] = (uint8_t)MIN(255, decisions->absLevel[decision_id]);
|
levels[scan_pos & 15] = (uint8_t)MIN(32, decisions->absLevel[decision_id]);
|
||||||
|
|
||||||
if (state->m_remRegBins[state_id] >= 4) {
|
if (state->m_remRegBins[state_id] >= 4) {
|
||||||
coeff_t tinit = state->m_absLevelsAndCtxInit[state_id][8 + ((scan_pos - 1) & 15)];
|
coeff_t tinit = state->m_absLevelsAndCtxInit[state_id][8 + ((scan_pos - 1) & 15)];
|
||||||
|
|
Loading…
Reference in a new issue