mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[depquant] AoS -> SoA all states
This commit is contained in:
parent
73442f1bba
commit
2f1e9c4020
354
src/dep_quant.c
354
src/dep_quant.c
|
@ -143,13 +143,34 @@ typedef struct
|
||||||
unsigned effHeight;
|
unsigned effHeight;
|
||||||
} depquant_state;
|
} depquant_state;
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
int64_t m_rdCost[12];
|
||||||
|
uint16_t m_absLevelsAndCtxInit[12][24]; // 16x8bit for abs levels + 16x16bit for ctx init id
|
||||||
|
int8_t m_numSigSbb[12];
|
||||||
|
int m_remRegBins[12];
|
||||||
|
int8_t m_refSbbCtxId[12];
|
||||||
|
uint32_t m_sbbFracBits[12][2];
|
||||||
|
uint32_t m_sigFracBits[12][2];
|
||||||
|
int32_t m_coeffFracBits[12][6];
|
||||||
|
int8_t m_goRicePar[12];
|
||||||
|
int8_t m_goRiceZero[12];
|
||||||
|
int8_t m_stateId[12];
|
||||||
|
uint32_t *m_sigFracBitsArray[12][12];
|
||||||
|
int32_t *m_gtxFracBitsArray[21];
|
||||||
|
common_context* m_commonCtx;
|
||||||
|
|
||||||
|
unsigned effWidth;
|
||||||
|
unsigned effHeight;
|
||||||
|
} all_depquant_states;
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
common_context m_common_context;
|
common_context m_common_context;
|
||||||
depquant_state m_allStates[12];
|
all_depquant_states m_allStates;
|
||||||
depquant_state* m_currStates;
|
int m_curr_state_offset;
|
||||||
depquant_state* m_prevStates;
|
int m_prev_state_offset;
|
||||||
depquant_state* m_skipStates;
|
int m_skip_state_offset;
|
||||||
depquant_state m_startState;
|
depquant_state m_startState;
|
||||||
quant_block m_quant;
|
quant_block m_quant;
|
||||||
Decision m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
Decision m_trellis[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||||
|
@ -526,66 +547,66 @@ static void depquant_state_init(depquant_state* state, uint32_t sig_frac_bits[2]
|
||||||
state->m_sbbFracBits[1] = 0;
|
state->m_sbbFracBits[1] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void checkRdCostSkipSbbZeroOut(Decision * decision, const depquant_state * const state, int decision_id)
|
static INLINE void checkRdCostSkipSbbZeroOut(Decision * decision, const all_depquant_states * const state, int decision_id, int skip_offset) {
|
||||||
{
|
int64_t rdCost = state->m_rdCost[decision_id + skip_offset] + state->m_sbbFracBits[decision_id + skip_offset][0];
|
||||||
int64_t rdCost = state->m_rdCost + state->m_sbbFracBits[0];
|
|
||||||
decision->rdCost[decision_id] = rdCost;
|
decision->rdCost[decision_id] = rdCost;
|
||||||
decision->absLevel[decision_id] = 0;
|
decision->absLevel[decision_id] = 0;
|
||||||
decision->prevId[decision_id] = 4 + state->m_stateId;
|
decision->prevId[decision_id] = 4 + state->m_stateId[decision_id + skip_offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
static void checkRdCosts(
|
static void checkRdCosts(
|
||||||
const depquant_state * const state,
|
const all_depquant_states * const state,
|
||||||
const enum ScanPosType spt,
|
const enum ScanPosType spt,
|
||||||
const PQData * pqDataA,
|
const PQData * pqDataA,
|
||||||
const PQData * pqDataB,
|
const PQData * pqDataB,
|
||||||
Decision * decisions,
|
Decision * decisions,
|
||||||
int decisionA,
|
const int decisionA,
|
||||||
int decisionB)
|
const int decisionB,
|
||||||
|
const int state_offset)
|
||||||
{
|
{
|
||||||
const int32_t* goRiceTab = g_goRiceBits[state->m_goRicePar];
|
const int32_t* goRiceTab = g_goRiceBits[state->m_goRicePar[state_offset]];
|
||||||
int64_t rdCostA = state->m_rdCost + pqDataA->deltaDist;
|
int64_t rdCostA = state->m_rdCost[state_offset] + pqDataA->deltaDist;
|
||||||
int64_t rdCostB = state->m_rdCost + pqDataB->deltaDist;
|
int64_t rdCostB = state->m_rdCost[state_offset] + pqDataB->deltaDist;
|
||||||
int64_t rdCostZ = state->m_rdCost;
|
int64_t rdCostZ = state->m_rdCost[state_offset];
|
||||||
if (state->m_remRegBins >= 4)
|
if (state->m_remRegBins[state_offset] >= 4)
|
||||||
{
|
{
|
||||||
if (pqDataA->absLevel < 4)
|
if (pqDataA->absLevel < 4)
|
||||||
{
|
{
|
||||||
rdCostA += state->m_coeffFracBits[pqDataA->absLevel];
|
rdCostA += state->m_coeffFracBits[state_offset][pqDataA->absLevel];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const coeff_t value = (pqDataA->absLevel - 4) >> 1;
|
const coeff_t value = (pqDataA->absLevel - 4) >> 1;
|
||||||
rdCostA +=
|
rdCostA +=
|
||||||
state->m_coeffFracBits[pqDataA->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
|
state->m_coeffFracBits[state_offset][pqDataA->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
|
||||||
}
|
}
|
||||||
if (pqDataB->absLevel < 4)
|
if (pqDataB->absLevel < 4)
|
||||||
{
|
{
|
||||||
rdCostB += state->m_coeffFracBits[pqDataB->absLevel];
|
rdCostB += state->m_coeffFracBits[state_offset][pqDataB->absLevel];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const coeff_t value = (pqDataB->absLevel - 4) >> 1;
|
const coeff_t value = (pqDataB->absLevel - 4) >> 1;
|
||||||
rdCostB +=
|
rdCostB +=
|
||||||
state->m_coeffFracBits[pqDataB->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
|
state->m_coeffFracBits[state_offset][pqDataB->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
|
||||||
}
|
}
|
||||||
if (spt == SCAN_ISCSBB)
|
if (spt == SCAN_ISCSBB)
|
||||||
{
|
{
|
||||||
rdCostA += state->m_sigFracBits[1];
|
rdCostA += state->m_sigFracBits[state_offset][1];
|
||||||
rdCostB += state->m_sigFracBits[1];
|
rdCostB += state->m_sigFracBits[state_offset][1];
|
||||||
rdCostZ += state->m_sigFracBits[0];
|
rdCostZ += state->m_sigFracBits[state_offset][0];
|
||||||
}
|
}
|
||||||
else if (spt == SCAN_SOCSBB)
|
else if (spt == SCAN_SOCSBB)
|
||||||
{
|
{
|
||||||
rdCostA += state->m_sbbFracBits[1] + state->m_sigFracBits[1];
|
rdCostA += state->m_sbbFracBits[state_offset][1] + state->m_sigFracBits[state_offset][1];
|
||||||
rdCostB += state->m_sbbFracBits[1] + state->m_sigFracBits[1];
|
rdCostB += state->m_sbbFracBits[state_offset][1] + state->m_sigFracBits[state_offset][1];
|
||||||
rdCostZ += state->m_sbbFracBits[1] + state->m_sigFracBits[0];
|
rdCostZ += state->m_sbbFracBits[state_offset][1] + state->m_sigFracBits[state_offset][0];
|
||||||
}
|
}
|
||||||
else if (state->m_numSigSbb)
|
else if (state->m_numSigSbb[state_offset])
|
||||||
{
|
{
|
||||||
rdCostA += state->m_sigFracBits[1];
|
rdCostA += state->m_sigFracBits[state_offset][1];
|
||||||
rdCostB += state->m_sigFracBits[1];
|
rdCostB += state->m_sigFracBits[state_offset][1];
|
||||||
rdCostZ += state->m_sigFracBits[0];
|
rdCostZ += state->m_sigFracBits[state_offset][0];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -595,43 +616,43 @@ static void checkRdCosts(
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
rdCostA +=
|
rdCostA +=
|
||||||
(1 << SCALE_BITS)
|
(1 << SCALE_BITS) + goRiceTab[pqDataA->absLevel <= state->m_goRiceZero[state_offset]
|
||||||
+ goRiceTab[pqDataA->absLevel <= state->m_goRiceZero ? pqDataA->absLevel - 1
|
? pqDataA->absLevel - 1
|
||||||
: (pqDataA->absLevel < RICEMAX ? pqDataA->absLevel : RICEMAX - 1)];
|
: (pqDataA->absLevel < RICEMAX ? pqDataA->absLevel : RICEMAX - 1)];
|
||||||
rdCostB +=
|
rdCostB +=
|
||||||
(1 << SCALE_BITS)
|
(1 << SCALE_BITS) + goRiceTab[pqDataB->absLevel <= state->m_goRiceZero[state_offset]
|
||||||
+ goRiceTab[pqDataB->absLevel <= state->m_goRiceZero ? pqDataB->absLevel - 1
|
? pqDataB->absLevel - 1
|
||||||
: (pqDataB->absLevel < RICEMAX ? pqDataB->absLevel : RICEMAX - 1)];
|
: (pqDataB->absLevel < RICEMAX ? pqDataB->absLevel : RICEMAX - 1)];
|
||||||
rdCostZ += goRiceTab[state->m_goRiceZero];
|
rdCostZ += goRiceTab[state->m_goRiceZero[state_offset]];
|
||||||
}
|
}
|
||||||
if (rdCostA < decisions->rdCost[decisionA])
|
if (rdCostA < decisions->rdCost[decisionA])
|
||||||
{
|
{
|
||||||
decisions->rdCost[decisionA] = rdCostA;
|
decisions->rdCost[decisionA] = rdCostA;
|
||||||
decisions->absLevel[decisionA] = pqDataA->absLevel;
|
decisions->absLevel[decisionA] = pqDataA->absLevel;
|
||||||
decisions->prevId[decisionA] = state->m_stateId;
|
decisions->prevId[decisionA] = state->m_stateId[state_offset];
|
||||||
}
|
}
|
||||||
if (rdCostZ < decisions->rdCost[decisionA])
|
if (rdCostZ < decisions->rdCost[decisionA])
|
||||||
{
|
{
|
||||||
decisions->rdCost[decisionA] = rdCostZ;
|
decisions->rdCost[decisionA] = rdCostZ;
|
||||||
decisions->absLevel[decisionA] = 0;
|
decisions->absLevel[decisionA] = 0;
|
||||||
decisions->prevId[decisionA] = state->m_stateId;
|
decisions->prevId[decisionA] = state->m_stateId[state_offset];
|
||||||
}
|
}
|
||||||
if (rdCostB < decisions->rdCost[decisionB])
|
if (rdCostB < decisions->rdCost[decisionB])
|
||||||
{
|
{
|
||||||
decisions->rdCost[decisionB] = rdCostB;
|
decisions->rdCost[decisionB] = rdCostB;
|
||||||
decisions->absLevel[decisionB] = pqDataB->absLevel;
|
decisions->absLevel[decisionB] = pqDataB->absLevel;
|
||||||
decisions->prevId[decisionB] = state->m_stateId;
|
decisions->prevId[decisionB] = state->m_stateId[state_offset];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void checkRdCostSkipSbb(const depquant_state* const state, Decision * decisions, int decision_id)
|
static INLINE void checkRdCostSkipSbb(const all_depquant_states* const state, Decision * decisions, int decision_id, int skip_offset)
|
||||||
{
|
{
|
||||||
int64_t rdCost = state->m_rdCost + state->m_sbbFracBits[0];
|
int64_t rdCost = state->m_rdCost[skip_offset + decision_id] + state->m_sbbFracBits[skip_offset + decision_id][0];
|
||||||
if (rdCost < decisions->rdCost[decision_id])
|
if (rdCost < decisions->rdCost[decision_id])
|
||||||
{
|
{
|
||||||
decisions->rdCost[decision_id] = rdCost;
|
decisions->rdCost[decision_id] = rdCost;
|
||||||
decisions->absLevel[decision_id] = 0;
|
decisions->absLevel[decision_id] = 0;
|
||||||
decisions->prevId[decision_id] = 4 + state->m_stateId;
|
decisions->prevId[decision_id] = 4 + state->m_stateId[skip_offset + decision_id];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -685,8 +706,7 @@ static const Decision startDec = { .rdCost = {INT64_MAX >> 2, INT64_MAX >> 2, IN
|
||||||
|
|
||||||
|
|
||||||
static void xDecide(
|
static void xDecide(
|
||||||
depquant_state* const m_skipStates,
|
all_depquant_states* const all_states,
|
||||||
depquant_state* const m_prevStates,
|
|
||||||
depquant_state* const m_startState,
|
depquant_state* const m_startState,
|
||||||
quant_block * qp,
|
quant_block * qp,
|
||||||
const enum ScanPosType spt,
|
const enum ScanPosType spt,
|
||||||
|
@ -694,7 +714,9 @@ static void xDecide(
|
||||||
const int lastOffset,
|
const int lastOffset,
|
||||||
Decision* decisions,
|
Decision* decisions,
|
||||||
bool zeroOut,
|
bool zeroOut,
|
||||||
coeff_t quanCoeff)
|
coeff_t quanCoeff,
|
||||||
|
const int skip_offset,
|
||||||
|
const int prev_offset)
|
||||||
{
|
{
|
||||||
memcpy(decisions, &startDec, sizeof(Decision));
|
memcpy(decisions, &startDec, sizeof(Decision));
|
||||||
|
|
||||||
|
@ -702,26 +724,26 @@ static void xDecide(
|
||||||
{
|
{
|
||||||
if (spt == SCAN_EOCSBB)
|
if (spt == SCAN_EOCSBB)
|
||||||
{
|
{
|
||||||
checkRdCostSkipSbbZeroOut(decisions, &m_skipStates[0], 0);
|
checkRdCostSkipSbbZeroOut(decisions, all_states, 0, skip_offset);
|
||||||
checkRdCostSkipSbbZeroOut(decisions, &m_skipStates[1], 1);
|
checkRdCostSkipSbbZeroOut(decisions, all_states, 1, skip_offset);
|
||||||
checkRdCostSkipSbbZeroOut(decisions, &m_skipStates[2],2);
|
checkRdCostSkipSbbZeroOut(decisions, all_states,2, skip_offset);
|
||||||
checkRdCostSkipSbbZeroOut(decisions, &m_skipStates[3],3);
|
checkRdCostSkipSbbZeroOut(decisions, all_states,3, skip_offset);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
PQData pqData[4];
|
PQData pqData[4];
|
||||||
preQuantCoeff(qp, absCoeff, pqData, quanCoeff);
|
preQuantCoeff(qp, absCoeff, pqData, quanCoeff);
|
||||||
checkRdCosts(&m_prevStates[0], spt, &pqData[0], &pqData[2], decisions, 0, 2);
|
checkRdCosts(all_states, spt, &pqData[0], &pqData[2], decisions, 0, 2, prev_offset + 0);
|
||||||
checkRdCosts(&m_prevStates[1], spt, &pqData[0], &pqData[2], decisions,2, 0);
|
checkRdCosts(all_states, spt, &pqData[0], &pqData[2], decisions, 2, 0, prev_offset + 1);
|
||||||
checkRdCosts(&m_prevStates[2], spt, &pqData[3], &pqData[1], decisions, 1,3);
|
checkRdCosts(all_states, spt, &pqData[3], &pqData[1], decisions, 1, 3, prev_offset + 2);
|
||||||
checkRdCosts(&m_prevStates[3], spt, &pqData[3], &pqData[1], decisions, 3,1);
|
checkRdCosts(all_states, spt, &pqData[3], &pqData[1], decisions, 3, 1, prev_offset + 3);
|
||||||
if (spt == SCAN_EOCSBB)
|
if (spt == SCAN_EOCSBB)
|
||||||
{
|
{
|
||||||
checkRdCostSkipSbb(&m_skipStates[0], decisions, 0);
|
checkRdCostSkipSbb(all_states, decisions, 0, skip_offset);
|
||||||
checkRdCostSkipSbb(&m_skipStates[1], decisions, 1);
|
checkRdCostSkipSbb(all_states, decisions, 1, skip_offset);
|
||||||
checkRdCostSkipSbb(&m_skipStates[2], decisions,2);
|
checkRdCostSkipSbb(all_states, decisions,2, skip_offset);
|
||||||
checkRdCostSkipSbb(&m_skipStates[3], decisions,3);
|
checkRdCostSkipSbb(all_states, decisions,3, skip_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
checkRdCostStart(m_startState, lastOffset, &pqData[0], decisions, 0);
|
checkRdCostStart(m_startState, lastOffset, &pqData[0], decisions, 0);
|
||||||
|
@ -756,6 +778,7 @@ static INLINE unsigned templateAbsCompare(coeff_t sum)
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void update_common_context(
|
static INLINE void update_common_context(
|
||||||
|
context_store* ctxs,
|
||||||
common_context * cc,
|
common_context * cc,
|
||||||
const uint32_t scan_pos,
|
const uint32_t scan_pos,
|
||||||
const uint32_t cg_pos,
|
const uint32_t cg_pos,
|
||||||
|
@ -763,38 +786,38 @@ static INLINE void update_common_context(
|
||||||
const uint32_t height_in_sbb,
|
const uint32_t height_in_sbb,
|
||||||
const uint32_t next_sbb_right,
|
const uint32_t next_sbb_right,
|
||||||
const uint32_t next_sbb_below,
|
const uint32_t next_sbb_below,
|
||||||
const depquant_state* prevState,
|
const int prev_state,
|
||||||
depquant_state *currState)
|
const int curr_state)
|
||||||
{
|
{
|
||||||
const uint32_t numSbb = width_in_sbb * height_in_sbb;
|
const uint32_t numSbb = width_in_sbb * height_in_sbb;
|
||||||
uint8_t* sbbFlags = cc->m_currSbbCtx[currState->m_stateId].sbbFlags;
|
uint8_t* sbbFlags = cc->m_currSbbCtx[curr_state & 3].sbbFlags;
|
||||||
uint8_t* levels = cc->m_currSbbCtx[currState->m_stateId].levels;
|
uint8_t* levels = cc->m_currSbbCtx[curr_state & 3].levels;
|
||||||
size_t setCpSize = cc->m_nbInfo[scan_pos - 1].maxDist * sizeof(uint8_t);
|
size_t setCpSize = cc->m_nbInfo[scan_pos - 1].maxDist * sizeof(uint8_t);
|
||||||
if (prevState && prevState->m_refSbbCtxId >= 0) {
|
if (prev_state != -1 && ctxs->m_allStates.m_refSbbCtxId[prev_state] >= 0) {
|
||||||
memcpy(sbbFlags, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].sbbFlags, numSbb * sizeof(uint8_t));
|
memcpy(sbbFlags, cc->m_prevSbbCtx[ctxs->m_allStates.m_refSbbCtxId[prev_state]].sbbFlags, numSbb * sizeof(uint8_t));
|
||||||
memcpy(levels + scan_pos, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].levels + scan_pos, setCpSize);
|
memcpy(levels + scan_pos, cc->m_prevSbbCtx[ctxs->m_allStates.m_refSbbCtxId[prev_state]].levels + scan_pos, setCpSize);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
memset(sbbFlags, 0, numSbb * sizeof(uint8_t));
|
memset(sbbFlags, 0, numSbb * sizeof(uint8_t));
|
||||||
memset(levels + scan_pos, 0, setCpSize);
|
memset(levels + scan_pos, 0, setCpSize);
|
||||||
}
|
}
|
||||||
sbbFlags[cg_pos] = !!currState->m_numSigSbb;
|
sbbFlags[cg_pos] = !!ctxs->m_allStates.m_numSigSbb[curr_state];
|
||||||
memcpy(levels + scan_pos, currState->m_absLevelsAndCtxInit, 16 * sizeof(uint8_t));
|
memcpy(levels + scan_pos, ctxs->m_allStates.m_absLevelsAndCtxInit[curr_state], 16 * sizeof(uint8_t));
|
||||||
|
|
||||||
const int sigNSbb = ((next_sbb_right ? sbbFlags[next_sbb_right] : false) || (next_sbb_below ? sbbFlags[next_sbb_below] : false) ? 1 : 0);
|
const int sigNSbb = ((next_sbb_right ? sbbFlags[next_sbb_right] : false) || (next_sbb_below ? sbbFlags[next_sbb_below] : false) ? 1 : 0);
|
||||||
currState->m_numSigSbb = 0;
|
ctxs->m_allStates.m_numSigSbb[curr_state] = 0;
|
||||||
if (prevState) {
|
if (prev_state != -1) {
|
||||||
currState->m_remRegBins = prevState->m_remRegBins;
|
ctxs->m_allStates.m_remRegBins[curr_state] = ctxs->m_allStates.m_remRegBins[prev_state];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int ctxBinSampleRatio = 28;
|
int ctxBinSampleRatio = 28;
|
||||||
// (scanInfo.chType == COLOR_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
|
// (scanInfo.chType == COLOR_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
|
||||||
currState->m_remRegBins = (currState->effWidth * currState->effHeight * ctxBinSampleRatio) / 16;
|
ctxs->m_allStates.m_remRegBins[curr_state] = (ctxs->m_allStates.effWidth * ctxs->m_allStates.effHeight * ctxBinSampleRatio) / 16;
|
||||||
}
|
}
|
||||||
currState->m_goRicePar = 0;
|
ctxs->m_allStates.m_goRicePar[curr_state] = 0;
|
||||||
currState->m_refSbbCtxId = currState->m_stateId;
|
ctxs->m_allStates.m_refSbbCtxId[curr_state] = curr_state & 3;
|
||||||
currState->m_sbbFracBits[0] = cc->m_sbbFlagBits[sigNSbb][0];
|
ctxs->m_allStates.m_sbbFracBits[curr_state][0] = cc->m_sbbFlagBits[sigNSbb][0];
|
||||||
currState->m_sbbFracBits[1] = cc->m_sbbFlagBits[sigNSbb][1];
|
ctxs->m_allStates.m_sbbFracBits[curr_state][1] = cc->m_sbbFlagBits[sigNSbb][1];
|
||||||
|
|
||||||
uint16_t templateCtxInit[16];
|
uint16_t templateCtxInit[16];
|
||||||
const int scanBeg = scan_pos - 16;
|
const int scanBeg = scan_pos - 16;
|
||||||
|
@ -824,13 +847,13 @@ static INLINE void update_common_context(
|
||||||
templateCtxInit[id] = 0;
|
templateCtxInit[id] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memset(currState->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
|
memset(ctxs->m_allStates.m_absLevelsAndCtxInit[curr_state], 0, 16 * sizeof(uint8_t));
|
||||||
memcpy(currState->m_absLevelsAndCtxInit + 8, templateCtxInit, 16 * sizeof(uint16_t));
|
memcpy(ctxs->m_allStates.m_absLevelsAndCtxInit[curr_state] + 8, templateCtxInit, 16 * sizeof(uint16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static INLINE void updateStateEOS(
|
static INLINE void updateStateEOS(
|
||||||
depquant_state * state,
|
context_store* ctxs,
|
||||||
const uint32_t scan_pos,
|
const uint32_t scan_pos,
|
||||||
const uint32_t cg_pos,
|
const uint32_t cg_pos,
|
||||||
const uint32_t sigCtxOffsetNext,
|
const uint32_t sigCtxOffsetNext,
|
||||||
|
@ -839,53 +862,52 @@ static INLINE void updateStateEOS(
|
||||||
const uint32_t height_in_sbb,
|
const uint32_t height_in_sbb,
|
||||||
const uint32_t next_sbb_right,
|
const uint32_t next_sbb_right,
|
||||||
const uint32_t next_sbb_below,
|
const uint32_t next_sbb_below,
|
||||||
const depquant_state* prevStates,
|
|
||||||
const depquant_state* skipStates,
|
|
||||||
const Decision * decisions,
|
const Decision * decisions,
|
||||||
int decision_id)
|
int decision_id)
|
||||||
{
|
{
|
||||||
state->m_rdCost = decisions->rdCost[decision_id];
|
all_depquant_states* state = &ctxs->m_allStates;
|
||||||
|
int curr_state_offset = ctxs->m_curr_state_offset + decision_id;
|
||||||
|
state->m_rdCost[curr_state_offset] = decisions->rdCost[decision_id];
|
||||||
if (decisions->prevId[decision_id] > -2)
|
if (decisions->prevId[decision_id] > -2)
|
||||||
{
|
{
|
||||||
const depquant_state* prvState = 0;
|
int prvState = -1;
|
||||||
if (decisions->prevId[decision_id] >= 4)
|
if (decisions->prevId[decision_id] >= 4)
|
||||||
{
|
{
|
||||||
prvState = skipStates + (decisions->prevId[decision_id] - 4);
|
prvState = ctxs->m_skip_state_offset + (decisions->prevId[decision_id] - 4);
|
||||||
state->m_numSigSbb = 0;
|
state->m_numSigSbb[curr_state_offset] = 0;
|
||||||
memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
|
memset(state->m_absLevelsAndCtxInit[curr_state_offset], 0, 16 * sizeof(uint8_t));
|
||||||
}
|
}
|
||||||
else if (decisions->prevId[decision_id] >= 0)
|
else if (decisions->prevId[decision_id] >= 0)
|
||||||
{
|
{
|
||||||
prvState = prevStates + decisions->prevId[decision_id];
|
prvState = ctxs->m_prev_state_offset + decisions->prevId[decision_id];
|
||||||
state->m_numSigSbb = prvState->m_numSigSbb + !!decisions->absLevel[decision_id];
|
state->m_numSigSbb[curr_state_offset] = state->m_numSigSbb[prvState] + !!decisions->absLevel[decision_id];
|
||||||
memcpy(state->m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 16 * sizeof(uint8_t));
|
memcpy(state->m_absLevelsAndCtxInit[curr_state_offset], state->m_absLevelsAndCtxInit[prvState], 16 * sizeof(uint8_t));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
state->m_numSigSbb = 1;
|
state->m_numSigSbb[curr_state_offset] = 1;
|
||||||
memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
|
memset(state->m_absLevelsAndCtxInit[curr_state_offset], 0, 16 * sizeof(uint8_t));
|
||||||
}
|
}
|
||||||
uint8_t* temp = (uint8_t*)(&state->m_absLevelsAndCtxInit[scan_pos & 15]);
|
uint8_t* temp = (uint8_t*)(&state->m_absLevelsAndCtxInit[curr_state_offset][scan_pos & 15]);
|
||||||
*temp = (uint8_t)MIN(255, decisions->absLevel[decision_id]);
|
*temp = (uint8_t)MIN(255, decisions->absLevel[decision_id]);
|
||||||
|
|
||||||
update_common_context(state->m_commonCtx, scan_pos, cg_pos, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below,prvState, state);
|
update_common_context(ctxs, state->m_commonCtx, scan_pos, cg_pos, width_in_sbb, height_in_sbb, next_sbb_right,next_sbb_below, prvState, ctxs->m_curr_state_offset + decision_id);
|
||||||
|
|
||||||
coeff_t tinit = state->m_absLevelsAndCtxInit[8 + ((scan_pos - 1) & 15)];
|
coeff_t tinit = state->m_absLevelsAndCtxInit[curr_state_offset][8 + ((scan_pos - 1) & 15)];
|
||||||
coeff_t sumNum = tinit & 7;
|
coeff_t sumNum = tinit & 7;
|
||||||
coeff_t sumAbs1 = (tinit >> 3) & 31;
|
coeff_t sumAbs1 = (tinit >> 3) & 31;
|
||||||
coeff_t sumGt1 = sumAbs1 - sumNum;
|
coeff_t sumGt1 = sumAbs1 - sumNum;
|
||||||
state->m_sigFracBits[0] = state->m_sigFracBitsArray[sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
|
state->m_sigFracBits[curr_state_offset][0] = state->m_sigFracBitsArray[curr_state_offset][sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
|
||||||
state->m_sigFracBits[1] = state->m_sigFracBitsArray[sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
|
state->m_sigFracBits[curr_state_offset][1] = state->m_sigFracBitsArray[curr_state_offset][sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
|
||||||
|
|
||||||
memcpy(state->m_coeffFracBits, state->m_gtxFracBitsArray[gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits));
|
memcpy(state->m_coeffFracBits[curr_state_offset], state->m_gtxFracBitsArray[gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits[0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static INLINE void updateState(
|
static INLINE void updateState(
|
||||||
depquant_state* state,
|
context_store * ctxs,
|
||||||
int numIPos,
|
int numIPos,
|
||||||
const uint32_t scan_pos,
|
const uint32_t scan_pos,
|
||||||
const depquant_state* prevStates,
|
|
||||||
const Decision* decisions,
|
const Decision* decisions,
|
||||||
const uint32_t sigCtxOffsetNext,
|
const uint32_t sigCtxOffsetNext,
|
||||||
const uint32_t gtxCtxOffsetNext,
|
const uint32_t gtxCtxOffsetNext,
|
||||||
|
@ -893,39 +915,41 @@ static INLINE void updateState(
|
||||||
const int baseLevel,
|
const int baseLevel,
|
||||||
const bool extRiceFlag,
|
const bool extRiceFlag,
|
||||||
int decision_id) {
|
int decision_id) {
|
||||||
state->m_rdCost = decisions->rdCost[decision_id];
|
all_depquant_states* state = &ctxs->m_allStates;
|
||||||
|
int state_id = ctxs->m_curr_state_offset + decision_id;
|
||||||
|
state->m_rdCost[state_id] = decisions->rdCost[decision_id];
|
||||||
if (decisions->prevId[decision_id] > -2)
|
if (decisions->prevId[decision_id] > -2)
|
||||||
{
|
{
|
||||||
if (decisions->prevId[decision_id] >= 0)
|
if (decisions->prevId[decision_id] >= 0)
|
||||||
{
|
{
|
||||||
const depquant_state* prvState = prevStates + decisions->prevId[decision_id];
|
const int prvState = ctxs->m_prev_state_offset + decisions->prevId[decision_id];
|
||||||
state->m_numSigSbb = prvState->m_numSigSbb + !!decisions->absLevel[decision_id];
|
state->m_numSigSbb[state_id] = (state->m_numSigSbb[prvState]) + !!decisions->absLevel[decision_id];
|
||||||
state->m_refSbbCtxId = prvState->m_refSbbCtxId;
|
state->m_refSbbCtxId[state_id] = state->m_refSbbCtxId[prvState];
|
||||||
state->m_sbbFracBits[0] = prvState->m_sbbFracBits[0];
|
state->m_sbbFracBits[state_id][0] = state->m_sbbFracBits[prvState][0];
|
||||||
state->m_sbbFracBits[1] = prvState->m_sbbFracBits[1];
|
state->m_sbbFracBits[state_id][1] = state->m_sbbFracBits[prvState][1];
|
||||||
state->m_remRegBins = prvState->m_remRegBins - 1;
|
state->m_remRegBins[state_id] = state->m_remRegBins[prvState] - 1;
|
||||||
state->m_goRicePar = prvState->m_goRicePar;
|
state->m_goRicePar[state_id] = state->m_goRicePar[prvState];
|
||||||
if (state->m_remRegBins >= 4)
|
if (state->m_remRegBins[state_id] >= 4)
|
||||||
{
|
{
|
||||||
state->m_remRegBins -= (decisions->absLevel[decision_id] < 2 ? (unsigned)decisions->absLevel[decision_id] : 3);
|
state->m_remRegBins[state_id] -= (decisions->absLevel[decision_id] < 2 ? (unsigned)decisions->absLevel[decision_id] : 3);
|
||||||
}
|
}
|
||||||
memcpy(state->m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48 * sizeof(uint8_t));
|
memcpy(state->m_absLevelsAndCtxInit[state_id], state->m_absLevelsAndCtxInit[prvState], 48 * sizeof(uint8_t));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
state->m_numSigSbb = 1;
|
state->m_numSigSbb[state_id] = 1;
|
||||||
state->m_refSbbCtxId = -1;
|
state->m_refSbbCtxId[state_id] = -1;
|
||||||
int ctxBinSampleRatio = 28; //(scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
|
int ctxBinSampleRatio = 28; //(scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
|
||||||
state->m_remRegBins = (state->effWidth * state->effHeight * ctxBinSampleRatio) / 16 - (decisions->absLevel[decision_id] < 2 ? (unsigned)decisions->absLevel[decision_id] : 3);
|
state->m_remRegBins[state_id] = (state->effWidth * state->effHeight * ctxBinSampleRatio) / 16 - (decisions->absLevel[decision_id] < 2 ? (unsigned)decisions->absLevel[decision_id] : 3);
|
||||||
memset(state->m_absLevelsAndCtxInit, 0, 48 * sizeof(uint8_t));
|
memset(state->m_absLevelsAndCtxInit[state_id], 0, 48 * sizeof(uint8_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit);
|
uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit[state_id]);
|
||||||
levels[scan_pos & 15] = (uint8_t)MIN(255, decisions->absLevel[decision_id]);
|
levels[scan_pos & 15] = (uint8_t)MIN(255, decisions->absLevel[decision_id]);
|
||||||
|
|
||||||
if (state->m_remRegBins >= 4)
|
if (state->m_remRegBins[state_id] >= 4)
|
||||||
{
|
{
|
||||||
coeff_t tinit = state->m_absLevelsAndCtxInit[8 + ((scan_pos - 1) & 15)];
|
coeff_t tinit = state->m_absLevelsAndCtxInit[state_id][8 + ((scan_pos - 1) & 15)];
|
||||||
coeff_t sumAbs1 = (tinit >> 3) & 31;
|
coeff_t sumAbs1 = (tinit >> 3) & 31;
|
||||||
coeff_t sumNum = tinit & 7;
|
coeff_t sumNum = tinit & 7;
|
||||||
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs1+=MIN(4+(t&1),t); sumNum+=!!t; }
|
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs1+=MIN(4+(t&1),t); sumNum+=!!t; }
|
||||||
|
@ -961,12 +985,12 @@ static INLINE void updateState(
|
||||||
}
|
}
|
||||||
#undef UPDATE
|
#undef UPDATE
|
||||||
coeff_t sumGt1 = sumAbs1 - sumNum;
|
coeff_t sumGt1 = sumAbs1 - sumNum;
|
||||||
state->m_sigFracBits[0] = state->m_sigFracBitsArray[sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
|
state->m_sigFracBits[state_id][0] = state->m_sigFracBitsArray[state_id][sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
|
||||||
state->m_sigFracBits[1] = state->m_sigFracBitsArray[sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
|
state->m_sigFracBits[state_id][1] = state->m_sigFracBitsArray[state_id][sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
|
||||||
memcpy(state->m_coeffFracBits, state->m_gtxFracBitsArray[gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits));
|
memcpy(state->m_coeffFracBits[state_id], state->m_gtxFracBitsArray[gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits[0]));
|
||||||
|
|
||||||
|
|
||||||
coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + ((scan_pos - 1) & 15)] >> 8;
|
coeff_t sumAbs = state->m_absLevelsAndCtxInit[state_id][8 + ((scan_pos - 1) & 15)] >> 8;
|
||||||
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs+=t; }
|
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs+=t; }
|
||||||
if (numIPos == 1)
|
if (numIPos == 1)
|
||||||
{
|
{
|
||||||
|
@ -1004,18 +1028,18 @@ static INLINE void updateState(
|
||||||
unsigned currentShift = templateAbsCompare(sumAbs);
|
unsigned currentShift = templateAbsCompare(sumAbs);
|
||||||
sumAbs = sumAbs >> currentShift;
|
sumAbs = sumAbs >> currentShift;
|
||||||
int sumAll = MAX(MIN(31, (int)sumAbs - (int)baseLevel), 0);
|
int sumAll = MAX(MIN(31, (int)sumAbs - (int)baseLevel), 0);
|
||||||
state->m_goRicePar = g_goRiceParsCoeff[sumAll];
|
state->m_goRicePar[state_id] = g_goRiceParsCoeff[sumAll];
|
||||||
state->m_goRicePar += currentShift;
|
state->m_goRicePar[state_id] += currentShift;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int sumAll = MAX(MIN(31, (int)sumAbs - 4 * 5), 0);
|
int sumAll = MAX(MIN(31, (int)sumAbs - 4 * 5), 0);
|
||||||
state->m_goRicePar = g_goRiceParsCoeff[sumAll];
|
state->m_goRicePar[state_id] = g_goRiceParsCoeff[sumAll];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + ((scan_pos - 1) & 15)] >> 8;
|
coeff_t sumAbs = (state->m_absLevelsAndCtxInit[state_id][8 + ((scan_pos - 1) & 15)]) >> 8;
|
||||||
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs+=t; }
|
#define UPDATE(k) {coeff_t t=levels[next_nb_info_ssb.inPos[k]]; sumAbs+=t; }
|
||||||
if (numIPos == 1)
|
if (numIPos == 1)
|
||||||
{
|
{
|
||||||
|
@ -1053,19 +1077,20 @@ static INLINE void updateState(
|
||||||
unsigned currentShift = templateAbsCompare(sumAbs);
|
unsigned currentShift = templateAbsCompare(sumAbs);
|
||||||
sumAbs = sumAbs >> currentShift;
|
sumAbs = sumAbs >> currentShift;
|
||||||
sumAbs = MIN(31, sumAbs);
|
sumAbs = MIN(31, sumAbs);
|
||||||
state->m_goRicePar = g_goRiceParsCoeff[sumAbs];
|
state->m_goRicePar[state_id] = g_goRiceParsCoeff[sumAbs];
|
||||||
state->m_goRicePar += currentShift;
|
state->m_goRicePar[state_id] += currentShift;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sumAbs = MIN(31, sumAbs);
|
sumAbs = MIN(31, sumAbs);
|
||||||
state->m_goRicePar = g_goRiceParsCoeff[sumAbs];
|
state->m_goRicePar[state_id] = g_goRiceParsCoeff[sumAbs];
|
||||||
}
|
}
|
||||||
state->m_goRiceZero = (state->m_stateId < 2 ? 1 : 2) << state->m_goRicePar;
|
state->m_goRiceZero[state_id] = ((state_id & 3) < 2 ? 1 : 2) << state->m_goRicePar[state_id];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool same[13];
|
||||||
static void xDecideAndUpdate(
|
static void xDecideAndUpdate(
|
||||||
rate_estimator* re,
|
rate_estimator* re,
|
||||||
context_store* ctxs,
|
context_store* ctxs,
|
||||||
|
@ -1087,7 +1112,7 @@ static void xDecideAndUpdate(
|
||||||
int effHeight)
|
int effHeight)
|
||||||
{
|
{
|
||||||
Decision* decisions = &ctxs->m_trellis[scan_pos];
|
Decision* decisions = &ctxs->m_trellis[scan_pos];
|
||||||
SWAP(ctxs->m_currStates, ctxs->m_prevStates, depquant_state*);
|
SWAP(ctxs->m_curr_state_offset, ctxs->m_prev_state_offset, int);
|
||||||
|
|
||||||
enum ScanPosType spt = 0;
|
enum ScanPosType spt = 0;
|
||||||
if ((scan_pos & 15) == 15 && scan_pos > 16 && scan_pos < effHeight * effWidth - 1)
|
if ((scan_pos & 15) == 15 && scan_pos > 16 && scan_pos < effHeight * effWidth - 1)
|
||||||
|
@ -1099,28 +1124,28 @@ static void xDecideAndUpdate(
|
||||||
spt = SCAN_EOCSBB;
|
spt = SCAN_EOCSBB;
|
||||||
}
|
}
|
||||||
|
|
||||||
xDecide(ctxs->m_skipStates, ctxs->m_prevStates, &ctxs->m_startState, &ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[pos_x] + re->m_lastBitsY[pos_y], decisions, zeroOut, quantCoeff);
|
xDecide(&ctxs->m_allStates, &ctxs->m_startState, &ctxs->m_quant, spt, absCoeff, re->m_lastBitsX[pos_x] + re->m_lastBitsY[pos_y], decisions, zeroOut, quantCoeff,ctxs->m_skip_state_offset, ctxs->m_prev_state_offset);
|
||||||
|
|
||||||
if (scan_pos) {
|
if (scan_pos) {
|
||||||
if (!(scan_pos & 15)) {
|
if (!(scan_pos & 15)) {
|
||||||
SWAP(ctxs->m_common_context.m_currSbbCtx, ctxs->m_common_context.m_prevSbbCtx, SbbCtx*);
|
SWAP(ctxs->m_common_context.m_currSbbCtx, ctxs->m_common_context.m_prevSbbCtx, SbbCtx*);
|
||||||
updateStateEOS(&ctxs->m_currStates[0], scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, ctxs->m_prevStates, ctxs->m_skipStates, decisions,0);
|
updateStateEOS(ctxs, scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, decisions, 0);
|
||||||
updateStateEOS(&ctxs->m_currStates[1], scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, ctxs->m_prevStates, ctxs->m_skipStates, decisions,1);
|
updateStateEOS(ctxs, scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, decisions, 1);
|
||||||
updateStateEOS(&ctxs->m_currStates[2], scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, ctxs->m_prevStates, ctxs->m_skipStates, decisions,2);
|
updateStateEOS(ctxs, scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, decisions, 2);
|
||||||
updateStateEOS(&ctxs->m_currStates[3], scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, ctxs->m_prevStates, ctxs->m_skipStates, decisions,3);
|
updateStateEOS(ctxs, scan_pos, cg_pos, sigCtxOffsetNext, gtxCtxOffsetNext, width_in_sbb, height_in_sbb, next_sbb_right, next_sbb_below, decisions, 3);
|
||||||
memcpy(decisions->prevId + 4, decisions->prevId, 4 * sizeof(int));
|
memcpy(decisions->prevId + 4, decisions->prevId, 4 * sizeof(int));
|
||||||
memcpy(decisions->absLevel + 4, decisions->absLevel, 4 * sizeof(coeff_t));
|
memcpy(decisions->absLevel + 4, decisions->absLevel, 4 * sizeof(coeff_t));
|
||||||
memcpy(decisions->rdCost + 4, decisions->rdCost, 4 * sizeof(int64_t));
|
memcpy(decisions->rdCost + 4, decisions->rdCost, 4 * sizeof(int64_t));
|
||||||
} else if (!zeroOut) {
|
} else if (!zeroOut) {
|
||||||
|
|
||||||
updateState(&ctxs->m_currStates[0], next_nb_info_ssb.num, scan_pos, ctxs->m_prevStates, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 0);
|
updateState(ctxs, next_nb_info_ssb.num, scan_pos, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 0);
|
||||||
updateState(&ctxs->m_currStates[1], next_nb_info_ssb.num, scan_pos, ctxs->m_prevStates, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 1);
|
updateState(ctxs, next_nb_info_ssb.num, scan_pos, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 1);
|
||||||
updateState(&ctxs->m_currStates[2], next_nb_info_ssb.num, scan_pos, ctxs->m_prevStates, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 2);
|
updateState(ctxs, next_nb_info_ssb.num, scan_pos, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 2);
|
||||||
updateState(&ctxs->m_currStates[3], next_nb_info_ssb.num, scan_pos, ctxs->m_prevStates, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 3);
|
updateState(ctxs, next_nb_info_ssb.num, scan_pos, decisions, sigCtxOffsetNext, gtxCtxOffsetNext, next_nb_info_ssb, 4, false, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (spt == SCAN_SOCSBB) {
|
if (spt == SCAN_SOCSBB) {
|
||||||
SWAP(ctxs->m_skipStates, ctxs->m_prevStates, depquant_state*);
|
SWAP(ctxs->m_skip_state_offset, ctxs->m_prev_state_offset, int);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1142,9 +1167,9 @@ int uvg_dep_quant(
|
||||||
//===== reset / pre-init =====
|
//===== reset / pre-init =====
|
||||||
const int baseLevel = 4;
|
const int baseLevel = 4;
|
||||||
context_store dep_quant_context;
|
context_store dep_quant_context;
|
||||||
dep_quant_context.m_currStates = &dep_quant_context.m_allStates[0];
|
dep_quant_context.m_curr_state_offset = 0;
|
||||||
dep_quant_context.m_prevStates = &dep_quant_context.m_allStates[4];
|
dep_quant_context.m_prev_state_offset = 4;
|
||||||
dep_quant_context.m_skipStates = &dep_quant_context.m_allStates[8];
|
dep_quant_context.m_skip_state_offset = 8;
|
||||||
|
|
||||||
const uint32_t lfnstIdx = tree_type != UVG_CHROMA_T || compID == COLOR_Y ?
|
const uint32_t lfnstIdx = tree_type != UVG_CHROMA_T || compID == COLOR_Y ?
|
||||||
cur_tu->lfnst_idx :
|
cur_tu->lfnst_idx :
|
||||||
|
@ -1226,18 +1251,32 @@ int uvg_dep_quant(
|
||||||
int effectHeight = MIN(32, effHeight);
|
int effectHeight = MIN(32, effHeight);
|
||||||
int effectWidth = MIN(32, effWidth);
|
int effectWidth = MIN(32, effWidth);
|
||||||
for (int k = 0; k < 12; k++) {
|
for (int k = 0; k < 12; k++) {
|
||||||
depquant_state_init(&dep_quant_context.m_allStates[k], rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]);
|
dep_quant_context.m_allStates.m_rdCost[k] = INT64_MAX >> 1;
|
||||||
dep_quant_context.m_allStates[k].effHeight = effectHeight;
|
dep_quant_context.m_allStates.m_numSigSbb[k] = 0;
|
||||||
dep_quant_context.m_allStates[k].effWidth = effectWidth;
|
dep_quant_context.m_allStates.m_remRegBins[k] = 4; // just large enough for last scan pos
|
||||||
dep_quant_context.m_allStates[k].m_commonCtx = &dep_quant_context.m_common_context;
|
dep_quant_context.m_allStates.m_refSbbCtxId[k] = -1;
|
||||||
dep_quant_context.m_allStates[k].m_stateId = k & 3;
|
dep_quant_context.m_allStates.m_sigFracBits[k][0] = rate_estimator.m_sigFracBits[0][0][0];
|
||||||
|
dep_quant_context.m_allStates.m_sigFracBits[k][1] = rate_estimator.m_sigFracBits[0][0][1];
|
||||||
|
memcpy(dep_quant_context.m_allStates.m_coeffFracBits[k], rate_estimator.m_gtxFracBits[0], sizeof(dep_quant_context.m_allStates.m_coeffFracBits[k]));
|
||||||
|
dep_quant_context.m_allStates.m_goRicePar[k] = 0;
|
||||||
|
dep_quant_context.m_allStates.m_goRiceZero[k] = 0;
|
||||||
|
|
||||||
|
dep_quant_context.m_allStates.m_sbbFracBits[k][0] = 0;
|
||||||
|
dep_quant_context.m_allStates.m_sbbFracBits[k][1] = 0;
|
||||||
|
|
||||||
|
dep_quant_context.m_allStates.m_stateId[k] = k & 3;
|
||||||
for (int i = 0; i < (compID == COLOR_Y ? 12 : 8); ++i) {
|
for (int i = 0; i < (compID == COLOR_Y ? 12 : 8); ++i) {
|
||||||
dep_quant_context.m_allStates[k].m_sigFracBitsArray[i] = rate_estimator.m_sigFracBits[(k & 3 ? (k & 3) - 1 : 0)][i];
|
dep_quant_context.m_allStates.m_sigFracBitsArray[k][i] = rate_estimator.m_sigFracBits[(k & 3 ? (k & 3) - 1 : 0)][i];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dep_quant_context.m_allStates.effHeight = effectHeight;
|
||||||
|
dep_quant_context.m_allStates.effWidth = effectWidth;
|
||||||
|
dep_quant_context.m_allStates.m_commonCtx = &dep_quant_context.m_common_context;
|
||||||
for (int i = 0; i < (compID == COLOR_Y ? 21 : 11); ++i) {
|
for (int i = 0; i < (compID == COLOR_Y ? 21 : 11); ++i) {
|
||||||
dep_quant_context.m_allStates[k].m_gtxFracBitsArray[i] = rate_estimator.m_gtxFracBits[i];
|
dep_quant_context.m_allStates.m_gtxFracBitsArray[i] = rate_estimator.m_gtxFracBits[i];
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
depquant_state_init(&dep_quant_context.m_startState, rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]);
|
depquant_state_init(&dep_quant_context.m_startState, rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]);
|
||||||
dep_quant_context.m_startState.effHeight = effectHeight;
|
dep_quant_context.m_startState.effHeight = effectHeight;
|
||||||
dep_quant_context.m_startState.effWidth = effectWidth;
|
dep_quant_context.m_startState.effWidth = effectWidth;
|
||||||
|
@ -1250,7 +1289,6 @@ int uvg_dep_quant(
|
||||||
dep_quant_context.m_startState.m_gtxFracBitsArray[i] = rate_estimator.m_gtxFracBits[i];
|
dep_quant_context.m_startState.m_gtxFracBitsArray[i] = rate_estimator.m_gtxFracBits[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const uint32_t height_in_sbb = MAX(height >> 2, 1);
|
const uint32_t height_in_sbb = MAX(height >> 2, 1);
|
||||||
const uint32_t width_in_sbb = MAX(width >> 2, 1);
|
const uint32_t width_in_sbb = MAX(width >> 2, 1);
|
||||||
//===== populate trellis =====
|
//===== populate trellis =====
|
||||||
|
@ -1320,11 +1358,13 @@ int uvg_dep_quant(
|
||||||
width,
|
width,
|
||||||
height); //tu.cu->slice->getReverseLastSigCoeffFlag());
|
height); //tu.cu->slice->getReverseLastSigCoeffFlag());
|
||||||
}
|
}
|
||||||
//printf("%d\n", scanIdx);
|
if(0){
|
||||||
//for(int i = 0; i < 4; i++) {
|
printf("%d\n", scanIdx);
|
||||||
// printf("%lld %hu %d\n", ctxs->m_trellis[scanIdx].rdCost[i], ctxs->m_trellis[scanIdx].absLevel[i], ctxs->m_trellis[scanIdx].prevId[i]);
|
for (int i = 0; i < 4; i++) {
|
||||||
//}
|
printf("%lld %hu %d\n", ctxs->m_trellis[scanIdx].rdCost[i], ctxs->m_trellis[scanIdx].absLevel[i], ctxs->m_trellis[scanIdx].prevId[i]);
|
||||||
//printf("\n");
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//===== find best path =====
|
//===== find best path =====
|
||||||
|
@ -1340,6 +1380,12 @@ int uvg_dep_quant(
|
||||||
|
|
||||||
//===== backward scanning =====
|
//===== backward scanning =====
|
||||||
int scanIdx = 0;
|
int scanIdx = 0;
|
||||||
|
context_store* ctxs = &dep_quant_context;
|
||||||
|
// printf("%d\n", scanIdx);
|
||||||
|
//for (int i = 0; i < 4; i++) {
|
||||||
|
// printf("%lld %hu %d\n", ctxs->m_trellis[scanIdx].rdCost[i], ctxs->m_trellis[scanIdx].absLevel[i], ctxs->m_trellis[scanIdx].prevId[i]);
|
||||||
|
//}
|
||||||
|
//printf("\n");
|
||||||
for (; prev_id >= 0; scanIdx++) {
|
for (; prev_id >= 0; scanIdx++) {
|
||||||
Decision temp = dep_quant_context.m_trellis[scanIdx];
|
Decision temp = dep_quant_context.m_trellis[scanIdx];
|
||||||
int32_t blkpos = scan[scanIdx];
|
int32_t blkpos = scan[scanIdx];
|
||||||
|
|
Loading…
Reference in a new issue