From 4a911bbf2b8c4be34415b285a420f1a7a29c6e3c Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Mon, 9 Jan 2023 14:10:10 +0200 Subject: [PATCH] [DepQuant] WIP: easy part done --- src/dep_quant.c | 630 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 601 insertions(+), 29 deletions(-) diff --git a/src/dep_quant.c b/src/dep_quant.c index 47314f48..776d482b 100644 --- a/src/dep_quant.c +++ b/src/dep_quant.c @@ -47,7 +47,22 @@ #define sm_maxNumSigCtx 12 #define sm_maxNumGtxCtx 21 #define SCALE_BITS 15 +#define RICEMAX 32 +static const int32_t g_goRiceBits[4][RICEMAX] = { + { 32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752}, + { 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984}, + { 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680}, + {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376}, +}; + +static const int g_riceT[4] = { 32,128, 512, 2048 }; +static const int g_riceShift[5] = { 0, 2, 4, 6, 8 }; + +static const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; + +enum ScanPosType { SCAN_ISCSBB = 0, SCAN_SOCSBB = 1, SCAN_EOCSBB = 2 }; typedef struct { int m_QShift; @@ -79,6 +94,21 @@ typedef struct { uint8_t* levels; } SbbCtx; + + +typedef struct +{ + coeff_t absLevel; + int64_t deltaDist; +}PQData; + +typedef struct { + int64_t rdCost; + coeff_t absLevel; + int prevId; +} Decision; + + typedef struct { const NbInfoOut* m_nbInfo; uint32_t m_sbbFlagBits[2][2]; @@ -114,7 +144,7 @@ typedef struct { int8_t m_stateId; const uint32_t* m_sigFracBitsArray; const uint32_t* m_gtxFracBitsArray; - common_context* m_commonCtx; + struct common_context* m_commonCtx; unsigned effWidth; unsigned effHeight; @@ -317,12 +347,557 @@ static void depquant_state_init(depquant_state* state, uint32_t sig_frac_bits[2] state->m_goRiceZero = 0; } +static INLINE void checkRdCostSkipSbbZeroOut(Decision *decision, const depquant_state * const state) +{ + int64_t rdCost = state->m_rdCost + state->m_sbbFracBits[0]; + decision->rdCost = rdCost; + decision->absLevel = 0; + decision->prevId = 4 + state->m_stateId; +} + +static void checkRdCosts(const depquant_state * const state, const enum ScanPosType spt, const PQData *pqDataA, const PQData *pqDataB, Decision *decisionA, Decision *decisionB) +{ + const int32_t* goRiceTab = g_goRiceBits[state->m_goRicePar]; + int64_t rdCostA = state->m_rdCost + pqDataA->deltaDist; + int64_t rdCostB = state->m_rdCost + pqDataB->deltaDist; + int64_t rdCostZ = state->m_rdCost; + if (state->m_remRegBins >= 4) + { + if (pqDataA->absLevel < 4) + { + rdCostA += state->m_coeffFracBits[pqDataA->absLevel]; + } + else + { + const coeff_t value = (pqDataA->absLevel - 4) >> 1; + rdCostA += + state->m_coeffFracBits[pqDataA->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1]; + } + if (pqDataB->absLevel < 4) + { + rdCostB += state->m_coeffFracBits[pqDataB->absLevel]; + } + else + { + const coeff_t value = (pqDataB->absLevel - 4) >> 1; + rdCostB += + state->m_coeffFracBits[pqDataB->absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1]; + } + if (spt == SCAN_ISCSBB) + { + rdCostA += state->m_sigFracBits[1]; + rdCostB += state->m_sigFracBits[1]; + rdCostZ += state->m_sigFracBits[0]; + } + else if (spt == SCAN_SOCSBB) + { + rdCostA += state->m_sbbFracBits[1] + state->m_sigFracBits[1]; + rdCostB += state->m_sbbFracBits[1] + state->m_sigFracBits[1]; + rdCostZ += state->m_sbbFracBits[1] + state->m_sigFracBits[0]; + } + else if (state->m_numSigSbb) + { + rdCostA += state->m_sigFracBits[1]; + rdCostB += state->m_sigFracBits[1]; + rdCostZ += state->m_sigFracBits[0]; + } + else + { + rdCostZ = decisionA->rdCost; + } + } + else + { + rdCostA += + (1 << SCALE_BITS) + + goRiceTab[pqDataA->absLevel <= state->m_goRiceZero ? pqDataA->absLevel - 1 + : (pqDataA->absLevel < RICEMAX ? pqDataA->absLevel : RICEMAX - 1)]; + rdCostB += + (1 << SCALE_BITS) + + goRiceTab[pqDataB->absLevel <= state->m_goRiceZero ? pqDataB->absLevel - 1 + : (pqDataB->absLevel < RICEMAX ? pqDataB->absLevel : RICEMAX - 1)]; + rdCostZ += goRiceTab[state->m_goRiceZero]; + } + if (rdCostA < decisionA->rdCost) + { + decisionA->rdCost = rdCostA; + decisionA->absLevel = pqDataA->absLevel; + decisionA->prevId = state->m_stateId; + } + if (rdCostZ < decisionA->rdCost) + { + decisionA->rdCost = rdCostZ; + decisionA->absLevel = 0; + decisionA->prevId = state->m_stateId; + } + if (rdCostB < decisionB->rdCost) + { + decisionB->rdCost = rdCostB; + decisionB->absLevel = pqDataB->absLevel; + decisionB->prevId = state->m_stateId; + } +} + +static INLINE void checkRdCostSkipSbb(const depquant_state* const state, Decision *decision) +{ + int64_t rdCost = state->m_rdCost + state->m_sbbFracBits[0]; + if (rdCost < decision->rdCost) + { + decision->rdCost = rdCost; + decision->absLevel = 0; + decision->prevId = 4 + state->m_stateId; + } +} + +static INLINE void checkRdCostStart(const depquant_state* const state, int32_t lastOffset, const PQData *pqData, Decision *decision) +{ + int64_t rdCost = pqData->deltaDist + lastOffset; + if (pqData->absLevel < 4) + { + rdCost += state->m_coeffFracBits[pqData->absLevel]; + } + else + { + const coeff_t value = (pqData->absLevel - 4) >> 1; + rdCost += state->m_coeffFracBits[pqData->absLevel - (value << 1)] + g_goRiceBits[state->m_goRicePar][value < RICEMAX ? value : RICEMAX - 1]; + } + if (rdCost < decision->rdCost) + { + decision->rdCost = rdCost; + decision->absLevel = pqData->absLevel; + decision->prevId = -1; + } +} + + +static INLINE void preQuantCoeff(const quant_block * const qp, const coeff_t absCoeff, PQData* pqData, coeff_t quanCoeff) +{ + int64_t scaledOrg = (int64_t)(absCoeff) * quanCoeff; + coeff_t qIdx = MAX(1, MIN(qp->m_maxQIdx, (coeff_t)((scaledOrg + qp->m_QAdd) >> qp->m_QShift))); + int64_t scaledAdd = qIdx * qp->m_DistStepAdd - scaledOrg * qp->m_DistOrgFact; + PQData *pq_a = &pqData[qIdx & 3]; + pq_a->deltaDist = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift; + pq_a->absLevel = (++qIdx) >> 1; + scaledAdd += qp->m_DistStepAdd; + PQData *pq_b = &pqData[qIdx & 3]; + pq_b->deltaDist = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift; + pq_b->absLevel = (++qIdx) >> 1; + scaledAdd += qp->m_DistStepAdd; + PQData *pq_c = &pqData[qIdx & 3]; + pq_c->deltaDist = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift; + pq_c->absLevel = (++qIdx) >> 1; + scaledAdd += qp->m_DistStepAdd; + PQData *pq_d = &pqData[qIdx & 3]; + pq_d->deltaDist = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift; + pq_d->absLevel = (++qIdx) >> 1; +} + + +#define DINIT(l,p) {INT64_MAX>>2,(l),(p)} +static const Decision startDec[8] = { DINIT(-1,-2),DINIT(-1,-2),DINIT(-1,-2),DINIT(-1,-2),DINIT(0,4),DINIT(0,5),DINIT(0,6),DINIT(0,7) }; +#undef DINIT + + +static void xDecide( + depquant_state* const m_skipStates, + depquant_state* const m_prevStates, + depquant_state* const m_startState, + quant_block *qp, + const enum ScanPosType spt, + const coeff_t absCoeff, + const int lastOffset, + Decision* decisions, + bool zeroOut, + coeff_t quanCoeff) +{ + memcpy(decisions, startDec, 8 * sizeof(Decision)); + + if (zeroOut) + { + if (spt == SCAN_EOCSBB) + { + checkRdCostSkipSbbZeroOut(&decisions[0], &m_skipStates[0]); + checkRdCostSkipSbbZeroOut(&decisions[1], &m_skipStates[1]); + checkRdCostSkipSbbZeroOut(&decisions[2], &m_skipStates[2]); + checkRdCostSkipSbbZeroOut(&decisions[3], &m_skipStates[3]); + } + return; + } + + PQData pqData[4]; + preQuantCoeff(qp, absCoeff, pqData, quanCoeff); + checkRdCosts(&m_prevStates[0], spt, &pqData[0], &pqData[2], &decisions[0], &decisions[2]); + checkRdCosts(&m_prevStates[1], spt, &pqData[0], &pqData[2], &decisions[2], &decisions[0]); + checkRdCosts(&m_prevStates[2], spt, &pqData[3], &pqData[1], &decisions[1], &decisions[3]); + checkRdCosts(&m_prevStates[3], spt, &pqData[3], &pqData[1], &decisions[3], &decisions[1]); + if (spt == SCAN_EOCSBB) + { + checkRdCostSkipSbb(&m_skipStates[0], &decisions[0]); + checkRdCostSkipSbb(&m_skipStates[1], &decisions[1]); + checkRdCostSkipSbb(&m_skipStates[2], &decisions[2]); + checkRdCostSkipSbb(&m_skipStates[3], &decisions[3]); + } + + checkRdCostStart(m_startState, lastOffset, &pqData[0], &decisions[0]); + checkRdCostStart(m_startState, lastOffset, &pqData[2], &decisions[2]); +} + + +unsigned templateAbsCompare(coeff_t sum) +{ + int rangeIdx = 0; + if (sum < g_riceT[0]) + { + rangeIdx = 0; + } + else if (sum < g_riceT[1]) + { + rangeIdx = 1; + } + else if (sum < g_riceT[2]) + { + rangeIdx = 2; + } + else if (sum < g_riceT[3]) + { + rangeIdx = 3; + } + else + { + rangeIdx = 4; + } + return g_riceShift[rangeIdx]; +} + +static INLINE void update_common_context(common_context * cc, const ScanInfo *scanInfo, const depquant_state* prevState, depquant_state *currState) +{ + uint8_t* sbbFlags = cc->m_currSbbCtx[currState->m_stateId].sbbFlags; + uint8_t* levels = cc->m_currSbbCtx[currState->m_stateId].levels; + size_t setCpSize = cc->m_nbInfo[scanInfo.scanIdx - 1].maxDist * sizeof(uint8_t); + if (prevState && prevState->m_refSbbCtxId >= 0) + { + memcpy(sbbFlags, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].sbbFlags, scanInfo.numSbb * sizeof(uint8_t)); + memcpy(levels + scanInfo.scanIdx, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].levels + scanInfo.scanIdx, setCpSize); + } + else + { + memset(sbbFlags, 0, scanInfo.numSbb * sizeof(uint8_t)); + memset(levels + scanInfo.scanIdx, 0, setCpSize); + } + sbbFlags[scanInfo.sbbPos] = !!currState->m_numSigSbb; + memcpy(levels + scanInfo.scanIdx, currState->m_absLevelsAndCtxInit, scanInfo.sbbSize * sizeof(uint8_t)); + + const int sigNSbb = ((scanInfo.nextSbbRight ? sbbFlags[scanInfo.nextSbbRight] : false) || (scanInfo.nextSbbBelow ? sbbFlags[scanInfo.nextSbbBelow] : false) ? 1 : 0); + currState->m_numSigSbb = 0; + if (prevState) + { + currState->m_remRegBins = prevState->m_remRegBins; + } + else + { + int ctxBinSampleRatio = 28; // (scanInfo.chType == COLOR_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + currState->m_remRegBins = (currState->effWidth * currState->effHeight * ctxBinSampleRatio) / 16; + } + currState->m_goRicePar = 0; + currState->m_refSbbCtxId = currState->m_stateId; + currState->m_sbbFracBits[0] = cc->m_sbbFlagBits[sigNSbb][0]; + currState->m_sbbFracBits[1] = cc->m_sbbFlagBits[sigNSbb][1]; + + uint16_t templateCtxInit[16]; + const int scanBeg = scanInfo.scanIdx - scanInfo.sbbSize; + const NbInfoOut* nbOut = cc->m_nbInfo + scanBeg; + const uint8_t* absLevels = levels + scanBeg; + for (int id = 0; id < scanInfo.sbbSize; id++, nbOut++) + { + if (nbOut->num) + { + coeff_t sumAbs = 0, sumAbs1 = 0, sumNum = 0; +#define UPDATE(k) {coeff_t t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=MIN(4+(t&1),t); sumNum+=!!t; } + UPDATE(0); + if (nbOut->num > 1) + { + UPDATE(1); + if (nbOut->num > 2) + { + UPDATE(2); + if (nbOut->num > 3) + { + UPDATE(3); + if (nbOut->num > 4) + { + UPDATE(4); + } + } + } + } +#undef UPDATE + templateCtxInit[id] = (uint16_t)(sumNum) + ((uint16_t)(sumAbs1) << 3) + ((uint16_t)MIN(127, sumAbs) << 8); + } + else + { + templateCtxInit[id] = 0; + } + } + memset(currState->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t)); + memcpy(currState->m_absLevelsAndCtxInit + 8, templateCtxInit, 16 * sizeof(uint16_t)); +} + + +static INLINE void updateStateEOS(depquant_state * state, const ScanInfo *scanInfo, const depquant_state* prevStates, const depquant_state* skipStates, + const Decision *decision) +{ + state->m_rdCost = decision->rdCost; + if (decision->prevId > -2) + { + const depquant_state* prvState = 0; + if (decision->prevId >= 4) + { + prvState = skipStates + (decision->prevId - 4); + state->m_numSigSbb = 0; + memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t)); + } + else if (decision->prevId >= 0) + { + prvState = prevStates + decision->prevId; + state->m_numSigSbb = prvState->m_numSigSbb + !!decision->absLevel; + memcpy(state->m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 16 * sizeof(uint8_t)); + } + else + { + state->m_numSigSbb = 1; + memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t)); + } + reinterpret_cast(m_absLevelsAndCtxInit)[scanInfo.insidePos] = (uint8_t)MIN(255, decision->absLevel); + + update_common_context(state->m_commonCtx, scanInfo, prvState, state); + + coeff_t tinit = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos]; + coeff_t sumNum = tinit & 7; + coeff_t sumAbs1 = (tinit >> 3) & 31; + coeff_t sumGt1 = sumAbs1 - sumNum; + state->m_sigFracBits = state->m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)]; + state->m_coeffFracBits = state->m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)]; + } +} + +static INLINE void updateState(depquant_state* state, int numIPos, const ScanInfo scanInfo, const depquant_state *prevStates, const Decision *decision, const int baseLevel, const bool extRiceFlag) +{ + state->m_rdCost = decision->rdCost; + if (decision->prevId > -2) + { + if (decision->prevId >= 0) + { + const depquant_state* prvState = prevStates + decision->prevId; + state->m_numSigSbb = prvState->m_numSigSbb + !!decision->absLevel; + state->m_refSbbCtxId = prvState->m_refSbbCtxId; + state->m_sbbFracBits[0] = prvState->m_sbbFracBits[0]; + state->m_sbbFracBits[1] = prvState->m_sbbFracBits[1]; + state->m_remRegBins = prvState->m_remRegBins - 1; + state->m_goRicePar = prvState->m_goRicePar; + if (state->m_remRegBins >= 4) + { + state->m_remRegBins -= (decision->absLevel < 2 ? (unsigned)decision->absLevel : 3); + } + memcpy(state->m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 48 * sizeof(uint8_t)); + } + else + { + state->m_numSigSbb = 1; + state->m_refSbbCtxId = -1; + int ctxBinSampleRatio = 28; //(scanInfo.chType == CHANNEL_TYPE_LUMA) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA; + state->m_remRegBins = (state->effWidth * state->effHeight * ctxBinSampleRatio) / 16 - (decision->absLevel < 2 ? (unsigned)decision->absLevel : 3); + memset(state->m_absLevelsAndCtxInit, 0, 48 * sizeof(uint8_t)); + } + + uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit); + levels[scanInfo.insidePos] = (uint8_t)MIN(255, decision->absLevel); + + if (state->m_remRegBins >= 4) + { + coeff_t tinit = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos]; + coeff_t sumAbs1 = (tinit >> 3) & 31; + coeff_t sumNum = tinit & 7; +#define UPDATE(k) {coeff_t t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs1+=MIN(4+(t&1),t); sumNum+=!!t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + coeff_t sumGt1 = sumAbs1 - sumNum; + state->m_sigFracBits[0] = state->m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0]; + state->m_sigFracBits[1] = state->m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1]; + memcpy(state->m_coeffFracBits, &state->m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits)); + + + coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8; +#define UPDATE(k) {coeff_t t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs+=t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + if (extRiceFlag) + { + unsigned currentShift = templateAbsCompare(sumAbs); + sumAbs = sumAbs >> currentShift; + int sumAll = MAX(MIN(31, (int)sumAbs - (int)baseLevel), 0); + state->m_goRicePar = g_goRiceParsCoeff[sumAll]; + state->m_goRicePar += currentShift; + } + else + { + int sumAll = MAX(MIN(31, (int)sumAbs - 4 * 5), 0); + state->m_goRicePar = g_goRiceParsCoeff[sumAll]; + } + } + else + { + coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8; +#define UPDATE(k) {coeff_t t=levels[scanInfo.nextNbInfoSbb.inPos[k]]; sumAbs+=t; } + if (numIPos == 1) + { + UPDATE(0); + } + else if (numIPos == 2) + { + UPDATE(0); + UPDATE(1); + } + else if (numIPos == 3) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + } + else if (numIPos == 4) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + } + else if (numIPos == 5) + { + UPDATE(0); + UPDATE(1); + UPDATE(2); + UPDATE(3); + UPDATE(4); + } +#undef UPDATE + if (extRiceFlag) + { + unsigned currentShift = templateAbsCompare(sumAbs); + sumAbs = sumAbs >> currentShift; + sumAbs = MIN(31, sumAbs); + state->m_goRicePar = g_goRiceParsCoeff[sumAbs]; + state->m_goRicePar += currentShift; + } + else + { + sumAbs = MIN(31, sumAbs); + state->m_goRicePar = g_goRiceParsCoeff[sumAbs]; + } + state->m_goRiceZero = (state->m_stateId < 2 ? 1 : 2) << state->m_goRicePar; + } + } +} + +static void xDecideAndUpdate( + const coeff_t absCoeff, + const ScanInfo scanInfo, + bool zeroOut, + coeff_t quantCoeff, + int effWidth, + int effHeight, + bool reverseLast, + Decision* decisions) +{ + std::swap(m_prevStates, m_currStates); + + xDecide(scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx, effWidth, effHeight, reverseLast), decisions, zeroOut, quantCoeff); + + if (scanInfo.scanIdx) { + if (scanInfo.eosbb) { + m_commonCtx.swap(); + updateStateEOS(&m_currStates[0], scanInfo, m_prevStates, m_skipStates, &decisions[0]); + updateStateEOS(&m_currStates[1], scanInfo, m_prevStates, m_skipStates, &decisions[1]); + updateStateEOS(&m_currStates[2], scanInfo, m_prevStates, m_skipStates, &decisions[2]); + updateStateEOS(&m_currStates[3], scanInfo, m_prevStates, m_skipStates, &decisions[3]); + memcpy(decisions + 4, decisions, 4 * sizeof(Decision)); + } else if (!zeroOut) { + + updateState(&m_currStates[0], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag); + updateState(&m_currStates[1], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag); + updateState(&m_currStates[2], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag); + updateState(&m_currStates[3], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag); + } + + if (scanInfo.spt == SCAN_SOCSBB) { + std::swap(m_prevStates, m_skipStates); + } + } +} + + uint8_t uvg_dep_quant( const encoder_state_t* const state, const cu_info_t* const cur_tu, const cu_loc_t* const cu_loc, const coeff_t* srcCoeff, - const coeff_t* coeff_out, + coeff_t* coeff_out, const color_t compID, enum uvg_tree_type tree_type, const double lambda, @@ -365,6 +940,7 @@ uint8_t uvg_dep_quant( quant_block quant_block; init_quant_block(state, &quant_block, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1); + Decision trellis[TR_MAX_WIDTH * TR_MAX_WIDTH][8]; //===== scaling matrix ==== //const int qpDQ = cQP.Qp + 1; //const int qpPer = qpDQ / 6; @@ -389,14 +965,13 @@ uint8_t uvg_dep_quant( if ( lfnstIdx > 0 && !is_ts && width >= 4 && height >= 4) { - firstTestPos = - ((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15; + firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15; } const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6]; const coeff_t thres = 4 << q_bits; for (; firstTestPos >= 0; firstTestPos--) { - coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[firstTestPos])) :(thres / (4 * default_quant_coeff)); - if (abs(srcCoeff[firstTestPos]) > thresTmp) { + coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) :(thres / (4 * default_quant_coeff)); + if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) { break; } } @@ -414,50 +989,48 @@ uint8_t uvg_dep_quant( depquant_state start_state; + int effectHeight = MIN(32, effHeight); + int effectWidth = MIN(32, effWidth); for (int k = 0; k < 12; k++) { depquant_state_init(&all_state[k], rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]); - all_state[k].effHeight = MIN(32, effHeight); - all_state[k].effWidth = MIN(32, effWidth); + all_state[k].effHeight = effectHeight; + all_state[k].effWidth = effectWidth; } depquant_state_init(&start_state, rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]); - start_state.effHeight = MIN(32, effHeight); - start_state.effWidth = MIN(32, effWidth); + start_state.effHeight = effectHeight; + start_state.effWidth = effectWidth; //===== populate trellis ===== for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) { - const ScanInfo& scanInfo = tuPars.m_scanInfo[scanIdx]; + uint32_t scan_pos = scan[scanIdx]; if (enableScalingLists) { - m_quant.initQuantBlock( - tu, - compID, - cQP, - lambda, - quantCoeff[scanInfo.rasterPos]); + init_quant_block(state, &quant_block, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[scan_pos]); + xDecideAndUpdate( - abs(tCoeff[scanInfo.rasterPos]), + abs(srcCoeff[scan_pos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), - quantCoeff[scanInfo.rasterPos], + q_coeff[scan_pos], effectWidth, effectHeight, - tu.cu->slice->getReverseLastSigCoeffFlag()); + false); //tu.cu->slice->getReverseLastSigCoeffFlag()); } else { xDecideAndUpdate( - abs(tCoeff[scanInfo.rasterPos]), + abs(srcCoeff[scan_pos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), default_quant_coeff, effectWidth, effectHeight, - tu.cu->slice->getReverseLastSigCoeffFlag()); - } + false); //tu.cu->slice->getReverseLastSigCoeffFlag()); + } } //===== find best path ===== - Decision decision = {std::numeric_limits::max(), -1, -2}; + Decision decision = {INT64_MAX, -1, -2}; int64_t minPathCost = 0; for (int8_t stateId = 0; stateId < 4; stateId++) { - int64_t pathCost = m_trellis[0][stateId].rdCost; + int64_t pathCost = trellis[0][stateId].rdCost; if (pathCost < minPathCost) { decision.prevId = stateId; minPathCost = pathCost; @@ -467,10 +1040,9 @@ uint8_t uvg_dep_quant( //===== backward scanning ===== int scanIdx = 0; for (; decision.prevId >= 0; scanIdx++) { - decision = m_trellis[scanIdx][decision.prevId]; - int32_t blkpos = tuPars.m_scanId2BlkPos[scanIdx].idx; - q_coeff[blkpos] = - (tCoeff[blkpos] < 0 ? -decision.absLevel : decision.absLevel); + decision = trellis[scanIdx][decision.prevId]; + int32_t blkpos = scan[scanIdx]; + coeff_out[blkpos] = (srcCoeff[blkpos] < 0 ? -decision.absLevel : decision.absLevel); absSum += decision.absLevel; } }