[DepQuant] WIP: easy part done

This commit is contained in:
Joose Sainio 2023-01-09 14:10:10 +02:00
parent fd8f212a5a
commit 4a911bbf2b

View file

@ -47,7 +47,22 @@
// NOTE(review): sm_maxNumSigCtx / sm_maxNumGtxCtx are not referenced in the
// visible part of this file; presumably they are the number of significance
// and greater-than-x context sets -- confirm against the rate estimator.
#define sm_maxNumSigCtx 12
#define sm_maxNumGtxCtx 21
// Used as (1 << SCALE_BITS) when adding a rate term in checkRdCosts(); the
// entries of g_goRiceBits are all multiples of that value.
#define SCALE_BITS 15
// Number of entries per row of g_goRiceBits; larger values are clipped to
// RICEMAX - 1 by the callers before indexing.
#define RICEMAX 32
// Rate table for the Golomb-Rice coded remainder.
// First index: Rice parameter (state->m_goRicePar), second index: the value
// to code, clipped to RICEMAX - 1. Every entry is a multiple of 32768
// (1 << SCALE_BITS).
static const int32_t g_goRiceBits[4][RICEMAX] = {
{ 32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},
{ 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},
{ 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},
{131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376},
};
// Ascending thresholds that templateAbsCompare() compares a template sum
// against to pick a range index 0..4.
static const int g_riceT[4] = { 32,128, 512, 2048 };
// Shift returned by templateAbsCompare() for each range index 0..4.
static const int g_riceShift[5] = { 0, 2, 4, 6, 8 };
// Maps a template sum that has been clipped to 0..31 to a Rice parameter
// (see updateState()).
static const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 };
/*
 * Classifies a scan position for the rate computation in checkRdCosts() and
 * xDecide().
 * NOTE(review): the names presumably stand for "inside", "start of" and
 * "end of" a coefficient sub-block -- confirm against the scan-info setup.
 */
enum ScanPosType {
  SCAN_ISCSBB = 0, /* neither of the two special cases below            */
  SCAN_SOCSBB = 1, /* checkRdCosts() also adds the coded-sbb-flag bits   */
  SCAN_EOCSBB = 2  /* xDecide() additionally tests the skip-sbb options  */
};
typedef struct {
int m_QShift;
@ -79,6 +94,21 @@ typedef struct {
uint8_t* levels;
} SbbCtx;
// One pre-quantised candidate for a coefficient, filled in by preQuantCoeff().
typedef struct
{
coeff_t absLevel; // candidate absolute level, computed as (qIdx + 1) >> 1
int64_t deltaDist; // distortion term added to a state's rdCost when this candidate is chosen
}PQData;
// Best option found so far for reaching one trellis state at one scan position.
// The field order must not change: startDec initialises this struct positionally.
typedef struct {
int64_t rdCost; // cost of the option; the checkRdCost* helpers keep the smaller one
coeff_t absLevel; // absolute level chosen for the coefficient (0 for zero / skipped sub-block)
// Where the option came from, as interpreted by updateState()/updateStateEOS():
//   >= 4 : skip state (prevId - 4)
//   0..3 : index into the previous states
//   -1   : start state (set by checkRdCostStart)
//   -2   : initial value in startDec; only the cost is copied
int prevId;
} Decision;
typedef struct {
const NbInfoOut* m_nbInfo;
uint32_t m_sbbFlagBits[2][2];
@ -114,7 +144,7 @@ typedef struct {
int8_t m_stateId;
const uint32_t* m_sigFracBitsArray;
const uint32_t* m_gtxFracBitsArray;
common_context* m_commonCtx;
struct common_context* m_commonCtx;
unsigned effWidth;
unsigned effHeight;
@ -317,12 +347,557 @@ static void depquant_state_init(depquant_state* state, uint32_t sig_frac_bits[2]
state->m_goRiceZero = 0;
}
/*
 * Unconditionally records "skip the whole sub-block" as the decision when the
 * position lies in the zeroed-out region.
 *
 * decision  Receives the skip option (level 0, predecessor 4 + state id).
 * state     Skip state whose cost and coded-sbb-flag=0 bits are used.
 */
static INLINE void checkRdCostSkipSbbZeroOut(Decision *decision, const depquant_state * const state)
{
  decision->rdCost   = (int64_t)(state->m_rdCost + state->m_sbbFracBits[0]);
  decision->absLevel = 0;
  /* Predecessor ids 4..7 denote the skip states. */
  decision->prevId   = 4 + state->m_stateId;
}
/*
 * Evaluates the three ways of leaving `state` at the current scan position:
 * candidate A, candidate B and "zero", and keeps each one in the matching
 * decision if it is cheaper than what is already stored there.
 *
 * state      Predecessor state being extended.
 * spt        Scan position type; selects which flag bits are added.
 * pqDataA/B  The two non-zero candidates (level and distortion delta).
 * decisionA  Updated with candidate A and with the zero option.
 * decisionB  Updated with candidate B.
 */
static void checkRdCosts(const depquant_state * const state, const enum ScanPosType spt, const PQData *pqDataA, const PQData *pqDataB, Decision *decisionA, Decision *decisionB)
{
  const int32_t* const riceBits = g_goRiceBits[state->m_goRicePar];
  const PQData* const cand[2] = { pqDataA, pqDataB };

  int64_t costA = state->m_rdCost + pqDataA->deltaDist;
  int64_t costB = state->m_rdCost + pqDataB->deltaDist;
  int64_t costZ = state->m_rdCost;
  int64_t* const cost[2] = { &costA, &costB };

  if (state->m_remRegBins < 4) {
    /* Not enough regular bins left: levels are costed from the Rice table
     * alone, plus (1 << SCALE_BITS). */
    for (int i = 0; i < 2; i++) {
      const coeff_t level = cand[i]->absLevel;
      int tabIdx;
      if (level <= state->m_goRiceZero) {
        tabIdx = level - 1;
      } else if (level < RICEMAX) {
        tabIdx = level;
      } else {
        tabIdx = RICEMAX - 1;
      }
      *cost[i] += (1 << SCALE_BITS) + riceBits[tabIdx];
    }
    costZ += riceBits[state->m_goRiceZero];
  } else {
    /* Regular path: context-coded prefix bits plus a Rice-coded remainder
     * for levels of 4 and above. */
    for (int i = 0; i < 2; i++) {
      const coeff_t level = cand[i]->absLevel;
      if (level >= 4) {
        const coeff_t value = (level - 4) >> 1;
        *cost[i] +=
          state->m_coeffFracBits[level - (value << 1)] + riceBits[value < RICEMAX ? value : RICEMAX - 1];
      } else {
        *cost[i] += state->m_coeffFracBits[level];
      }
    }

    if (spt == SCAN_SOCSBB) {
      /* The coded-sub-block flag (= 1) is paid for in addition to the
       * significance flag. */
      costA += state->m_sbbFracBits[1] + state->m_sigFracBits[1];
      costB += state->m_sbbFracBits[1] + state->m_sigFracBits[1];
      costZ += state->m_sbbFracBits[1] + state->m_sigFracBits[0];
    } else if (spt == SCAN_ISCSBB || state->m_numSigSbb) {
      costA += state->m_sigFracBits[1];
      costB += state->m_sigFracBits[1];
      costZ += state->m_sigFracBits[0];
    } else {
      /* No significance flag is costed here, and the zero option is set to
       * decisionA's current cost so that it can never replace it. */
      costZ = decisionA->rdCost;
    }
  }

  /* The order matters: A and Z compete for the same decision slot. */
  if (costA < decisionA->rdCost) {
    decisionA->rdCost   = costA;
    decisionA->absLevel = pqDataA->absLevel;
    decisionA->prevId   = state->m_stateId;
  }
  if (costZ < decisionA->rdCost) {
    decisionA->rdCost   = costZ;
    decisionA->absLevel = 0;
    decisionA->prevId   = state->m_stateId;
  }
  if (costB < decisionB->rdCost) {
    decisionB->rdCost   = costB;
    decisionB->absLevel = pqDataB->absLevel;
    decisionB->prevId   = state->m_stateId;
  }
}
/*
 * Offers "skip the whole sub-block" as an alternative for `decision` and
 * takes it only when it is strictly cheaper than the stored option.
 *
 * state     Skip state providing the cost and the coded-sbb-flag=0 bits.
 * decision  Decision that may be replaced (level 0, predecessor 4 + state id).
 */
static INLINE void checkRdCostSkipSbb(const depquant_state* const state, Decision *decision)
{
  const int64_t skipCost = state->m_rdCost + state->m_sbbFracBits[0];
  if (skipCost >= decision->rdCost) {
    return;
  }
  decision->rdCost   = skipCost;
  decision->absLevel = 0;
  decision->prevId   = 4 + state->m_stateId;
}
/*
 * Tests the option that the current coefficient is the first one coded
 * (no predecessor state). The cost is the candidate's distortion delta,
 * the last-position offset and the level bits.
 *
 * state       Start state supplying the level bit tables and Rice parameter.
 * lastOffset  Cost added for signalling this position as the last one.
 * pqData      Candidate level and distortion delta.
 * decision    Replaced (with prevId = -1) when the start option is cheaper.
 */
static INLINE void checkRdCostStart(const depquant_state* const state, int32_t lastOffset, const PQData *pqData, Decision *decision)
{
  const coeff_t level = pqData->absLevel;
  int64_t startCost = pqData->deltaDist + lastOffset;

  if (level >= 4) {
    /* Prefix is context coded, the remainder comes from the Rice table. */
    const coeff_t value = (level - 4) >> 1;
    startCost += state->m_coeffFracBits[level - (value << 1)] + g_goRiceBits[state->m_goRicePar][value < RICEMAX ? value : RICEMAX - 1];
  } else {
    startCost += state->m_coeffFracBits[level];
  }

  if (startCost >= decision->rdCost) {
    return;
  }
  decision->rdCost   = startCost;
  decision->absLevel = level;
  decision->prevId   = -1;
}
/*
 * Pre-quantises one coefficient into four consecutive quantisation indices
 * and stores, for each, the resulting absolute level and distortion delta.
 *
 * qp         Quantiser parameters (shifts, rounding offsets, distortion scale).
 * absCoeff   Absolute value of the transform coefficient.
 * pqData     Array of four entries; the candidate with index q is written to
 *            slot (q & 3), so all four slots are filled.
 * quanCoeff  Quantisation scale applied to absCoeff.
 */
static INLINE void preQuantCoeff(const quant_block * const qp, const coeff_t absCoeff, PQData* pqData, coeff_t quanCoeff)
{
  const int64_t scaledOrg = (int64_t)(absCoeff) * quanCoeff;

  /* First candidate index, clamped to [1, m_maxQIdx]. */
  coeff_t qIdx = MAX(1, MIN(qp->m_maxQIdx, (coeff_t)((scaledOrg + qp->m_QAdd) >> qp->m_QShift)));
  int64_t scaledAdd = qIdx * qp->m_DistStepAdd - scaledOrg * qp->m_DistOrgFact;

  for (int cand = 0; cand < 4; cand++) {
    if (cand > 0) {
      /* Advance the distortion term by one quantisation step. */
      scaledAdd += qp->m_DistStepAdd;
    }
    PQData* const slot = &pqData[qIdx & 3];
    slot->deltaDist = (scaledAdd * qIdx + qp->m_DistAdd) >> qp->m_DistShift;
    qIdx++;
    slot->absLevel = qIdx >> 1;
  }
}
#define DINIT(l,p) {INT64_MAX>>2,(l),(p)}
static const Decision startDec[8] = { DINIT(-1,-2),DINIT(-1,-2),DINIT(-1,-2),DINIT(-1,-2),DINIT(0,4),DINIT(0,5),DINIT(0,6),DINIT(0,7) };
#undef DINIT
/*
 * Computes the eight trellis decisions for one scan position.
 *
 * m_skipStates  Four states used for the "skip sub-block" options.
 * m_prevStates  Four predecessor states.
 * m_startState  State used for the "first coded coefficient" option.
 * qp            Quantiser parameters for preQuantCoeff().
 * spt           Scan position type of the current position.
 * absCoeff      Absolute value of the coefficient.
 * lastOffset    Cost of signalling this position as the last one.
 * decisions     Output array of eight decisions; reset from startDec first.
 * zeroOut       True when the position lies in the zeroed-out region, in
 *               which case only the skip options are considered.
 * quanCoeff     Quantisation scale for this coefficient.
 */
static void xDecide(
  depquant_state* const m_skipStates,
  depquant_state* const m_prevStates,
  depquant_state* const m_startState,
  quant_block *qp,
  const enum ScanPosType spt,
  const coeff_t absCoeff,
  const int lastOffset,
  Decision* decisions,
  bool zeroOut,
  coeff_t quanCoeff)
{
  const bool endOfSbb = (spt == SCAN_EOCSBB);

  memcpy(decisions, startDec, 8 * sizeof(Decision));

  if (zeroOut) {
    if (endOfSbb) {
      for (int s = 0; s < 4; s++) {
        checkRdCostSkipSbbZeroOut(decisions + s, m_skipStates + s);
      }
    }
    return;
  }

  PQData pqData[4];
  preQuantCoeff(qp, absCoeff, pqData, quanCoeff);

  /* States 0/1 choose between candidates 0 and 2, states 2/3 between
   * candidates 3 and 1. The call order is kept because several calls
   * compete for the same decision slot. */
  checkRdCosts(m_prevStates + 0, spt, pqData + 0, pqData + 2, decisions + 0, decisions + 2);
  checkRdCosts(m_prevStates + 1, spt, pqData + 0, pqData + 2, decisions + 2, decisions + 0);
  checkRdCosts(m_prevStates + 2, spt, pqData + 3, pqData + 1, decisions + 1, decisions + 3);
  checkRdCosts(m_prevStates + 3, spt, pqData + 3, pqData + 1, decisions + 3, decisions + 1);

  if (endOfSbb) {
    for (int s = 0; s < 4; s++) {
      checkRdCostSkipSbb(m_skipStates + s, decisions + s);
    }
  }

  /* Only decisions 0 and 2 can be entered from the start state. */
  checkRdCostStart(m_startState, lastOffset, pqData + 0, decisions + 0);
  checkRdCostStart(m_startState, lastOffset, pqData + 2, decisions + 2);
}
/*
 * Returns the shift associated with the magnitude range that `sum` falls in.
 * The range index is the position of the first threshold in g_riceT that is
 * greater than `sum`, or 4 when `sum` reaches or exceeds all of them.
 */
unsigned templateAbsCompare(coeff_t sum)
{
  int rangeIdx = 0;
  while (rangeIdx < 4 && !(sum < g_riceT[rangeIdx])) {
    rangeIdx++;
  }
  return g_riceShift[rangeIdx];
}
/*
 * Updates the shared sub-block context at the end of a sub-block and
 * re-initialises the per-state template data for the next sub-block.
 *
 * Fix: `scanInfo` is a pointer, so its members are accessed with `->`
 * (the previous `.` accesses are not valid C).
 *
 * cc         Common context holding the current/previous sub-block buffers,
 *            the neighbour table and the coded-sbb-flag bit costs.
 * scanInfo   Scan information of the current position. scanIdx must be
 *            non-zero because m_nbInfo[scanIdx - 1] is read; the visible
 *            caller only reaches this for scanIdx != 0.
 * prevState  State this one was derived from, or NULL when there is none.
 * currState  State being updated. Its m_absLevelsAndCtxInit is first copied
 *            out into the level buffer and then rebuilt.
 */
static INLINE void update_common_context(common_context * cc, const ScanInfo *scanInfo, const depquant_state* prevState, depquant_state *currState)
{
  uint8_t* sbbFlags = cc->m_currSbbCtx[currState->m_stateId].sbbFlags;
  uint8_t* levels = cc->m_currSbbCtx[currState->m_stateId].levels;
  size_t setCpSize = cc->m_nbInfo[scanInfo->scanIdx - 1].maxDist * sizeof(uint8_t);

  /* Inherit the flags and the reachable part of the levels from the
   * predecessor's context, or start from zero when there is none. */
  if (prevState && prevState->m_refSbbCtxId >= 0)
  {
    memcpy(sbbFlags, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].sbbFlags, scanInfo->numSbb * sizeof(uint8_t));
    memcpy(levels + scanInfo->scanIdx, cc->m_prevSbbCtx[prevState->m_refSbbCtxId].levels + scanInfo->scanIdx, setCpSize);
  }
  else
  {
    memset(sbbFlags, 0, scanInfo->numSbb * sizeof(uint8_t));
    memset(levels + scanInfo->scanIdx, 0, setCpSize);
  }

  /* Publish this sub-block's significance flag and its levels. */
  sbbFlags[scanInfo->sbbPos] = !!currState->m_numSigSbb;
  memcpy(levels + scanInfo->scanIdx, currState->m_absLevelsAndCtxInit, scanInfo->sbbSize * sizeof(uint8_t));

  /* Context for the next coded-sbb flag: is the right or the below
   * neighbour sub-block significant? An index of 0 means "no neighbour". */
  const int sigNSbb = ((scanInfo->nextSbbRight ? sbbFlags[scanInfo->nextSbbRight] : false) || (scanInfo->nextSbbBelow ? sbbFlags[scanInfo->nextSbbBelow] : false) ? 1 : 0);

  currState->m_numSigSbb = 0;
  if (prevState)
  {
    currState->m_remRegBins = prevState->m_remRegBins;
  }
  else
  {
    /* NOTE(review): 28 is hard-coded; the reference code this was ported
     * from selects a luma or chroma constant here -- confirm for chroma. */
    int ctxBinSampleRatio = 28;
    currState->m_remRegBins = (currState->effWidth * currState->effHeight * ctxBinSampleRatio) / 16;
  }
  currState->m_goRicePar = 0;
  currState->m_refSbbCtxId = currState->m_stateId;
  currState->m_sbbFracBits[0] = cc->m_sbbFlagBits[sigNSbb][0];
  currState->m_sbbFracBits[1] = cc->m_sbbFlagBits[sigNSbb][1];

  /* Pre-compute, for every position of the next sub-block, the template
   * sums contributed by already-decided coefficients outside of it. */
  uint16_t templateCtxInit[16];
  const int scanBeg = scanInfo->scanIdx - scanInfo->sbbSize;
  const NbInfoOut* nbOut = cc->m_nbInfo + scanBeg;
  const uint8_t* absLevels = levels + scanBeg;
  for (int id = 0; id < scanInfo->sbbSize; id++, nbOut++)
  {
    if (nbOut->num)
    {
      coeff_t sumAbs = 0, sumAbs1 = 0, sumNum = 0;
      /* At least one and at most five neighbours are accumulated. */
      int k = 0;
      do
      {
        const coeff_t t = absLevels[nbOut->outPos[k]];
        sumAbs += t;
        sumAbs1 += MIN(4 + (t & 1), t);
        sumNum += !!t;
        k++;
      } while (k < 5 && nbOut->num > k);
      /* Packed as: bits 0-2 count, bits 3-7 clipped sum, bits 8+ full sum. */
      templateCtxInit[id] = (uint16_t)(sumNum) + ((uint16_t)(sumAbs1) << 3) + ((uint16_t)MIN(127, sumAbs) << 8);
    }
    else
    {
      templateCtxInit[id] = 0;
    }
  }

  /* The first 16 bytes hold the 8-bit levels and are cleared; the packed
   * 16-bit template entries follow from element 8 onwards. */
  memset(currState->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
  memcpy(currState->m_absLevelsAndCtxInit + 8, templateCtxInit, 16 * sizeof(uint16_t));
}
/*
 * Applies `decision` to `state` at the last scan position of a sub-block,
 * pushes the finished sub-block into the common context and loads the bit
 * tables for the first position of the next sub-block.
 *
 * Fixes relative to the previous version:
 *  - the C++ reinterpret_cast is replaced by a C cast, and the missing
 *    `state->` qualifier on m_absLevelsAndCtxInit is added;
 *  - `scanInfo` is a pointer, so members are accessed with `->`;
 *  - m_sigFracBits / m_coeffFracBits are filled the same way updateState()
 *    fills them (element-wise / memcpy) instead of being assigned whole.
 *    NOTE(review): this assumes both are arrays, as updateState()'s use of
 *    sizeof(state->m_coeffFracBits) implies -- confirm the member types.
 *
 * state       State to update.
 * scanInfo    Scan information of the current position.
 * prevStates  The four predecessor states (decision->prevId 0..3).
 * skipStates  The four skip states (decision->prevId 4..7).
 * decision    Chosen option; with prevId <= -2 only the cost is copied.
 */
static INLINE void updateStateEOS(depquant_state * state, const ScanInfo *scanInfo, const depquant_state* prevStates, const depquant_state* skipStates,
  const Decision *decision)
{
  state->m_rdCost = decision->rdCost;
  if (decision->prevId > -2)
  {
    const depquant_state* prvState = NULL;
    if (decision->prevId >= 4)
    {
      /* Sub-block skipped: nothing significant, all levels zero. */
      prvState = skipStates + (decision->prevId - 4);
      state->m_numSigSbb = 0;
      memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
    }
    else if (decision->prevId >= 0)
    {
      prvState = prevStates + decision->prevId;
      state->m_numSigSbb = prvState->m_numSigSbb + !!decision->absLevel;
      memcpy(state->m_absLevelsAndCtxInit, prvState->m_absLevelsAndCtxInit, 16 * sizeof(uint8_t));
    }
    else
    {
      /* prevId == -1: this coefficient is the first coded one. */
      state->m_numSigSbb = 1;
      memset(state->m_absLevelsAndCtxInit, 0, 16 * sizeof(uint8_t));
    }

    /* The level area is addressed byte-wise, one entry per position. */
    uint8_t* levels = (uint8_t*)(state->m_absLevelsAndCtxInit);
    levels[scanInfo->insidePos] = (uint8_t)MIN(255, decision->absLevel);

    update_common_context(state->m_commonCtx, scanInfo, prvState, state);

    /* Unpack the template entry of the next position: bits 0-2 hold the
     * count of non-zero neighbours, bits 3-7 the clipped level sum. */
    coeff_t tinit = state->m_absLevelsAndCtxInit[8 + scanInfo->nextInsidePos];
    coeff_t sumNum = tinit & 7;
    coeff_t sumAbs1 = (tinit >> 3) & 31;
    coeff_t sumGt1 = sumAbs1 - sumNum;
    state->m_sigFracBits[0] = state->m_sigFracBitsArray[scanInfo->sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
    state->m_sigFracBits[1] = state->m_sigFracBitsArray[scanInfo->sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
    memcpy(state->m_coeffFracBits, &state->m_gtxFracBitsArray[scanInfo->gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits));
  }
}
/*
 * Applies `decision` to `state` for a position that is not the last one of
 * its sub-block, then derives the bit tables and Rice parameter for the
 * next position from the template neighbours inside the sub-block.
 *
 * state        State to update.
 * numIPos      Number of in-sub-block template neighbours of the next
 *              position; only the values 1..5 cause neighbours to be read.
 * scanInfo     Scan information of the current position (passed by value).
 * prevStates   The four predecessor states (decision->prevId 0..3).
 * decision     Chosen option; with prevId <= -2 only the cost is copied.
 * baseLevel    Offset subtracted from the template sum when extRiceFlag
 *              is set.
 * extRiceFlag  Selects the extended Rice parameter derivation.
 */
static INLINE void updateState(depquant_state* state, int numIPos, const ScanInfo scanInfo, const depquant_state *prevStates, const Decision *decision, const int baseLevel, const bool extRiceFlag)
{
  state->m_rdCost = decision->rdCost;
  if (decision->prevId <= -2) {
    return;
  }

  if (decision->prevId < 0) {
    /* prevId == -1: this coefficient is the first coded one. */
    state->m_numSigSbb = 1;
    state->m_refSbbCtxId = -1;
    /* NOTE(review): 28 is hard-coded; the commented-out original selected a
     * luma or chroma constant here -- confirm for chroma. */
    int ctxBinSampleRatio = 28;
    state->m_remRegBins = (state->effWidth * state->effHeight * ctxBinSampleRatio) / 16 - (decision->absLevel < 2 ? (unsigned)decision->absLevel : 3);
    memset(state->m_absLevelsAndCtxInit, 0, 48 * sizeof(uint8_t));
  } else {
    const depquant_state* const pred = prevStates + decision->prevId;
    state->m_numSigSbb = pred->m_numSigSbb + !!decision->absLevel;
    state->m_refSbbCtxId = pred->m_refSbbCtxId;
    state->m_sbbFracBits[0] = pred->m_sbbFracBits[0];
    state->m_sbbFracBits[1] = pred->m_sbbFracBits[1];
    /* One bin for the significance flag, then up to three more for the
     * level, but only while the regular-bin budget still allows it. */
    state->m_remRegBins = pred->m_remRegBins - 1;
    state->m_goRicePar = pred->m_goRicePar;
    if (state->m_remRegBins >= 4) {
      state->m_remRegBins -= (decision->absLevel < 2 ? (unsigned)decision->absLevel : 3);
    }
    memcpy(state->m_absLevelsAndCtxInit, pred->m_absLevelsAndCtxInit, 48 * sizeof(uint8_t));
  }

  /* The level area is addressed byte-wise, one entry per position. */
  uint8_t* const levels = (uint8_t*)(state->m_absLevelsAndCtxInit);
  levels[scanInfo.insidePos] = (uint8_t)MIN(255, decision->absLevel);

  /* Neighbours are only read for counts 1..5; any other count reads none. */
  const int nbCount = (numIPos >= 1 && numIPos <= 5) ? numIPos : 0;

  if (state->m_remRegBins >= 4) {
    /* Regular mode: refresh the significance and greater-than-x bit tables. */
    const coeff_t tinit = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos];
    coeff_t sumAbs1 = (tinit >> 3) & 31;
    coeff_t sumNum = tinit & 7;
    for (int k = 0; k < nbCount; k++) {
      const coeff_t t = levels[scanInfo.nextNbInfoSbb.inPos[k]];
      sumAbs1 += MIN(4 + (t & 1), t);
      sumNum += !!t;
    }
    const coeff_t sumGt1 = sumAbs1 - sumNum;
    state->m_sigFracBits[0] = state->m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][0];
    state->m_sigFracBits[1] = state->m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + MIN((sumAbs1 + 1) >> 1, 3)][1];
    memcpy(state->m_coeffFracBits, &state->m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)], sizeof(state->m_coeffFracBits));

    /* Rice parameter from the full template sum (bits 8+ of the entry). */
    coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8;
    for (int k = 0; k < nbCount; k++) {
      const coeff_t t = levels[scanInfo.nextNbInfoSbb.inPos[k]];
      sumAbs += t;
    }
    if (extRiceFlag) {
      const unsigned shift = templateAbsCompare(sumAbs);
      sumAbs = sumAbs >> shift;
      const int clipped = MAX(MIN(31, (int)sumAbs - (int)baseLevel), 0);
      state->m_goRicePar = g_goRiceParsCoeff[clipped];
      state->m_goRicePar += shift;
    } else {
      const int clipped = MAX(MIN(31, (int)sumAbs - 4 * 5), 0);
      state->m_goRicePar = g_goRiceParsCoeff[clipped];
    }
  } else {
    /* Bypass mode: only the Rice parameter and the zero position are needed. */
    coeff_t sumAbs = state->m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8;
    for (int k = 0; k < nbCount; k++) {
      const coeff_t t = levels[scanInfo.nextNbInfoSbb.inPos[k]];
      sumAbs += t;
    }
    if (extRiceFlag) {
      const unsigned shift = templateAbsCompare(sumAbs);
      sumAbs = sumAbs >> shift;
      sumAbs = MIN(31, sumAbs);
      state->m_goRicePar = g_goRiceParsCoeff[sumAbs];
      state->m_goRicePar += shift;
    } else {
      sumAbs = MIN(31, sumAbs);
      state->m_goRicePar = g_goRiceParsCoeff[sumAbs];
    }
    state->m_goRiceZero = (state->m_stateId < 2 ? 1 : 2) << state->m_goRicePar;
  }
}
// Advances the trellis by one scan position: swaps the previous/current state
// sets, computes the decisions for this coefficient and updates the current
// states from them.
//
// NOTE(review): this function is not yet valid C and does not match the
// helpers defined above it:
//  - std::swap and m_commonCtx.swap() are C++ constructs;
//  - m_prevStates, m_currStates, m_skipStates, m_commonCtx, m_baseLevel,
//    m_extRiceRRCFlag and lastOffset() are not parameters or locals here;
//  - xDecide() above takes ten arguments (states, quantiser, ...), six are passed;
//  - updateStateEOS() takes `const ScanInfo *`, scanInfo is passed by value;
//  - updateState() takes `const Decision *`, decisions[k] is passed by value.
// The state sets most likely need to be passed in explicitly -- TODO confirm
// the intended signature before fixing the calls.
static void xDecideAndUpdate(
const coeff_t absCoeff,
const ScanInfo scanInfo,
bool zeroOut,
coeff_t quantCoeff,
int effWidth,
int effHeight,
bool reverseLast,
Decision* decisions)
{
std::swap(m_prevStates, m_currStates);
// effWidth, effHeight and reverseLast are only used to compute the last-position offset.
xDecide(scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx, effWidth, effHeight, reverseLast), decisions, zeroOut, quantCoeff);
// No state update is done for scan index 0.
if (scanInfo.scanIdx) {
if (scanInfo.eosbb) {
// End of a sub-block: update all four states through the EOS path, then
// copy decisions 0..3 over entries 4..7.
m_commonCtx.swap();
updateStateEOS(&m_currStates[0], scanInfo, m_prevStates, m_skipStates, &decisions[0]);
updateStateEOS(&m_currStates[1], scanInfo, m_prevStates, m_skipStates, &decisions[1]);
updateStateEOS(&m_currStates[2], scanInfo, m_prevStates, m_skipStates, &decisions[2]);
updateStateEOS(&m_currStates[3], scanInfo, m_prevStates, m_skipStates, &decisions[3]);
memcpy(decisions + 4, decisions, 4 * sizeof(Decision));
} else if (!zeroOut) {
updateState(&m_currStates[0], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
updateState(&m_currStates[1], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
updateState(&m_currStates[2], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
updateState(&m_currStates[3], scanInfo.nextNbInfoSbb.num, scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
}
// After an SOCSBB position the previous states are swapped with the skip states.
if (scanInfo.spt == SCAN_SOCSBB) {
std::swap(m_prevStates, m_skipStates);
}
}
}
uint8_t uvg_dep_quant(
const encoder_state_t* const state,
const cu_info_t* const cur_tu,
const cu_loc_t* const cu_loc,
const coeff_t* srcCoeff,
const coeff_t* coeff_out,
coeff_t* coeff_out,
const color_t compID,
enum uvg_tree_type tree_type,
const double lambda,
@ -365,6 +940,7 @@ uint8_t uvg_dep_quant(
quant_block quant_block;
init_quant_block(state, &quant_block, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, -1);
Decision trellis[TR_MAX_WIDTH * TR_MAX_WIDTH][8];
//===== scaling matrix ====
//const int qpDQ = cQP.Qp + 1;
//const int qpPer = qpDQ / 6;
@ -389,14 +965,13 @@ uint8_t uvg_dep_quant(
if (
lfnstIdx > 0 && !is_ts && width >= 4 &&
height >= 4) {
firstTestPos =
((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
firstTestPos =((width == 4 && height == 4) || (width == 8 && height == 8)) ? 7 : 15;
}
const int32_t default_quant_coeff = uvg_g_quant_scales[needs_block_size_trafo_scale][qp_scaled % 6];
const coeff_t thres = 4 << q_bits;
for (; firstTestPos >= 0; firstTestPos--) {
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[firstTestPos])) :(thres / (4 * default_quant_coeff));
if (abs(srcCoeff[firstTestPos]) > thresTmp) {
coeff_t thresTmp = (enableScalingLists) ? (thres / (4 * q_coeff[scan[firstTestPos]])) :(thres / (4 * default_quant_coeff));
if (abs(srcCoeff[scan[firstTestPos]]) > thresTmp) {
break;
}
}
@ -414,50 +989,48 @@ uint8_t uvg_dep_quant(
depquant_state start_state;
int effectHeight = MIN(32, effHeight);
int effectWidth = MIN(32, effWidth);
for (int k = 0; k < 12; k++) {
depquant_state_init(&all_state[k], rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]);
all_state[k].effHeight = MIN(32, effHeight);
all_state[k].effWidth = MIN(32, effWidth);
all_state[k].effHeight = effectHeight;
all_state[k].effWidth = effectWidth;
}
depquant_state_init(&start_state, rate_estimator.m_sigFracBits[0][0], rate_estimator.m_gtxFracBits[0]);
start_state.effHeight = MIN(32, effHeight);
start_state.effWidth = MIN(32, effWidth);
start_state.effHeight = effectHeight;
start_state.effWidth = effectWidth;
//===== populate trellis =====
for (int scanIdx = firstTestPos; scanIdx >= 0; scanIdx--) {
const ScanInfo& scanInfo = tuPars.m_scanInfo[scanIdx];
uint32_t scan_pos = scan[scanIdx];
if (enableScalingLists) {
m_quant.initQuantBlock(
tu,
compID,
cQP,
lambda,
quantCoeff[scanInfo.rasterPos]);
init_quant_block(state, &quant_block, cur_tu, log2_tr_width, log2_tr_height, compID, needs_block_size_trafo_scale, q_coeff[scan_pos]);
xDecideAndUpdate(
abs(tCoeff[scanInfo.rasterPos]),
abs(srcCoeff[scan_pos]),
scanInfo,
(zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)),
quantCoeff[scanInfo.rasterPos],
q_coeff[scan_pos],
effectWidth,
effectHeight,
tu.cu->slice->getReverseLastSigCoeffFlag());
false); //tu.cu->slice->getReverseLastSigCoeffFlag());
} else {
xDecideAndUpdate(
abs(tCoeff[scanInfo.rasterPos]),
abs(srcCoeff[scan_pos]),
scanInfo,
(zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)),
default_quant_coeff,
effectWidth,
effectHeight,
tu.cu->slice->getReverseLastSigCoeffFlag());
false); //tu.cu->slice->getReverseLastSigCoeffFlag());
}
}
//===== find best path =====
Decision decision = {std::numeric_limits<int64_t>::max(), -1, -2};
Decision decision = {INT64_MAX, -1, -2};
int64_t minPathCost = 0;
for (int8_t stateId = 0; stateId < 4; stateId++) {
int64_t pathCost = m_trellis[0][stateId].rdCost;
int64_t pathCost = trellis[0][stateId].rdCost;
if (pathCost < minPathCost) {
decision.prevId = stateId;
minPathCost = pathCost;
@ -467,10 +1040,9 @@ uint8_t uvg_dep_quant(
//===== backward scanning =====
int scanIdx = 0;
for (; decision.prevId >= 0; scanIdx++) {
decision = m_trellis[scanIdx][decision.prevId];
int32_t blkpos = tuPars.m_scanId2BlkPos[scanIdx].idx;
q_coeff[blkpos] =
(tCoeff[blkpos] < 0 ? -decision.absLevel : decision.absLevel);
decision = trellis[scanIdx][decision.prevId];
int32_t blkpos = scan[scanIdx];
coeff_out[blkpos] = (srcCoeff[blkpos] < 0 ? -decision.absLevel : decision.absLevel);
absSum += decision.absLevel;
}
}