uvg266/src/rdo.c

/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

#include "rdo.h"

#include <stdlib.h>
#include <string.h>

#include "cabac.h"
#include "context.h"
#include "encode_coding_tree.h"
#include "encoder.h"
#include "imagelist.h"
#include "inter.h"
#include "scalinglist.h"
#include "tables.h"
#include "transform.h"

#include "strategies/strategies-quant.h"


// Base quantization shift. RDOQ adds qp_scaled / 6 and the transform shift
// on top of this to obtain q_bits.
#define QUANT_SHIFT          14
// Number of coefficients in one coefficient group (CG) and its base-2
// logarithm. Scan positions are split into CGs with these.
#define SCAN_SET_SIZE        16
#define LOG2_SCAN_SET_SIZE    4
// Sign bit hiding is skipped for a CG when the scan distance between its
// first and last non-zero coefficient is smaller than this.
#define SBH_THRESHOLD         4

// Linear model used by kvz_get_coeff_cost for the fast estimate:
//   bits ~= sum(|coeff|) * (qp * COEFF_COST_QP_FACTOR + COEFF_COST_BIAS)
static const double COEFF_COST_QP_FACTOR = 0.044407704;
static const double COEFF_COST_BIAS      = 0.557323653;

// Golomb-Rice lookup tables with five entries each.
// NOTE(review): presumably indexed by the Rice parameter (0..4); confirm
// against the code that reads them.
const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 };
const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 };

/**
 * Entropy bits to estimate coded bits in RDO / RDOQ (From HM 12.0)
 *
 * The values are fixed-point bit costs with 15 fractional bits, i.e.
 * 0x08000 stands for exactly one bit. kvz_f_entropy_bits holds the same
 * costs as floating point numbers.
 *
 * The entries come in pairs: the even entry of a pair is at most one bit
 * and the odd entry is at least one bit.
 * NOTE(review): presumably the index is a context state combined with the
 * bin value (see CTX_ENTROPY_BITS) -- confirm against the macro definition.
 */
const uint32_t kvz_entropy_bits[128] =
{
  0x08000, 0x08000, 0x076da, 0x089a0, 0x06e92, 0x09340, 0x0670a, 0x09cdf, 0x06029, 0x0a67f, 0x059dd, 0x0b01f, 0x05413, 0x0b9bf, 0x04ebf, 0x0c35f,
  0x049d3, 0x0ccff, 0x04546, 0x0d69e, 0x0410d, 0x0e03e, 0x03d22, 0x0e9de, 0x0397d, 0x0f37e, 0x03619, 0x0fd1e, 0x032ee, 0x106be, 0x02ffa, 0x1105d,
  0x02d37, 0x119fd, 0x02aa2, 0x1239d, 0x02836, 0x12d3d, 0x025f2, 0x136dd, 0x023d1, 0x1407c, 0x021d2, 0x14a1c, 0x01ff2, 0x153bc, 0x01e2f, 0x15d5c,
  0x01c87, 0x166fc, 0x01af7, 0x1709b, 0x0197f, 0x17a3b, 0x0181d, 0x183db, 0x016d0, 0x18d7b, 0x01595, 0x1971b, 0x0146c, 0x1a0bb, 0x01354, 0x1aa5a,
  0x0124c, 0x1b3fa, 0x01153, 0x1bd9a, 0x01067, 0x1c73a, 0x00f89, 0x1d0da, 0x00eb7, 0x1da79, 0x00df0, 0x1e419, 0x00d34, 0x1edb9, 0x00c82, 0x1f759,
  0x00bda, 0x200f9, 0x00b3c, 0x20a99, 0x00aa5, 0x21438, 0x00a17, 0x21dd8, 0x00990, 0x22778, 0x00911, 0x23118, 0x00898, 0x23ab8, 0x00826, 0x24458,
  0x007ba, 0x24df7, 0x00753, 0x25797, 0x006f2, 0x26137, 0x00696, 0x26ad7, 0x0063f, 0x27477, 0x005ed, 0x27e17, 0x0059f, 0x287b6, 0x00554, 0x29156,
  0x0050e, 0x29af6, 0x004cc, 0x2a497, 0x0048d, 0x2ae35, 0x00451, 0x2b7d6, 0x00418, 0x2c176, 0x003e2, 0x2cb15, 0x003af, 0x2d4b5, 0x0037f, 0x2de55
};

// Entropy bits scaled so that 50% probability yields 1 bit.
// Floating point counterpart of kvz_entropy_bits: the entries correspond to
// the fixed-point values divided by 0x8000 (e.g. 0x076da -> 0.92852783203125).
// Each source line below holds one pair of entries.
const float kvz_f_entropy_bits[128] =
{
  1.0, 1.0,
  0.92852783203125, 1.0751953125,
  0.86383056640625, 1.150390625,
  0.80499267578125, 1.225555419921875,
  0.751251220703125, 1.300750732421875,
  0.702056884765625, 1.375946044921875,
  0.656829833984375, 1.451141357421875,
  0.615203857421875, 1.526336669921875,
  0.576751708984375, 1.601531982421875,
  0.54119873046875, 1.67669677734375,
  0.508209228515625, 1.75189208984375,
  0.47760009765625, 1.82708740234375,
  0.449127197265625, 1.90228271484375,
  0.422637939453125, 1.97747802734375,
  0.39788818359375, 2.05267333984375,
  0.37481689453125, 2.127838134765625,
  0.353240966796875, 2.203033447265625,
  0.33306884765625, 2.278228759765625,
  0.31414794921875, 2.353424072265625,
  0.29644775390625, 2.428619384765625,
  0.279815673828125, 2.5037841796875,
  0.26422119140625, 2.5789794921875,
  0.24957275390625, 2.6541748046875,
  0.235809326171875, 2.7293701171875,
  0.222869873046875, 2.8045654296875,
  0.210662841796875, 2.879730224609375,
  0.199188232421875, 2.954925537109375,
  0.188385009765625, 3.030120849609375,
  0.17822265625, 3.105316162109375,
  0.168609619140625, 3.180511474609375,
  0.1595458984375, 3.255706787109375,
  0.1510009765625, 3.33087158203125,
  0.1429443359375, 3.40606689453125,
  0.135345458984375, 3.48126220703125,
  0.128143310546875, 3.55645751953125,
  0.121368408203125, 3.63165283203125,
  0.114959716796875, 3.706817626953125,
  0.10888671875, 3.782012939453125,
  0.1031494140625, 3.857208251953125,
  0.09771728515625, 3.932403564453125,
  0.09259033203125, 4.007598876953125,
  0.0877685546875, 4.082794189453125,
  0.083160400390625, 4.157958984375,
  0.078826904296875, 4.233154296875,
  0.07470703125, 4.308349609375,
  0.070831298828125, 4.383544921875,
  0.067138671875, 4.458740234375,
  0.06365966796875, 4.533935546875,
  0.06036376953125, 4.609100341796875,
  0.057220458984375, 4.684295654296875,
  0.05426025390625, 4.759490966796875,
  0.05145263671875, 4.834686279296875,
  0.048797607421875, 4.909881591796875,
  0.046295166015625, 4.985076904296875,
  0.043914794921875, 5.06024169921875,
  0.0416259765625, 5.13543701171875,
  0.03948974609375, 5.21063232421875,
  0.0374755859375, 5.285858154296875,
  0.035552978515625, 5.360992431640625,
  0.033721923828125, 5.43621826171875,
  0.031982421875, 5.51141357421875,
  0.03033447265625, 5.586578369140625,
  0.028778076171875, 5.661773681640625,
  0.027313232421875, 5.736968994140625,
};


// This struct is for passing data to kvz_rdoq_sign_hiding.
// Every array has room for a 32x32 block and is indexed by the raster
// position of the coefficient inside the block (the value that the scan
// tables map a scan position to).
struct sh_rates_t {
  // Change in bit cost when the absolute level is increased by one.
  int32_t inc[32 * 32];
  // Change in bit cost when the absolute level is decreased by one.
  int32_t dec[32 * 32];
  // Bit cost of going from zero to one: cost of coding the significance
  // flag as 1 minus the cost of coding it as 0.
  int32_t sig_coeff_inc[32 * 32];
  // Coeff minus quantized coeff. kvz_rdoq_sign_hiding multiplies this by
  // its rd_factor to get the distortion term of a level change.
  int32_t quant_delta[32 * 32];
};


/**
 * \brief Count the bits needed for a coefficient block by running the real
 *        coefficient coder on a scratch copy of the CABAC state.
 *
 * \param state      encoder state whose CABAC state is used as starting point
 * \param coeff      coefficient array with width * width entries
 * \param width      coeff block width
 * \param type       data type (0 == luma)
 * \param scan_mode  scan mode handed to the coefficient coder
 *
 * \returns bits needed to code input coefficients, 0 for an all-zero block
 */
static INLINE uint32_t get_coeff_cabac_cost(
    const encoder_state_t * const state,
    const coeff_t *coeff,
    int32_t width,
    int32_t type,
    int8_t scan_mode)
{
  // Nothing is coded for a block without non-zero coefficients.
  const int num_coeffs = width * width;
  int first_nonzero = 0;
  while (first_nonzero < num_coeffs && coeff[first_nonzero] == 0) {
    first_nonzero++;
  }
  if (first_nonzero >= num_coeffs) {
    return 0;
  }

  // Work on a private copy of the CABAC so that the contexts of the encoder
  // state are left untouched by the bit counting.
  cabac_data_t counting_cabac;
  memcpy(&counting_cabac, &state->cabac, sizeof(counting_cabac));

  // Start from an empty output in "count only" mode.
  counting_cabac.bits_left = 23;
  counting_cabac.num_buffered_bytes = 0;
  counting_cabac.only_count = 1;

  // The const qualifier can be dropped here: with cabac.only_count set the
  // coding function does not modify state.
  kvz_encode_coeff_nxn((encoder_state_t*) state,
                       &counting_cabac,
                       coeff,
                       width,
                       type,
                       scan_mode,
                       0);

  // Bits consumed from the 23 initially free bits plus all buffered bytes.
  return (23 - counting_cabac.bits_left) + (counting_cabac.num_buffered_bytes << 3);
}

/**
 * \brief Estimate bitcost for coding coefficients.
 *
 * When the QP of the state is below cfg.fast_residual_cost_limit, the cost
 * is approximated from the sum of absolute coefficients with a linear model
 * in QP. Otherwise the bits are counted with the CABAC coder.
 *
 * \param state      encoder state (QP, configuration and CABAC state)
 * \param coeff      coefficient array
 * \param width      coeff block width
 * \param type       data type (0 == luma)
 * \param scan_mode  scan mode, only used when the bits are counted
 *
 * \returns          number of bits needed to code coefficients
 */
uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
                            const coeff_t *coeff,
                            int32_t width,
                            int32_t type,
                            int8_t scan_mode)
{
  if (state->qp < state->encoder_control->cfg.fast_residual_cost_limit) {
    // Fast path: cost model based on QP and sum of absolute coeffs,
    // rounded to the nearest integer.
    const uint32_t abs_sum = kvz_coeff_abs_sum(coeff, width * width);
    return (uint32_t)(abs_sum * (state->qp * COEFF_COST_QP_FACTOR + COEFF_COST_BIAS) + 0.5);
  }

  return get_coeff_cabac_cost(state, coeff, width, type, scan_mode);
}

#define COEF_REMAIN_BIN_REDUCTION 3
/** Calculates the cost for specific absolute transform level
 *
 * The returned rate is in fractional bits (1 << CTX_FRAC_BITS per bit). It
 * always contains the sign bit. For levels at or above the base level it
 * also contains the bins of the remaining level (Golomb-Rice prefix/suffix
 * with an escape for large values). The context coded flags are not
 * accounted for yet, see the ToDo in the body.
 *
 * \param state       encoder state (currently not read)
 * \param abs_level   scaled quantized level
 * \param ctx_num_one current ctxInc for coeff_abs_level_greater1 (currently not read)
 * \param ctx_num_abs current ctxInc for coeff_abs_level_greater2 (currently not read)
 * \param abs_go_rice Rice parameter for the remaining level
 * \param c1_idx      compared against C1FLAG_NUMBER to pick the base level
 * \param c2_idx      compared against C2FLAG_NUMBER to pick the base level
 * \param type        data type (currently not read)
 * \returns cost of given absolute transform level
 * From HM 12.0
 */
INLINE int32_t kvz_get_ic_rate(encoder_state_t * const state,
                    uint32_t abs_level,
                    uint16_t ctx_num_one,
                    uint16_t ctx_num_abs,
                    uint16_t abs_go_rice,
                    uint32_t c1_idx,
                    uint32_t c2_idx,
                    int8_t type)
{
  // Every level pays for its sign bit.
  int32_t rate = 1 << CTX_FRAC_BITS;

  // Smallest level that needs a remainder: 3 while both flags are still
  // available, 2 when only the first one is, 1 otherwise.
  uint32_t base_level = 1;
  if (c1_idx < C1FLAG_NUMBER) {
    base_level = (c2_idx < C2FLAG_NUMBER) ? 3 : 2;
  }

  // ToDo: fix for VVC. The context coded bins are not added to the rate
  // yet. Terms of the reference code that still need to be ported:
  //   abs_level >= base_level: CTX_ENTROPY_BITS(&base_one_ctx[ctx_num_one],0),
  //                            fracBitsPar[(abs_level - 1) & 1],
  //                            fracBitsGt1[1], fracBitsGt2[1]
  //   abs_level == 1:          fracBitsPar[0], fracBitsGt1[0]
  //   abs_level == 2:          fracBitsPar[1], fracBitsGt1[0]
  //   abs_level == 3:          fracBitsPar[0], fracBitsGt1[1], fracBitsGt2[0]
  //   abs_level == 4:          fracBitsPar[1], fracBitsGt1[1], fracBitsGt2[0]
  if (abs_level < base_level) {
    return rate;
  }

  int32_t remainder = abs_level - base_level;
  const int escape_start = COEF_REMAIN_BIN_REDUCTION << abs_go_rice;
  int32_t num_bins;

  if (remainder < escape_start) {
    // Unary prefix, terminating bin and abs_go_rice suffix bins.
    const int32_t prefix_len = remainder >> abs_go_rice;
    num_bins = prefix_len + 1 + abs_go_rice;
  } else {
    // Escape coding: find the suffix length that can hold the remainder.
    int32_t suffix_len = abs_go_rice;
    remainder -= escape_start;
    while (remainder >= (1 << suffix_len)) {
      remainder -= 1 << suffix_len;
      suffix_len++;
    }
    num_bins = COEF_REMAIN_BIN_REDUCTION + suffix_len + 1 - abs_go_rice + suffix_len;
  }

  rate += num_bins * (1 << CTX_FRAC_BITS);
  return rate;
}

/** Get the best level in RD sense
 *
 * Compares max_abs_level and max_abs_level - 1 (never below 1), and, when
 * the coefficient is not the last one and max_abs_level is below 3, also
 * the zero level. The candidate with the lowest cost is chosen.
 *
 * \param state          encoder state (lambda and CABAC contexts)
 * \param coded_cost     out: cost of the chosen level
 * \param coded_cost0    in: cost when coefficient is 0, without sig flag
 * \param coded_cost_sig out: significance flag cost of the chosen level;
 *                       not written when no candidate is accepted
 * \param level_double   unscaled quantized level
 * \param max_abs_level  scaled quantized level
 * \param ctx_num_sig    current ctxInc for the significance flag
 * \param ctx_num_one    forwarded to kvz_get_ic_rate
 * \param ctx_num_abs    forwarded to kvz_get_ic_rate
 * \param abs_go_rice    current Rice parameter, forwarded to kvz_get_ic_rate
 * \param c1_idx         forwarded to kvz_get_ic_rate
 * \param c2_idx         forwarded to kvz_get_ic_rate
 * \param q_bits         shift that brings a level to the scale of level_double
 * \param temp           factor applied to the squared quantization error
 * \param last           non-zero if the coefficient is the last significant;
 *                       no significance flag cost is used in that case
 * \param type           data type, selects luma (0) or chroma contexts
 * \returns best quantized transform level for given scan position
 * From HM 12.0
 */
INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
                           int32_t level_double, uint32_t max_abs_level,
                           uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                           uint16_t abs_go_rice,
                           uint32_t c1_idx, uint32_t c2_idx,
                           int32_t q_bits,double temp, int8_t last, int8_t type)
{
  cabac_data_t * const cabac = &state->cabac;

  cabac_ctx_t *sig_models;
  if (type) {
    sig_models = cabac->ctx.cu_sig_model_chroma[0];
  } else {
    sig_models = cabac->ctx.cu_sig_model_luma[0];
  }

  uint32_t chosen_level = 0;

  if (last || max_abs_level >= 3) {
    // Zero is not a candidate, any non-zero level beats this.
    *coded_cost = MAX_DOUBLE;
  } else {
    // Start with the cost of coding the coefficient as zero.
    *coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&sig_models[ctx_num_sig], 0);
    *coded_cost     = *coded_cost0 + *coded_cost_sig;
    if (max_abs_level == 0) {
      return chosen_level;
    }
  }

  // Significance flag cost shared by all non-zero candidates.
  double nonzero_sig_cost = 0;
  if (!last) {
    nonzero_sig_cost = state->lambda * CTX_ENTROPY_BITS(&sig_models[ctx_num_sig], 1);
  }

  const int32_t lowest_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
  int32_t candidate = max_abs_level;
  while (candidate >= lowest_level) {
    // Distortion of the candidate plus lambda weighted level rate.
    double err            = (double)(level_double - ( candidate * (1 << q_bits) ) );
    double candidate_cost = err * err * temp + state->lambda *
                            kvz_get_ic_rate( state, candidate, ctx_num_one, ctx_num_abs,
                                         abs_go_rice, c1_idx, c2_idx, type);
    candidate_cost       += nonzero_sig_cost;

    if (candidate_cost < *coded_cost) {
      chosen_level    = candidate;
      *coded_cost     = candidate_cost;
      *coded_cost_sig = nonzero_sig_cost;
    }
    candidate--;
  }

  return chosen_level;
}


/** Calculates the cost of signaling the last significant coefficient in the block
 * \param state        encoder state, lambda is read from it
 * \param pos_x        X coordinate of the last significant coefficient
 * \param pos_y        Y coordinate of the last significant coefficient
 * \param last_x_bits  bit costs per X position group (see calc_last_bits)
 * \param last_y_bits  bit costs per Y position group (see calc_last_bits)
 * \returns cost of last significant coefficient, multiplied by lambda
 *
 * From HM 12.0
*/
static double get_rate_last(const encoder_state_t * const state,
                            const uint32_t  pos_x, const uint32_t pos_y,
                            int32_t* last_x_bits, int32_t* last_y_bits)
{
  // Position groups of the coordinates, X first and Y second.
  const uint32_t group[2] = { g_group_idx[pos_x], g_group_idx[pos_y] };

  double frac_bits = last_x_bits[ group[0] ] + last_y_bits[ group[1] ];

  // Groups above 3 cost (group - 2) / 2 additional whole bits each.
  for (int dim = 0; dim < 2; dim++) {
    if (group[dim] > 3) {
      frac_bits += CTX_FRAC_ONE_BIT * ((group[dim] - 2) >> 1);
    }
  }

  return state->lambda * frac_bits;
}

static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type,
                           int32_t* last_x_bits, int32_t* last_y_bits)
{
  cabac_data_t * const cabac = &state->cabac;
  int32_t bits_x = 0, bits_y = 0;
  int32_t blk_size_offset_x, blk_size_offset_y, shiftX, shiftY;
  int32_t ctx;

  cabac_ctx_t *base_ctx_x = (type ? cabac->ctx.cu_ctx_last_x_chroma : cabac->ctx.cu_ctx_last_x_luma);
  cabac_ctx_t *base_ctx_y = (type ? cabac->ctx.cu_ctx_last_y_chroma : cabac->ctx.cu_ctx_last_y_luma);

  blk_size_offset_x = type ? 0: (kvz_g_convert_to_bit[ width ] *3 + ((kvz_g_convert_to_bit[ width ] +1)>>2));
  blk_size_offset_y = type ? 0: (kvz_g_convert_to_bit[ height ]*3 + ((kvz_g_convert_to_bit[ height ]+1)>>2));
  shiftX = type ? kvz_g_convert_to_bit[ width  ] :((kvz_g_convert_to_bit[ width  ]+3)>>2);
  shiftY = type ? kvz_g_convert_to_bit[ height ] :((kvz_g_convert_to_bit[ height ]+3)>>2);


  for (ctx = 0; ctx < g_group_idx[ width - 1 ]; ctx++) {
    int32_t ctx_offset = blk_size_offset_x + (ctx >>shiftX);
    last_x_bits[ ctx ] = bits_x + CTX_ENTROPY_BITS(&base_ctx_x[ ctx_offset ],0);
    bits_x += CTX_ENTROPY_BITS(&base_ctx_x[ ctx_offset ],1);
  }
  last_x_bits[ctx] = bits_x;
  for (ctx = 0; ctx < g_group_idx[ height - 1 ]; ctx++) {
    int32_t ctx_offset = blk_size_offset_y + (ctx >>shiftY);
    last_y_bits[ ctx ] = bits_y + CTX_ENTROPY_BITS(&base_ctx_y[ ctx_offset ],0);
    bits_y +=  CTX_ENTROPY_BITS(&base_ctx_y[ ctx_offset ],1);
  }
  last_y_bits[ctx] = bits_y;
}

/**
 * \brief Select which coefficient to change for sign hiding, and change it.
 *
 * When sign hiding is used for a coefficient group (CG), the sign of its
 * first non-zero coefficient in scan order is represented by the parity of
 * the sum of the levels in the CG. If the parity is not correct, one
 * coefficient has to be changed by one. This function uses tables generated
 * during RDOQ to select the best coefficient to change.
 *
 * CGs in which the scan distance between the first and the last non-zero
 * coefficient is below SBH_THRESHOLD are left untouched.
 *
 * \param state         encoder state; lambda and the bitdepth are read
 * \param qp_scaled     scaled QP, selects the inverse quantization scale
 * \param scan2raster   maps a scan position to a raster position
 * \param sh_rates      rate tables generated during RDOQ, indexed by raster
 *                      position
 * \param last_pos      the last CG to process is (last_pos - 1) >>
 *                      LOG2_SCAN_SET_SIZE. NOTE(review): presumably the scan
 *                      position of the last coefficient plus one -- confirm
 *                      against the caller.
 * \param coeffs        unquantized coefficients, only their signs are used
 * \param quant_coeffs  quantized coefficients, modified in place
 */
void kvz_rdoq_sign_hiding(
    const encoder_state_t *const state,
    const int32_t qp_scaled,
    const uint32_t *const scan2raster,
    const struct sh_rates_t *const sh_rates,
    const int32_t last_pos,
    const coeff_t *const coeffs,
    coeff_t *const quant_coeffs)
{
  const encoder_control_t * const ctrl = state->encoder_control;

  int inv_quant = kvz_g_inv_quant_scales[qp_scaled % 6];
  // The squared quantization scale is calculated with 64 bits, because with
  // a large qp_scaled the product does not fit into a 32-bit int.
  const int64_t quant_scale_sq =
      (int64_t)inv_quant * inv_quant * ((int64_t)1 << (2 * (qp_scaled / 6)));
  // This somehow scales quant_delta into fractional bits. Instead of the bits
  // being multiplied by lambda, the residual is divided by it, or something
  // like that.
  const int64_t rd_factor = (quant_scale_sq
                      / state->lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
  const int last_cg = (last_pos - 1) >> LOG2_SCAN_SET_SIZE;

  for (int32_t cg_scan = last_cg; cg_scan >= 0; cg_scan--) {
    const int32_t cg_coeff_scan = cg_scan << LOG2_SCAN_SET_SIZE;

    // Find positions of first and last non-zero coefficients in the CG.
    int32_t last_nz_scan = -1;
    for (int32_t coeff_i = SCAN_SET_SIZE - 1; coeff_i >= 0; --coeff_i) {
      if (quant_coeffs[scan2raster[coeff_i + cg_coeff_scan]]) {
        last_nz_scan = coeff_i;
        break;
      }
    }
    int32_t first_nz_scan = SCAN_SET_SIZE;
    for (int32_t coeff_i = 0; coeff_i <= last_nz_scan; coeff_i++) {
      if (quant_coeffs[scan2raster[coeff_i + cg_coeff_scan]]) {
        first_nz_scan = coeff_i;
        break;
      }
    }

    // Also skips CGs without any non-zero coefficients (-1 - 16 < 4).
    if (last_nz_scan - first_nz_scan < SBH_THRESHOLD) {
      continue;
    }

    // Sign of the first non-zero coefficient, 1 for negative.
    const int32_t signbit = quant_coeffs[scan2raster[cg_coeff_scan + first_nz_scan]] <= 0;
    // Only the parity of the sum is used, and it is the same for the sum of
    // the signed levels as it is for the sum of the absolute levels.
    unsigned abs_coeff_sum = 0;
    for (int32_t coeff_scan = first_nz_scan; coeff_scan <= last_nz_scan; coeff_scan++) {
      abs_coeff_sum += quant_coeffs[scan2raster[coeff_scan + cg_coeff_scan]];
    }
    if (signbit == (abs_coeff_sum & 0x1)) {
      // Sign already matches with the parity, no need to modify coefficients.
      continue;
    }

    // Otherwise, search for the best coeff to change by one and change it.

    struct {
      int64_t cost;
      int pos;
      int change;
    } current, best = { MAX_INT64, 0, 0 };

    // In the last CG nothing after the last non-zero coefficient is tried.
    const int last_coeff_scan = (cg_scan == last_cg ? last_nz_scan : SCAN_SET_SIZE - 1);
    for (int coeff_scan = last_coeff_scan; coeff_scan >= 0; --coeff_scan) {
      current.pos = scan2raster[coeff_scan + cg_coeff_scan];
      // Shift the calculation back into original precision to avoid
      // changing the bitstream.
#     define PRECISION_INC (15 - CTX_FRAC_BITS)
      int64_t quant_cost_in_bits = rd_factor * sh_rates->quant_delta[current.pos];

      coeff_t abs_coeff = abs(quant_coeffs[current.pos]);

      if (abs_coeff != 0) {
        // Choose between incrementing and decrementing a non-zero coeff.

        int64_t inc_bits = sh_rates->inc[current.pos];
        int64_t dec_bits = sh_rates->dec[current.pos];
        if (abs_coeff == 1) {
          // We save sign bit and sig_coeff goes to zero.
          dec_bits -= CTX_FRAC_ONE_BIT + sh_rates->sig_coeff_inc[current.pos];
        }
        if (cg_scan == last_cg && last_nz_scan == coeff_scan && abs_coeff == 1) {
          // Changing the last non-zero bit in the last cg to zero.
          // This might save a lot of bits if the next bits are already
          // zeros, or just a couple fractional bits if they are not.
          // TODO: Check if calculating the real savings makes sense.
          dec_bits -= 4 * CTX_FRAC_ONE_BIT;
        }

        // Combine the rate change with the distortion change.
        inc_bits = -quant_cost_in_bits + inc_bits * (1 << PRECISION_INC);
        dec_bits = quant_cost_in_bits + dec_bits * (1 << PRECISION_INC);

        if (inc_bits < dec_bits) {
          current.change = 1;
          current.cost = inc_bits;
        } else {
          current.change = -1;
          current.cost = dec_bits;

          if (coeff_scan == first_nz_scan && abs_coeff == 1) {
            // Don't turn first non-zero coeff into zero.
            // Seems kind of arbitrary. It's probably because it could lead to
            // breaking SBH_THRESHOLD.
            current.cost = MAX_INT64;
          }
        }
      } else {
        // Try incrementing a zero coeff.

        // Add sign bit, other bits and sig_coeff goes to one.
        int bits = CTX_FRAC_ONE_BIT + sh_rates->inc[current.pos] + sh_rates->sig_coeff_inc[current.pos];
        current.cost = -llabs(quant_cost_in_bits) + bits * (1 << PRECISION_INC);
        current.change = 1;

        if (coeff_scan < first_nz_scan) {
          // This coefficient would become the first non-zero one, so its
          // sign has to be the sign that is being hidden.
          if (((coeffs[current.pos] >= 0) ? 0 : 1) != signbit) {
            current.cost = MAX_INT64;
          }
        }
      }

      if (current.cost < best.cost) {
        best = current;
      }
    }

    // Never grow a level that is already at the limit of 16 bits.
    if (quant_coeffs[best.pos] == 32767 || quant_coeffs[best.pos] == -32768) {
      best.change = -1;
    }

    // best.change applies to the absolute level, the sign comes from the
    // unquantized coefficient.
    if (coeffs[best.pos] >= 0) {
      quant_coeffs[best.pos] += best.change;
    } else {
      quant_coeffs[best.pos] -= best.change;
    }
  }
}


/** RDOQ with CABAC
 * \returns void
 * Rate distortion optimized quantization for entropy
 * coding engines using probability models like CABAC
 * From HM 12.0
 */
void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, int32_t width,
           int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth)
{
  const encoder_control_t * const encoder = state->encoder_control;
  cabac_data_t * const cabac = &state->cabac;
  uint32_t log2_tr_size      = kvz_g_convert_to_bit[ width ] + 2;
  int32_t  transform_shift   = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size;  // Represents scaling through forward transform
  uint16_t go_rice_param     = 0;
  uint32_t log2_block_size   = kvz_g_convert_to_bit[ width ] + 2;
  int32_t  scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);

  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
  
  int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;

  const int32_t *quant_coeff  = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6];
  const double *err_scale     = encoder->scaling_list.error_scale[log2_tr_size-2][scalinglist_type][qp_scaled%6];

  double block_uncoded_cost = 0;
  
  double cost_coeff [ 32 * 32 ];
  double cost_sig   [ 32 * 32 ];
  double cost_coeff0[ 32 * 32 ];

  struct sh_rates_t sh_rates;

  const uint32_t *scan_cg = g_sig_last_scan_cg[log2_block_size - 2][scan_mode];
  const uint32_t cg_size = 16;
  const int32_t  shift = 4 >> 1;
  const uint32_t num_blk_side = width >> shift;
  double   cost_coeffgroup_sig[ 64 ];
  uint32_t sig_coeffgroup_flag[ 64 ];

  uint16_t    ctx_set    = 0;
  int16_t     c1         = 1;
  int16_t     c2         = 0;
  double      base_cost  = 0;

  uint32_t    c1_idx     = 0;
  uint32_t    c2_idx     = 0;
  int32_t     base_level;

  const uint32_t *scan = kvz_g_sig_last_scan[ scan_mode ][ log2_block_size - 1 ];

  int32_t cg_last_scanpos = -1;
  int32_t last_scanpos = -1;

  uint32_t cg_num = width * height >> 4;

  // Explicitly tell the only possible numbers of elements to be zeroed.
  // Hope the compiler is able to utilize this information.
  switch (cg_num) {
    case  1: FILL_ARRAY(sig_coeffgroup_flag, 0,  1); break;
    case  4: FILL_ARRAY(sig_coeffgroup_flag, 0,  4); break;
    case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
    case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
    default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
  }

  cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]);
  cabac_ctx_t *baseCtx              = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0][0]) : &(cabac->ctx.cu_sig_model_chroma[0][0]);

  struct {
    double coded_level_and_dist;
    double uncoded_dist;
    double sig_cost;
    double sig_cost_0;
    int32_t nnz_before_pos0;
  } rd_stats;

  //Find last cg and last scanpos
  int32_t cg_scanpos;
  for (cg_scanpos = (cg_num - 1); cg_scanpos >= 0; cg_scanpos--)
  {
    for (int32_t scanpos_in_cg = (cg_size - 1); scanpos_in_cg >= 0; scanpos_in_cg--)
    {
      int32_t  scanpos        = cg_scanpos*cg_size + scanpos_in_cg;
      uint32_t blkpos         = scan[scanpos];
      int32_t q               = quant_coeff[blkpos];
      int32_t level_double    = coef[blkpos];
      level_double            = MIN(abs(level_double) * q, MAX_INT - (1 << (q_bits - 1)));
      uint32_t max_abs_level  = (level_double + (1 << (q_bits - 1))) >> q_bits;

      if (max_abs_level > 0) {
        last_scanpos    = scanpos;
        ctx_set         = (scanpos > 0 && type == 0) ? 2 : 0;
        cg_last_scanpos = cg_scanpos;
        sh_rates.sig_coeff_inc[blkpos] = 0;
        break;
      }
      dest_coeff[blkpos] = 0;
    }
    if (last_scanpos != -1) break;
  }

  if (last_scanpos == -1) {
    return;
  }

  for (; cg_scanpos >= 0; cg_scanpos--) cost_coeffgroup_sig[cg_scanpos] = 0;

  int32_t last_x_bits[32], last_y_bits[32];
  calc_last_bits(state, width, height, type, last_x_bits, last_y_bits);

  for (int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
    uint32_t cg_blkpos  = scan_cg[cg_scanpos];
    uint32_t cg_pos_y   = cg_blkpos / num_blk_side;
    uint32_t cg_pos_x   = cg_blkpos - (cg_pos_y * num_blk_side);

    int32_t pattern_sig_ctx = kvz_context_calc_pattern_sig_ctx(sig_coeffgroup_flag,
                                                           cg_pos_x, cg_pos_y, width);

    FILL(rd_stats, 0);
    for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--)  {
      int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
      if (scanpos > last_scanpos) continue;
      uint32_t blkpos         = scan[scanpos];
      int32_t q               = quant_coeff[blkpos];
      double temp             = err_scale[blkpos];
      int32_t level_double    = coef[blkpos];
      level_double            = MIN(abs(level_double) * q , MAX_INT - (1 << (q_bits - 1)));
      uint32_t max_abs_level  = (level_double + (1 << (q_bits - 1))) >> q_bits;

      double err              = (double)level_double;
      cost_coeff0[scanpos]    = err * err * temp; 
      block_uncoded_cost      += cost_coeff0[ scanpos ];
      //===== coefficient level estimation =====
      int32_t  level;
      uint16_t  one_ctx = 4 * ctx_set + c1;
      uint16_t  abs_ctx = ctx_set + c2;

      if( scanpos == last_scanpos ) {
        level            = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                             level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param,
                                             c1_idx, c2_idx, q_bits, temp, 1, type );
      } else {
        uint32_t  pos_y    = blkpos >> log2_block_size;
        uint32_t  pos_x    = blkpos - ( pos_y << log2_block_size );
        uint16_t  ctx_sig  = (uint16_t)kvz_context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y,
                                                     log2_block_size, type);
        level              = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                             level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param,
                                             c1_idx, c2_idx, q_bits, temp, 0, type );
        if (encoder->cfg.signhide_enable) {
          int greater_than_zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 1);
          int zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 0);
          sh_rates.sig_coeff_inc[blkpos] = greater_than_zero - zero;
        }
      }
      /*
      if (encoder->cfg.signhide_enable) {
        sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8);
        if (level > 0) {
          int32_t rate_now  = kvz_get_ic_rate(state, level, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
          int32_t rate_up   = kvz_get_ic_rate(state, level + 1, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
          int32_t rate_down = kvz_get_ic_rate(state, level - 1, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
          sh_rates.inc[blkpos] = rate_up - rate_now;
          sh_rates.dec[blkpos] = rate_down - rate_now;
        } else { // level == 0
          sh_rates.inc[blkpos]   = CTX_ENTROPY_BITS(&base_one_ctx[one_ctx], 0);
        }
      }*/
      dest_coeff[blkpos] = (coeff_t)level;
      base_cost         += cost_coeff[scanpos];

      base_level = (c1_idx < C1FLAG_NUMBER) ? (2 + (c2_idx < C2FLAG_NUMBER)) : 1;
      if (level >= base_level) {
        if(level  > 3*(1<<go_rice_param)) {
          go_rice_param = MIN(go_rice_param + 1, 4);
        }
      }
      if (level >= 1) c1_idx ++;

      //===== update bin model =====
      if (level > 1) {
        c1 = 0;
        c2 += (c2 < 2);
        c2_idx ++;
      } else if( (c1 < 3) && (c1 > 0) && level) {
        c1++;
      }

      //===== context set update =====
      if ((scanpos % SCAN_SET_SIZE == 0) && scanpos > 0) {
        c2                = 0;
        go_rice_param     = 0;

        c1_idx   = 0;
        c2_idx   = 0;
        ctx_set = (scanpos == SCAN_SET_SIZE || type != 0) ? 0 : 2;
        if( c1 == 0 ) {
          ctx_set++;
        }
        c1 = 1;
      }

      rd_stats.sig_cost += cost_sig[scanpos];
      if ( scanpos_in_cg == 0 ) {
        rd_stats.sig_cost_0 = cost_sig[scanpos];
      }
      if ( dest_coeff[blkpos] )  {
        sig_coeffgroup_flag[cg_blkpos] = 1;
        rd_stats.coded_level_and_dist   += cost_coeff[scanpos] - cost_sig[scanpos];
        rd_stats.uncoded_dist           += cost_coeff0[scanpos];
        if ( scanpos_in_cg != 0 ) {
          rd_stats.nnz_before_pos0++;
        }
      }
    } //end for (scanpos_in_cg)

    if( cg_scanpos ) {
      if (sig_coeffgroup_flag[cg_blkpos] == 0) {
        uint32_t ctx_sig  = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                        cg_pos_y, width);
        cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
        base_cost += cost_coeffgroup_sig[cg_scanpos]  - rd_stats.sig_cost;
      } else {
        if (cg_scanpos < cg_last_scanpos){
          double cost_zero_cg;
          uint32_t ctx_sig;
          if (rd_stats.nnz_before_pos0 == 0) {
            base_cost -= rd_stats.sig_cost_0;
            rd_stats.sig_cost -= rd_stats.sig_cost_0;
          }
          // rd-cost if SigCoeffGroupFlag = 0, initialization
          cost_zero_cg = base_cost;

          // add SigCoeffGroupFlag cost to total cost
          ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
            cg_pos_y, width);

          cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
          base_cost += cost_coeffgroup_sig[cg_scanpos];
          cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);

          // try to convert the current coeff group from non-zero to all-zero
          cost_zero_cg += rd_stats.uncoded_dist;          // distortion for resetting non-zero levels to zero levels
          cost_zero_cg -= rd_stats.coded_level_and_dist;  // distortion and level cost for keeping all non-zero levels
          cost_zero_cg -= rd_stats.sig_cost;              // sig cost for all coeffs, including zero levels and non-zerl levels

          // if we can save cost, change this block to all-zero block
          if (cost_zero_cg < base_cost) {

            sig_coeffgroup_flag[cg_blkpos] = 0;
            base_cost = cost_zero_cg;

            cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);

            // reset coeffs to 0 in this block
            for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
              int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
              uint32_t blkpos = scan[scanpos];
              if (dest_coeff[blkpos]){
                dest_coeff[blkpos] = 0;
                cost_coeff[scanpos] = cost_coeff0[scanpos];
                cost_sig[scanpos] = 0;
              }
            }
          } // end if ( cost_all_zeros < base_cost )
        }
      } // end if if (sig_coeffgroup_flag[ cg_blkpos ] == 0)
    } else {
      sig_coeffgroup_flag[cg_blkpos] = 1;
    }
  } //end for (cg_scanpos)

  //===== estimate last position =====
  double  best_cost        = 0;
  int32_t ctx_cbf          = 0;
  int8_t found_last        = 0;
  int32_t best_last_idx_p1 = 0;

  if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
    best_cost  = block_uncoded_cost +   state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
  } else {
    // ToDo: update for VVC contexts
    cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_cb):(cabac->ctx.qt_cbf_model_luma);
    ctx_cbf    = ( type ? tr_depth : !tr_depth);
    best_cost  = block_uncoded_cost +  state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
  }

  for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
    uint32_t cg_blkpos = scan_cg[cg_scanpos];
    base_cost -= cost_coeffgroup_sig[cg_scanpos];

    if (sig_coeffgroup_flag[ cg_blkpos ]) {
      for ( int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
        int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
        if (scanpos > last_scanpos) continue;
        uint32_t blkpos  = scan[scanpos];

        if( dest_coeff[ blkpos ] ) {
          uint32_t   pos_y = blkpos >> log2_block_size;
          uint32_t   pos_x = blkpos - ( pos_y << log2_block_size );

          double cost_last = /**(scan_mode == SCAN_VER) ? get_rate_last(state, pos_y, pos_x,last_x_bits,last_y_bits) : **/get_rate_last(state, pos_x, pos_y, last_x_bits,last_y_bits );
          double totalCost = base_cost + cost_last - cost_sig[ scanpos ];

          if( totalCost < best_cost ) {
            best_last_idx_p1 = scanpos + 1;
            best_cost        = totalCost;
          }
          if( dest_coeff[ blkpos ] > 1 ) {
            found_last = 1;
            break;
          }
          base_cost -= cost_coeff[scanpos];
          base_cost += cost_coeff0[scanpos];
        } else {
          base_cost -= cost_sig[scanpos];
        }
      } //end for
      if (found_last) break;
    } // end if (sig_coeffgroup_flag[ cg_blkpos ])
  } // end for

  uint32_t abs_sum = 0;
  for ( int32_t scanpos = 0; scanpos < best_last_idx_p1; scanpos++) {
    int32_t blkPos     = scan[scanpos];
    int32_t level      = dest_coeff[blkPos];
    abs_sum            += level;
    dest_coeff[blkPos] = (coeff_t)(( coef[blkPos] < 0 ) ? -level : level);
  }
  //===== clean uncoded coefficients =====
  for ( int32_t scanpos = best_last_idx_p1; scanpos <= last_scanpos; scanpos++) {
    dest_coeff[scan[scanpos]] = 0;
  }

  if (encoder->cfg.signhide_enable && abs_sum >= 2) {
    kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff);
  }
}

/**
 * \brief Count the bits used for coding a motion vector difference with CABAC.
 *
 * The MVD is coded into a private copy of the given CABAC that has been put
 * into count-only mode, and the result is the distance in bits between the
 * copy after coding and the CABAC that was passed in.
 *
 * \param state    encoder state handed on to kvz_encode_mvd
 * \param cabac    CABAC data used as the starting point of the measurement
 * \param mvd_hor  horizontal component of the MVD
 * \param mvd_ver  vertical component of the MVD
 *
 * \returns bit position of the copy after coding minus that of \p cabac
 */
uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
                                       const cabac_data_t* cabac,
                                       const int32_t mvd_hor,
                                       const int32_t mvd_ver)
{
  // Work on a scratch copy so that the caller's CABAC is left as it is.
  cabac_data_t counter;
  memcpy(&counter, cabac, sizeof(counter));
  counter.only_count = 1;

  // Dropping const is safe here because only_count is set on the copy.
  kvz_encode_mvd((encoder_state_t*) state, &counter, mvd_hor, mvd_ver);

  // Bit position = bits consumed from the 23 bit window + 8 bits per
  // buffered byte.
  const uint32_t bits_after  = (23 - counter.bits_left) + (counter.num_buffered_bytes << 3);
  const uint32_t bits_before = (23 - cabac->bits_left) + (cabac->num_buffered_bytes << 3);

  return bits_after - bits_before;
}

/**
 * \brief Estimate the cost of coding a motion vector with CABAC.
 *
 * The vector (x, y) is scaled by 1 << mv_shift and compared against the merge
 * candidates. The merge flag, followed by either the merge index or the
 * reference index, MVD and MVP index, is then coded into a copy of
 * state->cabac that has been set to count-only mode, and the number of bits
 * produced by that copy is reported.
 *
 * \param state       encoder state; its cabac is copied and used as the
 *                    starting point for the bit counting
 * \param x           horizontal MV component, multiplied by 1 << mv_shift
 * \param y           vertical MV component, multiplied by 1 << mv_shift
 * \param mv_shift    shift applied to x and y before they are compared
 * \param mv_cand     the two MV predictor candidates ([i][0] is subtracted
 *                    from x and [i][1] from y)
 * \param merge_cand  merge candidates searched for a match with x, y and
 *                    ref_idx
 * \param num_cand    number of entries of merge_cand to search
 * \param ref_idx     reference compared against the merge candidates and
 *                    coded as ref_idx_lX when no candidate matches
 * \param bitcost     output: number of bits counted
 *
 * \returns *bitcost multiplied by state->lambda_sqrt, the latter rounded to
 *          an integer by adding 0.5 and truncating
 */
uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
                                 int x,
                                 int y,
                                 int mv_shift,
                                 int16_t mv_cand[2][2],
                                 inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
                                 int16_t num_cand,
                                 int32_t ref_idx,
                                 uint32_t *bitcost)
{
  cabac_data_t state_cabac_copy;
  cabac_data_t* cabac;
  uint32_t merge_idx;
  vector2d_t mvd = { 0, 0 };
  int8_t merged = 0;
  int8_t cur_mv_cand = 0;

  // Bring the vector to the precision used by the candidates.
  x *= 1 << mv_shift;
  y *= 1 << mv_shift;

  // Check every candidate to find a match
  // Candidates with dir == 3 are skipped; for the others dir - 1 selects the
  // list whose MV and reference are compared. When a match is found,
  // merge_idx is left at the index of the matching candidate.
  for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
    if (merge_cand[merge_idx].dir == 3) continue;
    if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
      merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y &&
      state->frame->ref_LX[merge_cand[merge_idx].dir - 1][
        merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1]
      ] == ref_idx)
    {
      merged = 1;
      break;
    }
  }

  // Store cabac state and contexts
  memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));

  // Clear bytes and bits and set mode to "count"
  state_cabac_copy.only_count = 1;
  state_cabac_copy.num_buffered_bytes = 0;
  state_cabac_copy.bits_left = 23;

  // All coding below goes through this pointer to the local copy.
  cabac = &state_cabac_copy;

  if (!merged) {
    // MVD against each of the two predictor candidates.
    vector2d_t mvd1 = {
      x - mv_cand[0][0],
      y - mv_cand[0][1],
    };
    vector2d_t mvd2 = {
      x - mv_cand[1][0],
      y - mv_cand[1][1],
    };
    uint32_t cand1_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
    uint32_t cand2_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);

    // Select candidate 1 if it has lower cost
    // (candidate 0 is kept when the costs are equal).
    if (cand2_cost < cand1_cost) {
      cur_mv_cand = 1;
      mvd = mvd2;
    } else {
      mvd = mvd1;
    }
  }

  cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);

  CABAC_BIN(cabac, merged, "MergeFlag");
  // From here on num_cand no longer holds the caller's value: the merge index
  // is coded against MRG_MAX_NUM_CANDS.
  num_cand = MRG_MAX_NUM_CANDS;
  if (merged) {
    if (num_cand > 1) {
      int32_t ui;
      // One bin per position up to and including merge_idx: 1 while the
      // position differs from merge_idx, 0 at merge_idx. The first bin uses
      // the merge index context model, later bins use CABAC_BIN_EP.
      for (ui = 0; ui < num_cand - 1; ui++) {
        int32_t symbol = (ui != merge_idx);
        if (ui == 0) {
          cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
          CABAC_BIN(cabac, symbol, "MergeIndex");
        } else {
          CABAC_BIN_EP(cabac, symbol, "MergeIndex");
        }
        if (symbol == 0) break;
      }
    }
  } else {
    uint32_t ref_list_idx;
    uint32_t j;
    int ref_list[2] = { 0, 0 };
    // Count the references with a POC below the current POC into ref_list[0]
    // and all the others into ref_list[1].
    for (j = 0; j < state->frame->ref->used_size; j++) {
      if (state->frame->ref->pocs[j] < state->frame->poc) {
        ref_list[0]++;
      } else {
        ref_list[1]++;
      }
    }

    //ToDo: bidir mv support
    for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) {
      // The constant 1 stands in for the MV direction, so only
      // ref_list_idx == 0 passes this test.
      if (/*cur_cu->inter.mv_dir*/ 1 & (1 << ref_list_idx)) {
        // The reference index is coded only when the list holds more than
        // one reference.
        if (ref_list[ref_list_idx] > 1) {
          // parseRefFrmIdx
          int32_t ref_frame = ref_idx;

          cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
          CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");

          if (ref_frame > 0) {
            int32_t i;
            int32_t ref_num = ref_list[ref_list_idx] - 2;

            cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
            ref_frame--;

            // Remaining bins: 1 until position ref_frame is reached, then 0.
            // The first of them uses cu_ref_pic_model[1], the rest use
            // CABAC_BIN_EP.
            for (i = 0; i < ref_num; ++i) {
              const uint32_t symbol = (i == ref_frame) ? 0 : 1;

              if (i == 0) {
                CABAC_BIN(cabac, symbol, "ref_idx_lX");
              } else {
                CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
              }
              if (symbol == 0) break;
            }
          }
        }

        // ToDo: Bidir vector support
        // The trailing "&& 0" makes this condition always true, so the MVD
        // is always coded.
        if (!(state->frame->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
          // It is safe to drop const here because cabac->only_count is set.
          kvz_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y);
        }

        // Signal which candidate MV to use
        kvz_cabac_write_unary_max_symbol(
            cabac,
            cabac->ctx.mvp_idx_model,
            cur_mv_cand,
            1,
            AMVP_MAX_NUM_CANDS - 1);
      }
    }
  }

  // The copy started from bits_left == 23 and no buffered bytes, so this is
  // the number of bits produced by the coding above.
  *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);

  // Weight the bit count with the rounded lambda_sqrt.
  return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
}
-												Changed skip_rdoq name to kvz_skip_unnecessary_rdoq. Changed the order it uses when it goes through CGs and tuned its sum calculation.

											
										
										
											2016-08-18 11:02:56 +00:00
+								/*****************************************************************************
-												Added GPLv2 headers to rdo.c/.h

											
										
										
											2014-01-27 12:39:56 +00:00
+								 * This file is part of Kvazaar HEVC encoder.
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
+								 *
-												Change license to LGPL.

- Everyone who has contributed code to the project has been asked to license
  their contributions under LGPL and they have agreed.

- COPYING file changed to say LGPLv2.1 instead of GPLv2.

- GPL changed to LGPL in the header of every single file that had a header,
  and a header added to the few that were missing one.

- Also.. Happy new year!

											
										
										
											2015-02-23 11:18:48 +00:00
+								 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
-												Added GPLv2 headers to rdo.c/.h

											
										
										
											2014-01-27 12:39:56 +00:00
+								 * COPYING file).
 								 *
-												Change license to LGPL.

- Everyone who has contributed code to the project has been asked to license
  their contributions under LGPL and they have agreed.

- COPYING file changed to say LGPLv2.1 instead of GPLv2.

- GPL changed to LGPL in the header of every single file that had a header,
  and a header added to the few that were missing one.

- Also.. Happy new year!

											
										
										
											2015-02-23 11:18:48 +00:00
+								 * Kvazaar is free software: you can redistribute it and/or modify it under
 								 * the terms of the GNU Lesser General Public License as published by the
 								 * Free Software Foundation; either version 2.1 of the License, or (at your
 								 * option) any later version.
-												Added GPLv2 headers to rdo.c/.h

											
										
										
											2014-01-27 12:39:56 +00:00
+								 *
-												Change license to LGPL.

- Everyone who has contributed code to the project has been asked to license
  their contributions under LGPL and they have agreed.

- COPYING file changed to say LGPLv2.1 instead of GPLv2.

- GPL changed to LGPL in the header of every single file that had a header,
  and a header added to the few that were missing one.

- Also.. Happy new year!

											
										
										
											2015-02-23 11:18:48 +00:00
+								 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 								 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 								 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
 								 * more details.
-												Added GPLv2 headers to rdo.c/.h

											
										
										
											2014-01-27 12:39:56 +00:00
+								 *
-												Change license to LGPL.

- Everyone who has contributed code to the project has been asked to license
  their contributions under LGPL and they have agreed.

- COPYING file changed to say LGPLv2.1 instead of GPLv2.

- GPL changed to LGPL in the header of every single file that had a header,
  and a header added to the few that were missing one.

- Also.. Happy new year!

											
										
										
											2015-02-23 11:18:48 +00:00
+								 * You should have received a copy of the GNU General Public License along
 								 * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
-												Added GPLv2 headers to rdo.c/.h

											
										
										
											2014-01-27 12:39:56 +00:00
+								 ****************************************************************************/
-												Run include-what-you-use fix_includes.py fix_includes.py

The includes should make more sense now and not just happen to compile
due to headers included from other headers.

Used a modified version of IWYU. Modifications were to attribute int8_t
and so on to stdint.h instead of sys/types.h and immintrin.h instead of
more specific headers.

include-what-you-use 0.7 (git:b70df35)
based on clang version 3.9.0 (trunk 264728)

											
										
										
											2016-04-01 14:14:23 +00:00
+								#include "rdo.h"
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								#include <stdlib.h>
 								#include <string.h>
 								#include "cabac.h"
-												Run include-what-you-use fix_includes.py fix_includes.py

The includes should make more sense now and not just happen to compile
due to headers included from other headers.

Used a modified version of IWYU. Modifications were to attribute int8_t
and so on to stdint.h instead of sys/types.h and immintrin.h instead of
more specific headers.

include-what-you-use 0.7 (git:b70df35)
based on clang version 3.9.0 (trunk 264728)

											
										
										
											2016-04-01 14:14:23 +00:00
+								#include "context.h"
-												Extract encoding code from encoderstate.c

Moves functions kvz_encode_coding_tree and kvz_encode_coeff_nxn from
encoderstate.c to encode_coding_tree.c.

											
										
										
											2016-08-09 13:01:51 +00:00
+								#include "encode_coding_tree.h"
-												Run include-what-you-use fix_includes.py fix_includes.py

The includes should make more sense now and not just happen to compile
due to headers included from other headers.

Used a modified version of IWYU. Modifications were to attribute int8_t
and so on to stdint.h instead of sys/types.h and immintrin.h instead of
more specific headers.

include-what-you-use 0.7 (git:b70df35)
based on clang version 3.9.0 (trunk 264728)

											
										
										
											2016-04-01 14:14:23 +00:00
+								#include "encoder.h"
 								#include "imagelist.h"
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								#include "inter.h"
-												Run include-what-you-use fix_includes.py fix_includes.py

The includes should make more sense now and not just happen to compile
due to headers included from other headers.

Used a modified version of IWYU. Modifications were to attribute int8_t
and so on to stdint.h instead of sys/types.h and immintrin.h instead of
more specific headers.

include-what-you-use 0.7 (git:b70df35)
based on clang version 3.9.0 (trunk 264728)

											
										
										
											2016-04-01 14:14:23 +00:00
+								#include "scalinglist.h"
 								#include "tables.h"
 								#include "transform.h"
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												Add sum of absolute coefficients to strategies

											
										
										
											2017-07-27 06:49:17 +00:00
+								#include "strategies/strategies-quant.h"
-												Implemented RDO function to calculate bits used for coefficient coding

											
										
										
											2014-04-04 08:49:31 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								#define QUANT_SHIFT          14
 								#define SCAN_SET_SIZE        16
 								#define LOG2_SCAN_SET_SIZE    4
 								#define SBH_THRESHOLD         4
-												Add an option to skip CABAC for residual costs

Adds command line option --fast-residual-cost=<limit>. When QP is below
the limit, estimates the cost of coding the residual coefficients from
the sum of absolute coefficients. Skipping CABAC is not worth it with
high QPs because there are fewer coefficients so CABAC is not as slow.

											
										
										
											2018-06-19 10:39:23 +00:00
+								static const double COEFF_COST_QP_FACTOR = 0.044407704;
 								static const double COEFF_COST_BIAS      = 0.557323653;
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 };
 								const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 };
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												Fix compiler warnings for VS2010 /W4 in context.c and rdo.c.
- Working towards issue #11.
- Moved all const arrays from .h to the .c. These are not used anywhere else.
- Moved entropy_bits array and its helper macro to rdo.c. They are not used
  anywhere else.
- Implicit conversions to explicit ones.

											
										
										
											2014-02-06 15:57:41 +00:00
+								/**
 								 * Entropy bits to estimate coded bits in RDO / RDOQ (From HM 12.0)
 								 */
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								const uint32_t kvz_entropy_bits[128] =
-												Fix compiler warnings for VS2010 /W4 in context.c and rdo.c.
- Working towards issue #11.
- Moved all const arrays from .h to the .c. These are not used anywhere else.
- Moved entropy_bits array and its helper macro to rdo.c. They are not used
  anywhere else.
- Implicit conversions to explicit ones.

											
										
										
											2014-02-06 15:57:41 +00:00
+								{
 								  0x08000, 0x08000, 0x076da, 0x089a0, 0x06e92, 0x09340, 0x0670a, 0x09cdf, 0x06029, 0x0a67f, 0x059dd, 0x0b01f, 0x05413, 0x0b9bf, 0x04ebf, 0x0c35f,
 								  0x049d3, 0x0ccff, 0x04546, 0x0d69e, 0x0410d, 0x0e03e, 0x03d22, 0x0e9de, 0x0397d, 0x0f37e, 0x03619, 0x0fd1e, 0x032ee, 0x106be, 0x02ffa, 0x1105d,
 								  0x02d37, 0x119fd, 0x02aa2, 0x1239d, 0x02836, 0x12d3d, 0x025f2, 0x136dd, 0x023d1, 0x1407c, 0x021d2, 0x14a1c, 0x01ff2, 0x153bc, 0x01e2f, 0x15d5c,
 								  0x01c87, 0x166fc, 0x01af7, 0x1709b, 0x0197f, 0x17a3b, 0x0181d, 0x183db, 0x016d0, 0x18d7b, 0x01595, 0x1971b, 0x0146c, 0x1a0bb, 0x01354, 0x1aa5a,
 								  0x0124c, 0x1b3fa, 0x01153, 0x1bd9a, 0x01067, 0x1c73a, 0x00f89, 0x1d0da, 0x00eb7, 0x1da79, 0x00df0, 0x1e419, 0x00d34, 0x1edb9, 0x00c82, 0x1f759,
 								  0x00bda, 0x200f9, 0x00b3c, 0x20a99, 0x00aa5, 0x21438, 0x00a17, 0x21dd8, 0x00990, 0x22778, 0x00911, 0x23118, 0x00898, 0x23ab8, 0x00826, 0x24458,
 								  0x007ba, 0x24df7, 0x00753, 0x25797, 0x006f2, 0x26137, 0x00696, 0x26ad7, 0x0063f, 0x27477, 0x005ed, 0x27e17, 0x0059f, 0x287b6, 0x00554, 0x29156,
 								  0x0050e, 0x29af6, 0x004cc, 0x2a497, 0x0048d, 0x2ae35, 0x00451, 0x2b7d6, 0x00418, 0x2c176, 0x003e2, 0x2cb15, 0x003af, 0x2d4b5, 0x0037f, 0x2de55
 								};
-												Add cabac bit costs float table.

											
										
										
											2014-09-10 13:06:19 +00:00
+								// Entropy bits scaled so that 50% probability yields 1 bit.
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								const float kvz_f_entropy_bits[128] =
-												Add cabac bit costs float table.

											
										
										
											2014-09-10 13:06:19 +00:00
+								{
 								  1.0, 1.0,
 								  0.92852783203125, 1.0751953125,
 								  0.86383056640625, 1.150390625,
 								  0.80499267578125, 1.225555419921875,
 								  0.751251220703125, 1.300750732421875,
 								  0.702056884765625, 1.375946044921875,
 								  0.656829833984375, 1.451141357421875,
 								  0.615203857421875, 1.526336669921875,
 								  0.576751708984375, 1.601531982421875,
 								  0.54119873046875, 1.67669677734375,
 								  0.508209228515625, 1.75189208984375,
 								  0.47760009765625, 1.82708740234375,
 								  0.449127197265625, 1.90228271484375,
 								  0.422637939453125, 1.97747802734375,
 								  0.39788818359375, 2.05267333984375,
 								  0.37481689453125, 2.127838134765625,
 								  0.353240966796875, 2.203033447265625,
 								  0.33306884765625, 2.278228759765625,
 								  0.31414794921875, 2.353424072265625,
 								  0.29644775390625, 2.428619384765625,
 								  0.279815673828125, 2.5037841796875,
 								  0.26422119140625, 2.5789794921875,
 								  0.24957275390625, 2.6541748046875,
 								  0.235809326171875, 2.7293701171875,
 								  0.222869873046875, 2.8045654296875,
 								  0.210662841796875, 2.879730224609375,
 								  0.199188232421875, 2.954925537109375,
 								  0.188385009765625, 3.030120849609375,
 								  0.17822265625, 3.105316162109375,
 								  0.168609619140625, 3.180511474609375,
 								  0.1595458984375, 3.255706787109375,
 								  0.1510009765625, 3.33087158203125,
 								  0.1429443359375, 3.40606689453125,
 								  0.135345458984375, 3.48126220703125,
 								  0.128143310546875, 3.55645751953125,
 								  0.121368408203125, 3.63165283203125,
 								  0.114959716796875, 3.706817626953125,
 								  0.10888671875, 3.782012939453125,
 								  0.1031494140625, 3.857208251953125,
 								  0.09771728515625, 3.932403564453125,
 								  0.09259033203125, 4.007598876953125,
 								  0.0877685546875, 4.082794189453125,
 								  0.083160400390625, 4.157958984375,
 								  0.078826904296875, 4.233154296875,
 								  0.07470703125, 4.308349609375,
 								  0.070831298828125, 4.383544921875,
 								  0.067138671875, 4.458740234375,
 								  0.06365966796875, 4.533935546875,
 								  0.06036376953125, 4.609100341796875,
 								  0.057220458984375, 4.684295654296875,
 								  0.05426025390625, 4.759490966796875,
 								  0.05145263671875, 4.834686279296875,
 								  0.048797607421875, 4.909881591796875,
 								  0.046295166015625, 4.985076904296875,
 								  0.043914794921875, 5.06024169921875,
 								  0.0416259765625, 5.13543701171875,
 								  0.03948974609375, 5.21063232421875,
 								  0.0374755859375, 5.285858154296875,
 								  0.035552978515625, 5.360992431640625,
 								  0.033721923828125, 5.43621826171875,
 								  0.031982421875, 5.51141357421875,
 								  0.03033447265625, 5.586578369140625,
 								  0.028778076171875, 5.661773681640625,
 								  0.027313232421875, 5.736968994140625,
 								};
-												Move intra mode search form intra module to search module.

- Make the actual intra prediction function global.

- Move the rdo stuff to rdo module.

											
										
										
											2014-05-19 10:18:06 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								// This struct is for passing data to kvz_rdoq_sign_hiding
 								struct sh_rates_t {
 								  // Bit cost of increasing rate by one.
 								  int32_t inc[32 * 32];
 								  // Bit cost of decreasing rate by one.
 								  int32_t dec[32 * 32];
 								  // Bit cost of going from zero to one.
 								  int32_t sig_coeff_inc[32 * 32];
 								  // Coeff minus quantized coeff.
 								  int32_t quant_delta[32 * 32];
 								};
-												Skip CABAC coefficient cost for --rd=0

											
										
										
											2017-07-26 14:42:45 +00:00
+								/**
 								 * \brief Calculate actual (or really close to actual) bitcost for coding
 								 * coefficients.
 								 *
-												Use RDO in final_cost

											
										
										
											2014-04-04 10:09:02 +00:00
+								 * \param coeff coefficient array
 								 * \param width coeff block width
 								 * \param type data type (0 == luma)
-												Skip CABAC coefficient cost for --rd=0

											
										
										
											2017-07-26 14:42:45 +00:00
+								 *
-												Use RDO in final_cost

											
										
										
											2014-04-04 10:09:02 +00:00
+								 * \returns bits needed to code input coefficients
 								 */
-												Add an option to skip CABAC for residual costs

Adds command line option --fast-residual-cost=<limit>. When QP is below
the limit, estimates the cost of coding the residual coefficients from
the sum of absolute coefficients. Skipping CABAC is not worth it with
high QPs because there are fewer coefficients so CABAC is not as slow.

											
										
										
											2018-06-19 10:39:23 +00:00
+								static INLINE uint32_t get_coeff_cabac_cost(
-												Skip CABAC coefficient cost for --rd=0

											
										
										
											2017-07-26 14:42:45 +00:00
+								    const encoder_state_t * const state,
 								    const coeff_t *coeff,
 								    int32_t width,
 								    int32_t type,
 								    int8_t scan_mode)
-												Implemented RDO function to calculate bits used for coefficient coding

											
										
										
											2014-04-04 08:49:31 +00:00
+								{
-												Clean up get_coeff_cost()

Since contexts were moved to cabac struct, there's no need to store contexts one by one

											
										
										
											2014-04-07 08:08:51 +00:00
+								  // Make sure there are coeffs present
-												Reduce copying in kvz_get_coeff_cost

Changes function kvz_get_coeff_cost to only copy the CABAC contexts and
not the whole encoder state.

Other threads could be simultaneously using the other parts of the
encoder state. Only copying the CABAC fixes a TSan data race warning.

											
										
										
											2017-07-20 12:04:02 +00:00
+								  bool found = false;
 								  for (int i = 0; i < width*width; i++) {
-												Fix bug in search relating to transform with no non-zero coefficients.

- Because cost was calculated even though there were no coefficients, these
  very good modes were less likely to be selected.

- Added assert to encode_coeff_nxn to avoid these problems in the future.

											
										
										
											2014-05-28 12:01:04 +00:00
+								    if (coeff[i] != 0) {
-												Implemented RDO function to calculate bits used for coefficient coding

											
										
										
											2014-04-04 08:49:31 +00:00
+								      found = 1;
 								      break;
 								    }
 								  }
-												Reduce copying in kvz_get_coeff_cost

Changes function kvz_get_coeff_cost to only copy the CABAC contexts and
not the whole encoder state.

Other threads could be simultaneously using the other parts of the
encoder state. Only copying the CABAC fixes a TSan data race warning.

											
										
										
											2017-07-20 12:04:02 +00:00
+								  if (!found) return 0;
-												Implemented RDO function to calculate bits used for coefficient coding

											
										
										
											2014-04-04 08:49:31 +00:00
-												Reduce copying in kvz_get_coeff_cost

Changes function kvz_get_coeff_cost to only copy the CABAC contexts and
not the whole encoder state.

Other threads could be simultaneously using the other parts of the
encoder state. Only copying the CABAC fixes a TSan data race warning.

											
										
										
											2017-07-20 12:04:02 +00:00
+								  // Take a copy of the CABAC so that we don't overwrite the contexts when
 								  // counting the bits.
 								  cabac_data_t cabac_copy;
 								  memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy));
-												Clean up tabs and whitespaces

											
										
										
											2014-04-04 13:04:44 +00:00
-												Use RDO in final_cost

											
										
										
											2014-04-04 10:09:02 +00:00
+								  // Clear bytes and bits and set mode to "count"
-												Reduce copying in kvz_get_coeff_cost

Changes function kvz_get_coeff_cost to only copy the CABAC contexts and
not the whole encoder state.

Other threads could be simultaneously using the other parts of the
encoder state. Only copying the CABAC fixes a TSan data race warning.

											
										
										
											2017-07-20 12:04:02 +00:00
+								  cabac_copy.only_count = 1;
 								  cabac_copy.num_buffered_bytes = 0;
 								  cabac_copy.bits_left = 23;
 								  // Execute the coding function.
 								  // It is safe to drop the const modifier since state won't be modified
 								  // when cabac.only_count is set.
 								  kvz_encode_coeff_nxn((encoder_state_t*) state,
 								                       &cabac_copy,
 								                       coeff,
 								                       width,
 								                       type,
 								                       scan_mode,
 );
 								  return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3);
-												Implemented RDO function to calculate bits used for coefficient coding

											
										
										
											2014-04-04 08:49:31 +00:00
+								}
-												Add an option to skip CABAC for residual costs

Adds command line option --fast-residual-cost=<limit>. When QP is below
the limit, estimates the cost of coding the residual coefficients from
the sum of absolute coefficients. Skipping CABAC is not worth it with
high QPs because there are fewer coefficients so CABAC is not as slow.

											
										
										
											2018-06-19 10:39:23 +00:00
+								/**
 								 * \brief Estimate bitcost for coding coefficients.
 								 *
 								 * \param coeff   coefficient array
 								 * \param width   coeff block width
 								 * \param type    data type (0 == luma)
 								 *
 								 * \returns       number of bits needed to code coefficients
 								 */
 								uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
 								                            const coeff_t *coeff,
 								                            int32_t width,
 								                            int32_t type,
 								                            int8_t scan_mode)
 								{
 								  if (state->qp >= state->encoder_control->cfg.fast_residual_cost_limit) {
 								    return get_coeff_cabac_cost(state, coeff, width, type, scan_mode);
 								  } else {
 								    // Estimate coeff coding cost based on QP and sum of absolute coeffs.
 								    const uint32_t sum = kvz_coeff_abs_sum(coeff, width * width);
 								    return (uint32_t)(sum * (state->qp * COEFF_COST_QP_FACTOR + COEFF_COST_BIAS) + 0.5);
 								  }
 								}
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								#define COEF_REMAIN_BIN_REDUCTION 3
 								/** Calculates the cost for specific absolute transform level
 								 * \param abs_level scaled quantized level
 								 * \param ctx_num_one current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
 								 * \param ctx_num_abs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
 								 * \param abs_go_rice Rice parameter for coeff_abs_level_minus3
 								 * \returns cost of given absolute transform level
 								 * From HM 12.0
 								 */
-												Added INLINE to kvz_get_ic_rate and kvz_get_coded_level in rdo.c

											
										
										
											2017-05-12 12:03:30 +00:00
+								INLINE int32_t kvz_get_ic_rate(encoder_state_t * const state,
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								                    uint32_t abs_level,
 								                    uint16_t ctx_num_one,
 								                    uint16_t ctx_num_abs,
 								                    uint16_t abs_go_rice,
 								                    uint32_t c1_idx,
 								                    uint32_t c2_idx,
 								                    int8_t type)
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								{
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								  cabac_data_t * const cabac = &state->cabac;
-												Added new ctx models for sig, parity and gtx, removed models for one and abs

											
										
										
											2018-08-29 12:57:40 +00:00
+								  int32_t rate = 1 << CTX_FRAC_BITS; // cost of sign bit
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								  uint32_t base_level  =  (c1_idx < C1FLAG_NUMBER)? (2 + (c2_idx < C2FLAG_NUMBER)) : 1;
-												Added new ctx models for sig, parity and gtx, removed models for one and abs

											
										
										
											2018-08-29 12:57:40 +00:00
+								  //cabac_ctx_t *base_one_ctx = (type == 0) ? &(cabac->ctx.cu_one_model_luma[0]) : &(cabac->ctx.cu_one_model_chroma[0]);
 								  //cabac_ctx_t *base_abs_ctx = (type == 0) ? &(cabac->ctx.cu_abs_model_luma[0]) : &(cabac->ctx.cu_abs_model_chroma[0]);
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
 								  if ( abs_level >= base_level ) {
-												Fix compiler warnings for VS2010 /W3.

- Working towards issue #11.
- Change lambda cost multiplier for intra NxN to from 256 to 4.
- Add 0.5 to the lambda multipliers so it's rounded instead of truncated.

											
										
										
											2014-02-06 08:57:29 +00:00
+								    int32_t symbol     = abs_level - base_level;
 								    int32_t length;
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								    if (symbol < (COEF_REMAIN_BIN_REDUCTION << abs_go_rice)) {
 								      length = symbol>>abs_go_rice;
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								      rate += (length+1+abs_go_rice) * (1 << CTX_FRAC_BITS);
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								    } else {
 								      length = abs_go_rice;
 								      symbol  = symbol - ( COEF_REMAIN_BIN_REDUCTION << abs_go_rice);
 								      while (symbol >= (1<<length)) {
 								        symbol -=  (1<<(length++));
 								      }
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								      rate += (COEF_REMAIN_BIN_REDUCTION+length+1-abs_go_rice+length) * (1 << CTX_FRAC_BITS);
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								    }
-												Added new ctx models for sig, parity and gtx, removed models for one and abs

											
										
										
											2018-08-29 12:57:40 +00:00
+								    //ToDo: fix for VVC
 								    /*
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								    rate += CTX_ENTROPY_BITS(&base_one_ctx[ctx_num_one],0);
-												Added new ctx models for sig, parity and gtx, removed models for one and abs

											
										
										
											2018-08-29 12:57:40 +00:00
+								    iRate += fracBitsPar.intBits[(uiAbsLevel - 1) & 1];
 								    iRate += fracBitsGt1.intBits[1];
 								    iRate += fracBitsGt2.intBits[1];
 								    */
 								  }
 								  else if (abs_level == 1)
 								  {
 								    /*
 								    iRate += fracBitsPar.intBits[0];
 								    iRate += fracBitsGt1.intBits[0];
 								    */
 								  }
 								  else if (abs_level == 2)
 								  {
 								    /*
 								    iRate += fracBitsPar.intBits[1];
 								    iRate += fracBitsGt1.intBits[0];
 								    */
 								  }
 								  else if (abs_level == 3)
 								  {
 								    /*
 								    iRate += fracBitsPar.intBits[0];
 								    iRate += fracBitsGt1.intBits[1];
 								    iRate += fracBitsGt2.intBits[0];
 								    */
 								  }
 								  else if (abs_level == 4)
 								  {
 								    /*
 								    iRate += fracBitsPar.intBits[1];
 								    iRate += fracBitsGt1.intBits[1];
 								    iRate += fracBitsGt2.intBits[0];
 								    */
-												Fix for RDOQ, added missing cost function

											
										
										
											2014-01-28 09:00:17 +00:00
+								  }
 								  return rate;
 								}
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								/** Get the best level in RD sense
 								 * \param coded_cost reference to coded cost
 								 * \param coded_cost0 reference to cost when coefficient is 0
 								 * \param coded_cost_sig reference to cost of significant coefficient
 								 * \param level_double reference to unscaled quantized level
 								 * \param max_abs_level scaled quantized level
 								 * \param ctx_num_sig current ctxInc for coeff_abs_significant_flag
 								 * \param ctx_num_one current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
 								 * \param ctx_num_abs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
 								 * \param abs_go_rice current Rice parameter for coeff_abs_level_minus3
 								 * \param q_bits quantization step size
 								 * \param temp correction factor
 								 * \param last indicates if the coefficient is the last significant
 								 * \returns best quantized transform level for given scan position
 								 * This method calculates the best quantized transform level for a given scan position.
 								 * From HM 12.0
 								 */
-												Added INLINE to kvz_get_ic_rate and kvz_get_coded_level in rdo.c

											
										
										
											2017-05-12 12:03:30 +00:00
+								INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								                           int32_t level_double, uint32_t max_abs_level,
 								                           uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
 								                           uint16_t abs_go_rice,
 								                           uint32_t c1_idx, uint32_t c2_idx,
 								                           int32_t q_bits,double temp, int8_t last, int8_t type)
 								{
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								  cabac_data_t * const cabac = &state->cabac;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  double cur_cost_sig   = 0;
 								  uint32_t best_abs_level = 0;
 								  int32_t abs_level;
 								  int32_t min_abs_level;
-												Added new ctx models for sig, parity and gtx, removed models for one and abs

											
										
										
											2018-08-29 12:57:40 +00:00
+								  cabac_ctx_t* base_sig_model = type?(cabac->ctx.cu_sig_model_chroma[0]):(cabac->ctx.cu_sig_model_luma[0]);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
 								  if( !last && max_abs_level < 3 ) {
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								    *coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    *coded_cost     = *coded_cost0 + *coded_cost_sig;
 								    if (max_abs_level == 0) return best_abs_level;
 								  } else {
 								    *coded_cost = MAX_DOUBLE;
 								  }
 								  if( !last ) {
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								    cur_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  }
 								  min_abs_level    = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								  for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								    double err       = (double)(level_double - ( abs_level * (1 << q_bits) ) );
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								    double cur_cost  = err * err * temp + state->lambda *
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								                       kvz_get_ic_rate( state, abs_level, ctx_num_one, ctx_num_abs,
-												Removed duplicate function from RDOQ

											
										
										
											2014-08-22 07:59:14 +00:00
+								                                    abs_go_rice, c1_idx, c2_idx, type);
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								    cur_cost        += cur_cost_sig;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
 								    if( cur_cost < *coded_cost ) {
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								      best_abs_level  = abs_level;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								      *coded_cost     = cur_cost;
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								      *coded_cost_sig = cur_cost_sig;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    }
 								  }
 								  return best_abs_level;
 								}
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
 								/** Calculates the cost of signaling the last significant coefficient in the block
 								 * \param pos_x X coordinate of the last significant coefficient
 								 * \param pos_y Y coordinate of the last significant coefficient
 								 * \returns cost of last significant coefficient
 								 * \param uiWidth width of the transform unit (TU)
 								 *
 								 * From HM 12.0
 								*/
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								static double get_rate_last(const encoder_state_t * const state,
-												rdo: Staticize

											
										
										
											2014-02-21 13:21:14 +00:00
+								                            const uint32_t  pos_x, const uint32_t pos_y,
 								                            int32_t* last_x_bits, int32_t* last_y_bits)
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								{
 								  uint32_t ctx_x   = g_group_idx[pos_x];
 								  uint32_t ctx_y   = g_group_idx[pos_y];
 								  double uiCost = last_x_bits[ ctx_x ] + last_y_bits[ ctx_y ];
 								  if( ctx_x > 3 ) {
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
+								    uiCost += CTX_FRAC_ONE_BIT * ((ctx_x - 2) >> 1);
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								  }
 								  if( ctx_y > 3 ) {
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
+								    uiCost += CTX_FRAC_ONE_BIT * ((ctx_y - 2) >> 1);
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								  }
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								  return state->lambda * uiCost;
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								}
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really a need to emphasize that
  it's the encoder's state. Also, it isn't really the encoder's state,
  but the encoding job's state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type,
-												rdo: Staticize

											
										
										
											2014-02-21 13:21:14 +00:00
+								                           int32_t* last_x_bits, int32_t* last_y_bits)
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
+								{
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								  cabac_data_t * const cabac = &state->cabac;
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
+								  int32_t bits_x = 0, bits_y = 0;
 								  int32_t blk_size_offset_x, blk_size_offset_y, shiftX, shiftY;
 								  int32_t ctx;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Rename struct cabac_ctx to cabac_ctx_t.

											
										
										
											2015-03-04 11:26:48 +00:00
+								  cabac_ctx_t *base_ctx_x = (type ? cabac->ctx.cu_ctx_last_x_chroma : cabac->ctx.cu_ctx_last_x_luma);
 								  cabac_ctx_t *base_ctx_y = (type ? cabac->ctx.cu_ctx_last_y_chroma : cabac->ctx.cu_ctx_last_y_luma);
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								  blk_size_offset_x = type ? 0: (kvz_g_convert_to_bit[ width ] *3 + ((kvz_g_convert_to_bit[ width ] +1)>>2));
 								  blk_size_offset_y = type ? 0: (kvz_g_convert_to_bit[ height ]*3 + ((kvz_g_convert_to_bit[ height ]+1)>>2));
 								  shiftX = type ? kvz_g_convert_to_bit[ width  ] :((kvz_g_convert_to_bit[ width  ]+3)>>2);
 								  shiftY = type ? kvz_g_convert_to_bit[ height ] :((kvz_g_convert_to_bit[ height ]+3)>>2);
-												Implemented RDOQ function get_rate_last()

											
										
										
											2014-01-22 12:12:46 +00:00
 								  for (ctx = 0; ctx < g_group_idx[ width - 1 ]; ctx++) {
 								    int32_t ctx_offset = blk_size_offset_x + (ctx >>shiftX);
 								    last_x_bits[ ctx ] = bits_x + CTX_ENTROPY_BITS(&base_ctx_x[ ctx_offset ],0);
 								    bits_x += CTX_ENTROPY_BITS(&base_ctx_x[ ctx_offset ],1);
 								  }
 								  last_x_bits[ctx] = bits_x;
 								  for (ctx = 0; ctx < g_group_idx[ height - 1 ]; ctx++) {
 								    int32_t ctx_offset = blk_size_offset_y + (ctx >>shiftY);
 								    last_y_bits[ ctx ] = bits_y + CTX_ENTROPY_BITS(&base_ctx_y[ ctx_offset ],0);
 								    bits_y +=  CTX_ENTROPY_BITS(&base_ctx_y[ ctx_offset ],1);
 								  }
 								  last_y_bits[ctx] = bits_y;
 								}
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the fixed point representation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
+								/**
 								 * \brief Select which coefficient to change for sign hiding, and change it.
 								 *
 								 * When sign hiding is enabled, the last sign bit of the last coefficient is
 								 * calculated from the parity of the other coefficients. If the parity is not
 								 * correct, one coefficient has to be changed by one. This function uses
 								 * tables generated during RDOQ to select the best coefficient to change.
 								 */
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								void kvz_rdoq_sign_hiding(
 								    const encoder_state_t *const state,
 								    const int32_t qp_scaled,
 								    const uint32_t *const scan2raster,
 								    const struct sh_rates_t *const sh_rates,
 								    const int32_t last_pos,
 								    const coeff_t *const coeffs,
 								    coeff_t *const quant_coeffs)
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								{
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								  const encoder_control_t * const ctrl = state->encoder_control;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
+								  int inv_quant = kvz_g_inv_quant_scales[qp_scaled % 6];
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								  // This somehow scales quant_delta into fractional bits. Instead of the bits
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
+								  // being multiplied by lambda, the residual is divided by it, or something
 								  // like that.
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								  const int64_t rd_factor = (inv_quant * inv_quant * (1 << (2 * (qp_scaled / 6)))
 								                      / state->lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
 								  const int last_cg = (last_pos - 1) >> LOG2_SCAN_SET_SIZE;
 								  for (int32_t cg_scan = last_cg; cg_scan >= 0; cg_scan--) {
 								    const int32_t cg_coeff_scan = cg_scan << LOG2_SCAN_SET_SIZE;
 								    // Find positions of first and last non-zero coefficients in the CG.
 								    int32_t last_nz_scan = -1;
 								    for (int32_t coeff_i = SCAN_SET_SIZE - 1; coeff_i >= 0; --coeff_i) {
 								      if (quant_coeffs[scan2raster[coeff_i + cg_coeff_scan]]) {
 								        last_nz_scan = coeff_i;
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								        break;
 								      }
 								    }
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    int32_t first_nz_scan = SCAN_SET_SIZE;
 								    for (int32_t coeff_i = 0; coeff_i <= last_nz_scan; coeff_i++) {
 								      if (quant_coeffs[scan2raster[coeff_i + cg_coeff_scan]]) {
 								        first_nz_scan = coeff_i;
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								        break;
 								      }
 								    }
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    if (last_nz_scan - first_nz_scan < SBH_THRESHOLD) {
 								      continue;
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								    }
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    const int32_t signbit = quant_coeffs[scan2raster[cg_coeff_scan + first_nz_scan]] <= 0;
 								    unsigned abs_coeff_sum = 0;
 								    for (int32_t coeff_scan = first_nz_scan; coeff_scan <= last_nz_scan; coeff_scan++) {
 								      abs_coeff_sum += quant_coeffs[scan2raster[coeff_scan + cg_coeff_scan]];
 								    }
 								    if (signbit == (abs_coeff_sum & 0x1)) {
 								      // Sign already matches with the parity, no need to modify coefficients.
 								      continue;
 								    }
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    // Otherwise, search for the best coeff to change by one and change it.
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    struct {
 								      int64_t cost;
 								      int pos;
 								      int change;
 								    } current, best = { MAX_INT64, 0, 0 };
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the the fixed point presentation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    const int last_coeff_scan = (cg_scan == last_cg ? last_nz_scan : SCAN_SET_SIZE - 1);
 								    for (int coeff_scan = last_coeff_scan; coeff_scan >= 0; --coeff_scan) {
 								      current.pos = scan2raster[coeff_scan + cg_coeff_scan];
 								      // Shift the calculation back into original precision to avoid
 								      // changing the bitstream.
 								#     define PRECISION_INC (15 - CTX_FRAC_BITS)
 								      int64_t quant_cost_in_bits = rd_factor * sh_rates->quant_delta[current.pos];
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to comform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								      coeff_t abs_coeff = abs(quant_coeffs[current.pos]);
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								      if (abs_coeff != 0) {
 								        // Choose between incrementing and decrementing a non-zero coeff.
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								        int64_t inc_bits = sh_rates->inc[current.pos];
 								        int64_t dec_bits = sh_rates->dec[current.pos];
 								        if (abs_coeff == 1) {
 								          // We save sign bit and sig_coeff goes to zero.
 								          dec_bits -= CTX_FRAC_ONE_BIT + sh_rates->sig_coeff_inc[current.pos];
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								        }
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								        if (cg_scan == last_cg && last_nz_scan == coeff_scan && abs_coeff == 1) {
 								          // Changing the last non-zero bit in the last cg to zero.
 								          // This might save a lot of bits if the next bits are already
 								          // zeros, or just a coupple fractional bits if they are not.
 								          // TODO: Check if calculating the real savings makes sense.
 								          dec_bits -= 4 * CTX_FRAC_ONE_BIT;
 								        }
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								        inc_bits = -quant_cost_in_bits + inc_bits * (1 << PRECISION_INC);
 								        dec_bits = quant_cost_in_bits + dec_bits * (1 << PRECISION_INC);
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								        if (inc_bits < dec_bits) {
 								          current.change = 1;
 								          current.cost = inc_bits;
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								        } else {
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								          current.change = -1;
 								          current.cost = dec_bits;
 								          if (coeff_scan == first_nz_scan && abs_coeff == 1) {
 								            // Don't turn first non-zero coeff into zero.
 								            // Seems kind of arbitrary. It's probably because it could lead to
 								            // breaking SBH_THRESHOLD.
 								            current.cost = MAX_INT64;
 								          }
 								        }
 								      } else {
 								        // Try incrementing a zero coeff.
 								        // Add sign bit, other bits and sig_coeff goes to one.
 								        int bits = CTX_FRAC_ONE_BIT + sh_rates->inc[current.pos] + sh_rates->sig_coeff_inc[current.pos];
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								        current.cost = -llabs(quant_cost_in_bits) + bits * (1 << PRECISION_INC);
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								        current.change = 1;
 								        if (coeff_scan < first_nz_scan) {
 								          if (((coeffs[current.pos] >= 0) ? 0 : 1) != signbit) {
 								            current.cost = MAX_INT64;
 								          }
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								        }
 								      }
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
 								      if (current.cost < best.cost) {
 								        best = current;
 								      }
 								    }
 								    if (quant_coeffs[best.pos] == 32767 || quant_coeffs[best.pos] == -32768) {
 								      best.change = -1;
 								    }
 								    if (coeffs[best.pos] >= 0) {
 								      quant_coeffs[best.pos] += best.change;
 								    } else {
 								      quant_coeffs[best.pos] -= best.change;
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
+								    }
 								  }
 								}
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								/** RDOQ with CABAC
 								 * \returns void
 								 * Rate distortion optimized quantization for entropy
 								 * coding engines using probability models like CABAC
 								 * From HM 12.0
 								 */
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff, int32_t width,
-												Remove abs_sum from coeff quantization.

- It's meant for checking if there are any coefficients, but we don't use it
  and it's annoying to remember to initialize it and pass it around. The
  benefit should be quite small anyway.

											
										
										
											2014-05-13 12:08:34 +00:00
+								           int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth)
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								{
-												Rename parameter encoder_state to state  in all functions.

- It's so widely used that there isn't really need to emphasize that
  it's the encoders state. Also, it isn't really the encoders state,
  but encoding jobs state.

											
										
										
											2015-03-04 15:00:23 +00:00
+								  const encoder_control_t * const encoder = state->encoder_control;
 								  cabac_data_t * const cabac = &state->cabac;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  uint32_t log2_tr_size      = kvz_g_convert_to_bit[ width ] + 2;
 								  int32_t  transform_shift   = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size;  // Represents scaling through forward transform
 								  uint16_t go_rice_param     = 0;
 								  uint32_t log2_block_size   = kvz_g_convert_to_bit[ width ] + 2;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
+								  int32_t  scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								  int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
-												Move sign hiding stuff in rdoq to its own function.

- There is some stuff from sign hiding left intermingled with rdoq code,
  but I don't want to change the code too much before testing that I didn't
  break anything.

											
										
										
											2015-01-24 17:56:41 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												scalinglist in independent file

											
										
										
											2014-04-16 07:40:42 +00:00
+								  const int32_t *quant_coeff  = encoder->scaling_list.quant_coeff[log2_tr_size-2][scalinglist_type][qp_scaled%6];
 								  const double *err_scale     = encoder->scaling_list.error_scale[log2_tr_size-2][scalinglist_type][qp_scaled%6];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
 								  double block_uncoded_cost = 0;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  double cost_coeff [ 32 * 32 ];
 								  double cost_sig   [ 32 * 32 ];
 								  double cost_coeff0[ 32 * 32 ];
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								  struct sh_rates_t sh_rates;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												Refactor coefficient group scan mapping lists.

The relation between coefficients positions and coefficient group positions
was a bit confusing due to the use of 16x16 diagonal coefficient mappings
also as coefficient group mappings.

- Moved all coefficient group mappings to their own const arrays and added
  a new array to select the correct coefficient group mapping. This removes
  special cases for 8x8 and 32x32 transform sizes.

- Removed all coefficient group mapping initialization from init_sig_last_scan.

- Removed 128x128 and 64x64 from regular coefficient group array as those
  transform sizes don't exist anymore in HEVC.

											
										
										
											2014-03-14 13:45:23 +00:00
+								  const uint32_t *scan_cg = g_sig_last_scan_cg[log2_block_size - 2][scan_mode];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  const uint32_t cg_size = 16;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  const int32_t  shift = 4 >> 1;
 								  const uint32_t num_blk_side = width >> shift;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  double   cost_coeffgroup_sig[ 64 ];
 								  uint32_t sig_coeffgroup_flag[ 64 ];
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  uint16_t    ctx_set    = 0;
 								  int16_t     c1         = 1;
 								  int16_t     c2         = 0;
 								  double      base_cost  = 0;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  uint32_t    c1_idx     = 0;
 								  uint32_t    c2_idx     = 0;
 								  int32_t     base_level;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								  const uint32_t *scan = kvz_g_sig_last_scan[ scan_mode ][ log2_block_size - 1 ];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  int32_t cg_last_scanpos = -1;
 								  int32_t last_scanpos = -1;
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  uint32_t cg_num = width * height >> 4;
-												Reduce more unnecessary initializations.

											
										
										
											2017-01-30 16:25:59 +00:00
+								  // Explicitly tell the only possible numbers of elements to be zeroed.
 								  // Hope the compiler is able to utilize this information.
 								  switch (cg_num) {
-												Modify and use FILL_ARRAY macro to prevent warning on GCC 7

Following warning was given and is false positive

error: 'memset' used with length equal to number of elements without multiplication by element size [-Werror=memset-elt-size]

											
										
										
											2017-04-11 09:57:22 +00:00
+								    case  1: FILL_ARRAY(sig_coeffgroup_flag, 0,  1); break;
 								    case  4: FILL_ARRAY(sig_coeffgroup_flag, 0,  4); break;
 								    case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break;
 								    case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break;
-												Reduce more unnecessary initializations.

											
										
										
											2017-01-30 16:25:59 +00:00
+								    default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups");
 								  }
-												Rename struct cabac_ctx to cabac_ctx_t.

											
										
										
											2015-03-04 11:26:48 +00:00
+								  cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]);
-												Fix cu_sig_coeff_group_model init and some instances of cu_sig_model usage

											
										
										
											2018-08-30 06:08:08 +00:00
+								  cabac_ctx_t *baseCtx              = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0][0]) : &(cabac->ctx.cu_sig_model_chroma[0][0]);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												Remove struct rd_stats.

											
										
										
											2015-03-04 11:58:24 +00:00
+								  struct {
 								    double coded_level_and_dist;
 								    double uncoded_dist;
 								    double sig_cost;
 								    double sig_cost_0;
 								    int32_t nnz_before_pos0;
 								  } rd_stats;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  //Find last cg and last scanpos
-												Reduce more unnecessary initializations.

											
										
										
											2017-01-30 16:25:59 +00:00
+								  int32_t cg_scanpos;
 								  for (cg_scanpos = (cg_num - 1); cg_scanpos >= 0; cg_scanpos--)
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  {
 								    for (int32_t scanpos_in_cg = (cg_size - 1); scanpos_in_cg >= 0; scanpos_in_cg--)
 								    {
 								      int32_t  scanpos        = cg_scanpos*cg_size + scanpos_in_cg;
 								      uint32_t blkpos         = scan[scanpos];
 								      int32_t q               = quant_coeff[blkpos];
 								      int32_t level_double    = coef[blkpos];
 								      level_double            = MIN(abs(level_double) * q, MAX_INT - (1 << (q_bits - 1)));
 								      uint32_t max_abs_level  = (level_double + (1 << (q_bits - 1))) >> q_bits;
 								      if (max_abs_level > 0) {
 								        last_scanpos    = scanpos;
 								        ctx_set         = (scanpos > 0 && type == 0) ? 2 : 0;
 								        cg_last_scanpos = cg_scanpos;
-												Remove unnecessary memory initialization to zero

Values in the interval [last_scanpos, 0] are overwritten in the following for loop, except for the sig_coeff_inc value.

											
										
										
											2017-01-30 13:18:02 +00:00
+								        sh_rates.sig_coeff_inc[blkpos] = 0;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        break;
 								      }
 								      dest_coeff[blkpos] = 0;
 								    }
 								    if (last_scanpos != -1) break;
 								  }
 								  if (last_scanpos == -1) {
 								    return;
 								  }
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Reduce more unnecessary initializations.

											
										
										
											2017-01-30 16:25:59 +00:00
+								  for (; cg_scanpos >= 0; cg_scanpos--) cost_coeffgroup_sig[cg_scanpos] = 0;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  int32_t last_x_bits[32], last_y_bits[32];
 								  calc_last_bits(state, width, height, type, last_x_bits, last_y_bits);
 								  for (int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
 								    uint32_t cg_blkpos  = scan_cg[cg_scanpos];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    uint32_t cg_pos_y   = cg_blkpos / num_blk_side;
 								    uint32_t cg_pos_x   = cg_blkpos - (cg_pos_y * num_blk_side);
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Prefix all non-static symbols with "kvz_".

											
										
										
											2015-08-26 08:50:27 +00:00
+								    int32_t pattern_sig_ctx = kvz_context_calc_pattern_sig_ctx(sig_coeffgroup_flag,
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								                                                           cg_pos_x, cg_pos_y, width);
-												Clean up calls to memset.

- Replaces all calls to memset with new FILL and FILL_ARRAY macros. The use
  of memset was inconsistent and we never use it for anything complicated.

											
										
										
											2015-02-13 09:56:55 +00:00
+								    FILL(rd_stats, 0);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								    for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--)  {
 								      int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
 								      if (scanpos > last_scanpos) continue;
 								      uint32_t blkpos         = scan[scanpos];
 								      int32_t q               = quant_coeff[blkpos];
 								      double temp             = err_scale[blkpos];
 								      int32_t level_double    = coef[blkpos];
 								      level_double            = MIN(abs(level_double) * q , MAX_INT - (1 << (q_bits - 1)));
 								      uint32_t max_abs_level  = (level_double + (1 << (q_bits - 1))) >> q_bits;
 								      double err              = (double)level_double;
 								      cost_coeff0[scanpos]    = err * err * temp;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								      block_uncoded_cost      += cost_coeff0[ scanpos ];
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      //===== coefficient level estimation =====
 								      int32_t  level;
 								      uint16_t  one_ctx = 4 * ctx_set + c1;
 								      uint16_t  abs_ctx = ctx_set + c2;
 								      if( scanpos == last_scanpos ) {
 								        level            = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
 								                                             level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param,
 								                                             c1_idx, c2_idx, q_bits, temp, 1, type );
 								      } else {
 								        uint32_t  pos_y    = blkpos >> log2_block_size;
 								        uint32_t  pos_x    = blkpos - ( pos_y << log2_block_size );
 								        uint16_t  ctx_sig  = (uint16_t)kvz_context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y,
 								                                                     log2_block_size, type);
 								        level              = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
 								                                             level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param,
 								                                             c1_idx, c2_idx, q_bits, temp, 0, type );
-												Drop redundant fields in encoder_control_t

Some of the fields in encoder_control_t were simply copies of the
corresponding fields in kvz_config. This commit drops the copied fields
in favor of using the fields in encoder_control_t.cfg directly.

											
										
										
											2017-02-06 11:00:25 +00:00
+								        if (encoder->cfg.signhide_enable) {
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								          int greater_than_zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 1);
 								          int zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 0);
 								          sh_rates.sig_coeff_inc[blkpos] = greater_than_zero - zero;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      }
-												Commented out sign hiding code, which is not used in VVC

											
										
										
											2018-08-17 06:38:11 +00:00
+								      /*
-												Drop redundant fields in encoder_control_t

Some of the fields in encoder_control_t were simply copies of the
corresponding fields in kvz_config. This commit drops the copied fields
in favor of using the fields in encoder_control_t.cfg directly.

											
										
										
											2017-02-06 11:00:25 +00:00
+								      if (encoder->cfg.signhide_enable) {
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								        sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        if (level > 0) {
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								          int32_t rate_now  = kvz_get_ic_rate(state, level, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
 								          int32_t rate_up   = kvz_get_ic_rate(state, level + 1, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
 								          int32_t rate_down = kvz_get_ic_rate(state, level - 1, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, type);
 								          sh_rates.inc[blkpos] = rate_up - rate_now;
 								          sh_rates.dec[blkpos] = rate_down - rate_now;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        } else { // level == 0
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								          sh_rates.inc[blkpos]   = CTX_ENTROPY_BITS(&base_one_ctx[one_ctx], 0);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        }
-												Commented out sign hiding code, which is not used in VVC

											
										
										
											2018-08-17 06:38:11 +00:00
+								      }*/
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      dest_coeff[blkpos] = (coeff_t)level;
 								      base_cost         += cost_coeff[scanpos];
 								      base_level = (c1_idx < C1FLAG_NUMBER) ? (2 + (c2_idx < C2FLAG_NUMBER)) : 1;
 								      if (level >= base_level) {
 								        if(level  > 3*(1<<go_rice_param)) {
 								          go_rice_param = MIN(go_rice_param + 1, 4);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      }
 								      if (level >= 1) c1_idx ++;
 								      //===== update bin model =====
 								      if (level > 1) {
 								        c1 = 0;
 								        c2 += (c2 < 2);
 								        c2_idx ++;
 								      } else if( (c1 < 3) && (c1 > 0) && level) {
 								        c1++;
 								      }
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      //===== context set update =====
 								      if ((scanpos % SCAN_SET_SIZE == 0) && scanpos > 0) {
 								        c2                = 0;
 								        go_rice_param     = 0;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        c1_idx   = 0;
 								        c2_idx   = 0;
 								        ctx_set = (scanpos == SCAN_SET_SIZE || type != 0) ? 0 : 2;
 								        if( c1 == 0 ) {
 								          ctx_set++;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        c1 = 1;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								      }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								      rd_stats.sig_cost += cost_sig[scanpos];
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      if ( scanpos_in_cg == 0 ) {
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        rd_stats.sig_cost_0 = cost_sig[scanpos];
 								      }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      if ( dest_coeff[blkpos] )  {
 								        sig_coeffgroup_flag[cg_blkpos] = 1;
 								        rd_stats.coded_level_and_dist   += cost_coeff[scanpos] - cost_sig[scanpos];
 								        rd_stats.uncoded_dist           += cost_coeff0[scanpos];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        if ( scanpos_in_cg != 0 ) {
 								          rd_stats.nnz_before_pos0++;
 								        }
 								      }
 								    } //end for (scanpos_in_cg)
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								    if( cg_scanpos ) {
 								      if (sig_coeffgroup_flag[cg_blkpos] == 0) {
 								        uint32_t ctx_sig  = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
 								                                                        cg_pos_y, width);
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								        cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        base_cost += cost_coeffgroup_sig[cg_scanpos]  - rd_stats.sig_cost;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								      } else {
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        if (cg_scanpos < cg_last_scanpos){
 								          double cost_zero_cg;
 								          uint32_t ctx_sig;
 								          if (rd_stats.nnz_before_pos0 == 0) {
 								            base_cost -= rd_stats.sig_cost_0;
 								            rd_stats.sig_cost -= rd_stats.sig_cost_0;
 								          }
 								          // rd-cost if SigCoeffGroupFlag = 0, initialization
 								          cost_zero_cg = base_cost;
 								          // add SigCoeffGroupFlag cost to total cost
 								          ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
 								            cg_pos_y, width);
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								          cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								          base_cost += cost_coeffgroup_sig[cg_scanpos];
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								          cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
 								          // try to convert the current coeff group from non-zero to all-zero
 								          cost_zero_cg += rd_stats.uncoded_dist;          // distortion for resetting non-zero levels to zero levels
 								          cost_zero_cg -= rd_stats.coded_level_and_dist;  // distortion and level cost for keeping all non-zero levels
 								          cost_zero_cg -= rd_stats.sig_cost;              // sig cost for all coeffs, including zero levels and non-zerl levels
 								          // if we can save cost, change this block to all-zero block
 								          if (cost_zero_cg < base_cost) {
 								            sig_coeffgroup_flag[cg_blkpos] = 0;
 								            base_cost = cost_zero_cg;
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								            cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
 								            // reset coeffs to 0 in this block
 								            for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
 								              int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
 								              uint32_t blkpos = scan[scanpos];
 								              if (dest_coeff[blkpos]){
 								                dest_coeff[blkpos] = 0;
 								                cost_coeff[scanpos] = cost_coeff0[scanpos];
 								                cost_sig[scanpos] = 0;
 								              }
 								            }
 								          } // end if ( cost_all_zeros < base_cost )
 								        }
 								      } // end if if (sig_coeffgroup_flag[ cg_blkpos ] == 0)
 								    } else {
 								      sig_coeffgroup_flag[cg_blkpos] = 1;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    }
 								  } //end for (cg_scanpos)
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  //===== estimate last position =====
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  double  best_cost        = 0;
 								  int32_t ctx_cbf          = 0;
 								  int8_t found_last        = 0;
 								  int32_t best_last_idx_p1 = 0;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
 								  if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								    best_cost  = block_uncoded_cost +   state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
 								    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
+								  } else {
-												Implemented JVET-K0072 based cbf context selections

											
										
										
											2018-08-29 07:12:07 +00:00
+								    // ToDo: update for VVC contexts
 								    cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_cb):(cabac->ctx.qt_cbf_model_luma);
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								    ctx_cbf    = ( type ? tr_depth : !tr_depth);
-												Use separate lambda and QP for each LCU

Adds fields lambda, lambda_sqrt and qp to encoder_state_t. Drops field
cur_lambda_cost_sqrt from encoder_state_config_frame_t and renames
cur_lambda_cost to lambda.

											
										
										
											2016-08-21 04:16:59 +00:00
+								    best_cost  = block_uncoded_cost +  state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
 								    base_cost +=   state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  }
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    uint32_t cg_blkpos = scan_cg[cg_scanpos];
 								    base_cost -= cost_coeffgroup_sig[cg_scanpos];
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								    if (sig_coeffgroup_flag[ cg_blkpos ]) {
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								      for ( int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
 								        int32_t  scanpos = cg_scanpos*cg_size + scanpos_in_cg;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        if (scanpos > last_scanpos) continue;
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								        uint32_t blkpos  = scan[scanpos];
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        if( dest_coeff[ blkpos ] ) {
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								          uint32_t   pos_y = blkpos >> log2_block_size;
 								          uint32_t   pos_x = blkpos - ( pos_y << log2_block_size );
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Some changes for PCM and Intra towards VTM 2.0 compatibility.

											
										
										
											2018-08-27 06:18:15 +00:00
+								          double cost_last = /**(scan_mode == SCAN_VER) ? get_rate_last(state, pos_y, pos_x,last_x_bits,last_y_bits) : **/get_rate_last(state, pos_x, pos_y, last_x_bits,last_y_bits );
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								          double totalCost = base_cost + cost_last - cost_sig[ scanpos ];
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								          if( totalCost < best_cost ) {
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								            best_last_idx_p1 = scanpos + 1;
 								            best_cost        = totalCost;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								          }
 								          if( dest_coeff[ blkpos ] > 1 ) {
 								            found_last = 1;
 								            break;
 								          }
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								          base_cost -= cost_coeff[scanpos];
 								          base_cost += cost_coeff0[scanpos];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        } else {
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								          base_cost -= cost_sig[scanpos];
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								        }
 								      } //end for
 								      if (found_last) break;
 								    } // end if (sig_coeffgroup_flag[ cg_blkpos ])
 								  } // end for
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  uint32_t abs_sum = 0;
 								  for ( int32_t scanpos = 0; scanpos < best_last_idx_p1; scanpos++) {
 								    int32_t blkPos     = scan[scanpos];
 								    int32_t level      = dest_coeff[blkPos];
 								    abs_sum            += level;
 								    dest_coeff[blkPos] = (coeff_t)(( coef[blkPos] < 0 ) ? -level : level);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  }
 								  //===== clean uncoded coefficients =====
-												RDOQ reworked in rdo.c. rdoq_signhide now skips coeffs that are after best_last_idx.

											
										
										
											2016-09-09 07:16:51 +00:00
+								  for ( int32_t scanpos = best_last_idx_p1; scanpos <= last_scanpos; scanpos++) {
 								    dest_coeff[scan[scanpos]] = 0;
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  }
-												whitespace: Drop trailing spaces

											
										
										
											2014-02-21 13:00:20 +00:00
-												Drop redundant fields in encoder_control_t

Some of the fields in encoder_control_t were simply copies of the
corresponding fields in kvz_config. This commit drops the copied fields
in favor of using the fields in encoder_control_t.cfg directly.

											
										
										
											2017-02-06 11:00:25 +00:00
+								  if (encoder->cfg.signhide_enable && abs_sum >= 2) {
-												Refactor kvz_rdoq_sign_hiding

Rename and reorder everything to make more sense.

- Moved input tables into their own struct and renamed them to what
  they actually represent.
- Renamed pretty much every variable to conform to our style and
  to make sense.
- Removed the lastCG stuff, as the function already gets passed the
  last coeff anyway. (it was named width, what the hell?)

											
										
										
											2017-01-19 19:27:53 +00:00
+								    kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff);
-												Imported and converted RDOQ from HM 12.0, NOT WORKING YET

											
										
										
											2014-01-20 14:34:11 +00:00
+								  }
 								}
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
-												Refactor inter MV candidate selection

Moves duplicate code for checking the best MV candidate from functions
calc_mvd_cost, search_pu_inter_ref and search_pu_inter to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								/**
 								 * Calculate cost of actual motion vectors using CABAC coding
 								 */
-												Put inter search parameters in a single struct

Adds struct inter_search_info_t for holding the parameters that are used
by most function related to inter search. Passing the parameters in
a single struct greatly reduces the number of parameters for many
functions.

											
										
										
											2017-08-03 11:11:36 +00:00
+								uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state,
-												Refactor inter MVD cost functions

Moves duplicate code for writing the MVD of a single motion vector from
kvz_get_mvd_coding_cost_cabac and encoder_inter_prediction_unit to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								                                       const cabac_data_t* cabac,
 								                                       const int32_t mvd_hor,
 								                                       const int32_t mvd_ver)
-												Make mvd_coding_cost functions take const cabac

											
										
										
											2016-08-29 20:44:41 +00:00
+								{
-												Refactor inter MVD cost functions

Moves duplicate code for writing the MVD of a single motion vector from
kvz_get_mvd_coding_cost_cabac and encoder_inter_prediction_unit to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								  cabac_data_t cabac_copy = *cabac;
 								  cabac_copy.only_count = 1;
-												Added a function for cabac mvd coding cost get_mvd_coding_cost_cabac()

Conflicts:
	src/rdo.c

											
										
										
											2015-08-26 07:32:52 +00:00
-												Refactor inter MVD cost functions

Moves duplicate code for writing the MVD of a single motion vector from
kvz_get_mvd_coding_cost_cabac and encoder_inter_prediction_unit to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								  // It is safe to drop const here because cabac->only_count is set.
 								  kvz_encode_mvd((encoder_state_t*) state, &cabac_copy, mvd_hor, mvd_ver);
 								  uint32_t bitcost =
 								    ((23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3)) -
 								    ((23 - cabac->bits_left)     + (cabac->num_buffered_bytes << 3));
-												Added a function for cabac mvd coding cost get_mvd_coding_cost_cabac()

Conflicts:
	src/rdo.c

											
										
										
											2015-08-26 07:32:52 +00:00
 								  return bitcost;
 								}
-												Cleanup of mv-rdo, removed unused functions

											
										
										
											2015-11-05 12:36:36 +00:00
+								/** MVD cost calculation with CABAC
 								* \returns int
 								* Calculates Motion Vector cost and related costs using CABAC coding
 								*/
-												Put inter search parameters in a single struct

Adds struct inter_search_info_t for holding the parameters that are used
by most function related to inter search. Passing the parameters in
a single struct greatly reduces the number of parameters for many
functions.

											
										
										
											2017-08-03 11:11:36 +00:00
+								uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
 								                                 int x,
 								                                 int y,
 								                                 int mv_shift,
 								                                 int16_t mv_cand[2][2],
 								                                 inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
 								                                 int16_t num_cand,
 								                                 int32_t ref_idx,
 								                                 uint32_t *bitcost)
 								{
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								  cabac_data_t state_cabac_copy;
 								  cabac_data_t* cabac;
 								  uint32_t merge_idx;
-												Refactor inter MV candidate selection

Moves duplicate code for checking the best MV candidate from functions
calc_mvd_cost, search_pu_inter_ref and search_pu_inter to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								  vector2d_t mvd = { 0, 0 };
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								  int8_t merged = 0;
 								  int8_t cur_mv_cand = 0;
-												Fix undefined left shifts in rdo

Replaces left shifts by multiplications when the operand may be
a negative value. Left shift of a negative value is undefined behavior.

											
										
										
											2017-07-21 11:07:34 +00:00
+								  x *= 1 << mv_shift;
 								  y *= 1 << mv_shift;
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
 								  // Check every candidate to find a match
 								  for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
 								    if (merge_cand[merge_idx].dir == 3) continue;
 								    if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
 								      merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y &&
-												Changes to reference lists to behave more like L0/L1 lists from the specification

											
										
										
											2017-06-26 12:31:57 +00:00
+								      state->frame->ref_LX[merge_cand[merge_idx].dir - 1][
 								        merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1]
 								      ] == ref_idx)
 								    {
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								      merged = 1;
 								      break;
 								    }
 								  }
-												Added a function for cabac mvd coding cost get_mvd_coding_cost_cabac()

Conflicts:
	src/rdo.c

											
										
										
											2015-08-26 07:32:52 +00:00
+								  // Store cabac state and contexts
 								  memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));
 								  // Clear bytes and bits and set mode to "count"
 								  state_cabac_copy.only_count = 1;
 								  state_cabac_copy.num_buffered_bytes = 0;
 								  state_cabac_copy.bits_left = 23;
 								  cabac = &state_cabac_copy;
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								  if (!merged) {
-												Refactor inter MV candidate selection

Moves duplicate code for checking the best MV candidate from functions
calc_mvd_cost, search_pu_inter_ref and search_pu_inter to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								    vector2d_t mvd1 = {
 								      x - mv_cand[0][0],
 								      y - mv_cand[0][1],
 								    };
 								    vector2d_t mvd2 = {
 								      x - mv_cand[1][0],
 								      y - mv_cand[1][1],
 								    };
 								    uint32_t cand1_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
 								    uint32_t cand2_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
 								    // Select candidate 1 if it has lower cost
 								    if (cand2_cost < cand1_cost) {
 								      cur_mv_cand = 1;
-												Refactor inter MV candidate selection

Moves duplicate code for checking the best MV candidate from functions
calc_mvd_cost, search_pu_inter_ref and search_pu_inter to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								      mvd = mvd2;
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								    } else {
-												Refactor inter MV candidate selection

Moves duplicate code for checking the best MV candidate from functions
calc_mvd_cost, search_pu_inter_ref and search_pu_inter to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								      mvd = mvd1;
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								    }
 								  }
 								  cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
 								  CABAC_BIN(cabac, merged, "MergeFlag");
 								  num_cand = MRG_MAX_NUM_CANDS;
-												Cleanup of mv-rdo, removed unused functions

											
										
										
											2015-11-05 12:36:36 +00:00
+								  if (merged) {
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								    if (num_cand > 1) {
 								      int32_t ui;
 								      for (ui = 0; ui < num_cand - 1; ui++) {
 								        int32_t symbol = (ui != merge_idx);
 								        if (ui == 0) {
 								          cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
 								          CABAC_BIN(cabac, symbol, "MergeIndex");
 								        } else {
 								          CABAC_BIN_EP(cabac, symbol, "MergeIndex");
 								        }
 								        if (symbol == 0) break;
 								      }
 								    }
 								  } else {
 								    uint32_t ref_list_idx;
 								    uint32_t j;
 								    int ref_list[2] = { 0, 0 };
-												Rename encoder_state_t.global to frame

"Frame" is more accurate than "global" since when OWF is used, encoder
states for each frame have their own struct.

											
										
										
											2016-08-10 00:46:23 +00:00
+								    for (j = 0; j < state->frame->ref->used_size; j++) {
 								      if (state->frame->ref->pocs[j] < state->frame->poc) {
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								        ref_list[0]++;
 								      } else {
 								        ref_list[1]++;
 								      }
 								    }
-												Cleanup of mv-rdo, removed unused functions

											
										
										
											2015-11-05 12:36:36 +00:00
+								    //ToDo: bidir mv support
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								    for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) {
 								      if (/*cur_cu->inter.mv_dir*/ 1 & (1 << ref_list_idx)) {
 								        if (ref_list[ref_list_idx] > 1) {
 								          // parseRefFrmIdx
-												Cleanup of mv-rdo, removed unused functions

											
										
										
											2015-11-05 12:36:36 +00:00
+								          int32_t ref_frame = ref_idx;
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
 								          cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
 								          CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");
 								          if (ref_frame > 0) {
 								            int32_t i;
 								            int32_t ref_num = ref_list[ref_list_idx] - 2;
 								            cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
 								            ref_frame--;
 								            for (i = 0; i < ref_num; ++i) {
 								              const uint32_t symbol = (i == ref_frame) ? 0 : 1;
 								              if (i == 0) {
 								                CABAC_BIN(cabac, symbol, "ref_idx_lX");
 								              } else {
 								                CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
 								              }
 								              if (symbol == 0) break;
 								            }
 								          }
 								        }
-												Cleanup of mv-rdo, removed unused functions

											
										
										
											2015-11-05 12:36:36 +00:00
+								        // ToDo: Bidir vector support
-												Rename encoder_state_t.global to frame

"Frame" is more accurate than "global" since when OWF is used, encoder
states for each frame have their own struct.

											
										
										
											2016-08-10 00:46:23 +00:00
+								        if (!(state->frame->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
-												Refactor inter MVD cost functions

Moves duplicate code for writing the MVD of a single motion vector from
kvz_get_mvd_coding_cost_cabac and encoder_inter_prediction_unit to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								          // It is safe to drop const here because cabac->only_count is set.
 								          kvz_encode_mvd((encoder_state_t*) state, cabac, mvd.x, mvd.y);
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								        }
 								        // Signal which candidate MV to use
-												Refactor inter MVD cost functions

Moves duplicate code for writing the MVD of a single motion vector from
kvz_get_mvd_coding_cost_cabac and encoder_inter_prediction_unit to a new
function.

											
										
										
											2018-01-18 10:47:27 +00:00
+								        kvz_cabac_write_unary_max_symbol(
 								            cabac,
 								            cabac->ctx.mvp_idx_model,
 								            cur_mv_cand,
 ,
 								            AMVP_MAX_NUM_CANDS - 1);
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								      }
 								    }
 								  }
-												Clean up code using the fixed point frac bit tables

This is to prepare for changing the code using the floating point table
to use the fixed point table instead.

This also allows reducing the size of the fractional part, which was
useful for finding every place where the fixed point representation
is relied upon.

											
										
										
											2017-01-18 15:58:50 +00:00
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								  *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);
 								  // Store bitcost before restoring cabac
-												Put inter search parameters in a single struct

Adds struct inter_search_info_t for holding the parameters that are used
by most functions related to inter search. Passing the parameters in
a single struct greatly reduces the number of parameters for many
functions.

											
										
										
											2017-08-03 11:11:36 +00:00
+								  return *bitcost * (uint32_t)(state->lambda_sqrt + 0.5);
-												Added calc_mvd_cost_cabac() to calculate real bits used for motion vectors

Conflicts:
	src/rdo.h
	src/search_inter.c

Conflicts:
	src/rdo.c

											
										
										
											2015-08-21 06:11:30 +00:00
+								}