Added commandline option for RDO (--rd)

2024-11-23 18:14:06 +00:00 · 2014-04-07 14:36:01 +03:00 · 2014-04-07 14:36:01 +03:00 · c38ec1aa10
parent 6558c92020
commit c38ec1aa10
7 changed files with 69 additions and 29 deletions
--- a/README.md
+++ b/README.md
@ -31,6 +31,10 @@ meant to be user configurable later.
                                       beta and tc range is -6..6 [0:0]
              --no-sao               : Disable sample adaptive offset
              --no-rdoq              : Disable RDO quantization
+              --rd <integer>         : Rate-Distortion Optimization level [1]
+                                         0: no RDO
+                                         1: estimated RDO
+                                         2: full RDO
              --no-transform-skip    : Disable transform skip
              --aud                  : Use access unit delimiters
              --cqmfile <string>     : Custom Quantization Matrices from a file
--- a/src/config.c
+++ b/src/config.c
@ -65,6 +65,7 @@ int config_init(config *cfg)
  cfg->deblock_tc      = 0;
  cfg->sao_enable      = 1;
  cfg->rdoq_enable     = 1;
+  cfg->rdo             = 1;
  cfg->trskip_enable   = 1;
  cfg->vui.sar_width   = 0;
  cfg->vui.sar_height  = 0;
@ -225,6 +226,16 @@ static int config_parse(config *cfg, const char *name, const char *value)
    cfg->sao_enable = atobool(value);
  else if OPT("rdoq")
    cfg->rdoq_enable = atobool(value);
+  else if OPT("rd") {
+    int rdo = 0;
+    if (sscanf(value, "%d", &rdo)) {
+      if(rdo < 0 || rdo > 2) {
+        fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n");
+        rdo = 1;
+      }
+      cfg->rdo = rdo;
+    }
+  }
  else if OPT("transform-skip")
    cfg->trskip_enable = atobool(value);
  else if OPT("sar") {
@ -293,6 +304,7 @@ int config_read(config *cfg,int argc, char *argv[])
    { "deblock",            required_argument, NULL, 0 },
    { "no-sao",                   no_argument, NULL, 0 },
    { "no-rdoq",                  no_argument, NULL, 0 },
+    { "rd",                 required_argument, NULL, 0 },
    { "no-transform-skip",        no_argument, NULL, 0 },
    { "sar",                required_argument, NULL, 0 },
    { "overscan",           required_argument, NULL, 0 },
--- a/src/config.h
+++ b/src/config.h
@ -44,6 +44,7 @@ typedef struct
  int32_t deblock_enable; /*!< \brief Flag to enable deblocking filter */
  int32_t sao_enable;     /*!< \brief Flag to enable sample adaptive offset filter */
  int32_t rdoq_enable;    /*!< \brief Flag to enable RD optimized quantization. */
+  int32_t rdo;            /*!< \brief RD-calculation level (0..2) */
  int32_t trskip_enable;    /*!< \brief Flag to enable transform skip (for 4x4 blocks). */
  int32_t deblock_beta;   /*!< \brief (deblocking) beta offset (div 2), range -6...6 */
  int32_t deblock_tc;     /*!< \brief (deblocking) tc offset (div 2), range -6...6 */
--- a/src/encmain.c
+++ b/src/encmain.c
@ -103,6 +103,10 @@ int main(int argc, char *argv[])
            "                                   beta and tc range is -6..6 [0:0]\n"
            "          --no-sao               : Disable sample adaptive offset\n"
            "          --no-rdoq              : Disable RDO quantization\n"
+            "          --rd <integer>         : Rate-Distortion Optimization level [1]\n"
+            "                                     0: no RDO\n"
+            "                                     1: estimated RDO\n"
+            "                                     2: full RDO\n"
            "          --no-transform-skip    : Disable transform skip\n"
            "          --aud                  : Use access unit delimiters\n"
            "          --cqmfile <string>     : Custom Quantization Matrices from a file\n"
@ -227,8 +231,9 @@ int main(int argc, char *argv[])
  encoder->tc_offset_div2   = (int8_t)encoder->cfg->deblock_tc;
  // SAO
  encoder->sao_enable = (int8_t)encoder->cfg->sao_enable;
-  // RDOQ
+  // RDO
  encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable;
+  encoder->rdo         = (int8_t)encoder->cfg->rdo;
  // TR SKIP
  encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable;
  // VUI
@ -379,7 +384,7 @@ int main(int argc, char *argv[])
  fgetpos(output,(fpos_t*)&curpos);

  // Print statistics of the coding
-  printf(" Processed %d frames, %10d bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, ((int32_t)curpos)<<3,
+  printf(" Processed %d frames, %10lld bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, curpos<<3,
         psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame);

  fclose(input);
--- a/src/encoder.c
+++ b/src/encoder.c
@ -279,6 +279,8 @@ encoder_control *init_encoder_control(config *cfg)
  enc_c->tc_offset_div2    = 0;
  // SAO
  enc_c->sao_enable = 1;
+  // Rate-distortion optimization level
+  enc_c->rdo        = 1;

  // Allocate the bitstream struct
  stream = create_bitstream();
@ -396,6 +398,7 @@ void encode_one_frame(encoder_control* encoder)

  cabac_data cabac;

+
  /** IDR picture when: period == 0 and frame == 0
   *                    period == 1 && frame%2 == 0
   *                    period != 0 && frame%period == 0
@ -1943,25 +1946,32 @@ void encode_transform_tree(encoder_control* encoder, cabac_data *cabac, int32_t
      dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type);
      itransform2d(temp_block2,pre_quant_coeff,width,0);

-      // SSD between reconstruction and original + sum of coeffs
+      // SSD between original and reconstructed
      for (i = 0; i < 16; i++) {
        int diff = temp_block[i]-block[i];
        cost += diff*diff;
-        //coeffcost += abs((int)temp_coeff[i]);

        diff = temp_block2[i] - block[i];
        cost2 += diff*diff;
-        //coeffcost2 += abs((int)temp_coeff2[i]);
      }
-      // TODO: add an option to use estimated RD-calculation
-      //cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5);
-      //cost2 += (coeffcost2 + (coeffcost2>>1))*((int)g_cur_lambda_cost+0.5);

-      coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma);
-      coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma);
+      // Simple RDO
+      if(encoder->rdo == 1) {
+        // Estimate coefficient cost as the sum of absolute coefficient values
+        for (i = 0; i < 16; i++) {
+          coeffcost += abs((int)temp_coeff[i]);
+          coeffcost2 += abs((int)temp_coeff2[i]);
+        }
+        cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5);
+        cost2 += (coeffcost2 + (coeffcost2>>1))*((int)g_cur_lambda_cost+0.5);
+        // Full RDO
+      } else if(encoder->rdo == 2) {
+        coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma);
+        coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma);

-      cost  += coeffcost*((int)g_cur_lambda_cost+0.5);
-      cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5);
+        cost  += coeffcost*((int)g_cur_lambda_cost+0.5);
+        cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5);
+      }

      cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2);
    }
--- a/src/encoder.h
+++ b/src/encoder.h
@ -77,6 +77,7 @@ typedef struct
  int8_t deblock_enable; // \brief Flag to enable deblocking filter
  int8_t sao_enable;     // \brief Flag to enable sample adaptive offset filter
  int8_t rdoq_enable;    // \brief Whether RDOQ is enabled or not.
+  int8_t rdo;            // \brief RDO level
  int8_t trskip_enable;    // \brief Flag to enable transform skipping (4x4 intra)
  int8_t beta_offset_div2; // \brief (deblocking) beta offset (div 2), range -6...6
  int8_t tc_offset_div2;   // \brief (deblocking)tc offset (div 2), range -6...6
--- a/src/search.c
+++ b/src/search.c
@ -733,43 +733,48 @@ static int lcu_get_final_cost(encoder_control *encoder,
  int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
  int cost = 0;
  int coeff_cost = 0;
-  //int coeff_cost_temp = 0;
+
  int width = LCU_WIDTH>>depth;
  int x,y;
  cur_cu = &lcu->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];

-  // SSD between reconstruction and original + sum of coeffs
+  // SSD between reconstruction and original
  for (y = y_local; y < y_local+width; ++y) {
    for (x = x_local; x < x_local+width; ++x) {
      int diff = (int)lcu->rec.y[y * LCU_WIDTH + x] - (int)lcu->ref.y[y * LCU_WIDTH + x];
      cost += diff*diff;
-      // TODO: add an option to use estimated RD-calculation
-      //coeff_cost_temp += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
    }
  }
-  // Chroma SSD + sum of coeffs
+  // Chroma SSD
  for (y = y_local>>1; y < (y_local+width)>>1; ++y) {
    for (x = x_local>>1; x < (x_local+width)>>1; ++x) {
      int diff = (int)lcu->rec.u[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.u[y * (LCU_WIDTH>>1) + x];
      cost += diff*diff;
      diff = (int)lcu->rec.v[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.v[y * (LCU_WIDTH>>1) + x];
      cost += diff*diff;
-      // TODO: add an option to use estimated RD-calculation
-      //coeff_cost_temp += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
-      //coeff_cost_temp += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
    }
  }

-  // Bitcost
-  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5);
-
-  // Coefficient costs
-  // TODO: add an option to use estimated RD-calculation
-  //cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);
+  if(encoder->rdo == 1) {
+    // sum of coeffs
+    for (y = y_local; y < y_local+width; ++y) {
+      for (x = x_local; x < x_local+width; ++x) {
+        coeff_cost += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
+      }
+    }
+    // Chroma sum of coeffs
+    for (y = y_local>>1; y < (y_local+width)>>1; ++y) {
+      for (x = x_local>>1; x < (x_local+width)>>1; ++x) {
+        coeff_cost += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
+        coeff_cost += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
+      }
+    }
+    // Coefficient costs
+    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);

  // Calculate actual bit costs for coding the coeffs
  // RDO
-  {
+  } else if (encoder->rdo == 2) {
    coefficient coeff_temp[32*32];
    coefficient coeff_temp_u[16*16];
    coefficient coeff_temp_v[16*16];
@ -826,10 +831,12 @@ static int lcu_get_final_cost(encoder_control *encoder,
      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
    }
+    // Multiply bit count with lambda to get RD-cost
+    cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5);
  }
-  // Multiply bit count with lambda to get RD-cost
-  cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5);

+  // Bitcost
+  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5);

  return cost;
 }