From c38ec1aa10e8a91853c69a95685bec9fd91df93e Mon Sep 17 00:00:00 2001
From: Marko Viitanen <fador@iki.fi>
Date: Mon, 7 Apr 2014 14:36:01 +0300
Subject: [PATCH] Added commandline option for RDO (--rd)

---
 README.md     |  4 ++++
 src/config.c  | 12 ++++++++++++
 src/config.h  |  1 +
 src/encmain.c |  9 +++++++--
 src/encoder.c | 30 ++++++++++++++++++++----------
 src/encoder.h |  1 +
 src/search.c  | 41 ++++++++++++++++++++++++-----------------
 7 files changed, 69 insertions(+), 29 deletions(-)
diff --git a/README.md b/README.md
index d5ac86a3..1cc50f98 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,10 @@ meant to be user configurable later.
                                        beta and tc range is -6..6 [0:0]
               --no-sao               : Disable sample adaptive offset
               --no-rdoq              : Disable RDO quantization
+              --rd <integer>         : Rate-Distortion Optimization level [1]
+                                         0: no RDO
+                                         1: estimated RDO
+                                         2: full RDO
               --no-transform-skip    : Disable transform skip
               --aud                  : Use access unit delimiters
               --cqmfile <string>     : Custom Quantization Matrices from a file
diff --git a/src/config.c b/src/config.c
index bcaf4ce0..5866e72a 100644
--- a/src/config.c
+++ b/src/config.c
@@ -65,6 +65,7 @@ int config_init(config *cfg)
   cfg->deblock_tc      = 0;
   cfg->sao_enable      = 1;
   cfg->rdoq_enable     = 1;
+  cfg->rdo             = 1;
   cfg->trskip_enable   = 1;
   cfg->vui.sar_width   = 0;
   cfg->vui.sar_height  = 0;
@@ -225,6 +226,16 @@ static int config_parse(config *cfg, const char *name, const char *value)
     cfg->sao_enable = atobool(value);
   else if OPT("rdoq")
     cfg->rdoq_enable = atobool(value);
+  else if OPT("rd") {
+    int rdo = 0;
+    if (sscanf(value, "%d", &rdo)) {
+      if(rdo < 0 || rdo > 2) {
+        fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n");
+        rdo = 1;
+      }
+      cfg->rdo = rdo;
+    }
+  }
   else if OPT("transform-skip")
     cfg->trskip_enable = atobool(value);
   else if OPT("sar") {
@@ -293,6 +304,7 @@ int config_read(config *cfg,int argc, char *argv[])
     { "deblock",            required_argument, NULL, 0 },
     { "no-sao",                   no_argument, NULL, 0 },
     { "no-rdoq",                  no_argument, NULL, 0 },
+    { "rd",                 required_argument, NULL, 0 },
     { "no-transform-skip",        no_argument, NULL, 0 },
     { "sar",                required_argument, NULL, 0 },
     { "overscan",           required_argument, NULL, 0 },
diff --git a/src/config.h b/src/config.h
index 3c23cb06..88c95010 100644
--- a/src/config.h
+++ b/src/config.h
@@ -44,6 +44,7 @@ typedef struct
   int32_t deblock_enable; /*!< \brief Flag to enable deblocking filter */
   int32_t sao_enable;     /*!< \brief Flag to enable sample adaptive offset filter */
   int32_t rdoq_enable;    /*!< \brief Flag to enable RD optimized quantization. */
+  int32_t rdo;            /*!< \brief RD-calculation level (0..2) */
   int32_t trskip_enable;    /*!< \brief Flag to enable transform skip (for 4x4 blocks). */
   int32_t deblock_beta;   /*!< \brief (deblocking) beta offset (div 2), range -6...6 */
   int32_t deblock_tc;     /*!< \brief (deblocking) tc offset (div 2), range -6...6 */
diff --git a/src/encmain.c b/src/encmain.c
index 84c1489f..bb76e2ff 100644
--- a/src/encmain.c
+++ b/src/encmain.c
@@ -103,6 +103,10 @@ int main(int argc, char *argv[])
             "                                   beta and tc range is -6..6 [0:0]\n"
             "          --no-sao               : Disable sample adaptive offset\n"
             "          --no-rdoq              : Disable RDO quantization\n"
+            "          --rd <integer>         : Rate-Distortion Optimization level [1]\n"
+            "                                     0: no RDO\n"
+            "                                     1: estimated RDO\n"
+            "                                     2: full RDO\n"
             "          --no-transform-skip    : Disable transform skip\n"
             "          --aud                  : Use access unit delimiters\n"
             "          --cqmfile <string>     : Custom Quantization Matrices from a file\n"
@@ -227,8 +231,9 @@ int main(int argc, char *argv[])
   encoder->tc_offset_div2   = (int8_t)encoder->cfg->deblock_tc;
   // SAO
   encoder->sao_enable = (int8_t)encoder->cfg->sao_enable;
-  // RDOQ
+  // RDO
   encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable;
+  encoder->rdo         = (int8_t)encoder->cfg->rdo;
   // TR SKIP
   encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable;
   // VUI
@@ -379,7 +384,7 @@ int main(int argc, char *argv[])
   fgetpos(output,(fpos_t*)&curpos);
 
   // Print statistics of the coding
-  printf(" Processed %d frames, %10d bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, ((int32_t)curpos)<<3,
+  printf(" Processed %d frames, %10lld bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, curpos<<3,
          psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame);
 
   fclose(input);
diff --git a/src/encoder.c b/src/encoder.c
index e7e96478..6dae6f88 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -279,6 +279,8 @@ encoder_control *init_encoder_control(config *cfg)
   enc_c->tc_offset_div2    = 0;
   // SAO
   enc_c->sao_enable = 1;
+  // Rate-distortion optimization level
+  enc_c->rdo        = 1;
 
   // Allocate the bitstream struct
   stream = create_bitstream();
@@ -396,6 +398,7 @@ void encode_one_frame(encoder_control* encoder)
 
   cabac_data cabac;
 
+
   /** IDR picture when: period == 0 and frame == 0
    *                    period == 1 && frame%2 == 0
    *                    period != 0 && frame%period == 0
@@ -1943,25 +1946,32 @@ void encode_transform_tree(encoder_control* encoder, cabac_data *cabac, int32_t
       dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type);
       itransform2d(temp_block2,pre_quant_coeff,width,0);
 
-      // SSD between reconstruction and original + sum of coeffs
+      // SSD between original and reconstructed
       for (i = 0; i < 16; i++) {
         int diff = temp_block[i]-block[i];
         cost += diff*diff;
-        //coeffcost += abs((int)temp_coeff[i]);
 
         diff = temp_block2[i] - block[i];
         cost2 += diff*diff;
-        //coeffcost2 += abs((int)temp_coeff2[i]);
       }
-      // TODO: add an option to use estimated RD-calculation
-      //cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5);
-      //cost2 += (coeffcost2 + (coeffcost2>>1))*((int)g_cur_lambda_cost+0.5);
 
-      coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma);
-      coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma);
+      // Simple RDO
+      if(encoder->rdo == 1) {
+        // SSD between reconstruction and original + sum of coeffs
+        for (i = 0; i < 16; i++) {
+          coeffcost += abs((int)temp_coeff[i]);
+          coeffcost2 += abs((int)temp_coeff2[i]);
+        }
+        cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5);
+        cost2 += (coeffcost2 + (coeffcost2>>1))*((int)g_cur_lambda_cost+0.5);
+        // Full RDO
+      } else if(encoder->rdo == 2) {
+        coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma);
+        coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma);
 
-      cost  += coeffcost*((int)g_cur_lambda_cost+0.5);
-      cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5);
+        cost  += coeffcost*((int)g_cur_lambda_cost+0.5);
+        cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5);
+      }
 
       cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2);
     }
diff --git a/src/encoder.h b/src/encoder.h
index b30d09c0..ae4f7f32 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -77,6 +77,7 @@ typedef struct
   int8_t deblock_enable; // \brief Flag to enable deblocking filter
   int8_t sao_enable;     // \brief Flag to enable sample adaptive offset filter
   int8_t rdoq_enable;    // \brief Whether RDOQ is enabled or not.
+  int8_t rdo;            // \brief RDO level
   int8_t trskip_enable;    // \brief Flag to enable transform skipping (4x4 intra)
   int8_t beta_offset_div2; // \brief (deblocking) beta offset (div 2), range -6...6
   int8_t tc_offset_div2;   // \brief (deblocking)tc offset (div 2), range -6...6
diff --git a/src/search.c b/src/search.c
index 6d554335..3193aef6 100644
--- a/src/search.c
+++ b/src/search.c
@@ -733,43 +733,48 @@ static int lcu_get_final_cost(encoder_control *encoder,
   int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
   int cost = 0;
   int coeff_cost = 0;
-  //int coeff_cost_temp = 0;
+
   int width = LCU_WIDTH>>depth;
   int x,y;
   cur_cu = &lcu->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
 
-  // SSD between reconstruction and original + sum of coeffs
+  // SSD between reconstruction and original
   for (y = y_local; y < y_local+width; ++y) {
     for (x = x_local; x < x_local+width; ++x) {
       int diff = (int)lcu->rec.y[y * LCU_WIDTH + x] - (int)lcu->ref.y[y * LCU_WIDTH + x];
       cost += diff*diff;
-      // TODO: add an option to use estimated RD-calculation
-      //coeff_cost_temp += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
     }
   }
-  // Chroma SSD + sum of coeffs
+  // Chroma SSD
   for (y = y_local>>1; y < (y_local+width)>>1; ++y) {
     for (x = x_local>>1; x < (x_local+width)>>1; ++x) {
       int diff = (int)lcu->rec.u[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.u[y * (LCU_WIDTH>>1) + x];
       cost += diff*diff;
       diff = (int)lcu->rec.v[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.v[y * (LCU_WIDTH>>1) + x];
       cost += diff*diff;
-      // TODO: add an option to use estimated RD-calculation
-      //coeff_cost_temp += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
-      //coeff_cost_temp += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
     }
   }
 
-  // Bitcost
-  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5);
-
-  // Coefficient costs
-  // TODO: add an option to use estimated RD-calculation
-  //cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);
+  if(encoder->rdo == 1) {
+    // sum of coeffs
+    for (y = y_local; y < y_local+width; ++y) {
+      for (x = x_local; x < x_local+width; ++x) {
+        coeff_cost += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
+      }
+    }
+    // Chroma sum of coeffs
+    for (y = y_local>>1; y < (y_local+width)>>1; ++y) {
+      for (x = x_local>>1; x < (x_local+width)>>1; ++x) {
+        coeff_cost += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
+        coeff_cost += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
+      }
+    }
+    // Coefficient costs
+    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);
 
   // Calculate actual bit costs for coding the coeffs
   // RDO
-  {
+  } else if (encoder->rdo == 2) {
     coefficient coeff_temp[32*32];
     coefficient coeff_temp_u[16*16];
     coefficient coeff_temp_v[16*16];
@@ -826,10 +831,12 @@ static int lcu_get_final_cost(encoder_control *encoder,
       coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
       coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
     }
+    // Multiply bit count with lambda to get RD-cost
+    cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5);
   }
-  // Multiply bit count with lambda to get RD-cost
-  cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5);
 
+  // Bitcost
+  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5);
 
   return cost;
 }