From c38ec1aa10e8a91853c69a95685bec9fd91df93e Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 7 Apr 2014 14:36:01 +0300 Subject: [PATCH] Added commandline option for RDO (--rd) --- README.md | 4 ++++ src/config.c | 12 ++++++++++++ src/config.h | 1 + src/encmain.c | 9 +++++++-- src/encoder.c | 30 ++++++++++++++++++++---------- src/encoder.h | 1 + src/search.c | 41 ++++++++++++++++++++++++----------------- 7 files changed, 69 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index d5ac86a3..1cc50f98 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,10 @@ meant to be user configurable later. beta and tc range is -6..6 [0:0] --no-sao : Disable sample adaptive offset --no-rdoq : Disable RDO quantization + --rd : Rate-Distortion Optimization level [1] + 0: no RDO + 1: estimated RDO + 2: full RDO --no-transform-skip : Disable transform skip --aud : Use access unit delimiters --cqmfile : Custom Quantization Matrices from a file diff --git a/src/config.c b/src/config.c index bcaf4ce0..5866e72a 100644 --- a/src/config.c +++ b/src/config.c @@ -65,6 +65,7 @@ int config_init(config *cfg) cfg->deblock_tc = 0; cfg->sao_enable = 1; cfg->rdoq_enable = 1; + cfg->rdo = 1; cfg->trskip_enable = 1; cfg->vui.sar_width = 0; cfg->vui.sar_height = 0; @@ -225,6 +226,16 @@ static int config_parse(config *cfg, const char *name, const char *value) cfg->sao_enable = atobool(value); else if OPT("rdoq") cfg->rdoq_enable = atobool(value); + else if OPT("rd") { + int rdo = 0; + if (sscanf(value, "%d", &rdo)) { + if(rdo < 0 || rdo > 2) { + fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n"); + rdo = 1; + } + cfg->rdo = rdo; + } + } else if OPT("transform-skip") cfg->trskip_enable = atobool(value); else if OPT("sar") { @@ -293,6 +304,7 @@ int config_read(config *cfg,int argc, char *argv[]) { "deblock", required_argument, NULL, 0 }, { "no-sao", no_argument, NULL, 0 }, { "no-rdoq", no_argument, NULL, 0 }, + { "rd", required_argument, NULL, 0 }, { 
"no-transform-skip", no_argument, NULL, 0 }, { "sar", required_argument, NULL, 0 }, { "overscan", required_argument, NULL, 0 }, diff --git a/src/config.h b/src/config.h index 3c23cb06..88c95010 100644 --- a/src/config.h +++ b/src/config.h @@ -44,6 +44,7 @@ typedef struct int32_t deblock_enable; /*!< \brief Flag to enable deblocking filter */ int32_t sao_enable; /*!< \brief Flag to enable sample adaptive offset filter */ int32_t rdoq_enable; /*!< \brief Flag to enable RD optimized quantization. */ + int32_t rdo; /*!< \brief RD-calculation level (0..2) */ int32_t trskip_enable; /*!< \brief Flag to enable transform skip (for 4x4 blocks). */ int32_t deblock_beta; /*!< \brief (deblocking) beta offset (div 2), range -6...6 */ int32_t deblock_tc; /*!< \brief (deblocking) tc offset (div 2), range -6...6 */ diff --git a/src/encmain.c b/src/encmain.c index 84c1489f..bb76e2ff 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -103,6 +103,10 @@ int main(int argc, char *argv[]) " beta and tc range is -6..6 [0:0]\n" " --no-sao : Disable sample adaptive offset\n" " --no-rdoq : Disable RDO quantization\n" + " --rd : Rate-Distortion Optimization level [1]\n" + " 0: no RDO\n" + " 1: estimated RDO\n" + " 2: full RDO\n" " --no-transform-skip : Disable transform skip\n" " --aud : Use access unit delimiters\n" " --cqmfile : Custom Quantization Matrices from a file\n" @@ -227,8 +231,9 @@ int main(int argc, char *argv[]) encoder->tc_offset_div2 = (int8_t)encoder->cfg->deblock_tc; // SAO encoder->sao_enable = (int8_t)encoder->cfg->sao_enable; - // RDOQ + // RDO encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable; + encoder->rdo = (int8_t)encoder->cfg->rdo; // TR SKIP encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable; // VUI @@ -379,7 +384,7 @@ int main(int argc, char *argv[]) fgetpos(output,(fpos_t*)&curpos); // Print statistics of the coding - printf(" Processed %d frames, %10d bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, ((int32_t)curpos)<<3, + printf(" Processed 
%d frames, %10lld bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, curpos<<3, psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame); fclose(input); diff --git a/src/encoder.c b/src/encoder.c index e7e96478..6dae6f88 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -279,6 +279,8 @@ encoder_control *init_encoder_control(config *cfg) enc_c->tc_offset_div2 = 0; // SAO enc_c->sao_enable = 1; + // Rate-distortion optimization level + enc_c->rdo = 1; // Allocate the bitstream struct stream = create_bitstream(); @@ -396,6 +398,7 @@ void encode_one_frame(encoder_control* encoder) cabac_data cabac; + /** IDR picture when: period == 0 and frame == 0 * period == 1 && frame%2 == 0 * period != 0 && frame%period == 0 @@ -1943,25 +1946,32 @@ void encode_transform_tree(encoder_control* encoder, cabac_data *cabac, int32_t dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type); itransform2d(temp_block2,pre_quant_coeff,width,0); - // SSD between reconstruction and original + sum of coeffs + // SSD between original and reconstructed for (i = 0; i < 16; i++) { int diff = temp_block[i]-block[i]; cost += diff*diff; - //coeffcost += abs((int)temp_coeff[i]); diff = temp_block2[i] - block[i]; cost2 += diff*diff; - //coeffcost2 += abs((int)temp_coeff2[i]); } - // TODO: add an option to use estimated RD-calculation - //cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5); - //cost2 += (coeffcost2 + (coeffcost2>>1))*((int)g_cur_lambda_cost+0.5); - coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma); - coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma); + // Simple RDO + if(encoder->rdo == 1) { + // SSD between reconstruction and original + sum of coeffs + for (i = 0; i < 16; i++) { + coeffcost += abs((int)temp_coeff[i]); + coeffcost2 += abs((int)temp_coeff2[i]); + } + cost += (1 + coeffcost + (coeffcost>>1))*((int)g_cur_lambda_cost+0.5); + cost2 += (coeffcost2 + 
(coeffcost2>>1))*((int)g_cur_lambda_cost+0.5); + // Full RDO + } else if(encoder->rdo == 2) { + coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma); + coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma); - cost += coeffcost*((int)g_cur_lambda_cost+0.5); - cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5); + cost += coeffcost*((int)g_cur_lambda_cost+0.5); + cost2 += coeffcost2*((int)g_cur_lambda_cost+0.5); + } cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2); } diff --git a/src/encoder.h b/src/encoder.h index b30d09c0..ae4f7f32 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -77,6 +77,7 @@ typedef struct int8_t deblock_enable; // \brief Flag to enable deblocking filter int8_t sao_enable; // \brief Flag to enable sample adaptive offset filter int8_t rdoq_enable; // \brief Whether RDOQ is enabled or not. + int8_t rdo; // \brief RDO level int8_t trskip_enable; // \brief Flag to enable transform skipping (4x4 intra) int8_t beta_offset_div2; // \brief (deblocking) beta offset (div 2), range -6...6 int8_t tc_offset_div2; // \brief (deblocking)tc offset (div 2), range -6...6 diff --git a/src/search.c b/src/search.c index 6d554335..3193aef6 100644 --- a/src/search.c +++ b/src/search.c @@ -733,43 +733,48 @@ static int lcu_get_final_cost(encoder_control *encoder, int x_local = (x_px&0x3f), y_local = (y_px&0x3f); int cost = 0; int coeff_cost = 0; - //int coeff_cost_temp = 0; + int width = LCU_WIDTH>>depth; int x,y; cur_cu = &lcu->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; - // SSD between reconstruction and original + sum of coeffs + // SSD between reconstruction and original for (y = y_local; y < y_local+width; ++y) { for (x = x_local; x < x_local+width; ++x) { int diff = (int)lcu->rec.y[y * LCU_WIDTH + x] - (int)lcu->ref.y[y * LCU_WIDTH + x]; cost += diff*diff; - // TODO: add an option to use estimated RD-calculation - //coeff_cost_temp += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]); } } - // 
Chroma SSD + sum of coeffs + // Chroma SSD for (y = y_local>>1; y < (y_local+width)>>1; ++y) { for (x = x_local>>1; x < (x_local+width)>>1; ++x) { int diff = (int)lcu->rec.u[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.u[y * (LCU_WIDTH>>1) + x]; cost += diff*diff; diff = (int)lcu->rec.v[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.v[y * (LCU_WIDTH>>1) + x]; cost += diff*diff; - // TODO: add an option to use estimated RD-calculation - //coeff_cost_temp += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]); - //coeff_cost_temp += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]); } } - // Bitcost - cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5); - - // Coefficient costs - // TODO: add an option to use estimated RD-calculation - //cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5); + if(encoder->rdo == 1) { + // sum of coeffs + for (y = y_local; y < y_local+width; ++y) { + for (x = x_local; x < x_local+width; ++x) { + coeff_cost += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]); + } + } + // Chroma sum of coeffs + for (y = y_local>>1; y < (y_local+width)>>1; ++y) { + for (x = x_local>>1; x < (x_local+width)>>1; ++x) { + coeff_cost += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]); + coeff_cost += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]); + } + } + // Coefficient costs + cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5); // Calculate actual bit costs for coding the coeffs // RDO - { + } else if (encoder->rdo == 2) { coefficient coeff_temp[32*32]; coefficient coeff_temp_u[16*16]; coefficient coeff_temp_v[16*16]; @@ -826,10 +831,12 @@ static int lcu_get_final_cost(encoder_control *encoder, coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode); coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode); } + // Multiply bit count with lambda to get RD-cost + cost += 
coeff_cost * (int32_t)(g_cur_lambda_cost+0.5); } - // Multiply bit count with lambda to get RD-cost - cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5); + // Bitcost + cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5); return cost; }