mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Use RDO in final_cost
This commit is contained in:
parent
b83559d3f3
commit
c5ba5eb3c8
16
src/rdo.c
16
src/rdo.c
|
@ -57,12 +57,17 @@ const uint32_t entropy_bits[128] =
|
|||
0x0050e, 0x29af6, 0x004cc, 0x2a497, 0x0048d, 0x2ae35, 0x00451, 0x2b7d6, 0x00418, 0x2c176, 0x003e2, 0x2cb15, 0x003af, 0x2d4b5, 0x0037f, 0x2de55
|
||||
};
|
||||
|
||||
|
||||
/** Calculate actual (or really close to actual) bitcost for coding coefficients
|
||||
* \param coeff coefficient array
|
||||
* \param width coeff block width
|
||||
* \param type data type (0 == luma)
|
||||
* \returns bits needed to code input coefficients
|
||||
*/
|
||||
int32_t get_coeff_cost(encoder_control *encoder, coefficient *coeff, int32_t width, int32_t type)
|
||||
{
|
||||
cabac_data temp_cabac;
|
||||
int32_t cost = 0;
|
||||
//Context to save
|
||||
//Context to save TODO: handle contexts better
|
||||
cabac_ctx sig_coeff_group[4];
|
||||
cabac_ctx sig_model[27];
|
||||
cabac_ctx transform_skip;
|
||||
|
@ -82,7 +87,7 @@ int32_t get_coeff_cost(encoder_control *encoder, coefficient *coeff, int32_t wid
|
|||
|
||||
if(!found) return 0;
|
||||
|
||||
// Store contexts
|
||||
// Store contexts TODO: handle contexts better
|
||||
if(type==0) {
|
||||
memcpy(last_x,g_cu_ctx_last_x_luma, sizeof(cabac_ctx)*15);
|
||||
memcpy(last_y,g_cu_ctx_last_y_luma, sizeof(cabac_ctx)*15);
|
||||
|
@ -102,18 +107,21 @@ int32_t get_coeff_cost(encoder_control *encoder, coefficient *coeff, int32_t wid
|
|||
|
||||
// Store cabac state
|
||||
memcpy(&temp_cabac,&cabac,sizeof(cabac_data));
|
||||
// Clear bytes and bits and set mode to "count"
|
||||
cabac.only_count = 1;
|
||||
cabac.num_buffered_bytes = 0;
|
||||
cabac.bits_left = 23;
|
||||
|
||||
// Execute the coding function
|
||||
encode_coeff_nxn(encoder, coeff, width, type, width < 8? SCAN_VER:SCAN_DIAG, 0);
|
||||
|
||||
// Store bitcost before restoring cabac
|
||||
cost = (23-cabac.bits_left) + (cabac.num_buffered_bytes << 3);
|
||||
|
||||
// Restore cabac state
|
||||
memcpy(&cabac,&temp_cabac,sizeof(cabac_data));
|
||||
|
||||
// Restore contexts
|
||||
// Restore contexts TODO: handle contexts better
|
||||
if(type==0) {
|
||||
memcpy(g_cu_ctx_last_x_luma,last_x, sizeof(cabac_ctx)*15);
|
||||
memcpy(g_cu_ctx_last_y_luma,last_y, sizeof(cabac_ctx)*15);
|
||||
|
|
60
src/search.c
60
src/search.c
|
@ -34,7 +34,7 @@
|
|||
#include "intra.h"
|
||||
#include "inter.h"
|
||||
#include "filter.h"
|
||||
|
||||
#include "rdo.h"
|
||||
|
||||
// Temporarily for debugging.
|
||||
#define SEARCH_MV_FULL_RADIUS 0
|
||||
|
@ -724,13 +724,15 @@ static int search_cu_intra(encoder_control *encoder,
|
|||
* coding (bitcost * lambda) and cost for coding coefficients (bit count
|
||||
* returned by get_coeff_cost() for luma and chroma, multiplied by lambda)
|
||||
*/
|
||||
static int lcu_get_final_cost(const int x_px, const int y_px,
|
||||
static int lcu_get_final_cost(encoder_control *encoder,
|
||||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu)
|
||||
{
|
||||
cu_info *cur_cu;
|
||||
int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
|
||||
int cost = 0;
|
||||
int coeff_cost = 0;
|
||||
//int coeff_cost_temp = 0;
|
||||
int width = LCU_WIDTH>>depth;
|
||||
int x,y;
|
||||
cur_cu = &lcu->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
|
||||
|
@ -740,7 +742,8 @@ static int lcu_get_final_cost(const int x_px, const int y_px,
|
|||
for (x = x_local; x < x_local+width; ++x) {
|
||||
int diff = (int)lcu->rec.y[y * LCU_WIDTH + x] - (int)lcu->ref.y[y * LCU_WIDTH + x];
|
||||
cost += diff*diff;
|
||||
coeff_cost += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
|
||||
// TODO: add an option to use estimated RD-calculation
|
||||
//coeff_cost_temp += abs((int)lcu->coeff.y[y * LCU_WIDTH + x]);
|
||||
}
|
||||
}
|
||||
// Chroma SSD + sum of coeffs
|
||||
|
@ -750,17 +753,56 @@ static int lcu_get_final_cost(const int x_px, const int y_px,
|
|||
cost += diff*diff;
|
||||
diff = (int)lcu->rec.v[y * (LCU_WIDTH>>1) + x] - (int)lcu->ref.v[y * (LCU_WIDTH>>1) + x];
|
||||
cost += diff*diff;
|
||||
|
||||
coeff_cost += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
|
||||
coeff_cost += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
|
||||
// TODO: add an option to use estimated RD-calculation
|
||||
//coeff_cost_temp += abs((int)lcu->coeff.u[y * (LCU_WIDTH>>1) + x]);
|
||||
//coeff_cost_temp += abs((int)lcu->coeff.v[y * (LCU_WIDTH>>1) + x]);
|
||||
}
|
||||
}
|
||||
|
||||
// Bitcost
|
||||
cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(g_cur_lambda_cost+0.5);
|
||||
|
||||
// Coefficient costs (TODO: more tuning of the cost)
|
||||
cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);
|
||||
// Coefficient costs
|
||||
// TODO: add an option to use estimated RD-calculation
|
||||
//cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(g_cur_lambda_cost+0.5);
|
||||
|
||||
// Calculate actual bit costs for coding the coeffs
|
||||
// RDO
|
||||
{
|
||||
coefficient coeff_temp[32*32];
|
||||
coefficient coeff_temp_u[16*16];
|
||||
coefficient coeff_temp_v[16*16];
|
||||
int i;
|
||||
int blocks = (width == 64)?4:1;
|
||||
|
||||
for(i = 0; i < blocks; i++) {
|
||||
// For 64x64 blocks we need to do transform split to 32x32
|
||||
// Parenthesized: '+' binds tighter than '?:', so without the parentheses y_local was only added when bit 1 of i was clear.
int blk_y = ((i & 2) ? 32 : 0) + y_local;
|
||||
// Parenthesized: '+' binds tighter than '?:', so without the parentheses x_local was only added when bit 0 of i was clear.
int blk_x = ((i & 1) ? 32 : 0) + x_local;
|
||||
int blockwidth = (width == 64)?32:width;
|
||||
|
||||
// Calculate luma coeff bit count
|
||||
picture_blit_coeffs(&lcu->coeff.y[(blk_y*LCU_WIDTH)+blk_x],coeff_temp,blockwidth,blockwidth,LCU_WIDTH,blockwidth);
|
||||
coeff_cost += get_coeff_cost(encoder, coeff_temp, blockwidth, 0);
|
||||
|
||||
// 2x2 block cannot be coded..
|
||||
if(blockwidth != 4) {
|
||||
blk_y >>= 1;
|
||||
blk_x >>= 1;
|
||||
blockwidth >>= 1;
|
||||
|
||||
// Calculate chroma coeff bit count
|
||||
picture_blit_coeffs(&lcu->coeff.u[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_u,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
|
||||
picture_blit_coeffs(&lcu->coeff.v[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_v,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
|
||||
|
||||
coeff_cost += get_coeff_cost(encoder, coeff_temp_u, blockwidth, 2);
|
||||
coeff_cost += get_coeff_cost(encoder, coeff_temp_v, blockwidth, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Multiply bit count with lambda to get RD-cost
|
||||
cost += coeff_cost * (int32_t)(g_cur_lambda_cost+0.5);
|
||||
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -842,7 +884,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
|
|||
}
|
||||
}
|
||||
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
|
||||
cost = lcu_get_final_cost(x, y, depth, &work_tree[depth]);
|
||||
cost = lcu_get_final_cost(encoder, x, y, depth, &work_tree[depth]);
|
||||
}
|
||||
|
||||
// Recursively split all the way to max search depth.
|
||||
|
|
Loading…
Reference in a new issue