diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index aa083f5b..76f0cc7e 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -308,11 +308,11 @@ static void encode_transform_coeff(encoder_state_t * const state, } } -static void encode_inter_prediction_unit(encoder_state_t * const state, +void kvz_encode_inter_prediction_unit(encoder_state_t * const state, cabac_data_t * const cabac, const cu_info_t * const cur_cu, int x, int y, int width, int height, - int depth) + int depth, lcu_t* lcu) { // Mergeflag int16_t num_cand = 0; @@ -385,10 +385,20 @@ static void encode_inter_prediction_unit(encoder_state_t * const state, if (state->frame->ref_list != REF_PIC_LIST_1 || cur_cu->inter.mv_dir != 3) { int16_t mv_cand[2][2]; - kvz_inter_get_mv_cand_cua( + if (lcu) { + kvz_inter_get_mv_cand( + state, + x, y, width, height, + mv_cand, cur_cu, + lcu, ref_list_idx); + } + else { + kvz_inter_get_mv_cand_cua( state, x, y, width, height, - mv_cand, cur_cu, ref_list_idx); + mv_cand, cur_cu, ref_list_idx + ); + } uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx); const int32_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0]; @@ -855,7 +865,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state, const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i); const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y); - encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth); + kvz_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL); } { diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 4832eeb1..a3f95b36 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -51,6 +51,12 @@ void kvz_encode_mvd(encoder_state_t * const state, int32_t mvd_hor, int32_t mvd_ver); +void kvz_encode_inter_prediction_unit(encoder_state_t* const state, + cabac_data_t* const cabac, + const cu_info_t* const cur_cu, + int x, int y, int width, int height, + int depth, lcu_t* lcu); + void kvz_encode_last_significant_xy(cabac_data_t * const cabac, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, diff --git a/src/fast_coeff_cost.c b/src/fast_coeff_cost.c index d769791d..1abb5114 100644 --- a/src/fast_coeff_cost.c +++ b/src/fast_coeff_cost.c @@ -40,7 +40,7 @@ static uint16_t to_q88(float f) return (uint16_t)(f * 256.0f + 0.5f); } -static uint64_t to_4xq88(const float f[4]) +static uint64_t to_4xq88(const double f[4]) { int i; uint64_t result = 0; @@ -58,9 +58,9 @@ int kvz_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_ uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp; for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) { - float curr_wts[4]; + double curr_wts[4]; - if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0, + if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0, curr_wts + 1, curr_wts + 2, curr_wts + 3) != 4) { diff --git a/src/fast_coeff_cost.h b/src/fast_coeff_cost.h index 5ae6dc25..dee647f7 100644 --- a/src/fast_coeff_cost.h +++ b/src/fast_coeff_cost.h @@ -45,7 +45,7 @@ typedef struct { // Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from // 0 to MAX_FAST_COEFF_COST_QP -static const float default_fast_coeff_cost_wts[][4] = { +static const double default_fast_coeff_cost_wts[][4] = { // Just extend it by stretching the first actual values.. {0.164240, 4.161530, 3.509033, 6.928047}, {0.164240, 4.161530, 3.509033, 6.928047}, diff --git a/src/global.h b/src/global.h index 2ad0830b..9a2ee989 100644 --- a/src/global.h +++ b/src/global.h @@ -110,7 +110,7 @@ typedef int16_t coeff_t; -// #define VERBOSE 1 +#define VERBOSE 1 /* CONFIG VARIABLES */ diff --git a/src/inter.c b/src/inter.c index 02ea1a95..d6b83090 100644 --- a/src/inter.c +++ b/src/inter.c @@ -1228,7 +1228,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state, int32_t width, int32_t height, const merge_candidates_t *merge_cand, - const cu_info_t *cur_cu, + const cu_info_t * const cur_cu, int8_t reflist, int16_t mv_cand[2][2]) { @@ -1335,7 +1335,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state, int32_t width, int32_t height, int16_t mv_cand[2][2], - cu_info_t* cur_cu, + const cu_info_t * const cur_cu, lcu_t *lcu, int8_t reflist) { diff --git a/src/inter.h b/src/inter.h index 1a46e98a..7b5c4ea7 100644 --- a/src/inter.h +++ b/src/inter.h @@ -88,7 +88,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state, int32_t width, int32_t height, int16_t mv_cand[2][2], - cu_info_t* cur_cu, + const cu_info_t* cur_cu, lcu_t *lcu, int8_t reflist); diff --git a/src/search.c b/src/search.c index cd4c67b7..553c4380 100644 --- a/src/search.c +++ b/src/search.c @@ -318,7 +318,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, // However, if we have different tr_depth, the bits cannot be written in correct // order anyways so do not touch the chroma cbf here. if (state->encoder_control->chroma_format != KVZ_CSP_400) { - const cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]); + cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]); cabac->cur_ctx = cr_ctx; int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V); @@ -536,7 +536,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, int cu_width = LCU_WIDTH >> depth; double cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE; - uint32_t inter_bitcost = MAX_INT; + double inter_bitcost = MAX_INT; cu_info_t *cur_cu; cabac_data_t pre_search_cabac; memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac)); @@ -600,7 +600,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (can_use_inter) { double mode_cost; - uint32_t mode_bitcost; + double mode_bitcost; kvz_search_cu_inter(state, x, y, depth, @@ -760,7 +760,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } else if(depth == MAX_DEPTH && cur_cu->type == CU_INTRA) { // Add cost of intra part_size. - const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]); + cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]); bits += CTX_ENTROPY_FBITS(ctx, 1); // NxN state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split"); @@ -835,7 +835,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (depth < MAX_DEPTH) { // Add cost of cu_split_flag. uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth); - const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]); + cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]); split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda; state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split"); @@ -844,7 +844,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) { // Add cost of intra part_size. - const cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]); + cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]); split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 0), x, y, depth, "split"); @@ -893,7 +893,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } else if (depth == MAX_DEPTH && cur_cu->type == CU_INTRA) { // Add cost of intra part_size. - const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]); + cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]); bits += CTX_ENTROPY_FBITS(ctx, 1); // NxN state->search_cabac.cur_ctx = ctx; FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split"); diff --git a/src/search.h b/src/search.h index 2ca47c22..b11a0ad5 100644 --- a/src/search.h +++ b/src/search.h @@ -59,7 +59,7 @@ typedef struct unit_stats_map_t { cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units double cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs - uint32_t bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs + double bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs int8_t keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays int size; //!< number of active elements in the lists } unit_stats_map_t; diff --git a/src/search_inter.c b/src/search_inter.c index 57e163f4..983ffcc8 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -199,15 +199,15 @@ static INLINE bool intmv_within_tile(const inter_search_info_t *info, int x, int * \return true if best_mv was changed, false otherwise */ static bool check_mv_cost(inter_search_info_t *info, - int x, - int y, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + int x, + int y, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { if (!intmv_within_tile(info, x, y)) return false; - uint32_t bitcost = 0; + double bitcost = 0; double cost = kvz_image_calc_sad( info->pic, info->ref, @@ -292,10 +292,10 @@ static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv) * best_mv to the best one. */ static void select_starting_point(inter_search_info_t *info, - vector2d_t extra_mv, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + vector2d_t extra_mv, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { // Check the 0-vector, so we can ignore all 0-vectors in the merge cand list. check_mv_cost(info, 0, 0, best_cost, best_bits, best_mv); @@ -394,9 +394,9 @@ static double calc_mvd_cost(const encoder_state_t *state, inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, - uint32_t *bitcost) + double* bitcost) { - uint32_t temp_bitcost = 0; + double temp_bitcost = 0; uint32_t merge_idx; int8_t merged = 0; @@ -429,9 +429,9 @@ static double calc_mvd_cost(const encoder_state_t *state, static bool early_terminate(inter_search_info_t *info, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { static const vector2d_t small_hexbs[7] = { { 0, -1 }, { -1, 0 }, { 0, 1 }, { 1, 0 }, @@ -485,7 +485,7 @@ void kvz_tz_pattern_search(inter_search_info_t *info, vector2d_t mv, int *best_dist, double *best_cost, - uint32_t *best_bits, + double* best_bits, vector2d_t *best_mv) { assert(pattern_type < 4); @@ -603,7 +603,7 @@ void kvz_tz_raster_search(inter_search_info_t *info, int iSearchRange, int iRaster, double *best_cost, - uint32_t *best_bits, + double* best_bits, vector2d_t *best_mv) { const vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 }; @@ -618,10 +618,10 @@ void kvz_tz_raster_search(inter_search_info_t *info, static void tz_search(inter_search_info_t *info, - vector2d_t extra_mv, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + vector2d_t extra_mv, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { //TZ parameters const int iSearchRange = 96; // search range for each stage @@ -705,11 +705,11 @@ static void tz_search(inter_search_info_t *info, * points like 0,0 might be used, such as vectors from top or left. */ static void hexagon_search(inter_search_info_t *info, - vector2d_t extra_mv, - uint32_t steps, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + vector2d_t extra_mv, + uint32_t steps, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { // The start of the hexagonal pattern has been repeated at the end so that // the indices between 1-6 can be used as the start of a 3-point list of new @@ -803,11 +803,11 @@ static void hexagon_search(inter_search_info_t *info, * points like 0,0 might be used, such as vectors from top or left. **/ static void diamond_search(inter_search_info_t *info, - vector2d_t extra_mv, - uint32_t steps, - double *best_cost, - uint32_t *best_bits, - vector2d_t *best_mv) + vector2d_t extra_mv, + uint32_t steps, + double *best_cost, + double* best_bits, + vector2d_t *best_mv) { enum diapos { DIA_UP = 0, @@ -888,7 +888,7 @@ static void search_mv_full(inter_search_info_t *info, int32_t search_range, vector2d_t extra_mv, double *best_cost, - uint32_t *best_bits, + double* best_bits, vector2d_t *best_mv) { // Search around the 0-vector. @@ -968,7 +968,7 @@ static void search_mv_full(inter_search_info_t *info, */ static void search_frac(inter_search_info_t *info, double *best_cost, - uint32_t *best_bits, + double *best_bits, vector2d_t *best_mv) { // Map indexes to relative coordinates in the following way: @@ -985,8 +985,8 @@ static void search_frac(inter_search_info_t *info, vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 }; double cost = MAX_DOUBLE; - uint32_t bitcost = 0; - uint32_t bitcosts[4] = { 0 }; + double bitcost = 0; + double bitcosts[4] = { 0 }; unsigned best_index = 0; // Keep this as unsigned until SAD / SATD functions are updated @@ -1314,7 +1314,7 @@ static void search_pu_inter_ref(inter_search_info_t *info, } double best_cost = MAX_DOUBLE; - uint32_t best_bits = MAX_INT; + double best_bits = MAX_INT; // Select starting point from among merge candidates. These should // include both mv_cand vectors and (0, 0). @@ -1338,12 +1338,12 @@ static void search_pu_inter_ref(inter_search_info_t *info, case KVZ_IME_DIA: diamond_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps, - &best_cost, &best_bits, &best_mv); + &best_cost, &best_bits, &best_mv); break; default: hexagon_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps, - &best_cost, &best_bits, &best_mv); + &best_cost, &best_bits, &best_mv); break; } } @@ -1484,7 +1484,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info, double cost = kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width); - uint32_t bitcost[2] = { 0, 0 }; + double bitcost[2] = { 0, 0 }; cost += info->mvd_cost_func(info->state, merge_cand[i].mv[0][0], @@ -1827,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state, list); double frac_cost = MAX_DOUBLE; - uint32_t frac_bits = MAX_INT; + double frac_bits = MAX_INT; vector2d_t frac_mv = { unipred_pu->inter.mv[list][0], unipred_pu->inter.mv[list][1] }; search_frac(info, &frac_cost, &frac_bits, &frac_mv); @@ -1917,7 +1917,7 @@ static void search_pu_inter(encoder_state_t * const state, best_bipred_cost = kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH); - uint32_t bitcost[2] = { 0, 0 }; + double bitcost[2] = { 0, 0 }; best_bipred_cost += info->mvd_cost_func(info->state, bipred_pu->inter.mv[0][0], @@ -1990,10 +1990,10 @@ static void search_pu_inter(encoder_state_t * const state, * \param inter_bitcost Return inter bitcost */ void kvz_cu_cost_inter_rd2(encoder_state_t * const state, - int x, int y, int depth, - lcu_t *lcu, - double *inter_cost, - uint32_t *inter_bitcost){ + int x, int y, int depth, + lcu_t *lcu, + double *inter_cost, + double* inter_bitcost){ cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y)); int tr_depth = MAX(1, depth); @@ -2040,7 +2040,7 @@ void kvz_search_cu_inter(encoder_state_t * const state, int x, int y, int depth, lcu_t *lcu, double *inter_cost, - uint32_t *inter_bitcost) + double* inter_bitcost) { *inter_cost = MAX_DOUBLE; *inter_bitcost = MAX_INT; @@ -2108,10 +2108,10 @@ void kvz_search_cu_inter(encoder_state_t * const state, // Calculate more accurate cost when needed if (state->encoder_control->cfg.rdo >= 2) { kvz_cu_cost_inter_rd2(state, - x, y, depth, - lcu, - inter_cost, - inter_bitcost); + x, y, depth, + lcu, + inter_cost, + inter_bitcost); } if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) { @@ -2146,7 +2146,7 @@ void kvz_search_cu_smp(encoder_state_t * const state, part_mode_t part_mode, lcu_t *lcu, double *inter_cost, - uint32_t *inter_bitcost) + double* inter_bitcost) { *inter_cost = MAX_DOUBLE; *inter_bitcost = MAX_INT; @@ -2173,7 +2173,7 @@ void kvz_search_cu_smp(encoder_state_t * const state, const int height_pu = PU_GET_H(part_mode, width, i); double cost = MAX_DOUBLE; - uint32_t bitcost = MAX_INT; + double bitcost = MAX_INT; search_pu_inter(state, x, y, depth, part_mode, i, lcu, amvp, &merge, &info); @@ -2250,10 +2250,10 @@ void kvz_search_cu_smp(encoder_state_t * const state, // Calculate more accurate cost when needed if (state->encoder_control->cfg.rdo >= 2) { kvz_cu_cost_inter_rd2(state, - x, y, depth, - lcu, - inter_cost, - inter_bitcost); + x, y, depth, + lcu, + inter_cost, + inter_bitcost); } else { *inter_cost += state->lambda_sqrt * smp_extra_bits; } diff --git a/src/search_inter.h b/src/search_inter.h index 8b4b16f2..bb9067c5 100644 --- a/src/search_inter.h +++ b/src/search_inter.h @@ -71,13 +71,13 @@ typedef double kvz_mvd_cost_func(const encoder_state_t *state, inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], int16_t num_cand, int32_t ref_idx, - uint32_t *bitcost); + double *bitcost); void kvz_search_cu_inter(encoder_state_t * const state, int x, int y, int depth, lcu_t *lcu, double *inter_cost, - uint32_t *inter_bitcost); + double* inter_bitcost); void kvz_search_cu_smp(encoder_state_t * const state, int x, int y, @@ -85,7 +85,7 @@ void kvz_search_cu_smp(encoder_state_t * const state, part_mode_t part_mode, lcu_t *lcu, double *inter_cost, - uint32_t *inter_bitcost); + double* inter_bitcost); unsigned kvz_inter_satd_cost(const encoder_state_t* state,