#include "alf.h"

/* NOTE(review): the four directives below carry no header name in this copy
 * of the file; they are kept exactly as found -- confirm against the repository. */
#include
#include
#include
#include

#include "cabac.h"
#include "rdo.h"
#include "strategies/strategies-sao.h"
#include "kvz_math.h"

#if MAX_NUM_CC_ALF_FILTERS>1
/* Pairs a filter index with an occurrence count so the pair can be sorted by count. */
typedef struct filter_idx_count
{
  uint64_t count;
  uint8_t filter_idx;
} filter_idx_count;

/**
 * Three-way comparison of two filter_idx_count entries by `count`, largest
 * count first (presumably handed to qsort(); the caller is not visible here).
 *
 * Returns a positive value when v1 has the smaller count (v1 sorts after v2),
 * a negative value when v1 has the larger count, and 0 when the counts are equal.
 * The earlier form returned only 0 or 1, i.e. it never reported "less than",
 * which violates the ordering contract required of a qsort() comparison function.
 */
int comparator(const void *v1, const void *v2)
{
  const filter_idx_count *p1 = (const filter_idx_count *)v1;
  const filter_idx_count *p2 = (const filter_idx_count *)v2;
  return (p1->count < p2->count) - (p1->count > p2->count);
}
#endif

//-------------------------help functions---------------------------

/* NOTE(review): this body reads `state`, which is not a parameter of the
 * function, and the `#endif // !FULL_FRAME` after it has no visible matching
 * `#if`; text appears to be missing from this copy -- confirm against the repository. */
void set_aps_map(kvz_config *const cfg)
{
  if (g_frame_count == state->frame->num) {
    return;
  }
  g_frame_count = state->frame->num;

  reset_alf_param(&alf_param);

  //int layerIdx = cs.vps == nullptr ? 0 : cs.vps->getGeneralLayerIdx(cs.slice->getPic()->layerId);
  int layer_idx = state->frame->num;
  if (layer_idx && (false/*cs.slice->getPendingRasInit()*/ || (state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP)))
  {
    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) {
      //state->slice->apss[i].aps_id = 0;
      //state->slice->apss[i].aps_type = 0;
      state->slice->apss[i].temporal_id = 0;
      state->slice->apss[i].layer_id = 0;
      reset_alf_param(&state->slice->apss[i]);
      state->slice->apss[i].num_luma_filters = 0;
      state->slice->apss[i].num_alternatives_chroma = 0;
    }
    g_aps_id_start = ALF_CTB_MAX_NUM_APS;
  }

  enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format;
  chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 0 : 1;
  chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 0 : 1;

  //Default clp_rng for a slice
  g_clp_rngs.comp[COMPONENT_Y].min = g_clp_rngs.comp[COMPONENT_Cb].min = g_clp_rngs.comp[COMPONENT_Cr].min = 0;
  g_clp_rngs.comp[COMPONENT_Y].max = (1 << kvz_bit_depth) - 1;
  g_clp_rngs.comp[COMPONENT_Y].bd = kvz_bit_depth;
  g_clp_rngs.comp[COMPONENT_Y].n = 0;
  g_clp_rngs.comp[COMPONENT_Cb].max = g_clp_rngs.comp[COMPONENT_Cr].max = (1 << kvz_bit_depth) - 1;
  g_clp_rngs.comp[COMPONENT_Cb].bd = g_clp_rngs.comp[COMPONENT_Cr].bd = kvz_bit_depth;
  g_clp_rngs.comp[COMPONENT_Cb].n = g_clp_rngs.comp[COMPONENT_Cr].n = 0;
  g_clp_rngs.used = g_clp_rngs.chroma = false;

  //int shiftLuma = 2 * 0;// DISTORTION_PRECISION_ADJUSTMENT(g_input_bit_depth[CHANNEL_TYPE_LUMA]);
  //int shiftChroma = 2 * 0;// DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_CHROMA]);
  g_lambda[COMPONENT_Y] = state->frame->lambda;// *double(1 << shiftLuma);
  g_lambda[COMPONENT_Cb] = state->frame->lambda;// *double(1 << shiftChroma);
  g_lambda[COMPONENT_Cr] = state->frame->lambda;// *double(1 << shiftChroma);

  //g_alf_covariance_cc_alf[0] = 0;
  //g_alf_covariance_cc_alf[1] = 0;
  //g_alf_covariance_frame_cc_alf[0] = 0;
  //g_alf_covariance_frame_cc_alf[1] = 0;
}
#endif // !FULL_FRAME

/**
 * Fills ctu_alts[COMPONENT_Cb] and ctu_alts[COMPONENT_Cr] (num_ctus entries
 * each) with an alternative index per CTU. The index starts at 0 and is
 * incremented after CTU `ctu_idx` whenever
 * (ctu_idx + 1) * g_alf_aps_temp.num_alternatives_chroma >= (alt_idx + 1) * num_ctus,
 * so consecutive CTUs share an index. Both components get the same assignment.
 */
void init_ctu_alternative_chroma(uint8_t* ctu_alts[MAX_NUM_COMPONENT], const int32_t num_ctus)
{
  uint8_t alt_idx = 0;
  for (int ctu_idx = 0; ctu_idx < num_ctus; ++ctu_idx) {
    ctu_alts[COMPONENT_Cb][ctu_idx] = alt_idx;
    if ((ctu_idx + 1) * g_alf_aps_temp.num_alternatives_chroma >= (alt_idx + 1) * num_ctus)
      ++alt_idx;
  }
  alt_idx = 0;
  for (int ctu_idx = 0; ctu_idx < num_ctus; ++ctu_idx) {
    ctu_alts[COMPONENT_Cr][ctu_idx] = alt_idx;
    if ((ctu_idx + 1) * g_alf_aps_temp.num_alternatives_chroma >= (alt_idx + 1) * num_ctus)
      ++alt_idx;
  }
}

/**
 * Returns the sum of (val0 - ref) and (val1 - ref), each clamped to
 * [-clip, +clip] with alf_clip3() before the addition.
 */
int16_t clip_alf(const int16_t clip, const int16_t ref, const int16_t val0, const int16_t val1)
{
  return alf_clip3(-clip, +clip, val0 - ref) + alf_clip3(-clip, +clip, val1 - ref);
}

int alf_clip_pixel(const int a, const clp_rng clp_rng) { return MIN(MAX(clp_rng.min, a), 
clp_rng.max); } int16_t alf_clip3(const int16_t minVal, const int16_t maxVal, const int16_t a) { return MIN(MAX(minVal, a), maxVal); } void get_clip_max(const alf_covariance *cov, int *clip_max) { const int num_coeff = cov->num_coeff; for (int k = 0; k < num_coeff - 1; ++k) { clip_max[k] = 0; bool inc = true; while (inc && clip_max[k] + 1 < cov->num_bins && cov->y[clip_max[k] + 1][k] == cov->y[clip_max[k]][k]) { for (int l = 0; inc && l < num_coeff; ++l) { if (cov->ee[clip_max[k]][0][k][l] != cov->ee[clip_max[k] + 1][0][k][l]) { inc = false; } } if (inc) { ++clip_max[k]; } } } clip_max[num_coeff - 1] = 0; } void reduce_clip_cost(const alf_covariance *cov, int *clip) { for (int k = 0; k < cov->num_coeff - 1; ++k) { bool dec = true; while (dec && clip[k] > 0 && cov->y[clip[k] - 1][k] == cov->y[clip[k]][k]) { for (int l = 0; dec && l < cov->num_coeff; ++l) { if (cov->ee[clip[k]][clip[l]][k][l] != cov->ee[clip[k] - 1][clip[l]][k][l]) { dec = false; } } if (dec) { --clip[k]; } } } } void set_ey_from_clip(const alf_covariance *cov,const int* clip, double ee[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF], double y[MAX_NUM_ALF_LUMA_COEFF], int size) { for (int k = 0; ky[clip[k]][k]; for (int l = 0; lee[clip[k]][clip[l]][k][l]; } } } double optimize_filter(const alf_covariance *cov, int* clip, double *f, bool optimize_clip) { const int size = cov->num_coeff; int clip_max[MAX_NUM_ALF_LUMA_COEFF]; double err_best, err_last; double ke[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF]; double ky[MAX_NUM_ALF_LUMA_COEFF]; if (optimize_clip) { // Start by looking for min clipping that has no impact => max_clipping get_clip_max(cov, clip_max); for (int k = 0; knum_bins - 1); } } set_ey_from_clip(cov, clip, ke, ky, size); gns_solve_by_chol(ke, ky, f, size); err_best = calculate_error(cov, clip, f); int step = optimize_clip ? 
(cov->num_bins + 1) / 2 : 0; while (step > 0) { double err_min = err_best; int idx_min = -1; int inc_min = 0; for (int k = 0; k < size - 1; ++k) { if (clip[k] - step >= clip_max[k]) { clip[k] -= step; ky[k] = cov->y[clip[k]][k]; for (int l = 0; l < size; l++) { ke[k][l] = cov->ee[clip[k]][clip[l]][k][l]; ke[l][k] = cov->ee[clip[l]][clip[k]][l][k]; } gns_solve_by_chol(ke, ky, f, size); err_last = calculate_error(cov, clip, f); if (err_last < err_min) { err_min = err_last; idx_min = k; inc_min = -step; } clip[k] += step; } if (clip[k] + step < cov->num_bins) { clip[k] += step; ky[k] = cov->y[clip[k]][k]; for (int l = 0; l < size; l++) { ke[k][l] = cov->ee[clip[k]][clip[l]][k][l]; ke[l][k] = cov->ee[clip[l]][clip[k]][l][k]; } gns_solve_by_chol(ke, ky, f, size); err_last = calculate_error(cov, clip, f); if (err_last < err_min) { err_min = err_last; idx_min = k; inc_min = step; } clip[k] -= step; } ky[k] = cov->y[clip[k]][k]; for (int l = 0; l < size; l++) { ke[k][l] = cov->ee[clip[k]][clip[l]][k][l]; ke[l][k] = cov->ee[clip[l]][clip[k]][l][k]; } } if (idx_min >= 0) { err_best = err_min; clip[idx_min] += inc_min; ky[idx_min] = cov->y[clip[idx_min]][idx_min]; for (int l = 0; l < size; l++) { ke[idx_min][l] = cov->ee[clip[idx_min]][clip[l]][idx_min][l]; ke[l][idx_min] = cov->ee[clip[l]][clip[idx_min]][l][idx_min]; } } else { --step; } } if (optimize_clip) { // test all max for (int k = 0; k < size - 1; ++k) { clip_max[k] = 0; } double ke_max[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF]; double ky_max[MAX_NUM_ALF_LUMA_COEFF]; set_ey_from_clip(cov, clip_max, ke_max, ky_max, size); gns_solve_by_chol(ke_max, ky_max, f, size); err_last = calculate_error(cov, clip_max, f); if (err_last < err_best) { err_best = err_last; for (int k = 0; k= 0; i--) { double sum = 0; for (int j = i + 1; j <= size; j++) { sum += r[i][j] * a[j]; } a[i] = (z[i] - sum) / r[i][i]; } } void gns_transpose_backsubstitution(double u[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF], double* rhs, double* 
x, int order)
{
  /* Backsubstitution starts */
  x[0] = rhs[0] / u[0][0]; /* First row of U' */
  for (int i = 1; i < order; i++)
  { /* For the rows 1..order-1 */

    double sum = 0; //Holds backsubstitution from already handled rows

    for (int j = 0; j < i; j++) /* Backsubst already solved unknowns */
    {
      sum += x[j] * u[j][i];
    }

    x[i] = (rhs[i] - sum) / u[i][i]; /* i'th component of solution vect. */
  }
}

/**
 * Cholesky factorisation of the num_eq x num_eq matrix inp_matr.
 * Writes the upper-triangular factor to out_matr (entries below the diagonal
 * are set to 0.0 as rows are produced).
 *
 * Returns 0 as soon as a diagonal pivot `scale` is <= REG_SQR (the matrix is
 * treated as singular; out_matr is then only partially written), 1 otherwise.
 */
int gns_cholesky_dec(double inp_matr[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF], double out_matr[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF], int num_eq)
{
  double inv_diag[MAX_NUM_ALF_LUMA_COEFF]; /* Vector of the inverse of diagonal entries of outMatr */
  for (int i = 0; i < num_eq; i++)
  {
    for (int j = i; j < num_eq; j++)
    {
      /* Compute the scaling factor */
      double scale = inp_matr[i][j];
      if (i > 0)
      {
        for (int k = i - 1; k >= 0; k--)
        {
          scale -= out_matr[k][j] * out_matr[k][i];
        }
      }

      /* Compute i'th row of outMatr */
      if (i == j)
      {
        if (scale <= REG_SQR) // if(scale <= 0 ) /* If inpMatr is singular */
        {
          return 0;
        }
        else /* Normal operation */
        {
          // The assignment inside the expression stores the diagonal entry
          // and its reciprocal in one step.
          inv_diag[i] = 1.0 / (out_matr[i][i] = sqrt(scale));
        }
      }
      else
      {
        out_matr[i][j] = scale * inv_diag[i]; /* Upper triangular part */
        out_matr[j][i] = 0.0; /* Lower triangular part set to 0 */
      }
    }
  }
  return 1; /* Signal that Cholesky factorization is successfully performed */
}

/**
 * Solves lhs * x = rhs for x (num_eq unknowns) through a Cholesky
 * factorisation followed by two triangular solves.
 *
 * If the first factorisation fails, REG is added to every diagonal entry of
 * lhs (the caller's matrix is modified in place) and the factorisation is
 * retried once. If that also fails, x is zero-filled.
 *
 * Returns 1 when x holds a computed solution, 0 when x was zero-filled.
 */
int gns_solve_by_chol(double lhs[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF], double rhs[MAX_NUM_ALF_LUMA_COEFF], double *x, int num_eq)
{
  double aux[MAX_NUM_ALF_LUMA_COEFF]; /* Auxiliary vector */
  double u[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF]; /* Upper triangular Cholesky factor of lhs */
  int res = 1; // Signal that Cholesky factorization is successfully performed

  /* The equation to be solved is LHSx = rhs */

  /* Compute upper triangular U such that U'*U = lhs */
  if (gns_cholesky_dec(lhs, u, num_eq)) /* If Cholesky decomposition has been successful */
  {
    /* Now, the equation is U'*U*x = rhs, where U is upper triangular
     * Solve U'*aux = rhs for aux
     */
    gns_transpose_backsubstitution(u, rhs, aux, num_eq);

    /* The equation is now U*x = aux, solve it for x (new motion coefficients) */
    gns_backsubstitution(u, aux, num_eq, x);
  }
  else /* lhs was singular */
  {
    res = 0;

    /* Regularize lhs */
    for (int i = 0; i < num_eq; i++)
    {
      lhs[i][i] += REG;
    }

    /* Compute upper triangular U such that U'*U = regularized lhs */
    res = gns_cholesky_dec(lhs, u, num_eq);

    if (!res)
    {
      memset(x, 0, sizeof(double)*num_eq);
      return 0;
    }

    /* Solve U'*aux = rhs for aux */
    gns_transpose_backsubstitution(u, rhs, aux, num_eq);

    /* Solve U*x = aux for x */
    gns_backsubstitution(u, aux, num_eq, x);
  }
  return res;
}

/**
 * Builds the system (lhs, rhs) for the clipping indices `clip` with
 * set_ey_from_clip() and solves it with gns_solve_by_chol().
 * Returns the value returned by gns_solve_by_chol().
 */
int gns_solve_by_chol_clip_gns(alf_covariance *cov, const int *clip, double *x, int num_eq)
{
  double lhs[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF];
  double rhs[MAX_NUM_ALF_LUMA_COEFF];

  set_ey_from_clip(cov, clip, lhs, rhs, num_eq);
  return gns_solve_by_chol(lhs, rhs, x, num_eq);
}

/**
 * Evaluates the quadratic error expression for integer coefficients `coeff`
 * using the covariance entries selected by `clip`:
 *   sum_i ((ee[i][i]*c[i] + 2*sum_{j>i} ee[i][j]*c[j]) / factor - 2*y[i]) * c[i], divided by factor,
 * with factor = 1 << (bit_depth - 1). Only the upper triangle of ee is read.
 */
double calc_error_for_coeffs(const alf_covariance *cov, const int *clip, const int *coeff, const int num_coeff, const int bit_depth)
{
  double factor = 1 << (bit_depth - 1);
  double error = 0;

  for (int i = 0; i < num_coeff; i++) //diagonal
  {
    double sum = 0;
    for (int j = i + 1; j < num_coeff; j++)
    {
      sum += cov->ee[clip[i]][clip[j]][i][j] * coeff[j];
    }
    error += ((cov->ee[clip[i]][clip[i]][i][i] * coeff[i] + sum * 2) / factor - 2 * cov->y[clip[i]][i]) * coeff[i];
  }

  return error / factor;
}

/**
 * Same expression as calc_error_for_coeffs(), but with int16_t coefficients
 * and always reading clipping index 0 (ee[0][0] and y[0]).
 */
double calc_error_for_cc_alf_coeffs(const alf_covariance *cov, const int16_t* coeff, const int num_coeff, const int bit_depth)
{
  double factor = 1 << (bit_depth - 1);
  double error = 0;

  for (int i = 0; i < num_coeff; i++) // diagonal
  {
    double sum = 0;
    for (int j = i + 1; j < num_coeff; j++)
    {
      // E[j][i] = E[i][j], sum will be multiplied by 2 later
      sum += cov->ee[0][0][i][j] * coeff[j];
    }
    error += ((cov->ee[0][0][i][i] * coeff[i] + sum * 2) / factor - 2 *cov->y[0][i]) * coeff[i];
  }

  return error / factor;
}

int length_uvlc(int ui_code) { int ui_length = 1; int ui_temp = ++ui_code; 
assert(ui_temp); // "Integer overflow" while (1 != ui_temp) { ui_temp >>= 1; ui_length += 2; } // Take care of cases where ui_length > 32 return (ui_length >> 1) + ((ui_length + 1) >> 1); } double get_dist_coeff_force_0(bool* coded_var_bins, double error_force_0_coeff_tab[MAX_NUM_ALF_CLASSES][2], int* bits_var_bin, int zero_bits_var_bin, const int num_filters, double lambda) { double dist_force_0 = 0; memset(coded_var_bins, 0, sizeof(*coded_var_bins) * MAX_NUM_ALF_CLASSES); for (int filt_idx = 0; filt_idx < num_filters; filt_idx++) { double cost_diff = error_force_0_coeff_tab[filt_idx][0] - (error_force_0_coeff_tab[filt_idx][1] + g_lambda[COMPONENT_Y] * bits_var_bin[filt_idx]); coded_var_bins[filt_idx] = cost_diff > 0 ? true : false; dist_force_0 += error_force_0_coeff_tab[filt_idx][coded_var_bins[filt_idx] ? 1 : 0]; } return dist_force_0; } double get_dist_force_0(channel_type channel, const int num_filters, double error_tab_force_0_coeff[MAX_NUM_ALF_CLASSES][2], bool* coded_var_bins, double lambda) { int num_coeff = channel == CHANNEL_TYPE_LUMA ? 
13 : 7; int bits_var_bin[MAX_NUM_ALF_CLASSES]; for (int ind = 0; ind < num_filters; ++ind) { bits_var_bin[ind] = 0; for (int i = 0; i < num_coeff - 1; i++) { bits_var_bin[ind] += length_uvlc(abs(g_filter_coeff_set[ind][i])); if (abs(g_filter_coeff_set[ind][i]) != 0) bits_var_bin[ind] += 1; } } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) /*#else if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) #endif*/ { for (int ind = 0; ind < num_filters; ++ind) { for (int i = 0; i < num_coeff - 1; i++) { if (!abs(g_filter_coeff_set[ind][i])) { g_filter_clipp_set[ind][i] = 0; } } } } double dist_force_0 = get_dist_coeff_force_0(coded_var_bins, error_tab_force_0_coeff, bits_var_bin, num_filters); return dist_force_0; } int get_cost_filter_coeff_force_0(channel_type channel, int **p_diff_q_filter_coeff_int_pp, const int num_filters, bool* coded_var_bins) { const int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7; int len = num_filters; //filter_coefficient_flag[i] // Filter coefficients for (int ind = 0; ind < num_filters; ++ind) { if (coded_var_bins[ind]) { for (int i = 0; i < num_coeff - 1; i++) { len += length_uvlc(abs(p_diff_q_filter_coeff_int_pp[ind][i])); // alf_coeff_luma_delta[i][j] if ((abs(p_diff_q_filter_coeff_int_pp[ind][i]) != 0)) len += 1; } } } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) /*#else if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) #endif*/ { for (int ind = 0; ind < num_filters; ++ind) { for (int i = 0; i < num_coeff - 1; i++) { if (!abs(p_diff_q_filter_coeff_int_pp[ind][i])) { g_filter_clipp_set[ind][i] = 0; } len += 2; } } } return len; } int get_cost_filter_coeff(channel_type channel, int **p_diff_q_filter_coeff_int_pp, const int num_filters) { // #if JVET_O0216_ALF_COEFF_EG3 return length_filter_coeffs(channel, num_filters, p_diff_q_filter_coeff_int_pp); // alf_coeff_luma_delta[i][j]; /* #else const int num_coeff 
= channel == CHANNEL_TYPE_LUMA ? 13 : 7;
  const int max_golomb_idx = channel == CHANNEL_TYPE_LUMA ? 3 : 2;
  const int *golomb_idx = channel == CHANNEL_TYPE_LUMA ? alf_golomb_idx_7 : alf_golomb_idx_5;
  memset(g_bits_coeff_scan, 0, sizeof(g_bits_coeff_scan));

  for (int ind = 0; ind < num_filters; ++ind)
  {
    for (int i = 0; i < num_coeff - 1; i++)
    {
      int coeff_val = abs(p_diff_q_filter_coeff_int_pp[ind][i]);
      for (int k = 1; k < 15; k++)
      {
        g_bits_coeff_scan[golomb_idx[i]][k] += length_golomb(coeff_val, k);
      }
    }
  }

  int k_min = get_golomb_k_min(channel, num_filters, g_k_min_tab, g_bits_coeff_scan);

  // Coding parameters
  int len = k_min //min_golomb_order
    + max_golomb_idx; //golomb_order_increase_flag

  // Filter coefficients
  //len += lengthFilterCoeffs( alfShape, num_filters, p_diff_q_filter_coeff_int_pp, m_kMinTab ); // alf_coeff_luma_delta[i][j]
  for (int ind = 0; ind < num_filters; ++ind)
  {
    for (int i = 0; i < num_coeff - 1; i++)
    {
      len += length_golomb(abs(p_diff_q_filter_coeff_int_pp[ind][i]), g_k_min_tab[golomb_idx[i]]);
    }
  }

  return len;
  */
}

/**
 * Returns the bit count of the clipping indices: 2 bits for each of the
 * (num_coeff - 1) coefficients of each of the num_filters filters.
 *
 * Side effect: g_filter_clipp_set[filter_idx][i] is zeroed wherever the
 * corresponding coefficient in p_diff_q_filter_coeff_int_pp is zero.
 */
int get_cost_filter_clipp(channel_type channel, int **p_diff_q_filter_coeff_int_pp, const int num_filters)
{
  int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7;

  for (int filter_idx = 0; filter_idx < num_filters; ++filter_idx)
  {
    for (int i = 0; i < num_coeff - 1; i++)
    {
      if (!abs(p_diff_q_filter_coeff_int_pp[filter_idx][i]))
      {
        g_filter_clipp_set[filter_idx][i] = 0;
      }
    }
  }

  return (num_filters * (num_coeff - 1)) << 1;
}

/*#if !JVET_O0491_HLS_CLEANUP
int get_tb_length(int ui_symbol, const int ui_max_symbol)
{
  int ui_thresh;
  if (ui_max_symbol > 256)
  {
    int ui_thresh_val = 1 << 8;
    ui_thresh = 8;
    while (ui_thresh_val <= ui_max_symbol)
    {
      ui_thresh++;
      ui_thresh_val <<= 1;
    }
    ui_thresh--;
  }
  else
  {
    ui_thresh = kvz_tb_max[ui_max_symbol];
  }

  int ui_val = 1 << ui_thresh;
  assert(ui_val <= ui_max_symbol);
  assert((ui_val << 1) > ui_max_symbol);
  assert(ui_symbol < ui_max_symbol);
  int b = ui_max_symbol - ui_val;
  assert(b < ui_val);
  if (ui_symbol < ui_val - b)
  {
    return ui_thresh;
  }
  else
  {
    return ui_thresh + 1;
  }
}*/

/**
 * Returns the bit count of the luma side information that is not filter
 * coefficients: 1 + 2 + ue(v) length of (aps->num_luma_filters - 1), plus,
 * when more than one luma filter is used, kvz_math_ceil_log2(num_luma_filters)
 * bits for each of the MAX_NUM_ALF_CLASSES classes.
 */
int get_non_filter_coeff_rate(alf_aps *aps)
{
  //short* filter_coeff_delta_idx = aps->filter_coeff_delta_idx;
  //int fixed_filter_pattern = aps->fixed_filter_pattern;
  //int fixed_filter_set_index = aps->fixed_filter_set_index;

  int len = 1 // alf_coefficients_delta_flag
/*#if !JVET_O0491_HLS_CLEANUP
    + length_truncated_unary(0, 3) // chroma_idc = 0, it is signalled when ALF is enabled for luma
    + get_tb_length(num_luma_filters - 1, MAX_NUM_ALF_CLASSES); //numLumaFilters*/
    + 2 // slice_alf_chroma_idc u(2)
    + length_uvlc(aps->num_luma_filters - 1); // alf_luma_num_filters_signalled_minus1 ue(v)

  if (aps->num_luma_filters > 1)
  {
    const int coeff_length = kvz_math_ceil_log2(aps->num_luma_filters); //#if JVET_O0491_HLS_CLEANUP
    for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++)
    {
      //len += get_tb_length((int)filter_coeff_delta_idx[i], num_luma_filters); //#if !JVET_O0491_HLS_CLEANUP
      len += coeff_length;
    }
  }

/*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED
  len++; //fixed filter set flag
  if (*fixed_filter_set_index > 0)
  {
    len += get_tb_length(*fixed_filter_set_index - 1, ALF_NUM_FIXED_FILTER_SETS);
    len += 1; //fixed filter flag pattern
    if 
(*fixed_filter_pattern > 0)
      len += MAX_NUM_ALF_CLASSES; //"fixed_filter_flag" for each class
  }*/
  return len;
}

/**
 * Returns the bit count of the coefficients of num_filters filters: for each
 * of the first (num_coeff - 1) coefficients, the ue(v) length of its absolute
 * value plus one extra bit when it is non-zero. num_coeff is 13 for luma, 7 otherwise.
 */
int length_filter_coeffs(channel_type channel, const int num_filters, int **filter_coeff)
{
  int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7;
  int bit_cnt = 0;

  for (int ind = 0; ind < num_filters; ++ind)
  {
    for (int i = 0; i < num_coeff - 1; i++)
    {
      bit_cnt += length_uvlc(abs(filter_coeff[ind][i]));
      if (abs(filter_coeff[ind][i]) != 0)
        bit_cnt += 1;
    }
  }
  return bit_cnt;
}

/**
 * Returns cov->pix_acc minus the dot product of `coeff` with the y entries
 * selected by `clip` (y[clip[i]][i]) over cov->num_coeff coefficients.
 */
double calculate_error(const alf_covariance *cov, const int *clip, const double *coeff)
{
  double sum = 0;
  for (int i = 0; i < cov->num_coeff; i++)
  {
    sum += coeff[i] * cov->y[clip[i]][i];
  }

  return cov->pix_acc - sum;
}

/**
 * Returns the result of optimize_filter_gns_calc() for `cov` and `clip`,
 * using a local scratch coefficient buffer that is discarded.
 * NOTE(review): optimize_filter_gns_calc() is not visible in this part of the
 * file; presumably it returns the error of the optimal filter -- confirm.
 */
double calculate_error_opt_filt(alf_covariance *cov, const int *clip)
{
  double c[MAX_NUM_ALF_LUMA_COEFF];
  return optimize_filter_gns_calc(cov, clip, c, cov->num_coeff);
}

/**
 * Returns the bit count of chroma alternative `alt_idx` of `aps`: for each of
 * its first 6 coefficients the ue(v) length of the absolute value plus one
 * extra bit when non-zero.
 *
 * When g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_CHROMA] is set, 2 bits per
 * coefficient are added and aps->chroma_clipp[alt_idx][i] is zeroed wherever
 * the coefficient is zero (the APS passed in is modified).
 */
int get_chroma_coeff_rate(alf_aps* aps, int alt_idx)
{
  int i_bits = 0;

  //AlfFilterShape alfShape(5);
  const int num_coeff = 7;

  // Filter coefficients
  for (int i = 0; i < num_coeff - 1; i++)
  {
    i_bits += length_uvlc(abs(aps->chroma_coeff[alt_idx][i])); // alf_coeff_chroma[alt_idx][i]
    if ((aps->chroma_coeff[alt_idx][i]) != 0)
      i_bits += 1;
  }

  if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_CHROMA])
  {
    for (int i = 0; i < num_coeff - 1; i++)
    {
      if (!abs(aps->chroma_coeff[alt_idx][i]))
      {
        aps->chroma_clipp[alt_idx][i] = 0;
      }
    }
    i_bits += ((num_coeff - 1) << 1);
  }
  return i_bits;
}

/*#if !JVET_O0491_HLS_CLEANUP
int length_truncated_unary(int symbol, int max_symbol)
{
  if (max_symbol == 0)
  {
    return 0;
  }

  bool code_last = (max_symbol > symbol);
  int num_bins = 0;
  while (symbol--)
  {
    num_bins++;
  }
  if (code_last)
  {
    num_bins++;
  }

  return num_bins;
}*/

/**
 * Sums calc_error_for_coeffs() over the first num_classes entries of `cov`,
 * using g_filter_clipp_set[class_idx] and g_filter_coeff_set[class_idx].
 * NOTE(review): num_filters_minus1 is not read, and `bit_depth` is not
 * declared in this function -- presumably a file-level name; confirm.
 */
double get_filtered_distortion(alf_covariance* cov, const int num_classes, const int num_filters_minus1, const int num_coeff)
{
  double dist = 0;

  for (int class_idx = 0; class_idx < num_classes; class_idx++)
  {
    dist += calc_error_for_coeffs(&cov[class_idx], g_filter_clipp_set[class_idx], g_filter_coeff_set[class_idx], num_coeff, bit_depth);
  }

  return dist;
}

/**
 * Returns the summed pix_acc of `cov`: over MAX_NUM_ALF_CLASSES entries for
 * luma, over a single entry for any other channel.
 */
double get_unfiltered_distortion_cov_channel(alf_covariance* cov, channel_type channel)
{
  double dist = 0;
  if (channel == CHANNEL_TYPE_LUMA)
  {
    dist = get_unfiltered_distortion_cov_classes(cov, MAX_NUM_ALF_CLASSES);
  }
  else
  {
    /*#if !JVET_O0491_HLS_CLEANUP
    dist = get_unfiltered_distortion_cov_classes(cov, 1) + length_truncated_unary(0, 3) * g_lambda[COMPONENT_Cb];*/
    dist = get_unfiltered_distortion_cov_classes(cov, 1);
  }
  return dist;
}

/* Returns the sum of pix_acc over the first num_classes entries of `cov`. */
double get_unfiltered_distortion_cov_classes(alf_covariance* cov, const int num_classes)
{
  double dist = 0;
  for (int class_idx = 0; class_idx < num_classes; class_idx++)
  {
    dist += cov[class_idx].pix_acc;
  }
  return dist;
}

void get_frame_stats(channel_type channel, int i_shape_idx, const int32_t num_ctus)
{
  bool is_luma = channel == CHANNEL_TYPE_LUMA ? true : false;
  int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1;
  //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
  int num_alternatives = is_luma ? 1 : g_alf_aps_temp.num_alternatives_chroma;
  // When calling this function m_ctuEnableFlag shall be set to 0 for CTUs using alternative APS
  // Here we compute frame stats for building new alternative filters
  for (int alt_idx = 0; alt_idx < num_alternatives; ++alt_idx)
  {
    for (int i = 0; i < num_classes; i++)
    {
      reset_alf_covariance(&g_alf_covariance_frame[channel][i_shape_idx][is_luma ? 
i : alt_idx], MAX_ALF_NUM_CLIPPING_VALUES); } if (is_luma) { get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_LUMA][i_shape_idx], g_alf_covariance[COMPONENT_Y][i_shape_idx], g_ctu_enable_flag[COMPONENT_Y], NULL, num_classes, alt_idx, num_ctus); } else { get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][i_shape_idx], g_alf_covariance[COMPONENT_Cb][i_shape_idx], g_ctu_enable_flag[COMPONENT_Cb], g_ctu_alternative[COMPONENT_Cb], num_classes, alt_idx, num_ctus); get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][i_shape_idx], g_alf_covariance[COMPONENT_Cr][i_shape_idx], g_ctu_enable_flag[COMPONENT_Cr], g_ctu_alternative[COMPONENT_Cr], num_classes, alt_idx, num_ctus); } } /*#else for (int i = 0; i < num_classes; i++) { reset_alf_covariance(&g_alf_covariance_frame[channel][i_shape_idx][i], g_alf_num_clipping_values[channel]); } if (channel == CHANNEL_TYPE_LUMA) { get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_LUMA][i_shape_idx], g_alf_covariance[COMPONENT_Y][i_shape_idx], g_ctu_enable_flag[COMPONENT_Y], num_classes); } else { get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][i_shape_idx], g_alf_covariance[COMPONENT_Cb][i_shape_idx], g_ctu_enable_flag[COMPONENT_Cb], num_classes); get_frame_stat(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][i_shape_idx], g_alf_covariance[COMPONENT_Cr][i_shape_idx], g_ctu_enable_flag[COMPONENT_Cr], num_classes); } #endif*/ } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB void get_frame_stat(alf_covariance* frame_cov, alf_covariance** ctb_cov, uint8_t* ctb_enable_flags, uint8_t* ctb_alt_idx, const int num_classes, int alt_idx, int ctu_idx) /*#else void get_frame_stat(alf_covariance* frame_cov, alf_covariance** ctb_cov, uint8_t* ctb_enable_flags, const int num_classes) #endif*/ { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const channel_type channel = (!ctb_alt_idx ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA); bool is_luma = channel == CHANNEL_TYPE_LUMA ? 
true : false; for (int ctu_idx = 0; ctu_idx < num_ctus; ctu_idx++) { if (ctb_enable_flags[ctu_idx]) { for (int class_idx = 0; class_idx < num_classes; class_idx++) { if (is_luma || alt_idx == ctb_alt_idx[ctu_idx]) { add_alf_cov(&frame_cov[is_luma ? class_idx : alt_idx], &ctb_cov[ctu_idx][class_idx]); } } } } /*#else for (int i = 0; i < g_num_ctus_in_pic; i++) { if (ctb_enable_flags[i]) { for (int j = 0; j < num_classes; j++) { add_alf_cov(&frame_cov[j], &ctb_cov[i][j]); } } } #endif*/ } void copy_cov(alf_covariance *dst, alf_covariance *src) { dst->num_coeff = src->num_coeff; dst->num_bins = src->num_bins; memcpy(&dst->ee, &src->ee, sizeof(dst->ee)); memcpy(&dst->y, &src->y, sizeof(dst->y)); dst->pix_acc = src->pix_acc; } void copy_alf_param(alf_aps *dst, alf_aps *src) { memcpy(dst->enabled_flag, src->enabled_flag, sizeof(dst->enabled_flag)); memcpy(dst->non_linear_flag, src->non_linear_flag, sizeof(dst->non_linear_flag)); memcpy(dst->luma_coeff, src->luma_coeff, sizeof(dst->luma_coeff)); memcpy(dst->luma_clipp, src->luma_clipp, sizeof(dst->luma_clipp)); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB dst->num_alternatives_chroma = src->num_alternatives_chroma; //#endif memcpy(dst->chroma_coeff, src->chroma_coeff, sizeof(dst->chroma_coeff)); memcpy(dst->chroma_clipp, src->chroma_clipp, sizeof(dst->chroma_clipp)); memcpy(dst->filter_coeff_delta_idx, src->filter_coeff_delta_idx, sizeof(dst->filter_coeff_delta_idx)); memcpy(dst->alf_luma_coeff_flag, src->alf_luma_coeff_flag, sizeof(dst->alf_luma_coeff_flag)); dst->num_luma_filters = src->num_luma_filters; dst->alf_luma_coeff_delta_flag = src->alf_luma_coeff_delta_flag; //#if !JVET_O0669_REMOVE_ALF_COEFF_PRED //dst->alf_luma_coeff_delta_prediction_flag = src->alf_luma_coeff_delta_prediction_flag; //#endif dst->t_layer = src->t_layer; memcpy(dst->new_filter_flag, src->new_filter_flag, sizeof(dst->new_filter_flag)); //#if !JVET_O0669_REMOVE_ALF_COEFF_PRED //dst->fixed_filter_pattern = src->fixed_filter_pattern; 
//memcpy(dst->fixed_filter_idx, src->fixed_filter_idx, sizeof(dst->fixed_filter_idx));
  //dst->fixed_filter_set_index = src->fixed_filter_set_index;
  //#endif
}

/**
 * Copies ALF parameters from src to dst for one channel type.
 * For CHANNEL_TYPE_LUMA the whole parameter set is copied via copy_alf_param()
 * (which also copies the chroma fields); for any other channel only the Cb/Cr
 * enabled flags, num_alternatives_chroma, the chroma non-linear flag and the
 * chroma coefficient/clipping tables are copied.
 */
void copy_alf_param_w_channel(alf_aps* dst, alf_aps* src, channel_type channel)
{
  if (channel == CHANNEL_TYPE_LUMA)
  {
    copy_alf_param(dst, src);
  }
  else
  {
    /*#if !JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
    alfParamDst.nonLinearFlag[channel] = alfParamSrc.nonLinearFlag[channel];
    #endif*/
    dst->enabled_flag[COMPONENT_Cb] = src->enabled_flag[COMPONENT_Cb];
    dst->enabled_flag[COMPONENT_Cr] = src->enabled_flag[COMPONENT_Cr];
    //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
    dst->num_alternatives_chroma = src->num_alternatives_chroma;
    dst->non_linear_flag[CHANNEL_TYPE_CHROMA] = src->non_linear_flag[CHANNEL_TYPE_CHROMA];
    //#endif
    memcpy(dst->chroma_coeff, src->chroma_coeff, sizeof(dst->chroma_coeff));
    memcpy(dst->chroma_clipp, src->chroma_clipp, sizeof(dst->chroma_clipp));
  }
}

/**
 * Resets an ALF parameter set to its defaults: flags, coefficient and
 * clipping tables, delta indices and new_filter_flag are cleared;
 * alf_luma_coeff_flag is filled with `true` bytes; num_alternatives_chroma
 * and num_luma_filters are set to 1; t_layer to 0.
 * Despite its name, `src` is the structure being written.
 */
void reset_alf_param(alf_aps *src)
{
  memset(src->enabled_flag, false, sizeof(src->enabled_flag));
  memset(src->non_linear_flag, false, sizeof(src->non_linear_flag));
  memset(src->luma_coeff, 0, sizeof(src->luma_coeff));
  memset(src->luma_clipp, 0, sizeof(src->luma_clipp));
  //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
  src->num_alternatives_chroma = 1;
  //#endif
  memset(src->chroma_coeff, 0, sizeof(src->chroma_coeff));
  memset(src->chroma_clipp, 0, sizeof(src->chroma_clipp));
  memset(src->filter_coeff_delta_idx, 0, sizeof(src->filter_coeff_delta_idx));
  memset(src->alf_luma_coeff_flag, true, sizeof(src->alf_luma_coeff_flag));
  src->num_luma_filters = 1;
  src->alf_luma_coeff_delta_flag = false;
  /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED
  alfLumaCoeffDeltaPredictionFlag = false;
  #endif*/
  src->t_layer = 0;
  memset(src->new_filter_flag, 0, sizeof(src->new_filter_flag));
  /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED
  fixedFilterPattern = 0;
  std::memset(fixedFilterIdx, 0, sizeof(fixedFilterIdx));
  fixedFilterSetIndex = 0;
  #endif*/
}

/**
 * Adds src to dst element-wise (ee, y and pix_acc).
 * The iteration extents are taken from dst->num_bins and dst->num_coeff.
 */
void add_alf_cov(alf_covariance *dst, alf_covariance *src)
{
  int num_bins = dst->num_bins;
  int num_coeff = dst->num_coeff;

  for (int b0 = 0; b0 < num_bins; b0++)
  {
    for (int b1 = 0; b1 < num_bins; b1++)
    {
      for (int j = 0; j < num_coeff; j++)
      {
        for (int i = 0; i < num_coeff; i++)
        {
          dst->ee[b0][b1][j][i] += src->ee[b0][b1][j][i];
        }
      }
    }
  }
  for (int b = 0; b < num_bins; b++)
  {
    for (int j = 0; j < num_coeff; j++)
    {
      dst->y[b][j] += src->y[b][j];
    }
  }
  dst->pix_acc += src->pix_acc;
}

/**
 * Writes lhs + rhs element-wise into dst (ee, y and pix_acc).
 * The iteration extents are taken from lhs; dst->num_coeff and dst->num_bins
 * are neither read nor written here.
 */
void add_alf_cov_lhs_rhs(alf_covariance *dst, alf_covariance *lhs, alf_covariance *rhs)
{
  int num_coeff = lhs->num_coeff;
  int num_bins = lhs->num_bins;

  for (int b0 = 0; b0 < num_bins; b0++)
  {
    for (int b1 = 0; b1 < num_bins; b1++)
    {
      for (int j = 0; j < num_coeff; j++)
      {
        for (int i = 0; i < num_coeff; i++)
        {
          dst->ee[b0][b1][j][i] = lhs->ee[b0][b1][j][i] + rhs->ee[b0][b1][j][i];
        }
      }
    }
  }
  for (int b = 0; b < num_bins; b++)
  {
    for (int j = 0; j < num_coeff; j++)
    {
      dst->y[b][j] = lhs->y[b][j] + rhs->y[b][j];
    }
  }
  dst->pix_acc = lhs->pix_acc + rhs->pix_acc;
}

/**
 * Zeroes pix_acc and the whole y and ee arrays of `alf`.
 * num_bins is stored only when it is positive; otherwise the existing
 * alf->num_bins is kept. num_coeff is not touched.
 */
void reset_alf_covariance(alf_covariance *alf, int num_bins)
{
  if (num_bins > 0) {
    alf->num_bins = num_bins;
  }
  alf->pix_acc = 0;
  memset(alf->y, 0, sizeof(alf->y));
  memset(alf->ee, 0, sizeof(alf->ee));
}

/**
 * Resets a cross-component ALF parameter set: enable flags and coefficients
 * are cleared, both filter counts are set to MAX_NUM_CC_ALF_FILTERS,
 * number_valid_components to 3 and both new_cc_alf_filter entries to 0.
 */
void reset_cc_alf_aps_param(cc_alf_filter_param *cc_alf)
{
  memset(cc_alf->cc_alf_filter_enabled, false, sizeof(cc_alf->cc_alf_filter_enabled));
  memset(cc_alf->cc_alf_filter_idx_enabled, false, sizeof(cc_alf->cc_alf_filter_idx_enabled));
  memset(cc_alf->cc_alf_coeff, 0, sizeof(cc_alf->cc_alf_coeff));
  cc_alf->cc_alf_filter_count[0] = cc_alf->cc_alf_filter_count[1] = MAX_NUM_CC_ALF_FILTERS;
  cc_alf->number_valid_components = 3;
  cc_alf->new_cc_alf_filter[0] = cc_alf->new_cc_alf_filter[1] = 0;
}

void copy_pixels(kvz_pixel *src, int x_src_start, int y_src_start, int src_stride, kvz_pixel *dst, int x_dst_start, int y_dst_start, int dst_stride, int width, int height)
{
  for (int y = 0; y < height; y++)
  {
    int src_y = y_src_start + y;
    int dst_y = y_dst_start + y;
    for (int x = 0; x < width; x++)
    {
int src_x = x_src_start + x; int dst_x = x_dst_start + x; dst[dst_y*dst_stride + dst_x] = src[src_y*src_stride + src_x]; } } } void adjust_pixels(kvz_pixel *src, int x_start, int x_end, int y_start, int y_end, int stride, int pic_width, int pic_height) { assert(x_start <= x_end); assert(y_start <= y_end); assert(x_end <= pic_width); assert(y_end <= pic_height); //not on any edge if (x_start != 0 && y_start != 0 && x_end != pic_width && y_end != pic_height) { return; } bool top_left = (x_start == 0 && y_start == 0); bool top_right = (x_end == pic_width && y_start == 0); bool bottom_left = (x_start == 0 && y_end == pic_height); bool bottom_right = (x_end == pic_width && y_end == pic_height); //left side if (x_start == 0) { for (int y = y_start; y < y_end; y++) { src[y * stride - 4] = src[y * stride - 3] = src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } } //right side if (x_end == pic_width) { const int x_px = x_end - 1; for (int y = y_start; y < y_end; y++) { src[y * stride + x_px + 4] = src[y * stride + x_px + 3] = src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } } //top if (y_start == 0) { for (int x = x_start; x < x_end; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } } //bottom if (y_end == pic_height) { const int y_px = y_end - 1; for (int x = x_start; x < x_end; x++) { src[x + stride * (4 + y_px)] = src[x + stride * (3 + y_px)] = src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } } //left top corner if (top_left) { for (int x = -4; x < 0; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[0]; } } //right top corner if (top_right) { const int x_px = x_end - 1; for (int x = pic_width; x < pic_width + 4; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x_px]; } } //left or right bottom corner if (bottom_left) { 
const int y_px = y_end - 1; for (int x = -4; x < 0; x++) { src[(4 + y_px) * stride + x] = src[(3 + y_px) * stride + x] = src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px]; } } if (bottom_right) { const int x_px = x_end - 1; const int y_px = y_end - 1; for (int x = x_end; x < x_end + 4; x++) { src[(4 + y_px) * stride + x] = src[(3 + y_px) * stride + x] = src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px + x_px]; } } } void adjust_pixels_CTU_plus_4_pix(kvz_pixel *src, int x_start, int x_end, int y_start, int y_end, int stride, int pic_width, int pic_height) { assert(x_start <= x_end); assert(y_start <= y_end); assert(x_end <= pic_width); assert(y_end <= pic_height); //not on any edge if (x_start != 0 && y_start != 0 && x_end != pic_width && y_end != pic_height) { return; } bool top_left = (x_start == 0 && y_start == 0); bool top_right = (x_end == pic_width && y_start == 0); bool bottom_left = (x_start == 0 && y_end == pic_height); bool bottom_right = (x_end == pic_width && y_end == pic_height); //left side if (top_left && !bottom_left) { for (int y = y_start; y < y_end + MAX_ALF_PADDING_SIZE; y++) { src[y * stride - 4] = src[y * stride - 3] = src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } } else if (!top_left && bottom_left) { for (int y = y_start + MAX_ALF_PADDING_SIZE; y < y_end; y++) { src[y * stride - 4] = src[y * stride - 3] = src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } } else if (top_left && bottom_left) { for (int y = y_start; y < y_end; y++) { src[y * stride - 4] = src[y * stride - 3] = src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } } else if (x_start == 0) { for (int y = y_start + MAX_ALF_PADDING_SIZE; y < y_end + MAX_ALF_PADDING_SIZE; y++) { src[y * stride - 4] = src[y * stride - 3] = src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } }//left side //right side if (top_right && !bottom_right) { const int x_px = x_end - 1; for (int y = 
y_start; y < y_end + MAX_ALF_PADDING_SIZE; y++) { src[y * stride + x_px + 4] = src[y * stride + x_px + 3] = src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } } else if (!top_right && bottom_right) { const int x_px = x_end - 1; for (int y = y_start + MAX_ALF_PADDING_SIZE; y < y_end; y++) { src[y * stride + x_px + 4] = src[y * stride + x_px + 3] = src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } } else if (top_right && bottom_right) { const int x_px = x_end - 1; for (int y = y_start; y < y_end; y++) { src[y * stride + x_px + 4] = src[y * stride + x_px + 3] = src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } } else if (x_end == pic_width) { const int x_px = x_end - 1; for (int y = y_start + MAX_ALF_PADDING_SIZE; y < y_end + MAX_ALF_PADDING_SIZE; y++) { src[y * stride + x_px + 4] = src[y * stride + x_px + 3] = src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } }//right side //top if (top_left && !top_right) { for (int x = x_start; x < x_end + MAX_ALF_PADDING_SIZE; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } } else if (!top_left && top_right) { for (int x = x_start + MAX_ALF_PADDING_SIZE; x < x_end; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } } else if (top_left && top_right) { for (int x = x_start; x < x_end; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } } else if (y_start == 0) { for (int x = x_start + MAX_ALF_PADDING_SIZE; x < x_end + MAX_ALF_PADDING_SIZE; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } }//top //bottom if (bottom_left && !bottom_right) { const int y_px = y_end - 1; for (int x = x_start; x < x_end + MAX_ALF_PADDING_SIZE; x++) { src[x + stride * (4 + y_px)] = src[x + 
stride * (3 + y_px)] = src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } } else if (!bottom_left && bottom_right) { const int y_px = y_end - 1; for (int x = x_start + MAX_ALF_PADDING_SIZE; x < x_end; x++) { src[x + stride * (4 + y_px)] = src[x + stride * (3 + y_px)] = src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } } else if (bottom_left && bottom_right) { const int y_px = y_end - 1; for (int x = x_start; x < x_end; x++) { src[x + stride * (4 + y_px)] = src[x + stride * (3 + y_px)] = src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } } else if (y_end == pic_height) { const int y_px = y_end - 1; for (int x = x_start + MAX_ALF_PADDING_SIZE; x < x_end + MAX_ALF_PADDING_SIZE; x++) { src[x + stride * (4 + y_px)] = src[x + stride * (3 + y_px)] = src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } }//bottom //left top corner if (top_left) { for (int x = -4; x < 0; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[0]; } } //right top corner if (top_right) { const int x_px = x_end - 1; for (int x = pic_width; x < pic_width + 4; x++) { src[-4 * stride + x] = src[-3 * stride + x] = src[-2 * stride + x] = src[-1 * stride + x] = src[x_px]; } } //left bottom corner if (bottom_left) { const int y_px = y_end - 1; for (int x = -4; x < 0; x++) { src[(4 + y_px) * stride + x] = src[(3 + y_px) * stride + x] = src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px]; } } //right bottom corner if (bottom_right) { const int x_px = x_end - 1; const int y_px = y_end - 1; for (int x = x_end; x < x_end + 4; x++) { src[(4 + y_px) * stride + x] = src[(3 + y_px) * stride + x] = src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px + x_px]; } } } //Need to adjust void adjust_pixels_chroma(kvz_pixel *src, int x_start, int x_end, int y_start, int 
y_end, int stride, int pic_width, int pic_height) { assert(x_start <= x_end); assert(y_start <= y_end); assert(x_end <= pic_width); assert(y_end <= pic_height); //not on any edge if (x_start != 0 && y_start != 0 && x_end != pic_width && y_end != pic_height) { return; } bool top_left = (x_start == 0 && y_start == 0); bool top_right = (x_end == pic_width && y_start == 0); bool bottom_left = (x_start == 0 && y_end == pic_height); bool bottom_right = (x_end == pic_width && y_end == pic_height); //left side if (x_start == 0) { for (int y = y_start; y < y_end; y++) { src[y * stride - 2] = src[y * stride - 1] = src[y * stride]; } } //right side if (x_end == pic_width) { const int x_px = x_end - 1; for (int y = y_start; y < y_end; y++) { src[y * stride + x_px + 2] = src[y * stride + x_px + 1] = src[y * stride + x_px]; } } //top if (y_start == 0) { for (int x = x_start; x < x_end; x++) { src[-2 * stride + x] = src[-1 * stride + x] = src[x]; } } //bottom if (y_end == pic_height) { const int y_px = y_end - 1; for (int x = x_start; x < x_end; x++) { src[x + stride * (2 + y_px)] = src[x + stride * (1 + y_px)] = src[x + stride * y_px]; } } //left top corner if (top_left) { for (int x = -2; x < 0; x++) { src[-2 * stride + x] = src[-1 * stride + x] = src[0]; } } //right top corner if (top_right) { const int x_px = x_end - 1; for (int x = pic_width; x < pic_width + 2; x++) { src[-2 * stride + x] = src[-1 * stride + x] = src[x_px]; } } //left or right bottom corner if (bottom_left) { const int y_px = y_end - 1; for (int x = -2; x < 0; x++) { src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px]; } } if (bottom_right) { const int x_px = x_end - 1; const int y_px = y_end - 1; for (int x = x_end; x < x_end + 2; x++) { src[(2 + y_px) * stride + x] = src[(1 + y_px) * stride + x] = src[stride * y_px + x_px]; } } } void set_ctu_enable_flag(uint8_t **flags, channel_type channel, uint8_t value, const int32_t num_ctus) { if (channel == CHANNEL_TYPE_LUMA) { 
memset(flags[COMPONENT_Y], value, sizeof(uint8_t) * num_ctus); } else { memset(flags[COMPONENT_Cr], value, sizeof(uint8_t) * num_ctus); memset(flags[COMPONENT_Cb], value, sizeof(uint8_t) * num_ctus); } } void copy_ctu_enable_flag(uint8_t **flags_dst, uint8_t **flags_src, channel_type channel, const int32_t num_ctus) { if (channel == CHANNEL_TYPE_LUMA) { memcpy(flags_dst[COMPONENT_Y], flags_src[COMPONENT_Y], sizeof(uint8_t) * num_ctus); } else { memcpy(flags_dst[COMPONENT_Cr], flags_src[COMPONENT_Cr], sizeof(uint8_t) * num_ctus); memcpy(flags_dst[COMPONENT_Cb], flags_src[COMPONENT_Cb], sizeof(uint8_t) * num_ctus); } } //------------------------------------------------------------------- //-------------------------encoding functions------------------------ void kvz_alf_enc_process(encoder_state_t *const state) { kvz_alf_enc_create(state); if (1 /*!layerIdx*/ && (false/*cs.slice->getPendingRasInit()*/ || (state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP))) { for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { state->slice->apss[i].aps_id = -1; state->slice->apss[i].aps_type = 0; state->slice->apss[i].temporal_id = 0; state->slice->apss[i].layer_id = 0; reset_alf_param(&state->slice->apss[i]); state->slice->apss[i].num_luma_filters = 0; state->slice->apss[i].num_alternatives_chroma = 0; if (state->encoder_control->cfg.param_set_map[i + T_ALF_APS].b_changed) { alf_aps* alf_aps = &state->encoder_control->cfg.param_set_map[i + T_ALF_APS].parameter_set; cc_alf_filter_param* cc_alf_aps = &state->encoder_control->cfg.param_set_map[i + T_ALF_APS].parameter_set.cc_alf_aps_param; state->encoder_control->cfg.param_set_map[i + T_ALF_APS].b_changed = false; if (alf_aps) { alf_aps->aps_id = 0; alf_aps->aps_type = 0; alf_aps->temporal_id = 0; alf_aps->layer_id = 0; reset_alf_param(alf_aps); reset_cc_alf_aps_param(cc_alf_aps); } } } g_aps_id_start = ALF_CTB_MAX_NUM_APS; } alf_aps alf_param; reset_alf_param(&alf_param); enum kvz_chroma_format 
chroma_fmt = state->encoder_control->chroma_format; bool chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 0 : 1; bool chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 0 : 1; int8_t kvz_bit_depth = state->encoder_control->bitdepth; const int32_t num_ctus_in_pic = state->lcu_order_count; //Default clp_rng g_clp_rngs.comp[COMPONENT_Y].min = g_clp_rngs.comp[COMPONENT_Cb].min = g_clp_rngs.comp[COMPONENT_Cr].min = 0; g_clp_rngs.comp[COMPONENT_Y].max = (1 << kvz_bit_depth) - 1; g_clp_rngs.comp[COMPONENT_Y].bd = kvz_bit_depth; g_clp_rngs.comp[COMPONENT_Y].n = 0; g_clp_rngs.comp[COMPONENT_Cb].max = g_clp_rngs.comp[COMPONENT_Cr].max = (1 << kvz_bit_depth) - 1; g_clp_rngs.comp[COMPONENT_Cb].bd = g_clp_rngs.comp[COMPONENT_Cr].bd = kvz_bit_depth; g_clp_rngs.comp[COMPONENT_Cb].n = g_clp_rngs.comp[COMPONENT_Cr].n = 0; g_clp_rngs.used = g_clp_rngs.chroma = false; double lambda_chroma_weight = 0.0; cabac_data_t ctx_start; cabac_data_t ctx_start_cc_alf; memcpy(&cabac_estimator, &state->cabac, sizeof(cabac_estimator)); memcpy(&ctx_start, &state->cabac, sizeof(ctx_start)); memcpy(&ctx_start_cc_alf, &cabac_estimator, sizeof(ctx_start_cc_alf)); cabac_estimator.only_count = 1; ctx_start.only_count = 1; ctx_start_cc_alf.only_count = 1; // derive classification const int luma_height = state->tile->frame->height; const int luma_width = state->tile->frame->width; for (int y_pos = 0; y_pos < luma_height; y_pos += LCU_WIDTH) { for (int x_pos = 0; x_pos < lumaWidth; x_pos += maxCUWidth) {*/ const int y_pos = lcu->position_px.y; const int x_pos = lcu->position_px.x; const int width = lcu->size.x; //(x_pos + maxCUWidth > lumaWidth) ? (lumaWidth - x_pos) : maxCUWidth; const int height = lcu->size.y; //(y_pos + maxCUHeight > lumaHeight) ? 
(lumaHeight - y_pos) : maxCUHeight; int raster_slice_alf_pad = 0; //Tätä if-lauseen sisällä olevaa algoritmia pitää vielä viilata if (is_crossed_by_virtual_boundaries(x_pos, y_pos, width, height, &clip_top, &clip_bottom, &clip_left, &clip_right, &num_hor_vir_bndry, &num_ver_vir_bndry, hor_vir_bndry_pos, ver_vir_bndry_pos, state)) { int y_start = y_pos; for (int i = 0; i <= num_hor_vir_bndry; i++) { const int width = (x_pos + LCU_WIDTH > luma_width) ? (luma_width - x_pos) : LCU_WIDTH; const int height = (y_pos + LCU_WIDTH > luma_height) ? (luma_height - y_pos) : LCU_WIDTH; { kvz_alf_derive_classification(state, width, height, x_pos, y_pos, x_pos, y_pos); } } } // get CTB stats for filtering kvz_alf_derive_stats_for_filtering(state); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB /*for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++) { g_alf_ctb_filter_index[ctb_iIdx] = ALF_NUM_FIXED_FILTER_SETS; } // consider using new filter (only) alf_param.new_filter_flag[CHANNEL_TYPE_LUMA] = true; alf_param.new_filter_flag[CHANNEL_TYPE_CHROMA] = true; state->slice->tile_group_num_aps = 1; // Only new filter for RD cost optimization // derive filter (luma) kvz_alf_encoder(state, &alf_param, CHANNEL_TYPE_LUMA, lambda_chroma_weight ); // derive filter (chroma) if (state->encoder_control->chroma_format != KVZ_CSP_400) { kvz_alf_encoder(state, &alf_param, CHANNEL_TYPE_CHROMA, lambda_chroma_weight ); } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB // let alfEncoderCtb decide now alf_param.new_filter_flag[CHANNEL_TYPE_LUMA] = false; alf_param.new_filter_flag[CHANNEL_TYPE_CHROMA] = false; state->slice->tile_group_num_aps = 0; //#endif //m_CABACEstimator->getCtx() = AlfCtx(ctxStart); memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); kvz_alf_encoder_ctb(state, &alf_param, lambda_chroma_weight); //for (int s = 0; s < state.; s++) //numSliceSegments { if (state->encoder_control->cfg.lossless) { for (uint32_t ctb_idx = 0; ctb_idx < num_ctus_in_pic; ctb_idx++) 
//pcPic->slices[s]->getNumCtuInSlice() { //uint32_t ctuRsAddr = pcPic->slices[s]->getCtuAddrInSlice(ctuIdx); g_ctu_enable_flag[COMPONENT_Y][ctb_idx] = 0; g_ctu_enable_flag[COMPONENT_Cb][ctb_idx] = 0; g_ctu_enable_flag[COMPONENT_Cr][ctb_idx] = 0; } } } kvz_alf_reconstruct(state); // Do not transmit CC ALF if it is unchanged if(state->slice->tile_group_alf_enabled_flag[COMPONENT_Y]) { for (int32_t luma_alf_aps_id = 0; luma_alf_aps_id < state->slice->tile_group_num_aps; luma_alf_aps_id++ ) { //APS* aps = (luma_alf_aps_id >= 0) ? m_apsMap->getPS((luma_alf_aps_id << NUM_APS_TYPE_LEN) + ALF_APS) : nullptr; int aps_id = state->slice->tile_group_luma_aps_id[luma_alf_aps_id]; alf_aps* aps = (aps_id >= 0) ? &state->encoder_control->cfg.param_set_map[aps_id + T_ALF_APS].parameter_set : NULL; bool changed = state->encoder_control->cfg.param_set_map[aps_id + T_ALF_APS].b_changed; if (aps && changed) { aps->cc_alf_aps_param.new_cc_alf_filter[0] = false; aps->cc_alf_aps_param.new_cc_alf_filter[1] = false; } } } int chroma_alf_aps_id = (state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] || state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr]) ? state->slice->tile_group_chroma_aps_id : -1; alf_aps* aps = (chroma_alf_aps_id >= 0) ? &state->encoder_control->cfg.param_set_map[chroma_alf_aps_id + T_ALF_APS].parameter_set : NULL; bool changed = (chroma_alf_aps_id >= 0) ? 
state->encoder_control->cfg.param_set_map[chroma_alf_aps_id + T_ALF_APS].b_changed : 0; if (aps && changed) { aps->cc_alf_aps_param.new_cc_alf_filter[0] = false; aps->cc_alf_aps_param.new_cc_alf_filter[1] = false; } if (state->encoder_control->cfg.alf_type != 2) { return; } const kvz_picture *org_yuv = state->tile->frame->source; const kvz_picture *rec_yuv = state->tile->frame->rec; const int luma_stride = state->tile->frame->rec->stride; const int chroma_stride = luma_stride >> chroma_scale_x; const int chroma_height = luma_height >> chroma_scale_y; const int chroma_padding = MAX_ALF_PADDING_SIZE >> chroma_scale_x; const int index_chroma = -(chroma_stride * chroma_padding + chroma_padding); //Copy reconstructed samples to a buffer. memcpy(&alf_tmp_u[index_chroma], &state->tile->frame->rec->u[index_chroma], sizeof(kvz_pixel) * chroma_stride * (chroma_height + chroma_padding * 2)); memcpy(&alf_tmp_v[index_chroma], &state->tile->frame->rec->v[index_chroma], sizeof(kvz_pixel) * chroma_stride * (chroma_height + chroma_padding * 2)); adjust_pixels_chroma(alf_tmp_u, 0, rec_yuv->width >> chroma_scale_x, 0, rec_yuv->height >> chroma_scale_y, rec_yuv->stride >> chroma_scale_x, rec_yuv->width >> chroma_scale_x, rec_yuv->height >> chroma_scale_y); adjust_pixels_chroma(alf_tmp_v, 0, rec_yuv->width >> chroma_scale_x, 0, rec_yuv->height >> chroma_scale_y, rec_yuv->stride >> chroma_scale_x, rec_yuv->width >> chroma_scale_x, rec_yuv->height >> chroma_scale_y); const int num_ctus_in_width = state->tile->frame->width_in_lcu; derive_stats_for_cc_alf_filtering(state, org_yuv, COMPONENT_Cb, num_ctus_in_width, (0 + 1)); derive_stats_for_cc_alf_filtering(state, org_yuv, COMPONENT_Cr, num_ctus_in_width, (0 + 1)); init_distortion_cc_alf(num_ctus_in_pic); memcpy(&cabac_estimator, &ctx_start_cc_alf, sizeof(cabac_estimator)); derive_cc_alf_filter(state, COMPONENT_Cb, org_yuv, rec_yuv); memcpy(&cabac_estimator, &ctx_start_cc_alf, sizeof(cabac_estimator)); derive_cc_alf_filter(state, 
/**
 * \brief Choose, CTU by CTU, whether ALF is enabled for a channel and return the summed cost.
 *
 * For every CTU and every component of the channel (COMPONENT_Y for luma,
 * COMPONENT_Cb..COMPONENT_Cr otherwise) a "filter on" cost and a "filter off"
 * cost are computed and the cheaper choice is stored in g_ctu_enable_flag.
 * For chroma the cheapest filter alternative is additionally stored in
 * g_ctu_alternative.
 *
 * Globals written: g_alf_aps_temp.enabled_flag, g_filter_coeff_set,
 * g_filter_clipp_set, g_ctu_enable_flag, g_ctu_alternative and
 * cabac_estimator (saved and restored through local snapshots while the
 * candidate codings are tried).
 *
 * \param state          encoder state, forwarded to the coding helpers
 * \param channel        CHANNEL_TYPE_LUMA selects luma; any other value selects chroma
 * \param i_shape_idx    second index into g_alf_covariance
 * \param dist_unfilter  output; reset to 0, then increased by the unfiltered
 *                       distortion of every CTU/component that ends up disabled
 * \param num_classes    forwarded to the distortion helpers
 * \param chroma_weight  asserted to be <= 0.0, so with assertions enabled
 *                       ctu_lambda is always g_lambda[comp_id]
 * \return sum of the chosen (on or off) cost over all CTUs and components
 */
double kvz_alf_derive_ctb_alf_enable_flags(encoder_state_t * const state,
  channel_type channel,
  const int i_shape_idx,
  double *dist_unfilter,
  const int num_classes,
  const double chroma_weight
  )
{
  /* TempCtx ctxTempStart(m_CtxCache);
  TempCtx ctxTempBest(m_CtxCache);*/
  // Estimator snapshots: state before the current CTU/component is tried,
  // and state after its cheapest "on" coding.
  cabac_data_t ctx_temp_start;
  cabac_data_t ctx_temp_best;
  /*#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
  TempCtx ctxTempAltStart(m_CtxCache);
  TempCtx ctxTempAltBest(m_CtxCache);*/
  // Estimator snapshot every chroma alternative after the first is started from.
  cabac_data_t ctx_temp_alt_start;
  //cabac_data_t ctx_temp_alt_best;
  //#endif

  bool is_luma = channel == CHANNEL_TYPE_LUMA ? 1 : 0;
  // NOTE(review): these hold component ids but are typed kvz_pixel — confirm the type is intended.
  const kvz_pixel comp_id_first = is_luma ? COMPONENT_Y : COMPONENT_Cb;
  const kvz_pixel comp_id_last = is_luma ? COMPONENT_Y : COMPONENT_Cr;
  //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
  const int num_alts = is_luma ? 1 : g_alf_aps_temp.num_alternatives_chroma;
  //#endif
  int num_coeff = is_luma ? 13 : 7;

  double cost = 0;
  // NOTE(review): lambda is never read below; the per-CTU costs use ctu_lambda instead.
  double lambda = state->frame->lambda;
  *dist_unfilter = 0;

  // Start with every component of the channel enabled.
  if (is_luma) {
    g_alf_aps_temp.enabled_flag[COMPONENT_Y] = 1;
  }
  else {
    g_alf_aps_temp.enabled_flag[COMPONENT_Cb] = 1;
    g_alf_aps_temp.enabled_flag[COMPONENT_Cr] = 1;
  }

  assert((chroma_weight <= 0.0) && (state->slice->start_in_rs == 0)); //"incompatible start CTU address, must be 0"

  kvz_alf_reconstruct_coeff(state, &g_alf_aps_temp, channel, true, is_luma);

  // Copy the final coefficients and clipping values into the working sets:
  // one row per class for luma, one row per alternative for chroma.
  //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
  for (int alt_idx = 0; alt_idx < (is_luma ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA); alt_idx++)
  {
    for (int class_idx = 0; class_idx < (is_luma ? MAX_NUM_ALF_CLASSES : 1); class_idx++)
    {
      for (int i = 0; i < (is_luma ? MAX_NUM_ALF_LUMA_COEFF : MAX_NUM_ALF_CHROMA_COEFF); i++)
      {
        g_filter_coeff_set[is_luma ? class_idx : alt_idx][i] = is_luma ? g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + i] : g_chroma_coeff_final[alt_idx][i];
        g_filter_clipp_set[is_luma ? class_idx : alt_idx][i] = is_luma ? g_clipp_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + i] : g_chroma_clipp_final[alt_idx][i];
      }
    }
  }
  /*#else
  for (int class_idx = 0; class_idx < (is_luma ? MAX_NUM_ALF_CLASSES : 1); class_idx++)
  {
    for (int i = 0; i < (is_luma ? MAX_NUM_ALF_LUMA_COEFF : MAX_NUM_ALF_CHROMA_COEFF); i++)
    {
      g_filter_coeff_set[class_idx][i] = is_luma ? g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + i] : g_chroma_coeff_final[i];
      g_filter_clipp_set[class_idx][i] = is_luma ? g_clipp_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + i] : g_chroma_clipp_final[i];
    }
  }
  #endif*/

  for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++)
  {
    for (int comp_id = comp_id_first; comp_id <= comp_id_last; comp_id++)
    {
      //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
      //#if ENABLE_QPA
      // The chroma_weight > 0.0 arm evaluates to 0 here (the energy terms are
      // commented out); the assert above restricts chroma_weight to <= 0.0.
      const double ctu_lambda = chroma_weight > 0.0 ? (is_luma ? 0/*cs.picture->m_uEnerHpCtu[ctuIdx]*/ : 0/*cs.picture->m_uEnerHpCtu[ctuIdx]*/ / chroma_weight) : g_lambda[comp_id];
      /*#else
      const double ctu_lambda = m_lambda[comp_id];
      #endif
      #endif*/

      double dist_unfilter_ctu = get_unfiltered_distortion_cov_classes(g_alf_covariance[comp_id][i_shape_idx][ctu_idx], num_classes);

      // Remember the estimator state so it can be restored before the "off" trial.
      //ctxTempStart = AlfCtx(m_CABACEstimator->getCtx());
      memcpy(&ctx_temp_start, &cabac_estimator, sizeof(ctx_temp_start));
      //m_CABACEstimator->resetBits();
      kvz_cabac_reset_bits(&cabac_estimator);
      cabac_estimator.only_count = 1;

      // --- Trial 1: ALF enabled for this CTU/component ---
      g_ctu_enable_flag[comp_id][ctu_idx] = 1;
      code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp);
      //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
      if (is_luma)
      {
        // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation
        assert(g_alf_ctb_filter_index[ctu_idx] == ALF_NUM_FIXED_FILTER_SETS);
        assert(state->slice->tile_group_num_aps == 1);
        code_alf_ctu_filter_index(state, &cabac_estimator, ctu_idx, g_alf_aps_temp.enabled_flag[COMPONENT_Y]);
      }
      // NOTE(review): ctu_lambda scales only (23 - bits_left); the
      // (num_buffered_bytes << 3) term is added unscaled. If both terms
      // together are meant to form the bit count, parentheses are missing —
      // confirm against the CABAC estimator. The same form is used for
      // r_altCost and cost_off below.
      double cost_on = dist_unfilter_ctu + ctu_lambda * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3);
      /*#else
      double costOn = distUnfilterCtu + getFilteredDistortion(m_alfCovariance[compID][iShapeIdx][ctuIdx], numClasses, m_alfParamTemp.numLumaFilters - 1, numCoeff);
      #if ENABLE_QPA
      const double ctu_lambda = chromaWeight > 0.0 ? (isLuma(channel) ? cs.picture->m_uEnerHpCtu[ctuIdx] : cs.picture->m_uEnerHpCtu[ctuIdx] / chromaWeight) : m_lambda[compID];
      #else
      const double ctu_lambda = m_lambda[compID];
      #endif
      costOn += ctu_lambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
      #endif*/

      //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx());
      memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best));
      //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
      if (is_luma)
      {
        cost_on += get_filtered_distortion(g_alf_covariance[comp_id][i_shape_idx][ctu_idx], num_classes, g_alf_aps_temp.num_luma_filters - 1, num_coeff, bit_depth);
      }
      else
      {
        // Chroma: try every alternative and keep the cheapest one.
        double best_alt_cost = MAX_DOUBLE;
        int best_alt_idx = -1;
        //ctxTempAltStart = AlfCtx(ctxTempBest);
        memcpy(&ctx_temp_alt_start, &ctx_temp_best, sizeof(ctx_temp_alt_start));
        for (int alt_idx = 0; alt_idx < num_alts; ++alt_idx)
        {
          // The first alternative continues from the current estimator state;
          // later ones are restarted from the same snapshot.
          if (alt_idx)
          {
            //m_CABACEstimator->getCtx() = AlfCtx(ctxTempAltStart);
            memcpy(&cabac_estimator, &ctx_temp_alt_start, sizeof(cabac_estimator));
          }
          //m_CABACEstimator->resetBits();
          kvz_cabac_reset_bits(&cabac_estimator);
          cabac_estimator.only_count = 1;
          g_ctu_alternative[comp_id][ctu_idx] = alt_idx;
          code_alf_ctu_alternative_ctu(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp);
          double r_altCost = ctu_lambda * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale * 0/*m_CABACEstimator->getEstFracBits()*/;

          double alt_dist = 0.;
          alt_dist += calc_error_for_coeffs(&g_alf_covariance[comp_id][i_shape_idx][ctu_idx][0], g_filter_clipp_set[alt_idx], g_filter_coeff_set[alt_idx], num_coeff, bit_depth);

          double alt_cost = alt_dist + r_altCost;
          if (alt_cost < best_alt_cost)
          {
            best_alt_cost = alt_cost;
            best_alt_idx = alt_idx;
            //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx());
            memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best));
          }
        }
        // best_alt_idx stays -1 if no alternative was tried (num_alts <= 0).
        g_ctu_alternative[comp_id][ctu_idx] = best_alt_idx;
        cost_on += best_alt_cost;
      }
      //#endif

      // --- Trial 2: ALF disabled, starting from the pre-CTU estimator state ---
      //m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
      memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator));
      //m_CABACEstimator->resetBits();
      kvz_cabac_reset_bits(&cabac_estimator);
      cabac_estimator.only_count = 1;
      g_ctu_enable_flag[comp_id][ctu_idx] = 0;
      code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp);
      double cost_off = dist_unfilter_ctu + ctu_lambda * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale * 0;// m_CABACEstimator->getEstFracBits();

      // Keep the cheaper trial; ties go to "off".
      if (cost_on < cost_off)
      {
        cost += cost_on;
        // Continue from the estimator state of the winning "on" coding.
        //m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest);
        memcpy(&cabac_estimator, &ctx_temp_best, sizeof(cabac_estimator));
        g_ctu_enable_flag[comp_id][ctu_idx] = 1;
      }
      else
      {
        // The estimator is left in the state produced by the "off" trial.
        cost += cost_off;
        g_ctu_enable_flag[comp_id][ctu_idx] = 0;
        *dist_unfilter += dist_unfilter_ctu;
      }
    }
  }

  // Chroma: a component stays enabled only if at least one CTU enabled it.
  if (!is_luma)
  {
    const alf_component_id compIDFirst = COMPONENT_Cb;
    const alf_component_id compIDLast = COMPONENT_Cr;
    for (int compId = compIDFirst; compId <= compIDLast; compId++)
    {
      g_alf_aps_temp.enabled_flag[compId] = false;
      for (int i = 0; i < num_ctus_in_pic; i++)
      {
        if (g_ctu_enable_flag[compId][i])
        {
          g_alf_aps_temp.enabled_flag[compId] = true;
          break;
        }
      }
    }
    /*#if !JVET_O0491_HLS_CLEANUP
    const int alf_chroma_idc = g_alf_aps_temp.enabled_flag[COMPONENT_Cb] * 2 + g_alf_aps_temp.enabled_flag[COMPONENT_Cr];
    cost += length_truncated_unary(alf_chroma_idc, 3) * g_lambda[channel];*/
  }

  return cost;
}
MAX_ALF_NUM_CLIPPING_VALUES; ++i) { g_alf_clipping_values[CHANNEL_TYPE_LUMA][i] = 1 << (7 - 2 * i + shift_luma); } assert(MAX_ALF_NUM_CLIPPING_VALUES > 0); //"g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] must be at least one" g_alf_clipping_values[CHANNEL_TYPE_CHROMA][0] = 1 << input_bitdepth; int shift_chroma = input_bitdepth - 8; for (int i = 1; i < MAX_ALF_NUM_CLIPPING_VALUES; ++i) { g_alf_clipping_values[CHANNEL_TYPE_CHROMA][i] = 1 << (7 - 2 * i + shift_chroma); } // Classification g_classifier = malloc(pic_height * sizeof(**g_classifier)); g_classifier[0] = malloc(pic_height * pic_width * sizeof(*g_classifier)); for (int i = 1; i < pic_height; i++) { g_classifier[i] = g_classifier[0] + i * pic_width; } for (int filter_set_index = 0; filter_set_index < ALF_NUM_FIXED_FILTER_SETS; filter_set_index++) { for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) { int fixed_filter_idx = g_class_to_filter_mapping[filter_set_index][class_idx]; for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF - 1; i++) { g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + i] = g_fixed_filter_set_coeff[fixed_filter_idx][i]; } g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + MAX_NUM_ALF_LUMA_COEFF - 1] = (1 << (input_bitdepth - 1)); } } for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES; i++) { g_clip_default[i] = g_alf_clipping_values[CHANNEL_TYPE_LUMA][0]; } for (int i = 0; i != 1/*m_filterShapes[COMPONENT_Y].size()*/; i++) { for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++) { g_alf_covariance_merged[i][j].num_coeff = 13; g_alf_covariance_merged[i][j].num_bins = MAX_ALF_NUM_CLIPPING_VALUES; memset(g_alf_covariance_merged[i][j].y, 0, sizeof(g_alf_covariance_merged[i][j].y)); memset(g_alf_covariance_merged[i][j].ee, 0, sizeof(g_alf_covariance_merged[i][j].ee)); } } g_cc_alf_filter_control[0] = malloc(num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); g_cc_alf_filter_control[1] = 
malloc(num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); for (int channel_idx = 0; channel_idx < MAX_NUM_CHANNEL_TYPE; channel_idx++) { channel_type ch_type = (channel_type)channel_idx; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB int num_classes = channel_idx ? MAX_NUM_ALF_ALTERNATIVES_CHROMA : MAX_NUM_ALF_CLASSES; /*#else int num_classes = channel_idx ? 1 : MAX_NUM_ALF_CLASSES; #endif*/ int num_coeffs = channel_idx ? 7 : 13; //m_alfCovarianceFrame[ch_type] = new AlfCovariance*[m_filterShapes[ch_type].size()]; g_alf_covariance_frame[ch_type] = malloc(sizeof(**g_alf_covariance_frame[ch_type])); for (int i = 0; i != 1/*m_filterShapes[ch_type].size()*/; i++) { g_alf_covariance_frame[ch_type][i] = malloc(num_classes * sizeof(alf_covariance)); for (int k = 0; k < num_classes; k++) { g_alf_covariance_frame[ch_type][i][k].num_coeff = num_coeffs; g_alf_covariance_frame[ch_type][i][k].num_bins = MAX_ALF_NUM_CLIPPING_VALUES; g_alf_covariance_frame[ch_type][i][k].pix_acc = 0; memset(g_alf_covariance_frame[ch_type][i][k].y, 0, sizeof(g_alf_covariance_frame[ch_type][i][k].y)); memset(g_alf_covariance_frame[ch_type][i][k].ee, 0, sizeof(g_alf_covariance_frame[ch_type][i][k].ee)); } } } for (int comp_idx = 0; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { g_ctu_enable_flag[comp_idx] = malloc(num_ctus_in_pic * sizeof(*g_ctu_enable_flag[comp_idx])); g_ctu_enable_flag_tmp[comp_idx] = malloc(num_ctus_in_pic * sizeof(*g_ctu_enable_flag_tmp[comp_idx])); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB // g_ctu_enable_flag_tmp2[comp_idx] = malloc(g_num_ctus_in_pic * sizeof(*g_ctu_enable_flag_tmp2[comp_idx])); if (comp_idx == COMPONENT_Y) { g_ctu_alternative_tmp[comp_idx] = NULL; g_ctu_alternative[comp_idx] = NULL; } else { g_ctu_alternative_tmp[comp_idx] = malloc(num_ctus_in_pic * sizeof(*g_ctu_alternative_tmp[comp_idx])); g_ctu_alternative[comp_idx] = malloc(num_ctus_in_pic * sizeof(*g_ctu_alternative[comp_idx])); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { 
g_ctu_alternative_tmp[comp_idx][ctu_idx] = 0; g_ctu_alternative[comp_idx][ctu_idx] = 0; } } //#endif int num_classes = comp_idx ? 1 : MAX_NUM_ALF_CLASSES; int num_coeffs = comp_idx ? 7 : 13; int alf_num_clipping_values = MAX_ALF_NUM_CLIPPING_VALUES; g_alf_covariance[comp_idx] = malloc(sizeof(***g_alf_covariance[comp_idx])); for (int i = 0; i != 1/*m_filterShapes[ch_type].size()*/; i++) { g_alf_covariance[comp_idx][i] = malloc(num_ctus_in_pic * sizeof(**g_alf_covariance[comp_idx][i])); for (int j = 0; j < num_ctus_in_pic; j++) { g_alf_covariance[comp_idx][i][j] = malloc(num_classes * sizeof(alf_covariance)); for (int k = 0; k < num_classes; k++) { g_alf_covariance[comp_idx][i][j][k].num_coeff = num_coeffs; g_alf_covariance[comp_idx][i][j][k].num_bins = alf_num_clipping_values; g_alf_covariance[comp_idx][i][j][k].pix_acc = 0; memset(g_alf_covariance[comp_idx][i][j][k].y, 0, sizeof(g_alf_covariance[comp_idx][i][j][k].y)); memset(g_alf_covariance[comp_idx][i][j][k].ee, 0, sizeof(g_alf_covariance[comp_idx][i][j][k].ee)); } } } } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB g_filter_coeff_set = malloc(/*MAX(*/MAX_NUM_ALF_CLASSES/*, MAX_NUM_ALF_ALTERNATIVES_CHROMA)*/ * sizeof(int*)); g_filter_clipp_set = malloc(/*MAX(*/MAX_NUM_ALF_CLASSES/*, MAX_NUM_ALF_ALTERNATIVES_CHROMA)*/ * sizeof(int*)); /*#else g_filter_coeff_set = malloc(MAX_NUM_ALF_CLASSES * sizeof(int*)); g_filter_clipp_set = malloc(MAX_NUM_ALF_CLASSES * sizeof(int*)); #endif*/ for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { g_filter_coeff_set[i] = malloc(MAX_NUM_ALF_LUMA_COEFF * sizeof(int)); g_filter_clipp_set[i] = malloc(MAX_NUM_ALF_LUMA_COEFF * sizeof(int)); } for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) { g_ctb_distortion_unfilter[comp] = malloc(num_ctus_in_pic * sizeof(double)); } g_alf_ctb_filter_index = malloc(num_ctus_in_pic * sizeof(*g_alf_ctb_filter_index)); g_alf_ctb_filter_set_index_tmp = malloc(num_ctus_in_pic * sizeof(*g_alf_ctb_filter_set_index_tmp)); enum kvz_chroma_format 
chroma_fmt = state->encoder_control->chroma_format; const int number_of_components = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_COMPONENT; /*Turha, alustetaan jo ylempänä // init CTU stats buffers for (int comp_idx = 0; comp_idx < number_of_components; comp_idx++) { bool is_luma = comp_idx == 0 ? 1 : 0; const int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; for (int shape = 0; shape != 1 /*m_filterShapes[toChannelType(comp_id)].size()*//*; shape++) { for (int class_idx = 0; class_idx < num_classes; class_idx++) { for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { reset_alf_covariance(&g_alf_covariance[comp_idx][shape][ctu_idx][class_idx], g_alf_num_clipping_values[comp_idx == COMPONENT_Y ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA]); } } } }*/ /*// Turha, alustetaan jo ylmepänä // init Frame stats buffers const int number_of_channels = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_CHANNEL_TYPE; for (int channel_idx = 0; channel_idx < number_of_channels; channel_idx++) { const channel_type channel_id = channel_idx; bool is_luma = channel_id == CHANNEL_TYPE_LUMA ? true : false; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const int num_alts = is_luma ? 1 : MAX_NUM_ALF_ALTERNATIVES_CHROMA; //#endif const int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB for (int alt_idx = 0; alt_idx < num_alts; ++alt_idx) { //#endif for (int shape = 0; shape != 1/*m_filterShapes[channel_idx].size()*/; shape++) { for (int class_idx = 0; class_idx < num_classes; class_idx++) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB reset_alf_covariance(&g_alf_covariance_frame[channel_idx][shape][is_luma ? 
class_idx : alt_idx], g_alf_num_clipping_values[channel_id]); /*#else reset_alf_covariance(&g_alf_covariance_frame[channel_idx][shape][class_idx], g_alf_num_clipping_values[channel_id]); #endif*/ } } } }*/ // init alf enable flags for (int comp_idx = 0; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { g_ctu_enable_flag[comp_idx][ctu_idx] = 0; //cs.picture->getAlfCtuEnableFlag( comp_idx ); if (comp_idx != 0) { g_ctu_alternative[comp_idx][ctu_idx] = 0; //cs.picture->getAlfCtuAlternativeData(comp_idx); } } } const size_t simd_padding_width = 64; int width = state->tile->frame->width; int height = state->tile->frame->height; int stride = state->tile->frame->source->stride; unsigned int luma_size = (width + 8) * (height + 8); unsigned chroma_sizes[] = { 0, luma_size / 4, luma_size / 2, luma_size }; unsigned chroma_size = chroma_sizes[chroma_fmt]; alf_fulldata_buf = MALLOC_SIMD_PADDED(kvz_pixel, (luma_size + 2 * chroma_size), simd_padding_width * 2); alf_fulldata = &alf_fulldata_buf[4 * (width + 8) + 4] + simd_padding_width / sizeof(kvz_pixel); alf_tmp_y = &alf_fulldata[0]; if (chroma_fmt == KVZ_CSP_400) { alf_tmp_u = NULL; alf_tmp_v = NULL; } else { alf_tmp_u = &alf_fulldata[luma_size - (4 * (width + 8) + 4) + (2 * (stride / 2) + 2)]; alf_tmp_v = &alf_fulldata[luma_size - (4 * (width + 8) + 4) + chroma_size + (2 * (stride / 2) + 2)]; } g_aps_id_cc_alf_start[0] = (int)MAX_NUM_APS; g_aps_id_cc_alf_start[1] = (int)MAX_NUM_APS; for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { int num_filters = MAX_NUM_CC_ALF_FILTERS; g_alf_covariance_cc_alf[comp_idx - 1] = malloc(/*m_filterShapesCcAlf[comp_idx - 1].size()*/ 1 * sizeof(***g_alf_covariance_cc_alf[comp_idx - 1])); g_alf_covariance_frame_cc_alf[comp_idx - 1] = malloc(/*m_filterShapesCcAlf[comp_idx - 1].size()*/ 1 * sizeof(**g_alf_covariance_frame_cc_alf[comp_idx - 1])); for (int i = 0; i != /*g_filter_shapes_cc_alf[comp_idx - 1].size()*/ 1; i++) { 
g_alf_covariance_frame_cc_alf[comp_idx - 1][i] = malloc(num_filters * sizeof(*g_alf_covariance_frame_cc_alf[comp_idx - 1][i])); for (int k = 0; k < num_filters; k++) { g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].num_coeff = 8; g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].num_bins = MAX_ALF_NUM_CLIPPING_VALUES; g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].pix_acc = 0; memset(g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].y, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].y)); memset(g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].ee, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].ee)); } g_alf_covariance_cc_alf[comp_idx - 1][i] = malloc(num_filters * sizeof(**g_alf_covariance_cc_alf[comp_idx - 1][i])); for (int j = 0; j < num_filters; j++) { g_alf_covariance_cc_alf[comp_idx - 1][i][j] = malloc(num_ctus_in_pic * sizeof(*g_alf_covariance_cc_alf[comp_idx - 1][i][j])); for (int k = 0; k < num_ctus_in_pic; k++) { //g_alf_covariance_cc_alf[comp_idx - 1][i][j] = malloc(g_num_ctus_in_pic * sizeof(*g_alf_covariance_cc_alf[comp_idx - 1][i][j])); g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].num_coeff = 8; g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].num_bins = MAX_ALF_NUM_CLIPPING_VALUES; g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].pix_acc = 0; memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].y, 0, sizeof(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].y)); memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].ee, 0, sizeof(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].ee)); } } } } g_training_cov_control = malloc(num_ctus_in_pic * sizeof(*g_training_cov_control)); for (int i = 0; i < MAX_NUM_CC_ALF_FILTERS; i++) { g_training_distortion[i] = malloc(num_ctus_in_pic * sizeof(*g_training_distortion[i])); } g_filter_control = malloc(num_ctus_in_pic * sizeof(*g_filter_control)); g_luma_swing_greater_than_threshold_count = malloc(num_ctus_in_pic * sizeof(*g_luma_swing_greater_than_threshold_count)); 
g_chroma_sample_count_near_mid_point = malloc(num_ctus_in_pic * sizeof(*g_chroma_sample_count_near_mid_point)); g_best_filter_control = malloc(sizeof(*g_best_filter_control) * num_ctus_in_pic);; g_created = true; } void kvz_alf_reconstruct(encoder_state_t * const state) { if (g_created) { kvz_alf_reconstructor(state); } } void kvz_alf_enc_destroy(videoframe_t * const frame) { if (!g_created) { return; } const int32_t num_ctus_in_pic = frame->height_in_lcu * frame->width_in_lcu; for (int channel_idx = 0; channel_idx < MAX_NUM_CHANNEL_TYPE; channel_idx++) { if (g_alf_covariance_frame[channel_idx]) { for (int i = 0; i != 1/*m_filterShapes[ch_type].size()*/; i++) { FREE_POINTER(g_alf_covariance_frame[channel_idx][i]); } FREE_POINTER(g_alf_covariance_frame[channel_idx]); } } for (int comp_idx = 0; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { if (g_ctu_enable_flag[comp_idx]) { FREE_POINTER(g_ctu_enable_flag[comp_idx]); } if (g_ctu_enable_flag_tmp[comp_idx]) { FREE_POINTER(g_ctu_enable_flag_tmp[comp_idx]); } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB /*if (g_ctu_enable_flag_tmp2[comp_idx]) { FREE_POINTER(g_ctu_enable_flag_tmp2[comp_idx]); }*/ if (g_ctu_alternative_tmp[comp_idx]) { FREE_POINTER(g_ctu_alternative_tmp[comp_idx]); } if (g_ctu_alternative[comp_idx]) { FREE_POINTER(g_ctu_alternative[comp_idx]); } //#endif if (g_alf_covariance[comp_idx]) { for (int i = 0; i != 1/*m_filterShapes[ch_type].size()*/; i++) { for (int j = 0; j < num_ctus_in_pic; j++) { FREE_POINTER(g_alf_covariance[comp_idx][i][j]); } FREE_POINTER(g_alf_covariance[comp_idx][i]); } FREE_POINTER(g_alf_covariance[comp_idx]); } } if (g_filter_coeff_set) { for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { FREE_POINTER(g_filter_coeff_set[i]); } FREE_POINTER(g_filter_coeff_set); } if (g_filter_clipp_set) { for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { FREE_POINTER(g_filter_clipp_set[i]); } FREE_POINTER(g_filter_clipp_set); } /*if (g_ctb_distortion_fixed_filter != NULL) { 
FREE_POINTER(g_ctb_distortion_fixed_filter); }*/ for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) { if (g_ctb_distortion_unfilter[comp] != NULL) { FREE_POINTER(g_ctb_distortion_unfilter[comp]); } } if (g_alf_ctb_filter_index) { FREE_POINTER(g_alf_ctb_filter_index); } if (g_alf_ctb_filter_set_index_tmp) { FREE_POINTER(g_alf_ctb_filter_set_index_tmp); } if (alf_tmp_y) { alf_tmp_y = NULL; } if (alf_tmp_u) { alf_tmp_u = NULL; } if (alf_tmp_v) { alf_tmp_v = NULL; } if (alf_fulldata) { alf_fulldata = NULL; } if (alf_fulldata_buf) { FREE_POINTER(alf_fulldata_buf); } for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { int num_filters = MAX_NUM_CC_ALF_FILTERS; if (g_alf_covariance_frame_cc_alf[comp_idx - 1]) { for (int i = 0; i != 1 /*m_filterShapesCcAlf[comp_idx - 1].size()*/; i++) { /*for (int k = 0; k < num_filters; k++) { FREE_POINTER(g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k]); }*/ FREE_POINTER(g_alf_covariance_frame_cc_alf[comp_idx - 1][i]); } FREE_POINTER(g_alf_covariance_frame_cc_alf[comp_idx - 1]); } if (g_alf_covariance_cc_alf[comp_idx - 1]) { for (int i = 0; i != 1/*m_filterShapesCcAlf[comp_idx - 1].size()*/; i++) { for (int j = 0; j < num_filters; j++) { /*for (int k = 0; k < g_num_ctus_in_pic; k++) { FREE_POINTER(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k]); }*/ FREE_POINTER(g_alf_covariance_cc_alf[comp_idx - 1][i][j]); } FREE_POINTER(g_alf_covariance_cc_alf[comp_idx - 1][i]); } FREE_POINTER(g_alf_covariance_cc_alf[comp_idx - 1]); } } if (g_training_cov_control) { FREE_POINTER(g_training_cov_control); } for (int i = 0; i < MAX_NUM_CC_ALF_FILTERS; i++) { if (g_training_distortion[i]) { FREE_POINTER(g_training_distortion[i]); } } if (g_filter_control) { FREE_POINTER(g_filter_control); } if (g_best_filter_control) { FREE_POINTER(g_best_filter_control); } /* if (m_bufOrigin) { xFree(m_bufOrigin); m_bufOrigin = nullptr; } if (m_buf) { delete m_buf; m_buf = nullptr; } */ if (g_luma_swing_greater_than_threshold_count) { 
FREE_POINTER(g_luma_swing_greater_than_threshold_count); } if (g_chroma_sample_count_near_mid_point) { FREE_POINTER(g_chroma_sample_count_near_mid_point); } //if (tmp_rec_pic) /*{ memcpy(&frame->rec->y, &tmp_rec_pic->y, sizeof(frame->rec->y)); memcpy(&frame->rec->u, &tmp_rec_pic->u, sizeof(frame->rec->u)); memcpy(&frame->rec->v, &tmp_rec_pic->v, sizeof(frame->rec->v)); memcpy(&frame->rec->data[0], &tmp_rec_pic->data[0], sizeof(frame->rec->data[0])); memcpy(&frame->rec->data[1], &tmp_rec_pic->data[1], sizeof(frame->rec->data[1])); memcpy(&frame->rec->data[2], &tmp_rec_pic->data[2], sizeof(frame->rec->data[2])); tmp_rec_pic = NULL; }*/ /* for (int h = 0; h < height; h++) { for (int w = 0; w < width; w++) { frame->rec->y[h * stride + w] = alf_tmp_y[h * stride + w]; } } stride = stride >> 1; for (int h = 0; h < height >> 1; h++) { for (int w = 0; w < width >> 1; w++) { frame->rec->u[h * stride + w] = alf_tmp_u[h * stride + w]; frame->rec->v[h * stride + w] = alf_tmp_v[h * stride + w]; } } } }*/ //memcpy(&frame->rec->y, &alf_tmp_y, sizeof(frame->rec->y)); //memcpy(&frame->rec->u, &alf_tmp_u, sizeof(frame->rec->u)); //memcpy(&frame->rec->v, &alf_tmp_v, sizeof(frame->rec->v)); if (g_classifier) { FREE_POINTER(g_classifier[0]); FREE_POINTER(g_classifier); } g_created = false; if (g_cc_alf_filter_control[0]) { FREE_POINTER(g_cc_alf_filter_control[0]) } if (g_cc_alf_filter_control[1]) { FREE_POINTER(g_cc_alf_filter_control[1]) } } void kvz_alf_encoder(encoder_state_t * const state, alf_aps *aps, channel_type channel, const double lambda_chroma_weight // = 0.0 ) { //const TempCtx ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx())); cabac_data_t ctx_start; memcpy(&ctx_start, &cabac_estimator, sizeof(ctx_start)); //TempCtx ctxBest(m_CtxCache); cabac_data_t ctx_best; bool is_luma = channel == CHANNEL_TYPE_LUMA ? 
1 : 0; kvz_config cfg = state->encoder_control->cfg; double cost_min = MAX_DOUBLE; double lambda = state->frame->lambda; g_bits_new_filter[channel] = 0; const int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; const int32_t num_ctus_in_pic = state->lcu_order_count; int ui_coeff_bits = 0; for (int i_shape_idx = 0; i_shape_idx < 1/*alfFilterShape.size()*/; i_shape_idx++) { //m_alfSliceParamTemp = alfSliceParam; copy_alf_param(&g_alf_aps_temp, aps); //1. get unfiltered distortion //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (!is_luma) g_alf_aps_temp.num_alternatives_chroma = 1; //#endif double cost = get_unfiltered_distortion_cov_channel(g_alf_covariance_frame[channel][i_shape_idx], channel); cost /= 1.001; // slight preference for unfiltered choice if (cost < cost_min) { cost_min = cost; if (is_luma) { aps->enabled_flag[COMPONENT_Y] = 0; } else { aps->enabled_flag[COMPONENT_Cb] = 0; aps->enabled_flag[COMPONENT_Cr] = 0; } // no CABAC signalling //ctxBest = AlfCtx(ctxStart); memcpy(&ctx_best, &ctx_start, sizeof(ctx_best)); //setCtuEnableFlag(m_ctuEnableFlagTmp, channel, 0); #if !FULL_FRAME set_ctu_enable_flag(g_ctu_enable_flag_tmp, channel, ctu_idx, 0); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (!is_luma) { for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_ctu_alternative_tmp[COMPONENT_Cb][ctu_idx] = 0; g_ctu_alternative_tmp[COMPONENT_Cr][ctu_idx] = 0; } } //#endif } const int non_linear_flag_max = //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB (is_luma ? cfg.alf_non_linear_luma : 0) // For Chroma non linear flag is check for each alternative filter /*#else (isLuma(channel) ? m_encCfg->getUseNonLinearAlfLuma() : m_encCfg->getUseNonLinearAlfChroma()) #endif*/ ? 2 : 1; for (int non_linear_flag = 0; non_linear_flag < non_linear_flag_max; non_linear_flag++) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB for (int num_alternatives = is_luma ? 
1 : MIN(g_num_ctus_in_pic * 2, MAX_NUM_ALF_ALTERNATIVES_CHROMA); num_alternatives > 0; num_alternatives--) { if (!is_luma) { g_alf_aps_temp.num_alternatives_chroma = num_alternatives; //#endif //2. all CTUs are on if (is_luma) { g_alf_aps_temp.enabled_flag[COMPONENT_Y] = 1; } else { g_alf_aps_temp.enabled_flag[COMPONENT_Cb] = 1; g_alf_aps_temp.enabled_flag[COMPONENT_Cr] = 1; } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (is_luma) g_alf_aps_temp.non_linear_flag[channel][0] = non_linear_flag; /*#else m_alfParamTemp.nonLinearFlag[channel] = nonLinearFlag; #endif*/ //m_CABACEstimator->getCtx() = AlfCtx(ctxStart); memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); //setCtuEnableFlag(m_ctuEnableFlag, channel, 1); set_ctu_enable_flag(g_ctu_enable_flag, channel, 1, num_ctus_in_pic); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB // all alternatives are on if (!is_luma) { init_ctu_alternative_chroma(g_ctu_alternative, num_ctus_in_pic); } cost = kvz_alf_get_filter_coeff_and_cost(state, channel, 0, &ui_coeff_bits, i_shape_idx, true, false); /*#else cost = kvz_alf_get_filter_coeff_and_cost(state, channel, 0, &ui_coeff_bits, i_shape_idx, non_linear_flag != 0, false); #endif*/ if (cost < cost_min) { g_bits_new_filter[channel] = ui_coeff_bits; cost_min = cost; copy_alf_param_w_channel(aps, &g_alf_aps_temp, channel); //ctxBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_best, &cabac_estimator, sizeof(ctx_best)); //setCtuEnableFlag(m_ctuEnableFlagTmp, channel, 1); set_ctu_enable_flag(g_ctu_enable_flag_tmp, channel, 1, num_ctus_in_pic); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (!is_luma) { memcpy(g_ctu_alternative_tmp[COMPONENT_Cb], g_ctu_alternative[COMPONENT_Cb], sizeof(uint8_t) * num_ctus_in_pic); memcpy(g_ctu_alternative_tmp[COMPONENT_Cr], g_ctu_alternative[COMPONENT_Cr], sizeof(uint8_t) * num_ctus_in_pic); } //#endif } //3. 
CTU decision double dist_unfilter = 0; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB double prev_it_cost = MAX_DOUBLE; //#endif //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const int iter_num = is_luma ? (2 * 4 + 1) : (2 * (2 + g_alf_aps_temp.num_alternatives_chroma - 1) + 1); /*#else const int iterNum = isLuma(channel) ? (2 * 4 + 1) : (2 * 2 + 1); #endif*/ for (int iter = 0; iter < iter_num; iter++) { if ((iter & 0x01) == 0) { //m_CABACEstimator->getCtx() = AlfCtx(ctxStart); memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); cost = lambda * ui_coeff_bits; cost += kvz_alf_derive_ctb_alf_enable_flags(state, channel, i_shape_idx, &dist_unfilter, num_classes, lambda_chroma_weight); if (cost < cost_min) { g_bits_new_filter[channel] = ui_coeff_bits; cost_min = cost; //ctxBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_best, &cabac_estimator, sizeof(ctx_best)); //copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, channel); copy_ctu_enable_flag(g_ctu_enable_flag_tmp, g_ctu_enable_flag, channel, num_ctus_in_pic); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (!is_luma) { for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_ctu_alternative_tmp[COMPONENT_Cb][ctu_idx] = g_ctu_alternative[COMPONENT_Cb][ctu_idx]; g_ctu_alternative_tmp[COMPONENT_Cr][ctu_idx] = g_ctu_alternative[COMPONENT_Cr][ctu_idx]; } } //#endif copy_alf_param_w_channel(aps, &g_alf_aps_temp, channel); } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB else if (cost >= prev_it_cost) { // High probability that we have converged or we are diverging break; } prev_it_cost = cost; //#endif } else { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB // no need to reset CABAC here, since uiCoeffBits is not affected /*cost = */kvz_alf_get_filter_coeff_and_cost(state, channel, dist_unfilter, &ui_coeff_bits, i_shape_idx, true, false); /*#else cost = kvz_alf_get_filter_coeff_and_cost(state, channel, dist_unfilter, &ui_coeff_bits, i_shape_idx, true, false); 
#endif*/ } }//for iter // Decrease number of alternatives and reset ctu params and filters //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB } //#endif }//for non_linea_flag }//for shape_idx //m_CABACEstimator->getCtx() = AlfCtx(ctxBest); memcpy(&cabac_estimator, &ctx_best, sizeof(cabac_estimator)); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (!is_luma) { memcpy(g_ctu_alternative[COMPONENT_Cb], g_ctu_alternative_tmp[COMPONENT_Cb], sizeof(uint8_t) * num_ctus_in_pic); memcpy(g_ctu_alternative[COMPONENT_Cr], g_ctu_alternative_tmp[COMPONENT_Cr], sizeof(uint8_t) * num_ctus_in_pic); } copy_ctu_enable_flag(g_ctu_enable_flag, g_ctu_enable_flag_tmp, channel, num_ctus_in_pic); //#endif }

#if !FULL_FRAME
/**
 * \brief Collect the luma ALF APS ids usable by the current slice and choose
 *        the id for a new APS (variant compiled when FULL_FRAME is 0).
 *
 * Copies the ALF parameter sets from cfg.param_set_map into
 * state->slice->apss, scans at most ALF_CTB_MAX_NUM_APS ids starting from
 * g_aps_id_start (wrapping around), and appends each qualifying id that is
 * not already listed to aps_ids. The result is also written to
 * state->slice->tile_group_num_aps and tile_group_luma_aps_id.
 *
 * NOTE(review): this variant only scans when the slice is an I slice AND the
 * picture is IDR, which is the opposite of the condition used by the #else
 * variant below - confirm which one is intended.
 *
 * \param state            Encoder state; its slice APS tables are updated.
 * \param new_aps_id       Out: id to use for a newly created APS.
 * \param aps_ids          In/out: list of usable APS ids; new ids are
 *                         appended at index *size_of_aps_ids.
 * \param size_of_aps_ids  In/out: number of valid entries in aps_ids.
 */
void kvz_alf_get_avai_aps_ids_luma(encoder_state_t *const state, int *new_aps_id, int *aps_ids, int *size_of_aps_ids)
{
  param_set_map *aps_set = state->encoder_control->cfg.param_set_map;
  // Refresh the slice-local APS array from the global parameter set map.
  for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) {
    state->slice->apss[i] = aps_set[i + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set;
  }

  //std::vector result;
  int aps_id_checked = 0, cur_aps_id = g_aps_id_start;
  if (cur_aps_id < ALF_CTB_MAX_NUM_APS) {
    while (aps_id_checked < ALF_CTB_MAX_NUM_APS
           && (state->frame->slicetype == KVZ_SLICE_I)
           && *size_of_aps_ids < ALF_CTB_MAX_NUM_APS
           /*&& /*!cs.slice->getPendingRasInit()*/
           && (state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP)) {
      alf_aps *cur_aps = &state->slice->apss[cur_aps_id];
      bool aps_found = aps_set[cur_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed;

      if (aps_found/*cur_aps*/
          && cur_aps->t_layer/*cur_aps->getTemporalId()*/ <= state->slice->id/*cs.slice->getTLayer()*/
          && cur_aps->new_filter_flag[CHANNEL_TYPE_LUMA]) {
        //result.push_back(cur_aps_id);
        // Append the id only if it is not already in the list.
        bool add_aps = true;
        for (int aps_idx = 0; aps_idx < (*size_of_aps_ids); aps_idx++) {
          if (aps_ids[aps_idx] == cur_aps_id) {
            add_aps = false;
            continue;
          }
        }
        if (add_aps) {
          aps_ids[*size_of_aps_ids] = cur_aps_id;
          (*size_of_aps_ids)++;
        }
      }
      aps_id_checked++;
      // Ids are visited cyclically.
      cur_aps_id = (cur_aps_id + 1) % ALF_CTB_MAX_NUM_APS;
    }
  }

  // Publish the collected ids to the slice.
  state->slice->tile_group_num_aps = *size_of_aps_ids;
  for (int i = 0; i < state->slice->tile_group_num_aps; i++) {
    state->slice->tile_group_luma_aps_id[i] = aps_ids[i];
  }

  //*new_aps_id = g_aps_id_start - 1;
  *new_aps_id = ALF_CTB_MAX_NUM_APS - *size_of_aps_ids - 1;
  // A negative id wraps to the last valid APS id.
  if (*new_aps_id < 0) {
    *new_aps_id = (int)ALF_CTB_MAX_NUM_APS - 1;
  }
  assert(*new_aps_id < (int)MAX_NUM_APS); //Wrong APS index assignment in getAvaiApsIdsLuma
}
#else
/**
 * \brief Collect the luma ALF APS ids usable by the current slice and choose
 *        the id for a new APS (variant compiled when FULL_FRAME is non-zero).
 *
 * Copies the ALF parameter sets from cfg.param_set_map into
 * state->slice->apss with copy_aps(), then scans at most
 * ALF_CTB_MAX_NUM_APS ids starting from g_aps_id_start (wrapping around).
 * The scan only runs for non-I slices of non-IDR pictures. Each qualifying
 * id is stored in the first aps_ids slot that holds -1. The result is also
 * written to state->slice->tile_group_num_aps and tile_group_luma_aps_id.
 *
 * \param state            Encoder state; its slice APS tables are updated.
 * \param new_aps_id       Out: g_aps_id_start - 1, wrapped to
 *                         ALF_CTB_MAX_NUM_APS - 1 when that is negative.
 * \param aps_ids          In/out: list of usable APS ids; free slots are
 *                         expected to hold -1 (presumably initialised by the
 *                         caller - verify).
 * \param size_of_aps_ids  In/out: number of valid entries in aps_ids.
 */
void kvz_alf_get_avai_aps_ids_luma(encoder_state_t * const state, int *new_aps_id, int *aps_ids, int *size_of_aps_ids)
{
  //alf_aps *apss = state->slice->apss;
  // Refresh the slice-local APS array from the global parameter set map.
  for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) {
    copy_aps(&state->slice->apss[i], &state->encoder_control->cfg.param_set_map[i + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set);
  }

  //std::vector result;
  int aps_id_checked = 0, cur_aps_id = g_aps_id_start;
  if (cur_aps_id < ALF_CTB_MAX_NUM_APS) {
    while ((aps_id_checked < ALF_CTB_MAX_NUM_APS)
           && state->frame->slicetype != KVZ_SLICE_I
           && *size_of_aps_ids < ALF_CTB_MAX_NUM_APS
           /*&& /*!cs.slice->getPendingRasInit()*/
           && !(state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP)) {
      alf_aps *cur_aps = &state->slice->apss[cur_aps_id];
      // An APS counts as present when its stored id is within the valid range.
      bool aps_found = (0 <= cur_aps->aps_id && cur_aps->aps_id < ALF_CTB_MAX_NUM_APS);

      if (aps_found/*cur_aps*/
          && cur_aps->layer_id == 0/*cs.slice->getPic()->layerId*/
          && cur_aps->temporal_id <= state->slice->id /*cs.slice->getTLayer(*/
          && cur_aps->new_filter_flag[CHANNEL_TYPE_LUMA]) {
        // Store the id in the first free (-1) slot.
        for (int id = 0; id < ALF_CTB_MAX_NUM_APS; id++) {
          if (aps_ids[id] == -1) {
            aps_ids[id] = cur_aps_id;
            (*size_of_aps_ids)++;
            break;
          }
        }
      }
      aps_id_checked++;
      // Ids are visited cyclically.
      cur_aps_id = (cur_aps_id + 1) % ALF_CTB_MAX_NUM_APS;
    }
  }

  // Publish the collected ids to the slice.
  state->slice->tile_group_num_aps = *size_of_aps_ids;
  for (int i = 0; i < state->slice->tile_group_num_aps; i++) {
    state->slice->tile_group_luma_aps_id[i] = aps_ids[i];
  }

  //*new_aps_id = g_aps_id_start - 1;
  *new_aps_id = g_aps_id_start - 1;
  // A negative id wraps to the last valid APS id.
  if (*new_aps_id < 0) {
    *new_aps_id = (int)ALF_CTB_MAX_NUM_APS - 1;
  }
  assert(*new_aps_id < (int)ALF_CTB_MAX_NUM_APS); //Wrong APS index assignment in getAvaiApsIdsLuma
}
// NOTE(review): no #endif matching the "#if !FULL_FRAME" above is visible in
// this part of the file - confirm that it exists further down.

void kvz_alf_derive_stats_for_filtering(encoder_state_t * const state) { enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; bool chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 0 : 1; bool chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 0 : 1; const int32_t num_ctus_in_pic = state->lcu_order_count; const int alf_vb_luma_ctu_height = LCU_WIDTH; const int alf_vb_chma_ctu_height = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)); const int alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; const int alf_vb_chma_pos = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; int32_t pic_width = state->tile->frame->width; int32_t pic_height = state->tile->frame->height; int ctu_rs_addr = 0; const int number_of_components = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_COMPONENT; // init CTU stats buffers for (int comp_idx = 0; comp_idx < number_of_components; comp_idx++) { bool is_luma = comp_idx == 0 ? 1 : 0; const int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; for (int shape = 0; shape != 1 /*m_filterShapes[toChannelType(comp_id)].size()*/; shape++) { for (int class_idx = 0; class_idx < num_classes; class_idx++) { for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { reset_alf_covariance(&g_alf_covariance[comp_idx][shape][ctu_idx][class_idx], MAX_ALF_NUM_CLIPPING_VALUES); } } } } //kerran jossain muualla (kai?) // init Frame stats buffers const int number_of_channels = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_CHANNEL_TYPE; for (int channel_idx = 0; channel_idx < number_of_channels; channel_idx++) { const channel_type channel_id = channel_idx; const int num_classes = channel_id == CHANNEL_TYPE_LUMA ? 
MAX_NUM_ALF_CLASSES : 1; for (int shape = 0; shape != 1/*m_filterShapes[channel_idx].size()*/; shape++) { for (int class_idx = 0; class_idx < num_classes; class_idx++) { reset_alf_covariance(&g_alf_covariance_frame[channel_idx][shape][class_idx], MAX_ALF_NUM_CLIPPING_VALUES); } } } int max_cu_height = LCU_WIDTH; #if FULL_FRAME for (int y_pos = 0; y_pos < pic_height; y_pos += LCU_WIDTH) { for (int x_pos = 0; x_pos < pic_width; x_pos += LCU_WIDTH) { const int width = (x_pos + LCU_WIDTH > pic_width) ? (pic_width - x_pos) : LCU_WIDTH; const int height = (y_pos + LCU_WIDTH > pic_height) ? (pic_height - y_pos) : LCU_WIDTH; { for (int comp_idx = 0; comp_idx < number_of_components; comp_idx++) { const bool is_luma = comp_idx == COMPONENT_Y ? 1 : 0; channel_type ch_type = is_luma ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA; int blk_w = is_luma ? width : width >> chroma_scale_x; int blk_h = is_luma ? height : height >> chroma_scale_y; int pos_x = is_luma ? x_pos : x_pos >> chroma_scale_x; int pos_y = is_luma ? y_pos : y_pos >> chroma_scale_y; int32_t org_stride = is_luma ? state->tile->frame->source->stride : state->tile->frame->source->stride >> chroma_scale_x; int32_t rec_stride = is_luma ? state->tile->frame->rec->stride : state->tile->frame->rec->stride >> chroma_scale_x; kvz_pixel *org = comp_idx ? (comp_idx - 1 ? &state->tile->frame->source->v[pos_x + pos_y * org_stride] : &state->tile->frame->source->u[pos_x + pos_y * org_stride]) : &state->tile->frame->source->y[pos_x + pos_y * org_stride]; kvz_pixel *rec = comp_idx ? (comp_idx - 1 ? &state->tile->frame->rec->v[pos_x + pos_y * rec_stride] : &state->tile->frame->rec->u[pos_x + pos_y * rec_stride]) : &state->tile->frame->rec->y[pos_x + pos_y * rec_stride]; for (int shape = 0; shape != 1/*m_filterShapes[ch_type].size()*/; shape++) { kvz_alf_get_blk_stats(state, lcu, ch_type, &g_alf_covariance[comp_idx][shape][lcu->index], comp_idx ? 
NULL : g_classifier, org, org_stride, rec, rec_stride, pos_x, pos_y, pos_x, pos_y, blk_w, blk_h , (is_luma ? g_alf_vb_luma_ctu_height : g_alf_vb_chma_ctu_height) , ((y_pos + max_cu_height >= pic_height) ? pic_height : ((is_luma) ? g_alf_vb_luma_pos : g_alf_vb_chma_pos))); const int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; for (int class_idx = 0; class_idx < num_classes; class_idx++) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB add_alf_cov(&g_alf_covariance_frame[ch_type][shape][is_luma ? class_idx : 0], &g_alf_covariance[comp_idx][shape][lcu->index][class_idx]); /*#else add_alf_cov(&g_alf_covariance_frame[ch_type][shape][class_idx], &g_alf_covariance[comp_idx][shape][lcu->index][class_idx]); #endif*/ } } } } ctu_rs_addr++; } } } void kvz_alf_get_blk_stats(encoder_state_t * const state, channel_type channel, alf_covariance **alf_covariance, alf_classifier **g_classifier, kvz_pixel *org, int32_t org_stride, kvz_pixel *rec, int32_t rec_stride, const int x_pos, const int y_pos, const int x_dst, const int y_dst, const int width, const int height, int vb_ctu_height, int vb_pos) { int16_t e_local[MAX_NUM_ALF_LUMA_COEFF][MAX_ALF_NUM_CLIPPING_VALUES]; const int num_bins = MAX_ALF_NUM_CLIPPING_VALUES; int num_coeff = channel == CHANNEL_TYPE_LUMA ? 
13 : 7; int transpose_idx = 0; int class_idx = 0; for (int i = 0; i < height; i++) { int vb_distance = ((y_dst + i) % vb_ctu_height) - vb_pos; for (int j = 0; j < width; j++) { if (g_classifier && g_classifier[y_dst + i][x_dst + j].class_idx == ALF_UNUSED_CLASS_IDX && g_classifier[y_dst + i][x_dst + j].transpose_idx == ALF_UNUSED_TRANSPOSE_IDX) { continue; } memset(e_local, 0, sizeof(e_local)); if (g_classifier) { alf_classifier* cl = &g_classifier[y_dst + i][x_dst + j]; transpose_idx = cl->transpose_idx; class_idx = cl->class_idx; } double weight = 1.0; if (0/*m_alfWSSD*/) { //weight = g_luma_level_to_weight_plut[org[j]]; } int16_t y_local = org[j] - rec[j]; kvz_alf_calc_covariance(e_local, rec + j, rec_stride, channel, transpose_idx, vb_distance); for (int k = 0; k < num_coeff; k++) { for (int l = k; l < num_coeff; l++) { for (int b0 = 0; b0 < num_bins; b0++) { for (int b1 = 0; b1 < num_bins; b1++) { if (0/*m_alfWSSD*/) { (*alf_covariance)[class_idx].ee[b0][b1][k][l] += weight * (e_local[k][b0] * (double)e_local[l][b1]); } else { (*alf_covariance)[class_idx].ee[b0][b1][k][l] += e_local[k][b0] * (double)e_local[l][b1]; } } } } for (int b = 0; b < num_bins; b++) { if (0/*m_alfWSSD*/) { (*alf_covariance)[class_idx].y[b][k] += weight * (e_local[k][b] * (double)y_local); } else { (*alf_covariance)[class_idx].y[b][k] += e_local[k][b] * (double)y_local; } } } if (0/*m_alfWSSD*/) { (*alf_covariance)[class_idx].pix_acc += weight * (y_local * (double)y_local); } else { (*alf_covariance)[class_idx].pix_acc += y_local * (double)y_local; } } org += org_stride; rec += rec_stride; } int num_classes = g_classifier ? 
MAX_NUM_ALF_CLASSES : 1; for (class_idx = 0; class_idx < num_classes; class_idx++) { for (int k = 1; k < num_coeff; k++) { for (int l = 0; l < k; l++) { for (int b0 = 0; b0 < num_bins; b0++) { for (int b1 = 0; b1 < num_bins; b1++) { (*alf_covariance)[class_idx].ee[b0][b1][k][l] = (*alf_covariance)[class_idx].ee[b1][b0][l][k]; } } } } } } void kvz_alf_calc_covariance(int16_t e_local[MAX_NUM_ALF_LUMA_COEFF][MAX_ALF_NUM_CLIPPING_VALUES], const kvz_pixel *rec, const int stride, const channel_type channel, const int transpose_idx, int vb_distance) { int clip_top_row = -4; int clip_bot_row = 4; if (vb_distance >= -3 && vb_distance < 0) { clip_bot_row = -vb_distance - 1; clip_top_row = -clip_bot_row; // symmetric } else if (vb_distance >= 0 && vb_distance < 3) { clip_top_row = -vb_distance; clip_bot_row = -clip_top_row; // symmetric } const bool is_luma = channel == CHANNEL_TYPE_LUMA; const int *filter_pattern = is_luma ? alf_pattern_7 : alf_pattern_5; const int half_filter_length = (is_luma ? 
7 : 5) >> 1; const short* clip = g_alf_clipping_values[channel]; const int num_bins = MAX_ALF_NUM_CLIPPING_VALUES; int k = 0; const int16_t curr = rec[0]; if (transpose_idx == 0) { for (int i = -half_filter_length; i < 0; i++) { const kvz_pixel* rec0 = rec + MAX(i, clip_top_row) * stride; const kvz_pixel* rec1 = rec - MAX(i, -clip_bot_row) * stride; for (int j = -half_filter_length - i; j <= half_filter_length + i; j++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec0[j], rec1[-j]); } } } for (int j = -half_filter_length; j < 0; j++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec[j], rec[-j]); } } } else if (transpose_idx == 1) { for (int j = -half_filter_length; j < 0; j++) { const kvz_pixel* rec0 = rec + j; const kvz_pixel* rec1 = rec - j; for (int i = -half_filter_length - j; i <= half_filter_length + j; i++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec0[MAX(i, clip_top_row) * stride], rec1[-MAX(i, -clip_bot_row) * stride]); } } } for (int i = -half_filter_length; i < 0; i++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec[MAX(i, clip_top_row) * stride], rec[-MAX(i, -clip_bot_row) * stride]); } } } else if (transpose_idx == 2) { for (int i = -half_filter_length; i < 0; i++) { const kvz_pixel* rec0 = rec + MAX(i, clip_top_row) * stride; const kvz_pixel* rec1 = rec - MAX(i, -clip_bot_row) * stride; for (int j = half_filter_length + i; j >= -half_filter_length - i; j--, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec0[j], rec1[-j]); } } } for (int j = -half_filter_length; j < 0; j++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec[j], rec[-j]); } } } else { for (int j = -half_filter_length; j < 0; j++) { const kvz_pixel* rec0 
= rec + j; const kvz_pixel* rec1 = rec - j; for (int i = half_filter_length + j; i >= -half_filter_length - j; i--, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec0[MAX(i, clip_top_row) * stride], rec1[-MAX(i, -clip_bot_row) * stride]); } } } for (int i = -half_filter_length; i < 0; i++, k++) { for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += clip_alf(clip[b], curr, rec[MAX(i, clip_top_row) * stride], rec[-MAX(i, -clip_bot_row) * stride]); } } } for (int b = 0; b < num_bins; b++) { e_local[filter_pattern[k]][b] += curr; } } double kvz_alf_get_filter_coeff_and_cost(encoder_state_t * const state, channel_type channel, double dist_unfilter, int *ui_coeff_bits, int i_shape_idx, bool b_re_collect_stat, bool only_filter_cost) { bool is_luma = channel == CHANNEL_TYPE_LUMA ? 1 : 0; const int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7; double lambda = state->frame->lambda; int clip_merged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF]; const int8_t bit_depth = state->encoder_control->bitdepth; const int32_t num_ctus_in_pic = state->lcu_order_count; //collect stat based on CTU decision if (b_re_collect_stat) { get_frame_stats(channel, i_shape_idx, num_ctus_in_pic); } double dist = dist_unfilter; (*ui_coeff_bits) = 0; /*#if !JVET_O0491_HLS_CLEANUP int ui_slice_flag = 0;*/ //AlfFilterShape& alfFilterShape = m_alfSliceParamTemp.filterShapes[channel][iShapeIdx]; //get filter coeff if (is_luma) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //Tarvitaanko tätä alustusta ollenkaan? const int fill_val = g_alf_aps_temp.non_linear_flag[channel][0] ? 
g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] / 2 : 0; for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { for (int j = 0; j < MAX_NUM_ALF_CLASSES; j++) { for (int k = 0; k < MAX_NUM_ALF_LUMA_COEFF; k++) { clip_merged[i_shape_idx][i][j][k] = fill_val; } } } /*#else std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfParamTemp.nonLinearFlag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0); #endif*/ // Reset Merge Tmp Cov reset_alf_covariance(&g_alf_covariance_merged[i_shape_idx][MAX_NUM_ALF_CLASSES], MAX_ALF_NUM_CLIPPING_VALUES); reset_alf_covariance(&g_alf_covariance_merged[i_shape_idx][MAX_NUM_ALF_CLASSES + 1], MAX_ALF_NUM_CLIPPING_VALUES); //distortion dist += kvz_alf_merge_filters_and_cost(state, &g_alf_aps_temp, channel, ui_coeff_bits, g_alf_covariance_frame[channel][i_shape_idx], g_alf_covariance_merged[i_shape_idx], clip_merged[i_shape_idx]); } else { //distortion /*#if !JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB assert(num_coeff == g_alf_covariance_frame[channel][i_shape_idx][0].num_coeff); //std::fill_n(m_filterClippSet[0], MAX_NUM_ALF_CHROMA_COEFF, m_alfParamTemp.non_linear_flag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_CHROMA] / 2 : 0); const int fill_val = g_alf_aps_temp.non_linear_flag[channel] ? 
g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] / 2 : 0; for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_filter_clipp_set[0][i] = fill_val; } dist += g_alf_covariance_frame[channel][i_shape_idx][0].pix_acc + kvz_alf_derive_coeff_quant(channel, g_filter_clipp_set[0], g_filter_coeff_set[0], &g_alf_covariance_frame[channel][i_shape_idx][0], ALF_NUM_BITS, g_alf_aps_temp.non_linear_flag[channel]); #endif*/ /*#if !JVET_O0491_HLS_CLEANUP //setEnableFlag( m_alfSliceParamTemp, channel, m_ctuEnableFlag ); const int alf_chroma_idc = g_alf_aps_temp.enabled_flag[COMPONENT_Cb] * 2 + g_alf_aps_temp.enabled_flag[COMPONENT_Cr]; #endif*/ for (int alt_idx = 0; alt_idx < g_alf_aps_temp.num_alternatives_chroma; ++alt_idx) { assert(num_coeff == g_alf_covariance_frame[channel][i_shape_idx][alt_idx].num_coeff); alf_aps best_slice_param; double best_cost = MAX_DOUBLE; double best_dist = MAX_DOUBLE; int best_coeff_bits = 0; const int non_linear_flag_max = state->encoder_control->cfg.alf_non_linear_chroma ? 2 : 1; for (int non_linear_flag = 0; non_linear_flag < non_linear_flag_max; non_linear_flag++) { int current_non_linear_flag = g_alf_aps_temp.non_linear_flag[channel] ? 1 : 0; if (non_linear_flag != current_non_linear_flag) { continue; } int fill_val = non_linear_flag ? 
MAX_ALF_NUM_CLIPPING_VALUES / 2 : 0; for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_filter_clipp_set[alt_idx][i] = fill_val; } double dist = g_alf_covariance_frame[channel][i_shape_idx][alt_idx].pix_acc + kvz_alf_derive_coeff_quant(channel, g_filter_clipp_set[alt_idx], g_filter_coeff_set[alt_idx], &g_alf_covariance_frame[channel][i_shape_idx][alt_idx], bit_depth, non_linear_flag); for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_alf_aps_temp.chroma_coeff[alt_idx][i] = g_filter_coeff_set[alt_idx][i]; g_alf_aps_temp.chroma_clipp[alt_idx][i] = g_filter_clipp_set[alt_idx][i]; } int coeff_bits = get_chroma_coeff_rate(&g_alf_aps_temp, alt_idx); double cost = dist + lambda * coeff_bits; if (cost < best_cost) { best_cost = cost; best_dist = dist; best_coeff_bits = coeff_bits; copy_alf_param(&best_slice_param, &g_alf_aps_temp); } } *ui_coeff_bits += best_coeff_bits; dist += best_dist; copy_alf_param(&g_alf_aps_temp, &best_slice_param); } (*ui_coeff_bits) += length_uvlc(g_alf_aps_temp.num_alternatives_chroma - 1); (*ui_coeff_bits)++; /*#if !JVET_O0491_HLS_CLEANUP uiSliceFlag = lengthTruncatedUnary(alfChromaIdc, 3) - lengthTruncatedUnary(0, 3); // rate already put on Luma #endif*/ /*#else for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_alf_aps_temp.chroma_coeff[i] = g_filter_coeff_set[0][i]; g_alf_aps_temp.chroma_clipp[i] = g_filter_clipp_set[0][i]; } #endif*/ } if (only_filter_cost) { return dist + lambda * (*ui_coeff_bits); } /*#if !JVET_O0491_HLS_CLEANUP double rate = *ui_coeff_bits + ui_slice_flag;*/ double rate = *ui_coeff_bits; //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); //m_CABACEstimator->codeAlfCtuEnableFlags(cs, channel, &m_alfParamTemp); code_alf_ctu_enable_flags_channel(state, &cabac_estimator, channel, &g_alf_aps_temp); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { if (is_luma) { // Evaluate cost of signaling filter set index for convergence 
of filters enabled flag / filter derivation assert(g_alf_ctb_filter_index[ctu_idx] == ALF_NUM_FIXED_FILTER_SETS); assert(state->slice->tile_group_num_aps == 1); //m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctu_idx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]); code_alf_ctu_filter_index(state, &cabac_estimator, ctu_idx, g_alf_aps_temp.enabled_flag[COMPONENT_Y]); } } //m_CABACEstimator->codeAlfCtuAlternatives(cs, channel, &m_alfParamTemp); code_alf_ctu_alternatives_channel(state, &cabac_estimator, channel, &g_alf_aps_temp); //#endif rate += (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale * 0;/*(double)m_CABACEstimator->getEstFracBits();*/ return dist + lambda * rate; } int kvz_alf_derive_filter_coefficients_prediction_mode(channel_type channel, int **filter_set, const int num_filters) { return (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? get_cost_filter_clipp(channel, filter_set, num_filters) : 0) + get_cost_filter_coeff(channel, filter_set, num_filters); /* #else int kvz_alf_derive_filter_coefficients_prediction_mode(channel_type channel, int **filter_set, int** filter_coeff_diff, const int num_filters) { int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7; int rate_pred_mode0 = get_cost_filter_coeff(channel, filter_set, num_filters); for (int ind = 0; ind < num_filters; ++ind) { if (ind == 0) { memcpy(filter_coeff_diff[ind], filter_set[ind], sizeof(int) * num_coeff); } else { for (int i = 0; i < num_coeff; i++) { filter_coeff_diff[ind][i] = filter_set[ind][i] - filter_set[ind - 1][i]; } } } int rate_pred_mode1 = get_cost_filter_coeff(channel, filter_coeff_diff, num_filters); *pred_mode = (rate_pred_mode1 < rate_pred_mode0 && num_filters > 1) ? 1 : 0; return (num_filters > 1 ? 1 : 0) // coeff_delta_pred_mode_flag + (pred_mode ? 
rate_pred_mode1 : rate_pred_mode0); // min_golomb_order, golomb_order_increase_flag, alf_coeff_luma_delta */ } void kvz_alf_merge_classes(channel_type channel, alf_covariance* cov, alf_covariance* cov_merged, int clip_merged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int num_classes, short filter_indices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES]) { int tmp_clip[MAX_NUM_ALF_LUMA_COEFF]; int best_merge_clip[MAX_NUM_ALF_LUMA_COEFF]; double err[MAX_NUM_ALF_CLASSES]; double best_merge_err = MAX_DOUBLE; bool available_class[MAX_NUM_ALF_CLASSES]; int8_t index_list[MAX_NUM_ALF_CLASSES]; int8_t index_list_temp[MAX_NUM_ALF_CLASSES]; int num_remaining = num_classes; memset(filter_indices, 0, sizeof(short) * MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_CLASSES); for (int i = 0; i < num_classes; i++) { filter_indices[num_remaining - 1][i] = i; index_list[i] = i; available_class[i] = true; //cov_merged[i] = cov[i]; copy_cov(&cov_merged[i], &cov[i]); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB cov_merged[i].num_bins = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA][0] ? g_alf_num_clipping_values[COMPONENT_Y] : 1; /*#else cov_merged[i].num_bins = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? g_alf_num_clipping_values[COMPONENT_Y] : 1; #endif*/ } // Try merging different covariance matrices // temporal AlfCovariance structure is allocated as the last element in covMerged array, the size of covMerged is MAX_NUM_ALF_CLASSES + 1 alf_covariance* tmp_cov = &cov_merged[MAX_NUM_ALF_CLASSES]; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB tmp_cov->num_bins = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA][0] ? g_alf_num_clipping_values[COMPONENT_Y] : 1; /*#else tmp_cov->num_bins = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? 
g_alf_num_clipping_values[COMPONENT_Y] : 1; #endif*/ // init Clip for (int i = 0; i < num_classes; i++) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB for (int val = 0; val < MAX_NUM_ALF_LUMA_COEFF; val++) { clip_merged[num_remaining - 1][i][val] = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? MAX_ALF_NUM_CLIPPING_VALUES / 2 : 0; } if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) /*#else for (int val = 0; val < MAX_NUM_ALF_LUMA_COEFF; val++) { clip_merged[num_remaining - 1][i][val] = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] / 2 : 0; } if (g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA]) #endif*/ { err[i] = optimize_filter_clip(&cov_merged[i], clip_merged[num_remaining - 1][i]); } else { err[i] = calculate_error_opt_filt(&cov_merged[i], clip_merged[num_remaining - 1][i]); } } while (num_remaining > 2) { double error_min = MAX_DOUBLE; //std::numeric_limits::max(); int best_to_merge_idx1 = 0, best_to_merge_idx2 = 1; for (int i = 0; i < num_classes - 1; i++) { if (available_class[i]) { for (int j = i + 1; j < num_classes; j++) { if (available_class[j]) { double error1 = err[i]; double error2 = err[j]; add_alf_cov_lhs_rhs(tmp_cov, &cov_merged[i], &cov_merged[j]); for (int l = 0; l < MAX_NUM_ALF_LUMA_COEFF; ++l) { tmp_clip[l] = (clip_merged[num_remaining - 1][i][l] + clip_merged[num_remaining - 1][j][l] + 1) >> 1; } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB double error_merged = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA][0] ? optimize_filter_clip(tmp_cov, tmp_clip) : calculate_error_opt_filt(tmp_cov, tmp_clip); /*#else double error_merged = g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] ? 
optimize_filter_clip(tmp_cov, tmp_clip) : calculate_error_opt_filt(tmp_cov, tmp_clip); #endif*/ double error = error_merged - error1 - error2; if (error < error_min) { best_merge_err = error_merged; memcpy(best_merge_clip, tmp_clip, sizeof(best_merge_clip)); error_min = error; best_to_merge_idx1 = i; best_to_merge_idx2 = j; } } } } } add_alf_cov(&cov_merged[best_to_merge_idx1], &cov_merged[best_to_merge_idx2]); memcpy(clip_merged[num_remaining - 2], clip_merged[num_remaining - 1], sizeof(int[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF])); memcpy(clip_merged[num_remaining - 2][best_to_merge_idx1], best_merge_clip, sizeof(best_merge_clip)); err[best_to_merge_idx1] = best_merge_err; available_class[best_to_merge_idx2] = false; for (int i = 0; i < num_classes; i++) { if (index_list[i] == best_to_merge_idx2) { index_list[i] = best_to_merge_idx1; } } num_remaining--; if (num_remaining <= num_classes) { memcpy(index_list_temp, index_list, sizeof(int8_t) * num_classes); bool exist = false; int ind = 0; for (int j = 0; j < num_classes; j++) { exist = false; for (int i = 0; i < num_classes; i++) { if (index_list_temp[i] == j) { exist = true; break; } } if (exist) { for (int i = 0; i < num_classes; i++) { if (index_list_temp[i] == j) { filter_indices[num_remaining - 1][i] = ind; index_list_temp[i] = -1; } } ind++; } } } } } double kvz_alf_merge_filters_and_cost(encoder_state_t * const state, alf_aps *alf_aps, channel_type channel, int *ui_coeff_bits, alf_covariance *cov_frame, alf_covariance *cov_merged, int clip_merged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF]) { const int num_coeff = channel == CHANNEL_TYPE_LUMA ? 
13 : 7; int num_filters_best = 0; int num_filters = MAX_NUM_ALF_CLASSES; bool coded_var_bins[MAX_NUM_ALF_CLASSES]; double error_force_0_coeff_tab[MAX_NUM_ALF_CLASSES][2]; double lambda = state->frame->lambda; const int8_t bit_depth = state->encoder_control->bitdepth; double cost, cost0, dist, dist_force0, cost_min = MAX_DOUBLE; int coeff_bits, coeff_bits_force0; //clip_merged:iä ei tarvitse nollata ennen kvz_alf_merge_classes(channel, cov_frame, cov_merged, clip_merged, MAX_NUM_ALF_CLASSES, g_filter_indices); while (num_filters >= 1) { dist = kvz_alf_derive_filter_coeffs(alf_aps, channel, cov_frame, cov_merged, g_filter_indices[num_filters-1], num_filters, error_force_0_coeff_tab, clip_merged, bit_depth); // filter coeffs are stored in m_filterCoeffSet dist_force0 = get_dist_force_0(channel, num_filters, error_force_0_coeff_tab, coded_var_bins, lambda); coeff_bits = kvz_alf_derive_filter_coefficients_prediction_mode(channel, g_filter_coeff_set, num_filters); coeff_bits_force0 = get_cost_filter_coeff_force_0(channel, g_filter_coeff_set, num_filters, coded_var_bins); cost = dist + lambda * coeff_bits; cost0 = dist_force0 + lambda * coeff_bits_force0; if (cost0 < cost) { cost = cost0; } /* #if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (*fixed_filter_set_index > 0) { int len = 0; len += get_tb_length(*fixed_filter_set_index - 1, ALF_NUM_FIXED_FILTER_SETS); len += 1; //fixed filter flag pattern if (*fixed_filter_pattern > 0) { len += MAX_NUM_ALF_CLASSES; //"fixed_filter_flag" for each class } cost += g_lambda[COMPONENT_Y] * len; }*/ if (cost <= cost_min) { cost_min = cost; num_filters_best = num_filters; //best_pred_mode = pred_mode; #if !JVET_O0669_REMOVE_ALF_COEFF_PRED } num_filters--; } dist = kvz_alf_derive_filter_coeffs(alf_aps, channel, cov_frame, cov_merged, g_filter_indices[num_filters_best - 1], num_filters_best, error_force_0_coeff_tab, clip_merged, bit_depth); coeff_bits = kvz_alf_derive_filter_coefficients_prediction_mode(channel, g_filter_coeff_set, 
num_filters_best); dist_force0 = get_dist_force_0(channel, num_filters_best, error_force_0_coeff_tab, coded_var_bins, lambda); coeff_bits_force0 = get_cost_filter_coeff_force_0(channel, g_filter_coeff_set, num_filters_best, coded_var_bins); cost = dist + lambda * coeff_bits; cost0 = dist_force0 + lambda * coeff_bits_force0; alf_aps->num_luma_filters = num_filters_best; double dist_return; if (cost <= cost0) { dist_return = dist; alf_aps->alf_luma_coeff_delta_flag = 0; *ui_coeff_bits = coeff_bits; //alf_aps->alf_luma_coeff_delta_prediction_flag = best_pred_mode; #if !JVET_O0669_REMOVE_ALF_COEFF_PRED } else { dist_return = dist_force0; alf_aps->alf_luma_coeff_delta_flag = 1; *ui_coeff_bits = coeff_bits_force0; memcpy(alf_aps->alf_luma_coeff_flag, coded_var_bins, sizeof(coded_var_bins)); //*alf_luma_coeff_delta_prediction_flag = 0; #if !JVET_O0669_REMOVE_ALF_COEFF_PRED for (int var_ind = 0; var_ind < num_filters_best; var_ind++) { if (coded_var_bins[var_ind] == 0) { memset(g_filter_coeff_set[var_ind], 0, sizeof(int) * MAX_NUM_ALF_LUMA_COEFF); memset(g_filter_clipp_set[var_ind], 0, sizeof(int) * MAX_NUM_ALF_LUMA_COEFF); } } } for (int ind = 0; ind < alf_aps->num_luma_filters; ++ind) { for (int i = 0; i < num_coeff; i++) { // #if JVET_O0669_REMOVE_ALF_COEFF_PRED alf_aps->luma_coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = g_filter_coeff_set[ind][i]; /* #else if (alf_aps->alf_luma_coeff_delta_prediction_flag) { alf_aps->luma_coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = g_diff_filter_coeff[ind][i]; } else { alf_aps->luma_coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = g_filter_coeff_set[ind][i]; }*/ alf_aps->luma_clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = g_filter_clipp_set[ind][i]; } } memcpy(alf_aps->filter_coeff_delta_idx, g_filter_indices[num_filters_best - 1], sizeof(short) * MAX_NUM_ALF_CLASSES); *ui_coeff_bits += get_non_filter_coeff_rate(alf_aps); return dist_return; } double kvz_alf_derive_filter_coeffs(alf_aps *aps, channel_type channel, alf_covariance *cov, alf_covariance 
*covMerged, short* filter_indices, int num_filters, double error_tab_force_0_coeff[MAX_NUM_ALF_CLASSES][2], int clip_merged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int bit_depth) { // #if !JVET_O0669_REMOVE_ALF_COEFF_PRED //int *fixed_filter_pattern = &aps->fixed_filter_pattern; //int *fixed_filter_idx = aps->fixed_filter_idx; //int *fixed_filter_set_index = &aps->fixed_filter_set_index; int num_coeff = channel == CHANNEL_TYPE_LUMA ? 13 : 7; double error = 0.0; alf_covariance *tmp_cov = &covMerged[MAX_NUM_ALF_CLASSES]; /* #if !JVET_O0669_REMOVE_ALF_COEFF_PRED *fixed_filter_set_index = 0; alf_covariance tmp_cov_ff = covMerged[MAX_NUM_ALF_CLASSES + 1]; double factor = 1 << (ALF_NUM_BITS - 1); double error_min = 0; double error_min_per_class[MAX_NUM_ALF_CLASSES] = { 0 }; double error_cur_set_per_class[MAX_NUM_ALF_CLASSES] = { 0 }; int fixed_filter_flag_per_class[MAX_NUM_ALF_CLASSES] = { 0 }; for (int filter_set_idx = 0; filter_set_idx < ALF_NUM_FIXED_FILTER_SETS; filter_set_idx++) { double error_cur = 0; for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) { int fixed_filter_idx = g_class_to_filter_mapping[filter_set_idx][class_idx]; error_cur_set_per_class[class_idx] = calc_error_for_coeffs(cov[class_idx].ee, cov[class_idx].y, g_fixed_filter_set_coeff[fixed_filter_idx], MAX_NUM_ALF_LUMA_COEFF, ALF_NUM_BITS); if (error_cur_set_per_class[class_idx] >= 0) { error_cur_set_per_class[class_idx] = 0; fixed_filter_flag_per_class[class_idx] = 0; } else { error_cur += error_cur_set_per_class[class_idx]; fixed_filter_flag_per_class[class_idx] = 1; } } if (error_cur < error_min) { memcpy(fixed_filter_idx, fixed_filter_flag_per_class, sizeof(fixed_filter_flag_per_class)); *fixed_filter_set_index = filter_set_idx + 1; error_min = error_cur; memcpy(error_min_per_class, error_cur_set_per_class, sizeof(error_min_per_class)); } } *fixed_filter_pattern = 0; if (*fixed_filter_set_index > 0) { for (int class_idx = 0; class_idx < 
MAX_NUM_ALF_CLASSES; class_idx++) { if (fixed_filter_idx[class_idx] == 0) { *fixed_filter_pattern = 1; break; } } }*/ for( int filt_idx = 0; filt_idx < num_filters; filt_idx++ ) { reset_alf_covariance(tmp_cov, -1); bool found_clip = false; for( int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++ ) { if( filter_indices[class_idx] == filt_idx ) { //tmp_cov += cov[class_idx]; add_alf_cov(tmp_cov, &cov[class_idx]); /* #if !JVET_O0669_REMOVE_ALF_COEFF_PRED //adjust stat tmp_cov_ff = cov[class_idx]; if (*fixed_filter_set_index > 0 && fixed_filter_idx[class_idx] > 0) { int fixed_filter_idx = g_class_to_filter_mapping[*fixed_filter_set_index - 1][class_idx]; tmp_cov_ff.pix_acc += error_min_per_class[class_idx]; for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++) { double sum = 0; for (int j = 0; j < MAX_NUM_ALF_LUMA_COEFF; j++) { sum += tmp_cov_ff.ee[i][j] * g_fixed_filter_set_coeff[fixed_filter_idx][j]; } sum /= factor; tmp_cov_ff.y[i] -= sum; } } //tmp_cov += tmp_cov_ff; for (int j = 0; j < tmp_cov.num_coeff; j++) { for (int i = 0; i < tmp_cov.num_coeff; i++) { tmp_cov.ee[j][i] += tmp_cov_ff.ee[j][i]; } tmp_cov.y[j] += tmp_cov_ff.y[j]; } tmp_cov.pix_acc += tmp_cov_ff.pix_acc; */ if (!found_clip) { found_clip = true; // clip should be at the adress of shortest one memcpy(g_filter_clipp_set[filt_idx], clip_merged[num_filters - 1][class_idx], sizeof(int[MAX_NUM_ALF_LUMA_COEFF])); } } } // Find coeffcients assert(num_coeff == tmp_cov->num_coeff); error_tab_force_0_coeff[filt_idx][1] = tmp_cov->pix_acc + kvz_alf_derive_coeff_quant(channel, g_filter_clipp_set[filt_idx], g_filter_coeff_set[filt_idx], tmp_cov, bit_depth, false); error_tab_force_0_coeff[filt_idx][0] = tmp_cov->pix_acc; error += error_tab_force_0_coeff[filt_idx][1]; } return error; } double kvz_alf_derive_coeff_quant(channel_type channel, int *filter_clipp, int *filter_coeff_quant, const alf_covariance* cov, const int bit_depth, const bool optimize_clip) { const bool is_luma = channel == CHANNEL_TYPE_LUMA 
? true : false; const int num_coeff = is_luma ? 13 : 7; const int factor = 1 << (bit_depth - 1); const int max_value = factor - 1; const int min_value = -factor + 1; double filter_coeff[MAX_NUM_ALF_LUMA_COEFF]; optimize_filter(cov, filter_clipp, filter_coeff, optimize_clip); //roundFiltCoeff(filter_coeff_quant, filter_coeff, num_coeff, factor); for (int i = 0; i < num_coeff; i++) { int sign = filter_coeff[i] > 0 ? 1 : -1; filter_coeff_quant[i] = (int)(filter_coeff[i] * sign * factor + 0.5) * sign; } for (int i = 0; i < num_coeff - 1; i++) { filter_coeff_quant[i] = MIN(max_value, MAX(min_value, filter_coeff_quant[i])); } filter_coeff_quant[num_coeff - 1] = 0; int modified = 1; double err_ref = calc_error_for_coeffs(cov, filter_clipp, filter_coeff_quant, num_coeff, bit_depth); int sign; while (modified) { modified = 0; for (int sign_count = 0; sign_count <= 1; sign_count++) { sign = sign_count == 0 ? 1 : -1; double err_min = MAX_DOUBLE; int min_ind = -1; for (int k = 0; k < num_coeff - 1; k++) { if (filter_coeff_quant[k] - sign > max_value || filter_coeff_quant[k] - sign < min_value) { continue; } filter_coeff_quant[k] -= sign; double error = calc_error_for_coeffs(cov, filter_clipp, filter_coeff_quant, num_coeff, bit_depth); if (error < err_min) { err_min = error; min_ind = k; } filter_coeff_quant[k] += sign; } if (err_min < err_ref) { filter_coeff_quant[min_ind] -= sign; modified++; err_ref = err_min; } } } return err_ref; } void kvz_alf_encoder_ctb(encoder_state_t * const state, alf_aps *aps, const double lambda_chroma_weight) { //TempCtx ctxStart(m_CtxCache, AlfCtx(m_CABACEstimator->getCtx())); cabac_data_t ctx_start; memcpy(&ctx_start, &cabac_estimator, sizeof(ctx_start)); //TempCtx ctxBest(m_CtxCache); cabac_data_t ctx_best; //TempCtx ctxTempStart(m_CtxCache); cabac_data_t ctx_temp_start; //TempCtx ctxTempBest(m_CtxCache);*/ cabac_data_t ctx_temp_best; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB /*TempCtx ctxTempAltStart(m_CtxCache); TempCtx 
ctxTempAltBest(m_CtxCache);*/ cabac_data_t ctx_temp_alt_start; //cabac_data_t ctx_temp_alt_best; //#endif int best_aps_ids[ALF_CTB_MAX_NUM_APS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; int size_of_best_aps_ids = 0; int clip_default[13] = { 0,0,0,0,0,0,0,0,0,0,0,0,0 }; const int8_t bit_depth = state->encoder_control->bitdepth; double lambda = state->frame->lambda; int size_of_aps_ids = 0; const int32_t num_ctus_in_pic = state->lcu_order_count; alf_aps alf_aps_temp_nl; //AlfSliceParam alfSliceParamNewFiltersBest = alfSliceParamNewFilters; alf_aps alf_aps_new_filters_best; copy_alf_param(&alf_aps_new_filters_best, aps); alf_aps* apss = state->slice->apss; bool has_new_filters[2] = { aps->enabled_flag[COMPONENT_Y] , aps->enabled_flag[COMPONENT_Cb] || aps->enabled_flag[COMPONENT_Cr] }; //initDistortion(); for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) { for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_ctb_distortion_unfilter[comp][ctu_idx] = get_unfiltered_distortion_cov_classes(g_alf_covariance[comp][0][ctu_idx], comp == 0 ? MAX_NUM_ALF_CLASSES : 1); } } //luma //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB copy_alf_param(&g_alf_aps_temp, aps); //#endif //memset(g_ctu_enable_flag[COMPONENT_Y], 1, sizeof(uint8_t) * g_num_ctus_in_pic); g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 1; get_frame_stats(CHANNEL_TYPE_LUMA, 0, ctu_idx); g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 0; #endif double cost_off = get_unfiltered_distortion_cov_channel(g_alf_covariance_frame[CHANNEL_TYPE_LUMA][0], CHANNEL_TYPE_LUMA); int new_aps_id; int aps_ids[ALF_CTB_MAX_NUM_APS]; for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { aps_ids[i] = -1; } kvz_alf_get_avai_aps_ids_luma(state, &new_aps_id, aps_ids, &size_of_aps_ids); double cost_min = MAX_DOUBLE; kvz_alf_reconstruct_coeff_aps(state, true, false, true); int num_loops = has_new_filters[CHANNEL_TYPE_LUMA] ? 
2 : 1; for (int use_new_filter = 0; use_new_filter < num_loops; use_new_filter++) { int bits_new_filter = 0; if (use_new_filter == 1) { if (!has_new_filters[CHANNEL_TYPE_LUMA]) { continue; } else { bits_new_filter = g_bits_new_filter[CHANNEL_TYPE_LUMA]; kvz_alf_reconstruct_coeff(state, aps, CHANNEL_TYPE_LUMA, true, true); } } int num_iter = use_new_filter ? 2 : 1; for (int num_temporal_aps = 0; num_temporal_aps <= size_of_aps_ids/*apsIds.size()*/; num_temporal_aps++) { if (num_temporal_aps + use_new_filter >= ALF_CTB_MAX_NUM_APS) { continue; } //cs.slice->setTileGroupNumAps(numTemporalAps + useNewFilter); state->slice->tile_group_num_aps = num_temporal_aps + use_new_filter; int num_filter_set = ALF_NUM_FIXED_FILTER_SETS + num_temporal_aps + use_new_filter; if (num_temporal_aps == size_of_aps_ids && num_temporal_aps > 0 && use_new_filter && new_aps_id == aps_ids[size_of_aps_ids - 1] /*apsIds.back()*/) //last temporalAPS is occupied by new filter set and this temporal APS becomes unavailable { continue; } for (int iter = 0; iter < num_iter; iter++) { //g_alf_aps_temp = aps; copy_alf_param(&g_alf_aps_temp, aps); g_alf_aps_temp.enabled_flag[CHANNEL_TYPE_LUMA] = true; double cur_cost = 3 * lambda; if (iter > 0) //re-derive new filter-set { double d_dist_org_new_filter = 0; int blocks_using_new_filter = 0; for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { if (g_ctu_enable_flag[COMPONENT_Y][ctu_idx] && g_alf_ctb_filter_index[ctu_idx] != ALF_NUM_FIXED_FILTER_SETS) { g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 0; } else if (g_ctu_enable_flag[COMPONENT_Y][ctu_idx] && g_alf_ctb_filter_index[ctu_idx] == ALF_NUM_FIXED_FILTER_SETS) { blocks_using_new_filter++; d_dist_org_new_filter += g_ctb_distortion_unfilter[COMPONENT_Y][ctu_idx]; for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) { short* p_coeff = g_coeff_final; int16_t* p_clipp = g_clipp_final; for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++) { g_filter_tmp[i] = p_coeff[class_idx * 
MAX_NUM_ALF_LUMA_COEFF + i]; g_clip_tmp[i] = p_clipp[class_idx * MAX_NUM_ALF_LUMA_COEFF + i]; } d_dist_org_new_filter += calc_error_for_coeffs(&g_alf_covariance[COMPONENT_Y][0][ctu_idx][class_idx], g_clip_tmp, g_filter_tmp, MAX_NUM_ALF_LUMA_COEFF, bit_depth); } } } //for ctb if (blocks_using_new_filter > 0 && blocks_using_new_filter < num_ctus_in_pic) { int bit_nl[2] = { 0, 0 }; double err_nl[2] = { 0.0, 0.0 }; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA][0] = 1; /*#else g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] = 1; #endif*/ if (state->encoder_control->cfg.alf_non_linear_luma) { err_nl[1] = kvz_alf_get_filter_coeff_and_cost(state, CHANNEL_TYPE_LUMA, 0, &bit_nl[1], 0, true, true); copy_alf_param(&alf_aps_temp_nl, &g_alf_aps_temp); } else { err_nl[1] = MAX_DOUBLE; } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA][0] = 0; /*#else g_alf_aps_temp.non_linear_flag[CHANNEL_TYPE_LUMA] = 0; #endif*/ //errNL[0] = getFilterCoeffAndCost(cs, 0, CHANNEL_TYPE_LUMA, true, 0, bitNL[0], true); err_nl[0] = kvz_alf_get_filter_coeff_and_cost(state, CHANNEL_TYPE_LUMA, 0, &bit_nl[0], 0, true, true, ctu_idx); int bits_new_filter_temp_luma = bit_nl[0]; double err = err_nl[0]; if (err_nl[1] < err_nl[0]) { err = err_nl[1]; bits_new_filter_temp_luma = bit_nl[1]; copy_alf_param(&g_alf_aps_temp, &alf_aps_temp_nl); } if (d_dist_org_new_filter + lambda * g_bits_new_filter[CHANNEL_TYPE_LUMA] < err) //re-derived filter is not good, skip { continue; } kvz_alf_reconstruct_coeff(state, &g_alf_aps_temp, CHANNEL_TYPE_LUMA, true, true); bits_new_filter = bits_new_filter_temp_luma; } else //no blocks using new filter, skip { continue; } } //m_CABACEstimator->getCtx() = ctxStart; memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { double dist_unfilter_ctb = g_ctb_distortion_unfilter[COMPONENT_Y][ctu_idx]; //ctb on 
g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 1; double cost_on = MAX_DOUBLE; //ctxTempStart = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_start, &cabac_estimator, sizeof(ctx_temp_start)); ctx_temp_start.only_count = 1; int i_best_filter_set_idx = 0; for (int filter_set_idx = 0; filter_set_idx < num_filter_set; filter_set_idx++) { //rate //m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); //m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfSliceParamTemp); code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, COMPONENT_Y, &g_alf_aps_temp); g_alf_ctb_filter_index[ctu_idx] = filter_set_idx; code_alf_ctu_filter_index(state, &cabac_estimator, ctu_idx, g_alf_aps_temp.enabled_flag[COMPONENT_Y]); double rate_on = (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale * 0; /*(double)m_CABACEstimator->getEstFracBits()*/ ; //distortion double dist = dist_unfilter_ctb; for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) { if (filter_set_idx < ALF_NUM_FIXED_FILTER_SETS) { int filter_idx = g_class_to_filter_mapping[filter_set_idx][class_idx]; dist += calc_error_for_coeffs(&g_alf_covariance[COMPONENT_Y][0][ctu_idx][class_idx], clip_default, g_fixed_filter_set_coeff[filter_idx], MAX_NUM_ALF_LUMA_COEFF, bit_depth); } else { short *p_coeff; int16_t *p_clipp; if (use_new_filter && filter_set_idx == ALF_NUM_FIXED_FILTER_SETS) { p_coeff = g_coeff_final; p_clipp = g_clipp_final; } else if (use_new_filter) { p_coeff = g_coeff_aps_luma[filter_set_idx - 1 - ALF_NUM_FIXED_FILTER_SETS]; p_clipp = g_clipp_aps_luma[filter_set_idx - 1 - ALF_NUM_FIXED_FILTER_SETS]; } else { p_coeff = g_coeff_aps_luma[filter_set_idx - ALF_NUM_FIXED_FILTER_SETS]; p_clipp = g_clipp_aps_luma[filter_set_idx - ALF_NUM_FIXED_FILTER_SETS]; } for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++) { 
g_filter_tmp[i] = p_coeff[class_idx * MAX_NUM_ALF_LUMA_COEFF + i]; g_clip_tmp[i] = p_clipp[class_idx * MAX_NUM_ALF_LUMA_COEFF + i]; } dist += calc_error_for_coeffs(&g_alf_covariance[COMPONENT_Y][0][ctu_idx][class_idx], g_clip_tmp, g_filter_tmp, MAX_NUM_ALF_LUMA_COEFF, bit_depth); } } //cost double cost_on_tmp = dist + lambda * rate_on; if (cost_on_tmp < cost_on) { //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best)); ctx_temp_best.only_count = 1; cost_on = cost_on_tmp; i_best_filter_set_idx = filter_set_idx; } } //ctb off g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 0; //rate //m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); //m_CABACEstimator->codeAlfCtuEnableFlag(cs, ctbIdx, COMPONENT_Y, &m_alfSliceParamTemp); code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, COMPONENT_Y, &g_alf_aps_temp); //cost double cost_off = dist_unfilter_ctb + lambda * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3);// frac_bits_scale * 0; /* (double)m_CABACEstimator->getEstFracBits()*/ ; if (cost_on < cost_off) { //m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest); memcpy(&cabac_estimator, &ctx_temp_best, sizeof(cabac_estimator)); g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 1; g_alf_ctb_filter_index[ctu_idx] = i_best_filter_set_idx; cur_cost += cost_on; } else { g_ctu_enable_flag[COMPONENT_Y][ctu_idx] = 0; cur_cost += cost_off; } } //for(ctbIdx) int tmp_bits = bits_new_filter + 3 * (num_filter_set - ALF_NUM_FIXED_FILTER_SETS); cur_cost += tmp_bits * lambda; if (cur_cost < cost_min) { cost_min = cur_cost; size_of_best_aps_ids = num_filter_set - ALF_NUM_FIXED_FILTER_SETS; for (int i = 0; i < size_of_best_aps_ids; i++) { if (i == 0 && use_new_filter) { best_aps_ids[i] = new_aps_id; } else { best_aps_ids[i] = aps_ids[i - use_new_filter]; } } 
//alfSliceParamNewFiltersBest = m_alfSliceParamTemp; copy_alf_param(&alf_aps_new_filters_best, &g_alf_aps_temp); //ctxBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_best, &cabac_estimator, sizeof(ctx_best)); //copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_LUMA); memcpy(g_ctu_enable_flag_tmp[COMPONENT_Y], g_ctu_enable_flag[COMPONENT_Y], sizeof(uint8_t) * num_ctus_in_pic); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_alf_ctb_filter_set_index_tmp[ctu_idx] = g_alf_ctb_filter_index[ctu_idx]; } alf_aps_new_filters_best.new_filter_flag[CHANNEL_TYPE_LUMA] = use_new_filter; } }//for (int iter = 0; iter < numIter; iter++) }// for (int numTemporalAps = 0; numTemporalAps < apsIds.size(); numTemporalAps++) }//for (int useNewFilter = 0; useNewFilter <= 1; useNewFilter++) state->slice->tile_group_cc_alf_cb_aps_id = new_aps_id; state->slice->tile_group_cc_alf_cr_aps_id = new_aps_id; if (cost_off <= cost_min) { memset(state->slice->tile_group_alf_enabled_flag, 0, sizeof(state->slice->tile_group_alf_enabled_flag)); state->slice->tile_group_num_aps = 0; for (int i = 0; i < MAX_NUM_COMPONENT; i++) { memset(g_ctu_enable_flag[i], 0, sizeof(uint8_t) * num_ctus_in_pic); } return; } else { //alfSliceParamNewFiltersBest.tLayer = cs.slice->getTLayer(); alf_aps_new_filters_best.t_layer = state->slice->id; //cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Y, true); state->slice->tile_group_alf_enabled_flag[COMPONENT_Y] = true; //cs.slice->setTileGroupNumAps((int)bestApsIds.size()); state->slice->tile_group_num_aps = size_of_best_aps_ids; //cs.slice->setAPSs(bestApsIds); for (int i = 0; i < size_of_best_aps_ids; i++) { state->slice->tile_group_luma_aps_id[i] = best_aps_ids[i]; } //copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_LUMA); copy_ctu_enable_flag(g_ctu_enable_flag, g_ctu_enable_flag_tmp, CHANNEL_TYPE_LUMA, num_ctus_in_pic); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_alf_ctb_filter_index[ctu_idx] = 
g_alf_ctb_filter_set_index_tmp[ctu_idx]; } if (alf_aps_new_filters_best.new_filter_flag[CHANNEL_TYPE_LUMA]) { //APS* newAPS = m_apsMap->getPS((new_aps_id << NUM_APS_TYPE_LEN) + T_ALF_APS); alf_aps* new_aps = &state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set; if (new_aps->aps_id < 0 || new_aps->aps_id >= ALF_CTB_MAX_NUM_APS) // new_aps == NULL { //newAPS = m_apsMap->allocatePS(new_aps_id); assert(new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS < MAX_NUM_APS); //Invalid PS id bool found = false; for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { if (state->encoder_control->cfg.param_set_map[i].parameter_set.aps_id == new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS) { found = true; } } if (!found) { state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; //state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN+ T_ALF_APS].p_nalu_data = 0; //state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set = malloc(sizeof(alf_aps)); state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set.aps_id = new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS; } copy_alf_param(new_aps, &state->slice->param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set); new_aps->aps_id = new_aps_id; new_aps->aps_type = T_ALF_APS; } copy_alf_param(new_aps, &alf_aps_new_filters_best); new_aps->new_filter_flag[CHANNEL_TYPE_CHROMA] = false; state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; g_aps_id_start = new_aps_id; } int8_t *aps_ids = state->slice->tile_group_luma_aps_id; for (int i = 0; i < state->slice->tile_group_num_aps; i++) { copy_aps(&apss[aps_ids[i]], &state->encoder_control->cfg.param_set_map[aps_ids[i] + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set); } } //chroma //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB copy_alf_param(&g_alf_aps_temp, 
&alf_aps_new_filters_best); if (g_alf_aps_temp.num_alternatives_chroma < 1) { g_alf_aps_temp.num_alternatives_chroma = 1; } //set_ctu_alternative_chroma(m_ctuAlternative, 0); //for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { g_ctu_alternative[COMPONENT_Cb][ctu_idx] = 0; g_ctu_alternative[COMPONENT_Cr][ctu_idx] = 0; } //#endif //memset(g_ctu_enable_flag[COMPONENT_Cb], 1, sizeof(uint8_t) * g_num_ctus_in_pic); //memset(g_ctu_enable_flag[COMPONENT_Cr], 1, sizeof(uint8_t) * g_num_ctus_in_pic); set_ctu_enable_flag(g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, ctu_idx, 1); get_frame_stats(CHANNEL_TYPE_CHROMA, 0, ctu_idx); cost_off = get_unfiltered_distortion_cov_channel(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][0], CHANNEL_TYPE_CHROMA); cost_min = MAX_DOUBLE; //m_CABACEstimator->getCtx() = AlfCtx(ctxBest); memcpy(&cabac_estimator, &ctx_best, sizeof(cabac_estimator)); //ctxStart = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_start, &cabac_estimator, sizeof(ctx_start)); ctx_start.only_count = 1; int new_aps_id_chroma = -1; if (alf_aps_new_filters_best.new_filter_flag[CHANNEL_TYPE_LUMA] && (alf_aps_new_filters_best.enabled_flag[COMPONENT_Cb] || alf_aps_new_filters_best.enabled_flag[COMPONENT_Cr])) { new_aps_id_chroma = new_aps_id; } else if (alf_aps_new_filters_best.enabled_flag[COMPONENT_Cb] || alf_aps_new_filters_best.enabled_flag[COMPONENT_Cr]) { int cur_id = g_aps_id_start; if (size_of_aps_ids < 8 || state->slice->tile_group_num_aps < 8) { g_alf_aps_temp.num_alternatives_chroma = 1; } for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_ctu_alternative[COMPONENT_Cb][ctu_idx] = 0; g_ctu_alternative[COMPONENT_Cr][ctu_idx] = 0; } set_ctu_enable_flag(g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, 1, num_ctus_in_pic); get_frame_stats(CHANNEL_TYPE_CHROMA, 0, num_ctus_in_pic); cost_off = get_unfiltered_distortion_cov_channel(g_alf_covariance_frame[CHANNEL_TYPE_CHROMA][0], CHANNEL_TYPE_CHROMA); cost_min = MAX_DOUBLE; //m_CABACEstimator->getCtx() = 
AlfCtx(ctxBest); memcpy(&cabac_estimator, &ctx_best, sizeof(cabac_estimator)); //ctxStart = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_start, &cabac_estimator, sizeof(ctx_start)); ctx_start.only_count = 1; int new_aps_id_chroma = -1; if (alf_aps_new_filters_best.new_filter_flag[CHANNEL_TYPE_LUMA] && (alf_aps_new_filters_best.enabled_flag[COMPONENT_Cb] || alf_aps_new_filters_best.enabled_flag[COMPONENT_Cr])) { new_aps_id_chroma = new_aps_id; } else if (alf_aps_new_filters_best.enabled_flag[COMPONENT_Cb] || alf_aps_new_filters_best.enabled_flag[COMPONENT_Cr]) { int cur_id = g_aps_id_start; if (size_of_aps_ids < 8 || state->slice->tile_group_num_aps < 8) { while (new_aps_id_chroma < 0) { cur_id--; if (cur_id < 0) { cur_id = ALF_CTB_MAX_NUM_APS - 1; } bool found = false; for (int i = 0; i < 8; i++) { if (cur_id == best_aps_ids[i]) { found = true; } } if (!found) { new_aps_id_chroma = cur_id; } } } } for (int cur_aps_id = 0; cur_aps_id < ALF_CTB_MAX_NUM_APS; cur_aps_id++) { if ((/*(cs.slice->getPendingRasInit() ||*/ (state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP) || (state->frame->slicetype == KVZ_SLICE_I)) && cur_aps_id != new_aps_id_chroma) { continue; } //APS* cur_aps = m_apsMap->getPS(cur_aps_id); alf_aps* cur_aps = &state->encoder_control->cfg.param_set_map[cur_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set; if (cur_aps && cur_aps->layer_id != 0 /*cs.slice->getPic()->layerId*/) { continue; } double cur_cost = lambda * 3; if (cur_aps_id == new_aps_id_chroma) { copy_alf_param(&g_alf_aps_temp, aps); cur_cost += g_lambda[CHANNEL_TYPE_CHROMA] * g_bits_new_filter[CHANNEL_TYPE_CHROMA]; } else if (cur_aps && cur_aps->t_layer <= state->slice->id && cur_aps->new_filter_flag[CHANNEL_TYPE_CHROMA]) { //g_alf_slice_aps_temp = cur_aps; copy_alf_param(&g_alf_aps_temp, cur_aps); } else { continue; } kvz_alf_reconstruct_coeff(state, &g_alf_aps_temp, CHANNEL_TYPE_CHROMA, true, true); //m_CABACEstimator->getCtx() = 
AlfCtx(ctxStart); memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); for (int comp_id = 1; comp_id < MAX_NUM_COMPONENT; comp_id++) { g_alf_aps_temp.enabled_flag[comp_id] = true; //for (int ctb_idx = 0; ctb_idx < g_num_ctus_in_pic; ctb_idx++) { double dist_unfilter_ctu = g_ctb_distortion_unfilter[comp_id][ctu_idx]; //cost on g_ctu_enable_flag[comp_id][ctu_idx] = 1; //ctxTempStart = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_start, &cabac_estimator, sizeof(ctx_temp_start)); ctx_temp_start.only_count = 1; //rate //m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); //ctb flag code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp); double rate_on = (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale*(double)838/*m_CABACEstimator->getEstFracBits()*/; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //#if ENABLE_QPA const double ctu_lambda = lambda_chroma_weight > 0.0 ? 
0/*cs.picture->m_uEnerHpCtu[ctbIdx]*/ / lambda_chroma_weight : g_lambda[comp_id]; /*#else const double ctu_lambda = m_lambda[compId]; #endif*/ double dist = MAX_DOUBLE; int num_alts = g_alf_aps_temp.num_alternatives_chroma; //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best)); ctx_temp_best.only_count = 1; double best_alt_rate = 0; double best_alt_cost = MAX_DOUBLE; int best_alt_idx = -1; //ctxTempAltStart = AlfCtx(ctxTempBest); memcpy(&ctx_temp_alt_start, &ctx_temp_best, sizeof(ctx_temp_alt_start)); for (int alt_idx = 0; alt_idx < num_alts; ++alt_idx) { double dist_unfilter_ctu = g_ctb_distortion_unfilter[comp_id][ctu_idx]; //cost on g_ctu_enable_flag[comp_id][ctu_idx] = 1; memcpy(&ctx_temp_start, &cabac_estimator, sizeof(ctx_temp_start)); ctx_temp_start.only_count = 1; //rate //memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); kvz_cabac_reset_bits(&cabac_estimator); //ctb flag code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp); double rate_on = (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale*(double)838/*m_CABACEstimator->getEstFracBits()*/; const double ctu_lambda = lambda_chroma_weight > 0.0 ? 
0/*cs.picture->m_uEnerHpCtu[ctbIdx]*/ / lambda_chroma_weight : lambda; double dist = MAX_DOUBLE; int num_alts = g_alf_aps_temp.num_alternatives_chroma; //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best)); ctx_temp_best.only_count = 1; double best_alt_rate = 0; double best_alt_cost = MAX_DOUBLE; int best_alt_idx = -1; //ctxTempAltStart = AlfCtx(ctxTempBest); memcpy(&ctx_temp_alt_start, &ctx_temp_best, sizeof(ctx_temp_alt_start)); for (int alt_idx = 0; alt_idx < num_alts; ++alt_idx) { if (alt_idx) { //m_CABACEstimator->getCtx() = AlfCtx(ctxTempAltStart); memcpy(&cabac_estimator, &ctx_temp_alt_start, sizeof(cabac_estimator)); } //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); g_ctu_alternative[comp_id][ctu_idx] = alt_idx; //m_CABACEstimator->codeAlfCtuAlternative(cs, ctbIdx, compId, &m_alfParamTemp); code_alf_ctu_alternative_ctu(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp); double alt_rate = (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale * 0/*m_CABACEstimator->getEstFracBits()*/; double r_alt_cost = ctu_lambda * alt_rate; //distortion for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_filter_tmp[i] = g_chroma_coeff_final[alt_idx][i]; g_clip_tmp[i] = g_chroma_clipp_final[alt_idx][i]; } double alt_dist = calc_error_for_coeffs(&g_alf_covariance[comp_id][0][ctu_idx][0], g_clip_tmp, g_filter_tmp, MAX_NUM_ALF_CHROMA_COEFF, bit_depth); double alt_cost = alt_dist + r_alt_cost; if (alt_cost < best_alt_cost) { best_alt_cost = alt_cost; best_alt_idx = alt_idx; best_alt_rate = alt_rate; //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best)); ctx_temp_best.only_count = 1; dist = alt_dist; } } g_ctu_alternative[comp_id][ctu_idx] = best_alt_idx; rate_on += best_alt_rate; dist += dist_unfilter_ctu; //cost double cost_on = dist + ctu_lambda * rate_on; //cost off 
g_ctu_enable_flag[comp_id][ctu_idx] = 0; //rate memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); kvz_cabac_reset_bits(&cabac_estimator); code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp); //cost double cost_off = dist_unfilter_ctu + lambda * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale*(double)838/*m_CABACEstimator->getEstFracBits()*/; if (cost_on < cost_off) { //m_CABACEstimator->getCtx() = AlfCtx(ctxTempBest); memcpy(&cabac_estimator, &ctx_temp_best, sizeof(cabac_estimator)); g_ctu_enable_flag[comp_id][ctu_idx] = 1; cur_cost += cost_on; } } g_ctu_alternative[comp_id][ctu_idx] = best_alt_idx; rate_on += best_alt_rate; dist += dist_unfilter_ctu; //cost double cost_on = dist + ctu_lambda * rate_on; /*#else //distortion for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { g_filter_tmp[i] = g_chroma_coeff_final[i]; g_clip_tmp[i] = g_chroma_clipp_final[i]; } double dist = dist_unfilter_ctu + calc_error_for_coeffs(&g_alf_covariance[comp_id][0][ctb_idx][0], g_clip_tmp, g_filter_tmp, MAX_NUM_ALF_CHROMA_COEFF, ALF_NUM_BITS); double cost_on = dist + g_lambda[comp_id] * rate_on; //ctxTempBest = AlfCtx(m_CABACEstimator->getCtx()); memcpy(&ctx_temp_best, &cabac_estimator, sizeof(ctx_temp_best)); #endif*/ //cost off g_ctu_enable_flag[comp_id][ctu_idx] = 0; //rate //m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart); memcpy(&cabac_estimator, &ctx_temp_start, sizeof(cabac_estimator)); //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); code_alf_ctu_enable_flag(state, &cabac_estimator, ctu_idx, comp_id, &g_alf_aps_temp); //cost double cost_off = dist_unfilter_ctu + g_lambda[comp_id] * (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); //frac_bits_scale*(double)838/*m_CABACEstimator->getEstFracBits()*/; if (cost_on < cost_off) { if (g_ctu_enable_flag[comp_id][ctu_idx]) { g_alf_aps_temp.enabled_flag[comp_id] = true; break; } } } if 
(cur_cost < cost_min) { cost_min = cur_cost; state->slice->tile_group_chroma_aps_id = cur_aps_id; state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] = g_alf_aps_temp.enabled_flag[COMPONENT_Cb]; state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr] = g_alf_aps_temp.enabled_flag[COMPONENT_Cr]; copy_ctu_enable_flag(g_ctu_enable_flag_tmp, g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, num_ctus_in_pic); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { g_ctu_alternative_tmp[COMPONENT_Cb][ctu_idx] = g_ctu_alternative[COMPONENT_Cb][ctu_idx]; g_ctu_alternative_tmp[COMPONENT_Cr][ctu_idx] = g_ctu_alternative[COMPONENT_Cr][ctu_idx]; } } } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB /*#if !JVET_O0491_HLS_CLEANUP curCost += (lengthTruncatedUnary(alfChromaIdc, 3) - lengthTruncatedUnary(0, 3)) * m_lambda[CHANNEL_TYPE_CHROMA]; #endif #else cur_cost += length_truncated_unary(alf_chroma_idc, 3) * g_lambda[CHANNEL_TYPE_CHROMA]; #endif*/ if (new_aps_id_chroma >= 0) { cost_min = cur_cost; state->slice->tile_group_chroma_aps_id = cur_aps_id; state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] = g_alf_aps_temp.enabled_flag[COMPONENT_Cb]; state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr] = g_alf_aps_temp.enabled_flag[COMPONENT_Cr]; //memcpy(g_ctu_enable_flag_tmp[COMPONENT_Cb], g_ctu_enable_flag[COMPONENT_Cb], sizeof(uint8_t) * g_num_ctus_in_pic); //memcpy(g_ctu_enable_flag_tmp[COMPONENT_Cr], g_ctu_enable_flag[COMPONENT_Cr], sizeof(uint8_t) * g_num_ctus_in_pic); copy_ctu_enable_flag(g_ctu_enable_flag_tmp, g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, ctu_idx); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //for (int idx = 0; idx < g_num_ctus_in_pic; idx++) { g_ctu_alternative_tmp[COMPONENT_Cb][ctu_idx] = g_ctu_alternative[COMPONENT_Cb][ctu_idx]; g_ctu_alternative_tmp[COMPONENT_Cr][ctu_idx] = g_ctu_alternative[COMPONENT_Cr][ctu_idx]; } //#endif } } if (cost_off < cost_min) { state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] = false; 
state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr] = false; //memset(g_ctu_enable_flag[COMPONENT_Cb], 0, sizeof(uint8_t) * g_num_ctus_in_pic); //memset(g_ctu_enable_flag[COMPONENT_Cr], 0, sizeof(uint8_t) * g_num_ctus_in_pic); set_ctu_enable_flag(g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, ctu_idx, 0); } else { //memcpy(g_ctu_enable_flag[COMPONENT_Cb], g_ctu_enable_flag_tmp[COMPONENT_Cb], sizeof(uint8_t) * g_num_ctus_in_pic); //memcpy(g_ctu_enable_flag[COMPONENT_Cr], g_ctu_enable_flag_tmp[COMPONENT_Cr], sizeof(uint8_t) * g_num_ctus_in_pic); copy_ctu_enable_flag(g_ctu_enable_flag, g_ctu_enable_flag_tmp, CHANNEL_TYPE_CHROMA, ctu_idx); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //for (int idx = 0; idx < g_num_ctus_in_pic; idx++) { state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] = false; state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr] = false; set_ctu_enable_flag(g_ctu_enable_flag, CHANNEL_TYPE_CHROMA, 0, num_ctus_in_pic); } //#endif if (state->slice->tile_group_chroma_aps_id == new_aps_id_chroma) //new filter { copy_ctu_enable_flag(g_ctu_enable_flag, g_ctu_enable_flag_tmp, CHANNEL_TYPE_CHROMA, num_ctus_in_pic); for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { //newAPS = m_apsMap->allocatePS(new_aps_id); assert(new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS < MAX_NUM_APS); //Invalid PS id bool found = false; for (int i = 0; i < (sizeof(state->slice->param_set_map) / sizeof(state->slice->param_set_map[0])); i++) { if (state->slice->param_set_map[i].parameter_set.aps_id == new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS) { found = true; } if (!found) { state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; //state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].p_nalu_data = 0; //state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set = malloc(sizeof(alf_aps)); state->encoder_control->cfg.param_set_map[new_aps_id + 
NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set.aps_id = new_aps_id + T_ALF_APS; } //copy_alf_param(new_aps, &state->encoder_control->cfg.param_set_map[new_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set); new_aps->aps_id = new_aps_id; new_aps->aps_type = T_ALF_APS; reset_alf_param(new_aps); } new_aps->new_filter_flag[CHANNEL_TYPE_CHROMA] = true; if (!alf_aps_new_filters_best.new_filter_flag[CHANNEL_TYPE_LUMA]) { new_aps->new_filter_flag[CHANNEL_TYPE_LUMA] = false; } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB new_aps->num_alternatives_chroma = aps->num_alternatives_chroma; for (int alt_idx = 0; alt_idx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++alt_idx) new_aps->non_linear_flag[CHANNEL_TYPE_CHROMA][alt_idx] = aps->non_linear_flag[CHANNEL_TYPE_CHROMA][alt_idx]; /*#else new_aps->non_linear_flag[CHANNEL_TYPE_CHROMA] = aps->non_linear_flag[CHANNEL_TYPE_CHROMA]; #endif*/ new_aps->t_layer = state->slice->id; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB for (int alt_idx = 0; alt_idx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++alt_idx) { for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { new_aps->chroma_coeff[alt_idx][i] = aps->chroma_coeff[alt_idx][i]; new_aps->chroma_clipp[alt_idx][i] = aps->chroma_clipp[alt_idx][i]; } } state->encoder_control->cfg.param_set_map[new_aps_id_chroma + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; g_aps_id_start = new_aps_id_chroma; } /*#else for (int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++) { new_aps->chroma_coeff[i] = aps->chroma_coeff[i]; new_aps->chroma_clipp[i] = aps->chroma_clipp[i]; } #endif*/ state->slice->param_set_map[new_aps_id_chroma + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; g_aps_id_start = new_aps_id_chroma; } } } void kvz_alf_reconstructor(encoder_state_t * const state) { if (!state->slice->tile_group_alf_enabled_flag[COMPONENT_Y]) { return; } kvz_alf_reconstruct_coeff_aps(state, true, state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] || 
state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr], false); enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; bool chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 0 : 1; bool chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 0 : 1; const int alf_vb_luma_ctu_height = LCU_WIDTH; const int alf_vb_chma_ctu_height = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)); const int alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; const int alf_vb_chma_pos = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; const int luma_height = state->tile->frame->height; const int luma_width = state->tile->frame->width; const int max_cu_width = LCU_WIDTH; const int max_cu_height = LCU_WIDTH; int ctu_idx = 0; const int luma_stride = state->tile->frame->rec->stride; const int chroma_stride = luma_stride >> chroma_scale_x; const int chroma_height = luma_height >> chroma_scale_y; const int chroma_padding = MAX_ALF_PADDING_SIZE >> chroma_scale_x; const int index_luma = -(luma_stride * MAX_ALF_PADDING_SIZE + MAX_ALF_PADDING_SIZE); const int index_chroma = -(chroma_stride * chroma_padding + chroma_padding); //Copy reconstructed samples to a buffer. memcpy(&alf_tmp_y[index_luma], &state->tile->frame->rec->y[index_luma], sizeof(kvz_pixel) * luma_stride * (luma_height + MAX_ALF_PADDING_SIZE * 2)); memcpy(&alf_tmp_u[index_chroma], &state->tile->frame->rec->u[index_chroma], sizeof(kvz_pixel) * chroma_stride * (chroma_height + chroma_padding * 2)); memcpy(&alf_tmp_v[index_chroma], &state->tile->frame->rec->v[index_chroma], sizeof(kvz_pixel) * chroma_stride * (chroma_height + chroma_padding * 2)); for (int y_pos = 0; y_pos < luma_height; y_pos += max_cu_height) { for (int x_pos = 0; x_pos < luma_width; x_pos += max_cu_width) { const int width = (x_pos + max_cu_width > luma_width) ? (luma_width - x_pos) : max_cu_width; const int height = (y_pos + max_cu_height > luma_height) ? 
(luma_height - y_pos) : max_cu_height; bool ctu_enable_flag = g_ctu_enable_flag[COMPONENT_Y][ctu_idx]; for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { ctu_enable_flag |= g_ctu_enable_flag[comp_idx][ctu_idx] > 0; } { if (g_ctu_enable_flag[COMPONENT_Y][ctu_idx]) { short filter_set_index = g_alf_ctb_filter_index[ctu_idx]; short *coeff; int16_t *clip; if (filter_set_index >= ALF_NUM_FIXED_FILTER_SETS) { coeff = g_coeff_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; clip = g_clipp_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; } else { coeff = g_fixed_filter_set_coeff_dec[filter_set_index]; clip = g_clip_default; } kvz_alf_filter_block(state, alf_tmp_y, state->tile->frame->rec->y, luma_stride, luma_stride, coeff, clip, g_clp_rngs.comp[COMPONENT_Y], COMPONENT_Y, width, height, x_pos, y_pos, x_pos, y_pos, alf_vb_luma_pos, alf_vb_luma_ctu_height); } for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { coeff = g_fixed_filter_set_coeff_dec[filter_set_index]; clip = g_clip_default; } kvz_alf_filter_block(state, state->tile->frame->rec->y, alf_tmp_y, state->tile->frame->rec->stride, state->tile->frame->rec->stride, coeff, clip, g_clp_rngs.comp[COMPONENT_Y], COMPONENT_Y, width, height, x_pos, y_pos, x_pos, y_pos, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_luma_pos), g_alf_vb_luma_ctu_height); } /*else { int stride = state->tile->frame->rec->stride; for (int h = y_pos; h < y_pos + height; h++) { for (int w = x_pos; w < x_pos + width; w++) { alf_tmp_y[h * stride + w] = state->tile->frame->rec->y[h * stride + w]; } } }*/ for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { alf_component_id comp_id = comp_idx; if (g_ctu_enable_flag[comp_idx][ctu_idx]) { kvz_pixel *dst_pixels = comp_id - 1 ? state->tile->frame->rec->v : state->tile->frame->rec->u; const kvz_pixel *src_pixels = comp_id - 1 ? 
alf_tmp_v : alf_tmp_u; const int alt_num = g_ctu_alternative[comp_id][ctu_idx]; kvz_alf_filter_block(state, src_pixels, dst_pixels, src_stride, dst_stride, g_chroma_coeff_final[alt_num], g_chroma_clipp_final[alt_num], g_clp_rngs.comp[comp_idx], comp_idx, width >> chroma_scale_x, height >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); /*#else kvz_alf_filter_block(state, g_chroma_coeff_final, g_chroma_clipp_final, g_clp_rngs.comp[comp_idx], comp_idx, width >> chroma_scale_x, height >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); #endif*/ } /*else { int stride = state->tile->frame->rec->stride >> chroma_scale_y; int h_start = y_pos >> chroma_scale_y; int w_start = x_pos >> chroma_scale_x; int c_width = width >> chroma_scale_x; int c_height = height >> chroma_scale_y; if (comp_idx == COMPONENT_Cb) { for (int h = h_start; h < h_start + c_height; h++) { for (int w = w_start; w < w_start + c_width; w++) { alf_tmp_u[h * stride + w] = state->tile->frame->rec->u[h * stride + w]; } } } if (comp_idx == COMPONENT_Cr) { for (int h = h_start; h < h_start + c_height; h++) { for (int w = w_start; w < w_start + c_width; w++) { alf_tmp_v[h * stride + w] = state->tile->frame->rec->v[h * stride + w]; } } } }*/ } ctu_idx++; } } //ctu_idx++; //} //} //} } //---------------------------------------------------------------------- //-------------------------cabac writer functions------------------------ void kvz_cabac_reset_bits(cabac_data_t * const data) { data->low = 0; data->bits_left = 23; data->num_buffered_bytes = 0; data->buffered_byte = 0xff; } void code_alf_ctu_enable_flags_channel(encoder_state_t * const state, cabac_data_t * const cabac, 
channel_type channel, alf_aps *aps) { if (channel == CHANNEL_TYPE_LUMA) { if (aps->enabled_flag[COMPONENT_Y]) code_alf_ctu_enable_flags_component(state, cabac, COMPONENT_Y, aps); } else { if (aps->enabled_flag[COMPONENT_Cb]) code_alf_ctu_enable_flags_component(state, cabac, COMPONENT_Cb, aps); if (aps->enabled_flag[COMPONENT_Cr]) code_alf_ctu_enable_flags_component(state, cabac, COMPONENT_Cr, aps); } } void code_alf_ctu_enable_flags_component(encoder_state_t * const state, cabac_data_t * const cabac, alf_component_id component_id, alf_aps *aps) { const int32_t num_ctus_in_pic = state->lcu_order_count; for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { code_alf_ctu_enable_flag(state, cabac, ctu_idx, component_id, aps); } } void code_alf_ctu_enable_flag(encoder_state_t * const state, cabac_data_t * const cabac, uint32_t ctu_rs_addr, alf_component_id component_id, alf_aps *aps) { const encoder_control_t * const encoder = state->encoder_control; const bool alf_component_enabled = (aps != NULL) ? aps->enabled_flag[component_id] : state->slice->tile_group_alf_enabled_flag[component_id]; if (encoder->cfg.alf_type && alf_component_enabled) { int frame_width_in_ctus = state->tile->frame->width_in_lcu; bool left_avail = state->lcu_order[ctu_rs_addr].left ? 1 : 0; bool above_avail = state->lcu_order[ctu_rs_addr].above ? 1 : 0; int left_ctu_addr = left_avail ? ctu_rs_addr - 1 : -1; int above_ctu_addr = above_avail ? ctu_rs_addr - frame_width_in_ctus : -1; uint8_t* ctb_alf_flag = g_ctu_enable_flag[component_id]; int ctx = 0; ctx += left_ctu_addr > -1 ? (ctb_alf_flag[left_ctu_addr] ? 1 : 0) : 0; ctx += above_ctu_addr > -1 ? (ctb_alf_flag[above_ctu_addr] ? 
1 : 0) : 0; cabac->cur_ctx = &(cabac->ctx.alf_ctb_flag_model[component_id * 3 + ctx]); CABAC_BIN(cabac, ctb_alf_flag[ctu_rs_addr], "alf_ctb_flag"); } } void code_alf_ctu_filter_index(encoder_state_t * const state, cabac_data_t * const cabac, uint32_t ctu_rs_addr, bool alf_enable_luma) { const encoder_control_t * const encoder = state->encoder_control; if (!encoder->cfg.alf_type || !alf_enable_luma)//(!cs.sps->getALFEnabledFlag()) || (!alfEnableLuma)) { return; } if (!g_ctu_enable_flag[COMPONENT_Y][ctu_rs_addr]) { return; } const unsigned filter_set_idx = g_alf_ctb_filter_index[ctu_rs_addr]; unsigned num_aps = state->slice->tile_group_num_aps; unsigned num_available_filt_sets = num_aps + ALF_NUM_FIXED_FILTER_SETS; if (num_available_filt_sets > ALF_NUM_FIXED_FILTER_SETS) { #if JVET_P0162_REMOVE_ALF_CTB_FIRST_USE_APS_FLAG int use_temporal_filt = (filter_set_idx >= ALF_NUM_FIXED_FILTER_SETS) ? 1 : 0; cabac->cur_ctx = &(cabac->ctx.alf_temporal_filt); CABAC_BIN(cabac, use_temporal_filt, "use_latest_filt"); if (use_temporal_filt) { assert(filter_set_idx < num_available_filt_sets); //"temporal non-latest set" if (num_aps > 1) { kvz_cabac_encode_trunc_bin(cabac, filter_set_idx - ALF_NUM_FIXED_FILTER_SETS, num_available_filt_sets - ALF_NUM_FIXED_FILTER_SETS); } } else { assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //"fixed set larger than temporal" kvz_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS); } #else int use_latest_filt = (filter_set_idx == ALF_NUM_FIXED_FILTER_SETS) ? 1 : 0; /*if (num_aps == 0) { use_latest_filt = 1; }*/ cabac->cur_ctx = &(cabac->ctx.alf_latest_filt); CABAC_BIN(cabac, use_latest_filt, "use_latest_filt"); if (!use_latest_filt) { if (num_aps == 1) { assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //Fixed set numavail kvz_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS); } else { int use_temporal_filt = (filter_set_idx > ALF_NUM_FIXED_FILTER_SETS) ? 
1 : 0; cabac->cur_ctx = &(cabac->ctx.alf_temporal_filt); CABAC_BIN(cabac, use_temporal_filt, "use_temporal_filt"); if (use_temporal_filt) { assert((filter_set_idx - (ALF_NUM_FIXED_FILTER_SETS + 1)) < (num_aps - 1)); //Temporal non-latest set if (num_aps > 2) { kvz_cabac_encode_trunc_bin(cabac, filter_set_idx - (ALF_NUM_FIXED_FILTER_SETS + 1), num_available_filt_sets - (ALF_NUM_FIXED_FILTER_SETS + 1)); } } else { assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //Fixed set larger than temporal kvz_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS); } } } #endif } else { assert(filter_set_idx < ALF_NUM_FIXED_FILTER_SETS); //Fixed set numavail < num_fixed kvz_cabac_encode_trunc_bin(cabac, filter_set_idx, ALF_NUM_FIXED_FILTER_SETS); } } void code_alf_ctu_alternatives_channel(encoder_state_t * const state, cabac_data_t * const cabac, channel_type channel, alf_aps* aps) { if (channel == CHANNEL_TYPE_CHROMA) { if (aps->enabled_flag[COMPONENT_Cb]) code_alf_ctu_alternatives_component(state, cabac, COMPONENT_Cb, aps); if (aps->enabled_flag[COMPONENT_Cr]) code_alf_ctu_alternatives_component(state, cabac, COMPONENT_Cr, aps); } } void code_alf_ctu_alternatives_component(encoder_state_t * const state, cabac_data_t * const cabac, alf_component_id comp_id, alf_aps* aps) { if (comp_id == COMPONENT_Y) return; uint32_t num_ctus = state->lcu_order_count; uint8_t* ctb_alf_flag = g_ctu_enable_flag[comp_id]; for (int ctu_idx = 0; ctu_idx < num_ctus; ctu_idx++) { if (ctb_alf_flag[ctu_idx]) { code_alf_ctu_alternative_ctu(state, cabac, ctu_idx, comp_id, aps); } } } void code_alf_ctu_alternative_ctu(encoder_state_t * const state, cabac_data_t * const cabac, uint32_t ctu_rs_addr, const alf_component_id comp_idx, const alf_aps* aps) { if (comp_idx == COMPONENT_Y) return; int aps_idx = aps ? 0 : state->slice->tile_group_chroma_aps_id; const alf_aps* alf_param_ref = aps ? 
(aps) : &state->slice->apss[aps_idx]; if (aps || (state->encoder_control->cfg.alf_type && state->slice->tile_group_alf_enabled_flag[comp_idx])) { uint8_t* ctb_alf_flag = g_ctu_enable_flag[comp_idx]; if (ctb_alf_flag[ctu_rs_addr]) { const int num_alts = alf_param_ref->num_alternatives_chroma; uint8_t* ctb_alf_alternative = g_ctu_alternative[comp_idx]; unsigned num_ones = ctb_alf_alternative[ctu_rs_addr]; assert(ctb_alf_alternative[ctu_rs_addr] < num_alts); for (int i = 0; i < num_ones; ++i) { cabac->cur_ctx = &cabac->ctx.alf_ctb_alternatives[comp_idx - 1]; CABAC_BIN(cabac, 1, "alf_ctb_alternatives"); } if (num_ones < num_alts - 1) { cabac->cur_ctx = &cabac->ctx.alf_ctb_alternatives[comp_idx - 1]; CABAC_BIN(cabac, 0, "alf_ctb_alternatives"); } } } } void kvz_encode_alf_bits(encoder_state_t * const state, const int ctu_idx) { if (state->encoder_control->cfg.alf_type) { for (int comp_idx = 0; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { bool is_luma = comp_idx == COMPONENT_Y ? true : false; //Pitäisi poistaa// /*if (!is_luma) { state->slice->tile_group_alf_enabled_flag[comp_idx] = false; }*/ //---------------// code_alf_ctu_enable_flag(state, &state->cabac, ctu_idx, comp_idx, NULL); if (is_luma) { if (g_ctu_enable_flag[comp_idx][ctu_idx]) { //int num_aps = state->slice->tile_group_num_aps; //state->slice->tile_group_num_aps = 0; code_alf_ctu_filter_index(state, &state->cabac, ctu_idx, state->slice->tile_group_alf_enabled_flag[COMPONENT_Y]); //state->slice->tile_group_num_aps = num_aps; } } if (!is_luma) { uint8_t* ctb_alf_flag = state->slice->tile_group_alf_enabled_flag[comp_idx] ? g_ctu_enable_flag[comp_idx] : NULL; if (ctb_alf_flag && ctb_alf_flag[ctu_idx]) { code_alf_ctu_alternative_ctu(state, &state->cabac, ctu_idx, comp_idx, NULL); } } } int num_components = state->encoder_control->chroma_format == KVZ_CSP_400 ? 
1 : MAX_NUM_COMPONENT; for (int comp_idx = 1; comp_idx < num_components; comp_idx++) { if (g_cc_alf_filter_param.cc_alf_filter_enabled[comp_idx - 1]) { const int filter_count = g_cc_alf_filter_param.cc_alf_filter_count[comp_idx - 1]; code_cc_alf_filter_control_idc(state, &state->cabac, g_cc_alf_filter_control[comp_idx - 1][ctu_idx], comp_idx, ctu_idx, g_cc_alf_filter_control[comp_idx - 1], filter_count); } } } } void encoder_state_write_adaptation_parameter_set(encoder_state_t * const state, alf_aps *aps) { #ifdef KVZ_DEBUG printf("=========== Adaptation Parameter Set ===========\n"); #endif bitstream_t * const stream = &state->stream; WRITE_U(stream, (int)aps->aps_type, 3, "aps_params_type"); WRITE_U(stream, aps->aps_id, 5, "adaptation_parameter_set_id"); WRITE_U(stream, state->encoder_control->chroma_format != KVZ_CSP_400, "aps_chroma_present_flag"); //WRITE_CODE(pcAPS->getAPSType(), 3, "aps_params_type"); WRITE_U(stream, aps->aps_type, 3, "aps_params_type"); if (aps->aps_type == T_ALF_APS) { encode_alf_aps_flags(state, aps); } /*else if (aps->aps_type == T_LMCS_APS) { codeLmcsAps(pcAPS); }*/ WRITE_U(stream, 0, 1, "aps_extension_flag"); //Implementation when this flag is equal to 1 should be added when it is needed. 
Currently in the spec we don't have case when this flag is equal to 1 kvz_bitstream_add_rbsp_trailing_bits(stream); } void encode_alf_aps_flags(encoder_state_t * const state, alf_aps* aps) { bitstream_t * const stream = &state->stream; WRITE_U(stream, aps->new_filter_flag[CHANNEL_TYPE_LUMA], 1, "alf_luma_new_filter"); if (state->encoder_control->chroma_format != KVZ_CSP_400) { WRITE_U(stream, aps->new_filter_flag[CHANNEL_TYPE_CHROMA], 1, "alf_chroma_new_filter") } if (state->encoder_control->chroma_format != KVZ_CSP_400) { WRITE_U(stream, aps->cc_alf_aps_param.new_cc_alf_filter[COMPONENT_Cb - 1], 1, "alf_cc_cb_filter_signal_flag"); WRITE_U(stream, aps->cc_alf_aps_param.new_cc_alf_filter[COMPONENT_Cr - 1], 1, "alf_cc_cr_filter_signal_flag"); } if (aps->new_filter_flag[CHANNEL_TYPE_LUMA]) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB //WRITE_FLAG(param.nonLinearFlag[CHANNEL_TYPE_LUMA][0], "alf_luma_clip"); WRITE_U(stream, aps->non_linear_flag[CHANNEL_TYPE_LUMA], 1, "alf_luma_clip"); /*#else WRITE_FLAG(param.nonLinearFlag[CHANNEL_TYPE_LUMA], "alf_luma_clip"); #endif*/ //#if JVET_O0491_HLS_CLEANUP //WRITE_UVLC(param.numLumaFilters - 1, "alf_luma_num_filters_signalled_minus1"); WRITE_UE(stream, aps->num_luma_filters - 1, "alf_luma_num_filters_signalled_minus1"); /*#else xWriteTruncBinCode(param.numLumaFilters - 1, MAX_NUM_ALF_CLASSES); //number_of_filters_minus1 #endif*/ if (aps->num_luma_filters > 1) { //#if JVET_O0491_HLS_CLEANUP //const int length = ceilLog2(param.numLumaFilters); const int length = kvz_math_ceil_log2(aps->num_luma_filters); //#endif for (int i = 0; i < MAX_NUM_ALF_CLASSES; i++) { //#if JVET_O0491_HLS_CLEANUP //WRITE_CODE(param.filterCoeffDeltaIdx[i], length, "alf_luma_coeff_delta_idx"); WRITE_U(stream, aps->filter_coeff_delta_idx[i], length, "alf_luma_coeff_delta_idx"); /*#else xWriteTruncBinCode((uint32_t)param.filterCoeffDeltaIdx[i], param.numLumaFilters); //filter_coeff_delta[i] #endif*/ } } /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED 
WRITE_FLAG(param.fixedFilterSetIndex > 0 ? 1 : 0, "fixed_filter_set_flag"); if (param.fixedFilterSetIndex > 0) { xWriteTruncBinCode(param.fixedFilterSetIndex - 1, NUM_FIXED_FILTER_SETS); WRITE_FLAG(param.fixedFilterPattern, "fixed_filter_flag_pattern"); for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++) { if (param.fixedFilterPattern > 0) { WRITE_FLAG(param.fixedFilterIdx[classIdx], "fixed_filter_flag"); } else { CHECK(param.fixedFilterIdx[classIdx] != 1, "Disabled fixed filter"); } } } #endif*/ //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB alf_filter(state, aps, false, 0); /*#else alfFilter(param, false); #endif*/ } if (aps->new_filter_flag[CHANNEL_TYPE_CHROMA]) { //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (MAX_NUM_ALF_ALTERNATIVES_CHROMA > 1) { //WRITE_UVLC(param.numAlternativesChroma - 1, "alf_chroma_num_alts_minus1"); WRITE_UE(stream, aps->num_alternatives_chroma - 1, "alf_chroma_num_alts_minus1"); } for (int alt_idx = 0; alt_idx < aps->num_alternatives_chroma; ++alt_idx) { encode_alf_aps_filter(state, aps, true, alt_idx); } /*#else WRITE_FLAG(param.nonLinearFlag[CHANNEL_TYPE_CHROMA], "alf_chroma_clip"); alfFilter(param, true); #endif*/ } for (int cc_idx = 0; cc_idx < 2; cc_idx++) { if (aps->cc_alf_aps_param.new_cc_alf_filter[cc_idx]) { const int filter_count = aps->cc_alf_aps_param.cc_alf_filter_count[cc_idx]; assert(filter_count <= MAX_NUM_CC_ALF_FILTERS); // "CC ALF Filter count is too large" assert(filter_count > 0); // "CC ALF Filter count is too small" if (MAX_NUM_CC_ALF_FILTERS > 1) { WRITE_UE(stream, filter_count - 1, cc_idx == 0 ? 
"alf_cc_cb_filters_signalled_minus1" : "alf_cc_cr_filters_signalled_minus1"); } for (int filter_idx = 0; filter_idx < filter_count; filter_idx++) { //AlfFilterShape alfShape(size_CC_ALF); int num_coeff = 8; //CC_ALF_FILTER const short *coeff = aps->cc_alf_aps_param.cc_alf_coeff[cc_idx][filter_idx]; // Filter coefficients for (int i = 0; i < num_coeff - 1; i++) { if (coeff[i] == 0) { WRITE_U(stream, 0, CC_ALF_BITS_PER_COEFF_LEVEL, cc_idx == 0 ? "alf_cc_cb_mapped_coeff_abs" : "alf_cc_cr_mapped_coeff_abs"); } else { WRITE_U(stream, 1 + kvz_math_floor_log2(abs(coeff[i])), CC_ALF_BITS_PER_COEFF_LEVEL, cc_idx == 0 ? "alf_cc_cb_mapped_coeff_abs" : "alf_cc_cr_mapped_coeff_abs"); WRITE_U(stream, coeff[i] < 0 ? 1 : 0, 1, cc_idx == 0 ? "alf_cc_cb_coeff_sign" : "alf_cc_cr_coeff_sign"); } } /*DTRACE(g_trace_ctx, D_SYNTAX, "%s coeff filter_idx %d: ", cc_idx == 0 ? "Cb" : "Cr", filter_idx); for (int i = 0; i < alfShape.numCoeff; i++) { DTRACE(g_trace_ctx, D_SYNTAX, "%d ", coeff[i]); } DTRACE(g_trace_ctx, D_SYNTAX, "\n");*/ } } } } //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB void alf_filter(encoder_state_t * const state, alf_aps* aps, const bool is_chroma, const int alt_idx) /*#else void HLSWriter::alfFilter(const AlfParam& alfParam, const bool is_chroma) #endif*/ { bitstream_t * const stream = &state->stream; if (!is_chroma) { //WRITE_FLAG(alfParam.alfLumaCoeffDeltaFlag, "alf_luma_coeff_delta_flag"); WRITE_U(stream, aps->alf_luma_coeff_delta_flag, 1, "alf_luma_coeff_delta_flag"); /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (!alfParam.alfLumaCoeffDeltaFlag) { if (alfParam.numLumaFilters > 1) { WRITE_FLAG(alfParam.alfLumaCoeffDeltaPredictionFlag, "alf_luma_coeff_delta_prediction_flag"); } } #endif*/ } //AlfFilterShape alfShape(is_chroma ? 5 : 7); const int num_coeff = is_chroma ? 
7 : 13; /*#if !JVET_O0216_ALF_COEFF_EG3 || !JVET_O0064_SIMP_ALF_CLIP_CODING static int bitsCoeffScan[EncAdaptiveLoopFilter::m_MAX_SCAN_VAL][EncAdaptiveLoopFilter::m_MAX_EXP_GOLOMB]; memset(bitsCoeffScan, 0, sizeof(bitsCoeffScan)); const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx(alfShape.filterType); #endif*/ //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const short* coeff = is_chroma ? aps->chroma_coeff[alt_idx] : aps->luma_coeff; const int16_t* clipp = is_chroma ? aps->chroma_clipp[alt_idx] : aps->luma_clipp; /*#else const short* coeff = is_chroma ? alfParam.chromaCoeff : alfParam.lumaCoeff; const short* clipp = is_chroma ? alfParam.chromaClipp : alfParam.lumaClipp; #endif*/ const int num_filters = is_chroma ? 1 : aps->num_luma_filters; // vlc for all /*#if !JVET_O0216_ALF_COEFF_EG3 for (int ind = 0; ind < num_filters; ++ind) { if (is_chroma || !alfParam.alfLumaCoeffDeltaFlag || alfParam.alfLumaCoeffFlag[ind]) { for (int i = 0; i < alfShape.numCoeff - 1; i++) { int coeffVal = abs(coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i]); for (int k = 1; k < 15; k++) { bitsCoeffScan[alfShape.golombIdx[i]][k] += EncAdaptiveLoopFilter::lengthGolomb(coeffVal, k); } } } } #endif*/ /*#if !JVET_O0216_ALF_COEFF_EG3 || !JVET_O0064_SIMP_ALF_CLIP_CODING static int kMinTab[MAX_NUM_ALF_COEFF]; #endif*/ /*#if !JVET_O0216_ALF_COEFF_EG3 int kMin = EncAdaptiveLoopFilter::getGolombKMin(alfShape, num_filters, kMinTab, bitsCoeffScan); // Golomb parameters WRITE_UVLC(kMin - 1, is_chroma ? "alf_chroma_min_eg_order_minus1" : "alf_luma_min_eg_order_minus1"); for (int idx = 0; idx < maxGolombIdx; idx++) { bool golombOrderIncreaseFlag = (kMinTab[idx] != kMin) ? true : false; CHECK(!(kMinTab[idx] <= kMin + 1), "ALF Golomb parameter not consistent"); WRITE_FLAG(golombOrderIncreaseFlag, is_chroma ? 
"alf_chroma_eg_order_increase_flag" : "alf_luma_eg_order_increase_flag"); kMin = kMinTab[idx]; } #endif*/ if (!is_chroma) { if (aps->alf_luma_coeff_delta_flag) { for (int ind = 0; ind < num_filters; ++ind) { //WRITE_FLAG(alfParam.alfLumaCoeffFlag[ind], "alf_luma_coeff_flag[i]"); WRITE_U(stream, aps->alf_luma_coeff_flag[ind], 1, "alf_luma_coeff_flag[i]"); } } } // Filter coefficients for (int ind = 0; ind < num_filters; ++ind) { if (!is_chroma && !aps->alf_luma_coeff_flag[ind] && aps->alf_luma_coeff_delta_flag) { continue; } for (int i = 0; i < num_coeff - 1; i++) { //#if JVET_O0216_ALF_COEFF_EG3 alf_golomb_encode(state, coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i], 3, true); // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j] /*#else alfGolombEncode(coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]]); // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j] #endif*/ } } // Clipping values coding //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB if (aps->non_linear_flag[is_chroma][alt_idx]) /*#else if (alfParam.nonLinearFlag[is_chroma]) #endif*/ { //#if JVET_O0064_SIMP_ALF_CLIP_CODING for (int ind = 0; ind < num_filters; ++ind) { for (int i = 0; i < num_coeff - 1; i++) { WRITE_U(stream, clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i], 2, is_chroma ? 
"alf_chroma_clip_idx" : "alf_luma_clip_idx"); } } /*#else memset(bitsCoeffScan, 0, sizeof(bitsCoeffScan)); short recCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; if (is_chroma) { memcpy(recCoeff, coeff, sizeof(short) * MAX_NUM_ALF_CHROMA_COEFF); } else { memcpy(recCoeff, coeff, sizeof(short) * num_filters * MAX_NUM_ALF_LUMA_COEFF); #if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (alfParam.alfLumaCoeffDeltaPredictionFlag) { for (int i = 1; i < num_filters; i++) { for (int j = 0; j < alfShape.numCoeff - 1; j++) { recCoeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += recCoeff[(i - 1) * MAX_NUM_ALF_LUMA_COEFF + j]; } } } #endif } // vlc for all for (int ind = 0; ind < num_filters; ++ind) { if (is_chroma || !alfParam.alfLumaCoeffDeltaFlag || alfParam.alfLumaCoeffFlag[ind]) { for (int i = 0; i < alfShape.numCoeff - 1; i++) { if (!abs(recCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i])) continue; int coeffVal = abs(clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i]); for (int k = 1; k < 15; k++) { bitsCoeffScan[alfShape.golombIdx[i]][k] += EncAdaptiveLoopFilter::lengthGolomb(coeffVal, k, false); } } } } #if JVET_O0216_ALF_COEFF_EG3 int kMin = EncAdaptiveLoopFilter::getGolombKMin(alfShape, num_filters, kMinTab, bitsCoeffScan); #else kMin = EncAdaptiveLoopFilter::getGolombKMin(alfShape, num_filters, kMinTab, bitsCoeffScan); #endif // Golomb parameters WRITE_UVLC(kMin - 1, "clip_min_golomb_order"); for (int idx = 0; idx < maxGolombIdx; idx++) { bool golombOrderIncreaseFlag = (kMinTab[idx] != kMin) ? 
true : false; CHECK(!(kMinTab[idx] <= kMin + 1), "ALF Golomb parameter not consistent"); WRITE_FLAG(golombOrderIncreaseFlag, "clip_golomb_order_increase_flag"); kMin = kMinTab[idx]; } // Filter coefficients for (int ind = 0; ind < num_filters; ++ind) { if (!is_chroma && !alfParam.alfLumaCoeffFlag[ind] && alfParam.alfLumaCoeffDeltaFlag) { continue; } for (int i = 0; i < alfShape.numCoeff - 1; i++) { if (!abs(recCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i])) continue; alfGolombEncode(clipp[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]], false); // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j] } } #endif*/ } } void encode_alf_adaptive_parameter_set(encoder_state_t * const state) { //send LMCS APS when LMCSModel is updated. It can be updated even current slice does not enable reshaper. //For example, in RA, update is on intra slice, but intra slice may not use reshaper encode_alf_aps_lmcs(state); // only 1 SCALING LIST data for 1 picture encode_alf_aps_scaling_list(state); encode_alf_aps(state); } // set CTU enable flags for (int comp_idx = 0; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { g_ctu_enable_flag[comp_idx][ctu_idx] = g_alf_ctu_enable_flag[comp_idx][ctu_idx]; //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB g_ctu_alternative[comp_idx][ctu_idx] = g_alf_ctu_alternative[comp_idx][ctu_idx]; //#endif } } } // only 1 SCALING LIST data for 1 picture void encode_alf_aps_scaling_list(encoder_state_t * const state) { if (0/*pcSlice->getSPS()->getScalingListFlag() && (m_pcCfg->getUseScalingListId() == SCALING_LIST_FILE_READ)*/) {/* int apsId = picHeader->getScalingListAPSId(); ParameterSetMap *apsMap = m_pcEncLib->getApsMap(); APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + SCALING_LIST_APS); bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + SCALING_LIST_APS); if (writeAPS) { #if JVET_R0433 aps->chromaPresentFlag = pcSlice->getSPS()->getChromaFormatIdc() != 
CHROMA_400; #endif actualTotalBits += xWriteAPS(accessUnit, aps, m_pcEncLib->getLayerId(), true); apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + SCALING_LIST_APS); CHECK(aps != picHeader->getScalingListAPS(), "Wrong SCALING LIST APS pointer in compressGOP"); }*/ } } void encode_alf_aps(encoder_state_t * const state) { const encoder_control_t * const encoder = state->encoder_control; bitstream_t * const stream = &state->stream; if (encoder->cfg.alf_type) // && (state->slice->tile_group_alf_enabled_flag[COMPONENT_Y] || state->slice->tile_group_cc_alf_cb_enabled_flag || state->slice->tile_group_cc_alf_cr_enabled_flag)) { param_set_map *aps_map = state->encoder_control->cfg.param_set_map; for (int aps_id = 0; aps_id < ALF_CTB_MAX_NUM_APS; aps_id++) { alf_aps aps = aps_map[aps_id + T_ALF_APS + NUM_APS_TYPE_LEN].parameter_set; bool write_aps = aps_map[aps_id + T_ALF_APS + NUM_APS_TYPE_LEN].b_changed; /*if (!write_aps && state->slice->apss && state->slice->apss[aps_id].aps_id >= 0 && state->slice->apss[aps_id].aps_id < 8) { write_aps = true; aps = state->slice->apss[aps_id]; // use aps from slice header //*apsMap->allocatePS(apsId) = *aps; //allocate and cpy copy_aps_to_map(aps_map, &aps, aps_id + T_ALF_APS + NUM_APS_TYPE_LEN); //m_pcALF->setApsIdStart(apsId); g_aps_id_start = aps_id; }*/ //else if /*if (state->slice->tile_group_cc_alf_cb_enabled_flag && !write_aps && aps_id == state->slice->tile_group_cc_alf_cb_aps_id) { write_aps = true; aps = aps_map[(state->slice->tile_group_cc_alf_cb_aps_id << NUM_APS_TYPE_LEN) + T_ALF_APS].parameter_set; } if(ctuEnableFlag && is_crossed_by_virtual_boundaries(x_pos, y_pos, width, height, &clip_top, &clip_bottom, &clip_left, &clip_right, &num_hor_vir_bndry, &num_ver_vir_bndry, hor_vir_bndry_pos, ver_vir_bndry_pos, state)) { write_aps = true; aps = aps_map[(state->slice->tile_group_cc_alf_cr_aps_id << NUM_APS_TYPE_LEN) + T_ALF_APS].parameter_set; }*/ const bool clipT = (i == 0 && clip_top) || (i > 0) || (y_start == 0); const 
bool clipB = (i == num_hor_vir_bndry && clip_bottom) || (i < num_hor_vir_bndry) || (y_end == luma_height); int x_start = x_pos; for (int j = 0; j <= num_ver_vir_bndry; j++) { const int x_end = j == num_ver_vir_bndry ? x_pos + width : ver_vir_bndry_pos[j]; const int w = x_end - x_start; const bool clipL = (j == 0 && clip_left) || (j > 0) || (x_start == 0); const bool clipR = (j == num_ver_vir_bndry && clip_right) || (j < num_ver_vir_bndry) || (x_end == luma_width); const int wBuf = w + (clipL ? 0 : MAX_ALF_PADDING_SIZE) + (clipR ? 0 : MAX_ALF_PADDING_SIZE); const int hBuf = h + (clipT ? 0 : MAX_ALF_PADDING_SIZE) + (clipB ? 0 : MAX_ALF_PADDING_SIZE); /* PelUnitBuf buf = m_tempBuf2.subBuf(UnitArea(cs.area.chromaFormat, Area(0, 0, w_buf, h_buf))); buf.copyFrom(tmpYuv.subBuf(UnitArea(cs.area.chromaFormat, Area(x_start - (clip_l ? 0 : MAX_ALF_PADDING_SIZE), y_start - (clip_t ? 0 : MAX_ALF_PADDING_SIZE), w_buf, h_buf)))); buf.extendBorderPel(MAX_ALF_PADDING_SIZE); buf = buf.subBuf(UnitArea(cs.area.chromaFormat, Area(clip_l ? 0 : MAX_ALF_PADDING_SIZE, clip_t ? 
0 : MAX_ALF_PADDING_SIZE, w, h))); */ if (g_ctu_enable_flag[COMPONENT_Y][ctu_idx]) { //const Area blkSrc(0, 0, w, h); //const Area blkDst(x_start, y_start, w, h); //deriveClassification(m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc); kvz_alf_derive_classification(state, w, h, x_start, y_start, x_start, y_start); //const Area blkPCM(x_start, y_start, w, h); //#if !JVET_O0525_REMOVE_PCM //resetPCMBlkClassInfo(cs, m_classifier, buf.get(COMPONENT_Y), blkPCM); //kvz_alf_reset_pcm_blk_class_info(state, lcu, w, h, x_start, y_start); short filter_set_index = g_alf_ctb_filter_index[ctu_idx]; short *coeff; int16_t *clip; if (filter_set_index >= ALF_NUM_FIXED_FILTER_SETS) { coeff = g_coeff_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; clip = g_clipp_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; } else { coeff = g_fixed_filter_set_coeff_dec[filter_set_index]; clip = g_clip_default; } kvz_alf_filter_block(state, state->tile->frame->rec->y, alf_tmp_y, state->tile->frame->rec->stride, state->tile->frame->rec->stride, coeff, clip, g_clp_rngs.comp[COMPONENT_Y], COMPONENT_Y, w, h, x_start, y_start, x_start, y_start, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_luma_pos), g_alf_vb_luma_ctu_height); } for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { alf_component_id comp_id = comp_idx; if (g_ctu_enable_flag[comp_idx][ctu_idx]) { //const Area blkSrc(0, 0, w >> chromaScaleX, h >> chromaScaleY); //const Area blkDst(x_start >> chromaScaleX, y_start >> chromaScaleY, w >> chromaScaleX, h >> chromaScaleY); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const kvz_pixel *src_pixels = comp_id - 1 ? state->tile->frame->rec->v : state->tile->frame->rec->u; kvz_pixel *dst_pixels = comp_id - 1 ? 
alf_tmp_v : alf_tmp_u; const int src_stride = state->tile->frame->rec->stride >> 1; const int dst_stride = state->tile->frame->rec->stride >> 1; uint8_t alt_num = g_ctu_alternative[comp_idx][ctu_idx]; kvz_alf_filter_block(state, src_pixels, dst_pixels, src_stride, dst_stride, g_chroma_coeff_final[alt_num], g_chroma_clipp_final[alt_num], g_clp_rngs.comp[comp_idx], comp_id, w >> chroma_scale_x, h >> chroma_scale_y, x_start >> chroma_scale_x, y_start >> chroma_scale_y, x_start >> chroma_scale_x, y_start >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); /*#else kvz_alf_filter_block(state, g_chroma_coeff_final, g_chroma_clipp_final, g_clp_rngs.comp[comp_idx], comp_id, w >> chroma_scale_x, h >> chroma_scale_y, x_start >> chroma_scale_x, y_start >> chroma_scale_y, x_start >> chroma_scale_x, y_start >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); #endif*/ } } x_start = x_end; } y_start = y_end; } } else { //actualTotalBits += xWriteAPS(accessUnit, aps); kvz_nal_write(stream, NAL_UNIT_PREFIX_APS, 0, state->frame->first_nal); state->frame->first_nal = false; encoder_state_write_adaptation_parameter_set(state, &aps); //deriveClassification(m_classifier, tmpYuv.get(COMPONENT_Y), blk, blk); //apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS); aps_map[aps_id + T_ALF_APS].b_changed = false; //Area blkPCM(x_pos, y_pos, width, height); //#if !JVET_O0525_REMOVE_PCM //resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM); //kvz_alf_reset_pcm_blk_class_info(state, lcu, width, height, x_pos, y_pos); short filter_set_index = g_alf_ctb_filter_index[ctu_idx]; short *coeff; int16_t *clip; if (filter_set_index >= ALF_NUM_FIXED_FILTER_SETS) { coeff = g_coeff_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; clip = g_clipp_aps_luma[filter_set_index - ALF_NUM_FIXED_FILTER_SETS]; } else { coeff = 
g_fixed_filter_set_coeff_dec[filter_set_index]; clip = g_clip_default; } kvz_alf_filter_block(state, state->tile->frame->rec->y, alf_tmp_y, state->tile->frame->rec->stride, state->tile->frame->rec->stride, coeff, clip, g_clp_rngs.comp[COMPONENT_Y], COMPONENT_Y, width, height, x_pos, y_pos, x_pos, y_pos, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_luma_pos), g_alf_vb_luma_ctu_height); } for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { alf_component_id comp_id = comp_idx; if (g_ctu_enable_flag[comp_idx][ctu_idx]) { //Area blk(x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, width >> chroma_scale_x, height >> chroma_scale_y); //m_filter5x5Blk(m_classifier, recYuv, tmpYuv, blk, comp_id, m_chromaCoeffFinal, clp_rngs.comp[comp_idx], cs); //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const kvz_pixel *src_pixels = comp_id - 1 ? state->tile->frame->rec->v : state->tile->frame->rec->u; kvz_pixel *dst_pixels = comp_id - 1 ? alf_tmp_v : alf_tmp_u; const int src_stride = state->tile->frame->rec->stride >> 1; const int dst_stride = state->tile->frame->rec->stride >> 1; uint8_t alt_num = g_ctu_alternative[comp_idx][ctu_idx]; kvz_alf_filter_block(state, src_pixels, dst_pixels, src_stride, dst_stride, g_chroma_coeff_final[alt_num], g_chroma_clipp_final[alt_num], g_clp_rngs.comp[comp_idx], comp_idx, width >> chroma_scale_x, height >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); /*#else kvz_alf_filter_block(state, g_chroma_coeff_final, g_chroma_clipp_final, g_clp_rngs.comp[comp_idx], comp_idx, width >> chroma_scale_x, height >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, x_pos >> chroma_scale_x, y_pos >> chroma_scale_y, ((y_pos + max_cu_height >= luma_height) ? 
luma_height : g_alf_vb_chma_pos), g_alf_vb_chma_ctu_height); #endif*/ } } } } } } //------------------------- CC ALF cabac writer functions------------------------ void code_cc_alf_filter_control_idc(encoder_state_t * const state, cabac_data_t * const cabac, uint8_t idc_val, const alf_component_id comp_id, const int ctu_idx, const uint8_t *filter_control_idc, const int filter_count) { assert(!(idc_val > filter_count)); //Filter index is too large bool left_avail = state->lcu_order[ctu_idx].left ? 1 : 0; bool above_avail = state->lcu_order[ctu_idx].above ? 1 : 0; int ctxt = 0; if (left_avail) { ctxt += (filter_control_idc[ctu_idx - 1]) ? 1 : 0; } if (above_avail) { ctxt += (filter_control_idc[ctu_idx - state->tile->frame->width_in_lcu]) ? 1 : 0; } ctxt += (comp_id == COMPONENT_Cr) ? 3 : 0; //m_BinEncoder.encodeBin((idc_val == 0) ? 0 : 1, Ctx::CcAlfFilterControlFlag(ctxt)); // ON/OFF flag is context coded cabac->cur_ctx = &(cabac->ctx.alf_cc_filter_control_flag[ctxt]); CABAC_BIN(cabac, (idc_val == 0) ? 
0 : 1, "cc_alf_filter_control_flag"); if (idc_val > 0) { int val = (idc_val - 1); while (val) { //m_BinEncoder.encodeBinEP(1); CABAC_BIN_EP(cabac, 1, "cc_alf_filter_control_flag"); val--; } if (idc_val < filter_count) { //m_BinEncoder.encodeBinEP(0); CABAC_BIN_EP(cabac, 0, "cc_alf_filter_control_flag"); } } //DTRACE(g_trace_ctx, D_SYNTAX, "ccAlfFilterControlIdc() comp_id=%d pos=(%d,%d) ctxt=%d, filter_count=%d, idc_val=%d\n", comp_id, lumaPos.x, lumaPos.y, ctxt, filter_count, idc_val); } //--------------------------------------------------------------------- //--------------------------------------------------------------------- //-------------------------CTU functions-------------------------------- void kvz_alf_reconstruct_coeff_aps(encoder_state_t * const state, bool luma, bool chroma, bool is_rdo) { //luma alf_aps* apss = state->slice->apss; //AlfSliceParam alfSliceParamTmp; alf_aps alf_param_tmp; //APS* cur_aps; alf_aps* cur_aps; if (luma) { for (int i = 0; i < state->slice->tile_group_num_aps /* 1, cs.slice->getTileGroupNumAps()*/; i++) { int aps_idx = state->slice->tile_group_luma_aps_id[i]; cur_aps = &apss[aps_idx]; assert(cur_aps != NULL); // "invalid APS" alf_param_tmp = *cur_aps; kvz_alf_reconstruct_coeff(state, &alf_param_tmp, CHANNEL_TYPE_LUMA, is_rdo, true); memcpy(g_coeff_aps_luma[i], g_coeff_final, sizeof(g_coeff_final)); memcpy(g_clipp_aps_luma[i], g_clipp_final, sizeof(g_clipp_final)); } } //chroma if (chroma) { int aps_idx_chroma = state->slice->tile_group_chroma_aps_id; cur_aps = &apss[aps_idx_chroma]; //copy_alf_param(g_alf_aps_chroma, cur_aps); //copy_alf_param(&alf_param_tmp, g_alf_aps_chroma); copy_alf_param(&alf_param_tmp, cur_aps); kvz_alf_reconstruct_coeff(state, &alf_param_tmp, CHANNEL_TYPE_CHROMA, is_rdo, true); } } //void reconstructCoeff(AlfSliceParam& alfSliceParam, ChannelType channel, const bool isRdo, const bool isRedo) void kvz_alf_reconstruct_coeff(encoder_state_t * const state, alf_aps *aps, channel_type channel, const bool 
is_rdo, const bool is_redo) { const int8_t bit_depth = state->encoder_control->bitdepth; int factor = is_rdo ? 0 : (1 << (bit_depth - 1)); bool is_luma = channel == CHANNEL_TYPE_LUMA ? 1 : 0; alf_filter_type filter_type = is_luma ? ALF_FILTER_7X7 : ALF_FILTER_5X5; int num_classes = is_luma ? MAX_NUM_ALF_CLASSES : 1; int num_coeff = filter_type == ALF_FILTER_5X5 ? 7 : 13; int num_coeff_minus1 = num_coeff - 1; /*#if !JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB int num_filters = is_luma ? num_luma_filters : 1; short* coeff = is_luma ? luma_coeff : chroma_coeff; short* clipp = is_luma ? luma_clipp : chroma_clipp; #endif*/ //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB const int num_alts = is_luma ? 1 : aps->num_alternatives_chroma; for (int alt_idx = 0; alt_idx < num_alts; ++alt_idx) { int num_filters = is_luma ? aps->num_luma_filters : 1; short* coeff = is_luma ? aps->luma_coeff : aps->chroma_coeff[alt_idx]; int16_t* clipp = is_luma ? aps->luma_clipp : aps->chroma_clipp[alt_idx]; /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (alfParam.alfLumaCoeffDeltaPredictionFlag && isLuma(channel)) { for (int i = 1; i < num_filters; i++) { for (int j = 0; j < numCoeffMinus1; j++) { coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += coeff[(i - 1) * MAX_NUM_ALF_LUMA_COEFF + j]; } } } #endif*/ for (int filter_idx = 0; filter_idx < num_filters; filter_idx++) { coeff[filter_idx * MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = factor; } if (!is_luma) { for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { g_chroma_coeff_final[alt_idx][coeff_idx] = coeff[coeff_idx]; int clip_idx = aps->non_linear_flag[channel] ? clipp[coeff_idx] : 0; g_chroma_clipp_final[alt_idx][coeff_idx] = is_rdo ? clip_idx : g_alf_clipping_values[channel][clip_idx]; } g_chroma_coeff_final[alt_idx][num_coeff_minus1] = factor; g_chroma_clipp_final[alt_idx][num_coeff_minus1] = is_rdo ? 
0 : g_alf_clipping_values[channel][0]; continue; } for (int class_idx = 0; class_idx < num_classes; class_idx++) { int filterIdx = aps->filter_coeff_delta_idx[class_idx]; /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED int fixedFilterIdx = alfParam.fixedFilterSetIndex; if (fixedFilterIdx > 0 && alfParam.fixedFilterIdx[class_idx] > 0) { fixedFilterIdx = m_classToFilterMapping[fixedFilterIdx - 1][class_idx]; } else { fixedFilterIdx = -1; } #endif*/ for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] = coeff[filter_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx]; /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED //fixed filter if (fixedFilterIdx >= 0) { m_coeffFinal[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] += m_fixedFilterSetCoeff[fixedFilterIdx][coeff_idx]; } #endif*/ } g_coeff_final[class_idx* MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = factor; g_clipp_final[class_idx* MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = is_rdo ? 0 : g_alf_clipping_values[channel][0]; for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { int clip_idx = aps->non_linear_flag[channel] ? clipp[filter_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] : 0; assert((clip_idx >= 0 && clip_idx < MAX_ALF_NUM_CLIPPING_VALUES)); // "Bad clip idx in ALF" g_clipp_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] = is_rdo ? clip_idx : g_alf_clipping_values[channel][clip_idx]; } g_clipp_final[class_idx* MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = is_rdo ? 
0 : g_alf_clipping_values[channel][0]; } } /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (is_chroma(channel)) return; if (isRedo && alfParam.alfLumaCoeffDeltaPredictionFlag) { int num_filters = alfParam.numLumaFilters; short* coeff = alfParam.lumaCoeff; for (int i = num_filters - 1; i > 0; i--) { for (int j = 0; j < numCoeffMinus1; j++) { coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] = coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] - coeff[(i - 1) * MAX_NUM_ALF_LUMA_COEFF + j]; } } } #endif*/ /*#else /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (*alf_luma_coeff_delta_prediction_flag && is_luma) { for (int i = 1; i < num_filters; i++) { for (int j = 0; j < num_coeff_minus1; j++) { coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += coeff[(i - 1) * MAX_NUM_ALF_LUMA_COEFF + j]; } } }*//* for (int filter_idx = 0; filter_idx < num_filters; filter_idx++) { coeff[filter_idx* MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = factor; } if ( !is_luma ) { for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { g_chroma_coeff_final[coeff_idx] = chroma_coeff[coeff_idx]; g_chroma_coeff_final[coeff_idx] = chroma_coeff[coeff_idx]; int clip_idx = aps->non_linear_flag[channel] ? clipp[coeff_idx] : 0; g_chroma_clipp_final[coeff_idx] = is_rdo ? clip_idx : g_alf_clipping_values[channel][clip_idx]; } g_chroma_coeff_final[num_coeff_minus1] = factor; g_chroma_clipp_final[num_coeff_minus1] = is_rdo ? 
0 : g_alf_clipping_values[channel][0]; return; } for (int class_idx = 0; class_idx < num_classes; class_idx++) { int filter_idx = filter_coeff_delta_idx[class_idx]; /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED int fixed_filter_idx = *fixed_filter_set_index; //13 if (fixed_filter_idx > 0 && aps->fixed_filter_idx[class_idx] > 0) { fixed_filter_idx = g_class_to_filter_mapping[fixed_filter_idx - 1][class_idx]; } else { fixed_filter_idx = -1; }*//* for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] = coeff[filter_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx]; /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED //fixed filter if (fixed_filter_idx >= 0) { g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] += g_fixed_filter_set_coeff[fixed_filter_idx][coeff_idx]; }*//* } g_coeff_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = factor; g_clipp_final[class_idx* MAX_NUM_ALF_LUMA_COEFF + num_coeff_minus1] = is_rdo ? 0 : g_alf_clipping_values[channel][0]; for (int coeff_idx = 0; coeff_idx < num_coeff_minus1; ++coeff_idx) { int clip_idx = aps->non_linear_flag[channel] ? (clipp + filter_idx * MAX_NUM_ALF_LUMA_COEFF)[coeff_idx] : 0; g_clipp_final[class_idx * MAX_NUM_ALF_LUMA_COEFF + coeff_idx] = is_rdo ? 
clip_idx : g_alf_clipping_values[channel][clip_idx]; } } /*#if !JVET_O0669_REMOVE_ALF_COEFF_PRED if (is_redo && state->cabac.ctx.alf_luma_coeff_delta_prediction_flag.state[0]) { for (int i = num_filters - 1; i > 0; i--) { for (int j = 0; j < num_coeff_minus1; j++) { coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] = coeff[i * MAX_NUM_ALF_LUMA_COEFF + j] - coeff[(i - 1) * MAX_NUM_ALF_LUMA_COEFF + j]; } } }*/ } void kvz_alf_create(encoder_state_t const *state) { const int pic_width = state->tile->frame->width; const int pic_height = state->tile->frame->height; const int max_cu_width = LCU_WIDTH; //128 const int max_cu_height = LCU_WIDTH; //128 enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; const int num_ctus_in_width = (pic_width / max_cu_width) + ((pic_width % max_cu_width) ? 1 : 0); const int num_ctus_in_height = (pic_height / max_cu_height) + ((pic_height % max_cu_height) ? 1 : 0); g_num_ctus_in_pic = num_ctus_in_width * num_ctus_in_height; g_alf_vb_luma_pos = max_cu_height - ALF_VB_POS_ABOVE_CTUROW_LUMA; g_alf_vb_chma_pos = (max_cu_height >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; g_alf_vb_luma_ctu_height = max_cu_height; g_alf_vb_chma_ctu_height = (max_cu_height >> ((chroma_fmt == KVZ_CSP_420) ? 
1 : 0)); assert(g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] > 0); //"g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] must be at least one" g_alf_clipping_values[CHANNEL_TYPE_LUMA][0] = 1 << g_input_bit_depth[CHANNEL_TYPE_LUMA]; int shift_luma = g_input_bit_depth[CHANNEL_TYPE_LUMA] - 8; for (int i = 1; i < g_alf_num_clipping_values[CHANNEL_TYPE_LUMA]; ++i) { g_alf_clipping_values[CHANNEL_TYPE_LUMA][i] = (short)round(pow(2., g_input_bit_depth[CHANNEL_TYPE_LUMA] * (g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] - i) / g_alf_num_clipping_values[CHANNEL_TYPE_LUMA])); } assert(g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] > 0); //"g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] must be at least one" g_alf_clipping_values[CHANNEL_TYPE_CHROMA][0] = 1 << g_input_bit_depth[CHANNEL_TYPE_CHROMA]; int shift_chroma = g_input_bit_depth[CHANNEL_TYPE_CHROMA] - 8; for (int i = 1; i < g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA]; ++i) { g_alf_clipping_values[CHANNEL_TYPE_CHROMA][i] = (short)round(pow(2., g_input_bit_depth[CHANNEL_TYPE_CHROMA] - 8 + 8. 
* (g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] - i - 1) / (g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] - 1))); } if (g_created) { return; } // Classification g_classifier = malloc(pic_height * sizeof(**g_classifier)); g_classifier[0] = malloc(pic_height * pic_width * sizeof(*g_classifier)); for (int i = 1; i < pic_height; i++) { g_classifier[i] = g_classifier[0] + i * pic_width; } for (int filter_set_index = 0; filter_set_index < ALF_NUM_FIXED_FILTER_SETS; filter_set_index++) { for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) { int fixed_filter_idx = g_class_to_filter_mapping[filter_set_index][class_idx]; for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF - 1; i++) { g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + i] = g_fixed_filter_set_coeff[fixed_filter_idx][i]; } g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + MAX_NUM_ALF_LUMA_COEFF - 1] = (1 << (kvz_bit_depth - 1)); } } for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES; i++) { g_clip_default[i] = g_alf_clipping_values[CHANNEL_TYPE_LUMA][0]; } g_created = true; g_cc_alf_filter_control[0] = malloc(g_num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); g_cc_alf_filter_control[1] = malloc(g_num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); } void kvz_alf_destroy(videoframe_t * const frame) { if (!g_created) { return; } if (g_classifier) { FREE_POINTER(g_classifier[0]); FREE_POINTER(g_classifier); } g_created = false; if (g_cc_alf_filter_control[0]) { FREE_POINTER(g_cc_alf_filter_control[0]) } if (g_cc_alf_filter_control[1]) { FREE_POINTER(g_cc_alf_filter_control[1]) } } void kvz_alf_derive_classification(encoder_state_t * const state, const int width, const int height, int x_pos, int y_pos, const int blk_dst_x, const int blk_dst_y)//, //alf_classifier** g_classifier) { enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; bool chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 
0 : 1; bool chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 0 : 1; const int alf_vb_luma_ctu_height = LCU_WIDTH; const int alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; int32_t pic_height = state->tile->frame->rec->height; int32_t pic_width = state->tile->frame->rec->width; int max_height = y_pos + height; int max_width = x_pos + width; //Use if adjacent CTUs are not reconstructed adjust_pixels(state->tile->frame->rec->y, x_pos, pic_width, y_pos, pic_height, state->tile->frame->rec->stride, pic_width, pic_height); //Use if adjacent CTUs are reconstructed /*adjust_pixels_CTU_plus_4_pix(state->tile->frame->rec->y, x_pos, state->tile->frame->width, y_pos, state->tile->frame->height, state->tile->frame->rec->stride, state->tile->frame->width, state->tile->frame->height);*/ adjust_pixels_chroma(state->tile->frame->rec->u, x_pos >> chroma_scale_x, pic_width >> chroma_scale_x, y_pos >> chroma_scale_y, pic_height >> chroma_scale_y, state->tile->frame->rec->stride >> chroma_scale_x, pic_width >> chroma_scale_x, pic_height >> chroma_scale_y); adjust_pixels_chroma(state->tile->frame->rec->v, x_pos >> chroma_scale_x, pic_width >> chroma_scale_x, y_pos >> chroma_scale_y, pic_height >> chroma_scale_y, state->tile->frame->rec->stride >> chroma_scale_x, pic_width >> chroma_scale_x, pic_height >> chroma_scale_y); for (int i = y_pos; i < max_height; i += CLASSIFICATION_BLK_SIZE) { int n_height = MIN(i + CLASSIFICATION_BLK_SIZE, max_height) - i; for (int j = x_pos; j < max_width; j += CLASSIFICATION_BLK_SIZE) { int n_width = MIN(j + CLASSIFICATION_BLK_SIZE, max_width) - j; kvz_alf_derive_classification_blk(state, state->encoder_control->cfg.input_bitdepth + 4, n_height, n_width, j, i, j - x_pos + blk_dst_x, i - y_pos + blk_dst_y, alf_vb_luma_ctu_height, alf_vb_luma_pos); } } } void kvz_alf_derive_classification_blk(encoder_state_t * const state, const int shift, const int n_height, const int n_width, const int blk_pos_x, const int blk_pos_y, const int blk_dst_x, const 
int blk_dst_y, const int vb_ctu_height, int vb_pos) { videoframe_t* const frame = state->tile->frame; //int ***g_laplacian = state->tile->frame->alf_info->g_laplacian; //alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier; //CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2"); static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5]; memset(laplacian, 0, sizeof(laplacian)); const int stride = frame->rec->stride; kvz_pixel *src = state->tile->frame->rec->y; const int max_activity = 15; int fl = 2; int fl_p1 = fl + 1; int fl2 = 2 * fl; int main_direction, secondary_direction, dir_temp_hv, dir_temp_d; int pix_y; int height = n_height + fl2; int width = n_width + fl2; int pos_x = blk_pos_x; int pos_y = blk_pos_y; int start_height = pos_y - fl_p1; for (int i = 0; i < height; i += 2) { int yoffset = (i + 1 + start_height) * stride - fl_p1; const kvz_pixel *src0 = &src[yoffset - stride]; const kvz_pixel *src1 = &src[yoffset]; const kvz_pixel *src2 = &src[yoffset + stride]; const kvz_pixel *src3 = &src[yoffset + stride * 2]; const int y = blk_dst_y - 2 + i; if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2) { src3 = &src[yoffset + stride]; } else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos) { src0 = &src[yoffset]; } int *p_y_ver = g_laplacian[ALF_VER][i]; int *p_y_hor = g_laplacian[ALF_HOR][i]; int *p_y_dig0 = g_laplacian[ALF_DIAG0][i]; int *p_y_dig1 = g_laplacian[ALF_DIAG1][i]; for (int j = 0; j < width; j += 2) { pix_y = j + 1 + pos_x; const kvz_pixel *p_y = src1 + pix_y; const kvz_pixel *p_y_down = src0 + pix_y; const kvz_pixel *p_y_up = src2 + pix_y; const kvz_pixel *p_y_up2 = src3 + pix_y; const int16_t y0 = p_y[0] << 1; const int16_t y_up1 = p_y_up[1] << 1; p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]); p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - 
p_y_up[2] - p_y_up[0]); p_y_dig0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]); p_y_dig1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]); if (j > 4 && (j - 6) % 4 == 0) { int j_m6 = j - 6; int j_m4 = j - 4; int j_m2 = j - 2; p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j]; p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j]; p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j]; p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j]; } } } // classification block size const int cls_size_y = 4; const int cls_size_x = 4; //for (int i = 0; i < blk.height; i += cls_size_y) for (int i = 0; i < n_height; i += cls_size_y) { int* p_y_ver = laplacian[ALF_VER][i]; int* p_y_ver2 = laplacian[ALF_VER][i + 2]; int* p_y_ver4 = laplacian[ALF_VER][i + 4]; int* p_y_ver6 = laplacian[ALF_VER][i + 6]; int* p_y_hor = laplacian[ALF_HOR][i]; int* p_y_hor2 = laplacian[ALF_HOR][i + 2]; int* p_y_hor4 = laplacian[ALF_HOR][i + 4]; int* p_y_hor6 = laplacian[ALF_HOR][i + 6]; int* p_y_dig0 = laplacian[ALF_DIAG0][i]; int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2]; int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4]; int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6]; int* p_y_dig1 = laplacian[ALF_DIAG1][i]; int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2]; int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4]; int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6]; //for (int j = 0; j < blk.width; j += cls_size_x) for (int j = 0; j < n_width; j += cls_size_x) { int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0; if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4)) { sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j]; sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j]; sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j]; sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j]; } else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos) { sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; sum_h = p_y_hor2[j] + p_y_hor4[j] + 
p_y_hor6[j]; sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; } else { sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j]; sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; } int temp_act = sum_v + sum_h; int activity = 0; const int y = (i + blk_dst_y) & (vb_ctu_height - 1); if (y == vb_pos - 4 || y == vb_pos) { activity = alf_clip3(0, max_activity, (temp_act * 96) >> shift); } else { activity = alf_clip3(0, max_activity, (temp_act * 64) >> shift); } int class_idx = th[activity]; int hv1, hv0, d1, d0, hvd1, hvd0; if (sum_v > sum_h) { hv1 = sum_v; hv0 = sum_h; dir_temp_hv = 1; } else { hv1 = sum_h; hv0 = sum_v; dir_temp_hv = 3; } if (sum_d0 > sum_d1) { d1 = sum_d0; d0 = sum_d1; dir_temp_d = 0; } else { d1 = sum_d1; d0 = sum_d0; dir_temp_d = 2; } if((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0) { hvd1 = d1; hvd0 = d0; main_direction = dir_temp_d; secondary_direction = dir_temp_hv; } else { hvd1 = hv1; hvd0 = hv0; main_direction = dir_temp_hv; secondary_direction = dir_temp_d; } int direction_strength = 0; if (hvd1 > 2 * hvd0) { direction_strength = 1; } if (hvd1 * 2 > 9 * hvd0) { direction_strength = 2; } if (direction_strength) { class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5; } static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 }; int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)]; int y_offset = i + blk_dst_y; int x_offset = j + blk_dst_x; alf_classifier *cl0 = g_classifier[y_offset] + x_offset; alf_classifier *cl1 = g_classifier[y_offset + 1] + x_offset; alf_classifier *cl2 = g_classifier[y_offset + 2] + x_offset; alf_classifier *cl3 = g_classifier[y_offset + 3] + x_offset; cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx = cl1[0].class_idx = 
cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx = cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx = cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx; cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx = cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx = cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx = cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx; } } } void kvz_alf_filter_block(encoder_state_t * const state, const kvz_pixel *src_pixels, kvz_pixel *dst_pixels, const int src_stride, const int dst_stride, const short* filter_set, const int16_t *fClipSet, clp_rng clp_rng, alf_component_id component_id, const int width, const int height, int x_pos, int y_pos, int blk_dst_x, int blk_dst_y, int vb_pos, const int vb_ctu_height) { alf_filter_type const filter_type = component_id == COMPONENT_Y ? ALF_FILTER_7X7 : ALF_FILTER_5X5; const bool chroma = component_id == COMPONENT_Y ? 
0 : 1; const int8_t bit_depth = state->encoder_control->bitdepth; if (chroma) { assert((int)filter_type == 0); //Chroma needs to have filtType == 0 } /*#if !JVET_O0525_REMOVE_PCM //bool isDualTree = CS::isDualITree(cs); bool is_dual_tree = false; bool is_pcm_filter_enabled = ENABLE_PCM; enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; */ //const int srcStride = srcLuma.stride; //const int src_stride = frame->rec->stride; //const int dstStride = dstLuma.stride; //const int dst_stride = frame->rec->stride; const int start_height = y_pos; const int end_height = start_height + height; const int start_width = x_pos; const int end_width = start_width + width; const kvz_pixel *src = src_pixels; kvz_pixel *dst = dst_pixels + blk_dst_y * dst_stride; const kvz_pixel *p_img_y_pad_0, *p_img_y_pad_1, *p_img_y_pad_2, *p_img_y_pad_3, *p_img_y_pad_4, *p_img_y_pad_5, *p_img_y_pad_6; const kvz_pixel *p_img_0, *p_img_1, *p_img_2, *p_img_3, *p_img_4, *p_img_5, *p_img_6; const short *coef = filter_set; const int16_t *clip = fClipSet; const int shift = bit_depth - 1; const int offset = 1 << (shift - 1); int transpose_idx = 0; const int cls_size_y = 4; const int cls_size_x = 4; /*#if !JVET_O0525_REMOVE_PCM bool pcm_flags_2x2[4] = { 0,0,0,0 };*/ assert((start_height % cls_size_y) == 0); //Wrong startHeight in filtering assert((start_width % cls_size_x) == 0); //Wrong startWidth in filtering assert(((end_height - start_height) % cls_size_y) == 0); //Wrong endHeight in filtering assert(((end_width - start_width) % cls_size_x) == 0); //Wrong endWidth in filtering alf_classifier *p_class = NULL; int dst_stride2 = dst_stride * cls_size_y; int src_stride2 = src_stride * cls_size_y; //std::vector filter_coeff(MAX_NUM_ALF_LUMA_COEFF); int filter_coeff[MAX_NUM_ALF_LUMA_COEFF]; memset(filter_coeff, 0, MAX_NUM_ALF_LUMA_COEFF * sizeof(int)); //std::array filterClipp; int filter_clipp[MAX_NUM_ALF_LUMA_COEFF]; memset(filter_clipp, 0, MAX_NUM_ALF_LUMA_COEFF * sizeof(int)); 
p_img_y_pad_0 = src + start_height * src_stride + start_width; p_img_y_pad_1 = p_img_y_pad_0 + src_stride; p_img_y_pad_2 = p_img_y_pad_0 - src_stride; p_img_y_pad_3 = p_img_y_pad_1 + src_stride; p_img_y_pad_4 = p_img_y_pad_2 - src_stride; p_img_y_pad_5 = p_img_y_pad_3 + src_stride; p_img_y_pad_6 = p_img_y_pad_4 - src_stride; kvz_pixel* p_rec_0 = dst + blk_dst_x;//start_width; kvz_pixel* p_rec_1 = p_rec_0 + dst_stride; for (int i = 0; i < end_height - start_height; i += cls_size_y) { if (!chroma) { p_class = g_classifier[blk_dst_y + i] + blk_dst_x; } for (int j = 0; j < end_width - start_width; j += cls_size_x) { if (!chroma) { alf_classifier cl = p_class[j]; transpose_idx = cl.transpose_idx; /*#if !JVET_O0525_REMOVE_PCM if (is_pcm_filter_enabled && cl.class_idx == ALF_UNUSED_CLASS_IDX && transpose_idx == ALF_UNUSED_TRANSPOSE_IDX) { continue; }*/ coef = filter_set + cl.class_idx * MAX_NUM_ALF_LUMA_COEFF; clip = fClipSet + cl.class_idx * MAX_NUM_ALF_LUMA_COEFF; } /*#if !JVET_O0525_REMOVE_PCM else if (is_pcm_filter_enabled) { int blk_x, blk_y; bool *flags = pcm_flags_2x2; // check which chroma 2x2 blocks use PCM // chroma PCM may not be aligned with 4x4 ALF processing grid for (blk_y = 0; blk_y < 4; blk_y += 2) { for (blk_x = 0; blk_x < 4; blk_x += 2) { //Position pos(j + blkDst.x + blkX, i + blkDst.y + blkY); //CodingUnit* cu = is_dual_tree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L); *flags++ = 1; //cu->ipcm ? 
1 : 0; } } // skip entire 4x4 if all chroma 2x2 blocks use PCM if (pcm_flags_2x2[0] && pcm_flags_2x2[1] && pcm_flags_2x2[2] && pcm_flags_2x2[3]) { continue; } }*/ if (filter_type == ALF_FILTER_7X7) { if (transpose_idx == 1) { filter_coeff[0] = coef[9]; filter_coeff[1] = coef[4]; filter_coeff[2] = coef[10]; filter_coeff[3] = coef[8]; filter_coeff[4] = coef[1]; filter_coeff[5] = coef[5]; filter_coeff[6] = coef[11]; filter_coeff[7] = coef[7]; filter_coeff[8] = coef[3]; filter_coeff[9] = coef[0]; filter_coeff[10] = coef[2]; filter_coeff[11] = coef[6]; filter_coeff[12] = coef[12]; filter_clipp[0] = clip[9]; filter_clipp[1] = clip[4]; filter_clipp[2] = clip[10]; filter_clipp[3] = clip[8]; filter_clipp[4] = clip[1]; filter_clipp[5] = clip[5]; filter_clipp[6] = clip[11]; filter_clipp[7] = clip[7]; filter_clipp[8] = clip[3]; filter_clipp[9] = clip[0]; filter_clipp[10] = clip[2]; filter_clipp[11] = clip[6]; filter_clipp[12] = clip[12]; } else if (transpose_idx == 2) { filter_coeff[0] = coef[0]; filter_coeff[1] = coef[3]; filter_coeff[2] = coef[2]; filter_coeff[3] = coef[1]; filter_coeff[4] = coef[8]; filter_coeff[5] = coef[7]; filter_coeff[6] = coef[6]; filter_coeff[7] = coef[5]; filter_coeff[8] = coef[4]; filter_coeff[9] = coef[9]; filter_coeff[10] = coef[10]; filter_coeff[11] = coef[11]; filter_coeff[12] = coef[12]; filter_clipp[0] = clip[0]; filter_clipp[1] = clip[3]; filter_clipp[2] = clip[2]; filter_clipp[3] = clip[1]; filter_clipp[4] = clip[8]; filter_clipp[5] = clip[7]; filter_clipp[6] = clip[6]; filter_clipp[7] = clip[5]; filter_clipp[8] = clip[4]; filter_clipp[9] = clip[9]; filter_clipp[10] = clip[10]; filter_clipp[11] = clip[11]; filter_clipp[12] = clip[12]; } else if (transpose_idx == 3) { filter_coeff[0] = coef[9]; filter_coeff[1] = coef[8]; filter_coeff[2] = coef[10]; filter_coeff[3] = coef[4]; filter_coeff[4] = coef[3]; filter_coeff[5] = coef[7]; filter_coeff[6] = coef[11]; filter_coeff[7] = coef[5]; filter_coeff[8] = coef[1]; filter_coeff[9] = coef[0]; 
filter_coeff[10] = coef[2]; filter_coeff[11] = coef[6]; filter_coeff[12] = coef[12]; filter_clipp[0] = clip[9]; filter_clipp[1] = clip[8]; filter_clipp[2] = clip[10]; filter_clipp[3] = clip[4]; filter_clipp[4] = clip[3]; filter_clipp[5] = clip[7]; filter_clipp[6] = clip[11]; filter_clipp[7] = clip[5]; filter_clipp[8] = clip[1]; filter_clipp[9] = clip[0]; filter_clipp[10] = clip[2]; filter_clipp[11] = clip[6]; filter_clipp[12] = clip[12]; } else { filter_coeff[0] = coef[0]; filter_coeff[1] = coef[1]; filter_coeff[2] = coef[2]; filter_coeff[3] = coef[3]; filter_coeff[4] = coef[4]; filter_coeff[5] = coef[5]; filter_coeff[6] = coef[6]; filter_coeff[7] = coef[7]; filter_coeff[8] = coef[8]; filter_coeff[9] = coef[9]; filter_coeff[10] = coef[10]; filter_coeff[11] = coef[11]; filter_coeff[12] = coef[12]; filter_clipp[0] = clip[0]; filter_clipp[1] = clip[1]; filter_clipp[2] = clip[2]; filter_clipp[3] = clip[3]; filter_clipp[4] = clip[4]; filter_clipp[5] = clip[5]; filter_clipp[6] = clip[6]; filter_clipp[7] = clip[7]; filter_clipp[8] = clip[8]; filter_clipp[9] = clip[9]; filter_clipp[10] = clip[10]; filter_clipp[11] = clip[11]; filter_clipp[12] = clip[12]; } } else { if (transpose_idx == 1) { filter_coeff[0] = coef[4]; filter_coeff[1] = coef[1]; filter_coeff[2] = coef[5]; filter_coeff[3] = coef[3]; filter_coeff[4] = coef[0]; filter_coeff[5] = coef[2]; filter_coeff[6] = coef[6]; filter_clipp[0] = clip[4]; filter_clipp[1] = clip[1]; filter_clipp[2] = clip[5]; filter_clipp[3] = clip[3]; filter_clipp[4] = clip[0]; filter_clipp[5] = clip[2]; filter_clipp[6] = clip[6]; } else if (transpose_idx == 2) { filter_coeff[0] = coef[0]; filter_coeff[1] = coef[3]; filter_coeff[2] = coef[2]; filter_coeff[3] = coef[1]; filter_coeff[4] = coef[4]; filter_coeff[5] = coef[5]; filter_coeff[6] = coef[6]; filter_clipp[0] = clip[0]; filter_clipp[1] = clip[3]; filter_clipp[2] = clip[2]; filter_clipp[3] = clip[1]; filter_clipp[4] = clip[4]; filter_clipp[5] = clip[5]; filter_clipp[6] = clip[6]; } else 
if (transpose_idx == 3) { filter_coeff[0] = coef[4]; filter_coeff[1] = coef[3]; filter_coeff[2] = coef[5]; filter_coeff[3] = coef[1]; filter_coeff[4] = coef[0]; filter_coeff[5] = coef[2]; filter_coeff[6] = coef[6]; filter_clipp[0] = clip[4]; filter_clipp[1] = clip[3]; filter_clipp[2] = clip[5]; filter_clipp[3] = clip[1]; filter_clipp[4] = clip[0]; filter_clipp[5] = clip[2]; filter_clipp[6] = clip[6]; } else { filter_coeff[0] = coef[0]; filter_coeff[1] = coef[1]; filter_coeff[2] = coef[2]; filter_coeff[3] = coef[3]; filter_coeff[4] = coef[4]; filter_coeff[5] = coef[5]; filter_coeff[6] = coef[6]; filter_clipp[0] = clip[0]; filter_clipp[1] = clip[1]; filter_clipp[2] = clip[2]; filter_clipp[3] = clip[3]; filter_clipp[4] = clip[4]; filter_clipp[5] = clip[5]; filter_clipp[6] = clip[6]; } } for (int ii = 0; ii < cls_size_y; ii++) { p_img_0 = p_img_y_pad_0 + j + ii * src_stride; p_img_1 = p_img_y_pad_1 + j + ii * src_stride; p_img_2 = p_img_y_pad_2 + j + ii * src_stride; p_img_3 = p_img_y_pad_3 + j + ii * src_stride; p_img_4 = p_img_y_pad_4 + j + ii * src_stride; p_img_5 = p_img_y_pad_5 + j + ii * src_stride; p_img_6 = p_img_y_pad_6 + j + ii * src_stride; p_rec_1 = p_rec_0 + j + ii * dst_stride; const int y_vb = (blk_dst_y + i + ii) & (vb_ctu_height - 1); if (y_vb < vb_pos && (y_vb >= vb_pos - (chroma ? 2 : 4))) // above { p_img_1 = (y_vb == vb_pos - 1) ? p_img_0 : p_img_1; p_img_3 = (y_vb >= vb_pos - 2) ? p_img_1 : p_img_3; p_img_5 = (y_vb >= vb_pos - 3) ? p_img_3 : p_img_5; p_img_2 = (y_vb == vb_pos - 1) ? p_img_0 : p_img_2; p_img_4 = (y_vb >= vb_pos - 2) ? p_img_2 : p_img_4; p_img_6 = (y_vb >= vb_pos - 3) ? p_img_4 : p_img_6; } else if (y_vb >= vb_pos && (y_vb <= vb_pos + (chroma ? 1 : 3))) // bottom { p_img_2 = (y_vb == vb_pos) ? p_img_0 : p_img_2; p_img_4 = (y_vb <= vb_pos + 1) ? p_img_2 : p_img_4; p_img_6 = (y_vb <= vb_pos + 2) ? p_img_4 : p_img_6; p_img_1 = (y_vb == vb_pos) ? p_img_0 : p_img_1; p_img_3 = (y_vb <= vb_pos + 1) ? 
p_img_1 : p_img_3; p_img_5 = (y_vb <= vb_pos + 2) ? p_img_3 : p_img_5; } bool is_near_vb_above = y_vb < vb_pos && (y_vb >= vb_pos - 1); bool is_near_vb_below = y_vb >= vb_pos && (y_vb <= vb_pos); for (int jj = 0; jj < cls_size_x; jj++) { /*#if !JVET_O0525_REMOVE_PCM // skip 2x2 PCM chroma blocks if (chroma && is_pcm_filter_enabled) { if (pcm_flags_2x2[2 * (ii >> 1) + (jj >> 1)]) { p_img_0++; p_img_1++; p_img_2++; p_img_3++; p_img_4++; p_img_5++; p_img_6++; continue; } }*/ int sum = 0; const kvz_pixel curr = p_img_0[+0]; if (filter_type == ALF_FILTER_7X7) { sum += filter_coeff[0] * (clip_alf(filter_clipp[0], curr, p_img_5[+0], p_img_6[+0])); sum += filter_coeff[1] * (clip_alf(filter_clipp[1], curr, p_img_3[+1], p_img_4[-1])); sum += filter_coeff[2] * (clip_alf(filter_clipp[2], curr, p_img_3[+0], p_img_4[+0])); sum += filter_coeff[3] * (clip_alf(filter_clipp[3], curr, p_img_3[-1], p_img_4[+1])); sum += filter_coeff[4] * (clip_alf(filter_clipp[4], curr, p_img_1[+2], p_img_2[-2])); sum += filter_coeff[5] * (clip_alf(filter_clipp[5], curr, p_img_1[+1], p_img_2[-1])); sum += filter_coeff[6] * (clip_alf(filter_clipp[6], curr, p_img_1[+0], p_img_2[+0])); sum += filter_coeff[7] * (clip_alf(filter_clipp[7], curr, p_img_1[-1], p_img_2[+1])); sum += filter_coeff[8] * (clip_alf(filter_clipp[8], curr, p_img_1[-2], p_img_2[+2])); sum += filter_coeff[9] * (clip_alf(filter_clipp[9], curr, p_img_0[+3], p_img_0[-3])); sum += filter_coeff[10] * (clip_alf(filter_clipp[10], curr, p_img_0[+2], p_img_0[-2])); sum += filter_coeff[11] * (clip_alf(filter_clipp[11], curr, p_img_0[+1], p_img_0[-1])); } else { sum += filter_coeff[0] * (clip_alf(filter_clipp[0], curr, p_img_3[+0], p_img_4[+0])); sum += filter_coeff[1] * (clip_alf(filter_clipp[1], curr, p_img_1[+1], p_img_2[-1])); sum += filter_coeff[2] * (clip_alf(filter_clipp[2], curr, p_img_1[+0], p_img_2[+0])); sum += filter_coeff[3] * (clip_alf(filter_clipp[3], curr, p_img_1[-1], p_img_2[+1])); sum += filter_coeff[4] * 
(clip_alf(filter_clipp[4], curr, p_img_0[+2], p_img_0[-2])); sum += filter_coeff[5] * (clip_alf(filter_clipp[5], curr, p_img_0[+1], p_img_0[-1])); } sum = (sum + offset) >> shift; sum += curr; p_rec_1[jj] = alf_clip_pixel(sum, clp_rng); p_img_0++; p_img_1++; p_img_2++; p_img_3++; p_img_4++; p_img_5++; p_img_6++; } } } p_rec_0 += dst_stride2; p_rec_1 += dst_stride2; p_img_y_pad_0 += src_stride2; p_img_y_pad_1 += src_stride2; p_img_y_pad_2 += src_stride2; p_img_y_pad_3 += src_stride2; p_img_y_pad_4 += src_stride2; p_img_y_pad_5 += src_stride2; p_img_y_pad_6 += src_stride2; } } //-------------------------CC ALF encoding functions------------------------ void apply_cc_alf_filter(encoder_state_t * const state, alf_component_id comp_id, const kvz_pixel *dst_buf, const kvz_pixel *rec_yuv_ext, const int luma_stride, uint8_t *filter_control, const short filter_set[MAX_NUM_CC_ALF_FILTERS][MAX_NUM_CC_ALF_CHROMA_COEFF], const int selected_filter_idx) { enum kvz_chroma_format chroma_format = state->encoder_control->chroma_format; uint8_t component_scale_y = (comp_id == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1; uint8_t component_scale_x = (comp_id == COMPONENT_Y || chroma_format == KVZ_CSP_444) ? 0 : 1; const int pic_height = state->tile->frame->height; const int pic_width = state->tile->frame->width; const int max_ctu_height_log2 = kvz_math_floor_log2(LCU_WIDTH); const int max_ctu_width_log2 = kvz_math_floor_log2(LCU_WIDTH); const int width_in_ctus = state->tile->frame->width_in_lcu; const int alf_vb_luma_ctu_height = LCU_WIDTH; const int alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; int ctu_idx = 0; for (int y_pos = 0; y_pos < pic_height; y_pos += LCU_WIDTH) { for (int x_pos = 0; x_pos < pic_width; x_pos += LCU_WIDTH) { int filter_idx = (filter_control == NULL) ? selected_filter_idx : filter_control[(y_pos >> max_ctu_height_log2) * width_in_ctus + (x_pos >> max_ctu_width_log2)]; bool skip_filtering = (filter_control != NULL && filter_idx == 0) ? 
true : false; if (!skip_filtering) { if (filter_control != NULL) { filter_idx--; } const int16_t *filter_coeff = filter_set[filter_idx]; const int width = (x_pos + LCU_WIDTH > pic_width) ? (pic_width - x_pos) : LCU_WIDTH; const int height = (y_pos + LCU_WIDTH > pic_height) ? (pic_height - y_pos) : LCU_WIDTH; int raster_slice_alf_pad = 0; { filter_blk_cc_alf(state, dst_buf, rec_yuv_ext, luma_stride, comp_id, filter_coeff, g_clp_rngs, alf_vb_luma_ctu_height, alf_vb_luma_pos, x_pos >> component_scale_x, y_pos >> component_scale_y, width >> component_scale_x, height >> component_scale_y); } } ctu_idx++; } } } void setup_cc_alf_aps(encoder_state_t * const state) { if (g_cc_alf_filter_param.cc_alf_filter_enabled[COMPONENT_Cb - 1]) { int cc_alf_cb_aps_id = state->slice->tile_group_cc_alf_cb_aps_id; alf_aps *aps = &state->encoder_control->cfg.param_set_map[cc_alf_cb_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set; if (aps->aps_id >= 0 && aps->aps_id < ALF_CTB_MAX_NUM_APS) { //aps = m_apsMap->allocatePS((cc_alf_cb_aps_id << NUM_APS_TYPE_LEN) + ALF_APS); aps->temporal_id = 0; // cs.slice->getTLayer() } aps->cc_alf_aps_param.cc_alf_filter_enabled[COMPONENT_Cb - 1] = 1; aps->cc_alf_aps_param.cc_alf_filter_count[COMPONENT_Cb - 1] = g_cc_alf_filter_param.cc_alf_filter_count[COMPONENT_Cb - 1]; for ( int filter_idx = 0; filter_idx < MAX_NUM_CC_ALF_FILTERS; filter_idx++ ) { aps->cc_alf_aps_param.cc_alf_filter_idx_enabled[COMPONENT_Cb - 1][filter_idx] = g_cc_alf_filter_param.cc_alf_filter_idx_enabled[COMPONENT_Cb - 1][filter_idx]; memcpy(aps->cc_alf_aps_param.cc_alf_coeff[COMPONENT_Cb - 1][filter_idx], g_cc_alf_filter_param.cc_alf_coeff[COMPONENT_Cb - 1][filter_idx], sizeof(short) * MAX_NUM_CC_ALF_CHROMA_COEFF); } aps->aps_id = cc_alf_cb_aps_id; aps->aps_type = T_ALF_APS; if (g_reuse_aps_id[COMPONENT_Cb - 1] < 0) { aps->cc_alf_aps_param.new_cc_alf_filter[COMPONENT_Cb - 1] = 1; state->encoder_control->cfg.param_set_map[cc_alf_cb_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = 
true; aps->temporal_id = 0; // cs.slice->getTLayer() } state->slice->tile_group_cc_alf_cb_enabled_flag = true; } else { state->slice->tile_group_cc_alf_cb_enabled_flag = false; } if (g_cc_alf_filter_param.cc_alf_filter_enabled[COMPONENT_Cr - 1]) { int cc_alf_cr_aps_id = state->slice->tile_group_cc_alf_cr_aps_id; alf_aps *aps = &state->encoder_control->cfg.param_set_map[cc_alf_cr_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set; if (aps->aps_id >= 0 && aps->aps_id < ALF_CTB_MAX_NUM_APS) { //aps = m_apsMap->allocatePS((cc_alf_cb_aps_id << NUM_APS_TYPE_LEN) + ALF_APS); aps->temporal_id = 0; // cs.slice->getTLayer() } aps->cc_alf_aps_param.cc_alf_filter_enabled[COMPONENT_Cr - 1] = 1; aps->cc_alf_aps_param.cc_alf_filter_count[COMPONENT_Cr - 1] = g_cc_alf_filter_param.cc_alf_filter_count[COMPONENT_Cr - 1]; for (int filter_idx = 0; filter_idx < MAX_NUM_CC_ALF_FILTERS; filter_idx++) { aps->cc_alf_aps_param.cc_alf_filter_idx_enabled[COMPONENT_Cr - 1][filter_idx] = g_cc_alf_filter_param.cc_alf_filter_idx_enabled[COMPONENT_Cr - 1][filter_idx]; memcpy(aps->cc_alf_aps_param.cc_alf_coeff[COMPONENT_Cr - 1][filter_idx], g_cc_alf_filter_param.cc_alf_coeff[COMPONENT_Cr - 1][filter_idx], sizeof(short) * MAX_NUM_CC_ALF_CHROMA_COEFF); } aps->aps_id = cc_alf_cr_aps_id; aps->aps_type = T_ALF_APS; if (g_reuse_aps_id[COMPONENT_Cr - 1] < 0) { aps->cc_alf_aps_param.new_cc_alf_filter[COMPONENT_Cr - 1] = 1; state->encoder_control->cfg.param_set_map[cc_alf_cr_aps_id + NUM_APS_TYPE_LEN + T_ALF_APS].b_changed = true; aps->temporal_id = 0; // cs.slice->getTLayer() } state->slice->tile_group_cc_alf_cr_enabled_flag = true; } else { state->slice->tile_group_cc_alf_cr_enabled_flag = false; } } void round_filt_coeff_cc_alf(int16_t *filter_coeff_quant, double *filter_coeff, const int num_coeff, const int factor ) { for( int i = 0; i < num_coeff; i++ ) { int sign = filter_coeff[i] > 0 ? 
1 : -1; double best_err = 128.0*128.0; int best_index = 0; for(int k = 0; k < CCALF_CANDS_COEFF_NR; k++) { double err = (filter_coeff[i] * sign * factor - cc_alf_small_tab[k]); err = err*err; if(err < best_err) { best_err = err; best_index = k; } } filter_coeff_quant[i] = cc_alf_small_tab[best_index] * sign; } } int get_coeff_rate_cc_alf(short chroma_coeff[MAX_NUM_CC_ALF_FILTERS][MAX_NUM_CC_ALF_CHROMA_COEFF], bool filter_enabled[MAX_NUM_CC_ALF_FILTERS], uint8_t filter_count, alf_component_id comp_id) { int bits = 0; if ( filter_count > 0 ) { bits += length_uvlc(filter_count - 1); int signaled_filter_count = 0; for ( int filterIdx=0; filterIdx (1 << CCALF_DYNAMIC_RANGE))); // this is not possible: filter_coeff_int[k] > (1 << CCALF_DYNAMIC_RANGE) } // Refine quanitzation int modified = 1; double err_ref = calc_error_for_cc_alf_coeffs(&g_alf_covariance_frame_cc_alf[comp_id - 1][0][filter_idx], filter_coeff_int, size, (7/*m_scaleBits*/+1)); while (modified) { modified = 0; for (int i = 1; i > -2; i -= 2 ) { int delta = i; double err_min = MAX_DOUBLE; int idx_min = -1; int min_index = -1; for (int k = 0; k < size; k++) { int org_idx = -1; for (int i = 0; i < CCALF_CANDS_COEFF_NR * 2 - 1; i++) { if (forward_tab[i] == filter_coeff_int[k]) { org_idx = i; break; } } assert(!(org_idx < 0)); //this is wrong, does not find coeff from forward_tab if ( (org_idx - delta < 0) || (org_idx - delta >= CCALF_CANDS_COEFF_NR * 2 - 1) ) continue; filter_coeff_int[k] = forward_tab[org_idx - delta]; double error = calc_error_for_cc_alf_coeffs(&g_alf_covariance_frame_cc_alf[comp_id - 1][0][filter_idx], filter_coeff_int, size, (7/*m_scaleBits*/+1)); if( error < err_min ) { err_min = error; idx_min = k; min_index = org_idx; } filter_coeff_int[k] = forward_tab[org_idx]; } if (err_min < err_ref) { min_index -= delta; assert(!(min_index < 0));// this is wrong, index - delta < 0 assert(!(min_index >= CCALF_CANDS_COEFF_NR * 2 - 1)); // this is wrong, index - delta >= CCALF_CANDS_COEFF_NR * 2 - 1 
filter_coeff_int[idx_min] = forward_tab[min_index]; modified++; err_ref = err_min; } } } for (int k = 0; k < (size + 1); k++) { assert(!((filter_coeff_int[k] < -(1 << CCALF_DYNAMIC_RANGE)) || (filter_coeff_int[k] > (1 << CCALF_DYNAMIC_RANGE)))); //Exceeded valid range for CC ALF coefficient filter_coeff[filter_idx][k] = filter_coeff_int[k]; } } void determine_control_idc_values(encoder_state_t *const state, const alf_component_id comp_id, const int ctu_width_c, const int ctu_height_c, const int pic_width_c, const int pic_height_c, double **unfiltered_distortion, uint64_t *training_distortion[MAX_NUM_CC_ALF_FILTERS], uint64_t *luma_swing_greater_than_threshold_count, uint64_t *chroma_sample_count_near_mid_point, bool reuse_temporal_filter_coeff, uint8_t *training_cov_control, uint8_t *filter_control, uint64_t *cur_total_distortion, double *cur_total_rate, bool filter_enabled[MAX_NUM_CC_ALF_FILTERS], uint8_t map_filter_idx_to_filter_idc[MAX_NUM_CC_ALF_FILTERS + 1], uint8_t *cc_alf_filter_count) { bool cur_filter_enabled[MAX_NUM_CC_ALF_FILTERS]; //std::fill_n(cur_filter_enabled, MAX_NUM_CC_ALF_FILTERS, false); memset(cur_filter_enabled, false, sizeof(cur_filter_enabled)); #if MAX_NUM_CC_ALF_FILTERS>1 filter_idx_count filter_idx_count[MAX_NUM_CC_ALF_FILTERS]; for (int i = 0; i < MAX_NUM_CC_ALF_FILTERS; i++) { filter_idx_count[i].count = 0; filter_idx_count[i].filter_idx = i; } double prev_rate = (*cur_total_rate); #endif cabac_data_t ctx_initial; cabac_data_t ctx_best; cabac_data_t ctx_start; memcpy(&ctx_initial, &cabac_estimator, sizeof(ctx_initial)); memcpy(&ctx_best, &cabac_estimator, sizeof(ctx_best)); cabac_estimator.only_count = 1; ctx_initial.only_count = 1; ctx_best.only_count = 1; ctx_start.only_count = 1; enum kvz_chroma_format chroma_format = state->encoder_control->chroma_format; uint8_t component_scale_y = (comp_id == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1; uint8_t component_scale_x = (comp_id == COMPONENT_Y || chroma_format == KVZ_CSP_444) ? 
0 : 1; double lambda = state->frame->lambda; bool limit_cc_alf = state->encoder_control->cfg.qp >= 37; int ctu_idx = 0; for (int y_ctu = 0; y_ctu < pic_height_c; y_ctu += ctu_height_c) { for (int x_ctu = 0; x_ctu < pic_width_c; x_ctu += ctu_width_c) { uint64_t ssd; double rate; double cost; uint64_t best_ssd = (MAX_INT64 >> 1); double best_rate = MAX_DOUBLE; double best_cost = MAX_DOUBLE; uint8_t best_filter_idc = 0; uint8_t best_filter_idx = 0; const uint32_t threshold_s = MIN(pic_height_c - y_ctu, ctu_height_c) << (chroma_format != KVZ_CSP_420 ? 0 : 1); const uint32_t number_of_chroma_samples = MIN(pic_height_c - y_ctu, ctu_height_c) * MIN(pic_height_c - x_ctu, ctu_width_c); const uint32_t threshold_c = (number_of_chroma_samples >> 2); memcpy(&cabac_estimator, &ctx_best, sizeof(cabac_estimator)); memcpy(&ctx_start, &cabac_estimator, sizeof(ctx_start)); for (int filter_idx = 0; filter_idx <= MAX_NUM_CC_ALF_FILTERS; filter_idx++) { uint8_t filter_idc = map_filter_idx_to_filter_idc[filter_idx]; if (filter_idx < MAX_NUM_CC_ALF_FILTERS && !filter_enabled[filter_idx]) { continue; } if (filter_idx == MAX_NUM_CC_ALF_FILTERS) { ssd = (uint64_t)unfiltered_distortion[comp_id][ctu_idx]; // restore saved distortion computation } else { ssd = training_distortion[filter_idx][ctu_idx]; } memcpy(&cabac_estimator, &ctx_start, sizeof(cabac_estimator)); kvz_cabac_reset_bits(&cabac_estimator); //const Position lumaPos = Position({ xCtu << getComponentScaleX(comp_id, cs.pcv->chrFormat), // yCtu << getComponentScaleY(comp_id, cs.pcv->chrFormat) }); code_cc_alf_filter_control_idc(state, &cabac_estimator, filter_idc, comp_id, ctu_idx, filter_control, *cc_alf_filter_count); //rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); rate = (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); cost = rate * lambda + ssd; bool limitation_exceeded = false; if (limit_cc_alf && filter_idx < MAX_NUM_CC_ALF_FILTERS) { limitation_exceeded = limitation_exceeded || 
(luma_swing_greater_than_threshold_count[ctu_idx] >= threshold_s); limitation_exceeded = limitation_exceeded || (chroma_sample_count_near_mid_point[ctu_idx] >= threshold_c); } if (cost < best_cost && !limitation_exceeded) { best_cost = cost; best_rate = rate; best_ssd = ssd; best_filter_idc = filter_idc; best_filter_idx = filter_idx; //ctx_best = SubCtx(Ctx::CcAlfFilterControlFlag, m_CABACEstimator->getCtx()); memcpy(&ctx_best, &cabac_estimator, sizeof(ctx_best)); training_cov_control[ctu_idx] = (filter_idx == MAX_NUM_CC_ALF_FILTERS) ? 0 : (filter_idx + 1); filter_control[ctu_idx] = (filter_idx == MAX_NUM_CC_ALF_FILTERS) ? 0 : (filter_idx + 1); } } if (best_filter_idc != 0) { cur_filter_enabled[best_filter_idx] = true; #if MAX_NUM_CC_ALF_FILTERS>1 filter_idx_count[best_filter_idx].count++; #endif } (*cur_total_rate) += best_rate; (*cur_total_distortion) += best_ssd; ctu_idx++; } } #if MAX_NUM_CC_ALF_FILTERS>1 if (!reuse_temporal_filter_coeff) { memcpy(cur_filter_enabled, filter_enabled, sizeof(cur_filter_enabled)); qsort(filter_idx_count, MAX_NUM_CC_ALF_FILTERS, sizeof(*filter_idx_count), comparator); int filter_idc = 1; (*cc_alf_filter_count) = 0; for (int i = 0; i < MAX_NUM_CC_ALF_FILTERS; i++) { const int filter_idx = filter_idx_count[i].filter_idx; if (filter_enabled[filter_idx]) { map_filter_idx_to_filter_idc[filter_idx] = filter_idc; filter_idc++; (*cc_alf_filter_count)++; } } (*cur_total_rate) = prev_rate; //m_CABACEstimator->getCtx() = ctx_initial; memcpy(&cabac_estimator, &ctx_initial, sizeof(cabac_estimator)); //m_CABACEstimator->resetBits(); kvz_cabac_reset_bits(&cabac_estimator); int ctu_idx = 0; for (int y = 0; y < pic_height_c; y += ctu_height_c) { for (int x = 0; x < pic_width_c; x += ctu_width_c) { const int filter_idx_plus1 = filter_control[ctu_idx]; //const Position lumaPos = Position( // { x << getComponentScaleX(comp_id, cs.pcv->chrFormat), y << getComponentScaleY(comp_id, cs.pcv->chrFormat) }); code_cc_alf_filter_control_idc(state, 
&cabac_estimator, (filter_idx_plus1 == 0 ? 0 : map_filter_idx_to_filter_idc[filter_idx_plus1 - 1]), comp_id, ctu_idx, filter_control, *cc_alf_filter_count); ctu_idx++; } } (*cur_total_rate) += (23 - cabac_estimator.bits_left) + (cabac_estimator.num_buffered_bytes << 3); } #endif // restore for next iteration //m_CABACEstimator->getCtx() = ctx_initial; memcpy(&cabac_estimator, &ctx_initial, sizeof(cabac_estimator)); } void get_available_cc_alf_aps_ids(encoder_state_t *const state, alf_component_id compID, int *aps_ids_size, int *aps_ids) { alf_aps* apss = state->slice->apss; for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { copy_aps(&apss[i], &state->encoder_control->cfg.param_set_map[i + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set); } int aps_id_checked = 0, cur_aps_id = g_aps_id_start; if (cur_aps_id < ALF_CTB_MAX_NUM_APS) { while (aps_id_checked < ALF_CTB_MAX_NUM_APS && state->frame->slicetype != KVZ_SLICE_I && (*aps_ids_size) < ALF_CTB_MAX_NUM_APS && !(state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_IDR_N_LP)) //&& !cs.slice->getPendingRasInit() { alf_aps cur_aps = state->slice->apss[cur_aps_id]; bool aps_found = (0 <= cur_aps.aps_id && cur_aps.aps_id < ALF_CTB_MAX_NUM_APS); if (aps_found && cur_aps.temporal_id <= state->slice->id && cur_aps.cc_alf_aps_param.new_cc_alf_filter[compID - 1]) { aps_ids[(*aps_ids_size)] = cur_aps_id; (*aps_ids_size) += 1; } aps_id_checked++; cur_aps_id = (cur_aps_id + 1) % ALF_CTB_MAX_NUM_APS; } } } void derive_cc_alf_filter(encoder_state_t * const state, alf_component_id comp_id, const kvz_picture *org_yuv, const kvz_picture *rec_dst_yuv ) { if (!state->slice->tile_group_alf_enabled_flag[COMPONENT_Y]) { g_cc_alf_filter_param.cc_alf_filter_enabled[comp_id - 1] = false; return; } bool limit_cc_alf = state->encoder_control->cfg.qp >= 37; // m_encCfg->getCCALFQpThreshold(); if (limit_cc_alf) // && state->slice. 
cs.slice->getSliceQp() <= m_encCfg->getBaseQP() + 1) { g_cc_alf_filter_param.cc_alf_filter_enabled[comp_id - 1] = false; return; } enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; uint8_t best_map_filter_idx_to_filter_idc[MAX_NUM_CC_ALF_FILTERS+1]; bool scale_x = (comp_id == COMPONENT_Y || chroma_fmt == KVZ_CSP_444) ? 0 : 1; bool scale_y = (comp_id == COMPONENT_Y || chroma_fmt != KVZ_CSP_420) ? 0 : 1; const int ctu_width_c = LCU_WIDTH >> scale_x; const int ctu_height_c = LCU_WIDTH >> scale_y; const int pic_width_c = state->tile->frame->width >> scale_x; const int pic_height_c = state->tile->frame->height >> scale_y; const int pic_stride_c = rec_dst_yuv->stride >> scale_x; const int8_t bit_depth = state->encoder_control->bitdepth; const int max_training_iter_count = 15; int max_ctu_height_log2 = kvz_math_floor_log2(LCU_WIDTH); int max_ctu_height_log2_chrma = kvz_math_floor_log2(LCU_WIDTH) >> scale_y; int max_ctu_width_log2 = kvz_math_floor_log2(LCU_WIDTH); int max_ctu_width_log2_chrma = kvz_math_floor_log2(LCU_WIDTH) >> scale_x; int32_t ctus_in_width = state->tile->frame->width_in_lcu; const uint32_t num_ctus_in_pic = state->lcu_order_count; short best_filter_coeff_set[MAX_NUM_CC_ALF_FILTERS][MAX_NUM_CC_ALF_CHROMA_COEFF]; bool best_filter_idx_enabled[MAX_NUM_CC_ALF_FILTERS]; uint8_t best_filter_count = 0; double lambda = state->frame->lambda; if (limit_cc_alf) { count_luma_swing_greater_than_threshold(rec_dst_yuv->y, rec_dst_yuv->stride, rec_dst_yuv->height, rec_dst_yuv->width, max_ctu_width_log2, max_ctu_height_log2, g_luma_swing_greater_than_threshold_count, ctus_in_width, bit_depth); } if (limit_cc_alf) { if (comp_id == COMPONENT_Cb) { count_luma_swing_greater_than_threshold(rec_dst_yuv->u, pic_stride_c, pic_height_c, pic_width_c, max_ctu_width_log2_chrma, max_ctu_height_log2_chrma, g_luma_swing_greater_than_threshold_count, ctus_in_width, bit_depth); } else if (comp_id == COMPONENT_Cr) { 
count_luma_swing_greater_than_threshold(rec_dst_yuv->v, pic_stride_c, pic_height_c, pic_width_c, max_ctu_width_log2_chrma, max_ctu_height_log2_chrma, g_luma_swing_greater_than_threshold_count, ctus_in_width, bit_depth); } else { assert(false); // Component ID not allowed. } } for ( int filter_idx = 0; filter_idx <= MAX_NUM_CC_ALF_FILTERS; filter_idx++ ) { if ( filter_idx < MAX_NUM_CC_ALF_FILTERS) { memset(best_filter_coeff_set[filter_idx], 0, sizeof(best_filter_coeff_set[filter_idx]) ); best_map_filter_idx_to_filter_idc[filter_idx] = filter_idx + 1; } else { best_map_filter_idx_to_filter_idc[filter_idx] = 0; } } memset(g_best_filter_control, 0, sizeof(uint8_t) * num_ctus_in_pic); int cc_alf_reuse_aps_id = -1; g_reuse_aps_id[comp_id - 1] = -1; //const TempCtx ctxStartCcAlfFilterControlFlag(m_CtxCache, SubCtx(Ctx::CcAlfFilterControlFlag, m_CABACEstimator->getCtx())); cabac_data_t ctx_start_cc_alf_filter_control_flag; memcpy(&ctx_start_cc_alf_filter_control_flag, &cabac_estimator, sizeof(ctx_start_cc_alf_filter_control_flag)); ctx_start_cc_alf_filter_control_flag.only_count = 1; // compute cost of not filtering uint64_t unfiltered_distortion = 0; for (int ctb_idx = 0; ctb_idx < num_ctus_in_pic; ctb_idx++) { unfiltered_distortion += (uint64_t)g_alf_covariance_cc_alf[comp_id - 1][0][0][ctb_idx].pix_acc; } double best_unfiltered_total_cost = 1 * lambda + unfiltered_distortion; // 1 bit is for gating flag bool cc_alf_filter_idx_enabled[MAX_NUM_CC_ALF_FILTERS]; short cc_alf_filter_coeff[MAX_NUM_CC_ALF_FILTERS][MAX_NUM_CC_ALF_CHROMA_COEFF]; uint8_t cc_alf_filter_count = MAX_NUM_CC_ALF_FILTERS; double best_filtered_total_cost = MAX_DOUBLE; bool best_reuse_temporal_filter_coeff = false; int training_iter_count = 0; bool keep_training = true; bool improvement = false; double prev_total_cost = MAX_DOUBLE; const int num_coeff = 8/*g_filter_shapes_cc_alf[comp_id - 1][0].num_coeff*/ - 1; int log2_block_width = max_ctu_width_log2 - scale_x; int log2_block_height = 
max_ctu_height_log2 - scale_y; uint64_t cur_total_distortion = 0; double cur_total_rate = 0; int aps_ids_size = 0; int aps_ids[ALF_CTB_MAX_NUM_APS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; get_available_cc_alf_aps_ids(state, comp_id, &aps_ids_size, aps_ids); for (int test_filter_idx = 0; test_filter_idx < (aps_ids_size + 1 ); test_filter_idx++ ) { bool referencing_existing_aps = (test_filter_idx < aps_ids_size) ? true : false; int max_number_of_filters_being_tested = MAX_NUM_CC_ALF_FILTERS - (test_filter_idx - aps_ids_size); if (max_number_of_filters_being_tested < 0) { max_number_of_filters_being_tested = 1; } { // Instead of rewriting the control buffer for every training iteration just keep a mapping from filterIdx to filterIdc uint8_t map_filter_idx_to_filter_idc[MAX_NUM_CC_ALF_FILTERS + 1]; for (int filter_idx = 0; filter_idx <= MAX_NUM_CC_ALF_FILTERS; filter_idx++) { if (filter_idx == MAX_NUM_CC_ALF_FILTERS) { map_filter_idx_to_filter_idc[filter_idx] = 0; } else { map_filter_idx_to_filter_idc[filter_idx] = filter_idx + 1; } } // initialize filters for ( int filter_idx = 0; filter_idx < MAX_NUM_CC_ALF_FILTERS; filter_idx++ ) { cc_alf_filter_idx_enabled[filter_idx] = false; memset(cc_alf_filter_coeff[filter_idx], 0, sizeof(cc_alf_filter_coeff[filter_idx])); } if ( referencing_existing_aps ) { max_number_of_filters_being_tested = state->encoder_control->cfg.param_set_map[aps_ids[test_filter_idx] + NUM_APS_TYPE_LEN + T_ALF_APS].parameter_set.cc_alf_aps_param.cc_alf_filter_count[comp_id - 1]; cc_alf_filter_count = max_number_of_filters_being_tested; for (int filter_idx = 0; filter_idx < max_number_of_filters_being_tested; filter_idx++) { cc_alf_filter_idx_enabled[filter_idx] = true; memcpy(cc_alf_filter_coeff[filter_idx], g_cc_alf_filter_param.cc_alf_coeff[comp_id - 1][filter_idx], sizeof(cc_alf_filter_coeff[filter_idx])); } memcpy( cc_alf_filter_coeff, state->encoder_control->cfg.param_set_map[aps_ids[test_filter_idx] + NUM_APS_TYPE_LEN + 
T_ALF_APS].parameter_set.cc_alf_aps_param.cc_alf_coeff[comp_id - 1], sizeof(cc_alf_filter_coeff) ); } else { for (int i = 0; i < max_number_of_filters_being_tested; i++) { cc_alf_filter_idx_enabled[i] = true; } cc_alf_filter_count = max_number_of_filters_being_tested; } // initialize int control_idx = 0; const int column_size = (pic_width_c / max_number_of_filters_being_tested); for (int y = 0; y < pic_height_c; y += ctu_height_c) { for (int x = 0; x < pic_width_c; x += ctu_width_c) { g_training_cov_control[control_idx] = ( x / column_size ) + 1; control_idx++; } } // compute cost of filtering training_iter_count = 0; keep_training = true; improvement = false; prev_total_cost = MAX_DOUBLE; while (keep_training) { improvement = false; for (int filter_idx = 0; filter_idx < max_number_of_filters_being_tested; filter_idx++) { if (cc_alf_filter_idx_enabled[filter_idx]) { if (!referencing_existing_aps) { get_frame_stats_cc_alf(comp_id, (filter_idx + 1), state->lcu_order_count); derive_cc_alf_filter_coeff(comp_id, cc_alf_filter_coeff, filter_idx); } for (int y = 0; y < pic_height_c; y += (1 << log2_block_height)) { for (int x = 0; x < pic_width_c; x += (1 << log2_block_width)) { int ctu_idx = (y >> log2_block_height) * ctus_in_width + (x >> log2_block_width); g_training_distortion[filter_idx][ctu_idx] = (int)(g_ctb_distortion_unfilter[comp_id][ctu_idx] + calc_error_for_cc_alf_coeffs(&g_alf_covariance_cc_alf[comp_id - 1][0][0][ctu_idx], cc_alf_filter_coeff[filter_idx], num_coeff, 7 + 1)); } } } } memcpy(&cabac_estimator, &ctx_start_cc_alf_filter_control_flag, sizeof(cabac_estimator)); cur_total_distortion = 0; cur_total_rate = 0; determine_control_idc_values(state, comp_id, ctu_width_c, ctu_height_c, pic_width_c, pic_height_c, g_ctb_distortion_unfilter, g_training_distortion, g_luma_swing_greater_than_threshold_count, g_chroma_sample_count_near_mid_point, (referencing_existing_aps == true), g_training_cov_control, g_filter_control, &cur_total_distortion, &cur_total_rate, 
cc_alf_filter_idx_enabled, map_filter_idx_to_filter_idc, &cc_alf_filter_count); // compute coefficient coding bit cost if (cc_alf_filter_count > 0) { if (referencing_existing_aps) { cur_total_rate += 1 + 3; // +1 for enable flag, +3 APS ID in slice header } else { cur_total_rate += get_coeff_rate_cc_alf(cc_alf_filter_coeff, cc_alf_filter_idx_enabled, cc_alf_filter_count, comp_id) + 1 + 9; // +1 for the enable flag, +9 3-bit for APS ID in slice header, 5-bit for APS ID in APS, a 1-bit // new filter flags (ignore shared cost such as other new-filter flags/NALU header/RBSP // terminating bit/byte alignment bits)*/ } double cur_total_cost = cur_total_rate * lambda + cur_total_distortion; if (cur_total_cost < prev_total_cost) { prev_total_cost = cur_total_cost; improvement = true; } if (cur_total_cost < best_filtered_total_cost) { best_filtered_total_cost = cur_total_cost; memcpy(best_filter_idx_enabled, cc_alf_filter_idx_enabled, sizeof(cc_alf_filter_idx_enabled)); memcpy(best_filter_coeff_set, cc_alf_filter_coeff, sizeof(cc_alf_filter_coeff)); memcpy(g_best_filter_control, g_filter_control, sizeof(uint8_t) * num_ctus_in_pic); best_filter_count = cc_alf_filter_count; cc_alf_reuse_aps_id = referencing_existing_aps ? 
aps_ids[test_filter_idx] : -1; memcpy(best_map_filter_idx_to_filter_idc, map_filter_idx_to_filter_idc, sizeof(map_filter_idx_to_filter_idc)); } } training_iter_count++; if (!improvement || training_iter_count > max_training_iter_count || referencing_existing_aps) { keep_training = false; } } } } if (best_unfiltered_total_cost < best_filtered_total_cost) { memset(g_best_filter_control, 0, sizeof(uint8_t) * num_ctus_in_pic); } // save best coeff and control bool atleast_one_block_undergoes_fitlering = false; for (int controlIdx = 0; best_filter_count > 0 && controlIdx < num_ctus_in_pic; controlIdx++) { if (g_best_filter_control[controlIdx]) { atleast_one_block_undergoes_fitlering = true; break; } } g_cc_alf_filter_param.number_valid_components = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_COMPONENT; g_cc_alf_filter_param.cc_alf_filter_enabled[comp_id - 1] = atleast_one_block_undergoes_fitlering; if (atleast_one_block_undergoes_fitlering) { // update the filter control indicators if (best_reuse_temporal_filter_coeff!=1) { short stored_best_filter_coeff_set[MAX_NUM_CC_ALF_FILTERS][MAX_NUM_CC_ALF_CHROMA_COEFF]; for (int filter_idx=0; filter_idx= 0 ) { g_reuse_aps_id[comp_id - 1] = cc_alf_reuse_aps_id; if (comp_id == COMPONENT_Cb) { state->slice->tile_group_cc_alf_cb_aps_id == cc_alf_reuse_aps_id; } else { state->slice->tile_group_cc_alf_cr_aps_id == cc_alf_reuse_aps_id; } } } } void derive_stats_for_cc_alf_filtering(encoder_state_t * const state, const kvz_picture *org_yuv, const int comp_idx, const int mask_stride, const uint8_t filter_idc) { const int32_t num_ctus_in_pic = state->lcu_order_count; const int filter_idx = filter_idc - 1; // init CTU stats buffers for( int shape = 0; shape != 1/*m_filterShapesCcAlf[comp_idx-1].size()*/; shape++ ) { for (int ctu_idx = 0; ctu_idx < num_ctus_in_pic; ctu_idx++) { reset_alf_covariance(&g_alf_covariance_cc_alf[comp_idx - 1][shape][filter_idx][ctu_idx], -1); } } // init Frame stats buffers for (int shape = 0; shape != 
1/*m_filterShapesCcAlf[comp_idx - 1].size()*/; shape++) { reset_alf_covariance(&g_alf_covariance_frame_cc_alf[comp_idx - 1][shape][filter_idx], -1); } int ctu_rs_addr = 0; const int frame_height = state->tile->frame->height; const int frame_width = state->tile->frame->width; const int max_cu_width = LCU_WIDTH; const int max_cu_height = LCU_WIDTH; for (int y_pos = 0; y_pos < state->tile->frame->height; y_pos += max_cu_height) { for (int x_pos = 0; x_pos < state->tile->frame->width; x_pos += max_cu_width) { const int width = (x_pos + max_cu_width > frame_width) ? (frame_width - x_pos) : max_cu_width; const int height = (y_pos + max_cu_height > frame_height) ? (frame_height - y_pos) : max_cu_height; int raster_slice_alf_pad = 0; { for (int shape = 0; shape != 1/*m_filterShapesCcAlf[comp_idx - 1].size()*/; shape++) { get_blk_stats_cc_alf(state, &g_alf_covariance_cc_alf[comp_idx - 1][0][filter_idx][ctu_rs_addr], org_yuv, comp_idx, x_pos, y_pos, width, height); add_alf_cov(&g_alf_covariance_frame_cc_alf[comp_idx - 1][shape][filter_idx], &g_alf_covariance_cc_alf[comp_idx - 1][shape][filter_idx][ctu_rs_addr]); } } ctu_rs_addr++; } } } void get_blk_stats_cc_alf(encoder_state_t * const state, alf_covariance *alf_covariance, const kvz_picture *org_yuv, const alf_component_id comp_id, const int x_pos, const int y_pos, const int width, const int height) { enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; bool chroma_scale_x = (chroma_fmt == KVZ_CSP_444) ? 0 : 1; bool chroma_scale_y = (chroma_fmt != KVZ_CSP_420) ? 
0 : 1; const int frame_height = state->tile->frame->height; const int alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; const int alf_vb_luma_ctu_height = LCU_WIDTH; const int max_cu_height = LCU_WIDTH; const int x_pos_c = x_pos >> chroma_scale_x; const int y_pos_c = y_pos >> chroma_scale_y; const int c_width = width >> chroma_scale_x; const int c_height = height >> chroma_scale_y; const int num_coeff = 8; const channel_type channel = (comp_id == COMPONENT_Y) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA; enum kvz_chroma_format chroma_format = state->encoder_control->chroma_format; const int number_of_components = (chroma_format == KVZ_CSP_400) ? 1 : MAX_NUM_COMPONENT;; int rec_stride[MAX_NUM_COMPONENT]; int rec_pixel_idx[MAX_NUM_COMPONENT]; const int luma_rec_pos = y_pos * state->tile->frame->rec->stride + x_pos; const int chroma_rec_pos = y_pos_c * (state->tile->frame->rec->stride >> chroma_scale_x) + x_pos_c; kvz_pixel *rec_y = &alf_tmp_y[luma_rec_pos]; kvz_pixel *rec_u = &alf_tmp_u[chroma_rec_pos]; kvz_pixel *rec_v = &alf_tmp_v[chroma_rec_pos]; for (int c_idx = 0; c_idx < number_of_components; c_idx++) { bool is_luma = c_idx == COMPONENT_Y; rec_stride[c_idx] = state->tile->frame->rec->stride >> (is_luma ? 0 : chroma_scale_x); rec_pixel_idx[c_idx] = 0; } int org_stride = 0; const kvz_pixel *org = 0; if (comp_id == COMPONENT_Y) { org_stride = org_yuv->stride; org = &org_yuv->y[y_pos*org_stride + x_pos]; } else if (comp_id == COMPONENT_Cb) { org_stride = org_yuv->stride >> chroma_scale_x; org = &org_yuv->u[y_pos_c*org_stride + x_pos_c]; } else if (comp_id == COMPONENT_Cr) { org_stride = org_yuv->stride >> chroma_scale_x; org = &org_yuv->v[y_pos_c*org_stride + x_pos_c]; } const int num_bins = 1; int vb_ctu_height = alf_vb_luma_ctu_height; int vb_pos = alf_vb_luma_pos; if ((y_pos + max_cu_height) >= frame_height) { vb_pos = frame_height; } int32_t e_local[MAX_NUM_CC_ALF_CHROMA_COEFF][1]; kvz_pixel *rec_pixels = (comp_id == COMPONENT_Y ? 
rec_y : (comp_id == COMPONENT_Cb ? rec_u : rec_v)); uint8_t component_scale_y = (comp_id == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1; uint8_t component_scale_x = (comp_id == COMPONENT_Y || chroma_format == KVZ_CSP_444) ? 0 : 1; int16_t y_local = 0; for (int i = 0; i < (comp_id == COMPONENT_Y ? height : c_height); i++) { int vb_distance = ((i << component_scale_y) % vb_ctu_height) - vb_pos; const bool skip_this_row = (component_scale_y == 0 && (vb_distance == 0 || vb_distance == 1)); for (int j = 0; j < (comp_id == COMPONENT_Y ? width : c_width) && (!skip_this_row); j++) { memset(e_local, 0, sizeof(e_local)); double weight = 1.0; if (0 /*g_alf_wssd*/) { //weight = m_lumaLevelToWeightPLUT[org[j]]; } y_local = org[j] - rec_pixels[j + rec_pixel_idx[comp_id]]; calc_covariance_cc_alf(e_local, rec_y + rec_pixel_idx[COMPONENT_Y] + (j << component_scale_x), rec_stride[COMPONENT_Y], vb_distance); for (int k = 0; k < (num_coeff - 1); k++) { for (int l = k; l < (num_coeff - 1); l++) { for (int b0 = 0; b0 < num_bins; b0++) { for (int b1 = 0; b1 < num_bins; b1++) { if (0 /*g_alf_wssd*/) { alf_covariance->ee[b0][b1][k][l] += weight * (e_local[k][b0] * (double)e_local[l][b1]); } else { alf_covariance->ee[b0][b1][k][l] += e_local[k][b0] * (double)e_local[l][b1]; } } } } for (int b = 0; b < num_bins; b++) { if (0 /*g_alf_wssd*/) { alf_covariance->y[b][k] += weight * (e_local[k][b] * (double)y_local); } else { alf_covariance->y[b][k] += e_local[k][b] * (double)y_local; } } } if (0 /*g_alf_wssd*/) { alf_covariance->pix_acc += weight * (y_local * (double)y_local); } else { alf_covariance->pix_acc += y_local * (double)y_local; } } org += org_stride; for (int src_c_idx = 0; src_c_idx < number_of_components; src_c_idx++) { const channel_type c_channel = (src_c_idx == COMPONENT_Y) ? 
CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA; if (c_channel == channel) { rec_pixel_idx[src_c_idx] += rec_stride[src_c_idx]; } else { if (comp_id == COMPONENT_Y) { rec_pixel_idx[src_c_idx] += rec_stride[src_c_idx] >> ((src_c_idx == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1); } else { rec_pixel_idx[src_c_idx] += rec_stride[src_c_idx] << ((comp_id == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1); } } } } for (int k = 1; k < (MAX_NUM_CC_ALF_CHROMA_COEFF - 1); k++) { for (int l = 0; l < k; l++) { for (int b0 = 0; b0 < num_bins; b0++) { for (int b1 = 0; b1 < num_bins; b1++) { alf_covariance->ee[b0][b1][k][l] = alf_covariance->ee[b1][b0][l][k]; } } } } } void calc_covariance_cc_alf(int32_t e_local[MAX_NUM_CC_ALF_CHROMA_COEFF][1], const kvz_pixel *rec, const int stride, int vb_distance) { const kvz_pixel *rec_y_m1 = rec - 1 * stride; const kvz_pixel *rec_y_0 = rec; const kvz_pixel *rec_y_p1 = rec + 1 * stride; const kvz_pixel *rec_y_p2 = rec + 2 * stride; if (vb_distance == -2 || vb_distance == +1) { rec_y_p2 = rec_y_p1; } else if (vb_distance == -1 || vb_distance == 0) { rec_y_m1 = rec_y_0; rec_y_p2 = rec_y_p1 = rec_y_0; } const kvz_pixel center_value = rec_y_0[+0]; for (int b = 0; b < 1; b++) { e_local[0][b] += rec_y_m1[+0] - center_value; e_local[1][b] += rec_y_0[-1] - center_value; e_local[2][b] += rec_y_0[+1] - center_value; e_local[3][b] += rec_y_p1[-1] - center_value; e_local[4][b] += rec_y_p1[+0] - center_value; e_local[5][b] += rec_y_p1[+1] - center_value; e_local[6][b] += rec_y_p2[+0] - center_value; } } void count_luma_swing_greater_than_threshold(const kvz_pixel* luma, int luma_stride, int height, int width, int log2_block_width, int log2_block_height, uint64_t* luma_swing_greater_than_threshold_count, int luma_count_stride, int8_t input_bit_depth) { const int threshold = (1 << (input_bit_depth - 2 )) - 1; // 3x4 Diamond int x_support[] = { 0, -1, 0, 1, -1, 0, 1, 0 }; int y_support[] = { -1, 0, 0, 0, 1, 1, 1, 2 }; for (int y = 0; y < height; y += 
(1 << log2_block_height)) { for (int x = 0; x < width; x += (1 << log2_block_width)) { luma_swing_greater_than_threshold_count[(y >> log2_block_height) * luma_count_stride + (x >> log2_block_width)] = 0; for (int y_off = 0; y_off < (1 << log2_block_height); y_off++) { for (int x_off = 0; x_off < (1 << log2_block_width); x_off++) { if ((y + y_off) >= (height - 2) || (x + x_off) >= (width - 1) || (y + y_off) < 1 || (x + x_off) < 1) // only consider samples that are fully supported by picture { continue; } int min_val = ((1 << input_bit_depth) - 1); int max_val = 0; for (int i = 0; i < 8; i++) { kvz_pixel p = luma[(y_off + y_support[i]) * luma_stride + x + x_off + x_support[i]]; if ( p < min_val ) { min_val = p; } if ( p > max_val ) { max_val = p; } } if ((max_val - min_val) > threshold) { luma_swing_greater_than_threshold_count[(y >> log2_block_height) * luma_count_stride + (x >> log2_block_width)]++; } } } } luma += (luma_stride << log2_block_height); } } /* void EncAdaptiveLoopFilter::countChromaSampleValueNearMidPoint(const Pel* chroma, int chromaStride, int height, int width, int log2BlockWidth, int log2BlockHeight, uint64_t* chromaSampleCountNearMidPoint, int chromaSampleCountNearMidPointStride) { const int midPoint = (1 << m_inputBitDepth[CH_C]) >> 1; const int threshold = 16; for (int y = 0; y < height; y += (1 << log2BlockHeight)) { for (int x = 0; x < width; x += (1 << log2BlockWidth)) { chromaSampleCountNearMidPoint[(y >> log2BlockHeight)* chromaSampleCountNearMidPointStride + (x >> log2BlockWidth)] = 0; for (int yOff = 0; yOff < (1 << log2BlockHeight); yOff++) { for (int xOff = 0; xOff < (1 << log2BlockWidth); xOff++) { if ((y + yOff) >= height || (x + xOff) >= width) { continue; } int distanceToMidPoint = abs(chroma[yOff * chromaStride + x + xOff] - midPoint); if (distanceToMidPoint < threshold) { chromaSampleCountNearMidPoint[(y >> log2BlockHeight)* chromaSampleCountNearMidPointStride + (x >> log2BlockWidth)]++; } } } } chroma += (chromaStride << 
log2BlockHeight); } } */ void init_distortion_cc_alf(const int num_ctus) { for (int comp = 1; comp < MAX_NUM_COMPONENT; comp++) { for (int ctb_idx = 0; ctb_idx < num_ctus; ctb_idx++) { g_ctb_distortion_unfilter[comp][ctb_idx] = g_alf_covariance_cc_alf[comp - 1][0][0][ctb_idx].pix_acc; } } } void get_frame_stats_cc_alf(alf_component_id comp_idx, int filter_idc, const int num_ctus_in_frame) { const int filter_idx = filter_idc - 1; // init Frame stats buffers for (int shape = 0; shape != 1/*m_filterShapesCcAlf[comp_idx - 1].size()*/; shape++) { reset_alf_covariance(&g_alf_covariance_frame_cc_alf[comp_idx - 1][shape][filter_idx], -1); } for (int ctu_rs_addr = 0; ctu_rs_addr < num_ctus_in_frame; ctu_rs_addr++) { if (g_training_cov_control[ctu_rs_addr] == filter_idc) { for (int shape = 0; shape != 1/*m_filterShapesCcAlf[comp_idx - 1].size()*/; shape++) { add_alf_cov(&g_alf_covariance_frame_cc_alf[comp_idx - 1][shape][filter_idx], &g_alf_covariance_cc_alf[comp_idx - 1][shape][0][ctu_rs_addr]); } } } } void filter_blk_cc_alf(encoder_state_t * const state, const kvz_pixel *dst_buf, const kvz_pixel *rec_src, const int rec_luma_stride, const alf_component_id comp_id, const int16_t *filter_coeff, const clp_rngs clp_rngs, int vb_ctu_height, int vb_pos, const int x_pos, const int y_pos, const int blk_width, const int blk_height) { assert(!(1 << kvz_math_floor_log2(vb_ctu_height) != vb_ctu_height)); //Not a power of 2 assert(comp_id != COMPONENT_Y); //Must be chroma enum kvz_chroma_format chroma_format = state->encoder_control->chroma_format; uint8_t scale_y = (comp_id == COMPONENT_Y || chroma_format != KVZ_CSP_420) ? 0 : 1; uint8_t scale_x = (comp_id == COMPONENT_Y || chroma_format == KVZ_CSP_444) ? 
0 : 1; const int cls_size_y = 4; const int cls_size_x = 4; const int start_height = y_pos; const int end_height = y_pos + blk_height; const int start_width = x_pos; const int end_width = x_pos + blk_width; const int luma_start_height = start_height << scale_y; const int luma_start_width = start_width << scale_x; assert(!(start_height % cls_size_y)); //Wrong start_height in filtering assert(!(start_width % cls_size_x)); //Wrong start_width in filtering assert(!((end_height - start_height) % cls_size_y)); //Wrong end_height in filtering assert(!((end_width - start_width) % cls_size_x)); //Wrong end_width in filtering kvz_pixel* src_buf = rec_src; const kvz_pixel* luma_ptr = src_buf + luma_start_height * rec_luma_stride + luma_start_width; const int chroma_stride = rec_luma_stride >> scale_x; kvz_pixel* chroma_ptr = dst_buf + start_height * chroma_stride + start_width; for (int i = 0; i < end_height - start_height; i += cls_size_y) { for (int j = 0; j < end_width - start_width; j += cls_size_x) { for (int ii = 0; ii < cls_size_y; ii++) { int row = ii; int col = j; kvz_pixel *src_self = chroma_ptr + col + row * chroma_stride; int offset1 = rec_luma_stride; int offset2 = -rec_luma_stride; int offset3 = 2 * rec_luma_stride; row <<= scale_y; col <<= scale_x; const kvz_pixel *src_cross = luma_ptr + col + row * rec_luma_stride; int pos = ((start_height + i + ii) << scale_y) & (vb_ctu_height - 1); if (scale_y == 0 && (pos == vb_pos || pos == vb_pos + 1)) { continue; } if (pos == (vb_pos - 2) || pos == (vb_pos + 1)) { offset3 = offset1; } else if (pos == (vb_pos - 1) || pos == vb_pos) { offset1 = 0; offset2 = 0; offset3 = 0; } for (int jj = 0; jj < cls_size_x; jj++) { const int jj2 = (jj << scale_x); const int offset0 = 0; int sum = 0; const kvz_pixel curr_src_cross = src_cross[offset0 + jj2]; sum += filter_coeff[0] * (src_cross[offset2 + jj2] - curr_src_cross); sum += filter_coeff[1] * (src_cross[offset0 + jj2 - 1] - curr_src_cross); sum += filter_coeff[2] * 
(src_cross[offset0 + jj2 + 1] - curr_src_cross); sum += filter_coeff[3] * (src_cross[offset1 + jj2 - 1] - curr_src_cross); sum += filter_coeff[4] * (src_cross[offset1 + jj2] - curr_src_cross); sum += filter_coeff[5] * (src_cross[offset1 + jj2 + 1] - curr_src_cross); sum += filter_coeff[6] * (src_cross[offset3 + jj2] - curr_src_cross); sum = (sum + ((1 << 7/*m_scaleBits*/) >> 1)) >> 7/*m_scaleBits*/; const int offset = 1 << clp_rngs.comp[comp_id].bd >> 1; sum = alf_clip_pixel(sum + offset, clp_rngs.comp[comp_id]) - offset; sum += src_self[jj]; src_self[jj] = alf_clip_pixel(sum, clp_rngs.comp[comp_id]); } } } chroma_ptr += chroma_stride * cls_size_y; luma_ptr += rec_luma_stride * cls_size_y << scale_y; } }