From da04fffaec00eb49e8579e44fc37a9a75a2c0a3e Mon Sep 17 00:00:00 2001 From: Arttu Makinen Date: Fri, 23 Oct 2020 10:24:35 +0300 Subject: [PATCH] Updated the creating of ALF parameters and init for them. --- src/alf.c | 217 ++++++++++++++++++++++----------------------- src/alf.h | 6 +- src/encoderstate.c | 4 - 3 files changed, 112 insertions(+), 115 deletions(-) diff --git a/src/alf.c b/src/alf.c index 3df31c10..71316b44 100644 --- a/src/alf.c +++ b/src/alf.c @@ -1622,6 +1622,12 @@ void kvz_alf_enc_process(encoder_state_t *const state ) { + kvz_alf_enc_create(state); +#if !FULL_FRAME + kvz_alf_enc_init(state); + kvz_alf_init(state); +#endif + #if FULL_FRAME //int layerIdx = cs.vps == nullptr ? 0 : cs.vps->getGeneralLayerIdx(cs.slice->getPic()->layerId); @@ -2144,20 +2150,89 @@ double kvz_alf_derive_ctb_alf_enable_flags(encoder_state_t * const state, return cost; } + void kvz_alf_enc_create(encoder_state_t const *state) { - if (g_curr_frame == g_old_frame) { + if (g_created) { return; } - g_curr_frame = g_old_frame; - kvz_bit_depth = state->encoder_control->bitdepth; + + enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; + const int pic_width = state->tile->frame->width; + const int pic_height = state->tile->frame->height; - kvz_alf_create(state); + g_num_ctus_in_pic = state->lcu_order_count; + + g_alf_vb_luma_pos = LCU_WIDTH - ALF_VB_POS_ABOVE_CTUROW_LUMA; + g_alf_vb_chma_pos = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)) - ALF_VB_POS_ABOVE_CTUROW_CHMA; + + g_alf_vb_luma_ctu_height = LCU_WIDTH; + g_alf_vb_chma_ctu_height = (LCU_WIDTH >> ((chroma_fmt == KVZ_CSP_420) ? 1 : 0)); + + assert(g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] > 0); //"g_alf_num_clipping_values[CHANNEL_TYPE_LUMA] must be at least one" + g_alf_clipping_values[CHANNEL_TYPE_LUMA][0] = 1 << g_input_bit_depth[CHANNEL_TYPE_LUMA]; + int shift_luma = g_input_bit_depth[CHANNEL_TYPE_LUMA] - 8; + for (int i = 1; i < g_alf_num_clipping_values[CHANNEL_TYPE_LUMA]; ++i) + { + g_alf_clipping_values[CHANNEL_TYPE_LUMA][i] = 1 << (7 - 2 * i + shift_luma); + } + + assert(g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] > 0); //"g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA] must be at least one" + g_alf_clipping_values[CHANNEL_TYPE_CHROMA][0] = 1 << g_input_bit_depth[CHANNEL_TYPE_CHROMA]; + int shift_chroma = g_input_bit_depth[CHANNEL_TYPE_CHROMA] - 8; + for (int i = 1; i < g_alf_num_clipping_values[CHANNEL_TYPE_CHROMA]; ++i) + { + g_alf_clipping_values[CHANNEL_TYPE_CHROMA][i] = 1 << (7 - 2 * i + shift_chroma); + } + + // Classification + g_classifier = malloc(pic_height * sizeof(**g_classifier)); + g_classifier[0] = malloc(pic_height * pic_width * sizeof(*g_classifier)); + for (int i = 1; i < pic_height; i++) + { + g_classifier[i] = g_classifier[0] + i * pic_width; + } + + for (int filter_set_index = 0; filter_set_index < ALF_NUM_FIXED_FILTER_SETS; filter_set_index++) + { + for (int class_idx = 0; class_idx < MAX_NUM_ALF_CLASSES; class_idx++) + { + int fixed_filter_idx = g_class_to_filter_mapping[filter_set_index][class_idx]; + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF - 1; i++) + { + g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + i] = g_fixed_filter_set_coeff[fixed_filter_idx][i]; + } + g_fixed_filter_set_coeff_dec[filter_set_index][class_idx * MAX_NUM_ALF_LUMA_COEFF + MAX_NUM_ALF_LUMA_COEFF - 1] = (1 << (kvz_bit_depth - 1)); + } + } + + for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES; i++) + { + g_clip_default[i] = g_alf_clipping_values[CHANNEL_TYPE_LUMA][0]; + } + + for (int i = 0; i != 1/*m_filterShapes[COMPONENT_Y].size()*/; i++) + { + for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++) + { + g_alf_covariance_merged[i][j].num_coeff = 13; + g_alf_covariance_merged[i][j].num_bins = g_max_alf_num_clipping_values; + memset(g_alf_covariance_merged[i][j].y, 0, sizeof(g_alf_covariance_merged[i][j].y)); + memset(g_alf_covariance_merged[i][j].ee, 0, sizeof(g_alf_covariance_merged[i][j].ee)); + } + } + + memset(g_clip_default_enc, 0, sizeof(g_clip_default_enc)); + + g_cc_alf_filter_control[0] = malloc(g_num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); + g_cc_alf_filter_control[1] = malloc(g_num_ctus_in_pic * sizeof(*g_cc_alf_filter_control)); + + kvz_bit_depth = state->encoder_control->bitdepth; for (int channel_idx = 0; channel_idx < MAX_NUM_CHANNEL_TYPE; channel_idx++) { channel_type ch_type = (channel_type)channel_idx; - + //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB int num_classes = channel_idx ? MAX_NUM_ALF_ALTERNATIVES_CHROMA : MAX_NUM_ALF_CLASSES; /*#else @@ -2195,8 +2270,6 @@ void kvz_alf_enc_create(encoder_state_t const *state) { g_ctu_alternative_tmp[comp_idx] = malloc(g_num_ctus_in_pic * sizeof(*g_ctu_alternative_tmp[comp_idx])); g_ctu_alternative[comp_idx] = malloc(g_num_ctus_in_pic * sizeof(*g_ctu_alternative[comp_idx])); - - //std::fill_n(m_ctuAlternativeTmp[comp_idx], m_numCTUsInPic, 0); for (int ctu_idx = 0; ctu_idx < g_num_ctus_in_pic; ctu_idx++) { g_ctu_alternative_tmp[comp_idx][ctu_idx] = 0; g_ctu_alternative[comp_idx][ctu_idx] = 0; @@ -2227,17 +2300,6 @@ void kvz_alf_enc_create(encoder_state_t const *state) } } - for (int i = 0; i != 1/*m_filterShapes[COMPONENT_Y].size()*/; i++) - { - for (int j = 0; j <= MAX_NUM_ALF_CLASSES + 1; j++) - { - g_alf_covariance_merged[i][j].num_coeff = 13; - g_alf_covariance_merged[i][j].num_bins = g_max_alf_num_clipping_values; - memset(g_alf_covariance_merged[i][j].y, 0, sizeof(g_alf_covariance_merged[i][j].y)); - memset(g_alf_covariance_merged[i][j].ee, 0, sizeof(g_alf_covariance_merged[i][j].ee)); - } - } - //#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB g_filter_coeff_set = malloc(/*MAX(*/MAX_NUM_ALF_CLASSES/*, MAX_NUM_ALF_ALTERNATIVES_CHROMA)*/ * sizeof(int*)); g_filter_clipp_set = malloc(/*MAX(*/MAX_NUM_ALF_CLASSES/*, MAX_NUM_ALF_ALTERNATIVES_CHROMA)*/ * sizeof(int*)); @@ -2254,16 +2316,13 @@ void kvz_alf_enc_create(encoder_state_t const *state) g_diff_filter_coeff[i] = malloc(MAX_NUM_ALF_LUMA_COEFF * sizeof(int)); } - //g_ctb_distortion_fixed_filter = malloc(g_num_ctus_in_pic * sizeof(double)); for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++) { g_ctb_distortion_unfilter[comp] = malloc(g_num_ctus_in_pic * sizeof(double)); } - + g_alf_ctb_filter_index = malloc(g_num_ctus_in_pic * sizeof(*g_alf_ctb_filter_index)); g_alf_ctb_filter_set_index_tmp = malloc(g_num_ctus_in_pic * sizeof(*g_alf_ctb_filter_set_index_tmp)); - - memset(g_clip_default_enc, 0, sizeof(g_clip_default_enc)); enum kvz_chroma_format chroma_fmt = state->encoder_control->chroma_format; const int number_of_components = (chroma_fmt == KVZ_CSP_400) ? 1 : MAX_NUM_COMPONENT; @@ -2333,7 +2392,7 @@ void kvz_alf_enc_create(encoder_state_t const *state) unsigned int luma_size = (width + 8) * (height + 8); unsigned chroma_sizes[] = { 0, luma_size / 4, luma_size / 2, luma_size }; unsigned chroma_size = chroma_sizes[chroma_fmt]; - + alf_fulldata = MALLOC_SIMD_PADDED(kvz_pixel, (luma_size + 2 * chroma_size), simd_padding_width * 2); alf_fulldata = &alf_fulldata[4 * (width + 8) + 4] + simd_padding_width / sizeof(kvz_pixel); alf_tmp_y = &alf_fulldata[0]; @@ -2347,8 +2406,6 @@ void kvz_alf_enc_create(encoder_state_t const *state) alf_tmp_v = &alf_fulldata[luma_size - (4 * (width + 8) + 4) + chroma_size + (2 * (stride / 2) + 2)]; } - //size_of_best_aps_ids = 0; - size_of_aps_ids = 0; #if !FULL_FRAME //kvz_alf_encoder_ctb for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { @@ -2358,8 +2415,8 @@ void kvz_alf_enc_create(encoder_state_t const *state) blocks_using_new_filter = 0; #endif // !FULL_FRAME - g_aps_id_cc_alf_start[0] = (int) MAX_NUM_APS; - g_aps_id_cc_alf_start[1] = (int) MAX_NUM_APS; + g_aps_id_cc_alf_start[0] = (int)MAX_NUM_APS; + g_aps_id_cc_alf_start[1] = (int)MAX_NUM_APS; for (int comp_idx = 1; comp_idx < MAX_NUM_COMPONENT; comp_idx++) { int num_filters = MAX_NUM_CC_ALF_FILTERS; @@ -2370,13 +2427,11 @@ void kvz_alf_enc_create(encoder_state_t const *state) g_alf_covariance_frame_cc_alf[comp_idx - 1][i] = malloc(num_filters * sizeof(*g_alf_covariance_frame_cc_alf[comp_idx - 1][i])); for (int k = 0; k < num_filters; k++) { - //g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].create(m_filterShapesCcAlf[comp_idx - 1][i].numCoeff); - g_alf_covariance_frame_cc_alf[comp_idx-1][i][k].num_coeff = 8; - g_alf_covariance_frame_cc_alf[comp_idx-1][i][k].num_bins = g_max_alf_num_clipping_values; - g_alf_covariance_frame_cc_alf[comp_idx-1][i][k].pix_acc = 0; - memset(g_alf_covariance_frame_cc_alf[comp_idx-1][i][k].y, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].y)); - memset(g_alf_covariance_frame_cc_alf[comp_idx-1][i][k].ee, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].ee)); - + g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].num_coeff = 8; + g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].num_bins = g_max_alf_num_clipping_values; + g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].pix_acc = 0; + memset(g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].y, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].y)); + memset(g_alf_covariance_frame_cc_alf[comp_idx - 1][i][k].ee, 0, sizeof(g_alf_covariance_frame_cc_alf[comp_idx][i][k].ee)); } g_alf_covariance_cc_alf[comp_idx - 1][i] = malloc(num_filters * sizeof(**g_alf_covariance_cc_alf[comp_idx - 1][i])); @@ -2389,12 +2444,13 @@ void kvz_alf_enc_create(encoder_state_t const *state) g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].num_coeff = 8; g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].num_bins = g_max_alf_num_clipping_values; g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].pix_acc = 0; - memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].y, 0, sizeof(g_alf_covariance_cc_alf[comp_idx-1][i][j][k].y)); - memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].ee, 0, sizeof(g_alf_covariance_cc_alf[comp_idx-1][i][j][k].ee)); + memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].y, 0, sizeof(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].y)); + memset(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].ee, 0, sizeof(g_alf_covariance_cc_alf[comp_idx - 1][i][j][k].ee)); } } } } + g_training_cov_control = malloc(g_num_ctus_in_pic * sizeof(*g_training_cov_control)); for (int i = 0; i < MAX_NUM_CC_ALF_FILTERS; i++) { @@ -2402,13 +2458,25 @@ void kvz_alf_enc_create(encoder_state_t const *state) } g_filter_control = malloc(g_num_ctus_in_pic * sizeof(*g_filter_control)); g_best_filter_control = malloc(g_num_ctus_in_pic * sizeof(*g_best_filter_control)); - //uint32_t area = (picWidth >> getComponentScaleX(COMPONENT_Cb, chromaFormatIDC))*(picHeight >> getComponentScaleY(COMPONENT_Cb, chromaFormatIDC)); - //m_bufOrigin = (Pel*)xMalloc(Pel, area); - //m_buf = new PelBuf(m_bufOrigin, picWidth >> getComponentScaleX(COMPONENT_Cb, chromaFormatIDC), picWidth >> getComponentScaleX(COMPONENT_Cb, chromaFormatIDC), picHeight >> getComponentScaleY(COMPONENT_Cb, chromaFormatIDC)); g_luma_swing_greater_than_threshold_count = malloc(g_num_ctus_in_pic * sizeof(*g_luma_swing_greater_than_threshold_count)); g_chroma_sample_count_near_mid_point = malloc(g_num_ctus_in_pic * sizeof(*g_chroma_sample_count_near_mid_point)); + + g_created = true; } +#if !FULL_FRAME +void kvz_alf_enc_init(encoder_state_t const *state) +{ + //kvz_alf_encoder_ctb + for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++) { + aps_ids[i] = -1; + } + d_dist_org_new_filter = 0; + blocks_using_new_filter = 0; +} +#endif // !FULL_FRAME + + void kvz_alf_reconstruct(encoder_state_t const *state #if !FULL_FRAME , const lcu_order_element_t *const lcu @@ -2430,81 +2498,11 @@ void kvz_alf_reconstruct(encoder_state_t const *state void kvz_alf_enc_destroy(videoframe_t * const frame) { - /*if (lcu->index != g_num_ctus_in_pic - 1) { - return; - }*/ - if (!g_created) { return; } - g_curr_frame += 1; - - /*const int width = frame->width; - const int height = frame->height; - int height_in_lcu = frame->height_in_lcu; - int width_in_lcu = frame->width_in_lcu; - int luma_stride = frame->rec->stride; - int chroma_stride = luma_stride >> chroma_scale_x; - int h_end = 0; - int h_start = 0; - int h_end_chroma = 0; - int h_start_chroma = 0; - int w_end = 0; - int w_start = 0; - int w_end_chroma = 0; - int w_start_chroma = 0; - int cur_ctb = 0; - { - if (state->slice->tile_group_alf_enabled_flag[COMPONENT_Y]) - { - for (int h_lcu = 0; h_lcu < height_in_lcu; h_lcu++) - { - h_end = MIN(h_end + LCU_WIDTH, height); - h_end_chroma = h_end >> chroma_scale_y; - - for (int w_lcu = 0; w_lcu < width_in_lcu; w_lcu++) - { - w_end = MIN(w_end + LCU_WIDTH, width); - w_end_chroma = w_end >> chroma_scale_x; - - if (g_ctu_enable_flag[COMPONENT_Y][cur_ctb]) - { - for (int h = h_start; h < h_end; h++) - { - for (int w = w_start; w < w_end; w++) - { - frame->rec->y[h * luma_stride + w] = alf_tmp_y[h * luma_stride + w]; - } - } - } - if (state->slice->tile_group_alf_enabled_flag[COMPONENT_Cb] || state->slice->tile_group_alf_enabled_flag[COMPONENT_Cr]) - { - if (g_ctu_enable_flag[COMPONENT_Cb][cur_ctb] && g_ctu_enable_flag[COMPONENT_Cr][cur_ctb]) - { - for (int h = h_start_chroma; h < h_end_chroma; h++) - { - for (int w = w_start_chroma; w < w_end_chroma; w++) - { - frame->rec->u[h * chroma_stride + w] = alf_tmp_u[h * chroma_stride + w]; - frame->rec->v[h * chroma_stride + w] = alf_tmp_v[h * chroma_stride + w]; - } - } - } - } - cur_ctb += 1; - w_start = w_end; - w_start_chroma = w_end_chroma; - } - w_start = w_end = 0; - w_start_chroma = w_end_chroma = 0; - h_start = h_end; - h_start_chroma = h_end_chroma; - } - } - }*/ - for (int channel_idx = 0; channel_idx < MAX_NUM_CHANNEL_TYPE; channel_idx++) { if (g_alf_covariance_frame[channel_idx]) @@ -4320,6 +4318,7 @@ void kvz_alf_encoder_ctb(encoder_state_t *const state, } #endif // FULL_FRAME + size_of_aps_ids = 0; kvz_alf_get_avai_aps_ids_luma(state, &new_aps_id, aps_ids, &size_of_aps_ids); double cost_min = MAX_DOUBLE; diff --git a/src/alf.h b/src/alf.h index dfdd21f1..3ec2a16c 100644 --- a/src/alf.h +++ b/src/alf.h @@ -550,6 +550,10 @@ double kvz_alf_derive_ctb_alf_enable_flags(encoder_state_t * const state, void kvz_alf_enc_create(encoder_state_t const *state); +#if !FULL_FRAME +void kvz_alf_enc_init(encoder_state_t const *state); +#endif // !FULL_FRAME + void kvz_alf_reconstruct(encoder_state_t const *state #if !FULL_FRAME , const lcu_order_element_t *const lcu @@ -764,8 +768,6 @@ void kvz_alf_reconstruct_coeff(encoder_state_t *const state, const bool is_rdo, const bool is_redo); -void kvz_alf_create(encoder_state_t const *state); - void kvz_alf_destroy(videoframe_t * const frame); void kvz_alf_derive_classification(encoder_state_t *const state, diff --git a/src/encoderstate.c b/src/encoderstate.c index a5a2beed..8a2d9406 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -943,10 +943,6 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) //Encode ALF if (encoder->cfg.alf_enable) { - kvz_alf_enc_create(state); -#if !FULL_FRAME - kvz_alf_init(state); -#endif kvz_alf_enc_process(state); kvz_alf_reconstruct(state); }