/***************************************************************************** * This file is part of uvg266 VVC encoder. * * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright notice, this * list of conditions and the following disclaimer in the documentation and/or * other materials provided with the distribution. * * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
 ****************************************************************************/

#include "intra.h"

#include <stdlib.h>

#include "image.h"
#include "uvg_math.h"
#include "mip_data.h"
#include "rdo.h"
#include "search.h"
#include "search_intra.h"
#include "strategies-picture.h"
#include "strategies/strategies-intra.h"
#include "tables.h"
#include "transform.h"
#include "videoframe.h"

// Tables for looking up the number of intra reference pixels based on
// prediction units coordinate within an LCU.
// generated by "tools/generate_ref_pixel_tables.py".
static const uint8_t num_ref_pixels_top[16][16] = {
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }
};

static const uint8_t num_ref_pixels_left[16][16] = {
  { 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 },
  { 60, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 },
  { 56, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 },
  { 52, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 },
  { 48, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 },
  { 44, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 },
  { 40, 4, 8, 4, 8, 4,
8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, { 36, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }, { 32, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 }, { 28, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 }, { 24, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 }, { 20, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 }, { 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 }, { 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 }, { 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }, { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }; static void mip_predict( const encoder_state_t* const state, const uvg_intra_references* const refs, const uint16_t pred_block_width, const uint16_t pred_block_height, uvg_pixel* dst, const int mip_mode, const bool mip_transp); int8_t uvg_intra_get_dir_luma_predictor( const uint32_t x, const uint32_t y, int8_t *preds, const cu_info_t *const cur_pu, const cu_info_t *const left_pu, const cu_info_t *const above_pu) { enum { PLANAR_IDX = 0, DC_IDX = 1, HOR_IDX = 18, VER_IDX = 50, }; int8_t number_of_candidates = 0; // The default mode if block is not coded yet is INTRA_PLANAR. // If the neighboring blocks were MIP blocks, intra mode is set to planar. 
int8_t left_intra_dir = 0; if (left_pu && left_pu->type == CU_INTRA) { if (left_pu->intra.mip_flag) { left_intra_dir = PLANAR_IDX; } else { left_intra_dir = left_pu->intra.mode; } } int8_t above_intra_dir = 0; if (above_pu && above_pu->type == CU_INTRA && y % LCU_WIDTH != 0) { if (above_pu->intra.mip_flag) { above_intra_dir = PLANAR_IDX; } else { above_intra_dir = above_pu->intra.mode; } } const int offset = 61; const int mod = 64; preds[0] = PLANAR_IDX; preds[1] = DC_IDX; preds[2] = VER_IDX; preds[3] = HOR_IDX; preds[4] = VER_IDX - 4; preds[5] = VER_IDX + 4; // If the predictions are the same, add new predictions if (left_intra_dir == above_intra_dir) { number_of_candidates = 1; if (left_intra_dir > DC_IDX) { // angular modes preds[0] = PLANAR_IDX; preds[1] = left_intra_dir; preds[2] = ((left_intra_dir + offset) % mod) + 2; preds[3] = ((left_intra_dir - 1) % mod) + 2; preds[4] = ((left_intra_dir + offset - 1) % mod) + 2; preds[5] = (left_intra_dir % mod) + 2; } } else { // If we have two distinct predictions number_of_candidates = 2; uint8_t max_cand_mode_idx = preds[0] > preds[1] ? 0 : 1; if (left_intra_dir > DC_IDX && above_intra_dir > DC_IDX) { preds[0] = PLANAR_IDX; preds[1] = left_intra_dir; preds[2] = above_intra_dir; max_cand_mode_idx = preds[1] > preds[2] ? 1 : 2; uint8_t min_cand_mode_idx = preds[1] > preds[2] ? 
2 : 1; if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] == 1) { preds[3] = ((preds[min_cand_mode_idx] + offset) % mod) + 2; preds[4] = ((preds[max_cand_mode_idx] - 1) % mod) + 2; preds[5] = ((preds[min_cand_mode_idx] + offset - 1) % mod) + 2; } else if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] >= 62) { preds[3] = ((preds[min_cand_mode_idx] - 1) % mod) + 2; preds[4] = ((preds[max_cand_mode_idx] + offset) % mod) + 2; preds[5] = (preds[min_cand_mode_idx] % mod) + 2; } else if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] == 2) { preds[3] = ((preds[min_cand_mode_idx] - 1) % mod) + 2; preds[4] = ((preds[min_cand_mode_idx] + offset) % mod) + 2; preds[5] = ((preds[max_cand_mode_idx] - 1) % mod) + 2; } else { preds[3] = ((preds[min_cand_mode_idx] + offset) % mod) + 2; preds[4] = ((preds[min_cand_mode_idx] - 1) % mod) + 2; preds[5] = ((preds[max_cand_mode_idx] + offset) % mod) + 2; } } else if(left_intra_dir + above_intra_dir >= 2){ // Add DC mode if it's not present, otherwise VER_IDX. preds[0] = PLANAR_IDX; preds[1] = (left_intra_dir < above_intra_dir) ? 
above_intra_dir : left_intra_dir; max_cand_mode_idx = 1; preds[2] = ((preds[max_cand_mode_idx] + offset) % mod) + 2; preds[3] = ((preds[max_cand_mode_idx] - 1) % mod) + 2; preds[4] = ((preds[max_cand_mode_idx] +offset - 1) % mod) + 2; preds[5] = ( preds[max_cand_mode_idx] % mod) + 2; } } return number_of_candidates; } static void intra_filter_reference( int_fast8_t log2_width, int_fast8_t log2_height, uvg_intra_references *refs) { if (refs->filtered_initialized) { return; } else { refs->filtered_initialized = true; } const int_fast8_t ref_width = 2 * (1 << log2_width) + 1; const int_fast8_t ref_height = 2 * (1 << log2_height) + 1; uvg_intra_ref *ref = &refs->ref; uvg_intra_ref *filtered_ref = &refs->filtered_ref; // Starting point at top left for both iterations filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) >> 2; filtered_ref->top[0] = filtered_ref->left[0]; // Top to bottom for (int_fast8_t y = 1; y < ref_height - 1; ++y) { uvg_pixel *p = &ref->left[y]; filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) >> 2; } // Bottom left (not filtered) filtered_ref->left[ref_height - 1] = ref->left[ref_height - 1]; // Left to right for (int_fast8_t x = 1; x < ref_width - 1; ++x) { uvg_pixel *p = &ref->top[x]; filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) >> 2; } // Top right (not filtered) filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1]; } /** * \brief Generate dc prediction. * \param cu_loc CU location and size data. * \param color Color channel. * \param ref_top Pointer to -1 index of above reference, length=width*2+1. * \param ref_left Pointer to -1 index of left reference, length=width*2+1. * \param dst Buffer of size width*width. * \param multi_ref_idx Multi reference line index for use with MRL. 
*/ static void intra_pred_dc( const cu_loc_t* const cu_loc, const color_t color, const uvg_pixel *const ref_top, const uvg_pixel *const ref_left, uvg_pixel *const out_block, const uint8_t multi_ref_idx) { const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width; const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height; int_fast16_t sum = 0; // Only one loop is done for non-square blocks. // In case of non-square blocks, only the longer reference is summed. if (width >= height) { for (int_fast8_t i = 0; i < width; ++i) { sum += ref_top[i + 1 + multi_ref_idx]; } } if (width <= height) { for (int_fast8_t j = 0; j < height; ++j) { sum += ref_left[j + 1 + multi_ref_idx]; } } // JVET_K0122 const int denom = width == height ? width << 1 : MAX(width, height); const int divShift = uvg_math_floor_log2(denom); const int divOffset = denom >> 1; const uvg_pixel dc_val = (sum + divOffset) >> divShift; //const uvg_pixel dc_val = (sum + width) >> (log2_width + 1); const int_fast16_t block_size = width * height; for (int_fast16_t i = 0; i < block_size; ++i) { out_block[i] = dc_val; } } bool uvg_cclm_is_allowed(const encoder_state_t* const state, const cu_loc_t * const luma_loc, cu_info_t const * const cur_cu, enum uvg_tree_type tree_type) { if (tree_type != UVG_CHROMA_T) { return true; } uint32_t chroma_split_depth0 = GET_SPLITDATA(cur_cu, 0); uint32_t chroma_split_depth1 = GET_SPLITDATA(cur_cu, 1); bool allow = false; if (chroma_split_depth0 == QT_SPLIT || (chroma_split_depth0 == BT_HOR_SPLIT && chroma_split_depth1 == BT_VER_SPLIT)) allow = true; else if (chroma_split_depth0 == NO_SPLIT) allow = true; else if (chroma_split_depth0 == BT_HOR_SPLIT && chroma_split_depth1 == NO_SPLIT) allow = true; if (!allow) { return false; } const cu_info_t* const luma_cu = uvg_cu_array_at_const(state->tile->frame->cu_array, luma_loc->x, luma_loc->y); uint32_t split = GET_SPLITDATA(luma_cu, 0); if (split != NO_SPLIT) { allow = split == QT_SPLIT; } else if (split 
!= NO_SPLIT && luma_cu->intra.isp_mode != ISP_MODE_NO_ISP) { allow = false; } return allow; } enum lm_mode { LM_CHROMA_IDX = 81, LM_CHROMA_L_IDX = 82, LM_CHROMA_T_IDX = 83, }; static void get_cclm_parameters( encoder_state_t const* const state, int8_t width, int8_t height, int8_t mode, int x0, int y0, int avai_above_right_units, int avai_left_below_units, uvg_intra_ref* luma_src, uvg_intra_references*chroma_ref, int16_t *a, int16_t*b, int16_t*shift) { const int base_unit_size = 4; // TODO: take into account YUV422 const int unit_w = base_unit_size >> 1; const int unit_h = base_unit_size >> 1; const int c_height = height; const int c_width = width; height *= 2; width *= 2; const int tu_width_in_units = c_width / unit_w; const int tu_height_in_units = c_height / unit_h; //int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H; //int left_template_samp_num = height; // These are used for calculating some stuff for non-square CUs //int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w; //int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h; //int total_units = total_left_units + total_above_units + 1; //int above_right_units = total_above_units - tu_width_in_units; //int left_below_units = total_left_units - tu_height_in_units; //int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs //int avai_left_below_units = 0; int avai_above_units = y0 ? tu_width_in_units : 0; int avai_left_units = x0 ? tu_height_in_units : 0; bool above_available = avai_above_units != 0; bool left_available = avai_left_units != 0; char internal_bit_depth = state->encoder_control->bitdepth; int min_luma[2] = { MAX_INT, 0 }; int max_luma[2] = { -MAX_INT, 0 }; uvg_pixel* src; int actualTopTemplateSampNum = 0; int actualLeftTemplateSampNum = 0; if (mode == LM_CHROMA_T_IDX) { left_available = 0; avai_above_right_units = avai_above_right_units > (c_height / unit_w) ? 
c_height / unit_w : avai_above_right_units; actualTopTemplateSampNum = unit_w * (avai_above_units + avai_above_right_units); } else if (mode == LM_CHROMA_L_IDX) { above_available = 0; avai_left_below_units = avai_left_below_units > (c_width / unit_h) ? c_width / unit_h : avai_left_below_units; actualLeftTemplateSampNum = unit_h * (avai_left_units + avai_left_below_units); } else if (mode == LM_CHROMA_IDX) { actualTopTemplateSampNum = c_width; actualLeftTemplateSampNum = c_height; } int startPos[2]; //0:Above, 1: Left int pickStep[2]; int aboveIs4 = left_available ? 0 : 1; int leftIs4 = above_available ? 0 : 1; startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4); pickStep[0] = MAX(1, actualTopTemplateSampNum >> (1 + aboveIs4)); startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4); pickStep[1] = MAX(1, actualLeftTemplateSampNum >> (1 + leftIs4)); uvg_pixel selectLumaPix[4] = { 0, 0, 0, 0 }; uvg_pixel selectChromaPix[4] = { 0, 0, 0, 0 }; int cntT, cntL; cntT = cntL = 0; int cnt = 0; if (above_available) { cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1); src = luma_src->top; const uvg_pixel* cur = chroma_ref->ref.top + 1; for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) { selectLumaPix[cnt] = src[pos]; selectChromaPix[cnt] = cur[pos]; } } if (left_available) { cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1); src = luma_src->left; const uvg_pixel* cur = chroma_ref->ref.left + 1; for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) { selectLumaPix[cnt + cntT] = src[pos]; selectChromaPix[cnt + cntT] = cur[pos]; } } cnt = cntL + cntT; if (cnt == 2) { selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0]; selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1]; selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1]; selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3]; } int minGrpIdx[2] = { 0, 2 }; int maxGrpIdx[2] 
= { 1, 3 }; int* tmpMinGrp = minGrpIdx; int* tmpMaxGrp = maxGrpIdx; if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) { SWAP(tmpMinGrp[0], tmpMinGrp[1], int); } if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) { SWAP(tmpMaxGrp[0], tmpMaxGrp[1], int); } if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) { SWAP(tmpMinGrp, tmpMaxGrp, int*); } if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) { SWAP(tmpMinGrp[1], tmpMaxGrp[0], int); } min_luma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1; min_luma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; max_luma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1; max_luma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; if (left_available || above_available) { int diff = max_luma[0] - min_luma[0]; if (diff > 0) { int diffC = max_luma[1] - min_luma[1]; int x = uvg_math_floor_log2(diff); static const uint8_t DivSigTable[1 << 4] = { // 4bit significands - 8 ( MSB is omitted ) 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 }; int normDiff = (diff << 4 >> x) & 15; int v = DivSigTable[normDiff] | 8; x += normDiff != 0; int y = diffC ? uvg_math_floor_log2(abs(diffC)) + 1 : 0; int add = 1 << y >> 1; *a = (diffC * v + add) >> y; *shift = 3 + x - y; if (*shift < 1) { *shift = 1; *a = ((*a == 0) ? 0 : (*a < 0) ? 
-15 : 15); // a=Sign(a)*15 } *b = min_luma[1] - ((*a * min_luma[0]) >> *shift); } else { *a = 0; *b = min_luma[1]; *shift = 0; } } else { *a = 0; *b = 1 << (internal_bit_depth - 1); *shift = 0; } } static void linear_transform_cclm(const cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) { int scale = cclm_params->a; int shift = cclm_params->shift; int offset = cclm_params->b; for (int y = 0; y < height; ++y) { for (int x=0; x < stride; ++x) { int val = src[x + y * stride] * scale; val >>= shift; val += offset; val = CLIP_TO_PIXEL(val); dst[x + y * stride] = val; } } } static void predict_cclm( encoder_state_t const* const state, const color_t color, const int8_t width, const int8_t height, const int16_t x0, const int16_t y0, const int16_t stride, const int8_t mode, const lcu_t* const lcu, uvg_intra_references* chroma_ref, uvg_pixel* dst, cclm_parameters_t* cclm_params, enum uvg_tree_type tree_type ) { assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); assert(state->encoder_control->cfg.cclm); uvg_intra_ref sampled_luma_ref; uvg_pixel sampled_luma[LCU_CHROMA_SIZE]; int x_scu = SUB_SCU(x0); int y_scu = SUB_SCU(y0); int available_above_right = 0; int available_left_below = 0; const uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH; const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA); tree_type = state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I ? tree_type : UVG_BOTH_T; const int ctu_size = tree_type == UVG_CHROMA_T ? 
LCU_WIDTH_C : LCU_WIDTH; if (y0) { if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 4), (state->tile->frame->width - x0 - width* 2) / 4); for (; available_above_right < width / 2; available_above_right++) { int x_extension = x_scu + width * 2 + 4 * available_above_right; x_extension >>= tree_type == UVG_CHROMA_T; const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu >> (tree_type==UVG_CHROMA_T)) - 4); if (x_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break; } if(y_scu == 0) { if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4); memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride2 / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2)); } else { for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { bool left_padding = x0 || x; int s = 4; s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2; s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2; s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; s += y_scu && !(x0 && !x && !x_scu) ? 
y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; sampled_luma_ref.top[x / 2] = s >> 3; } } } if(x0) { if (x_scu == 0) available_left_below = MIN(MIN(height / 2, (64 - y_scu - height * 2) / 4), (state->tile->frame->height - y0 - height * 2) / 4); for (; available_left_below < height / 2; available_left_below++) { int y_extension = y_scu + height * 2 + 4 * available_left_below; y_extension >>= tree_type == UVG_CHROMA_T; const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu >> (tree_type == UVG_CHROMA_T)) - 4, y_extension); if (y_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break; if(x_scu == 32 && y_scu == 0 && pu->log2_height == 6 && pu->log2_width == 6 ) break; } for(int i = 0; i < height + available_left_below * 2; i++) { sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride2/2) + x0 / 2 - 1]; } } uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride2) / 4], sampled_luma, width, height, stride2 / 2, width); int16_t a, b, shift; get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift); cclm_params->shift = shift; cclm_params->a = a; cclm_params->b = b; if(dst) linear_transform_cclm(cclm_params, sampled_luma, dst, width, height); } uint8_t uvg_get_mip_flag_context( const cu_loc_t* const cu_loc, const lcu_t* lcu, cu_array_t* const cu_a) { assert(!(lcu && cu_a)); if (cu_loc->width > 2 * cu_loc->height || cu_loc->height > 2 * cu_loc->width) { return 3; } const int x = cu_loc->x; const int y = cu_loc->y; int context = 0; const cu_info_t* left = NULL; const cu_info_t* top = NULL; if (lcu) { int x_local = SUB_SCU(x); int y_local = SUB_SCU(y); if (x) { left = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); } if (y) { top = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1); } } else { if (x > 0) { left = uvg_cu_array_at_const(cu_a, x 
- 1, y); } if (y > 0) { top = uvg_cu_array_at_const(cu_a, x, y - 1); } } context += left && left->type == CU_INTRA ? left->intra.mip_flag : 0; context += top && top->type == CU_INTRA ? top->intra.mip_flag : 0; return context; } void uvg_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len) { if (dst_len < src_len) { // Create reduced boundary by downsampling uint16_t down_smp_factor = src_len / dst_len; const int log2_factor = uvg_math_floor_log2(down_smp_factor); const int rounding_offset = (1 << (log2_factor - 1)); uint16_t src_idx = 0; for (uint16_t dst_idx = 0; dst_idx < dst_len; dst_idx++) { int sum = 0; for (int k = 0; k < down_smp_factor; k++) { sum += ref_src[src_idx++]; } reduced_dst[dst_idx] = (sum + rounding_offset) >> log2_factor; } } else { // Copy boundary if no downsampling is needed for (uint16_t i = 0; i < dst_len; ++i) { reduced_dst[i] = ref_src[i]; } } } void uvg_mip_reduced_pred(int* const output, const int* const input, const uint8_t* matrix, const bool transpose, const int red_bdry_size, const int red_pred_size, const int size_id, const int in_offset, const int in_offset_tr) { const int input_size = 2 * red_bdry_size; // Use local buffer for transposed result int out_buf_transposed[LCU_WIDTH * LCU_WIDTH]; int* const out_ptr = transpose ? out_buf_transposed : output; int sum = 0; for (int i = 0; i < input_size; i++) { sum += input[i]; } const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four"); const uint8_t* weight = matrix; const int input_offset = transpose ? in_offset_tr : in_offset; const bool red_size = (size_id == 2); int pos_res = 0; for (int y = 0; y < red_pred_size; y++) { for (int x = 0; x < red_pred_size; x++) { if (red_size) { weight -= 1; } int tmp0 = red_size ? 
0 : (input[0] * weight[0]); int tmp1 = input[1] * weight[1]; int tmp2 = input[2] * weight[2]; int tmp3 = input[3] * weight[3]; for (int i = 4; i < input_size; i += 4) { tmp0 += input[i] * weight[i]; tmp1 += input[i + 1] * weight[i + 1]; tmp2 += input[i + 2] * weight[i + 2]; tmp3 += input[i + 3] * weight[i + 3]; } out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset); pos_res++; weight += input_size; } } if (transpose) { for (int y = 0; y < red_pred_size; y++) { for (int x = 0; x < red_pred_size; x++) { output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y]; } } } } void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary, const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim, const uint16_t src_step, const uint16_t src_stride, const uint16_t dst_step, const uint16_t dst_stride, const uint16_t boundary_step, const uint16_t ups_factor) { const int log2_factor = uvg_math_floor_log2(ups_factor); assert(ups_factor >= 2 && "Upsampling factor must be at least 2."); const int rounding_offset = 1 << (log2_factor - 1); uint16_t idx_orth_dim = 0; const int* src_line = src; int* dst_line = dst; const int* boundary_line = boundary + boundary_step - 1; while (idx_orth_dim < src_size_orth_dim) { uint16_t idx_upsample_dim = 0; const int* before = boundary_line; const int* behind = src_line; int* cur_dst = dst_line; while (idx_upsample_dim < src_size_ups_dim) { uint16_t pos = 1; int scaled_before = (*before) << log2_factor; int scaled_behind = 0; while (pos <= ups_factor) { scaled_before -= *before; scaled_behind += *behind; *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor; pos++; cur_dst += dst_step; } idx_upsample_dim++; before = behind; behind += src_step; } idx_orth_dim++; src_line += src_stride; dst_line += dst_stride; boundary_line += boundary_step; } } /** \brief Matrix weighted intra prediction. 
*/ static void mip_predict( const encoder_state_t* const state, const uvg_intra_references* const refs, const uint16_t pred_block_width, const uint16_t pred_block_height, uvg_pixel* dst, const int mip_mode, const bool mip_transp) { // MIP prediction uses int values instead of uvg_pixel as some temp values may be negative uvg_pixel* out = dst; int result[32*32] = {0}; const int mode_idx = mip_mode; // *** INPUT PREP *** // Initialize prediction parameters START uint16_t width = pred_block_width; uint16_t height = pred_block_height; int size_id; // Prediction block type if (width == 4 && height == 4) { size_id = 0; } else if (width == 4 || height == 4 || (width == 8 && height == 8)) { size_id = 1; } else { size_id = 2; } // Reduced boundary and prediction sizes int red_bdry_size = (size_id == 0) ? 2 : 4; int red_pred_size = (size_id < 2) ? 4 : 8; // Upsampling factors uint16_t ups_hor_factor = width / red_pred_size; uint16_t ups_ver_factor = height / red_pred_size; // Upsampling factors must be powers of two assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two."); assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two."); // Initialize prediction parameters END int ref_samples_top[INTRA_REF_LENGTH]; int ref_samples_left[INTRA_REF_LENGTH]; for (int i = 1; i < INTRA_REF_LENGTH; i++) { ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init ref_samples_left[i-1] = (int)refs->ref.left[i]; } // Compute reduced boundary with Haar-downsampling const int input_size = 2 * red_bdry_size; int red_bdry[MIP_MAX_INPUT_SIZE]; int red_bdry_trans[MIP_MAX_INPUT_SIZE]; int* const top_reduced = &red_bdry[0]; int* const left_reduced = &red_bdry[red_bdry_size]; uvg_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size); 
uvg_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size); // Transposed reduced boundaries int* const left_reduced_trans = &red_bdry_trans[0]; int* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; for (int x = 0; x < red_bdry_size; x++) { top_reduced_trans[x] = top_reduced[x]; } for (int y = 0; y < red_bdry_size; y++) { left_reduced_trans[y] = left_reduced[y]; } int input_offset = red_bdry[0]; int input_offset_trans = red_bdry_trans[0]; const bool has_first_col = (size_id < 2); // First column of matrix not needed for large blocks red_bdry[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0; red_bdry_trans[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0; for (int i = 1; i < input_size; ++i) { red_bdry[i] -= input_offset; red_bdry_trans[i] -= input_offset_trans; } // *** INPUT PREP *** END // *** BLOCK PREDICT *** const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); const bool transpose = mip_transp; const uint8_t* matrix; switch (size_id) { case 0: matrix = &uvg_mip_matrix_4x4[mode_idx][0][0]; break; case 1: matrix = &uvg_mip_matrix_8x8[mode_idx][0][0]; break; case 2: matrix = &uvg_mip_matrix_16x16[mode_idx][0][0]; break; default: assert(false && "Invalid MIP size id."); } // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 int red_pred_buffer[8*8]; int* const reduced_pred = need_upsampling ? red_pred_buffer : result; const int* const reduced_bdry = transpose ? 
red_bdry_trans : red_bdry; uvg_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans); if (need_upsampling) { const int* ver_src = reduced_pred; uint16_t ver_src_step = width; if (ups_hor_factor > 1) { int* const hor_dst = result + (ups_ver_factor - 1) * width; ver_src = hor_dst; ver_src_step *= ups_ver_factor; uvg_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left, red_pred_size, red_pred_size, 1, red_pred_size, 1, ver_src_step, ups_ver_factor, ups_hor_factor); } if (ups_ver_factor > 1) { uvg_mip_pred_upsampling_1D(result, ver_src, ref_samples_top, red_pred_size, width, ver_src_step, 1, width, 1, 1, ups_ver_factor); } } // Assign and cast values from temp array to output for (int i = 0; i < 32 * 32; i++) { out[i] = (uvg_pixel)result[i]; } // *** BLOCK PREDICT *** END } int8_t uvg_wide_angle_correction( int_fast8_t mode, const int log2_width, const int log2_height, const bool account_for_dc_planar) { int8_t pred_mode = mode; if (log2_width != log2_height) { if (mode > 1 && mode <= 66) { const int modeShift[] = { 0, 6, 10, 12, 14, 15 }; const int deltaSize = abs(log2_width - log2_height); if (log2_width > log2_height && mode < 2 + modeShift[deltaSize]) { pred_mode += (66 - 1); } else if (log2_height > log2_width && mode > 66 - modeShift[deltaSize]) { pred_mode -= (66 - 1) + (account_for_dc_planar ? 2 : 0); } } } return pred_mode; } static void intra_predict_regular( const encoder_state_t* const state, uvg_intra_references *refs, const cu_info_t* const cur_cu, const cu_loc_t* const cu_loc, const cu_loc_t* const pu_loc, int_fast8_t mode, color_t color, uvg_pixel *dst, const uint8_t multi_ref_idx, const uint8_t isp_mode) { const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width; const int height = color == COLOR_Y ? 
pu_loc->height : pu_loc->chroma_height;
  const int log2_width = uvg_g_convert_to_log2[width];
  const int log2_height = uvg_g_convert_to_log2[height];

  const uvg_config *cfg = &state->encoder_control->cfg;

  // MRL only for luma
  uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
  uint8_t isp = color == COLOR_Y ? isp_mode : 0;

  // Wide angle correction: chroma uses the PU dimensions, luma the CU's.
  int8_t pred_mode = uvg_wide_angle_correction(
    mode,
    color == COLOR_Y ? cur_cu->log2_width : log2_width,
    color == COLOR_Y ? cur_cu->log2_height : log2_height,
    false
  );

  // Select between the unfiltered and smoothed reference samples.
  const uvg_intra_ref *used_ref = &refs->ref;
  if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || isp_mode /*ISP_TODO: replace this fake ISP check*/) {
    // For chroma, DC and 4x4 blocks, always use unfiltered reference.
  } else if (mode == 0) {
    // Otherwise, use filtered for planar.
    if (width * height > 32) {
      used_ref = &refs->filtered_ref;
    }
  } else {
    // Angular modes use smoothed reference pixels, unless the mode is close
    // to being either vertical or horizontal.
    static const int uvg_intra_hor_ver_dist_thres[8] = {24, 24, 24, 14, 2, 0, 0, 0 };
    int filter_threshold = uvg_intra_hor_ver_dist_thres[(log2_width + log2_height) >> 1];
    int dist_from_vert_or_hor = MIN(abs(pred_mode - 50), abs(pred_mode - 18));
    if (dist_from_vert_or_hor > filter_threshold) {
      // Filter only when the angle lands exactly on whole-sample positions
      // (low 5 bits of the sample displacement are zero).
      static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
      const int_fast8_t mode_disp = (pred_mode >= 34) ? pred_mode - 50 : 18 - pred_mode;
      const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
      if ((abs(sample_disp) & 0x1F) == 0) {
        used_ref = &refs->filtered_ref;
      }
    }
  }

  // Lazily build the filtered reference the first time it is needed.
  if (used_ref == &refs->filtered_ref && !refs->filtered_initialized) {
    intra_filter_reference(log2_width, log2_height, refs);
  }

  // Dispatch to the actual predictor.
  if (mode == 0) {
    uvg_intra_pred_planar(pu_loc, color, used_ref->top, used_ref->left, dst);
  } else if (mode == 1) {
    intra_pred_dc(pu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index);
  } else {
    uvg_angular_pred(
      pu_loc, pred_mode, color, used_ref->top, used_ref->left,
      dst, multi_ref_index, isp,
      isp_mode == ISP_MODE_HOR ? cu_loc->height : cu_loc->width);
  }

  // pdpc
  // bool pdpcCondition = (mode == 0 || mode == 1 || mode == 18 || mode == 50);
  bool pdpcCondition = (mode == 0 || mode == 1); // Planar and DC
  pdpcCondition &= width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH;
  if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL.
  {
    uvg_pdpc_planar_dc(mode, pu_loc, color, used_ref, dst);
  }
}


/**
 * \brief Build intra reference samples (left, top and top-left) for a PU.
 *
 * General version that handles picture/LCU borders; the faster
 * uvg_intra_build_reference_inner handles the common inner case.
 * Fills refs->ref; the filtered reference is left uninitialized.
 */
void uvg_intra_build_reference_any(
  const encoder_state_t* const state,
  const cu_loc_t* const pu_loc,
  const cu_loc_t* const cu_loc,
  const color_t color,
  const vector2d_t *const luma_px,
  const vector2d_t *const pic_px,
  const lcu_t *const lcu,
  uvg_intra_references *const refs,
  const uint8_t multi_ref_idx,
  uvg_pixel *extra_ref_lines,
  const uint8_t isp_mode)
{
  const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
  const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
  const int log2_width = uvg_g_convert_to_log2[width];
  const int log2_height = uvg_g_convert_to_log2[height];

  // These are only used with ISP, so no need to check chroma
  const int cu_width = cu_loc->width;
  const int cu_height = cu_loc->height;
  const int pu_x = pu_loc->x;
  const int pu_y = pu_loc->y;
  const int cu_x = cu_loc->x;
  const int cu_y = cu_loc->y;
  bool is_first_isp_block = isp_mode ?
pu_x == cu_x && pu_y == cu_y : false;

  assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);

  refs->filtered_initialized = false;
  uvg_pixel *out_left_ref = &refs->ref.left[0];
  uvg_pixel *out_top_ref = &refs->ref.top[0];

  // Fallback value when no neighbors exist (mid-range gray).
  const uvg_pixel dc_val = 1 << (UVG_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable
  const int is_chroma = color != COLOR_Y ? 1 : 0;
  const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap;

  // Get multi ref index from CU under prediction or reconstruction. Do not use MRL if not luma
  const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
  assert(multi_ref_index < MAX_REF_LINE_IDX);

  // Convert luma coordinates to chroma coordinates for chroma.
  const vector2d_t lcu_px = {
    luma_px->x % LCU_WIDTH,
    luma_px->y % LCU_WIDTH
  };
  const vector2d_t px = {
    lcu_px.x >> is_chroma,
    lcu_px.y >> is_chroma,
  };

  // Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
  const uvg_pixel *left_ref;
  bool extra_ref = false;
  // On the left LCU edge, if left neighboring LCU is available,
  // left_ref needs to point to correct extra reference line if MRL is used.
  if (luma_px->x > 0 && lcu_px.x == 0 && multi_ref_index != 0) {
    left_ref = &extra_ref_lines[multi_ref_index * 128];
    extra_ref = true;
  }
  else {
    left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
  }

  const uvg_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
  const uvg_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;

  // Init top borders pointer to point to the correct place in the correct reference array.
  const uvg_pixel *top_border;
  if (px.y) {
    // Row above the PU inside the LCU reconstruction buffer; MRL moves it up.
    top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
  } else {
    top_border = &top_ref[px.x]; // Top row, no need for multi_ref_index
  }

  // Init left borders pointer to point to the correct place in the correct reference array.
  const uvg_pixel *left_border;
  int left_stride; // Distance between reference samples.
  if (px.x) {
    // Column left of the PU inside the LCU reconstruction buffer.
    left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
    left_stride = LCU_WIDTH >> is_chroma;
  } else {
    if (extra_ref) {
      left_border = &left_ref[MAX_REF_LINE_IDX];
    } else {
      left_border = &left_ref[px.y];
    }
    left_stride = 1;
  }

  // Generate left reference.
  if (luma_px->x > 0) {
    // Get the number of reference pixels based on the PU coordinate within the LCU.
    int px_available_left;
    if (isp_mode && !is_first_isp_block && !is_chroma) {
      // Later ISP splits: availability depends on the split direction.
      if (isp_mode == ISP_MODE_VER) {
        px_available_left = height;
      }
      else {
        px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4;
        px_available_left -= pu_loc->y - cu_loc->y;
      }
    }
    else {
      if (!is_dual_tree) {
        const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
        px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
      }
      else {
        const int num_cus = uvg_count_chroma_tree_available_edge_cus(cu_loc->x >> 1, cu_loc->y >> 1, width, height, lcu, true);
        px_available_left = num_cus * 4;
      }
    }

    // Limit the number of available pixels based on block size and dimensions
    // of the picture.
    px_available_left = MIN(px_available_left, cu_height * 2 + multi_ref_index);
    px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);

    // Copy pixels from coded CUs.
    for (int i = 0; i < px_available_left; ++i) {
      // Reserve space for top left reference
      out_left_ref[i + 1 + multi_ref_index] = left_border[i * left_stride];
    }
    // Extend the last pixel for the rest of the reference values.
uvg_pixel nearest_pixel = left_border[(px_available_left - 1) * left_stride];
    // If first isp split, take samples as if it were normal square block
    int tmp_h = is_first_isp_block ? cu_height * 2 : (isp_mode ? cu_height + height : height * 2);
    for (int i = px_available_left; i < tmp_h + multi_ref_index * 2; ++i) {
      out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
    }
  } else {
    // If we are on the left edge, extend the first pixel of the top row.
    uvg_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val;
    // If first isp split, take samples as if it were normal square block
    int tmp_h = is_first_isp_block ? cu_height * 2 : (isp_mode ? cu_height + height : height * 2);
    for (int i = 0; i < tmp_h + multi_ref_index; i++) {
      // Reserve space for top left reference
      out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
    }
  }

  // Generate top-left reference
  if (multi_ref_index) {
    // MRL in use: fill multi_ref_index + 1 top-left corner samples.
    if (luma_px->x > 0 && luma_px->y > 0) {
      // If the block is at an LCU border, the top-left must be copied from
      // the border that points to the LCUs 1D reference buffer.
      // Inner picture cases
      if (px.x == 0 && px.y == 0) {
        // LCU top left corner case. Multi ref will be 0.
        out_left_ref[0] = out_left_ref[1];
        out_top_ref[0] = out_left_ref[1];
      }
      else if (px.x == 0) {
        // LCU left border case
        uvg_pixel *top_left_corner = &extra_ref_lines[multi_ref_index * 128];
        for (int i = 0; i <= multi_ref_index; ++i) {
          out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
          out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
        }
      }
      else if (px.y == 0) {
        // LCU top border case. Multi ref will be 0.
        out_left_ref[0] = top_border[-1];
        out_top_ref[0] = top_border[-1];
      }
      else {
        // Inner case
        for (int i = 0; i <= multi_ref_index; ++i) {
          out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
          out_top_ref[i] = top_border[i - 1 - multi_ref_index];
        }
      }
    }
    else {
      // Picture border cases
      if (px.x == 0 && px.y == 0) {
        // Top left picture corner case. Multi ref will be 0.
        out_left_ref[0] = out_left_ref[1];
        out_top_ref[0] = out_left_ref[1];
      }
      else if (px.x == 0) {
        // Picture left border case. Reference pixel cannot be taken from outside LCU border
        uvg_pixel nearest = out_left_ref[1 + multi_ref_index];
        for (int i = 0; i <= multi_ref_index; ++i) {
          out_left_ref[i] = nearest;
          out_top_ref[i] = nearest;
        }
      }
      else {
        // Picture top border case. Multi ref will be 0.
        out_left_ref[0] = top_border[-1];
        out_top_ref[0] = top_border[-1];
      }
    }
  }
  else {
    if (luma_px->x > 0 && luma_px->y > 0) {
      // If the block is at an LCU border, the top-left must be copied from
      // the border that points to the LCUs 1D reference buffer.
      if (px.x == 0) {
        out_left_ref[0] = left_border[-1 * left_stride];
        out_top_ref[0] = left_border[-1 * left_stride];
      }
      else {
        out_left_ref[0] = top_border[-1];
        out_top_ref[0] = top_border[-1];
      }
    }
    else {
      // Copy reference clockwise.
      out_left_ref[0] = out_left_ref[1];
      out_top_ref[0] = out_left_ref[1];
    }
  }

  // Generate top reference.
  int px_available_top;
  if (luma_px->y > 0) {
    // Get the number of reference pixels based on the PU coordinate within the LCU.
    if (isp_mode && !is_first_isp_block && !is_chroma) {
      // Later ISP splits: availability depends on the split direction.
      if (isp_mode == ISP_MODE_HOR) {
        px_available_top = width;
      }
      else {
        px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4;
        px_available_top -= pu_loc->x - cu_loc->x;
      }
    }
    else {
      if (!is_dual_tree) {
        const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, false);
        px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
      }
      else {
        const int num_cus = uvg_count_chroma_tree_available_edge_cus(cu_loc->x >> 1, cu_loc->y >> 1, width, height, lcu, false);
        px_available_top = num_cus * 4;
      }
    }

    // Limit the number of available pixels based on block size and dimensions
    // of the picture.
    px_available_top = MIN(px_available_top, cu_width * 2 + multi_ref_index);
    px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);

    // Copy all the pixels we can.
for (int i = 0; i < px_available_top; ++i) {
      out_top_ref[i + 1 + multi_ref_index] = top_border[i];
    }
    // Extend the last pixel for the rest of the reference values.
    uvg_pixel nearest_pixel = top_border[px_available_top - 1];
    // If first isp split, take samples as if it were normal square block
    int tmp_w = is_first_isp_block ? cu_width * 2 : (isp_mode ? cu_width + width : width * 2);
    for (int i = px_available_top; i < tmp_w + multi_ref_index * 2; ++i) {
      out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
    }
  }
  else {
    // Extend nearest pixel.
    uvg_pixel nearest_pixel = luma_px->x > 0 ? left_border[0] : dc_val;
    // If first isp split, take samples as if it were normal square block
    int tmp_w = is_first_isp_block ? cu_width * 2 : (isp_mode ? cu_width + width : width * 2);
    for (int i = 0; i < tmp_w + multi_ref_index * 2; i++) {
      out_top_ref[i + 1] = nearest_pixel;
    }
  }
}


/**
 * \brief Build intra reference samples for a PU that is not on the top or
 *        left picture border.
 *
 * Faster variant of uvg_intra_build_reference_any: both neighbors exist, so
 * fewer edge cases need to be handled and copies can be unrolled.
 */
void uvg_intra_build_reference_inner(
  const encoder_state_t* const state,
  const cu_loc_t* const pu_loc,
  const cu_loc_t* const cu_loc,
  const color_t color,
  const vector2d_t *const luma_px,
  const vector2d_t *const pic_px,
  const lcu_t *const lcu,
  uvg_intra_references *const refs,
  bool entropy_sync,
  const uint8_t multi_ref_idx,
  uvg_pixel* extra_ref_lines,
  uint8_t isp_mode)
{
  const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
  const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
  const int cu_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
  const int cu_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
  const int log2_width = uvg_g_convert_to_log2[width];
  const int log2_height = uvg_g_convert_to_log2[height];

  // These are only used with ISP, so no need to check chroma
  const int pu_x = pu_loc->x;
  const int pu_y = pu_loc->y;
  const int cu_x = cu_loc->x;
  const int cu_y = cu_loc->y;
  bool is_first_isp_block = isp_mode ?
pu_x == cu_x && pu_y == cu_y : false;

  // Log2_dim 1 is possible with ISP blocks
  assert((log2_width >= 2 && log2_width <= 5) && log2_height <= 5);

  refs->filtered_initialized = false;
  uvg_pixel * __restrict out_left_ref = &refs->ref.left[0];
  uvg_pixel * __restrict out_top_ref = &refs->ref.top[0];

  const int is_chroma = color != COLOR_Y ? 1 : 0;
  const int is_dual_tree = is_chroma && state->encoder_control->cfg.dual_tree && state->frame->is_irap;

  // Get multiRefIdx from CU under prediction. Do not use MRL if not luma
  const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
  assert(multi_ref_index < MAX_REF_LINE_IDX);

  // Convert luma coordinates to chroma coordinates for chroma.
  const vector2d_t lcu_px = {
    luma_px->x % LCU_WIDTH,
    luma_px->y % LCU_WIDTH
  };
  const vector2d_t px = {
    lcu_px.x >> is_chroma,
    lcu_px.y >> is_chroma,
  };

  // Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
  const uvg_pixel* left_ref;
  bool extra_ref = false;
  // On the left LCU edge, if left neighboring LCU is available,
  // left_ref needs to point to correct extra reference line if MRL is used.
  if (lcu_px.x == 0 && multi_ref_index != 0) {
    left_ref = &extra_ref_lines[multi_ref_index * 128];
    extra_ref = true;
  }
  else {
    left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
  }

  const uvg_pixel * __restrict top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
  const uvg_pixel * __restrict rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;

  // Init top borders pointer to point to the correct place in the correct reference array.
  const uvg_pixel * __restrict top_border;
  if (px.y) {
    // Row above the PU inside the LCU reconstruction buffer; MRL moves it up.
    top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
  } else {
    top_border = &top_ref[px.x]; // At the top line. No need for multi_ref_index
  }

  // Init left borders pointer to point to the correct place in the correct reference array.
  const uvg_pixel * __restrict left_border;
  int left_stride; // Distance between reference samples.
  if (px.x) {
    // Column left of the PU inside the LCU reconstruction buffer.
    left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
    left_stride = LCU_WIDTH >> is_chroma;
  } else {
    if (extra_ref) {
      left_border = &left_ref[MAX_REF_LINE_IDX];
    } else {
      left_border = &left_ref[px.y];
    }
    left_stride = 1;
  }

  // Generate top-left reference
  if (multi_ref_index) {
    // MRL in use: fill multi_ref_index + 1 top-left corner samples.
    // Inner picture cases
    if (px.x == 0 && px.y == 0) {
      // LCU top left corner case. Multi ref will be 0.
      out_left_ref[0] = out_left_ref[1];
      out_top_ref[0] = out_left_ref[1];
    }
    else if (px.x == 0) {
      // LCU left border case
      uvg_pixel* top_left_corner = &extra_ref_lines[multi_ref_index * 128];
      for (int i = 0; i <= multi_ref_index; ++i) {
        out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
        out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
      }
    }
    else if (px.y == 0) {
      // LCU top border case. Multi ref will be 0.
      out_left_ref[0] = top_border[-1];
      out_top_ref[0] = top_border[-1];
    }
    else {
      // Inner case
      for (int i = 0; i <= multi_ref_index; ++i) {
        out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
        out_top_ref[i] = top_border[i - 1 - multi_ref_index];
      }
    }
  }
  else {
    // If the block is at an LCU border, the top-left must be copied from
    // the border that points to the LCUs 1D reference buffer.
    if (px.x == 0) {
      out_left_ref[0] = left_border[-1 * left_stride];
      out_top_ref[0] = left_border[-1 * left_stride];
    }
    else {
      out_left_ref[0] = top_border[-1];
      out_top_ref[0] = top_border[-1];
    }
  }

  // Generate left reference.

  // Get the number of reference pixels based on the PU coordinate within the LCU.
int px_available_left;
  if (isp_mode && !is_first_isp_block && !is_chroma) {
    // Later ISP splits: availability depends on the split direction.
    if (isp_mode == ISP_MODE_VER) {
      px_available_left = height;
    }
    else {
      px_available_left = uvg_count_available_edge_cus(cu_loc, lcu, true) * 4;
      px_available_left -= pu_loc->y - cu_loc->y;
    }
  }
  else {
    if(!is_dual_tree) {
      const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true);
      px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
    }
    else {
      const int num_cus = uvg_count_chroma_tree_available_edge_cus(cu_loc->x >> 1, cu_loc->y >> 1, width, height, lcu, true);
      px_available_left = num_cus * 4;
    }
  }

  // Limit the number of available pixels based on block size and dimensions
  // of the picture.
  px_available_left = MIN(px_available_left, cu_height * 2);
  px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);

  // Copy pixels from coded CUs.
  int i = multi_ref_index; // Offset by multi_ref_index

  // Do different loop for heights smaller than 4 (possible for some ISP splits)
  if (px.y % 4 != 0 || px_available_left < 4) {
    do {
      out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
      i += 1;
    } while (i < px_available_left);
  }
  else {
    // Unrolled by 4: px_available_left is a multiple of 4 here.
    do {
      out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
      out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];
      out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride];
      out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride];
      i += 4;
    } while (i < px_available_left);
  }

  // Extend the last pixel for the rest of the reference values.
  uvg_pixel nearest_pixel = out_left_ref[i];
  // If first isp split, take samples as if it were normal square block
  int tmp_h = is_first_isp_block ? cu_height * 2 : (isp_mode ? cu_height + height : height * 2);
  for (; i < tmp_h; i += 4) {
    out_left_ref[i + 1] = nearest_pixel;
    out_left_ref[i + 2] = nearest_pixel;
    out_left_ref[i + 3] = nearest_pixel;
    out_left_ref[i + 4] = nearest_pixel;
  }
  // Extend for MRL
  if (multi_ref_index) {
    for (; i < height * 2 + multi_ref_index; ++i) {
      out_left_ref[i + 1] = nearest_pixel;
    }
  }

  // Generate top reference.

  // Get the number of reference pixels based on the PU coordinate within the LCU.
  int px_available_top;
  if (isp_mode && !is_first_isp_block && !is_chroma) {
    // Later ISP splits: availability depends on the split direction.
    if (isp_mode == ISP_MODE_HOR) {
      px_available_top = width;
    }
    else {
      px_available_top = uvg_count_available_edge_cus(cu_loc, lcu, false) * 4;
      px_available_top -= pu_loc->x - cu_loc->x;
    }
  }
  else {
    if (!is_dual_tree) {
      const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, false);
      px_available_top = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus * 2;
    }
    else {
      const int num_cus = uvg_count_chroma_tree_available_edge_cus(cu_loc->x >> 1, cu_loc->y >> 1, width, height, lcu, false);
      px_available_top = num_cus * 4;
    }
  }

  // Limit the number of available pixels based on block size and dimensions
  // of the picture.
  px_available_top = MIN(px_available_top, cu_width * 2 + multi_ref_index);
  px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);

  // With WPP entropy sync, do not read past what the top-right neighbor LCU
  // has reconstructed on the top row.
  if (entropy_sync && px.y == 0) px_available_top = MIN(px_available_top, ((LCU_WIDTH >> is_chroma) - px.x) -1);

  // Copy all the pixels we can.
  i = 0;
  do {
    memcpy(out_top_ref + i + 1 + multi_ref_index, top_border + i, 4 * sizeof(uvg_pixel));
    i += 4;
  } while (i < px_available_top);

  // Extend the last pixel for the rest of the reference values.
  nearest_pixel = out_top_ref[i + multi_ref_index];
  // If first isp split, take samples as if it were normal square block
  int tmp_w = is_first_isp_block ? cu_width * 2 : (isp_mode ?
cu_width + width : width * 2);
  for (; i < tmp_w + (multi_ref_index * 2); i += 4) {
    out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
    out_top_ref[i + 2 + multi_ref_index] = nearest_pixel;
    out_top_ref[i + 3 + multi_ref_index] = nearest_pixel;
    out_top_ref[i + 4 + multi_ref_index] = nearest_pixel;
  }
}


/**
 * \brief Build intra reference samples for a PU, dispatching to the fast
 *        inner variant when the block touches neither the top nor the left
 *        picture border.
 *
 * \param extra_ref_lines Extra MRL reference lines; must be non-NULL when
 *                        multi_ref_idx != 0.
 */
void uvg_intra_build_reference(
  const encoder_state_t* const state,
  const cu_loc_t* const pu_loc,
  const cu_loc_t* const cu_loc,
  const color_t color,
  const vector2d_t *const luma_px,
  const vector2d_t *const pic_px,
  const lcu_t *const lcu,
  uvg_intra_references *const refs,
  bool entropy_sync,
  uvg_pixel *extra_ref_lines,
  uint8_t multi_ref_idx,
  const uint8_t isp_mode)
{
  assert(!(extra_ref_lines == NULL && multi_ref_idx != 0) && "Trying to use MRL with NULL extra references.");

  //bool first_split = color == COLOR_Y && isp_mode && pu_loc->x == cu_loc->x && pu_loc->y == cu_loc->y;
  //uint8_t isp = first_split ? 0 : isp_mode;

  // Much logic can be discarded if not on the edge
  if (luma_px->x > 0 && luma_px->y > 0) {
    uvg_intra_build_reference_inner(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp_mode);
  } else {
    uvg_intra_build_reference_any(state, pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp_mode);
  }
}


/**
 * \brief Predict a PU into dst: regular intra, MIP or (for chroma modes
 *        >= 68) cross-component linear model (CCLM) prediction.
 */
void uvg_intra_predict(
  const encoder_state_t* const state,
  uvg_intra_references* const refs,
  const cu_loc_t* const cu_loc,
  const cu_loc_t* const pu_loc,
  const color_t color,
  uvg_pixel* dst,
  const intra_search_data_t* data,
  const lcu_t* lcu,
  enum uvg_tree_type tree_type
  )
{
  // Stride of the downsampled luma reconstruction used by CCLM.
  const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA); // TODO: what is this used for?
  // const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
  bool use_mip = false;
  const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
  const int height = color == COLOR_Y ?
pu_loc->height : pu_loc->chroma_height;
  const int x = pu_loc->x;
  const int y = pu_loc->y;
  int8_t intra_mode = color == COLOR_Y ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma;
  if (data->pred_cu.intra.mip_flag) {
    if (color == COLOR_Y) {
      use_mip = true;
    }
    else {
      // MIP chroma is only used with 4:4:4 content.
      use_mip = state->encoder_control->chroma_format == UVG_CSP_444;
    }
  }
  if (intra_mode < 68) {
    if (use_mip) {
      assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
      mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed);
    }
    else {
      intra_predict_regular(state, refs, &data->pred_cu, cu_loc, pu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode);
    }
  }
  else {
    // CCLM modes (>= 68): start from the downsampled co-located luma.
    uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, height, stride / 2, width);
    // Derive parameters unless valid ones were already cached in data.
    if (width != 1 << data->pred_cu.log2_chroma_width || height != 1 << data->pred_cu.log2_chroma_height || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
      predict_cclm(
        state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst,
        (cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1],
        tree_type);
    }
    else {
      linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height);
    }
  }
}


/**
 * \brief Get the intra mode of the luma CU co-located with a chroma block.
 *
 * This function works on luma coordinates. Returns 0 (planar) when the
 * co-located luma CU uses MIP. Exactly one of lcu / cu_array must be given.
 */
int8_t uvg_get_co_located_luma_mode(
  const cu_loc_t* const chroma_loc,
  const cu_loc_t* const cu_loc,
  const cu_info_t* luma_cu,
  const lcu_t* const lcu,
  const cu_array_t* const cu_array,
  enum uvg_tree_type tree_type)
{
  int x = chroma_loc->x;
  int y = chroma_loc->y;
  assert((cu_array || lcu) && !(cu_array && lcu));
  assert(tree_type != UVG_LUMA_T && "Luma only CU shouldn't need colocated luma CU");
  if(tree_type == UVG_CHROMA_T) {
    // In a separate chroma tree, sample the center of the chroma block.
    x += chroma_loc->width >> 1;
    y += chroma_loc->height >> 1;
  }
  const cu_info_t* cu;
  if (lcu && cu_loc->x <= x && x < cu_loc->x + cu_loc->width && cu_loc->y <= y && y < cu_loc->y + cu_loc->height) {
    cu = luma_cu;
  }
  else if(cu_array) {
    cu = uvg_cu_array_at_const(cu_array, x, y);
  }
  else {
    cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
  }

  if (cu->intra.mip_flag) {
    return 0;
  }
  return cu->intra.mode;
}


/**
 * \brief Returns ISP split partition size based on block dimensions and split type.
 *
 * Returns ISP split partition size based on block dimensions and split type.
 * Will fail if resulting partition size has less than 16 samples.
 *
 * \param width              Block width.
 * \param height             Block height.
 * \param split_type         Horizontal or vertical split.
 * \param is_transform_split True when computing transform-block (not
 *                           prediction-block) partition size.
 */
int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_split)
{
  assert(split_type != ISP_MODE_NO_ISP && "Cannot calculate split dimension if no split type is set. Make sure this function is not called in this case.");

  bool divide_in_rows = split_type == SPLIT_TYPE_HOR;
  int split_dim_size, non_split_dim_size, partition_size, div_shift = 2;

  if (divide_in_rows) {
    split_dim_size = height;
    non_split_dim_size = width;
  }
  else {
    split_dim_size = width;
    non_split_dim_size = height;
  }

  const int min_num_samples = 16; // Minimum allowed number of samples for split block
  const int factor_to_min_samples = non_split_dim_size < min_num_samples ?
min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1;
  partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift);

  // Minimum width for ISP splits are 4. (JVET-T2001 chapter 8.4.5.1 equation 246: nPbW = Max(4, nW))
  // Except this does not apply for transform blocks for some reason. VTM does seem to expect 4 transform blocks even if only two pred blocks were used
  // Height can be 2.
  if (!divide_in_rows && !is_transform_split) {
    partition_size = MAX(4, partition_size);
  }
  assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) &&
    "Partition has less than allowed minimum number of samples.");
  return partition_size;
}


/**
 * \brief Return the number of ISP split partitions for the given block.
 */
int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_split)
{
  assert((split_type != ISP_MODE_NO_ISP) && "This function cannot be called if ISP mode is 0.");
  int split_dim = uvg_get_isp_split_dim(width, height, split_type, is_transform_split);
  int num = split_type == ISP_MODE_HOR ? height / split_dim : width / split_dim;

  return num;
}


/**
 * \brief Fill loc with the location and dimensions of ISP split number
 *        split_idx of the given block.
 */
void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_split)
{
  // Check for illegal splits
  assert(!(block_w == 4 && block_h == 4) || split_idx == 0 && "Trying to get ISP split CU when split is not allowed.");
  assert(!((block_w * block_h) <= 16) || split_idx < 2 && "Split index for small blocks must be in [0, 1]");
  assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3].");
  assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
  int part_dim = block_w;
  if (split_type != ISP_MODE_NO_ISP) {
    part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split);
  }
  // Narrow vertical splits: two prediction blocks share one split index pair.
  if(split_type == ISP_MODE_VER && block_w < 16 && block_h != 4 && !is_transform_split) {
    split_idx /= 2;
  }
  const int offset = part_dim * split_idx;

  const int part_x = split_type == ISP_MODE_HOR ? x : x + offset;
  const int part_y = split_type == ISP_MODE_HOR ? y + offset : y;
  const int part_w = split_type == ISP_MODE_HOR ? block_w : part_dim;
  const int part_h = split_type == ISP_MODE_HOR ? part_dim : block_h;

  uvg_cu_loc_ctor(loc, part_x, part_y, part_w, part_h);
}


/**
 * \brief Build references, predict and reconstruct a single transform-block
 *        leaf of one color plane into the LCU reconstruction buffer.
 */
static void intra_recon_tb_leaf(
  encoder_state_t* const state,
  const cu_loc_t* pu_loc,
  const cu_loc_t* cu_loc,
  lcu_t *lcu,
  color_t color,
  const intra_search_data_t* search_data,
  enum uvg_tree_type tree_type)
{
  const uvg_config *cfg = &state->encoder_control->cfg;
  const int shift = color == COLOR_Y ? 0 : 1;
  const int x = pu_loc->x;
  const int y = pu_loc->y;

  const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
  const int height = color == COLOR_Y ?
pu_loc->height : pu_loc->chroma_height;
  const int lcu_width = LCU_WIDTH >> shift;

  const vector2d_t luma_px = { x, y };
  const vector2d_t pic_px = {
    state->tile->frame->width,
    state->tile->frame->height,
  };
  int x_scu = SUB_SCU(x);
  int y_scu = SUB_SCU(y);
  const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
  uint8_t multi_ref_index = color == COLOR_Y ? search_data->pred_cu.intra.multi_ref_idx: 0;
  uint8_t isp_mode = color == COLOR_Y ? search_data->pred_cu.intra.isp_mode : 0;

  uvg_intra_references refs;

  // Extra reference lines for use with MRL. Extra lines needed only for left edge.
  uvg_pixel extra_refs[128 * MAX_REF_LINE_IDX] = { 0 };

  if (luma_px.x > 0 && lcu_px.x == 0 && lcu_px.y > 0 && multi_ref_index != 0) {
    videoframe_t* const frame = state->tile->frame;

    // Copy extra ref lines, including ref line 1 and top left corner.
    for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
      int ref_height = height * 2 + MAX_REF_LINE_IDX;
      ref_height = MIN(ref_height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
      ref_height = MIN(ref_height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
      // Copy one vertical column per reference line from the frame
      // reconstruction into the local extra-reference buffer.
      uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
        &extra_refs[i * 128],
        1, ref_height, frame->rec->stride, 1);
    }
  }

  uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);

  uvg_pixel pred[32 * 32];
  uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu, tree_type);

  const int index = lcu_px.x + lcu_px.y * lcu_width;
  uvg_pixel *block = NULL;
  uvg_pixel *block2 = NULL;
  switch (color) {
    case COLOR_Y:
      block = &lcu->rec.y[index];
      break;
    case COLOR_U:
      block = &lcu->rec.u[index];
      block2 = &lcu->rec.joint_u[index];
      break;
    case COLOR_V:
      block = &lcu->rec.v[index];
      block2 = &lcu->rec.joint_v[index];
      break;
    default: break;
  }

  uvg_pixels_blit(pred, block , width, height, width, lcu_width);
  if(color != COLOR_Y && cfg->jccr) {
    // Keep a copy of the prediction for joint Cb-Cr coding.
    uvg_pixels_blit(pred, block2, width, height, width, lcu_width);
  }
}


/**
 * \brief Reconstruct an intra CU.
 *
 * Recurses when the CU is larger than the maximum transform size, handles
 * the ISP split loop for luma, and otherwise reconstructs the leaf TUs and
 * quantizes the residual.
 *
 * \param state        encoder state
 * \param search_data  intra search result (mode, MIP/MRL/ISP flags, CCLM
 *                     parameters) used for prediction
 * \param cu_loc       location and size of the CU
 * \param cur_cu       pointer to the CU, or NULL to fetch CU from LCU
 * \param lcu          containing LCU
 * \param tree_type    dual/luma/chroma tree selection
 * \param recon_luma   whether to reconstruct luma
 * \param recon_chroma whether to reconstruct chroma
 */
void uvg_intra_recon_cu(
  encoder_state_t* const state,
  intra_search_data_t* search_data,
  const cu_loc_t* cu_loc,
  cu_info_t *cur_cu,
  lcu_t *lcu,
  enum uvg_tree_type tree_type,
  bool recon_luma,
  bool recon_chroma)
{
  const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
  const vector2d_t lcu_px = {
cu_loc->local_x >> (tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I), cu_loc->local_y >> (tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I), }; const int8_t width = cu_loc->width; const int8_t height = cu_loc->height; if (cur_cu == NULL) { cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } // Reset CBFs because CBFs might have been set // for depth earlier if (recon_luma) { cbf_clear(&cur_cu->cbf, COLOR_Y); } if (recon_chroma) { cbf_clear(&cur_cu->cbf, COLOR_U); cbf_clear(&cur_cu->cbf, COLOR_V); } if (width > TR_MAX_WIDTH || height > TR_MAX_WIDTH) { enum split_type split; if (cu_loc->width > TR_MAX_WIDTH && cu_loc->height > TR_MAX_WIDTH) { split = QT_SPLIT; } else if (cu_loc->width > TR_MAX_WIDTH) { split = BT_VER_SPLIT; } else { split = BT_HOR_SPLIT; } cu_loc_t split_cu_loc[4]; const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { uvg_intra_recon_cu( state, search_data, &split_cu_loc[i], NULL, lcu, state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I ? tree_type : UVG_BOTH_T, recon_luma, recon_chroma); } return; } if (search_data->pred_cu.intra.isp_mode != ISP_MODE_NO_ISP && recon_luma ) { search_data->best_isp_cbfs = 0; // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions. // Small blocks are split only twice. 
int split_type = search_data->pred_cu.intra.isp_mode; int split_limit = uvg_get_isp_split_num(width, height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t tu_loc; uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true); cu_loc_t pu_loc; uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false); cur_cu->intra.isp_index = 0; if(tu_loc.x % 4 == 0) { intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type); } uvg_quantize_lcu_residual(state, true, false, false, &tu_loc, cur_cu, lcu, false, tree_type); search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, COLOR_Y) << i; cur_cu->intra.isp_cbfs = search_data->best_isp_cbfs; } } const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP; const bool has_chroma = recon_chroma; // Process a leaf TU. if (has_luma) { intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type); } if (has_chroma) { intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data, tree_type); intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data, tree_type); } // TODO: not necessary to call if only luma and ISP is on uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3), search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma, cu_loc, cur_cu, lcu, false, tree_type); } /** * \brief Check if ISP can be used for block size. * * \return True if isp can be used. * \param width Block width. * \param height Block height. * \param max_tr_size Maximum supported transform block size (64). 
 */
bool uvg_can_use_isp(const int width, const int height)
{
  assert(!(width > LCU_WIDTH || height > LCU_WIDTH) && "Block size larger than max LCU size.");
  assert(!(width < TR_MIN_WIDTH || height < TR_MIN_WIDTH) && "Block size smaller than min TR_WIDTH.");

  const int log2_width = uvg_g_convert_to_log2[width];
  const int log2_height = uvg_g_convert_to_log2[height];

  // Each split block must have at least 16 samples.
  bool not_enough_samples = (log2_width + log2_height <= 4);
  bool cu_size_larger_than_max_tr_size = width > TR_MAX_WIDTH || height > TR_MAX_WIDTH;
  if (not_enough_samples || cu_size_larger_than_max_tr_size) {
    return false;
  }
  return true;
}


/**
 * \brief Check if given ISP mode can be used with LFNST.
 *
 * \param width           Block width.
 * \param height          Block height.
 * \param isp_split_type  ISP mode.
 * \param tree_type       Tree type. Dual, luma or chroma tree.
 * \return True if isp can be used.
 */
bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_split_type, const enum uvg_tree_type tree_type)
{
  // LFNST with ISP applies to luma only.
  if (tree_type == UVG_CHROMA_T) {
    return false;
  }
  if (isp_split_type == ISP_MODE_NO_ISP) {
    return true;
  }
  // Every ISP sub-partition must be at least TR_MIN_WIDTH in both dimensions.
  const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER, true);
  const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR, true) : height;
  if (!(tu_width >= TR_MIN_WIDTH && tu_height >= TR_MIN_WIDTH)) {
    return false;
  }
  return true;
}


/**
 * \brief Reconstruct luma with ISP and estimate the RD cost of the result.
 *
 * Reconstructs each ISP sub-partition, accumulating SSD distortion plus
 * lambda-weighted coefficient and CBF bits, and records the per-split CBFs
 * in search_data. Mutates the search CABAC state (must be in update mode).
 *
 * \param state          encoder state; search_cabac.update must be set
 * \param cu_loc         location and size of the CU
 * \param cost_treshold  NOTE(review): unused in the visible body (an early-out
 *                       threshold was presumably intended) -- confirm
 * \param search_data    intra search state; pred_cu CBFs, isp_index, isp_cbfs
 *                       and best_isp_cbfs are updated
 * \param lcu            containing LCU (source, reconstruction and coefficients)
 * \return Estimated cost: SSD + lambda * bits.
 */
double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
  const cu_loc_t* const cu_loc,
  double cost_treshold,
  intra_search_data_t* const search_data,
  lcu_t* const lcu)
{
  assert(state->search_cabac.update && "ISP reconstruction must be done with CABAC update");
  double cost = 0;
  const int width = cu_loc->width;
  const int height = cu_loc->height;
  search_data->best_isp_cbfs = 0;

  // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
  // Small blocks are split only twice.
  int split_type = search_data->pred_cu.intra.isp_mode;
  int split_limit = uvg_get_isp_split_num(width, height, split_type, true);
  int cbf_context = 2;

  for (int i = 0; i < split_limit; ++i) {
    cu_loc_t tu_loc;
    uvg_get_isp_split_loc(&tu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true);
    cu_loc_t pu_loc;
    uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false);
    search_data->pred_cu.intra.isp_index = 0;
    // Prediction is regenerated only at 4-pixel-aligned TU columns (same
    // pattern as the ISP path of uvg_intra_recon_cu).
    if (tu_loc.x % 4 == 0) {
      intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, UVG_LUMA_T);
    }
    uvg_quantize_lcu_residual(state, true, false, false, &tu_loc, &search_data->pred_cu, lcu, false, UVG_LUMA_T);

    // Distortion of this split: SSD between source and reconstruction.
    int index = tu_loc.local_y * LCU_WIDTH + tu_loc.local_x;
    int ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
      LCU_WIDTH, LCU_WIDTH,
      tu_loc.width, tu_loc.height);
    double coeff_bits = uvg_get_coeff_cost(state, lcu->coeff.y, NULL, &tu_loc, 0, SCAN_DIAG, false, COEFF_ORDER_CU);

    int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y);
    // The last split's CBF is inferred (not signalled) when all previous
    // splits had zero CBF. NOTE(review): this comparison can never be false,
    // since bits 0..split_limit-2 cannot equal 1 << (split_limit - 1); the
    // intended guard is presumably best_isp_cbfs != 0 -- confirm against the
    // VVC cbf_luma inference rule for ISP.
    if (i + 1 != split_limit || search_data->best_isp_cbfs != 1 << (split_limit - 1)) {
      CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon");
    }
    cost += ssd + coeff_bits * state->lambda;
    // CBF context of the next split depends on the previous split's CBF.
    cbf_context = 2 + cbf;
    search_data->best_isp_cbfs |= cbf << i;
    search_data->pred_cu.intra.isp_cbfs = search_data->best_isp_cbfs;
  }
  return cost;
}