From ef5a205faa42cbed59b7240ae76d9214171bf107 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Fri, 12 Nov 2021 13:11:54 +0200 Subject: [PATCH 01/19] [cclm] WIP: initial implementation of the cclm parameter calculation function --- src/intra.c | 238 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/src/intra.c b/src/intra.c index a582d3e1..69ebaf15 100644 --- a/src/intra.c +++ b/src/intra.c @@ -248,6 +248,244 @@ static void intra_pred_dc( } +enum lm_mode +{ + LM_CHROMA_IDX = 67, + LM_CHROMA_L_IDX = 68, + LM_CHROMA_T_IDX = 69, +}; + + +static void get_cclm_parameters( + encoder_state_t const* const state, + int8_t width, int8_t height, int8_t mode, + int x0, int y0, + kvz_pixel * luma_src, kvz_pixel *chroma_ref, + int *a, int *b, int *shift) { + + const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX); + + // TODO: take into account YUV422 + const int unit_w = base_unit_size >> 1; + const int unit_h = base_unit_size >> 1; + + const int tu_width_in_units = width / unit_w; + const int tu_height_in_units = height / unit_h; + + const int c_height = height / 2; + const int c_width = width / 2; + + int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H; + int left_template_samp_num = height; + + int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w; + int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h; + int total_units = total_left_units + total_above_units + 1; + int above_right_units = total_above_units - tu_width_in_units; + int left_below_units = total_left_units - tu_height_in_units; + int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs + int avai_left_below_units = 0; + int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size); + int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size); + + bool above_available = avai_above_units != 0; + bool left_available = avai_left_units != 0; + // Not sure if LCU_CU_WIDTH is correct macro here, + // should be 16 for 64 CTU width 32 for 128 + + int min_luma[2] = { MAX_INT, 0 }; + int max_luma[2] = { -MAX_INT, 0 }; + + kvz_pixel *src_color0 = luma_src; + kvz_pixel* cur_chroma0 = chroma_ref; + + char internal_bit_depth = state->encoder_control->bitdepth; + + int minLuma[2] = { MAX_INT, 0 }; + int maxLuma[2] = { -MAX_INT, 0 }; + + int32_t src_stride = state->tile->frame->source->stride; + kvz_pixel* src = src_color0 - src_stride; + int actualTopTemplateSampNum = 0; + int actualLeftTemplateSampNum = 0; + if (mode == LM_CHROMA_T_IDX) + { + left_available = 0; + avai_above_right_units = avai_above_right_units > (c_height / unit_w) ? c_height / unit_w : avai_above_right_units; + actualTopTemplateSampNum = unit_w * (avai_above_units + avai_above_right_units); + } + else if (mode == LM_CHROMA_L_IDX) + { + above_available = 0; + avai_left_below_units = avai_left_below_units > (c_width / unit_h) ? c_width / unit_h : avai_left_below_units; + actualLeftTemplateSampNum = unit_h * (avai_left_units + avai_left_below_units); + } + else if (mode == LM_CHROMA_IDX) + { + actualTopTemplateSampNum = c_width; + actualLeftTemplateSampNum = c_height; + } + int startPos[2]; //0:Above, 1: Left + int pickStep[2]; + + int aboveIs4 = left_available ? 0 : 1; + int leftIs4 = above_available ? 0 : 1; + + startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4); + pickStep[0] = MAX(1, actualTopTemplateSampNum >> (1 + aboveIs4)); + + startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4); + pickStep[1] = MAX(1, actualLeftTemplateSampNum >> (1 + leftIs4)); + + kvz_pixel selectLumaPix[4] = { 0, 0, 0, 0 }; + kvz_pixel selectChromaPix[4] = { 0, 0, 0, 0 }; + + int cntT, cntL; + cntT = cntL = 0; + int cnt = 0; + if (above_available) + { + cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1); + src = src_color0 - src_stride; + const kvz_pixel* cur = cur_chroma0 + 1; + for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) + { + selectLumaPix[cnt] = src[pos]; + selectChromaPix[cnt] = cur[pos]; + } + } + + if (left_available) + { + cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1); + src = src_color0 - 1; + const kvz_pixel* cur = cur_chroma0 + src_stride/2 + 1; + for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) + { + selectLumaPix[cnt + cntT] = src[pos * src_stride]; + selectChromaPix[cnt + cntT] = cur[pos]; + } + } + cnt = cntL + cntT; + + if (cnt == 2) + { + selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0]; + selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1]; + selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1]; + selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3]; + } + + int minGrpIdx[2] = { 0, 2 }; + int maxGrpIdx[2] = { 1, 3 }; + int* tmpMinGrp = minGrpIdx; + int* tmpMaxGrp = maxGrpIdx; + if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) + { + SWAP(tmpMinGrp[0], tmpMinGrp[1], int); + } + if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) + { + SWAP(tmpMaxGrp[0], tmpMaxGrp[1], int); + } + if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) + { + SWAP(tmpMinGrp, tmpMaxGrp, int); + } + if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) + { + SWAP(tmpMinGrp[1], tmpMaxGrp[0], int); + } + + minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1; + minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; + maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1; + maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; + + if (left_available || above_available) + { + int diff = maxLuma[0] - minLuma[0]; + if (diff > 0) + { + int diffC = maxLuma[1] - minLuma[1]; + int x = kvz_math_floor_log2(diff); + static const uint8_t DivSigTable[1 << 4] = { + // 4bit significands - 8 ( MSB is omitted ) + 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 + }; + int normDiff = (diff << 4 >> x) & 15; + int v = DivSigTable[normDiff] | 8; + x += normDiff != 0; + + int y = kvz_math_floor_log2(abs(diffC)) + 1; + int add = 1 << y >> 1; + *a = (diffC * v + add) >> y; + *shift = 3 + x - y; + if (*shift < 1) + { + *shift = 1; + *a = ((*a == 0) ? 0 : (*a < 0) ? -15 : 15); // a=Sign(a)*15 + } + *b = minLuma[1] - ((*a * minLuma[0]) >> *shift); + } + else + { + *a = 0; + *b = minLuma[1]; + *shift = 0; + } + } + else + { + *a = 0; + + *b = 1 << (internal_bit_depth - 1); + + *shift = 0; + } +} + +static void linear_transform_cclm(int a, int b, int shift, kvz_pixel * dst) { + +} + + +void kvz_predict_cclm( + encoder_state_t const* const state, + const color_t color, + const int8_t width, + const int8_t height, + const int16_t x0, + const int16_t y0, + const int16_t stride, + const int8_t mode, + kvz_pixel* const y_rec, + kvz_pixel* dst +) +{ + assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); + + kvz_pixel sampled_luma[(LCU_WIDTH_C+1)*(LCU_WIDTH_C+1)]; + + for (int y = MAX(0, y0 -1); y < y0 + height; y++) { + for (int x = MAX(0, x0 - 1); x < x0 + width; x++) { + int s = 4; + s += y_rec[2 * x] * 2; + s += y_rec[2 * x + 1]; + s += y_rec[2 * x - (x + x0 > 0)]; + s += y_rec[2 * x + stride] * 2; + s += y_rec[2 * x + 1 + stride]; + s += y_rec[2 * x - (x + x0 > 0) + stride]; + sampled_luma[x + 1 + (y + 1) * 32] = s >> 3; + } + y += stride; + } + + int a, b, shift; + get_cclm_parameters(state, width, height, mode,x0, y0, state->tile->frame->rec->y, state->tile->frame->source->u, &a, &b, &shift); + linear_transform_cclm(a, b, shift, dst); +} + void kvz_intra_predict( encoder_state_t *const state, kvz_intra_references *refs, From 93c02644c87d0e10cc939a4a1290fb98c8e81650 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 16 Nov 2021 07:10:31 +0200 Subject: [PATCH 02/19] [cclm] WIP fix parameter calculation and add calling to the functions --- src/intra.c | 110 ++++++++++++++++++++++++++++----------------- src/intra.h | 14 ++++++ src/search_intra.c | 48 ++++++++++++++------ 3 files changed, 117 insertions(+), 55 deletions(-) diff --git a/src/intra.c b/src/intra.c index 69ebaf15..2393b344 100644 --- a/src/intra.c +++ b/src/intra.c @@ -260,7 +260,7 @@ static void get_cclm_parameters( encoder_state_t const* const state, int8_t width, int8_t height, int8_t mode, int x0, int y0, - kvz_pixel * luma_src, kvz_pixel *chroma_ref, + kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref, int *a, int *b, int *shift) { const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX); @@ -269,11 +269,14 @@ static void get_cclm_parameters( const int unit_w = base_unit_size >> 1; const int unit_h = base_unit_size >> 1; + const int c_height = height; + const int c_width = width; + height *= 2; + width *= 2; + const int tu_width_in_units = width / unit_w; const int tu_height_in_units = height / unit_h; - const int c_height = height / 2; - const int c_width = width / 2; int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H; int left_template_samp_num = height; @@ -292,20 +295,13 @@ static void get_cclm_parameters( bool left_available = avai_left_units != 0; // Not sure if LCU_CU_WIDTH is correct macro here, // should be 16 for 64 CTU width 32 for 128 + + char internal_bit_depth = state->encoder_control->bitdepth; int min_luma[2] = { MAX_INT, 0 }; int max_luma[2] = { -MAX_INT, 0 }; - - kvz_pixel *src_color0 = luma_src; - kvz_pixel* cur_chroma0 = chroma_ref; - - char internal_bit_depth = state->encoder_control->bitdepth; - - int minLuma[2] = { MAX_INT, 0 }; - int maxLuma[2] = { -MAX_INT, 0 }; - - int32_t src_stride = state->tile->frame->source->stride; - kvz_pixel* src = src_color0 - src_stride; + + kvz_pixel* src; int actualTopTemplateSampNum = 0; int actualLeftTemplateSampNum = 0; if (mode == LM_CHROMA_T_IDX) @@ -346,8 +342,8 @@ static void get_cclm_parameters( if (above_available) { cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1); - src = src_color0 - src_stride; - const kvz_pixel* cur = cur_chroma0 + 1; + src = luma_src->top + 1; + const kvz_pixel* cur = chroma_ref->ref.top + 1; for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) { selectLumaPix[cnt] = src[pos]; @@ -358,11 +354,11 @@ static void get_cclm_parameters( if (left_available) { cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1); - src = src_color0 - 1; - const kvz_pixel* cur = cur_chroma0 + src_stride/2 + 1; + src = luma_src->left + 1; + const kvz_pixel* cur = chroma_ref->ref.left + 1; for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) { - selectLumaPix[cnt + cntT] = src[pos * src_stride]; + selectLumaPix[cnt + cntT] = src[pos]; selectChromaPix[cnt + cntT] = cur[pos]; } } @@ -390,24 +386,24 @@ static void get_cclm_parameters( } if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) { - SWAP(tmpMinGrp, tmpMaxGrp, int); + SWAP(tmpMinGrp, tmpMaxGrp, int*); } if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) { SWAP(tmpMinGrp[1], tmpMaxGrp[0], int); } - minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1; - minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; - maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1; - maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; + min_luma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1; + min_luma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; + max_luma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1; + max_luma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; if (left_available || above_available) { - int diff = maxLuma[0] - minLuma[0]; + int diff = max_luma[0] - min_luma[0]; if (diff > 0) { - int diffC = maxLuma[1] - minLuma[1]; + int diffC = max_luma[1] - min_luma[1]; int x = kvz_math_floor_log2(diff); static const uint8_t DivSigTable[1 << 4] = { // 4bit significands - 8 ( MSB is omitted ) @@ -426,12 +422,12 @@ static void get_cclm_parameters( *shift = 1; *a = ((*a == 0) ? 0 : (*a < 0) ? -15 : 15); // a=Sign(a)*15 } - *b = minLuma[1] - ((*a * minLuma[0]) >> *shift); + *b = min_luma[1] - ((*a * min_luma[0]) >> *shift); } else { *a = 0; - *b = minLuma[1]; + *b = min_luma[1]; *shift = 0; } } @@ -459,30 +455,62 @@ void kvz_predict_cclm( const int16_t y0, const int16_t stride, const int8_t mode, - kvz_pixel* const y_rec, + kvz_pixel const * y_rec, + kvz_intra_references* chroma_ref, kvz_pixel* dst ) { assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); - kvz_pixel sampled_luma[(LCU_WIDTH_C+1)*(LCU_WIDTH_C+1)]; + + kvz_intra_ref sampled_luma; - for (int y = MAX(0, y0 -1); y < y0 + height; y++) { - for (int x = MAX(0, x0 - 1); x < x0 + width; x++) { + int x_scu = SUB_SCU(x0); + int y_scu = SUB_SCU(y0); + + if(x0) { + for(int y = 0; y < height * 2; y+=2) { int s = 4; - s += y_rec[2 * x] * 2; - s += y_rec[2 * x + 1]; - s += y_rec[2 * x - (x + x0 > 0)]; - s += y_rec[2 * x + stride] * 2; - s += y_rec[2 * x + 1 + stride]; - s += y_rec[2 * x - (x + x0 > 0) + stride]; - sampled_luma[x + 1 + (y + 1) * 32] = s >> 3; + s += x_scu ? y_rec[y * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride] * 2; + s += x_scu ? y_rec[y * LCU_WIDTH - 2]: state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] * 2: state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride] * 2; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2]: state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; + s += y_rec[y * LCU_WIDTH]; + s += y_rec[(y + 1) * LCU_WIDTH]; + sampled_luma.left[y/2] = s >> 3; } - y += stride; } + if(y0) { + for(int x = 0; x < width*2; x += 2) { + bool left_padding = x0 || x; + int s = 4; + s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 2) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 1) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; + sampled_luma.top[x / 2] = s >> 3; + } + } + + //for (int y = MAX(0, y0 -1) % 64; y < y0 + height; y++) { + // for (int x = MAX(0, x0 - 1) % 64; x < x0 + width; x++) { + // int s = 4; + // s += y_rec[2 * x] * 2; + // s += y_rec[2 * x + 1]; + // s += y_rec[2 * x - (x + x0 > 0)]; + // s += y_rec[2 * x + stride] * 2; + // s += y_rec[2 * x + 1 + stride]; + // s += y_rec[2 * x - (x + x0 > 0) + stride]; + // sampled_luma[x + 1 + (y + 1) * 33] = s >> 3; + // } + // y_rec += 64; + //} + int a, b, shift; - get_cclm_parameters(state, width, height, mode,x0, y0, state->tile->frame->rec->y, state->tile->frame->source->u, &a, &b, &shift); + get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma, chroma_ref, &a, &b, &shift); linear_transform_cclm(a, b, shift, dst); } diff --git a/src/intra.h b/src/intra.h index e69621c4..5652ec41 100644 --- a/src/intra.h +++ b/src/intra.h @@ -120,3 +120,17 @@ void kvz_intra_recon_cu( cu_info_t *cur_cu, lcu_t *lcu); + +void kvz_predict_cclm( + encoder_state_t const* const state, + const color_t color, + const int8_t width, + const int8_t height, + const int16_t x0, + const int16_t y0, + const int16_t stride, + const int8_t mode, + kvz_pixel const* y_rec, + kvz_intra_references* chroma_ref, + kvz_pixel* dst +); \ No newline at end of file diff --git a/src/search_intra.c b/src/search_intra.c index 206a26a1..4960f6cf 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -461,7 +461,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); - for (int i = 0; i < 5; ++i) { + for (int i = 0; i < 8; ++i) { costs[i] = 0; } @@ -476,15 +476,20 @@ static void search_intra_chroma_rough(encoder_state_t * const state, kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { - if (modes[i] == luma_mode) continue; + if (modes[i] == -1) continue; kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } + for (int i = 5; i < 8; i++) { + kvz_predict_cclm( + state, + COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], state->tile->frame->rec->y, refs_u, _pred); + } kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { - if (modes[i] == luma_mode) continue; + if (modes[i] == -1) continue; kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); @@ -820,11 +825,20 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, int8_t intra_mode, - int8_t modes[5], int8_t num_modes, + int8_t modes[8], int8_t num_modes, lcu_t *const lcu) { const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4); + + kvz_intra_references refs; + const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t pic_px = { + state->tile->frame->width, + state->tile->frame->height, + }; + kvz_intra_build_reference(6-depth, COLOR_U, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp); + if (reconstruct_chroma) { const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); @@ -839,12 +853,18 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { chroma.mode = modes[chroma_mode_i]; - - kvz_intra_recon_cu(state, - x_px, y_px, - depth, - -1, chroma.mode, // skip luma - NULL, lcu); + if (chroma.mode == -1) continue; + if(chroma.mode < 67) { + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + -1, chroma.mode, // skip luma + NULL, lcu); + } + else { + kvz_predict_cclm( + state, COLOR_U, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs, NULL); + } chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); @@ -871,8 +891,8 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); int8_t intra_mode = cur_pu->intra.mode; - double costs[5]; - int8_t modes[5] = { 0, 50, 18, 1, 67 }; + double costs[8]; + int8_t modes[8] = { 0, 50, 18, 1, -1, 67, 68, 69 }; if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) { modes[4] = intra_mode; } @@ -885,13 +905,13 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int num_modes = modes_in_depth[depth]; if (state->encoder_control->cfg.rdo == 3) { - num_modes = modes[4] == intra_mode ? 5 : 4; + num_modes = 8; } // Don't do rough mode search if all modes are selected. // FIXME: It might make more sense to only disable rough search if // num_modes is 0.is 0. - if (num_modes != 1 && num_modes != 5 && num_modes != 4) { + if (num_modes != 1 && num_modes != 5 && num_modes != 4 && num_modes != 8) { const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2); const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; const vector2d_t luma_px = { x_px, y_px }; From 4e8c9043a1fbed71a289d75f716506dabcb7d054 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 16 Nov 2021 08:31:32 +0200 Subject: [PATCH 03/19] [cclm] CCLM parameter calculation *should* work correctly --- src/intra.c | 9 +++++---- src/search_intra.c | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/intra.c b/src/intra.c index 2393b344..e1ffe692 100644 --- a/src/intra.c +++ b/src/intra.c @@ -274,8 +274,8 @@ static void get_cclm_parameters( height *= 2; width *= 2; - const int tu_width_in_units = width / unit_w; - const int tu_height_in_units = height / unit_h; + const int tu_width_in_units = c_width / unit_w; + const int tu_height_in_units = c_height / unit_h; int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H; @@ -342,7 +342,7 @@ static void get_cclm_parameters( if (above_available) { cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1); - src = luma_src->top + 1; + src = luma_src->top; const kvz_pixel* cur = chroma_ref->ref.top + 1; for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) { @@ -354,7 +354,7 @@ static void get_cclm_parameters( if (left_available) { cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1); - src = luma_src->left + 1; + src = luma_src->left; const kvz_pixel* cur = chroma_ref->ref.left + 1; for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) { @@ -467,6 +467,7 @@ void kvz_predict_cclm( int x_scu = SUB_SCU(x0); int y_scu = SUB_SCU(y0); + y_rec += x_scu + y_scu * LCU_WIDTH; if(x0) { for(int y = 0; y < height * 2; y+=2) { diff --git a/src/search_intra.c b/src/search_intra.c index 4960f6cf..52b654bd 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -454,7 +454,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, kvz_intra_references *refs_u, kvz_intra_references *refs_v, int8_t luma_mode, - int8_t modes[5], double costs[5]) + int8_t modes[8], double costs[8], lcu_t* lcu) { assert(!(x_px & 4 || y_px & 4)); @@ -484,7 +484,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, for (int i = 5; i < 8; i++) { kvz_predict_cclm( state, - COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], state->tile->frame->rec->y, refs_u, _pred); + COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, _pred); } kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); @@ -929,7 +929,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, search_intra_chroma_rough(state, x_px, y_px, depth, ref_u, ref_v, LCU_WIDTH_C, &refs_u, &refs_v, - intra_mode, modes, costs); + intra_mode, modes, costs, lcu); } int8_t intra_mode_chroma = intra_mode; From 677bf1edcb974250ca3fdbc4515db45dbe8535a7 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 16 Nov 2021 09:31:47 +0200 Subject: [PATCH 04/19] [cclm] linear transform --- src/intra.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/src/intra.c b/src/intra.c index e1ffe692..32d1750c 100644 --- a/src/intra.c +++ b/src/intra.c @@ -441,8 +441,16 @@ static void get_cclm_parameters( } } -static void linear_transform_cclm(int a, int b, int shift, kvz_pixel * dst) { - +static void linear_transform_cclm(int scale, int shift, int offset, kvz_pixel * src, kvz_pixel * dst, int stride, int height) { + for (int y = 0; y < height; ++y) { + for (int x=0; x < stride; ++x) { + int val = src[x + y * stride] * scale; + val >>= shift; + val += offset; + val = CLIP_TO_PIXEL(val); + dst[x + y * stride] = val; + } + } } @@ -463,7 +471,8 @@ void kvz_predict_cclm( assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); - kvz_intra_ref sampled_luma; + kvz_intra_ref sampled_luma_ref; + kvz_pixel sampled_luma[LCU_CHROMA_SIZE]; int x_scu = SUB_SCU(x0); int y_scu = SUB_SCU(y0); @@ -478,7 +487,7 @@ void kvz_predict_cclm( s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2]: state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; s += y_rec[y * LCU_WIDTH]; s += y_rec[(y + 1) * LCU_WIDTH]; - sampled_luma.left[y/2] = s >> 3; + sampled_luma_ref.left[y/2] = s >> 3; } } @@ -492,27 +501,27 @@ void kvz_predict_cclm( s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; - sampled_luma.top[x / 2] = s >> 3; + sampled_luma_ref.top[x / 2] = s >> 3; } } - //for (int y = MAX(0, y0 -1) % 64; y < y0 + height; y++) { - // for (int x = MAX(0, x0 - 1) % 64; x < x0 + width; x++) { - // int s = 4; - // s += y_rec[2 * x] * 2; - // s += y_rec[2 * x + 1]; - // s += y_rec[2 * x - (x + x0 > 0)]; - // s += y_rec[2 * x + stride] * 2; - // s += y_rec[2 * x + 1 + stride]; - // s += y_rec[2 * x - (x + x0 > 0) + stride]; - // sampled_luma[x + 1 + (y + 1) * 33] = s >> 3; - // } - // y_rec += 64; - //} + for (int y = 0; y < height * 2; y+=2) { + for (int x = 0; x < width * 2; x+=2) { + int s = 4; + s += y_rec[2 * x] * 2; + s += y_rec[2 * x + 1]; + s += x0 && !x ? state->tile->frame->rec->y[x0 - 1 + y0 * stride] : y_rec[2 * x - (x + x0 > 0)]; + s += y_rec[2 * x + LCU_WIDTH] * 2; + s += y_rec[2 * x + 1 + LCU_WIDTH]; + s += x0 && !x ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - (x + x0 > 0) + stride]; + sampled_luma[x + y * width] = s >> 3; + } + y_rec += LCU_WIDTH; + } int a, b, shift; - get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma, chroma_ref, &a, &b, &shift); - linear_transform_cclm(a, b, shift, dst); + get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma_ref, chroma_ref, &a, &b, &shift); + linear_transform_cclm(a, shift, b,sampled_luma, dst, width, height); } void kvz_intra_predict( From ecc55c9edf97e9a2be48fff68fa4f4fd579d3389 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 16 Nov 2021 14:21:38 +0200 Subject: [PATCH 05/19] [cclm] align ref pixel generation code and add comments --- src/intra.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/intra.c b/src/intra.c index 32d1750c..be85b3d3 100644 --- a/src/intra.c +++ b/src/intra.c @@ -281,11 +281,12 @@ static void get_cclm_parameters( int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H; int left_template_samp_num = height; - int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w; - int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h; - int total_units = total_left_units + total_above_units + 1; - int above_right_units = total_above_units - tu_width_in_units; - int left_below_units = total_left_units - tu_height_in_units; + // These are used for calculating some stuff for non-square CUs + //int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w; + //int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h; + //int total_units = total_left_units + total_above_units + 1; + //int above_right_units = total_above_units - tu_width_in_units; + //int left_below_units = total_left_units - tu_height_in_units; int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs int avai_left_below_units = 0; int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size); @@ -293,8 +294,6 @@ static void get_cclm_parameters( bool above_available = avai_above_units != 0; bool left_available = avai_left_units != 0; - // Not sure if LCU_CU_WIDTH is correct macro here, - // should be 16 for 64 CTU width 32 for 128 char internal_bit_depth = state->encoder_control->bitdepth; @@ -478,15 +477,20 @@ void kvz_predict_cclm( int y_scu = SUB_SCU(y0); y_rec += x_scu + y_scu * LCU_WIDTH; + // Essentially what this does is that it uses 6-tap filtering to downsample + // the luma intra references down to match the resolution of the chroma channel. + // The luma reference is only needed when we are not on the edge of the picture. + // Because the reference pixels that are needed on the edge of the ctu this code + // is kinda messy but what can you do if(x0) { for(int y = 0; y < height * 2; y+=2) { int s = 4; - s += x_scu ? y_rec[y * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride] * 2; - s += x_scu ? y_rec[y * LCU_WIDTH - 2]: state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride]; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] * 2: state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride] * 2; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2]: state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; - s += y_rec[y * LCU_WIDTH]; - s += y_rec[(y + 1) * LCU_WIDTH]; + s += x_scu ? y_rec[y * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride] * 2; + s += x_scu ? y_rec[y * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride] * 2; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; + s += y_rec[y * LCU_WIDTH]; + s += y_rec[(y + 1) * LCU_WIDTH]; sampled_luma_ref.left[y/2] = s >> 3; } } @@ -495,16 +499,18 @@ void kvz_predict_cclm( for(int x = 0; x < width*2; x += 2) { bool left_padding = x0 || x; int s = 4; - s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 2) * stride] * 2; - s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 1) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 2) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 1) * stride] * 2; s += y_scu ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; sampled_luma_ref.top[x / 2] = s >> 3; } } + // Downsample the reconstructed luma sample so that they can be mapped into the chroma + // to generate the chroma prediction for (int y = 0; y < height * 2; y+=2) { for (int x = 0; x < width * 2; x+=2) { int s = 4; @@ -521,6 +527,7 @@ void kvz_predict_cclm( int a, b, shift; get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma_ref, chroma_ref, &a, &b, &shift); + linear_transform_cclm(a, shift, b,sampled_luma, dst, width, height); } From 91877ef2c4ac9c24a67c94ab53787938c84aaca2 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Tue, 16 Nov 2021 15:02:19 +0200 Subject: [PATCH 06/19] [cclm] fix condition for selecting the data from frame buffer and not ctu --- src/intra.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/intra.c b/src/intra.c index be85b3d3..2cde088b 100644 --- a/src/intra.c +++ b/src/intra.c @@ -516,10 +516,13 @@ void kvz_predict_cclm( int s = 4; s += y_rec[2 * x] * 2; s += y_rec[2 * x + 1]; - s += x0 && !x ? state->tile->frame->rec->y[x0 - 1 + y0 * stride] : y_rec[2 * x - (x + x0 > 0)]; + // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, + // *except* when we are also at the edge of the frame, in which case we want to duplicate + // the edge pixel + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + y0 * stride] : y_rec[2 * x - ((x + x0) > 0)]; s += y_rec[2 * x + LCU_WIDTH] * 2; s += y_rec[2 * x + 1 + LCU_WIDTH]; - s += x0 && !x ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - (x + x0 > 0) + stride]; + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + stride]; sampled_luma[x + y * width] = s >> 3; } y_rec += LCU_WIDTH; From 70f6afff4f480ce74eac13762d7f9d25ef470fd5 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 17 Nov 2021 11:19:55 +0200 Subject: [PATCH 07/19] [cclm] Search should be working still has duplicate code and inefficiencies, but the actual search *should* be working. --- src/intra.c | 72 ++++++++++++++++++++++++++++++++++------------ src/intra.h | 10 ++++++- src/search.c | 9 +++--- src/search_intra.c | 61 ++++++++++++++++++++++++++------------- src/search_intra.h | 5 ++-- 5 files changed, 111 insertions(+), 46 deletions(-) diff --git a/src/intra.c b/src/intra.c index 2cde088b..21c539cf 100644 --- a/src/intra.c +++ b/src/intra.c @@ -250,9 +250,9 @@ static void intra_pred_dc( enum lm_mode { - LM_CHROMA_IDX = 67, - LM_CHROMA_L_IDX = 68, - LM_CHROMA_T_IDX = 69, + LM_CHROMA_IDX = 81, + LM_CHROMA_L_IDX = 82, + LM_CHROMA_T_IDX = 83, }; @@ -261,7 +261,7 @@ static void get_cclm_parameters( int8_t width, int8_t height, int8_t mode, int x0, int y0, kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref, - int *a, int *b, int *shift) { + int16_t *a, int16_t*b, int16_t*shift) { const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX); @@ -412,7 +412,7 @@ static void get_cclm_parameters( int v = DivSigTable[normDiff] | 8; x += normDiff != 0; - int y = kvz_math_floor_log2(abs(diffC)) + 1; + int y = diffC ? kvz_math_floor_log2(abs(diffC)) + 1 : 0; int add = 1 << y >> 1; *a = (diffC * v + add) >> y; *shift = 3 + x - y; @@ -440,7 +440,10 @@ static void get_cclm_parameters( } } -static void linear_transform_cclm(int scale, int shift, int offset, kvz_pixel * src, kvz_pixel * dst, int stride, int height) { +static void linear_transform_cclm(cclm_parameters_t* cclm_params, kvz_pixel * src, kvz_pixel * dst, int stride, int height) { + int scale = cclm_params->a; + int shift = cclm_params->shift; + int offset = cclm_params->b; for (int y = 0; y < height; ++y) { for (int x=0; x < stride; ++x) { int val = src[x + y * stride] * scale; @@ -464,7 +467,8 @@ void kvz_predict_cclm( const int8_t mode, kvz_pixel const * y_rec, kvz_intra_references* chroma_ref, - kvz_pixel* dst + kvz_pixel* dst, + cclm_parameters_t* cclm_params ) { assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); @@ -523,15 +527,19 @@ void kvz_predict_cclm( s += y_rec[2 * x + LCU_WIDTH] * 2; s += y_rec[2 * x + 1 + LCU_WIDTH]; s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + stride]; - sampled_luma[x + y * width] = s >> 3; + sampled_luma[x / 2 + y / 2 * width] = s >> 3; } y_rec += LCU_WIDTH; } - int a, b, shift; + int16_t a, b, shift; get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma_ref, chroma_ref, &a, &b, &shift); + cclm_params->shift = shift; + cclm_params->a = a; + cclm_params->b = b; - linear_transform_cclm(a, shift, b,sampled_luma, dst, width, height); + if(dst) + linear_transform_cclm(cclm_params, sampled_luma, dst, width, height); } void kvz_intra_predict( @@ -859,6 +867,7 @@ static void intra_recon_tb_leaf( int y, int depth, int8_t intra_mode, + cclm_parameters_t *cclm_params, lcu_t *lcu, color_t color) { @@ -878,14 +887,37 @@ static void intra_recon_tb_leaf( state->tile->frame->width, state->tile->frame->height, }; - const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; + int x_scu = SUB_SCU(x); + const vector2d_t lcu_px = {x_scu >> shift, SUB_SCU(y) >> shift}; kvz_intra_references refs; kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp); kvz_pixel pred[32 * 32]; + int stride = state->tile->frame->source->stride; const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); - kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); + if(intra_mode < 68) { + kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); + } else { + kvz_pixel *y_rec = lcu->rec.y; + for (int y_ = 0; y_ < width * 2; y_ += 2) { + for (int x_ = 0; x_ < width * 2; x_ += 2) { + int s = 4; + s += y_rec[2 * x_] * 2; + s += y_rec[2 * x_ + 1]; + // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, + // *except* when we are also at the edge of the frame, in which case we want to duplicate + // the edge pixel + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + y * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; + s += y_rec[2 * x_ + LCU_WIDTH] * 2; + s += y_rec[2 * x_ + 1 + LCU_WIDTH]; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + stride]; + pred[x_ / 2 + y_ * width / 2] = s >> 3; + } + y_rec += LCU_WIDTH; + } + linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width); + } const int index = lcu_px.x + lcu_px.y * lcu_width; kvz_pixel *block = NULL; @@ -920,6 +952,7 @@ static void intra_recon_tb_leaf( * \param mode_luma intra mode for luma, or -1 to skip luma recon * \param mode_chroma intra mode for chroma, or -1 to skip chroma recon * \param cur_cu pointer to the CU, or NULL to fetch CU from LCU + * \param cclm_params pointer for the cclm_parameters, can be NULL if the mode is not cclm mode * \param lcu containing LCU */ void kvz_intra_recon_cu( @@ -930,6 +963,7 @@ void kvz_intra_recon_cu( int8_t mode_luma, int8_t mode_chroma, cu_info_t *cur_cu, + cclm_parameters_t *cclm_params, lcu_t *lcu) { const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; @@ -954,10 +988,10 @@ void kvz_intra_recon_cu( const int32_t x2 = x + offset; const int32_t y2 = y + offset; - kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); - kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); - kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); - kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); + kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); // Propagate coded block flags from child CUs to parent CU. uint16_t child_cbfs[3] = { @@ -978,11 +1012,11 @@ void kvz_intra_recon_cu( const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0); // Process a leaf TU. if (has_luma) { - intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y); + intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y); } if (has_chroma) { - intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_U); - intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_V); + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U); + intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V); } kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false); diff --git a/src/intra.h b/src/intra.h index 5652ec41..eb737be7 100644 --- a/src/intra.h +++ b/src/intra.h @@ -54,6 +54,12 @@ typedef struct bool filtered_initialized; } kvz_intra_references; +typedef struct +{ + int16_t a; + int16_t shift; + int16_t b; +} cclm_parameters_t; /** * \brief Function for deriving intra luma predictions @@ -118,6 +124,7 @@ void kvz_intra_recon_cu( int8_t mode_luma, int8_t mode_chroma, cu_info_t *cur_cu, + cclm_parameters_t* cclm_params, lcu_t *lcu); @@ -132,5 +139,6 @@ void kvz_predict_cclm( const int8_t mode, kvz_pixel const* y_rec, kvz_intra_references* chroma_ref, - kvz_pixel* dst + kvz_pixel* dst, + cclm_parameters_t* cclm_params ); \ No newline at end of file diff --git a/src/search.c b/src/search.c index 278b5d50..685c8428 100644 --- a/src/search.c +++ b/src/search.c @@ -709,7 +709,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, x, y, depth, cur_cu->intra.mode, -1, // skip chroma - NULL, lcu); + NULL, NULL, lcu); // TODO: This heavily relies to square CUs if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) { @@ -717,8 +717,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // rd2. Possibly because the luma mode search already takes chroma // into account, so there is less of a chanse of luma mode being // really bad for chroma. + cclm_parameters_t cclm_params[2]; if (ctrl->cfg.rdo == 3) { - cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu); + cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); } @@ -726,7 +727,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, x & ~7, y & ~7, // TODO: as does this depth, -1, cur_cu->intra.mode_chroma, // skip luma - NULL, lcu); + NULL, cclm_params, lcu); } } else if (cur_cu->type == CU_INTER) { @@ -883,7 +884,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, x, y, depth, cur_cu->intra.mode, mode_chroma, - NULL, lcu); + NULL,NULL, lcu); cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu); if (has_chroma) { diff --git a/src/search_intra.c b/src/search_intra.c index 52b654bd..bbef6fbe 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -258,6 +258,7 @@ static double search_intra_trdepth(encoder_state_t * const state, int intra_mode, int cost_treshold, cu_info_t *const pred_cu, lcu_t *const lcu, + cclm_parameters_t *cclm_params, const int mts_mode) { assert(depth >= 0 && depth <= MAX_PU_DEPTH); @@ -332,7 +333,7 @@ static double search_intra_trdepth(encoder_state_t * const state, x_px, y_px, depth, intra_mode, -1, - pred_cu, lcu); + pred_cu, cclm_params, lcu); // TODO: Not sure if this should be 0 or 1 but at least seems to work with 1 if (pred_cu->tr_idx > 1) @@ -360,7 +361,7 @@ static double search_intra_trdepth(encoder_state_t * const state, x_px, y_px, depth, -1, chroma_mode, - pred_cu, lcu); + pred_cu, cclm_params, lcu); best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); } pred_cu->tr_skip = best_tr_idx == MTS_SKIP; @@ -391,15 +392,15 @@ static double search_intra_trdepth(encoder_state_t * const state, if (depth < max_depth && depth < MAX_PU_DEPTH) { split_cost = 3 * state->lambda; - split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); + split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1); if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); + split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1); } if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); + split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1); } if (split_cost < nosplit_cost) { - split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); + split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1); } double cbf_bits = 0.0; @@ -468,6 +469,8 @@ static void search_intra_chroma_rough(encoder_state_t * const state, cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); + cclm_parameters_t cclm_params; + kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); @@ -484,7 +487,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, for (int i = 5; i < 8; i++) { kvz_predict_cclm( state, - COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, _pred); + COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, pred, &cclm_params); } kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); @@ -749,7 +752,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state, // Reset transform split data in lcu.cu for this area. kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); - double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, -1); + double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, NULL, -1); costs[rdo_mode] += mode_cost; trafo[rdo_mode] = pred_cu.tr_idx; @@ -774,7 +777,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state, pred_cu.intra.mode = modes[0]; pred_cu.intra.mode_chroma = modes[0]; FILL(pred_cu.cbf, 0); - search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, trafo[0]); + search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]); } return modes_to_check; @@ -826,18 +829,21 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, int8_t intra_mode, int8_t modes[8], int8_t num_modes, - lcu_t *const lcu) + lcu_t *const lcu, cclm_parameters_t *best_cclm) { const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4); - kvz_intra_references refs; + kvz_intra_references refs[2]; const vector2d_t luma_px = { x_px, y_px }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height, }; - kvz_intra_build_reference(6-depth, COLOR_U, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp); + kvz_intra_build_reference(6-depth, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); + kvz_intra_build_reference(6-depth, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); + + cclm_parameters_t cclm_params[2] = {0}; if (reconstruct_chroma) { const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; @@ -846,8 +852,11 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, struct { double cost; int8_t mode; + cclm_parameters_t cclm[2]; } chroma, best_chroma; + // chroma.cclm = cclm_params; + best_chroma.mode = 0; best_chroma.cost = MAX_INT; @@ -856,14 +865,24 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, if (chroma.mode == -1) continue; if(chroma.mode < 67) { kvz_intra_recon_cu(state, - x_px, y_px, - depth, - -1, chroma.mode, // skip luma - NULL, lcu); + x_px, y_px, + depth, + -1, chroma.mode, // skip luma + NULL, NULL, lcu); } else { kvz_predict_cclm( - state, COLOR_U, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs, NULL); + state, COLOR_U, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[0], NULL, &cclm_params[0]); + chroma.cclm[0] = cclm_params[0]; + kvz_predict_cclm( + state, COLOR_V, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[1], NULL, &cclm_params[1]); + chroma.cclm[1] = cclm_params[1]; + + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + -1, chroma.mode, // skip luma + NULL, cclm_params, lcu); } chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); @@ -874,6 +893,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, best_chroma = chroma; } } + best_cclm[0] = best_chroma.cclm[0]; + best_cclm[1] = best_chroma.cclm[1]; return best_chroma.mode; } @@ -884,7 +905,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, const int x_px, const int y_px, - const int depth, lcu_t *lcu) + const int depth, lcu_t *lcu, cclm_parameters_t *best_cclm) { const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; @@ -892,7 +913,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int8_t intra_mode = cur_pu->intra.mode; double costs[8]; - int8_t modes[8] = { 0, 50, 18, 1, -1, 67, 68, 69 }; + int8_t modes[8] = { 0, 50, 18, 1, -1, 81, 82, 83 }; if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) { modes[4] = intra_mode; } @@ -934,7 +955,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int8_t intra_mode_chroma = intra_mode; if (num_modes > 1) { - intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu); + intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu, best_cclm); } return intra_mode_chroma; diff --git a/src/search_intra.h b/src/search_intra.h index e7cc79a0..83a109f6 100644 --- a/src/search_intra.h +++ b/src/search_intra.h @@ -41,17 +41,18 @@ #include "cu.h" #include "encoderstate.h" #include "global.h" // IWYU pragma: keep +#include "intra.h" double kvz_luma_mode_bits(const encoder_state_t *state, - int8_t luma_mode, const int8_t *intra_preds); + int8_t luma_mode, const int8_t *intra_preds); double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, int8_t luma_mode); int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, const int x_px, const int y_px, - const int depth, lcu_t *lcu); + const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm); void kvz_search_cu_intra(encoder_state_t * const state, const int x_px, const int y_px, From 1c431d8f884d270342fc2c3a24efe7f9a86b72c7 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 17 Nov 2021 11:33:57 +0200 Subject: [PATCH 08/19] [cclm] Add commandline argument --- configure.ac | 2 +- src/cfg.c | 5 +++++ src/cli.c | 10 ++++++++-- src/encoder_state-bitstream.c | 2 +- src/kvazaar.h | 2 ++ 5 files changed, 17 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index dbbb1b72..3a0d1582 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=6 -ver_minor=6 +ver_minor=7 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS diff --git a/src/cfg.c b/src/cfg.c index a7a1f074..f85e5c71 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -209,6 +209,8 @@ int kvz_config_init(kvz_config *cfg) cfg->amvr = 0; + cfg->cclm = 0; + return 1; } @@ -1486,6 +1488,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) else if OPT("amvr") { cfg->amvr = (bool)atobool(value); } + else if OPT("cclm") { + cfg->cclm = (bool)atobool(value); + } else { return 0; } diff --git a/src/cli.c b/src/cli.c index edefc814..270e924f 100644 --- a/src/cli.c +++ b/src/cli.c @@ -178,6 +178,8 @@ static const struct option long_options[] = { { "no-jccr", no_argument, NULL, 0 }, { "amvr", no_argument, NULL, 0 }, { "no-amvr", no_argument, NULL, 0 }, + { "cclm", no_argument, NULL, 0 }, + { "no-cclm", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -629,8 +631,12 @@ void print_help(void) " - both: MTS applied for both intra and inter blocks.\n" " - implicit: uses implicit MTS. Applies DST7 instead \n" " of DCT2 to certain intra blocks.\n" - " --(no-)jccr : Joint coding of chroma residual.\n" - " Requires rdo> = 2. [disabled]\n" + " --(no-)jccr : Joint coding of chroma residual. " + " Requires rdo> = 2. [disabled]\n" + " --(no-)cclm : Cross component linear model. \n" + " Extra chroma prediction modes that are formed\n" + " via linear transformation from the luma\n" + " prediction. Requires rdo >=3. [disabled\n" " --(no-)amvr : Adaptive Motion Vector Resolution.\n" " Code some mv's with reduced resolution [disabled]\n" "\n" diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 7d73544e..537c1be9 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -722,7 +722,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, WRITE_U(stream, 0, 1, "sps_mip_enabled_flag"); // if(!no_cclm_constraint_flag) if(encoder->chroma_format != KVZ_CSP_400) { - WRITE_U(stream, 0, 1, "sps_cclm_enabled_flag"); + WRITE_U(stream, encoder->cfg.cclm, 1, "sps_cclm_enabled_flag"); } if (encoder->chroma_format == KVZ_CSP_420) { WRITE_U(stream, 0, 1, "sps_chroma_horizontal_collocated_flag"); diff --git a/src/kvazaar.h b/src/kvazaar.h index 7506d3bc..61c13031 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -516,6 +516,8 @@ typedef struct kvz_config int8_t jccr; + int8_t cclm; + int8_t amvr; /* \brief Adaptive motion vector resolution parameter */ } kvz_config; From 754607dae4840dbd3f436b8cd6bb984f9b63c536 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 17 Nov 2021 12:41:42 +0200 Subject: [PATCH 09/19] [cclm] bitstream writing + cabac --- src/cabac.h | 2 ++ src/context.c | 17 +++++++++++++++++ src/encode_coding_tree.c | 24 ++++++++++++++++++++---- 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/cabac.h b/src/cabac.h index 34f48c2c..9e7e983b 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -117,6 +117,8 @@ typedef struct cabac_ctx_t transform_skip_gt1[4]; cabac_ctx_t transform_skip_par; cabac_ctx_t transform_skip_gt2[5]; + cabac_ctx_t cclm_flag; + cabac_ctx_t cclm_model; } ctx; } cabac_data_t; diff --git a/src/context.c b/src/context.c index 7e174fb9..78d3f134 100644 --- a/src/context.c +++ b/src/context.c @@ -395,6 +395,20 @@ static const uint8_t INIT_IMV_FLAG[4][5] = { { 0, 5, 0, 0, 4, }, }; +static const uint8_t INIT_CCLM_FLAG[4] = { + { 26, }, + { 34, }, + { 59, }, + { 4, }, +}; + +static const uint8_t INIT_CCLM_MODEL[4] = { + { 27, }, + { 27, }, + { 27, }, + { 9, }, +}; + /* static const uint16_t g_inistateToCount[128] = { 614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342, @@ -471,6 +485,9 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) kvz_ctx_init(&cabac->ctx.chroma_pred_model, QP, INIT_CHROMA_PRED_MODE[slice], INIT_CHROMA_PRED_MODE[3]); + kvz_ctx_init(&cabac->ctx.cclm_flag, QP, INIT_CCLM_FLAG[slice], INIT_CCLM_FLAG[3]); + kvz_ctx_init(&cabac->ctx.cclm_model, QP, INIT_CCLM_MODEL[slice], INIT_CCLM_MODEL[3]); + for (i = 0; i < 3; i++) { kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]); diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 7de4f61a..cfc0f4cf 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -701,7 +701,7 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state, return non_zero_mvd; } -static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width) { +static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width, const int cclm_enabled) { unsigned pred_mode = 0; unsigned chroma_pred_modes[8] = {0, 50, 18, 1, 67, 81, 82, 83}; const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, 0); @@ -710,7 +710,23 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c int8_t chroma_intra_dir = first_pu->intra.mode_chroma; int8_t luma_intra_dir = first_pu->intra.mode; + bool derived_mode = chroma_intra_dir == luma_intra_dir; + bool cclm_mode = chroma_intra_dir > 67; + + if (cclm_enabled) { + cabac->cur_ctx = &cabac->ctx.cclm_flag; + CABAC_BIN(cabac, cclm_mode, "cclm_flag"); + if(cclm_mode) { + cabac->cur_ctx = &cabac->ctx.cclm_model; + CABAC_BIN(cabac, chroma_intra_dir != 81, "cclm_model_1"); + if(chroma_intra_dir != 81) { + CABAC_BIN(cabac, chroma_intra_dir == 83, "cclm_model_2"); + } + return; + } + + } cabac->cur_ctx = &(cabac->ctx.chroma_pred_model); CABAC_BIN(cabac, derived_mode ? 0 : 1, "intra_chroma_pred_mode"); @@ -722,7 +738,7 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c break; } }*/ - for (; pred_mode < 8; pred_mode++) { + for (; pred_mode < 5; pred_mode++) { if (chroma_intra_dir == chroma_pred_modes[pred_mode]) { break; } @@ -983,7 +999,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state, // Code chroma prediction mode. if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth != 4) { - encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width); + encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm); } encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff); @@ -991,7 +1007,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state, encode_mts_idx(state, cabac, cur_cu); if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) { - encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width); + encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm); encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff); } From 7187678526110b65473fd0b5253fabf82081e129 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 17 Nov 2021 13:01:06 +0200 Subject: [PATCH 10/19] [cclm] Only do cclm search when it is enabled and add test for cclm --- src/intra.c | 1 + src/search_intra.c | 3 ++- tests/test_intra.sh | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/intra.c b/src/intra.c index 21c539cf..5de302c9 100644 --- a/src/intra.c +++ b/src/intra.c @@ -472,6 +472,7 @@ void kvz_predict_cclm( ) { assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); + assert(state->encoder_control->cfg.cclm); kvz_intra_ref sampled_luma_ref; diff --git a/src/search_intra.c b/src/search_intra.c index bbef6fbe..39e4c48a 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -485,6 +485,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, costs[i] += satd_func(pred, orig_block); } for (int i = 5; i < 8; i++) { + assert(state->encoder_control->cfg.cclm); kvz_predict_cclm( state, COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, pred, &cclm_params); @@ -926,7 +927,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int num_modes = modes_in_depth[depth]; if (state->encoder_control->cfg.rdo == 3) { - num_modes = 8; + num_modes = state->encoder_control->cfg.cclm ? 8 : 5; } // Don't do rough mode search if all modes are selected. diff --git a/tests/test_intra.sh b/tests/test_intra.sh index 4c2e93bd..af806a6a 100755 --- a/tests/test_intra.sh +++ b/tests/test_intra.sh @@ -10,8 +10,9 @@ common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no valgrind_test $common_args --rd=1 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq +valgrind_test $common_args --rd=3 valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0 valgrind_test $common_args --alf=full --wpp --threads=1 -valgrind_test $common_args --jccr valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra +valgrind_test $common_args --rd=3 --cclm --jccr From 87a458dc85ddc9e4839ba13533f0b2b3ab98d03e Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 17 Nov 2021 13:12:21 +0200 Subject: [PATCH 11/19] [cclm] fix incorrect log_width for chroma ref generation --- src/search_intra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search_intra.c b/src/search_intra.c index 39e4c48a..e4afdcf1 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -841,8 +841,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, state->tile->frame->width, state->tile->frame->height, }; - kvz_intra_build_reference(6-depth, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); - kvz_intra_build_reference(6-depth, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); cclm_parameters_t cclm_params[2] = {0}; From c16ce9a82bb095c4984456eead156751e6940ac9 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Thu, 18 Nov 2021 06:55:42 +0200 Subject: [PATCH 12/19] [cclm] fix bin type for the second cclm mode bit --- src/encode_coding_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index cfc0f4cf..4cbc4a0f 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -721,7 +721,7 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c cabac->cur_ctx = &cabac->ctx.cclm_model; CABAC_BIN(cabac, chroma_intra_dir != 81, "cclm_model_1"); if(chroma_intra_dir != 81) { - CABAC_BIN(cabac, chroma_intra_dir == 83, "cclm_model_2"); + CABAC_BIN_EP(cabac, chroma_intra_dir == 83, "cclm_model_2"); } return; } From d9875a3d923c6eef94d41f1ff4de104e862f516f Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Thu, 18 Nov 2021 07:13:16 +0200 Subject: [PATCH 13/19] [cclm] fix trying cclm on the upper level --- src/intra.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/intra.c b/src/intra.c index 5de302c9..79079404 100644 --- a/src/intra.c +++ b/src/intra.c @@ -917,7 +917,14 @@ static void intra_recon_tb_leaf( } y_rec += LCU_WIDTH; } - linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width); + if(cclm_params == NULL) { + cclm_parameters_t temp_params; + kvz_predict_cclm( + state, color, width, width, x, y, stride, intra_mode, lcu->rec.y, &refs, pred, &temp_params); + } + else { + linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width); + } } const int index = lcu_px.x + lcu_px.y * lcu_width; From f030158703e3467c10a6a18a2009b1c46f8e5adc Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Fri, 19 Nov 2021 11:54:51 +0200 Subject: [PATCH 14/19] [cclm] Fix search and parameter generation for CCLM --- src/encoder_state-ctors_dtors.c | 2 +- src/intra.c | 121 ++++++++++++++++++++------------ src/intra.h | 2 +- src/search.c | 39 +++++++++- src/search_intra.c | 51 +++++++++++--- src/videoframe.c | 11 ++- src/videoframe.h | 4 +- 7 files changed, 170 insertions(+), 60 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index c7c9985d..7a5cff52 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -122,7 +122,7 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, const int width, const int height, const int width_in_lcu, const int height_in_lcu) { const encoder_control_t * const encoder = state->encoder_control; - state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type); + state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm); state->tile->frame->rec = NULL; diff --git a/src/intra.c b/src/intra.c index 79079404..f786781c 100644 --- a/src/intra.c +++ b/src/intra.c @@ -259,7 +259,7 @@ enum lm_mode static void get_cclm_parameters( encoder_state_t const* const state, int8_t width, int8_t height, int8_t mode, - int x0, int y0, + int x0, int y0, int avai_above_right_units, int avai_left_below_units, kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref, int16_t *a, int16_t*b, int16_t*shift) { @@ -287,8 +287,8 @@ static void get_cclm_parameters( //int total_units = total_left_units + total_above_units + 1; //int above_right_units = total_above_units - tu_width_in_units; //int left_below_units = total_left_units - tu_height_in_units; - int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs - int avai_left_below_units = 0; + //int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs + //int avai_left_below_units = 0; int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size); int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size); @@ -465,7 +465,7 @@ void kvz_predict_cclm( const int16_t y0, const int16_t stride, const int8_t mode, - kvz_pixel const * y_rec, + lcu_t* const lcu, kvz_intra_references* chroma_ref, kvz_pixel* dst, cclm_parameters_t* cclm_params @@ -480,61 +480,92 @@ void kvz_predict_cclm( int x_scu = SUB_SCU(x0); int y_scu = SUB_SCU(y0); - y_rec += x_scu + y_scu * LCU_WIDTH; + + int available_above_right = 0; + int available_left_below = 0; + + + kvz_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH; // Essentially what this does is that it uses 6-tap filtering to downsample // the luma intra references down to match the resolution of the chroma channel. // The luma reference is only needed when we are not on the edge of the picture. // Because the reference pixels that are needed on the edge of the ctu this code // is kinda messy but what can you do + + if (y0) { + for (; available_above_right < width / 2; available_above_right++) { + int x_extension = x_scu + width * 2 + 4 * available_above_right; + cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4); + if (pu->type == CU_NOTSET || x_extension > LCU_WIDTH) break; + } + if(y_scu == 0) { + if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4); + for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { + bool left_padding = x0 || x; + sampled_luma_ref.top[x / 2] = (state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2 + + state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride] + + state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride] + + 2) >> 2; + } + } + else { + for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { + bool left_padding = x0 || x; + int s = 4; + s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; + s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; + s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; + sampled_luma_ref.top[x / 2] = s >> 3; + } + } + } + if(x0) { - for(int y = 0; y < height * 2; y+=2) { + for (; available_left_below < height / 2; available_left_below++) { + int y_extension = y_scu + height * 2 + 4 * available_left_below; + cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension); + if (pu->type == CU_NOTSET || y_extension > LCU_WIDTH) break; + if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break; + } + for(int y = 0; y < height * (available_left_below ? 4 : 2); y+=2) { int s = 4; - s += x_scu ? y_rec[y * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride] * 2; - s += x_scu ? y_rec[y * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride]; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride] * 2; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; - s += y_rec[y * LCU_WIDTH]; - s += y_rec[(y + 1) * LCU_WIDTH]; + s += x_scu ? y_rec[y * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride] * 2; + s += x_scu ? y_rec[y * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride]; + s += x_scu ? y_rec[y * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride] * 2; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y + 1) * stride]; sampled_luma_ref.left[y/2] = s >> 3; } } - if(y0) { - for(int x = 0; x < width*2; x += 2) { - bool left_padding = x0 || x; - int s = 4; - s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 2) * stride] * 2; - s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 1) * stride] * 2; - s += y_scu ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; - sampled_luma_ref.top[x / 2] = s >> 3; - } - } + // Downsample the reconstructed luma sample so that they can be mapped into the chroma // to generate the chroma prediction - for (int y = 0; y < height * 2; y+=2) { - for (int x = 0; x < width * 2; x+=2) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { int s = 4; s += y_rec[2 * x] * 2; s += y_rec[2 * x + 1]; // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, // *except* when we are also at the edge of the frame, in which case we want to duplicate // the edge pixel - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + y0 * stride] : y_rec[2 * x - ((x + x0) > 0)]; + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y*2) * stride] : y_rec[2 * x - ((x + x0) > 0)]; s += y_rec[2 * x + LCU_WIDTH] * 2; s += y_rec[2 * x + 1 + LCU_WIDTH]; - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + stride]; - sampled_luma[x / 2 + y / 2 * width] = s >> 3; + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y * 2 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + LCU_WIDTH]; + sampled_luma[x + y * width] = s >> 3; } - y_rec += LCU_WIDTH; + y_rec += LCU_WIDTH * 2; } int16_t a, b, shift; - get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma_ref, chroma_ref, &a, &b, &shift); + get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift); cclm_params->shift = shift; cclm_params->a = a; cclm_params->b = b; @@ -889,7 +920,8 @@ static void intra_recon_tb_leaf( state->tile->frame->height, }; int x_scu = SUB_SCU(x); - const vector2d_t lcu_px = {x_scu >> shift, SUB_SCU(y) >> shift}; + int y_scu = SUB_SCU(y); + const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift }; kvz_intra_references refs; kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp); @@ -901,26 +933,27 @@ static void intra_recon_tb_leaf( kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); } else { kvz_pixel *y_rec = lcu->rec.y; - for (int y_ = 0; y_ < width * 2; y_ += 2) { - for (int x_ = 0; x_ < width * 2; x_ += 2) { + y_rec += x_scu + y_scu * LCU_WIDTH; + for (int y_ = 0; y_ < width; y_++) { + for (int x_ = 0; x_ < width; x_++) { int s = 4; s += y_rec[2 * x_] * 2; s += y_rec[2 * x_ + 1]; // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, // *except* when we are also at the edge of the frame, in which case we want to duplicate // the edge pixel - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + y * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; s += y_rec[2 * x_ + LCU_WIDTH] * 2; s += y_rec[2 * x_ + 1 + LCU_WIDTH]; - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + stride]; - pred[x_ / 2 + y_ * width / 2] = s >> 3; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH]; + pred[x_ + y_ * width] = s >> 3; } - y_rec += LCU_WIDTH; + y_rec += LCU_WIDTH * 2; } if(cclm_params == NULL) { cclm_parameters_t temp_params; kvz_predict_cclm( - state, color, width, width, x, y, stride, intra_mode, lcu->rec.y, &refs, pred, &temp_params); + state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params); } else { linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width); @@ -996,10 +1029,10 @@ void kvz_intra_recon_cu( const int32_t x2 = x + offset; const int32_t y2 = y + offset; - kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); // Propagate coded block flags from child CUs to parent CU. uint16_t child_cbfs[3] = { diff --git a/src/intra.h b/src/intra.h index eb737be7..846d77b2 100644 --- a/src/intra.h +++ b/src/intra.h @@ -137,7 +137,7 @@ void kvz_predict_cclm( const int16_t y0, const int16_t stride, const int8_t mode, - kvz_pixel const* y_rec, + lcu_t* const lcu, kvz_intra_references* chroma_ref, kvz_pixel* dst, cclm_parameters_t* cclm_params diff --git a/src/search.c b/src/search.c index 685c8428..5691d361 100644 --- a/src/search.c +++ b/src/search.c @@ -241,6 +241,33 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, } +static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec) { + if (!state->encoder_control->cfg.cclm) return; + int x_scu = SUB_SCU(x); + int y_scu = SUB_SCU(y); + y_rec += x_scu + y_scu * LCU_WIDTH; + int stride = state->tile->frame->source->stride; + + for (int y_ = 0; y_ < height && y_ * 2 + y < state->encoder_control->cfg.height; y_++) { + for (int x_ = 0; x_ < width; x_++) { + int s = 4; + s += y_rec[2 * x_] * 2; + s += y_rec[2 * x_ + 1]; + // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, + // *except* when we are also at the edge of the frame, in which case we want to duplicate + // the edge pixel + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; + s += y_rec[2 * x_ + LCU_WIDTH] * 2; + s += y_rec[2 * x_ + 1 + LCU_WIDTH]; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH]; + int index = x / 2 + x_ + (y / 2 + y_ )* stride / 2; + state->tile->frame->cclm_luma_rec[index] = s >> 3; + } + y_rec += LCU_WIDTH * 2; + } +} + + /** * Calculate RD cost for a Coding Unit. * \return Cost of block @@ -711,6 +738,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, cur_cu->intra.mode, -1, // skip chroma NULL, NULL, lcu); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); + // TODO: This heavily relies to square CUs if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) { // There is almost no benefit to doing the chroma mode search for @@ -863,7 +894,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // gets used, at least in the most obvious cases, while avoiding any // searching. if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH - && x + cu_width <= frame->width && y + cu_width <= frame->height) + && x + cu_width <= frame->width && y + cu_width <= frame->height && 0) { cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local); @@ -913,6 +944,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Copy this CU's mode all the way down for use in adjacent CUs mode // search. work_tree_copy_down(x_local, y_local, depth, work_tree); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); if (state->frame->slicetype != KVZ_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp @@ -925,6 +959,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Need to copy modes down since the lower level of the work tree is used // when searching SMP and AMP blocks. work_tree_copy_down(x_local, y_local, depth, work_tree); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); if (state->frame->slicetype != KVZ_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp diff --git a/src/search_intra.c b/src/search_intra.c index e4afdcf1..f225c777 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -488,7 +488,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, assert(state->encoder_control->cfg.cclm); kvz_predict_cclm( state, - COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, pred, &cclm_params); + COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params); } kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); @@ -498,6 +498,12 @@ static void search_intra_chroma_rough(encoder_state_t * const state, //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } + for (int i = 5; i < 8; i++) { + assert(state->encoder_control->cfg.cclm); + kvz_predict_cclm( + state, + COLOR_V, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params); + } kvz_sort_modes(modes, costs, 5); } @@ -836,17 +842,22 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, kvz_intra_references refs[2]; - const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t luma_px = { x_px & ~7, y_px & ~7 }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height, }; - kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); - kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); - cclm_parameters_t cclm_params[2] = {0}; if (reconstruct_chroma) { + + int c_width = MAX(32 >> (depth), 4); + + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); + + cclm_parameters_t cclm_params[2] = { 0 }; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); @@ -864,7 +875,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { chroma.mode = modes[chroma_mode_i]; if (chroma.mode == -1) continue; - if(chroma.mode < 67) { + if(chroma.mode < 67 || depth == 0) { kvz_intra_recon_cu(state, x_px, y_px, depth, @@ -872,18 +883,38 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, NULL, NULL, lcu); } else { + kvz_predict_cclm( - state, COLOR_U, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[0], NULL, &cclm_params[0]); + state, COLOR_U, + c_width, c_width, + x_px & ~7, y_px & ~7, + state->tile->frame->source->stride, + chroma.mode, + lcu, + &refs[0], NULL, + &cclm_params[0]); + chroma.cclm[0] = cclm_params[0]; + kvz_predict_cclm( - state, COLOR_V, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[1], NULL, &cclm_params[1]); + state, COLOR_V, + c_width, c_width, + x_px & ~7, y_px & ~7, + state->tile->frame->source->stride, + chroma.mode, + lcu, + &refs[1], NULL, + &cclm_params[1]); + chroma.cclm[1] = cclm_params[1]; - kvz_intra_recon_cu(state, + kvz_intra_recon_cu( + state, x_px, y_px, depth, -1, chroma.mode, // skip luma - NULL, cclm_params, lcu); + NULL, cclm_params, lcu + ); } chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); diff --git a/src/videoframe.c b/src/videoframe.c index 76ab1da7..77919a84 100644 --- a/src/videoframe.c +++ b/src/videoframe.c @@ -46,7 +46,7 @@ videoframe_t * kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, - enum kvz_alf alf_type) + enum kvz_alf alf_type, bool cclm) { videoframe_t *frame = calloc(1, sizeof(videoframe_t)); if (!frame) return 0; @@ -59,8 +59,12 @@ videoframe_t * kvz_videoframe_alloc(int32_t width, frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); if (chroma_format != KVZ_CSP_400) { frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); + if (cclm) { + assert(chroma_format == KVZ_CSP_420); + frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4); + } } - + return frame; } @@ -76,6 +80,9 @@ int kvz_videoframe_free(videoframe_t * const frame) kvz_image_free(frame->rec_lmcs); frame->source_lmcs_mapped = false; } + if(frame->cclm_luma_rec) { + FREE_POINTER(frame->cclm_luma_rec); + } kvz_image_free(frame->source); frame->source = NULL; diff --git a/src/videoframe.h b/src/videoframe.h index 3e8a6ed4..74963d85 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -53,6 +53,8 @@ typedef struct videoframe kvz_picture *rec; //!< \brief Reconstructed image. kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source. + kvz_pixel *cclm_luma_rec; //!< \brief buffer for the downsampled luma reconstruction for cclm + uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available int32_t* lmcs_avg; //!< \brief Average of LCU border pixels @@ -78,7 +80,7 @@ typedef struct videoframe } videoframe_t; -videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type); +videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type, bool cclm); int kvz_videoframe_free(videoframe_t * const frame); void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc); From 80ddb60ccf0b8a892526726b68779bca94e099c1 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 24 Nov 2021 08:46:08 +0200 Subject: [PATCH 15/19] [cclm] fix cclm when deblocking is enabled --- src/intra.c | 10 ++-------- src/search.c | 19 +++++++++++++++---- src/videoframe.c | 4 ++++ src/videoframe.h | 1 + 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/intra.c b/src/intra.c index f786781c..59a47cd7 100644 --- a/src/intra.c +++ b/src/intra.c @@ -497,17 +497,11 @@ void kvz_predict_cclm( for (; available_above_right < width / 2; available_above_right++) { int x_extension = x_scu + width * 2 + 4 * available_above_right; cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4); - if (pu->type == CU_NOTSET || x_extension > LCU_WIDTH) break; + if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break; } if(y_scu == 0) { if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4); - for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { - bool left_padding = x0 || x; - sampled_luma_ref.top[x / 2] = (state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2 + - state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride] + - state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride] + - 2) >> 2; - } + memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(kvz_pixel) * (width + available_above_right * 2)); } else { for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { diff --git a/src/search.c b/src/search.c index 5691d361..aaa98c40 100644 --- a/src/search.c +++ b/src/search.c @@ -241,7 +241,7 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, } -static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec) { +static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec, kvz_pixel extra_pixel) { if (!state->encoder_control->cfg.cclm) return; int x_scu = SUB_SCU(x); int y_scu = SUB_SCU(y); @@ -265,6 +265,17 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, } y_rec += LCU_WIDTH * 2; } + if((y + height * 2) % 64 == 0) { + int line = y / 64 * stride / 2; + y_rec -= LCU_WIDTH; + for (int i = 0; i < width; ++i) { + int s = 2; + s += y_rec[i * 2] * 2; + s += y_rec[i * 2 + 1]; + s += !x_scu && !i && x ? extra_pixel : y_rec[i * 2 - ((i + x) > 0)] ; + state->tile->frame->cclm_luma_rec_top_line[i + x / 2 + line] = s >> 2; + } + } } @@ -739,7 +750,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, NULL, NULL, lcu); downsample_cclm_rec( - state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] ); // TODO: This heavily relies to square CUs @@ -945,7 +956,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // search. work_tree_copy_down(x_local, y_local, depth, work_tree); downsample_cclm_rec( - state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] ); if (state->frame->slicetype != KVZ_SLICE_I) { @@ -960,7 +971,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // when searching SMP and AMP blocks. work_tree_copy_down(x_local, y_local, depth, work_tree); downsample_cclm_rec( - state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] ); if (state->frame->slicetype != KVZ_SLICE_I) { diff --git a/src/videoframe.c b/src/videoframe.c index 77919a84..a483202d 100644 --- a/src/videoframe.c +++ b/src/videoframe.c @@ -62,6 +62,7 @@ videoframe_t * kvz_videoframe_alloc(int32_t width, if (cclm) { assert(chroma_format == KVZ_CSP_420); frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4); + frame->cclm_luma_rec_top_line = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) / 2 * CEILDIV(height, 64)); } } @@ -83,6 +84,9 @@ int kvz_videoframe_free(videoframe_t * const frame) if(frame->cclm_luma_rec) { FREE_POINTER(frame->cclm_luma_rec); } + if(frame->cclm_luma_rec_top_line) { + FREE_POINTER(frame->cclm_luma_rec_top_line); + } kvz_image_free(frame->source); frame->source = NULL; diff --git a/src/videoframe.h b/src/videoframe.h index 74963d85..f77ec840 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -54,6 +54,7 @@ typedef struct videoframe kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source. kvz_pixel *cclm_luma_rec; //!< \brief buffer for the downsampled luma reconstruction for cclm + kvz_pixel *cclm_luma_rec_top_line; //!< \brief buffer for the downsampled luma reconstruction for cclm uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available int32_t* lmcs_avg; //!< \brief Average of LCU border pixels From 85ff5f23b1cf380a7f00f62efe515b53ff8d5783 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Mon, 22 Nov 2021 07:08:03 +0200 Subject: [PATCH 16/19] [cclm] Fix accessing elements beyond the CU array # Conflicts: # src/intra.c --- src/intra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intra.c b/src/intra.c index 59a47cd7..2d576208 100644 --- a/src/intra.c +++ b/src/intra.c @@ -522,7 +522,7 @@ void kvz_predict_cclm( for (; available_left_below < height / 2; available_left_below++) { int y_extension = y_scu + height * 2 + 4 * available_left_below; cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension); - if (pu->type == CU_NOTSET || y_extension > LCU_WIDTH) break; + if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break; if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break; } for(int y = 0; y < height * (available_left_below ? 4 : 2); y+=2) { From 697d4c0652e2eb9aa7c6df6c149d2210cf27f9d2 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Mon, 22 Nov 2021 08:15:08 +0200 Subject: [PATCH 17/19] [cclm] Add bits to RD calculation --- src/search_intra.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/search_intra.c b/src/search_intra.c index f225c777..a232d3d9 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -825,7 +825,19 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in if (chroma_mode == luma_mode) { mode_bits = CTX_ENTROPY_FBITS(ctx, 0); } else { - mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1); + if(chroma_mode > 67) { + mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1); + } + else { + ctx = &(state->cabac.ctx.cclm_model); + mode_bits = CTX_ENTROPY_FBITS(ctx, chroma_mode != 81); + if (chroma_mode != 81) mode_bits += 1; + } + } + // Technically this is encoded first but for this method of counting bits it does not matter + if(state->encoder_control->cfg.cclm) { + ctx = &(state->cabac.ctx.cclm_flag); + mode_bits += CTX_ENTROPY_FBITS(ctx, chroma_mode > 67); } return mode_bits; From ce175c503fafa33dfb1944e90820c39a9679feaa Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 24 Nov 2021 09:36:36 +0200 Subject: [PATCH 18/19] [cclm] remove unnecessary calculation of the downsample luma reference --- src/intra.c | 53 +++++------------------------------------------------ 1 file changed, 5 insertions(+), 48 deletions(-) diff --git a/src/intra.c b/src/intra.c index 2d576208..6def8cfc 100644 --- a/src/intra.c +++ b/src/intra.c @@ -525,38 +525,12 @@ void kvz_predict_cclm( if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break; if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break; } - for(int y = 0; y < height * (available_left_below ? 4 : 2); y+=2) { - int s = 4; - s += x_scu ? y_rec[y * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride] * 2; - s += x_scu ? y_rec[y * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride]; - s += x_scu ? y_rec[y * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y) * stride]; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride] * 2; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride]; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y + 1) * stride]; - sampled_luma_ref.left[y/2] = s >> 3; - } + for(int i = 0; i < height + available_left_below * 2; i++) { + sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride/2) + x0 / 2 - 1]; + } } - - - // Downsample the reconstructed luma sample so that they can be mapped into the chroma - // to generate the chroma prediction - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - int s = 4; - s += y_rec[2 * x] * 2; - s += y_rec[2 * x + 1]; - // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, - // *except* when we are also at the edge of the frame, in which case we want to duplicate - // the edge pixel - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y*2) * stride] : y_rec[2 * x - ((x + x0) > 0)]; - s += y_rec[2 * x + LCU_WIDTH] * 2; - s += y_rec[2 * x + 1 + LCU_WIDTH]; - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y * 2 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + LCU_WIDTH]; - sampled_luma[x + y * width] = s >> 3; - } - y_rec += LCU_WIDTH * 2; - } + kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride) / 4], sampled_luma, width, height, stride / 2, width); int16_t a, b, shift; get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift); @@ -926,24 +900,7 @@ static void intra_recon_tb_leaf( if(intra_mode < 68) { kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); } else { - kvz_pixel *y_rec = lcu->rec.y; - y_rec += x_scu + y_scu * LCU_WIDTH; - for (int y_ = 0; y_ < width; y_++) { - for (int x_ = 0; x_ < width; x_++) { - int s = 4; - s += y_rec[2 * x_] * 2; - s += y_rec[2 * x_ + 1]; - // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, - // *except* when we are also at the edge of the frame, in which case we want to duplicate - // the edge pixel - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; - s += y_rec[2 * x_ + LCU_WIDTH] * 2; - s += y_rec[2 * x_ + 1 + LCU_WIDTH]; - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH]; - pred[x_ + y_ * width] = s >> 3; - } - y_rec += LCU_WIDTH * 2; - } + kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width); if(cclm_params == NULL) { cclm_parameters_t temp_params; kvz_predict_cclm( From 385e91399aa62c77deb877ee8feb8006dd770902 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Thu, 25 Nov 2021 12:32:27 +0200 Subject: [PATCH 19/19] [intra rdo] Raise maximum rd level to 4 Separate the rd option for chroma intra mode search from the full intra mode search, i.e., rd=3 enables chroma mode search and rd=4 enables full intra mode search. --- src/cli.c | 4 ++-- src/search.c | 2 +- src/search_inter.c | 2 +- src/search_intra.c | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cli.c b/src/cli.c index 270e924f..28d7b773 100644 --- a/src/cli.c +++ b/src/cli.c @@ -552,8 +552,8 @@ void print_help(void) " - 0: Skip intra if inter is good enough.\n" " - 1: Rough intra mode search with SATD.\n" " - 2: Refine intra mode search with SSE.\n" - " - 3: Try all intra modes and enable intra\n" - " chroma mode search.\n" + " - 3: Enable intra chroma mode search.\n" + " - 4: Try all intra modes.\n" " --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n" " [disabled]\n" " --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero\n" diff --git a/src/search.c b/src/search.c index aaa98c40..3e715d53 100644 --- a/src/search.c +++ b/src/search.c @@ -760,7 +760,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // into account, so there is less of a chanse of luma mode being // really bad for chroma. cclm_parameters_t cclm_params[2]; - if (ctrl->cfg.rdo == 3) { + if (ctrl->cfg.rdo >= 3) { cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); } diff --git a/src/search_inter.c b/src/search_inter.c index 3b83c5a4..6ecb422d 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1937,7 +1937,7 @@ static void search_pu_inter(encoder_state_t * const state, } // TODO: this probably should have a separate command line option - if (cfg->rdo == 3) { + if (cfg->rdo >= 3) { search_pu_inter_bipred(&info, depth, lcu, cur_cu, inter_cost, inter_bitcost); } } diff --git a/src/search_intra.c b/src/search_intra.c index a232d3d9..e6890d5a 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -969,7 +969,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 }; int num_modes = modes_in_depth[depth]; - if (state->encoder_control->cfg.rdo == 3) { + if (state->encoder_control->cfg.rdo >= 3) { num_modes = state->encoder_control->cfg.cclm ? 8 : 5; } @@ -1054,7 +1054,7 @@ void kvz_search_cu_intra(encoder_state_t * const state, kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; int8_t number_of_modes = 0; - bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 3); + bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4); if (!skip_rough_search) { number_of_modes = search_intra_rough(state, ref_pixels, LCU_WIDTH, @@ -1075,9 +1075,9 @@ void kvz_search_cu_intra(encoder_state_t * const state, const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) { int number_of_modes_to_search; - if (rdo_level == 3) { + if (rdo_level == 4) { number_of_modes_to_search = 67; - } else if (rdo_level == 2) { + } else if (rdo_level == 2 || rdo_level == 3) { number_of_modes_to_search = (cu_width == 4) ? 3 : 2; } else { // Check only the predicted modes.