diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index c7c9985d..7a5cff52 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -122,7 +122,7 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, const int width, const int height, const int width_in_lcu, const int height_in_lcu) { const encoder_control_t * const encoder = state->encoder_control; - state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type); + state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm); state->tile->frame->rec = NULL; diff --git a/src/intra.c b/src/intra.c index 79079404..f786781c 100644 --- a/src/intra.c +++ b/src/intra.c @@ -259,7 +259,7 @@ enum lm_mode static void get_cclm_parameters( encoder_state_t const* const state, int8_t width, int8_t height, int8_t mode, - int x0, int y0, + int x0, int y0, int avai_above_right_units, int avai_left_below_units, kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref, int16_t *a, int16_t*b, int16_t*shift) { @@ -287,8 +287,8 @@ static void get_cclm_parameters( //int total_units = total_left_units + total_above_units + 1; //int above_right_units = total_above_units - tu_width_in_units; //int left_below_units = total_left_units - tu_height_in_units; - int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs - int avai_left_below_units = 0; + //int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs + //int avai_left_below_units = 0; int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size); int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size); @@ -465,7 +465,7 @@ void kvz_predict_cclm( const int16_t y0, const int16_t stride, const int8_t mode, - kvz_pixel const * y_rec, + lcu_t* const lcu, kvz_intra_references* chroma_ref, kvz_pixel* dst, cclm_parameters_t* cclm_params @@ -480,61 +480,92 @@ void kvz_predict_cclm( int x_scu = SUB_SCU(x0); int y_scu = SUB_SCU(y0); - y_rec += x_scu + y_scu * LCU_WIDTH; + + int available_above_right = 0; + int available_left_below = 0; + + + kvz_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH; // Essentially what this does is that it uses 6-tap filtering to downsample // the luma intra references down to match the resolution of the chroma channel. // The luma reference is only needed when we are not on the edge of the picture. // Because the reference pixels that are needed on the edge of the ctu this code // is kinda messy but what can you do + + if (y0) { + for (; available_above_right < width / 2; available_above_right++) { + int x_extension = x_scu + width * 2 + 4 * available_above_right; + cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4); + if (pu->type == CU_NOTSET || x_extension > LCU_WIDTH) break; + } + if(y_scu == 0) { + if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4); + for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { + bool left_padding = x0 || x; + sampled_luma_ref.top[x / 2] = (state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2 + + state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride] + + state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride] + + 2) >> 2; + } + } + else { + for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) { + bool left_padding = x0 || x; + int s = 4; + s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; + s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; + s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2; + s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; + s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; + sampled_luma_ref.top[x / 2] = s >> 3; + } + } + } + if(x0) { - for(int y = 0; y < height * 2; y+=2) { + for (; available_left_below < height / 2; available_left_below++) { + int y_extension = y_scu + height * 2 + 4 * available_left_below; + cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension); + if (pu->type == CU_NOTSET || y_extension > LCU_WIDTH) break; + if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break; + } + for(int y = 0; y < height * (available_left_below ? 4 : 2); y+=2) { int s = 4; - s += x_scu ? y_rec[y * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride] * 2; - s += x_scu ? y_rec[y * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride]; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] * 2 : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride] * 2; - s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride]; - s += y_rec[y * LCU_WIDTH]; - s += y_rec[(y + 1) * LCU_WIDTH]; + s += x_scu ? y_rec[y * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y) * stride] * 2; + s += x_scu ? y_rec[y * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y) * stride]; + s += x_scu ? y_rec[y * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 2] * 2 : state->tile->frame->rec->y[x0 - 2 + (y0 + y + 1) * stride] * 2; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 1] : state->tile->frame->rec->y[x0 - 1 + (y0 + y + 1) * stride]; + s += x_scu ? y_rec[(y + 1) * LCU_WIDTH - 3] : state->tile->frame->rec->y[x0 - 3 + (y0 + y + 1) * stride]; sampled_luma_ref.left[y/2] = s >> 3; } } - if(y0) { - for(int x = 0; x < width*2; x += 2) { - bool left_padding = x0 || x; - int s = 4; - s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 2) * stride] * 2; - s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x +(y0 - 1) * stride] * 2; - s += y_scu ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride]; - s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride]; - sampled_luma_ref.top[x / 2] = s >> 3; - } - } + // Downsample the reconstructed luma sample so that they can be mapped into the chroma // to generate the chroma prediction - for (int y = 0; y < height * 2; y+=2) { - for (int x = 0; x < width * 2; x+=2) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { int s = 4; s += y_rec[2 * x] * 2; s += y_rec[2 * x + 1]; // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, // *except* when we are also at the edge of the frame, in which case we want to duplicate // the edge pixel - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + y0 * stride] : y_rec[2 * x - ((x + x0) > 0)]; + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y*2) * stride] : y_rec[2 * x - ((x + x0) > 0)]; s += y_rec[2 * x + LCU_WIDTH] * 2; s += y_rec[2 * x + 1 + LCU_WIDTH]; - s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + stride]; - sampled_luma[x / 2 + y / 2 * width] = s >> 3; + s += !x_scu && !x && x0 ? state->tile->frame->rec->y[x0 - 1 + (y0 + y * 2 + 1) * stride] : y_rec[2 * x - ((x + x0) > 0) + LCU_WIDTH]; + sampled_luma[x + y * width] = s >> 3; } - y_rec += LCU_WIDTH; + y_rec += LCU_WIDTH * 2; } int16_t a, b, shift; - get_cclm_parameters(state, width, height, mode,x0, y0, &sampled_luma_ref, chroma_ref, &a, &b, &shift); + get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift); cclm_params->shift = shift; cclm_params->a = a; cclm_params->b = b; @@ -889,7 +920,8 @@ static void intra_recon_tb_leaf( state->tile->frame->height, }; int x_scu = SUB_SCU(x); - const vector2d_t lcu_px = {x_scu >> shift, SUB_SCU(y) >> shift}; + int y_scu = SUB_SCU(y); + const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift }; kvz_intra_references refs; kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp); @@ -901,26 +933,27 @@ static void intra_recon_tb_leaf( kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); } else { kvz_pixel *y_rec = lcu->rec.y; - for (int y_ = 0; y_ < width * 2; y_ += 2) { - for (int x_ = 0; x_ < width * 2; x_ += 2) { + y_rec += x_scu + y_scu * LCU_WIDTH; + for (int y_ = 0; y_ < width; y_++) { + for (int x_ = 0; x_ < width; x_++) { int s = 4; s += y_rec[2 * x_] * 2; s += y_rec[2 * x_ + 1]; // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, // *except* when we are also at the edge of the frame, in which case we want to duplicate // the edge pixel - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + y * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; s += y_rec[2 * x_ + LCU_WIDTH] * 2; s += y_rec[2 * x_ + 1 + LCU_WIDTH]; - s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + stride]; - pred[x_ / 2 + y_ * width / 2] = s >> 3; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH]; + pred[x_ + y_ * width] = s >> 3; } - y_rec += LCU_WIDTH; + y_rec += LCU_WIDTH * 2; } if(cclm_params == NULL) { cclm_parameters_t temp_params; kvz_predict_cclm( - state, color, width, width, x, y, stride, intra_mode, lcu->rec.y, &refs, pred, &temp_params); + state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params); } else { linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width); @@ -996,10 +1029,10 @@ void kvz_intra_recon_cu( const int32_t x2 = x + offset; const int32_t y2 = y + offset; - kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); - kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, cclm_params, lcu); + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); + kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu); // Propagate coded block flags from child CUs to parent CU. uint16_t child_cbfs[3] = { diff --git a/src/intra.h b/src/intra.h index eb737be7..846d77b2 100644 --- a/src/intra.h +++ b/src/intra.h @@ -137,7 +137,7 @@ void kvz_predict_cclm( const int16_t y0, const int16_t stride, const int8_t mode, - kvz_pixel const* y_rec, + lcu_t* const lcu, kvz_intra_references* chroma_ref, kvz_pixel* dst, cclm_parameters_t* cclm_params diff --git a/src/search.c b/src/search.c index 685c8428..5691d361 100644 --- a/src/search.c +++ b/src/search.c @@ -241,6 +241,33 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, } +static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec) { + if (!state->encoder_control->cfg.cclm) return; + int x_scu = SUB_SCU(x); + int y_scu = SUB_SCU(y); + y_rec += x_scu + y_scu * LCU_WIDTH; + int stride = state->tile->frame->source->stride; + + for (int y_ = 0; y_ < height && y_ * 2 + y < state->encoder_control->cfg.height; y_++) { + for (int x_ = 0; x_ < width; x_++) { + int s = 4; + s += y_rec[2 * x_] * 2; + s += y_rec[2 * x_ + 1]; + // If we are at the edge of the CTU read the pixel from the frame reconstruct buffer, + // *except* when we are also at the edge of the frame, in which case we want to duplicate + // the edge pixel + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)]; + s += y_rec[2 * x_ + LCU_WIDTH] * 2; + s += y_rec[2 * x_ + 1 + LCU_WIDTH]; + s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH]; + int index = x / 2 + x_ + (y / 2 + y_ )* stride / 2; + state->tile->frame->cclm_luma_rec[index] = s >> 3; + } + y_rec += LCU_WIDTH * 2; + } +} + + /** * Calculate RD cost for a Coding Unit. * \return Cost of block @@ -711,6 +738,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, cur_cu->intra.mode, -1, // skip chroma NULL, NULL, lcu); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); + // TODO: This heavily relies to square CUs if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) { // There is almost no benefit to doing the chroma mode search for @@ -863,7 +894,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // gets used, at least in the most obvious cases, while avoiding any // searching. if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH - && x + cu_width <= frame->width && y + cu_width <= frame->height) + && x + cu_width <= frame->width && y + cu_width <= frame->height && 0) { cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local); @@ -913,6 +944,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Copy this CU's mode all the way down for use in adjacent CUs mode // search. work_tree_copy_down(x_local, y_local, depth, work_tree); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); if (state->frame->slicetype != KVZ_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp @@ -925,6 +959,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, // Need to copy modes down since the lower level of the work tree is used // when searching SMP and AMP blocks. work_tree_copy_down(x_local, y_local, depth, work_tree); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y + ); if (state->frame->slicetype != KVZ_SLICE_I) { // Reset HMVP to the beginning of this CU level search and add this CU as the mvp diff --git a/src/search_intra.c b/src/search_intra.c index e4afdcf1..f225c777 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -488,7 +488,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state, assert(state->encoder_control->cfg.cclm); kvz_predict_cclm( state, - COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu->rec.y, refs_u, pred, &cclm_params); + COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params); } kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); @@ -498,6 +498,12 @@ static void search_intra_chroma_rough(encoder_state_t * const state, //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); costs[i] += satd_func(pred, orig_block); } + for (int i = 5; i < 8; i++) { + assert(state->encoder_control->cfg.cclm); + kvz_predict_cclm( + state, + COLOR_V, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params); + } kvz_sort_modes(modes, costs, 5); } @@ -836,17 +842,22 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, kvz_intra_references refs[2]; - const vector2d_t luma_px = { x_px, y_px }; + const vector2d_t luma_px = { x_px & ~7, y_px & ~7 }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height, }; - kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); - kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); - cclm_parameters_t cclm_params[2] = {0}; if (reconstruct_chroma) { + + int c_width = MAX(32 >> (depth), 4); + + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp); + kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp); + + cclm_parameters_t cclm_params[2] = { 0 }; + const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); @@ -864,7 +875,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { chroma.mode = modes[chroma_mode_i]; if (chroma.mode == -1) continue; - if(chroma.mode < 67) { + if(chroma.mode < 67 || depth == 0) { kvz_intra_recon_cu(state, x_px, y_px, depth, @@ -872,18 +883,38 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, NULL, NULL, lcu); } else { + kvz_predict_cclm( - state, COLOR_U, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[0], NULL, &cclm_params[0]); + state, COLOR_U, + c_width, c_width, + x_px & ~7, y_px & ~7, + state->tile->frame->source->stride, + chroma.mode, + lcu, + &refs[0], NULL, + &cclm_params[0]); + chroma.cclm[0] = cclm_params[0]; + kvz_predict_cclm( - state, COLOR_V, 32 >> (depth), 32 >> (depth), x_px, y_px, state->tile->frame->source->stride, chroma.mode, lcu->rec.y, &refs[1], NULL, &cclm_params[1]); + state, COLOR_V, + c_width, c_width, + x_px & ~7, y_px & ~7, + state->tile->frame->source->stride, + chroma.mode, + lcu, + &refs[1], NULL, + &cclm_params[1]); + chroma.cclm[1] = cclm_params[1]; - kvz_intra_recon_cu(state, + kvz_intra_recon_cu( + state, x_px, y_px, depth, -1, chroma.mode, // skip luma - NULL, cclm_params, lcu); + NULL, cclm_params, lcu + ); } chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); diff --git a/src/videoframe.c b/src/videoframe.c index 76ab1da7..77919a84 100644 --- a/src/videoframe.c +++ b/src/videoframe.c @@ -46,7 +46,7 @@ videoframe_t * kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, - enum kvz_alf alf_type) + enum kvz_alf alf_type, bool cclm) { videoframe_t *frame = calloc(1, sizeof(videoframe_t)); if (!frame) return 0; @@ -59,8 +59,12 @@ videoframe_t * kvz_videoframe_alloc(int32_t width, frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); if (chroma_format != KVZ_CSP_400) { frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); + if (cclm) { + assert(chroma_format == KVZ_CSP_420); + frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4); + } } - + return frame; } @@ -76,6 +80,9 @@ int kvz_videoframe_free(videoframe_t * const frame) kvz_image_free(frame->rec_lmcs); frame->source_lmcs_mapped = false; } + if(frame->cclm_luma_rec) { + FREE_POINTER(frame->cclm_luma_rec); + } kvz_image_free(frame->source); frame->source = NULL; diff --git a/src/videoframe.h b/src/videoframe.h index 3e8a6ed4..74963d85 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -53,6 +53,8 @@ typedef struct videoframe kvz_picture *rec; //!< \brief Reconstructed image. kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source. + kvz_pixel *cclm_luma_rec; //!< \brief buffer for the downsampled luma reconstruction for cclm + uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available int32_t* lmcs_avg; //!< \brief Average of LCU border pixels @@ -78,7 +80,7 @@ typedef struct videoframe } videoframe_t; -videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type); +videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type, bool cclm); int kvz_videoframe_free(videoframe_t * const frame); void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc);