Merge branch 'intra-pred-rewrite'

This commit is contained in:
Ari Koivula 2015-10-05 06:26:53 +03:00
commit f055e4db74
4 changed files with 603 additions and 674 deletions

View file

@ -124,6 +124,7 @@ typedef int16_t coeff_t;
#define MAX(a,b) (((a)>(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b))
#define CLIP(low,high,value) MAX((low),MIN((high),(value))) #define CLIP(low,high,value) MAX((low),MIN((high),(value)))
#define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value))
#define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; }
#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth)
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))

File diff suppressed because it is too large Load diff

View file

@ -27,29 +27,91 @@
#include "global.h" #include "global.h"
#include "image.h"
#include "encoder.h"
#include "encoderstate.h" #include "encoderstate.h"
//void kvz_intra_set_block_mode(image* im,uint32_t x_ctb, uint32_t y_ctb, uint8_t depth, uint8_t mode, uint8_t part_mode); typedef struct {
kvz_pixel left[2 * 32 + 1];
kvz_pixel top[2 * 32 + 1];
} kvz_intra_ref;
typedef struct
{
kvz_intra_ref ref;
kvz_intra_ref filtered_ref;
bool filtered_initialized;
} kvz_intra_references;
int8_t kvz_intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu);
void kvz_intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height );
void kvz_intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); /**
void kvz_intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); * \brief Function for deriving intra luma predictions
* \param pic picture to use
* \param x_cu x CU position (smallest CU)
* \param y_cu y CU position (smallest CU)
* \param preds output buffer for 3 predictions
* \returns (predictions are found)?1:0
*/
int8_t kvz_intra_get_dir_luma_predictor(
const uint32_t x,
const uint32_t y,
int8_t *preds,
const cu_info_t *const cur_cu,
const cu_info_t *const left_cu,
const cu_info_t *const above_cu);
/* Predictions */ /**
void kvz_intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); * \brief Generage angular predictions.
* \param width Width in pixels, range 4..32.
* \param color What color pixels to use.
* \param luma_px Luma coordinates of the prediction block.
* \param pic_px Picture dimensions in luma pixels.
* \param lcu LCU struct.
* \param out_left_ref Left reference pixels, index 0 is the top-left.
* \param out_top_ref Top reference pixels, index 0 is the top-left.
*/
void kvz_intra_build_reference(
const int_fast8_t log2_width,
const color_t color,
const vector2d_t *const luma_px,
const vector2d_t *const pic_px,
const lcu_t *const lcu,
kvz_intra_references *const refs);
kvz_pixel kvz_intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); /**
void kvz_intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); * \brief Generate intra predictions.
void kvz_intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); * \param refs Reference pixels used for the prediction.
* \param log2_width Width of the predicted block.
* \param mode Intra mode used for the prediction.
* \param color Color of the prediction.
* \param dst Buffer for the predicted pixels.
*/
void kvz_intra_predict(
kvz_intra_references *refs,
int_fast8_t log2_width,
int_fast8_t mode,
color_t color,
kvz_pixel *dst);
void kvz_intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); /**
* \brief Do a full intra prediction cycle on a CU in lcu for luma.
*/
void kvz_intra_recon_lcu_luma(
encoder_state_t *const state,
int x,
int y,
int depth,
int8_t intra_mode,
cu_info_t *cur_cu,
lcu_t *lcu);
void kvz_intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); /**
void kvz_intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); * \brief Do a full intra prediction cycle on a CU in lcu for chroma.
*/
void kvz_intra_recon_lcu_chroma(
encoder_state_t *const state,
int x,
int y,
int depth,
int8_t intra_mode,
cu_info_t *cur_cu,
lcu_t *lcu);
#endif #endif

View file

@ -271,14 +271,14 @@ static double search_intra_trdepth(encoder_state_t * const state,
static void search_intra_chroma_rough(encoder_state_t * const state, static void search_intra_chroma_rough(encoder_state_t * const state,
int x_px, int y_px, int depth, int x_px, int y_px, int depth,
const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride,
const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, kvz_intra_references *refs_u, kvz_intra_references *refs_v,
int8_t luma_mode, int8_t luma_mode,
int8_t modes[5], double costs[5]) int8_t modes[5], double costs[5])
{ {
const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); assert(!(x_px & 4 || y_px & 4));
if (!reconstruct_chroma) return;
const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
costs[i] = 0; costs[i] = 0;
@ -287,16 +287,16 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width);
//cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width);
kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
if (modes[i] == luma_mode) continue; if (modes[i] == luma_mode) continue;
kvz_intra_get_pred(state->encoder_control, rec_u, NULL, recstride, pred, width, modes[i], 1); kvz_intra_predict(refs_u, log2_width_c, modes[i], COLOR_U, pred);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block); costs[i] += satd_func(pred, orig_block);
} }
@ -304,7 +304,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
if (modes[i] == luma_mode) continue; if (modes[i] == luma_mode) continue;
kvz_intra_get_pred(state->encoder_control, rec_v, NULL, recstride, pred, width, modes[i], 2); kvz_intra_predict(refs_v, log2_width_c, modes[i], COLOR_V, pred);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block); costs[i] += satd_func(pred, orig_block);
} }
@ -343,41 +343,25 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
*/ */
static int8_t search_intra_rough(encoder_state_t * const state, static int8_t search_intra_rough(encoder_state_t * const state,
kvz_pixel *orig, int32_t origstride, kvz_pixel *orig, int32_t origstride,
kvz_pixel *rec, int16_t recstride, kvz_intra_references *refs,
int width, int8_t *intra_preds, int log2_width, int8_t *intra_preds,
int8_t modes[35], double costs[35]) int8_t modes[35], double costs[35])
{ {
assert(log2_width >= 2 && log2_width <= 5);
int_fast8_t width = 1 << log2_width;
cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width);
cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width); cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width);
// Temporary block arrays // Temporary block arrays
kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
kvz_pixel *recf = &rec_filtered_temp[recstride + 1];
assert(width == 4 || width == 8 || width == 16 || width == 32);
// Store original block for SAD computation // Store original block for SAD computation
kvz_pixels_blit(orig, orig_block, width, width, origstride, width); kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
// Generate filtered reference pixels.
{
int16_t x, y;
for (y = -1; y < recstride; y++) {
recf[y*recstride - 1] = rec[y*recstride - 1];
}
for (x = 0; x < recstride; x++) {
recf[x - recstride] = rec[x - recstride];
}
kvz_intra_filter(recf, recstride, width, 0);
}
int8_t modes_selected = 0; int8_t modes_selected = 0;
unsigned min_cost = UINT_MAX; unsigned min_cost = UINT_MAX;
unsigned max_cost = 0; unsigned max_cost = 0;
@ -387,18 +371,15 @@ static int8_t search_intra_rough(encoder_state_t * const state,
int offset; int offset;
if (state->encoder_control->full_intra_search) { if (state->encoder_control->full_intra_search) {
offset = 1; offset = 1;
} else if (width == 4) {
offset = 2;
} else if (width == 8) {
offset = 4;
} else { } else {
offset = 8; static const int8_t offsets[4] = { 2, 4, 8, 8 };
offset = offsets[log2_width - 2];
} }
// Calculate SAD for evenly spaced modes to select the starting point for // Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search. // the recursive search.
for (int mode = 2; mode <= 34; mode += offset) { for (int mode = 2; mode <= 34; mode += offset) {
kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred);
costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width);
modes[modes_selected] = mode; modes[modes_selected] = mode;
@ -421,7 +402,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
int8_t center_node = best_mode; int8_t center_node = best_mode;
int8_t mode = center_node - offset; int8_t mode = center_node - offset;
if (mode >= 2) { if (mode >= 2) {
kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred);
costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width);
modes[modes_selected] = mode; modes[modes_selected] = mode;
if (costs[modes_selected] < best_cost) { if (costs[modes_selected] < best_cost) {
@ -433,7 +414,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
mode = center_node + offset; mode = center_node + offset;
if (mode <= 34) { if (mode <= 34) {
kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred);
costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width);
modes[modes_selected] = mode; modes[modes_selected] = mode;
if (costs[modes_selected] < best_cost) { if (costs[modes_selected] < best_cost) {
@ -460,7 +441,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
} }
if (!has_mode) { if (!has_mode) {
kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, mode, 0); kvz_intra_predict(refs, log2_width, mode, COLOR_Y, pred);
costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width); costs[modes_selected] = get_cost(state, pred, orig_block, satd_func, sad_func, width);
modes[modes_selected] = mode; modes[modes_selected] = mode;
++modes_selected; ++modes_selected;
@ -507,7 +488,6 @@ static int8_t search_intra_rough(encoder_state_t * const state,
static int8_t search_intra_rdo(encoder_state_t * const state, static int8_t search_intra_rdo(encoder_state_t * const state,
int x_px, int y_px, int depth, int x_px, int y_px, int depth,
kvz_pixel *orig, int32_t origstride, kvz_pixel *orig, int32_t origstride,
kvz_pixel *rec, int16_t recstride,
int8_t *intra_preds, int8_t *intra_preds,
int modes_to_check, int modes_to_check,
int8_t modes[35], double costs[35], int8_t modes[35], double costs[35],
@ -517,31 +497,14 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1];
int rdo_mode;
int pred_mode;
kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
kvz_pixel *recf = &rec_filtered_temp[recstride + 1];
// Generate filtered reference pixels.
{
int x, y;
for (y = -1; y < recstride; y++) {
recf[y*recstride - 1] = rec[y*recstride - 1];
}
for (x = 0; x < recstride; x++) {
recf[x - recstride] = rec[x - recstride];
}
kvz_intra_filter(recf, recstride, width, 0);
}
kvz_pixels_blit(orig, orig_block, width, width, origstride, width); kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
// Check that the predicted modes are in the RDO mode list // Check that the predicted modes are in the RDO mode list
if (modes_to_check < 35) { if (modes_to_check < 35) {
for (pred_mode = 0; pred_mode < 3; pred_mode++) { for (int pred_mode = 0; pred_mode < 3; pred_mode++) {
int mode_found = 0; int mode_found = 0;
for (rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) { for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) {
if (intra_preds[pred_mode] == modes[rdo_mode]) { if (intra_preds[pred_mode] == modes[rdo_mode]) {
mode_found = 1; mode_found = 1;
break; break;
@ -555,42 +518,27 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
} }
} }
for(rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) { for(int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode ++) {
int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds); int rdo_bitcost = kvz_luma_mode_bits(state, modes[rdo_mode], intra_preds);
costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5); costs[rdo_mode] = rdo_bitcost * (int)(state->global->cur_lambda_cost + 0.5);
#if 0
if (width != 4 && tr_depth == depth) {
// This code path has been disabled for now because it increases bdrate
// by 1-2 %. Possibly due to not taking chroma into account during luma
// mode search. Enabling separate chroma search compensates a little,
// but not enough.
// The idea for this code path is, that it would do the same thing as // Perform transform split search and save mode RD cost for the best one.
// the more general search_intra_trdepth, but would only handle cases cu_info_t pred_cu;
// where transform split or transform skip don't need to be handled. pred_cu.depth = depth;
kvz_intra_get_pred(state->encoder_control, rec, recf, recstride, pred, width, modes[rdo_mode], 0); pred_cu.type = CU_INTRA;
costs[rdo_mode] += kvz_rdo_cost_intra(state, pred, orig_block, width, modes[rdo_mode], width == 4 ? 1 : 0); pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
} else pred_cu.intra[0].mode = modes[rdo_mode];
#endif pred_cu.intra[1].mode = modes[rdo_mode];
{ pred_cu.intra[2].mode = modes[rdo_mode];
// Perform transform split search and save mode RD cost for the best one. pred_cu.intra[3].mode = modes[rdo_mode];
cu_info_t pred_cu; pred_cu.intra[0].mode_chroma = modes[rdo_mode];
pred_cu.depth = depth; FILL(pred_cu.cbf, 0);
pred_cu.type = CU_INTRA;
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
pred_cu.intra[0].mode = modes[rdo_mode];
pred_cu.intra[1].mode = modes[rdo_mode];
pred_cu.intra[2].mode = modes[rdo_mode];
pred_cu.intra[3].mode = modes[rdo_mode];
pred_cu.intra[0].mode_chroma = modes[rdo_mode];
FILL(pred_cu.cbf, 0);
// Reset transform split data in lcu.cu for this area. // Reset transform split data in lcu.cu for this area.
kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth);
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu);
costs[rdo_mode] += mode_cost; costs[rdo_mode] += mode_cost;
}
} }
// The best transform split hierarchy is not saved anywhere, so to get the // The best transform split hierarchy is not saved anywhere, so to get the
@ -697,7 +645,6 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px, const int x_px, const int y_px,
const int depth, lcu_t *lcu) const int depth, lcu_t *lcu)
{ {
const videoframe_t * const frame = state->tile->frame;
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
@ -726,23 +673,15 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
// FIXME: It might make more sense to only disable rough search if // FIXME: It might make more sense to only disable rough search if
// num_modes is 0.is 0. // num_modes is 0.is 0.
if (num_modes != 1 && num_modes != 5) { if (num_modes != 1 && num_modes != 5) {
kvz_pixel rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
kvz_pixel rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
const vector2d_t luma_px = { x_px, y_px };
const int16_t width_c = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); kvz_intra_references refs_u;
const int16_t rec_stride = width_c * 2 + 8; kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u);
const int16_t out_stride = rec_stride;
kvz_intra_build_reference_border(state->encoder_control, kvz_intra_references refs_v;
x_px, y_px, out_stride, kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v);
rec_u, rec_stride, COLOR_U,
frame->width / 2, frame->height / 2,
lcu);
kvz_intra_build_reference_border(state->encoder_control,
x_px, y_px, out_stride,
rec_v, rec_stride, COLOR_V,
frame->width / 2, frame->height / 2,
lcu);
vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 }; vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 };
kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
@ -750,7 +689,7 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
search_intra_chroma_rough(state, x_px, y_px, depth, search_intra_chroma_rough(state, x_px, y_px, depth,
ref_u, ref_v, LCU_WIDTH_C, ref_u, ref_v, LCU_WIDTH_C,
&rec_u[rec_stride + 1], &rec_v[rec_stride + 1], rec_stride, &refs_u, &refs_v,
intra_mode, modes, costs); intra_mode, modes, costs);
} }
@ -771,16 +710,15 @@ double kvz_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px, const int x_px, const int y_px,
const int depth, lcu_t *lcu) const int depth, lcu_t *lcu)
{ {
const videoframe_t * const frame = state->tile->frame;
const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d_t lcu_px = { x_px & 0x3f, y_px & 0x3f };
const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const vector2d_t lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
const int8_t cu_width = (LCU_WIDTH >> (depth)); const int8_t cu_width = (LCU_WIDTH >> (depth));
const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH; const int cu_index = LCU_CU_OFFSET + lcu_cu.x + lcu_cu.y * LCU_T_CU_WIDTH;
const int_fast8_t log2_width = LOG2_LCU_WIDTH - depth;
cu_info_t *cur_cu = &lcu->cu[cu_index]; cu_info_t *cur_cu = &lcu->cu[cu_index];
kvz_pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; kvz_intra_references refs;
kvz_pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1];
int8_t candidate_modes[3]; int8_t candidate_modes[3];
@ -798,12 +736,9 @@ double kvz_search_cu_intra(encoder_state_t * const state,
kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); kvz_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
if (depth > 0) { if (depth > 0) {
// Build reconstructed block to use in prediction with extrapolated borders const vector2d_t luma_px = { x_px, y_px };
kvz_intra_build_reference_border(state->encoder_control, x_px, y_px, cu_width * 2 + 8, const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
rec_buffer, cu_width * 2 + 8, 0, kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs);
frame->width,
frame->height,
lcu);
} }
int8_t modes[35]; int8_t modes[35];
@ -817,10 +752,10 @@ double kvz_search_cu_intra(encoder_state_t * const state,
bool skip_rough_search = (depth == 0 || state->encoder_control->rdo >= 3); bool skip_rough_search = (depth == 0 || state->encoder_control->rdo >= 3);
if (!skip_rough_search) { if (!skip_rough_search) {
number_of_modes = search_intra_rough(state, number_of_modes = search_intra_rough(state,
ref_pixels, LCU_WIDTH, ref_pixels, LCU_WIDTH,
cu_in_rec_buffer, cu_width * 2 + 8, &refs,
cu_width, candidate_modes, log2_width, candidate_modes,
modes, costs); modes, costs);
} else { } else {
number_of_modes = 35; number_of_modes = 35;
for (int i = 0; i < number_of_modes; ++i) { for (int i = 0; i < number_of_modes; ++i) {
@ -849,7 +784,6 @@ double kvz_search_cu_intra(encoder_state_t * const state,
number_of_modes = search_intra_rdo(state, number_of_modes = search_intra_rdo(state,
x_px, y_px, depth, x_px, y_px, depth,
ref_pixels, LCU_WIDTH, ref_pixels, LCU_WIDTH,
cu_in_rec_buffer, cu_width * 2 + 8,
candidate_modes, candidate_modes,
num_modes_to_check, num_modes_to_check,
modes, costs, lcu); modes, costs, lcu);