Huge refactoring

Split some parts of encoder_control into encoder_state
(idea: encoder_control is immutable)

Goal is to allow multiple substreams in the future.
This commit is contained in:
Laurent Fasnacht 2014-04-17 14:42:20 +02:00
parent 88a67a4e49
commit 5fea5875a5
17 changed files with 588 additions and 537 deletions

View file

@ -60,7 +60,9 @@ int main(int argc, char *argv[])
config *cfg = NULL; //!< Global configuration
FILE *input = NULL; //!< input file (YUV)
FILE *output = NULL; //!< output file (HEVC NAL stream)
encoder_control *encoder = NULL; //!< Encoder control struct
encoder_control encoder;
encoder_state encoder_state;
picture *cur_pic;
double psnr[3] = { 0.0, 0.0, 0.0 };
uint64_t curpos = 0;
uint64_t lastpos = 0;
@ -207,67 +209,78 @@ int main(int argc, char *argv[])
}
}
encoder = init_encoder_control(cfg);
if (!encoder)
//Allocate and init exp golomb table
if (!init_exp_golomb(4096*8)) {
fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n");
goto exit_failure;
}
if (!encoder_control_init(&encoder, cfg)) {
goto exit_failure;
}
// Set output file
encoder.out.file = output;
encoder->output = output;
encoder->stream.file.output = output;
// input init (TODO: read from commandline / config)
encoder->bitdepth = 8;
encoder->frame = 0;
encoder->QP = (int8_t)encoder->cfg->qp;
encoder->in.video_format = FORMAT_420;
// deblocking filter
encoder->deblock_enable = (int8_t)encoder->cfg->deblock_enable;
encoder->beta_offset_div2 = (int8_t)encoder->cfg->deblock_beta;
encoder->tc_offset_div2 = (int8_t)encoder->cfg->deblock_tc;
// SAO
encoder->sao_enable = (int8_t)encoder->cfg->sao_enable;
// RDO
encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable;
encoder->rdo = (int8_t)encoder->cfg->rdo;
// TR SKIP
encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable;
// VUI
encoder->vui.sar_width = (int16_t)encoder->cfg->vui.sar_width;
encoder->vui.sar_height = (int16_t)encoder->cfg->vui.sar_height;
encoder->vui.overscan = encoder->cfg->vui.overscan;
encoder->vui.videoformat = encoder->cfg->vui.videoformat;
encoder->vui.fullrange = encoder->cfg->vui.fullrange;
encoder->vui.colorprim = encoder->cfg->vui.colorprim;
encoder->vui.transfer = encoder->cfg->vui.transfer;
encoder->vui.colormatrix = encoder->cfg->vui.colormatrix;
encoder->vui.chroma_loc = (int8_t)encoder->cfg->vui.chroma_loc;
// AUD
encoder->aud_enable = (int8_t)encoder->cfg->aud_enable;
encoder.bitdepth = 8;
encoder.in.video_format = FORMAT_420;
init_encoder_input(&encoder->in, input, cfg->width, cfg->height);
// deblocking filter
encoder.deblock_enable = (int8_t)encoder.cfg->deblock_enable;
encoder.beta_offset_div2 = (int8_t)encoder.cfg->deblock_beta;
encoder.tc_offset_div2 = (int8_t)encoder.cfg->deblock_tc;
// SAO
encoder.sao_enable = (int8_t)encoder.cfg->sao_enable;
// RDO
encoder.rdoq_enable = (int8_t)encoder.cfg->rdoq_enable;
encoder.rdo = (int8_t)encoder.cfg->rdo;
// TR SKIP
encoder.trskip_enable = (int8_t)encoder.cfg->trskip_enable;
// VUI
encoder.vui.sar_width = (int16_t)encoder.cfg->vui.sar_width;
encoder.vui.sar_height = (int16_t)encoder.cfg->vui.sar_height;
encoder.vui.overscan = encoder.cfg->vui.overscan;
encoder.vui.videoformat = encoder.cfg->vui.videoformat;
encoder.vui.fullrange = encoder.cfg->vui.fullrange;
encoder.vui.colorprim = encoder.cfg->vui.colorprim;
encoder.vui.transfer = encoder.cfg->vui.transfer;
encoder.vui.colormatrix = encoder.cfg->vui.colormatrix;
encoder.vui.chroma_loc = (int8_t)encoder.cfg->vui.chroma_loc;
// AUD
encoder.aud_enable = (int8_t)encoder.cfg->aud_enable;
encoder_control_input_init(&encoder, input, cfg->width, cfg->height);
fprintf(stderr, "Input: %s, output: %s\n", cfg->input, cfg->output);
fprintf(stderr, " Video size: %dx%d (input=%dx%d)\n",
encoder->in.cur_pic->width, encoder->in.cur_pic->height,
encoder->in.real_width, encoder->in.real_height);
encoder.in.width, encoder.in.height,
encoder.in.real_width, encoder.in.real_height);
if (!encoder_state_init(&encoder_state, &encoder)) {
goto exit_failure;
}
encoder_state.frame = 0;
encoder_state.QP = (int8_t)encoder.cfg->qp;
// Only the code that handles conformance window coding needs to know
// the real dimensions. As a quick fix for broken non-multiple of 8 videos,
// change the input values here to be the real values. For a real fix
// encoder.in probably needs to be merged into cfg.
// The real fix would be: never go dig in cfg
//cfg->width = encoder->in.width;
//cfg->height = encoder->in.height;
//cfg->width = encoder.in.width;
//cfg->height = encoder.in.height;
// Start coding cycle while data on input and not on the last frame
while(!cfg->frames || encoder->frame < cfg->frames) {
while(!cfg->frames || encoder_state.frame < cfg->frames) {
int32_t diff;
double temp_psnr[3];
// Skip '--seek' frames before input.
// This block can be moved outside this while loop when there is a
// mechanism to skip the while loop on error.
if (encoder->frame == 0 && cfg->seek > 0) {
if (encoder_state.frame == 0 && cfg->seek > 0) {
int frame_bytes = cfg->width * cfg->height * 3 / 2;
int error = 0;
@ -275,7 +288,7 @@ int main(int argc, char *argv[])
// Input is stdin.
int i;
for (i = 0; !error && i < cfg->seek; ++i) {
error = !read_one_frame(input, encoder);
error = !read_one_frame(input, &encoder_state);
}
} else {
// input is a file. We hope. Proper detection is OS dependent.
@ -288,25 +301,27 @@ int main(int argc, char *argv[])
}
// Read one frame from the input
if (!read_one_frame(input, encoder)) {
if (!read_one_frame(input, &encoder_state)) {
if (!feof(input))
fprintf(stderr, "Failed to read a frame %d\n", encoder->frame);
fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame);
break;
}
// The actual coding happens here, after this function we have a coded frame
encode_one_frame(encoder);
encode_one_frame(&encoder_state);
cur_pic = encoder_state.cur_pic;
if (cfg->debug != NULL) {
// Write reconstructed frame out.
// Use conformance-window dimensions instead of internal ones.
const int width = encoder->in.cur_pic->width;
const int out_width = encoder->in.real_width;
const int out_height = encoder->in.real_height;
const int width = cur_pic->width;
const int out_width = encoder.in.real_width;
const int out_height = encoder.in.real_height;
int y;
const pixel *y_rec = encoder->in.cur_pic->y_recdata;
const pixel *u_rec = encoder->in.cur_pic->u_recdata;
const pixel *v_rec = encoder->in.cur_pic->v_recdata;
const pixel *y_rec = cur_pic->y_recdata;
const pixel *u_rec = cur_pic->u_recdata;
const pixel *v_rec = cur_pic->v_recdata;
for (y = 0; y < out_height; ++y) {
fwrite(&y_rec[y * width], sizeof(*y_rec), out_width, recout);
@ -325,12 +340,12 @@ int main(int argc, char *argv[])
lastpos = curpos;
// PSNR calculations
temp_psnr[0] = image_psnr(encoder->in.cur_pic->y_data, encoder->in.cur_pic->y_recdata, cfg->width, cfg->height);
temp_psnr[1] = image_psnr(encoder->in.cur_pic->u_data, encoder->in.cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
temp_psnr[2] = image_psnr(encoder->in.cur_pic->v_data, encoder->in.cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
temp_psnr[0] = image_psnr(cur_pic->y_data, cur_pic->y_recdata, cfg->width, cfg->height);
temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder->frame,
"BPI"[encoder->in.cur_pic->slicetype%3], diff<<3,
fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame,
"BPI"[cur_pic->slicetype%3], diff<<3,
temp_psnr[0], temp_psnr[1], temp_psnr[2]);
// Increment total PSNR
@ -342,33 +357,33 @@ int main(int argc, char *argv[])
// TODO: add more than one reference
// Remove the ref pic (if present)
if (encoder->ref->used_size == (uint32_t)encoder->cfg->ref_frames) {
picture_list_rem(encoder->ref, encoder->ref->used_size-1, 1);
if (encoder_state.ref->used_size == (uint32_t)encoder.cfg->ref_frames) {
picture_list_rem(encoder_state.ref, encoder_state.ref->used_size-1, 1);
}
// Add current picture as reference
picture_list_add(encoder->ref, encoder->in.cur_pic);
picture_list_add(encoder_state.ref, cur_pic);
// Allocate new memory to current picture
// TODO: reuse memory from old reference
encoder->in.cur_pic = picture_init(encoder->in.cur_pic->width, encoder->in.cur_pic->height, encoder->in.cur_pic->width_in_lcu, encoder->in.cur_pic->height_in_lcu);
encoder_state.cur_pic = picture_init(encoder_state.cur_pic->width, encoder_state.cur_pic->height, encoder_state.cur_pic->width_in_lcu, encoder_state.cur_pic->height_in_lcu);
// Copy pointer from the last cur_pic because we don't want to reallocate it
MOVE_POINTER(encoder->in.cur_pic->coeff_y,encoder->ref->pics[0]->coeff_y);
MOVE_POINTER(encoder->in.cur_pic->coeff_u,encoder->ref->pics[0]->coeff_u);
MOVE_POINTER(encoder->in.cur_pic->coeff_v,encoder->ref->pics[0]->coeff_v);
MOVE_POINTER(encoder_state.cur_pic->coeff_y,encoder_state.ref->pics[0]->coeff_y);
MOVE_POINTER(encoder_state.cur_pic->coeff_u,encoder_state.ref->pics[0]->coeff_u);
MOVE_POINTER(encoder_state.cur_pic->coeff_v,encoder_state.ref->pics[0]->coeff_v);
MOVE_POINTER(encoder->in.cur_pic->pred_y,encoder->ref->pics[0]->pred_y);
MOVE_POINTER(encoder->in.cur_pic->pred_u,encoder->ref->pics[0]->pred_u);
MOVE_POINTER(encoder->in.cur_pic->pred_v,encoder->ref->pics[0]->pred_v);
MOVE_POINTER(encoder_state.cur_pic->pred_y,encoder_state.ref->pics[0]->pred_y);
MOVE_POINTER(encoder_state.cur_pic->pred_u,encoder_state.ref->pics[0]->pred_u);
MOVE_POINTER(encoder_state.cur_pic->pred_v,encoder_state.ref->pics[0]->pred_v);
encoder->frame++;
encoder->poc++;
encoder_state.frame++;
encoder_state.poc++;
}
// Coding finished
fgetpos(output,(fpos_t*)&curpos);
// Print statistics of the coding
fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, (long long unsigned int) curpos<<3,
psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame);
fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3,
psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame);
fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC);
fclose(input);
@ -377,12 +392,9 @@ int main(int argc, char *argv[])
// Deallocating
config_destroy(cfg);
scalinglist_destroy(&encoder->scaling_list);
picture_list_destroy(encoder->ref);
picture_destroy(encoder->in.cur_pic);
FREE_POINTER(encoder->in.cur_pic);
bitstream_finalize(&encoder->stream);
free(encoder);
encoder_state_finalize(&encoder_state);
encoder_control_finalize(&encoder);
free_exp_golomb();
return EXIT_SUCCESS;

File diff suppressed because it is too large Load diff

View file

@ -44,33 +44,33 @@ typedef struct
enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };
/* Input info struct */
/* Describes the input video source: its file handle, its real (unpadded)
 * dimensions, the picture currently held for coding and the sample format. */
typedef struct
{
FILE *file; /*!< \brief input file handle */
int32_t real_width; /*!< \brief real input picture width */
int32_t real_height; /*!< \brief real input picture height */
picture *cur_pic; /*!< \brief current picture */
int8_t video_format; /*!< \brief one of the FORMAT_* enum values (e.g. FORMAT_420) */
int8_t bitdepth; /*!< \brief input bit depth (8,10) */
} encoder_input;
/* Encoder control options, the main struct */
typedef struct
{
int32_t frame;
int32_t poc; /*!< \brief picture order count */
/* Configuration */
const config *cfg;
encoder_input in;
/* Input */
/* Input description. real_width/real_height are the dimensions used when
 * writing the reconstructed frame out (conformance window); width/height
 * are presumably the internal, padded coding dimensions -- TODO confirm
 * against encoder_control_input_init(). */
struct {
FILE *file; /*!< \brief input file handle */
int32_t width; /*!< \brief picture width */
int32_t height; /*!< \brief picture height */
int32_t width_in_lcu; /*!< \brief picture width in LCUs */
int32_t height_in_lcu; /*!< \brief picture height in LCUs */
int32_t real_width; /*!< \brief real input picture width */
int32_t real_height; /*!< \brief real input picture height */
int8_t video_format; /*!< \brief one of the FORMAT_* enum values (e.g. FORMAT_420) */
int8_t bitdepth; /*!< \brief input bit depth (8,10) */
} in;
/* Output */
struct {
FILE *file;
} out;
encoder_me me;
bitstream stream;
FILE *output;
picture_list *ref;
int8_t ref_list;
int8_t ref_idx_num[2];
int8_t QP; // \brief Quantization parameter
int8_t bitdepth;
double cur_lambda_cost;
/* Filtering */
int8_t deblock_enable; // \brief Flag to enable deblocking filter
@ -101,20 +101,48 @@ typedef struct
scaling_list scaling_list;
} encoder_control;
void init_lambda(encoder_control *encoder);
encoder_control *init_encoder_control(config *cfg);
void init_encoder_input(encoder_input *input, FILE* inputfile,
int32_t width, int32_t height);
void encode_one_frame(encoder_control *encoder);
int read_one_frame(FILE *file, const encoder_control * const encoder);
typedef struct encoder_state {
const encoder_control *encoder_control;
void encode_seq_parameter_set(encoder_control * const encoder);
void encode_pic_parameter_set(encoder_control * const encoder);
void encode_vid_parameter_set(encoder_control * const encoder);
void encode_slice_header(encoder_control * const encoder);
void encode_access_unit_delimiter(encoder_control * const encoder);
void encode_prefix_sei_version(encoder_control * const encoder);
void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac, uint16_t x_ctb,
picture *cur_pic;
int32_t frame;
int32_t poc; /*!< \brief picture order count */
bitstream stream;
picture_list *ref;
int8_t ref_list;
int8_t ref_idx_num[2];
int8_t QP; // \brief Quantization parameter
double cur_lambda_cost;
cabac_data cabac;
struct encoder_state *children;
} encoder_state;
int encoder_control_init(encoder_control *encoder, const config *cfg);
int encoder_control_finalize(encoder_control *encoder);
void encoder_control_input_init(encoder_control *encoder, FILE *inputfile, int32_t width, int32_t height);
int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder);
int encoder_state_finalize(encoder_state *encoder_state);
void encoder_state_init_lambda(encoder_state *encoder_state);
void init_encoder_input(encoder_control *encoder, FILE* inputfile,
int32_t width, int32_t height);
void encode_one_frame(encoder_state *encoder_state);
int read_one_frame(FILE* file, const encoder_state *encoder);
void encode_seq_parameter_set(encoder_state *encoder);
void encode_pic_parameter_set(encoder_state *encoder);
void encode_vid_parameter_set(encoder_state *encoder);
void encode_slice_header(encoder_state * encoder);
void encode_access_unit_delimiter(encoder_state *encoder);
void encode_prefix_sei_version(encoder_state *encoder);
void encode_coding_tree(encoder_state *encoder, cabac_data *cabac, uint16_t x_ctb,
uint16_t y_ctb, uint8_t depth);
void encode_last_significant_xy(cabac_data *cabac,
@ -123,8 +151,8 @@ void encode_last_significant_xy(cabac_data *cabac,
uint8_t type, uint8_t scan);
void encode_coeff_nxn(const encoder_control * const encoder, cabac_data *cabac, int16_t *coeff, uint8_t width,
uint8_t type, int8_t scan_mode, int8_t tr_skip);
void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
void encode_transform_tree(encoder_state *encoder_state, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
void encode_transform_coeff(encoder_state *encoder_state, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
void encode_block_residual(const encoder_control * const encoder,
uint16_t x_ctb, uint16_t y_ctb, uint8_t depth);

View file

@ -163,11 +163,13 @@ INLINE void filter_deblock_chroma(const encoder_control * const encoder, pixel *
/**
* \brief
*/
void filter_deblock_edge_luma(const encoder_control * const encoder,
void filter_deblock_edge_luma(encoder_state * const encoder_state,
int32_t xpos, int32_t ypos,
int8_t depth, int8_t dir)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
const encoder_control * const encoder = encoder_state->encoder_control;
cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)];
{
@ -192,7 +194,7 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
int8_t strength = 0;
int32_t qp = encoder->QP;
int32_t qp = encoder_state->QP;
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
int32_t beta = g_beta_table_8x8[b_index] * bitdepth_scale;
@ -288,11 +290,12 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
/**
* \brief
*/
void filter_deblock_edge_chroma(const encoder_control * const encoder,
void filter_deblock_edge_chroma(encoder_state * const encoder_state,
int32_t x, int32_t y,
int8_t depth, int8_t dir)
{
const picture * const cur_pic = encoder->in.cur_pic;
const encoder_control * const encoder = encoder_state->encoder_control;
const picture * const cur_pic = encoder_state->cur_pic;
cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)];
// Chroma edges that do not lie on an 8x8 grid are not deblocked.
@ -324,7 +327,7 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
int8_t strength = 2;
int32_t QP = g_chroma_scale[encoder->QP];
int32_t QP = g_chroma_scale[encoder_state->QP];
int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
@ -384,9 +387,9 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
* until the coded block size has been achieved. Calls luma and chroma filtering
* functions for each coded CU size.
*/
void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int32_t edge)
void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x + y*(cur_pic->width_in_lcu << MAX_DEPTH)];
uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
uint8_t border_x = (cur_pic->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
@ -404,15 +407,15 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
// Tell clang-analyzer that everything is ok.
assert(depth >= 0 && depth < MAX_DEPTH);
filter_deblock_cu(encoder, x, y, depth + 1, edge);
filter_deblock_cu(encoder_state, x, y, depth + 1, edge);
if(!border_x || border_split_x) {
filter_deblock_cu(encoder, x + change, y, depth + 1, edge);
filter_deblock_cu(encoder_state, x + change, y, depth + 1, edge);
}
if(!border_y || border_split_y) {
filter_deblock_cu(encoder, x , y + change, depth + 1, edge);
filter_deblock_cu(encoder_state, x , y + change, depth + 1, edge);
}
if((!border_x && !border_y) || (border_split_x && border_split_y)) {
filter_deblock_cu(encoder, x + change, y + change, depth + 1, edge);
filter_deblock_cu(encoder_state, x + change, y + change, depth + 1, edge);
}
return;
}
@ -421,8 +424,8 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return;
// do the filtering for block edge
filter_deblock_edge_luma(encoder, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge);
filter_deblock_edge_chroma(encoder, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
filter_deblock_edge_luma(encoder_state, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge);
filter_deblock_edge_chroma(encoder_state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
}
/**
@ -433,9 +436,9 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
* the Largest Coding Units (LCU) and call filter_deblock_cu with absolute
* X and Y coordinates of the LCU.
*/
void filter_deblock(const encoder_control * const encoder)
void filter_deblock(encoder_state * const encoder_state)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
int16_t x, y;
// TODO: Optimization: add thread for each LCU
@ -444,7 +447,7 @@ void filter_deblock(const encoder_control * const encoder)
{
for (x = 0; x < cur_pic->width_in_lcu; x++)
{
filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
}
}
@ -453,7 +456,7 @@ void filter_deblock(const encoder_control * const encoder)
{
for (x = 0; x < cur_pic->width_in_lcu; x++)
{
filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
}
}
}
@ -469,11 +472,11 @@ void filter_deblock(const encoder_control * const encoder)
* - After vertical filtering the left edge, filter the last 4 pixels of
* horizontal edges in the LCU to the left.
*/
void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px)
void filter_deblock_lcu(encoder_state * const encoder_state, int x_px, int y_px)
{
const vector2d lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH };
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
// Filter rightmost 4 pixels from last LCU now that they have been
// finally deblocked vertically.
@ -481,15 +484,15 @@ void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_p
int y;
for (y = 0; y < 64; y += 8) {
if (lcu.y + y == 0) continue;
filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
filter_deblock_edge_luma(encoder_state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
}
for (y = 0; y < 32; y += 8) {
if (lcu.y + y == 0) continue;
filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
filter_deblock_edge_chroma(encoder_state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
}
}
filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
}

View file

@ -32,16 +32,16 @@
//////////////////////////////////////////////////////////////////////////
// FUNCTIONS
// Deblocking
void filter_deblock_cu(const encoder_control * const encoder, int32_t x_px, int32_t y_px,
void filter_deblock_cu(encoder_state *encoder_state, int32_t x_px, int32_t y_px,
int8_t depth, int32_t edge);
void filter_deblock_edge_luma(const encoder_control * const encoder,
void filter_deblock_edge_luma(encoder_state *encoder_state,
int32_t x_pos, int32_t y_pos,
int8_t depth, int8_t dir);
void filter_deblock_edge_chroma(const encoder_control * const encoder,
void filter_deblock_edge_chroma(encoder_state *encoder_state,
int32_t xpos, int32_t ypos,
int8_t depth, int8_t dir);
void filter_deblock(const encoder_control * const encoder);
void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px);
void filter_deblock(encoder_state *encoder_state);
void filter_deblock_lcu(encoder_state *encoder_state, int x_px, int y_px);
void filter_deblock_luma(const encoder_control * const encoder, pixel *src, int32_t offset, int32_t tc , int8_t sw,
int8_t part_p_nofilter, int8_t part_q_nofilter,
int32_t thr_cut,

View file

@ -322,7 +322,7 @@ void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_i
* \param depth current block depth
* \param mv_pred[2][2] 2x motion vector prediction
*/
void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
{
uint8_t candidates = 0;
uint8_t b_candidates = 0;
@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t
inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu);
#define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)
#define APPLY_MV_SCALING(cu, cand) {int td = encoder->poc - encoder->ref->pics[(cu)->inter.mv_ref]->poc;\
int tb = encoder->poc - encoder->ref->pics[cur_cu->inter.mv_ref]->poc;\
#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\
int tb = encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\
if (td != tb) { \
int scale = CALCULATE_SCALE(cu,tb,td); \
mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \

View file

@ -35,6 +35,6 @@ void inter_recon_lcu(const encoder_control *encoder, picture* ref,int32_t xpos,
void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1,
cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu);
#endif

View file

@ -327,7 +327,7 @@ static void intra_get_pred(const encoder_control * const encoder, pixel *rec[2],
* \param sad_out sad value of best mode
* \returns best intra mode
*/
int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
int16_t intra_prediction(const encoder_state * const encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
uint8_t width, uint32_t *sad_out,
int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac)
{
@ -336,6 +336,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
int16_t best_mode = 1;
uint32_t best_bitcost = 0;
int16_t mode;
int8_t rdo = encoder_state->encoder_control->rdo;
// Check 8 modes for 4x4 and 8x8, 3 for others
int8_t rdo_modes_to_check = (width == 4 || width == 8)? 8 : 3;
@ -371,12 +372,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
// Try all modes and select the best one.
for (mode = 0; mode < 35; mode++) {
uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds);
intra_get_pred(encoder, ref, recstride, pred, width, mode, 0);
intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0);
sad = cost_func(pred, orig_block);
sad += mode_cost * (int)(encoder->cur_lambda_cost + 0.5);
sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5);
// When rdo == 2, store best costs to an array and do full RDO later
if(encoder->rdo == 2) {
if(rdo == 2) {
int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad);
if(rdo_mode != -1) {
rdo_modes[rdo_mode] = mode; rdo_costs[rdo_mode] = sad;
@ -390,7 +391,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
}
// Select from three best modes if using RDO
if(encoder->rdo == 2) {
if(rdo == 2) {
int rdo_mode;
int pred_mode;
// Check that the predicted modes are in the RDO mode list
@ -413,12 +414,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
for(rdo_mode = 0; rdo_mode < rdo_modes_to_check; rdo_mode ++) {
int rdo_bitcost;
// The reconstruction is calculated again here, it could be saved from before..
intra_recon(encoder, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
rdo_costs[rdo_mode] = rdo_cost_intra(encoder,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
intra_recon(encoder_state->encoder_control, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
rdo_costs[rdo_mode] = rdo_cost_intra(encoder_state,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
// Bitcost also calculated again for this mode
rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds);
// Add bitcost * lambda
rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder->cur_lambda_cost + 0.5);
rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5);
if(rdo_costs[rdo_mode] < best_sad) {
best_sad = rdo_costs[rdo_mode];
@ -832,8 +833,9 @@ void intra_get_planar_pred(pixel* src, int32_t srcstride, uint32_t width, pixel*
}
}
void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
void intra_recon_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
{
const encoder_control * const encoder = encoder_state->encoder_control;
int x_local = (x&0x3f), y_local = (y&0x3f);
cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
@ -890,5 +892,5 @@ void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, i
rec_stride, width, width);
}
encode_transform_tree(encoder, cabac, x, y, depth, lcu);
encode_transform_tree(encoder_state, cabac, x, y, depth, lcu);
}

View file

@ -39,7 +39,7 @@ void intra_build_reference_border(const encoder_control *encoder, int32_t x_luma
void intra_filter(pixel* ref, int32_t stride, int32_t width, int8_t mode);
/* Predictions */
int16_t intra_prediction(const encoder_control *encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
int16_t intra_prediction(const encoder_state *encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
uint8_t width, uint32_t *sad_out,
int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac);
@ -49,6 +49,6 @@ void intra_get_angular_pred(const encoder_control *encoder, pixel* src, int32_t
void intra_recon(const encoder_control *encoder, pixel* rec, int32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma);
void intra_recon_lcu(const encoder_control *encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
void intra_recon_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
#endif

View file

@ -63,8 +63,9 @@ const uint32_t entropy_bits[128] =
** Only for luma
*/
uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
uint32_t rdo_cost_intra(const encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
{
const encoder_control * const encoder = encoder_state->encoder_control;
coefficient pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2];
int16_t block[LCU_WIDTH*LCU_WIDTH>>2];
int16_t temp_block[LCU_WIDTH*LCU_WIDTH>>2];
@ -92,11 +93,11 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
}
transform2d(encoder, block,pre_quant_coeff,width,0);
if(encoder->rdoq_enable) {
rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
} else {
quant(encoder, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
quant(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
}
dequant(encoder, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
dequant(encoder_state, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
itransform2d(encoder, temp_block,pre_quant_coeff,width,0);
// SSD between original and reconstructed
@ -111,12 +112,12 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
for (i = 0; i < width*width; i++) {
coeffcost += abs((int)temp_coeff[i]);
}
cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5);
cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5);
// Full RDO
} else if(encoder->rdo == 2) {
coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, width, 0, luma_scan_mode);
cost += coeffcost*((int)encoder->cur_lambda_cost+0.5);
cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5);
}
return cost;
}
@ -284,7 +285,7 @@ int32_t get_ic_rate( cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one
* This method calculates the best quantized transform level for a given scan position.
* From HM 12.0
*/
uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
uint32_t get_coded_level ( const encoder_state * const encoder_state, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
int32_t level_double, uint32_t max_abs_level,
uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
uint16_t abs_go_rice,
@ -298,7 +299,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma);
if( !last && max_abs_level < 3 ) {
*coded_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost = *coded_cost0 + *coded_cost_sig;
if (max_abs_level == 0) return best_abs_level;
} else {
@ -306,13 +307,13 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
}
if( !last ) {
cur_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
}
min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
double err = (double)(level_double - ( abs_level << q_bits ) );
double cur_cost = err * err * temp + encoder->cur_lambda_cost *
double cur_cost = err * err * temp + encoder_state->cur_lambda_cost *
get_ic_rate_cost( cabac, abs_level, ctx_num_one, ctx_num_abs,
abs_go_rice, c1_idx, c2_idx, type);
cur_cost += cur_cost_sig;
@ -336,7 +337,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
*
* From HM 12.0
*/
static double get_rate_last(const encoder_control * const encoder,
static double get_rate_last(const encoder_state * const encoder_state,
const uint32_t pos_x, const uint32_t pos_y,
int32_t* last_x_bits, int32_t* last_y_bits)
{
@ -349,7 +350,7 @@ static double get_rate_last(const encoder_control * const encoder,
if( ctx_y > 3 ) {
uiCost += 32768.0 * ((ctx_y-2)>>1);
}
return encoder->cur_lambda_cost*uiCost;
return encoder_state->cur_lambda_cost*uiCost;
}
static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int8_t type,
@ -388,9 +389,10 @@ static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int
* coding engines using probability models like CABAC
* From HM 12.0
*/
void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
void rdoq(const encoder_state * const encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth)
{
const encoder_control * const encoder = encoder_state->encoder_control;
uint32_t log2_tr_size = g_convert_to_bit[ width ] + 2;
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; // Represents scaling through forward transform
uint16_t go_rice_param = 0;
@ -398,7 +400,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
uint32_t max_num_coeff = width * height;
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
{
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
@ -511,7 +513,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
uint16_t abs_ctx = ctx_set + c2;
if( scanpos == last_scanpos ) {
level = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param,
c1_idx, c2_idx, q_bits, temp, 1, type );
} else {
@ -519,7 +521,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
uint32_t pos_x = blkpos - ( pos_y << log2_block_size );
uint16_t ctx_sig = (uint16_t)context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y,
log2_block_size, type);
level = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param,
c1_idx, c2_idx, q_bits, temp, 0, type );
sig_rate_delta[ blkpos ] = CTX_ENTROPY_BITS(&baseCtx[ctx_sig],1) - CTX_ENTROPY_BITS(&baseCtx[ctx_sig],0);
@ -587,7 +589,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
if (sig_coeffgroup_flag[ cg_blkpos ] == 0) {
uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, width);
cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
base_cost += cost_coeffgroup_sig[ cg_scanpos ] - rd_stats.sig_cost;
} else {
if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below.
@ -604,9 +606,9 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, width);
if (cg_scanpos < cg_last_scanpos) {
cost_coeffgroup_sig[cg_scanpos] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
base_cost += cost_coeffgroup_sig[cg_scanpos];
cost_zero_cg += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
}
// try to convert the current coeff group from non-zero to all-zero
@ -620,7 +622,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
sig_coeffgroup_flag[ cg_blkpos ] = 0;
base_cost = cost_zero_cg;
if (cg_scanpos < cg_last_scanpos) {
cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
}
// reset coeffs to 0 in this block
for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@ -648,13 +650,13 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
best_cost = block_uncoded_cost + encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
base_cost += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
} else {
cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma);
ctx_cbf = ( type ? tr_depth : !tr_depth);
best_cost = block_uncoded_cost + encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
}
for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@ -672,7 +674,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
uint32_t pos_y = blkpos >> log2_block_size;
uint32_t pos_x = blkpos - ( pos_y << log2_block_size );
double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder, pos_x, pos_y, last_x_bits,last_y_bits );
double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder_state, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder_state, pos_x, pos_y, last_x_bits,last_y_bits );
double totalCost = base_cost + cost_last - cost_sig[ scanpos ];
if( totalCost < best_cost ) {
@ -708,7 +710,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
if(*abs_sum >= 2) {
int64_t rd_factor = (int64_t) (
g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6)))
/ encoder->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
/ encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
+ 0.5);
int32_t lastCG = -1;
int32_t absSum = 0;

View file

@ -42,10 +42,10 @@ extern const uint32_t g_go_rice_range[5];
extern const uint32_t g_go_rice_prefix_len[5];
void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
void rdoq(const encoder_state *encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);
uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);
uint32_t rdo_cost_intra(const encoder_state *encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);
int32_t get_coeff_cost(const encoder_control * const encoder, cabac_data *cabac, coefficient *coeff, int32_t width, int32_t type, int8_t scan_mode);
@ -53,7 +53,7 @@ int32_t get_ic_rate(cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one,
uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
double get_ic_rate_cost (cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs,
uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
uint32_t get_coded_level ( const encoder_state * encoder_state, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
int32_t level_double, uint32_t max_abs_level,
uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
uint16_t abs_go_rice,

View file

@ -54,12 +54,12 @@ static int sao_calc_eo_cat(pixel a, pixel b, pixel c)
}
int sao_band_ddistortion(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
int sao_band_ddistortion(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
int block_width, int block_height,
int band_pos, int sao_bands[4])
{
int y, x;
int shift = encoder->bitdepth-5;
int shift = encoder_state->encoder_control->bitdepth-5;
int sum = 0;
for (y = 0; y < block_height; ++y) {
@ -345,12 +345,12 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4],
* \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
* \param sao_bands an array of bands for original and reconstructed block
*/
static void calc_sao_bands(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
static void calc_sao_bands(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
int block_width, int block_height,
int sao_bands[2][32])
{
int y, x;
int shift = encoder->bitdepth-5;
int shift = encoder_state->encoder_control->bitdepth-5;
//Loop pixels and take top 5 bits to classify different bands
for (y = 0; y < block_height; ++y) {
@ -608,7 +608,7 @@ void sao_reconstruct(const encoder_control * const encoder, picture * pic, const
static void sao_search_edge_sao(const encoder_control * const encoder,
static void sao_search_edge_sao(const encoder_state * const encoder_state,
const pixel * data[], const pixel * recdata[],
int block_width, int block_height,
unsigned buf_cnt,
@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,
{
int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left);
sum_ddistortion += (int)((double)mode_bits*(encoder->cur_lambda_cost+0.5));
sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5));
}
// SAO is not applied for category 0.
edge_offset[SAO_EO_CAT0] = 0;
@ -684,7 +684,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,
}
static void sao_search_band_sao(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
static void sao_search_band_sao(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
int block_width, int block_height,
unsigned buf_cnt,
sao_info *sao_out, sao_info *sao_top,
@ -704,14 +704,14 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix
memset(sao_bands, 0, 2 * 32 * sizeof(int));
for (i = 0; i < buf_cnt; ++i) {
calc_sao_bands(encoder, data[i], recdata[i],block_width,
calc_sao_bands(encoder_state, data[i], recdata[i],block_width,
block_height,sao_bands);
}
ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position);
temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left);
ddistortion += (int)((double)temp_rate*(encoder->cur_lambda_cost+0.5));
ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5));
// Select band sao over edge sao when distortion is lower
if (ddistortion < sao_out->ddistortion) {
@ -731,7 +731,7 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix
* \param buf_cnt Number of pointers data and recdata have.
* \param sao_out Output parameter for the best sao parameters.
*/
static void sao_search_best_mode(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
static void sao_search_best_mode(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
int block_width, int block_height,
unsigned buf_cnt,
sao_info *sao_out, sao_info *sao_top,
@ -740,12 +740,12 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
sao_info edge_sao;
sao_info band_sao;
sao_search_edge_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
sao_search_band_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
sao_search_edge_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
sao_search_band_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
{
int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left);
int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
unsigned buf_i;
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
@ -759,11 +759,11 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
{
int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left);
int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
unsigned buf_i;
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
ddistortion += sao_band_ddistortion(encoder, data[buf_i], recdata[buf_i],
ddistortion += sao_band_ddistortion(encoder_state, data[buf_i], recdata[buf_i],
block_width, block_height,
band_sao.band_position, &band_sao.offsets[1]);
}
@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
// Choose between SAO and doing nothing, taking into account the
// rate-distortion cost of coding do nothing.
{
int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder->cur_lambda_cost + 0.5);
int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5);
if (sao_out->ddistortion >= cost_of_nothing) {
sao_out->type = SAO_TYPE_NONE;
}
@ -794,7 +794,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
return;
}
void sao_search_chroma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
void sao_search_chroma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
{
int block_width = (LCU_WIDTH / 2);
int block_height = (LCU_WIDTH / 2);
@ -827,10 +827,10 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
}
// Calculate
sao_search_best_mode(encoder, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
sao_search_best_mode(encoder_state, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
}
void sao_search_luma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
void sao_search_luma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
{
pixel orig[LCU_LUMA_SIZE];
pixel rec[LCU_LUMA_SIZE];
@ -857,13 +857,13 @@ void sao_search_luma(const encoder_control * const encoder, const picture *pic,
orig_list[0] = orig;
rec_list[0] = rec;
sao_search_best_mode(encoder, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
sao_search_best_mode(encoder_state, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
}
void sao_reconstruct_frame(const encoder_control * const encoder)
void sao_reconstruct_frame(encoder_state * const encoder_state)
{
vector2d lcu;
picture * const cur_pic = encoder->in.cur_pic;
picture * const cur_pic = encoder_state->cur_pic;
// These are needed because SAO needs the pre-SAO pixels from left and
// top LCUs. Single pixel wide buffers, like what search_lcu takes, would
@ -882,9 +882,9 @@ void sao_reconstruct_frame(const encoder_control * const encoder)
sao_info *sao_chroma = &cur_pic->sao_chroma[lcu.y * stride + lcu.x];
// sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma);
sao_reconstruct(encoder, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
sao_reconstruct(encoder, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
sao_reconstruct(encoder, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
sao_reconstruct(encoder_state->encoder_control, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
sao_reconstruct(encoder_state->encoder_control, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
sao_reconstruct(encoder_state->encoder_control, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
}
}

View file

@ -46,11 +46,11 @@ typedef struct sao_info_struct {
void init_sao_info(sao_info *sao);
void sao_search_chroma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
void sao_search_luma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
void sao_search_chroma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
void sao_search_luma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
void sao_reconstruct(const encoder_control * encoder, picture *pic, const pixel *old_rec,
unsigned x_ctb, unsigned y_ctb,
const sao_info *sao, color_index color_i);
void sao_reconstruct_frame(const encoder_control * const encoder);
void sao_reconstruct_frame(encoder_state *encoder_state);
#endif

View file

@ -117,7 +117,7 @@ static uint32_t get_mvd_coding_cost(vector2d *mvd)
return bitcost;
}
static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y,
int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
{
@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
}
*bitcost = temp_bitcost;
return temp_bitcost*(int32_t)(encoder->cur_lambda_cost+0.5);
return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5);
}
@ -183,7 +183,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
* the predicted motion vector is way off. In the future even more additional
* points like 0,0 might be used, such as vectors from top or left.
*/
static unsigned hexagon_search(const encoder_control * const encoder, unsigned depth,
static unsigned hexagon_search(const encoder_state * const encoder_state, unsigned depth,
const picture *pic, const picture *ref,
const vector2d *orig, vector2d *mv_in_out,
int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
@ -203,7 +203,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
orig->x + mv.x + pattern->x, orig->y + mv.y + pattern->y,
block_width, block_width);
cost += calc_mvd_cost(encoder, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
if (cost < best_cost) {
best_cost = cost;
@ -217,7 +217,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
orig->x, orig->y,
block_width, block_width);
cost += calc_mvd_cost(encoder, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
// If the 0,0 is better, redo the hexagon around that point.
if (cost < best_cost) {
@ -233,7 +233,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
orig->x + pattern->x,
orig->y + pattern->y,
block_width, block_width);
cost += calc_mvd_cost(encoder, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
if (cost < best_cost) {
best_cost = cost;
@ -268,7 +268,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
orig->x + mv.x + offset->x,
orig->y + mv.y + offset->y,
block_width, block_width);
cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
if (cost < best_cost) {
best_cost = cost;
@ -291,7 +291,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
orig->x + mv.x + offset->x,
orig->y + mv.y + offset->y,
block_width, block_width);
cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
@ -370,9 +370,9 @@ static unsigned search_mv_full(unsigned depth,
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
*/
static int search_cu_inter(const encoder_control * const encoder, int x, int y, int depth, lcu_t *lcu)
static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
uint32_t ref_idx = 0;
int x_local = (x&0x3f), y_local = (y&0x3f);
int x_cu = x>>3;
@ -392,8 +392,8 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,
cur_cu->inter.cost = UINT_MAX;
for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
picture *ref_pic = encoder->ref->pics[ref_idx];
for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) {
picture *ref_pic = encoder_state->ref->pics[ref_idx];
unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
uint32_t temp_bitcost = 0;
@ -413,13 +413,13 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,
}
// Get MV candidates
cur_cu->inter.mv_ref = ref_idx;
inter_get_mv_cand(encoder, x, y, depth, mv_cand, cur_cu, lcu);
inter_get_mv_cand(encoder_state, x, y, depth, mv_cand, cur_cu, lcu);
cur_cu->inter.mv_ref = temp_ref_idx;
#if SEARCH_MV_FULL_RADIUS
temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
#else
temp_cost += hexagon_search(encoder, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
#endif
merged = 0;
@ -664,11 +664,11 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
*/
static int search_cu_intra(const encoder_control * const encoder,
static int search_cu_intra(const encoder_state * const encoder_state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu, cabac_data *cabac)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f };
const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
const int8_t cu_width = (LCU_WIDTH >> (depth));
@ -696,7 +696,7 @@ static int search_cu_intra(const encoder_control * const encoder,
intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
// Build reconstructed block to use in prediction with extrapolated borders
intra_build_reference_border(encoder, x_px, y_px, cu_width * 2 + 8,
intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8,
rec_buffer, cu_width * 2 + 8, 0,
cur_pic->width,
cur_pic->height,
@ -709,7 +709,7 @@ static int search_cu_intra(const encoder_control * const encoder,
uint32_t bitcost = UINT32_MAX;
pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
mode = intra_prediction(encoder,ref_pixels, LCU_WIDTH,
mode = intra_prediction(encoder_state,ref_pixels, LCU_WIDTH,
cu_in_rec_buffer, cu_width * 2 + 8, cu_width,
&cost, candidate_modes, &bitcost, cabac);
cur_cu->intra[pu_index].mode = (int8_t)mode;
@ -728,7 +728,7 @@ static int search_cu_intra(const encoder_control * const encoder,
* coding (bitcost * lambda) and cost for coding coefficients (estimated
* here as (coefficient_sum * 1.5) * lambda)
*/
static int lcu_get_final_cost(const encoder_control * const encoder,
static int lcu_get_final_cost(const encoder_state * const encoder_state,
cabac_data *cabac,
const int x_px, const int y_px,
const int depth, lcu_t *lcu)
@ -737,6 +737,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
int cost = 0;
int coeff_cost = 0;
const int rdo = encoder_state->encoder_control->rdo;
int width = LCU_WIDTH>>depth;
int x,y;
@ -759,7 +760,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
}
}
if(encoder->rdo == 1) {
if(rdo == 1) {
// sum of coeffs
for (y = y_local; y < y_local+width; ++y) {
for (x = x_local; x < x_local+width; ++x) {
@ -774,11 +775,11 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
}
}
// Coefficient costs
cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder->cur_lambda_cost+0.5);
cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->cur_lambda_cost+0.5);
// Calculate actual bit costs for coding the coeffs
// RDO
} else if (encoder->rdo == 2) {
} else if (rdo == 2) {
coefficient coeff_temp[32*32];
coefficient coeff_temp_u[16*16];
coefficient coeff_temp_v[16*16];
@ -817,7 +818,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
// Calculate luma coeff bit count
picture_blit_coeffs(&lcu->coeff.y[(blk_y*LCU_WIDTH)+blk_x],coeff_temp,blockwidth,blockwidth,LCU_WIDTH,blockwidth);
coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);
coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);
blk_y >>= 1;
blk_x >>= 1;
@ -832,15 +833,15 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
picture_blit_coeffs(&lcu->coeff.u[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_u,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
picture_blit_coeffs(&lcu->coeff.v[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_v,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
}
// Multiply bit count with lambda to get RD-cost
cost += coeff_cost * (int32_t)(encoder->cur_lambda_cost+0.5);
cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5);
}
// Bitcost
cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder->cur_lambda_cost+0.5);
cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5);
return cost;
}
@ -855,9 +856,9 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
* - All the final data for the LCU gets eventually copied to depth 0, which
* will be the final output of the recursion.
*/
static int search_cu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
static int search_cu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
int cu_width = LCU_WIDTH >> depth;
int cost = MAX_INT;
cu_info *cur_cu;
@ -885,7 +886,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
depth >= MIN_INTER_SEARCH_DEPTH &&
depth <= MAX_INTER_SEARCH_DEPTH)
{
int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
int mode_cost = search_cu_inter(encoder_state, x, y, depth, &work_tree[depth]);
if (mode_cost < cost) {
cost = mode_cost;
cur_cu->type = CU_INTER;
@ -895,7 +896,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
if (depth >= MIN_INTRA_SEARCH_DEPTH &&
depth <= MAX_INTRA_SEARCH_DEPTH)
{
int mode_cost = search_cu_intra(encoder, x, y, depth, &work_tree[depth], cabac);
int mode_cost = search_cu_intra(encoder_state, x, y, depth, &work_tree[depth], cabac);
if (mode_cost < cost) {
cost = mode_cost;
cur_cu->type = CU_INTRA;
@ -906,10 +907,10 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
// mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) {
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
intra_recon_lcu(encoder, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
intra_recon_lcu(encoder_state, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
} else if (cur_cu->type == CU_INTER) {
inter_recon_lcu(encoder, encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
encode_transform_tree(encoder, cabac, x, y, depth, &work_tree[depth]);
inter_recon_lcu(encoder_state->encoder_control, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
encode_transform_tree(encoder_state, cabac, x, y, depth, &work_tree[depth]);
if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
cur_cu->merged = 0;
@ -922,23 +923,23 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
}
}
if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
cost = lcu_get_final_cost(encoder, cabac, x, y, depth, &work_tree[depth]);
cost = lcu_get_final_cost(encoder_state, cabac, x, y, depth, &work_tree[depth]);
}
// Recursively split all the way to max search depth.
if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
int half_cu = cu_width / 2;
int split_cost = (int)(4.5 * encoder->cur_lambda_cost);
int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);
// If skip mode was selected for the block, skip further search.
// Skip mode means there's no coefficients in the block, so splitting
// might not give any better results but takes more time to do.
if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] ||
cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
split_cost += search_cu(encoder, cabac, x, y, depth + 1, work_tree);
split_cost += search_cu(encoder, cabac, x + half_cu, y, depth + 1, work_tree);
split_cost += search_cu(encoder, cabac, x, y + half_cu, depth + 1, work_tree);
split_cost += search_cu(encoder, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
split_cost += search_cu(encoder_state, cabac, x, y, depth + 1, work_tree);
split_cost += search_cu(encoder_state, cabac, x + half_cu, y, depth + 1, work_tree);
split_cost += search_cu(encoder_state, cabac, x, y + half_cu, depth + 1, work_tree);
split_cost += search_cu(encoder_state, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
} else {
split_cost = INT_MAX;
}
@ -963,9 +964,9 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
* - Copy reference pixels from neighbouring LCUs.
* - Copy reference pixels from this LCU.
*/
static void init_lcu_t(const encoder_control * const encoder, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
{
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
// Copy reference cu_info structs from neighbouring LCUs.
{
@ -1045,7 +1046,7 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const
// Copy LCU pixels.
{
const picture * const pic = encoder->in.cur_pic;
const picture * const pic = encoder_state->cur_pic;
int pic_width = cur_pic->width;
int x_max = MIN(x + LCU_WIDTH, pic_width) - x;
int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y;
@ -1069,13 +1070,13 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const
/**
* Copy CU and pixel data to its place in the picture data structure.
*/
static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, int y_px, const lcu_t *lcu)
static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x_px, int y_px, const lcu_t *lcu)
{
// Copy non-reference CUs to picture.
{
const int x_cu = x_px >> MAX_DEPTH;
const int y_cu = y_px >> MAX_DEPTH;
const picture * const cur_pic = encoder->in.cur_pic;
const picture * const cur_pic = encoder_state->cur_pic;
const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH;
cu_info *const cu_array = cur_pic->cu_array[MAX_DEPTH];
@ -1095,7 +1096,7 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,
// Copy pixels to picture.
{
picture * const pic = encoder->in.cur_pic;
picture * const pic = encoder_state->cur_pic;
const int pic_width = pic->width;
const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px;
const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px;
@ -1123,18 +1124,18 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,
* Search LCU for modes.
* - Best mode gets copied to current picture.
*/
void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
void search_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
{
// One working lcu_t per depth level, indexed 0..MAX_PU_DEPTH inclusive.
lcu_t work_tree[MAX_PU_DEPTH + 1];
int depth;
// Initialize work tree: zero every level, then let init_lcu_t() fill it
// for the LCU at (x, y) using hor_buf and ver_buf.
for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) {
memset(&work_tree[depth], 0, sizeof(work_tree[depth]));
init_lcu_t(encoder, x, y, &work_tree[depth], hor_buf, ver_buf);
init_lcu_t(encoder_state, x, y, &work_tree[depth], hor_buf, ver_buf);
}
// Start search from depth 0; the return value of search_cu() is not used here.
search_cu(encoder, cabac, x, y, 0, work_tree);
search_cu(encoder_state, cabac, x, y, 0, work_tree);
// Hand the depth-0 entry of the work tree to copy_lcu_to_cu_data() for (x, y).
copy_lcu_to_cu_data(encoder, x, y, &work_tree[0]);
copy_lcu_to_cu_data(encoder_state, x, y, &work_tree[0]);
}

View file

@ -30,6 +30,6 @@
#include "picture.h"
void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);
void search_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);
#endif

View file

@ -623,9 +623,10 @@ void itransform2d(const encoder_control * const encoder,int16_t *block,int16_t *
* \brief quantize transformed coefficients
*
*/
void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef, int32_t width,
void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type )
{
const encoder_control * const encoder = encoder_state->encoder_control;
const uint32_t log2_block_size = g_convert_to_bit[ width ] + 2;
const uint32_t * const scan = g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ];
@ -633,7 +634,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2];
#endif
int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
//New block for variable definitions
{
@ -645,7 +646,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
int32_t add = ((encoder->in.cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
int32_t q_bits8 = q_bits - 8;
for (n = 0; n < width * height; n++) {
@ -754,13 +755,14 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
* \brief inverse quantize transformed and quantized coefficients
*
*/
void dequant(const encoder_control * const encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
{
const encoder_control * const encoder = encoder_state->encoder_control;
int32_t shift,add,coeff_q,clip_q_coef;
int32_t n;
int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);
int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
shift = 20 - QUANT_SHIFT - transform_shift;

View file

@ -34,9 +34,9 @@ extern const uint8_t g_chroma_scale[58];
extern const int16_t g_inv_quant_scales[6];
void quant(const encoder_control *encoder, int16_t *coef, int16_t *q_coef, int32_t width,
void quant(const encoder_state *encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type);
void dequant(const encoder_control *encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);
void dequant(const encoder_state *encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);
void transformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
void itransformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);