diff --git a/src/encmain.c b/src/encmain.c index ede264bb..c8e730bc 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -60,7 +60,9 @@ int main(int argc, char *argv[]) config *cfg = NULL; //!< Global configuration FILE *input = NULL; //!< input file (YUV) FILE *output = NULL; //!< output file (HEVC NAL stream) - encoder_control *encoder = NULL; //!< Encoder control struct + encoder_control encoder; + encoder_state encoder_state; + picture *cur_pic; double psnr[3] = { 0.0, 0.0, 0.0 }; uint64_t curpos = 0; uint64_t lastpos = 0; @@ -206,68 +208,79 @@ int main(int argc, char *argv[]) goto exit_failure; } } - - encoder = init_encoder_control(cfg); - if (!encoder) + + //Allocate and init exp golomb table + if (!init_exp_golomb(4096*8)) { + fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n"); goto exit_failure; + } + if (!encoder_control_init(&encoder, cfg)) { + goto exit_failure; + } + // Set output file - - encoder->output = output; - encoder->stream.file.output = output; + encoder.out.file = output; + // input init (TODO: read from commandline / config) - encoder->bitdepth = 8; - encoder->frame = 0; - encoder->QP = (int8_t)encoder->cfg->qp; - encoder->in.video_format = FORMAT_420; + encoder.bitdepth = 8; + encoder.in.video_format = FORMAT_420; + // deblocking filter - encoder->deblock_enable = (int8_t)encoder->cfg->deblock_enable; - encoder->beta_offset_div2 = (int8_t)encoder->cfg->deblock_beta; - encoder->tc_offset_div2 = (int8_t)encoder->cfg->deblock_tc; + encoder.deblock_enable = (int8_t)encoder.cfg->deblock_enable; + encoder.beta_offset_div2 = (int8_t)encoder.cfg->deblock_beta; + encoder.tc_offset_div2 = (int8_t)encoder.cfg->deblock_tc; // SAO - encoder->sao_enable = (int8_t)encoder->cfg->sao_enable; + encoder.sao_enable = (int8_t)encoder.cfg->sao_enable; // RDO - encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable; - encoder->rdo = (int8_t)encoder->cfg->rdo; + encoder.rdoq_enable = (int8_t)encoder.cfg->rdoq_enable; + encoder.rdo = 
(int8_t)encoder.cfg->rdo; // TR SKIP - encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable; + encoder.trskip_enable = (int8_t)encoder.cfg->trskip_enable; // VUI - encoder->vui.sar_width = (int16_t)encoder->cfg->vui.sar_width; - encoder->vui.sar_height = (int16_t)encoder->cfg->vui.sar_height; - encoder->vui.overscan = encoder->cfg->vui.overscan; - encoder->vui.videoformat = encoder->cfg->vui.videoformat; - encoder->vui.fullrange = encoder->cfg->vui.fullrange; - encoder->vui.colorprim = encoder->cfg->vui.colorprim; - encoder->vui.transfer = encoder->cfg->vui.transfer; - encoder->vui.colormatrix = encoder->cfg->vui.colormatrix; - encoder->vui.chroma_loc = (int8_t)encoder->cfg->vui.chroma_loc; + encoder.vui.sar_width = (int16_t)encoder.cfg->vui.sar_width; + encoder.vui.sar_height = (int16_t)encoder.cfg->vui.sar_height; + encoder.vui.overscan = encoder.cfg->vui.overscan; + encoder.vui.videoformat = encoder.cfg->vui.videoformat; + encoder.vui.fullrange = encoder.cfg->vui.fullrange; + encoder.vui.colorprim = encoder.cfg->vui.colorprim; + encoder.vui.transfer = encoder.cfg->vui.transfer; + encoder.vui.colormatrix = encoder.cfg->vui.colormatrix; + encoder.vui.chroma_loc = (int8_t)encoder.cfg->vui.chroma_loc; // AUD - encoder->aud_enable = (int8_t)encoder->cfg->aud_enable; + encoder.aud_enable = (int8_t)encoder.cfg->aud_enable; - init_encoder_input(&encoder->in, input, cfg->width, cfg->height); + encoder_control_input_init(&encoder, input, cfg->width, cfg->height); fprintf(stderr, "Input: %s, output: %s\n", cfg->input, cfg->output); fprintf(stderr, " Video size: %dx%d (input=%dx%d)\n", - encoder->in.cur_pic->width, encoder->in.cur_pic->height, - encoder->in.real_width, encoder->in.real_height); + encoder.in.width, encoder.in.height, + encoder.in.real_width, encoder.in.real_height); + + if (!encoder_state_init(&encoder_state, &encoder)) { + goto exit_failure; + } + + encoder_state.frame = 0; + encoder_state.QP = (int8_t)encoder.cfg->qp; // Only the code that handles 
conformance window coding needs to know // the real dimensions. As a quick fix for broken non-multiple of 8 videos, // change the input values here to be the real values. For a real fix // encoder.in probably needs to be merged into cfg. // The real fix would be: never go dig in cfg - //cfg->width = encoder->in.width; - //cfg->height = encoder->in.height; + //cfg->width = encoder.in.width; + //cfg->height = encoder.in.height; // Start coding cycle while data on input and not on the last frame - while(!cfg->frames || encoder->frame < cfg->frames) { + while(!cfg->frames || encoder_state.frame < cfg->frames) { int32_t diff; double temp_psnr[3]; // Skip '--seek' frames before input. // This block can be moved outside this while loop when there is a // mechanism to skip the while loop on error. - if (encoder->frame == 0 && cfg->seek > 0) { + if (encoder_state.frame == 0 && cfg->seek > 0) { int frame_bytes = cfg->width * cfg->height * 3 / 2; int error = 0; @@ -275,7 +288,7 @@ int main(int argc, char *argv[]) // Input is stdin. int i; for (i = 0; !error && i < cfg->seek; ++i) { - error = !read_one_frame(input, encoder); + error = !read_one_frame(input, &encoder_state); } } else { // input is a file. We hope. Proper detection is OS dependent. @@ -288,25 +301,27 @@ int main(int argc, char *argv[]) } // Read one frame from the input - if (!read_one_frame(input, encoder)) { + if (!read_one_frame(input, &encoder_state)) { if (!feof(input)) - fprintf(stderr, "Failed to read a frame %d\n", encoder->frame); + fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame); break; } // The actual coding happens here, after this function we have a coded frame - encode_one_frame(encoder); + encode_one_frame(&encoder_state); + + cur_pic = encoder_state.cur_pic; if (cfg->debug != NULL) { // Write reconstructed frame out. // Use conformance-window dimensions instead of internal ones. 
- const int width = encoder->in.cur_pic->width; - const int out_width = encoder->in.real_width; - const int out_height = encoder->in.real_height; + const int width = cur_pic->width; + const int out_width = encoder.in.real_width; + const int out_height = encoder.in.real_height; int y; - const pixel *y_rec = encoder->in.cur_pic->y_recdata; - const pixel *u_rec = encoder->in.cur_pic->u_recdata; - const pixel *v_rec = encoder->in.cur_pic->v_recdata; + const pixel *y_rec = cur_pic->y_recdata; + const pixel *u_rec = cur_pic->u_recdata; + const pixel *v_rec = cur_pic->v_recdata; for (y = 0; y < out_height; ++y) { fwrite(&y_rec[y * width], sizeof(*y_rec), out_width, recout); @@ -325,12 +340,12 @@ int main(int argc, char *argv[]) lastpos = curpos; // PSNR calculations - temp_psnr[0] = image_psnr(encoder->in.cur_pic->y_data, encoder->in.cur_pic->y_recdata, cfg->width, cfg->height); - temp_psnr[1] = image_psnr(encoder->in.cur_pic->u_data, encoder->in.cur_pic->u_recdata, cfg->width>>1, cfg->height>>1); - temp_psnr[2] = image_psnr(encoder->in.cur_pic->v_data, encoder->in.cur_pic->v_recdata, cfg->width>>1, cfg->height>>1); + temp_psnr[0] = image_psnr(cur_pic->y_data, cur_pic->y_recdata, cfg->width, cfg->height); + temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1); + temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1); - fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, - "BPI"[encoder->in.cur_pic->slicetype%3], diff<<3, + fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, + "BPI"[cur_pic->slicetype%3], diff<<3, temp_psnr[0], temp_psnr[1], temp_psnr[2]); // Increment total PSNR @@ -342,33 +357,33 @@ int main(int argc, char *argv[]) // TODO: add more than one reference // Remove the ref pic (if present) - if (encoder->ref->used_size == (uint32_t)encoder->cfg->ref_frames) { - picture_list_rem(encoder->ref, 
encoder->ref->used_size-1, 1); + if (encoder_state.ref->used_size == (uint32_t)encoder.cfg->ref_frames) { + picture_list_rem(encoder_state.ref, encoder_state.ref->used_size-1, 1); } // Add current picture as reference - picture_list_add(encoder->ref, encoder->in.cur_pic); + picture_list_add(encoder_state.ref, cur_pic); // Allocate new memory to current picture // TODO: reuse memory from old reference - encoder->in.cur_pic = picture_init(encoder->in.cur_pic->width, encoder->in.cur_pic->height, encoder->in.cur_pic->width_in_lcu, encoder->in.cur_pic->height_in_lcu); + encoder_state.cur_pic = picture_init(encoder_state.cur_pic->width, encoder_state.cur_pic->height, encoder_state.cur_pic->width_in_lcu, encoder_state.cur_pic->height_in_lcu); // Copy pointer from the last cur_pic because we don't want to reallocate it - MOVE_POINTER(encoder->in.cur_pic->coeff_y,encoder->ref->pics[0]->coeff_y); - MOVE_POINTER(encoder->in.cur_pic->coeff_u,encoder->ref->pics[0]->coeff_u); - MOVE_POINTER(encoder->in.cur_pic->coeff_v,encoder->ref->pics[0]->coeff_v); + MOVE_POINTER(encoder_state.cur_pic->coeff_y,encoder_state.ref->pics[0]->coeff_y); + MOVE_POINTER(encoder_state.cur_pic->coeff_u,encoder_state.ref->pics[0]->coeff_u); + MOVE_POINTER(encoder_state.cur_pic->coeff_v,encoder_state.ref->pics[0]->coeff_v); - MOVE_POINTER(encoder->in.cur_pic->pred_y,encoder->ref->pics[0]->pred_y); - MOVE_POINTER(encoder->in.cur_pic->pred_u,encoder->ref->pics[0]->pred_u); - MOVE_POINTER(encoder->in.cur_pic->pred_v,encoder->ref->pics[0]->pred_v); + MOVE_POINTER(encoder_state.cur_pic->pred_y,encoder_state.ref->pics[0]->pred_y); + MOVE_POINTER(encoder_state.cur_pic->pred_u,encoder_state.ref->pics[0]->pred_u); + MOVE_POINTER(encoder_state.cur_pic->pred_v,encoder_state.ref->pics[0]->pred_v); - encoder->frame++; - encoder->poc++; + encoder_state.frame++; + encoder_state.poc++; } // Coding finished fgetpos(output,(fpos_t*)&curpos); // Print statistics of the coding - fprintf(stderr, " Processed %d frames, %10llu 
bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, (long long unsigned int) curpos<<3, - psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame); + fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3, + psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame); fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC); fclose(input); @@ -377,12 +392,9 @@ int main(int argc, char *argv[]) // Deallocating config_destroy(cfg); - scalinglist_destroy(&encoder->scaling_list); - picture_list_destroy(encoder->ref); - picture_destroy(encoder->in.cur_pic); - FREE_POINTER(encoder->in.cur_pic); - bitstream_finalize(&encoder->stream); - free(encoder); + encoder_state_finalize(&encoder_state); + encoder_control_finalize(&encoder); + free_exp_golomb(); return EXIT_SUCCESS; diff --git a/src/encoder.c b/src/encoder.c index 92253e5a..e81c79f7 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -44,9 +44,9 @@ #include "rdo.h" /* Local functions. 
*/ -static void add_checksum(encoder_control * const encoder); -static void encode_VUI(encoder_control * const encoder); -static void encode_sao(encoder_control * const encoder, +static void add_checksum(encoder_state *encoder); +static void encode_VUI(encoder_state *encoder); +static void encode_sao(encoder_state *encoder, cabac_data *cabac, unsigned x_lcu, uint16_t y_lcu, sao_info *sao_luma, sao_info *sao_chroma); @@ -57,10 +57,10 @@ static void encode_sao(encoder_control * const encoder, Implementation closer to HM (Used HM12 as reference) - Still missing functionality when GOP and B-pictures are used */ -void init_lambda(encoder_control * const encoder) +void encoder_state_init_lambda(encoder_state * const encoder_state) { - const picture * const cur_pic = encoder->in.cur_pic; - double qp = encoder->QP; + const picture * const cur_pic = encoder_state->cur_pic; + double qp = encoder_state->QP; double lambda_scale = 1.0; double qp_temp = qp - 12; double lambda; @@ -78,180 +78,179 @@ void init_lambda(encoder_control * const encoder) lambda *= 0.95; } - encoder->cur_lambda_cost = lambda; + encoder_state->cur_lambda_cost = lambda; } -encoder_control *init_encoder_control(config *cfg) -{ - encoder_control *enc_c = NULL; - bitstream *stream = NULL; - picture_list *pic_list = NULL; - +int encoder_control_init(encoder_control * const encoder, const config * const cfg) { if (!cfg) { fprintf(stderr, "Config object must not be null!\n"); - goto init_failure; + return 0; } - - // Allocate the main struct - enc_c = malloc(sizeof(encoder_control)); - if(!enc_c){ - fprintf(stderr, "Failed to allocate encoder_control!\n"); - goto init_failure; - } - - // Config pointer to encoder struct - enc_c->cfg = cfg; - - // input init (TODO: read from commandline / config) - enc_c->bitdepth = 8; - enc_c->frame = 0; - enc_c->QP = (int8_t)enc_c->cfg->qp; - enc_c->in.video_format = FORMAT_420; + + // Config pointer to config struct + encoder->cfg = cfg; + encoder->bitdepth = 8; + // 
deblocking filter - enc_c->deblock_enable = 1; - enc_c->beta_offset_div2 = 0; - enc_c->tc_offset_div2 = 0; + encoder->deblock_enable = 1; + encoder->beta_offset_div2 = 0; + encoder->tc_offset_div2 = 0; // SAO - enc_c->sao_enable = 1; + encoder->sao_enable = 1; // Rate-distortion optimization level - enc_c->rdo = 1; - - // Allocate the bitstream struct - bitstream_init(&enc_c->stream, BITSTREAM_TYPE_FILE); - - //Allocate and init exp golomb table - if (!init_exp_golomb(4096*8)) { - fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n"); - goto init_failure; - } - + encoder->rdo = 1; + // Initialize the scaling list - scalinglist_init(&enc_c->scaling_list); - - pic_list = picture_list_init(MAX_REF_PIC_COUNT); - if(!pic_list) { - fprintf(stderr, "Failed to allocate the picture list!\n"); - goto init_failure; - } - - enc_c->ref = pic_list; - enc_c->ref_list = REF_PIC_LIST_0; + scalinglist_init(&encoder->scaling_list); // CQM { FILE* cqmfile; cqmfile = cfg->cqmfile ? 
fopen(cfg->cqmfile, "rb") : NULL; if (cqmfile) { - scalinglist_parse(&enc_c->scaling_list, cqmfile); + scalinglist_parse(&encoder->scaling_list, cqmfile); fclose(cqmfile); } } - scalinglist_process(&enc_c->scaling_list, enc_c->bitdepth); + scalinglist_process(&encoder->scaling_list, encoder->bitdepth); - return enc_c; - -init_failure: - // Free everything allocated in this function - free(pic_list); - free(stream); - free(enc_c); - - return NULL; + return 1; } -void init_encoder_input(encoder_input *input, FILE *inputfile, +int encoder_control_finalize(encoder_control * const encoder) { + scalinglist_destroy(&encoder->scaling_list); + + return 1; +} + +int encoder_state_init(encoder_state * const encoder_state, const encoder_control * const encoder) { + picture_list *pic_list = NULL; + + encoder_state->encoder_control = encoder; + + // Allocate the bitstream struct + if (!bitstream_init(&encoder_state->stream, BITSTREAM_TYPE_FILE)) { + fprintf(stderr, "Could not initialize stream!\n"); + return 0; + } + + pic_list = picture_list_init(MAX_REF_PIC_COUNT); + if(!pic_list) { + fprintf(stderr, "Failed to allocate the picture list!\n"); + return 0; + } + + encoder_state->ref = pic_list; + encoder_state->ref_list = REF_PIC_LIST_0; + + encoder_state->frame = 0; + encoder_state->poc = 0; + + // Allocate the picture and CU array + encoder_state->cur_pic = picture_init(encoder->in.width, encoder->in.height, + encoder->in.width_in_lcu, encoder->in.height_in_lcu); + + if (!encoder_state->cur_pic) { + printf("Error allocating picture!\r\n"); + return 0; + } + + // Init coeff data table + encoder_state->cur_pic->coeff_y = MALLOC(coefficient, encoder->in.width * encoder->in.height); + encoder_state->cur_pic->coeff_u = MALLOC(coefficient, (encoder->in.width * encoder->in.height) >> 2); + encoder_state->cur_pic->coeff_v = MALLOC(coefficient, (encoder->in.width * encoder->in.height) >> 2); + + // Init predicted data table + encoder_state->cur_pic->pred_y = MALLOC(pixel, 
encoder->in.width * encoder->in.height); + encoder_state->cur_pic->pred_u = MALLOC(pixel, (encoder->in.width * encoder->in.height) >> 2); + encoder_state->cur_pic->pred_v = MALLOC(pixel, (encoder->in.width * encoder->in.height) >> 2); + + encoder_state->children = NULL; + + encoder_state->stream.file.output = encoder->out.file; + + // Set CABAC output bitstream + encoder_state->cabac.stream = &encoder_state->stream; + + return 1; +} + +int encoder_state_finalize(encoder_state * const encoder_state) { + picture_destroy(encoder_state->cur_pic); + FREE_POINTER(encoder_state->cur_pic); + + picture_list_destroy(encoder_state->ref); + bitstream_finalize(&encoder_state->stream); + return 1; +} + +void encoder_control_input_init(encoder_control * const encoder, FILE *inputfile, const int32_t width, const int32_t height) { - int32_t i_width = width; /*!< \brief input picture width (divisible by the minimum block size)*/ - int32_t i_height = height; /*!< \brief input picture height (divisible by the minimum block size) */ - int32_t i_width_in_lcu; /*!< \brief input picture width in LCU*/ - int32_t i_height_in_lcu; /*!< \brief input picture height in LCU */ - input->file = inputfile; - i_width = width; - i_height = height; - input->real_width = width; - input->real_height = height; + encoder->in.file = inputfile; + encoder->in.width = width; + encoder->in.height = height; + encoder->in.real_width = width; + encoder->in.real_height = height; // If input dimensions are not divisible by the smallest block size, add // pixels to the dimensions, so that they are. These extra pixels will be // compressed along with the real ones but they will be cropped out before // rendering. 
- if (i_width % CU_MIN_SIZE_PIXELS) { - i_width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS); + if (encoder->in.width % CU_MIN_SIZE_PIXELS) { + encoder->in.width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS); } - if (i_height % CU_MIN_SIZE_PIXELS) { - i_height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS); + if (encoder->in.height % CU_MIN_SIZE_PIXELS) { + encoder->in.height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS); } - i_height_in_lcu = i_height / LCU_WIDTH; - i_width_in_lcu = i_width / LCU_WIDTH; + encoder->in.height_in_lcu = encoder->in.height / LCU_WIDTH; + encoder->in.width_in_lcu = encoder->in.width / LCU_WIDTH; // Add one extra LCU when image not divisible by LCU_WIDTH - if (i_height_in_lcu * LCU_WIDTH < height) { - i_height_in_lcu++; + if (encoder->in.height_in_lcu * LCU_WIDTH < height) { + encoder->in.height_in_lcu++; } - if (i_width_in_lcu * LCU_WIDTH < width) { - i_width_in_lcu++; + if (encoder->in.width_in_lcu * LCU_WIDTH < width) { + encoder->in.width_in_lcu++; } - // Allocate the picture and CU array - input->cur_pic = picture_init(i_width, i_height, - i_width_in_lcu, - i_height_in_lcu); - if (!input->cur_pic) { - printf("Error allocating picture!\r\n"); - exit(1); - } #ifdef _DEBUG if (width != i_width || height != i_height) { printf("Picture buffer has been extended to be a multiple of the smallest block size:\r\n"); - printf(" Width = %d (%d), Height = %d (%d)\r\n", width, i_width, height, - i_height); + printf(" Width = %d (%d), Height = %d (%d)\r\n", width, encoder->in.width, height, + encoder->in.height); } #endif - - // Init coeff data table - input->cur_pic->coeff_y = MALLOC(coefficient, i_width * i_height); - input->cur_pic->coeff_u = MALLOC(coefficient, (i_width * i_height) >> 2); - input->cur_pic->coeff_v = MALLOC(coefficient, (i_width * i_height) >> 2); - - // Init predicted data table - input->cur_pic->pred_y = MALLOC(pixel, i_width * i_height); - input->cur_pic->pred_u = MALLOC(pixel, (i_width * 
i_height) >> 2); - input->cur_pic->pred_v = MALLOC(pixel, (i_width * i_height) >> 2); } -static void write_aud(encoder_control * const encoder) +static void write_aud(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - encode_access_unit_delimiter(encoder); + bitstream * const stream = &encoder_state->stream; + encode_access_unit_delimiter(encoder_state); nal_write(stream, AUD_NUT, 0, 1); bitstream_align(stream); } -void encode_one_frame(encoder_control* encoder) +void encode_one_frame(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - picture * const cur_pic = encoder->in.cur_pic; + const encoder_control * const encoder = encoder_state->encoder_control; + bitstream * const stream = &encoder_state->stream; - yuv_t *hor_buf = alloc_yuv_t(cur_pic->width); + yuv_t *hor_buf = alloc_yuv_t(encoder_state->cur_pic->width); // Allocate 2 extra luma pixels so we get 1 extra chroma pixel for the // for the extra pixel on the top right. 
yuv_t *ver_buf = alloc_yuv_t(LCU_WIDTH + 2); - const int is_first_frame = (encoder->frame == 0); - const int is_i_radl = (encoder->cfg->intra_period == 1 && encoder->frame % 2 == 0); - const int is_p_radl = (encoder->cfg->intra_period > 1 && (encoder->frame % encoder->cfg->intra_period) == 0); + const int is_first_frame = (encoder_state->frame == 0); + const int is_i_radl = (encoder->cfg->intra_period == 1 && encoder_state->frame % 2 == 0); + const int is_p_radl = (encoder->cfg->intra_period > 1 && (encoder_state->frame % encoder->cfg->intra_period) == 0); const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; - - - cabac_data cabac; - /** IDR picture when: period == 0 and frame == 0 * period == 1 && frame%2 == 0 @@ -259,48 +258,48 @@ void encode_one_frame(encoder_control* encoder) **/ if (is_radl_frame) { // Clear the reference list - while (encoder->ref->used_size) { - picture_list_rem(encoder->ref, encoder->ref->used_size - 1, 1); + while (encoder_state->ref->used_size) { + picture_list_rem(encoder_state->ref, encoder_state->ref->used_size - 1, 1); } - encoder->poc = 0; + encoder_state->poc = 0; - cur_pic->slicetype = SLICE_I; - cur_pic->type = NAL_IDR_W_RADL; + encoder_state->cur_pic->slicetype = SLICE_I; + encoder_state->cur_pic->type = NAL_IDR_W_RADL; // Access Unit Delimiter (AUD) if (encoder->aud_enable) - write_aud(encoder); + write_aud(encoder_state); // Video Parameter Set (VPS) nal_write(stream, NAL_VPS_NUT, 0, 1); - encode_vid_parameter_set(encoder); + encode_vid_parameter_set(encoder_state); bitstream_align(stream); // Sequence Parameter Set (SPS) nal_write(stream, NAL_SPS_NUT, 0, 1); - encode_seq_parameter_set(encoder); + encode_seq_parameter_set(encoder_state); bitstream_align(stream); // Picture Parameter Set (PPS) nal_write(stream, NAL_PPS_NUT, 0, 1); - encode_pic_parameter_set(encoder); + encode_pic_parameter_set(encoder_state); bitstream_align(stream); - if (encoder->frame == 0) { + if (encoder_state->frame == 0) { // Prefix SEI 
nal_write(stream, PREFIX_SEI_NUT, 0, 0); - encode_prefix_sei_version(encoder); + encode_prefix_sei_version(encoder_state); bitstream_align(stream); } } else { // When intra period == 1, all pictures are intra - cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; - cur_pic->type = NAL_TRAIL_R; + encoder_state->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; + encoder_state->cur_pic->type = NAL_TRAIL_R; // Access Unit Delimiter (AUD) if (encoder->aud_enable) - write_aud(encoder); + write_aud(encoder_state); } { @@ -312,18 +311,16 @@ void encode_one_frame(encoder_control* encoder) is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code); } - // Set CABAC output bitstream - cabac.stream = stream; - - cabac_start(&cabac); - init_contexts(&cabac, encoder->QP, cur_pic->slicetype); - encode_slice_header(encoder); + cabac_start(&encoder_state->cabac); + init_contexts(&encoder_state->cabac, encoder_state->QP, encoder_state->cur_pic->slicetype); + encode_slice_header(encoder_state); bitstream_align(stream); // Initialize lambda value(s) to use in search - init_lambda(encoder); + encoder_state_init_lambda(encoder_state); { + picture* const cur_pic = encoder_state->cur_pic; vector2d lcu; const vector2d size = { cur_pic->width, cur_pic->height }; const vector2d size_lcu = { cur_pic->width_in_lcu, cur_pic->height_in_lcu }; @@ -339,7 +336,7 @@ void encode_one_frame(encoder_control* encoder) const int right = px.x + lcu_dim.x; const int bottom = px.y + lcu_dim.y; - search_lcu(encoder, &cabac, px.x, px.y, hor_buf, ver_buf); + search_lcu(encoder_state, &encoder_state->cabac, px.x, px.y, hor_buf, ver_buf); // Take the bottom right pixel from the LCU above and put it as the // first pixel in this LCUs rightmost pixels. 
@@ -371,7 +368,7 @@ void encode_one_frame(encoder_control* encoder) 1, lcu_dim.y / 2, size.x / 2, 1); if (encoder->deblock_enable) { - filter_deblock_lcu(encoder, px.x, px.y); + filter_deblock_lcu(encoder_state, px.x, px.y); } if (encoder->sao_enable) { @@ -384,43 +381,43 @@ void encode_one_frame(encoder_control* encoder) { sao_info *sao_top = lcu. y != 0 ? &cur_pic->sao_luma[(lcu.y - 1) * stride + lcu.x] : NULL; sao_info *sao_left = lcu.x != 0 ? &cur_pic->sao_luma[lcu.y * stride + lcu.x -1] : NULL; - sao_search_luma(encoder, cur_pic, lcu.x, lcu.y, sao_luma, sao_top, sao_left); + sao_search_luma(encoder_state, cur_pic, lcu.x, lcu.y, sao_luma, sao_top, sao_left); } { sao_info *sao_top = lcu.y != 0 ? &cur_pic->sao_chroma[(lcu.y - 1) * stride + lcu.x] : NULL; sao_info *sao_left = lcu.x != 0 ? &cur_pic->sao_chroma[lcu.y * stride + lcu.x - 1] : NULL; - sao_search_chroma(encoder, cur_pic, lcu.x, lcu.y, sao_chroma, sao_top, sao_left); + sao_search_chroma(encoder_state, cur_pic, lcu.x, lcu.y, sao_chroma, sao_top, sao_left); } // Merge only if both luma and chroma can be merged sao_luma->merge_left_flag = sao_luma->merge_left_flag & sao_chroma->merge_left_flag; sao_luma->merge_up_flag = sao_luma->merge_up_flag & sao_chroma->merge_up_flag; - encode_sao(encoder, &cabac, lcu.x, lcu.y, sao_luma, sao_chroma); + encode_sao(encoder_state, &encoder_state->cabac, lcu.x, lcu.y, sao_luma, sao_chroma); } - - encode_coding_tree(encoder, &cabac, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0); + + encode_coding_tree(encoder_state, &encoder_state->cabac, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0); { const int last_lcu = (lcu.x == size_lcu.x - 1 && lcu.y == size_lcu.y - 1); - cabac_encode_bin_trm(&cabac, last_lcu ? 1 : 0); // end_of_slice_segment_flag + cabac_encode_bin_trm(&encoder_state->cabac, last_lcu ? 
1 : 0); // end_of_slice_segment_flag } } } } - cabac_flush(&cabac); + cabac_flush(&encoder_state->cabac); bitstream_align(stream); if (encoder->sao_enable) { - sao_reconstruct_frame(encoder); + sao_reconstruct_frame(encoder_state); } // Calculate checksum - add_checksum(encoder); + add_checksum(encoder_state); - cur_pic->poc = encoder->poc; + encoder_state->cur_pic->poc = encoder_state->poc; dealloc_yuv_t(hor_buf); dealloc_yuv_t(ver_buf); @@ -465,43 +462,42 @@ static int read_and_fill_frame_data(FILE *file, return 1; } -int read_one_frame(FILE* file, const encoder_control * const encoder) +int read_one_frame(FILE* file, const encoder_state * const encoder_state) { - const encoder_input* const in = &encoder->in; - unsigned width = in->real_width; - unsigned height = in->real_height; - unsigned array_width = in->cur_pic->width; - unsigned array_height = in->cur_pic->height; + unsigned width = encoder_state->encoder_control->in.real_width; + unsigned height = encoder_state->encoder_control->in.real_height; + unsigned array_width = encoder_state->cur_pic->width; + unsigned array_height = encoder_state->cur_pic->height; if (width != array_width) { // In the case of frames not being aligned on 8 bit borders, bits need to be copied to fill them in. if (!read_and_fill_frame_data(file, width, height, array_width, - in->cur_pic->y_data) || + encoder_state->cur_pic->y_data) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, - in->cur_pic->u_data) || + encoder_state->cur_pic->u_data) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, - in->cur_pic->v_data)) + encoder_state->cur_pic->v_data)) return 0; } else { // Otherwise the data can be read directly to the array. 
unsigned y_size = width * height; unsigned uv_size = (width >> 1) * (height >> 1); - if (y_size != fread(in->cur_pic->y_data, sizeof(unsigned char), + if (y_size != fread(encoder_state->cur_pic->y_data, sizeof(unsigned char), y_size, file) || - uv_size != fread(in->cur_pic->u_data, sizeof(unsigned char), + uv_size != fread(encoder_state->cur_pic->u_data, sizeof(unsigned char), uv_size, file) || - uv_size != fread(in->cur_pic->v_data, sizeof(unsigned char), + uv_size != fread(encoder_state->cur_pic->v_data, sizeof(unsigned char), uv_size, file)) return 0; } if (height != array_height) { fill_after_frame(height, array_width, array_height, - in->cur_pic->y_data); + encoder_state->cur_pic->y_data); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, - in->cur_pic->u_data); + encoder_state->cur_pic->u_data); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, - in->cur_pic->v_data); + encoder_state->cur_pic->v_data); } return 1; } @@ -511,10 +507,10 @@ int read_one_frame(FILE* file, const encoder_control * const encoder) * \param encoder The encoder. 
* \returns Void */ -static void add_checksum(encoder_control * const encoder) +static void add_checksum(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - const picture * const cur_pic = encoder->in.cur_pic; + bitstream * const stream = &encoder_state->stream; + const picture * const cur_pic = encoder_state->cur_pic; unsigned char checksum[3][SEI_HASH_MAX_LENGTH]; uint32_t checksum_val; unsigned int i; @@ -538,24 +534,24 @@ static void add_checksum(encoder_control * const encoder) bitstream_align(stream); } -void encode_access_unit_delimiter(encoder_control * const encoder) +void encode_access_unit_delimiter(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - const picture * const cur_pic = encoder->in.cur_pic; + bitstream * const stream = &encoder_state->stream; + const picture * const cur_pic = encoder_state->cur_pic; uint8_t pic_type = cur_pic->slicetype == SLICE_I ? 0 : cur_pic->slicetype == SLICE_P ? 1 : 2; WRITE_U(stream, pic_type, 3, "pic_type"); } -void encode_prefix_sei_version(encoder_control * const encoder) +void encode_prefix_sei_version(encoder_state * const encoder_state) { #define STR_BUF_LEN 1000 - bitstream * const stream = &encoder->stream; + bitstream * const stream = &encoder_state->stream; int i, length; char buf[STR_BUF_LEN] = { 0 }; char *s = buf + 16; - const config *cfg = encoder->cfg; + const config * const cfg = encoder_state->encoder_control->cfg; // random uuid_iso_iec_11578 generated with www.famkruithof.net/uuid/uuidgen static const uint8_t uuid[16] = { @@ -595,9 +591,9 @@ void encode_prefix_sei_version(encoder_control * const encoder) #undef STR_BUF_LEN } -void encode_pic_parameter_set(encoder_control * const encoder) +void encode_pic_parameter_set(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; + bitstream * const stream = &encoder_state->stream; #ifdef _DEBUG printf("=========== Picture Parameter Set ID: 0 
===========\n"); #endif @@ -611,9 +607,9 @@ void encode_pic_parameter_set(encoder_control * const encoder) WRITE_UE(stream, 0, "num_ref_idx_l0_default_active_minus1"); WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); - WRITE_SE(stream, ((int8_t)encoder->QP)-26, "pic_init_qp_minus26"); + WRITE_SE(stream, ((int8_t)encoder_state->QP)-26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); - WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag"); + WRITE_U(stream, encoder_state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); //if cu_qp_delta_enabled_flag //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -636,13 +632,13 @@ void encode_pic_parameter_set(encoder_control * const encoder) WRITE_U(stream, 1, 1, "deblocking_filter_control_present_flag"); //IF deblocking_filter WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag"); - WRITE_U(stream, encoder->deblock_enable ? 0 : 1, 1, + WRITE_U(stream, encoder_state->encoder_control->deblock_enable ? 
0 : 1, 1, "pps_disable_deblocking_filter_flag"); //IF !disabled - if (encoder->deblock_enable) { - WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2"); - WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2"); + if (encoder_state->encoder_control->deblock_enable) { + WRITE_SE(stream, encoder_state->encoder_control->beta_offset_div2, "beta_offset_div2"); + WRITE_SE(stream, encoder_state->encoder_control->tc_offset_div2, "tc_offset_div2"); } //ENDIF @@ -656,9 +652,9 @@ void encode_pic_parameter_set(encoder_control * const encoder) WRITE_U(stream, 0, 1, "pps_extension_flag"); } -static void encode_PTL(encoder_control * const encoder) +static void encode_PTL(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; + bitstream * const stream = &encoder_state->stream; int i; // PTL // Profile Tier @@ -696,9 +692,10 @@ static void encode_PTL(encoder_control * const encoder) // end PTL } -static void encode_scaling_list(encoder_control * const encoder) +static void encode_scaling_list(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; + const encoder_control * const encoder = encoder_state->encoder_control; + bitstream * const stream = &encoder_state->stream; uint32_t size_id; for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) { int32_t list_id; @@ -752,11 +749,10 @@ static void encode_scaling_list(encoder_control * const encoder) } } -void encode_seq_parameter_set(encoder_control * const encoder) +void encode_seq_parameter_set(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - const picture * const cur_pic = encoder->in.cur_pic; - const encoder_input* const in = &encoder->in; + bitstream * const stream = &encoder_state->stream; + const picture * const cur_pic = encoder_state->cur_pic; #ifdef _DEBUG printf("=========== Sequence Parameter Set ID: 0 ===========\n"); @@ -767,20 +763,20 @@ void encode_seq_parameter_set(encoder_control * const 
encoder) WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1"); WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag"); - encode_PTL(encoder); + encode_PTL(encoder_state); WRITE_UE(stream, 0, "sps_seq_parameter_set_id"); - WRITE_UE(stream, encoder->in.video_format, + WRITE_UE(stream, encoder_state->encoder_control->in.video_format, "chroma_format_idc"); - if (encoder->in.video_format == 3) { + if (encoder_state->encoder_control->in.video_format == 3) { WRITE_U(stream, 0, 1, "separate_colour_plane_flag"); } WRITE_UE(stream, cur_pic->width, "pic_width_in_luma_samples"); WRITE_UE(stream, cur_pic->height, "pic_height_in_luma_samples"); - if (cur_pic->width != in->real_width || cur_pic->height != in->real_height) { + if (cur_pic->width != encoder_state->encoder_control->in.real_width || cur_pic->height != encoder_state->encoder_control->in.real_height) { // The standard does not seem to allow setting conf_win values such that // the number of luma samples is not a multiple of 2. Options are to either // hide one line or show an extra line of non-video. 
Neither seems like a @@ -788,10 +784,10 @@ void encode_seq_parameter_set(encoder_control * const encoder) assert(!(cur_pic->width % 2)); WRITE_U(stream, 1, 1, "conformance_window_flag"); WRITE_UE(stream, 0, "conf_win_left_offset"); - WRITE_UE(stream, (cur_pic->width - in->real_width) >> 1, + WRITE_UE(stream, (cur_pic->width - encoder_state->encoder_control->in.real_width) >> 1, "conf_win_right_offset"); WRITE_UE(stream, 0, "conf_win_top_offset"); - WRITE_UE(stream, (cur_pic->height - in->real_height) >> 1, + WRITE_UE(stream, (cur_pic->height - encoder_state->encoder_control->in.real_height) >> 1, "conf_win_bottom_offset"); } else { WRITE_U(stream, 0, 1, "conformance_window_flag"); @@ -800,8 +796,8 @@ void encode_seq_parameter_set(encoder_control * const encoder) //IF window flag //END IF - WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_luma_minus8"); - WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_chroma_minus8"); + WRITE_UE(stream, encoder_state->encoder_control->bitdepth-8, "bit_depth_luma_minus8"); + WRITE_UE(stream, encoder_state->encoder_control->bitdepth-8, "bit_depth_chroma_minus8"); WRITE_UE(stream, 0, "log2_max_pic_order_cnt_lsb_minus4"); WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag"); @@ -819,14 +815,14 @@ void encode_seq_parameter_set(encoder_control * const encoder) WRITE_UE(stream, TR_DEPTH_INTRA, "max_transform_hierarchy_depth_intra"); // scaling list - WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag"); - if (encoder->scaling_list.enable) { + WRITE_U(stream, encoder_state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag"); + if (encoder_state->encoder_control->scaling_list.enable) { WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag"); - encode_scaling_list(encoder); + encode_scaling_list(encoder_state); } WRITE_U(stream, 0, 1, "amp_enabled_flag"); - WRITE_U(stream, encoder->sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder_state->encoder_control->sao_enable ? 
1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -852,14 +848,14 @@ void encode_seq_parameter_set(encoder_control * const encoder) WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag"); WRITE_U(stream, 1, 1, "vui_parameters_present_flag"); - encode_VUI(encoder); + encode_VUI(encoder_state); WRITE_U(stream, 0, 1, "sps_extension_flag"); } -void encode_vid_parameter_set(encoder_control * const encoder) +void encode_vid_parameter_set(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; + bitstream * const stream = &encoder_state->stream; int i; #ifdef _DEBUG printf("=========== Video Parameter Set ID: 0 ===========\n"); @@ -872,7 +868,7 @@ void encode_vid_parameter_set(encoder_control * const encoder) WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag"); WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits"); - encode_PTL(encoder); + encode_PTL(encoder_state); WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag"); @@ -893,9 +889,10 @@ void encode_vid_parameter_set(encoder_control * const encoder) WRITE_U(stream, 0, 1, "vps_extension_flag"); } -static void encode_VUI(encoder_control * const encoder) +static void encode_VUI(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; + bitstream * const stream = &encoder_state->stream; + const encoder_control * const encoder = encoder_state->encoder_control; #ifdef _DEBUG printf("=========== VUI Set ID: 0 ===========\n"); #endif @@ -993,10 +990,11 @@ static void encode_VUI(encoder_control * const encoder) //ENDIF } -void encode_slice_header(encoder_control * const encoder) +void encode_slice_header(encoder_state * const encoder_state) { - bitstream * const stream = &encoder->stream; - const picture * const cur_pic = encoder->in.cur_pic; + const encoder_control * const encoder = encoder_state->encoder_control; + bitstream * const stream = &encoder_state->stream; + 
const picture * const cur_pic = encoder_state->cur_pic; #ifdef _DEBUG printf("=========== Slice ===========\n"); @@ -1024,9 +1022,9 @@ void encode_slice_header(encoder_control * const encoder) if (cur_pic->type != NAL_IDR_W_RADL && cur_pic->type != NAL_IDR_N_LP) { int j; - int ref_negative = encoder->ref->used_size; + int ref_negative = encoder_state->ref->used_size; int ref_positive = 0; - WRITE_U(stream, encoder->poc&0xf, 4, "pic_order_cnt_lsb"); + WRITE_U(stream, encoder_state->poc&0xf, 4, "pic_order_cnt_lsb"); WRITE_U(stream, 0, 1, "short_term_ref_pic_set_sps_flag"); WRITE_UE(stream, ref_negative, "num_negative_pics"); WRITE_UE(stream, ref_positive, "num_positive_pics"); @@ -1049,7 +1047,7 @@ void encode_slice_header(encoder_control * const encoder) if (cur_pic->slicetype != SLICE_I) { WRITE_U(stream, 1, 1, "num_ref_idx_active_override_flag"); - WRITE_UE(stream, encoder->ref->used_size-1, "num_ref_idx_l0_active_minus1"); + WRITE_UE(stream, encoder_state->ref->used_size-1, "num_ref_idx_l0_active_minus1"); WRITE_UE(stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand"); } @@ -1064,10 +1062,10 @@ void encode_slice_header(encoder_control * const encoder) } -static void encode_sao_color(const encoder_control * const encoder, cabac_data *cabac, sao_info *sao, +static void encode_sao_color(const encoder_state * const encoder_state, cabac_data *cabac, sao_info *sao, color_index color_i) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; sao_eo_cat i; // Skip colors with no SAO. @@ -1131,7 +1129,7 @@ static void encode_sao_merge_flags(sao_info *sao, cabac_data *cabac, /** * \brief Encode SAO information. 
*/ -static void encode_sao(encoder_control * const encoder, +static void encode_sao(encoder_state * const encoder_state, cabac_data *cabac, unsigned x_lcu, uint16_t y_lcu, sao_info *sao_luma, sao_info *sao_chroma) @@ -1141,17 +1139,17 @@ static void encode_sao(encoder_control * const encoder, // If SAO is merged, nothing else needs to be coded. if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) { - encode_sao_color(encoder, cabac, sao_luma, COLOR_Y); - encode_sao_color(encoder, cabac, sao_chroma, COLOR_U); - encode_sao_color(encoder, cabac, sao_chroma, COLOR_V); + encode_sao_color(encoder_state, cabac, sao_luma, COLOR_Y); + encode_sao_color(encoder_state, cabac, sao_chroma, COLOR_U); + encode_sao_color(encoder_state, cabac, sao_chroma, COLOR_V); } } -void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac, +void encode_coding_tree(encoder_state * const encoder_state, cabac_data *cabac, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x_ctb + y_ctb * (cur_pic->width_in_lcu << MAX_DEPTH)]; uint8_t split_flag = GET_SPLITDATA(cur_cu, depth); uint8_t split_model = 0; @@ -1184,17 +1182,17 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac if (split_flag || border) { // Split blocks and remember to change x and y block positions uint8_t change = 1<<(MAX_DEPTH-1-depth); - encode_coding_tree(encoder, cabac, x_ctb, y_ctb, depth + 1); // x,y + encode_coding_tree(encoder_state, cabac, x_ctb, y_ctb, depth + 1); // x,y // TODO: fix when other half of the block would not be completely over the border if (!border_x || border_split_x) { - encode_coding_tree(encoder, cabac, x_ctb + change, y_ctb, depth + 1); + encode_coding_tree(encoder_state, cabac, x_ctb + change, y_ctb, depth + 1); } if (!border_y || border_split_y) { - encode_coding_tree(encoder, cabac, 
x_ctb, y_ctb + change, depth + 1); + encode_coding_tree(encoder_state, cabac, x_ctb, y_ctb + change, depth + 1); } if (!border || (border_split_x && border_split_y)) { - encode_coding_tree(encoder, cabac, x_ctb + change, y_ctb + change, depth + 1); + encode_coding_tree(encoder_state, cabac, x_ctb + change, y_ctb + change, depth + 1); } return; } @@ -1307,10 +1305,10 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac */ for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) { - //if(encoder->ref_idx_num[uiRefListIdx] > 0) + //if(encoder_state->ref_idx_num[uiRefListIdx] > 0) { if (cur_cu->inter.mv_dir & (1 << ref_list_idx)) { - if (encoder->ref->used_size != 1) { //encoder->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1) + if (encoder_state->ref->used_size != 1) { //encoder_state->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1) // parseRefFrmIdx int32_t ref_frame = cur_cu->inter.mv_ref; @@ -1319,7 +1317,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac if (ref_frame > 0) { int32_t i; - int32_t ref_num = encoder->ref->used_size - 2; + int32_t ref_num = encoder_state->ref->used_size - 2; cabac->ctx = &(cabac->ctx_cu_ref_pic_model[1]); ref_frame--; @@ -1337,7 +1335,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac } } - if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { + if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder_state->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { const int32_t mvd_hor = cur_cu->inter.mvd[0]; const int32_t mvd_ver = cur_cu->inter.mvd[1]; const int8_t hor_abs_gr0 = mvd_hor != 0; @@ -1394,7 +1392,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac // Code (possible) coeffs to bitstream if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { - encode_transform_coeff(encoder, cabac, 
x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + encode_transform_coeff(encoder_state, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); } @@ -1516,7 +1514,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac } } // end intra chroma pred mode coding - encode_transform_coeff(encoder, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); + encode_transform_coeff(encoder_state, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); } #if ENABLE_PCM == 1 @@ -1567,11 +1565,12 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac /* end coding_unit */ } -static void transform_chroma(const encoder_control * const encoder, cabac_data *cabac, cu_info *cur_cu, +static void transform_chroma(encoder_state * const encoder_state, cabac_data *cabac, cu_info *cur_cu, int depth, pixel *base_u, pixel *pred_u, coefficient *coeff_u, int8_t scan_idx_chroma, coefficient *pre_quant_coeff, coefficient *block) { + const encoder_control * const encoder = encoder_state->encoder_control; int base_stride = LCU_WIDTH; int pred_stride = LCU_WIDTH; @@ -1592,15 +1591,15 @@ static void transform_chroma(const encoder_control * const encoder, cabac_data * transform2d(encoder, block, pre_quant_coeff, width_c, 65535); if (encoder->rdoq_enable) { - rdoq(encoder, cabac, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, + rdoq(encoder_state, cabac, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, scan_idx_chroma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); } else { - quant(encoder, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, + quant(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, scan_idx_chroma, cur_cu->type); } } -static void reconstruct_chroma(const encoder_control * const encoder, cu_info *cur_cu, +static void reconstruct_chroma(const encoder_state * const encoder_state, cu_info *cur_cu, int depth, int has_coeffs, coefficient *coeff_u, pixel *recbase_u, pixel *pred_u, int color_type, coefficient *pre_quant_coeff, 
coefficient *block) @@ -1613,8 +1612,8 @@ static void reconstruct_chroma(const encoder_control * const encoder, cu_info *c if (has_coeffs) { // RECONSTRUCT for predictions - dequant(encoder, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type); - itransform2d(encoder, block, pre_quant_coeff, width_c, 65535); + dequant(encoder_state, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type); + itransform2d(encoder_state->encoder_control, block, pre_quant_coeff, width_c, 65535); i = 0; @@ -1637,8 +1636,9 @@ static void reconstruct_chroma(const encoder_control * const encoder, cu_info *c } } -void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu) +void encode_transform_tree(encoder_state * const encoder_state, cabac_data* cabac, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu) { + const encoder_control * const encoder = encoder_state->encoder_control; // we have 64>>depth transform size int x_local = (x&0x3f), y_local = (y&0x3f); cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; @@ -1654,10 +1654,10 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca // Split transform and increase depth if (depth == 0 || cur_cu->tr_depth > depth) { int offset = width_c; - encode_transform_tree(encoder, cabac, x, y, depth+1, lcu); - encode_transform_tree(encoder, cabac, x + offset, y, depth+1, lcu); - encode_transform_tree(encoder, cabac, x, y + offset, depth+1, lcu); - encode_transform_tree(encoder, cabac, x + offset, y + offset, depth+1, lcu); + encode_transform_tree(encoder_state, cabac, x, y, depth+1, lcu); + encode_transform_tree(encoder_state, cabac, x + offset, y, depth+1, lcu); + encode_transform_tree(encoder_state, cabac, x, y + offset, depth+1, lcu); + encode_transform_tree(encoder_state, cabac, x + offset, y + offset, depth+1, lcu); // Derive coded coeff flags from the 
next depth if (depth == MAX_DEPTH) { @@ -1796,20 +1796,20 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca // Test for transform skip transformskip(encoder, block,pre_quant_coeff,width); if (encoder->rdoq_enable) { - rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0); + rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0); } else { - quant(encoder, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type); + quant(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type); } - dequant(encoder, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type); + dequant(encoder_state, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type); itransformskip(encoder, temp_block,pre_quant_coeff,width); transform2d(encoder, block,pre_quant_coeff,width,0); if (encoder->rdoq_enable) { - rdoq(encoder, cabac, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0); + rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0); } else { - quant(encoder, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type); + quant(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type); } - dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type); + dequant(encoder_state, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type); itransform2d(encoder, temp_block2,pre_quant_coeff,width,0); // SSD between original and reconstructed @@ -1828,15 +1828,15 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca coeffcost += abs((int)temp_coeff[i]); coeffcost2 += abs((int)temp_coeff2[i]); } - cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5); - cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder->cur_lambda_cost+0.5); + cost += (1 + coeffcost + 
(coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5); + cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->cur_lambda_cost+0.5); // Full RDO } else if(encoder->rdo == 2) { coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma); coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma); - cost += coeffcost*((int)encoder->cur_lambda_cost+0.5); - cost2 += coeffcost2*((int)encoder->cur_lambda_cost+0.5); + cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5); + cost2 += coeffcost2*((int)encoder_state->cur_lambda_cost+0.5); } cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2); @@ -1850,10 +1850,10 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca } if (encoder->rdoq_enable) { - rdoq(encoder, cabac, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, + rdoq(encoder_state, cabac, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); } else { - quant(encoder, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type); + quant(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type); } // Check for non-zero coeffs @@ -1891,7 +1891,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca } } - dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); + dequant(encoder_state, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); if(width == 4 && cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip) { itransformskip(encoder, block,pre_quant_coeff,width); } else { @@ -1934,7 +1934,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca } } - transform_chroma(encoder, cabac, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block); + transform_chroma(encoder_state, cabac, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, 
block); for (i = 0; i < chroma_size; i++) { if (coeff_u[i] != 0) { int d; @@ -1944,7 +1944,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca break; } } - transform_chroma(encoder, cabac, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block); + transform_chroma(encoder_state, cabac, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block); for (i = 0; i < chroma_size; i++) { if (coeff_v[i] != 0) { int d; @@ -1967,11 +1967,11 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca } } - reconstruct_chroma(encoder, cur_cu, chroma_depth, + reconstruct_chroma(encoder_state, cur_cu, chroma_depth, cur_cu->coeff_top_u[depth], coeff_u, recbase_u, pred_u, color_type_u, pre_quant_coeff, block); - reconstruct_chroma(encoder, cur_cu, chroma_depth, + reconstruct_chroma(encoder_state, cur_cu, chroma_depth, cur_cu->coeff_top_v[depth], coeff_v, recbase_v, pred_v, color_type_v, pre_quant_coeff, block); @@ -1983,10 +1983,11 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca // end Residual Coding } -static void encode_transform_unit(const encoder_control * const encoder, cabac_data *cabac, +static void encode_transform_unit(encoder_state * const encoder_state, cabac_data *cabac, int x_pu, int y_pu, int depth, int tr_depth) { - const picture * const cur_pic = encoder->in.cur_pic; + const encoder_control * const encoder = encoder_state->encoder_control; + const picture * const cur_pic = encoder_state->cur_pic; uint8_t width = LCU_WIDTH >> depth; uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); @@ -2134,12 +2135,12 @@ static void encode_transform_unit(const encoder_control * const encoder, cabac_d * \param parent_coeff_u What was signaled at previous level for cbf_cb. * \param parent_coeff_v What was signlaed at previous level for cbf_cr. 
*/ -void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_pu,int32_t y_pu, +void encode_transform_coeff(encoder_state * const encoder_state, cabac_data *cabac, int32_t x_pu,int32_t y_pu, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v) { int32_t x_cu = x_pu / 2; int32_t y_cu = y_pu / 2; - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (cur_pic->width_in_lcu << MAX_DEPTH)]; // NxN signifies implicit transform split at the first transform level. @@ -2193,10 +2194,10 @@ void encode_transform_coeff(const encoder_control * const encoder, cabac_data *c if (split) { uint8_t pu_offset = 1 << (MAX_PU_DEPTH - (depth + 1)); - encode_transform_coeff(encoder, cabac, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(encoder, cabac, x_pu + pu_offset, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(encoder, cabac, x_pu, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); - encode_transform_coeff(encoder, cabac, x_pu + pu_offset, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder_state, cabac, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder_state, cabac, x_pu + pu_offset, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder_state, cabac, x_pu, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder_state, cabac, x_pu + pu_offset, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); return; } @@ -2211,7 +2212,7 @@ void encode_transform_coeff(const encoder_control * const encoder, cabac_data *c } if (cb_flag_y | cb_flag_u | cb_flag_v) { - encode_transform_unit(encoder, cabac, x_pu, y_pu, depth, tr_depth); + 
encode_transform_unit(encoder_state, cabac, x_pu, y_pu, depth, tr_depth); } } diff --git a/src/encoder.h b/src/encoder.h index 80d4deeb..f0b41634 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -44,33 +44,33 @@ typedef struct enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 }; -/* Input info struct */ -typedef struct -{ - FILE *file; - int32_t real_width; /*!< \brief real input picture width */ - int32_t real_height; /*!< \brief real input picture width */ - picture *cur_pic; - int8_t video_format; - int8_t bitdepth; /*!< \brief input bit depth (8,10) */ -} encoder_input; - /* Encoder control options, the main struct */ typedef struct { - int32_t frame; - int32_t poc; /*!< \brief picture order count */ + /* Configuration */ const config *cfg; - encoder_input in; + + /* Input */ + struct { + FILE *file; + int32_t width; + int32_t height; + int32_t width_in_lcu; + int32_t height_in_lcu; + int32_t real_width; /*!< \brief real input picture width */ + int32_t real_height; /*!< \brief real input picture height */ + int8_t video_format; + int8_t bitdepth; /*!< \brief input bit depth (8,10) */ + } in; + + /* Output */ + struct { + FILE *file; + } out; + encoder_me me; - bitstream stream; - FILE *output; - picture_list *ref; - int8_t ref_list; - int8_t ref_idx_num[2]; - int8_t QP; // \brief Quantization parameter + int8_t bitdepth; - double cur_lambda_cost; /* Filtering */ int8_t deblock_enable; // \brief Flag to enable deblocking filter @@ -101,20 +101,48 @@ typedef struct scaling_list scaling_list; } encoder_control; -void init_lambda(encoder_control *encoder); -encoder_control *init_encoder_control(config *cfg); -void init_encoder_input(encoder_input *input, FILE* inputfile, - int32_t width, int32_t height); -void encode_one_frame(encoder_control *encoder); -int read_one_frame(FILE *file, const encoder_control * const encoder); +typedef struct encoder_state { + const encoder_control *encoder_control; + + picture *cur_pic; + int32_t frame; + int32_t poc; /*!<
\brief picture order count */ + + bitstream stream; + + picture_list *ref; + int8_t ref_list; + int8_t ref_idx_num[2]; + int8_t QP; // \brief Quantization parameter + + double cur_lambda_cost; + + cabac_data cabac; + + struct encoder_state *children; +} encoder_state; -void encode_seq_parameter_set(encoder_control * const encoder); -void encode_pic_parameter_set(encoder_control * const encoder); -void encode_vid_parameter_set(encoder_control * const encoder); -void encode_slice_header(encoder_control * const encoder); -void encode_access_unit_delimiter(encoder_control * const encoder); -void encode_prefix_sei_version(encoder_control * const encoder); -void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac, uint16_t x_ctb, +int encoder_control_init(encoder_control *encoder, const config *cfg); +int encoder_control_finalize(encoder_control *encoder); + +void encoder_control_input_init(encoder_control *encoder, FILE *inputfile, int32_t width, int32_t height); + +int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder); +int encoder_state_finalize(encoder_state *encoder_state); +void encoder_state_init_lambda(encoder_state *encoder_state); + +void init_encoder_input(encoder_control *encoder, FILE* inputfile, + int32_t width, int32_t height); +void encode_one_frame(encoder_state *encoder_state); +int read_one_frame(FILE* file, const encoder_state *encoder); + +void encode_seq_parameter_set(encoder_state *encoder); +void encode_pic_parameter_set(encoder_state *encoder); +void encode_vid_parameter_set(encoder_state *encoder); +void encode_slice_header(encoder_state * encoder); +void encode_access_unit_delimiter(encoder_state *encoder); +void encode_prefix_sei_version(encoder_state *encoder); +void encode_coding_tree(encoder_state *encoder, cabac_data *cabac, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth); void encode_last_significant_xy(cabac_data *cabac, @@ -123,8 +151,8 @@ void encode_last_significant_xy(cabac_data 
*cabac, uint8_t type, uint8_t scan); void encode_coeff_nxn(const encoder_control * const encoder, cabac_data *cabac, int16_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode, int8_t tr_skip); -void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu ); -void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_cu, int32_t y_cu, +void encode_transform_tree(encoder_state *encoder_state, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu ); +void encode_transform_coeff(encoder_state *encoder_state, cabac_data *cabac, int32_t x_cu, int32_t y_cu, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v); void encode_block_residual(const encoder_control * const encoder, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth); diff --git a/src/filter.c b/src/filter.c index 03f03a53..b82ca6df 100644 --- a/src/filter.c +++ b/src/filter.c @@ -163,11 +163,13 @@ INLINE void filter_deblock_chroma(const encoder_control * const encoder, pixel * /** * \brief */ -void filter_deblock_edge_luma(const encoder_control * const encoder, +void filter_deblock_edge_luma(encoder_state * const encoder_state, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; + const encoder_control * const encoder = encoder_state->encoder_control; + cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)]; { @@ -192,7 +194,7 @@ void filter_deblock_edge_luma(const encoder_control * const encoder, int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; int8_t strength = 0; - int32_t qp = encoder->QP; + int32_t qp = encoder_state->QP; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); int32_t beta = g_beta_table_8x8[b_index] * 
bitdepth_scale; @@ -288,11 +290,12 @@ void filter_deblock_edge_luma(const encoder_control * const encoder, /** * \brief */ -void filter_deblock_edge_chroma(const encoder_control * const encoder, +void filter_deblock_edge_chroma(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int8_t dir) { - const picture * const cur_pic = encoder->in.cur_pic; + const encoder_control * const encoder = encoder_state->encoder_control; + const picture * const cur_pic = encoder_state->cur_pic; cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)]; // Chroma edges that do not lay on a 8x8 grid are not deblocked. @@ -324,7 +327,7 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder, int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1); int8_t strength = 2; - int32_t QP = g_chroma_scale[encoder->QP]; + int32_t QP = g_chroma_scale[encoder_state->QP]; int32_t bitdepth_scale = 1 << (encoder->bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale; @@ -384,9 +387,9 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder, * until the coded block size has been achived. Calls luma and chroma filtering * functions for each coded CU size. */ -void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int32_t edge) +void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x + y*(cur_pic->width_in_lcu << MAX_DEPTH)]; uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0; uint8_t border_x = (cur_pic->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 
1 : 0; @@ -404,15 +407,15 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t // Tell clang-analyzer that everything is ok. assert(depth >= 0 && depth < MAX_DEPTH); - filter_deblock_cu(encoder, x, y, depth + 1, edge); + filter_deblock_cu(encoder_state, x, y, depth + 1, edge); if(!border_x || border_split_x) { - filter_deblock_cu(encoder, x + change, y, depth + 1, edge); + filter_deblock_cu(encoder_state, x + change, y, depth + 1, edge); } if(!border_y || border_split_y) { - filter_deblock_cu(encoder, x , y + change, depth + 1, edge); + filter_deblock_cu(encoder_state, x , y + change, depth + 1, edge); } if((!border_x && !border_y) || (border_split_x && border_split_y)) { - filter_deblock_cu(encoder, x + change, y + change, depth + 1, edge); + filter_deblock_cu(encoder_state, x + change, y + change, depth + 1, edge); } return; } @@ -421,8 +424,8 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return; // do the filtering for block edge - filter_deblock_edge_luma(encoder, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge); - filter_deblock_edge_chroma(encoder, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge); + filter_deblock_edge_luma(encoder_state, x*(LCU_WIDTH >> MAX_DEPTH), y*(LCU_WIDTH >> MAX_DEPTH), depth, edge); + filter_deblock_edge_chroma(encoder_state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge); } /** @@ -433,9 +436,9 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t * the Largest Coding Units (LCU) and call filter_deblock_cu with absolute * X and Y coordinates of the LCU. 
*/ -void filter_deblock(const encoder_control * const encoder) +void filter_deblock(encoder_state * const encoder_state) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; int16_t x, y; // TODO: Optimization: add thread for each LCU @@ -444,7 +447,7 @@ void filter_deblock(const encoder_control * const encoder) { for (x = 0; x < cur_pic->width_in_lcu; x++) { - filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER); + filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER); } } @@ -453,7 +456,7 @@ void filter_deblock(const encoder_control * const encoder) { for (x = 0; x < cur_pic->width_in_lcu; x++) { - filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR); + filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR); } } } @@ -469,11 +472,11 @@ void filter_deblock(const encoder_control * const encoder) * - After vertical filtering the left edge, filter the last 4 pixels of * horizontal edges in the LCU to the left. */ -void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px) +void filter_deblock_lcu(encoder_state * const encoder_state, int x_px, int y_px) { const vector2d lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH }; - filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER); + filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER); // Filter rightmost 4 pixels from last LCU now that they have been // finally deblocked vertically. 
@@ -481,15 +484,15 @@ void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_p int y; for (y = 0; y < 64; y += 8) { if (lcu.y + y == 0) continue; - filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR); + filter_deblock_edge_luma(encoder_state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR); } for (y = 0; y < 32; y += 8) { if (lcu.y + y == 0) continue; - filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR); + filter_deblock_edge_chroma(encoder_state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR); } } - filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR); + filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR); } diff --git a/src/filter.h b/src/filter.h index e7448c29..b7a51fa5 100644 --- a/src/filter.h +++ b/src/filter.h @@ -32,16 +32,16 @@ ////////////////////////////////////////////////////////////////////////// // FUNCTIONS // Deblocking -void filter_deblock_cu(const encoder_control * const encoder, int32_t x_px, int32_t y_px, +void filter_deblock_cu(encoder_state *encoder_state, int32_t x_px, int32_t y_px, int8_t depth, int32_t edge); -void filter_deblock_edge_luma(const encoder_control * const encoder, +void filter_deblock_edge_luma(encoder_state *encoder_state, int32_t x_pos, int32_t y_pos, int8_t depth, int8_t dir); -void filter_deblock_edge_chroma(const encoder_control * const encoder, +void filter_deblock_edge_chroma(encoder_state *encoder_state, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir); -void filter_deblock(const encoder_control * const encoder); -void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px); +void filter_deblock(encoder_state *encoder_state); +void filter_deblock_lcu(encoder_state *encoder_state, int x_px, int y_px); void filter_deblock_luma(const encoder_control * const encoder, pixel *src, int32_t offset, int32_t tc , int8_t sw, int8_t part_p_nofilter, int8_t 
part_q_nofilter, int32_t thr_cut, diff --git a/src/inter.c b/src/inter.c index 020d7e72..0b45cffe 100644 --- a/src/inter.c +++ b/src/inter.c @@ -322,7 +322,7 @@ void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_i * \param depth current block depth * \param mv_pred[2][2] 2x motion vector prediction */ -void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu) +void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu) { uint8_t candidates = 0; uint8_t b_candidates = 0; @@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu); #define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6) -#define APPLY_MV_SCALING(cu, cand) {int td = encoder->poc - encoder->ref->pics[(cu)->inter.mv_ref]->poc;\ - int tb = encoder->poc - encoder->ref->pics[cur_cu->inter.mv_ref]->poc;\ +#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\ + int tb = encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\ if (td != tb) { \ int scale = CALCULATE_SCALE(cu,tb,td); \ mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \ diff --git a/src/inter.h b/src/inter.h index 5020cb38..8613f8b9 100644 --- a/src/inter.h +++ b/src/inter.h @@ -35,6 +35,6 @@ void inter_recon_lcu(const encoder_control *encoder, picture* ref,int32_t xpos, void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1, cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu); -void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, 
lcu_t *lcu); +void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu); uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu); #endif diff --git a/src/intra.c b/src/intra.c index ab08ed93..72ca69d9 100644 --- a/src/intra.c +++ b/src/intra.c @@ -327,7 +327,7 @@ static void intra_get_pred(const encoder_control * const encoder, pixel *rec[2], * \param sad_out sad value of best mode * \returns best intra mode */ -int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, +int16_t intra_prediction(const encoder_state * const encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, uint8_t width, uint32_t *sad_out, int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac) { @@ -336,6 +336,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int int16_t best_mode = 1; uint32_t best_bitcost = 0; int16_t mode; + int8_t rdo = encoder_state->encoder_control->rdo; // Check 8 modes for 4x4 and 8x8, 3 for others int8_t rdo_modes_to_check = (width == 4 || width == 8)? 8 : 3; @@ -371,12 +372,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int // Try all modes and select the best one. 
for (mode = 0; mode < 35; mode++) { uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds); - intra_get_pred(encoder, ref, recstride, pred, width, mode, 0); + intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0); sad = cost_func(pred, orig_block); - sad += mode_cost * (int)(encoder->cur_lambda_cost + 0.5); + sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5); // When rdo == 2, store best costs to an array and do full RDO later - if(encoder->rdo == 2) { + if(rdo == 2) { int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad); if(rdo_mode != -1) { rdo_modes[rdo_mode] = mode; rdo_costs[rdo_mode] = sad; @@ -390,7 +391,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int } // Select from three best modes if using RDO - if(encoder->rdo == 2) { + if(rdo == 2) { int rdo_mode; int pred_mode; // Check that the predicted modes are in the RDO mode list @@ -413,12 +414,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int for(rdo_mode = 0; rdo_mode < rdo_modes_to_check; rdo_mode ++) { int rdo_bitcost; // The reconstruction is calculated again here, it could be saved from before.. 
- intra_recon(encoder, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0); - rdo_costs[rdo_mode] = rdo_cost_intra(encoder,pred,orig_block,width,cabac,rdo_modes[rdo_mode]); + intra_recon(encoder_state->encoder_control, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0); + rdo_costs[rdo_mode] = rdo_cost_intra(encoder_state,pred,orig_block,width,cabac,rdo_modes[rdo_mode]); // Bitcost also calculated again for this mode rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds); // Add bitcost * lambda - rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder->cur_lambda_cost + 0.5); + rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5); if(rdo_costs[rdo_mode] < best_sad) { best_sad = rdo_costs[rdo_mode]; @@ -832,8 +833,9 @@ void intra_get_planar_pred(pixel* src, int32_t srcstride, uint32_t width, pixel* } } -void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height) +void intra_recon_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height) { + const encoder_control * const encoder = encoder_state->encoder_control; int x_local = (x&0x3f), y_local = (y&0x3f); cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; @@ -890,5 +892,5 @@ void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, i rec_stride, width, width); } - encode_transform_tree(encoder, cabac, x, y, depth, lcu); + encode_transform_tree(encoder_state, cabac, x, y, depth, lcu); } \ No newline at end of file diff --git a/src/intra.h b/src/intra.h index 1a0378c5..43537c36 100644 --- a/src/intra.h +++ b/src/intra.h @@ -39,7 +39,7 @@ void intra_build_reference_border(const encoder_control *encoder, int32_t x_luma void intra_filter(pixel* ref, int32_t stride, int32_t width, int8_t mode); /* Predictions */ -int16_t 
intra_prediction(const encoder_control *encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, +int16_t intra_prediction(const encoder_state *encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, uint8_t width, uint32_t *sad_out, int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac); @@ -49,6 +49,6 @@ void intra_get_angular_pred(const encoder_control *encoder, pixel* src, int32_t void intra_recon(const encoder_control *encoder, pixel* rec, int32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); -void intra_recon_lcu(const encoder_control *encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height); +void intra_recon_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height); #endif diff --git a/src/rdo.c b/src/rdo.c index 107db134..4115f379 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -63,8 +63,9 @@ const uint32_t entropy_bits[128] = ** Only for luma */ -uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode) +uint32_t rdo_cost_intra(const encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode) { + const encoder_control * const encoder = encoder_state->encoder_control; coefficient pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; int16_t temp_block[LCU_WIDTH*LCU_WIDTH>>2]; @@ -92,11 +93,11 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe } transform2d(encoder, block,pre_quant_coeff,width,0); if(encoder->rdoq_enable) { - rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0); + rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0); } else { - quant(encoder, 
pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA); + quant(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA); } - dequant(encoder, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA); + dequant(encoder_state, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA); itransform2d(encoder, temp_block,pre_quant_coeff,width,0); // SSD between original and reconstructed @@ -111,12 +112,12 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe for (i = 0; i < width*width; i++) { coeffcost += abs((int)temp_coeff[i]); } - cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5); + cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5); // Full RDO } else if(encoder->rdo == 2) { coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, width, 0, luma_scan_mode); - cost += coeffcost*((int)encoder->cur_lambda_cost+0.5); + cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5); } return cost; } @@ -284,7 +285,7 @@ int32_t get_ic_rate( cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one * This method calculates the best quantized transform level for a given scan position. 
* From HM 12.0 */ -uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig, +uint32_t get_coded_level ( const encoder_state * const encoder_state, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig, int32_t level_double, uint32_t max_abs_level, uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, @@ -298,7 +299,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma); if( !last && max_abs_level < 3 ) { - *coded_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); + *coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); *coded_cost = *coded_cost0 + *coded_cost_sig; if (max_abs_level == 0) return best_abs_level; } else { @@ -306,13 +307,13 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca } if( !last ) { - cur_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); + cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); } min_abs_level = ( max_abs_level > 1 ? 
max_abs_level - 1 : 1 ); for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) { double err = (double)(level_double - ( abs_level << q_bits ) ); - double cur_cost = err * err * temp + encoder->cur_lambda_cost * + double cur_cost = err * err * temp + encoder_state->cur_lambda_cost * get_ic_rate_cost( cabac, abs_level, ctx_num_one, ctx_num_abs, abs_go_rice, c1_idx, c2_idx, type); cur_cost += cur_cost_sig; @@ -336,7 +337,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca * * From HM 12.0 */ -static double get_rate_last(const encoder_control * const encoder, +static double get_rate_last(const encoder_state * const encoder_state, const uint32_t pos_x, const uint32_t pos_y, int32_t* last_x_bits, int32_t* last_y_bits) { @@ -349,7 +350,7 @@ static double get_rate_last(const encoder_control * const encoder, if( ctx_y > 3 ) { uiCost += 32768.0 * ((ctx_y-2)>>1); } - return encoder->cur_lambda_cost*uiCost; + return encoder_state->cur_lambda_cost*uiCost; } static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int8_t type, @@ -388,9 +389,10 @@ static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int * coding engines using probability models like CABAC * From HM 12.0 */ -void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width, +void rdoq(const encoder_state * const encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width, int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth) { + const encoder_control * const encoder = encoder_state->encoder_control; uint32_t log2_tr_size = g_convert_to_bit[ width ] + 2; int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; // Represents scaling through forward transform uint16_t go_rice_param = 0; @@ -398,7 +400,7 @@ void rdoq(const encoder_control * const encoder, 
cabac_data *cabac, coefficient uint32_t max_num_coeff = width * height; int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); { int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; @@ -511,7 +513,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient uint16_t abs_ctx = ctx_set + c2; if( scanpos == last_scanpos ) { - level = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], + level = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, q_bits, temp, 1, type ); } else { @@ -519,7 +521,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient uint32_t pos_x = blkpos - ( pos_y << log2_block_size ); uint16_t ctx_sig = (uint16_t)context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y, log2_block_size, type); - level = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], + level = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param, c1_idx, c2_idx, q_bits, temp, 0, type ); sig_rate_delta[ blkpos ] = CTX_ENTROPY_BITS(&baseCtx[ctx_sig],1) - CTX_ENTROPY_BITS(&baseCtx[ctx_sig],0); @@ -587,7 +589,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient if (sig_coeffgroup_flag[ cg_blkpos ] == 0) { uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); - cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_coeffgroup_sig[ cg_scanpos ] = 
encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); base_cost += cost_coeffgroup_sig[ cg_scanpos ] - rd_stats.sig_cost; } else { if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below. @@ -604,9 +606,9 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); if (cg_scanpos < cg_last_scanpos) { - cost_coeffgroup_sig[cg_scanpos] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1); + cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1); base_cost += cost_coeffgroup_sig[cg_scanpos]; - cost_zero_cg += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); } // try to convert the current coeff group from non-zero to all-zero @@ -620,7 +622,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient sig_coeffgroup_flag[ cg_blkpos ] = 0; base_cost = cost_zero_cg; if (cg_scanpos < cg_last_scanpos) { - cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); } // reset coeffs to 0 in this block for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) { @@ -648,13 +650,13 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) { - best_cost = block_uncoded_cost + encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0); - base_cost += 
encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1); + best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0); + base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1); } else { cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma); ctx_cbf = ( type ? tr_depth : !tr_depth); - best_cost = block_uncoded_cost + encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); - base_cost += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); + best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); + base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); } for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) { @@ -672,7 +674,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient uint32_t pos_y = blkpos >> log2_block_size; uint32_t pos_x = blkpos - ( pos_y << log2_block_size ); - double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder, pos_x, pos_y, last_x_bits,last_y_bits ); + double cost_last = (scan_mode == SCAN_VER) ? 
get_rate_last(encoder_state, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder_state, pos_x, pos_y, last_x_bits,last_y_bits ); double totalCost = base_cost + cost_last - cost_sig[ scanpos ]; if( totalCost < best_cost ) { @@ -708,7 +710,7 @@ void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient if(*abs_sum >= 2) { int64_t rd_factor = (int64_t) ( g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6))) - / encoder->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8))) + / encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8))) + 0.5); int32_t lastCG = -1; int32_t absSum = 0; diff --git a/src/rdo.h b/src/rdo.h index d89a9f81..1392a9d8 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -42,10 +42,10 @@ extern const uint32_t g_go_rice_range[5]; extern const uint32_t g_go_rice_prefix_len[5]; -void rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width, +void rdoq(const encoder_state *encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width, int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode); +uint32_t rdo_cost_intra(const encoder_state *encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode); int32_t get_coeff_cost(const encoder_control * const encoder, cabac_data *cabac, coefficient *coeff, int32_t width, int32_t type, int8_t scan_mode); @@ -53,7 +53,7 @@ int32_t get_ic_rate(cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one, uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); double get_ic_rate_cost (cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); 
-uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig, +uint32_t get_coded_level ( const encoder_state * encoder_state, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig, int32_t level_double, uint32_t max_abs_level, uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, diff --git a/src/sao.c b/src/sao.c index b75b9e83..aff4cf6f 100644 --- a/src/sao.c +++ b/src/sao.c @@ -54,12 +54,12 @@ static int sao_calc_eo_cat(pixel a, pixel b, pixel c) } -int sao_band_ddistortion(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data, +int sao_band_ddistortion(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data, int block_width, int block_height, int band_pos, int sao_bands[4]) { int y, x; - int shift = encoder->bitdepth-5; + int shift = encoder_state->encoder_control->bitdepth-5; int sum = 0; for (y = 0; y < block_height; ++y) { @@ -345,12 +345,12 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4], * \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma. 
* \param sao_bands an array of bands for original and reconstructed block */ -static void calc_sao_bands(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data, +static void calc_sao_bands(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data, int block_width, int block_height, int sao_bands[2][32]) { int y, x; - int shift = encoder->bitdepth-5; + int shift = encoder_state->encoder_control->bitdepth-5; //Loop pixels and take top 5 bits to classify different bands for (y = 0; y < block_height; ++y) { @@ -608,7 +608,7 @@ void sao_reconstruct(const encoder_control * const encoder, picture * pic, const -static void sao_search_edge_sao(const encoder_control * const encoder, +static void sao_search_edge_sao(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, @@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder, { int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left); - sum_ddistortion += (int)((double)mode_bits*(encoder->cur_lambda_cost+0.5)); + sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5)); } // SAO is not applied for category 0. 
edge_offset[SAO_EO_CAT0] = 0; @@ -684,7 +684,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder, } -static void sao_search_band_sao(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[], +static void sao_search_band_sao(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, sao_info *sao_out, sao_info *sao_top, @@ -704,14 +704,14 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix memset(sao_bands, 0, 2 * 32 * sizeof(int)); for (i = 0; i < buf_cnt; ++i) { - calc_sao_bands(encoder, data[i], recdata[i],block_width, + calc_sao_bands(encoder_state, data[i], recdata[i],block_width, block_height,sao_bands); } ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position); temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left); - ddistortion += (int)((double)temp_rate*(encoder->cur_lambda_cost+0.5)); + ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5)); // Select band sao over edge sao when distortion is lower if (ddistortion < sao_out->ddistortion) { @@ -731,7 +731,7 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix * \param buf_cnt Number of pointers data and recdata have. * \param sao_out Output parameter for the best sao parameters. 
*/ -static void sao_search_best_mode(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[], +static void sao_search_best_mode(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, sao_info *sao_out, sao_info *sao_top, @@ -740,12 +740,12 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi sao_info edge_sao; sao_info band_sao; - sao_search_edge_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left); - sao_search_band_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left); + sao_search_edge_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left); + sao_search_band_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left); { int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left); - int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5); + int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { @@ -759,11 +759,11 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi { int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left); - int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5); + int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { - ddistortion += sao_band_ddistortion(encoder, data[buf_i], recdata[buf_i], + ddistortion += sao_band_ddistortion(encoder_state, data[buf_i], recdata[buf_i], block_width, block_height, band_sao.band_position, &band_sao.offsets[1]); } @@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi // 
Choose between SAO and doing nothing, taking into account the // rate-distortion cost of coding do nothing. { - int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder->cur_lambda_cost + 0.5); + int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5); if (sao_out->ddistortion >= cost_of_nothing) { sao_out->type = SAO_TYPE_NONE; } @@ -794,7 +794,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi return; } - void sao_search_chroma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left) + void sao_search_chroma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left) { int block_width = (LCU_WIDTH / 2); int block_height = (LCU_WIDTH / 2); @@ -827,10 +827,10 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi } // Calculate - sao_search_best_mode(encoder, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left); + sao_search_best_mode(encoder_state, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left); } -void sao_search_luma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left) +void sao_search_luma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left) { pixel orig[LCU_LUMA_SIZE]; pixel rec[LCU_LUMA_SIZE]; @@ -857,13 +857,13 @@ void sao_search_luma(const encoder_control * const encoder, const picture *pic, orig_list[0] = orig; rec_list[0] = rec; - sao_search_best_mode(encoder, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left); + sao_search_best_mode(encoder_state, orig_list, rec_list, 
block_width, block_height, 1, sao, sao_top, sao_left); } -void sao_reconstruct_frame(const encoder_control * const encoder) +void sao_reconstruct_frame(encoder_state * const encoder_state) { vector2d lcu; - picture * const cur_pic = encoder->in.cur_pic; + picture * const cur_pic = encoder_state->cur_pic; // These are needed because SAO needs the pre-SAO pixels form left and // top LCUs. Single pixel wide buffers, like what search_lcu takes, would @@ -882,9 +882,9 @@ void sao_reconstruct_frame(const encoder_control * const encoder) sao_info *sao_chroma = &cur_pic->sao_chroma[lcu.y * stride + lcu.x]; // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma); - sao_reconstruct(encoder, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y); - sao_reconstruct(encoder, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U); - sao_reconstruct(encoder, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V); + sao_reconstruct(encoder_state->encoder_control, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y); + sao_reconstruct(encoder_state->encoder_control, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U); + sao_reconstruct(encoder_state->encoder_control, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V); } } diff --git a/src/sao.h b/src/sao.h index 365bb5f2..93fd7967 100644 --- a/src/sao.h +++ b/src/sao.h @@ -46,11 +46,11 @@ typedef struct sao_info_struct { void init_sao_info(sao_info *sao); -void sao_search_chroma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left); -void sao_search_luma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left); +void sao_search_chroma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left); +void sao_search_luma(const encoder_state * encoder_state, const picture *pic, 
unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left); void sao_reconstruct(const encoder_control * encoder, picture *pic, const pixel *old_rec, unsigned x_ctb, unsigned y_ctb, const sao_info *sao, color_index color_i); -void sao_reconstruct_frame(const encoder_control * const encoder); +void sao_reconstruct_frame(encoder_state *encoder_state); #endif diff --git a/src/search.c b/src/search.c index f272c6ec..b0584dc0 100644 --- a/src/search.c +++ b/src/search.c @@ -117,7 +117,7 @@ static uint32_t get_mvd_coding_cost(vector2d *mvd) return bitcost; } -static int calc_mvd_cost(const encoder_control * const encoder, int x, int y, +static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y, int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3], int16_t num_cand,int32_t ref_idx, uint32_t *bitcost) { @@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y, temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost; } *bitcost = temp_bitcost; - return temp_bitcost*(int32_t)(encoder->cur_lambda_cost+0.5); + return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5); } @@ -183,7 +183,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y, * the predicted motion vector is way off. In the future even more additional * points like 0,0 might be used, such as vectors from top or left. 
*/ -static unsigned hexagon_search(const encoder_control * const encoder, unsigned depth, +static unsigned hexagon_search(const encoder_state * const encoder_state, unsigned depth, const picture *pic, const picture *ref, const vector2d *orig, vector2d *mv_in_out, int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3], @@ -203,7 +203,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d unsigned cost = calc_sad(pic, ref, orig->x, orig->y, orig->x + mv.x + pattern->x, orig->y + mv.y + pattern->y, block_width, block_width); - cost += calc_mvd_cost(encoder, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -217,7 +217,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d unsigned cost = calc_sad(pic, ref, orig->x, orig->y, orig->x, orig->y, block_width, block_width); - cost += calc_mvd_cost(encoder, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); // If the 0,0 is better, redo the hexagon around that point. 
if (cost < best_cost) { @@ -233,7 +233,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d orig->x + pattern->x, orig->y + pattern->y, block_width, block_width); - cost += calc_mvd_cost(encoder, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -268,7 +268,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d orig->x + mv.x + offset->x, orig->y + mv.y + offset->y, block_width, block_width); - cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -291,7 +291,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d orig->x + mv.x + offset->x, orig->y + mv.y + offset->y, block_width, block_width); - cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); + cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); if (cost > 0 && cost < best_cost) { best_cost = cost; @@ -370,9 +370,9 @@ static unsigned search_mv_full(unsigned depth, * Update lcu to have best modes at this depth. * \return Cost of best mode. 
*/ -static int search_cu_inter(const encoder_control * const encoder, int x, int y, int depth, lcu_t *lcu) +static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; uint32_t ref_idx = 0; int x_local = (x&0x3f), y_local = (y&0x3f); int x_cu = x>>3; @@ -392,8 +392,8 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y, cur_cu->inter.cost = UINT_MAX; - for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) { - picture *ref_pic = encoder->ref->pics[ref_idx]; + for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) { + picture *ref_pic = encoder_state->ref->pics[ref_idx]; unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu); cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu]; uint32_t temp_bitcost = 0; @@ -413,13 +413,13 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y, } // Get MV candidates cur_cu->inter.mv_ref = ref_idx; - inter_get_mv_cand(encoder, x, y, depth, mv_cand, cur_cu, lcu); + inter_get_mv_cand(encoder_state, x, y, depth, mv_cand, cur_cu, lcu); cur_cu->inter.mv_ref = temp_ref_idx; #if SEARCH_MV_FULL_RADIUS temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); #else - temp_cost += hexagon_search(encoder, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); + temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost); #endif merged = 0; @@ -664,11 +664,11 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu * Update lcu to have best modes at this depth. * \return Cost of best mode. 
*/ -static int search_cu_intra(const encoder_control * const encoder, +static int search_cu_intra(const encoder_state * const encoder_state, const int x_px, const int y_px, const int depth, lcu_t *lcu, cabac_data *cabac) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int8_t cu_width = (LCU_WIDTH >> (depth)); @@ -696,7 +696,7 @@ static int search_cu_intra(const encoder_control * const encoder, intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); // Build reconstructed block to use in prediction with extrapolated borders - intra_build_reference_border(encoder, x_px, y_px, cu_width * 2 + 8, + intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8, rec_buffer, cu_width * 2 + 8, 0, cur_pic->width, cur_pic->height, @@ -709,7 +709,7 @@ static int search_cu_intra(const encoder_control * const encoder, uint32_t bitcost = UINT32_MAX; pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); - mode = intra_prediction(encoder,ref_pixels, LCU_WIDTH, + mode = intra_prediction(encoder_state,ref_pixels, LCU_WIDTH, cu_in_rec_buffer, cu_width * 2 + 8, cu_width, &cost, candidate_modes, &bitcost, cabac); cur_cu->intra[pu_index].mode = (int8_t)mode; @@ -728,7 +728,7 @@ static int search_cu_intra(const encoder_control * const encoder, * coding (bitcost * lambda) and cost for coding coefficients (estimated * here as (coefficient_sum * 1.5) * lambda) */ -static int lcu_get_final_cost(const encoder_control * const encoder, +static int lcu_get_final_cost(const encoder_state * const encoder_state, cabac_data *cabac, const int x_px, const int y_px, const int depth, lcu_t *lcu) @@ -737,6 +737,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder, int x_local = 
(x_px&0x3f), y_local = (y_px&0x3f); int cost = 0; int coeff_cost = 0; + const int rdo = encoder_state->encoder_control->rdo; int width = LCU_WIDTH>>depth; int x,y; @@ -759,7 +760,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder, } } - if(encoder->rdo == 1) { + if(rdo == 1) { // sum of coeffs for (y = y_local; y < y_local+width; ++y) { for (x = x_local; x < x_local+width; ++x) { @@ -774,11 +775,11 @@ static int lcu_get_final_cost(const encoder_control * const encoder, } } // Coefficient costs - cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder->cur_lambda_cost+0.5); + cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->cur_lambda_cost+0.5); // Calculate actual bit costs for coding the coeffs // RDO - } else if (encoder->rdo == 2) { + } else if (rdo == 2) { coefficient coeff_temp[32*32]; coefficient coeff_temp_u[16*16]; coefficient coeff_temp_v[16*16]; @@ -817,7 +818,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder, // Calculate luma coeff bit count picture_blit_coeffs(&lcu->coeff.y[(blk_y*LCU_WIDTH)+blk_x],coeff_temp,blockwidth,blockwidth,LCU_WIDTH,blockwidth); - coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp, blockwidth, 0, luma_scan_mode); + coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp, blockwidth, 0, luma_scan_mode); blk_y >>= 1; blk_x >>= 1; @@ -832,15 +833,15 @@ static int lcu_get_final_cost(const encoder_control * const encoder, picture_blit_coeffs(&lcu->coeff.u[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_u,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth); picture_blit_coeffs(&lcu->coeff.v[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_v,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth); - coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode); - coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode); + coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_u, 
blockwidth, 2, chroma_scan_mode); + coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode); } // Multiply bit count with lambda to get RD-cost - cost += coeff_cost * (int32_t)(encoder->cur_lambda_cost+0.5); + cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5); } // Bitcost - cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder->cur_lambda_cost+0.5); + cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5); return cost; } @@ -855,9 +856,9 @@ static int lcu_get_final_cost(const encoder_control * const encoder, * - All the final data for the LCU gets eventually copied to depth 0, which * will be the final output of the recursion. */ -static int search_cu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH]) +static int search_cu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH]) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; int cu_width = LCU_WIDTH >> depth; int cost = MAX_INT; cu_info *cur_cu; @@ -885,7 +886,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i depth >= MIN_INTER_SEARCH_DEPTH && depth <= MAX_INTER_SEARCH_DEPTH) { - int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]); + int mode_cost = search_cu_inter(encoder_state, x, y, depth, &work_tree[depth]); if (mode_cost < cost) { cost = mode_cost; cur_cu->type = CU_INTER; @@ -895,7 +896,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i if (depth >= MIN_INTRA_SEARCH_DEPTH && depth <= MAX_INTRA_SEARCH_DEPTH) { - int mode_cost = search_cu_intra(encoder, x, y, depth, 
&work_tree[depth], cabac); + int mode_cost = search_cu_intra(encoder_state, x, y, depth, &work_tree[depth], cabac); if (mode_cost < cost) { cost = mode_cost; cur_cu->type = CU_INTRA; @@ -906,10 +907,10 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i // mode search of adjacent CUs. if (cur_cu->type == CU_INTRA) { lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size); - intra_recon_lcu(encoder, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height); + intra_recon_lcu(encoder_state, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height); } else if (cur_cu->type == CU_INTER) { - inter_recon_lcu(encoder, encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); - encode_transform_tree(encoder, cabac, x, y, depth, &work_tree[depth]); + inter_recon_lcu(encoder_state->encoder_control, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); + encode_transform_tree(encoder_state, cabac, x, y, depth, &work_tree[depth]); if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { cur_cu->merged = 0; @@ -922,23 +923,23 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i } } if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) { - cost = lcu_get_final_cost(encoder, cabac, x, y, depth, &work_tree[depth]); + cost = lcu_get_final_cost(encoder_state, cabac, x, y, depth, &work_tree[depth]); } // Recursively split all the way to max search depth. if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { int half_cu = cu_width / 2; - int split_cost = (int)(4.5 * encoder->cur_lambda_cost); + int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost); // If skip mode was selected for the block, skip further search. 
// Skip mode means there's no coefficients in the block, so splitting // might not give any better results but takes more time to do. if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] || cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) { - split_cost += search_cu(encoder, cabac, x, y, depth + 1, work_tree); - split_cost += search_cu(encoder, cabac, x + half_cu, y, depth + 1, work_tree); - split_cost += search_cu(encoder, cabac, x, y + half_cu, depth + 1, work_tree); - split_cost += search_cu(encoder, cabac, x + half_cu, y + half_cu, depth + 1, work_tree); + split_cost += search_cu(encoder_state, cabac, x, y, depth + 1, work_tree); + split_cost += search_cu(encoder_state, cabac, x + half_cu, y, depth + 1, work_tree); + split_cost += search_cu(encoder_state, cabac, x, y + half_cu, depth + 1, work_tree); + split_cost += search_cu(encoder_state, cabac, x + half_cu, y + half_cu, depth + 1, work_tree); } else { split_cost = INT_MAX; } @@ -963,9 +964,9 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i * - Copy reference pixels from neighbouring LCUs. * - Copy reference pixels from this LCU. */ -static void init_lcu_t(const encoder_control * const encoder, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf) +static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf) { - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; // Copy reference cu_info structs from neighbouring LCUs. { @@ -1045,7 +1046,7 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const // Copy LCU pixels. 
{ - const picture * const pic = encoder->in.cur_pic; + const picture * const pic = encoder_state->cur_pic; int pic_width = cur_pic->width; int x_max = MIN(x + LCU_WIDTH, pic_width) - x; int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y; @@ -1069,13 +1070,13 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const /** * Copy CU and pixel data to it's place in picture datastructure. */ -static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, int y_px, const lcu_t *lcu) +static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x_px, int y_px, const lcu_t *lcu) { // Copy non-reference CUs to picture. { const int x_cu = x_px >> MAX_DEPTH; const int y_cu = y_px >> MAX_DEPTH; - const picture * const cur_pic = encoder->in.cur_pic; + const picture * const cur_pic = encoder_state->cur_pic; const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH; cu_info *const cu_array = cur_pic->cu_array[MAX_DEPTH]; @@ -1095,7 +1096,7 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, // Copy pixels to picture. { - picture * const pic = encoder->in.cur_pic; + picture * const pic = encoder_state->cur_pic; const int pic_width = pic->width; const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px; const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px; @@ -1123,18 +1124,18 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, * Search LCU for modes. * - Best mode gets copied to current picture. */ -void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf) +void search_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf) { lcu_t work_tree[MAX_PU_DEPTH + 1]; int depth; // Initialize work tree. 
for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) { memset(&work_tree[depth], 0, sizeof(work_tree[depth])); - init_lcu_t(encoder, x, y, &work_tree[depth], hor_buf, ver_buf); + init_lcu_t(encoder_state, x, y, &work_tree[depth], hor_buf, ver_buf); } // Start search from depth 0. - search_cu(encoder, cabac, x, y, 0, work_tree); + search_cu(encoder_state, cabac, x, y, 0, work_tree); - copy_lcu_to_cu_data(encoder, x, y, &work_tree[0]); + copy_lcu_to_cu_data(encoder_state, x, y, &work_tree[0]); } diff --git a/src/search.h b/src/search.h index 6f9f69d3..68642b84 100644 --- a/src/search.h +++ b/src/search.h @@ -30,6 +30,6 @@ #include "picture.h" -void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf); +void search_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf); #endif diff --git a/src/transform.c b/src/transform.c index 2d6bc148..4cc1d26d 100644 --- a/src/transform.c +++ b/src/transform.c @@ -623,9 +623,10 @@ void itransform2d(const encoder_control * const encoder,int16_t *block,int16_t * * \brief quantize transformed coefficents * */ -void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef, int32_t width, +void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_coef, int32_t width, int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type ) { + const encoder_control * const encoder = encoder_state->encoder_control; const uint32_t log2_block_size = g_convert_to_bit[ width ] + 2; const uint32_t * const scan = g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ]; @@ -633,7 +634,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2]; #endif - int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); //New block for variable definitions { @@ -645,7 +646,7 @@ 
void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - int32_t add = ((encoder->in.cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9); + int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9); int32_t q_bits8 = q_bits - 8; for (n = 0; n < width * height; n++) { @@ -754,13 +755,14 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef * \brief inverse quantize transformed and quantized coefficents * */ -void dequant(const encoder_control * const encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type) +void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type) { + const encoder_control * const encoder = encoder_state->encoder_control; int32_t shift,add,coeff_q,clip_q_coef; int32_t n; int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2); - int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); shift = 20 - QUANT_SHIFT - transform_shift; diff --git a/src/transform.h b/src/transform.h index 95c2d1eb..4cc8870a 100644 --- a/src/transform.h +++ b/src/transform.h @@ -34,9 +34,9 @@ extern const uint8_t g_chroma_scale[58]; extern const int16_t g_inv_quant_scales[6]; -void quant(const encoder_control *encoder, int16_t *coef, int16_t *q_coef, int32_t width, +void quant(const encoder_state *encoder_state, int16_t *coef, int16_t *q_coef, int32_t width, int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type); -void dequant(const encoder_control *encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t 
block_type); +void dequant(const encoder_state *encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type); void transformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size); void itransformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);