From f30b9c2a110a1d38117b0770dadd9a90d23e4bb6 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Mon, 5 May 2014 15:17:52 +0200 Subject: [PATCH 01/21] Fix a buffer overflow in parse_tiles_specification --- src/config.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index b74ab843..ec146a8a 100644 --- a/src/config.c +++ b/src/config.c @@ -155,7 +155,7 @@ static int parse_enum(const char *arg, const char * const *names, int8_t *dst) static int parse_tiles_specification(const char* const arg, int32_t * const ntiles, int32_t** const array) { const char* current_arg = NULL; int32_t current_value; - int32_t values[256]; + int32_t values[MAX_TILES_PER_DIM]; int i; @@ -189,6 +189,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil if (current_arg) ++current_arg; values[*ntiles] = current_value; ++(*ntiles); + if (MAX_TILES_PER_DIM <= *ntiles) break; } while (current_arg); if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) { From f0b076876fd6945a515ded0f1dc66621be23fc9e Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Mon, 5 May 2014 11:19:56 +0200 Subject: [PATCH 02/21] Moved all the stream related stuff into substream_write_bitstream --- src/encoder.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index c9998a74..e76f2130 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -499,7 +499,7 @@ static void write_aud(encoder_state * const encoder_state) bitstream_align(stream); } -static void substream_write_bitstream(encoder_state * const encoder_state, const int last_part) { +static void substream_write_bitstream(encoder_state * const encoder_state, const int end_of_sub_stream) { const encoder_control * const encoder = encoder_state->encoder_control; const picture* const cur_pic = encoder_state->cur_pic; const int lcu_count = cur_pic->width_in_lcu * cur_pic->height_in_lcu; @@ -517,11 +517,18 @@ static void 
substream_write_bitstream(encoder_state * const encoder_state, const encode_coding_tree(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0); - cabac_encode_bin_trm(&encoder_state->cabac, ((lcu_id == lcu_count - 1) && last_part) ? 1 : 0); // end_of_slice_segment_flag + cabac_encode_bin_trm(&encoder_state->cabac, ((lcu_id == lcu_count - 1) && !end_of_sub_stream) ? 1 : 0); // end_of_slice_segment_flag + } + if (end_of_sub_stream) { + cabac_encode_bin_trm(&encoder_state->cabac, 1); // end_of_sub_stream_one_bit == 1 + cabac_flush(&encoder_state->cabac); + } else { + cabac_flush(&encoder_state->cabac); + bitstream_align(&encoder_state->stream); } } -static void substream_encode(encoder_state * const encoder_state, const int last_part) { +static void substream_encode(encoder_state * const encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; #ifndef NDEBUG const unsigned long long int debug_bitstream_position = bitstream_tell(&(encoder_state->stream)); @@ -630,9 +637,6 @@ static void substream_encode(encoder_state * const encoder_state, const int last //We should not have written to bitstream! 
assert(debug_bitstream_position == bitstream_tell(&(encoder_state->stream))); - - //Now, write bitstream - substream_write_bitstream(encoder_state, last_part); yuv_t_free(hor_buf); yuv_t_free(ver_buf); @@ -775,7 +779,8 @@ void encode_one_frame(encoder_state * const main_state) subencoder->cur_pic->slicetype = main_state->cur_pic->slicetype; subencoder->cur_pic->type = main_state->cur_pic->type; - substream_encode(subencoder, !(main_state->children[i+1].encoder_control)); + substream_encode(subencoder); + substream_write_bitstream(subencoder, (main_state->children[i+1].encoder_control) != NULL); subencoder_blit_pixels(main_state, main_state->cur_pic->y_recdata, subencoder, subencoder->cur_pic->y_recdata, 1); subencoder_blit_pixels(main_state, main_state->cur_pic->u_recdata, subencoder, subencoder->cur_pic->u_recdata, 0); @@ -785,15 +790,6 @@ void encode_one_frame(encoder_state * const main_state) //This has to be serial i = 0; do { - if (!main_state->children[i+1].encoder_control) { - //last tile - cabac_flush(&main_state->children[i].cabac); - bitstream_align(&main_state->children[i].stream); - } else { - //Other tiles - cabac_encode_bin_trm(&main_state->children[i].cabac, 1); // end_of_sub_stream_one_bit == 1 - cabac_flush(&main_state->children[i].cabac); - } //Append bitstream to main stream bitstream_append(&main_state->stream, &main_state->children[i].stream); bitstream_clear(&main_state->children[i].stream); @@ -801,9 +797,8 @@ void encode_one_frame(encoder_state * const main_state) } else { //Encode the whole thing as one stream - substream_encode(main_state, 1); - cabac_flush(&main_state->cabac); - bitstream_align(stream); + substream_encode(main_state); + substream_write_bitstream(main_state, 0); } // Calculate checksum From 2d6f1992461684b633a0547a56701697eae18c94 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Mon, 5 May 2014 13:33:41 +0200 Subject: [PATCH 03/21] reorganized encoder_state structure --- src/encoder.c | 2 ++ src/encoder.h | 33 
+++++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index e76f2130..45b1bf4d 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -787,6 +787,8 @@ void encode_one_frame(encoder_state * const main_state) subencoder_blit_pixels(main_state, main_state->cur_pic->v_recdata, subencoder, subencoder->cur_pic->v_recdata, 0); } + //We should do the slice header here, because we can have the entry points + //This has to be serial i = 0; do { diff --git a/src/encoder.h b/src/encoder.h index 3fee4c6a..43a6de50 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -124,28 +124,29 @@ typedef struct typedef struct encoder_state { const encoder_control *encoder_control; - - int32_t lcu_offset_x; - int32_t lcu_offset_y; - - picture *cur_pic; - int32_t frame; - int32_t poc; /*!< \brief picture order count */ - - bitstream stream; - - picture_list *ref; - int8_t ref_list; - int8_t ref_idx_num[2]; - int8_t QP; // \brief Quantization parameter - double cur_lambda_cost; - + bitstream stream; cabac_data cabac; //List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL //Use do { } while (encoder_state->children[++i].encoder_control) struct encoder_state *children; + + //Tile: offset in LCU for current encoder_state + int32_t lcu_offset_x; + int32_t lcu_offset_y; + + //Current picture to encode + picture *cur_pic; + int32_t frame; + int32_t poc; /*!< \brief picture order count */ + + //Current picture available references + picture_list *ref; + int8_t ref_list; + int8_t ref_idx_num[2]; + + int8_t QP; //!< \brief Quantization parameter } encoder_state; int encoder_control_init(encoder_control *encoder, const config *cfg); From c2872bd6b0e424c87fc87d9bce9b14102eaef7a3 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Mon, 5 May 2014 15:17:22 +0200 Subject: [PATCH 04/21] Slices and WPP in command line and encoder --- src/config.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 
src/config.h | 5 ++++ src/encmain.c | 10 +++++++ src/encoder.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++- src/encoder.h | 7 +++++ src/global.h | 1 + 6 files changed, 170 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index ec146a8a..f2efdb34 100644 --- a/src/config.c +++ b/src/config.c @@ -85,6 +85,12 @@ int config_init(config *cfg) cfg->tiles_height_count = 0; cfg->tiles_width_split = NULL; cfg->tiles_height_split = NULL; + + cfg->wpp = 0; + cfg->slice_count = 1; + cfg->slice_addresses_in_ts = MALLOC(int32_t, 1); + cfg->slice_addresses_in_ts[0] = 0; + return 1; } @@ -101,6 +107,7 @@ int config_destroy(config *cfg) FREE_POINTER(cfg->cqmfile); FREE_POINTER(cfg->tiles_width_split); FREE_POINTER(cfg->tiles_height_split); + FREE_POINTER(cfg->slice_addresses_in_ts); free(cfg); return 1; @@ -211,6 +218,67 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil return 1; } +static int parse_slice_specification(const char* const arg, int32_t * const nslices, int32_t** const array) { + const char* current_arg = NULL; + int32_t current_value; + int32_t values[MAX_SLICES]; + + int i; + + //Free pointer in any case + if (*array) { + FREE_POINTER(*array); + } + + //If the arg starts with u, we want an uniform split + if (arg[0]=='u') { + *nslices = atoi(arg+1); + if (MAX_SLICES <= *nslices || 0 >= *nslices) { + fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_SLICES)!\n", *nslices + 1, MAX_SLICES); + return 0; + } + //Done with parsing + return 1; + } + + //We have a comma-separated list of int for the split... 
+ current_arg = arg; + //We always have a slice starting at 0 + values[0] = 0; + *nslices = 1; + do { + int ret = sscanf(current_arg, "%d", &current_value); + if (ret != 1) { + fprintf(stderr, "Could not parse integer \"%s\"!\n", current_arg); + return 0; + } + current_arg = strchr(current_arg, ','); + //Skip the , if we found one + if (current_arg) ++current_arg; + values[*nslices] = current_value; + ++(*nslices); + if (MAX_SLICES <= *nslices) break; + } while (current_arg); + + if (MAX_SLICES <= *nslices || 0 >= *nslices) { + fprintf(stderr, "Invalid number of slices (0 < %d <= %d = MAX_SLICES)!\n", *nslices, MAX_SLICES); + return 0; + } + + *array = MALLOC(int32_t, *nslices); + if (!*array) { + fprintf(stderr, "Could not allocate array for slices\n"); + return 0; + } + + //TODO: memcpy? + for (i = 0; i < *nslices; ++i) { + (*array)[i] = values[i]; + } + + return 1; +} + static int config_parse(config *cfg, const char *name, const char *value) { static const char * const overscan_names[] = { "undef", "show", "crop", NULL }; @@ -341,6 +409,10 @@ static int config_parse(config *cfg, const char *name, const char *value) error = !parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split); else if OPT("tiles-height-split") error = !parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split); + else if OPT("wpp") + cfg->wpp = atobool(value); + else if OPT("slice-addresses") + error = !parse_slice_specification(value, &cfg->slice_count, &cfg->slice_addresses_in_ts); else return 0; #undef OPT @@ -389,6 +461,8 @@ int config_read(config *cfg,int argc, char *argv[]) { "seek", required_argument, NULL, 0 }, { "tiles-width-split", required_argument, NULL, 0 }, { "tiles-height-split", required_argument, NULL, 0 }, + { "wpp", no_argument, NULL, 0 }, + { "slice-addresses", required_argument, NULL, 0 }, {0, 0, 0, 0} }; diff --git a/src/config.h b/src/config.h index 8c424655..8886988d 100644 --- a/src/config.h +++ b/src/config.h @@ -69,6 
+69,11 @@ typedef struct int32_t tiles_height_count; /*!< \brief number of tiles separation in y direction */ int32_t* tiles_width_split; /*!< \brief tiles split x coordinates (dimension: tiles_width_count) */ int32_t* tiles_height_split; /*!< \brief tiles split y coordinates (dimension: tiles_height_count) */ + + int wpp; + + int32_t slice_count; + int32_t* slice_addresses_in_ts; } config; /* Function definitions */ diff --git a/src/encmain.c b/src/encmain.c index c8035b43..e8e98b32 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -151,6 +151,16 @@ int main(int argc, char *argv[]) " Can also be u followed by and a single int n,\n" " in which case it produces rows of uniform height.\n" "\n" + " Wpp:\n" + " --wpp: Enable wavefront parallel processing\n" + "\n" + " Slices:\n" + " --slice-addresses |u: \n" + " Specifies a comma separated list of LCU\n" + " positions in tile scan order of tile separations.\n" + " Can also be u followed by and a single int n,\n" + " in which case it produces uniform slice length.\n" + "\n" " Deprecated parameters: (might be removed at some point)\n" " Use --input-res:\n" " -w, --width : Width of input in pixels\n" diff --git a/src/encoder.c b/src/encoder.c index 45b1bf4d..3e50bdfe 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -80,6 +80,26 @@ void encoder_state_init_lambda(encoder_state * const encoder_state) encoder_state->cur_lambda_cost = lambda; } +static int lcu_at_slice_start(encoder_control * const encoder, int lcu_addr_in_rs) { + int i; + assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_rs == 0) return 1; + for (i = 0; i < encoder->slice_count; ++i) { + if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_rs) return 1; + } + return 0; +} + +static int lcu_at_slice_end(encoder_control * const encoder, int lcu_addr_in_rs) { + int i; + assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_rs 
== encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; + for (i = 0; i < encoder->slice_count; ++i) { + if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_rs + 1) return 1; + } + return 0; +} + int encoder_control_init(encoder_control * const encoder, const config * const cfg) { if (!cfg) { fprintf(stderr, "Config object must not be null!\n"); @@ -253,6 +273,41 @@ int encoder_control_init(encoder_control * const encoder, const config * const c encoder->tiles_ctb_addr_ts_to_rs = tiles_ctb_addr_ts_to_rs; encoder->tiles_tile_id = tiles_tile_id; + + //Slices + { + int *slice_addresses_in_ts; + encoder->slice_count = encoder->cfg->slice_count; + if (encoder->slice_count == 0) { + encoder->slice_count = 1; + slice_addresses_in_ts = MALLOC(int, encoder->slice_count); + slice_addresses_in_ts[0] = 0; + } else { + int i; + slice_addresses_in_ts = MALLOC(int, encoder->slice_count); + if (!encoder->cfg->slice_addresses_in_ts) { + slice_addresses_in_ts[0] = 0; + for (i=1; i < encoder->slice_count; ++i) { + slice_addresses_in_ts[i] = encoder->in.width_in_lcu * encoder->in.height_in_lcu * i / encoder->slice_count; + } + } else { + for (i=0; i < encoder->slice_count; ++i) { + slice_addresses_in_ts[i] = encoder->cfg->slice_addresses_in_ts[i]; + } + } + } + + encoder->slice_addresses_in_ts = slice_addresses_in_ts; + } + + encoder->wpp = encoder->cfg->wpp; + + //FIXME: remove + if (encoder->slice_count) { + + lcu_at_slice_start(encoder, 0); + lcu_at_slice_end(encoder, 0); + } #ifdef _DEBUG printf("Tiles columns width:"); @@ -268,18 +323,35 @@ int encoder_control_init(encoder_control * const encoder, const config * const c //Print tile index map for (y = 0; y < encoder->in.height_in_lcu; ++y) { for (x = 0; x < encoder->in.width_in_lcu; ++x) { - printf("%2d ", encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[y * encoder->in.width_in_lcu + x]]); + const int lcu_id_rs = y * encoder->in.width_in_lcu + x; + const int lcu_id_ts = 
encoder->tiles_ctb_addr_rs_to_ts[lcu_id_rs]; + const char slice_start = lcu_at_slice_start(encoder, lcu_id_ts) ? '|' : ' '; + const char slice_end = lcu_at_slice_end(encoder, lcu_id_ts) ? '|' : ' '; + + printf("%c%03d%c", slice_start, encoder->tiles_tile_id[lcu_id_ts], slice_end); } printf("\n"); } + printf("\n"); + if (encoder->wpp) { + printf("Wavefront Parallel Processing: enabled\n"); + } else { + printf("Wavefront Parallel Processing: disabled\n"); + } + printf("\n"); #endif //_DEBUG + + } return 1; } int encoder_control_finalize(encoder_control * const encoder) { + //Slices + FREE_POINTER(encoder->slice_addresses_in_ts); + //Tiles FREE_POINTER(encoder->tiles_col_width); FREE_POINTER(encoder->tiles_row_height); diff --git a/src/encoder.h b/src/encoder.h index 43a6de50..a00b616a 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -120,6 +120,13 @@ typedef struct const int32_t *tiles_tile_id; /*! Date: Tue, 6 May 2014 07:29:16 +0200 Subject: [PATCH 05/21] lcu_at_tile_start and lcu_at_tile_end helper functions --- src/encoder.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/encoder.c b/src/encoder.c index 3e50bdfe..03e6ce60 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -100,6 +100,24 @@ static int lcu_at_slice_end(encoder_control * const encoder, int lcu_addr_in_rs) return 0; } +static int lcu_at_tile_start(encoder_control * const encoder, int lcu_addr_in_rs) { + assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_rs == 0) return 1; + if (encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs - 1]] != encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs]]) { + return 1; + } + return 0; +} + +static int lcu_at_tile_end(encoder_control * const encoder, int lcu_addr_in_rs) { + assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_rs == 
encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; + if (encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs + 1]] != encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs]]) { + return 1; + } + return 0; +} + int encoder_control_init(encoder_control * const encoder, const config * const cfg) { if (!cfg) { fprintf(stderr, "Config object must not be null!\n"); @@ -304,9 +322,10 @@ int encoder_control_init(encoder_control * const encoder, const config * const c //FIXME: remove if (encoder->slice_count) { - lcu_at_slice_start(encoder, 0); lcu_at_slice_end(encoder, 0); + lcu_at_tile_start(encoder, 0); + lcu_at_tile_end(encoder, 0); } #ifdef _DEBUG From a23edd0339ed1e4f775b3787d89feac13b7c4592 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Tue, 6 May 2014 08:06:10 +0200 Subject: [PATCH 06/21] added parent to encoder_state --- src/encoder.c | 4 +++- src/encoder.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/encoder.c b/src/encoder.c index 03e6ce60..8c695d96 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -387,13 +387,15 @@ int encoder_control_finalize(encoder_control * const encoder) { return 1; } -static int encoder_state_init_one(encoder_state * const state, const encoder_state * const parent_state, const int tile_x, const int tile_y) { +static int encoder_state_init_one(encoder_state * const state, encoder_state * const parent_state, const int tile_x, const int tile_y) { const encoder_control *encoder; int width_in_lcu; int height_in_lcu; int width; int height; + state->parent = parent_state; + if (!parent_state) { //Use encoder_control from current state (has to be initialized) encoder = state->encoder_control; diff --git a/src/encoder.h b/src/encoder.h index a00b616a..4956fe39 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -138,6 +138,7 @@ typedef struct encoder_state { //List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL //Use do { } 
while (encoder_state->children[++i].encoder_control) struct encoder_state *children; + struct encoder_state *parent; //Tile: offset in LCU for current encoder_state int32_t lcu_offset_x; From 6c6adf18c7292a30055b962f06c64c74af6df733 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Tue, 6 May 2014 10:13:18 +0200 Subject: [PATCH 07/21] Refactor encoder_state --- src/encmain.c | 21 +- src/encoder.c | 607 ++++++++++++++++++++++++++++++------------------ src/encoder.h | 82 +++++-- src/filter.c | 12 +- src/inter.c | 34 +-- src/intra.c | 4 +- src/rdo.c | 32 +-- src/sao.c | 12 +- src/search.c | 50 ++-- src/transform.c | 6 +- 10 files changed, 535 insertions(+), 325 deletions(-) diff --git a/src/encmain.c b/src/encmain.c index e8e98b32..9170fc40 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -266,12 +266,13 @@ int main(int argc, char *argv[]) encoder.in.width, encoder.in.height, encoder.in.real_width, encoder.in.real_height); - if (!encoder_state_init(&encoder_state, &encoder)) { + encoder_state.encoder_control = &encoder; + if (!encoder_state_init(&encoder_state, NULL)) { goto exit_failure; } - encoder_state.frame = 0; - encoder_state.QP = (int8_t)encoder.cfg->qp; + encoder_state.global->frame = 0; + encoder_state.global->QP = (int8_t)encoder.cfg->qp; // Only the code that handles conformance window coding needs to know // the real dimensions. As a quick fix for broken non-multiple of 8 videos, @@ -282,14 +283,14 @@ int main(int argc, char *argv[]) //cfg->height = encoder.in.height; // Start coding cycle while data on input and not on the last frame - while(!cfg->frames || encoder_state.frame < cfg->frames) { + while(!cfg->frames || encoder_state.global->frame < cfg->frames) { int32_t diff; double temp_psnr[3]; // Skip '--seek' frames before input. // This block can be moved outside this while loop when there is a // mechanism to skip the while loop on error. 
- if (encoder_state.frame == 0 && cfg->seek > 0) { + if (encoder_state.global->frame == 0 && cfg->seek > 0) { int frame_bytes = cfg->width * cfg->height * 3 / 2; int error = 0; @@ -312,14 +313,14 @@ int main(int argc, char *argv[]) // Read one frame from the input if (!read_one_frame(input, &encoder_state)) { if (!feof(input)) - fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame); + fprintf(stderr, "Failed to read a frame %d\n", encoder_state.global->frame); break; } // The actual coding happens here, after this function we have a coded frame encode_one_frame(&encoder_state); - cur_pic = encoder_state.cur_pic; + cur_pic = encoder_state.tile->cur_pic; if (cfg->debug != NULL) { // Write reconstructed frame out. @@ -353,7 +354,7 @@ int main(int argc, char *argv[]) temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1); temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1); - fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, + fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.global->frame, "BPI"[cur_pic->slicetype%3], diff<<3, temp_psnr[0], temp_psnr[1], temp_psnr[2]); @@ -371,8 +372,8 @@ int main(int argc, char *argv[]) fgetpos(output,(fpos_t*)&curpos); // Print statistics of the coding - fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3, - psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame); + fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.global->frame, (long long unsigned int) curpos<<3, + psnr[0] / encoder_state.global->frame, psnr[1] / encoder_state.global->frame, psnr[2] / encoder_state.global->frame); fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC); fclose(input); 
diff --git a/src/encoder.c b/src/encoder.c index 8c695d96..14b82907 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -58,8 +58,8 @@ static void encode_sao(encoder_state *encoder, */ void encoder_state_init_lambda(encoder_state * const encoder_state) { - const picture * const cur_pic = encoder_state->cur_pic; - double qp = encoder_state->QP; + const picture * const cur_pic = encoder_state->tile->cur_pic; + double qp = encoder_state->global->QP; double lambda_scale = 1.0; double qp_temp = qp - 12; double lambda; @@ -77,42 +77,42 @@ void encoder_state_init_lambda(encoder_state * const encoder_state) lambda *= 0.95; } - encoder_state->cur_lambda_cost = lambda; + encoder_state->global->cur_lambda_cost = lambda; } -static int lcu_at_slice_start(encoder_control * const encoder, int lcu_addr_in_rs) { +static int lcu_at_slice_start(const encoder_control * const encoder, int lcu_addr_in_ts) { int i; - assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); - if (lcu_addr_in_rs == 0) return 1; + assert(lcu_addr_in_ts >= 0 && lcu_addr_in_ts < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_ts == 0) return 1; for (i = 0; i < encoder->slice_count; ++i) { - if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_rs) return 1; + if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_ts) return 1; } return 0; } -static int lcu_at_slice_end(encoder_control * const encoder, int lcu_addr_in_rs) { +static int lcu_at_slice_end(const encoder_control * const encoder, int lcu_addr_in_ts) { int i; - assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); - if (lcu_addr_in_rs == encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; + assert(lcu_addr_in_ts >= 0 && lcu_addr_in_ts < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_ts == encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; for (i = 0; i < encoder->slice_count; ++i) 
{ - if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_rs + 1) return 1; + if (encoder->slice_addresses_in_ts[i] == lcu_addr_in_ts + 1) return 1; } return 0; } -static int lcu_at_tile_start(encoder_control * const encoder, int lcu_addr_in_rs) { - assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); - if (lcu_addr_in_rs == 0) return 1; - if (encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs - 1]] != encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs]]) { +static int lcu_at_tile_start(const encoder_control * const encoder, int lcu_addr_in_ts) { + assert(lcu_addr_in_ts >= 0 && lcu_addr_in_ts < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_ts == 0) return 1; + if (encoder->tiles_tile_id[lcu_addr_in_ts - 1] != encoder->tiles_tile_id[lcu_addr_in_ts]) { return 1; } return 0; } -static int lcu_at_tile_end(encoder_control * const encoder, int lcu_addr_in_rs) { - assert(lcu_addr_in_rs >= 0 && lcu_addr_in_rs < encoder->in.height_in_lcu * encoder->in.width_in_lcu); - if (lcu_addr_in_rs == encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; - if (encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs + 1]] != encoder->tiles_tile_id[encoder->tiles_ctb_addr_rs_to_ts[lcu_addr_in_rs]]) { +static int lcu_at_tile_end(const encoder_control * const encoder, int lcu_addr_in_ts) { + assert(lcu_addr_in_ts >= 0 && lcu_addr_in_ts < encoder->in.height_in_lcu * encoder->in.width_in_lcu); + if (lcu_addr_in_ts == encoder->in.height_in_lcu * encoder->in.width_in_lcu - 1) return 1; + if (encoder->tiles_tile_id[lcu_addr_in_ts + 1] != encoder->tiles_tile_id[lcu_addr_in_ts]) { return 1; } return 0; @@ -387,101 +387,279 @@ int encoder_control_finalize(encoder_control * const encoder) { return 1; } -static int encoder_state_init_one(encoder_state * const state, encoder_state * const parent_state, const int tile_x, const int tile_y) { - const 
encoder_control *encoder; - int width_in_lcu; - int height_in_lcu; - int width; - int height; - - state->parent = parent_state; - - if (!parent_state) { - //Use encoder_control from current state (has to be initialized) - encoder = state->encoder_control; - assert(encoder); - - width_in_lcu = encoder->in.width_in_lcu; - height_in_lcu = encoder->in.height_in_lcu; - width = encoder->in.width; - height = encoder->in.height; - - state->lcu_offset_x = 0; - state->lcu_offset_y = 0; - } else { - //Use parent encoder_control - encoder = parent_state->encoder_control; - assert(encoder); - state->encoder_control = parent_state->encoder_control; - - state->lcu_offset_x = encoder->tiles_col_bd[tile_x]; - state->lcu_offset_y = encoder->tiles_row_bd[tile_y]; - - width_in_lcu = encoder->tiles_col_bd[tile_x+1]-encoder->tiles_col_bd[tile_x]; - height_in_lcu = encoder->tiles_row_bd[tile_y+1]-encoder->tiles_row_bd[tile_y]; - width = MIN(width_in_lcu * LCU_WIDTH, encoder->in.width - state->lcu_offset_x * LCU_WIDTH); - height = MIN(height_in_lcu * LCU_WIDTH, encoder->in.height - state->lcu_offset_y * LCU_WIDTH); +static int encoder_state_config_global_init(encoder_state * const encoder_state) { + encoder_state->global->ref = picture_list_init(MAX_REF_PIC_COUNT); + if(!encoder_state->global->ref) { + fprintf(stderr, "Failed to allocate the picture list!\n"); + return 0; } - - //Ok we have all the variables initialized, do the real work now - - if (parent_state) { - if (!bitstream_init(&state->stream, BITSTREAM_TYPE_MEMORY)) { - fprintf(stderr, "Could not initialize stream (subencoder)!\n"); - return 0; - } - - //FIXME: at some point, we may want to have a ref list for each subencoder (would allow overlapping between frames) - state->ref = parent_state->ref; - state->ref_list = parent_state->ref_list; - } else { - // Allocate the bitstream struct - if (!bitstream_init(&state->stream, BITSTREAM_TYPE_FILE)) { - fprintf(stderr, "Could not initialize stream!\n"); - return 0; - } - - 
state->ref = picture_list_init(MAX_REF_PIC_COUNT); - if(!state->ref) { - fprintf(stderr, "Failed to allocate the picture list!\n"); - return 0; - } - state->ref_list = REF_PIC_LIST_0; - } - - state->frame = 0; - state->poc = 0; - - state->cur_pic = picture_alloc(width, height, width_in_lcu, height_in_lcu); + encoder_state->global->ref_list = REF_PIC_LIST_0; + encoder_state->global->frame = 0; + encoder_state->global->poc = 0; + return 1; +} - if (!state->cur_pic) { +static void encoder_state_config_global_finalize(encoder_state * const encoder_state) { + picture_list_destroy(encoder_state->global->ref); +} + + + +static int encoder_state_config_tile_init(encoder_state * const encoder_state, + const int lcu_offset_x, const int lcu_offset_y, + const int width, const int height, const int width_in_lcu, const int height_in_lcu) { + + const encoder_control * const encoder = encoder_state->encoder_control; + encoder_state->tile->cur_pic = picture_alloc(width, height, width_in_lcu, height_in_lcu); + + if (!encoder_state->tile->cur_pic) { printf("Error allocating picture!\r\n"); return 0; } // Init coeff data table - state->cur_pic->coeff_y = MALLOC(coefficient, width * height); - state->cur_pic->coeff_u = MALLOC(coefficient, (width * height) >> 2); - state->cur_pic->coeff_v = MALLOC(coefficient, (width * height) >> 2); - - state->children = NULL; + //FIXME: move them + encoder_state->tile->cur_pic->coeff_y = MALLOC(coefficient, width * height); + encoder_state->tile->cur_pic->coeff_u = MALLOC(coefficient, (width * height) >> 2); + encoder_state->tile->cur_pic->coeff_v = MALLOC(coefficient, (width * height) >> 2); - // Set CABAC output bitstream - state->cabac.stream = &state->stream; + encoder_state->tile->lcu_offset_x = lcu_offset_x; + encoder_state->tile->lcu_offset_y = lcu_offset_y; + encoder_state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu]; return 1; } -int encoder_state_init(encoder_state * const 
encoder_state, const encoder_control * const encoder) { - encoder_state->encoder_control = encoder; - if (!encoder_state_init_one(encoder_state, NULL, 0, 0)) { - fprintf(stderr, "Could not initialize main encoder state!\n"); - return 0; +static void encoder_state_config_tile_finalize(encoder_state * const encoder_state) { + picture_free(encoder_state->tile->cur_pic); + encoder_state->tile->cur_pic = NULL; +} + +static int encoder_state_config_slice_init(encoder_state * const encoder_state, + const int start_address_in_ts, const int end_address_in_ts) { + //Has to be called AFTER initializing encoder_state->tile + encoder_state->slice->start_in_ts = start_address_in_ts - encoder_state->tile->lcu_offset_in_ts; + encoder_state->slice->end_in_ts = end_address_in_ts - encoder_state->tile->lcu_offset_in_ts; + + encoder_state->slice->start_in_rs = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[start_address_in_ts]; + encoder_state->slice->end_in_ts = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts]; + return 1; +} + +static void encoder_state_config_slice_finalize(encoder_state * const encoder_state) { + //Nothing to do (yet?) +} + +static int encoder_state_config_wfrow_init(encoder_state * const encoder_state, + const int lcu_offset_y) { + + encoder_state->wfrow->lcu_offset_y = lcu_offset_y; + return 1; +} + +static void encoder_state_config_wfrow_finalize(encoder_state * const encoder_state) { + //Nothing to do (yet?) 
+} + + +int encoder_state_init(encoder_state * const child_state, encoder_state * const parent_state) { + //We require that, if parent_state is NULL: + //child_state->encoder_control is set + // + //If parent_state is not NULL, the following variable should either be set to NULL, + //in order to inherit from parent, or should point to a valid structure: + //child_state->global + //child_state->tile + //child_state->slice + //child_state->wfrow + + child_state->parent = parent_state; + child_state->children = MALLOC(encoder_state, 1); + child_state->children[0].encoder_control = NULL; + + if (!parent_state) { + const encoder_control * const encoder = child_state->encoder_control; + child_state->type = ENCODER_STATE_TYPE_MAIN; + assert(child_state->encoder_control); + child_state->global = MALLOC(encoder_state_config_global, 1); + if (!child_state->global || !encoder_state_config_global_init(child_state)) { + fprintf(stderr, "Could not initialize encoder_state->global!\n"); + return 0; + } + child_state->tile = MALLOC(encoder_state_config_tile, 1); + if (!child_state->tile || !encoder_state_config_tile_init(child_state, 0, 0, encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu)) { + fprintf(stderr, "Could not initialize encoder_state->tile!\n"); + return 0; + } + child_state->slice = MALLOC(encoder_state_config_slice, 1); + if (!child_state->slice || !encoder_state_config_slice_init(child_state, 0, encoder->in.width_in_lcu * encoder->in.height_in_lcu - 1)) { + fprintf(stderr, "Could not initialize encoder_state->slice!\n"); + return 0; + } + child_state->wfrow = MALLOC(encoder_state_config_wfrow, 1); + if (!child_state->wfrow || !encoder_state_config_wfrow_init(child_state, 0)) { + fprintf(stderr, "Could not initialize encoder_state->wfrow!\n"); + return 0; + } + } else { + child_state->encoder_control = parent_state->encoder_control; + if (!child_state->global) child_state->global = parent_state->global; + if 
(!child_state->tile) child_state->tile = parent_state->tile; + if (!child_state->slice) child_state->slice = parent_state->slice; + if (!child_state->wfrow) child_state->wfrow = parent_state->wfrow; } - encoder_state->stream.file.output = encoder->out.file; + //Allocate bitstream + if (child_state->type == ENCODER_STATE_TYPE_MAIN) { + //Main encoder outputs to file + if (!bitstream_init(&child_state->stream, BITSTREAM_TYPE_FILE)) { + fprintf(stderr, "Could not initialize stream!\n"); + return 0; + } + child_state->stream.file.output = child_state->encoder_control->out.file; + } else { + //Other encoders use a memory bitstream + if (!bitstream_init(&child_state->stream, BITSTREAM_TYPE_MEMORY)) { + fprintf(stderr, "Could not initialize stream!\n"); + return 0; + } + } - if (encoder->tiles_enable) { + // Set CABAC output bitstream + child_state->cabac.stream = &child_state->stream; + + //Create sub-encoders + { + const encoder_control * const encoder = child_state->encoder_control; + int child_count = 0; + //We first check the type of this element. 
+ //If it's a MAIN, it can allow both slices or tiles as child + //If it's a TILE, it can allow slices as child, if its parent is not a slice, or wavefront rows if there is no other children + //If it's a SLICE, it can allow tiles as child, if its parent is not a tile, or wavefront rows if there is no other children + //If it's a WAVEFRONT_ROW, it doesn't allow any children + int children_allow_wavefront_row = 0; + int children_allow_slice = 0; + int children_allow_tile = 0; + + int start_in_ts, end_in_ts; + + switch(child_state->type) { + case ENCODER_STATE_TYPE_MAIN: + children_allow_slice = 1; + children_allow_tile = 1; + break; + case ENCODER_STATE_TYPE_SLICE: + assert(child_state->parent); + if (child_state->parent->type != ENCODER_STATE_TYPE_TILE) children_allow_tile = 1; + children_allow_wavefront_row = encoder->wpp; + break; + case ENCODER_STATE_TYPE_TILE: + assert(child_state->parent); + if (child_state->parent->type != ENCODER_STATE_TYPE_SLICE) children_allow_slice = 1; + children_allow_wavefront_row = encoder->wpp; + break; + case ENCODER_STATE_TYPE_WAVEFRONT_ROW: + break; + default: + fprintf(stderr, "Invalid encoder_state->type %d!\n", child_state->type); + assert(0); + } + + //Full span to analyze + start_in_ts = child_state->tile->lcu_offset_in_ts + child_state->slice->start_in_ts; + end_in_ts = MIN(child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu, child_state->tile->lcu_offset_in_ts + child_state->slice->end_in_ts); + while (start_in_ts < end_in_ts) { + encoder_state *new_child = NULL; + int range_start = start_in_ts; + int range_end_slice = start_in_ts; //Will be incremented to get the range of the "thing" + int range_end_tile = start_in_ts; //Will be incremented to get the range of the "thing" + + int tile_allowed = lcu_at_tile_start(encoder, range_start) && children_allow_tile; + int slice_allowed = lcu_at_slice_start(encoder, range_start) && children_allow_slice; + + 
//Find the smallest structure following the cursor + if (slice_allowed) { + while(!lcu_at_slice_end(encoder, range_end_slice)) { + ++range_end_slice; + } + } + + if (tile_allowed) { + while(!lcu_at_tile_end(encoder, range_end_tile)) { + ++range_end_tile; + } + } + + //printf("range_start=%d, range_end_slice=%d, range_end_tile=%d, tile_allowed=%d, slice_allowed=%d\n",range_start,range_end_slice,range_end_tile,tile_allowed,slice_allowed); + + if ((!tile_allowed || (range_end_slice >= range_end_tile)) && !new_child && slice_allowed) { + //Create a slice + + printf("%p slice: %d - %d\n", child_state, range_start, range_end_slice); + new_child = &child_state->children[child_count]; + new_child->encoder_control = encoder; + new_child->type = ENCODER_STATE_TYPE_SLICE; + new_child->global = child_state->global; + new_child->tile = child_state->tile; + new_child->wfrow = child_state->wfrow; + new_child->slice = MALLOC(encoder_state_config_slice, 1); + if (!new_child->slice || !encoder_state_config_slice_init(new_child, range_start, range_end_slice)) { + fprintf(stderr, "Could not initialize encoder_state->slice!\n"); + return 0; + } + } + + if ((!slice_allowed || (range_end_slice < range_end_tile)) && !new_child && tile_allowed) { + //Create a tile + int tile_id = encoder->tiles_tile_id[range_start]; + int tile_x = tile_id % encoder->tiles_num_tile_columns; + int tile_y = tile_id / encoder->tiles_num_tile_columns; + + int lcu_offset_x = encoder->tiles_col_bd[tile_x]; + int lcu_offset_y = encoder->tiles_row_bd[tile_y]; + int width_in_lcu = encoder->tiles_col_bd[tile_x+1]-encoder->tiles_col_bd[tile_x]; + int height_in_lcu = encoder->tiles_row_bd[tile_y+1]-encoder->tiles_row_bd[tile_y]; + int width = MIN(width_in_lcu * LCU_WIDTH, encoder->in.width - lcu_offset_x * LCU_WIDTH); + int height = MIN(height_in_lcu * LCU_WIDTH, encoder->in.height - lcu_offset_y * LCU_WIDTH); + + printf("%p tile: %d - %d (%d)\n", child_state, range_start, range_end_tile, tile_id); + new_child = 
&child_state->children[child_count]; + new_child->encoder_control = encoder; + new_child->type = ENCODER_STATE_TYPE_TILE; + new_child->global = child_state->global; + new_child->tile = MALLOC(encoder_state_config_tile, 1); + new_child->slice = child_state->slice; + new_child->wfrow = child_state->wfrow; + + if (!new_child->tile || !encoder_state_config_tile_init(new_child, lcu_offset_x, lcu_offset_y, width, height, width_in_lcu, height_in_lcu)) { + fprintf(stderr, "Could not initialize encoder_state->tile!\n"); + return 0; + } + } + + if (new_child) { + child_state->children = realloc(child_state->children, sizeof(encoder_state) * (2+child_count)); + child_state->children[1+child_count].encoder_control = NULL; + if (!child_state->children) { + fprintf(stderr, "Failed to allocate memory for children...\n"); + return 0; + } + if (!encoder_state_init(&child_state->children[child_count], child_state)) { + fprintf(stderr, "Unable to init child...\n"); + return 0; + } + child_count += 1; + } + + start_in_ts = MAX(range_end_slice, range_end_tile) + 1; + } + + if (children_allow_wavefront_row) { + printf("Wavefront\n"); + } + } + +/* if (encoder->tiles_enable) { int x,y; //Allocate subencoders (valid subencoder have a non null encoder_control field, so we use a null one to mark the end of the list) encoder_state->children = MALLOC(struct encoder_state, encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows + 1); @@ -496,49 +674,50 @@ int encoder_state_init(encoder_state * const encoder_state, const encoder_contro return 0; } } - } - } - + */ return 1; } -static int encoder_state_finalize_one(encoder_state * const encoder_state) { - picture_free(encoder_state->cur_pic); - encoder_state->cur_pic = NULL; - - bitstream_finalize(&encoder_state->stream); - return 1; -} - - -int encoder_state_finalize(encoder_state * const encoder_state) { +void encoder_state_finalize(encoder_state * const encoder_state) { if (encoder_state->children) { int i=0; do { - 
encoder_state_finalize_one(&encoder_state->children[i]); + encoder_state_finalize(&encoder_state->children[i]); } while (encoder_state->children[++i].encoder_control); FREE_POINTER(encoder_state->children); } - encoder_state_finalize_one(encoder_state); - picture_list_destroy(encoder_state->ref); - return 1; -} - -static void encoder_clear_refs(encoder_state *encoder_state) { - if (encoder_state->children) { - int i=0; - do { - encoder_state->children[i].poc = 0; - } while (encoder_state->children[++i].encoder_control); + if (!encoder_state->parent || (encoder_state->parent->wfrow != encoder_state->wfrow)) { + encoder_state_config_wfrow_finalize(encoder_state); + FREE_POINTER(encoder_state->wfrow); } - while (encoder_state->ref->used_size) { - picture_list_rem(encoder_state->ref, encoder_state->ref->used_size - 1); + if (!encoder_state->parent || (encoder_state->parent->slice != encoder_state->slice)) { + encoder_state_config_slice_finalize(encoder_state); + FREE_POINTER(encoder_state->slice); + } + + if (!encoder_state->parent || (encoder_state->parent->tile != encoder_state->tile)) { + encoder_state_config_tile_finalize(encoder_state); + FREE_POINTER(encoder_state->tile); + } + + if (!encoder_state->parent || (encoder_state->parent->global != encoder_state->global)) { + encoder_state_config_global_finalize(encoder_state); + FREE_POINTER(encoder_state->global); + } + + bitstream_finalize(&encoder_state->stream); +} + + +static void encoder_clear_refs(encoder_state *encoder_state) { + while (encoder_state->global->ref->used_size) { + picture_list_rem(encoder_state->global->ref, encoder_state->global->ref->used_size - 1); } - encoder_state->poc = 0; + encoder_state->global->poc = 0; } void encoder_control_input_init(encoder_control * const encoder, @@ -594,7 +773,7 @@ static void write_aud(encoder_state * const encoder_state) static void substream_write_bitstream(encoder_state * const encoder_state, const int end_of_sub_stream) { const encoder_control * const 
encoder = encoder_state->encoder_control; - const picture* const cur_pic = encoder_state->cur_pic; + const picture* const cur_pic = encoder_state->tile->cur_pic; const int lcu_count = cur_pic->width_in_lcu * cur_pic->height_in_lcu; int lcu_id; vector2d lcu; @@ -627,19 +806,19 @@ static void substream_encode(encoder_state * const encoder_state) { const unsigned long long int debug_bitstream_position = bitstream_tell(&(encoder_state->stream)); #endif - yuv_t *hor_buf = yuv_t_alloc(encoder_state->cur_pic->width); + yuv_t *hor_buf = yuv_t_alloc(encoder_state->tile->cur_pic->width); // Allocate 2 extra luma pixels so we get 1 extra chroma pixel for the // for the extra pixel on the top right. yuv_t *ver_buf = yuv_t_alloc(LCU_WIDTH + 2); cabac_start(&encoder_state->cabac); - init_contexts(encoder_state, encoder_state->QP, encoder_state->cur_pic->slicetype); + init_contexts(encoder_state, encoder_state->global->QP, encoder_state->tile->cur_pic->slicetype); // Initialize lambda value(s) to use in search encoder_state_init_lambda(encoder_state); { - picture* const cur_pic = encoder_state->cur_pic; + picture* const cur_pic = encoder_state->tile->cur_pic; int lcu_id; int lcu_count = cur_pic->width_in_lcu * cur_pic->height_in_lcu; @@ -736,14 +915,14 @@ static void substream_encode(encoder_state * const encoder_state) { } static void subencoder_blit_pixels(const encoder_state * const target_enc, pixel * const target, const encoder_state * const source_enc, const pixel * const source, const int is_y_channel) { - const int source_offset_x = source_enc->lcu_offset_x * LCU_WIDTH; - const int source_offset_y = source_enc->lcu_offset_y * LCU_WIDTH; + const int source_offset_x = source_enc->tile->lcu_offset_x * LCU_WIDTH; + const int source_offset_y = source_enc->tile->lcu_offset_y * LCU_WIDTH; - const int target_offset_x = target_enc->lcu_offset_x * LCU_WIDTH; - const int target_offset_y = target_enc->lcu_offset_y * LCU_WIDTH; + const int target_offset_x = 
target_enc->tile->lcu_offset_x * LCU_WIDTH; + const int target_offset_y = target_enc->tile->lcu_offset_y * LCU_WIDTH; - int source_stride = source_enc->cur_pic->width; - int target_stride = target_enc->cur_pic->width; + int source_stride = source_enc->tile->cur_pic->width; + int target_stride = target_enc->tile->cur_pic->width; int width; int height; @@ -755,21 +934,21 @@ static void subencoder_blit_pixels(const encoder_state * const target_enc, pixel assert(target_enc->children || source_enc->children); if (is_y_channel) { - target_offset = source_offset_x + source_offset_y * target_enc->cur_pic->width; - source_offset = target_offset_x + target_offset_y * source_enc->cur_pic->width; + target_offset = source_offset_x + source_offset_y * target_enc->tile->cur_pic->width; + source_offset = target_offset_x + target_offset_y * source_enc->tile->cur_pic->width; } else { - target_offset = source_offset_x/2 + source_offset_y/2 * target_enc->cur_pic->width/2; - source_offset = target_offset_x/2 + target_offset_y/2 * source_enc->cur_pic->width/2; + target_offset = source_offset_x/2 + source_offset_y/2 * target_enc->tile->cur_pic->width/2; + source_offset = target_offset_x/2 + target_offset_y/2 * source_enc->tile->cur_pic->width/2; } if (target_enc->children) { //Use information from the source - width = MIN(source_enc->cur_pic->width_in_lcu * LCU_WIDTH, target_enc->cur_pic->width - source_offset_x); - height = MIN(source_enc->cur_pic->height_in_lcu * LCU_WIDTH, target_enc->cur_pic->height - source_offset_y); + width = MIN(source_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->width - source_offset_x); + height = MIN(source_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->height - source_offset_y); } else { //Use information from the target - width = MIN(target_enc->cur_pic->width_in_lcu * LCU_WIDTH, source_enc->cur_pic->width - target_offset_x); - height = MIN(target_enc->cur_pic->height_in_lcu * LCU_WIDTH, 
source_enc->cur_pic->height - target_offset_y); + width = MIN(target_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->width - target_offset_x); + height = MIN(target_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->height - target_offset_y); } if (!is_y_channel) { @@ -789,9 +968,9 @@ void encode_one_frame(encoder_state * const main_state) const encoder_control * const encoder = main_state->encoder_control; bitstream * const stream = &main_state->stream; - const int is_first_frame = (main_state->frame == 0); - const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->frame % 2 == 0); - const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->frame % encoder->cfg->intra_period) == 0); + const int is_first_frame = (main_state->global->frame == 0); + const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); + const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; @@ -803,8 +982,8 @@ void encode_one_frame(encoder_state * const main_state) // Clear the reference list encoder_clear_refs(main_state); - main_state->cur_pic->slicetype = SLICE_I; - main_state->cur_pic->type = NAL_IDR_W_RADL; + main_state->tile->cur_pic->slicetype = SLICE_I; + main_state->tile->cur_pic->type = NAL_IDR_W_RADL; // Access Unit Delimiter (AUD) if (encoder->aud_enable) @@ -825,7 +1004,7 @@ void encode_one_frame(encoder_state * const main_state) encode_pic_parameter_set(main_state); bitstream_align(stream); - if (main_state->frame == 0) { + if (main_state->global->frame == 0) { // Prefix SEI nal_write(stream, PREFIX_SEI_NUT, 0, 0); encode_prefix_sei_version(main_state); @@ -833,8 +1012,8 @@ void encode_one_frame(encoder_state * const main_state) } } else { // When intra period == 1, all pictures are intra - main_state->cur_pic->slicetype = 
encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; - main_state->cur_pic->type = NAL_TRAIL_R; + main_state->tile->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; + main_state->tile->cur_pic->type = NAL_TRAIL_R; // Access Unit Delimiter (AUD) if (encoder->aud_enable) @@ -860,24 +1039,21 @@ void encode_one_frame(encoder_state * const main_state) #pragma omp parallel for for (i = 0; i < encoder->tiles_num_tile_rows * encoder->tiles_num_tile_columns; ++i) { encoder_state *subencoder = &(main_state->children[i]); - - //TODO: ref frames - subencoder->QP = main_state->QP; + subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1); + subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->u_data, main_state, main_state->tile->cur_pic->u_data, 0); + subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->v_data, main_state, main_state->tile->cur_pic->v_data, 0); - subencoder_blit_pixels(subencoder, subencoder->cur_pic->y_data, main_state, main_state->cur_pic->y_data, 1); - subencoder_blit_pixels(subencoder, subencoder->cur_pic->u_data, main_state, main_state->cur_pic->u_data, 0); - subencoder_blit_pixels(subencoder, subencoder->cur_pic->v_data, main_state, main_state->cur_pic->v_data, 0); - - subencoder->cur_pic->slicetype = main_state->cur_pic->slicetype; - subencoder->cur_pic->type = main_state->cur_pic->type; + //FIXME: remove this once these are in slice + subencoder->tile->cur_pic->slicetype = main_state->tile->cur_pic->slicetype; + subencoder->tile->cur_pic->type = main_state->tile->cur_pic->type; substream_encode(subencoder); substream_write_bitstream(subencoder, (main_state->children[i+1].encoder_control) != NULL); - subencoder_blit_pixels(main_state, main_state->cur_pic->y_recdata, subencoder, subencoder->cur_pic->y_recdata, 1); - subencoder_blit_pixels(main_state, main_state->cur_pic->u_recdata, subencoder, subencoder->cur_pic->u_recdata, 0); - 
subencoder_blit_pixels(main_state, main_state->cur_pic->v_recdata, subencoder, subencoder->cur_pic->v_recdata, 0); + subencoder_blit_pixels(main_state, main_state->tile->cur_pic->y_recdata, subencoder, subencoder->tile->cur_pic->y_recdata, 1); + subencoder_blit_pixels(main_state, main_state->tile->cur_pic->u_recdata, subencoder, subencoder->tile->cur_pic->u_recdata, 0); + subencoder_blit_pixels(main_state, main_state->tile->cur_pic->v_recdata, subencoder, subencoder->tile->cur_pic->v_recdata, 0); } //We should do the slice header here, because we can have the entry points @@ -899,7 +1075,8 @@ void encode_one_frame(encoder_state * const main_state) // Calculate checksum add_checksum(main_state); - main_state->cur_pic->poc = main_state->poc; + //FIXME: Why is this needed? + main_state->tile->cur_pic->poc = main_state->global->poc; } static void fill_after_frame(unsigned height, unsigned array_width, @@ -945,38 +1122,38 @@ int read_one_frame(FILE* file, const encoder_state * const encoder_state) { unsigned width = encoder_state->encoder_control->in.real_width; unsigned height = encoder_state->encoder_control->in.real_height; - unsigned array_width = encoder_state->cur_pic->width; - unsigned array_height = encoder_state->cur_pic->height; + unsigned array_width = encoder_state->tile->cur_pic->width; + unsigned array_height = encoder_state->tile->cur_pic->height; if (width != array_width) { // In the case of frames not being aligned on 8 bit borders, bits need to be copied to fill them in. 
if (!read_and_fill_frame_data(file, width, height, array_width, - encoder_state->cur_pic->y_data) || + encoder_state->tile->cur_pic->y_data) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, - encoder_state->cur_pic->u_data) || + encoder_state->tile->cur_pic->u_data) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, - encoder_state->cur_pic->v_data)) + encoder_state->tile->cur_pic->v_data)) return 0; } else { // Otherwise the data can be read directly to the array. unsigned y_size = width * height; unsigned uv_size = (width >> 1) * (height >> 1); - if (y_size != fread(encoder_state->cur_pic->y_data, sizeof(unsigned char), + if (y_size != fread(encoder_state->tile->cur_pic->y_data, sizeof(unsigned char), y_size, file) || - uv_size != fread(encoder_state->cur_pic->u_data, sizeof(unsigned char), + uv_size != fread(encoder_state->tile->cur_pic->u_data, sizeof(unsigned char), uv_size, file) || - uv_size != fread(encoder_state->cur_pic->v_data, sizeof(unsigned char), + uv_size != fread(encoder_state->tile->cur_pic->v_data, sizeof(unsigned char), uv_size, file)) return 0; } if (height != array_height) { fill_after_frame(height, array_width, array_height, - encoder_state->cur_pic->y_data); + encoder_state->tile->cur_pic->y_data); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, - encoder_state->cur_pic->u_data); + encoder_state->tile->cur_pic->u_data); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, - encoder_state->cur_pic->v_data); + encoder_state->tile->cur_pic->v_data); } return 1; } @@ -989,7 +1166,7 @@ int read_one_frame(FILE* file, const encoder_state * const encoder_state) static void add_checksum(encoder_state * const encoder_state) { bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; unsigned char checksum[3][SEI_HASH_MAX_LENGTH]; uint32_t 
checksum_val; unsigned int i; @@ -1016,7 +1193,7 @@ static void add_checksum(encoder_state * const encoder_state) void encode_access_unit_delimiter(encoder_state * const encoder_state) { bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; uint8_t pic_type = cur_pic->slicetype == SLICE_I ? 0 : cur_pic->slicetype == SLICE_P ? 1 : 2; @@ -1087,7 +1264,7 @@ void encode_pic_parameter_set(encoder_state * const encoder_state) WRITE_UE(stream, 0, "num_ref_idx_l0_default_active_minus1"); WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1"); - WRITE_SE(stream, ((int8_t)encoder_state->QP)-26, "pic_init_qp_minus26"); + WRITE_SE(stream, ((int8_t)encoder_state->global->QP)-26, "pic_init_qp_minus26"); WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder_state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag"); WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); @@ -1250,7 +1427,8 @@ static void encode_scaling_list(encoder_state * const encoder_state) void encode_seq_parameter_set(encoder_state * const encoder_state) { bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->cur_pic; + //FIXME: use encoder_control instead of cur_pic + const picture * const cur_pic = encoder_state->tile->cur_pic; #ifdef _DEBUG printf("=========== Sequence Parameter Set ID: 0 ===========\n"); @@ -1493,43 +1671,33 @@ void encoder_next_frame(encoder_state *encoder_state) { picture *old_pic; // Remove the ref pic (if present) - if (encoder_state->ref->used_size == (uint32_t)encoder->cfg->ref_frames) { - picture_list_rem(encoder_state->ref, encoder_state->ref->used_size-1); + if (encoder_state->global->ref->used_size == (uint32_t)encoder->cfg->ref_frames) { + picture_list_rem(encoder_state->global->ref, encoder_state->global->ref->used_size-1); } // Add current picture as reference - 
picture_list_add(encoder_state->ref, encoder_state->cur_pic); + picture_list_add(encoder_state->global->ref, encoder_state->tile->cur_pic); // Allocate new memory to current picture - old_pic = encoder_state->cur_pic; + old_pic = encoder_state->tile->cur_pic; // TODO: reuse memory from old reference - encoder_state->cur_pic = picture_alloc(encoder_state->cur_pic->width, encoder_state->cur_pic->height, encoder_state->cur_pic->width_in_lcu, encoder_state->cur_pic->height_in_lcu); + encoder_state->tile->cur_pic = picture_alloc(encoder_state->tile->cur_pic->width, encoder_state->tile->cur_pic->height, encoder_state->tile->cur_pic->width_in_lcu, encoder_state->tile->cur_pic->height_in_lcu); + //FIXME: does the coeff_* really belongs to cur_pic? // Copy pointer from the last cur_pic because we don't want to reallocate it - MOVE_POINTER(encoder_state->cur_pic->coeff_y,old_pic->coeff_y); - MOVE_POINTER(encoder_state->cur_pic->coeff_u,old_pic->coeff_u); - MOVE_POINTER(encoder_state->cur_pic->coeff_v,old_pic->coeff_v); + MOVE_POINTER(encoder_state->tile->cur_pic->coeff_y,old_pic->coeff_y); + MOVE_POINTER(encoder_state->tile->cur_pic->coeff_u,old_pic->coeff_u); + MOVE_POINTER(encoder_state->tile->cur_pic->coeff_v,old_pic->coeff_v); picture_free(old_pic); - encoder_state->frame++; - encoder_state->poc++; - - if (encoder_state->children) { - int x,y; - for (y=0; y < encoder->tiles_num_tile_rows; ++y) { - for (x=0; x < encoder->tiles_num_tile_columns; ++x) { - const int i = y * encoder->tiles_num_tile_columns + x; - encoder_state->children[i].frame++; - encoder_state->children[i].poc++; - } - } - } + encoder_state->global->frame++; + encoder_state->global->poc++; } void encode_slice_header(encoder_state * const encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; #ifdef 
_DEBUG printf("=========== Slice ===========\n"); @@ -1557,9 +1725,9 @@ void encode_slice_header(encoder_state * const encoder_state) if (cur_pic->type != NAL_IDR_W_RADL && cur_pic->type != NAL_IDR_N_LP) { int j; - int ref_negative = encoder_state->ref->used_size; + int ref_negative = encoder_state->global->ref->used_size; int ref_positive = 0; - WRITE_U(stream, encoder_state->poc&0xf, 4, "pic_order_cnt_lsb"); + WRITE_U(stream, encoder_state->global->poc&0xf, 4, "pic_order_cnt_lsb"); WRITE_U(stream, 0, 1, "short_term_ref_pic_set_sps_flag"); WRITE_UE(stream, ref_negative, "num_negative_pics"); WRITE_UE(stream, ref_positive, "num_positive_pics"); @@ -1582,7 +1750,7 @@ void encode_slice_header(encoder_state * const encoder_state) if (cur_pic->slicetype != SLICE_I) { WRITE_U(stream, 1, 1, "num_ref_idx_active_override_flag"); - WRITE_UE(stream, encoder_state->ref->used_size-1, "num_ref_idx_l0_active_minus1"); + WRITE_UE(stream, encoder_state->global->ref->used_size-1, "num_ref_idx_l0_active_minus1"); WRITE_UE(stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand"); } @@ -1606,7 +1774,7 @@ static void encode_sao_color(encoder_state * const encoder_state, sao_info *sao, color_index color_i) { cabac_data * const cabac = &encoder_state->cabac; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; sao_eo_cat i; // Skip colors with no SAO. 
@@ -1690,14 +1858,14 @@ void encode_coding_tree(encoder_state * const encoder_state, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth) { cabac_data * const cabac = &encoder_state->cabac; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[x_ctb + y_ctb * (cur_pic->width_in_lcu << MAX_DEPTH)]; uint8_t split_flag = GET_SPLITDATA(cur_cu, depth); uint8_t split_model = 0; //Absolute ctb - uint16_t abs_x_ctb = x_ctb + (encoder_state->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); - uint16_t abs_y_ctb = y_ctb + (encoder_state->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); + uint16_t abs_x_ctb = x_ctb + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); + uint16_t abs_y_ctb = y_ctb + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); // Check for slice border uint8_t border_x = ((encoder_state->encoder_control->in.width) < (abs_x_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 
1 : 0; @@ -1852,7 +2020,7 @@ void encode_coding_tree(encoder_state * const encoder_state, //if(encoder_state->ref_idx_num[uiRefListIdx] > 0) { if (cur_cu->inter.mv_dir & (1 << ref_list_idx)) { - if (encoder_state->ref->used_size != 1) { //encoder_state->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1) + if (encoder_state->global->ref->used_size != 1) { //encoder_state->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1) // parseRefFrmIdx int32_t ref_frame = cur_cu->inter.mv_ref; @@ -1861,7 +2029,7 @@ void encode_coding_tree(encoder_state * const encoder_state, if (ref_frame > 0) { int32_t i; - int32_t ref_num = encoder_state->ref->used_size - 2; + int32_t ref_num = encoder_state->global->ref->used_size - 2; cabac->ctx = &(cabac->ctx_cu_ref_pic_model[1]); ref_frame--; @@ -1879,7 +2047,7 @@ void encode_coding_tree(encoder_state * const encoder_state, } } - if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder_state->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { + if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder_state->global->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { const int32_t mvd_hor = cur_cu->inter.mvd[0]; const int32_t mvd_ver = cur_cu->inter.mvd[1]; const int8_t hor_abs_gr0 = mvd_hor != 0; @@ -2393,15 +2561,15 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 coeffcost += abs((int)temp_coeff[i]); coeffcost2 += abs((int)temp_coeff2[i]); } - cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5); - cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->cur_lambda_cost+0.5); + cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); + cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); // Full RDO } else if(encoder->rdo == 2) { coeffcost = get_coeff_cost(encoder_state, temp_coeff, 4, 0, scan_idx_luma); coeffcost2 = get_coeff_cost(encoder_state, temp_coeff2, 4, 0, scan_idx_luma); - 
cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5); - cost2 += coeffcost2*((int)encoder_state->cur_lambda_cost+0.5); + cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); + cost2 += coeffcost2*((int)encoder_state->global->cur_lambda_cost+0.5); } cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2); @@ -2532,7 +2700,7 @@ void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32 static void encode_transform_unit(encoder_state * const encoder_state, int x_pu, int y_pu, int depth, int tr_depth) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; uint8_t width = LCU_WIDTH >> depth; uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); @@ -2680,7 +2848,7 @@ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,in cabac_data * const cabac = &encoder_state->cabac; int32_t x_cu = x_pu / 2; int32_t y_cu = y_pu / 2; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[x_cu + y_cu * (cur_pic->width_in_lcu << MAX_DEPTH)]; // NxN signifies implicit transform split at the first transform level. 
@@ -3038,4 +3206,3 @@ void encode_last_significant_xy(encoder_state * const encoder_state, // end LastSignificantXY } - diff --git a/src/encoder.h b/src/encoder.h index 4956fe39..bb3f2279 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -129,32 +129,74 @@ typedef struct } encoder_control; +typedef enum { + ENCODER_STATE_TYPE_INVALID = 'i', + ENCODER_STATE_TYPE_MAIN = 'M', + ENCODER_STATE_TYPE_SLICE = 'S', + ENCODER_STATE_TYPE_TILE = 'T', + ENCODER_STATE_TYPE_WAVEFRONT_ROW = 'W', +} encoder_state_type; + + + +typedef struct { + double cur_lambda_cost; + + int32_t frame; + int32_t poc; /*!< \brief picture order count */ + + int8_t QP; //!< \brief Quantization parameter + + //Current picture available references + picture_list *ref; + int8_t ref_list; + //int8_t ref_idx_num[2]; + +} encoder_state_config_global; + +typedef struct { + //Current picture to encode + picture *cur_pic; + + //Tile: offset in LCU for current encoder_state in global coordinates + int32_t lcu_offset_x; + int32_t lcu_offset_y; + + //Position of the first element in tile scan in global coordinates + int32_t lcu_offset_in_ts; +} encoder_state_config_tile; + +typedef struct { + //Local coordinates, relative to *tile + int32_t start_in_ts; + int32_t end_in_ts; + + //Global coordinates + int32_t start_in_rs; + int32_t end_in_rs; +} encoder_state_config_slice; + +typedef struct { + //Row of the wavefront, relative to *tile + int32_t lcu_offset_y; +} encoder_state_config_wfrow; + typedef struct encoder_state { const encoder_control *encoder_control; - double cur_lambda_cost; - bitstream stream; - cabac_data cabac; - + encoder_state_type type; + //List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL //Use do { } while (encoder_state->children[++i].encoder_control) struct encoder_state *children; struct encoder_state *parent; - //Tile: offset in LCU for current encoder_state - int32_t lcu_offset_x; - int32_t lcu_offset_y; + encoder_state_config_global *global; + 
encoder_state_config_tile *tile; + encoder_state_config_slice *slice; + encoder_state_config_wfrow *wfrow; - //Current picture to encode - picture *cur_pic; - int32_t frame; - int32_t poc; /*!< \brief picture order count */ - - //Current picture available references - picture_list *ref; - int8_t ref_list; - int8_t ref_idx_num[2]; - - int8_t QP; //!< \brief Quantization parameter + bitstream stream; + cabac_data cabac; } encoder_state; int encoder_control_init(encoder_control *encoder, const config *cfg); @@ -162,8 +204,8 @@ int encoder_control_finalize(encoder_control *encoder); void encoder_control_input_init(encoder_control *encoder, int32_t width, int32_t height); -int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder); -int encoder_state_finalize(encoder_state *encoder_state); +int encoder_state_init(encoder_state * child_state, encoder_state * parent_state); +void encoder_state_finalize(encoder_state *encoder_state); void encoder_state_init_lambda(encoder_state *encoder_state); void encode_one_frame(encoder_state *encoder_state); diff --git a/src/filter.c b/src/filter.c index 20ac1c4f..bee1629d 100644 --- a/src/filter.c +++ b/src/filter.c @@ -167,7 +167,7 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; const encoder_control * const encoder = encoder_state->encoder_control; cu_info *cu_q = &cur_pic->cu_array[(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)]; @@ -194,7 +194,7 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state, int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; int8_t strength = 0; - int32_t qp = encoder_state->QP; + int32_t qp = encoder_state->global->QP; int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8); int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1)); 
int32_t beta = g_beta_table_8x8[b_index] * bitdepth_scale; @@ -295,7 +295,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state, int8_t depth, int8_t dir) { const encoder_control * const encoder = encoder_state->encoder_control; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; cu_info *cu_q = &cur_pic->cu_array[(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)]; // Chroma edges that do not lay on a 8x8 grid are not deblocked. @@ -327,7 +327,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state, int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1); int8_t strength = 2; - int32_t QP = g_chroma_scale[encoder_state->QP]; + int32_t QP = g_chroma_scale[encoder_state->global->QP]; int32_t bitdepth_scale = 1 << (encoder->bitdepth-8); int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1))); int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale; @@ -389,7 +389,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state, */ void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; cu_info *cur_cu = &cur_pic->cu_array[x + y*(cur_pic->width_in_lcu << MAX_DEPTH)]; uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0; uint8_t border_x = (cur_pic->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 
1 : 0; @@ -437,7 +437,7 @@ void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y */ void filter_deblock(encoder_state * const encoder_state) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; int16_t x, y; // TODO: Optimization: add thread for each LCU diff --git a/src/inter.c b/src/inter.c index 8e2b0e9b..ef19219c 100644 --- a/src/inter.c +++ b/src/inter.c @@ -83,12 +83,12 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels // negative overflow flag - int8_t overflow_neg_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) < 0)?1:0; - int8_t overflow_neg_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) < 0)?1:0; + int8_t overflow_neg_x = (encoder_state->tile->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) < 0)?1:0; + int8_t overflow_neg_y = (encoder_state->tile->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) < 0)?1:0; // positive overflow flag - int8_t overflow_pos_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) + width > ref->width )?1:0; - int8_t overflow_pos_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) + width > ref->height)?1:0; + int8_t overflow_pos_x = (encoder_state->tile->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) + width > ref->width )?1:0; + int8_t overflow_pos_y = (encoder_state->tile->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) + width > ref->height)?1:0; // Chroma half-pel #define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8) @@ -114,7 +114,7 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * // Fill source blocks with data from reference, -4...width+4 for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) { // calculate y-pixel offset - coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1); + coord_y 
= (y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1); // On y-overflow set coord_y accordingly overflow_neg_y_temp = (coord_y < 0) ? 1 : 0; @@ -124,7 +124,7 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * coord_y *= ref_width_c; for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) { - coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1); + coord_x = (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1); // On x-overflow set coord_x accordingly overflow_neg_x_temp = (coord_x < 0) ? 1 : 0; @@ -161,8 +161,8 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * int x_in_lcu = (x & ((LCU_WIDTH)-1)); int y_in_lcu = (y & ((LCU_WIDTH)-1)); - coord_x = (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0]; - coord_y = (y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1]; + coord_x = (x + encoder_state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]; + coord_y = (y + encoder_state->tile->lcu_offset_y * LCU_WIDTH) + mv[1]; overflow_neg_x = (coord_x < 0)?1:0; overflow_neg_y = (coord_y < 0)?1:0; @@ -196,8 +196,8 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * int x_in_lcu = (x & ((LCU_WIDTH>>1)-1)); int y_in_lcu = (y & ((LCU_WIDTH>>1)-1)); - coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH >> 1)) + (mv[0]>>1); - coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH >> 1)) + (mv[1]>>1); + coord_x = (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH >> 1)) + (mv[0]>>1); + coord_y = (y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH >> 1)) + (mv[1]>>1); overflow_neg_x = (coord_x < 0)?1:0; overflow_neg_y = (y + (mv[1]>>1) < 0)?1:0; @@ -229,11 +229,11 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * // Copy Luma for (y = ypos; y < ypos + width; y++) { int y_in_lcu = (y & ((LCU_WIDTH)-1)); - coord_y = ((y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1]) * 
ref->width; // pre-calculate + coord_y = ((y + encoder_state->tile->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate for (x = xpos; x < xpos + width; x++) { int x_in_lcu = (x & ((LCU_WIDTH)-1)); - lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0]]; + lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]]; } } @@ -242,11 +242,11 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture * // TODO: chroma fractional pixel interpolation for (y = ypos>>1; y < (ypos + width)>>1; y++) { int y_in_lcu = (y & ((LCU_WIDTH>>1)-1)); - coord_y = ((y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1)) * ref_width_c; // pre-calculate + coord_y = ((y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1)) * ref_width_c; // pre-calculate for (x = xpos>>1; x < (xpos + width)>>1; x++) { int x_in_lcu = (x & ((LCU_WIDTH>>1)-1)); - lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)]; - lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)]; + lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)]; + lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)]; } } } @@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu); #define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6) -#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\ - int tb = 
encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\ +#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->global->poc - encoder_state->global->ref->pics[(cu)->inter.mv_ref]->poc;\ + int tb = encoder_state->global->poc - encoder_state->global->ref->pics[cur_cu->inter.mv_ref]->poc;\ if (td != tb) { \ int scale = CALCULATE_SCALE(cu,tb,td); \ mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \ diff --git a/src/intra.c b/src/intra.c index c9b5364a..17fcc02c 100644 --- a/src/intra.c +++ b/src/intra.c @@ -375,7 +375,7 @@ int16_t intra_prediction(encoder_state * const encoder_state, pixel *orig, int32 intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0); sad = cost_func(pred, orig_block); - sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5); + sad += mode_cost * (int)(encoder_state->global->cur_lambda_cost + 0.5); // When rdo == 2, store best costs to an array and do full RDO later if(rdo == 2) { int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad); @@ -419,7 +419,7 @@ int16_t intra_prediction(encoder_state * const encoder_state, pixel *orig, int32 // Bitcost also calculated again for this mode rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds); // Add bitcost * lambda - rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5); + rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->global->cur_lambda_cost + 0.5); if(rdo_costs[rdo_mode] < best_sad) { best_sad = rdo_costs[rdo_mode]; diff --git a/src/rdo.c b/src/rdo.c index d9be91db..7c0fb81d 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -112,12 +112,12 @@ uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel for (i = 0; i < width*width; i++) { coeffcost += abs((int)temp_coeff[i]); } - cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5); + cost += (1 + coeffcost + 
(coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); // Full RDO } else if(encoder->rdo == 2) { coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode); - cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5); + cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); } return cost; } @@ -299,7 +299,7 @@ uint32_t get_coded_level ( encoder_state * const encoder_state, double *coded_co cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma); if( !last && max_abs_level < 3 ) { - *coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); + *coded_cost_sig = encoder_state->global->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0); *coded_cost = *coded_cost0 + *coded_cost_sig; if (max_abs_level == 0) return best_abs_level; } else { @@ -307,13 +307,13 @@ uint32_t get_coded_level ( encoder_state * const encoder_state, double *coded_co } if( !last ) { - cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); + cur_cost_sig = encoder_state->global->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1); } min_abs_level = ( max_abs_level > 1 ? 
max_abs_level - 1 : 1 ); for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) { double err = (double)(level_double - ( abs_level << q_bits ) ); - double cur_cost = err * err * temp + encoder_state->cur_lambda_cost * + double cur_cost = err * err * temp + encoder_state->global->cur_lambda_cost * get_ic_rate_cost( encoder_state, abs_level, ctx_num_one, ctx_num_abs, abs_go_rice, c1_idx, c2_idx, type); cur_cost += cur_cost_sig; @@ -350,7 +350,7 @@ static double get_rate_last(const encoder_state * const encoder_state, if( ctx_y > 3 ) { uiCost += 32768.0 * ((ctx_y-2)>>1); } - return encoder_state->cur_lambda_cost*uiCost; + return encoder_state->global->cur_lambda_cost*uiCost; } static void calc_last_bits(encoder_state * const encoder_state, int32_t width, int32_t height, int8_t type, @@ -402,7 +402,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * uint32_t max_num_coeff = width * height; int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); - int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0); { int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; @@ -591,7 +591,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * if (sig_coeffgroup_flag[ cg_blkpos ] == 0) { uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); - cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); base_cost += cost_coeffgroup_sig[ cg_scanpos ] - rd_stats.sig_cost; } else { if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below. 
@@ -608,9 +608,9 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); if (cg_scanpos < cg_last_scanpos) { - cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1); + cost_coeffgroup_sig[cg_scanpos] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1); base_cost += cost_coeffgroup_sig[cg_scanpos]; - cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_zero_cg += encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); } // try to convert the current coeff group from non-zero to all-zero @@ -624,7 +624,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * sig_coeffgroup_flag[ cg_blkpos ] = 0; base_cost = cost_zero_cg; if (cg_scanpos < cg_last_scanpos) { - cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); + cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0); } // reset coeffs to 0 in this block for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) { @@ -652,13 +652,13 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) { - best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0); - base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1); + best_cost = block_uncoded_cost + encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0); + base_cost += 
encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1); } else { cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma); ctx_cbf = ( type ? tr_depth : !tr_depth); - best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); - base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); + best_cost = block_uncoded_cost + encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0); + base_cost += encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1); } for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) { @@ -712,7 +712,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient * if(*abs_sum >= 2) { int64_t rd_factor = (int64_t) ( g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6))) - / encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8))) + / encoder_state->global->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8))) + 0.5); int32_t lastCG = -1; int32_t absSum = 0; diff --git a/src/sao.c b/src/sao.c index aff4cf6f..84c05796 100644 --- a/src/sao.c +++ b/src/sao.c @@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_state * const encoder_state, { int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left); - sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5)); + sum_ddistortion += (int)((double)mode_bits*(encoder_state->global->cur_lambda_cost+0.5)); } // SAO is not applied for category 0. 
edge_offset[SAO_EO_CAT0] = 0; @@ -711,7 +711,7 @@ static void sao_search_band_sao(const encoder_state * const encoder_state, const ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position); temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left); - ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5)); + ddistortion += (int)((double)temp_rate*(encoder_state->global->cur_lambda_cost+0.5)); // Select band sao over edge sao when distortion is lower if (ddistortion < sao_out->ddistortion) { @@ -745,7 +745,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons { int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left); - int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5); + int ddistortion = mode_bits * (int)(encoder_state->global->cur_lambda_cost + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { @@ -759,7 +759,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons { int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left); - int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5); + int ddistortion = mode_bits * (int)(encoder_state->global->cur_lambda_cost + 0.5); unsigned buf_i; for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { @@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons // Choose between SAO and doing nothing, taking into account the // rate-distortion cost of coding do nothing. 
{ - int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5); + int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->global->cur_lambda_cost + 0.5); if (sao_out->ddistortion >= cost_of_nothing) { sao_out->type = SAO_TYPE_NONE; } @@ -863,7 +863,7 @@ void sao_search_luma(const encoder_state * const encoder_state, const picture *p void sao_reconstruct_frame(encoder_state * const encoder_state) { vector2d lcu; - picture * const cur_pic = encoder_state->cur_pic; + picture * const cur_pic = encoder_state->tile->cur_pic; // These are needed because SAO needs the pre-SAO pixels form left and // top LCUs. Single pixel wide buffers, like what search_lcu takes, would diff --git a/src/search.c b/src/search.c index c3deae4e..99434380 100644 --- a/src/search.c +++ b/src/search.c @@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost; } *bitcost = temp_bitcost; - return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5); + return temp_bitcost*(int32_t)(encoder_state->global->cur_lambda_cost+0.5); } @@ -201,8 +201,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign for (i = 0; i < 7; ++i) { const vector2d *pattern = &large_hexbs[i]; unsigned cost = calc_sad(pic, ref, orig->x, orig->y, - (encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, - (encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, block_width, block_width); cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); @@ -216,8 +216,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign // Try the 0,0 vector. 
if (!(mv.x == 0 && mv.y == 0)) { unsigned cost = calc_sad(pic, ref, orig->x, orig->y, - (encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x, - (encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y, + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x, + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y, block_width, block_width); cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); @@ -232,8 +232,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign for (i = 1; i < 7; ++i) { const vector2d *pattern = &large_hexbs[i]; unsigned cost = calc_sad(pic, ref, orig->x, orig->y, - (encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x, - (encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y, + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x, + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y, block_width, block_width); cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); @@ -267,8 +267,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign for (i = 0; i < 3; ++i) { const vector2d *offset = &large_hexbs[start + i]; unsigned cost = calc_sad(pic, ref, orig->x, orig->y, - (encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, - (encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width); cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); @@ -290,8 +290,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign for (i = 1; i < 5; ++i) { const vector2d *offset = &small_hexbs[i]; unsigned cost = calc_sad(pic, ref, orig->x, orig->y, - 
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, - (encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width); cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost); @@ -374,7 +374,7 @@ static unsigned search_mv_full(unsigned depth, */ static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; uint32_t ref_idx = 0; int x_local = (x&0x3f), y_local = (y&0x3f); int x_cu = x>>3; @@ -394,8 +394,8 @@ static int search_cu_inter(const encoder_state * const encoder_state, int x, int cur_cu->inter.cost = UINT_MAX; - for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) { - picture *ref_pic = encoder_state->ref->pics[ref_idx]; + for (ref_idx = 0; ref_idx < encoder_state->global->ref->used_size; ref_idx++) { + picture *ref_pic = encoder_state->global->ref->pics[ref_idx]; unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu); cu_info *ref_cu = &ref_pic->cu_array[y_cu * width_in_scu + x_cu]; uint32_t temp_bitcost = 0; @@ -670,7 +670,7 @@ static int search_cu_intra(encoder_state * const encoder_state, const int x_px, const int y_px, const int depth, lcu_t *lcu) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f }; const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 }; const int8_t cu_width = (LCU_WIDTH >> (depth)); @@ -776,7 +776,7 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state, } } // Coefficient costs - cost += (coeff_cost + (coeff_cost>>1)) * 
(int32_t)(encoder_state->cur_lambda_cost+0.5); + cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->global->cur_lambda_cost+0.5); // Calculate actual bit costs for coding the coeffs // RDO @@ -838,11 +838,11 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state, coeff_cost += get_coeff_cost(encoder_state, coeff_temp_v, blockwidth, 2, chroma_scan_mode); } // Multiply bit count with lambda to get RD-cost - cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5); + cost += coeff_cost * (int32_t)(encoder_state->global->cur_lambda_cost+0.5); } // Bitcost - cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5); + cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->global->cur_lambda_cost+0.5); return cost; } @@ -859,7 +859,7 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state, */ static int search_cu(encoder_state * const encoder_state, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH]) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; int cu_width = LCU_WIDTH >> depth; int cost = MAX_INT; cu_info *cur_cu; @@ -911,7 +911,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height); } else if (cur_cu->type == CU_INTER) { int cbf; - inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); + inter_recon_lcu(encoder_state, encoder_state->global->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]); cbf = 
cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); @@ -933,7 +933,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept // Recursively split all the way to max search depth. if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { int half_cu = cu_width / 2; - int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost); + int split_cost = (int)(4.5 * encoder_state->global->cur_lambda_cost); int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth); // If skip mode was selected for the block, skip further search. @@ -970,7 +970,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept */ static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf) { - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; // Copy reference cu_info structs from neighbouring LCUs. { @@ -1050,7 +1050,7 @@ static void init_lcu_t(const encoder_state * const encoder_state, const int x, c // Copy LCU pixels. 
{ - const picture * const pic = encoder_state->cur_pic; + const picture * const pic = encoder_state->tile->cur_pic; int pic_width = cur_pic->width; int x_max = MIN(x + LCU_WIDTH, pic_width) - x; int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y; @@ -1080,7 +1080,7 @@ static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x { const int x_cu = x_px >> MAX_DEPTH; const int y_cu = y_px >> MAX_DEPTH; - const picture * const cur_pic = encoder_state->cur_pic; + const picture * const cur_pic = encoder_state->tile->cur_pic; const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH; cu_info *const cu_array = cur_pic->cu_array; @@ -1100,7 +1100,7 @@ static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x // Copy pixels to picture. { - picture * const pic = encoder_state->cur_pic; + picture * const pic = encoder_state->tile->cur_pic; const int pic_width = pic->width; const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px; const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px; diff --git a/src/transform.c b/src/transform.c index 06fc2a76..ea3d9f40 100644 --- a/src/transform.c +++ b/src/transform.c @@ -634,7 +634,7 @@ void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_ int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2]; #endif - int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0); //New block for variable definitions { @@ -646,7 +646,7 @@ void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_ int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9); + int32_t add = ((encoder_state->tile->cur_pic->slicetype == SLICE_I) ? 
171 : 85) << (q_bits - 9); int32_t q_bits8 = q_bits - 8; for (n = 0; n < width * height; n++) { @@ -762,7 +762,7 @@ void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t int32_t n; int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2); - int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0); + int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0); shift = 20 - QUANT_SHIFT - transform_shift; From 699669ee353667adfbcf0a4ec8ce00be34ac14b6 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Tue, 6 May 2014 15:45:31 +0200 Subject: [PATCH 08/21] fixed typo --- src/encoder.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 14b82907..e76c52b5 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -442,7 +442,7 @@ static int encoder_state_config_slice_init(encoder_state * const encoder_state, encoder_state->slice->end_in_ts = end_address_in_ts - encoder_state->tile->lcu_offset_in_ts; encoder_state->slice->start_in_rs = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[start_address_in_ts]; - encoder_state->slice->end_in_ts = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts]; + encoder_state->slice->end_in_rs = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts]; return 1; } @@ -568,6 +568,8 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const //Full span to analyze start_in_ts = child_state->tile->lcu_offset_in_ts + child_state->slice->start_in_ts; end_in_ts = MIN(child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu, child_state->tile->lcu_offset_in_ts + child_state->slice->end_in_ts); + + //printf("%c-%p: start_in_ts=%d, end_in_ts=%d\n",child_state->type, child_state, start_in_ts, end_in_ts); while (start_in_ts < end_in_ts) { encoder_state *new_child = NULL; int range_start = start_in_ts; 
@@ -590,7 +592,7 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const } } - //printf("range_start=%d, range_end_slice=%d, range_end_tile=%d, tile_allowed=%d, slice_allowed=%d\n",range_start,range_end_slice,range_end_tile,tile_allowed,slice_allowed); + //printf("range_start=%d, range_end_slice=%d, range_end_tile=%d, tile_allowed=%d, slice_allowed=%d end_in_ts=%d\n",range_start,range_end_slice,range_end_tile,tile_allowed,slice_allowed,end_in_ts); if ((!tile_allowed || (range_end_slice >= range_end_tile)) && !new_child && slice_allowed) { //Create a slice From cee6bb0e71b4c218d0d906559f3e24492296504a Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 06:24:22 +0200 Subject: [PATCH 09/21] Fix iteration on children --- src/encoder.c | 26 +++++--------------------- src/encoder.h | 2 +- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index e76c52b5..20d093f7 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -660,32 +660,15 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const printf("Wavefront\n"); } } - -/* if (encoder->tiles_enable) { - int x,y; - //Allocate subencoders (valid subencoder have a non null encoder_control field, so we use a null one to mark the end of the list) - encoder_state->children = MALLOC(struct encoder_state, encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows + 1); - encoder_state->children[encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows].encoder_control = NULL; - for (y=0; y < encoder->tiles_num_tile_rows; ++y) { - for (x=0; x < encoder->tiles_num_tile_columns; ++x) { - const int i = y * encoder->tiles_num_tile_columns + x; - encoder_state->children[i].encoder_control = encoder; - - if (!encoder_state_init_one(&encoder_state->children[i], encoder_state, x, y)) { - fprintf(stderr, "Could not initialize encoder state %d!\n", i); - return 0; - } - } - */ return 1; } void 
encoder_state_finalize(encoder_state * const encoder_state) { if (encoder_state->children) { int i=0; - do { + for (i = 0; encoder_state->children[i].encoder_control; ++i) { encoder_state_finalize(&encoder_state->children[i]); - } while (encoder_state->children[++i].encoder_control); + } FREE_POINTER(encoder_state->children); } @@ -1037,6 +1020,7 @@ void encode_one_frame(encoder_state * const main_state) if (main_state->children) { int i; + //FIXME! //This can be parallelized, we don't use a do...while loop because we use OpenMP #pragma omp parallel for for (i = 0; i < encoder->tiles_num_tile_rows * encoder->tiles_num_tile_columns; ++i) { @@ -1062,11 +1046,11 @@ void encode_one_frame(encoder_state * const main_state) //This has to be serial i = 0; - do { + for (i = 0; main_state->children[i].encoder_control; ++i) { //Append bitstream to main stream bitstream_append(&main_state->stream, &main_state->children[i].stream); bitstream_clear(&main_state->children[i].stream); - } while (main_state->children[++i].encoder_control); + } } else { //Encode the whole thing as one stream diff --git a/src/encoder.h b/src/encoder.h index bb3f2279..a5b1a7ac 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -186,7 +186,7 @@ typedef struct encoder_state { encoder_state_type type; //List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL - //Use do { } while (encoder_state->children[++i].encoder_control) + //Use for (i = 0; encoder_state->children[i].encoder_control; ++i) { struct encoder_state *children; struct encoder_state *parent; From 8b5cb62237f265434c2feeeb35134281aeaa3ed0 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 07:06:19 +0200 Subject: [PATCH 10/21] Debug code to generate a graph --- src/encoder.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++--- src/encoder.h | 4 +- 2 files changed, 132 insertions(+), 8 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 20d093f7..ba4d1f30 100644 --- 
a/src/encoder.c +++ b/src/encoder.c @@ -427,6 +427,8 @@ static int encoder_state_config_tile_init(encoder_state * const encoder_state, encoder_state->tile->lcu_offset_y = lcu_offset_y; encoder_state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu]; + + encoder_state->tile->id = encoder->tiles_tile_id[encoder_state->tile->lcu_offset_in_ts]; return 1; } @@ -437,9 +439,8 @@ static void encoder_state_config_tile_finalize(encoder_state * const encoder_sta static int encoder_state_config_slice_init(encoder_state * const encoder_state, const int start_address_in_ts, const int end_address_in_ts) { - //Has to be called AFTER initializing encoder_state->tile - encoder_state->slice->start_in_ts = start_address_in_ts - encoder_state->tile->lcu_offset_in_ts; - encoder_state->slice->end_in_ts = end_address_in_ts - encoder_state->tile->lcu_offset_in_ts; + encoder_state->slice->start_in_ts = start_address_in_ts; + encoder_state->slice->end_in_ts = end_address_in_ts; encoder_state->slice->start_in_rs = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[start_address_in_ts]; encoder_state->slice->end_in_rs = encoder_state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts]; @@ -461,6 +462,108 @@ static void encoder_state_config_wfrow_finalize(encoder_state * const encoder_st //Nothing to do (yet?) } +#ifdef _DEBUG +static void encoder_state_dump_graphviz(const encoder_state * const encoder_state) { + int i; + + if (!encoder_state->parent) { + const encoder_control * const encoder = encoder_state->encoder_control; + int y,x; + //Empty lines (easier to copy-paste) + printf("\n\n\n\n\n"); + //Some styling... 
+ printf("digraph EncoderStates {\n"); + printf(" fontname = \"Bitstream Vera Sans\"\n"); + printf(" fontsize = 8\n\n"); + printf(" node [\n"); + printf(" fontname = \"Bitstream Vera Sans\"\n"); + printf(" fontsize = 8\n"); + printf(" shape = \"record\"\n"); + printf(" ]\n\n"); + printf(" edge [\n"); + printf(" arrowtail = \"empty\"\n"); + printf(" ]\n\n"); + + printf(" \"Map\" [\n"); + printf(" shape=plaintext\" [\n"); + printf(" label = <"); + printf("", encoder->in.width_in_lcu); + for (y = 0; y < encoder->in.height_in_lcu; ++y) { + printf(""); + for (x = 0; x < encoder->in.width_in_lcu; ++x) { + const int lcu_id_rs = y * encoder->in.width_in_lcu + x; + + printf("", lcu_id_rs); + } + printf(""); + } + printf("", encoder->in.width_in_lcu); + for (y = 0; y < encoder->in.height_in_lcu; ++y) { + printf(""); + for (x = 0; x < encoder->in.width_in_lcu; ++x) { + const int lcu_id_rs = y * encoder->in.width_in_lcu + x; + const int lcu_id_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_id_rs]; + + printf("", lcu_id_ts); + } + printf(""); + } + printf("", encoder->in.width_in_lcu); + for (y = 0; y < encoder->in.height_in_lcu; ++y) { + printf(""); + for (x = 0; x < encoder->in.width_in_lcu; ++x) { + const int lcu_id_rs = y * encoder->in.width_in_lcu + x; + const int lcu_id_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_id_rs]; + + printf("", encoder->tiles_tile_id[lcu_id_ts]); + } + printf(""); + } + printf("
RS Map
%d
TS Map
%d
Tile map
%d
>\n ]\n"); + } + + printf(" \"%p\" [\n", encoder_state); + printf(" label = \"{encoder_state|"); + printf("+ type=%c\\l", encoder_state->type); + if (!encoder_state->parent || encoder_state->global != encoder_state->parent->global) { + printf("|+ global\\l"); + } + if (!encoder_state->parent || encoder_state->tile != encoder_state->parent->tile) { + printf("|+ tile\\l"); + printf(" - id = %d\\l", encoder_state->tile->id); + printf(" - lcu_offset_x = %d\\l", encoder_state->tile->lcu_offset_x); + printf(" - lcu_offset_y = %d\\l", encoder_state->tile->lcu_offset_y); + printf(" - lcu_offset_in_ts = %d\\l", encoder_state->tile->lcu_offset_in_ts); + } + if (!encoder_state->parent || encoder_state->slice != encoder_state->parent->slice) { + printf("|+ slice\\l"); + printf(" - start_in_ts = %d\\l", encoder_state->slice->start_in_ts); + printf(" - end_in_ts = %d\\l", encoder_state->slice->end_in_ts); + printf(" - start_in_rs = %d\\l", encoder_state->slice->start_in_rs); + printf(" - end_in_rs = %d\\l", encoder_state->slice->end_in_rs); + } + if (!encoder_state->parent || encoder_state->wfrow != encoder_state->parent->wfrow) { + printf("|+ wfrow\\l"); + printf(" - lcu_offset_y = %d\\l", encoder_state->wfrow->lcu_offset_y); + } + printf("}\"\n"); + printf(" ]\n"); + + if (encoder_state->parent) { + printf(" \"%p\" -> \"%p\"\n", encoder_state->parent, encoder_state); + } + + for (i = 0; encoder_state->children[i].encoder_control; ++i) { + encoder_state_dump_graphviz(&encoder_state->children[i]); + } + + if (!encoder_state->parent) { + printf("}\n"); + //Empty lines (easier to copy-paste) + printf("\n\n\n\n\n"); + } +} +#endif //_DEBUG int encoder_state_init(encoder_state * const child_state, encoder_state * const parent_state) { //We require that, if parent_state is NULL: @@ -473,6 +576,8 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const //child_state->slice //child_state->wfrow + printf("Init: %p %p\n", child_state, parent_state); + 
child_state->parent = parent_state; child_state->children = MALLOC(encoder_state, 1); child_state->children[0].encoder_control = NULL; @@ -547,16 +652,22 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const case ENCODER_STATE_TYPE_MAIN: children_allow_slice = 1; children_allow_tile = 1; + start_in_ts = 0; + end_in_ts = child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu; break; case ENCODER_STATE_TYPE_SLICE: assert(child_state->parent); if (child_state->parent->type != ENCODER_STATE_TYPE_TILE) children_allow_tile = 1; children_allow_wavefront_row = encoder->wpp; + start_in_ts = child_state->slice->start_in_ts; + end_in_ts = child_state->slice->end_in_ts; break; case ENCODER_STATE_TYPE_TILE: assert(child_state->parent); if (child_state->parent->type != ENCODER_STATE_TYPE_SLICE) children_allow_slice = 1; children_allow_wavefront_row = encoder->wpp; + start_in_ts = child_state->tile->lcu_offset_in_ts; + end_in_ts = child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu; break; case ENCODER_STATE_TYPE_WAVEFRONT_ROW: break; @@ -570,7 +681,7 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const end_in_ts = MIN(child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu, child_state->tile->lcu_offset_in_ts + child_state->slice->end_in_ts); //printf("%c-%p: start_in_ts=%d, end_in_ts=%d\n",child_state->type, child_state, start_in_ts, end_in_ts); - while (start_in_ts < end_in_ts) { + while (start_in_ts < end_in_ts && (children_allow_slice || children_allow_tile)) { encoder_state *new_child = NULL; int range_start = start_in_ts; int range_end_slice = start_in_ts; //Will be incremented to get the range of the "thing" @@ -596,8 +707,6 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const if ((!tile_allowed || (range_end_slice >= 
range_end_tile)) && !new_child && slice_allowed) { //Create a slice - - printf("%p slice: %d - %d\n", child_state, range_start, range_end_slice); new_child = &child_state->children[child_count]; new_child->encoder_control = encoder; new_child->type = ENCODER_STATE_TYPE_SLICE; @@ -624,7 +733,6 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const int width = MIN(width_in_lcu * LCU_WIDTH, encoder->in.width - lcu_offset_x * LCU_WIDTH); int height = MIN(height_in_lcu * LCU_WIDTH, encoder->in.height - lcu_offset_y * LCU_WIDTH); - printf("%p tile: %d - %d (%d)\n", child_state, range_start, range_end_tile, tile_id); new_child = &child_state->children[child_count]; new_child->encoder_control = encoder; new_child->type = ENCODER_STATE_TYPE_TILE; @@ -646,6 +754,17 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const fprintf(stderr, "Failed to allocate memory for children...\n"); return 0; } + + //Fix children parent (since we changed the address) + { + int i, j; + for (i = 0; child_state->children[i].encoder_control; ++i) { + for (j = 0; child_state->children[i].children[j].encoder_control; ++j) { + child_state->children[i].children[j].parent = &child_state->children[i]; + } + } + } + if (!encoder_state_init(&child_state->children[child_count], child_state)) { fprintf(stderr, "Unable to init child...\n"); return 0; @@ -660,6 +779,9 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const printf("Wavefront\n"); } } +#ifdef _DEBUG + if (!parent_state) encoder_state_dump_graphviz(child_state); +#endif //_DEBUG return 1; } diff --git a/src/encoder.h b/src/encoder.h index a5b1a7ac..13e37cea 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -158,6 +158,8 @@ typedef struct { //Current picture to encode picture *cur_pic; + int32_t id; + //Tile: offset in LCU for current encoder_state in global coordinates int32_t lcu_offset_x; int32_t lcu_offset_y; @@ -167,7 +169,7 @@ typedef struct { } 
encoder_state_config_tile; typedef struct { - //Local coordinates, relative to *tile + //Global coordinates int32_t start_in_ts; int32_t end_in_ts; From 831b221cf84e153078d86ff9f2e7339a3e2da928 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 09:44:02 +0200 Subject: [PATCH 11/21] Parsing seems to work now --- src/encoder.c | 134 ++++++++++++++++++++++++++++++++++++++++++++------ src/encoder.h | 4 +- 2 files changed, 122 insertions(+), 16 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index ba4d1f30..01a56ba7 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -439,6 +439,15 @@ static void encoder_state_config_tile_finalize(encoder_state * const encoder_sta static int encoder_state_config_slice_init(encoder_state * const encoder_state, const int start_address_in_ts, const int end_address_in_ts) { + int i = 0, slice_found=0; + for (i = 0; i < encoder_state->encoder_control->slice_count; ++i) { + if (encoder_state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) { + encoder_state->slice->id = i; + slice_found = 1; + break; + } + } + assert(slice_found); encoder_state->slice->start_in_ts = start_address_in_ts; encoder_state->slice->end_in_ts = end_address_in_ts; @@ -519,6 +528,25 @@ static void encoder_state_dump_graphviz(const encoder_state * const encoder_stat } printf(""); } + printf("Slice map", encoder->in.width_in_lcu); + for (y = 0; y < encoder->in.height_in_lcu; ++y) { + printf(""); + for (x = 0; x < encoder->in.width_in_lcu; ++x) { + const int lcu_id_rs = y * encoder->in.width_in_lcu + x; + const int lcu_id_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_id_rs]; + int slice_id = 0; + + //Not efficient, but who cares + for (i=0; i < encoder->slice_count; ++i) { + if (encoder->slice_addresses_in_ts[i] <= lcu_id_ts) { + slice_id = i; + } + } + + printf("%d", slice_id); + } + printf(""); + } printf(">\n ]\n"); } @@ -537,6 +565,7 @@ static void encoder_state_dump_graphviz(const encoder_state * const encoder_stat } if 
(!encoder_state->parent || encoder_state->slice != encoder_state->parent->slice) { printf("|+ slice\\l"); + printf(" - id = %d\\l", encoder_state->slice->id); printf(" - start_in_ts = %d\\l", encoder_state->slice->start_in_ts); printf(" - end_in_ts = %d\\l", encoder_state->slice->end_in_ts); printf(" - start_in_rs = %d\\l", encoder_state->slice->start_in_rs); @@ -645,6 +674,7 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const int children_allow_wavefront_row = 0; int children_allow_slice = 0; int children_allow_tile = 0; + int range_start; int start_in_ts, end_in_ts; @@ -676,16 +706,12 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const assert(0); } - //Full span to analyze - start_in_ts = child_state->tile->lcu_offset_in_ts + child_state->slice->start_in_ts; - end_in_ts = MIN(child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu, child_state->tile->lcu_offset_in_ts + child_state->slice->end_in_ts); - + range_start = start_in_ts; //printf("%c-%p: start_in_ts=%d, end_in_ts=%d\n",child_state->type, child_state, start_in_ts, end_in_ts); - while (start_in_ts < end_in_ts && (children_allow_slice || children_allow_tile)) { + while (range_start < end_in_ts && (children_allow_slice || children_allow_tile)) { encoder_state *new_child = NULL; - int range_start = start_in_ts; - int range_end_slice = start_in_ts; //Will be incremented to get the range of the "thing" - int range_end_tile = start_in_ts; //Will be incremented to get the range of the "thing" + int range_end_slice = range_start; //Will be incremented to get the range of the "thing" + int range_end_tile = range_start; //Will be incremented to get the range of the "thing" int tile_allowed = lcu_at_tile_start(encoder, range_start) && children_allow_tile; int slice_allowed = lcu_at_slice_start(encoder, range_start) && children_allow_slice; @@ -754,17 +780,17 @@ int 
encoder_state_init(encoder_state * const child_state, encoder_state * const fprintf(stderr, "Failed to allocate memory for children...\n"); return 0; } - - //Fix children parent (since we changed the address) + + //Fix children parent (since we changed the address), except for the last one which is not ready yet { int i, j; - for (i = 0; child_state->children[i].encoder_control; ++i) { + for (i = 0; child_state->children[i].encoder_control && i < child_count; ++i) { for (j = 0; child_state->children[i].children[j].encoder_control; ++j) { child_state->children[i].children[j].parent = &child_state->children[i]; } } } - + if (!encoder_state_init(&child_state->children[child_count], child_state)) { fprintf(stderr, "Unable to init child...\n"); return 0; @@ -772,13 +798,91 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const child_count += 1; } - start_in_ts = MAX(range_end_slice, range_end_tile) + 1; + range_start = MAX(range_end_slice, range_end_tile) + 1; } - if (children_allow_wavefront_row) { - printf("Wavefront\n"); + //We create wavefronts only if we have no children + if (children_allow_wavefront_row && child_count == 0) { + int first_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu; + int last_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu; + int num_rows; + int i; + + assert(!(children_allow_slice || children_allow_tile)); + assert(child_count == 0); + + for (i=start_in_ts; itiles_ctb_addr_ts_to_rs[i] / encoder->in.width_in_lcu; + if (row < first_row) first_row = row; + if (row > last_row) last_row = row; + } + + num_rows = last_row - first_row + 1; + + //When entropy_coding_sync_enabled_flag is equal to 1 and the first coding tree block in a slice is not the first coding + //tree block of a row of coding tree blocks in a tile, it is a requirement of bitstream conformance that the last coding tree + //block in the slice shall belong to the same row of coding tree blocks 
as the first coding tree block in the slice. + + if (encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] % encoder->in.width_in_lcu != child_state->tile->lcu_offset_x) { + if (num_rows > 1) { + fprintf(stderr, "Invalid: first CTB in slice %d is not at the tile %d edge, and the slice spans on more than one row.\n", child_state->slice->id, child_state->tile->id); + return 0; + } + } + + //FIXME Do the same kind of check if we implement slice segments + + + child_state->children = realloc(child_state->children, sizeof(encoder_state) * (num_rows + 1)); + child_state->children[num_rows].encoder_control = NULL; + + for (i=0; i < num_rows; ++i) { + encoder_state *new_child = &child_state->children[i]; + + new_child->encoder_control = encoder; + new_child->type = ENCODER_STATE_TYPE_WAVEFRONT_ROW; + new_child->global = child_state->global; + new_child->tile = child_state->tile; + new_child->slice = child_state->slice; + new_child->wfrow = MALLOC(encoder_state_config_wfrow, 1); + + if (!new_child->wfrow || !encoder_state_config_wfrow_init(new_child, i + first_row)) { + fprintf(stderr, "Could not initialize encoder_state->wfrow!\n"); + return 0; + } + + if (!encoder_state_init(new_child, child_state)) { + fprintf(stderr, "Unable to init child...\n"); + return 0; + } + } } } + + //Validate the structure + if (child_state->type == ENCODER_STATE_TYPE_TILE) { + if (child_state->tile->lcu_offset_in_ts < child_state->slice->start_in_ts) { + fprintf(stderr, "Tile %d starts before slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id); + return 0; + } + if (child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu - 1 > child_state->slice->end_in_ts) { + fprintf(stderr, "Tile %d ends after slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id); + return 0; + } + } + + if (child_state->type == ENCODER_STATE_TYPE_SLICE) { + if 
(child_state->slice->start_in_ts < child_state->tile->lcu_offset_in_ts) { + fprintf(stderr, "Slice %d starts before tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id); + return 0; + } + if (child_state->slice->end_in_ts > child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu - 1) { + fprintf(stderr, "Slice %d ends after tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id); + return 0; + } + } + + #ifdef _DEBUG if (!parent_state) encoder_state_dump_graphviz(child_state); #endif //_DEBUG diff --git a/src/encoder.h b/src/encoder.h index 13e37cea..7c80c5f1 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -169,6 +169,8 @@ typedef struct { } encoder_state_config_tile; typedef struct { + int32_t id; + //Global coordinates int32_t start_in_ts; int32_t end_in_ts; @@ -179,7 +181,7 @@ typedef struct { } encoder_state_config_slice; typedef struct { - //Row of the wavefront, relative to *tile + //Row in image coordinates of the wavefront int32_t lcu_offset_y; } encoder_state_config_wfrow; From 1e2671ac30a2d790eda89a1de23a726eb7bd3a4a Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 09:53:55 +0200 Subject: [PATCH 12/21] Renamed encoder_clear_refs to encoder_state_clear_refs --- src/encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 01a56ba7..2ac04a46 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -923,7 +923,7 @@ void encoder_state_finalize(encoder_state * const encoder_state) { } -static void encoder_clear_refs(encoder_state *encoder_state) { +static void encoder_state_clear_refs(encoder_state *encoder_state) { while (encoder_state->global->ref->used_size) { picture_list_rem(encoder_state->global->ref, encoder_state->global->ref->used_size - 1); } @@ -1191,7 +1191,7 @@ void encode_one_frame(encoder_state * const main_state) **/ if 
(is_radl_frame) { // Clear the reference list - encoder_clear_refs(main_state); + encoder_state_clear_refs(main_state); main_state->tile->cur_pic->slicetype = SLICE_I; main_state->tile->cur_pic->type = NAL_IDR_W_RADL; From a03f0cba19c5a02fa76e52de2ed138d39b0d2642 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 09:56:16 +0200 Subject: [PATCH 13/21] encoder_control_input_init near the other encoder_control_* functions --- src/encoder.c | 84 ++++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 2ac04a46..099bc719 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -387,6 +387,49 @@ int encoder_control_finalize(encoder_control * const encoder) { return 1; } +void encoder_control_input_init(encoder_control * const encoder, + const int32_t width, const int32_t height) +{ + encoder->in.width = width; + encoder->in.height = height; + encoder->in.real_width = width; + encoder->in.real_height = height; + + // If input dimensions are not divisible by the smallest block size, add + // pixels to the dimensions, so that they are. These extra pixels will be + // compressed along with the real ones but they will be cropped out before + // rendering. 
+ if (encoder->in.width % CU_MIN_SIZE_PIXELS) { + encoder->in.width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS); + } + + if (encoder->in.height % CU_MIN_SIZE_PIXELS) { + encoder->in.height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS); + } + + encoder->in.height_in_lcu = encoder->in.height / LCU_WIDTH; + encoder->in.width_in_lcu = encoder->in.width / LCU_WIDTH; + + // Add one extra LCU when image not divisible by LCU_WIDTH + if (encoder->in.height_in_lcu * LCU_WIDTH < height) { + encoder->in.height_in_lcu++; + } + + if (encoder->in.width_in_lcu * LCU_WIDTH < width) { + encoder->in.width_in_lcu++; + } + + + + #ifdef _DEBUG + if (width != encoder->in.width || height != encoder->in.height) { + printf("Picture buffer has been extended to be a multiple of the smallest block size:\r\n"); + printf(" Width = %d (%d), Height = %d (%d)\r\n", width, encoder->in.width, height, + encoder->in.height); + } + #endif +} + static int encoder_state_config_global_init(encoder_state * const encoder_state) { encoder_state->global->ref = picture_list_init(MAX_REF_PIC_COUNT); if(!encoder_state->global->ref) { @@ -931,48 +974,7 @@ static void encoder_state_clear_refs(encoder_state *encoder_state) { encoder_state->global->poc = 0; } -void encoder_control_input_init(encoder_control * const encoder, - const int32_t width, const int32_t height) -{ - encoder->in.width = width; - encoder->in.height = height; - encoder->in.real_width = width; - encoder->in.real_height = height; - // If input dimensions are not divisible by the smallest block size, add - // pixels to the dimensions, so that they are. These extra pixels will be - // compressed along with the real ones but they will be cropped out before - // rendering. 
- if (encoder->in.width % CU_MIN_SIZE_PIXELS) { - encoder->in.width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS); - } - - if (encoder->in.height % CU_MIN_SIZE_PIXELS) { - encoder->in.height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS); - } - - encoder->in.height_in_lcu = encoder->in.height / LCU_WIDTH; - encoder->in.width_in_lcu = encoder->in.width / LCU_WIDTH; - - // Add one extra LCU when image not divisible by LCU_WIDTH - if (encoder->in.height_in_lcu * LCU_WIDTH < height) { - encoder->in.height_in_lcu++; - } - - if (encoder->in.width_in_lcu * LCU_WIDTH < width) { - encoder->in.width_in_lcu++; - } - - - - #ifdef _DEBUG - if (width != encoder->in.width || height != encoder->in.height) { - printf("Picture buffer has been extended to be a multiple of the smallest block size:\r\n"); - printf(" Width = %d (%d), Height = %d (%d)\r\n", width, encoder->in.width, height, - encoder->in.height); - } - #endif -} static void write_aud(encoder_state * const encoder_state) { From 24c2bd70ca40497e2b92b67ee7705ffdeec19ea2 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 11:37:53 +0200 Subject: [PATCH 14/21] Fix small bugs with compilation --- src/encoder.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 099bc719..5c262a60 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -648,8 +648,6 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const //child_state->slice //child_state->wfrow - printf("Init: %p %p\n", child_state, parent_state); - child_state->parent = parent_state; child_state->children = MALLOC(encoder_state, 1); child_state->children[0].encoder_control = NULL; @@ -743,10 +741,14 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const end_in_ts = child_state->tile->lcu_offset_in_ts + child_state->tile->cur_pic->width_in_lcu * child_state->tile->cur_pic->height_in_lcu; break; case ENCODER_STATE_TYPE_WAVEFRONT_ROW: + //GCC 
tries to be too clever... + start_in_ts = -1; + end_in_ts = -1; break; default: fprintf(stderr, "Invalid encoder_state->type %d!\n", child_state->type); assert(0); + return 0; } range_start = start_in_ts; From e144f817ef65b33eff4205fcb27983a3b328b449 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 10:48:30 +0200 Subject: [PATCH 15/21] Works when not using tiles --- src/encoder.c | 320 +++++++++++++++++++++++++++++++------------------- 1 file changed, 202 insertions(+), 118 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 5c262a60..57da3a3c 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -969,6 +969,7 @@ void encoder_state_finalize(encoder_state * const encoder_state) { static void encoder_state_clear_refs(encoder_state *encoder_state) { + //FIXME: Do we need to handle children? At present they all share the same global while (encoder_state->global->ref->used_size) { picture_list_rem(encoder_state->global->ref, encoder_state->global->ref->used_size - 1); } @@ -976,6 +977,55 @@ static void encoder_state_clear_refs(encoder_state *encoder_state) { encoder_state->global->poc = 0; } +static void encoder_state_blit_pixels(const encoder_state * const target_enc, pixel * const target, const encoder_state * const source_enc, const pixel * const source, const int is_y_channel) { + const int source_offset_x = source_enc->tile->lcu_offset_x * LCU_WIDTH; + const int source_offset_y = source_enc->tile->lcu_offset_y * LCU_WIDTH; + + const int target_offset_x = target_enc->tile->lcu_offset_x * LCU_WIDTH; + const int target_offset_y = target_enc->tile->lcu_offset_y * LCU_WIDTH; + + int source_stride = source_enc->tile->cur_pic->width; + int target_stride = target_enc->tile->cur_pic->width; + + int width; + int height; + + int source_offset; + int target_offset; + + //Do nothing if the source and the destination is the same! 
+ if (source_enc->tile == target_enc->tile) return; + + if (is_y_channel) { + target_offset = source_offset_x + source_offset_y * target_enc->tile->cur_pic->width; + source_offset = target_offset_x + target_offset_y * source_enc->tile->cur_pic->width; + } else { + target_offset = source_offset_x/2 + source_offset_y/2 * target_enc->tile->cur_pic->width/2; + source_offset = target_offset_x/2 + target_offset_y/2 * source_enc->tile->cur_pic->width/2; + } + + if (target_enc->children) { + //Use information from the source + width = MIN(source_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->width - source_offset_x); + height = MIN(source_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->height - source_offset_y); + } else { + //Use information from the target + width = MIN(target_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->width - target_offset_x); + height = MIN(target_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->height - target_offset_y); + } + + if (!is_y_channel) { + width /= 2; + height /= 2; + + source_stride /= 2; + target_stride /= 2; + } + + //picture_blit_pixels(source + source_offset, target + target_offset, width, height, source_enc->cur_pic->width, target_enc->cur_pic->width); + picture_blit_pixels(source + source_offset, target + target_offset, width, height, source_stride, target_stride); +} + static void write_aud(encoder_state * const encoder_state) @@ -986,36 +1036,8 @@ static void write_aud(encoder_state * const encoder_state) bitstream_align(stream); } -static void substream_write_bitstream(encoder_state * const encoder_state, const int end_of_sub_stream) { - const encoder_control * const encoder = encoder_state->encoder_control; - const picture* const cur_pic = encoder_state->tile->cur_pic; - const int lcu_count = cur_pic->width_in_lcu * cur_pic->height_in_lcu; - int lcu_id; - vector2d lcu; - - for (lcu_id = 0; lcu_id < lcu_count; ++lcu_id) { - 
lcu.x = lcu_id % cur_pic->width_in_lcu; - lcu.y = lcu_id / cur_pic->width_in_lcu; - - //Write bitstream - if (encoder->sao_enable) { - encode_sao(encoder_state, lcu.x, lcu.y, &cur_pic->sao_luma[lcu.y * cur_pic->width_in_lcu + lcu.x], &cur_pic->sao_chroma[lcu.y * cur_pic->width_in_lcu + lcu.x]); - } - - encode_coding_tree(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0); - cabac_encode_bin_trm(&encoder_state->cabac, ((lcu_id == lcu_count - 1) && !end_of_sub_stream) ? 1 : 0); // end_of_slice_segment_flag - } - if (end_of_sub_stream) { - cabac_encode_bin_trm(&encoder_state->cabac, 1); // end_of_sub_stream_one_bit == 1 - cabac_flush(&encoder_state->cabac); - } else { - cabac_flush(&encoder_state->cabac); - bitstream_align(&encoder_state->stream); - } -} - -static void substream_encode(encoder_state * const encoder_state) { +static void encoder_state_encode_tile(encoder_state * const encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; #ifndef NDEBUG const unsigned long long int debug_bitstream_position = bitstream_tell(&(encoder_state->stream)); @@ -1129,57 +1151,79 @@ static void substream_encode(encoder_state * const encoder_state) { yuv_t_free(ver_buf); } -static void subencoder_blit_pixels(const encoder_state * const target_enc, pixel * const target, const encoder_state * const source_enc, const pixel * const source, const int is_y_channel) { - const int source_offset_x = source_enc->tile->lcu_offset_x * LCU_WIDTH; - const int source_offset_y = source_enc->tile->lcu_offset_y * LCU_WIDTH; - - const int target_offset_x = target_enc->tile->lcu_offset_x * LCU_WIDTH; - const int target_offset_y = target_enc->tile->lcu_offset_y * LCU_WIDTH; - - int source_stride = source_enc->tile->cur_pic->width; - int target_stride = target_enc->tile->cur_pic->width; - - int width; - int height; - - int source_offset; - int target_offset; - - //One of them has to be the main encoder - assert(target_enc->children || source_enc->children); - - 
if (is_y_channel) { - target_offset = source_offset_x + source_offset_y * target_enc->tile->cur_pic->width; - source_offset = target_offset_x + target_offset_y * source_enc->tile->cur_pic->width; +static void encoder_state_encode(encoder_state * const main_state) { + //If we have children, encode at child level + if (main_state->children[0].encoder_control) { + int i=0; + for (i=0; main_state->children[i].encoder_control; ++i) { + encoder_state *sub_state = &(main_state->children[i]); + + if (sub_state->tile != main_state->tile) { + //FIXME: remove this once these are in slice + sub_state->tile->cur_pic->slicetype = main_state->tile->cur_pic->slicetype; + sub_state->tile->cur_pic->type = main_state->tile->cur_pic->type; + + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1); + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->u_data, main_state, main_state->tile->cur_pic->u_data, 0); + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->v_data, main_state, main_state->tile->cur_pic->v_data, 0); + } + encoder_state_encode(&main_state->children[i]); + //FIXME: substream_write_bitstream(subencoder, (main_state->children[i+1].encoder_control) != NULL); + + if (sub_state->tile != main_state->tile) { + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->y_recdata, sub_state, sub_state->tile->cur_pic->y_recdata, 1); + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->u_recdata, sub_state, sub_state->tile->cur_pic->u_recdata, 0); + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->v_recdata, sub_state, sub_state->tile->cur_pic->v_recdata, 0); + } + } } else { - target_offset = source_offset_x/2 + source_offset_y/2 * target_enc->tile->cur_pic->width/2; - source_offset = target_offset_x/2 + target_offset_y/2 * source_enc->tile->cur_pic->width/2; + switch (main_state->type) { + case ENCODER_STATE_TYPE_TILE: + 
encoder_state_encode_tile(main_state); + break; + default: + fprintf(stderr, "Unsupported leaf type %c!\n", main_state->type); + assert(0); + } } - - if (target_enc->children) { - //Use information from the source - width = MIN(source_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->width - source_offset_x); - height = MIN(source_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, target_enc->tile->cur_pic->height - source_offset_y); - } else { - //Use information from the target - width = MIN(target_enc->tile->cur_pic->width_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->width - target_offset_x); - height = MIN(target_enc->tile->cur_pic->height_in_lcu * LCU_WIDTH, source_enc->tile->cur_pic->height - target_offset_y); - } - - if (!is_y_channel) { - width /= 2; - height /= 2; - - source_stride /= 2; - target_stride /= 2; - } - - //picture_blit_pixels(source + source_offset, target + target_offset, width, height, source_enc->cur_pic->width, target_enc->cur_pic->width); - picture_blit_pixels(source + source_offset, target + target_offset, width, height, source_stride, target_stride); } -void encode_one_frame(encoder_state * const main_state) -{ +static void encoder_state_new_frame(encoder_state * const main_state) { + int i; + //FIXME Move this somewhere else! 
+ if (main_state->type == ENCODER_STATE_TYPE_MAIN) { + const encoder_control * const encoder = main_state->encoder_control; + + const int is_first_frame = (main_state->global->frame == 0); + const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); + const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); + const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; + + if (is_radl_frame) { + // Clear the reference list + encoder_state_clear_refs(main_state); + + main_state->tile->cur_pic->slicetype = SLICE_I; + main_state->tile->cur_pic->type = NAL_IDR_W_RADL; + } else { + main_state->tile->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; + main_state->tile->cur_pic->type = NAL_TRAIL_R; + } + } else { + //Clear the bitstream if it's not the main encoder + bitstream_clear(&main_state->stream); + } + + init_contexts(main_state, main_state->global->QP, main_state->tile->cur_pic->slicetype); + + for (i = 0; main_state->children[i].encoder_control; ++i) { + encoder_state_new_frame(&main_state->children[i]); + } + + +} + +static void encoder_state_write_bitstream_main(encoder_state * const main_state) { const encoder_control * const encoder = main_state->encoder_control; bitstream * const stream = &main_state->stream; @@ -1187,6 +1231,8 @@ void encode_one_frame(encoder_state * const main_state) const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; + + int i; /** IDR picture when: period == 0 and frame == 0 @@ -1243,49 +1289,12 @@ void encode_one_frame(encoder_state * const main_state) nal_write(stream, is_radl_frame ? 
NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code); } - - encode_slice_header(main_state); - bitstream_align(&main_state->stream); - - if (main_state->children) { - int i; - //FIXME! - //This can be parallelized, we don't use a do...while loop because we use OpenMP - #pragma omp parallel for - for (i = 0; i < encoder->tiles_num_tile_rows * encoder->tiles_num_tile_columns; ++i) { - encoder_state *subencoder = &(main_state->children[i]); - - subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1); - subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->u_data, main_state, main_state->tile->cur_pic->u_data, 0); - subencoder_blit_pixels(subencoder, subencoder->tile->cur_pic->v_data, main_state, main_state->tile->cur_pic->v_data, 0); - - //FIXME: remove this once these are in slice - subencoder->tile->cur_pic->slicetype = main_state->tile->cur_pic->slicetype; - subencoder->tile->cur_pic->type = main_state->tile->cur_pic->type; - - substream_encode(subencoder); - substream_write_bitstream(subencoder, (main_state->children[i+1].encoder_control) != NULL); - - subencoder_blit_pixels(main_state, main_state->tile->cur_pic->y_recdata, subencoder, subencoder->tile->cur_pic->y_recdata, 1); - subencoder_blit_pixels(main_state, main_state->tile->cur_pic->u_recdata, subencoder, subencoder->tile->cur_pic->u_recdata, 0); - subencoder_blit_pixels(main_state, main_state->tile->cur_pic->v_recdata, subencoder, subencoder->tile->cur_pic->v_recdata, 0); - } - - //We should do the slice header here, because we can have the entry points - - //This has to be serial - i = 0; - for (i = 0; main_state->children[i].encoder_control; ++i) { - //Append bitstream to main stream - bitstream_append(&main_state->stream, &main_state->children[i].stream); - bitstream_clear(&main_state->children[i].stream); - } - - } else { - //Encode the whole thing as one stream - substream_encode(main_state); - substream_write_bitstream(main_state, 
0); + for (i = 0; main_state->children[i].encoder_control; ++i) { + //Append bitstream to main stream + bitstream_append(&main_state->stream, &main_state->children[i].stream); + //FIXME: Move this... + bitstream_clear(&main_state->children[i].stream); } // Calculate checksum @@ -1295,6 +1304,81 @@ void encode_one_frame(encoder_state * const main_state) main_state->tile->cur_pic->poc = main_state->global->poc; } +static void encoder_state_write_bitstream_tile(encoder_state * const encoder_state) { + const encoder_control * const encoder = encoder_state->encoder_control; + const picture* const cur_pic = encoder_state->tile->cur_pic; + const int lcu_count = cur_pic->width_in_lcu * cur_pic->height_in_lcu; + int lcu_id; + vector2d lcu; + + for (lcu_id = 0; lcu_id < lcu_count; ++lcu_id) { + lcu.x = lcu_id % cur_pic->width_in_lcu; + lcu.y = lcu_id / cur_pic->width_in_lcu; + + //Write bitstream + if (encoder->sao_enable) { + encode_sao(encoder_state, lcu.x, lcu.y, &cur_pic->sao_luma[lcu.y * cur_pic->width_in_lcu + lcu.x], &cur_pic->sao_chroma[lcu.y * cur_pic->width_in_lcu + lcu.x]); + } + + encode_coding_tree(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0); + + cabac_encode_bin_trm(&encoder_state->cabac, ((lcu_id == lcu_count - 1) && lcu_at_slice_end(encoder, lcu_id + encoder_state->tile->lcu_offset_in_ts)) ? 
1 : 0); // end_of_slice_segment_flag + } + if (!lcu_at_slice_end(encoder, encoder_state->tile->lcu_offset_in_ts + cur_pic->width_in_lcu * cur_pic->height_in_lcu - 1)) { + cabac_encode_bin_trm(&encoder_state->cabac, 1); // end_of_sub_stream_one_bit == 1 + cabac_flush(&encoder_state->cabac); + } else { + cabac_flush(&encoder_state->cabac); + bitstream_align(&encoder_state->stream); + } + //We do not handle tiles containing something for now + assert(!encoder_state->children[0].encoder_control); +} + +static void encoder_state_write_bitstream_slice(encoder_state * const main_state) { + int i; + encode_slice_header(main_state); + bitstream_align(&main_state->stream); + + for (i = 0; main_state->children[i].encoder_control; ++i) { + //Append bitstream to main stream + bitstream_append(&main_state->stream, &main_state->children[i].stream); + //FIXME: Move this... + bitstream_clear(&main_state->children[i].stream); + } +} + + +static void encoder_state_write_bitstream(encoder_state * const main_state) { + int i; + for (i=0; main_state->children[i].encoder_control; ++i) { + encoder_state *sub_state = &(main_state->children[i]); + encoder_state_write_bitstream(sub_state); + } + + switch (main_state->type) { + case ENCODER_STATE_TYPE_MAIN: + encoder_state_write_bitstream_main(main_state); + break; + case ENCODER_STATE_TYPE_TILE: + encoder_state_write_bitstream_tile(main_state); + break; + case ENCODER_STATE_TYPE_SLICE: + encoder_state_write_bitstream_slice(main_state); + break; + default: + fprintf(stderr, "Unsupported leaf type %c!\n", main_state->type); + assert(0); + } +} + +void encode_one_frame(encoder_state * const main_state) +{ + encoder_state_new_frame(main_state); + encoder_state_encode(main_state); + encoder_state_write_bitstream(main_state); +} + static void fill_after_frame(unsigned height, unsigned array_width, unsigned array_height, pixel *data) { From 39d96e05466443716fc7a625a198a62083bfe7bf Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 
2014 10:58:35 +0200 Subject: [PATCH 16/21] Fix bug with cabac stream pointing to bad data --- src/encoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/encoder.c b/src/encoder.c index 57da3a3c..65bc9e45 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -833,6 +833,7 @@ int encoder_state_init(encoder_state * const child_state, encoder_state * const for (j = 0; child_state->children[i].children[j].encoder_control; ++j) { child_state->children[i].children[j].parent = &child_state->children[i]; } + child_state->children[i].cabac.stream = &child_state->children[i].stream; } } From 0e6f1c99fc8e60fa04199faa26f34eb07f044709 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 11:18:10 +0200 Subject: [PATCH 17/21] Refactor picture to remove hidden dependency between slice and tiles picture.type -> encoder_state->global->pictype picture.slicetype -> encoder_state->global->slicetype picture.slice_sao_luma_flag -> 1 (was constant) picture.slice_sao_chroma_flag -> 1 (was constant) This may be changed later. For now it's better to avoid having slice related stuff in picture. 
--- src/encmain.c | 2 +- src/encoder.c | 86 +++++++++++++++++-------------------------------- src/encoder.h | 4 +++ src/picture.c | 2 -- src/picture.h | 4 --- src/search.c | 2 +- src/transform.c | 2 +- 7 files changed, 36 insertions(+), 66 deletions(-) diff --git a/src/encmain.c b/src/encmain.c index 9170fc40..43101584 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -355,7 +355,7 @@ int main(int argc, char *argv[]) temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1); fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.global->frame, - "BPI"[cur_pic->slicetype%3], diff<<3, + "BPI"[encoder_state.global->slicetype%3], diff<<3, temp_psnr[0], temp_psnr[1], temp_psnr[2]); // Increment total PSNR diff --git a/src/encoder.c b/src/encoder.c index 65bc9e45..bae5d9f7 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -58,7 +58,6 @@ static void encode_sao(encoder_state *encoder, */ void encoder_state_init_lambda(encoder_state * const encoder_state) { - const picture * const cur_pic = encoder_state->tile->cur_pic; double qp = encoder_state->global->QP; double lambda_scale = 1.0; double qp_temp = qp - 12; @@ -67,13 +66,13 @@ void encoder_state_init_lambda(encoder_state * const encoder_state) // Default QP-factor from HM config double qp_factor = 0.4624; - if (cur_pic->slicetype == SLICE_I) { + if (encoder_state->global->slicetype == SLICE_I) { qp_factor=0.57*lambda_scale; } lambda = qp_factor*pow( 2.0, qp_temp/3.0 ); - if (cur_pic->slicetype != SLICE_I ) { + if (encoder_state->global->slicetype != SLICE_I ) { lambda *= 0.95; } @@ -1050,7 +1049,7 @@ static void encoder_state_encode_tile(encoder_state * const encoder_state) { yuv_t *ver_buf = yuv_t_alloc(LCU_WIDTH + 2); cabac_start(&encoder_state->cabac); - init_contexts(encoder_state, encoder_state->global->QP, encoder_state->tile->cur_pic->slicetype); + init_contexts(encoder_state, encoder_state->global->QP, encoder_state->global->slicetype); // 
Initialize lambda value(s) to use in search encoder_state_init_lambda(encoder_state); @@ -1160,10 +1159,6 @@ static void encoder_state_encode(encoder_state * const main_state) { encoder_state *sub_state = &(main_state->children[i]); if (sub_state->tile != main_state->tile) { - //FIXME: remove this once these are in slice - sub_state->tile->cur_pic->slicetype = main_state->tile->cur_pic->slicetype; - sub_state->tile->cur_pic->type = main_state->tile->cur_pic->type; - encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1); encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->u_data, main_state, main_state->tile->cur_pic->u_data, 0); encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->v_data, main_state, main_state->tile->cur_pic->v_data, 0); @@ -1198,24 +1193,24 @@ static void encoder_state_new_frame(encoder_state * const main_state) { const int is_first_frame = (main_state->global->frame == 0); const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); - const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; + main_state->global->is_radl_frame = is_first_frame || is_i_radl || is_p_radl; - if (is_radl_frame) { + if (main_state->global->is_radl_frame) { // Clear the reference list encoder_state_clear_refs(main_state); - main_state->tile->cur_pic->slicetype = SLICE_I; - main_state->tile->cur_pic->type = NAL_IDR_W_RADL; + main_state->global->slicetype = SLICE_I; + main_state->global->pictype = NAL_IDR_W_RADL; } else { - main_state->tile->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; - main_state->tile->cur_pic->type = NAL_TRAIL_R; + main_state->global->slicetype = encoder->cfg->intra_period==1 ? 
SLICE_I : SLICE_P; + main_state->global->pictype = NAL_TRAIL_R; } } else { //Clear the bitstream if it's not the main encoder bitstream_clear(&main_state->stream); } - init_contexts(main_state, main_state->global->QP, main_state->tile->cur_pic->slicetype); + init_contexts(main_state, main_state->global->QP, main_state->global->slicetype); for (i = 0; main_state->children[i].encoder_control; ++i) { encoder_state_new_frame(&main_state->children[i]); @@ -1228,25 +1223,10 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state) const encoder_control * const encoder = main_state->encoder_control; bitstream * const stream = &main_state->stream; - const int is_first_frame = (main_state->global->frame == 0); - const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); - const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); - const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; - int i; - /** IDR picture when: period == 0 and frame == 0 - * period == 1 && frame%2 == 0 - * period != 0 && frame%period == 0 - **/ - if (is_radl_frame) { - // Clear the reference list - encoder_state_clear_refs(main_state); - - main_state->tile->cur_pic->slicetype = SLICE_I; - main_state->tile->cur_pic->type = NAL_IDR_W_RADL; - + if (main_state->global->is_radl_frame) { // Access Unit Delimiter (AUD) if (encoder->aud_enable) write_aud(main_state); @@ -1273,10 +1253,6 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state) bitstream_align(stream); } } else { - // When intra period == 1, all pictures are intra - main_state->tile->cur_pic->slicetype = encoder->cfg->intra_period==1 ? 
SLICE_I : SLICE_P; - main_state->tile->cur_pic->type = NAL_TRAIL_R; - // Access Unit Delimiter (AUD) if (encoder->aud_enable) write_aud(main_state); @@ -1285,10 +1261,10 @@ static void encoder_state_write_bitstream_main(encoder_state * const main_state) { // Not quite sure if this is correct, but it seems to have worked so far // so I tried to not change it's behavior. - int long_start_code = is_radl_frame || encoder->aud_enable ? 0 : 1; + int long_start_code = main_state->global->is_radl_frame || encoder->aud_enable ? 0 : 1; nal_write(stream, - is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code); + main_state->global->is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code); } for (i = 0; main_state->children[i].encoder_control; ++i) { @@ -1344,8 +1320,6 @@ static void encoder_state_write_bitstream_slice(encoder_state * const main_state for (i = 0; main_state->children[i].encoder_control; ++i) { //Append bitstream to main stream bitstream_append(&main_state->stream, &main_state->children[i].stream); - //FIXME: Move this... - bitstream_clear(&main_state->children[i].stream); } } @@ -1494,9 +1468,8 @@ static void add_checksum(encoder_state * const encoder_state) void encode_access_unit_delimiter(encoder_state * const encoder_state) { bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->tile->cur_pic; - uint8_t pic_type = cur_pic->slicetype == SLICE_I ? 0 - : cur_pic->slicetype == SLICE_P ? 1 + uint8_t pic_type = encoder_state->global->slicetype == SLICE_I ? 0 + : encoder_state->global->slicetype == SLICE_P ? 
1 : 2; WRITE_U(stream, pic_type, 3, "pic_type"); } @@ -1998,7 +1971,6 @@ void encode_slice_header(encoder_state * const encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; bitstream * const stream = &encoder_state->stream; - const picture * const cur_pic = encoder_state->tile->cur_pic; #ifdef _DEBUG printf("=========== Slice ===========\n"); @@ -2006,8 +1978,8 @@ void encode_slice_header(encoder_state * const encoder_state) WRITE_U(stream, 1, 1, "first_slice_segment_in_pic_flag"); - if (cur_pic->type >= NAL_BLA_W_LP - && cur_pic->type <= NAL_RSV_IRAP_VCL23) { + if (encoder_state->global->pictype >= NAL_BLA_W_LP + && encoder_state->global->pictype <= NAL_RSV_IRAP_VCL23) { WRITE_U(stream, 1, 1, "no_output_of_prior_pics_flag"); } @@ -2015,7 +1987,7 @@ void encode_slice_header(encoder_state * const encoder_state) //WRITE_U(stream, 0, 1, "dependent_slice_segment_flag"); - WRITE_UE(stream, cur_pic->slicetype, "slice_type"); + WRITE_UE(stream, encoder_state->global->slicetype, "slice_type"); // if !entropy_slice_flag @@ -2023,8 +1995,8 @@ void encode_slice_header(encoder_state * const encoder_state) //WRITE_U(stream, 1, 1, "pic_output_flag"); //end if //if( IdrPicFlag ) <- nal_unit_type == 5 - if (cur_pic->type != NAL_IDR_W_RADL - && cur_pic->type != NAL_IDR_N_LP) { + if (encoder_state->global->pictype != NAL_IDR_W_RADL + && encoder_state->global->pictype != NAL_IDR_N_LP) { int j; int ref_negative = encoder_state->global->ref->used_size; int ref_positive = 0; @@ -2045,17 +2017,17 @@ void encode_slice_header(encoder_state * const encoder_state) //end if //end if if (encoder->sao_enable) { - WRITE_U(stream, cur_pic->slice_sao_luma_flag, 1, "slice_sao_luma_flag"); - WRITE_U(stream, cur_pic->slice_sao_chroma_flag, 1, "slice_sao_chroma_flag"); + WRITE_U(stream, 1, 1, "slice_sao_luma_flag"); + WRITE_U(stream, 1, 1, "slice_sao_chroma_flag"); } - if (cur_pic->slicetype != SLICE_I) { + if (encoder_state->global->slicetype != SLICE_I) { 
WRITE_U(stream, 1, 1, "num_ref_idx_active_override_flag"); WRITE_UE(stream, encoder_state->global->ref->used_size-1, "num_ref_idx_l0_active_minus1"); WRITE_UE(stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand"); } - if (cur_pic->slicetype == SLICE_B) { + if (encoder_state->global->slicetype == SLICE_B) { WRITE_U(stream, 0, 1, "mvd_l1_zero_flag"); } @@ -2075,12 +2047,12 @@ static void encode_sao_color(encoder_state * const encoder_state, sao_info *sao, color_index color_i) { cabac_data * const cabac = &encoder_state->cabac; - const picture * const cur_pic = encoder_state->tile->cur_pic; sao_eo_cat i; // Skip colors with no SAO. - if (color_i == COLOR_Y && !cur_pic->slice_sao_luma_flag) return; - if (color_i != COLOR_Y && !cur_pic->slice_sao_chroma_flag) return; + //FIXME: for now, we always have SAO for all channels + if (color_i == COLOR_Y && 0) return; + if (color_i != COLOR_Y && 0) return; /// sao_type_idx_luma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass} /// sao_type_idx_chroma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass} @@ -2214,7 +2186,7 @@ void encode_coding_tree(encoder_state * const encoder_state, // Encode skip flag - if (cur_pic->slicetype != SLICE_I) { + if (encoder_state->global->slicetype != SLICE_I) { int8_t ctx_skip = 0; // uiCtxSkip = aboveskipped + leftskipped; int ui; int16_t num_cand = MRG_MAX_NUM_CANDS; @@ -2253,7 +2225,7 @@ void encode_coding_tree(encoder_state * const encoder_state, // ENDIF SKIP // Prediction mode - if (cur_pic->slicetype != SLICE_I) { + if (encoder_state->global->slicetype != SLICE_I) { cabac->ctx = &(cabac->ctx_cu_pred_mode_model); CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode"); } diff --git a/src/encoder.h b/src/encoder.h index 7c80c5f1..688a7249 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -152,6 +152,10 @@ typedef struct { int8_t ref_list; //int8_t ref_idx_num[2]; + int is_radl_frame; + uint8_t pictype; + uint8_t slicetype; + } encoder_state_config_global; typedef struct { diff --git 
a/src/picture.c b/src/picture.c index 034bb97d..ee0added 100644 --- a/src/picture.c +++ b/src/picture.c @@ -302,8 +302,6 @@ picture *picture_alloc(const int32_t width, const int32_t height, pic->coeff_y = NULL; pic->coeff_u = NULL; pic->coeff_v = NULL; - pic->slice_sao_luma_flag = 1; - pic->slice_sao_chroma_flag = 1; pic->sao_luma = MALLOC(sao_info, width_in_lcu * height_in_lcu); pic->sao_chroma = MALLOC(sao_info, width_in_lcu * height_in_lcu); diff --git a/src/picture.h b/src/picture.h index 117a69b6..f73859fd 100644 --- a/src/picture.h +++ b/src/picture.h @@ -126,10 +126,6 @@ typedef struct picture_struct uint8_t referenced; //!< \brief Whether this picture is referenced. int32_t refcount; //!< \brief Number of references in reflist to the picture cu_info* cu_array; //!< \brief Info for each CU at each depth. - uint8_t type; - uint8_t slicetype; - uint8_t slice_sao_luma_flag; - uint8_t slice_sao_chroma_flag; struct sao_info_struct *sao_luma; //!< \brief Array of sao parameters for every LCU. struct sao_info_struct *sao_chroma; //!< \brief Array of sao parameters for every LCU. 
int32_t poc; //!< \brief Picture order count diff --git a/src/search.c b/src/search.c index 99434380..ab7f5202 100644 --- a/src/search.c +++ b/src/search.c @@ -883,7 +883,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept y + cu_width <= cur_pic->height) { - if (cur_pic->slicetype != SLICE_I && + if (encoder_state->global->slicetype != SLICE_I && depth >= MIN_INTER_SEARCH_DEPTH && depth <= MAX_INTER_SEARCH_DEPTH) { diff --git a/src/transform.c b/src/transform.c index ea3d9f40..dabf971f 100644 --- a/src/transform.c +++ b/src/transform.c @@ -646,7 +646,7 @@ void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_ int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift; - int32_t add = ((encoder_state->tile->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9); + int32_t add = ((encoder_state->global->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9); int32_t q_bits8 = q_bits - 8; for (n = 0; n < width * height; n++) { From b48a687d3c672c744f206c85d91f0176cdfc85e2 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 11:33:19 +0200 Subject: [PATCH 18/21] Restored parallelism, but it will be done in another way... 
OpenMP is not very efficient in this kind of dynamic situation --- src/encoder.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index bae5d9f7..e5b0047b 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1154,8 +1154,32 @@ static void encoder_state_encode_tile(encoder_state * const encoder_state) { static void encoder_state_encode(encoder_state * const main_state) { //If we have children, encode at child level if (main_state->children[0].encoder_control) { - int i=0; - for (i=0; main_state->children[i].encoder_control; ++i) { + int i=0, max_i=0; + //OpenMP doesn't like having a stop condition like main_state->children[i].encoder_control. + //We compute max_i to avoid this. + for (i=0; main_state->children[i].encoder_control; ++i); + max_i = i; + if (max_i > 1) { +#pragma omp parallel for + for (i=0; i < max_i; ++i) { + encoder_state *sub_state = &(main_state->children[i]); + + if (sub_state->tile != main_state->tile) { + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1); + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->u_data, main_state, main_state->tile->cur_pic->u_data, 0); + encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->v_data, main_state, main_state->tile->cur_pic->v_data, 0); + } + encoder_state_encode(&main_state->children[i]); + //FIXME: substream_write_bitstream(subencoder, (main_state->children[i+1].encoder_control) != NULL); + + if (sub_state->tile != main_state->tile) { + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->y_recdata, sub_state, sub_state->tile->cur_pic->y_recdata, 1); + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->u_recdata, sub_state, sub_state->tile->cur_pic->u_recdata, 0); + encoder_state_blit_pixels(main_state, main_state->tile->cur_pic->v_recdata, sub_state, sub_state->tile->cur_pic->v_recdata, 0); + } + } + } else { + 
i=0; encoder_state *sub_state = &(main_state->children[i]); if (sub_state->tile != main_state->tile) { From 84e5dbee39098258565c29f04de8e1d26a3dd325 Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 13:33:02 +0200 Subject: [PATCH 19/21] Remove quote from graphviz dump --- src/encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoder.c b/src/encoder.c index e5b0047b..839ade9d 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -536,7 +536,7 @@ static void encoder_state_dump_graphviz(const encoder_state * const encoder_stat printf(" ]\n\n"); printf(" \"Map\" [\n"); - printf(" shape=plaintext\" [\n"); + printf(" shape=plaintext [\n"); printf(" label = <"); printf("", encoder->in.width_in_lcu); for (y = 0; y < encoder->in.height_in_lcu; ++y) { From 05eef82896c7cf13c6654964033d287b05c1052f Mon Sep 17 00:00:00 2001 From: Laurent Fasnacht Date: Wed, 7 May 2014 13:40:29 +0200 Subject: [PATCH 20/21] Remove extra [ from graphviz dump --- src/encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoder.c b/src/encoder.c index 839ade9d..3e4c363b 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -536,7 +536,7 @@ static void encoder_state_dump_graphviz(const encoder_state * const encoder_stat printf(" ]\n\n"); printf(" \"Map\" [\n"); - printf(" shape=plaintext [\n"); + printf(" shape=plaintext\n"); printf(" label = <
RS Map
"); printf("", encoder->in.width_in_lcu); for (y = 0; y < encoder->in.height_in_lcu; ++y) { From 535b42bc9b0440b069a2c7ff32f440286ae5608f Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 7 May 2014 14:10:22 +0300 Subject: [PATCH 21/21] Fix compilation for VS2010. --- build/C_Properties.props | 3 ++- src/encoder.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/build/C_Properties.props b/build/C_Properties.props index 934baef2..9d8881e9 100644 --- a/build/C_Properties.props +++ b/build/C_Properties.props @@ -14,7 +14,8 @@ MultiThreadedDebugDLL WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) $(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories) - 4244;4204;4206 + 4244;4204;4206;4028 + true Ws2_32.lib;%(AdditionalDependencies) diff --git a/src/encoder.c b/src/encoder.c index 3e4c363b..02630e57 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1179,8 +1179,9 @@ static void encoder_state_encode(encoder_state * const main_state) { } } } else { + encoder_state *sub_state; i=0; - encoder_state *sub_state = &(main_state->children[i]); + sub_state = &(main_state->children[i]); if (sub_state->tile != main_state->tile) { encoder_state_blit_pixels(sub_state, sub_state->tile->cur_pic->y_data, main_state, main_state->tile->cur_pic->y_data, 1);
RS Map