/*****************************************************************************
 * This file is part of uvg266 VVC encoder.
 *
 * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 *
 * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ****************************************************************************/

#include "encoder_state-ctors_dtors.h"

// NOTE(review): the two system includes had lost their <...> operands in the
// text this was reconstructed from; stdio/stdlib are inferred from the use of
// fprintf/printf and malloc/calloc/realloc below. assert.h and string.h are
// listed explicitly because assert() and memset() are used in this file.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "bitstream.h"
#include "cabac.h"
#include "cu.h"
#include "debug.h"
#include "encoder.h"
#include "encoder_state-geometry.h"
#include "encoderstate.h"
#include "image.h"
#include "imagelist.h"
#include "uvg266.h"
#include "threadqueue.h"
#include "videoframe.h"
#include "rate_control.h"
#include "alf.h"
#include "reshape.h"


/**
 * \brief Initialize the frame-level configuration of an encoder state.
 *
 * Allocates the reference picture list, the per-LCU statistics, the AQ
 * offsets and the c_para/k_para arrays, resets the counters and rate
 * control fields, and initializes the rate control mutex.
 *
 * \param state  encoder state; state->frame and state->encoder_control
 *               must already point to valid structures
 * \returns      1 on success, 0 if any allocation fails
 */
static int encoder_state_config_frame_init(encoder_state_t * const state) {
  state->frame->ref = uvg_image_list_alloc(MAX_REF_PIC_COUNT);
  if (!state->frame->ref) {
    fprintf(stderr, "Failed to allocate the picture list!\n");
    return 0;
  }
  state->frame->ref_list = REF_PIC_LIST_0;
  state->frame->num = 0;
  state->frame->poc = 0;
  state->frame->total_bits_coded = 0;
  state->frame->cur_frame_bits_coded = 0;
  state->frame->cur_gop_bits_coded = 0;
  state->frame->prepared = 0;
  state->frame->done = 1;

  state->frame->rc_alpha = 3.2003;
  state->frame->rc_beta = -1.367;
  state->frame->icost = 0;

  const encoder_control_t * const encoder = state->encoder_control;
  const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu;
  state->frame->lcu_stats = calloc(num_lcus, sizeof(lcu_stats_t));
  state->frame->aq_offsets = MALLOC(double, num_lcus);
  // Both arrays must exist before lcu_stats is filled in below.
  if (state->frame->lcu_stats == NULL || state->frame->aq_offsets == NULL) {
    fprintf(stderr, "Failed to allocate the LCU statistics!\n");
    return 0;
  }

  // Number of pixels covered by each LCU; LCUs on the right/bottom edge
  // are clipped to the configured picture size.
  for (int y = 0; y < encoder->in.height_in_lcu; y++) {
    for (int x = 0; x < encoder->in.width_in_lcu; x++) {
      int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
      state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp;
    }
  }

  state->frame->c_para = malloc(sizeof(double) * num_lcus);
  if (state->frame->c_para == NULL) {
    return 0;
  }
  state->frame->k_para = malloc(sizeof(double) * num_lcus);
  if (state->frame->k_para == NULL) {
    return 0;
  }

  pthread_mutex_init(&state->frame->rc_lock, NULL);

  state->frame->new_ratecontrol = uvg_get_rc_data(NULL);

  return 1;
}

/**
 * \brief Release everything allocated by encoder_state_config_frame_init().
 *
 * Does nothing if state->frame is NULL. The frame structure itself is not
 * freed here.
 *
 * \param state  encoder state
 */
static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
  if (state->frame == NULL) return;

  pthread_mutex_destroy(&state->frame->rc_lock);
  if (state->frame->c_para) {
    FREE_POINTER(state->frame->c_para);
  }
  if (state->frame->k_para) {
    FREE_POINTER(state->frame->k_para);
  }

  uvg_image_list_destroy(state->frame->ref);
  FREE_POINTER(state->frame->lcu_stats);
  FREE_POINTER(state->frame->aq_offsets);
}

/**
 * \brief Initialize the tile-level configuration of an encoder state.
 *
 * Allocates the video frame of the tile together with its HMVP tables,
 * the optional IBC buffers, the search / SAO border buffers and, when WPP
 * is enabled, the wavefront job arrays.
 *
 * \param state          encoder state; state->tile and
 *                       state->encoder_control must be valid
 * \param lcu_offset_x   horizontal offset of the tile, in LCUs
 * \param lcu_offset_y   vertical offset of the tile, in LCUs
 * \param width          width passed to uvg_videoframe_alloc()
 * \param height         height passed to uvg_videoframe_alloc()
 * \param width_in_lcu   not used by this function
 * \param height_in_lcu  number of LCU rows, used to size the HMVP tables
 * \returns              1 on success, 0 if the video frame or the
 *                       wavefront job arrays could not be allocated
 */
static int encoder_state_config_tile_init(encoder_state_t * const state,
                                          const int lcu_offset_x, const int lcu_offset_y,
                                          const int width, const int height,
                                          const int width_in_lcu, const int height_in_lcu) {

  const encoder_control_t * const encoder = state->encoder_control;
  state->tile->frame = uvg_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm);

  // The frame is dereferenced by everything below, so it has to be
  // validated right after the allocation.
  if (!state->tile->frame) {
    printf("Error allocating videoframe!\r\n");
    return 0;
  }

  state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
  state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);

  // Allocate the HMVP for IBC in any case
  state->tile->frame->hmvp_lut_ibc = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
  state->tile->frame->hmvp_size_ibc = calloc(1, sizeof(uint8_t) * height_in_lcu);

  if (state->encoder_control->cfg.ibc) {
    // Allocate pixel buffer for each LCU row
    state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
    state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
    state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);

    state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);

    if (state->encoder_control->cfg.ibc & 2) {
      state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
      state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
                                                           ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) *
                                                           state->tile->frame->ibc_hashmap_pos_to_hash_stride);
    }

    for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
      state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
      // One allocation holds all three planes; u and v point into it.
      // ToDo: we don't need this much, but it would also support 4:4:4
      state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3);
      state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
      state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
    }
  }

  state->tile->frame->rec = NULL;
  state->tile->frame->source = NULL;

  state->tile->lcu_offset_x = lcu_offset_x;
  state->tile->lcu_offset_y = lcu_offset_y;

  state->tile->offset_x = lcu_offset_x * LCU_WIDTH;
  state->tile->offset_y = lcu_offset_y * LCU_WIDTH;

  state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu];

  // hor_buf_search and ver_buf_search store single row/col from each LCU row/col.
  // Because these lines are independent, the chroma subsampling only matters in one
  // of the directions.
  unsigned luma_size = LCU_WIDTH * state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
  // Both tables are indexed by encoder_control->chroma_format.
  unsigned chroma_sizes_hor[] = { 0, luma_size / 2, luma_size / 2, luma_size };
  unsigned chroma_sizes_ver[] = { 0, luma_size / 2, luma_size, luma_size };
  unsigned chroma_size_hor = chroma_sizes_hor[state->encoder_control->chroma_format];
  unsigned chroma_size_ver = chroma_sizes_ver[state->encoder_control->chroma_format];

  state->tile->hor_buf_search = uvg_yuv_t_alloc(luma_size, chroma_size_hor);
  state->tile->ver_buf_search = uvg_yuv_t_alloc(luma_size, chroma_size_ver);

  if (encoder->cfg.sao_type) {
    state->tile->hor_buf_before_sao = uvg_yuv_t_alloc(luma_size, chroma_size_hor);
    state->tile->ver_buf_before_sao = uvg_yuv_t_alloc(luma_size, chroma_size_ver);
  } else {
    state->tile->hor_buf_before_sao = NULL;
    state->tile->ver_buf_before_sao = NULL;
  }

  if (encoder->cfg.wpp) {
    int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
    state->tile->wf_jobs = MALLOC(threadqueue_job_t*, num_jobs);
    state->tile->wf_recon_jobs = MALLOC(threadqueue_job_t*, num_jobs);
    // Validate both arrays before writing to them. On failure both are
    // released and left NULL, which encoder_state_config_tile_finalize()
    // checks for before walking them.
    if (!state->tile->wf_jobs || !state->tile->wf_recon_jobs) {
      printf("Error allocating wf_jobs array!\n");
      FREE_POINTER(state->tile->wf_jobs);
      FREE_POINTER(state->tile->wf_recon_jobs);
      return 0;
    }
    for (int i = 0; i < num_jobs; ++i) {
      state->tile->wf_jobs[i] = NULL;
      state->tile->wf_recon_jobs[i] = NULL;
    }
  } else {
    state->tile->wf_jobs = NULL;
    state->tile->wf_recon_jobs = NULL;
  }
  state->tile->id = encoder->tiles_tile_id[state->tile->lcu_offset_in_ts];
  return 1;
}

/**
 * \brief Release everything allocated by encoder_state_config_tile_init().
 *
 * Does nothing if state->tile is NULL. The tile structure itself is not
 * freed here.
 *
 * \param state  encoder state
 */
static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
  if (state->tile == NULL) return;

  uvg_yuv_t_free(state->tile->hor_buf_search);
  uvg_yuv_t_free(state->tile->ver_buf_search);
  uvg_yuv_t_free(state->tile->hor_buf_before_sao);
  uvg_yuv_t_free(state->tile->ver_buf_before_sao);

  // The job arrays are NULL if their allocation failed in tile_init.
  if (state->encoder_control->cfg.wpp && state->tile->wf_jobs && state->tile->wf_recon_jobs) {
    int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
    for (int i = 0; i < num_jobs; ++i) {
      uvg_threadqueue_free_job(&state->tile->wf_jobs[i]);
      uvg_threadqueue_free_job(&state->tile->wf_recon_jobs[i]);
    }
  }

  FREE_POINTER(state->tile->frame->hmvp_lut);
  FREE_POINTER(state->tile->frame->hmvp_size);
  FREE_POINTER(state->tile->frame->hmvp_lut_ibc);
  FREE_POINTER(state->tile->frame->hmvp_size_ibc);

  if (state->encoder_control->cfg.ibc) {
    if (state->encoder_control->cfg.ibc & 2) {
      FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
    }
    for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
      // ibc_buffer_u[i] / ibc_buffer_v[i] point into ibc_buffer_y[i],
      // so only the y allocation is released per row.
      FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
      uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[i]);
    }
    FREE_POINTER(state->tile->frame->ibc_hashmap_row);
    FREE_POINTER(state->tile->frame->ibc_buffer_y);
    FREE_POINTER(state->tile->frame->ibc_buffer_u);
    FREE_POINTER(state->tile->frame->ibc_buffer_v);
  }

  uvg_videoframe_free(state->tile->frame);
  state->tile->frame = NULL;

  FREE_POINTER(state->tile->wf_jobs);
  FREE_POINTER(state->tile->wf_recon_jobs);
}

/**
 * \brief Initialize the slice-level configuration of an encoder state.
 *
 * The slice id is the index in encoder_control->slice_addresses_in_ts whose
 * entry equals start_address_in_ts; an assert fires if there is none.
 *
 * \param state                encoder state; state->slice must be valid
 * \param start_address_in_ts  first LCU of the slice in tile scan order
 * \param end_address_in_ts    last LCU of the slice in tile scan order
 * \returns                    always 1
 */
static int encoder_state_config_slice_init(encoder_state_t * const state,
                                           const int start_address_in_ts, const int end_address_in_ts) {
  state->slice->id = -1;
  for (int i = 0; i < state->encoder_control->slice_count; ++i) {
    if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) {
      state->slice->id = i;
      break;
    }
  }

  assert(state->slice->id != -1);

  state->slice->start_in_ts = start_address_in_ts;
  state->slice->end_in_ts = end_address_in_ts;

  state->slice->start_in_rs = state->encoder_control->tiles_ctb_addr_ts_to_rs[start_address_in_ts];
  state->slice->end_in_rs = state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts];
  return 1;
}

/**
 * \brief Initialize the wavefront-row configuration of an encoder state.
 *
 * \param state         encoder state; state->wfrow must be valid
 * \param lcu_offset_y  value stored in state->wfrow->lcu_offset_y
 * \returns             always 1
 */
static int encoder_state_config_wfrow_init(encoder_state_t * const state,
                                           const int lcu_offset_y) {
  state->wfrow->lcu_offset_y = lcu_offset_y;
  return 1;
}

/**
 * \brief Free the ALF slice data (the three arrays and the structure itself).
 *
 * Leaves state->slice->alf NULL. Does nothing if it already is NULL.
 *
 * \param state  encoder state
 */
static void encoder_state_main_free_alf(encoder_state_t* const state) {
  if (state->slice->alf == NULL) return;

  if (state->slice->alf->apss != NULL) {
    FREE_POINTER(state->slice->alf->apss);
  }
  if (state->slice->alf->tile_group_luma_aps_id != NULL) {
    FREE_POINTER(state->slice->alf->tile_group_luma_aps_id);
  }
  if (state->slice->alf->cc_filter_param != NULL) {
    FREE_POINTER(state->slice->alf->cc_filter_param);
  }
  FREE_POINTER(state->slice->alf);
}

/**
 * \brief Initializer for main thread related things
 *        mostly arrays that are only needed one per frame
 * \param state encoder state
 * \returns 1 on success, 0 if an ALF allocation fails (in which case
 *          state->slice->alf is left NULL)
 */
static int encoder_state_main_init(encoder_state_t* const state) {

  uint32_t lcus_in_frame = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
  // NOTE(review): these three buffers are not checked for NULL here; their
  // users are outside this file.
  state->tile->frame->lmcs_aps = calloc(1, sizeof(lmcs_aps));
  state->tile->frame->lmcs_avg_processed = calloc(1, lcus_in_frame * sizeof(int8_t));
  state->tile->frame->lmcs_avg = calloc(1, lcus_in_frame * sizeof(int32_t));

  if (state->encoder_control->cfg.alf_type) {
    state->slice->alf = malloc(sizeof(*state->slice->alf));
    if (state->slice->alf == NULL) {
      fprintf(stderr, "Failed to allocate the ALF slice data!\n");
      return 0;
    }
    state->slice->alf->apss = malloc(sizeof(alf_aps) * ALF_CTB_MAX_NUM_APS);
    state->slice->alf->tile_group_luma_aps_id = malloc(ALF_CTB_MAX_NUM_APS * sizeof(int8_t));
    state->slice->alf->cc_filter_param = malloc(sizeof(*state->slice->alf->cc_filter_param));
    if (state->slice->alf->apss == NULL ||
        state->slice->alf->tile_group_luma_aps_id == NULL ||
        state->slice->alf->cc_filter_param == NULL) {
      fprintf(stderr, "Failed to allocate the ALF slice data!\n");
      encoder_state_main_free_alf(state);
      return 0;
    }

    for (int aps_idx = 0; aps_idx < ALF_CTB_MAX_NUM_APS; aps_idx++) {
      state->slice->alf->tile_group_luma_aps_id[aps_idx] = -1;
    }
    state->slice->alf->tile_group_num_aps = -1;
    state->slice->alf->tile_group_chroma_aps_id = -1;
    state->slice->alf->tile_group_cc_alf_cb_enabled_flag = 0;
    state->slice->alf->tile_group_cc_alf_cr_enabled_flag = 0;
    state->slice->alf->tile_group_cc_alf_cb_aps_id = -1;
    state->slice->alf->tile_group_cc_alf_cr_aps_id = -1;
    state->slice->alf->num_of_param_sets = 0;
    memset(state->slice->alf->tile_group_alf_enabled_flag, 0, sizeof(state->slice->alf->tile_group_alf_enabled_flag));
    if (state->encoder_control->cfg.alf_type == UVG_ALF_FULL) {
      uvg_reset_cc_alf_aps_param(state->slice->alf->cc_filter_param);
    }

    state->tile->frame->alf_info = MALLOC(alf_info_t, 1);
    if (state->tile->frame->alf_info == NULL) {
      fprintf(stderr, "Failed to allocate the ALF info!\n");
      encoder_state_main_free_alf(state);
      return 0;
    }
    uvg_alf_create(state->tile->frame, state->encoder_control->chroma_format);
    uvg_set_aps_map(state->tile->frame, state->encoder_control->cfg.alf_type);
  }
  return 1;
}

/**
 * \brief Release everything allocated by encoder_state_main_init().
 *
 * The ALF part is skipped when state->slice->alf is NULL, which is the state
 * encoder_state_main_init() leaves behind after a failed ALF allocation.
 *
 * \param state  encoder state
 * \returns      always 1
 */
static int encoder_state_main_finalize(encoder_state_t* const state) {
  FREE_POINTER(state->tile->frame->lmcs_aps);
  FREE_POINTER(state->tile->frame->lmcs_avg_processed);
  FREE_POINTER(state->tile->frame->lmcs_avg);

  if (state->encoder_control->cfg.alf_type && state->slice->alf != NULL) {
    encoder_state_main_free_alf(state);

    uvg_alf_destroy(state->tile->frame);
    FREE_POINTER(state->tile->frame->alf_info);
    FREE_POINTER(state->tile->frame->alf_param_set_map);
  }
  return 1;
}

/**
 * \brief Initialize an encoder state and, recursively, all of its children.
 *
 * With parent_state == NULL the state becomes the MAIN state and gets its
 * own frame, tile, slice and wavefront-row configuration. Otherwise the
 * encoder control is copied from the parent and every configuration pointer
 * that is NULL is inherited from the parent. Slice and tile children are
 * then created for the LCU range covered by the state; if there are none and
 * wavefront rows are allowed, one child per LCU row is created instead. A
 * state without children is a leaf and gets its lcu_order array built.
 *
 * The children array is terminated by an element whose encoder_control is
 * NULL.
 *
 * \param child_state   state to initialize
 * \param parent_state  parent of child_state, or NULL for the main state
 * \returns             1 on success, 0 on failure
 */
int uvg_encoder_state_init(encoder_state_t * const child_state, encoder_state_t * const parent_state) {
  //We require that, if parent_state is NULL:
  //child_state->encoder_control is set
  //
  //If parent_state is not NULL, the following variable should either be set to NULL,
  //in order to inherit from parent, or should point to a valid structure:
  //child_state->frame
  //child_state->tile
  //child_state->slice
  //child_state->wfrow

  child_state->parent = parent_state;
  child_state->must_code_qp_delta = false;
  child_state->tqj_bitstream_written = NULL;
  child_state->tqj_recon_done = NULL;
  child_state->tqj_alf_process = NULL;

  // Start with an array holding only the terminating element.
  child_state->children = MALLOC(encoder_state_t, 1);
  if (!child_state->children) {
    fprintf(stderr, "Failed to allocate memory for children...\n");
    return 0;
  }
  child_state->children[0].encoder_control = NULL;

  if (!parent_state) {
    const encoder_control_t * const encoder = child_state->encoder_control;
    child_state->type = ENCODER_STATE_TYPE_MAIN;
    assert(child_state->encoder_control);
    child_state->frame = MALLOC(encoder_state_config_frame_t, 1);
    if (!child_state->frame || !encoder_state_config_frame_init(child_state)) {
      fprintf(stderr, "Could not initialize encoder_state->frame!\n");
      return 0;
    }
    child_state->tile = MALLOC(encoder_state_config_tile_t, 1);
    if (!child_state->tile || !encoder_state_config_tile_init(child_state, 0, 0, encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu)) {
      fprintf(stderr, "Could not initialize encoder_state->tile!\n");
      return 0;
    }
    child_state->slice = MALLOC(encoder_state_config_slice_t, 1);
    if (!child_state->slice || !encoder_state_config_slice_init(child_state, 0, encoder->in.width_in_lcu * encoder->in.height_in_lcu - 1)) {
      fprintf(stderr, "Could not initialize encoder_state->slice!\n");
      return 0;
    }
    child_state->wfrow = MALLOC(encoder_state_config_wfrow_t, 1);
    if (!child_state->wfrow || !encoder_state_config_wfrow_init(child_state, 0)) {
      fprintf(stderr, "Could not initialize encoder_state->wfrow!\n");
      return 0;
    }
  } else {
    child_state->encoder_control = parent_state->encoder_control;
    if (!child_state->frame) child_state->frame = parent_state->frame;
    if (!child_state->tile) child_state->tile = parent_state->tile;
    if (!child_state->slice) child_state->slice = parent_state->slice;
    if (!child_state->wfrow) child_state->wfrow = parent_state->wfrow;
  }

  // Initialization of the constraint structure
  child_state->constraint = uvg_init_constraint(child_state->constraint, child_state->encoder_control);

  uvg_bitstream_init(&child_state->stream);

  // Set CABAC output bitstream
  child_state->cabac.stream = &child_state->stream;

  //Create sub-encoders
  {
    const encoder_control_t * const encoder = child_state->encoder_control;
    uint32_t child_count = 0;
    //We first check the type of this element.
    //If it's a MAIN, it can allow both slices or tiles as child
    //If it's a TILE, it can allow slices as child, if its parent is not a slice, or wavefront rows if there is no other children
    //If it's a SLICE, it can allow tiles as child, if its parent is not a tile, or wavefront rows if there is no other children
    //If it's a WAVEFRONT_ROW, it doesn't allow any children
    int children_allow_wavefront_row = 0;
    int children_allow_slice = 0;
    int children_allow_tile = 0;
    int range_start;

    // First index of this encoder state in tile scan order.
    int start_in_ts;
    // Index of the first LCU after this state in tile scan order.
    int end_in_ts;

    switch(child_state->type) {
      case ENCODER_STATE_TYPE_MAIN:
        children_allow_slice = 1;
        children_allow_tile = 1;
        start_in_ts = 0;
        end_in_ts = child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;
        if (!encoder_state_main_init(child_state)) {
          fprintf(stderr, "Could not initialize the main encoder state!\n");
          return 0;
        }
        break;
      case ENCODER_STATE_TYPE_SLICE: {
        assert(child_state->parent);
        if (child_state->parent->type != ENCODER_STATE_TYPE_TILE) children_allow_tile = 1;
        start_in_ts = child_state->slice->start_in_ts;
        end_in_ts = child_state->slice->end_in_ts + 1;
        int num_wpp_rows = (end_in_ts - start_in_ts) / child_state->tile->frame->width_in_lcu;
        children_allow_wavefront_row = encoder->cfg.wpp && num_wpp_rows > 1;
        break;
      }
      case ENCODER_STATE_TYPE_TILE:
        assert(child_state->parent);
        if (child_state->parent->type != ENCODER_STATE_TYPE_SLICE) children_allow_slice = 1;
        children_allow_wavefront_row = encoder->cfg.wpp && child_state->tile->frame->height_in_lcu > 1;
        start_in_ts = child_state->tile->lcu_offset_in_ts;
        end_in_ts = child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;
        break;
      case ENCODER_STATE_TYPE_WAVEFRONT_ROW:
        //GCC tries to be too clever...
        start_in_ts = -1;
        end_in_ts = -1;
        break;
      default:
        fprintf(stderr, "Invalid encoder_state->type %d!\n", child_state->type);
        assert(0);
        return 0;
    }

    range_start = start_in_ts;
    //printf("%c-%p: start_in_ts=%d, end_in_ts=%d\n",child_state->type, child_state, start_in_ts, end_in_ts);
    while (range_start < end_in_ts && (children_allow_slice || children_allow_tile)) {
      encoder_state_t *new_child = NULL;
      int range_end_slice = range_start; //Will be incremented to get the range of the "thing"
      int range_end_tile = range_start; //Will be incremented to get the range of the "thing"

      int tile_allowed = uvg_lcu_at_tile_start(encoder, range_start) && children_allow_tile;
      int slice_allowed = uvg_lcu_at_slice_start(encoder, range_start) && children_allow_slice;

      //Find the smallest structure following the cursor
      if (slice_allowed) {
        while(!uvg_lcu_at_slice_end(encoder, range_end_slice)) {
          ++range_end_slice;
        }
      }

      if (tile_allowed) {
        while(!uvg_lcu_at_tile_end(encoder, range_end_tile)) {
          ++range_end_tile;
        }
      }

      //printf("range_start=%d, range_end_slice=%d, range_end_tile=%d, tile_allowed=%d, slice_allowed=%d end_in_ts=%d\n",range_start,range_end_slice,range_end_tile,tile_allowed,slice_allowed,end_in_ts);

      if ((!tile_allowed || (range_end_slice >= range_end_tile)) && !new_child && slice_allowed) {
        //Create a slice
        // The new child is built in place in the slot that currently holds
        // the array terminator; the array is grown afterwards.
        new_child = &child_state->children[child_count];
        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_SLICE;
        new_child->frame = child_state->frame;
        new_child->tile = child_state->tile;
        new_child->wfrow = child_state->wfrow;
        new_child->slice = MALLOC(encoder_state_config_slice_t, 1);
        if (!new_child->slice || !encoder_state_config_slice_init(new_child, range_start, range_end_slice)) {
          fprintf(stderr, "Could not initialize encoder_state->slice!\n");
          return 0;
        }
      }

      if ((!slice_allowed || (range_end_slice < range_end_tile)) && !new_child && tile_allowed) {
        //Create a tile
        int tile_id = encoder->tiles_tile_id[range_start];
        int tile_x = tile_id % encoder->cfg.tiles_width_count;
        int tile_y = tile_id / encoder->cfg.tiles_width_count;

        int lcu_offset_x = encoder->tiles_col_bd[tile_x];
        int lcu_offset_y = encoder->tiles_row_bd[tile_y];
        int width_in_lcu = encoder->tiles_col_bd[tile_x+1]-encoder->tiles_col_bd[tile_x];
        int height_in_lcu = encoder->tiles_row_bd[tile_y+1]-encoder->tiles_row_bd[tile_y];
        int width = MIN(width_in_lcu * LCU_WIDTH, encoder->in.width - lcu_offset_x * LCU_WIDTH);
        int height = MIN(height_in_lcu * LCU_WIDTH, encoder->in.height - lcu_offset_y * LCU_WIDTH);

        new_child = &child_state->children[child_count];
        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_TILE;
        new_child->frame = child_state->frame;
        new_child->tile = MALLOC(encoder_state_config_tile_t, 1);
        new_child->slice = child_state->slice;
        new_child->wfrow = child_state->wfrow;

        if (!new_child->tile || !encoder_state_config_tile_init(new_child, lcu_offset_x, lcu_offset_y, width, height, width_in_lcu, height_in_lcu)) {
          fprintf(stderr, "Could not initialize encoder_state->tile!\n");
          return 0;
        }
      }

      if (new_child) {
        // Grow through a temporary so that the existing array is not lost
        // when realloc fails.
        encoder_state_t *grown = realloc(child_state->children, sizeof(encoder_state_t) * (2+child_count));
        if (!grown) {
          fprintf(stderr, "Failed to allocate memory for children...\n");
          // Roll back the half-built child: release the configuration it
          // owns and turn its slot back into the array terminator.
          if (new_child->type == ENCODER_STATE_TYPE_TILE) {
            encoder_state_config_tile_finalize(new_child);
            FREE_POINTER(new_child->tile);
          } else {
            FREE_POINTER(new_child->slice);
          }
          new_child->encoder_control = NULL;
          return 0;
        }
        child_state->children = grown;
        child_state->children[1 + child_count].encoder_control = NULL;

        //Fix children parent (since we changed the address), except for the last one which is not ready yet
        {
          uint32_t i, j;
          for (i = 0; child_state->children[i].encoder_control && i < child_count; ++i) {
            for (j = 0; child_state->children[i].children[j].encoder_control; ++j) {
              child_state->children[i].children[j].parent = &child_state->children[i];
            }
            for (j = 0; j < child_state->children[i].lcu_order_count; ++j) {
              child_state->children[i].lcu_order[j].encoder_state = &child_state->children[i];
            }
            child_state->children[i].cabac.stream = &child_state->children[i].stream;
          }
        }

        if (!uvg_encoder_state_init(&child_state->children[child_count], child_state)) {
          fprintf(stderr, "Unable to init child...\n");
          return 0;
        }
        child_count += 1;
      }

      range_start = MAX(range_end_slice, range_end_tile) + 1;
    }

    //We create wavefronts only if we have no children
    if (children_allow_wavefront_row && child_count == 0) {
      int first_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu;
      int last_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu;
      int num_rows;
      int i;

      assert(!(children_allow_slice || children_allow_tile));
      assert(child_count == 0);

      // Find the first and last LCU row touched by [start_in_ts, end_in_ts).
      for (i=start_in_ts; i<end_in_ts; ++i) {
        const int row = encoder->tiles_ctb_addr_ts_to_rs[i] / encoder->in.width_in_lcu;
        if (row < first_row) first_row = row;
        if (row > last_row) last_row = row;
      }

      num_rows = last_row - first_row + 1;

      //When entropy_coding_sync_enabled_flag is equal to 1 and the first coding tree block in a slice is not the first coding
      //tree block of a row of coding tree blocks in a tile, it is a requirement of bitstream conformance that the last coding tree
      //block in the slice shall belong to the same row of coding tree blocks as the first coding tree block in the slice.

      if (encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] % encoder->in.width_in_lcu != child_state->tile->lcu_offset_x) {
        if (num_rows > 1) {
          fprintf(stderr, "Invalid: first CTB in slice %d is not at the tile %d edge, and the slice spans on more than one row.\n", child_state->slice->id, child_state->tile->id);
          return 0;
        }
      }

      //FIXME Do the same kind of check if we implement slice segments

      // Grow through a temporary; on failure the old one-element array
      // (just the terminator) stays valid.
      encoder_state_t *rows = realloc(child_state->children, sizeof(encoder_state_t) * (num_rows + 1));
      if (!rows) {
        fprintf(stderr, "Failed to allocate memory for children...\n");
        return 0;
      }
      child_count = num_rows;
      child_state->children = rows;
      child_state->children[num_rows].encoder_control = NULL;

      for (i=0; i < num_rows; ++i) {
        encoder_state_t *new_child = &child_state->children[i];

        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_WAVEFRONT_ROW;
        new_child->frame = child_state->frame;
        new_child->tile = child_state->tile;
        new_child->slice = child_state->slice;
        new_child->wfrow = MALLOC(encoder_state_config_wfrow_t, 1);

        if (!new_child->wfrow || !encoder_state_config_wfrow_init(new_child, i)) {
          fprintf(stderr, "Could not initialize encoder_state->wfrow!\n");
          return 0;
        }

        if (!uvg_encoder_state_init(new_child, child_state)) {
          fprintf(stderr, "Unable to init child...\n");
          return 0;
        }
      }
    }

    child_state->is_leaf = (child_count == 0);
    //This node is a leaf, compute LCU-order
    if (child_state->is_leaf) {
      //All LCU computations are relative to the tile
      //Remark: this could be optimized, but since it's run only once, it's better to do it in a understandable way.

      //By default, the full tile
      int lcu_id;
      int lcu_start = 0;
      //End is the element AFTER the end (iterate < lcu_end)
      int lcu_end = child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;

      //Restrict to the current slice if needed
      lcu_start = MAX(lcu_start, child_state->slice->start_in_ts - child_state->tile->lcu_offset_in_ts);
      lcu_end = MIN(lcu_end, child_state->slice->end_in_ts - child_state->tile->lcu_offset_in_ts + 1);

      //Restrict to the current wavefront row if needed
      if (child_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
        lcu_start = MAX(lcu_start, (child_state->wfrow->lcu_offset_y) * child_state->tile->frame->width_in_lcu);
        lcu_end = MIN(lcu_end, (child_state->wfrow->lcu_offset_y + 1) * child_state->tile->frame->width_in_lcu);
      }

      child_state->lcu_order_count = lcu_end - lcu_start;
      child_state->lcu_order = MALLOC(lcu_order_element_t, child_state->lcu_order_count);
      assert(child_state->lcu_order);
      // The assert is compiled out under NDEBUG, so check at run time too.
      if (!child_state->lcu_order) {
        fprintf(stderr, "Failed to allocate the LCU order array!\n");
        return 0;
      }

      for (uint32_t i = 0; i < child_state->lcu_order_count; ++i) {
        lcu_order_element_t * const lcu = &child_state->lcu_order[i];

        lcu_id = lcu_start + i;
        lcu->encoder_state = child_state;
        lcu->id = lcu_id;
        lcu->index = i;
        lcu->position.x = lcu_id % child_state->tile->frame->width_in_lcu;
        lcu->position.y = lcu_id / child_state->tile->frame->width_in_lcu;
        lcu->position_px.x = lcu->position.x * LCU_WIDTH;
        lcu->position_px.y = lcu->position.y * LCU_WIDTH;
        // LCUs on the right/bottom picture edge may be smaller than LCU_WIDTH.
        lcu->size.x = MIN(LCU_WIDTH, encoder->in.width - (child_state->tile->lcu_offset_x * LCU_WIDTH + lcu->position_px.x));
        lcu->size.y = MIN(LCU_WIDTH, encoder->in.height - (child_state->tile->lcu_offset_y * LCU_WIDTH + lcu->position_px.y));
        lcu->first_row = uvg_lcu_in_first_row(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        lcu->last_row = uvg_lcu_in_last_row(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        lcu->first_column = uvg_lcu_in_first_column(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        lcu->last_column = uvg_lcu_in_last_column(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);

        lcu->above = NULL;
        lcu->below = NULL;
        lcu->left = NULL;
        lcu->right = NULL;

        if (!lcu->first_row) {
          //Find LCU above
          if (child_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
            uint32_t j;
            //For all previous wavefront rows
            for (j=0; &child_state->parent->children[j] != child_state && child_state->parent->children[j].encoder_control; ++j) {
              encoder_state_t * const sibling = &child_state->parent->children[j];
              if (sibling->wfrow->lcu_offset_y == child_state->wfrow->lcu_offset_y - 1) {
                uint32_t k;
                for (k=0; k < sibling->lcu_order_count; ++k) {
                  if (sibling->lcu_order[k].position.x == lcu->position.x) {
                    assert(sibling->lcu_order[k].position.y == lcu->position.y - 1);
                    lcu->above = &sibling->lcu_order[k];
                  }
                }
              }
            }
          } else {
            // Within a single state the LCU above is one tile row earlier.
            lcu->above = &child_state->lcu_order[i-child_state->tile->frame->width_in_lcu];
          }
          assert(lcu->above);
          lcu->above->below = lcu;
        }
        if (!lcu->first_column) {
          lcu->left = &child_state->lcu_order[i-1];
          assert(lcu->left->position.x == lcu->position.x - 1);
          lcu->left->right = lcu;
        }
      }
    } else {
      child_state->lcu_order_count = 0;
      child_state->lcu_order = NULL;
    }
  }

  //Validate the structure
  if (child_state->type == ENCODER_STATE_TYPE_TILE) {
    if (child_state->tile->lcu_offset_in_ts < child_state->slice->start_in_ts) {
      fprintf(stderr, "Tile %d starts before slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id);
      return 0;
    }
    if (child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu - 1 > child_state->slice->end_in_ts) {
      fprintf(stderr, "Tile %d ends after slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id);
      return 0;
    }
  }

  if (child_state->type == ENCODER_STATE_TYPE_SLICE) {
    if (child_state->slice->start_in_ts < child_state->tile->lcu_offset_in_ts) {
      fprintf(stderr, "Slice %d starts before tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id);
      return 0;
    }
    if (child_state->slice->end_in_ts > child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu - 1) {
      fprintf(stderr, "Slice %d ends after tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id);
      return 0;
    }
  }

#ifdef UVG_DEBUG_PRINT_THREADING_INFO
  if (!parent_state) uvg_dbg_encoder_state_dump_graphviz(child_state);
#endif //UVG_DEBUG_PRINT_THREADING_INFO
  return 1;
}

/**
 * \brief Release an encoder state and, recursively, all of its children.
 *
 * A frame/tile/slice/wfrow configuration is released only by the state that
 * owns it, i.e. when the state has no parent or its pointer differs from the
 * parent's. The state structure itself is not freed.
 *
 * \param state  encoder state to finalize
 */
void uvg_encoder_state_finalize(encoder_state_t * const state) {
  if (state->children) {
    int i=0;
    // The children array ends at the first element without encoder_control.
    for (i = 0; state->children[i].encoder_control; ++i) {
      uvg_encoder_state_finalize(&state->children[i]);
    }

    FREE_POINTER(state->children);
  }

  if (state->type == ENCODER_STATE_TYPE_MAIN) {
    encoder_state_main_finalize(state);
  }

  FREE_POINTER(state->lcu_order);
  state->lcu_order_count = 0;

  if (!state->parent || (state->parent->wfrow != state->wfrow)) {
    FREE_POINTER(state->wfrow);
  }

  if (!state->parent || (state->parent->slice != state->slice)) {
    FREE_POINTER(state->slice);
  }

  if (!state->parent || (state->parent->tile != state->tile)) {
    encoder_state_config_tile_finalize(state);
    FREE_POINTER(state->tile);
  }

  if (!state->parent || (state->parent->frame != state->frame)) {
    encoder_state_config_frame_finalize(state);
    FREE_POINTER(state->frame);
  }

  if (state->constraint) {
    // End of the constraint structure
    uvg_constraint_free(state);
  }

  uvg_bitstream_finalize(&state->stream);

  uvg_threadqueue_free_job(&state->tqj_recon_done);
  uvg_threadqueue_free_job(&state->tqj_bitstream_written);
  if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {
    // The ALF job is stored on the root of the state tree.
    encoder_state_t* parent = state;
    while (parent->parent) parent = parent->parent;
    uvg_threadqueue_free_job(&parent->tqj_alf_process);
  }
}