mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 12:44:07 +00:00
778 lines
34 KiB
C
778 lines
34 KiB
C
/*****************************************************************************
|
|
* This file is part of uvg266 VVC encoder.
|
|
*
|
|
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
* list of conditions and the following disclaimer in the documentation and/or
|
|
* other materials provided with the distribution.
|
|
*
|
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
****************************************************************************/
|
|
|
|
#include "encoder_state-ctors_dtors.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "bitstream.h"
|
|
#include "cabac.h"
|
|
#include "cu.h"
|
|
#include "debug.h"
|
|
#include "encoder.h"
|
|
#include "encoder_state-geometry.h"
|
|
#include "encoderstate.h"
|
|
#include "image.h"
|
|
#include "imagelist.h"
|
|
#include "uvg266.h"
|
|
#include "threadqueue.h"
|
|
#include "videoframe.h"
|
|
#include "rate_control.h"
|
|
#include "alf.h"
|
|
#include "reshape.h"
|
|
|
|
|
|
/**
 * \brief Initialize the per-frame configuration stored in state->frame.
 *
 * Allocates the reference picture list and the per-LCU arrays (lcu_stats,
 * aq_offsets, c_para, k_para), resets the frame counters and rate control
 * parameters, initializes rc_lock and fetches the rate control data.
 *
 * \param state   encoder state; state->frame and state->encoder_control
 *                must already point to valid structures
 * \returns       1 on success, 0 if any allocation failed
 */
static int encoder_state_config_frame_init(encoder_state_t * const state) {
  state->frame->ref = uvg_image_list_alloc(MAX_REF_PIC_COUNT);
  if (!state->frame->ref) {
    fprintf(stderr, "Failed to allocate the picture list!\n");
    return 0;
  }
  state->frame->ref_list = REF_PIC_LIST_0;
  state->frame->num = 0;
  state->frame->poc = 0;
  state->frame->total_bits_coded = 0;
  state->frame->cur_frame_bits_coded = 0;
  state->frame->cur_gop_bits_coded = 0;
  state->frame->prepared = 0;
  state->frame->done = 1;

  state->frame->rc_alpha = 3.2003;
  state->frame->rc_beta = -1.367;
  state->frame->icost = 0;

  const encoder_control_t * const encoder = state->encoder_control;
  const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu;

  // Initialized before the fallible allocations below, because
  // encoder_state_config_frame_finalize destroys this lock unconditionally.
  pthread_mutex_init(&state->frame->rc_lock, NULL);

  // Assign all four per-LCU arrays before checking any of them, so that each
  // pointer field holds either a valid allocation or NULL when we return.
  state->frame->lcu_stats = calloc(num_lcus, sizeof(lcu_stats_t));
  state->frame->aq_offsets = MALLOC(double, num_lcus);
  state->frame->c_para = malloc(sizeof(double) * num_lcus);
  state->frame->k_para = malloc(sizeof(double) * num_lcus);
  if (state->frame->lcu_stats == NULL ||
      state->frame->aq_offsets == NULL ||
      state->frame->c_para == NULL ||
      state->frame->k_para == NULL) {
    return 0;
  }

  // Number of pixels covered by each LCU; LCUs on the right and bottom edges
  // are clipped to the configured picture size.
  // NOTE(review): the literal 64 is presumably the LCU width (LCU_WIDTH is
  // used for this elsewhere in the file) - confirm before replacing it.
  for (int y = 0; y < encoder->in.height_in_lcu; y++) {
    for (int x = 0; x < encoder->in.width_in_lcu; x++) {
      int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
      state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp;
    }
  }

  state->frame->new_ratecontrol = uvg_get_rc_data(NULL);

  return 1;
}
|
|
|
|
/**
 * \brief Release the resources held by state->frame.
 *
 * Destroys rc_lock, frees the per-LCU arrays and destroys the reference
 * picture list. The frame structure itself is not freed here.
 * Does nothing when state->frame is NULL.
 *
 * \param state   encoder state whose frame configuration is torn down
 */
static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
  if (!state->frame) {
    return;
  }

  pthread_mutex_destroy(&state->frame->rc_lock);

  // Rate control parameter arrays.
  if (state->frame->c_para != NULL) {
    FREE_POINTER(state->frame->c_para);
  }
  if (state->frame->k_para != NULL) {
    FREE_POINTER(state->frame->k_para);
  }

  // Reference pictures and per-LCU statistics.
  uvg_image_list_destroy(state->frame->ref);
  FREE_POINTER(state->frame->lcu_stats);
  FREE_POINTER(state->frame->aq_offsets);
}
|
|
|
|
/**
 * \brief Initialize the per-tile configuration stored in state->tile.
 *
 * Allocates the tile's videoframe, the HMVP tables, the IBC buffers (when
 * cfg.ibc is set), the search/SAO border buffers and, when cfg.wpp is set,
 * the wavefront job arrays. Also records the tile position and id.
 *
 * \param state          encoder state; state->tile and state->encoder_control
 *                       must already point to valid structures
 * \param lcu_offset_x   horizontal position of the tile, in LCUs
 * \param lcu_offset_y   vertical position of the tile, in LCUs
 * \param width          tile width passed to uvg_videoframe_alloc
 * \param height         tile height passed to uvg_videoframe_alloc
 * \param width_in_lcu   not used by this function
 * \param height_in_lcu  number of LCU rows, used to size the HMVP tables
 * \returns              1 on success, 0 if the videoframe or the wavefront
 *                       job arrays could not be allocated
 *
 * NOTE(review): the remaining allocations (HMVP tables, IBC buffers, yuv
 * buffers) are still used without a NULL check.
 */
static int encoder_state_config_tile_init(encoder_state_t * const state,
                                          const int lcu_offset_x, const int lcu_offset_y,
                                          const int width, const int height, const int width_in_lcu, const int height_in_lcu) {

  const encoder_control_t * const encoder = state->encoder_control;
  state->tile->frame = uvg_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm);

  // This check has to come before the first dereference of the frame.
  if (!state->tile->frame) {
    printf("Error allocating videoframe!\r\n");
    return 0;
  }

  state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
  state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);

  // Allocate the HMVP for IBC in any case
  state->tile->frame->hmvp_lut_ibc = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
  state->tile->frame->hmvp_size_ibc = calloc(1, sizeof(uint8_t) * height_in_lcu);

  if (state->encoder_control->cfg.ibc) {
    // Allocate pixel buffer for each LCU row
    state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
    state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
    state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
    state->tile->frame->ibc_hashmap_row = malloc(sizeof(uvg_hashmap_t) * state->tile->frame->height_in_lcu);

    if (state->encoder_control->cfg.ibc & 2) {
      state->tile->frame->ibc_hashmap_pos_to_hash_stride = ((state->tile->frame->width+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE);
      state->tile->frame->ibc_hashmap_pos_to_hash = malloc(sizeof(uint32_t) *
                                                     ((state->tile->frame->height+UVG_HASHMAP_BLOCKSIZE-1)/ UVG_HASHMAP_BLOCKSIZE) * state->tile->frame->ibc_hashmap_pos_to_hash_stride);
    }

    // One allocation per LCU row holds all three planes back to back;
    // the u and v pointers are offsets into the y allocation.
    for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
      state->tile->frame->ibc_hashmap_row[i] = uvg_hashmap_create((LCU_WIDTH * IBC_BUFFER_WIDTH)>>2);
      state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(IBC_BUFFER_SIZE * 3); // ToDo: we don't need this much, but it would also support 4:4:4
      state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
      state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
    }
  }

  state->tile->frame->rec = NULL;

  state->tile->frame->source = NULL;

  state->tile->lcu_offset_x = lcu_offset_x;
  state->tile->lcu_offset_y = lcu_offset_y;
  state->tile->offset_x = lcu_offset_x * LCU_WIDTH;
  state->tile->offset_y = lcu_offset_y * LCU_WIDTH;

  state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu];

  // hor_buf_search and ver_buf_search store a single row/col from each LCU row/col.
  // Because these lines are independent, the chroma subsampling only matters in one
  // of the directions.
  // The two tables below are indexed by encoder_control->chroma_format.
  unsigned luma_size = LCU_WIDTH * state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
  unsigned chroma_sizes_hor[] = { 0, luma_size / 2, luma_size / 2, luma_size };
  unsigned chroma_sizes_ver[] = { 0, luma_size / 2, luma_size, luma_size };
  unsigned chroma_size_hor = chroma_sizes_hor[state->encoder_control->chroma_format];
  unsigned chroma_size_ver = chroma_sizes_ver[state->encoder_control->chroma_format];

  state->tile->hor_buf_search = uvg_yuv_t_alloc(luma_size, chroma_size_hor);
  state->tile->ver_buf_search = uvg_yuv_t_alloc(luma_size, chroma_size_ver);

  if (encoder->cfg.sao_type) {
    state->tile->hor_buf_before_sao = uvg_yuv_t_alloc(luma_size, chroma_size_hor);
    state->tile->ver_buf_before_sao = uvg_yuv_t_alloc(luma_size, chroma_size_ver);
  } else {
    state->tile->hor_buf_before_sao = NULL;
    state->tile->ver_buf_before_sao = NULL;
  }

  if (encoder->cfg.wpp) {
    int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
    state->tile->wf_jobs = MALLOC(threadqueue_job_t*, num_jobs);
    state->tile->wf_recon_jobs = MALLOC(threadqueue_job_t*, num_jobs);
    // Both arrays are written in the loop below, so both are checked first.
    if (!state->tile->wf_jobs || !state->tile->wf_recon_jobs) {
      printf("Error allocating wf_jobs array!\n");
      return 0;
    }
    for (int i = 0; i < num_jobs; ++i) {
      state->tile->wf_jobs[i] = NULL;
      state->tile->wf_recon_jobs[i] = NULL;
    }
  } else {
    state->tile->wf_jobs = NULL;
    state->tile->wf_recon_jobs = NULL;
  }
  state->tile->id = encoder->tiles_tile_id[state->tile->lcu_offset_in_ts];
  return 1;
}
|
|
|
|
/**
 * \brief Release the resources held by state->tile.
 *
 * Frees the border buffers, the wavefront jobs and their arrays, the HMVP
 * tables, the IBC buffers (when cfg.ibc is set) and finally the videoframe.
 * The tile structure itself is not freed here.
 * Does nothing when state->tile is NULL.
 *
 * \param state   encoder state whose tile configuration is torn down
 */
static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
  if (!state->tile) {
    return;
  }

  // Border buffers used by search and SAO.
  uvg_yuv_t_free(state->tile->hor_buf_search);
  uvg_yuv_t_free(state->tile->ver_buf_search);
  uvg_yuv_t_free(state->tile->hor_buf_before_sao);
  uvg_yuv_t_free(state->tile->ver_buf_before_sao);

  // Wavefront jobs: one pair of jobs per LCU of the tile.
  if (state->encoder_control->cfg.wpp) {
    int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
    int job = 0;
    while (job < num_jobs) {
      uvg_threadqueue_free_job(&state->tile->wf_jobs[job]);
      uvg_threadqueue_free_job(&state->tile->wf_recon_jobs[job]);
      ++job;
    }
  }
  FREE_POINTER(state->tile->wf_jobs);
  FREE_POINTER(state->tile->wf_recon_jobs);

  // HMVP tables (regular and IBC).
  FREE_POINTER(state->tile->frame->hmvp_lut);
  FREE_POINTER(state->tile->frame->hmvp_size);
  FREE_POINTER(state->tile->frame->hmvp_lut_ibc);
  FREE_POINTER(state->tile->frame->hmvp_size_ibc);

  if (state->encoder_control->cfg.ibc) {
    if (state->encoder_control->cfg.ibc & 2) {
      FREE_POINTER(state->tile->frame->ibc_hashmap_pos_to_hash);
    }

    // Only the y pointer of each row is freed; the u and v pointers of a row
    // are offsets into that same allocation (see the tile initializer).
    uint32_t row = 0;
    while (row < state->tile->frame->height_in_lcu) {
      FREE_POINTER(state->tile->frame->ibc_buffer_y[row]);
      uvg_hashmap_free(state->tile->frame->ibc_hashmap_row[row]);
      row++;
    }

    FREE_POINTER(state->tile->frame->ibc_hashmap_row);
    FREE_POINTER(state->tile->frame->ibc_buffer_y);
    FREE_POINTER(state->tile->frame->ibc_buffer_u);
    FREE_POINTER(state->tile->frame->ibc_buffer_v);
  }

  uvg_videoframe_free(state->tile->frame);
  state->tile->frame = NULL;
}
|
|
|
|
/**
 * \brief Initialize the per-slice configuration stored in state->slice.
 *
 * Stores the slice range in tile scan order, converts both ends to raster
 * scan order through tiles_ctb_addr_ts_to_rs, and sets the slice id to the
 * index of the first entry of slice_addresses_in_ts that equals
 * start_address_in_ts. An assert fires when no such entry exists.
 *
 * \param state                 encoder state; state->slice and
 *                              state->encoder_control must be valid
 * \param start_address_in_ts   first LCU of the slice, in tile scan order
 * \param end_address_in_ts     end LCU of the slice, in tile scan order
 * \returns                     always 1
 */
static int encoder_state_config_slice_init(encoder_state_t * const state,
                                           const int start_address_in_ts,
                                           const int end_address_in_ts)
{
  // Slice range in tile scan order and in raster scan order.
  state->slice->start_in_ts = start_address_in_ts;
  state->slice->end_in_ts = end_address_in_ts;
  state->slice->start_in_rs = state->encoder_control->tiles_ctb_addr_ts_to_rs[start_address_in_ts];
  state->slice->end_in_rs = state->encoder_control->tiles_ctb_addr_ts_to_rs[end_address_in_ts];

  // Look up the slice id from its start address.
  state->slice->id = -1;
  int idx = 0;
  while (idx < state->encoder_control->slice_count) {
    if (state->encoder_control->slice_addresses_in_ts[idx] == start_address_in_ts) {
      state->slice->id = idx;
      break;
    }
    ++idx;
  }
  assert(state->slice->id != -1);

  return 1;
}
|
|
|
|
/**
 * \brief Initialize the per-wavefront-row configuration in state->wfrow.
 *
 * \param state          encoder state; state->wfrow must be valid
 * \param lcu_offset_y   row offset stored into state->wfrow->lcu_offset_y
 * \returns              always 1
 */
static int encoder_state_config_wfrow_init(encoder_state_t * const state,
                                           const int lcu_offset_y)
{
  state->wfrow->lcu_offset_y = lcu_offset_y;

  return 1;
}
|
|
|
|
/**
|
|
* \brief Initializer for main thread related things
|
|
mostly arrays that are only needed one per frame
|
|
* \param state encoder state
|
|
* \returns int
|
|
*/
|
|
static int encoder_state_main_init(encoder_state_t* const state) {
|
|
|
|
uint32_t lcus_in_frame = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu;
|
|
state->tile->frame->lmcs_aps = calloc(1, sizeof(lmcs_aps));
|
|
state->tile->frame->lmcs_avg_processed = calloc(1, lcus_in_frame * sizeof(int8_t));
|
|
state->tile->frame->lmcs_avg = calloc(1, lcus_in_frame * sizeof(int32_t));
|
|
|
|
if (state->encoder_control->cfg.alf_type) {
|
|
state->slice->alf = malloc(sizeof(*state->slice->alf));
|
|
|
|
state->slice->alf->apss = malloc(sizeof(alf_aps) * ALF_CTB_MAX_NUM_APS);
|
|
state->slice->alf->tile_group_luma_aps_id = malloc(ALF_CTB_MAX_NUM_APS * sizeof(int8_t));
|
|
state->slice->alf->cc_filter_param = malloc(sizeof(*state->slice->alf->cc_filter_param));
|
|
for (int aps_idx = 0; aps_idx < ALF_CTB_MAX_NUM_APS; aps_idx++) {
|
|
state->slice->alf->tile_group_luma_aps_id[aps_idx] = -1;
|
|
}
|
|
state->slice->alf->tile_group_num_aps = -1;
|
|
state->slice->alf->tile_group_chroma_aps_id = -1;
|
|
state->slice->alf->tile_group_cc_alf_cb_enabled_flag = 0;
|
|
state->slice->alf->tile_group_cc_alf_cr_enabled_flag = 0;
|
|
state->slice->alf->tile_group_cc_alf_cb_aps_id = -1;
|
|
state->slice->alf->tile_group_cc_alf_cr_aps_id = -1;
|
|
state->slice->alf->num_of_param_sets = 0;
|
|
memset(state->slice->alf->tile_group_alf_enabled_flag, 0, sizeof(state->slice->alf->tile_group_alf_enabled_flag));
|
|
if (state->encoder_control->cfg.alf_type == UVG_ALF_FULL) {
|
|
uvg_reset_cc_alf_aps_param(state->slice->alf->cc_filter_param);
|
|
}
|
|
|
|
state->tile->frame->alf_info = MALLOC(alf_info_t, 1);
|
|
uvg_alf_create(state->tile->frame, state->encoder_control->chroma_format);
|
|
uvg_set_aps_map(state->tile->frame, state->encoder_control->cfg.alf_type);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/**
 * \brief Release what encoder_state_main_init allocated.
 *
 * Frees the LMCS arrays of the tile frame and, when cfg.alf_type is set,
 * the ALF state of the slice and of the tile frame.
 *
 * \param state   main encoder state
 * \returns       always 1
 */
static int encoder_state_main_finalize(encoder_state_t* const state) {
  // LMCS arrays.
  FREE_POINTER(state->tile->frame->lmcs_aps);
  FREE_POINTER(state->tile->frame->lmcs_avg_processed);
  FREE_POINTER(state->tile->frame->lmcs_avg);

  if (!state->encoder_control->cfg.alf_type) {
    return 1;
  }

  // Members of the slice ALF state first, then the state itself.
  if (state->slice->alf->apss) {
    FREE_POINTER(state->slice->alf->apss);
  }
  if (state->slice->alf->tile_group_luma_aps_id) {
    FREE_POINTER(state->slice->alf->tile_group_luma_aps_id);
  }
  if (state->slice->alf->cc_filter_param) {
    FREE_POINTER(state->slice->alf->cc_filter_param);
  }
  FREE_POINTER(state->slice->alf);

  // ALF data attached to the tile frame.
  uvg_alf_destroy(state->tile->frame);
  FREE_POINTER(state->tile->frame->alf_info);
  FREE_POINTER(state->tile->frame->alf_param_set_map);

  return 1;
}
|
|
|
|
/**
 * \brief Initialize an encoder state and, recursively, all of its children.
 *
 * With parent_state == NULL the state becomes the MAIN state and gets its own
 * frame, tile, slice and wfrow configuration. Otherwise the state inherits
 * from the parent every configuration pointer that is NULL on entry.
 * Child states (slices, tiles or wavefront rows) are then created, stored in
 * child_state->children (terminated by an entry whose encoder_control is
 * NULL), and for leaf states the LCU order is computed.
 *
 * \param child_state    state to initialize; see the requirements below
 * \param parent_state   parent of child_state, or NULL for the main state
 * \returns              1 on success, 0 on failure
 */
int uvg_encoder_state_init(encoder_state_t * const child_state, encoder_state_t * const parent_state) {
  //We require that, if parent_state is NULL:
  //child_state->encoder_control is set
  //
  //If parent_state is not NULL, the following variable should either be set to NULL,
  //in order to inherit from parent, or should point to a valid structure:
  //child_state->frame
  //child_state->tile
  //child_state->slice
  //child_state->wfrow

  child_state->parent = parent_state;
  child_state->must_code_qp_delta = false;
  child_state->tqj_bitstream_written = NULL;
  child_state->tqj_recon_done = NULL;
  child_state->tqj_alf_process = NULL;

  // The children array always ends with a sentinel entry whose
  // encoder_control is NULL; start with just the sentinel.
  child_state->children = MALLOC(encoder_state_t, 1);
  if (!child_state->children) {
    fprintf(stderr, "Failed to allocate memory for children...\n");
    return 0;
  }
  child_state->children[0].encoder_control = NULL;

  if (!parent_state) {
    const encoder_control_t * const encoder = child_state->encoder_control;
    child_state->type = ENCODER_STATE_TYPE_MAIN;
    assert(child_state->encoder_control);
    child_state->frame = MALLOC(encoder_state_config_frame_t, 1);
    if (!child_state->frame || !encoder_state_config_frame_init(child_state)) {
      fprintf(stderr, "Could not initialize encoder_state->frame!\n");
      return 0;
    }
    child_state->tile = MALLOC(encoder_state_config_tile_t, 1);
    if (!child_state->tile || !encoder_state_config_tile_init(child_state, 0, 0, encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu)) {
      fprintf(stderr, "Could not initialize encoder_state->tile!\n");
      return 0;
    }

    child_state->slice = MALLOC(encoder_state_config_slice_t, 1);
    if (!child_state->slice || !encoder_state_config_slice_init(child_state, 0, encoder->in.width_in_lcu * encoder->in.height_in_lcu - 1)) {
      fprintf(stderr, "Could not initialize encoder_state->slice!\n");
      return 0;
    }
    child_state->wfrow = MALLOC(encoder_state_config_wfrow_t, 1);
    if (!child_state->wfrow || !encoder_state_config_wfrow_init(child_state, 0)) {
      fprintf(stderr, "Could not initialize encoder_state->wfrow!\n");
      return 0;
    }
  } else {
    child_state->encoder_control = parent_state->encoder_control;
    if (!child_state->frame) child_state->frame = parent_state->frame;
    if (!child_state->tile) child_state->tile = parent_state->tile;
    if (!child_state->slice) child_state->slice = parent_state->slice;
    if (!child_state->wfrow) child_state->wfrow = parent_state->wfrow;
  }

  // Initialization of the constraint structure
  child_state->constraint = uvg_init_constraint(child_state->constraint, child_state->encoder_control);

  uvg_bitstream_init(&child_state->stream);

  // Set CABAC output bitstream
  child_state->cabac.stream = &child_state->stream;

  //Create sub-encoders
  {
    const encoder_control_t * const encoder = child_state->encoder_control;
    uint32_t child_count = 0;
    //We first check the type of this element.
    //If it's a MAIN, it can allow both slices or tiles as child
    //If it's a TILE, it can allow slices as child, if its parent is not a slice, or wavefront rows if there is no other children
    //If it's a SLICE, it can allow tiles as child, if its parent is not a tile, or wavefront rows if there is no other children
    //If it's a WAVEFRONT_ROW, it doesn't allow any children
    int children_allow_wavefront_row = 0;
    int children_allow_slice = 0;
    int children_allow_tile = 0;
    int range_start;

    // First index of this encoder state in tile scan order.
    int start_in_ts;
    // Index of the first LCU after this state in tile scan order.
    int end_in_ts;

    switch(child_state->type) {
      case ENCODER_STATE_TYPE_MAIN:
        children_allow_slice = 1;
        children_allow_tile = 1;
        start_in_ts = 0;
        end_in_ts = child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;

        if (!encoder_state_main_init(child_state)) {
          fprintf(stderr, "Could not initialize the main encoder state!\n");
          return 0;
        }

        break;
      case ENCODER_STATE_TYPE_SLICE:
        assert(child_state->parent);
        if (child_state->parent->type != ENCODER_STATE_TYPE_TILE) children_allow_tile = 1;
        start_in_ts = child_state->slice->start_in_ts;
        end_in_ts = child_state->slice->end_in_ts + 1;
        int num_wpp_rows = (end_in_ts - start_in_ts) / child_state->tile->frame->width_in_lcu;
        children_allow_wavefront_row = encoder->cfg.wpp && num_wpp_rows > 1;
        break;
      case ENCODER_STATE_TYPE_TILE:
        assert(child_state->parent);
        if (child_state->parent->type != ENCODER_STATE_TYPE_SLICE) children_allow_slice = 1;
        children_allow_wavefront_row =
          encoder->cfg.wpp && child_state->tile->frame->height_in_lcu > 1;
        start_in_ts = child_state->tile->lcu_offset_in_ts;
        end_in_ts = child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;
        break;
      case ENCODER_STATE_TYPE_WAVEFRONT_ROW:
        //GCC tries to be too clever...
        start_in_ts = -1;
        end_in_ts = -1;
        break;
      default:
        fprintf(stderr, "Invalid encoder_state->type %d!\n", child_state->type);
        assert(0);
        return 0;
    }

    range_start = start_in_ts;
    //printf("%c-%p: start_in_ts=%d, end_in_ts=%d\n",child_state->type, child_state, start_in_ts, end_in_ts);
    while (range_start < end_in_ts && (children_allow_slice || children_allow_tile)) {
      encoder_state_t *new_child = NULL;
      int range_end_slice = range_start; //Will be incremented to get the range of the "thing"
      int range_end_tile = range_start; //Will be incremented to get the range of the "thing"

      int tile_allowed = uvg_lcu_at_tile_start(encoder, range_start) && children_allow_tile;
      int slice_allowed = uvg_lcu_at_slice_start(encoder, range_start) && children_allow_slice;

      //Find the smallest structure following the cursor
      if (slice_allowed) {
        while(!uvg_lcu_at_slice_end(encoder, range_end_slice)) {
          ++range_end_slice;
        }
      }

      if (tile_allowed) {
        while(!uvg_lcu_at_tile_end(encoder, range_end_tile)) {
          ++range_end_tile;
        }
      }

      //printf("range_start=%d, range_end_slice=%d, range_end_tile=%d, tile_allowed=%d, slice_allowed=%d end_in_ts=%d\n",range_start,range_end_slice,range_end_tile,tile_allowed,slice_allowed,end_in_ts);

      if ((!tile_allowed || (range_end_slice >= range_end_tile)) && !new_child && slice_allowed) {
        //Create a slice
        new_child = &child_state->children[child_count];
        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_SLICE;
        new_child->frame = child_state->frame;
        new_child->tile = child_state->tile;
        new_child->wfrow = child_state->wfrow;
        new_child->slice = MALLOC(encoder_state_config_slice_t, 1);
        if (!new_child->slice || !encoder_state_config_slice_init(new_child, range_start, range_end_slice)) {
          fprintf(stderr, "Could not initialize encoder_state->slice!\n");
          return 0;
        }
      }

      if ((!slice_allowed || (range_end_slice < range_end_tile)) && !new_child && tile_allowed) {
        //Create a tile
        int tile_id = encoder->tiles_tile_id[range_start];
        int tile_x = tile_id % encoder->cfg.tiles_width_count;
        int tile_y = tile_id / encoder->cfg.tiles_width_count;

        int lcu_offset_x = encoder->tiles_col_bd[tile_x];
        int lcu_offset_y = encoder->tiles_row_bd[tile_y];
        int width_in_lcu = encoder->tiles_col_bd[tile_x+1]-encoder->tiles_col_bd[tile_x];
        int height_in_lcu = encoder->tiles_row_bd[tile_y+1]-encoder->tiles_row_bd[tile_y];
        int width = MIN(width_in_lcu * LCU_WIDTH, encoder->in.width - lcu_offset_x * LCU_WIDTH);
        int height = MIN(height_in_lcu * LCU_WIDTH, encoder->in.height - lcu_offset_y * LCU_WIDTH);

        new_child = &child_state->children[child_count];
        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_TILE;
        new_child->frame = child_state->frame;
        new_child->tile = MALLOC(encoder_state_config_tile_t, 1);
        new_child->slice = child_state->slice;
        new_child->wfrow = child_state->wfrow;

        if (!new_child->tile || !encoder_state_config_tile_init(new_child, lcu_offset_x, lcu_offset_y, width, height, width_in_lcu, height_in_lcu)) {
          fprintf(stderr, "Could not initialize encoder_state->tile!\n");
          return 0;
        }
      }

      if (new_child) {
        // Grow by one entry so that a new sentinel fits after the new child.
        // The result goes through a temporary: on failure realloc leaves the
        // old array allocated, and assigning NULL over it would lose it.
        encoder_state_t *grown_children = realloc(child_state->children, sizeof(encoder_state_t) * (2+child_count));
        if (!grown_children) {
          fprintf(stderr, "Failed to allocate memory for children...\n");
          // The new child took over the sentinel slot and has not been
          // initialized as a state yet; turn its slot back into the sentinel
          // so the array is properly terminated again.
          child_state->children[child_count].encoder_control = NULL;
          return 0;
        }
        child_state->children = grown_children;

        child_state->children[1 + child_count].encoder_control = NULL;

        //Fix children parent (since we changed the address), except for the last one which is not ready yet
        {
          uint32_t i, j;
          for (i = 0; child_state->children[i].encoder_control && i < child_count; ++i) {
            for (j = 0; child_state->children[i].children[j].encoder_control; ++j) {
              child_state->children[i].children[j].parent = &child_state->children[i];
            }
            for (j = 0; j < child_state->children[i].lcu_order_count; ++j) {
              child_state->children[i].lcu_order[j].encoder_state = &child_state->children[i];
            }
            child_state->children[i].cabac.stream = &child_state->children[i].stream;
          }
        }

        if (!uvg_encoder_state_init(&child_state->children[child_count], child_state)) {
          fprintf(stderr, "Unable to init child...\n");
          return 0;
        }
        child_count += 1;
      }

      range_start = MAX(range_end_slice, range_end_tile) + 1;
    }

    //We create wavefronts only if we have no children
    if (children_allow_wavefront_row && child_count == 0) {
      int first_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu;
      int last_row = encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] / encoder->in.width_in_lcu;
      int num_rows;
      int i;

      assert(!(children_allow_slice || children_allow_tile));
      assert(child_count == 0);

      for (i=start_in_ts; i<end_in_ts; ++i) {
        const int row = encoder->tiles_ctb_addr_ts_to_rs[i] / encoder->in.width_in_lcu;
        if (row < first_row) first_row = row;
        if (row > last_row) last_row = row;
      }

      num_rows = last_row - first_row + 1;

      //When entropy_coding_sync_enabled_flag is equal to 1 and the first coding tree block in a slice is not the first coding
      //tree block of a row of coding tree blocks in a tile, it is a requirement of bitstream conformance that the last coding tree
      //block in the slice shall belong to the same row of coding tree blocks as the first coding tree block in the slice.

      if (encoder->tiles_ctb_addr_ts_to_rs[start_in_ts] % encoder->in.width_in_lcu != child_state->tile->lcu_offset_x) {
        if (num_rows > 1) {
          fprintf(stderr, "Invalid: first CTB in slice %d is not at the tile %d edge, and the slice spans on more than one row.\n", child_state->slice->id, child_state->tile->id);
          return 0;
        }
      }

      //FIXME Do the same kind of check if we implement slice segments

      // One entry per row plus the sentinel. On failure the existing
      // one-entry array (just the sentinel) is left untouched.
      encoder_state_t *row_children = realloc(child_state->children, sizeof(encoder_state_t) * (num_rows + 1));
      if (!row_children) {
        fprintf(stderr, "Failed to allocate memory for children...\n");
        return 0;
      }
      child_state->children = row_children;
      child_count = num_rows;
      child_state->children[num_rows].encoder_control = NULL;

      for (i=0; i < num_rows; ++i) {
        encoder_state_t *new_child = &child_state->children[i];

        new_child->encoder_control = encoder;
        new_child->type = ENCODER_STATE_TYPE_WAVEFRONT_ROW;
        new_child->frame = child_state->frame;
        new_child->tile = child_state->tile;
        new_child->slice = child_state->slice;
        new_child->wfrow = MALLOC(encoder_state_config_wfrow_t, 1);

        if (!new_child->wfrow || !encoder_state_config_wfrow_init(new_child, i)) {
          fprintf(stderr, "Could not initialize encoder_state->wfrow!\n");
          return 0;
        }

        if (!uvg_encoder_state_init(new_child, child_state)) {
          fprintf(stderr, "Unable to init child...\n");
          return 0;
        }
      }
    }

    child_state->is_leaf = (child_count == 0);
    //This node is a leaf, compute LCU-order
    if (child_state->is_leaf) {
      //All LCU computations are relative to the tile
      //Remark: this could be optimized, but since it's run only once, it's better to do it in a understandable way.

      //By default, the full tile
      int lcu_id;
      int lcu_start = 0;
      //End is the element AFTER the end (iterate < lcu_end)
      int lcu_end = child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu;

      //Restrict to the current slice if needed
      lcu_start = MAX(lcu_start, child_state->slice->start_in_ts - child_state->tile->lcu_offset_in_ts);
      lcu_end = MIN(lcu_end, child_state->slice->end_in_ts - child_state->tile->lcu_offset_in_ts + 1);

      //Restrict to the current wavefront row if needed
      if (child_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
        lcu_start = MAX(lcu_start, (child_state->wfrow->lcu_offset_y) * child_state->tile->frame->width_in_lcu);
        lcu_end = MIN(lcu_end, (child_state->wfrow->lcu_offset_y + 1) * child_state->tile->frame->width_in_lcu);
      }

      child_state->lcu_order_count = lcu_end - lcu_start;
      child_state->lcu_order = MALLOC(lcu_order_element_t, child_state->lcu_order_count);
      assert(child_state->lcu_order);

      for (uint32_t i = 0; i < child_state->lcu_order_count; ++i) {
        lcu_id = lcu_start + i;
        child_state->lcu_order[i].encoder_state = child_state;
        child_state->lcu_order[i].id = lcu_id;
        child_state->lcu_order[i].index = i;
        child_state->lcu_order[i].position.x = lcu_id % child_state->tile->frame->width_in_lcu;
        child_state->lcu_order[i].position.y = lcu_id / child_state->tile->frame->width_in_lcu;
        child_state->lcu_order[i].position_px.x = child_state->lcu_order[i].position.x * LCU_WIDTH;
        child_state->lcu_order[i].position_px.y = child_state->lcu_order[i].position.y * LCU_WIDTH;
        child_state->lcu_order[i].size.x = MIN(LCU_WIDTH, encoder->in.width - (child_state->tile->lcu_offset_x * LCU_WIDTH + child_state->lcu_order[i].position_px.x));
        child_state->lcu_order[i].size.y = MIN(LCU_WIDTH, encoder->in.height - (child_state->tile->lcu_offset_y * LCU_WIDTH + child_state->lcu_order[i].position_px.y));
        child_state->lcu_order[i].first_row = uvg_lcu_in_first_row(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        child_state->lcu_order[i].last_row = uvg_lcu_in_last_row(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        child_state->lcu_order[i].first_column = uvg_lcu_in_first_column(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);
        child_state->lcu_order[i].last_column = uvg_lcu_in_last_column(child_state, child_state->tile->lcu_offset_in_ts + lcu_id);

        child_state->lcu_order[i].above = NULL;
        child_state->lcu_order[i].below = NULL;
        child_state->lcu_order[i].left = NULL;
        child_state->lcu_order[i].right = NULL;

        if (!child_state->lcu_order[i].first_row) {
          //Find LCU above
          if (child_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) {
            uint32_t j;
            //For all previous wavefront rows
            for (j=0; &child_state->parent->children[j] != child_state && child_state->parent->children[j].encoder_control; ++j) {
              if (child_state->parent->children[j].wfrow->lcu_offset_y == child_state->wfrow->lcu_offset_y - 1) {
                uint32_t k;
                for (k=0; k < child_state->parent->children[j].lcu_order_count; ++k) {
                  if (child_state->parent->children[j].lcu_order[k].position.x == child_state->lcu_order[i].position.x) {
                    assert(child_state->parent->children[j].lcu_order[k].position.y == child_state->lcu_order[i].position.y - 1);
                    child_state->lcu_order[i].above = &child_state->parent->children[j].lcu_order[k];
                  }
                }
              }
            }
          } else {
            child_state->lcu_order[i].above = &child_state->lcu_order[i-child_state->tile->frame->width_in_lcu];
          }
          assert(child_state->lcu_order[i].above);
          child_state->lcu_order[i].above->below = &child_state->lcu_order[i];
        }
        if (!child_state->lcu_order[i].first_column) {
          child_state->lcu_order[i].left = &child_state->lcu_order[i-1];
          assert(child_state->lcu_order[i].left->position.x == child_state->lcu_order[i].position.x - 1);
          child_state->lcu_order[i].left->right = &child_state->lcu_order[i];
        }
      }
    } else {
      child_state->lcu_order_count = 0;
      child_state->lcu_order = NULL;
    }
  }

  //Validate the structure
  if (child_state->type == ENCODER_STATE_TYPE_TILE) {
    if (child_state->tile->lcu_offset_in_ts < child_state->slice->start_in_ts) {
      fprintf(stderr, "Tile %d starts before slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id);
      return 0;
    }
    if (child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu - 1 > child_state->slice->end_in_ts) {
      fprintf(stderr, "Tile %d ends after slice %d, in which it should be included!\n", child_state->tile->id, child_state->slice->id);
      return 0;
    }
  }

  if (child_state->type == ENCODER_STATE_TYPE_SLICE) {
    if (child_state->slice->start_in_ts < child_state->tile->lcu_offset_in_ts) {
      fprintf(stderr, "Slice %d starts before tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id);
      return 0;
    }
    if (child_state->slice->end_in_ts > child_state->tile->lcu_offset_in_ts + child_state->tile->frame->width_in_lcu * child_state->tile->frame->height_in_lcu - 1) {
      fprintf(stderr, "Slice %d ends after tile %d, in which it should be included!\n", child_state->slice->id, child_state->tile->id);
      return 0;
    }
  }

#ifdef UVG_DEBUG_PRINT_THREADING_INFO
  if (!parent_state) uvg_dbg_encoder_state_dump_graphviz(child_state);
#endif //UVG_DEBUG_PRINT_THREADING_INFO
  return 1;
}
|
|
|
|
/**
 * \brief Tear down an encoder state and, recursively, all of its children.
 *
 * Children are finalized first. A configuration structure (wfrow, slice,
 * tile, frame) is released only by the state that does not share it with its
 * parent, i.e. the root state or a state whose pointer differs from the
 * parent's.
 *
 * \param state   encoder state to finalize
 */
void uvg_encoder_state_finalize(encoder_state_t * const state) {
  if (state->children) {
    // The children array ends at the first entry without an encoder_control.
    int child = 0;
    while (state->children[child].encoder_control) {
      uvg_encoder_state_finalize(&state->children[child]);
      ++child;
    }

    FREE_POINTER(state->children);
  }

  if (state->type == ENCODER_STATE_TYPE_MAIN) {
    encoder_state_main_finalize(state);
  }

  FREE_POINTER(state->lcu_order);
  state->lcu_order_count = 0;

  const int is_root = (state->parent == NULL);

  if (is_root || state->wfrow != state->parent->wfrow) {
    FREE_POINTER(state->wfrow);
  }

  if (is_root || state->slice != state->parent->slice) {
    FREE_POINTER(state->slice);
  }

  if (is_root || state->tile != state->parent->tile) {
    encoder_state_config_tile_finalize(state);
    FREE_POINTER(state->tile);
  }

  if (is_root || state->frame != state->parent->frame) {
    encoder_state_config_frame_finalize(state);
    FREE_POINTER(state->frame);
  }

  if (state->constraint) {
    // End of the constraint structure
    uvg_constraint_free(state);
  }

  uvg_bitstream_finalize(&state->stream);

  uvg_threadqueue_free_job(&state->tqj_recon_done);
  uvg_threadqueue_free_job(&state->tqj_bitstream_written);

  if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {
    // The ALF job is released on the topmost ancestor of this state.
    encoder_state_t* root = state;
    for (; root->parent; root = root->parent) {
    }
    uvg_threadqueue_free_job(&root->tqj_alf_process);
  }
}
|