/***************************************************************************** * This file is part of Kvazaar HEVC encoder. * * Copyright (C) 2013-2014 Tampere University of Technology and others (see * COPYING file). * * Kvazaar is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as published * by the Free Software Foundation. * * Kvazaar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Kvazaar. If not, see . ****************************************************************************/ /* * \file */ #include "encoderstate.h" #include #include #include #include #include #include "tables.h" #include "config.h" #include "cabac.h" #include "image.h" #include "nal.h" #include "context.h" #include "transform.h" #include "intra.h" #include "inter.h" #include "filter.h" #include "search.h" #include "sao.h" #include "rdo.h" /*! \brief Initializes lambda-value for current QP Implementation closer to HM (Used HM12 as reference) - Still missing functionality when GOP and B-pictures are used */ void encoder_state_init_lambda(encoder_state * const encoder_state) { double qp = encoder_state->global->QP; double lambda_scale = 1.0; double qp_temp = qp - 12; double lambda; // Default QP-factor from HM config double qp_factor = 0.4624; if (encoder_state->global->slicetype == SLICE_I) { qp_factor=0.57*lambda_scale; } lambda = qp_factor*pow( 2.0, qp_temp/3.0 ); if (encoder_state->global->slicetype != SLICE_I ) { lambda *= 0.95; } encoder_state->global->cur_lambda_cost = lambda; } int encoder_state_match_children_of_previous_frame(encoder_state * const encoder_state) { int i; for (i = 0; encoder_state->children[i].encoder_control; ++i) { //Child should also exist for previous encoder assert(encoder_state->previous_encoder_state->children[i].encoder_control); encoder_state->children[i].previous_encoder_state = &encoder_state->previous_encoder_state->children[i]; encoder_state_match_children_of_previous_frame(&encoder_state->children[i]); } return 1; } static void encoder_state_blit_pixels(const encoder_state * const target_enc, pixel * const target, const encoder_state * const source_enc, const pixel * const source, const int is_y_channel, int target_stride, int source_stride) { const int source_offset_x = source_enc->tile->lcu_offset_x * LCU_WIDTH; const int source_offset_y = source_enc->tile->lcu_offset_y * LCU_WIDTH; const int target_offset_x = target_enc->tile->lcu_offset_x * LCU_WIDTH; const int target_offset_y = target_enc->tile->lcu_offset_y * LCU_WIDTH; int width; int height; int source_offset; int target_offset; //Do nothing if the source and the destination is the same! if (source_enc->tile == target_enc->tile) return; if (is_y_channel) { target_offset = source_offset_x + source_offset_y * target_enc->tile->frame->width; source_offset = target_offset_x + target_offset_y * source_enc->tile->frame->width; } else { target_offset = source_offset_x/2 + source_offset_y/2 * target_enc->tile->frame->width/2; source_offset = target_offset_x/2 + target_offset_y/2 * source_enc->tile->frame->width/2; } if (target_enc->children) { //Use information from the source width = MIN(source_enc->tile->frame->width_in_lcu * LCU_WIDTH, target_enc->tile->frame->width - source_offset_x); height = MIN(source_enc->tile->frame->height_in_lcu * LCU_WIDTH, target_enc->tile->frame->height - source_offset_y); } else { //Use information from the target width = MIN(target_enc->tile->frame->width_in_lcu * LCU_WIDTH, source_enc->tile->frame->width - target_offset_x); height = MIN(target_enc->tile->frame->height_in_lcu * LCU_WIDTH, source_enc->tile->frame->height - target_offset_y); } if (!is_y_channel) { width /= 2; height /= 2; source_stride /= 2; target_stride /= 2; } //picture_blit_pixels(source + source_offset, target + target_offset, width, height, source_enc->cur_pic->width, target_enc->cur_pic->width); pixels_blit(source + source_offset, target + target_offset, width, height, source_stride, target_stride); } static void encoder_state_recdata_to_bufs(encoder_state * const encoder_state, const lcu_order_element * const lcu, yuv_t * const hor_buf, yuv_t * const ver_buf) { videoframe* const frame = encoder_state->tile->frame; if (hor_buf) { const int rdpx = lcu->position_px.x; const int rdpy = lcu->position_px.y + lcu->size.y - 1; const int by = lcu->position.y; //Copy the bottom row of this LCU to the horizontal buffer pixels_blit(&frame->rec->y[rdpy * frame->width + rdpx], &hor_buf->y[lcu->position_px.x + by * frame->width], lcu->size.x, 1, frame->width, frame->width); pixels_blit(&frame->rec->u[(rdpy/2) * frame->width/2 + (rdpx/2)], &hor_buf->u[lcu->position_px.x / 2 + by * frame->width / 2], lcu->size.x / 2, 1, frame->width / 2, frame->width / 2); pixels_blit(&frame->rec->v[(rdpy/2) * frame->width/2 + (rdpx/2)], &hor_buf->v[lcu->position_px.x / 2 + by * frame->width / 2], lcu->size.x / 2, 1, frame->width / 2, frame->width / 2); } if (ver_buf) { const int rdpx = lcu->position_px.x + lcu->size.x - 1; const int rdpy = lcu->position_px.y; const int bx = lcu->position.x; //Copy the right row of this LCU to the vertical buffer. pixels_blit(&frame->rec->y[rdpy * frame->width + rdpx], &ver_buf->y[lcu->position_px.y + bx * frame->height], 1, lcu->size.y, frame->width, 1); pixels_blit(&frame->rec->u[(rdpy/2) * frame->width/2 + (rdpx/2)], &ver_buf->u[lcu->position_px.y / 2 + bx * frame->height / 2], 1, lcu->size.y / 2, frame->width / 2, 1); pixels_blit(&frame->rec->v[(rdpy/2) * frame->width/2 + (rdpx/2)], &ver_buf->v[lcu->position_px.y / 2 + bx * frame->height / 2], 1, lcu->size.y / 2, frame->width / 2, 1); } } static void encode_sao_color(encoder_state * const encoder_state, sao_info *sao, color_index color_i) { cabac_data * const cabac = &encoder_state->cabac; sao_eo_cat i; // Skip colors with no SAO. //FIXME: for now, we always have SAO for all channels if (color_i == COLOR_Y && 0) return; if (color_i != COLOR_Y && 0) return; /// sao_type_idx_luma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass} /// sao_type_idx_chroma: TR, cMax = 2, cRiceParam = 0, bins = {0, bypass} // Encode sao_type_idx for Y and U+V. if (color_i != COLOR_V) { cabac->ctx = &(cabac->ctx_sao_type_idx_model);; CABAC_BIN(cabac, sao->type != SAO_TYPE_NONE, "sao_type_idx"); if (sao->type == SAO_TYPE_BAND) { CABAC_BIN_EP(cabac, 0, "sao_type_idx_ep"); } else if (sao->type == SAO_TYPE_EDGE) { CABAC_BIN_EP(cabac, 1, "sao_type_idx_ep"); } } if (sao->type == SAO_TYPE_NONE) return; /// sao_offset_abs[][][][]: TR, cMax = (1 << (Min(bitDepth, 10) - 5)) - 1, /// cRiceParam = 0, bins = {bypass x N} for (i = SAO_EO_CAT1; i <= SAO_EO_CAT4; ++i) { cabac_write_unary_max_symbol_ep(cabac, abs(sao->offsets[i]), SAO_ABS_OFFSET_MAX); } /// sao_offset_sign[][][][]: FL, cMax = 1, bins = {bypass} /// sao_band_position[][][]: FL, cMax = 31, bins = {bypass x N} /// sao_eo_class_luma: FL, cMax = 3, bins = {bypass x 3} /// sao_eo_class_chroma: FL, cMax = 3, bins = {bypass x 3} if (sao->type == SAO_TYPE_BAND) { for (i = SAO_EO_CAT1; i <= SAO_EO_CAT4; ++i) { // Positive sign is coded as 0. if(sao->offsets[i] != 0) { CABAC_BIN_EP(cabac, sao->offsets[i] < 0 ? 1 : 0, "sao_offset_sign"); } } // TODO: sao_band_position // FL cMax=31 (5 bits) CABAC_BINS_EP(cabac, sao->band_position, 5, "sao_band_position"); } else if (color_i != COLOR_V) { CABAC_BINS_EP(cabac, sao->eo_class, 2, "sao_eo_class"); } } static void encode_sao_merge_flags(encoder_state * const encoder_state, sao_info *sao, unsigned x_ctb, unsigned y_ctb) { cabac_data * const cabac = &encoder_state->cabac; // SAO merge flags are not present for the first row and column. if (x_ctb > 0) { cabac->ctx = &(cabac->ctx_sao_merge_flag_model); CABAC_BIN(cabac, sao->merge_left_flag, "sao_merge_left_flag"); } if (y_ctb > 0 && !sao->merge_left_flag) { cabac->ctx = &(cabac->ctx_sao_merge_flag_model); CABAC_BIN(cabac, sao->merge_up_flag, "sao_merge_up_flag"); } } /** * \brief Encode SAO information. */ static void encode_sao(encoder_state * const encoder_state, unsigned x_lcu, uint16_t y_lcu, sao_info *sao_luma, sao_info *sao_chroma) { // TODO: transmit merge flags outside sao_info encode_sao_merge_flags(encoder_state, sao_luma, x_lcu, y_lcu); // If SAO is merged, nothing else needs to be coded. if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) { encode_sao_color(encoder_state, sao_luma, COLOR_Y); encode_sao_color(encoder_state, sao_chroma, COLOR_U); encode_sao_color(encoder_state, sao_chroma, COLOR_V); } } static void encoder_state_worker_encode_lcu(void * opaque) { const lcu_order_element * const lcu = opaque; encoder_state *encoder_state = lcu->encoder_state; const encoder_control * const encoder = encoder_state->encoder_control; videoframe* const frame = encoder_state->tile->frame; //This part doesn't write to bitstream, it's only search, deblock and sao search_lcu(encoder_state, lcu->position_px.x, lcu->position_px.y, encoder_state->tile->hor_buf_search, encoder_state->tile->ver_buf_search); encoder_state_recdata_to_bufs(encoder_state, lcu, encoder_state->tile->hor_buf_search, encoder_state->tile->ver_buf_search); if (encoder->deblock_enable) { filter_deblock_lcu(encoder_state, lcu->position_px.x, lcu->position_px.y); } if (encoder->sao_enable) { const int stride = frame->width_in_lcu; sao_info *sao_luma = &frame->sao_luma[lcu->position.y * stride + lcu->position.x]; sao_info *sao_chroma = &frame->sao_chroma[lcu->position.y * stride + lcu->position.x]; init_sao_info(sao_luma); init_sao_info(sao_chroma); { sao_info *sao_top = lcu->position.y != 0 ? &frame->sao_luma[(lcu->position.y - 1) * stride + lcu->position.x] : NULL; sao_info *sao_left = lcu->position.x != 0 ? &frame->sao_luma[lcu->position.y * stride + lcu->position.x -1] : NULL; sao_search_luma(encoder_state, frame, lcu->position.x, lcu->position.y, sao_luma, sao_top, sao_left); } { sao_info *sao_top = lcu->position.y != 0 ? &frame->sao_chroma[(lcu->position.y - 1) * stride + lcu->position.x] : NULL; sao_info *sao_left = lcu->position.x != 0 ? &frame->sao_chroma[lcu->position.y * stride + lcu->position.x - 1] : NULL; sao_search_chroma(encoder_state, frame, lcu->position.x, lcu->position.y, sao_chroma, sao_top, sao_left); } // Merge only if both luma and chroma can be merged sao_luma->merge_left_flag = sao_luma->merge_left_flag & sao_chroma->merge_left_flag; sao_luma->merge_up_flag = sao_luma->merge_up_flag & sao_chroma->merge_up_flag; assert(sao_luma->eo_class < SAO_NUM_EO); assert(sao_chroma->eo_class < SAO_NUM_EO); CHECKPOINT_SAO_INFO("sao_luma", *sao_luma); CHECKPOINT_SAO_INFO("sao_chroma", *sao_chroma); } //Now write data to bitstream (required to have a correct CABAC state) //First LCU, and we are in a slice. We need a slice header if (encoder_state->type == ENCODER_STATE_TYPE_SLICE && lcu->index == 0) { encoder_state_write_bitstream_slice_header(encoder_state); bitstream_align(&encoder_state->stream); } //Encode SAO if (encoder->sao_enable) { encode_sao(encoder_state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]); } //Encode coding tree encode_coding_tree(encoder_state, lcu->position.x << MAX_DEPTH, lcu->position.y << MAX_DEPTH, 0); //Terminator if (lcu->index < encoder_state->lcu_order_count - 1) { //Since we don't handle slice segments, end of slice segment == end of slice //Always 0 since otherwise it would be split cabac_encode_bin_trm(&encoder_state->cabac, 0); // end_of_slice_segment_flag } //Wavefronts need the context to be copied to the next row if (encoder_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && lcu->index == 1) { int j; //Find next encoder (next row) for (j=0; encoder_state->parent->children[j].encoder_control; ++j) { if (encoder_state->parent->children[j].wfrow->lcu_offset_y == encoder_state->wfrow->lcu_offset_y + 1) { //And copy context context_copy(&encoder_state->parent->children[j], encoder_state); } } } if (encoder->sao_enable && lcu->above) { //If we're not the first in the row if (lcu->above->left) { encoder_state_recdata_to_bufs(encoder_state, lcu->above->left, encoder_state->tile->hor_buf_before_sao, NULL); } //Latest LCU in the row, copy the data from the one above also if (!lcu->right) { encoder_state_recdata_to_bufs(encoder_state, lcu->above, encoder_state->tile->hor_buf_before_sao, NULL); } } } static void encoder_state_encode_leaf(encoder_state * const encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; int i = 0; assert(encoder_state->is_leaf); assert(encoder_state->lcu_order_count > 0); //If we're not using wavefronts, or we have a WAVEFRONT_ROW which is the single child of its parent, than we should not use parallelism if (encoder_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW || (encoder_state->type == ENCODER_STATE_TYPE_WAVEFRONT_ROW && !encoder_state->parent->children[1].encoder_control)) { for (i = 0; i < encoder_state->lcu_order_count; ++i) { PERFORMANCE_MEASURE_START(); encoder_state_worker_encode_lcu(&encoder_state->lcu_order[i]); #ifdef _DEBUG { const lcu_order_element * const lcu = &encoder_state->lcu_order[i]; PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=search_lcu,frame=%d,tile=%d,slice=%d,position_x=%d,position_y=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, lcu->position.x + encoder_state->tile->lcu_offset_x, lcu->position.y + encoder_state->tile->lcu_offset_y); } #endif //_DEBUG } if (encoder->sao_enable) { PERFORMANCE_MEASURE_START(); sao_reconstruct_frame(encoder_state); PERFORMANCE_MEASURE_END(encoder_state->encoder_control->threadqueue, "type=sao_reconstruct_frame,frame=%d,tile=%d,slice=%d,row=%d-%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, encoder_state->lcu_order[0].position.y + encoder_state->tile->lcu_offset_y, encoder_state->lcu_order[encoder_state->lcu_order_count-1].position.y + encoder_state->tile->lcu_offset_y); } } else { for (i = 0; i < encoder_state->lcu_order_count; ++i) { const lcu_order_element * const lcu = &encoder_state->lcu_order[i]; #ifdef _DEBUG char job_description[256]; sprintf(job_description, "type=search_lcu,frame=%d,tile=%d,slice=%d,row=%d,position_x=%d,position_y=%d", encoder_state->global->frame, encoder_state->tile->id, encoder_state->slice->id, encoder_state->wfrow->lcu_offset_y, lcu->position.x + encoder_state->tile->lcu_offset_x, lcu->position.y + encoder_state->tile->lcu_offset_y); #else char* job_description = NULL; #endif encoder_state->tile->wf_jobs[lcu->id] = threadqueue_submit(encoder_state->encoder_control->threadqueue, encoder_state_worker_encode_lcu, (void*)lcu, 1, job_description); if (encoder_state->tile->wf_jobs[lcu->id]) { if (lcu->position.x > 0) { // Wait for the LCU on the left. threadqueue_job_dep_add(encoder_state->tile->wf_jobs[lcu->id], encoder_state->tile->wf_jobs[lcu->id - 1]); } if (lcu->position.y > 0) { if (lcu->position.x < encoder_state->tile->frame->width_in_lcu - 1) { // Wait for the LCU to the top-right of this one. threadqueue_job_dep_add(encoder_state->tile->wf_jobs[lcu->id], encoder_state->tile->wf_jobs[lcu->id - encoder_state->tile->frame->width_in_lcu + 1]); } else { // If there is no top-right LCU, wait for the one above. threadqueue_job_dep_add(encoder_state->tile->wf_jobs[lcu->id], encoder_state->tile->wf_jobs[lcu->id - encoder_state->tile->frame->width_in_lcu]); } } threadqueue_job_unwait_job(encoder_state->encoder_control->threadqueue, encoder_state->tile->wf_jobs[lcu->id]); } } } } static void encoder_state_encode(encoder_state * const main_state); static void encoder_state_worker_encode_children(void * opaque) { encoder_state *sub_state = opaque; encoder_state_encode(sub_state); if (sub_state->is_leaf) { if (sub_state->type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) { PERFORMANCE_MEASURE_START(); encoder_state_write_bitstream_leaf(sub_state); PERFORMANCE_MEASURE_END(sub_state->encoder_control->threadqueue, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,row=%d-%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->lcu_order[0].position.y + sub_state->tile->lcu_offset_y, sub_state->lcu_order[sub_state->lcu_order_count-1].position.y + sub_state->tile->lcu_offset_y); } else { threadqueue_job *job; #ifdef _DEBUG char job_description[256]; sprintf(job_description, "type=encoder_state_write_bitstream_leaf,frame=%d,tile=%d,slice=%d,row=%d", sub_state->global->frame, sub_state->tile->id, sub_state->slice->id, sub_state->wfrow->lcu_offset_y); #else char* job_description = NULL; #endif job = threadqueue_submit(sub_state->encoder_control->threadqueue, encoder_state_worker_write_bitstream_leaf, sub_state, 1, job_description); threadqueue_job_dep_add(job, sub_state->tile->wf_jobs[sub_state->wfrow->lcu_offset_y * sub_state->tile->frame->width_in_lcu + sub_state->lcu_order_count - 1]); threadqueue_job_unwait_job(sub_state->encoder_control->threadqueue, job); return; } } } typedef struct { int y; const encoder_state * encoder_state; } worker_sao_reconstruct_lcu_data; static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) { worker_sao_reconstruct_lcu_data *data = opaque; videoframe * const frame = data->encoder_state->tile->frame; unsigned stride = frame->width_in_lcu; int x; //TODO: copy only needed data pixel *new_y_data = MALLOC(pixel, frame->width * frame->height); pixel *new_u_data = MALLOC(pixel, (frame->width * frame->height) >> 2); pixel *new_v_data = MALLOC(pixel, (frame->width * frame->height) >> 2); const int offset = frame->width * (data->y*LCU_WIDTH); const int offset_c = frame->width/2 * (data->y*LCU_WIDTH_C); int num_pixels = frame->width * (LCU_WIDTH + 2); if (num_pixels + offset > frame->width * frame->height) { num_pixels = frame->width * frame->height - offset; } memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(pixel) * num_pixels); memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(pixel) * num_pixels >> 2); memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(pixel) * num_pixels >> 2); if (data->y>0) { //copy first row from buffer memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(pixel)); memcpy(&new_u_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel)); memcpy(&new_v_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel)); } for (x = 0; x < frame->width_in_lcu; x++) { // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma); sao_info *sao_luma = &frame->sao_luma[data->y * stride + x]; sao_info *sao_chroma = &frame->sao_chroma[data->y * stride + x]; sao_reconstruct(data->encoder_state->encoder_control, frame, new_y_data, x, data->y, sao_luma, COLOR_Y); sao_reconstruct(data->encoder_state->encoder_control, frame, new_u_data, x, data->y, sao_chroma, COLOR_U); sao_reconstruct(data->encoder_state->encoder_control, frame, new_v_data, x, data->y, sao_chroma, COLOR_V); } free(new_y_data); free(new_u_data); free(new_v_data); free(opaque); } static int encoder_state_tree_is_a_chain(const encoder_state * const encoder_state) { if (!encoder_state->children[0].encoder_control) return 1; if (encoder_state->children[1].encoder_control) return 0; return encoder_state_tree_is_a_chain(&encoder_state->children[0]); } static void encoder_state_encode(encoder_state * const main_state) { //If we have children, encode at child level if (main_state->children[0].encoder_control) { int i=0; //If we have only one child, than it cannot be the last split in tree int node_is_the_last_split_in_tree = (main_state->children[1].encoder_control != 0); for (i=0; main_state->children[i].encoder_control; ++i) { encoder_state *sub_state = &(main_state->children[i]); if (sub_state->tile != main_state->tile) { const int offset_x = sub_state->tile->lcu_offset_x * LCU_WIDTH; const int offset_y = sub_state->tile->lcu_offset_y * LCU_WIDTH; const int width = MIN(sub_state->tile->frame->width_in_lcu * LCU_WIDTH, main_state->tile->frame->width - offset_x); const int height = MIN(sub_state->tile->frame->height_in_lcu * LCU_WIDTH, main_state->tile->frame->height - offset_y); assert(!sub_state->tile->frame->source); assert(!sub_state->tile->frame->rec); sub_state->tile->frame->source = image_make_subimage(main_state->tile->frame->source, offset_x, offset_y, width, height); sub_state->tile->frame->rec = image_make_subimage(main_state->tile->frame->rec, offset_x, offset_y, width, height); encoder_state_blit_pixels(sub_state, sub_state->tile->frame->source->y, main_state, main_state->tile->frame->source->y, 1, sub_state->tile->frame->source->stride, main_state->tile->frame->source->stride); encoder_state_blit_pixels(sub_state, sub_state->tile->frame->source->u, main_state, main_state->tile->frame->source->u, 0, sub_state->tile->frame->source->stride, main_state->tile->frame->source->stride); encoder_state_blit_pixels(sub_state, sub_state->tile->frame->source->v, main_state, main_state->tile->frame->source->v, 0, sub_state->tile->frame->source->stride, main_state->tile->frame->source->stride); } //To be the last split, we require that every child is a chain node_is_the_last_split_in_tree = node_is_the_last_split_in_tree && encoder_state_tree_is_a_chain(&main_state->children[i]); } //If it's the latest split point if (node_is_the_last_split_in_tree) { for (i=0; main_state->children[i].encoder_control; ++i) { //If we don't have wavefronts, parallelize encoding of children. if (main_state->children[i].type != ENCODER_STATE_TYPE_WAVEFRONT_ROW) { #ifdef _DEBUG char job_description[256]; switch (main_state->children[i].type) { case ENCODER_STATE_TYPE_TILE: sprintf(job_description, "frame=%d,tile=%d,row=%d-%d,position_x=%d,position_y=%d", main_state->children[i].global->frame, main_state->children[i].tile->id, main_state->children[i].lcu_order[0].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].lcu_order[main_state->children[i].lcu_order_count-1].position.y + main_state->children[i].tile->lcu_offset_y, main_state->children[i].tile->lcu_offset_x, main_state->children[i].tile->lcu_offset_y); break; case ENCODER_STATE_TYPE_SLICE: sprintf(job_description, "frame=%d,slice=%d,start_in_ts=%d", main_state->children[i].global->frame, main_state->children[i].slice->id, main_state->children[i].slice->start_in_ts); break; default: sprintf(job_description, "frame=%d,invalid", main_state->children[i].global->frame); break; } #else char* job_description = NULL; #endif threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_encode_children, &(main_state->children[i]), 0, job_description); } else { //Wavefront rows have parallelism at LCU level, so we should not launch multiple threads here! //FIXME: add an assert: we can only have wavefront children encoder_state_worker_encode_children(&(main_state->children[i])); } } //If children are wavefront, we need to reconstruct SAO if (main_state->encoder_control->sao_enable && main_state->children[0].type == ENCODER_STATE_TYPE_WAVEFRONT_ROW) { int y; videoframe * const frame = main_state->tile->frame; threadqueue_job *previous_job = NULL; for (y = 0; y < frame->height_in_lcu; ++y) { worker_sao_reconstruct_lcu_data *data = MALLOC(worker_sao_reconstruct_lcu_data, 1); threadqueue_job *job; #ifdef _DEBUG char job_description[256]; sprintf(job_description, "frame=%d,tile=%d,position_y=%d", main_state->global->frame, main_state->tile->id, y + main_state->tile->lcu_offset_y); #else char* job_description = NULL; #endif data->y = y; data->encoder_state = main_state; job = threadqueue_submit(main_state->encoder_control->threadqueue, encoder_state_worker_sao_reconstruct_lcu, data, 1, job_description); if (previous_job) { threadqueue_job_dep_add(job, previous_job); } previous_job = job; if (y < frame->height_in_lcu - 1) { //Not last row: depend on the last LCU of the row below threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 1) * frame->width_in_lcu + frame->width_in_lcu - 1]); } else { //Last row: depend on the last LCU of the row threadqueue_job_dep_add(job, main_state->tile->wf_jobs[(y + 0) * frame->width_in_lcu + frame->width_in_lcu - 1]); } threadqueue_job_unwait_job(main_state->encoder_control->threadqueue, job); } } threadqueue_flush(main_state->encoder_control->threadqueue); } else { for (i=0; main_state->children[i].encoder_control; ++i) { encoder_state_worker_encode_children(&(main_state->children[i])); } } for (i=0; main_state->children[i].encoder_control; ++i) { encoder_state *sub_state = &(main_state->children[i]); if (sub_state->tile != main_state->tile) { encoder_state_blit_pixels(main_state, main_state->tile->frame->rec->y, sub_state, sub_state->tile->frame->rec->y, 1, main_state->tile->frame->source->stride, sub_state->tile->frame->source->stride); encoder_state_blit_pixels(main_state, main_state->tile->frame->rec->u, sub_state, sub_state->tile->frame->rec->u, 0, main_state->tile->frame->source->stride, sub_state->tile->frame->source->stride); encoder_state_blit_pixels(main_state, main_state->tile->frame->rec->v, sub_state, sub_state->tile->frame->rec->v, 0, main_state->tile->frame->source->stride, sub_state->tile->frame->source->stride); image_free(sub_state->tile->frame->source); image_free(sub_state->tile->frame->rec); sub_state->tile->frame->source = NULL; sub_state->tile->frame->rec = NULL; } } } else { switch (main_state->type) { case ENCODER_STATE_TYPE_TILE: case ENCODER_STATE_TYPE_SLICE: case ENCODER_STATE_TYPE_WAVEFRONT_ROW: encoder_state_encode_leaf(main_state); break; default: fprintf(stderr, "Unsupported leaf type %c!\n", main_state->type); assert(0); } } } static void encoder_state_clear_refs(encoder_state *main_state) { int i; while (main_state->global->ref->used_size) { image_list_rem(main_state->global->ref, main_state->global->ref->used_size - 1); } main_state->global->poc = 0; videoframe_set_poc(main_state->tile->frame, 0); for (i=0; main_state->children[i].encoder_control; ++i) { encoder_state *sub_state = &(main_state->children[i]); encoder_state_clear_refs(sub_state); } } static void encoder_state_new_frame(encoder_state * const main_state) { int i; //FIXME Move this somewhere else! if (main_state->type == ENCODER_STATE_TYPE_MAIN) { const encoder_control * const encoder = main_state->encoder_control; const int is_first_frame = (main_state->global->frame == 0); const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->global->frame % 2 == 0); const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->global->frame % encoder->cfg->intra_period) == 0); main_state->global->is_radl_frame = is_first_frame || is_i_radl || is_p_radl; if (main_state->global->is_radl_frame) { // Clear the reference list encoder_state_clear_refs(main_state); main_state->global->slicetype = SLICE_I; main_state->global->pictype = NAL_IDR_W_RADL; } else { main_state->global->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; main_state->global->pictype = NAL_TRAIL_R; } } else { //Clear the bitstream if it's not the main encoder bitstream_clear(&main_state->stream); } if (main_state->is_leaf) { //Leaf states have cabac and context cabac_start(&main_state->cabac); init_contexts(main_state, main_state->global->QP, main_state->global->slicetype); // Initialize lambda value(s) to use in search encoder_state_init_lambda(main_state); } for (i = 0; main_state->children[i].encoder_control; ++i) { encoder_state_new_frame(&main_state->children[i]); } } void encode_one_frame(encoder_state * const main_state) { { PERFORMANCE_MEASURE_START(); encoder_state_new_frame(main_state); PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=new_frame,frame=%d", main_state->global->frame); } { PERFORMANCE_MEASURE_START(); encoder_state_encode(main_state); PERFORMANCE_MEASURE_END(main_state->encoder_control->threadqueue, "type=encode,frame=%d", main_state->global->frame); } { encoder_state_write_bitstream(main_state); } } static void fill_after_frame(unsigned height, unsigned array_width, unsigned array_height, pixel *data) { pixel* p = data + height * array_width; pixel* end = data + array_width * array_height; while (p < end) { // Fill the line by copying the line above. memcpy(p, p - array_width, array_width); p += array_width; } } static int read_and_fill_frame_data(FILE *file, unsigned width, unsigned height, unsigned array_width, pixel *data) { pixel* p = data; pixel* end = data + array_width * height; pixel fill_char; unsigned i; while (p < end) { // Read the beginning of the line from input. if (width != fread(p, sizeof(unsigned char), width, file)) return 0; // Fill the rest with the last pixel value. fill_char = p[width - 1]; for (i = width; i < array_width; ++i) { p[i] = fill_char; } p += array_width; } return 1; } int read_one_frame(FILE* file, const encoder_state * const encoder_state) { unsigned width = encoder_state->encoder_control->in.real_width; unsigned height = encoder_state->encoder_control->in.real_height; unsigned array_width = encoder_state->tile->frame->width; unsigned array_height = encoder_state->tile->frame->height; if (width != array_width) { // In the case of frames not being aligned on 8 bit borders, bits need to be copied to fill them in. if (!read_and_fill_frame_data(file, width, height, array_width, encoder_state->tile->frame->source->y) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, encoder_state->tile->frame->source->u) || !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1, encoder_state->tile->frame->source->v)) return 0; } else { // Otherwise the data can be read directly to the array. unsigned y_size = width * height; unsigned uv_size = (width >> 1) * (height >> 1); if (y_size != fread(encoder_state->tile->frame->source->y, sizeof(unsigned char), y_size, file) || uv_size != fread(encoder_state->tile->frame->source->u, sizeof(unsigned char), uv_size, file) || uv_size != fread(encoder_state->tile->frame->source->v, sizeof(unsigned char), uv_size, file)) return 0; } if (height != array_height) { fill_after_frame(height, array_width, array_height, encoder_state->tile->frame->source->y); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, encoder_state->tile->frame->source->u); fill_after_frame(height >> 1, array_width >> 1, array_height >> 1, encoder_state->tile->frame->source->v); } return 1; } void encoder_next_frame(encoder_state *encoder_state) { const encoder_control * const encoder = encoder_state->encoder_control; // Remove the ref pic (if present) if (encoder_state->global->ref->used_size == (uint32_t)encoder->cfg->ref_frames) { image_list_rem(encoder_state->global->ref, encoder_state->global->ref->used_size-1); } // Add current reconstructed picture as reference image_list_add(encoder_state->global->ref, encoder_state->tile->frame->rec, encoder_state->tile->frame->cu_array); //Remove current reconstructed picture, and alloc a new one image_free(encoder_state->tile->frame->rec); encoder_state->global->frame++; encoder_state->global->poc++; encoder_state->tile->frame->rec = image_alloc(encoder_state->tile->frame->width, encoder_state->tile->frame->height, encoder_state->global->poc); videoframe_set_poc(encoder_state->tile->frame, encoder_state->global->poc); } void encode_coding_tree(encoder_state * const encoder_state, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth) { cabac_data * const cabac = &encoder_state->cabac; const videoframe * const frame = encoder_state->tile->frame; const cu_info *cur_cu = videoframe_get_cu_const(frame, x_ctb, y_ctb); uint8_t split_flag = GET_SPLITDATA(cur_cu, depth); uint8_t split_model = 0; //Absolute ctb uint16_t abs_x_ctb = x_ctb + (encoder_state->tile->lcu_offset_x * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); uint16_t abs_y_ctb = y_ctb + (encoder_state->tile->lcu_offset_y * LCU_WIDTH) / (LCU_WIDTH >> MAX_DEPTH); // Check for slice border FIXME uint8_t border_x = ((encoder_state->encoder_control->in.width) < (abs_x_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; uint8_t border_y = ((encoder_state->encoder_control->in.height) < (abs_y_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; uint8_t border_split_x = ((encoder_state->encoder_control->in.width) < ((abs_x_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; uint8_t border_split_y = ((encoder_state->encoder_control->in.height) < ((abs_y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; uint8_t border = border_x | border_y; /*!< are we in any border CU */ // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH) { // Implisit split flag when on border if (!border) { // Get left and top block split_flags and if they are present and true, increase model number if (x_ctb > 0 && GET_SPLITDATA(videoframe_get_cu_const(frame, x_ctb - 1, y_ctb), depth) == 1) { split_model++; } if (y_ctb > 0 && GET_SPLITDATA(videoframe_get_cu_const(frame, x_ctb, y_ctb - 1), depth) == 1) { split_model++; } cabac->ctx = &(cabac->ctx_split_flag_model[split_model]); CABAC_BIN(cabac, split_flag, "SplitFlag"); } if (split_flag || border) { // Split blocks and remember to change x and y block positions uint8_t change = 1<<(MAX_DEPTH-1-depth); encode_coding_tree(encoder_state, x_ctb, y_ctb, depth + 1); // x,y // TODO: fix when other half of the block would not be completely over the border if (!border_x || border_split_x) { encode_coding_tree(encoder_state, x_ctb + change, y_ctb, depth + 1); } if (!border_y || border_split_y) { encode_coding_tree(encoder_state, x_ctb, y_ctb + change, depth + 1); } if (!border || (border_split_x && border_split_y)) { encode_coding_tree(encoder_state, x_ctb + change, y_ctb + change, depth + 1); } return; } } // Encode skip flag if (encoder_state->global->slicetype != SLICE_I) { int8_t ctx_skip = 0; // uiCtxSkip = aboveskipped + leftskipped; int ui; int16_t num_cand = MRG_MAX_NUM_CANDS; // Get left and top skipped flags and if they are present and true, increase context number if (x_ctb > 0 && (videoframe_get_cu_const(frame, x_ctb - 1, y_ctb))->skipped) { ctx_skip++; } if (y_ctb > 0 && (videoframe_get_cu_const(frame, x_ctb, y_ctb - 1))->skipped) { ctx_skip++; } cabac->ctx = &(cabac->ctx_cu_skip_flag_model[ctx_skip]); CABAC_BIN(cabac, cur_cu->skipped, "SkipFlag"); // IF SKIP if (cur_cu->skipped) { if (num_cand > 1) { for (ui = 0; ui < num_cand - 1; ui++) { int32_t symbol = (ui != cur_cu->merge_idx); if (ui == 0) { cabac->ctx = &(cabac->ctx_cu_merge_idx_ext_model); CABAC_BIN(cabac, symbol, "MergeIndex"); } else { CABAC_BIN_EP(cabac,symbol,"MergeIndex"); } if (symbol == 0) { break; } } } return; } } // ENDIF SKIP // Prediction mode if (encoder_state->global->slicetype != SLICE_I) { cabac->ctx = &(cabac->ctx_cu_pred_mode_model); CABAC_BIN(cabac, (cur_cu->type == CU_INTRA), "PredMode"); } // part_mode if (cur_cu->type == CU_INTRA) { if (depth == MAX_DEPTH) { cabac->ctx = &(cabac->ctx_part_size_model[0]); if (cur_cu->part_size == SIZE_2Nx2N) { CABAC_BIN(cabac, 1, "part_mode 2Nx2N"); } else { CABAC_BIN(cabac, 0, "part_mode NxN"); } } } else { // TODO: Handle inter sizes other than 2Nx2N cabac->ctx = &(cabac->ctx_part_size_model[0]); CABAC_BIN(cabac, 1, "part_mode 2Nx2N"); } //end partsize if (cur_cu->type == CU_INTER) { // FOR each part // Mergeflag int16_t num_cand = 0; cabac->ctx = &(cabac->ctx_cu_merge_flag_ext_model); CABAC_BIN(cabac, cur_cu->merged, "MergeFlag"); num_cand = MRG_MAX_NUM_CANDS; if (cur_cu->merged) { //merge if (num_cand > 1) { int32_t ui; for (ui = 0; ui < num_cand - 1; ui++) { int32_t symbol = (ui != cur_cu->merge_idx); if (ui == 0) { cabac->ctx = &(cabac->ctx_cu_merge_idx_ext_model); CABAC_BIN(cabac, symbol, "MergeIndex"); } else { CABAC_BIN_EP(cabac,symbol,"MergeIndex"); } if (symbol == 0) break; } } } else { uint32_t ref_list_idx; /* // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) if(cur_pic->slicetype == SLICE_B) { // Code Inter Dir const UInt uiInterDir = pcCU->getInterDir( uiAbsPartIdx ) - 1; const UInt uiCtx = pcCU->getCtxInterDir( uiAbsPartIdx ); ContextModel *pCtx = m_cCUInterDirSCModel.get( 0 ); if (pcCU->getPartitionSize(uiAbsPartIdx) == SIZE_2Nx2N || pcCU->getHeight(uiAbsPartIdx) != 8 ) { m_pcBinIf->encodeBin( uiInterDir == 2 ? 1 : 0, *( pCtx + uiCtx ) ); } if (uiInterDir < 2) { m_pcBinIf->encodeBin( uiInterDir, *( pCtx + 4 ) ); } } */ for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) { //if(encoder_state->ref_idx_num[uiRefListIdx] > 0) { if (cur_cu->inter.mv_dir & (1 << ref_list_idx)) { if (encoder_state->global->ref->used_size != 1) { //encoder_state->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1) // parseRefFrmIdx int32_t ref_frame = cur_cu->inter.mv_ref; cabac->ctx = &(cabac->ctx_cu_ref_pic_model[0]); CABAC_BIN(cabac, (ref_frame != 0), "ref_frame_flag"); if (ref_frame > 0) { int32_t i; int32_t ref_num = encoder_state->global->ref->used_size - 2; cabac->ctx = &(cabac->ctx_cu_ref_pic_model[1]); ref_frame--; for (i = 0; i < ref_num; ++i) { const uint32_t symbol = (i == ref_frame) ? 0 : 1; if (i == 0) { CABAC_BIN(cabac, symbol, "ref_frame_flag2"); } else { CABAC_BIN_EP(cabac, symbol, "ref_frame_flag2"); } if (symbol == 0) break; } } } if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder_state->global->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { const int32_t mvd_hor = cur_cu->inter.mvd[0]; const int32_t mvd_ver = cur_cu->inter.mvd[1]; const int8_t hor_abs_gr0 = mvd_hor != 0; const int8_t ver_abs_gr0 = mvd_ver != 0; const uint32_t mvd_hor_abs = abs(mvd_hor); const uint32_t mvd_ver_abs = abs(mvd_ver); cabac->ctx = &(cabac->ctx_cu_mvd_model[0]); CABAC_BIN(cabac, (mvd_hor != 0), "abs_mvd_greater0_flag_hor"); CABAC_BIN(cabac, (mvd_ver != 0), "abs_mvd_greater0_flag_ver"); cabac->ctx = &(cabac->ctx_cu_mvd_model[1]); if (hor_abs_gr0) { CABAC_BIN(cabac, (mvd_hor_abs>1), "abs_mvd_greater1_flag_hor"); } if (ver_abs_gr0) { CABAC_BIN(cabac, (mvd_ver_abs>1), "abs_mvd_greater1_flag_ver"); } if (hor_abs_gr0) { if (mvd_hor_abs > 1) { cabac_write_ep_ex_golomb(cabac,mvd_hor_abs-2, 1); } CABAC_BIN_EP(cabac, (mvd_hor>0)?0:1, "mvd_sign_flag_hor"); } if (ver_abs_gr0) { if (mvd_ver_abs > 1) { cabac_write_ep_ex_golomb(cabac,mvd_ver_abs-2, 1); } CABAC_BIN_EP(cabac, (mvd_ver>0)?0:1, "mvd_sign_flag_ver"); } } // Signal which candidate MV to use cabac_write_unary_max_symbol(cabac, cabac->ctx_mvp_idx_model, cur_cu->inter.mv_cand, 1, AMVP_MAX_NUM_CANDS - 1); } } } // for ref_list } // if !merge { int cbf = (cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual if (!cur_cu->merged) { cabac->ctx = &(cabac->ctx_cu_qt_root_cbf_model); CABAC_BIN(cabac, cbf, "rqt_root_cbf"); } // Code (possible) coeffs to bitstream if (cbf) { encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); } } // END for each part } else if (cur_cu->type == CU_INTRA) { uint8_t intra_pred_mode[4] = { cur_cu->intra[0].mode, cur_cu->intra[1].mode, cur_cu->intra[2].mode, cur_cu->intra[3].mode }; uint8_t intra_pred_mode_chroma = cur_cu->intra[0].mode_chroma; int8_t intra_preds[4][3] = {{-1, -1, -1},{-1, -1, -1},{-1, -1, -1},{-1, -1, -1}}; int8_t mpm_preds[4] = {-1, -1, -1, -1}; int i, j; uint32_t flag[4]; int num_pred_units = (cur_cu->part_size == SIZE_2Nx2N ? 1 : 4); #if ENABLE_PCM == 1 // Code must start after variable initialization cabac_encode_bin_trm(cabac, 0); // IPCMFlag == 0 #endif // PREDINFO CODING // If intra prediction mode is found from the predictors, // it can be signaled with two EP's. Otherwise we can send // 5 EP bins with the full predmode for (j = 0; j < num_pred_units; ++j) { static const vector2d offset[4] = {{0,0},{1,0},{0,1},{1,1}}; const cu_info *left_cu = NULL; const cu_info *above_cu = NULL; if (x_ctb > 0) { left_cu = videoframe_get_cu_const(frame, x_ctb - 1, y_ctb); } // Don't take the above CU across the LCU boundary. if (y_ctb > 0 && (y_ctb & 7) != 0) { above_cu = videoframe_get_cu_const(frame, x_ctb, y_ctb - 1); } intra_get_dir_luma_predictor((x_ctb<<3) + (offset[j].x<<2), (y_ctb<<3) + (offset[j].y<<2), intra_preds[j], cur_cu, left_cu, above_cu); for (i = 0; i < 3; i++) { if (intra_preds[j][i] == intra_pred_mode[j]) { mpm_preds[j] = (int8_t)i; break; } } flag[j] = (mpm_preds[j] == -1) ? 0 : 1; } cabac->ctx = &(cabac->ctx_intra_mode_model); for (j = 0; j < num_pred_units; ++j) { CABAC_BIN(cabac, flag[j], "prev_intra_luma_pred_flag"); } for (j = 0; j < num_pred_units; ++j) { // Signal index of the prediction mode in the prediction list. if (flag[j]) { CABAC_BIN_EP(cabac, (mpm_preds[j] == 0 ? 0 : 1), "mpm_idx"); if (mpm_preds[j] != 0) { CABAC_BIN_EP(cabac, (mpm_preds[j] == 1 ? 0 : 1), "mpm_idx"); } } else { // Signal the actual prediction mode. int32_t tmp_pred = intra_pred_mode[j]; // Sort prediction list from lowest to highest. if (intra_preds[j][0] > intra_preds[j][1]) SWAP(intra_preds[j][0], intra_preds[j][1], int8_t); if (intra_preds[j][0] > intra_preds[j][2]) SWAP(intra_preds[j][0], intra_preds[j][2], int8_t); if (intra_preds[j][1] > intra_preds[j][2]) SWAP(intra_preds[j][1], intra_preds[j][2], int8_t); // Reduce the index of the signaled prediction mode according to the // prediction list, as it has been already signaled that it's not one // of the prediction modes. for (i = 2; i >= 0; i--) { tmp_pred = (tmp_pred > intra_preds[j][i] ? tmp_pred - 1 : tmp_pred); } CABAC_BINS_EP(cabac, tmp_pred, 5, "rem_intra_luma_pred_mode"); } } { // start intra chroma pred mode coding unsigned pred_mode = 5; unsigned chroma_pred_modes[4] = {0, 26, 10, 1}; if (intra_pred_mode_chroma == intra_pred_mode[0]) { pred_mode = 4; } else if (intra_pred_mode_chroma == 34) { // Angular 34 mode is possible only if intra pred mode is one of the // possible chroma pred modes, in which case it is signaled with that // duplicate mode. for (i = 0; i < 4; ++i) { if (intra_pred_mode[0] == chroma_pred_modes[i]) pred_mode = i; } } else { for (i = 0; i < 4; ++i) { if (intra_pred_mode_chroma == chroma_pred_modes[i]) pred_mode = i; } } // pred_mode == 5 mean intra_pred_mode_chroma is something that can't // be coded. assert(pred_mode != 5); /** * Table 9-35 - Binarization for intra_chroma_pred_mode * intra_chroma_pred_mode bin_string * 4 0 * 0 100 * 1 101 * 2 110 * 3 111 * Table 9-37 - Assignment of ctxInc to syntax elements with context coded bins * intra_chroma_pred_mode[][] = 0, bypass, bypass */ cabac->ctx = &(cabac->ctx_chroma_pred_model[0]); if (pred_mode == 4) { CABAC_BIN(cabac, 0, "intra_chroma_pred_mode"); } else { CABAC_BIN(cabac, 1, "intra_chroma_pred_mode"); CABAC_BINS_EP(cabac, pred_mode, 2, "intra_chroma_pred_mode"); } } // end intra chroma pred mode coding encode_transform_coeff(encoder_state, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0); } #if ENABLE_PCM == 1 // Code IPCM block if (cur_cu->type == CU_PCM) { cabac_encode_bin_trm(cabac, 1); // IPCMFlag == 1 cabac_finish(cabac); bitstream_align(cabac.stream); // PCM sample { unsigned y, x; pixel *base_y = &cur_pic->y_data[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; pixel *base_u = &cur_pic->u_data[(x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * encoder->in.width / 2)]; pixel *base_v = &cur_pic->v_data[(x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * encoder->in.width / 2)]; // Luma for (y = 0; y < LCU_WIDTH >> depth; y++) { for (x = 0; x < LCU_WIDTH >> depth; x++) { bitstream_put(cabac.stream, base_y[x + y * encoder->in.width], 8); } } // Chroma if (encoder->in.video_format != FORMAT_400) { for (y = 0; y < LCU_WIDTH >> (depth + 1); y++) { for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { bitstream_put(cabac.stream, base_u[x + y * (encoder->in.width >> 1)], 8); } } for (y = 0; y < LCU_WIDTH >> (depth + 1); y++) { for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { bitstream_put(cabac.stream, base_v[x + y * (encoder->in.width >> 1)], 8); } } } } // end PCM sample cabac_start(cabac); } // end Code IPCM block #endif /* END ENABLE_PCM */ else { /* Should not happend */ printf("UNHANDLED TYPE!\r\n"); assert(0); exit(1); } /* end prediction unit */ /* end coding_unit */ } coeff_scan_order_t get_scan_order(int8_t cu_type, int intra_mode, int depth) { // Scan mode is diagonal, except for 4x4+8x8 luma and 4x4 chroma, where: // - angular 6-14 = vertical // - angular 22-30 = horizontal if (cu_type == CU_INTRA && depth >= 3) { if (intra_mode >= 6 && intra_mode <= 14) { return SCAN_VER; } else if (intra_mode >= 22 && intra_mode <= 30) { return SCAN_HOR; } } return SCAN_DIAG; } static void encode_transform_unit(encoder_state * const encoder_state, int x_pu, int y_pu, int depth) { const videoframe * const frame = encoder_state->tile->frame; uint8_t width = LCU_WIDTH >> depth; uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); int x_cu = x_pu / 2; int y_cu = y_pu / 2; const cu_info *cur_cu = videoframe_get_cu_const(frame, x_cu, y_cu); coefficient coeff_y[LCU_WIDTH*LCU_WIDTH+1]; coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; int32_t coeff_stride = frame->width; int8_t scan_idx = get_scan_order(cur_cu->type, cur_cu->intra[PU_INDEX(x_pu, y_pu)].mode, depth); int cbf_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); if (cbf_y) { int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH); int y = y_pu * (LCU_WIDTH >> MAX_PU_DEPTH); coefficient *orig_pos = &frame->coeff_y[x + y * frame->width]; for (y = 0; y < width; y++) { for (x = 0; x < width; x++) { coeff_y[x+y*width] = orig_pos[x]; } orig_pos += coeff_stride; } } // CoeffNxN // Residual Coding if (cbf_y) { encode_coeff_nxn(encoder_state, coeff_y, width, 0, scan_idx, cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip); } if (depth == MAX_DEPTH + 1 && !(x_pu % 2 && y_pu % 2)) { // For size 4x4 luma transform the corresponding chroma transforms are // also of size 4x4 covering 8x8 luma pixels. The residual is coded // in the last transform unit so for the other ones, don't do anything. return; } if (cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth)) { int x, y; coefficient *orig_pos_u, *orig_pos_v; if (depth <= MAX_DEPTH) { x = x_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); y = y_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); } else { // for 4x4 select top left pixel of the CU. x = x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); y = y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); } orig_pos_u = &frame->coeff_u[x + y * (frame->width >> 1)]; orig_pos_v = &frame->coeff_v[x + y * (frame->width >> 1)]; for (y = 0; y < (width_c); y++) { for (x = 0; x < (width_c); x++) { coeff_u[x+y*(width_c)] = orig_pos_u[x]; coeff_v[x+y*(width_c)] = orig_pos_v[x]; } orig_pos_u += coeff_stride>>1; orig_pos_v += coeff_stride>>1; } scan_idx = get_scan_order(cur_cu->type, cur_cu->intra[0].mode_chroma, depth); if (cbf_is_set(cur_cu->cbf.u, depth)) { encode_coeff_nxn(encoder_state, coeff_u, width_c, 2, scan_idx, 0); } if (cbf_is_set(cur_cu->cbf.v, depth)) { encode_coeff_nxn(encoder_state, coeff_v, width_c, 2, scan_idx, 0); } } } /** * \param encoder * \param x_pu Prediction units' x coordinate. * \param y_pu Prediction units' y coordinate. * \param depth Depth from LCU. * \param tr_depth Depth from last CU. * \param parent_coeff_u What was signaled at previous level for cbf_cb. * \param parent_coeff_v What was signlaed at previous level for cbf_cr. */ void encode_transform_coeff(encoder_state * const encoder_state, int32_t x_pu,int32_t y_pu, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v) { cabac_data * const cabac = &encoder_state->cabac; int32_t x_cu = x_pu / 2; int32_t y_cu = y_pu / 2; const videoframe * const frame = encoder_state->tile->frame; const cu_info *cur_cu = videoframe_get_cu_const(frame, x_cu, y_cu); // NxN signifies implicit transform split at the first transform level. // There is a similar implicit split for inter, but it is only used when // transform hierarchy is not in use. int intra_split_flag = (cur_cu->type == CU_INTRA && cur_cu->part_size == SIZE_NxN); // The implicit split by intra NxN is not counted towards max_tr_depth. int max_tr_depth = (cur_cu->type == CU_INTRA ? TR_DEPTH_INTRA + intra_split_flag : TR_DEPTH_INTER); int8_t split = (cur_cu->tr_depth > depth); const int cb_flag_y = cbf_is_set(cur_cu->cbf.y, depth + PU_INDEX(x_pu, y_pu)); const int cb_flag_u = cbf_is_set(cur_cu->cbf.u, depth); const int cb_flag_v = cbf_is_set(cur_cu->cbf.v, depth); // The split_transform_flag is not signaled when: // - transform size is greater than 32 (depth == 0) // - transform size is 4 (depth == MAX_PU_DEPTH) // - transform depth is max // - cu is intra NxN and it's the first split if (depth > 0 && depth < MAX_PU_DEPTH && tr_depth < max_tr_depth && !(intra_split_flag && tr_depth == 0)) { cabac->ctx = &(cabac->ctx_trans_subdiv_model[5 - ((g_convert_to_bit[LCU_WIDTH] + 2) - depth)]); CABAC_BIN(cabac, split, "split_transform_flag"); } // Chroma cb flags are not signaled when one of the following: // - transform size is 4 (2x2 chroma transform doesn't exist) // - they have already been signaled to 0 previously // When they are not present they are inferred to be 0, except for size 4 // when the flags from previous level are used. if (depth < MAX_PU_DEPTH) { cabac->ctx = &(cabac->ctx_qt_cbf_model_chroma[tr_depth]); if (tr_depth == 0 || parent_coeff_u) { CABAC_BIN(cabac, cb_flag_u, "cbf_cb"); } if (tr_depth == 0 || parent_coeff_v) { CABAC_BIN(cabac, cb_flag_v, "cbf_cr"); } } if (split) { uint8_t pu_offset = 1 << (MAX_PU_DEPTH - (depth + 1)); encode_transform_coeff(encoder_state, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); encode_transform_coeff(encoder_state, x_pu + pu_offset, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); encode_transform_coeff(encoder_state, x_pu, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); encode_transform_coeff(encoder_state, x_pu + pu_offset, y_pu + pu_offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); return; } // Luma coded block flag is signaled when one of the following: // - prediction mode is intra // - transform depth > 0 // - we have chroma coefficients at this level // When it is not present, it is inferred to be 1. if(cur_cu->type == CU_INTRA || tr_depth > 0 || cb_flag_u || cb_flag_v) { cabac->ctx = &(cabac->ctx_qt_cbf_model_luma[!tr_depth]); CABAC_BIN(cabac, cb_flag_y, "cbf_luma"); } if (cb_flag_y | cb_flag_u | cb_flag_v) { encode_transform_unit(encoder_state, x_pu, y_pu, depth); } } void encode_coeff_nxn(encoder_state * const encoder_state, coefficient *coeff, uint8_t width, uint8_t type, int8_t scan_mode, int8_t tr_skip) { const encoder_control * const encoder = encoder_state->encoder_control; cabac_data * const cabac = &encoder_state->cabac; int c1 = 1; uint8_t last_coeff_x = 0; uint8_t last_coeff_y = 0; int32_t i; uint32_t sig_coeffgroup_flag[64]; uint32_t num_nonzero = 0; int32_t scan_pos_last = -1; int32_t pos_last = 0; int32_t shift = 4>>1; int8_t be_valid = ENABLE_SIGN_HIDING; int32_t scan_pos_sig; int32_t last_scan_set; uint32_t go_rice_param = 0; uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig; // CONSTANTS const uint32_t num_blk_side = width >> shift; const uint32_t log2_block_size = g_convert_to_bit[width] + 2; const uint32_t *scan = g_sig_last_scan[scan_mode][log2_block_size - 1]; const uint32_t *scan_cg = g_sig_last_scan_cg[log2_block_size - 2][scan_mode]; // Init base contexts according to block type cabac_ctx *base_coeff_group_ctx = &(cabac->ctx_cu_sig_coeff_group_model[type]); cabac_ctx *baseCtx = (type == 0) ? &(cabac->ctx_cu_sig_model_luma[0]) : &(cabac->ctx_cu_sig_model_chroma[0]); memset(sig_coeffgroup_flag,0,sizeof(uint32_t)*64); // Count non-zero coeffs for (i = 0; i < width * width; i++) { if (coeff[i] != 0) { num_nonzero++; } } // Transforms with no non-zero coefficients are indicated with CBFs. assert(num_nonzero != 0); // transform skip flag if(width == 4 && encoder->trskip_enable) { cabac->ctx = (type == 0) ? &(cabac->ctx_transform_skip_model_luma) : &(cabac->ctx_transform_skip_model_chroma); CABAC_BIN(cabac, tr_skip, "transform_skip_flag"); } scan_pos_last = -1; // Significance mapping while (num_nonzero > 0) { pos_last = scan[++scan_pos_last]; #define POSY (pos_last >> log2_block_size) #define POSX (pos_last - ( POSY << log2_block_size )) if (coeff[pos_last] != 0) { sig_coeffgroup_flag[(num_blk_side * (POSY >> shift) + (POSX >> shift))] = 1; } num_nonzero -= (coeff[pos_last] != 0) ? 1 : 0; #undef POSY #undef POSX } last_coeff_x = pos_last & (width - 1); last_coeff_y = (uint8_t)(pos_last >> log2_block_size); // Code last_coeff_x and last_coeff_y encode_last_significant_xy(encoder_state, last_coeff_x, last_coeff_y, width, width, type, scan_mode); scan_pos_sig = scan_pos_last; last_scan_set = (scan_pos_last >> 4); // significant_coeff_flag for (i = last_scan_set; i >= 0; i--) { int32_t sub_pos = i << 4; // LOG2_SCAN_SET_SIZE; int32_t abs_coeff[16]; int32_t cg_blk_pos = scan_cg[i]; int32_t cg_pos_y = cg_blk_pos / num_blk_side; int32_t cg_pos_x = cg_blk_pos - (cg_pos_y * num_blk_side); uint32_t coeff_signs = 0; int32_t last_nz_pos_in_cg = -1; int32_t first_nz_pos_in_cg = 16; int32_t num_non_zero = 0; go_rice_param = 0; if (scan_pos_sig == scan_pos_last) { abs_coeff[0] = abs(coeff[pos_last]); coeff_signs = (coeff[pos_last] < 0); num_non_zero = 1; last_nz_pos_in_cg = scan_pos_sig; first_nz_pos_in_cg = scan_pos_sig; scan_pos_sig--; } if (i == last_scan_set || i == 0) { sig_coeffgroup_flag[cg_blk_pos] = 1; } else { uint32_t sig_coeff_group = (sig_coeffgroup_flag[cg_blk_pos] != 0); uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); cabac->ctx = &base_coeff_group_ctx[ctx_sig]; CABAC_BIN(cabac, sig_coeff_group, "coded_sub_block_flag"); } if (sig_coeffgroup_flag[cg_blk_pos]) { int32_t pattern_sig_ctx = context_calc_pattern_sig_ctx(sig_coeffgroup_flag, cg_pos_x, cg_pos_y, width); for (; scan_pos_sig >= sub_pos; scan_pos_sig--) { blk_pos = scan[scan_pos_sig]; pos_y = blk_pos >> log2_block_size; pos_x = blk_pos - (pos_y << log2_block_size); sig = (coeff[blk_pos] != 0) ? 1 : 0; if (scan_pos_sig > sub_pos || i == 0 || num_non_zero) { ctx_sig = context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y, log2_block_size, type); cabac->ctx = &baseCtx[ctx_sig]; CABAC_BIN(cabac, sig, "sig_coeff_flag"); } if (sig) { abs_coeff[num_non_zero] = abs(coeff[blk_pos]); coeff_signs = 2 * coeff_signs + (coeff[blk_pos] < 0); num_non_zero++; if (last_nz_pos_in_cg == -1) { last_nz_pos_in_cg = scan_pos_sig; } first_nz_pos_in_cg = scan_pos_sig; } } } else { scan_pos_sig = sub_pos - 1; } if (num_non_zero > 0) { int8_t sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 /*SBH_THRESHOLD*/) ? 1 : 0; uint32_t ctx_set = (i > 0 && type == 0) ? 2 : 0; cabac_ctx *base_ctx_mod; int32_t num_c1_flag, first_c2_flag_idx, idx, first_coeff2; if (c1 == 0) { ctx_set++; } c1 = 1; base_ctx_mod = (type == 0) ? &(cabac->ctx_cu_one_model_luma[4 * ctx_set]) : &(cabac->ctx_cu_one_model_chroma[4 * ctx_set]); num_c1_flag = MIN(num_non_zero, C1FLAG_NUMBER); first_c2_flag_idx = -1; for (idx = 0; idx < num_c1_flag; idx++) { uint32_t symbol = (abs_coeff[idx] > 1) ? 1 : 0; cabac->ctx = &base_ctx_mod[c1]; CABAC_BIN(cabac, symbol, "coeff_abs_level_greater1_flag"); if (symbol) { c1 = 0; if (first_c2_flag_idx == -1) { first_c2_flag_idx = idx; } } else if ((c1 < 3) && (c1 > 0)) { c1++; } } if (c1 == 0) { base_ctx_mod = (type == 0) ? &(cabac->ctx_cu_abs_model_luma[ctx_set]) : &(cabac->ctx_cu_abs_model_chroma[ctx_set]); if (first_c2_flag_idx != -1) { uint8_t symbol = (abs_coeff[first_c2_flag_idx] > 2) ? 1 : 0; cabac->ctx = &base_ctx_mod[0]; CABAC_BIN(cabac, symbol, "coeff_abs_level_greater2_flag"); } } if (be_valid && sign_hidden) { CABAC_BINS_EP(cabac, (coeff_signs >> 1), (num_non_zero - 1), "coeff_sign_flag"); } else { CABAC_BINS_EP(cabac, coeff_signs, num_non_zero, "coeff_sign_flag"); } if (c1 == 0 || num_non_zero > C1FLAG_NUMBER) { first_coeff2 = 1; for (idx = 0; idx < num_non_zero; idx++) { int32_t base_level = (idx < C1FLAG_NUMBER) ? (2 + first_coeff2) : 1; if (abs_coeff[idx] >= base_level) { cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); if (abs_coeff[idx] > 3 * (1 << go_rice_param)) { go_rice_param = MIN(go_rice_param + 1, 4); } } if (abs_coeff[idx] >= 2) { first_coeff2 = 0; } } } } } } /*! \brief Encode (X,Y) position of the last significant coefficient \param lastpos_x X component of last coefficient \param lastpos_y Y component of last coefficient \param width Block width \param height Block height \param type plane type / luminance or chrominance \param scan scan type (diag, hor, ver) This method encodes the X and Y component within a block of the last significant coefficient. */ void encode_last_significant_xy(encoder_state * const encoder_state, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, uint8_t type, uint8_t scan) { cabac_data * const cabac = &encoder_state->cabac; uint8_t offset_x = type?0:((TOBITS(width)*3) + ((TOBITS(width)+1)>>2)),offset_y = offset_x; uint8_t shift_x = type?(TOBITS(width)):((TOBITS(width)+3)>>2), shift_y = shift_x; int group_idx_x; int group_idx_y; int last_x,last_y,i; cabac_ctx *base_ctx_x = (type ? cabac->ctx_cu_ctx_last_x_chroma : cabac->ctx_cu_ctx_last_x_luma); cabac_ctx *base_ctx_y = (type ? cabac->ctx_cu_ctx_last_y_chroma : cabac->ctx_cu_ctx_last_y_luma); if (scan == SCAN_VER) { SWAP( lastpos_x, lastpos_y,uint8_t ); } group_idx_x = g_group_idx[lastpos_x]; group_idx_y = g_group_idx[lastpos_y]; // Last X binarization for (last_x = 0; last_x < group_idx_x ; last_x++) { cabac->ctx = &base_ctx_x[offset_x + (last_x >> shift_x)]; CABAC_BIN(cabac,1,"last_sig_coeff_x_prefix"); } if (group_idx_x < g_group_idx[width - 1]) { cabac->ctx = &base_ctx_x[offset_x + (last_x >> shift_x)]; CABAC_BIN(cabac,0,"last_sig_coeff_x_prefix"); } // Last Y binarization for (last_y = 0; last_y < group_idx_y ; last_y++) { cabac->ctx = &base_ctx_y[offset_y + (last_y >> shift_y)]; CABAC_BIN(cabac,1,"last_sig_coeff_y_prefix"); } if (group_idx_y < g_group_idx[height - 1]) { cabac->ctx = &base_ctx_y[offset_y + (last_y >> shift_y)]; CABAC_BIN(cabac,0,"last_sig_coeff_y_prefix"); } // Last X if (group_idx_x > 3) { lastpos_x -= g_min_in_group[group_idx_x]; for (i = ((group_idx_x - 2) >> 1) - 1; i >= 0; i--) { CABAC_BIN_EP(cabac,(lastpos_x>>i) & 1,"last_sig_coeff_x_suffix"); } } // Last Y if (group_idx_y > 3) { lastpos_y -= g_min_in_group[group_idx_y]; for (i = ((group_idx_y - 2) >> 1) - 1; i >= 0; i--) { CABAC_BIN_EP(cabac,(lastpos_y>>i) & 1,"last_sig_coeff_y_suffix"); } } // end LastSignificantXY }