mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
e78a8dfcf5
The kvz_config struct is created by the user but kvazaar keeps a pointer to it. It is easy to break things by modifying the configuration outside kvazaar. In addition, kvazaar modifies the struct even though it has a const modifier. This commit changes the field cfg in encoder_control_t to be a copy of the kvz_config struct instead of a pointer, removing modifications to the const struct and allowing users to do whatever they want with it after opening the encoder.
849 lines
30 KiB
C
849 lines
30 KiB
C
/*****************************************************************************
|
|
* This file is part of Kvazaar HEVC encoder.
|
|
*
|
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
|
* COPYING file).
|
|
*
|
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
|
* the terms of the GNU Lesser General Public License as published by the
|
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
|
* option) any later version.
|
|
*
|
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
|
****************************************************************************/
|
|
|
|
#include "sao.h"
|
|
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "cabac.h"
|
|
#include "image.h"
|
|
#include "rdo.h"
|
|
#include "strategies/strategies-sao.h"
|
|
|
|
|
|
/**
 * \brief Reset the type and merge flags of a SAO parameter set.
 *
 * Only type, merge_left_flag and merge_up_flag are written; every other
 * member of the struct is left untouched.
 *
 * \param sao  SAO parameters to reset.
 */
static void init_sao_info(sao_info_t *sao)
{
  sao->merge_left_flag = 0;
  sao->merge_up_flag = 0;
  sao->type = SAO_TYPE_NONE;
}
|
|
|
|
|
|
/**
 * \brief Estimate the bits needed to signal that no SAO is used.
 *
 * Adds the cost of a zero merge flag bin for every merge candidate that is
 * not NULL, followed by the cost of a zero type index bin.
 *
 * \param state     Encoder state whose CABAC contexts supply the bit costs.
 * \param sao_top   Top merge candidate, or NULL.
 * \param sao_left  Left merge candidate, or NULL.
 * \return Estimated number of bits.
 */
static float sao_mode_bits_none(const encoder_state_t * const state, sao_info_t *sao_top, sao_info_t *sao_left)
{
  const cabac_ctx_t *merge_ctx = &state->cabac.ctx.sao_merge_flag_model;
  const cabac_ctx_t *type_ctx = &state->cabac.ctx.sao_type_idx_model;
  float bits = 0.0;

  // FL coded merge flags, one per available candidate, both signalled as 0.
  if (sao_left != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }
  if (sao_top != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }

  // TR coded type index, none = 0.
  bits += CTX_ENTROPY_FBITS(type_ctx, 0);

  return bits;
}
|
|
|
|
static float sao_mode_bits_merge(const encoder_state_t * const state,
|
|
int8_t merge_cand) {
|
|
float mode_bits = 0.0;
|
|
const cabac_data_t * const cabac = &state->cabac;
|
|
const cabac_ctx_t *ctx = NULL;
|
|
// FL coded merges.
|
|
ctx = &(cabac->ctx.sao_merge_flag_model);
|
|
|
|
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 1);
|
|
if (merge_cand == 1) return mode_bits;
|
|
mode_bits += CTX_ENTROPY_FBITS(ctx, merge_cand == 2);
|
|
return mode_bits;
|
|
}
|
|
|
|
|
|
/**
 * \brief Estimate the bits needed to signal edge offset SAO.
 *
 * \param state       Encoder state whose CABAC contexts supply the bit costs.
 * \param edge_class  Not used by the estimate.
 * \param offsets     Offsets, read at index category + 5 * buffer index for
 *                    categories SAO_EO_CAT1..SAO_EO_CAT4.
 * \param sao_top     Top merge candidate, or NULL.
 * \param sao_left    Left merge candidate, or NULL.
 * \param buf_cnt     Number of buffers whose offsets are counted.
 * \return Estimated number of bits.
 */
static float sao_mode_bits_edge(const encoder_state_t * const state,
                                int edge_class, int offsets[NUM_SAO_EDGE_CATEGORIES],
                                sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
  const cabac_ctx_t *merge_ctx = &state->cabac.ctx.sao_merge_flag_model;
  const cabac_ctx_t *type_ctx = &state->cabac.ctx.sao_type_idx_model;
  float bits = 0.0;

  // FL coded merge flags, one per available candidate, both signalled as 0.
  if (sao_left != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }
  if (sao_top != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }

  // TR coded type index, edge = 2 = cMax: one context coded bin plus one
  // extra bit.
  bits += CTX_ENTROPY_FBITS(type_ctx, 1) + 1.0;

  // TR coded offsets. Zero and the maximum magnitude are counted one bit
  // shorter than the other magnitudes.
  for (unsigned buf = 0; buf < buf_cnt; buf++) {
    for (int cat = SAO_EO_CAT1; cat <= SAO_EO_CAT4; ++cat) {
      const int magnitude = abs(offsets[cat + 5 * buf]);
      const int is_short = (magnitude == 0 || magnitude == SAO_ABS_OFFSET_MAX);
      const int code_len = magnitude + (is_short ? 1 : 2);
      bits += code_len;
    }
  }

  // Two more bits are always added on top of the offsets.
  bits += 2.0;

  return bits;
}
|
|
|
|
|
|
/**
 * \brief Estimate the bits needed to signal band offset SAO.
 *
 * \param state          Encoder state whose CABAC contexts supply the bit
 *                       costs.
 * \param band_position  Not used by the estimate.
 * \param offsets        Offsets, read at index 1 + i + 5 * buffer index for
 *                       i = 0..3.
 * \param sao_top        Top merge candidate, or NULL.
 * \param sao_left       Left merge candidate, or NULL.
 * \param buf_cnt        Number of buffers whose offsets are counted.
 * \return Estimated number of bits.
 */
static float sao_mode_bits_band(const encoder_state_t * const state,
                                int band_position[2], int offsets[10],
                                sao_info_t *sao_top, sao_info_t *sao_left, unsigned buf_cnt)
{
  const cabac_ctx_t *merge_ctx = &state->cabac.ctx.sao_merge_flag_model;
  const cabac_ctx_t *type_ctx = &state->cabac.ctx.sao_type_idx_model;
  float bits = 0.0;

  // FL coded merge flags, one per available candidate, both signalled as 0.
  if (sao_left != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }
  if (sao_top != NULL) {
    bits += CTX_ENTROPY_FBITS(merge_ctx, 0);
  }

  // TR coded type index, band = 1: one context coded bin plus one extra bit.
  bits += CTX_ENTROPY_FBITS(type_ctx, 1) + 1.0;

  // TR coded offsets and possible FL coded offset signs.
  for (unsigned buf = 0; buf < buf_cnt; buf++) {
    for (int i = 0; i < 4; ++i) {
      const int magnitude = abs(offsets[i + 1 + buf * 5]);
      int code_len = magnitude + 1;
      if (magnitude != 0) {
        // Non-zero offsets cost one more bit at the maximum magnitude and
        // two more bits otherwise.
        code_len += (magnitude == SAO_ABS_OFFSET_MAX) ? 1 : 2;
      }
      bits += code_len;
    }
  }

  // FL coded band position, five bits per buffer.
  bits += 5.0 * buf_cnt;

  return bits;
}
|
|
|
|
|
|
/**
|
|
* \brief calculate an array of intensity correlations for each intensity value
|
|
*/
|
|
void kvz_calc_sao_offset_array(const encoder_control_t * const encoder, const sao_info_t *sao, int *offset, color_t color_i)
|
|
{
|
|
int val;
|
|
int values = (1<<encoder->bitdepth);
|
|
int shift = encoder->bitdepth-5;
|
|
int band_pos = (color_i == COLOR_V) ? 1 : 0;
|
|
|
|
// Loop through all intensity values and construct an offset array
|
|
for (val = 0; val < values; val++) {
|
|
int cur_band = val>>shift;
|
|
if (cur_band >= sao->band_position[band_pos] && cur_band < sao->band_position[band_pos] + 4) {
|
|
offset[val] = CLIP(0, values - 1, val + sao->offsets[cur_band - sao->band_position[band_pos] + 1 + 5 * band_pos]);
|
|
} else {
|
|
offset[val] = val;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* \param orig_data Original pixel data. 64x64 for luma, 32x32 for chroma.
|
|
* \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
|
|
* \param sao_bands an array of bands for original and reconstructed block
|
|
*/
|
|
static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4],
|
|
int *band_position)
|
|
{
|
|
int band;
|
|
int offset;
|
|
int best_dist;
|
|
int temp_dist;
|
|
int dist[32];
|
|
int temp_offsets[32];
|
|
int temp_rate[32];
|
|
int best_dist_pos = 0;
|
|
|
|
FILL(dist, 0);
|
|
FILL(temp_rate, 0);
|
|
|
|
// Calculate distortion for each band using N*h^2 - 2*h*E
|
|
for (band = 0; band < 32; band++) {
|
|
best_dist = INT_MAX;
|
|
offset = 0;
|
|
if (sao_bands[1][band] != 0) {
|
|
offset = (sao_bands[0][band] + (sao_bands[1][band] >> 1)) / sao_bands[1][band];
|
|
offset = CLIP(-SAO_ABS_OFFSET_MAX, SAO_ABS_OFFSET_MAX, offset);
|
|
}
|
|
dist[band] = offset==0?0:INT_MAX;
|
|
temp_offsets[band] = 0;
|
|
while(offset != 0) {
|
|
temp_dist = sao_bands[1][band]*offset*offset - 2*offset*sao_bands[0][band];
|
|
|
|
// Store best distortion and offset
|
|
if(temp_dist < best_dist) {
|
|
dist[band] = temp_dist;
|
|
temp_offsets[band] = offset;
|
|
}
|
|
offset += (offset > 0) ? -1:1;
|
|
}
|
|
}
|
|
|
|
best_dist = INT_MAX;
|
|
//Find starting pos for best 4 band distortions
|
|
for (band = 0; band < 28; band++) {
|
|
temp_dist = dist[band] + dist[band+1] + dist[band+2] + dist[band+3];
|
|
if(temp_dist < best_dist) {
|
|
best_dist = temp_dist;
|
|
best_dist_pos = band;
|
|
}
|
|
}
|
|
// Copy best offsets to output
|
|
memcpy(offsets, &temp_offsets[best_dist_pos], 4*sizeof(int));
|
|
|
|
*band_position = best_dist_pos;
|
|
|
|
return best_dist;
|
|
}
|
|
|
|
/**
|
|
* \param orig_data Original pixel data. 64x64 for luma, 32x32 for chroma.
|
|
* \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
|
|
* \param sao_bands an array of bands for original and reconstructed block
|
|
*/
|
|
static void calc_sao_bands(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data,
|
|
int block_width, int block_height,
|
|
int sao_bands[2][32])
|
|
{
|
|
int y, x;
|
|
int shift = state->encoder_control->bitdepth-5;
|
|
|
|
//Loop pixels and take top 5 bits to classify different bands
|
|
for (y = 0; y < block_height; ++y) {
|
|
for (x = 0; x < block_width; ++x) {
|
|
sao_bands[0][rec_data[y * block_width + x]>>shift] += orig_data[y * block_width + x] - rec_data[y * block_width + x];
|
|
sao_bands[1][rec_data[y * block_width + x]>>shift]++;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Calculate dimensions of the buffer used by sao reconstruction.
|
|
|
|
* \param pic Picture.
|
|
* \param sao Sao parameters.
|
|
* \param rec Top-left corner of the LCU
|
|
*/
|
|
static void sao_calc_band_block_dims(const videoframe_t *frame, color_t color_i,
|
|
vector2d_t *rec, vector2d_t *block)
|
|
{
|
|
const int is_chroma = (color_i != COLOR_Y ? 1 : 0);
|
|
int width = frame->width >> is_chroma;
|
|
int height = frame->height >> is_chroma;
|
|
int block_width = LCU_WIDTH >> is_chroma;
|
|
|
|
|
|
// Handle right and bottom, taking care of non-LCU sized CUs.
|
|
if (rec->y + block_width >= height) {
|
|
if (rec->y + block_width >= height) {
|
|
block->y = height - rec->y;
|
|
}
|
|
}
|
|
if (rec->x + block_width >= width) {
|
|
if (rec->x + block_width > width) {
|
|
block->x = width - rec->x;
|
|
}
|
|
}
|
|
|
|
rec->x = 0; rec->y = 0;
|
|
}
|
|
|
|
/**
|
|
* \brief Calculate dimensions of the buffer used by sao reconstruction.
|
|
*
|
|
* This function calculates 4 vectors that can be used to make the temporary
|
|
* buffers required by sao_reconstruct_color.
|
|
*
|
|
* Vector block is the area affected by sao. Vectors tr and br are top-left
|
|
* margin and bottom-right margin, which contain pixels that are not modified
|
|
* by the reconstruction of this LCU but are needed by the reconstruction.
|
|
* Vector rec is the offset from the CU to the required pixel area.
|
|
*
|
|
* The margins are always either 0 or 1, depending on the direction of the
|
|
* edge offset class.
|
|
*
|
|
* This also takes into account borders of the picture and non-LCU sized
|
|
* CU's at the bottom and right of the picture.
|
|
*
|
|
* \ CU + rec
|
|
* +------+
|
|
* |\ tl |
|
|
* | +--+ |
|
|
* | |\ block
|
|
* | | \| |
|
|
* | +--+ |
|
|
* | \ br
|
|
* +------+
|
|
*
|
|
* \param pic Picture.
|
|
* \param sao Sao parameters.
|
|
* \param rec Top-left corner of the LCU, modified to be top-left corner of
|
|
*/
|
|
static void sao_calc_edge_block_dims(const videoframe_t * const frame, color_t color_i,
|
|
const sao_info_t *sao, vector2d_t *rec,
|
|
vector2d_t *tl, vector2d_t *br,
|
|
vector2d_t *block)
|
|
{
|
|
vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0];
|
|
vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1];
|
|
const int is_chroma = (color_i != COLOR_Y ? 1 : 0);
|
|
int width = frame->width >> is_chroma;
|
|
int height = frame->height >> is_chroma;
|
|
int block_width = LCU_WIDTH >> is_chroma;
|
|
|
|
// Handle top and left.
|
|
if (rec->y == 0) {
|
|
tl->y = 0;
|
|
if (a_ofs.y == -1 || b_ofs.y == -1) {
|
|
block->y -= 1;
|
|
tl->y += 1;
|
|
}
|
|
}
|
|
if (rec->x == 0) {
|
|
tl->x = 0;
|
|
if (a_ofs.x == -1 || b_ofs.x == -1) {
|
|
block->x -= 1;
|
|
tl->x += 1;
|
|
}
|
|
}
|
|
|
|
// Handle right and bottom, taking care of non-LCU sized CUs.
|
|
if (rec->y + block_width >= height) {
|
|
br->y = 0;
|
|
block->y -= block_width + rec->y - height;
|
|
if (a_ofs.y == 1 || b_ofs.y == 1) {
|
|
block->y -= 1;
|
|
br->y += 1;
|
|
}
|
|
}
|
|
if (rec->x + block_width >= width) {
|
|
br->x = 0;
|
|
block->x -= block_width + rec->x - width;
|
|
if (a_ofs.x == 1 || b_ofs.x == 1) {
|
|
block->x -= 1;
|
|
br->x += 1;
|
|
}
|
|
}
|
|
|
|
rec->y = (rec->y == 0 ? 0 : -1);
|
|
rec->x = (rec->x == 0 ? 0 : -1);
|
|
}
|
|
|
|
void kvz_sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const kvz_pixel *old_rec,
|
|
unsigned x_ctb, unsigned y_ctb,
|
|
const sao_info_t *sao, color_t color_i)
|
|
{
|
|
const int is_chroma = (color_i != COLOR_Y ? 1 : 0);
|
|
const int pic_stride = frame->width >> is_chroma;
|
|
const int lcu_stride = LCU_WIDTH >> is_chroma;
|
|
const int buf_stride = lcu_stride + 2;
|
|
|
|
kvz_pixel *recdata = frame->rec->data[color_i];
|
|
kvz_pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)];
|
|
kvz_pixel new_rec[LCU_WIDTH * LCU_WIDTH];
|
|
// Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32.
|
|
kvz_pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)];
|
|
const kvz_pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)];
|
|
|
|
vector2d_t ofs;
|
|
vector2d_t tl = { 1, 1 };
|
|
vector2d_t br = { 1, 1 };
|
|
vector2d_t block;
|
|
|
|
if (sao->type == SAO_TYPE_NONE) {
|
|
return;
|
|
}
|
|
|
|
ofs.x = x_ctb * lcu_stride;
|
|
ofs.y = y_ctb * lcu_stride;
|
|
block.x = lcu_stride;
|
|
block.y = lcu_stride;
|
|
if (sao->type == SAO_TYPE_BAND) {
|
|
tl.x = 0; tl.y = 0;
|
|
br.x = 0; br.y = 0;
|
|
sao_calc_band_block_dims(frame, color_i, &ofs, &block);
|
|
}
|
|
else {
|
|
sao_calc_edge_block_dims(frame, color_i, sao, &ofs, &tl, &br, &block);
|
|
}
|
|
|
|
assert(ofs.x + tl.x + block.x + br.x <= frame->width);
|
|
assert(ofs.y + tl.y + block.y + br.y <= frame->height);
|
|
|
|
CHECKPOINT("ofs.x=%d ofs.y=%d tl.x=%d tl.y=%d block.x=%d block.y=%d br.x=%d br.y=%d",
|
|
ofs.x, ofs.y, tl.x, tl.y, block.x, block.y, br.x, br.y);
|
|
|
|
// Data to tmp buffer.
|
|
kvz_pixels_blit(&old_lcu_rec[ofs.y * pic_stride + ofs.x],
|
|
buf_rec,
|
|
tl.x + block.x + br.x,
|
|
tl.y + block.y + br.y,
|
|
pic_stride, buf_stride);
|
|
|
|
kvz_sao_reconstruct_color(encoder, &buf_rec[tl.y * buf_stride + tl.x],
|
|
&new_rec[(ofs.y + tl.y) * lcu_stride + ofs.x + tl.x],
|
|
sao,
|
|
buf_stride, lcu_stride,
|
|
block.x, block.y, color_i);
|
|
|
|
// Copy reconstructed block from tmp buffer to rec image.
|
|
kvz_pixels_blit(&new_rec[(tl.y + ofs.y) * lcu_stride + (tl.x + ofs.x)],
|
|
&lcu_rec[(tl.y + ofs.y) * (frame->rec->stride >> is_chroma) + (tl.x + ofs.x)],
|
|
block.x, block.y, lcu_stride, frame->rec->stride >> is_chroma);
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Search the edge offset class and offsets with the lowest estimated
 *        cost.
 *
 * \param state         Encoder state; supplies lambda and CABAC contexts.
 * \param data          Array of pointers to reference pixels.
 * \param recdata       Array of pointers to reconstructed pixels.
 * \param block_width   Width of the area to be examined.
 * \param block_height  Height of the area to be examined.
 * \param buf_cnt       Number of pointers data and recdata have.
 * \param sao_out       Output: type is set to SAO_TYPE_EDGE, and eo_class,
 *                      offsets and ddistortion describe the best class.
 * \param sao_top       Top merge candidate, or NULL.
 * \param sao_left      Left merge candidate, or NULL.
 */
static void sao_search_edge_sao(const encoder_state_t * const state,
                                const kvz_pixel * data[], const kvz_pixel * recdata[],
                                int block_width, int block_height,
                                unsigned buf_cnt,
                                sao_info_t *sao_out, sao_info_t *sao_top,
                                sao_info_t *sao_left)
{
  sao_eo_class edge_class;
  // This array is used to calculate the mean offset used to minimize distortion.
  int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES];

  sao_out->type = SAO_TYPE_EDGE;
  sao_out->ddistortion = INT_MAX;

  for (edge_class = SAO_EO0; edge_class <= SAO_EO3; ++edge_class) {
    // Zero-initialised so that category 0 (SAO is not applied for it) and
    // the slots of an unused second buffer hold 0 instead of indeterminate
    // values when the whole array is copied to sao_out.
    int edge_offset[NUM_SAO_EDGE_CATEGORIES * 2] = { 0 };
    int sum_ddistortion = 0;

    // Call calc_sao_edge_dir once for luma and twice for chroma.
    for (unsigned i = 0; i < buf_cnt; ++i) {
      sao_eo_cat edge_cat;

      FILL(cat_sum_cnt, 0);
      kvz_calc_sao_edge_dir(data[i], recdata[i], edge_class,
                            block_width, block_height, cat_sum_cnt);

      for (edge_cat = SAO_EO_CAT1; edge_cat <= SAO_EO_CAT4; ++edge_cat) {
        const int cat_sum = cat_sum_cnt[0][edge_cat];
        const int cat_cnt = cat_sum_cnt[1][edge_cat];

        // The optimum offset can be calculated by getting the minima of the
        // fast ddistortion estimation formula. The minima is the mean error
        // and we round that to the nearest integer.
        int offset = 0;
        if (cat_cnt != 0) {
          offset = (cat_sum + (cat_cnt >> 1)) / cat_cnt;
          offset = CLIP(-SAO_ABS_OFFSET_MAX, SAO_ABS_OFFSET_MAX, offset);
        }

        // Sharpening edge offsets can't be encoded, so set them to 0 here.
        if (edge_cat >= SAO_EO_CAT1 && edge_cat <= SAO_EO_CAT2 && offset < 0) {
          offset = 0;
        }
        if (edge_cat >= SAO_EO_CAT3 && edge_cat <= SAO_EO_CAT4 && offset > 0) {
          offset = 0;
        }

        edge_offset[edge_cat + 5 * i] = offset;

        // The ddistortion is amount by which the SSE of data changes. It
        // should be negative for all categories, if offset was chosen
        // correctly.
        // ddistortion = N * h^2 - 2 * h * E, where N is the number of
        // samples and E is the sum of errors.
        // It basically says that all pixels that are not improved by offset
        // increase SSE by h^2 and all pixels that are improved by offset
        // decrease SSE by h*E.
        sum_ddistortion += cat_cnt * offset * offset - 2 * offset * cat_sum;
      }
    }

    // Add the rate term: estimated bits weighted by lambda, rounded.
    {
      float mode_bits = sao_mode_bits_edge(state, edge_class, edge_offset, sao_top, sao_left, buf_cnt);
      sum_ddistortion += (int)((double)mode_bits*state->lambda +0.5);
    }

    // Choose the offset class that offers the least error after offset.
    if (sum_ddistortion < sao_out->ddistortion) {
      sao_out->eo_class = edge_class;
      sao_out->ddistortion = sum_ddistortion;
      memcpy(sao_out->offsets, edge_offset, sizeof(int) * NUM_SAO_EDGE_CATEGORIES * 2);
    }
  }
}
|
|
|
|
|
|
/**
 * \brief Search the band position and offsets for band offset SAO.
 *
 * \param state         Encoder state; supplies bitdepth, lambda and CABAC
 *                      contexts.
 * \param data          Array of pointers to reference pixels.
 * \param recdata       Array of pointers to reconstructed pixels.
 * \param block_width   Width of the area to be examined.
 * \param block_height  Height of the area to be examined.
 * \param buf_cnt       Number of pointers data and recdata have.
 * \param sao_out       Output: type is set to SAO_TYPE_BAND, and
 *                      band_position, offsets and ddistortion are filled in.
 * \param sao_top       Top merge candidate, or NULL.
 * \param sao_left      Left merge candidate, or NULL.
 */
static void sao_search_band_sao(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[],
                                int block_width, int block_height,
                                unsigned buf_cnt,
                                sao_info_t *sao_out, sao_info_t *sao_top,
                                sao_info_t *sao_left)
{
  int sao_bands[2][32];
  // Slots 0 and 5 are never written by the search but are copied to
  // sao_out below, so the whole array starts out as zero.
  int temp_offsets[10] = { 0 };
  int ddistortion = 0;
  float temp_rate = 0.0;

  sao_out->type = SAO_TYPE_BAND;
  sao_out->ddistortion = INT_MAX;

  // Distortion term: best four bands of every buffer. The offsets of buffer
  // i go to slots 1 + 5 * i onwards.
  for (unsigned i = 0; i < buf_cnt; ++i) {
    FILL(sao_bands, 0);
    calc_sao_bands(state, data[i], recdata[i], block_width,
                   block_height, sao_bands);

    ddistortion += calc_sao_band_offsets(sao_bands, &temp_offsets[1 + 5 * i], &sao_out->band_position[i]);
  }

  // Rate term: estimated bits weighted by lambda, rounded.
  temp_rate = sao_mode_bits_band(state, sao_out->band_position, temp_offsets, sao_top, sao_left, buf_cnt);
  ddistortion += (int)((double)temp_rate*state->lambda + 0.5);

  // Store the result when it beats the initial worst-case cost.
  if (ddistortion < sao_out->ddistortion) {
    sao_out->type = SAO_TYPE_BAND;
    sao_out->ddistortion = ddistortion;
    memcpy(&sao_out->offsets[0], &temp_offsets[0], sizeof(int) * buf_cnt * 5);
  }
}
|
|
|
|
|
|
/**
|
|
* \param data Array of pointers to reference pixels.
|
|
* \param recdata Array of pointers to reconstructed pixels.
|
|
* \param block_width Width of the area to be examined.
|
|
* \param block_height Height of the area to be examined.
|
|
* \param buf_cnt Number of pointers data and recdata have.
|
|
* \param sao_out Output parameter for the best sao parameters.
|
|
*/
|
|
static void sao_search_best_mode(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[],
|
|
int block_width, int block_height,
|
|
unsigned buf_cnt,
|
|
sao_info_t *sao_out, sao_info_t *sao_top,
|
|
sao_info_t *sao_left, int32_t merge_cost[3])
|
|
{
|
|
sao_info_t edge_sao;
|
|
sao_info_t band_sao;
|
|
|
|
init_sao_info(&edge_sao);
|
|
init_sao_info(&band_sao);
|
|
|
|
//Avoid "random" uninitialized value
|
|
edge_sao.band_position[0] = edge_sao.band_position[1] = 0;
|
|
edge_sao.eo_class = SAO_EO0;
|
|
band_sao.offsets[0] = 0;
|
|
band_sao.offsets[5] = 0;
|
|
band_sao.eo_class = SAO_EO0;
|
|
|
|
sao_search_edge_sao(state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
|
|
sao_search_band_sao(state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
|
|
|
|
{
|
|
float mode_bits = sao_mode_bits_edge(state, edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left, buf_cnt);
|
|
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
|
|
unsigned buf_i;
|
|
|
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
|
ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i],
|
|
block_width, block_height,
|
|
edge_sao.eo_class, &edge_sao.offsets[5 * buf_i]);
|
|
}
|
|
|
|
edge_sao.ddistortion = ddistortion;
|
|
}
|
|
|
|
{
|
|
float mode_bits = sao_mode_bits_band(state, band_sao.band_position, band_sao.offsets, sao_top, sao_left, buf_cnt);
|
|
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
|
|
unsigned buf_i;
|
|
|
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
|
ddistortion += kvz_sao_band_ddistortion(state, data[buf_i], recdata[buf_i],
|
|
block_width, block_height,
|
|
band_sao.band_position[buf_i], &band_sao.offsets[1 + 5 * buf_i]);
|
|
}
|
|
|
|
band_sao.ddistortion = ddistortion;
|
|
}
|
|
|
|
if (edge_sao.ddistortion <= band_sao.ddistortion) {
|
|
*sao_out = edge_sao;
|
|
merge_cost[0] = edge_sao.ddistortion;
|
|
} else {
|
|
*sao_out = band_sao;
|
|
merge_cost[0] = band_sao.ddistortion;
|
|
}
|
|
|
|
// Choose between SAO and doing nothing, taking into account the
|
|
// rate-distortion cost of coding do nothing.
|
|
{
|
|
int cost_of_nothing = (int)(sao_mode_bits_none(state, sao_top, sao_left) * state->lambda + 0.5);
|
|
if (sao_out->ddistortion >= cost_of_nothing) {
|
|
sao_out->type = SAO_TYPE_NONE;
|
|
merge_cost[0] = cost_of_nothing;
|
|
}
|
|
}
|
|
|
|
// Calculate merge costs
|
|
if (sao_top || sao_left) {
|
|
sao_info_t* merge_sao[2] = { sao_left, sao_top};
|
|
int i;
|
|
for (i = 0; i < 2; i++) {
|
|
sao_info_t* merge_cand = merge_sao[i];
|
|
|
|
if (merge_cand) {
|
|
unsigned buf_i;
|
|
float mode_bits = sao_mode_bits_merge(state, i + 1);
|
|
int ddistortion = (int)(mode_bits * state->lambda + 0.5);
|
|
|
|
switch (merge_cand->type) {
|
|
case SAO_TYPE_EDGE:
|
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
|
ddistortion += kvz_sao_edge_ddistortion(data[buf_i], recdata[buf_i],
|
|
block_width, block_height,
|
|
merge_cand->eo_class, &merge_cand->offsets[5 * buf_i]);
|
|
}
|
|
merge_cost[i + 1] = ddistortion;
|
|
break;
|
|
case SAO_TYPE_BAND:
|
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
|
ddistortion += kvz_sao_band_ddistortion(state, data[buf_i], recdata[buf_i],
|
|
block_width, block_height,
|
|
merge_cand->band_position[buf_i], &merge_cand->offsets[1 + 5 * buf_i]);
|
|
}
|
|
merge_cost[i + 1] = ddistortion;
|
|
break;
|
|
case SAO_TYPE_NONE:
|
|
merge_cost[i + 1] = ddistortion;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/**
 * \brief Search SAO parameters for both chroma planes of one LCU.
 *
 * The pixels of the U and V planes are copied to tightly packed temporary
 * buffers, which are then searched together.
 *
 * \param state       Encoder state, passed on to the mode search.
 * \param frame       Frame supplying source and reconstructed pixels.
 * \param x_ctb       Horizontal LCU index.
 * \param y_ctb       Vertical LCU index.
 * \param sao         Output: chosen SAO parameters.
 * \param sao_top     Top merge candidate, or NULL.
 * \param sao_left    Left merge candidate, or NULL.
 * \param merge_cost  Output: costs written by sao_search_best_mode.
 */
static void sao_search_chroma(const encoder_state_t * const state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3])
{
  kvz_pixel orig[2][LCU_CHROMA_SIZE];
  kvz_pixel rec[2][LCU_CHROMA_SIZE];
  const kvz_pixel *orig_list[2];
  const kvz_pixel *rec_list[2];
  int block_width  = (LCU_WIDTH / 2);
  int block_height = (LCU_WIDTH / 2);

  // Check for right and bottom boundaries.
  if (x_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)frame->width / 2) {
    block_width = (frame->width - x_ctb * LCU_WIDTH) / 2;
  }
  if (y_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)frame->height / 2) {
    block_height = (frame->height - y_ctb * LCU_WIDTH) / 2;
  }

  sao->type = SAO_TYPE_EDGE;

  // Copy data to temporary buffers and init orig and rec lists to point to
  // those buffers. Buffer slot 0 holds U and slot 1 holds V.
  for (color_t color_i = COLOR_U; color_i <= COLOR_V; ++color_i) {
    const int slot = color_i - 1;
    const kvz_pixel *src_pixels = &frame->source->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->source->stride / 2)];
    const kvz_pixel *rec_pixels = &frame->rec->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->rec->stride / 2)];

    kvz_pixels_blit(src_pixels, orig[slot], block_width, block_height,
                    frame->source->stride / 2, block_width);
    kvz_pixels_blit(rec_pixels, rec[slot], block_width, block_height,
                    frame->rec->stride / 2, block_width);

    orig_list[slot] = &orig[slot][0];
    rec_list[slot] = &rec[slot][0];
  }

  // Search both planes together.
  sao_search_best_mode(state, orig_list, rec_list, block_width, block_height, 2, sao, sao_top, sao_left, merge_cost);
}
|
|
|
|
/**
 * \brief Search SAO parameters for the luma plane of one LCU.
 *
 * \param state       Encoder state, passed on to the mode search.
 * \param frame       Frame supplying source and reconstructed pixels.
 * \param x_ctb       Horizontal LCU index.
 * \param y_ctb       Vertical LCU index.
 * \param sao         Output: chosen SAO parameters.
 * \param sao_top     Top merge candidate, or NULL.
 * \param sao_left    Left merge candidate, or NULL.
 * \param merge_cost  Output: costs written by sao_search_best_mode.
 */
static void sao_search_luma(const encoder_state_t * const state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3])
{
  kvz_pixel orig[LCU_LUMA_SIZE];
  kvz_pixel rec[LCU_LUMA_SIZE];
  const kvz_pixel *orig_list[1] = { orig };
  const kvz_pixel *rec_list[1] = { rec };
  const kvz_pixel *src_pixels = &frame->source->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->source->stride)];
  const kvz_pixel *rec_pixels = &frame->rec->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->rec->stride)];
  int block_width = LCU_WIDTH;
  int block_height = LCU_WIDTH;

  // Check for right and bottom boundaries.
  if (x_ctb * LCU_WIDTH + LCU_WIDTH >= (unsigned)frame->width) {
    block_width = frame->width - x_ctb * LCU_WIDTH;
  }
  if (y_ctb * LCU_WIDTH + LCU_WIDTH >= (unsigned)frame->height) {
    block_height = frame->height - y_ctb * LCU_WIDTH;
  }

  sao->type = SAO_TYPE_EDGE;

  // Fill temporary buffers with picture data, packed to block_width samples
  // per row.
  kvz_pixels_blit(src_pixels, orig, block_width, block_height, frame->source->stride, block_width);
  kvz_pixels_blit(rec_pixels, rec, block_width, block_height, frame->rec->stride, block_width);

  sao_search_best_mode(state, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left, merge_cost);
}
|
|
|
|
void kvz_sao_search_lcu(const encoder_state_t* const state, int lcu_x, int lcu_y)
|
|
{
|
|
assert(!state->encoder_control->cfg.lossless);
|
|
|
|
videoframe_t* const frame = state->tile->frame;
|
|
const int stride = frame->width_in_lcu;
|
|
int32_t merge_cost_luma[3] = { INT32_MAX };
|
|
int32_t merge_cost_chroma[3] = { INT32_MAX };
|
|
sao_info_t *sao_luma = &frame->sao_luma[lcu_y * stride + lcu_x];
|
|
sao_info_t *sao_chroma = NULL;
|
|
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
|
sao_chroma = &frame->sao_chroma[lcu_y * stride + lcu_x];
|
|
}
|
|
|
|
// Merge candidates
|
|
sao_info_t *sao_top_luma = lcu_y != 0 ? &frame->sao_luma [(lcu_y - 1) * stride + lcu_x] : NULL;
|
|
sao_info_t *sao_left_luma = lcu_x != 0 ? &frame->sao_luma [lcu_y * stride + lcu_x - 1] : NULL;
|
|
sao_info_t *sao_top_chroma = NULL;
|
|
sao_info_t *sao_left_chroma = NULL;
|
|
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
|
if (lcu_y != 0) sao_top_chroma = &frame->sao_chroma[(lcu_y - 1) * stride + lcu_x];
|
|
if (lcu_x != 0) sao_left_chroma = &frame->sao_chroma[lcu_y * stride + lcu_x - 1];
|
|
}
|
|
|
|
sao_search_luma(state, frame, lcu_x, lcu_y, sao_luma, sao_top_luma, sao_left_luma, merge_cost_luma);
|
|
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
|
|
sao_search_chroma(state, frame, lcu_x, lcu_y, sao_chroma, sao_top_chroma, sao_left_chroma, merge_cost_chroma);
|
|
} else {
|
|
merge_cost_chroma[0] = 0;
|
|
merge_cost_chroma[1] = 0;
|
|
merge_cost_chroma[2] = 0;
|
|
}
|
|
|
|
sao_luma->merge_up_flag = sao_luma->merge_left_flag = 0;
|
|
// Check merge costs
|
|
if (sao_top_luma) {
|
|
// Merge up if cost is equal or smaller to the searched mode cost
|
|
if (merge_cost_luma[2] + merge_cost_chroma[2] <= merge_cost_luma[0] + merge_cost_chroma[0]) {
|
|
*sao_luma = *sao_top_luma;
|
|
if (sao_top_chroma) *sao_chroma = *sao_top_chroma;
|
|
sao_luma->merge_up_flag = 1;
|
|
sao_luma->merge_left_flag = 0;
|
|
}
|
|
}
|
|
if (sao_left_luma) {
|
|
// Merge left if cost is equal or smaller to the searched mode cost
|
|
// AND smaller than merge up cost, if merge up was already chosen
|
|
if (merge_cost_luma[1] + merge_cost_chroma[1] <= merge_cost_luma[0] + merge_cost_chroma[0]) {
|
|
if (!sao_luma->merge_up_flag || merge_cost_luma[1] + merge_cost_chroma[1] < merge_cost_luma[2] + merge_cost_chroma[2]) {
|
|
*sao_luma = *sao_left_luma;
|
|
if (sao_left_chroma) *sao_chroma = *sao_left_chroma;
|
|
sao_luma->merge_left_flag = 1;
|
|
sao_luma->merge_up_flag = 0;
|
|
}
|
|
}
|
|
}
|
|
assert(sao_luma->eo_class < SAO_NUM_EO);
|
|
CHECKPOINT_SAO_INFO("sao_luma", *sao_luma);
|
|
|
|
if (sao_chroma) {
|
|
assert(sao_chroma->eo_class < SAO_NUM_EO);
|
|
CHECKPOINT_SAO_INFO("sao_chroma", *sao_chroma);
|
|
}
|
|
}
|
|
|
|
/**
 * \brief Apply SAO to every LCU of the frame.
 *
 * Luma is processed first. The chroma planes are processed unless the chroma
 * format is KVZ_CSP_400.
 *
 * \param state  Encoder state whose tile frame is reconstructed.
 */
void kvz_sao_reconstruct_frame(encoder_state_t * const state)
{
  videoframe_t * const frame = state->tile->frame;

  // These are needed because SAO needs the pre-SAO pixels from left and
  // top LCUs. Single pixel wide buffers, like what kvz_search_lcu takes, would
  // be enough though.
  // NOTE(review): the results of MALLOC are used without a NULL check.
  kvz_pixel *old_y = MALLOC(kvz_pixel, frame->rec->width * frame->rec->height);
  kvz_pixels_blit(frame->rec->y, old_y, frame->rec->width, frame->rec->height, frame->rec->stride, frame->rec->width);

  for (int y = 0; y < frame->height_in_lcu; y++) {
    for (int x = 0; x < frame->width_in_lcu; x++) {
      const unsigned stride = frame->width_in_lcu;
      const sao_info_t *sao_luma = &frame->sao_luma[y * stride + x];

      kvz_sao_reconstruct(state->encoder_control, frame, old_y, x, y, sao_luma, COLOR_Y);
    }
  }
  free(old_y);

  if (state->encoder_control->chroma_format == KVZ_CSP_400) {
    return;
  }

  // Pre-SAO copies of the chroma planes, packed to half the luma width per
  // row.
  kvz_pixel *old_u = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2);
  kvz_pixel *old_v = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2);

  kvz_pixels_blit(frame->rec->u, old_u, frame->rec->width / 2, frame->rec->height / 2, frame->rec->stride / 2, frame->rec->width / 2);
  kvz_pixels_blit(frame->rec->v, old_v, frame->rec->width / 2, frame->rec->height / 2, frame->rec->stride / 2, frame->rec->width / 2);

  for (int y = 0; y < frame->height_in_lcu; y++) {
    for (int x = 0; x < frame->width_in_lcu; x++) {
      const unsigned stride = frame->width_in_lcu;
      const sao_info_t *sao_chroma = &frame->sao_chroma[y * stride + x];

      // Both chroma planes share one set of SAO parameters.
      kvz_sao_reconstruct(state->encoder_control, frame, old_u, x, y, sao_chroma, COLOR_U);
      kvz_sao_reconstruct(state->encoder_control, frame, old_v, x, y, sao_chroma, COLOR_V);
    }
  }

  free(old_u);
  free(old_v);
}
|