Copy generic to avx2

This commit is contained in:
Ari Lemmetti 2015-10-02 17:50:34 +03:00
parent cad2ea9d6e
commit b37cca87c8

View file

@@ -30,6 +30,7 @@
#include "strategyselector.h"
#include "encoder.h"
#include "transform.h"
#include "rdo.h"
#if COMPILE_INTEL_AVX2
#include <immintrin.h>
@@ -194,6 +195,119 @@ void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coe
}
}
/**
 * \brief Quantize residual and get both the reconstruction and coeffs.
 *
 * The residual (ref_in - pred_in) is transformed, quantized and written to
 * coeff_out. When at least one quantized coefficient is non-zero the block is
 * dequantized and inverse transformed again, and the result is added to the
 * prediction to form rec_out. Otherwise rec_out is simply the prediction.
 *
 * \param state       Encoder state; state->encoder_control selects RDOQ and
 *                    is passed to the transform functions.
 * \param cur_cu      Coding unit of the block; its type, depth, tr_depth and
 *                    part_size are read.
 * \param width       Transform width, TR_MIN_WIDTH <= width <= TR_MAX_WIDTH.
 * \param color       Color.
 * \param scan_order  Coefficient scan order.
 * \param use_trskip  Whether transform skip is used.
 * \param in_stride   Stride for ref_in and pred_in.
 * \param out_stride  Stride for rec_out; also handed to kvz_coefficients_blit
 *                    when writing coeff_out.
 * \param ref_in      Reference pixels.
 * \param pred_in     Predicted pixels.
 * \param rec_out     Reconstructed pixels.
 * \param coeff_out   Coefficients used for reconstruction of rec_out.
 *
 * \returns Whether coeff_out contains any non-zero coefficients.
 */
int kvz_quantize_residual_avx2(encoder_state_t *const state,
    const cu_info_t *const cur_cu, const int width, const color_t color,
    const coeff_scan_order_t scan_order, const int use_trskip,
    const int in_stride, const int out_stride,
    const kvz_pixel *const ref_in, const kvz_pixel *const pred_in,
    kvz_pixel *rec_out, coeff_t *coeff_out)
{
  // Temporary arrays to pass data to and from kvz_quant and transform functions.
  int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
  coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
  coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];

  int has_coeffs = 0;

  assert(width <= TR_MAX_WIDTH);
  assert(width >= TR_MIN_WIDTH);

  // Get residual. (ref_in - pred_in -> residual)
  {
    int y, x;
    for (y = 0; y < width; ++y) {
      for (x = 0; x < width; ++x) {
        residual[x + y * width] = (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]);
      }
    }
  }

  // Transform residual. (residual -> coeff)
  if (use_trskip) {
    kvz_transformskip(state->encoder_control, residual, coeff, width);
  }
  else {
    kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535));
  }

  // Quantize coeffs. (coeff -> quant_coeff)
  // NOTE(review): both chroma planes are quantized with type 2 here, while
  // kvz_dequant below is given 2 for U and 3 for V -- confirm this asymmetry
  // is intended.
  if (state->encoder_control->rdoq_enable) {
    int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
    tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
    kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2),
             scan_order, cur_cu->type, tr_depth);
  }
  else {
    kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2),
              scan_order, cur_cu->type);
  }

  // Check if there are any non-zero coefficients.
  {
    int i;
    for (i = 0; i < width * width; ++i) {
      if (quant_coeff[i] != 0) {
        has_coeffs = 1;
        break;
      }
    }
  }

  // Copy coefficients to coeff_out.
  kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride);

  // Do the inverse quantization and transformation and the reconstruction to
  // rec_out.
  if (has_coeffs) {
    int y, x;

    // Get quantized residual. (quant_coeff -> coeff -> residual)
    kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type);
    if (use_trskip) {
      kvz_itransformskip(state->encoder_control, residual, coeff, width);
    }
    else {
      kvz_itransform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535));
    }

    // Get quantized reconstruction. (residual + pred_in -> rec_out)
    for (y = 0; y < width; ++y) {
      for (x = 0; x < width; ++x) {
        // Keep the sum in int: narrowing it to int16_t before clipping could
        // wrap a large positive value to a negative one and clamp it to 0
        // instead of PIXEL_MAX.
        const int val = residual[x + y * width] + pred_in[x + y * in_stride];
        rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, val);
      }
    }
  }
  else if (rec_out != pred_in) {
    // With no coeffs the reconstruction is just the prediction. Copy it to
    // rec_out, unless rec_out already is the prediction buffer, in which case
    // there is nothing to do.
    int y, x;
    for (y = 0; y < width; ++y) {
      for (x = 0; x < width; ++x) {
        rec_out[x + y * out_stride] = pred_in[x + y * in_stride];
      }
    }
  }

  return has_coeffs;
}
void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
int32_t height, int8_t type, int8_t scan_idx, int8_t block_type)
{
@@ -214,6 +328,7 @@ int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth)
#if COMPILE_INTEL_AVX2
success &= kvz_strategyselector_register(opaque, "quant", "avx2", 40, &kvz_quant_avx2);
success &= kvz_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &kvz_quantize_residual_avx2);
#endif //COMPILE_INTEL_AVX2
return success;