mirror of https://github.com/ultravideo/uvg266.git (synced 2024-11-30 12:44:07 +00:00)

Rename pixel_t to kvz_pixel.

commit f7f17a060c, parent cecea44d37
src/cu.h (6 changed lines)

@@ -123,9 +123,9 @@ int cu_array_free(cu_array_t *cua);
  * - First pixel is the top-left pixel.
  */
 typedef struct {
-  pixel_t y[LCU_REF_PX_WIDTH + 1];
-  pixel_t u[LCU_REF_PX_WIDTH / 2 + 1];
-  pixel_t v[LCU_REF_PX_WIDTH / 2 + 1];
+  kvz_pixel y[LCU_REF_PX_WIDTH + 1];
+  kvz_pixel u[LCU_REF_PX_WIDTH / 2 + 1];
+  kvz_pixel v[LCU_REF_PX_WIDTH / 2 + 1];
 } lcu_ref_px_t;

 typedef struct {

src/encoderstate.c

@@ -441,9 +441,9 @@ static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) {
   int x;

   //TODO: copy only needed data
-  pixel_t *new_y_data = MALLOC(pixel_t, frame->width * frame->height);
-  pixel_t *new_u_data = MALLOC(pixel_t, (frame->width * frame->height) >> 2);
-  pixel_t *new_v_data = MALLOC(pixel_t, (frame->width * frame->height) >> 2);
+  kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->width * frame->height);
+  kvz_pixel *new_u_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2);
+  kvz_pixel *new_v_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2);

   const int offset = frame->width * (data->y*LCU_WIDTH);
   const int offset_c = frame->width/2 * (data->y*LCU_WIDTH_C);
@@ -453,15 +453,15 @@ static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) {
     num_pixels = frame->width * frame->height - offset;
   }

-  memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(pixel_t) * num_pixels);
-  memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(pixel_t) * num_pixels >> 2);
-  memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(pixel_t) * num_pixels >> 2);
+  memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(kvz_pixel) * num_pixels);
+  memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(kvz_pixel) * num_pixels >> 2);
+  memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(kvz_pixel) * num_pixels >> 2);

   if (data->y>0) {
     //copy first row from buffer
-    memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(pixel_t));
-    memcpy(&new_u_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel_t));
-    memcpy(&new_v_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel_t));
+    memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(kvz_pixel));
+    memcpy(&new_u_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(kvz_pixel));
+    memcpy(&new_v_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(kvz_pixel));
   }

   for (x = 0; x < frame->width_in_lcu; x++) {
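A note on the chroma copies in the hunk above: sizeof(kvz_pixel) * num_pixels >> 2 is the luma byte count shifted down by two, because '*' binds tighter than '>>' — exactly the quarter-size chroma planes of 4:2:0 video. A minimal standalone sketch of the same arithmetic (the frame dimensions are made up for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint8_t kvz_pixel;  /* 8-bit build, matching the kvazaar.h typedef below */

int main(void)
{
  const int width = 1920, height = 1080;  /* hypothetical frame size */
  const size_t luma_px = (size_t)width * height;

  /* '*' binds tighter than '>>', so this is (luma bytes) >> 2: each
   * 4:2:0 chroma plane holds a quarter of the luma samples. */
  const size_t chroma_bytes = sizeof(kvz_pixel) * luma_px >> 2;

  printf("luma: %zu bytes, chroma: %zu bytes\n",
         sizeof(kvz_pixel) * luma_px, chroma_bytes);
  return 0;
}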
src/filter.c (12 changed lines)

@@ -83,7 +83,7 @@ const int8_t g_chroma_filter[8][4] =
 /**
  * \brief
  */
-INLINE void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset,
+INLINE void filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset,
                                 int32_t tc, int8_t sw,
                                 int8_t part_P_nofilter, int8_t part_Q_nofilter,
                                 int32_t thr_cut,
@@ -143,7 +143,7 @@ INLINE void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t
 /**
  * \brief
  */
-INLINE void filter_deblock_chroma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc,
+INLINE void filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
                                   int8_t part_P_nofilter, int8_t part_Q_nofilter)
 {
   int32_t delta;
@@ -188,8 +188,8 @@ void filter_deblock_edge_luma(encoder_state_t * const state,
   int32_t beta_offset_div2 = encoder->beta_offset_div2;
   int32_t tc_offset_div2 = encoder->tc_offset_div2;
   // TODO: support 10+bits
-  pixel_t *orig_src = &frame->rec->y[xpos + ypos*stride];
-  pixel_t *src = orig_src;
+  kvz_pixel *orig_src = &frame->rec->y[xpos + ypos*stride];
+  kvz_pixel *src = orig_src;
   int32_t step = 1;
   cu_info_t *cu_p = NULL;
   int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
@@ -381,8 +381,8 @@ void filter_deblock_edge_chroma(encoder_state_t * const state,
   int32_t stride = frame->rec->stride >> 1;
   int32_t tc_offset_div2 = encoder->tc_offset_div2;
   // TODO: support 10+bits
-  pixel_t *src_u = &frame->rec->u[x + y*stride];
-  pixel_t *src_v = &frame->rec->v[x + y*stride];
+  kvz_pixel *src_u = &frame->rec->u[x + y*stride];
+  kvz_pixel *src_v = &frame->rec->v[x + y*stride];
   // Init offset and step to EDGE_HOR
   int32_t offset = stride;
   int32_t step = 1;

src/filter.h

@@ -43,11 +43,11 @@ void filter_deblock_edge_chroma(encoder_state_t *state,
                                 int32_t xpos, int32_t ypos,
                                 int8_t depth, int8_t dir);
 void filter_deblock_lcu(encoder_state_t *state, int x_px, int y_px);
-void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc , int8_t sw,
+void filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc , int8_t sw,
                          int8_t part_p_nofilter, int8_t part_q_nofilter,
                          int32_t thr_cut,
                          int8_t filter_second_p, int8_t filter_second_q);
-void filter_deblock_chroma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc,
+void filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc,
                            int8_t part_p_nofilter, int8_t part_q_nofilter);

 // SAO
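The deblocking entry points above take a bare kvz_pixel pointer plus an offset rather than separate row/column variants; the "// Init offset and step to EDGE_HOR" lines suggest the usual convention in which offset spans the edge and step advances along it (offset = stride, step = 1 for a horizontal edge). A hedged sketch of that access pattern — the role swap for vertical edges is inferred, not shown in this diff:

#include <stdint.h>

typedef uint8_t kvz_pixel;

/* Walk 'len' positions along one block edge. For a horizontal edge,
 * offset = stride and step = 1, as initialized in the hunk above; for
 * a vertical edge the roles are assumed to swap. At each position,
 * p[-offset] and p[0] are the two samples straddling the edge. */
static void walk_edge(kvz_pixel *src, int32_t offset, int32_t step, int len)
{
  for (int i = 0; i < len; i++) {
    kvz_pixel *p = &src[i * step];
    int delta = (int)p[0] - (int)p[-offset];  /* gradient across the edge */
    (void)delta;  /* the real filters clip a correction by tc and write back */
  }
}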
src/image.c (28 changed lines)

@@ -52,7 +52,7 @@ kvz_picture *image_alloc(const int32_t width, const int32_t height)
   unsigned int chroma_size = luma_size / 4;

   //Allocate memory
-  im->fulldata = MALLOC(pixel_t, (luma_size + 2 * chroma_size));
+  im->fulldata = MALLOC(kvz_pixel, (luma_size + 2 * chroma_size));
   if (!im->fulldata) {
     free(im);
     return NULL;
@@ -156,9 +156,9 @@ yuv_t * yuv_t_alloc(int luma_size)
   // Get buffers with separate mallocs in order to take advantage of
   // automatic buffer overrun checks.
   yuv_t *yuv = (yuv_t *)malloc(sizeof(*yuv));
-  yuv->y = (pixel_t *)malloc(luma_size * sizeof(*yuv->y));
-  yuv->u = (pixel_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
-  yuv->v = (pixel_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
+  yuv->y = (kvz_pixel *)malloc(luma_size * sizeof(*yuv->y));
+  yuv->u = (kvz_pixel *)malloc(luma_size / 2 * sizeof(*yuv->u));
+  yuv->v = (kvz_pixel *)malloc(luma_size / 2 * sizeof(*yuv->v));
   yuv->size = luma_size;

   return yuv;
@@ -183,10 +183,10 @@ void yuv_t_free(yuv_t * yuv)
  *
  * \returns Sum of Absolute Differences
  */
-static unsigned cor_sad(const pixel_t *pic_data, const pixel_t *ref_data,
+static unsigned cor_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                         int block_width, int block_height, unsigned pic_stride)
 {
-  pixel_t ref = *ref_data;
+  kvz_pixel ref = *ref_data;
   int x, y;
   unsigned sad = 0;

@@ -210,7 +210,7 @@ static unsigned cor_sad(const pixel_t *pic_data, const pixel_t *ref_data,
  *
  * \returns Sum of Absolute Differences
  */
-static unsigned ver_sad(const pixel_t *pic_data, const pixel_t *ref_data,
+static unsigned ver_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                         int block_width, int block_height, unsigned pic_stride)
 {
   int x, y;
@@ -236,7 +236,7 @@ static unsigned ver_sad(const pixel_t *pic_data, const pixel_t *ref_data,
  *
  * \returns Sum of Absolute Differences
  */
-static unsigned hor_sad(const pixel_t *pic_data, const pixel_t *ref_data,
+static unsigned hor_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                         int block_width, int block_height, unsigned pic_stride, unsigned ref_stride)
 {
   int x, y;
@@ -269,7 +269,7 @@ static unsigned image_interpolated_sad(const kvz_picture *pic, const kvz_picture
                                        int pic_x, int pic_y, int ref_x, int ref_y,
                                        int block_width, int block_height)
 {
-  pixel_t *pic_data, *ref_data;
+  kvz_pixel *pic_data, *ref_data;

   int left, right, top, bottom;
   int result = 0;
@@ -424,8 +424,8 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_
   {
     // Reference block is completely inside the frame, so just calculate the
     // SAD directly. This is the most common case, which is why it's first.
-    const pixel_t *pic_data = &pic->y[pic_y * pic->stride + pic_x];
-    const pixel_t *ref_data = &ref->y[ref_y * ref->stride + ref_x];
+    const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
+    const kvz_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x];
     return reg_sad(pic_data, ref_data, block_width, block_height, pic->stride, ref->stride);
   } else {
     // Call a routine that knows how to interpolate pixels outside the frame.
@@ -434,7 +434,7 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_
 }


-unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec,
+unsigned pixels_calc_ssd(const kvz_pixel *const ref, const kvz_pixel *const rec,
                          const int ref_stride, const int rec_stride,
                          const int width)
 {
@@ -467,7 +467,7 @@ unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec,
  * This should be inlined, but it's defined here for now to see if Visual
  * Studios LTCG will inline it.
  */
-void pixels_blit(const pixel_t * const orig, pixel_t * const dst,
+void pixels_blit(const kvz_pixel * const orig, kvz_pixel * const dst,
                  const unsigned width, const unsigned height,
                  const unsigned orig_stride, const unsigned dst_stride)
 {
@@ -496,7 +496,7 @@ void pixels_blit(const pixel_t * const orig, pixel_t * const dst,
   assert(orig != dst || orig_stride == dst_stride);

   for (y = 0; y < height; ++y) {
-    memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(pixel_t));
+    memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(kvz_pixel));
   }
 }
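The body of pixels_calc_ssd is not part of this diff, but the signature above pins down the contract: a sum of squared differences over a width x width block read from two independently strided kvz_pixel arrays. A minimal sketch under that assumption (illustrative, not the kvazaar implementation):

#include <stdint.h>

typedef uint8_t kvz_pixel;

/* Illustrative SSD over a width x width block; mirrors the
 * pixels_calc_ssd signature above but is not the kvazaar body. */
static unsigned ssd_sketch(const kvz_pixel *ref, const kvz_pixel *rec,
                           int ref_stride, int rec_stride, int width)
{
  unsigned ssd = 0;
  for (int y = 0; y < width; y++) {
    for (int x = 0; x < width; x++) {
      int diff = (int)ref[y * ref_stride + x] - (int)rec[y * rec_stride + x];
      ssd += (unsigned)(diff * diff);
    }
  }
  return ssd;
}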
src/image.h (16 changed lines)

@@ -29,16 +29,16 @@
 #include "kvazaar.h"

 typedef struct {
-  pixel_t y[LCU_LUMA_SIZE];
-  pixel_t u[LCU_CHROMA_SIZE];
-  pixel_t v[LCU_CHROMA_SIZE];
+  kvz_pixel y[LCU_LUMA_SIZE];
+  kvz_pixel u[LCU_CHROMA_SIZE];
+  kvz_pixel v[LCU_CHROMA_SIZE];
 } lcu_yuv_t;

 typedef struct {
   int size;
-  pixel_t *y;
-  pixel_t *u;
-  pixel_t *v;
+  kvz_pixel *y;
+  kvz_pixel *u;
+  kvz_pixel *v;
 } yuv_t;


@@ -62,12 +62,12 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_
                         int block_width, int block_height, int max_lcu_below);


-unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec,
+unsigned pixels_calc_ssd(const kvz_pixel *const ref, const kvz_pixel *const rec,
                          const int ref_stride, const int rec_stride,
                          const int width);


-void pixels_blit(const pixel_t* orig, pixel_t *dst,
+void pixels_blit(const kvz_pixel* orig, kvz_pixel *dst,
                  unsigned width, unsigned height,
                  unsigned orig_stride, unsigned dst_stride);
src/inter.c (58 changed lines)

@@ -91,12 +91,12 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
   // Chroma half-pel
   #define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
   int8_t chroma_halfpel = ((mv[0]>>2)&1) || ((mv[1]>>2)&1); //!< (luma integer mv) lsb is set -> chroma is half-pel
-  pixel_t halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< U source block for interpolation
-  pixel_t halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< V source block for interpolation
-  pixel_t *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_u with offset (4,4)
-  pixel_t *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_v with offset (4,4)
-  pixel_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u)
-  pixel_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)
+  kvz_pixel halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< U source block for interpolation
+  kvz_pixel halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< V source block for interpolation
+  kvz_pixel *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_u with offset (4,4)
+  kvz_pixel *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_v with offset (4,4)
+  kvz_pixel halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u)
+  kvz_pixel halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)

   // Luma quarter-pel
   int8_t fractional_mv = (mv[0]&1) || (mv[1]&1) || (mv[0]&2) || (mv[1]&2); // either of 2 lowest bits of mv set -> mv is fractional
@@ -114,19 +114,19 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
   #define FILTER_SIZE_C 4 //Chroma filter size

   // Fractional luma 1/4-pel
-  pixel_t qpel_src_y[(LCU_WIDTH+FILTER_SIZE_Y) * (LCU_WIDTH+FILTER_SIZE_Y)];
-  pixel_t* qpel_src_off_y = &qpel_src_y[(width+FILTER_SIZE_Y)*(FILTER_SIZE_Y>>1)+(FILTER_SIZE_Y>>1)];
-  pixel_t qpel_dst_y[LCU_WIDTH*LCU_WIDTH*16];
+  kvz_pixel qpel_src_y[(LCU_WIDTH+FILTER_SIZE_Y) * (LCU_WIDTH+FILTER_SIZE_Y)];
+  kvz_pixel* qpel_src_off_y = &qpel_src_y[(width+FILTER_SIZE_Y)*(FILTER_SIZE_Y>>1)+(FILTER_SIZE_Y>>1)];
+  kvz_pixel qpel_dst_y[LCU_WIDTH*LCU_WIDTH*16];

   // Fractional chroma 1/8-pel
   int width_c = width>>1;
-  pixel_t octpel_src_u[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)];
-  pixel_t* octpel_src_off_u = &octpel_src_u[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)];
-  pixel_t octpel_dst_u[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64];
+  kvz_pixel octpel_src_u[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)];
+  kvz_pixel* octpel_src_off_u = &octpel_src_u[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)];
+  kvz_pixel octpel_dst_u[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64];

-  pixel_t octpel_src_v[((LCU_WIDTH >> 1) + FILTER_SIZE_C) * ((LCU_WIDTH >> 1) + FILTER_SIZE_C)];
-  pixel_t* octpel_src_off_v = &octpel_src_v[(width_c + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)];
-  pixel_t octpel_dst_v[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64];
+  kvz_pixel octpel_src_v[((LCU_WIDTH >> 1) + FILTER_SIZE_C) * ((LCU_WIDTH >> 1) + FILTER_SIZE_C)];
+  kvz_pixel* octpel_src_off_v = &octpel_src_v[(width_c + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)];
+  kvz_pixel octpel_dst_v[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64];

   // Fractional luma
   extend_borders(xpos, ypos, mv[0]>>2, mv[1]>>2, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH,
@@ -156,7 +156,7 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
       for(x = 0; x < width; ++x) {
         int x_in_lcu = ((x+xpos) & ((LCU_WIDTH)-1));
         int qpel_x = x*4+y_off_x;
-        lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (pixel_t)qpel_dst_y[qpel_y*(width*4)+qpel_x];
+        lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)qpel_dst_y[qpel_y*(width*4)+qpel_x];
       }
     }
     //Sample fractional pixels for chroma
@@ -166,8 +166,8 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
       for(x = 0; x < width_c; ++x) {
         int x_in_lcu = ((x+(xpos>>1)) & ((LCU_WIDTH>>1)-1));
         int qpel_x = x*8+c_off_x;
-        lcu->rec.u[y_in_lcu * dst_width_c + x_in_lcu] = (pixel_t)octpel_dst_u[qpel_y*(width_c*8)+qpel_x];
-        lcu->rec.v[y_in_lcu * dst_width_c + x_in_lcu] = (pixel_t)octpel_dst_v[qpel_y*(width_c*8)+qpel_x];
+        lcu->rec.u[y_in_lcu * dst_width_c + x_in_lcu] = (kvz_pixel)octpel_dst_u[qpel_y*(width_c*8)+qpel_x];
+        lcu->rec.v[y_in_lcu * dst_width_c + x_in_lcu] = (kvz_pixel)octpel_dst_v[qpel_y*(width_c*8)+qpel_x];
       }
     }
   }
@@ -219,8 +219,8 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
       for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) {
         int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
         int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
-        lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (pixel_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
-        lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (pixel_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
+        lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (kvz_pixel)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
+        lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (kvz_pixel)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x];
       }
     }
   }
@@ -339,17 +339,17 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co
  */

 void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_picture * ref1, const kvz_picture * ref2, int32_t xpos, int32_t ypos, int32_t width, int16_t mv_param[2][2], lcu_t* lcu) {
-  pixel_t temp_lcu_y[64 * 64];
-  pixel_t temp_lcu_u[32 * 32];
-  pixel_t temp_lcu_v[32 * 32];
+  kvz_pixel temp_lcu_y[64 * 64];
+  kvz_pixel temp_lcu_u[32 * 32];
+  kvz_pixel temp_lcu_v[32 * 32];
   int temp_x, temp_y;
   // TODO: interpolated values require 14-bit accuracy for bi-prediction, current implementation of ipol filters round the value to 8bits

   //Reconstruct both predictors
   inter_recon_lcu(state, ref1, xpos, ypos, width, mv_param[0], lcu);
-  memcpy(temp_lcu_y, lcu->rec.y, sizeof(pixel_t) * 64 * 64);
-  memcpy(temp_lcu_u, lcu->rec.u, sizeof(pixel_t) * 32 * 32);
-  memcpy(temp_lcu_v, lcu->rec.v, sizeof(pixel_t) * 32 * 32);
+  memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64);
+  memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32);
+  memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32);
   inter_recon_lcu(state, ref2, xpos, ypos, width, mv_param[1], lcu);

   // After reconstruction, merge the predictors by taking an average of each pixel
@@ -357,7 +357,7 @@ void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_pictu
     int y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
     for (temp_x = 0; temp_x < width; ++temp_x) {
       int x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));
-      lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (pixel_t)(((int)lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] +
+      lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] +
                                                      (int)temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] + 1) >> 1);
     }
   }
@@ -365,10 +365,10 @@ void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_pictu
     int y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
     for (temp_x = 0; temp_x < width>>1; ++temp_x) {
       int x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1));
-      lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (pixel_t)(((int)lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] +
+      lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] +
                                                        (int)temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + 1) >> 1);

-      lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (pixel_t)(((int)lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] +
+      lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] +
                                                        (int)temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + 1) >> 1);
     }
   }
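The merge loops above combine the two predictors with ((int)a + (int)b + 1) >> 1; the + 1 makes the shift round to nearest rather than truncate, and the TODO notes that a spec-accurate implementation would average 14-bit intermediates instead of already-rounded 8-bit samples. The kernel, pulled out as a sketch:

#include <stdint.h>

typedef uint8_t kvz_pixel;

/* Rounded average of two predictor samples, exactly as in the loops
 * above: (7 + 8 + 1) >> 1 == 8, whereas plain (7 + 8) >> 1 == 7. */
static inline kvz_pixel avg_pred(kvz_pixel a, kvz_pixel b)
{
  return (kvz_pixel)(((int)a + (int)b + 1) >> 1);
}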
src/intra.c (80 changed lines)

@@ -87,7 +87,7 @@ void intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint
  * \param width block width
  * \returns DC prediction
  */
-pixel_t intra_get_dc_pred(const pixel_t *pic, uint16_t picwidth, uint8_t width)
+kvz_pixel intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width)
 {
   int32_t i, sum = 0;

@@ -100,7 +100,7 @@ pixel_t intra_get_dc_pred(const pixel_t *pic, uint16_t picwidth, uint8_t width)
   }

   // return the average
-  return (pixel_t)((sum + width) / (width + width));
+  return (kvz_pixel)((sum + width) / (width + width));
 }

 /**
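The return statement above is the whole of DC prediction: sum accumulates the width reference samples above the block plus the width samples to its left, and (sum + width) / (width + width) is their mean rounded to nearest. A hypothetical standalone helper with the two borders passed separately (the real function reads both from one strided array via picwidth):

#include <stdint.h>

typedef uint8_t kvz_pixel;

/* Rounded mean of 2*width border samples, mirroring the return
 * expression above; 'above' and 'left' are hypothetical split borders. */
static kvz_pixel dc_pred_sketch(const kvz_pixel *above,
                                const kvz_pixel *left, uint8_t width)
{
  int32_t sum = 0;
  for (int i = 0; i < width; i++) {
    sum += above[i] + left[i];
  }
  return (kvz_pixel)((sum + width) / (width + width));
}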
@@ -175,11 +175,11 @@ int8_t intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t*
  * \param preds output buffer for 3 predictions
  * \returns (predictions are found)?1:0
  */
-void intra_filter(pixel_t *ref, int32_t stride,int32_t width, int8_t mode)
+void intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode)
 {
 #define FWIDTH (LCU_WIDTH*2+1)
-  pixel_t filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples
-  pixel_t *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1)
+  kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples
+  kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1)
   int x,y;

   if (!mode) {
@@ -222,9 +222,9 @@ void intra_filter(pixel_t *ref, int32_t stride,int32_t width, int8_t mode)
  * \param recstride Stride for rec pixel arrays.
  * \param dst
  */
-void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, const pixel_t *rec_filtered, int recstride, pixel_t *dst, int width, int mode, int is_chroma)
+void intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma)
 {
-  const pixel_t *ref_pixels = rec;
+  const kvz_pixel *ref_pixels = rec;
   if (is_chroma || mode == 1 || width == 4) {
     // For chroma, DC and 4x4 blocks, always use unfiltered reference.
   } else if (mode == 0) {
@@ -244,7 +244,7 @@ void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec,
     intra_get_planar_pred(ref_pixels, recstride, width, dst, width);
   } else if (mode == 1) {
     int i;
-    pixel_t val = intra_get_dc_pred(ref_pixels, recstride, width);
+    kvz_pixel val = intra_get_dc_pred(ref_pixels, recstride, width);
     for (i = 0; i < width * width; i++) {
       dst[i] = val;
     }
@@ -271,11 +271,11 @@ void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec,
  * \param chroma chroma-block flag

  */
-void intra_recon(const encoder_control_t * const encoder, pixel_t* rec, int32_t recstride, uint32_t width, pixel_t* dst, int32_t dststride, int8_t mode, int8_t chroma)
+void intra_recon(const encoder_control_t * const encoder, kvz_pixel* rec, int32_t recstride, uint32_t width, kvz_pixel* dst, int32_t dststride, int8_t mode, int8_t chroma)
 {
-  pixel_t pred[LCU_WIDTH * LCU_WIDTH];
-  pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
-  pixel_t *recf = &rec_filtered_temp[recstride + 1];
+  kvz_pixel pred[LCU_WIDTH * LCU_WIDTH];
+  kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
+  kvz_pixel *recf = &rec_filtered_temp[recstride + 1];

   // Generate filtered reference pixels.
   {
@@ -304,7 +304,7 @@ void intra_recon(const encoder_control_t * const encoder, pixel_t* rec, int32_t
  * edge pixels filled with the reconstructed pixels.
  */
 void intra_build_reference_border(const encoder_control_t * const encoder, int32_t x_luma, int32_t y_luma, int16_t out_width,
-                                  pixel_t *dst, int32_t dst_stride, int8_t chroma,
+                                  kvz_pixel *dst, int32_t dst_stride, int8_t chroma,
                                   int32_t pic_width, int32_t pic_height,
                                   lcu_t *lcu)
 {
@@ -362,7 +362,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32
     { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
   };

-  const pixel_t dc_val = 1 << (encoder->bitdepth - 1);
+  const kvz_pixel dc_val = 1 << (encoder->bitdepth - 1);
   const int is_chroma = chroma ? 1 : 0;

   // input picture pointer
@@ -377,12 +377,12 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32

   int x_local = (x_luma&0x3f)>>is_chroma, y_local = (y_luma&0x3f)>>is_chroma;

-  pixel_t *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
-  pixel_t *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
-  pixel_t *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? lcu->rec.u : lcu->rec.v;
+  kvz_pixel *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
+  kvz_pixel *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
+  kvz_pixel *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? lcu->rec.u : lcu->rec.v;

-  pixel_t *left_border = &left_ref[y_local];
-  pixel_t *top_border = &top_ref[x_local];
+  kvz_pixel *left_border = &left_ref[y_local];
+  kvz_pixel *top_border = &top_ref[x_local];
   uint32_t left_stride = 1;

   if(x_local) {
@@ -399,7 +399,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32
       // Get the number of reference pixels based on the PU coordinate within the LCU.
       int num_ref_pixels = num_ref_pixels_left[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma;
       int i;
-      pixel_t nearest_pixel;
+      kvz_pixel nearest_pixel;

       // Max pixel we can copy from src is yy + outwidth - 1 because the dst
       // extends one pixel to the left.
@@ -421,7 +421,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32
       }
     } else {
       // If we are on the left edge, extend the first pixel of the top row.
-      pixel_t nearest_pixel = y > 0 ? top_border[0] : dc_val;
+      kvz_pixel nearest_pixel = y > 0 ? top_border[0] : dc_val;
       int i;
       for (i = 1; i < out_width - 1; i++) {
         dst[i * dst_stride] = nearest_pixel;
@@ -433,7 +433,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32
       // Get the number of reference pixels based on the PU coordinate within the LCU.
       int num_ref_pixels = num_ref_pixels_top[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma;
       int i;
-      pixel_t nearest_pixel;
+      kvz_pixel nearest_pixel;

       // Max pixel we can copy from src is yy + outwidth - 1 because the dst
       // extends one pixel to the left.
@@ -452,7 +452,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32
       }
     } else {
       // Extend nearest pixel.
-      pixel_t nearest_pixel = x > 0 ? left_border[0] : dc_val;
+      kvz_pixel nearest_pixel = x > 0 ? left_border[0] : dc_val;
       int i;
       for(i = 1; i < out_width; i++)
       {
@@ -484,7 +484,7 @@ const int32_t inv_ang_table[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256};
  * \brief this functions constructs the angular intra prediction from border samples
  *
  */
-void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter)
+void intra_get_angular_pred(const encoder_control_t * const encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter)
 {
   int32_t k,l;
   int32_t blk_size = width;
@@ -499,10 +499,10 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel
   int32_t inv_angle = inv_ang_table[abs_ang];

   // Do angular predictions
-  pixel_t *ref_main;
-  pixel_t *ref_side;
-  pixel_t ref_above[2 * LCU_WIDTH + 1];
-  pixel_t ref_left[2 * LCU_WIDTH + 1];
+  kvz_pixel *ref_main;
+  kvz_pixel *ref_side;
+  kvz_pixel ref_above[2 * LCU_WIDTH + 1];
+  kvz_pixel ref_left[2 * LCU_WIDTH + 1];

   // Tell clang-analyzer that everything is ok.
   assert(width == 4 || width == 8 || width == 16 || width == 32);
@@ -564,7 +564,7 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel
       // Do linear filtering
       for (l = 0; l < blk_size; l++) {
         ref_main_index = l + delta_int + 1;
-        dst[k * dst_stride + l] = (pixel_t) ( (minus_delta_fract * ref_main[ref_main_index]
+        dst[k * dst_stride + l] = (kvz_pixel) ( (minus_delta_fract * ref_main[ref_main_index]
                                                + delta_fract * ref_main[ref_main_index + 1] + 16) >> 5);
       }
     } else {
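The linear filtering above is a 32-phase interpolation between two adjacent reference samples: minus_delta_fract (presumably 32 - delta_fract) and delta_fract are weights summing to 32, + 16 rounds, and >> 5 normalizes. As a sketch:

#include <stdint.h>

typedef uint8_t kvz_pixel;

/* 1/32-sample linear interpolation as in the loop above; assumes
 * 0 <= delta_fract < 32 so the two weights sum to 32. */
static kvz_pixel interp_32phase(kvz_pixel a, kvz_pixel b, int delta_fract)
{
  return (kvz_pixel)(((32 - delta_fract) * a + delta_fract * b + 16) >> 5);
}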
@@ -578,7 +578,7 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel

   // Flip the block if this is the horizontal mode
   if (!mode_ver) {
-    pixel_t tmp;
+    kvz_pixel tmp;
     for (k=0;k<blk_size-1;k++) {
       for (l=k+1;l<blk_size;l++) {
         tmp = dst[k * dst_stride + l];
@@ -592,7 +592,7 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel



-void intra_dc_pred_filtering(const pixel_t *src, int32_t src_stride, pixel_t *dst, int32_t dst_stride, int32_t width, int32_t height )
+void intra_dc_pred_filtering(const kvz_pixel *src, int32_t src_stride, kvz_pixel *dst, int32_t dst_stride, int32_t width, int32_t height )
 {
   int32_t x, y, dst_stride2, src_stride2;

@@ -619,7 +619,7 @@ void intra_dc_pred_filtering(const pixel_t *src, int32_t src_stride, pixel_t *ds

   This function derives the prediction samples for planar mode (intra coding).
 */
-void intra_get_planar_pred(const pixel_t* src, int32_t srcstride, uint32_t width, pixel_t* dst, int32_t dststride)
+void intra_get_planar_pred(const kvz_pixel* src, int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride)
 {
   int32_t k, l, bottom_left, top_right;
   int32_t hor_pred;
@@ -651,7 +651,7 @@ void intra_get_planar_pred(const pixel_t* src, int32_t srcstride, uint32_t width
     for (l = 0; l < (int32_t)blk_size; l++) {
       hor_pred += right_column[k];
       top_row[l] += bottom_row[l];
-      dst[k * dststride + l] = (pixel_t)((hor_pred + top_row[l]) >> shift_2d);
+      dst[k * dststride + l] = (kvz_pixel)((hor_pred + top_row[l]) >> shift_2d);
     }
   }
 }
@@ -689,10 +689,10 @@ void intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth
   const uint32_t pic_height = state->tile->frame->height;

   // Pointers to reconstruction arrays
-  pixel_t *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
+  kvz_pixel *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH];

-  pixel_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)];
-  pixel_t *rec_shift = &rec[width * 2 + 8 + 1];
+  kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)];
+  kvz_pixel *rec_shift = &rec[width * 2 + 8 + 1];

   int32_t rec_stride = LCU_WIDTH;

@@ -744,16 +744,16 @@ void intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int dep
   const uint32_t pic_height = state->tile->frame->height;

   // Pointers to reconstruction arrays
-  pixel_t *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4];
-  pixel_t *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4];
+  kvz_pixel *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4];
+  kvz_pixel *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4];

-  pixel_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)];
+  kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)];

   int32_t rec_stride = LCU_WIDTH;

   // Reconstruct chroma.
   if (!(x & 4 || y & 4)) {
-    pixel_t *rec_shift_c = &rec[width_c * 2 + 8 + 1];
+    kvz_pixel *rec_shift_c = &rec[width_c * 2 + 8 + 1];
     intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1,
                                  pic_width/2, pic_height/2, lcu);
     intra_recon(encoder,
src/intra.h (16 changed lines)

@@ -35,19 +35,19 @@

 int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
                                     const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu);
-void intra_dc_pred_filtering(const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t height );
+void intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height );

-void intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, pixel_t *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu);
-void intra_filter(pixel_t* ref, int32_t stride, int32_t width, int8_t mode);
+void intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu);
+void intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode);

 /* Predictions */
-void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, const pixel_t *rec_filtered, int recstride, pixel_t *dst, int width, int mode, int is_chroma);
+void intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma);

-pixel_t intra_get_dc_pred(const pixel_t* pic, uint16_t pic_width, uint8_t width);
-void intra_get_planar_pred(const pixel_t* src,int32_t srcstride, uint32_t width, pixel_t* dst, int32_t dststride);
-void intra_get_angular_pred(const encoder_control_t *encoder, const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter);
+kvz_pixel intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width);
+void intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride);
+void intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter);

-void intra_recon(const encoder_control_t *encoder, pixel_t* rec, int32_t rec_stride, uint32_t width, pixel_t* dst, int32_t dst_stride, int8_t mode, int8_t chroma);
+void intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma);

 void intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
 void intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);
src/kvazaar.h

@@ -41,9 +41,9 @@ extern "C" {

 #define KVZ_BIT_DEPTH 8
 #if KVZ_BIT_DEPTH == 8
-typedef uint8_t pixel_t;
+typedef uint8_t kvz_pixel;
 #else
-typedef uint16_t pixel_t;
+typedef uint16_t kvz_pixel;
 #endif

 /**
@@ -140,12 +140,12 @@ typedef struct bitstream_chunk_t kvz_payload;
  * \brief Struct which contains all picture data
  */
 typedef struct kvz_picture {
-  pixel_t *fulldata;   //!< \brief Allocated buffer (only used in the base_image)
+  kvz_pixel *fulldata; //!< \brief Allocated buffer (only used in the base_image)

-  pixel_t *y;          //!< \brief Pointer to luma pixel array.
-  pixel_t *u;          //!< \brief Pointer to chroma U pixel array.
-  pixel_t *v;          //!< \brief Pointer to chroma V pixel array.
-  pixel_t *data[3];    //!< \brief Alternate access method to same data.
+  kvz_pixel *y;        //!< \brief Pointer to luma pixel array.
+  kvz_pixel *u;        //!< \brief Pointer to chroma U pixel array.
+  kvz_pixel *v;        //!< \brief Pointer to chroma V pixel array.
+  kvz_pixel *data[3];  //!< \brief Alternate access method to same data.

   int32_t width;   //!< \brief Luma pixel array width.
   int32_t height;  //!< \brief Luma pixel array height.
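The hunk above is the heart of this commit: the pixel type lives in kvazaar.h — presumably the library's public header, which is why it now carries the kvz_ prefix — and it is 8 or 16 bits wide depending on KVZ_BIT_DEPTH. That is also why every buffer in this diff is sized with sizeof(kvz_pixel) rather than a literal. A standalone check of that relationship:

#include <stdint.h>
#include <stdio.h>

#define KVZ_BIT_DEPTH 8

#if KVZ_BIT_DEPTH == 8
typedef uint8_t kvz_pixel;
#else
typedef uint16_t kvz_pixel;
#endif

int main(void)
{
  /* 1 byte per sample on an 8-bit build, 2 bytes otherwise; code that
   * writes 'width * sizeof(kvz_pixel)' stays correct either way. */
  printf("KVZ_BIT_DEPTH=%d -> sizeof(kvz_pixel)=%zu\n",
         KVZ_BIT_DEPTH, sizeof(kvz_pixel));
  return 0;
}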
src/rdo.c

@@ -170,7 +170,7 @@ int intra_rdo_cost_compare(uint32_t *rdo_costs,int8_t rdo_modes_to_check, uint32

 ** Only for luma
 */
-uint32_t rdo_cost_intra(encoder_state_t * const state, pixel_t *pred, pixel_t *orig_block, int width, int8_t mode, int tr_depth)
+uint32_t rdo_cost_intra(encoder_state_t * const state, kvz_pixel *pred, kvz_pixel *orig_block, int width, int8_t mode, int tr_depth)
 {
   const encoder_control_t * const encoder = state->encoder_control;
   coeff_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2];

src/rdo.h

@@ -39,7 +39,7 @@ int intra_rdo_cost_compare(uint32_t *rdo_costs,int8_t rdo_modes_to_check, uint32
 void rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width,
           int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);

-uint32_t rdo_cost_intra(encoder_state_t *state, pixel_t* pred, pixel_t* orig_block, int width, int8_t mode, int tr_depth);
+uint32_t rdo_cost_intra(encoder_state_t *state, kvz_pixel* pred, kvz_pixel* orig_block, int width, int8_t mode, int tr_depth);

 int32_t get_coeff_cost(const encoder_state_t *state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode);
src/sao.c (88 changed lines)

@@ -44,7 +44,7 @@ static const vector2d_t g_sao_edge_offsets[SAO_NUM_EO][2] = {
 // Mapping of edge_idx values to eo-classes.


-static int sao_calc_eo_cat(pixel_t a, pixel_t b, pixel_t c)
+static int sao_calc_eo_cat(kvz_pixel a, kvz_pixel b, kvz_pixel c)
 {
   // Mapping relationships between a, b and c to eo_idx.
   static const int sao_eo_idx_to_eo_category[] = { 1, 2, 0, 3, 4 };
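sao_calc_eo_cat classifies sample c against its two neighbours a and b along the edge-offset direction, then remaps the raw edge index through the table above. The index computation itself falls outside this hunk; the standard HEVC formulation is 2 + sign(c - a) + sign(c - b), and a sketch under that assumption:

#include <stdint.h>

typedef uint8_t kvz_pixel;

static int sign3(int x) { return (x > 0) - (x < 0); }

/* Edge-offset category: assumes the standard HEVC edge index
 * 2 + sign(c-a) + sign(c-b), remapped via the table shown above. */
static int eo_cat_sketch(kvz_pixel a, kvz_pixel b, kvz_pixel c)
{
  static const int sao_eo_idx_to_eo_category[] = { 1, 2, 0, 3, 4 };
  int eo_idx = 2 + sign3((int)c - (int)a) + sign3((int)c - (int)b);
  return sao_eo_idx_to_eo_category[eo_idx];
}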
@ -55,7 +55,7 @@ static int sao_calc_eo_cat(pixel_t a, pixel_t b, pixel_t c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int sao_band_ddistortion(const encoder_state_t * const state, const pixel_t *orig_data, const pixel_t *rec_data,
|
int sao_band_ddistortion(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data,
|
||||||
int block_width, int block_height,
|
int block_width, int block_height,
|
||||||
int band_pos, int sao_bands[4])
|
int band_pos, int sao_bands[4])
|
||||||
{
|
{
|
||||||
|
@ -82,7 +82,7 @@ int sao_band_ddistortion(const encoder_state_t * const state, const pixel_t *ori
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int sao_edge_ddistortion(const pixel_t *orig_data, const pixel_t *rec_data,
|
int sao_edge_ddistortion(const kvz_pixel *orig_data, const kvz_pixel *rec_data,
|
||||||
int block_width, int block_height,
|
int block_width, int block_height,
|
||||||
int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES])
|
int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES])
|
||||||
{
|
{
|
||||||
|
@ -93,10 +93,10 @@ int sao_edge_ddistortion(const pixel_t *orig_data, const pixel_t *rec_data,
|
||||||
|
|
||||||
for (y = 1; y < block_height - 1; ++y) {
|
for (y = 1; y < block_height - 1; ++y) {
|
||||||
for (x = 1; x < block_width - 1; ++x) {
|
for (x = 1; x < block_width - 1; ++x) {
|
||||||
const pixel_t *c_data = &rec_data[y * block_width + x];
|
const kvz_pixel *c_data = &rec_data[y * block_width + x];
|
||||||
pixel_t a = c_data[a_ofs.y * block_width + a_ofs.x];
|
kvz_pixel a = c_data[a_ofs.y * block_width + a_ofs.x];
|
||||||
pixel_t c = c_data[0];
|
kvz_pixel c = c_data[0];
|
||||||
pixel_t b = c_data[b_ofs.y * block_width + b_ofs.x];
|
kvz_pixel b = c_data[b_ofs.y * block_width + b_ofs.x];
|
||||||
|
|
||||||
int offset = offsets[sao_calc_eo_cat(a, b, c)];
|
int offset = offsets[sao_calc_eo_cat(a, b, c)];
|
||||||
|
|
||||||
|
@ -326,7 +326,7 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4],
|
||||||
* \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
|
* \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
|
||||||
* \param sao_bands an array of bands for original and reconstructed block
|
* \param sao_bands an array of bands for original and reconstructed block
|
||||||
*/
|
*/
|
||||||
static void calc_sao_bands(const encoder_state_t * const state, const pixel_t *orig_data, const pixel_t *rec_data,
|
static void calc_sao_bands(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data,
|
||||||
int block_width, int block_height,
|
int block_width, int block_height,
|
||||||
int sao_bands[2][32])
|
int sao_bands[2][32])
|
||||||
{
|
{
|
||||||
@@ -349,7 +349,7 @@ static void calc_sao_bands(const encoder_state_t * const state, const pixel_t *o
 * \param dir_offsets
 * \param is_chroma 0 for luma, 1 for chroma. Indicates
 */
-static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data,
+static void calc_sao_edge_dir(const kvz_pixel *orig_data, const kvz_pixel *rec_data,
 int eo_class, int block_width, int block_height,
 int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES])
 {

@@ -362,10 +362,10 @@ static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data,
 // their neighbours.
 for (y = 1; y < block_height - 1; ++y) {
 for (x = 1; x < block_width - 1; ++x) {
-const pixel_t *c_data = &rec_data[y * block_width + x];
-pixel_t a = c_data[a_ofs.y * block_width + a_ofs.x];
-pixel_t c = c_data[0];
-pixel_t b = c_data[b_ofs.y * block_width + b_ofs.x];
+const kvz_pixel *c_data = &rec_data[y * block_width + x];
+kvz_pixel a = c_data[a_ofs.y * block_width + a_ofs.x];
+kvz_pixel c = c_data[0];
+kvz_pixel b = c_data[b_ofs.y * block_width + b_ofs.x];

 int eo_cat = sao_calc_eo_cat(a, b, c);

@@ -376,7 +376,7 @@ static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data,
 }

 static void sao_reconstruct_color(const encoder_control_t * const encoder,
-const pixel_t *rec_data, pixel_t *new_rec_data,
+const kvz_pixel *rec_data, kvz_pixel *new_rec_data,
 const sao_info_t *sao,
 int stride, int new_stride,
 int block_width, int block_height,

@@ -401,15 +401,15 @@ static void sao_reconstruct_color(const encoder_control_t * const encoder,
 for (x = 0; x < block_width; ++x) {
 vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0];
 vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1];
-const pixel_t *c_data = &rec_data[y * stride + x];
-pixel_t *new_data = &new_rec_data[y * new_stride + x];
-pixel_t a = c_data[a_ofs.y * stride + a_ofs.x];
-pixel_t c = c_data[0];
-pixel_t b = c_data[b_ofs.y * stride + b_ofs.x];
+const kvz_pixel *c_data = &rec_data[y * stride + x];
+kvz_pixel *new_data = &new_rec_data[y * new_stride + x];
+kvz_pixel a = c_data[a_ofs.y * stride + a_ofs.x];
+kvz_pixel c = c_data[0];
+kvz_pixel b = c_data[b_ofs.y * stride + b_ofs.x];

 int eo_cat = sao_calc_eo_cat(a, b, c);

-new_data[0] = (pixel_t)CLIP(0, (1 << KVZ_BIT_DEPTH) - 1, c_data[0] + sao->offsets[eo_cat + offset_v]);
+new_data[0] = (kvz_pixel)CLIP(0, (1 << KVZ_BIT_DEPTH) - 1, c_data[0] + sao->offsets[eo_cat + offset_v]);
 }
 }
 }
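Both loops above classify a pixel c against its two neighbours a and b along the eo_class direction via sao_calc_eo_cat, which is not part of this diff. A hedged sketch of the standard HEVC edge-offset categories it maps to:

/* Illustrative sketch of HEVC edge-offset classification; the encoder's
 * sao_calc_eo_cat may differ in details. 1 = local minimum, 2 = concave
 * corner, 0 = flat/monotonic, 3 = convex corner, 4 = local maximum. */
static int edge_category(kvz_pixel a, kvz_pixel b, kvz_pixel c)
{
  int cmp_a = (c < a) ? -1 : (c > a) ? 1 : 0;
  int cmp_b = (c < b) ? -1 : (c > b) ? 1 : 0;
  switch (cmp_a + cmp_b) {
    case -2: return 1; /* smaller than both neighbours */
    case -1: return 2; /* smaller than one, equal to the other */
    case  1: return 3; /* larger than one, equal to the other */
    case  2: return 4; /* larger than both neighbours */
    default: return 0; /* no edge offset applied */
  }
}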
@@ -527,7 +527,7 @@ static void sao_calc_edge_block_dims(const videoframe_t * const frame, color_t c
 rec->x = (rec->x == 0 ? 0 : -1);
 }

-void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const pixel_t *old_rec,
+void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const kvz_pixel *old_rec,
 unsigned x_ctb, unsigned y_ctb,
 const sao_info_t *sao, color_t color_i)
 {

@@ -536,12 +536,12 @@ void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * fra
 const int lcu_stride = LCU_WIDTH >> is_chroma;
 const int buf_stride = lcu_stride + 2;

-pixel_t *recdata = frame->rec->data[color_i];
-pixel_t buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)];
-pixel_t new_rec[LCU_WIDTH * LCU_WIDTH];
+kvz_pixel *recdata = frame->rec->data[color_i];
+kvz_pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)];
+kvz_pixel new_rec[LCU_WIDTH * LCU_WIDTH];
 // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32.
-pixel_t *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)];
-const pixel_t *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)];
+kvz_pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)];
+const kvz_pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)];

 vector2d_t ofs;
 vector2d_t tl = { 1, 1 };

@@ -593,7 +593,7 @@ void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * fra


 static void sao_search_edge_sao(const encoder_state_t * const state,
-const pixel_t * data[], const pixel_t * recdata[],
+const kvz_pixel * data[], const kvz_pixel * recdata[],
 int block_width, int block_height,
 unsigned buf_cnt,
 sao_info_t *sao_out, sao_info_t *sao_top,

@@ -671,7 +671,7 @@ static void sao_search_edge_sao(const encoder_state_t * const state,
 }


-static void sao_search_band_sao(const encoder_state_t * const state, const pixel_t * data[], const pixel_t * recdata[],
+static void sao_search_band_sao(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[],
 int block_width, int block_height,
 unsigned buf_cnt,
 sao_info_t *sao_out, sao_info_t *sao_top,

@@ -719,7 +719,7 @@ static void sao_search_band_sao(const encoder_state_t * const state, const pixel
 * \param buf_cnt Number of pointers data and recdata have.
 * \param sao_out Output parameter for the best sao parameters.
 */
-static void sao_search_best_mode(const encoder_state_t * const state, const pixel_t * data[], const pixel_t * recdata[],
+static void sao_search_best_mode(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[],
 int block_width, int block_height,
 unsigned buf_cnt,
 sao_info_t *sao_out, sao_info_t *sao_top,

@@ -831,10 +831,10 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t *
 {
 int block_width = (LCU_WIDTH / 2);
 int block_height = (LCU_WIDTH / 2);
-const pixel_t *orig_list[2];
-const pixel_t *rec_list[2];
-pixel_t orig[2][LCU_CHROMA_SIZE];
-pixel_t rec[2][LCU_CHROMA_SIZE];
+const kvz_pixel *orig_list[2];
+const kvz_pixel *rec_list[2];
+kvz_pixel orig[2][LCU_CHROMA_SIZE];
+kvz_pixel rec[2][LCU_CHROMA_SIZE];
 color_t color_i;

 // Check for right and bottom boundaries.

@@ -849,8 +849,8 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t *

 // Copy data to temporary buffers and init orig and rec lists to point to those buffers.
 for (color_i = COLOR_U; color_i <= COLOR_V; ++color_i) {
-pixel_t *data = &frame->source->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->source->stride / 2)];
-pixel_t *recdata = &frame->rec->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->rec->stride / 2)];
+kvz_pixel *data = &frame->source->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->source->stride / 2)];
+kvz_pixel *recdata = &frame->rec->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->rec->stride / 2)];
 pixels_blit(data, orig[color_i - 1], block_width, block_height,
 frame->source->stride / 2, block_width);
 pixels_blit(recdata, rec[color_i - 1], block_width, block_height,

@@ -865,12 +865,12 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t *

 void sao_search_luma(const encoder_state_t * const state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3])
 {
-pixel_t orig[LCU_LUMA_SIZE];
-pixel_t rec[LCU_LUMA_SIZE];
-const pixel_t * orig_list[1] = { NULL };
-const pixel_t * rec_list[1] = { NULL };
-pixel_t *data = &frame->source->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->source->stride)];
-pixel_t *recdata = &frame->rec->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->rec->stride)];
+kvz_pixel orig[LCU_LUMA_SIZE];
+kvz_pixel rec[LCU_LUMA_SIZE];
+const kvz_pixel * orig_list[1] = { NULL };
+const kvz_pixel * rec_list[1] = { NULL };
+kvz_pixel *data = &frame->source->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->source->stride)];
+kvz_pixel *recdata = &frame->rec->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->rec->stride)];
 int block_width = LCU_WIDTH;
 int block_height = LCU_WIDTH;


@@ -901,9 +901,9 @@ void sao_reconstruct_frame(encoder_state_t * const state)
 // These are needed because SAO needs the pre-SAO pixels form left and
 // top LCUs. Single pixel wide buffers, like what search_lcu takes, would
 // be enough though.
-pixel_t *new_y_data = MALLOC(pixel_t, frame->rec->width * frame->rec->height);
-pixel_t *new_u_data = MALLOC(pixel_t, (frame->rec->width * frame->rec->height) >> 2);
-pixel_t *new_v_data = MALLOC(pixel_t, (frame->rec->width * frame->rec->height) >> 2);
+kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->rec->width * frame->rec->height);
+kvz_pixel *new_u_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2);
+kvz_pixel *new_v_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2);

 pixels_blit(frame->rec->y, new_y_data, frame->rec->width, frame->rec->height, frame->rec->stride, frame->rec->width);
 pixels_blit(frame->rec->u, new_u_data, frame->rec->width/2, frame->rec->height/2, frame->rec->stride/2, frame->rec->width/2);
@@ -58,7 +58,7 @@ typedef struct sao_info_t {
 void init_sao_info(sao_info_t *sao);
 void sao_search_chroma(const encoder_state_t * state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3]);
 void sao_search_luma(const encoder_state_t * state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3]);
-void sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const pixel_t *old_rec,
+void sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const kvz_pixel *old_rec,
 unsigned x_ctb, unsigned y_ctb,
 const sao_info_t *sao, color_t color_i);
 void sao_reconstruct_frame(encoder_state_t *state);

82  src/search.c
@@ -860,13 +860,13 @@ static unsigned search_frac(const encoder_state_t * const state,

 //create buffer for block + extra for filter
 int src_stride = block_width+FILTER_SIZE+1;
-pixel_t src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)];
-pixel_t* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)];
+kvz_pixel src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)];
+kvz_pixel* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)];

 //destination buffer for interpolation
 int dst_stride = (block_width+1)*4;
-pixel_t dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
-pixel_t* dst_off = &dst[dst_stride*4+4];
+kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
+kvz_pixel* dst_off = &dst[dst_stride*4+4];

 extend_borders(orig->x, orig->y, mv.x-1, mv.y-1,
 state->tile->lcu_offset_x * LCU_WIDTH,

@@ -885,8 +885,8 @@ static unsigned search_frac(const encoder_state_t * const state,
 for (i = 0; i < 9; ++i) {
 const vector2d_t *pattern = &square[i];

-pixel_t tmp_filtered[LCU_WIDTH*LCU_WIDTH];
-pixel_t tmp_pic[LCU_WIDTH*LCU_WIDTH];
+kvz_pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH];
+kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH];

 int y,x;
 for(y = 0; y < block_width; ++y) {

@@ -925,8 +925,8 @@ static unsigned search_frac(const encoder_state_t * const state,
 for (i = 0; i < 9; ++i) {
 const vector2d_t *pattern = &square[i];

-pixel_t tmp_filtered[LCU_WIDTH*LCU_WIDTH];
-pixel_t tmp_pic[LCU_WIDTH*LCU_WIDTH];
+kvz_pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH];
+kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH];

 int y,x;
 for(y = 0; y < block_width; ++y) {

@@ -1123,8 +1123,8 @@ static int search_cu_inter(const encoder_state_t * const state, int x, int y, in
 uint32_t cost = 0;
 int8_t cu_mv_cand = 0;
 int16_t mv[2][2];
-pixel_t tmp_block[64 * 64];
-pixel_t tmp_pic[64 * 64];
+kvz_pixel tmp_block[64 * 64];
+kvz_pixel tmp_pic[64 * 64];
 // Force L0 and L1 references
 if (state->global->refmap[merge_cand[i].ref[0]].list == 2 || state->global->refmap[merge_cand[j].ref[1]].list == 1) continue;


@@ -1636,9 +1636,9 @@ static double search_intra_trdepth(encoder_state_t * const state,
 const bool reconstruct_chroma = !(x_px & 4 || y_px & 4);

 struct {
-pixel_t y[TR_MAX_WIDTH*TR_MAX_WIDTH];
-pixel_t u[TR_MAX_WIDTH*TR_MAX_WIDTH];
-pixel_t v[TR_MAX_WIDTH*TR_MAX_WIDTH];
+kvz_pixel y[TR_MAX_WIDTH*TR_MAX_WIDTH];
+kvz_pixel u[TR_MAX_WIDTH*TR_MAX_WIDTH];
+kvz_pixel v[TR_MAX_WIDTH*TR_MAX_WIDTH];
 } nosplit_pixels;
 cu_cbf_t nosplit_cbf;


@@ -1885,7 +1885,7 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double *
 * coefficients of the residual.
 */
 static double get_cost(encoder_state_t * const state,
-pixel_t *pred, pixel_t *orig_block,
+kvz_pixel *pred, kvz_pixel *orig_block,
 cost_pixel_nxn_func *satd_func,
 cost_pixel_nxn_func *sad_func,
 int width)

@@ -1914,8 +1914,8 @@ static double get_cost(encoder_state_t * const state,

 static void search_intra_chroma_rough(encoder_state_t * const state,
 int x_px, int y_px, int depth,
-const pixel_t *orig_u, const pixel_t *orig_v, int16_t origstride,
-const pixel_t *rec_u, const pixel_t *rec_v, int16_t recstride,
+const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride,
+const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride,
 int8_t luma_mode,
 int8_t modes[5], double costs[5])
 {

@@ -1931,11 +1931,11 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
 cost_pixel_nxn_func *const satd_func = pixels_get_satd_func(width);
 //cost_pixel_nxn_func *const sad_func = pixels_get_sad_func(width);

-pixel_t _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
-pixel_t *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
+kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
+kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);

-pixel_t _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
-pixel_t *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
+kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
+kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);

 pixels_blit(orig_u, orig_block, width, width, origstride, width);
 for (int i = 0; i < 5; ++i) {

@@ -1986,8 +1986,8 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
 * \return Number of prediction modes in param modes.
 */
 static int8_t search_intra_rough(encoder_state_t * const state,
-pixel_t *orig, int32_t origstride,
-pixel_t *rec, int16_t recstride,
+kvz_pixel *orig, int32_t origstride,
+kvz_pixel *rec, int16_t recstride,
 int width, int8_t *intra_preds,
 int8_t modes[35], double costs[35])
 {

@@ -1995,15 +1995,15 @@ static int8_t search_intra_rough(encoder_state_t * const state,
 cost_pixel_nxn_func *sad_func = pixels_get_sad_func(width);

 // Temporary block arrays
-pixel_t _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
-pixel_t *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
+kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
+kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);

-pixel_t _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
-pixel_t *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
+kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT];
+kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);

-pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
+kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];

-pixel_t *recf = &rec_filtered_temp[recstride + 1];
+kvz_pixel *recf = &rec_filtered_temp[recstride + 1];

 assert(width == 4 || width == 8 || width == 16 || width == 32);

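The _pred/pred and _orig_block/orig_block pairs implement the usual over-allocate-then-align trick: the raw array is padded by SIMD_ALIGNMENT and ALIGNED_POINTER rounds the pointer up so the vectorized SAD/SATD kernels get aligned loads. A sketch of the idea (ALIGNED_POINTER's real definition lives elsewhere in the tree):

#include <stdint.h>

/* Round ptr up to the next multiple of align (align must be a power of two). */
#define ALIGN_UP(ptr, align) \
  ((void *)(((uintptr_t)(ptr) + (align) - 1) & ~(uintptr_t)((align) - 1)))

/* Usage mirrors the pattern above:
 *   kvz_pixel _buf[SIZE + SIMD_ALIGNMENT];
 *   kvz_pixel *buf = ALIGN_UP(_buf, SIMD_ALIGNMENT); */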
@@ -2150,8 +2150,8 @@ static int8_t search_intra_rough(encoder_state_t * const state,
 */
 static int8_t search_intra_rdo(encoder_state_t * const state,
 int x_px, int y_px, int depth,
-pixel_t *orig, int32_t origstride,
-pixel_t *rec, int16_t recstride,
+kvz_pixel *orig, int32_t origstride,
+kvz_pixel *rec, int16_t recstride,
 int8_t *intra_preds,
 int modes_to_check,
 int8_t modes[35], double costs[35],

@@ -2160,13 +2160,13 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
 const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->tr_depth_intra);
 const int width = LCU_WIDTH >> depth;

-pixel_t pred[LCU_WIDTH * LCU_WIDTH + 1];
-pixel_t orig_block[LCU_WIDTH * LCU_WIDTH + 1];
+kvz_pixel pred[LCU_WIDTH * LCU_WIDTH + 1];
+kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1];
 int rdo_mode;
 int pred_mode;

-pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
-pixel_t *recf = &rec_filtered_temp[recstride + 1];
+kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1];
+kvz_pixel *recf = &rec_filtered_temp[recstride + 1];

 // Generate filtered reference pixels.
 {

@@ -2273,8 +2273,8 @@ static double search_cu_intra(encoder_state_t * const state,

 cu_info_t *cur_cu = &lcu->cu[cu_index];

-pixel_t rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)];
-pixel_t *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1];
+kvz_pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)];
+kvz_pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1];

 int8_t candidate_modes[3];


@@ -2304,7 +2304,7 @@ static double search_cu_intra(encoder_state_t * const state,
 double costs[35];

 // Find best intra mode for 2Nx2N.
-pixel_t *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
+kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
 unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);

 int8_t number_of_modes;

@@ -2499,8 +2499,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
 }

 if (num_modes != 1 && num_modes != 5) {
-pixel_t rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)];
-pixel_t rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)];
+kvz_pixel rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)];
+kvz_pixel rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)];

 const int16_t width_c = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
 const int16_t rec_stride = width_c * 2 + 8;

@@ -2518,8 +2518,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
 lcu);

 vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 };
-pixel_t *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
-pixel_t *ref_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
+kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
+kvz_pixel *ref_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];

 search_intra_chroma_rough(state, x, y, depth,
 ref_u, ref_v, LCU_WIDTH_C,
@@ -80,7 +80,7 @@ __m128i eight_tap_filter_x4_and_flip_16bit(__m128i data0, __m128i data1, __m128i
 return a;
 }

-void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], pixel_t *src, int16_t src_stride, int16_t* __restrict dst)
+void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], kvz_pixel *src, int16_t src_stride, int16_t* __restrict dst)
 {

 //Load 2 rows per xmm register

@@ -104,7 +104,7 @@ void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], pixel_t *src, int16_t s
 eight_tap_filter_x8_and_flip(rows01, rows23, rows45, rows67, (__m128i*)(&filter[3]), (__m128i*)(dst + 3 * dst_stride));
 }

-static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int16_t *src, int16_t src_stride, int offset, int combined_shift, pixel_t* __restrict dst, int16_t dst_stride)
+static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int16_t *src, int16_t src_stride, int offset, int combined_shift, kvz_pixel* __restrict dst, int16_t dst_stride)
 {

 //Load a row per xmm register

@@ -157,7 +157,7 @@ static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int

 }

-int16_t eight_tap_filter_hor_avx2(int8_t *filter, pixel_t *data)
+int16_t eight_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data)
 {
 union {
 __m128i vector;

@@ -185,7 +185,7 @@ int32_t eight_tap_filter_hor_16bit_avx2(int8_t *filter, int16_t *data)
 return temp;
 }

-int16_t eight_tap_filter_ver_avx2(int8_t *filter, pixel_t *data, int16_t stride)
+int16_t eight_tap_filter_ver_avx2(int8_t *filter, kvz_pixel *data, int16_t stride)
 {
 int16_t temp = 0;
 for (int i = 0; i < 8; ++i)

@@ -207,7 +207,7 @@ int32_t eight_tap_filter_ver_16bit_avx2(int8_t *filter, int16_t *data, int16_t s
 return temp;
 }

-int16_t four_tap_filter_hor_avx2(int8_t *filter, pixel_t *data)
+int16_t four_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data)
 {
 int16_t temp = 0;
 for (int i = 0; i < 4; ++i)

@@ -229,7 +229,7 @@ int32_t four_tap_filter_hor_16bit_avx2(int8_t *filter, int16_t *data)
 return temp;
 }

-int16_t four_tap_filter_ver_avx2(int8_t *filter, pixel_t *data, int16_t stride)
+int16_t four_tap_filter_ver_avx2(int8_t *filter, kvz_pixel *data, int16_t stride)
 {
 int16_t temp = 0;
 for (int i = 0; i < 4; ++i)

@@ -251,7 +251,7 @@ int32_t four_tap_filter_ver_16bit_avx2(int8_t *filter, int16_t *data, int16_t st
 return temp;
 }

-void filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {

 int32_t x, y;

@@ -312,7 +312,7 @@ void filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder,
 * \param dst_stride stride of destination image
 *
 */
-void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {
 /* ____________
 * | B0,0|ae0,0|

@@ -373,7 +373,7 @@ void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, p
 }
 }

-void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {

 int32_t x, y;

@@ -481,8 +481,8 @@ void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, pi
 }
 }

-void extend_borders_avx2(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height,
-int filterSize, int width, int height, pixel_t *dst) {
+void extend_borders_avx2(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
+int filterSize, int width, int height, kvz_pixel *dst) {

 int16_t mv[2] = { mv_x, mv_y };
 int halfFilterSize = filterSize >> 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static unsigned sad_8bit_8x8_avx2(const pixel_t *buf1, const pixel_t *buf2)
|
static unsigned sad_8bit_8x8_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2)
|
||||||
{
|
{
|
||||||
const __m256i *const a = (const __m256i *)buf1;
|
const __m256i *const a = (const __m256i *)buf1;
|
||||||
const __m256i *const b = (const __m256i *)buf2;
|
const __m256i *const b = (const __m256i *)buf2;
|
||||||
|
@ -108,7 +108,7 @@ static unsigned sad_8bit_8x8_avx2(const pixel_t *buf1, const pixel_t *buf2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static unsigned sad_8bit_16x16_avx2(const pixel_t *buf1, const pixel_t *buf2)
|
static unsigned sad_8bit_16x16_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2)
|
||||||
{
|
{
|
||||||
const __m256i *const a = (const __m256i *)buf1;
|
const __m256i *const a = (const __m256i *)buf1;
|
||||||
const __m256i *const b = (const __m256i *)buf2;
|
const __m256i *const b = (const __m256i *)buf2;
|
||||||
|
@ -118,7 +118,7 @@ static unsigned sad_8bit_16x16_avx2(const pixel_t *buf1, const pixel_t *buf2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static unsigned sad_8bit_32x32_avx2(const pixel_t *buf1, const pixel_t *buf2)
|
static unsigned sad_8bit_32x32_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2)
|
||||||
{
|
{
|
||||||
const __m256i *const a = (const __m256i *)buf1;
|
const __m256i *const a = (const __m256i *)buf1;
|
||||||
const __m256i *const b = (const __m256i *)buf2;
|
const __m256i *const b = (const __m256i *)buf2;
|
||||||
|
|
|
@@ -32,7 +32,7 @@
 extern int8_t g_luma_filter[4][8];
 extern int8_t g_chroma_filter[8][4];

-int16_t eight_tap_filter_hor_generic(int8_t *filter, pixel_t *data)
+int16_t eight_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data)
 {
 int16_t temp = 0;
 for (int i = 0; i < 8; ++i)
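The generic tap filters are plain FIR dot products. The loop body falls outside the diff context, so the following is an assumption about its shape rather than the actual code:

/* Hypothetical completion of the 8-tap horizontal filter. */
static int16_t eight_tap_dot_sketch(const int8_t filter[8], const kvz_pixel *data)
{
  int16_t temp = 0;
  for (int i = 0; i < 8; ++i) {
    temp += filter[i] * data[i];  /* the vertical variant indexes data[i * stride] */
  }
  return temp;
}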
@@ -54,7 +54,7 @@ int32_t eight_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data)
 return temp;
 }

-int16_t eight_tap_filter_ver_generic(int8_t *filter, pixel_t *data, int16_t stride)
+int16_t eight_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride)
 {
 int16_t temp = 0;
 for (int i = 0; i < 8; ++i)

@@ -76,7 +76,7 @@ int32_t eight_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_
 return temp;
 }

-int16_t four_tap_filter_hor_generic(int8_t *filter, pixel_t *data)
+int16_t four_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data)
 {
 int16_t temp = 0;
 for (int i = 0; i < 4; ++i)

@@ -98,7 +98,7 @@ int32_t four_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data)
 return temp;
 }

-int16_t four_tap_filter_ver_generic(int8_t *filter, pixel_t *data, int16_t stride)
+int16_t four_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride)
 {
 int16_t temp = 0;
 for (int i = 0; i < 4; ++i)

@@ -120,7 +120,7 @@ int32_t four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_t
 return temp;
 }

-void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {
 //TODO: horizontal and vertical only filtering
 int32_t x, y;

@@ -180,7 +180,7 @@ void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encode
 * \param dst_stride stride of destination image
 *
 */
-void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {
 /* ____________
 * | B0,0|ae0,0|

@@ -241,7 +241,7 @@ void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder
 }
 }

-void filter_inter_octpel_chroma_generic(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
+void filter_inter_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
 {

 int32_t x, y;

@@ -349,8 +349,8 @@ void filter_inter_octpel_chroma_generic(const encoder_control_t * const encoder,
 }
 }

-void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height,
-int filterSize, int width, int height, pixel_t *dst) {
+void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
+int filterSize, int width, int height, kvz_pixel *dst) {

 int16_t mv[2] = { mv_x, mv_y };
 int halfFilterSize = filterSize >> 1;

@@ -29,7 +29,7 @@
 #include "nal.h"


-static void array_checksum_generic(const pixel_t* data,
+static void array_checksum_generic(const kvz_pixel* data,
 const int height, const int width,
 const int stride,
 unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) {

@@ -52,7 +52,7 @@ static void array_checksum_generic(const pixel_t* data,
 checksum_out[3] = (checksum) & 0xff;
 }

-static void array_checksum_generic4(const pixel_t* data,
+static void array_checksum_generic4(const kvz_pixel* data,
 const int height, const int width,
 const int stride,
 unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) {

@@ -95,7 +95,7 @@ static void array_checksum_generic4(const pixel_t* data,
 checksum_out[3] = (checksum) & 0xff;
 }

-static void array_checksum_generic8(const pixel_t* data,
+static void array_checksum_generic8(const kvz_pixel* data,
 const int height, const int width,
 const int stride,
 unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) {
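The array_checksum_generic family computes the checksum variant of the HEVC decoded-picture-hash SEI; generic4 and generic8 are presumably unrolled versions of the same loop. A sketch of the spec's formula, with the four checksum_out bytes taken big-endian from the 32-bit sum:

#include <stdint.h>

/* Sketch of the HEVC checksum picture hash; not the encoder's exact code. */
static uint32_t checksum_plane_sketch(const kvz_pixel *data,
                                      int height, int width, int stride)
{
  uint32_t checksum = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      uint8_t mask = (x & 0xff) ^ (y & 0xff) ^ (x >> 8) ^ (y >> 8);
      checksum += (data[y * stride + x] & 0xff) ^ mask;
    }
  }
  return checksum;  /* checksum_out[0..3] = big-endian bytes of this value */
}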
@@ -28,7 +28,7 @@

 // Function to clip int16_t to pixel. (0-255 or 0-1023)
 // Assumes PIXEL_MAX to be 2^n-1
-pixel_t fast_clip_16bit_to_pixel(int16_t value)
+kvz_pixel fast_clip_16bit_to_pixel(int16_t value)
 {
 // Ensure that compiler generates arithmetic shift from ">>"
 #if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__)

@@ -50,7 +50,7 @@ pixel_t fast_clip_16bit_to_pixel(int16_t value)

 // Function to clip int32_t to pixel. (0-255 or 0-1023)
 // Assumes PIXEL_MAX to be 2^n-1
-pixel_t fast_clip_32bit_to_pixel(int32_t value)
+kvz_pixel fast_clip_32bit_to_pixel(int32_t value)
 {
 // Ensure that compiler generates arithmetic shift from ">>"
 #if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__)
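With PIXEL_MAX equal to 2^n - 1, the clip can be done branch-free using sign masks produced by arithmetic shifts, which is why the code insists that the compiler emit an arithmetic ">>". The bodies are outside the diff context; a sketch of the technique:

/* A minimal sketch, assuming PIXEL_MAX == 2^n - 1 and an arithmetic ">>";
 * the function bodies themselves are not shown in this diff. */
static kvz_pixel fast_clip_sketch(int16_t value)
{
  value &= ~(value >> 15);             /* negative values become 0 */
  value |= (PIXEL_MAX - value) >> 15;  /* values above PIXEL_MAX saturate */
  return (kvz_pixel)(value & PIXEL_MAX);
}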
@@ -84,7 +84,7 @@ pixel_t fast_clip_32bit_to_pixel(int32_t value)
 *
 * \returns Sum of Absolute Differences
 */
-static unsigned reg_sad_generic(const pixel_t * const data1, const pixel_t * const data2,
+static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel * const data2,
 const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
 int y, x;
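The generic SAD is a straightforward double loop over two strided buffers; a minimal sketch (the real body is elided by the diff context):

#include <stdlib.h>

/* Sum of absolute differences over a width x height region. */
static unsigned sad_sketch(const kvz_pixel *data1, const kvz_pixel *data2,
                           int width, int height,
                           unsigned stride1, unsigned stride2)
{
  unsigned sad = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      sad += abs((int)data1[y * stride1 + x] - (int)data2[y * stride2 + x]);
    }
  }
  return sad;
}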
@@ -104,7 +104,7 @@ static unsigned reg_sad_generic(const pixel_t * const data1, const pixel_t * con
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 * From HM 13.0
 */
-static unsigned satd_8bit_4x4_generic(const pixel_t *piOrg, const pixel_t *piCur)
+static unsigned satd_8bit_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
 {
 int32_t k, satd = 0, diff[16], m[16], d[16];
 for (k = 0; k < 16; ++k) {
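SATD is the sum of absolute values of the Hadamard-transformed difference block. The HM-derived code builds diff[] and then applies butterfly passes; the matrix form below shows the same computation conceptually, without HM's exact final scaling:

#include <stdlib.h>

/* Conceptual 4x4 SATD over a precomputed difference block diff[16]. */
static unsigned satd_4x4_concept(const int32_t diff[16])
{
  static const int h[4][4] = {
    { 1,  1,  1,  1 },
    { 1, -1,  1, -1 },
    { 1,  1, -1, -1 },
    { 1, -1, -1,  1 },
  };
  int32_t tmp[4][4] = { { 0 } }, out[4][4] = { { 0 } };
  unsigned sum = 0;
  for (int i = 0; i < 4; ++i)        /* transform rows */
    for (int j = 0; j < 4; ++j)
      for (int k = 0; k < 4; ++k)
        tmp[i][j] += h[j][k] * diff[i * 4 + k];
  for (int j = 0; j < 4; ++j)        /* transform columns */
    for (int i = 0; i < 4; ++i)
      for (int k = 0; k < 4; ++k)
        out[i][j] += h[i][k] * tmp[k][j];
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      sum += (unsigned)abs(out[i][j]);
  return sum;
}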
@@ -191,8 +191,8 @@ static unsigned satd_8bit_4x4_generic(const pixel_t *piOrg, const pixel_t *piCur
 /**
 * \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
 */
-unsigned satd_16bit_8x8_general(const pixel_t * piOrg, const int32_t iStrideOrg,
-const pixel_t * piCur, const int32_t iStrideCur)
+unsigned satd_16bit_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg,
+const kvz_pixel * piCur, const int32_t iStrideCur)
 {
 int32_t k, i, j, jj, sad = 0;
 int32_t diff[64], m1[8][8], m2[8][8], m3[8][8];

@@ -307,10 +307,10 @@ cost_pixel_nxn_func satd_8bit_32x32_generic;
 cost_pixel_nxn_func satd_8bit_64x64_generic;

 // These macros define sadt_16bit_NxN for N = 8, 16, 32, 64
-SATD_NXN(8, pixel_t, 8bit)
-SATD_NXN(16, pixel_t, 8bit)
-SATD_NXN(32, pixel_t, 8bit)
-SATD_NXN(64, pixel_t, 8bit)
+SATD_NXN(8, kvz_pixel, 8bit)
+SATD_NXN(16, kvz_pixel, 8bit)
+SATD_NXN(32, kvz_pixel, 8bit)
+SATD_NXN(64, kvz_pixel, 8bit)

 // Function macro for defining SAD calculating functions
 // for fixed size blocks.

@@ -336,11 +336,11 @@ static cost_pixel_nxn_func sad_8bit_64x64_generic;
 // These macros define sad_16bit_nxn functions for n = 4, 8, 16, 32, 64
 // with function signatures of cost_16bit_nxn_func.
 // They are used through get_pixel_sad_func.
-SAD_NXN(4, pixel_t, 8bit)
-SAD_NXN(8, pixel_t, 8bit)
-SAD_NXN(16, pixel_t, 8bit)
-SAD_NXN(32, pixel_t, 8bit)
-SAD_NXN(64, pixel_t, 8bit)
+SAD_NXN(4, kvz_pixel, 8bit)
+SAD_NXN(8, kvz_pixel, 8bit)
+SAD_NXN(16, kvz_pixel, 8bit)
+SAD_NXN(32, kvz_pixel, 8bit)
+SAD_NXN(64, kvz_pixel, 8bit)


 int strategy_register_picture_generic(void* opaque)
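SATD_NXN and SAD_NXN stamp out one fixed-size function per block size with the cost_pixel_nxn_func signature, which is why their invocations name the pixel type and therefore had to change in this commit. One plausible shape for such a generator (an assumption; the real macro is defined earlier in the file):

/* Hypothetical sketch of a fixed-size wrapper generator: it binds the
 * general strided routine to a constant width, height and stride. */
#define SAD_NXN_SKETCH(n, pixel_type, suffix) \
  static unsigned sad_ ## suffix ## _ ## n ## x ## n ## _generic( \
      const pixel_type *block1, const pixel_type *block2) \
  { \
    return reg_sad_generic(block1, block2, (n), (n), (n), (n)); \
  }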
@@ -24,10 +24,10 @@ int strategy_register_picture_generic(void* opaque);

 // Function to clip int16_t to pixel. (0-255 or 0-1023)
 // Assumes PIXEL_MAX to be 2^n-1
-pixel_t fast_clip_16bit_to_pixel(int16_t value);
+kvz_pixel fast_clip_16bit_to_pixel(int16_t value);

 // Function to clip int32_t to pixel. (0-255 or 0-1023)
 // Assumes PIXEL_MAX to be 2^n-1
-pixel_t fast_clip_32bit_to_pixel(int32_t value);
+kvz_pixel fast_clip_32bit_to_pixel(int32_t value);

 #endif //STRATEGIES_PICTURE_GENERIC_H_

@@ -31,7 +31,7 @@
 # include <stdlib.h>


-static unsigned reg_sad_sse2(const pixel_t * const data1, const pixel_t * const data2,
+static unsigned reg_sad_sse2(const kvz_pixel * const data1, const kvz_pixel * const data2,
 const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
 int y, x;

@@ -56,7 +56,7 @@ static unsigned reg_sad_sse2(const pixel_t * const data1, const pixel_t * const
 return sad;
 }

-static unsigned sad_8bit_4x4_sse2(const pixel_t *buf1, const pixel_t *buf2)
+static unsigned sad_8bit_4x4_sse2(const kvz_pixel *buf1, const kvz_pixel *buf2)
 {
 const __m128i *const mbuf1 = (const __m128i *)buf1;
 const __m128i *const mbuf2 = (const __m128i *)buf2;

@@ -31,7 +31,7 @@
 # include <stdlib.h>


-static unsigned reg_sad_sse41(const pixel_t * const data1, const pixel_t * const data2,
+static unsigned reg_sad_sse41(const kvz_pixel * const data1, const kvz_pixel * const data2,
 const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
 int y, x;

@@ -25,11 +25,11 @@
 #include "encoder.h"


-typedef unsigned(ipol_func)(const encoder_control_t * encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst,
+typedef unsigned(ipol_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst,
 int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);

-typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height,
-int filterSize, int width, int height, pixel_t *dst);
+typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
+int filterSize, int width, int height, kvz_pixel *dst);


 // Declare function pointers.
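These typedefs exist for the strategies layer: each kernel family has generic, SSE2, SSE4.1 and AVX2 implementations behind a common signature, and one is picked at run time. A hedged sketch of the dispatch idea, with hypothetical names:

/* Sketch of runtime strategy selection; names are hypothetical, not the
 * encoder's actual API. */
typedef unsigned (sad_16x16_func)(const kvz_pixel *a, const kvz_pixel *b);

static unsigned sad_16x16_plain(const kvz_pixel *a, const kvz_pixel *b)
{
  unsigned sad = 0;
  for (int i = 0; i < 16 * 16; ++i) {
    sad += (a[i] > b[i]) ? (unsigned)(a[i] - b[i]) : (unsigned)(b[i] - a[i]);
  }
  return sad;
}

/* Chosen once at startup; real code would test CPU features and swap in
 * an SSE2/SSE4.1/AVX2 variant when available. */
static sad_16x16_func *sad_16x16 = sad_16x16_plain;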
@@ -22,7 +22,7 @@

 #include "generic/nal-generic.h"

-void (*array_checksum)(const pixel_t* data,
+void (*array_checksum)(const kvz_pixel* data,
 const int height, const int width,
 const int stride,
 unsigned char checksum_out[SEI_HASH_MAX_LENGTH]);

@@ -30,7 +30,7 @@
 * \param width Width of the picture.
 * \param stride Width of one row in the pixel array.
 */
-typedef void (*array_checksum_func)(const pixel_t* data,
+typedef void (*array_checksum_func)(const kvz_pixel* data,
 const int height, const int width,
 const int stride,
 unsigned char checksum_out[SEI_HASH_MAX_LENGTH]);

@@ -23,10 +23,10 @@
 #include "../image.h"


-typedef unsigned(reg_sad_func)(const pixel_t *const data1, const pixel_t *const data2,
+typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2,
 const int width, const int height,
 const unsigned stride1, const unsigned stride2);
-typedef unsigned (cost_pixel_nxn_func)(const pixel_t *block1, const pixel_t *block2);
+typedef unsigned (cost_pixel_nxn_func)(const kvz_pixel *block1, const kvz_pixel *block2);


 // Declare function pointers.

@@ -24,13 +24,13 @@
 \brief assembly functions header for sad
 */

-unsigned kvz_sad_4x4_avx(const pixel_t*, const pixel_t*);
-unsigned kvz_sad_8x8_avx(const pixel_t*, const pixel_t*);
-unsigned kvz_sad_16x16_avx(const pixel_t*, const pixel_t*);
+unsigned kvz_sad_4x4_avx(const kvz_pixel*, const kvz_pixel*);
+unsigned kvz_sad_8x8_avx(const kvz_pixel*, const kvz_pixel*);
+unsigned kvz_sad_16x16_avx(const kvz_pixel*, const kvz_pixel*);

-unsigned kvz_sad_4x4_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride);
-unsigned kvz_sad_8x8_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride);
-unsigned kvz_sad_16x16_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride);
+unsigned kvz_sad_4x4_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
+unsigned kvz_sad_8x8_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);
+unsigned kvz_sad_16x16_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride);


 #endif

@@ -24,10 +24,10 @@
 /*! \file picture-x86-asm-satd.h
 \brief assembly functions header for satd
 */
-unsigned kvz_satd_4x4_avx(const pixel_t *org, const pixel_t *cur);
-unsigned kvz_satd_8x8_avx(const pixel_t *org, const pixel_t *cur);
-unsigned kvz_satd_16x16_avx(const pixel_t *org, const pixel_t *cur);
-unsigned kvz_satd_32x32_avx(const pixel_t *org, const pixel_t *cur);
-unsigned kvz_satd_64x64_avx(const pixel_t *org, const pixel_t *cur);
+unsigned kvz_satd_4x4_avx(const kvz_pixel *org, const kvz_pixel *cur);
+unsigned kvz_satd_8x8_avx(const kvz_pixel *org, const kvz_pixel *cur);
+unsigned kvz_satd_16x16_avx(const kvz_pixel *org, const kvz_pixel *cur);
+unsigned kvz_satd_32x32_avx(const kvz_pixel *org, const kvz_pixel *cur);
+unsigned kvz_satd_64x64_avx(const kvz_pixel *org, const kvz_pixel *cur);

 #endif

@@ -29,7 +29,7 @@
 #include "picture-x86-asm-sad.h"
 #include "picture-x86-asm-satd.h"

-static unsigned kvz_sad_32x32_avx(const pixel_t *data1, const pixel_t *data2)
+static unsigned kvz_sad_32x32_avx(const kvz_pixel *data1, const kvz_pixel *data2)
 {
 unsigned sad = 0;
 sad += kvz_sad_16x16_avx(data1, data2);

@@ -39,7 +39,7 @@ static unsigned kvz_sad_32x32_avx(const pixel_t *data1, const pixel_t *data2)
 return sad;
 }

-static unsigned kvz_sad_32x32_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride)
+static unsigned kvz_sad_32x32_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride)
 {
 unsigned sad = 0;
 sad += kvz_sad_16x16_stride_avx(data1, data2, stride);
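The 32x32 and 64x64 assembly SADs are composed from four calls of the next block size down; for contiguous buffers each 16x16 block occupies 256 consecutive pixels. A sketch of the idea (only the first quadrant call is visible in the hunks above):

/* Sketch of composing a 32x32 SAD from four 16x16 quadrants of a
 * contiguous (unstrided) buffer. */
static unsigned sad_32x32_sketch(const kvz_pixel *data1, const kvz_pixel *data2)
{
  unsigned sad = 0;
  for (int quarter = 0; quarter < 4; ++quarter) {
    sad += kvz_sad_16x16_avx(data1 + 256 * quarter, data2 + 256 * quarter);
  }
  return sad;
}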
@@ -49,7 +49,7 @@ static unsigned kvz_sad_32x32_stride_avx(const pixel_t *data1, const pixel_t *da
 return sad;
 }

-static unsigned kvz_sad_64x64_avx(const pixel_t *data1, const pixel_t *data2)
+static unsigned kvz_sad_64x64_avx(const kvz_pixel *data1, const kvz_pixel *data2)
 {
 unsigned sad = 0;
 sad += kvz_sad_32x32_avx(data1, data2);

@@ -59,7 +59,7 @@ static unsigned kvz_sad_64x64_avx(const pixel_t *data1, const pixel_t *data2)
 return sad;
 }

-static unsigned kvz_sad_64x64_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride)
+static unsigned kvz_sad_64x64_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride)
 {
 unsigned sad = 0;
 sad += kvz_sad_32x32_stride_avx(data1, data2, stride);

@@ -69,7 +69,7 @@ static unsigned kvz_sad_64x64_stride_avx(const pixel_t *data1, const pixel_t *da
 return sad;
 }

-static unsigned kvz_sad_other_avx(const pixel_t * const data1, const pixel_t * const data2,
+static unsigned kvz_sad_other_avx(const kvz_pixel * const data1, const kvz_pixel * const data2,
 const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
 int y, x;

@@ -84,7 +84,7 @@ static unsigned kvz_sad_other_avx(const pixel_t * const data1, const pixel_t * c
 return sad;
 }

-static unsigned reg_sad_x86_asm(const pixel_t * const data1, const pixel_t * const data2,
+static unsigned reg_sad_x86_asm(const kvz_pixel * const data1, const kvz_pixel * const data2,
 const int width, const int height, const unsigned stride1, const unsigned stride2)
 {
 if (width == 4 && height == 4) {

@@ -157,7 +157,7 @@ static const strategy_to_select_t strategies_to_select[] = {
 { NULL, NULL },
 };

-unsigned satd_8bit_8x8_generic(const pixel_t * const block1, const pixel_t * const block2);
+unsigned satd_8bit_8x8_generic(const kvz_pixel * const block1, const kvz_pixel * const block2);


 #endif //STRATEGYSELECTOR_H_

@@ -326,8 +326,8 @@ int quantize_residual(encoder_state_t *const state,
 const cu_info_t *const cur_cu, const int width, const color_t color,
 const coeff_scan_order_t scan_order, const int use_trskip,
 const int in_stride, const int out_stride,
-const pixel_t *const ref_in, const pixel_t *const pred_in,
-pixel_t *rec_out, coeff_t *coeff_out)
+const kvz_pixel *const ref_in, const kvz_pixel *const pred_in,
+kvz_pixel *rec_out, coeff_t *coeff_out)
 {
 // Temporary arrays to pass data to and from quant and transform functions.
 int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
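quantize_residual takes a reference block and a prediction and produces both the quantized coefficients and the reconstruction the decoder will see: residual = ref - pred, forward transform and quantization, then dequantization and inverse transform back onto the prediction. A hedged sketch of the two pixel loops around the elided transform path, reusing the codebase's CLIP and PIXEL_MAX macros:

/* Sketch of the residual round trip; the transform/quant steps between
 * the two loops are omitted here. */
static void residual_roundtrip_sketch(const kvz_pixel *ref_in, const kvz_pixel *pred_in,
                                      kvz_pixel *rec_out, int16_t *residual,
                                      int width, int in_stride, int out_stride)
{
  for (int y = 0; y < width; ++y) {
    for (int x = 0; x < width; ++x) {
      residual[x + y * width] =
          (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]);
    }
  }
  /* ... forward transform, quantization, dequantization and inverse
     transform would run here, updating `residual` in place ... */
  for (int y = 0; y < width; ++y) {
    for (int x = 0; x < width; ++x) {
      int val = pred_in[x + y * in_stride] + residual[x + y * width];
      rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, val);
    }
  }
}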
@ -440,11 +440,11 @@ int quantize_residual_trskip(
|
||||||
const cu_info_t *const cur_cu, const int width, const color_t color,
|
const cu_info_t *const cur_cu, const int width, const color_t color,
|
||||||
const coeff_scan_order_t scan_order, int8_t *trskip_out,
|
const coeff_scan_order_t scan_order, int8_t *trskip_out,
|
||||||
const int in_stride, const int out_stride,
|
const int in_stride, const int out_stride,
|
||||||
const pixel_t *const ref_in, const pixel_t *const pred_in,
|
const kvz_pixel *const ref_in, const kvz_pixel *const pred_in,
|
||||||
pixel_t *rec_out, coeff_t *coeff_out)
|
kvz_pixel *rec_out, coeff_t *coeff_out)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
pixel_t rec[4*4];
|
kvz_pixel rec[4*4];
|
||||||
coeff_t coeff[4*4];
|
coeff_t coeff[4*4];
|
||||||
uint32_t cost;
|
uint32_t cost;
|
||||||
int has_coeffs;
|
int has_coeffs;
|
||||||
|
@@ -543,9 +543,9 @@ void quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_
   const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH;
 
   // Pointers to current location in arrays with prediction.
-  pixel_t *recbase_y = &lcu->rec.y[luma_offset];
+  kvz_pixel *recbase_y = &lcu->rec.y[luma_offset];
   // Pointers to current location in arrays with reference.
-  const pixel_t *base_y = &lcu->ref.y[luma_offset];
+  const kvz_pixel *base_y = &lcu->ref.y[luma_offset];
   // Pointers to current location in arrays with kvantized coefficients.
   coeff_t *orig_coeff_y = &lcu->coeff.y[luma_offset];
 
@@ -633,10 +633,10 @@ void quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int3
     cbf_clear(&cur_cu->cbf.v, depth);
 
   const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C;
-  pixel_t *recbase_u = &lcu->rec.u[chroma_offset];
-  pixel_t *recbase_v = &lcu->rec.v[chroma_offset];
-  const pixel_t *base_u = &lcu->ref.u[chroma_offset];
-  const pixel_t *base_v = &lcu->ref.v[chroma_offset];
+  kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset];
+  kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset];
+  const kvz_pixel *base_u = &lcu->ref.u[chroma_offset];
+  const kvz_pixel *base_v = &lcu->ref.v[chroma_offset];
   coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset];
   coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset];
   coeff_scan_order_t scan_idx_chroma;
14  src/yuv_io.c
@@ -28,10 +28,10 @@
 #include "yuv_io.h"
 
 static void fill_after_frame(unsigned height, unsigned array_width,
-                             unsigned array_height, pixel_t *data)
+                             unsigned array_height, kvz_pixel *data)
 {
-  pixel_t* p = data + height * array_width;
-  pixel_t* end = data + array_width * array_height;
+  kvz_pixel* p = data + height * array_width;
+  kvz_pixel* end = data + array_width * array_height;
 
   while (p < end) {
     // Fill the line by copying the line above.
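fill_after_frame pads the bottom of the picture buffer: as its inner comment says, each missing line is filled by copying the line above, which in effect extends the last real row of the frame downward. A self-contained sketch of that idea (same logic, stand-in names):

    #include <stdint.h>
    #include <string.h>

    typedef uint8_t kvz_pixel;  /* assumed 8-bit build */

    /* Extend the last real row of a frame down to array_height rows. */
    static void pad_bottom(unsigned height, unsigned array_width,
                           unsigned array_height, kvz_pixel *data)
    {
      kvz_pixel *p   = data + height * array_width;        /* first missing row */
      kvz_pixel *end = data + array_width * array_height;  /* one past the buffer */
      while (p < end) {
        memcpy(p, p - array_width, array_width * sizeof(kvz_pixel));
        p += array_width;
      }
    }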
@@ -43,11 +43,11 @@ static void fill_after_frame(unsigned height, unsigned array_width,
 
 static int read_and_fill_frame_data(FILE *file,
                                     unsigned width, unsigned height,
-                                    unsigned array_width, pixel_t *data)
+                                    unsigned array_width, kvz_pixel *data)
 {
-  pixel_t* p = data;
-  pixel_t* end = data + array_width * height;
-  pixel_t fill_char;
+  kvz_pixel* p = data;
+  kvz_pixel* end = data + array_width * height;
+  kvz_pixel fill_char;
   unsigned i;
 
   while (p < end) {
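read_and_fill_frame_data handles the right edge the same way: judging by the locals visible above (p, end, fill_char), each row is read from the file and the remainder of the row is padded by repeating the last read sample. A hedged sketch of that row loop, inferred from the visible declarations rather than copied from the source:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint8_t kvz_pixel;  /* assumed 8-bit build */

    /* Read width samples per row and repeat the last sample out to
     * array_width; return 0 on a short read, 1 on success. */
    static int read_rows_padded(FILE *file,
                                unsigned width, unsigned height,
                                unsigned array_width, kvz_pixel *data)
    {
      kvz_pixel *p = data;
      kvz_pixel *end = data + array_width * height;
      while (p < end) {
        if (fread(p, sizeof(kvz_pixel), width, file) != width) return 0;
        kvz_pixel fill_char = p[width - 1];  /* last real sample of the row */
        for (unsigned i = width; i < array_width; ++i) p[i] = fill_char;
        p += array_width;
      }
      return 1;
    }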
@@ -35,7 +35,7 @@
 
 //////////////////////////////////////////////////////////////////////////
 // GLOBALS
-pixel_t * bufs[NUM_TESTS][6][2];
+kvz_pixel * bufs[NUM_TESTS][6][2];
 
 static struct {
   int log_width; // for selecting dim from bufs
@@ -45,7 +45,7 @@ static struct {
 
 //////////////////////////////////////////////////////////////////////////
 // SETUP, TEARDOWN AND HELPER FUNCTIONS
-static void init_gradient(int x_px, int y_px, int width, int slope, pixel_t *buf)
+static void init_gradient(int x_px, int y_px, int width, int slope, kvz_pixel *buf)
 {
   for (int y = 0; y < width; ++y) {
     for (int x = 0; x < width; ++x) {
@@ -68,10 +68,10 @@ static void setup_tests()
 
   for (int w = LCU_MIN_LOG_W; w <= LCU_MAX_LOG_W; ++w) {
     unsigned size = 1 << (w * 2);
-    bufs[test][w][0] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT);
+    bufs[test][w][0] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT);
     bufs[test][w][0] = ALIGNED_POINTER(bufs[test][w][0], SIMD_ALIGNMENT);
 
-    bufs[test][w][1] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT);
+    bufs[test][w][1] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT);
     bufs[test][w][1] = ALIGNED_POINTER(bufs[test][w][1], SIMD_ALIGNMENT);
   }
 }
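The malloc/ALIGNED_POINTER pair above over-allocates by SIMD_ALIGNMENT bytes and then rounds the pointer up to the next aligned address. A sketch of the rounding arithmetic such a macro typically performs (the macro body here is an assumption; only its use is visible in the diff):

    #include <stdint.h>

    /* Round ptr up to the next multiple of align (align must be a power of two). */
    #define ALIGNED_POINTER(ptr, align) \
      ((void *)(((uintptr_t)(ptr) + ((align) - 1)) & ~(uintptr_t)((align) - 1)))

Note that after bufs[test][w][0] is overwritten with the aligned value, the original malloc result is no longer available to pass to free(); the speed-test file further down sidesteps this by keeping the raw pointers in a separate actual_bufs array.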
@@ -104,7 +104,7 @@ static void tear_down_tests()
 }
 
 
-static unsigned test_calc_sad(const pixel_t * buf1, const pixel_t * buf2, int dim)
+static unsigned test_calc_sad(const kvz_pixel * buf1, const kvz_pixel * buf2, int dim)
 {
   unsigned result = 0;
   for (int i = 0; i < dim * dim; ++i) {
|
||||||
const int test = 0;
|
const int test = 0;
|
||||||
const int width = 1 << test_env.log_width;
|
const int width = 1 << test_env.log_width;
|
||||||
|
|
||||||
pixel_t * buf1 = bufs[test][test_env.log_width][0];
|
kvz_pixel * buf1 = bufs[test][test_env.log_width][0];
|
||||||
pixel_t * buf2 = bufs[test][test_env.log_width][1];
|
kvz_pixel * buf2 = bufs[test][test_env.log_width][1];
|
||||||
|
|
||||||
unsigned result1 = test_env.tested_func(buf1, buf2);
|
unsigned result1 = test_env.tested_func(buf1, buf2);
|
||||||
unsigned result2 = test_env.tested_func(buf2, buf1);
|
unsigned result2 = test_env.tested_func(buf2, buf1);
|
||||||
|
@@ -149,8 +149,8 @@ TEST test_gradient(void)
   const int test = 1;
   const int width = 1 << test_env.log_width;
 
-  pixel_t * buf1 = bufs[test][test_env.log_width][0];
-  pixel_t * buf2 = bufs[test][test_env.log_width][1];
+  kvz_pixel * buf1 = bufs[test][test_env.log_width][0];
+  kvz_pixel * buf2 = bufs[test][test_env.log_width][1];
 
   unsigned result = test_calc_sad(buf1, buf2, width);
   unsigned result1 = test_env.tested_func(buf1, buf2);
@@ -33,7 +33,7 @@
 
 //////////////////////////////////////////////////////////////////////////
 // GLOBALS
-pixel_t * satd_bufs[NUM_TESTS][7][2];
+kvz_pixel * satd_bufs[NUM_TESTS][7][2];
 
 static struct {
   int log_width; // for selecting dim from satd_bufs
@@ -53,8 +53,8 @@ static void setup_tests()
 
   for (int w = LCU_MIN_LOG_W; w <= LCU_MAX_LOG_W; ++w) {
     unsigned size = 1 << (w * 2);
-    satd_bufs[test][w][0] = malloc(size * sizeof(pixel_t));
-    satd_bufs[test][w][1] = malloc(size * sizeof(pixel_t));
+    satd_bufs[test][w][0] = malloc(size * sizeof(kvz_pixel));
+    satd_bufs[test][w][1] = malloc(size * sizeof(kvz_pixel));
   }
 }
 
@@ -110,8 +110,8 @@ TEST satd_test_black_and_white(void)
 
   const int test = 0;
 
-  pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0];
-  pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1];
+  kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0];
+  kvz_pixel * buf2 = satd_bufs[test][satd_test_env.log_width][1];
 
   unsigned result1 = satd_test_env.tested_func(buf1, buf2);
   unsigned result2 = satd_test_env.tested_func(buf2, buf1);
@@ -128,8 +128,8 @@ TEST satd_test_checkers(void)
 
   const int test = 1;
 
-  pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0];
-  pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1];
+  kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0];
+  kvz_pixel * buf2 = satd_bufs[test][satd_test_env.log_width][1];
 
   unsigned result1 = satd_test_env.tested_func(buf1, buf2);
   unsigned result2 = satd_test_env.tested_func(buf2, buf1);
@@ -147,8 +147,8 @@ TEST satd_test_gradient(void)
 
   const int test = 2;
 
-  pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0];
-  pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1];
+  kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0];
+  kvz_pixel * buf2 = satd_bufs[test][satd_test_env.log_width][1];
 
   unsigned result1 = satd_test_env.tested_func(buf1, buf2);
   unsigned result2 = satd_test_env.tested_func(buf2, buf1);
@@ -40,8 +40,8 @@
 
 //////////////////////////////////////////////////////////////////////////
 // GLOBALS
-pixel_t * bufs[NUM_TESTS]; // SIMD aligned pointers.
-pixel_t * actual_bufs[NUM_TESTS]; // pointers returned by malloc.
+kvz_pixel * bufs[NUM_TESTS]; // SIMD aligned pointers.
+kvz_pixel * actual_bufs[NUM_TESTS]; // pointers returned by malloc.
 
 static struct test_env_t {
   int log_width; // for selecting dim from bufs
@@ -53,7 +53,7 @@ static struct test_env_t {
 
 //////////////////////////////////////////////////////////////////////////
 // SETUP, TEARDOWN AND HELPER FUNCTIONS
-static void init_gradient(int x_px, int y_px, int width, int slope, pixel_t *buf)
+static void init_gradient(int x_px, int y_px, int width, int slope, kvz_pixel *buf)
 {
   for (int y = 0; y < width; ++y) {
     for (int x = 0; x < width; ++x) {
@@ -71,7 +71,7 @@ static void setup_tests()
   for (int test = 0; test < NUM_TESTS; ++test) {
     unsigned size = NUM_CHUNKS * 64 * 64;
 
-    actual_bufs[test] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT);
+    actual_bufs[test] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT);
     bufs[test] = ALIGNED_POINTER(actual_bufs[test], SIMD_ALIGNMENT);
   }
 
@@ -112,9 +112,9 @@ TEST test_intra_speed(const int width)
   uint64_t sum = 0;
   for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
     // Compare the first chunk against the 35 other chunks to simulate real usage.
-    pixel_t * buf1 = &bufs[test][offset];
+    kvz_pixel * buf1 = &bufs[test][offset];
     for (int chunk = 1; chunk < NUM_CHUNKS; ++chunk) {
-      pixel_t * buf2 = &bufs[test][chunk * size + offset];
+      kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
 
       cost_pixel_nxn_func *tested_func = test_env.tested_func;
       sum += tested_func(buf1, buf2);
@@ -151,9 +151,9 @@ TEST test_inter_speed(const int width)
   for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
     // Treat 4 consecutive chunks as one chunk with double width and height,
     // and do a 8x8 grid search against the first chunk to simulate real usage.
-    pixel_t * buf1 = &bufs[test][offset];
+    kvz_pixel * buf1 = &bufs[test][offset];
     for (int chunk = 0; chunk < NUM_CHUNKS; chunk += 4) {
-      pixel_t * buf2 = &bufs[test][chunk * size + offset];
+      kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
       for (int y = 0; y < 8; ++y) {
         for (int x = 0; x < 8; ++x) {
           const int stride1 = 2 * 64;
@@ -203,8 +203,8 @@ TEST dct_speed(const int width)
   for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) {
     // Compare the first chunk against the 35 other chunks to simulate real usage.
     for (int chunk = 0; chunk < NUM_CHUNKS; ++chunk) {
-      pixel_t * buf1 = &bufs[test][offset];
-      pixel_t * buf2 = &bufs[test][chunk * size + offset];
+      kvz_pixel * buf1 = &bufs[test][offset];
+      kvz_pixel * buf2 = &bufs[test][chunk * size + offset];
       for (int p = 0; p < size; ++p) {
         tmp_residual[p] = (int16_t)(buf1[p] - buf2[p]);
       }
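The dct_speed loop above builds its input the standard way: the residual handed to the transform is the element-wise difference of the two pixel blocks, widened to 16 bits because it can go negative. As a standalone statement of that step (names assumed for illustration):

    #include <stdint.h>

    typedef uint8_t kvz_pixel;  /* assumed 8-bit build */

    /* Widen and subtract: residual[p] = buf1[p] - buf2[p]. */
    static void build_residual(const kvz_pixel *buf1, const kvz_pixel *buf2,
                               int16_t *residual, int size)
    {
      for (int p = 0; p < size; ++p) {
        residual[p] = (int16_t)(buf1[p] - buf2[p]);
      }
    }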