diff --git a/src/cu.h b/src/cu.h index c86c8fb8..eed38d1d 100644 --- a/src/cu.h +++ b/src/cu.h @@ -123,9 +123,9 @@ int cu_array_free(cu_array_t *cua); * - First pixel is the top-left pixel. */ typedef struct { - pixel_t y[LCU_REF_PX_WIDTH + 1]; - pixel_t u[LCU_REF_PX_WIDTH / 2 + 1]; - pixel_t v[LCU_REF_PX_WIDTH / 2 + 1]; + kvz_pixel y[LCU_REF_PX_WIDTH + 1]; + kvz_pixel u[LCU_REF_PX_WIDTH / 2 + 1]; + kvz_pixel v[LCU_REF_PX_WIDTH / 2 + 1]; } lcu_ref_px_t; typedef struct { diff --git a/src/encoderstate.c b/src/encoderstate.c index cfc20964..b66f319e 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -441,9 +441,9 @@ static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) { int x; //TODO: copy only needed data - pixel_t *new_y_data = MALLOC(pixel_t, frame->width * frame->height); - pixel_t *new_u_data = MALLOC(pixel_t, (frame->width * frame->height) >> 2); - pixel_t *new_v_data = MALLOC(pixel_t, (frame->width * frame->height) >> 2); + kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->width * frame->height); + kvz_pixel *new_u_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2); + kvz_pixel *new_v_data = MALLOC(kvz_pixel, (frame->width * frame->height) >> 2); const int offset = frame->width * (data->y*LCU_WIDTH); const int offset_c = frame->width/2 * (data->y*LCU_WIDTH_C); @@ -453,15 +453,15 @@ static void encoder_state_worker_sao_reconstruct_lcu(void *opaque) { num_pixels = frame->width * frame->height - offset; } - memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(pixel_t) * num_pixels); - memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(pixel_t) * num_pixels >> 2); - memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(pixel_t) * num_pixels >> 2); + memcpy(&new_y_data[offset], &frame->rec->y[offset], sizeof(kvz_pixel) * num_pixels); + memcpy(&new_u_data[offset_c], &frame->rec->u[offset_c], sizeof(kvz_pixel) * num_pixels >> 2); + memcpy(&new_v_data[offset_c], &frame->rec->v[offset_c], sizeof(kvz_pixel) * 
num_pixels >> 2); if (data->y>0) { //copy first row from buffer - memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(pixel_t)); - memcpy(&new_u_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel_t)); - memcpy(&new_v_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(pixel_t)); + memcpy(&new_y_data[frame->width * (data->y*LCU_WIDTH-1)], &data->encoder_state->tile->hor_buf_before_sao->y[frame->width * (data->y-1)], frame->width * sizeof(kvz_pixel)); + memcpy(&new_u_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->u[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(kvz_pixel)); + memcpy(&new_v_data[frame->width/2 * (data->y*LCU_WIDTH_C-1)], &data->encoder_state->tile->hor_buf_before_sao->v[frame->width/2 * (data->y-1)], frame->width/2 * sizeof(kvz_pixel)); } for (x = 0; x < frame->width_in_lcu; x++) { diff --git a/src/filter.c b/src/filter.c index e91a42fa..ab907eb7 100644 --- a/src/filter.c +++ b/src/filter.c @@ -83,7 +83,7 @@ const int8_t g_chroma_filter[8][4] = /** * \brief */ -INLINE void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, +INLINE void filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc, int8_t sw, int8_t part_P_nofilter, int8_t part_Q_nofilter, int32_t thr_cut, @@ -143,7 +143,7 @@ INLINE void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t /** * \brief */ -INLINE void filter_deblock_chroma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc, +INLINE void filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t 
offset, int32_t tc, int8_t part_P_nofilter, int8_t part_Q_nofilter) { int32_t delta; @@ -188,8 +188,8 @@ void filter_deblock_edge_luma(encoder_state_t * const state, int32_t beta_offset_div2 = encoder->beta_offset_div2; int32_t tc_offset_div2 = encoder->tc_offset_div2; // TODO: support 10+bits - pixel_t *orig_src = &frame->rec->y[xpos + ypos*stride]; - pixel_t *src = orig_src; + kvz_pixel *orig_src = &frame->rec->y[xpos + ypos*stride]; + kvz_pixel *src = orig_src; int32_t step = 1; cu_info_t *cu_p = NULL; int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE; @@ -381,8 +381,8 @@ void filter_deblock_edge_chroma(encoder_state_t * const state, int32_t stride = frame->rec->stride >> 1; int32_t tc_offset_div2 = encoder->tc_offset_div2; // TODO: support 10+bits - pixel_t *src_u = &frame->rec->u[x + y*stride]; - pixel_t *src_v = &frame->rec->v[x + y*stride]; + kvz_pixel *src_u = &frame->rec->u[x + y*stride]; + kvz_pixel *src_v = &frame->rec->v[x + y*stride]; // Init offset and step to EDGE_HOR int32_t offset = stride; int32_t step = 1; diff --git a/src/filter.h b/src/filter.h index 043faa93..e3e70d96 100644 --- a/src/filter.h +++ b/src/filter.h @@ -43,11 +43,11 @@ void filter_deblock_edge_chroma(encoder_state_t *state, int32_t xpos, int32_t ypos, int8_t depth, int8_t dir); void filter_deblock_lcu(encoder_state_t *state, int x_px, int y_px); -void filter_deblock_luma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc , int8_t sw, +void filter_deblock_luma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc , int8_t sw, int8_t part_p_nofilter, int8_t part_q_nofilter, int32_t thr_cut, int8_t filter_second_p, int8_t filter_second_q); -void filter_deblock_chroma(const encoder_control_t * const encoder, pixel_t *src, int32_t offset, int32_t tc, +void filter_deblock_chroma(const encoder_control_t * const encoder, kvz_pixel *src, int32_t offset, int32_t tc, int8_t part_p_nofilter, int8_t part_q_nofilter); // SAO diff 
--git a/src/image.c b/src/image.c index 2610a8ba..8a00323c 100644 --- a/src/image.c +++ b/src/image.c @@ -52,7 +52,7 @@ kvz_picture *image_alloc(const int32_t width, const int32_t height) unsigned int chroma_size = luma_size / 4; //Allocate memory - im->fulldata = MALLOC(pixel_t, (luma_size + 2 * chroma_size)); + im->fulldata = MALLOC(kvz_pixel, (luma_size + 2 * chroma_size)); if (!im->fulldata) { free(im); return NULL; @@ -156,9 +156,9 @@ yuv_t * yuv_t_alloc(int luma_size) // Get buffers with separate mallocs in order to take advantage of // automatic buffer overrun checks. yuv_t *yuv = (yuv_t *)malloc(sizeof(*yuv)); - yuv->y = (pixel_t *)malloc(luma_size * sizeof(*yuv->y)); - yuv->u = (pixel_t *)malloc(luma_size / 2 * sizeof(*yuv->u)); - yuv->v = (pixel_t *)malloc(luma_size / 2 * sizeof(*yuv->v)); + yuv->y = (kvz_pixel *)malloc(luma_size * sizeof(*yuv->y)); + yuv->u = (kvz_pixel *)malloc(luma_size / 2 * sizeof(*yuv->u)); + yuv->v = (kvz_pixel *)malloc(luma_size / 2 * sizeof(*yuv->v)); yuv->size = luma_size; return yuv; @@ -183,10 +183,10 @@ void yuv_t_free(yuv_t * yuv) * * \returns Sum of Absolute Differences */ -static unsigned cor_sad(const pixel_t *pic_data, const pixel_t *ref_data, +static unsigned cor_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data, int block_width, int block_height, unsigned pic_stride) { - pixel_t ref = *ref_data; + kvz_pixel ref = *ref_data; int x, y; unsigned sad = 0; @@ -210,7 +210,7 @@ static unsigned cor_sad(const pixel_t *pic_data, const pixel_t *ref_data, * * \returns Sum of Absolute Differences */ -static unsigned ver_sad(const pixel_t *pic_data, const pixel_t *ref_data, +static unsigned ver_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data, int block_width, int block_height, unsigned pic_stride) { int x, y; @@ -236,7 +236,7 @@ static unsigned ver_sad(const pixel_t *pic_data, const pixel_t *ref_data, * * \returns Sum of Absolute Differences */ -static unsigned hor_sad(const pixel_t *pic_data, const pixel_t 
*ref_data, +static unsigned hor_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data, int block_width, int block_height, unsigned pic_stride, unsigned ref_stride) { int x, y; @@ -269,7 +269,7 @@ static unsigned image_interpolated_sad(const kvz_picture *pic, const kvz_picture int pic_x, int pic_y, int ref_x, int ref_y, int block_width, int block_height) { - pixel_t *pic_data, *ref_data; + kvz_pixel *pic_data, *ref_data; int left, right, top, bottom; int result = 0; @@ -424,8 +424,8 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_ { // Reference block is completely inside the frame, so just calculate the // SAD directly. This is the most common case, which is why it's first. - const pixel_t *pic_data = &pic->y[pic_y * pic->stride + pic_x]; - const pixel_t *ref_data = &ref->y[ref_y * ref->stride + ref_x]; + const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; + const kvz_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x]; return reg_sad(pic_data, ref_data, block_width, block_height, pic->stride, ref->stride); } else { // Call a routine that knows how to interpolate pixels outside the frame. @@ -434,7 +434,7 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_ } -unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec, +unsigned pixels_calc_ssd(const kvz_pixel *const ref, const kvz_pixel *const rec, const int ref_stride, const int rec_stride, const int width) { @@ -467,7 +467,7 @@ unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec, * This should be inlined, but it's defined here for now to see if Visual * Studios LTCG will inline it. 
*/ -void pixels_blit(const pixel_t * const orig, pixel_t * const dst, +void pixels_blit(const kvz_pixel * const orig, kvz_pixel * const dst, const unsigned width, const unsigned height, const unsigned orig_stride, const unsigned dst_stride) { @@ -496,7 +496,7 @@ void pixels_blit(const pixel_t * const orig, pixel_t * const dst, assert(orig != dst || orig_stride == dst_stride); for (y = 0; y < height; ++y) { - memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(pixel_t)); + memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(kvz_pixel)); } } diff --git a/src/image.h b/src/image.h index d58a8b1d..a7946e69 100644 --- a/src/image.h +++ b/src/image.h @@ -29,16 +29,16 @@ #include "kvazaar.h" typedef struct { - pixel_t y[LCU_LUMA_SIZE]; - pixel_t u[LCU_CHROMA_SIZE]; - pixel_t v[LCU_CHROMA_SIZE]; + kvz_pixel y[LCU_LUMA_SIZE]; + kvz_pixel u[LCU_CHROMA_SIZE]; + kvz_pixel v[LCU_CHROMA_SIZE]; } lcu_yuv_t; typedef struct { int size; - pixel_t *y; - pixel_t *u; - pixel_t *v; + kvz_pixel *y; + kvz_pixel *u; + kvz_pixel *v; } yuv_t; @@ -62,12 +62,12 @@ unsigned image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_ int block_width, int block_height, int max_lcu_below); -unsigned pixels_calc_ssd(const pixel_t *const ref, const pixel_t *const rec, +unsigned pixels_calc_ssd(const kvz_pixel *const ref, const kvz_pixel *const rec, const int ref_stride, const int rec_stride, const int width); -void pixels_blit(const pixel_t* orig, pixel_t *dst, +void pixels_blit(const kvz_pixel* orig, kvz_pixel *dst, unsigned width, unsigned height, unsigned orig_stride, unsigned dst_stride); diff --git a/src/inter.c b/src/inter.c index 14742f10..c66fdbc8 100644 --- a/src/inter.c +++ b/src/inter.c @@ -91,12 +91,12 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co // Chroma half-pel #define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8) int8_t chroma_halfpel = ((mv[0]>>2)&1) || ((mv[1]>>2)&1); //!< (luma integer mv) lsb is set -> chroma 
is half-pel - pixel_t halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< U source block for interpolation - pixel_t halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< V source block for interpolation - pixel_t *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_u with offset (4,4) - pixel_t *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_v with offset (4,4) - pixel_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u) - pixel_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v) + kvz_pixel halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< U source block for interpolation + kvz_pixel halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< V source block for interpolation + kvz_pixel *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_u with offset (4,4) + kvz_pixel *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH * 4 + 4]; //!< halfpel_src_v with offset (4,4) + kvz_pixel halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u) + kvz_pixel halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v) // Luma quarter-pel int8_t fractional_mv = (mv[0]&1) || (mv[1]&1) || (mv[0]&2) || (mv[1]&2); // either of 2 lowest bits of mv set -> mv is fractional @@ -114,19 +114,19 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co #define FILTER_SIZE_C 4 //Chroma filter size // Fractional luma 1/4-pel - pixel_t qpel_src_y[(LCU_WIDTH+FILTER_SIZE_Y) * (LCU_WIDTH+FILTER_SIZE_Y)]; - pixel_t* qpel_src_off_y = &qpel_src_y[(width+FILTER_SIZE_Y)*(FILTER_SIZE_Y>>1)+(FILTER_SIZE_Y>>1)]; - pixel_t qpel_dst_y[LCU_WIDTH*LCU_WIDTH*16]; + kvz_pixel qpel_src_y[(LCU_WIDTH+FILTER_SIZE_Y) * (LCU_WIDTH+FILTER_SIZE_Y)]; + kvz_pixel* qpel_src_off_y = &qpel_src_y[(width+FILTER_SIZE_Y)*(FILTER_SIZE_Y>>1)+(FILTER_SIZE_Y>>1)]; + kvz_pixel 
qpel_dst_y[LCU_WIDTH*LCU_WIDTH*16]; // Fractional chroma 1/8-pel int width_c = width>>1; - pixel_t octpel_src_u[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)]; - pixel_t* octpel_src_off_u = &octpel_src_u[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)]; - pixel_t octpel_dst_u[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64]; + kvz_pixel octpel_src_u[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)]; + kvz_pixel* octpel_src_off_u = &octpel_src_u[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)]; + kvz_pixel octpel_dst_u[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64]; - pixel_t octpel_src_v[((LCU_WIDTH >> 1) + FILTER_SIZE_C) * ((LCU_WIDTH >> 1) + FILTER_SIZE_C)]; - pixel_t* octpel_src_off_v = &octpel_src_v[(width_c + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)]; - pixel_t octpel_dst_v[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64]; + kvz_pixel octpel_src_v[((LCU_WIDTH >> 1) + FILTER_SIZE_C) * ((LCU_WIDTH >> 1) + FILTER_SIZE_C)]; + kvz_pixel* octpel_src_off_v = &octpel_src_v[(width_c + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)]; + kvz_pixel octpel_dst_v[(LCU_WIDTH >> 1)*(LCU_WIDTH >> 1) * 64]; // Fractional luma extend_borders(xpos, ypos, mv[0]>>2, mv[1]>>2, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH, @@ -156,7 +156,7 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co for(x = 0; x < width; ++x) { int x_in_lcu = ((x+xpos) & ((LCU_WIDTH)-1)); int qpel_x = x*4+y_off_x; - lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (pixel_t)qpel_dst_y[qpel_y*(width*4)+qpel_x]; + lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)qpel_dst_y[qpel_y*(width*4)+qpel_x]; } } //Sample fractional pixels for chroma @@ -166,8 +166,8 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co for(x = 0; x < width_c; ++x) { int x_in_lcu = ((x+(xpos>>1)) & ((LCU_WIDTH>>1)-1)); int qpel_x = x*8+c_off_x; - lcu->rec.u[y_in_lcu * 
dst_width_c + x_in_lcu] = (pixel_t)octpel_dst_u[qpel_y*(width_c*8)+qpel_x]; - lcu->rec.v[y_in_lcu * dst_width_c + x_in_lcu] = (pixel_t)octpel_dst_v[qpel_y*(width_c*8)+qpel_x]; + lcu->rec.u[y_in_lcu * dst_width_c + x_in_lcu] = (kvz_pixel)octpel_dst_u[qpel_y*(width_c*8)+qpel_x]; + lcu->rec.v[y_in_lcu * dst_width_c + x_in_lcu] = (kvz_pixel)octpel_dst_v[qpel_y*(width_c*8)+qpel_x]; } } } @@ -219,8 +219,8 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) { int x_in_lcu = (x & ((LCU_WIDTH>>1)-1)); int y_in_lcu = (y & ((LCU_WIDTH>>1)-1)); - lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (pixel_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x]; - lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (pixel_t)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x]; + lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = (kvz_pixel)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x]; + lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = (kvz_pixel)halfpel_v[halfpel_y*LCU_WIDTH + halfpel_x]; } } } @@ -339,17 +339,17 @@ void inter_recon_lcu(const encoder_state_t * const state, const kvz_picture * co */ void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_picture * ref1, const kvz_picture * ref2, int32_t xpos, int32_t ypos, int32_t width, int16_t mv_param[2][2], lcu_t* lcu) { - pixel_t temp_lcu_y[64 * 64]; - pixel_t temp_lcu_u[32 * 32]; - pixel_t temp_lcu_v[32 * 32]; + kvz_pixel temp_lcu_y[64 * 64]; + kvz_pixel temp_lcu_u[32 * 32]; + kvz_pixel temp_lcu_v[32 * 32]; int temp_x, temp_y; // TODO: interpolated values require 14-bit accuracy for bi-prediction, current implementation of ipol filters round the value to 8bits //Reconstruct both predictors inter_recon_lcu(state, ref1, xpos, ypos, width, mv_param[0], lcu); - memcpy(temp_lcu_y, lcu->rec.y, sizeof(pixel_t) * 64 * 64); - memcpy(temp_lcu_u, lcu->rec.u, sizeof(pixel_t) * 32 * 32); - memcpy(temp_lcu_v, lcu->rec.v, sizeof(pixel_t) * 32 * 
32); + memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); + memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); + memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); inter_recon_lcu(state, ref2, xpos, ypos, width, mv_param[1], lcu); // After reconstruction, merge the predictors by taking an average of each pixel @@ -357,7 +357,7 @@ void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_pictu int y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1)); for (temp_x = 0; temp_x < width; ++temp_x) { int x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1)); - lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (pixel_t)(((int)lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] + + lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] + (int)temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] + 1) >> 1); } } @@ -365,10 +365,10 @@ void inter_recon_lcu_bipred(const encoder_state_t * const state, const kvz_pictu int y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1)); for (temp_x = 0; temp_x < width>>1; ++temp_x) { int x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1)); - lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (pixel_t)(((int)lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + + lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + (int)temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + 1) >> 1); - lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (pixel_t)(((int)lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + + lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)(((int)lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + (int)temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] + 1) >> 1); } } diff --git a/src/intra.c b/src/intra.c index 4d16357c..ec08d824 100644 --- a/src/intra.c +++ b/src/intra.c @@ -87,7 +87,7 @@ void intra_set_block_mode(videoframe_t *frame,uint32_t x_cu, uint32_t y_cu, uint * \param width block width * \returns DC prediction 
*/ -pixel_t intra_get_dc_pred(const pixel_t *pic, uint16_t picwidth, uint8_t width) +kvz_pixel intra_get_dc_pred(const kvz_pixel *pic, uint16_t picwidth, uint8_t width) { int32_t i, sum = 0; @@ -100,7 +100,7 @@ pixel_t intra_get_dc_pred(const pixel_t *pic, uint16_t picwidth, uint8_t width) } // return the average - return (pixel_t)((sum + width) / (width + width)); + return (kvz_pixel)((sum + width) / (width + width)); } /** @@ -175,11 +175,11 @@ int8_t intra_get_dir_luma_predictor(const uint32_t x, const uint32_t y, int8_t* * \param preds output buffer for 3 predictions * \returns (predictions are found)?1:0 */ -void intra_filter(pixel_t *ref, int32_t stride,int32_t width, int8_t mode) +void intra_filter(kvz_pixel *ref, int32_t stride,int32_t width, int8_t mode) { #define FWIDTH (LCU_WIDTH*2+1) - pixel_t filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - pixel_t *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) + kvz_pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples + kvz_pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) int x,y; if (!mode) { @@ -222,9 +222,9 @@ void intra_filter(pixel_t *ref, int32_t stride,int32_t width, int8_t mode) * \param recstride Stride for rec pixel arrays. * \param dst */ -void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, const pixel_t *rec_filtered, int recstride, pixel_t *dst, int width, int mode, int is_chroma) +void intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma) { - const pixel_t *ref_pixels = rec; + const kvz_pixel *ref_pixels = rec; if (is_chroma || mode == 1 || width == 4) { // For chroma, DC and 4x4 blocks, always use unfiltered reference. 
} else if (mode == 0) { @@ -244,7 +244,7 @@ void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, intra_get_planar_pred(ref_pixels, recstride, width, dst, width); } else if (mode == 1) { int i; - pixel_t val = intra_get_dc_pred(ref_pixels, recstride, width); + kvz_pixel val = intra_get_dc_pred(ref_pixels, recstride, width); for (i = 0; i < width * width; i++) { dst[i] = val; } @@ -271,11 +271,11 @@ void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, * \param chroma chroma-block flag */ -void intra_recon(const encoder_control_t * const encoder, pixel_t* rec, int32_t recstride, uint32_t width, pixel_t* dst, int32_t dststride, int8_t mode, int8_t chroma) +void intra_recon(const encoder_control_t * const encoder, kvz_pixel* rec, int32_t recstride, uint32_t width, kvz_pixel* dst, int32_t dststride, int8_t mode, int8_t chroma) { - pixel_t pred[LCU_WIDTH * LCU_WIDTH]; - pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - pixel_t *recf = &rec_filtered_temp[recstride + 1]; + kvz_pixel pred[LCU_WIDTH * LCU_WIDTH]; + kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; + kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; // Generate filtered reference pixels. { @@ -304,7 +304,7 @@ void intra_recon(const encoder_control_t * const encoder, pixel_t* rec, int32_t * edge pixels filled with the reconstructed pixels. 
*/ void intra_build_reference_border(const encoder_control_t * const encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, - pixel_t *dst, int32_t dst_stride, int8_t chroma, + kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu) { @@ -362,7 +362,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 { 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } }; - const pixel_t dc_val = 1 << (encoder->bitdepth - 1); + const kvz_pixel dc_val = 1 << (encoder->bitdepth - 1); const int is_chroma = chroma ? 1 : 0; // input picture pointer @@ -377,12 +377,12 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 int x_local = (x_luma&0x3f)>>is_chroma, y_local = (y_luma&0x3f)>>is_chroma; - pixel_t *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; - pixel_t *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; - pixel_t *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? lcu->rec.u : lcu->rec.v; + kvz_pixel *left_ref = !chroma ? &lcu->left_ref.y[1] : (chroma == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1]; + kvz_pixel *top_ref = !chroma ? &lcu->top_ref.y[1] : (chroma == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1]; + kvz_pixel *rec_ref = !chroma ? lcu->rec.y : (chroma == 1) ? lcu->rec.u : lcu->rec.v; - pixel_t *left_border = &left_ref[y_local]; - pixel_t *top_border = &top_ref[x_local]; + kvz_pixel *left_border = &left_ref[y_local]; + kvz_pixel *top_border = &top_ref[x_local]; uint32_t left_stride = 1; if(x_local) { @@ -399,7 +399,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 // Get the number of reference pixels based on the PU coordinate within the LCU. 
int num_ref_pixels = num_ref_pixels_left[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; int i; - pixel_t nearest_pixel; + kvz_pixel nearest_pixel; // Max pixel we can copy from src is yy + outwidth - 1 because the dst // extends one pixel to the left. @@ -421,7 +421,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 } } else { // If we are on the left edge, extend the first pixel of the top row. - pixel_t nearest_pixel = y > 0 ? top_border[0] : dc_val; + kvz_pixel nearest_pixel = y > 0 ? top_border[0] : dc_val; int i; for (i = 1; i < out_width - 1; i++) { dst[i * dst_stride] = nearest_pixel; @@ -433,7 +433,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 // Get the number of reference pixels based on the PU coordinate within the LCU. int num_ref_pixels = num_ref_pixels_top[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma; int i; - pixel_t nearest_pixel; + kvz_pixel nearest_pixel; // Max pixel we can copy from src is yy + outwidth - 1 because the dst // extends one pixel to the left. @@ -452,7 +452,7 @@ void intra_build_reference_border(const encoder_control_t * const encoder, int32 } } else { // Extend nearest pixel. - pixel_t nearest_pixel = x > 0 ? left_border[0] : dc_val; + kvz_pixel nearest_pixel = x > 0 ? 
left_border[0] : dc_val; int i; for(i = 1; i < out_width; i++) { @@ -484,7 +484,7 @@ const int32_t inv_ang_table[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; * \brief this functions constructs the angular intra prediction from border samples * */ -void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter) +void intra_get_angular_pred(const encoder_control_t * const encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter) { int32_t k,l; int32_t blk_size = width; @@ -499,10 +499,10 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel int32_t inv_angle = inv_ang_table[abs_ang]; // Do angular predictions - pixel_t *ref_main; - pixel_t *ref_side; - pixel_t ref_above[2 * LCU_WIDTH + 1]; - pixel_t ref_left[2 * LCU_WIDTH + 1]; + kvz_pixel *ref_main; + kvz_pixel *ref_side; + kvz_pixel ref_above[2 * LCU_WIDTH + 1]; + kvz_pixel ref_left[2 * LCU_WIDTH + 1]; // Tell clang-analyzer that everything is ok. 
assert(width == 4 || width == 8 || width == 16 || width == 32); @@ -564,7 +564,7 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel // Do linear filtering for (l = 0; l < blk_size; l++) { ref_main_index = l + delta_int + 1; - dst[k * dst_stride + l] = (pixel_t) ( (minus_delta_fract * ref_main[ref_main_index] + dst[k * dst_stride + l] = (kvz_pixel) ( (minus_delta_fract * ref_main[ref_main_index] + delta_fract * ref_main[ref_main_index + 1] + 16) >> 5); } } else { @@ -578,7 +578,7 @@ void intra_get_angular_pred(const encoder_control_t * const encoder, const pixel // Flip the block if this is the horizontal mode if (!mode_ver) { - pixel_t tmp; + kvz_pixel tmp; for (k=0;k> shift_2d); + dst[k * dststride + l] = (kvz_pixel)((hor_pred + top_row[l]) >> shift_2d); } } } @@ -689,10 +689,10 @@ void intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth const uint32_t pic_height = state->tile->frame->height; // Pointers to reconstruction arrays - pixel_t *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; + kvz_pixel *recbase_y = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; - pixel_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - pixel_t *rec_shift = &rec[width * 2 + 8 + 1]; + kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + kvz_pixel *rec_shift = &rec[width * 2 + 8 + 1]; int32_t rec_stride = LCU_WIDTH; @@ -744,16 +744,16 @@ void intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int dep const uint32_t pic_height = state->tile->frame->height; // Pointers to reconstruction arrays - pixel_t *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - pixel_t *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; + kvz_pixel *recbase_u = &lcu->rec.u[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; + kvz_pixel *recbase_v = &lcu->rec.v[lcu_px.x/2 + (lcu_px.y * LCU_WIDTH)/4]; - pixel_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + kvz_pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; int32_t rec_stride = LCU_WIDTH; 
// Reconstruct chroma. if (!(x & 4 || y & 4)) { - pixel_t *rec_shift_c = &rec[width_c * 2 + 8 + 1]; + kvz_pixel *rec_shift_c = &rec[width_c * 2 + 8 + 1]; intra_build_reference_border(encoder, x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1, pic_width/2, pic_height/2, lcu); intra_recon(encoder, diff --git a/src/intra.h b/src/intra.h index ee8523c2..757930fc 100644 --- a/src/intra.h +++ b/src/intra.h @@ -35,19 +35,19 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, const cu_info_t* cur_cu, const cu_info_t* left_cu, const cu_info_t* above_cu); -void intra_dc_pred_filtering(const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t height ); +void intra_dc_pred_filtering(const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t height ); -void intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, pixel_t *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); -void intra_filter(pixel_t* ref, int32_t stride, int32_t width, int8_t mode); +void intra_build_reference_border(const encoder_control_t *encoder, int32_t x_luma, int32_t y_luma, int16_t out_width, kvz_pixel *dst, int32_t dst_stride, int8_t chroma, int32_t pic_width, int32_t pic_height, lcu_t *lcu); +void intra_filter(kvz_pixel* ref, int32_t stride, int32_t width, int8_t mode); /* Predictions */ -void intra_get_pred(const encoder_control_t * const encoder, const pixel_t *rec, const pixel_t *rec_filtered, int recstride, pixel_t *dst, int width, int mode, int is_chroma); +void intra_get_pred(const encoder_control_t * const encoder, const kvz_pixel *rec, const kvz_pixel *rec_filtered, int recstride, kvz_pixel *dst, int width, int mode, int is_chroma); -pixel_t intra_get_dc_pred(const pixel_t* pic, uint16_t pic_width, uint8_t width); -void intra_get_planar_pred(const pixel_t* src,int32_t srcstride, 
uint32_t width, pixel_t* dst, int32_t dststride); -void intra_get_angular_pred(const encoder_control_t *encoder, const pixel_t* src, int32_t src_stride, pixel_t* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); +kvz_pixel intra_get_dc_pred(const kvz_pixel* pic, uint16_t pic_width, uint8_t width); +void intra_get_planar_pred(const kvz_pixel* src,int32_t srcstride, uint32_t width, kvz_pixel* dst, int32_t dststride); +void intra_get_angular_pred(const encoder_control_t *encoder, const kvz_pixel* src, int32_t src_stride, kvz_pixel* dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); -void intra_recon(const encoder_control_t *encoder, pixel_t* rec, int32_t rec_stride, uint32_t width, pixel_t* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +void intra_recon(const encoder_control_t *encoder, kvz_pixel* rec, int32_t rec_stride, uint32_t width, kvz_pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); void intra_recon_lcu_luma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); void intra_recon_lcu_chroma(encoder_state_t *state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); diff --git a/src/kvazaar.h b/src/kvazaar.h index 81c04282..0d1ad570 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -41,9 +41,9 @@ extern "C" { #define KVZ_BIT_DEPTH 8 #if KVZ_BIT_DEPTH == 8 -typedef uint8_t pixel_t; +typedef uint8_t kvz_pixel; #else -typedef uint16_t pixel_t; +typedef uint16_t kvz_pixel; #endif /** @@ -140,12 +140,12 @@ typedef struct bitstream_chunk_t kvz_payload; * \brief Struct which contains all picture data */ typedef struct kvz_picture { - pixel_t *fulldata; //!< \brief Allocated buffer (only used in the base_image) + kvz_pixel *fulldata; //!< \brief Allocated buffer (only used in the base_image) - pixel_t *y; //!< \brief Pointer to luma pixel array. - pixel_t *u; //!< \brief Pointer to chroma U pixel array. 
- pixel_t *v; //!< \brief Pointer to chroma V pixel array. - pixel_t *data[3]; //!< \brief Alternate access method to same data. + kvz_pixel *y; //!< \brief Pointer to luma pixel array. + kvz_pixel *u; //!< \brief Pointer to chroma U pixel array. + kvz_pixel *v; //!< \brief Pointer to chroma V pixel array. + kvz_pixel *data[3]; //!< \brief Alternate access method to same data. int32_t width; //!< \brief Luma pixel array width. int32_t height; //!< \brief Luma pixel array height. diff --git a/src/rdo.c b/src/rdo.c index 55787910..1796f6f1 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -170,7 +170,7 @@ int intra_rdo_cost_compare(uint32_t *rdo_costs,int8_t rdo_modes_to_check, uint32 ** Only for luma */ -uint32_t rdo_cost_intra(encoder_state_t * const state, pixel_t *pred, pixel_t *orig_block, int width, int8_t mode, int tr_depth) +uint32_t rdo_cost_intra(encoder_state_t * const state, kvz_pixel *pred, kvz_pixel *orig_block, int width, int8_t mode, int tr_depth) { const encoder_control_t * const encoder = state->encoder_control; coeff_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; diff --git a/src/rdo.h b/src/rdo.h index 3a9f1aa8..17c574b0 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -39,7 +39,7 @@ int intra_rdo_cost_compare(uint32_t *rdo_costs,int8_t rdo_modes_to_check, uint32 void rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width, int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -uint32_t rdo_cost_intra(encoder_state_t *state, pixel_t* pred, pixel_t* orig_block, int width, int8_t mode, int tr_depth); +uint32_t rdo_cost_intra(encoder_state_t *state, kvz_pixel* pred, kvz_pixel* orig_block, int width, int8_t mode, int tr_depth); int32_t get_coeff_cost(const encoder_state_t *state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode); diff --git a/src/sao.c b/src/sao.c index 0b1e5ad6..92615218 100644 --- a/src/sao.c +++ b/src/sao.c @@ -44,7 +44,7 @@ static const vector2d_t g_sao_edge_offsets[SAO_NUM_EO][2] = { 
// Mapping of edge_idx values to eo-classes. -static int sao_calc_eo_cat(pixel_t a, pixel_t b, pixel_t c) +static int sao_calc_eo_cat(kvz_pixel a, kvz_pixel b, kvz_pixel c) { // Mapping relationships between a, b and c to eo_idx. static const int sao_eo_idx_to_eo_category[] = { 1, 2, 0, 3, 4 }; @@ -55,7 +55,7 @@ static int sao_calc_eo_cat(pixel_t a, pixel_t b, pixel_t c) } -int sao_band_ddistortion(const encoder_state_t * const state, const pixel_t *orig_data, const pixel_t *rec_data, +int sao_band_ddistortion(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, int block_width, int block_height, int band_pos, int sao_bands[4]) { @@ -82,7 +82,7 @@ int sao_band_ddistortion(const encoder_state_t * const state, const pixel_t *ori } -int sao_edge_ddistortion(const pixel_t *orig_data, const pixel_t *rec_data, +int sao_edge_ddistortion(const kvz_pixel *orig_data, const kvz_pixel *rec_data, int block_width, int block_height, int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) { @@ -93,10 +93,10 @@ int sao_edge_ddistortion(const pixel_t *orig_data, const pixel_t *rec_data, for (y = 1; y < block_height - 1; ++y) { for (x = 1; x < block_width - 1; ++x) { - const pixel_t *c_data = &rec_data[y * block_width + x]; - pixel_t a = c_data[a_ofs.y * block_width + a_ofs.x]; - pixel_t c = c_data[0]; - pixel_t b = c_data[b_ofs.y * block_width + b_ofs.x]; + const kvz_pixel *c_data = &rec_data[y * block_width + x]; + kvz_pixel a = c_data[a_ofs.y * block_width + a_ofs.x]; + kvz_pixel c = c_data[0]; + kvz_pixel b = c_data[b_ofs.y * block_width + b_ofs.x]; int offset = offsets[sao_calc_eo_cat(a, b, c)]; @@ -326,7 +326,7 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4], * \param rec_data Reconstructed pixel data. 64x64 for luma, 32x32 for chroma. 
* \param sao_bands an array of bands for original and reconstructed block */ -static void calc_sao_bands(const encoder_state_t * const state, const pixel_t *orig_data, const pixel_t *rec_data, +static void calc_sao_bands(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, int block_width, int block_height, int sao_bands[2][32]) { @@ -349,7 +349,7 @@ static void calc_sao_bands(const encoder_state_t * const state, const pixel_t *o * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data, +static void calc_sao_edge_dir(const kvz_pixel *orig_data, const kvz_pixel *rec_data, int eo_class, int block_width, int block_height, int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { @@ -362,10 +362,10 @@ static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data, // their neighbours. for (y = 1; y < block_height - 1; ++y) { for (x = 1; x < block_width - 1; ++x) { - const pixel_t *c_data = &rec_data[y * block_width + x]; - pixel_t a = c_data[a_ofs.y * block_width + a_ofs.x]; - pixel_t c = c_data[0]; - pixel_t b = c_data[b_ofs.y * block_width + b_ofs.x]; + const kvz_pixel *c_data = &rec_data[y * block_width + x]; + kvz_pixel a = c_data[a_ofs.y * block_width + a_ofs.x]; + kvz_pixel c = c_data[0]; + kvz_pixel b = c_data[b_ofs.y * block_width + b_ofs.x]; int eo_cat = sao_calc_eo_cat(a, b, c); @@ -376,7 +376,7 @@ static void calc_sao_edge_dir(const pixel_t *orig_data, const pixel_t *rec_data, } static void sao_reconstruct_color(const encoder_control_t * const encoder, - const pixel_t *rec_data, pixel_t *new_rec_data, + const kvz_pixel *rec_data, kvz_pixel *new_rec_data, const sao_info_t *sao, int stride, int new_stride, int block_width, int block_height, @@ -401,15 +401,15 @@ static void sao_reconstruct_color(const encoder_control_t * const encoder, for (x = 0; x < block_width; ++x) { vector2d_t a_ofs = 
g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; - const pixel_t *c_data = &rec_data[y * stride + x]; - pixel_t *new_data = &new_rec_data[y * new_stride + x]; - pixel_t a = c_data[a_ofs.y * stride + a_ofs.x]; - pixel_t c = c_data[0]; - pixel_t b = c_data[b_ofs.y * stride + b_ofs.x]; + const kvz_pixel *c_data = &rec_data[y * stride + x]; + kvz_pixel *new_data = &new_rec_data[y * new_stride + x]; + kvz_pixel a = c_data[a_ofs.y * stride + a_ofs.x]; + kvz_pixel c = c_data[0]; + kvz_pixel b = c_data[b_ofs.y * stride + b_ofs.x]; int eo_cat = sao_calc_eo_cat(a, b, c); - new_data[0] = (pixel_t)CLIP(0, (1 << KVZ_BIT_DEPTH) - 1, c_data[0] + sao->offsets[eo_cat + offset_v]); + new_data[0] = (kvz_pixel)CLIP(0, (1 << KVZ_BIT_DEPTH) - 1, c_data[0] + sao->offsets[eo_cat + offset_v]); } } } @@ -527,7 +527,7 @@ static void sao_calc_edge_block_dims(const videoframe_t * const frame, color_t c rec->x = (rec->x == 0 ? 0 : -1); } -void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const pixel_t *old_rec, +void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const kvz_pixel *old_rec, unsigned x_ctb, unsigned y_ctb, const sao_info_t *sao, color_t color_i) { @@ -536,12 +536,12 @@ void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * fra const int lcu_stride = LCU_WIDTH >> is_chroma; const int buf_stride = lcu_stride + 2; - pixel_t *recdata = frame->rec->data[color_i]; - pixel_t buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; - pixel_t new_rec[LCU_WIDTH * LCU_WIDTH]; + kvz_pixel *recdata = frame->rec->data[color_i]; + kvz_pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; + kvz_pixel new_rec[LCU_WIDTH * LCU_WIDTH]; // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32. 
- pixel_t *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)]; - const pixel_t *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; + kvz_pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)]; + const kvz_pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; vector2d_t ofs; vector2d_t tl = { 1, 1 }; @@ -593,7 +593,7 @@ void sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * fra static void sao_search_edge_sao(const encoder_state_t * const state, - const pixel_t * data[], const pixel_t * recdata[], + const kvz_pixel * data[], const kvz_pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, sao_info_t *sao_out, sao_info_t *sao_top, @@ -671,7 +671,7 @@ static void sao_search_edge_sao(const encoder_state_t * const state, } -static void sao_search_band_sao(const encoder_state_t * const state, const pixel_t * data[], const pixel_t * recdata[], +static void sao_search_band_sao(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, sao_info_t *sao_out, sao_info_t *sao_top, @@ -719,7 +719,7 @@ static void sao_search_band_sao(const encoder_state_t * const state, const pixel * \param buf_cnt Number of pointers data and recdata have. * \param sao_out Output parameter for the best sao parameters. 
*/ -static void sao_search_best_mode(const encoder_state_t * const state, const pixel_t * data[], const pixel_t * recdata[], +static void sao_search_best_mode(const encoder_state_t * const state, const kvz_pixel * data[], const kvz_pixel * recdata[], int block_width, int block_height, unsigned buf_cnt, sao_info_t *sao_out, sao_info_t *sao_top, @@ -831,10 +831,10 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t * { int block_width = (LCU_WIDTH / 2); int block_height = (LCU_WIDTH / 2); - const pixel_t *orig_list[2]; - const pixel_t *rec_list[2]; - pixel_t orig[2][LCU_CHROMA_SIZE]; - pixel_t rec[2][LCU_CHROMA_SIZE]; + const kvz_pixel *orig_list[2]; + const kvz_pixel *rec_list[2]; + kvz_pixel orig[2][LCU_CHROMA_SIZE]; + kvz_pixel rec[2][LCU_CHROMA_SIZE]; color_t color_i; // Check for right and bottom boundaries. @@ -849,8 +849,8 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t * // Copy data to temporary buffers and init orig and rec lists to point to those buffers. 
for (color_i = COLOR_U; color_i <= COLOR_V; ++color_i) { - pixel_t *data = &frame->source->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->source->stride / 2)]; - pixel_t *recdata = &frame->rec->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->rec->stride / 2)]; + kvz_pixel *data = &frame->source->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->source->stride / 2)]; + kvz_pixel *recdata = &frame->rec->data[color_i][CU_TO_PIXEL(x_ctb, y_ctb, 1, frame->rec->stride / 2)]; pixels_blit(data, orig[color_i - 1], block_width, block_height, frame->source->stride / 2, block_width); pixels_blit(recdata, rec[color_i - 1], block_width, block_height, @@ -865,12 +865,12 @@ void sao_search_chroma(const encoder_state_t * const state, const videoframe_t * void sao_search_luma(const encoder_state_t * const state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3]) { - pixel_t orig[LCU_LUMA_SIZE]; - pixel_t rec[LCU_LUMA_SIZE]; - const pixel_t * orig_list[1] = { NULL }; - const pixel_t * rec_list[1] = { NULL }; - pixel_t *data = &frame->source->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->source->stride)]; - pixel_t *recdata = &frame->rec->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->rec->stride)]; + kvz_pixel orig[LCU_LUMA_SIZE]; + kvz_pixel rec[LCU_LUMA_SIZE]; + const kvz_pixel * orig_list[1] = { NULL }; + const kvz_pixel * rec_list[1] = { NULL }; + kvz_pixel *data = &frame->source->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->source->stride)]; + kvz_pixel *recdata = &frame->rec->y[CU_TO_PIXEL(x_ctb, y_ctb, 0, frame->rec->stride)]; int block_width = LCU_WIDTH; int block_height = LCU_WIDTH; @@ -901,9 +901,9 @@ void sao_reconstruct_frame(encoder_state_t * const state) // These are needed because SAO needs the pre-SAO pixels form left and // top LCUs. Single pixel wide buffers, like what search_lcu takes, would // be enough though. 
- pixel_t *new_y_data = MALLOC(pixel_t, frame->rec->width * frame->rec->height); - pixel_t *new_u_data = MALLOC(pixel_t, (frame->rec->width * frame->rec->height) >> 2); - pixel_t *new_v_data = MALLOC(pixel_t, (frame->rec->width * frame->rec->height) >> 2); + kvz_pixel *new_y_data = MALLOC(kvz_pixel, frame->rec->width * frame->rec->height); + kvz_pixel *new_u_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2); + kvz_pixel *new_v_data = MALLOC(kvz_pixel, (frame->rec->width * frame->rec->height) >> 2); pixels_blit(frame->rec->y, new_y_data, frame->rec->width, frame->rec->height, frame->rec->stride, frame->rec->width); pixels_blit(frame->rec->u, new_u_data, frame->rec->width/2, frame->rec->height/2, frame->rec->stride/2, frame->rec->width/2); diff --git a/src/sao.h b/src/sao.h index f268619d..617ebcd9 100644 --- a/src/sao.h +++ b/src/sao.h @@ -58,7 +58,7 @@ typedef struct sao_info_t { void init_sao_info(sao_info_t *sao); void sao_search_chroma(const encoder_state_t * state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3]); void sao_search_luma(const encoder_state_t * state, const videoframe_t *frame, unsigned x_ctb, unsigned y_ctb, sao_info_t *sao, sao_info_t *sao_top, sao_info_t *sao_left, int32_t merge_cost[3]); -void sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const pixel_t *old_rec, +void sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const kvz_pixel *old_rec, unsigned x_ctb, unsigned y_ctb, const sao_info_t *sao, color_t color_i); void sao_reconstruct_frame(encoder_state_t *state); diff --git a/src/search.c b/src/search.c index f90099c4..1c3f8b7e 100644 --- a/src/search.c +++ b/src/search.c @@ -860,13 +860,13 @@ static unsigned search_frac(const encoder_state_t * const state, //create buffer for block + extra for filter int src_stride = block_width+FILTER_SIZE+1; - pixel_t 
src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)]; - pixel_t* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)]; + kvz_pixel src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)]; + kvz_pixel* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)]; //destination buffer for interpolation int dst_stride = (block_width+1)*4; - pixel_t dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16]; - pixel_t* dst_off = &dst[dst_stride*4+4]; + kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16]; + kvz_pixel* dst_off = &dst[dst_stride*4+4]; extend_borders(orig->x, orig->y, mv.x-1, mv.y-1, state->tile->lcu_offset_x * LCU_WIDTH, @@ -885,8 +885,8 @@ static unsigned search_frac(const encoder_state_t * const state, for (i = 0; i < 9; ++i) { const vector2d_t *pattern = &square[i]; - pixel_t tmp_filtered[LCU_WIDTH*LCU_WIDTH]; - pixel_t tmp_pic[LCU_WIDTH*LCU_WIDTH]; + kvz_pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH]; + kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH]; int y,x; for(y = 0; y < block_width; ++y) { @@ -925,8 +925,8 @@ static unsigned search_frac(const encoder_state_t * const state, for (i = 0; i < 9; ++i) { const vector2d_t *pattern = &square[i]; - pixel_t tmp_filtered[LCU_WIDTH*LCU_WIDTH]; - pixel_t tmp_pic[LCU_WIDTH*LCU_WIDTH]; + kvz_pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH]; + kvz_pixel tmp_pic[LCU_WIDTH*LCU_WIDTH]; int y,x; for(y = 0; y < block_width; ++y) { @@ -1123,8 +1123,8 @@ static int search_cu_inter(const encoder_state_t * const state, int x, int y, in uint32_t cost = 0; int8_t cu_mv_cand = 0; int16_t mv[2][2]; - pixel_t tmp_block[64 * 64]; - pixel_t tmp_pic[64 * 64]; + kvz_pixel tmp_block[64 * 64]; + kvz_pixel tmp_pic[64 * 64]; // Force L0 and L1 references if (state->global->refmap[merge_cand[i].ref[0]].list == 2 || state->global->refmap[merge_cand[j].ref[1]].list == 1) continue; @@ -1636,9 +1636,9 @@ static double search_intra_trdepth(encoder_state_t * const state, const bool reconstruct_chroma = !(x_px & 4 || y_px & 4); struct { - pixel_t 
y[TR_MAX_WIDTH*TR_MAX_WIDTH]; - pixel_t u[TR_MAX_WIDTH*TR_MAX_WIDTH]; - pixel_t v[TR_MAX_WIDTH*TR_MAX_WIDTH]; + kvz_pixel y[TR_MAX_WIDTH*TR_MAX_WIDTH]; + kvz_pixel u[TR_MAX_WIDTH*TR_MAX_WIDTH]; + kvz_pixel v[TR_MAX_WIDTH*TR_MAX_WIDTH]; } nosplit_pixels; cu_cbf_t nosplit_cbf; @@ -1885,7 +1885,7 @@ static INLINE uint8_t select_best_mode_index(const int8_t *modes, const double * * coefficients of the residual. */ static double get_cost(encoder_state_t * const state, - pixel_t *pred, pixel_t *orig_block, + kvz_pixel *pred, kvz_pixel *orig_block, cost_pixel_nxn_func *satd_func, cost_pixel_nxn_func *sad_func, int width) @@ -1914,8 +1914,8 @@ static double get_cost(encoder_state_t * const state, static void search_intra_chroma_rough(encoder_state_t * const state, int x_px, int y_px, int depth, - const pixel_t *orig_u, const pixel_t *orig_v, int16_t origstride, - const pixel_t *rec_u, const pixel_t *rec_v, int16_t recstride, + const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, + const kvz_pixel *rec_u, const kvz_pixel *rec_v, int16_t recstride, int8_t luma_mode, int8_t modes[5], double costs[5]) { @@ -1931,11 +1931,11 @@ static void search_intra_chroma_rough(encoder_state_t * const state, cost_pixel_nxn_func *const satd_func = pixels_get_satd_func(width); //cost_pixel_nxn_func *const sad_func = pixels_get_sad_func(width); - pixel_t _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; - pixel_t *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); + kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - pixel_t _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; - pixel_t *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); pixels_blit(orig_u, orig_block, width, width, origstride, width); for (int i = 0; i < 5; ++i) { @@ -1986,8 +1986,8 @@ static 
void search_intra_chroma_rough(encoder_state_t * const state, * \return Number of prediction modes in param modes. */ static int8_t search_intra_rough(encoder_state_t * const state, - pixel_t *orig, int32_t origstride, - pixel_t *rec, int16_t recstride, + kvz_pixel *orig, int32_t origstride, + kvz_pixel *rec, int16_t recstride, int width, int8_t *intra_preds, int8_t modes[35], double costs[35]) { @@ -1995,15 +1995,15 @@ static int8_t search_intra_rough(encoder_state_t * const state, cost_pixel_nxn_func *sad_func = pixels_get_sad_func(width); // Temporary block arrays - pixel_t _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; - pixel_t *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); + kvz_pixel _pred[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); - pixel_t _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; - pixel_t *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); + kvz_pixel _orig_block[LCU_WIDTH * LCU_WIDTH + 1 + SIMD_ALIGNMENT]; + kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT); - pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; + kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - pixel_t *recf = &rec_filtered_temp[recstride + 1]; + kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; assert(width == 4 || width == 8 || width == 16 || width == 32); @@ -2150,8 +2150,8 @@ static int8_t search_intra_rough(encoder_state_t * const state, */ static int8_t search_intra_rdo(encoder_state_t * const state, int x_px, int y_px, int depth, - pixel_t *orig, int32_t origstride, - pixel_t *rec, int16_t recstride, + kvz_pixel *orig, int32_t origstride, + kvz_pixel *rec, int16_t recstride, int8_t *intra_preds, int modes_to_check, int8_t modes[35], double costs[35], @@ -2160,13 +2160,13 @@ static int8_t search_intra_rdo(encoder_state_t * const state, const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + 
state->encoder_control->tr_depth_intra); const int width = LCU_WIDTH >> depth; - pixel_t pred[LCU_WIDTH * LCU_WIDTH + 1]; - pixel_t orig_block[LCU_WIDTH * LCU_WIDTH + 1]; + kvz_pixel pred[LCU_WIDTH * LCU_WIDTH + 1]; + kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; int rdo_mode; int pred_mode; - pixel_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - pixel_t *recf = &rec_filtered_temp[recstride + 1]; + kvz_pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; + kvz_pixel *recf = &rec_filtered_temp[recstride + 1]; // Generate filtered reference pixels. { @@ -2273,8 +2273,8 @@ static double search_cu_intra(encoder_state_t * const state, cu_info_t *cur_cu = &lcu->cu[cu_index]; - pixel_t rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; - pixel_t *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1]; + kvz_pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; + kvz_pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1]; int8_t candidate_modes[3]; @@ -2304,7 +2304,7 @@ static double search_cu_intra(encoder_state_t * const state, double costs[35]; // Find best intra mode for 2Nx2N. 
- pixel_t *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; + kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); int8_t number_of_modes; @@ -2499,8 +2499,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, } if (num_modes != 1 && num_modes != 5) { - pixel_t rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; - pixel_t rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; + kvz_pixel rec_u[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; + kvz_pixel rec_v[(LCU_WIDTH_C * 2 + 8) * (LCU_WIDTH_C * 2 + 8)]; const int16_t width_c = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); const int16_t rec_stride = width_c * 2 + 8; @@ -2518,8 +2518,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, lcu); vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 }; - pixel_t *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; - pixel_t *ref_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; + kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; + kvz_pixel *ref_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; search_intra_chroma_rough(state, x, y, depth, ref_u, ref_v, LCU_WIDTH_C, diff --git a/src/strategies/avx2/ipol-avx2.c b/src/strategies/avx2/ipol-avx2.c index 546babfb..967e7c40 100644 --- a/src/strategies/avx2/ipol-avx2.c +++ b/src/strategies/avx2/ipol-avx2.c @@ -80,7 +80,7 @@ __m128i eight_tap_filter_x4_and_flip_16bit(__m128i data0, __m128i data1, __m128i return a; } -void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], pixel_t *src, int16_t src_stride, int16_t* __restrict dst) +void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], kvz_pixel *src, int16_t src_stride, int16_t* __restrict dst) { //Load 2 rows per xmm register @@ -104,7 +104,7 @@ void eight_tap_filter_and_flip_avx2(int8_t filter[4][8], pixel_t *src, int16_t s eight_tap_filter_x8_and_flip(rows01, rows23, rows45, rows67, (__m128i*)(&filter[3]), 
(__m128i*)(dst + 3 * dst_stride)); } -static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int16_t *src, int16_t src_stride, int offset, int combined_shift, pixel_t* __restrict dst, int16_t dst_stride) +static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int16_t *src, int16_t src_stride, int offset, int combined_shift, kvz_pixel* __restrict dst, int16_t dst_stride) { //Load a row per xmm register @@ -157,7 +157,7 @@ static INLINE void eight_tap_filter_and_flip_16bit_avx2(int8_t filter[4][8], int } -int16_t eight_tap_filter_hor_avx2(int8_t *filter, pixel_t *data) +int16_t eight_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data) { union { __m128i vector; @@ -185,7 +185,7 @@ int32_t eight_tap_filter_hor_16bit_avx2(int8_t *filter, int16_t *data) return temp; } -int16_t eight_tap_filter_ver_avx2(int8_t *filter, pixel_t *data, int16_t stride) +int16_t eight_tap_filter_ver_avx2(int8_t *filter, kvz_pixel *data, int16_t stride) { int16_t temp = 0; for (int i = 0; i < 8; ++i) @@ -207,7 +207,7 @@ int32_t eight_tap_filter_ver_16bit_avx2(int8_t *filter, int16_t *data, int16_t s return temp; } -int16_t four_tap_filter_hor_avx2(int8_t *filter, pixel_t *data) +int16_t four_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data) { int16_t temp = 0; for (int i = 0; i < 4; ++i) @@ -229,7 +229,7 @@ int32_t four_tap_filter_hor_16bit_avx2(int8_t *filter, int16_t *data) return temp; } -int16_t four_tap_filter_ver_avx2(int8_t *filter, pixel_t *data, int16_t stride) +int16_t four_tap_filter_ver_avx2(int8_t *filter, kvz_pixel *data, int16_t stride) { int16_t temp = 0; for (int i = 0; i < 4; ++i) @@ -251,7 +251,7 @@ int32_t four_tap_filter_ver_16bit_avx2(int8_t *filter, int16_t *data, int16_t st return temp; } -void filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void 
filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { int32_t x, y; @@ -312,7 +312,7 @@ void filter_inter_quarterpel_luma_avx2(const encoder_control_t * const encoder, * \param dst_stride stride of destination image * */ -void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { /* ____________ * | B0,0|ae0,0| @@ -373,7 +373,7 @@ void filter_inter_halfpel_chroma_avx2(const encoder_control_t * const encoder, p } } -void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { int32_t x, y; @@ -481,8 +481,8 @@ void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, pi } } -void extend_borders_avx2(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height, - int filterSize, int width, int height, pixel_t *dst) { +void extend_borders_avx2(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height, + int filterSize, int width, int height, kvz_pixel *dst) { int16_t mv[2] = { mv_x, mv_y }; int halfFilterSize = filterSize >> 1; diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c 
index 24b51a5e..80d63d43 100644 --- a/src/strategies/avx2/picture-avx2.c +++ b/src/strategies/avx2/picture-avx2.c @@ -98,7 +98,7 @@ static INLINE uint32_t m256i_horizontal_sum(const __m256i sum) } -static unsigned sad_8bit_8x8_avx2(const pixel_t *buf1, const pixel_t *buf2) +static unsigned sad_8bit_8x8_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2) { const __m256i *const a = (const __m256i *)buf1; const __m256i *const b = (const __m256i *)buf2; @@ -108,7 +108,7 @@ static unsigned sad_8bit_8x8_avx2(const pixel_t *buf1, const pixel_t *buf2) } -static unsigned sad_8bit_16x16_avx2(const pixel_t *buf1, const pixel_t *buf2) +static unsigned sad_8bit_16x16_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2) { const __m256i *const a = (const __m256i *)buf1; const __m256i *const b = (const __m256i *)buf2; @@ -118,7 +118,7 @@ static unsigned sad_8bit_16x16_avx2(const pixel_t *buf1, const pixel_t *buf2) } -static unsigned sad_8bit_32x32_avx2(const pixel_t *buf1, const pixel_t *buf2) +static unsigned sad_8bit_32x32_avx2(const kvz_pixel *buf1, const kvz_pixel *buf2) { const __m256i *const a = (const __m256i *)buf1; const __m256i *const b = (const __m256i *)buf2; diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index debb0102..b9a4e4aa 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -32,7 +32,7 @@ extern int8_t g_luma_filter[4][8]; extern int8_t g_chroma_filter[8][4]; -int16_t eight_tap_filter_hor_generic(int8_t *filter, pixel_t *data) +int16_t eight_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data) { int16_t temp = 0; for (int i = 0; i < 8; ++i) @@ -54,7 +54,7 @@ int32_t eight_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data) return temp; } -int16_t eight_tap_filter_ver_generic(int8_t *filter, pixel_t *data, int16_t stride) +int16_t eight_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride) { int16_t temp = 0; for (int i = 0; i < 8; ++i) @@ -76,7 
+76,7 @@ int32_t eight_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_ return temp; } -int16_t four_tap_filter_hor_generic(int8_t *filter, pixel_t *data) +int16_t four_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data) { int16_t temp = 0; for (int i = 0; i < 4; ++i) @@ -98,7 +98,7 @@ int32_t four_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data) return temp; } -int16_t four_tap_filter_ver_generic(int8_t *filter, pixel_t *data, int16_t stride) +int16_t four_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride) { int16_t temp = 0; for (int i = 0; i < 4; ++i) @@ -120,7 +120,7 @@ int32_t four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_t return temp; } -void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { //TODO: horizontal and vertical only filtering int32_t x, y; @@ -180,7 +180,7 @@ void filter_inter_quarterpel_luma_generic(const encoder_control_t * const encode * \param dst_stride stride of destination image * */ -void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { /* ____________ * | B0,0|ae0,0| @@ -241,7 +241,7 @@ void filter_inter_halfpel_chroma_generic(const encoder_control_t * const encoder } } -void filter_inter_octpel_chroma_generic(const encoder_control_t * 
const encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) +void filter_inter_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag) { int32_t x, y; @@ -349,8 +349,8 @@ void filter_inter_octpel_chroma_generic(const encoder_control_t * const encoder, } } -void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height, - int filterSize, int width, int height, pixel_t *dst) { +void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height, + int filterSize, int width, int height, kvz_pixel *dst) { int16_t mv[2] = { mv_x, mv_y }; int halfFilterSize = filterSize >> 1; diff --git a/src/strategies/generic/nal-generic.c b/src/strategies/generic/nal-generic.c index 0af93a76..3d959e75 100644 --- a/src/strategies/generic/nal-generic.c +++ b/src/strategies/generic/nal-generic.c @@ -29,7 +29,7 @@ #include "nal.h" -static void array_checksum_generic(const pixel_t* data, +static void array_checksum_generic(const kvz_pixel* data, const int height, const int width, const int stride, unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) { @@ -52,7 +52,7 @@ static void array_checksum_generic(const pixel_t* data, checksum_out[3] = (checksum) & 0xff; } -static void array_checksum_generic4(const pixel_t* data, +static void array_checksum_generic4(const kvz_pixel* data, const int height, const int width, const int stride, unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) { @@ -95,7 +95,7 @@ static void array_checksum_generic4(const pixel_t* data, checksum_out[3] = (checksum) & 0xff; } -static void array_checksum_generic8(const pixel_t* data, +static void array_checksum_generic8(const kvz_pixel* data, const int height, const int width, 
const int stride, unsigned char checksum_out[SEI_HASH_MAX_LENGTH]) { diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index f5eccf81..8e6382d6 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -28,7 +28,7 @@ // Function to clip int16_t to pixel. (0-255 or 0-1023) // Assumes PIXEL_MAX to be 2^n-1 -pixel_t fast_clip_16bit_to_pixel(int16_t value) +kvz_pixel fast_clip_16bit_to_pixel(int16_t value) { // Ensure that compiler generates arithmetic shift from ">>" #if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__) @@ -50,7 +50,7 @@ pixel_t fast_clip_16bit_to_pixel(int16_t value) // Function to clip int32_t to pixel. (0-255 or 0-1023) // Assumes PIXEL_MAX to be 2^n-1 -pixel_t fast_clip_32bit_to_pixel(int32_t value) +kvz_pixel fast_clip_32bit_to_pixel(int32_t value) { // Ensure that compiler generates arithmetic shift from ">>" #if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__) @@ -84,7 +84,7 @@ pixel_t fast_clip_32bit_to_pixel(int32_t value) * * \returns Sum of Absolute Differences */ -static unsigned reg_sad_generic(const pixel_t * const data1, const pixel_t * const data2, +static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; @@ -104,7 +104,7 @@ static unsigned reg_sad_generic(const pixel_t * const data1, const pixel_t * con * \brief Calculate SATD between two 4x4 blocks inside bigger arrays. 
* From HM 13.0 */ -static unsigned satd_8bit_4x4_generic(const pixel_t *piOrg, const pixel_t *piCur) +static unsigned satd_8bit_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur) { int32_t k, satd = 0, diff[16], m[16], d[16]; for (k = 0; k < 16; ++k) { @@ -191,8 +191,8 @@ static unsigned satd_8bit_4x4_generic(const pixel_t *piOrg, const pixel_t *piCur /** * \brief Calculate SATD between two 8x8 blocks inside bigger arrays. */ -unsigned satd_16bit_8x8_general(const pixel_t * piOrg, const int32_t iStrideOrg, - const pixel_t * piCur, const int32_t iStrideCur) +unsigned satd_16bit_8x8_general(const kvz_pixel * piOrg, const int32_t iStrideOrg, + const kvz_pixel * piCur, const int32_t iStrideCur) { int32_t k, i, j, jj, sad = 0; int32_t diff[64], m1[8][8], m2[8][8], m3[8][8]; @@ -307,10 +307,10 @@ cost_pixel_nxn_func satd_8bit_32x32_generic; cost_pixel_nxn_func satd_8bit_64x64_generic; // These macros define sadt_16bit_NxN for N = 8, 16, 32, 64 -SATD_NXN(8, pixel_t, 8bit) -SATD_NXN(16, pixel_t, 8bit) -SATD_NXN(32, pixel_t, 8bit) -SATD_NXN(64, pixel_t, 8bit) +SATD_NXN(8, kvz_pixel, 8bit) +SATD_NXN(16, kvz_pixel, 8bit) +SATD_NXN(32, kvz_pixel, 8bit) +SATD_NXN(64, kvz_pixel, 8bit) // Function macro for defining SAD calculating functions // for fixed size blocks. @@ -336,11 +336,11 @@ static cost_pixel_nxn_func sad_8bit_64x64_generic; // These macros define sad_16bit_nxn functions for n = 4, 8, 16, 32, 64 // with function signatures of cost_16bit_nxn_func. // They are used through get_pixel_sad_func. 
-SAD_NXN(4, pixel_t, 8bit) -SAD_NXN(8, pixel_t, 8bit) -SAD_NXN(16, pixel_t, 8bit) -SAD_NXN(32, pixel_t, 8bit) -SAD_NXN(64, pixel_t, 8bit) +SAD_NXN(4, kvz_pixel, 8bit) +SAD_NXN(8, kvz_pixel, 8bit) +SAD_NXN(16, kvz_pixel, 8bit) +SAD_NXN(32, kvz_pixel, 8bit) +SAD_NXN(64, kvz_pixel, 8bit) int strategy_register_picture_generic(void* opaque) diff --git a/src/strategies/generic/picture-generic.h b/src/strategies/generic/picture-generic.h index 6a53b6ba..fe123a11 100644 --- a/src/strategies/generic/picture-generic.h +++ b/src/strategies/generic/picture-generic.h @@ -24,10 +24,10 @@ int strategy_register_picture_generic(void* opaque); // Function to clip int16_t to pixel. (0-255 or 0-1023) // Assumes PIXEL_MAX to be 2^n-1 -pixel_t fast_clip_16bit_to_pixel(int16_t value); +kvz_pixel fast_clip_16bit_to_pixel(int16_t value); // Function to clip int32_t to pixel. (0-255 or 0-1023) // Assumes PIXEL_MAX to be 2^n-1 -pixel_t fast_clip_32bit_to_pixel(int32_t value); +kvz_pixel fast_clip_32bit_to_pixel(int32_t value); #endif //STRATEGIES_PICTURE_GENERIC_H_ diff --git a/src/strategies/sse2/picture-sse2.c b/src/strategies/sse2/picture-sse2.c index c0e03de1..457b9c2b 100644 --- a/src/strategies/sse2/picture-sse2.c +++ b/src/strategies/sse2/picture-sse2.c @@ -31,7 +31,7 @@ # include <emmintrin.h> -static unsigned reg_sad_sse2(const pixel_t * const data1, const pixel_t * const data2, +static unsigned reg_sad_sse2(const kvz_pixel * const data1, const kvz_pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; @@ -56,7 +56,7 @@ static unsigned reg_sad_sse2(const pixel_t * const data1, const pixel_t * const return sad; } -static unsigned sad_8bit_4x4_sse2(const pixel_t *buf1, const pixel_t *buf2) +static unsigned sad_8bit_4x4_sse2(const kvz_pixel *buf1, const kvz_pixel *buf2) { const __m128i *const mbuf1 = (const __m128i *)buf1; const __m128i *const mbuf2 = (const __m128i *)buf2; diff --git a/src/strategies/sse41/picture-sse41.c 
b/src/strategies/sse41/picture-sse41.c index 8e27ec50..280e9731 100644 --- a/src/strategies/sse41/picture-sse41.c +++ b/src/strategies/sse41/picture-sse41.c @@ -31,7 +31,7 @@ # include <smmintrin.h> -static unsigned reg_sad_sse41(const pixel_t * const data1, const pixel_t * const data2, +static unsigned reg_sad_sse41(const kvz_pixel * const data1, const kvz_pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; diff --git a/src/strategies/strategies-ipol.h b/src/strategies/strategies-ipol.h index 01990e0a..e7214287 100644 --- a/src/strategies/strategies-ipol.h +++ b/src/strategies/strategies-ipol.h @@ -25,11 +25,11 @@ #include "encoder.h" -typedef unsigned(ipol_func)(const encoder_control_t * encoder, pixel_t *src, int16_t src_stride, int width, int height, pixel_t *dst, +typedef unsigned(ipol_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag); -typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel_t *ref, int ref_width, int ref_height, - int filterSize, int width, int height, pixel_t *dst); +typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height, + int filterSize, int width, int height, kvz_pixel *dst); // Declare function pointers. 
diff --git a/src/strategies/strategies-nal.c b/src/strategies/strategies-nal.c index 43ffd67c..7213b691 100644 --- a/src/strategies/strategies-nal.c +++ b/src/strategies/strategies-nal.c @@ -22,7 +22,7 @@ #include "generic/nal-generic.h" -void (*array_checksum)(const pixel_t* data, +void (*array_checksum)(const kvz_pixel* data, const int height, const int width, const int stride, unsigned char checksum_out[SEI_HASH_MAX_LENGTH]); diff --git a/src/strategies/strategies-nal.h b/src/strategies/strategies-nal.h index 9a87cbbc..a4ef9b62 100644 --- a/src/strategies/strategies-nal.h +++ b/src/strategies/strategies-nal.h @@ -30,7 +30,7 @@ * \param width Width of the picture. * \param stride Width of one row in the pixel array. */ -typedef void (*array_checksum_func)(const pixel_t* data, +typedef void (*array_checksum_func)(const kvz_pixel* data, const int height, const int width, const int stride, unsigned char checksum_out[SEI_HASH_MAX_LENGTH]); diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index 40df75ac..a2afd042 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -23,10 +23,10 @@ #include "../image.h" -typedef unsigned(reg_sad_func)(const pixel_t *const data1, const pixel_t *const data2, +typedef unsigned(reg_sad_func)(const kvz_pixel *const data1, const kvz_pixel *const data2, const int width, const int height, const unsigned stride1, const unsigned stride2); -typedef unsigned (cost_pixel_nxn_func)(const pixel_t *block1, const pixel_t *block2); +typedef unsigned (cost_pixel_nxn_func)(const kvz_pixel *block1, const kvz_pixel *block2); // Declare function pointers. 
diff --git a/src/strategies/x86_asm/picture-x86-asm-sad.h b/src/strategies/x86_asm/picture-x86-asm-sad.h index 3de8ee98..e44c76cf 100644 --- a/src/strategies/x86_asm/picture-x86-asm-sad.h +++ b/src/strategies/x86_asm/picture-x86-asm-sad.h @@ -24,13 +24,13 @@ \brief assembly functions header for sad */ -unsigned kvz_sad_4x4_avx(const pixel_t*, const pixel_t*); -unsigned kvz_sad_8x8_avx(const pixel_t*, const pixel_t*); -unsigned kvz_sad_16x16_avx(const pixel_t*, const pixel_t*); +unsigned kvz_sad_4x4_avx(const kvz_pixel*, const kvz_pixel*); +unsigned kvz_sad_8x8_avx(const kvz_pixel*, const kvz_pixel*); +unsigned kvz_sad_16x16_avx(const kvz_pixel*, const kvz_pixel*); -unsigned kvz_sad_4x4_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride); -unsigned kvz_sad_8x8_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride); -unsigned kvz_sad_16x16_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride); +unsigned kvz_sad_4x4_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride); +unsigned kvz_sad_8x8_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride); +unsigned kvz_sad_16x16_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride); #endif diff --git a/src/strategies/x86_asm/picture-x86-asm-satd.h b/src/strategies/x86_asm/picture-x86-asm-satd.h index eb2a1af1..0889c89c 100644 --- a/src/strategies/x86_asm/picture-x86-asm-satd.h +++ b/src/strategies/x86_asm/picture-x86-asm-satd.h @@ -24,10 +24,10 @@ /*! 
\file picture-x86-asm-satd.h \brief assembly functions header for satd */ -unsigned kvz_satd_4x4_avx(const pixel_t *org, const pixel_t *cur); -unsigned kvz_satd_8x8_avx(const pixel_t *org, const pixel_t *cur); -unsigned kvz_satd_16x16_avx(const pixel_t *org, const pixel_t *cur); -unsigned kvz_satd_32x32_avx(const pixel_t *org, const pixel_t *cur); -unsigned kvz_satd_64x64_avx(const pixel_t *org, const pixel_t *cur); +unsigned kvz_satd_4x4_avx(const kvz_pixel *org, const kvz_pixel *cur); +unsigned kvz_satd_8x8_avx(const kvz_pixel *org, const kvz_pixel *cur); +unsigned kvz_satd_16x16_avx(const kvz_pixel *org, const kvz_pixel *cur); +unsigned kvz_satd_32x32_avx(const kvz_pixel *org, const kvz_pixel *cur); +unsigned kvz_satd_64x64_avx(const kvz_pixel *org, const kvz_pixel *cur); #endif diff --git a/src/strategies/x86_asm/picture-x86-asm.c b/src/strategies/x86_asm/picture-x86-asm.c index e9a4112e..89403997 100644 --- a/src/strategies/x86_asm/picture-x86-asm.c +++ b/src/strategies/x86_asm/picture-x86-asm.c @@ -29,7 +29,7 @@ #include "picture-x86-asm-sad.h" #include "picture-x86-asm-satd.h" -static unsigned kvz_sad_32x32_avx(const pixel_t *data1, const pixel_t *data2) +static unsigned kvz_sad_32x32_avx(const kvz_pixel *data1, const kvz_pixel *data2) { unsigned sad = 0; sad += kvz_sad_16x16_avx(data1, data2); @@ -39,7 +39,7 @@ static unsigned kvz_sad_32x32_avx(const pixel_t *data1, const pixel_t *data2) return sad; } -static unsigned kvz_sad_32x32_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride) +static unsigned kvz_sad_32x32_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride) { unsigned sad = 0; sad += kvz_sad_16x16_stride_avx(data1, data2, stride); @@ -49,7 +49,7 @@ static unsigned kvz_sad_32x32_stride_avx(const pixel_t *data1, const pixel_t *da return sad; } -static unsigned kvz_sad_64x64_avx(const pixel_t *data1, const pixel_t *data2) +static unsigned kvz_sad_64x64_avx(const kvz_pixel *data1, const kvz_pixel *data2) { 
unsigned sad = 0; sad += kvz_sad_32x32_avx(data1, data2); @@ -59,7 +59,7 @@ static unsigned kvz_sad_64x64_avx(const pixel_t *data1, const pixel_t *data2) return sad; } -static unsigned kvz_sad_64x64_stride_avx(const pixel_t *data1, const pixel_t *data2, unsigned stride) +static unsigned kvz_sad_64x64_stride_avx(const kvz_pixel *data1, const kvz_pixel *data2, unsigned stride) { unsigned sad = 0; sad += kvz_sad_32x32_stride_avx(data1, data2, stride); @@ -69,7 +69,7 @@ static unsigned kvz_sad_64x64_stride_avx(const pixel_t *data1, const pixel_t *da return sad; } -static unsigned kvz_sad_other_avx(const pixel_t * const data1, const pixel_t * const data2, +static unsigned kvz_sad_other_avx(const kvz_pixel * const data1, const kvz_pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { int y, x; @@ -84,7 +84,7 @@ static unsigned kvz_sad_other_avx(const pixel_t * const data1, const pixel_t * c return sad; } -static unsigned reg_sad_x86_asm(const pixel_t * const data1, const pixel_t * const data2, +static unsigned reg_sad_x86_asm(const kvz_pixel * const data1, const kvz_pixel * const data2, const int width, const int height, const unsigned stride1, const unsigned stride2) { if (width == 4 && height == 4) { diff --git a/src/strategyselector.h b/src/strategyselector.h index 7b40a0c8..a47761a4 100644 --- a/src/strategyselector.h +++ b/src/strategyselector.h @@ -157,7 +157,7 @@ static const strategy_to_select_t strategies_to_select[] = { { NULL, NULL }, }; -unsigned satd_8bit_8x8_generic(const pixel_t * const block1, const pixel_t * const block2); +unsigned satd_8bit_8x8_generic(const kvz_pixel * const block1, const kvz_pixel * const block2); #endif //STRATEGYSELECTOR_H_ diff --git a/src/transform.c b/src/transform.c index b3a6cd13..85d74126 100644 --- a/src/transform.c +++ b/src/transform.c @@ -326,8 +326,8 @@ int quantize_residual(encoder_state_t *const state, const cu_info_t *const cur_cu, const int width, const color_t 
color, const coeff_scan_order_t scan_order, const int use_trskip, const int in_stride, const int out_stride, - const pixel_t *const ref_in, const pixel_t *const pred_in, - pixel_t *rec_out, coeff_t *coeff_out) + const kvz_pixel *const ref_in, const kvz_pixel *const pred_in, + kvz_pixel *rec_out, coeff_t *coeff_out) { // Temporary arrays to pass data to and from quant and transform functions. int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; @@ -440,11 +440,11 @@ int quantize_residual_trskip( const cu_info_t *const cur_cu, const int width, const color_t color, const coeff_scan_order_t scan_order, int8_t *trskip_out, const int in_stride, const int out_stride, - const pixel_t *const ref_in, const pixel_t *const pred_in, - pixel_t *rec_out, coeff_t *coeff_out) + const kvz_pixel *const ref_in, const kvz_pixel *const pred_in, + kvz_pixel *rec_out, coeff_t *coeff_out) { struct { - pixel_t rec[4*4]; + kvz_pixel rec[4*4]; coeff_t coeff[4*4]; uint32_t cost; int has_coeffs; @@ -543,9 +543,9 @@ void quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_ const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH; // Pointers to current location in arrays with prediction. - pixel_t *recbase_y = &lcu->rec.y[luma_offset]; + kvz_pixel *recbase_y = &lcu->rec.y[luma_offset]; // Pointers to current location in arrays with reference. - const pixel_t *base_y = &lcu->ref.y[luma_offset]; + const kvz_pixel *base_y = &lcu->ref.y[luma_offset]; // Pointers to current location in arrays with kvantized coefficients. 
coeff_t *orig_coeff_y = &lcu->coeff.y[luma_offset]; @@ -633,10 +633,10 @@ void quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int3 cbf_clear(&cur_cu->cbf.v, depth); const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C; - pixel_t *recbase_u = &lcu->rec.u[chroma_offset]; - pixel_t *recbase_v = &lcu->rec.v[chroma_offset]; - const pixel_t *base_u = &lcu->ref.u[chroma_offset]; - const pixel_t *base_v = &lcu->ref.v[chroma_offset]; + kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset]; + kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset]; + const kvz_pixel *base_u = &lcu->ref.u[chroma_offset]; + const kvz_pixel *base_v = &lcu->ref.v[chroma_offset]; coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset]; coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset]; coeff_scan_order_t scan_idx_chroma; diff --git a/src/yuv_io.c b/src/yuv_io.c index a3e04280..604fb841 100644 --- a/src/yuv_io.c +++ b/src/yuv_io.c @@ -28,10 +28,10 @@ #include "yuv_io.h" static void fill_after_frame(unsigned height, unsigned array_width, - unsigned array_height, pixel_t *data) + unsigned array_height, kvz_pixel *data) { - pixel_t* p = data + height * array_width; - pixel_t* end = data + array_width * array_height; + kvz_pixel* p = data + height * array_width; + kvz_pixel* end = data + array_width * array_height; while (p < end) { // Fill the line by copying the line above. 
@@ -43,11 +43,11 @@ static void fill_after_frame(unsigned height, unsigned array_width, static int read_and_fill_frame_data(FILE *file, unsigned width, unsigned height, - unsigned array_width, pixel_t *data) + unsigned array_width, kvz_pixel *data) { - pixel_t* p = data; - pixel_t* end = data + array_width * height; - pixel_t fill_char; + kvz_pixel* p = data; + kvz_pixel* end = data + array_width * height; + kvz_pixel fill_char; unsigned i; while (p < end) { diff --git a/tests/intra_sad_tests.c b/tests/intra_sad_tests.c index 4fad8e61..abfbdac3 100644 --- a/tests/intra_sad_tests.c +++ b/tests/intra_sad_tests.c @@ -35,7 +35,7 @@ ////////////////////////////////////////////////////////////////////////// // GLOBALS -pixel_t * bufs[NUM_TESTS][6][2]; +kvz_pixel * bufs[NUM_TESTS][6][2]; static struct { int log_width; // for selecting dim from bufs @@ -45,7 +45,7 @@ static struct { ////////////////////////////////////////////////////////////////////////// // SETUP, TEARDOWN AND HELPER FUNCTIONS -static void init_gradient(int x_px, int y_px, int width, int slope, pixel_t *buf) +static void init_gradient(int x_px, int y_px, int width, int slope, kvz_pixel *buf) { for (int y = 0; y < width; ++y) { for (int x = 0; x < width; ++x) { @@ -68,10 +68,10 @@ static void setup_tests() for (int w = LCU_MIN_LOG_W; w <= LCU_MAX_LOG_W; ++w) { unsigned size = 1 << (w * 2); - bufs[test][w][0] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT); + bufs[test][w][0] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT); bufs[test][w][0] = ALIGNED_POINTER(bufs[test][w][0], SIMD_ALIGNMENT); - bufs[test][w][1] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT); + bufs[test][w][1] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT); bufs[test][w][1] = ALIGNED_POINTER(bufs[test][w][1], SIMD_ALIGNMENT); } } @@ -104,7 +104,7 @@ static void tear_down_tests() } -static unsigned test_calc_sad(const pixel_t * buf1, const pixel_t * buf2, int dim) +static unsigned test_calc_sad(const kvz_pixel * buf1, const 
kvz_pixel * buf2, int dim) { unsigned result = 0; for (int i = 0; i < dim * dim; ++i) { @@ -125,8 +125,8 @@ TEST test_black_and_white(void) const int test = 0; const int width = 1 << test_env.log_width; - pixel_t * buf1 = bufs[test][test_env.log_width][0]; - pixel_t * buf2 = bufs[test][test_env.log_width][1]; + kvz_pixel * buf1 = bufs[test][test_env.log_width][0]; + kvz_pixel * buf2 = bufs[test][test_env.log_width][1]; unsigned result1 = test_env.tested_func(buf1, buf2); unsigned result2 = test_env.tested_func(buf2, buf1); @@ -149,8 +149,8 @@ TEST test_gradient(void) const int test = 1; const int width = 1 << test_env.log_width; - pixel_t * buf1 = bufs[test][test_env.log_width][0]; - pixel_t * buf2 = bufs[test][test_env.log_width][1]; + kvz_pixel * buf1 = bufs[test][test_env.log_width][0]; + kvz_pixel * buf2 = bufs[test][test_env.log_width][1]; unsigned result = test_calc_sad(buf1, buf2, width); unsigned result1 = test_env.tested_func(buf1, buf2); diff --git a/tests/satd_tests.c b/tests/satd_tests.c index 280e01d0..61c8f95a 100644 --- a/tests/satd_tests.c +++ b/tests/satd_tests.c @@ -33,7 +33,7 @@ ////////////////////////////////////////////////////////////////////////// // GLOBALS -pixel_t * satd_bufs[NUM_TESTS][7][2]; +kvz_pixel * satd_bufs[NUM_TESTS][7][2]; static struct { int log_width; // for selecting dim from satd_bufs @@ -53,8 +53,8 @@ static void setup_tests() for (int w = LCU_MIN_LOG_W; w <= LCU_MAX_LOG_W; ++w) { unsigned size = 1 << (w * 2); - satd_bufs[test][w][0] = malloc(size * sizeof(pixel_t)); - satd_bufs[test][w][1] = malloc(size * sizeof(pixel_t)); + satd_bufs[test][w][0] = malloc(size * sizeof(kvz_pixel)); + satd_bufs[test][w][1] = malloc(size * sizeof(kvz_pixel)); } } @@ -110,8 +110,8 @@ TEST satd_test_black_and_white(void) const int test = 0; - pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0]; - pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1]; + kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0]; + kvz_pixel 
* buf2 = satd_bufs[test][satd_test_env.log_width][1]; unsigned result1 = satd_test_env.tested_func(buf1, buf2); unsigned result2 = satd_test_env.tested_func(buf2, buf1); @@ -128,8 +128,8 @@ TEST satd_test_checkers(void) const int test = 1; - pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0]; - pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1]; + kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0]; + kvz_pixel * buf2 = satd_bufs[test][satd_test_env.log_width][1]; unsigned result1 = satd_test_env.tested_func(buf1, buf2); unsigned result2 = satd_test_env.tested_func(buf2, buf1); @@ -147,8 +147,8 @@ TEST satd_test_gradient(void) const int test = 2; - pixel_t * buf1 = satd_bufs[test][satd_test_env.log_width][0]; - pixel_t * buf2 = satd_bufs[test][satd_test_env.log_width][1]; + kvz_pixel * buf1 = satd_bufs[test][satd_test_env.log_width][0]; + kvz_pixel * buf2 = satd_bufs[test][satd_test_env.log_width][1]; unsigned result1 = satd_test_env.tested_func(buf1, buf2); unsigned result2 = satd_test_env.tested_func(buf2, buf1); diff --git a/tests/speed_tests.c b/tests/speed_tests.c index 445fb33e..2f2563e4 100644 --- a/tests/speed_tests.c +++ b/tests/speed_tests.c @@ -40,8 +40,8 @@ ////////////////////////////////////////////////////////////////////////// // GLOBALS -pixel_t * bufs[NUM_TESTS]; // SIMD aligned pointers. -pixel_t * actual_bufs[NUM_TESTS]; // pointers returned by malloc. +kvz_pixel * bufs[NUM_TESTS]; // SIMD aligned pointers. +kvz_pixel * actual_bufs[NUM_TESTS]; // pointers returned by malloc. 
static struct test_env_t { int log_width; // for selecting dim from bufs @@ -53,7 +53,7 @@ static struct test_env_t { ////////////////////////////////////////////////////////////////////////// // SETUP, TEARDOWN AND HELPER FUNCTIONS -static void init_gradient(int x_px, int y_px, int width, int slope, pixel_t *buf) +static void init_gradient(int x_px, int y_px, int width, int slope, kvz_pixel *buf) { for (int y = 0; y < width; ++y) { for (int x = 0; x < width; ++x) { @@ -71,7 +71,7 @@ static void setup_tests() for (int test = 0; test < NUM_TESTS; ++test) { unsigned size = NUM_CHUNKS * 64 * 64; - actual_bufs[test] = malloc(size * sizeof(pixel_t) + SIMD_ALIGNMENT); + actual_bufs[test] = malloc(size * sizeof(kvz_pixel) + SIMD_ALIGNMENT); bufs[test] = ALIGNED_POINTER(actual_bufs[test], SIMD_ALIGNMENT); } @@ -112,9 +112,9 @@ TEST test_intra_speed(const int width) uint64_t sum = 0; for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Compare the first chunk against the 35 other chunks to simulate real usage. - pixel_t * buf1 = &bufs[test][offset]; + kvz_pixel * buf1 = &bufs[test][offset]; for (int chunk = 1; chunk < NUM_CHUNKS; ++chunk) { - pixel_t * buf2 = &bufs[test][chunk * size + offset]; + kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; cost_pixel_nxn_func *tested_func = test_env.tested_func; sum += tested_func(buf1, buf2); @@ -151,9 +151,9 @@ TEST test_inter_speed(const int width) for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Treat 4 consecutive chunks as one chunk with double width and height, // and do a 8x8 grid search against the first chunk to simulate real usage. 
- pixel_t * buf1 = &bufs[test][offset]; + kvz_pixel * buf1 = &bufs[test][offset]; for (int chunk = 0; chunk < NUM_CHUNKS; chunk += 4) { - pixel_t * buf2 = &bufs[test][chunk * size + offset]; + kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; for (int y = 0; y < 8; ++y) { for (int x = 0; x < 8; ++x) { const int stride1 = 2 * 64; @@ -203,8 +203,8 @@ TEST dct_speed(const int width) for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Compare the first chunk against the 35 other chunks to simulate real usage. for (int chunk = 0; chunk < NUM_CHUNKS; ++chunk) { - pixel_t * buf1 = &bufs[test][offset]; - pixel_t * buf2 = &bufs[test][chunk * size + offset]; + kvz_pixel * buf1 = &bufs[test][offset]; + kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; for (int p = 0; p < size; ++p) { tmp_residual[p] = (int16_t)(buf1[p] - buf2[p]); }