diff --git a/src/sao.c b/src/sao.c index ee2fb91f..016d8950 100644 --- a/src/sao.c +++ b/src/sao.c @@ -261,17 +261,6 @@ static void calc_sao_bands(const encoder_state_t * const state, const kvz_pixel } -static int sao_calc_eo_cat(kvz_pixel a, kvz_pixel b, kvz_pixel c) -{ - // Mapping relationships between a, b and c to eo_idx. - static const int sao_eo_idx_to_eo_category[] = { 1, 2, 0, 3, 4 }; - - int eo_idx = 2 + SIGN3((int)c - (int)a) + SIGN3((int)c - (int)b); - - return sao_eo_idx_to_eo_category[eo_idx]; -} - - /** * \brief Reconstruct SAO. * @@ -279,16 +268,12 @@ static int sao_calc_eo_cat(kvz_pixel a, kvz_pixel b, kvz_pixel c) * \param buffer Buffer containing the deblocked input pixels. The * area to filter starts at index 0. * \param stride stride of buffer - * \param x x-coordinate of the top-left corner in pixels - * \param y y-coordinate of the top-left corner in pixels + * \param frame_x x-coordinate of the top-left corner in pixels + * \param frame_y y-coordinate of the top-left corner in pixels * \param width width of the area to filter * \param height height of the area to filter * \param sao SAO information * \param color color plane index - * \param border_left true, if the left border of the area exists - * \param border_right true, if the right border of the area exists - * \param border_above true, if the top border of the area exists - * \param border_below true, if the bottom border of the area exists */ void kvz_sao_reconstruct(const encoder_state_t *state, const kvz_pixel *buffer, @@ -308,66 +293,45 @@ void kvz_sao_reconstruct(const encoder_state_t *state, const int frame_height = frame->height >> shift; kvz_pixel *output = &frame->rec->data[color][frame_x + frame_y * frame_width]; - switch (sao->type) { + if (sao->type == SAO_TYPE_EDGE) { + const vector2d_t *offset = g_sao_edge_offsets[sao->eo_class]; - case SAO_TYPE_NONE: - break; - - case SAO_TYPE_BAND: { - int offsets[1 << KVZ_BIT_DEPTH]; - kvz_calc_sao_offset_array(ctrl, sao, offsets, color); - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - output[x + y * frame_width] = offsets[buffer[x + y * stride]]; - } - } - break; + if (frame_x + width + offset[0].x > frame_width || + frame_x + width + offset[1].x > frame_width) + { + // Nothing to do for the rightmost column. + width -= 1; } - - case SAO_TYPE_EDGE: { - const int offset_v = color == COLOR_V ? 5 : 0; - const vector2d_t *offset = g_sao_edge_offsets[sao->eo_class]; - - int x_orig = 0; - int y_orig = 0; - - if (frame_x + offset[0].x < 0 || frame_x + offset[1].x < 0) { - // Nothing to do for the leftmost column. - x_orig += 1; - } - if (frame_x + width + offset[0].x > frame_width || - frame_x + width + offset[1].x > frame_width) - { - // Nothing to do for the rightmost column. - width -= 1; - } - if (frame_y + offset[0].y < 0 || frame_y + offset[1].y < 0) { - // Nothing to do for the topmost row. - y_orig += 1; - } - if (frame_y + height + offset[0].y > frame_height || - frame_y + height + offset[1].y > frame_height) - { - // Nothing to do for the bottommost row. - height -= 1; - } - - for (int y = y_orig; y < height; y++) { - for (int x = x_orig; x < width; x++) { - const kvz_pixel *data = &buffer[x + y * stride]; - - kvz_pixel a = data[offset[0].x + offset[0].y * stride]; - kvz_pixel c = data[0]; - kvz_pixel b = data[offset[1].x + offset[1].y * stride]; - - const int eo_cat = sao_calc_eo_cat(a, b, c); - - output[x + y * frame_width] = - CLIP(0, (1 << KVZ_BIT_DEPTH) - 1, c + sao->offsets[eo_cat + offset_v]); - } - } - break; + if (frame_x + offset[0].x < 0 || frame_x + offset[1].x < 0) { + // Nothing to do for the leftmost column. + buffer += 1; + output += 1; + width -= 1; } + if (frame_y + height + offset[0].y > frame_height || + frame_y + height + offset[1].y > frame_height) + { + // Nothing to do for the bottommost row. + height -= 1; + } + if (frame_y + offset[0].y < 0 || frame_y + offset[1].y < 0) { + // Nothing to do for the topmost row. + buffer += stride; + output += frame_width; + height -= 1; + } + } + + if (sao->type != SAO_TYPE_NONE) { + kvz_sao_reconstruct_color(ctrl, + buffer, + output, + sao, + stride, + frame_width, + width, + height, + color); } } diff --git a/src/strategies/avx2/sao-avx2.c b/src/strategies/avx2/sao-avx2.c index 91498266..cb4a04d9 100644 --- a/src/strategies/avx2/sao-avx2.c +++ b/src/strategies/avx2/sao-avx2.c @@ -36,13 +36,13 @@ // is difficult to understand. -static INLINE __m128i load_6_pixels(const kvz_pixel* data){ - +static INLINE __m128i load_6_pixels(const kvz_pixel* data) +{ return _mm_insert_epi16(_mm_cvtsi32_si128(*(int32_t*)&(data[0])), *(int16_t*)&(data[4]), 2); } -static INLINE __m256i load_5_offsets(const int* offsets){ - +static INLINE __m256i load_5_offsets(const int* offsets) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((__m128i*) offsets)), _mm_insert_epi32(_mm_setzero_si128(), offsets[4], 0), 1); } @@ -68,9 +68,12 @@ static __m128i sao_calc_eo_cat_avx2(__m128i* a, __m128i* b, __m128i* c) } -int kvz_sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -134,7 +137,12 @@ int kvz_sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, const kvz_pixel *r } -static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, __m256i* __restrict v_count, __m256i* __restrict v_cat, __m256i* __restrict v_diff, int eo_cat){ +static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, + __m256i* __restrict v_count, + __m256i* __restrict v_cat, + __m256i* __restrict v_diff, + int eo_cat) +{ __m256i v_mask = _mm256_cmpeq_epi32(*v_cat, _mm256_set1_epi32(eo_cat)); *v_diff_accum = _mm256_add_epi32(*v_diff_accum, _mm256_and_si256(*v_diff, v_mask)); *v_count = _mm256_sub_epi32(*v_count, v_mask); @@ -146,9 +154,12 @@ static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, __ accum_count_eo_cat_avx2(&(v_diff_accum[ EO_CAT ]), &(v_count[ EO_CAT ]), &V_CAT , &v_diff, EO_CAT); -void kvz_calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -235,30 +246,29 @@ void kvz_calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec } -void kvz_sao_reconstruct_color_avx2(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_avx2(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, int new_stride, + int block_width, int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 5 : 0; - if(sao->type == SAO_TYPE_BAND) { - int offsets[1<type == SAO_TYPE_BAND) { + int offsets[1 << KVZ_BIT_DEPTH]; kvz_calc_sao_offset_array(encoder, sao, offsets, color_i); - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { new_rec_data[y * new_stride + x] = offsets[rec_data[y * stride + x]]; } } } else { // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; x+=8) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; x+=8) { vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -294,9 +304,13 @@ void kvz_sao_reconstruct_color_avx2(const encoder_control_t * const encoder, } -int kvz_sao_band_ddistortion_avx2(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_avx2(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -343,10 +357,10 @@ int kvz_strategy_register_sao_avx2(void* opaque, uint8_t bitdepth) bool success = true; #if COMPILE_INTEL_AVX2 if (bitdepth == 8) { - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "avx2", 40, &kvz_sao_edge_ddistortion_avx2); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &kvz_calc_sao_edge_dir_avx2); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &kvz_sao_reconstruct_color_avx2); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &kvz_sao_band_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "avx2", 40, &sao_edge_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &calc_sao_edge_dir_avx2); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &sao_reconstruct_color_avx2); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &sao_band_ddistortion_avx2); } #endif //COMPILE_INTEL_AVX2 return success; diff --git a/src/strategies/generic/sao-generic.c b/src/strategies/generic/sao-generic.c index c472c019..706dbf47 100644 --- a/src/strategies/generic/sao-generic.c +++ b/src/strategies/generic/sao-generic.c @@ -40,9 +40,12 @@ static int sao_calc_eo_cat(kvz_pixel a, kvz_pixel b, kvz_pixel c) } -int kvz_sao_edge_ddistortion_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -76,9 +79,12 @@ int kvz_sao_edge_ddistortion_generic(const kvz_pixel *orig_data, const kvz_pixel * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -void kvz_calc_sao_edge_dir_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -103,30 +109,32 @@ void kvz_calc_sao_edge_dir_generic(const kvz_pixel *orig_data, const kvz_pixel * } -void kvz_sao_reconstruct_color_generic(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_generic(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, + kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, + int new_stride, + int block_width, + int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 5 : 0; - if(sao->type == SAO_TYPE_BAND) { + if (sao->type == SAO_TYPE_BAND) { int offsets[1<eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -144,9 +152,13 @@ void kvz_sao_reconstruct_color_generic(const encoder_control_t * const encoder, } -int kvz_sao_band_ddistortion_generic(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_generic(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -174,11 +186,11 @@ int kvz_sao_band_ddistortion_generic(const encoder_state_t * const state, const int kvz_strategy_register_sao_generic(void* opaque, uint8_t bitdepth) { bool success = true; - - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &kvz_sao_edge_ddistortion_generic); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &kvz_calc_sao_edge_dir_generic); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &kvz_sao_reconstruct_color_generic); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &kvz_sao_band_ddistortion_generic); + + success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &sao_edge_ddistortion_generic); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &calc_sao_edge_dir_generic); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &sao_reconstruct_color_generic); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &sao_band_ddistortion_generic); return success; }