diff --git a/src/encoderstate.c b/src/encoderstate.c index 904700ac..4897df15 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -1613,9 +1613,12 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t uint8_t last_coeff_x = 0; uint8_t last_coeff_y = 0; int32_t i; - uint32_t sig_coeffgroup_flag[8 * 8] = { 0 }; + uint32_t sig_coeffgroup_flag[64]; + uint32_t num_nonzero = 0; + int32_t scan_pos_last = -1; int32_t pos_last = 0; + int32_t shift = 4>>1; int8_t be_valid = encoder->sign_hiding; int32_t scan_pos_sig; int32_t last_scan_set; @@ -1623,7 +1626,7 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig; // CONSTANTS - const uint32_t num_blk_side = width >> 2; + const uint32_t num_blk_side = width >> shift; const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t *scan = kvz_g_sig_last_scan[scan_mode][log2_block_size - 1]; @@ -1633,42 +1636,35 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]); cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0]) : &(cabac->ctx.cu_sig_model_chroma[0]); + FILL(sig_coeffgroup_flag, 0); - // Scan all coeff groups to find out which of them have coeffs. - // Populate sig_coeffgroup_flag with that info. - unsigned sig_cg_cnt = 0; - for (int cg_y = 0; cg_y < width / 4; ++cg_y) { - for (int cg_x = 0; cg_x < width / 4; ++cg_x) { - unsigned cg_pos = cg_y * width * 4 + cg_x * 4; - for (int coeff_row = 0; coeff_row < 4; ++coeff_row) { - // Load four 16-bit coeffs and see if any of them are non-zero. - unsigned coeff_pos = cg_pos + coeff_row * width; - uint64_t four_coeffs = *(uint64_t*)(&coeff[coeff_pos]); - if (four_coeffs) { - ++sig_cg_cnt; - unsigned cg_pos_y = cg_pos >> log2_block_size; - unsigned cg_pos_x = cg_pos - (cg_pos_y << log2_block_size); - sig_coeffgroup_flag[(cg_pos_x >> 2) + (cg_pos_y >> 2) * num_blk_side] = 1; - break; - } - } - } + // Count non-zero coeffs + for (i = 0; i < width * width; i+=4) { + + // Load 4 coeffs + uint64_t packed = *(uint64_t*)(&coeff[i]); + // Or bits from upper byte to lower + packed |= packed >> 8; + // Zero upper byte for overflow + packed &= 0x00FF00FF00FF00FF; + // Any bits in lower byte results in overflow + packed += 0x00FF00FF00FF00FF; + // Pick only overflow bits, overflow means there were bits in that coeff + packed &= 0x0100010001000100; + // Add bits of two coeffs via possible overflow + packed += 0x00FFFF0000FFFF00; + // Preserve only the two numbers of nonzero coeffs + packed &= ~0x00FFFF0000FFFF00; + // Add these numbers + packed += (packed << 32); + // Shift to the right position and discard extra bits + packed >>= 56; + + num_nonzero += packed; } - // Rest of the code assumes at least one non-zero coeff. - assert(sig_cg_cnt > 0); - - // Find the last coeff group by going backwards in scan order. - unsigned scan_cg_last = num_blk_side * num_blk_side - 1; - while (!sig_coeffgroup_flag[scan_cg[scan_cg_last]]) { - --scan_cg_last; - } - - // Find the last coeff by going backwards in scan order. - unsigned scan_coeff_last = scan_cg_last * 16 + 15; - while (!coeff[scan[scan_coeff_last]]) { - --scan_coeff_last; - } + // Transforms with no non-zero coefficients are indicated with CBFs. + assert(num_nonzero != 0); // transform skip flag if(width == 4 && encoder->trskip_enable) { @@ -1676,6 +1672,23 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t CABAC_BIN(cabac, tr_skip, "transform_skip_flag"); } + scan_pos_last = -1; + + // Significance mapping + while (num_nonzero > 0) { + pos_last = scan[++scan_pos_last]; +#define POSY (pos_last >> log2_block_size) +#define POSX (pos_last - ( POSY << log2_block_size )) + + if (coeff[pos_last] != 0) { + sig_coeffgroup_flag[(num_blk_side * (POSY >> shift) + (POSX >> shift))] = 1; + } + + num_nonzero -= (coeff[pos_last] != 0) ? 1 : 0; + #undef POSY + #undef POSX + } + last_coeff_x = pos_last & (width - 1); last_coeff_y = (uint8_t)(pos_last >> log2_block_size); @@ -1683,8 +1696,8 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t kvz_encode_last_significant_xy(state, last_coeff_x, last_coeff_y, width, width, type, scan_mode); - scan_pos_sig = scan_coeff_last; - last_scan_set = scan_cg_last; + scan_pos_sig = scan_pos_last; + last_scan_set = (scan_pos_last >> 4); // significant_coeff_flag for (i = last_scan_set; i >= 0; i--) { @@ -1700,7 +1713,7 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t int32_t num_non_zero = 0; go_rice_param = 0; - if (scan_pos_sig == scan_coeff_last) { + if (scan_pos_sig == scan_pos_last) { abs_coeff[0] = abs(coeff[pos_last]); coeff_signs = (coeff[pos_last] < 0); num_non_zero = 1;