Revert "Further optimize coefficient coding"

This reverts commit 25462124f8.

That commit broke the bitstream. If it's not good enough to push on Friday
night, it's probably not good enough on Monday morning either.
This commit is contained in:
Ari Koivula 2015-12-07 15:04:14 +02:00
parent 865c86fef2
commit b32965925e

View file

@@ -1613,9 +1613,12 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
uint8_t last_coeff_x = 0; uint8_t last_coeff_x = 0;
uint8_t last_coeff_y = 0; uint8_t last_coeff_y = 0;
int32_t i; int32_t i;
uint32_t sig_coeffgroup_flag[8 * 8] = { 0 }; uint32_t sig_coeffgroup_flag[64];
uint32_t num_nonzero = 0;
int32_t scan_pos_last = -1;
int32_t pos_last = 0; int32_t pos_last = 0;
int32_t shift = 4>>1;
int8_t be_valid = encoder->sign_hiding; int8_t be_valid = encoder->sign_hiding;
int32_t scan_pos_sig; int32_t scan_pos_sig;
int32_t last_scan_set; int32_t last_scan_set;
@@ -1623,7 +1626,7 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig; uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig;
// CONSTANTS // CONSTANTS
const uint32_t num_blk_side = width >> 2; const uint32_t num_blk_side = width >> shift;
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t *scan = const uint32_t *scan =
kvz_g_sig_last_scan[scan_mode][log2_block_size - 1]; kvz_g_sig_last_scan[scan_mode][log2_block_size - 1];
@@ -1633,42 +1636,35 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]); cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]);
cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0]) : cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0]) :
&(cabac->ctx.cu_sig_model_chroma[0]); &(cabac->ctx.cu_sig_model_chroma[0]);
FILL(sig_coeffgroup_flag, 0);
// Scan all coeff groups to find out which of them have coeffs. // Count non-zero coeffs
// Populate sig_coeffgroup_flag with that info. for (i = 0; i < width * width; i+=4) {
unsigned sig_cg_cnt = 0;
for (int cg_y = 0; cg_y < width / 4; ++cg_y) { // Load 4 coeffs
for (int cg_x = 0; cg_x < width / 4; ++cg_x) { uint64_t packed = *(uint64_t*)(&coeff[i]);
unsigned cg_pos = cg_y * width * 4 + cg_x * 4; // Or bits from upper byte to lower
for (int coeff_row = 0; coeff_row < 4; ++coeff_row) { packed |= packed >> 8;
// Load four 16-bit coeffs and see if any of them are non-zero. // Zero upper byte for overflow
unsigned coeff_pos = cg_pos + coeff_row * width; packed &= 0x00FF00FF00FF00FF;
uint64_t four_coeffs = *(uint64_t*)(&coeff[coeff_pos]); // Any bits in lower byte results in overflow
if (four_coeffs) { packed += 0x00FF00FF00FF00FF;
++sig_cg_cnt; // Pick only overflow bits, overflow means there were bits in that coeff
unsigned cg_pos_y = cg_pos >> log2_block_size; packed &= 0x0100010001000100;
unsigned cg_pos_x = cg_pos - (cg_pos_y << log2_block_size); // Add bits of two coeffs via possible overflow
sig_coeffgroup_flag[(cg_pos_x >> 2) + (cg_pos_y >> 2) * num_blk_side] = 1; packed += 0x00FFFF0000FFFF00;
break; // Preserve only the two numbers of nonzero coeffs
} packed &= ~0x00FFFF0000FFFF00;
} // Add these numbers
} packed += (packed << 32);
// Shift to the right position and discard extra bits
packed >>= 56;
num_nonzero += packed;
} }
// Rest of the code assumes at least one non-zero coeff. // Transforms with no non-zero coefficients are indicated with CBFs.
assert(sig_cg_cnt > 0); assert(num_nonzero != 0);
// Find the last coeff group by going backwards in scan order.
unsigned scan_cg_last = num_blk_side * num_blk_side - 1;
while (!sig_coeffgroup_flag[scan_cg[scan_cg_last]]) {
--scan_cg_last;
}
// Find the last coeff by going backwards in scan order.
unsigned scan_coeff_last = scan_cg_last * 16 + 15;
while (!coeff[scan[scan_coeff_last]]) {
--scan_coeff_last;
}
// transform skip flag // transform skip flag
if(width == 4 && encoder->trskip_enable) { if(width == 4 && encoder->trskip_enable) {
@@ -1676,6 +1672,23 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
CABAC_BIN(cabac, tr_skip, "transform_skip_flag"); CABAC_BIN(cabac, tr_skip, "transform_skip_flag");
} }
scan_pos_last = -1;
// Significance mapping
while (num_nonzero > 0) {
pos_last = scan[++scan_pos_last];
#define POSY (pos_last >> log2_block_size)
#define POSX (pos_last - ( POSY << log2_block_size ))
if (coeff[pos_last] != 0) {
sig_coeffgroup_flag[(num_blk_side * (POSY >> shift) + (POSX >> shift))] = 1;
}
num_nonzero -= (coeff[pos_last] != 0) ? 1 : 0;
#undef POSY
#undef POSX
}
last_coeff_x = pos_last & (width - 1); last_coeff_x = pos_last & (width - 1);
last_coeff_y = (uint8_t)(pos_last >> log2_block_size); last_coeff_y = (uint8_t)(pos_last >> log2_block_size);
@@ -1683,8 +1696,8 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
kvz_encode_last_significant_xy(state, last_coeff_x, last_coeff_y, width, width, kvz_encode_last_significant_xy(state, last_coeff_x, last_coeff_y, width, width,
type, scan_mode); type, scan_mode);
scan_pos_sig = scan_coeff_last; scan_pos_sig = scan_pos_last;
last_scan_set = scan_cg_last; last_scan_set = (scan_pos_last >> 4);
// significant_coeff_flag // significant_coeff_flag
for (i = last_scan_set; i >= 0; i--) { for (i = last_scan_set; i >= 0; i--) {
@@ -1700,7 +1713,7 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
int32_t num_non_zero = 0; int32_t num_non_zero = 0;
go_rice_param = 0; go_rice_param = 0;
if (scan_pos_sig == scan_coeff_last) { if (scan_pos_sig == scan_pos_last) {
abs_coeff[0] = abs(coeff[pos_last]); abs_coeff[0] = abs(coeff[pos_last]);
coeff_signs = (coeff[pos_last] < 0); coeff_signs = (coeff[pos_last] < 0);
num_non_zero = 1; num_non_zero = 1;