mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Further optimize coefficient coding
Remove the need to count the coefficients: populate the significant coefficient group map first, then find the last coefficient by scanning backward from the end of the last significant group. The speedup is about 2% on the ultrafast preset. The previous version of this patch was reverted due to a bug, which has now been fixed.
This commit is contained in:
parent
b78460b02c
commit
3a80c7de74
|
@ -1613,20 +1613,15 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
|
||||||
uint8_t last_coeff_x = 0;
|
uint8_t last_coeff_x = 0;
|
||||||
uint8_t last_coeff_y = 0;
|
uint8_t last_coeff_y = 0;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
uint32_t sig_coeffgroup_flag[64];
|
uint32_t sig_coeffgroup_flag[8 * 8] = { 0 };
|
||||||
|
|
||||||
uint32_t num_nonzero = 0;
|
|
||||||
int32_t scan_pos_last = -1;
|
|
||||||
int32_t pos_last = 0;
|
|
||||||
int32_t shift = 4>>1;
|
|
||||||
int8_t be_valid = encoder->sign_hiding;
|
int8_t be_valid = encoder->sign_hiding;
|
||||||
int32_t scan_pos_sig;
|
int32_t scan_pos_sig;
|
||||||
int32_t last_scan_set;
|
|
||||||
uint32_t go_rice_param = 0;
|
uint32_t go_rice_param = 0;
|
||||||
uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig;
|
uint32_t blk_pos, pos_y, pos_x, sig, ctx_sig;
|
||||||
|
|
||||||
// CONSTANTS
|
// CONSTANTS
|
||||||
const uint32_t num_blk_side = width >> shift;
|
const uint32_t num_blk_side = width >> TR_MIN_LOG2_SIZE;
|
||||||
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
|
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
|
||||||
const uint32_t *scan =
|
const uint32_t *scan =
|
||||||
kvz_g_sig_last_scan[scan_mode][log2_block_size - 1];
|
kvz_g_sig_last_scan[scan_mode][log2_block_size - 1];
|
||||||
|
@ -1636,35 +1631,44 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
|
||||||
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]);
|
cabac_ctx_t *base_coeff_group_ctx = &(cabac->ctx.cu_sig_coeff_group_model[type]);
|
||||||
cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0]) :
|
cabac_ctx_t *baseCtx = (type == 0) ? &(cabac->ctx.cu_sig_model_luma[0]) :
|
||||||
&(cabac->ctx.cu_sig_model_chroma[0]);
|
&(cabac->ctx.cu_sig_model_chroma[0]);
|
||||||
FILL(sig_coeffgroup_flag, 0);
|
|
||||||
|
|
||||||
// Count non-zero coeffs
|
// Scan all coeff groups to find out which of them have coeffs.
|
||||||
for (i = 0; i < width * width; i+=4) {
|
// Populate sig_coeffgroup_flag with that info.
|
||||||
|
unsigned sig_cg_cnt = 0;
|
||||||
// Load 4 coeffs
|
for (int cg_y = 0; cg_y < width / 4; ++cg_y) {
|
||||||
uint64_t packed = *(uint64_t*)(&coeff[i]);
|
for (int cg_x = 0; cg_x < width / 4; ++cg_x) {
|
||||||
// Or bits from upper byte to lower
|
unsigned cg_pos = cg_y * width * 4 + cg_x * 4;
|
||||||
packed |= packed >> 8;
|
for (int coeff_row = 0; coeff_row < 4; ++coeff_row) {
|
||||||
// Zero upper byte for overflow
|
// Load four 16-bit coeffs and see if any of them are non-zero.
|
||||||
packed &= 0x00FF00FF00FF00FF;
|
unsigned coeff_pos = cg_pos + coeff_row * width;
|
||||||
// Any bits in lower byte results in overflow
|
uint64_t four_coeffs = *(uint64_t*)(&coeff[coeff_pos]);
|
||||||
packed += 0x00FF00FF00FF00FF;
|
if (four_coeffs) {
|
||||||
// Pick only overflow bits, overflow means there were bits in that coeff
|
++sig_cg_cnt;
|
||||||
packed &= 0x0100010001000100;
|
unsigned cg_pos_y = (cg_pos >> log2_block_size) >> TR_MIN_LOG2_SIZE;
|
||||||
// Add bits of two coeffs via possible overflow
|
unsigned cg_pos_x = (cg_pos & (width - 1)) >> TR_MIN_LOG2_SIZE;
|
||||||
packed += 0x00FFFF0000FFFF00;
|
sig_coeffgroup_flag[cg_pos_x + cg_pos_y * num_blk_side] = 1;
|
||||||
// Preserve only the two numbers of nonzero coeffs
|
break;
|
||||||
packed &= ~0x00FFFF0000FFFF00;
|
}
|
||||||
// Add these numbers
|
}
|
||||||
packed += (packed << 32);
|
}
|
||||||
// Shift to the right position and discard extra bits
|
|
||||||
packed >>= 56;
|
|
||||||
|
|
||||||
num_nonzero += packed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Transforms with no non-zero coefficients are indicated with CBFs.
|
// Rest of the code assumes at least one non-zero coeff.
|
||||||
assert(num_nonzero != 0);
|
assert(sig_cg_cnt > 0);
|
||||||
|
|
||||||
|
// Find the last coeff group by going backwards in scan order.
|
||||||
|
unsigned scan_cg_last = num_blk_side * num_blk_side - 1;
|
||||||
|
while (!sig_coeffgroup_flag[scan_cg[scan_cg_last]]) {
|
||||||
|
--scan_cg_last;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the last coeff by going backwards in scan order.
|
||||||
|
unsigned scan_pos_last = scan_cg_last * 16 + 15;
|
||||||
|
while (!coeff[scan[scan_pos_last]]) {
|
||||||
|
--scan_pos_last;
|
||||||
|
}
|
||||||
|
|
||||||
|
int pos_last = scan[scan_pos_last];
|
||||||
|
|
||||||
// transform skip flag
|
// transform skip flag
|
||||||
if(width == 4 && encoder->trskip_enable) {
|
if(width == 4 && encoder->trskip_enable) {
|
||||||
|
@ -1672,23 +1676,6 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
|
||||||
CABAC_BIN(cabac, tr_skip, "transform_skip_flag");
|
CABAC_BIN(cabac, tr_skip, "transform_skip_flag");
|
||||||
}
|
}
|
||||||
|
|
||||||
scan_pos_last = -1;
|
|
||||||
|
|
||||||
// Significance mapping
|
|
||||||
while (num_nonzero > 0) {
|
|
||||||
pos_last = scan[++scan_pos_last];
|
|
||||||
#define POSY (pos_last >> log2_block_size)
|
|
||||||
#define POSX (pos_last - ( POSY << log2_block_size ))
|
|
||||||
|
|
||||||
if (coeff[pos_last] != 0) {
|
|
||||||
sig_coeffgroup_flag[(num_blk_side * (POSY >> shift) + (POSX >> shift))] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
num_nonzero -= (coeff[pos_last] != 0) ? 1 : 0;
|
|
||||||
#undef POSY
|
|
||||||
#undef POSX
|
|
||||||
}
|
|
||||||
|
|
||||||
last_coeff_x = pos_last & (width - 1);
|
last_coeff_x = pos_last & (width - 1);
|
||||||
last_coeff_y = (uint8_t)(pos_last >> log2_block_size);
|
last_coeff_y = (uint8_t)(pos_last >> log2_block_size);
|
||||||
|
|
||||||
|
@ -1697,10 +1684,9 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
|
||||||
type, scan_mode);
|
type, scan_mode);
|
||||||
|
|
||||||
scan_pos_sig = scan_pos_last;
|
scan_pos_sig = scan_pos_last;
|
||||||
last_scan_set = (scan_pos_last >> 4);
|
|
||||||
|
|
||||||
// significant_coeff_flag
|
// significant_coeff_flag
|
||||||
for (i = last_scan_set; i >= 0; i--) {
|
for (i = scan_cg_last; i >= 0; i--) {
|
||||||
int32_t sub_pos = i << 4; // LOG2_SCAN_SET_SIZE;
|
int32_t sub_pos = i << 4; // LOG2_SCAN_SET_SIZE;
|
||||||
int32_t abs_coeff[16];
|
int32_t abs_coeff[16];
|
||||||
int32_t cg_blk_pos = scan_cg[i];
|
int32_t cg_blk_pos = scan_cg[i];
|
||||||
|
@ -1722,7 +1708,7 @@ void kvz_encode_coeff_nxn(encoder_state_t * const state, coeff_t *coeff, uint8_t
|
||||||
scan_pos_sig--;
|
scan_pos_sig--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == last_scan_set || i == 0) {
|
if (i == scan_cg_last || i == 0) {
|
||||||
sig_coeffgroup_flag[cg_blk_pos] = 1;
|
sig_coeffgroup_flag[cg_blk_pos] = 1;
|
||||||
} else {
|
} else {
|
||||||
uint32_t sig_coeff_group = (sig_coeffgroup_flag[cg_blk_pos] != 0);
|
uint32_t sig_coeff_group = (sig_coeffgroup_flag[cg_blk_pos] != 0);
|
||||||
|
|
Loading…
Reference in a new issue