mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-18 03:04:06 +00:00
Add a fast lane for byte-aligned bitstream writes
The CABAC engine only writes to the bitstream when it has a full byte. These writes are also always byte-aligned, so there is no need to even check for stream alignment. Speedup was around 3% with ultrafast and low QP.
This commit is contained in:
parent
deb63f735f
commit
a9e45efcfc
|
@ -216,16 +216,34 @@ void kvz_bitstream_clear(bitstream_t *const stream)
|
||||||
kvz_bitstream_init(stream);
|
kvz_bitstream_init(stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Write a byte to a byte aligned bitstream
|
||||||
|
* \param stream stream the data is to be appended to
|
||||||
|
* \param data input data
|
||||||
|
*/
|
||||||
|
void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
|
||||||
|
{
|
||||||
|
assert(stream->cur_bit == 0);
|
||||||
|
const uint8_t emulation_prevention_three_byte = 0x03;
|
||||||
|
|
||||||
|
if ((stream->zerocount == 2) && (data < 4)) {
|
||||||
|
kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
|
||||||
|
stream->zerocount = 0;
|
||||||
|
}
|
||||||
|
stream->zerocount = data == 0 ? stream->zerocount + 1 : 0;
|
||||||
|
kvz_bitstream_writebyte(stream, data);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Write bits to bitstream
|
* \brief Write bits to bitstream
|
||||||
* \param stream pointer bitstream to put the data
|
* Buffers individual bits until they make a full byte.
|
||||||
* \param data input data
|
* \param stream stream the data is to be appended to
|
||||||
* \param bits number of bits to write from data to stream
|
* \param data input data
|
||||||
|
* \param bits number of bits to write from data to stream
|
||||||
*/
|
*/
|
||||||
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
|
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
|
||||||
{
|
{
|
||||||
const uint8_t emulation_prevention_three_byte = 0x03;
|
while (bits--) {
|
||||||
while(bits--) {
|
|
||||||
stream->data <<= 1;
|
stream->data <<= 1;
|
||||||
|
|
||||||
if (data & kvz_bit_set_mask[bits]) {
|
if (data & kvz_bit_set_mask[bits]) {
|
||||||
|
@ -234,18 +252,9 @@ void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t b
|
||||||
stream->cur_bit++;
|
stream->cur_bit++;
|
||||||
|
|
||||||
// write byte to output
|
// write byte to output
|
||||||
if (stream->cur_bit==8) {
|
if (stream->cur_bit == 8) {
|
||||||
stream->cur_bit = 0;
|
stream->cur_bit = 0;
|
||||||
if((stream->zerocount == 2) && (stream->data < 4)) {
|
kvz_bitstream_put_byte(stream, stream->data);
|
||||||
kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
|
|
||||||
stream->zerocount = 0;
|
|
||||||
}
|
|
||||||
if(stream->data == 0) {
|
|
||||||
stream->zerocount++;
|
|
||||||
} else {
|
|
||||||
stream->zerocount = 0;
|
|
||||||
}
|
|
||||||
kvz_bitstream_writebyte(stream, stream->data);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,6 +77,7 @@ void kvz_bitstream_move(bitstream_t *dst, bitstream_t *src);
|
||||||
void kvz_bitstream_clear(bitstream_t *stream);
|
void kvz_bitstream_clear(bitstream_t *stream);
|
||||||
|
|
||||||
void kvz_bitstream_put(bitstream_t *stream, uint32_t data, uint8_t bits);
|
void kvz_bitstream_put(bitstream_t *stream, uint32_t data, uint8_t bits);
|
||||||
|
void kvz_bitstream_put_byte(bitstream_t *const stream, const uint32_t data);
|
||||||
/* Use macros to force inlining */
|
/* Use macros to force inlining */
|
||||||
#define bitstream_put_ue(stream, data) { kvz_bitstream_put(stream,kvz_g_exp_table[data].value,kvz_g_exp_table[data].len); }
|
#define bitstream_put_ue(stream, data) { kvz_bitstream_put(stream,kvz_g_exp_table[data].value,kvz_g_exp_table[data].len); }
|
||||||
#define bitstream_put_se(stream, data) { uint32_t index=(uint32_t)(((data)<=0)?(-(data))<<1:((data)<<1)-1); \
|
#define bitstream_put_se(stream, data) { uint32_t index=(uint32_t)(((data)<=0)?(-(data))<<1:((data)<<1)-1); \
|
||||||
|
|
12
src/cabac.c
12
src/cabac.c
|
@ -141,11 +141,11 @@ void kvz_cabac_write(cabac_data_t * const data)
|
||||||
uint32_t carry = lead_byte >> 8;
|
uint32_t carry = lead_byte >> 8;
|
||||||
uint32_t byte = data->buffered_byte + carry;
|
uint32_t byte = data->buffered_byte + carry;
|
||||||
data->buffered_byte = lead_byte & 0xff;
|
data->buffered_byte = lead_byte & 0xff;
|
||||||
kvz_bitstream_put(data->stream, byte, 8);
|
kvz_bitstream_put_byte(data->stream, byte);
|
||||||
|
|
||||||
byte = (0xff + carry) & 0xff;
|
byte = (0xff + carry) & 0xff;
|
||||||
while (data->num_buffered_bytes > 1) {
|
while (data->num_buffered_bytes > 1) {
|
||||||
kvz_bitstream_put(data->stream, byte, 8);
|
kvz_bitstream_put_byte(data->stream, byte);
|
||||||
data->num_buffered_bytes--;
|
data->num_buffered_bytes--;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -163,18 +163,18 @@ void kvz_cabac_finish(cabac_data_t * const data)
|
||||||
assert(data->bits_left <= 32);
|
assert(data->bits_left <= 32);
|
||||||
|
|
||||||
if (data->low >> (32 - data->bits_left)) {
|
if (data->low >> (32 - data->bits_left)) {
|
||||||
kvz_bitstream_put(data->stream,data->buffered_byte + 1, 8);
|
kvz_bitstream_put_byte(data->stream, data->buffered_byte + 1);
|
||||||
while (data->num_buffered_bytes > 1) {
|
while (data->num_buffered_bytes > 1) {
|
||||||
kvz_bitstream_put(data->stream, 0, 8);
|
kvz_bitstream_put_byte(data->stream, 0);
|
||||||
data->num_buffered_bytes--;
|
data->num_buffered_bytes--;
|
||||||
}
|
}
|
||||||
data->low -= 1 << (32 - data->bits_left);
|
data->low -= 1 << (32 - data->bits_left);
|
||||||
} else {
|
} else {
|
||||||
if (data->num_buffered_bytes > 0) {
|
if (data->num_buffered_bytes > 0) {
|
||||||
kvz_bitstream_put(data->stream,data->buffered_byte, 8);
|
kvz_bitstream_put_byte(data->stream, data->buffered_byte);
|
||||||
}
|
}
|
||||||
while (data->num_buffered_bytes > 1) {
|
while (data->num_buffered_bytes > 1) {
|
||||||
kvz_bitstream_put(data->stream, 0xff, 8);
|
kvz_bitstream_put_byte(data->stream, 0xff);
|
||||||
data->num_buffered_bytes--;
|
data->num_buffered_bytes--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue