Add a fast lane for byte-aligned bitstream writes

The CABAC engine only writes to the bitstream when it has a full byte.
These writes are also always byte-aligned, so there is no need to even
check for stream alignment.

Speedup was around 3% with ultrafast and low QP.
This commit is contained in:
Ari Koivula 2016-12-23 16:12:32 +02:00
parent deb63f735f
commit a9e45efcfc
3 changed files with 32 additions and 22 deletions

View file

@ -216,16 +216,34 @@ void kvz_bitstream_clear(bitstream_t *const stream)
kvz_bitstream_init(stream);
}
/**
 * \brief Append one whole byte to a byte-aligned bitstream.
 *
 * Before the byte itself is written, an emulation prevention byte (0x03) is
 * emitted if stream->zerocount is 2 and the value is below 4. The counter is
 * then advanced when the value is zero and reset otherwise.
 *
 * \param stream  stream the data is to be appended to; asserted to have
 *                cur_bit == 0, i.e. no partially filled byte pending
 * \param data    value to append
 */
void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
{
  assert(stream->cur_bit == 0);

  const uint8_t emulation_prevention_three_byte = 0x03;
  const int needs_escape = (stream->zerocount == 2) && (data < 4);

  if (needs_escape) {
    kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
    stream->zerocount = 0;
  }

  // Track the run of zero bytes; it is updated before the payload byte is
  // handed to the writer, same as the statement order callers rely on.
  if (data == 0) {
    stream->zerocount = stream->zerocount + 1;
  } else {
    stream->zerocount = 0;
  }

  kvz_bitstream_writebyte(stream, data);
}
/**
* \brief Write bits to bitstream
* \param stream pointer bitstream to put the data
* \param data input data
* \param bits number of bits to write from data to stream
* Buffers individual bits until they make a full byte.
* \param stream stream the data is to be appended to
* \param data input data
* \param bits number of bits to write from data to stream
*/
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
{
const uint8_t emulation_prevention_three_byte = 0x03;
while(bits--) {
while (bits--) {
stream->data <<= 1;
if (data & kvz_bit_set_mask[bits]) {
@ -234,18 +252,9 @@ void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t b
stream->cur_bit++;
// write byte to output
if (stream->cur_bit==8) {
if (stream->cur_bit == 8) {
stream->cur_bit = 0;
if((stream->zerocount == 2) && (stream->data < 4)) {
kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
stream->zerocount = 0;
}
if(stream->data == 0) {
stream->zerocount++;
} else {
stream->zerocount = 0;
}
kvz_bitstream_writebyte(stream, stream->data);
kvz_bitstream_put_byte(stream, stream->data);
}
}
}

View file

@ -77,6 +77,7 @@ void kvz_bitstream_move(bitstream_t *dst, bitstream_t *src);
void kvz_bitstream_clear(bitstream_t *stream);
void kvz_bitstream_put(bitstream_t *stream, uint32_t data, uint8_t bits);
void kvz_bitstream_put_byte(bitstream_t *const stream, const uint32_t data);
/* Use macros to force inlining */
#define bitstream_put_ue(stream, data) { kvz_bitstream_put(stream,kvz_g_exp_table[data].value,kvz_g_exp_table[data].len); }
#define bitstream_put_se(stream, data) { uint32_t index=(uint32_t)(((data)<=0)?(-(data))<<1:((data)<<1)-1); \

View file

@ -141,11 +141,11 @@ void kvz_cabac_write(cabac_data_t * const data)
uint32_t carry = lead_byte >> 8;
uint32_t byte = data->buffered_byte + carry;
data->buffered_byte = lead_byte & 0xff;
kvz_bitstream_put(data->stream, byte, 8);
kvz_bitstream_put_byte(data->stream, byte);
byte = (0xff + carry) & 0xff;
while (data->num_buffered_bytes > 1) {
kvz_bitstream_put(data->stream, byte, 8);
kvz_bitstream_put_byte(data->stream, byte);
data->num_buffered_bytes--;
}
} else {
@ -163,18 +163,18 @@ void kvz_cabac_finish(cabac_data_t * const data)
assert(data->bits_left <= 32);
if (data->low >> (32 - data->bits_left)) {
kvz_bitstream_put(data->stream,data->buffered_byte + 1, 8);
kvz_bitstream_put_byte(data->stream, data->buffered_byte + 1);
while (data->num_buffered_bytes > 1) {
kvz_bitstream_put(data->stream, 0, 8);
kvz_bitstream_put_byte(data->stream, 0);
data->num_buffered_bytes--;
}
data->low -= 1 << (32 - data->bits_left);
} else {
if (data->num_buffered_bytes > 0) {
kvz_bitstream_put(data->stream,data->buffered_byte, 8);
kvz_bitstream_put_byte(data->stream, data->buffered_byte);
}
while (data->num_buffered_bytes > 1) {
kvz_bitstream_put(data->stream, 0xff, 8);
kvz_bitstream_put_byte(data->stream, 0xff);
data->num_buffered_bytes--;
}
}