From a9e45efcfc82a0ee704357170d60df6fedc38d91 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Fri, 23 Dec 2016 16:12:32 +0200 Subject: [PATCH] Add a fast lane for byte-aligned bitstream writes The CABAC engine only writes to the bitstream when it has a full byte. These writes are also always byte-aligned, so there is no need to even check for stream alignment. Speedup was around 3% with ultrafast and low QP. --- src/bitstream.c | 41 +++++++++++++++++++++++++---------------- src/bitstream.h | 1 + src/cabac.c | 12 ++++++------ 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/bitstream.c b/src/bitstream.c index a07bc4bf..fda948c2 100644 --- a/src/bitstream.c +++ b/src/bitstream.c @@ -216,16 +216,34 @@ void kvz_bitstream_clear(bitstream_t *const stream) kvz_bitstream_init(stream); } +/** + * \brief Write a byte to a byte-aligned bitstream + * \param stream stream the data is to be appended to + * \param data input data + */ +void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data) +{ + assert(stream->cur_bit == 0); + const uint8_t emulation_prevention_three_byte = 0x03; + + if ((stream->zerocount == 2) && (data < 4)) { + kvz_bitstream_writebyte(stream, emulation_prevention_three_byte); + stream->zerocount = 0; + } + stream->zerocount = data == 0 ? stream->zerocount + 1 : 0; + kvz_bitstream_writebyte(stream, data); +} + /** * \brief Write bits to bitstream - * \param stream pointer bitstream to put the data - * \param data input data - * \param bits number of bits to write from data to stream + * Buffers individual bits until they make a full byte. 
+ * \param stream stream the data is to be appended to + * \param data input data + * \param bits number of bits to write from data to stream */ void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits) { - const uint8_t emulation_prevention_three_byte = 0x03; - while(bits--) { + while (bits--) { stream->data <<= 1; if (data & kvz_bit_set_mask[bits]) { @@ -234,18 +252,9 @@ void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t b stream->cur_bit++; // write byte to output - if (stream->cur_bit==8) { + if (stream->cur_bit == 8) { stream->cur_bit = 0; - if((stream->zerocount == 2) && (stream->data < 4)) { - kvz_bitstream_writebyte(stream, emulation_prevention_three_byte); - stream->zerocount = 0; - } - if(stream->data == 0) { - stream->zerocount++; - } else { - stream->zerocount = 0; - } - kvz_bitstream_writebyte(stream, stream->data); + kvz_bitstream_put_byte(stream, stream->data); } } } diff --git a/src/bitstream.h b/src/bitstream.h index daa017f7..9b48098c 100644 --- a/src/bitstream.h +++ b/src/bitstream.h @@ -77,6 +77,7 @@ void kvz_bitstream_move(bitstream_t *dst, bitstream_t *src); void kvz_bitstream_clear(bitstream_t *stream); void kvz_bitstream_put(bitstream_t *stream, uint32_t data, uint8_t bits); +void kvz_bitstream_put_byte(bitstream_t *const stream, const uint32_t data); /* Use macros to force inlining */ #define bitstream_put_ue(stream, data) { kvz_bitstream_put(stream,kvz_g_exp_table[data].value,kvz_g_exp_table[data].len); } #define bitstream_put_se(stream, data) { uint32_t index=(uint32_t)(((data)<=0)?(-(data))<<1:((data)<<1)-1); \ diff --git a/src/cabac.c b/src/cabac.c index c4b639c9..893a8245 100644 --- a/src/cabac.c +++ b/src/cabac.c @@ -141,11 +141,11 @@ void kvz_cabac_write(cabac_data_t * const data) uint32_t carry = lead_byte >> 8; uint32_t byte = data->buffered_byte + carry; data->buffered_byte = lead_byte & 0xff; - kvz_bitstream_put(data->stream, byte, 8); + kvz_bitstream_put_byte(data->stream, 
byte); byte = (0xff + carry) & 0xff; while (data->num_buffered_bytes > 1) { - kvz_bitstream_put(data->stream, byte, 8); + kvz_bitstream_put_byte(data->stream, byte); data->num_buffered_bytes--; } } else { @@ -163,18 +163,18 @@ void kvz_cabac_finish(cabac_data_t * const data) assert(data->bits_left <= 32); if (data->low >> (32 - data->bits_left)) { - kvz_bitstream_put(data->stream,data->buffered_byte + 1, 8); + kvz_bitstream_put_byte(data->stream, data->buffered_byte + 1); while (data->num_buffered_bytes > 1) { - kvz_bitstream_put(data->stream, 0, 8); + kvz_bitstream_put_byte(data->stream, 0); data->num_buffered_bytes--; } data->low -= 1 << (32 - data->bits_left); } else { if (data->num_buffered_bytes > 0) { - kvz_bitstream_put(data->stream,data->buffered_byte, 8); + kvz_bitstream_put_byte(data->stream, data->buffered_byte); } while (data->num_buffered_bytes > 1) { - kvz_bitstream_put(data->stream, 0xff, 8); + kvz_bitstream_put_byte(data->stream, 0xff); data->num_buffered_bytes--; } }