mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-18 03:04:06 +00:00
Add a fast lane for byte-aligned bitstream writes
The CABAC engine only writes to the bitstream when it has a full byte. These writes are also always byte-aligned, so there is no need to even check for stream alignment. Speedup was around 3% with ultrafast and low QP.
This commit is contained in:
parent
deb63f735f
commit
a9e45efcfc
|
@ -216,16 +216,34 @@ void kvz_bitstream_clear(bitstream_t *const stream)
|
|||
kvz_bitstream_init(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Write a byte to a byte aligned bitstream
|
||||
* \param stream stream the data is to be appended to
|
||||
* \param data input data
|
||||
*/
|
||||
void kvz_bitstream_put_byte(bitstream_t *const stream, uint32_t data)
|
||||
{
|
||||
assert(stream->cur_bit == 0);
|
||||
const uint8_t emulation_prevention_three_byte = 0x03;
|
||||
|
||||
if ((stream->zerocount == 2) && (data < 4)) {
|
||||
kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
|
||||
stream->zerocount = 0;
|
||||
}
|
||||
stream->zerocount = data == 0 ? stream->zerocount + 1 : 0;
|
||||
kvz_bitstream_writebyte(stream, data);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Write bits to bitstream
|
||||
* \param stream pointer bitstream to put the data
|
||||
* \param data input data
|
||||
* \param bits number of bits to write from data to stream
|
||||
* Buffers individual bits until they make a full byte.
|
||||
* \param stream stream the data is to be appended to
|
||||
* \param data input data
|
||||
* \param bits number of bits to write from data to stream
|
||||
*/
|
||||
void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t bits)
|
||||
{
|
||||
const uint8_t emulation_prevention_three_byte = 0x03;
|
||||
while(bits--) {
|
||||
while (bits--) {
|
||||
stream->data <<= 1;
|
||||
|
||||
if (data & kvz_bit_set_mask[bits]) {
|
||||
|
@ -234,18 +252,9 @@ void kvz_bitstream_put(bitstream_t *const stream, const uint32_t data, uint8_t b
|
|||
stream->cur_bit++;
|
||||
|
||||
// write byte to output
|
||||
if (stream->cur_bit==8) {
|
||||
if (stream->cur_bit == 8) {
|
||||
stream->cur_bit = 0;
|
||||
if((stream->zerocount == 2) && (stream->data < 4)) {
|
||||
kvz_bitstream_writebyte(stream, emulation_prevention_three_byte);
|
||||
stream->zerocount = 0;
|
||||
}
|
||||
if(stream->data == 0) {
|
||||
stream->zerocount++;
|
||||
} else {
|
||||
stream->zerocount = 0;
|
||||
}
|
||||
kvz_bitstream_writebyte(stream, stream->data);
|
||||
kvz_bitstream_put_byte(stream, stream->data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -77,6 +77,7 @@ void kvz_bitstream_move(bitstream_t *dst, bitstream_t *src);
|
|||
void kvz_bitstream_clear(bitstream_t *stream);
|
||||
|
||||
void kvz_bitstream_put(bitstream_t *stream, uint32_t data, uint8_t bits);
|
||||
void kvz_bitstream_put_byte(bitstream_t *const stream, const uint32_t data);
|
||||
/* Use macros to force inlining */
|
||||
#define bitstream_put_ue(stream, data) { kvz_bitstream_put(stream,kvz_g_exp_table[data].value,kvz_g_exp_table[data].len); }
|
||||
#define bitstream_put_se(stream, data) { uint32_t index=(uint32_t)(((data)<=0)?(-(data))<<1:((data)<<1)-1); \
|
||||
|
|
12
src/cabac.c
12
src/cabac.c
|
@ -141,11 +141,11 @@ void kvz_cabac_write(cabac_data_t * const data)
|
|||
uint32_t carry = lead_byte >> 8;
|
||||
uint32_t byte = data->buffered_byte + carry;
|
||||
data->buffered_byte = lead_byte & 0xff;
|
||||
kvz_bitstream_put(data->stream, byte, 8);
|
||||
kvz_bitstream_put_byte(data->stream, byte);
|
||||
|
||||
byte = (0xff + carry) & 0xff;
|
||||
while (data->num_buffered_bytes > 1) {
|
||||
kvz_bitstream_put(data->stream, byte, 8);
|
||||
kvz_bitstream_put_byte(data->stream, byte);
|
||||
data->num_buffered_bytes--;
|
||||
}
|
||||
} else {
|
||||
|
@ -163,18 +163,18 @@ void kvz_cabac_finish(cabac_data_t * const data)
|
|||
assert(data->bits_left <= 32);
|
||||
|
||||
if (data->low >> (32 - data->bits_left)) {
|
||||
kvz_bitstream_put(data->stream,data->buffered_byte + 1, 8);
|
||||
kvz_bitstream_put_byte(data->stream, data->buffered_byte + 1);
|
||||
while (data->num_buffered_bytes > 1) {
|
||||
kvz_bitstream_put(data->stream, 0, 8);
|
||||
kvz_bitstream_put_byte(data->stream, 0);
|
||||
data->num_buffered_bytes--;
|
||||
}
|
||||
data->low -= 1 << (32 - data->bits_left);
|
||||
} else {
|
||||
if (data->num_buffered_bytes > 0) {
|
||||
kvz_bitstream_put(data->stream,data->buffered_byte, 8);
|
||||
kvz_bitstream_put_byte(data->stream, data->buffered_byte);
|
||||
}
|
||||
while (data->num_buffered_bytes > 1) {
|
||||
kvz_bitstream_put(data->stream, 0xff, 8);
|
||||
kvz_bitstream_put_byte(data->stream, 0xff);
|
||||
data->num_buffered_bytes--;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue