Iterable structure for sub-encoders (more flexibility)

This commit is contained in:
Laurent Fasnacht 2014-05-01 09:45:37 +02:00
parent 212d390003
commit b8b28635ff

View file

@ -412,8 +412,9 @@ int encoder_state_init(encoder_state * const encoder_state, const encoder_contro
#if USE_TILES #if USE_TILES
if (encoder->tiles_enable) { if (encoder->tiles_enable) {
int x,y; int x,y;
//Allocate subencoders //Allocate subencoders (valid subencoder have a non null encoder_control field, so we use a null one to mark the end of the list)
encoder_state->children = MALLOC(struct encoder_state, encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows); encoder_state->children = MALLOC(struct encoder_state, encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows + 1);
encoder_state->children[encoder->tiles_num_tile_columns * encoder->tiles_num_tile_rows].encoder_control = NULL;
for (y=0; y < encoder->tiles_num_tile_rows; ++y) { for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
for (x=0; x < encoder->tiles_num_tile_columns; ++x) { for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
const int i = y * encoder->tiles_num_tile_columns + x; const int i = y * encoder->tiles_num_tile_columns + x;
@ -441,19 +442,14 @@ static int encoder_state_finalize_one(encoder_state * const encoder_state) {
int encoder_state_finalize(encoder_state * const encoder_state) { int encoder_state_finalize(encoder_state * const encoder_state) {
#if USE_TILES if (encoder_state->children) {
const encoder_control * const encoder = encoder_state->encoder_control; int i=0;
if (encoder->tiles_enable) { do {
int x,y; encoder_state_finalize_one(&encoder_state->children[i]);
for (y=0; y < encoder->tiles_num_tile_rows; ++y) { } while (encoder_state->children[++i].encoder_control);
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
const int i = y * encoder->tiles_num_tile_columns + x;
encoder_state_finalize_one(&encoder_state->children[i]);
}
}
FREE_POINTER(encoder_state->children); FREE_POINTER(encoder_state->children);
} }
#endif //USE_TILES
encoder_state_finalize_one(encoder_state); encoder_state_finalize_one(encoder_state);
picture_list_destroy(encoder_state->ref); picture_list_destroy(encoder_state->ref);
@ -461,28 +457,18 @@ int encoder_state_finalize(encoder_state * const encoder_state) {
} }
static void encoder_clear_refs(encoder_state *encoder_state) { static void encoder_clear_refs(encoder_state *encoder_state) {
const encoder_control * const encoder = encoder_state->encoder_control; if (encoder_state->children) {
int i=0;
do {
encoder_state->children[i].poc = 0;
} while (encoder_state->children[++i].encoder_control);
}
while (encoder_state->ref->used_size) { while (encoder_state->ref->used_size) {
picture_list_rem(encoder_state->ref, encoder_state->ref->used_size - 1, 1); picture_list_rem(encoder_state->ref, encoder_state->ref->used_size - 1, 1);
} }
encoder_state->poc = 0; encoder_state->poc = 0;
if (USE_TILES && encoder->tiles_enable) {
#if USE_TILES
if (encoder_state->children) {
int x,y;
//Allocate subencoders
for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
const int i = y * encoder->tiles_num_tile_columns + x;
encoder_state->children[i].poc = 0;
}
}
}
#endif //USE_TILES
}
} }
void encoder_control_input_init(encoder_control * const encoder, void encoder_control_input_init(encoder_control * const encoder,
@ -651,14 +637,14 @@ static void substream_encode(encoder_state * const encoder_state, const int last
dealloc_yuv_t(ver_buf); dealloc_yuv_t(ver_buf);
} }
void encode_one_frame(encoder_state * const encoder_state) void encode_one_frame(encoder_state * const main_state)
{ {
const encoder_control * const encoder = encoder_state->encoder_control; const encoder_control * const encoder = main_state->encoder_control;
bitstream * const stream = &encoder_state->stream; bitstream * const stream = &main_state->stream;
const int is_first_frame = (encoder_state->frame == 0); const int is_first_frame = (main_state->frame == 0);
const int is_i_radl = (encoder->cfg->intra_period == 1 && encoder_state->frame % 2 == 0); const int is_i_radl = (encoder->cfg->intra_period == 1 && main_state->frame % 2 == 0);
const int is_p_radl = (encoder->cfg->intra_period > 1 && (encoder_state->frame % encoder->cfg->intra_period) == 0); const int is_p_radl = (encoder->cfg->intra_period > 1 && (main_state->frame % encoder->cfg->intra_period) == 0);
const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl; const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl;
@ -668,44 +654,44 @@ void encode_one_frame(encoder_state * const encoder_state)
**/ **/
if (is_radl_frame) { if (is_radl_frame) {
// Clear the reference list // Clear the reference list
encoder_clear_refs(encoder_state); encoder_clear_refs(main_state);
encoder_state->cur_pic->slicetype = SLICE_I; main_state->cur_pic->slicetype = SLICE_I;
encoder_state->cur_pic->type = NAL_IDR_W_RADL; main_state->cur_pic->type = NAL_IDR_W_RADL;
// Access Unit Delimiter (AUD) // Access Unit Delimiter (AUD)
if (encoder->aud_enable) if (encoder->aud_enable)
write_aud(encoder_state); write_aud(main_state);
// Video Parameter Set (VPS) // Video Parameter Set (VPS)
nal_write(stream, NAL_VPS_NUT, 0, 1); nal_write(stream, NAL_VPS_NUT, 0, 1);
encode_vid_parameter_set(encoder_state); encode_vid_parameter_set(main_state);
bitstream_align(stream); bitstream_align(stream);
// Sequence Parameter Set (SPS) // Sequence Parameter Set (SPS)
nal_write(stream, NAL_SPS_NUT, 0, 1); nal_write(stream, NAL_SPS_NUT, 0, 1);
encode_seq_parameter_set(encoder_state); encode_seq_parameter_set(main_state);
bitstream_align(stream); bitstream_align(stream);
// Picture Parameter Set (PPS) // Picture Parameter Set (PPS)
nal_write(stream, NAL_PPS_NUT, 0, 1); nal_write(stream, NAL_PPS_NUT, 0, 1);
encode_pic_parameter_set(encoder_state); encode_pic_parameter_set(main_state);
bitstream_align(stream); bitstream_align(stream);
if (encoder_state->frame == 0) { if (main_state->frame == 0) {
// Prefix SEI // Prefix SEI
nal_write(stream, PREFIX_SEI_NUT, 0, 0); nal_write(stream, PREFIX_SEI_NUT, 0, 0);
encode_prefix_sei_version(encoder_state); encode_prefix_sei_version(main_state);
bitstream_align(stream); bitstream_align(stream);
} }
} else { } else {
// When intra period == 1, all pictures are intra // When intra period == 1, all pictures are intra
encoder_state->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P; main_state->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P;
encoder_state->cur_pic->type = NAL_TRAIL_R; main_state->cur_pic->type = NAL_TRAIL_R;
// Access Unit Delimiter (AUD) // Access Unit Delimiter (AUD)
if (encoder->aud_enable) if (encoder->aud_enable)
write_aud(encoder_state); write_aud(main_state);
} }
{ {
@ -717,77 +703,75 @@ void encode_one_frame(encoder_state * const encoder_state)
is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code); is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code);
} }
encode_slice_header(encoder_state); encode_slice_header(main_state);
bitstream_align(&encoder_state->stream); bitstream_align(&main_state->stream);
if (USE_TILES && encoder->tiles_enable) { if (USE_TILES && encoder->tiles_enable) {
#if USE_TILES #if USE_TILES
int x,y; int i;
//This can be parallelized //This can be parallelized, we don't use a do...while loop because we use OpenMP
#pragma omp parallel for private(x,y) collapse(2) #pragma omp parallel for
for (y=0; y < encoder->tiles_num_tile_rows; ++y) { for (i = 0; i < encoder->tiles_num_tile_rows * encoder->tiles_num_tile_columns; ++i) {
for (x=0; x < encoder->tiles_num_tile_columns; ++x) { encoder_state *subencoder = &(main_state->children[i]);
const int tile_width_in_lcu = encoder->tiles_col_bd[x+1]-encoder->tiles_col_bd[x];
const int tile_height_in_lcu = encoder->tiles_row_bd[y+1]-encoder->tiles_row_bd[y]; const int tile_width_in_lcu = subencoder->cur_pic->width_in_lcu;
const int tile_offset_x = encoder->tiles_col_bd[x] * LCU_WIDTH; const int tile_height_in_lcu = subencoder->cur_pic->height_in_lcu;
const int tile_offset_y = encoder->tiles_row_bd[y] * LCU_WIDTH; const int tile_offset_x = subencoder->lcu_offset_x * LCU_WIDTH;
const int tile_width = MIN(tile_width_in_lcu * LCU_WIDTH, encoder->in.width - tile_offset_x); const int tile_offset_y = subencoder->lcu_offset_y * LCU_WIDTH;
const int tile_height = MIN(tile_height_in_lcu * LCU_WIDTH, encoder->in.height - tile_offset_y); const int tile_width = MIN(tile_width_in_lcu * LCU_WIDTH, encoder->in.width - tile_offset_x);
const int tile_offset_full = tile_offset_x+tile_offset_y*encoder_state->cur_pic->width; const int tile_height = MIN(tile_height_in_lcu * LCU_WIDTH, encoder->in.height - tile_offset_y);
const int tile_offset_half = tile_offset_x/2+tile_offset_y/2*encoder_state->cur_pic->width/2;
const int i = y * encoder->tiles_num_tile_columns + x; const int tile_offset_full = tile_offset_x+tile_offset_y*main_state->cur_pic->width;
const int tile_offset_half = tile_offset_x/2+tile_offset_y/2*main_state->cur_pic->width/2;
//TODO: ref frames //TODO: ref frames
subencoder->QP = main_state->QP;
encoder_state->children[i].QP = encoder_state->QP; picture_blit_pixels(main_state->cur_pic->y_data + tile_offset_full, subencoder->cur_pic->y_data, tile_width, tile_height, main_state->cur_pic->width, tile_width);
picture_blit_pixels(main_state->cur_pic->u_data + tile_offset_half, subencoder->cur_pic->u_data, tile_width/2, tile_height/2, main_state->cur_pic->width/2, tile_width/2);
picture_blit_pixels(encoder_state->cur_pic->y_data + tile_offset_full, encoder_state->children[i].cur_pic->y_data, tile_width, tile_height, encoder_state->cur_pic->width, tile_width); picture_blit_pixels(main_state->cur_pic->v_data + tile_offset_half, subencoder->cur_pic->v_data, tile_width/2, tile_height/2, main_state->cur_pic->width/2, tile_width/2);
picture_blit_pixels(encoder_state->cur_pic->u_data + tile_offset_half, encoder_state->children[i].cur_pic->u_data, tile_width/2, tile_height/2, encoder_state->cur_pic->width/2, tile_width/2);
picture_blit_pixels(encoder_state->cur_pic->v_data + tile_offset_half, encoder_state->children[i].cur_pic->v_data, tile_width/2, tile_height/2, encoder_state->cur_pic->width/2, tile_width/2); subencoder->cur_pic->slicetype = main_state->cur_pic->slicetype;
subencoder->cur_pic->type = main_state->cur_pic->type;
encoder_state->children[i].cur_pic->slicetype = encoder_state->cur_pic->slicetype;
encoder_state->children[i].cur_pic->type = encoder_state->cur_pic->type; substream_encode(subencoder, !(main_state->children[i+1].encoder_control));
substream_encode(&encoder_state->children[i], x == (encoder->tiles_num_tile_columns-1) && y == (encoder->tiles_num_tile_rows-1)); picture_blit_pixels(subencoder->cur_pic->y_recdata, main_state->cur_pic->y_recdata + tile_offset_full, tile_width, tile_height, tile_width, main_state->cur_pic->width);
picture_blit_pixels(subencoder->cur_pic->u_recdata, main_state->cur_pic->u_recdata + tile_offset_half, tile_width/2, tile_height/2, tile_width/2, main_state->cur_pic->width/2);
picture_blit_pixels(encoder_state->children[i].cur_pic->y_recdata, encoder_state->cur_pic->y_recdata + tile_offset_full, tile_width, tile_height, tile_width, encoder_state->cur_pic->width); picture_blit_pixels(subencoder->cur_pic->v_recdata, main_state->cur_pic->v_recdata + tile_offset_half, tile_width/2, tile_height/2, tile_width/2, main_state->cur_pic->width/2);
picture_blit_pixels(encoder_state->children[i].cur_pic->u_recdata, encoder_state->cur_pic->u_recdata + tile_offset_half, tile_width/2, tile_height/2, tile_width/2, encoder_state->cur_pic->width/2);
picture_blit_pixels(encoder_state->children[i].cur_pic->v_recdata, encoder_state->cur_pic->v_recdata + tile_offset_half, tile_width/2, tile_height/2, tile_width/2, encoder_state->cur_pic->width/2);
}
} }
//This has to be serial //This has to be serial
for (y=0; y < encoder->tiles_num_tile_rows; ++y) { i = 0;
for (x=0; x < encoder->tiles_num_tile_columns; ++x) { do {
const int i = y * encoder->tiles_num_tile_columns + x; if (!main_state->children[i+1].encoder_control) {
//last tile
if (x == (encoder->tiles_num_tile_columns-1) && y == (encoder->tiles_num_tile_rows-1)) { cabac_flush(&main_state->children[i].cabac);
//Last tile bitstream_align(&main_state->children[i].stream);
cabac_flush(&encoder_state->children[i].cabac); } else {
bitstream_align(&encoder_state->children[i].stream); //Other tiles
} else { cabac_encode_bin_trm(&main_state->children[i].cabac, 1); // end_of_sub_stream_one_bit == 1
//Other tiles cabac_flush(&main_state->children[i].cabac);
cabac_encode_bin_trm(&encoder_state->children[i].cabac, 1); // end_of_sub_stream_one_bit == 1
cabac_flush(&encoder_state->children[i].cabac);
}
//Append bitstream to main stream
bitstream_append(&encoder_state->stream, &encoder_state->children[i].stream);
bitstream_clear(&encoder_state->children[i].stream);
} }
} //Append bitstream to main stream
bitstream_append(&main_state->stream, &main_state->children[i].stream);
bitstream_clear(&main_state->children[i].stream);
} while (main_state->children[++i].encoder_control);
#endif //USE_TILES #endif //USE_TILES
} else { } else {
//Encode the whole thing as one stream //Encode the whole thing as one stream
substream_encode(encoder_state, 1); substream_encode(main_state, 1);
cabac_flush(&encoder_state->cabac); cabac_flush(&main_state->cabac);
bitstream_align(stream); bitstream_align(stream);
} }
// Calculate checksum // Calculate checksum
add_checksum(encoder_state); add_checksum(main_state);
encoder_state->cur_pic->poc = encoder_state->poc; main_state->cur_pic->poc = main_state->poc;
} }
static void fill_after_frame(unsigned height, unsigned array_width, static void fill_after_frame(unsigned height, unsigned array_width,