WPP with threading

This commit is contained in:
Joose Sainio 2019-11-12 12:12:57 +02:00
parent 615973adca
commit b9b01f8036
4 changed files with 30 additions and 13 deletions

View file

@ -60,10 +60,12 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
for (int y = 0; y < encoder->in.height_in_lcu; y++) {
for (int x = 0; x < encoder->in.width_in_lcu; x++) {
int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels =temp;
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp;
}
}
pthread_mutex_init(&state->frame->rc_lock, NULL);
for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus);
state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus);
@ -98,6 +100,7 @@ static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
FREE_POINTER(state->frame->new_ratecontrol.k_para[i]);
}
pthread_mutex_destroy(&state->frame->rc_lock);
// fclose(state->frame->bpp_d);
// fclose(state->frame->c_d);
// fclose(state->frame->k_d);

View file

@ -703,9 +703,17 @@ static void encoder_state_worker_encode_lcu(void * opaque)
}
}
pthread_mutex_lock(&state->frame->rc_lock);
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
state->frame->cur_frame_bits_coded += bits;
// This variable is used differently by intra and inter frames and shouldn't
// be touched in intra frames here
state->frame->remaining_weight -= !state->frame->is_irap ?
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->weight :
0;
pthread_mutex_unlock(&state->frame->rc_lock);
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
uint8_t not_skip = false;
for(int y = 0; y < 64 && !not_skip; y+=8) {
for(int x = 0; x < 64 && !not_skip; x+=8) {

View file

@ -151,6 +151,8 @@ typedef struct encoder_state_config_frame_t {
*/
lcu_stats_t *lcu_stats;
pthread_mutex_t rc_lock;
struct
{
double *c_para[KVZ_MAX_GOP_LAYERS];
@ -175,7 +177,7 @@ typedef struct encoder_state_config_frame_t {
*/
bool first_nal;
double icost;
double remaining_icost;
double remaining_weight;
double i_bits_left;
} encoder_state_config_frame_t;

View file

@ -382,7 +382,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
}
}
state->frame->icost = total_cost;
state->frame->remaining_icost = total_cost;
state->frame->remaining_weight = total_cost;
}
const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0);
@ -451,6 +451,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
}
total_weight = calculate_weights(state, layer, ctu_count, best_lambda);
state->frame->remaining_weight = bits;
}
else {
for (int i = 0; i < ctu_count; ++i) {
@ -480,15 +481,17 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
const int index = pos.x + pos.y * state->tile->frame->width_in_lcu;
if (state->frame->is_irap) {
// TODO: intra
int cus_left = num_ctu - index + 1;
int window = MIN(4, cus_left);
double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost;
pthread_mutex_lock(&state->frame->rc_lock);
double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded;
double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window;
avg_bits = mad * weighted_bits_left / state->frame->remaining_icost;
state->frame->remaining_icost -= mad;
avg_bits = mad * weighted_bits_left / state->frame->remaining_weight;
state->frame->remaining_weight -= mad;
state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost;
pthread_mutex_unlock(&state->frame->rc_lock);
}
else {
double total_weight = 0;
@ -500,16 +503,15 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
double taylor_e3 = 0.0;
int iter = 0;
for (int i = index; i < num_ctu; i++) {
total_weight += state->frame->lcu_stats[i].weight;
}
int last_ctu = index + used_ctu_count;
for (int i = index; i < last_ctu; i++) {
target_bits += state->frame->lcu_stats[i].weight;
}
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); //obtain the total bit-rate for the realInfluenceLCU (=4) CTUs
pthread_mutex_lock(&state->frame->rc_lock);
total_weight = state->frame->remaining_weight;
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10);
pthread_mutex_unlock(&state->frame->rc_lock);
// Same procedure as at the frame level; for details see TEncRCPic::kvz_estimate_pic_lambda
do {
@ -582,12 +584,14 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
est_qp = lambda_to_qp(est_lambda);
}
else {
// When WPP is used, CTUs in the rows above may not be finished yet,
// so the backwards search for a previous CTU is limited to the current row
const int ctu_limit = encoder->cfg.wpp ? pos.y * encoder->in.width_in_lcu : 0;
est_lambda = alpha * pow(bpp, beta);
const double clip_lambda = state->frame->lambda;
double clip_neighbor_lambda = -1;
for(int temp_index = index - 1; temp_index >= 0; --temp_index) {
for(int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
if(state->frame->lcu_stats[temp_index].lambda > 0) {
clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda;
break;
@ -616,7 +620,7 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
est_qp = lambda_to_qp(est_lambda);
int clip_qp = -1;
for (int temp_index = index - 1; temp_index >= 0; --temp_index) {
for (int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
if (state->frame->lcu_stats[temp_index].qp > -1) {
clip_qp = state->frame->lcu_stats[temp_index].qp;
break;