WPP with threading

This commit is contained in:
Joose Sainio 2019-11-12 12:12:57 +02:00
parent 615973adca
commit b9b01f8036
4 changed files with 30 additions and 13 deletions

View file

@ -64,6 +64,8 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
} }
} }
pthread_mutex_init(&state->frame->rc_lock, NULL);
for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) { for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus); state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus);
state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus); state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus);
@ -98,6 +100,7 @@ static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
FREE_POINTER(state->frame->new_ratecontrol.k_para[i]); FREE_POINTER(state->frame->new_ratecontrol.k_para[i]);
} }
pthread_mutex_destroy(&state->frame->rc_lock);
// fclose(state->frame->bpp_d); // fclose(state->frame->bpp_d);
// fclose(state->frame->c_d); // fclose(state->frame->c_d);
// fclose(state->frame->k_d); // fclose(state->frame->k_d);

View file

@ -703,9 +703,17 @@ static void encoder_state_worker_encode_lcu(void * opaque)
} }
} }
pthread_mutex_lock(&state->frame->rc_lock);
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits; const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
state->frame->cur_frame_bits_coded += bits; state->frame->cur_frame_bits_coded += bits;
// remaining_weight is used differently by intra and inter frames: intra frames
// update it in get_ctu_bits, so it must only be decremented here for inter frames.
state->frame->remaining_weight -= !state->frame->is_irap ?
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->weight :
0;
pthread_mutex_unlock(&state->frame->rc_lock);
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits; kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
uint8_t not_skip = false; uint8_t not_skip = false;
for(int y = 0; y < 64 && !not_skip; y+=8) { for(int y = 0; y < 64 && !not_skip; y+=8) {
for(int x = 0; x < 64 && !not_skip; x+=8) { for(int x = 0; x < 64 && !not_skip; x+=8) {

View file

@ -151,6 +151,8 @@ typedef struct encoder_state_config_frame_t {
*/ */
lcu_stats_t *lcu_stats; lcu_stats_t *lcu_stats;
pthread_mutex_t rc_lock;
struct struct
{ {
double *c_para[KVZ_MAX_GOP_LAYERS]; double *c_para[KVZ_MAX_GOP_LAYERS];
@ -175,7 +177,7 @@ typedef struct encoder_state_config_frame_t {
*/ */
bool first_nal; bool first_nal;
double icost; double icost;
double remaining_icost; double remaining_weight;
double i_bits_left; double i_bits_left;
} encoder_state_config_frame_t; } encoder_state_config_frame_t;

View file

@ -382,7 +382,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
} }
} }
state->frame->icost = total_cost; state->frame->icost = total_cost;
state->frame->remaining_icost = total_cost; state->frame->remaining_weight = total_cost;
} }
const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0); const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0);
@ -451,6 +451,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11); while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
} }
total_weight = calculate_weights(state, layer, ctu_count, best_lambda); total_weight = calculate_weights(state, layer, ctu_count, best_lambda);
state->frame->remaining_weight = bits;
} }
else { else {
for (int i = 0; i < ctu_count; ++i) { for (int i = 0; i < ctu_count; ++i) {
@ -480,15 +481,17 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
const int index = pos.x + pos.y * state->tile->frame->width_in_lcu; const int index = pos.x + pos.y * state->tile->frame->width_in_lcu;
if (state->frame->is_irap) { if (state->frame->is_irap) {
// TODO: intra
int cus_left = num_ctu - index + 1; int cus_left = num_ctu - index + 1;
int window = MIN(4, cus_left); int window = MIN(4, cus_left);
double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost; double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost;
pthread_mutex_lock(&state->frame->rc_lock);
double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded; double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded;
double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window; double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window;
avg_bits = mad * weighted_bits_left / state->frame->remaining_icost; avg_bits = mad * weighted_bits_left / state->frame->remaining_weight;
state->frame->remaining_icost -= mad; state->frame->remaining_weight -= mad;
state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost; state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost;
pthread_mutex_unlock(&state->frame->rc_lock);
} }
else { else {
double total_weight = 0; double total_weight = 0;
@ -500,16 +503,15 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
double taylor_e3 = 0.0; double taylor_e3 = 0.0;
int iter = 0; int iter = 0;
for (int i = index; i < num_ctu; i++) {
total_weight += state->frame->lcu_stats[i].weight;
}
int last_ctu = index + used_ctu_count; int last_ctu = index + used_ctu_count;
for (int i = index; i < last_ctu; i++) { for (int i = index; i < last_ctu; i++) {
target_bits += state->frame->lcu_stats[i].weight; target_bits += state->frame->lcu_stats[i].weight;
} }
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); //obtain the total bit-rate for the realInfluenceLCU (=4) CTUs pthread_mutex_lock(&state->frame->rc_lock);
total_weight = state->frame->remaining_weight;
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10);
pthread_mutex_unlock(&state->frame->rc_lock);
//just similar with the process at frame level, details can refer to the function TEncRCPic::kvz_estimate_pic_lambda //just similar with the process at frame level, details can refer to the function TEncRCPic::kvz_estimate_pic_lambda
do { do {
@ -582,12 +584,14 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
est_qp = lambda_to_qp(est_lambda); est_qp = lambda_to_qp(est_lambda);
} }
else { else {
// With WPP, CTUs in the rows above may not be finished yet, so only search back to the start of the current row.
const int ctu_limit = encoder->cfg.wpp ? pos.y * encoder->in.width_in_lcu : 0;
est_lambda = alpha * pow(bpp, beta); est_lambda = alpha * pow(bpp, beta);
const double clip_lambda = state->frame->lambda; const double clip_lambda = state->frame->lambda;
double clip_neighbor_lambda = -1; double clip_neighbor_lambda = -1;
for(int temp_index = index - 1; temp_index >= 0; --temp_index) { for(int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
if(state->frame->lcu_stats[temp_index].lambda > 0) { if(state->frame->lcu_stats[temp_index].lambda > 0) {
clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda; clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda;
break; break;
@ -616,7 +620,7 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
est_qp = lambda_to_qp(est_lambda); est_qp = lambda_to_qp(est_lambda);
int clip_qp = -1; int clip_qp = -1;
for (int temp_index = index - 1; temp_index >= 0; --temp_index) { for (int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
if (state->frame->lcu_stats[temp_index].qp > -1) { if (state->frame->lcu_stats[temp_index].qp > -1) {
clip_qp = state->frame->lcu_stats[temp_index].qp; clip_qp = state->frame->lcu_stats[temp_index].qp;
break; break;