diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 49735905..1ba741b3 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -60,10 +60,12 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) { for (int y = 0; y < encoder->in.height_in_lcu; y++) { for (int x = 0; x < encoder->in.width_in_lcu; x++) { int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64); - state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels =temp; + state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp; } } + pthread_mutex_init(&state->frame->rc_lock, NULL); + for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) { state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus); state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus); @@ -98,6 +100,7 @@ static void encoder_state_config_frame_finalize(encoder_state_t * const state) { FREE_POINTER(state->frame->new_ratecontrol.k_para[i]); } + pthread_mutex_destroy(&state->frame->rc_lock); // fclose(state->frame->bpp_d); // fclose(state->frame->c_d); // fclose(state->frame->k_d); diff --git a/src/encoderstate.c b/src/encoderstate.c index bbf5ce50..a33461b2 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -703,9 +703,17 @@ static void encoder_state_worker_encode_lcu(void * opaque) } } + pthread_mutex_lock(&state->frame->rc_lock); const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits; state->frame->cur_frame_bits_coded += bits; + // This variable is used differently by intra and inter frames and shouldn't + // be touched in intra frames here + state->frame->remaining_weight -= !state->frame->is_irap ? 
+ kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->weight : + 0; + pthread_mutex_unlock(&state->frame->rc_lock); kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits; + uint8_t not_skip = false; for(int y = 0; y < 64 && !not_skip; y+=8) { for(int x = 0; x < 64 && !not_skip; x+=8) { diff --git a/src/encoderstate.h b/src/encoderstate.h index 80cc7072..b29a682c 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -151,6 +151,8 @@ typedef struct encoder_state_config_frame_t { */ lcu_stats_t *lcu_stats; + pthread_mutex_t rc_lock; + struct { double *c_para[KVZ_MAX_GOP_LAYERS]; @@ -175,7 +177,7 @@ typedef struct encoder_state_config_frame_t { */ bool first_nal; double icost; - double remaining_icost; + double remaining_weight; double i_bits_left; } encoder_state_config_frame_t; diff --git a/src/rate_control.c b/src/rate_control.c index 55f0e184..da638e99 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -382,7 +382,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) { } } state->frame->icost = total_cost; - state->frame->remaining_icost = total_cost; + state->frame->remaining_weight = total_cost; } const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 
1 : 0); @@ -451,6 +451,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) { while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11); } total_weight = calculate_weights(state, layer, ctu_count, best_lambda); + state->frame->remaining_weight = bits; } else { for (int i = 0; i < ctu_count; ++i) { @@ -480,15 +481,17 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) { const int index = pos.x + pos.y * state->tile->frame->width_in_lcu; if (state->frame->is_irap) { - // TODO: intra int cus_left = num_ctu - index + 1; int window = MIN(4, cus_left); double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost; + + pthread_mutex_lock(&state->frame->rc_lock); double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded; double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window; - avg_bits = mad * weighted_bits_left / state->frame->remaining_icost; - state->frame->remaining_icost -= mad; + avg_bits = mad * weighted_bits_left / state->frame->remaining_weight; + state->frame->remaining_weight -= mad; state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost; + pthread_mutex_unlock(&state->frame->rc_lock); } else { double total_weight = 0; @@ -500,16 +503,15 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) { double taylor_e3 = 0.0; int iter = 0; - for (int i = index; i < num_ctu; i++) { - total_weight += state->frame->lcu_stats[i].weight; - } - int last_ctu = index + used_ctu_count; for (int i = index; i < last_ctu; i++) { target_bits += state->frame->lcu_stats[i].weight; } - target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); //obtain the total bit-rate for the realInfluenceLCU (=4) CTUs + pthread_mutex_lock(&state->frame->rc_lock); + total_weight = state->frame->remaining_weight; + target_bits = MAX(target_bits + 
state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); + pthread_mutex_unlock(&state->frame->rc_lock); //just similar with the process at frame level, details can refer to the function TEncRCPic::kvz_estimate_pic_lambda do { @@ -582,12 +584,14 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) { est_qp = lambda_to_qp(est_lambda); } else { + // When WPP is used, CTUs in the rows above may not be finished yet, so limit the backward search to the current row + const int ctu_limit = encoder->cfg.wpp ? pos.y * encoder->in.width_in_lcu : 0; est_lambda = alpha * pow(bpp, beta); const double clip_lambda = state->frame->lambda; double clip_neighbor_lambda = -1; - for(int temp_index = index - 1; temp_index >= 0; --temp_index) { + for(int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) { if(state->frame->lcu_stats[temp_index].lambda > 0) { clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda; break; @@ -616,7 +620,7 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) { est_qp = lambda_to_qp(est_lambda); int clip_qp = -1; - for (int temp_index = index - 1; temp_index >= 0; --temp_index) { + for (int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) { if (state->frame->lcu_stats[temp_index].qp > -1) { clip_qp = state->frame->lcu_stats[temp_index].qp; break;