mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
WPP with threading
This commit is contained in:
parent
615973adca
commit
b9b01f8036
|
@ -60,10 +60,12 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
|
|||
for (int y = 0; y < encoder->in.height_in_lcu; y++) {
|
||||
for (int x = 0; x < encoder->in.width_in_lcu; x++) {
|
||||
int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
|
||||
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels =temp;
|
||||
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_init(&state->frame->rc_lock, NULL);
|
||||
|
||||
for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
|
||||
state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus);
|
||||
state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus);
|
||||
|
@ -98,6 +100,7 @@ static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
|
|||
FREE_POINTER(state->frame->new_ratecontrol.k_para[i]);
|
||||
}
|
||||
|
||||
pthread_mutex_destroy(&state->frame->rc_lock);
|
||||
// fclose(state->frame->bpp_d);
|
||||
// fclose(state->frame->c_d);
|
||||
// fclose(state->frame->k_d);
|
||||
|
|
|
@ -703,9 +703,17 @@ static void encoder_state_worker_encode_lcu(void * opaque)
|
|||
}
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&state->frame->rc_lock);
|
||||
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
|
||||
state->frame->cur_frame_bits_coded += bits;
|
||||
// This variable is used differently by intra and inter frames and shouldn't
|
||||
// be touched in intra frames here
|
||||
state->frame->remaining_weight -= !state->frame->is_irap ?
|
||||
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->weight :
|
||||
0;
|
||||
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
|
||||
|
||||
uint8_t not_skip = false;
|
||||
for(int y = 0; y < 64 && !not_skip; y+=8) {
|
||||
for(int x = 0; x < 64 && !not_skip; x+=8) {
|
||||
|
|
|
@ -151,6 +151,8 @@ typedef struct encoder_state_config_frame_t {
|
|||
*/
|
||||
lcu_stats_t *lcu_stats;
|
||||
|
||||
pthread_mutex_t rc_lock;
|
||||
|
||||
struct
|
||||
{
|
||||
double *c_para[KVZ_MAX_GOP_LAYERS];
|
||||
|
@ -175,7 +177,7 @@ typedef struct encoder_state_config_frame_t {
|
|||
*/
|
||||
bool first_nal;
|
||||
double icost;
|
||||
double remaining_icost;
|
||||
double remaining_weight;
|
||||
double i_bits_left;
|
||||
} encoder_state_config_frame_t;
|
||||
|
||||
|
|
|
@ -382,7 +382,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
|
|||
}
|
||||
}
|
||||
state->frame->icost = total_cost;
|
||||
state->frame->remaining_icost = total_cost;
|
||||
state->frame->remaining_weight = total_cost;
|
||||
}
|
||||
|
||||
const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0);
|
||||
|
@ -451,6 +451,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
|
|||
while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
|
||||
}
|
||||
total_weight = calculate_weights(state, layer, ctu_count, best_lambda);
|
||||
state->frame->remaining_weight = bits;
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ctu_count; ++i) {
|
||||
|
@ -480,15 +481,17 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
|
|||
const int index = pos.x + pos.y * state->tile->frame->width_in_lcu;
|
||||
|
||||
if (state->frame->is_irap) {
|
||||
// TODO: intra
|
||||
int cus_left = num_ctu - index + 1;
|
||||
int window = MIN(4, cus_left);
|
||||
double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost;
|
||||
|
||||
pthread_mutex_lock(&state->frame->rc_lock);
|
||||
double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded;
|
||||
double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window;
|
||||
avg_bits = mad * weighted_bits_left / state->frame->remaining_icost;
|
||||
state->frame->remaining_icost -= mad;
|
||||
avg_bits = mad * weighted_bits_left / state->frame->remaining_weight;
|
||||
state->frame->remaining_weight -= mad;
|
||||
state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost;
|
||||
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||
}
|
||||
else {
|
||||
double total_weight = 0;
|
||||
|
@ -500,16 +503,15 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
|
|||
double taylor_e3 = 0.0;
|
||||
int iter = 0;
|
||||
|
||||
for (int i = index; i < num_ctu; i++) {
|
||||
total_weight += state->frame->lcu_stats[i].weight;
|
||||
}
|
||||
|
||||
int last_ctu = index + used_ctu_count;
|
||||
for (int i = index; i < last_ctu; i++) {
|
||||
target_bits += state->frame->lcu_stats[i].weight;
|
||||
}
|
||||
|
||||
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); //obtain the total bit-rate for the realInfluenceLCU (=4) CTUs
|
||||
pthread_mutex_lock(&state->frame->rc_lock);
|
||||
total_weight = state->frame->remaining_weight;
|
||||
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10);
|
||||
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||
|
||||
// Similar to the process at the frame level; for details, refer to the function TEncRCPic::kvz_estimate_pic_lambda
|
||||
do {
|
||||
|
@ -582,12 +584,14 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
|
|||
est_qp = lambda_to_qp(est_lambda);
|
||||
}
|
||||
else {
|
||||
// When WPP is used, the previous CTUs from the rows above may not be ready yet
|
||||
const int ctu_limit = encoder->cfg.wpp ? pos.y * encoder->in.width_in_lcu : 0;
|
||||
|
||||
est_lambda = alpha * pow(bpp, beta);
|
||||
const double clip_lambda = state->frame->lambda;
|
||||
|
||||
double clip_neighbor_lambda = -1;
|
||||
for(int temp_index = index - 1; temp_index >= 0; --temp_index) {
|
||||
for(int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
|
||||
if(state->frame->lcu_stats[temp_index].lambda > 0) {
|
||||
clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda;
|
||||
break;
|
||||
|
@ -616,7 +620,7 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
|
|||
est_qp = lambda_to_qp(est_lambda);
|
||||
|
||||
int clip_qp = -1;
|
||||
for (int temp_index = index - 1; temp_index >= 0; --temp_index) {
|
||||
for (int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
|
||||
if (state->frame->lcu_stats[temp_index].qp > -1) {
|
||||
clip_qp = state->frame->lcu_stats[temp_index].qp;
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue