mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
WPP with threading
This commit is contained in:
parent
615973adca
commit
b9b01f8036
|
@ -60,10 +60,12 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
|
||||||
for (int y = 0; y < encoder->in.height_in_lcu; y++) {
|
for (int y = 0; y < encoder->in.height_in_lcu; y++) {
|
||||||
for (int x = 0; x < encoder->in.width_in_lcu; x++) {
|
for (int x = 0; x < encoder->in.width_in_lcu; x++) {
|
||||||
int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
|
int temp = MIN(encoder->cfg.width - x * 64, 64) * MIN(encoder->cfg.height - y * 64, 64);
|
||||||
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels =temp;
|
state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels = temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_mutex_init(&state->frame->rc_lock, NULL);
|
||||||
|
|
||||||
for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
|
for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
|
||||||
state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus);
|
state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus);
|
||||||
state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus);
|
state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus);
|
||||||
|
@ -98,6 +100,7 @@ static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
|
||||||
FREE_POINTER(state->frame->new_ratecontrol.k_para[i]);
|
FREE_POINTER(state->frame->new_ratecontrol.k_para[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_mutex_destroy(&state->frame->rc_lock);
|
||||||
// fclose(state->frame->bpp_d);
|
// fclose(state->frame->bpp_d);
|
||||||
// fclose(state->frame->c_d);
|
// fclose(state->frame->c_d);
|
||||||
// fclose(state->frame->k_d);
|
// fclose(state->frame->k_d);
|
||||||
|
|
|
@ -703,9 +703,17 @@ static void encoder_state_worker_encode_lcu(void * opaque)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pthread_mutex_lock(&state->frame->rc_lock);
|
||||||
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
|
const uint32_t bits = kvz_bitstream_tell(&state->stream) - existing_bits;
|
||||||
state->frame->cur_frame_bits_coded += bits;
|
state->frame->cur_frame_bits_coded += bits;
|
||||||
|
// This variable is used differently by intra and inter frames and shouldn't
|
||||||
|
// be touched in intra frames here
|
||||||
|
state->frame->remaining_weight -= !state->frame->is_irap ?
|
||||||
|
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->weight :
|
||||||
|
0;
|
||||||
|
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||||
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
|
kvz_get_lcu_stats(state, lcu->position.x, lcu->position.y)->bits = bits;
|
||||||
|
|
||||||
uint8_t not_skip = false;
|
uint8_t not_skip = false;
|
||||||
for(int y = 0; y < 64 && !not_skip; y+=8) {
|
for(int y = 0; y < 64 && !not_skip; y+=8) {
|
||||||
for(int x = 0; x < 64 && !not_skip; x+=8) {
|
for(int x = 0; x < 64 && !not_skip; x+=8) {
|
||||||
|
|
|
@ -151,6 +151,8 @@ typedef struct encoder_state_config_frame_t {
|
||||||
*/
|
*/
|
||||||
lcu_stats_t *lcu_stats;
|
lcu_stats_t *lcu_stats;
|
||||||
|
|
||||||
|
pthread_mutex_t rc_lock;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
double *c_para[KVZ_MAX_GOP_LAYERS];
|
double *c_para[KVZ_MAX_GOP_LAYERS];
|
||||||
|
@ -175,7 +177,7 @@ typedef struct encoder_state_config_frame_t {
|
||||||
*/
|
*/
|
||||||
bool first_nal;
|
bool first_nal;
|
||||||
double icost;
|
double icost;
|
||||||
double remaining_icost;
|
double remaining_weight;
|
||||||
double i_bits_left;
|
double i_bits_left;
|
||||||
} encoder_state_config_frame_t;
|
} encoder_state_config_frame_t;
|
||||||
|
|
||||||
|
|
|
@ -382,7 +382,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
state->frame->icost = total_cost;
|
state->frame->icost = total_cost;
|
||||||
state->frame->remaining_icost = total_cost;
|
state->frame->remaining_weight = total_cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0);
|
const int layer = encoder->cfg.gop[state->frame->gop_offset].layer - (state->frame->is_irap ? 1 : 0);
|
||||||
|
@ -451,6 +451,7 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) {
|
||||||
while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
|
while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
|
||||||
}
|
}
|
||||||
total_weight = calculate_weights(state, layer, ctu_count, best_lambda);
|
total_weight = calculate_weights(state, layer, ctu_count, best_lambda);
|
||||||
|
state->frame->remaining_weight = bits;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int i = 0; i < ctu_count; ++i) {
|
for (int i = 0; i < ctu_count; ++i) {
|
||||||
|
@ -480,15 +481,17 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
|
||||||
const int index = pos.x + pos.y * state->tile->frame->width_in_lcu;
|
const int index = pos.x + pos.y * state->tile->frame->width_in_lcu;
|
||||||
|
|
||||||
if (state->frame->is_irap) {
|
if (state->frame->is_irap) {
|
||||||
// TODO: intra
|
|
||||||
int cus_left = num_ctu - index + 1;
|
int cus_left = num_ctu - index + 1;
|
||||||
int window = MIN(4, cus_left);
|
int window = MIN(4, cus_left);
|
||||||
double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost;
|
double mad = kvz_get_lcu_stats(state, pos.x, pos.y)->i_cost;
|
||||||
|
|
||||||
|
pthread_mutex_lock(&state->frame->rc_lock);
|
||||||
double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded;
|
double bits_left = state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded;
|
||||||
double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window;
|
double weighted_bits_left = (bits_left * window + (bits_left - state->frame->i_bits_left)*cus_left) / window;
|
||||||
avg_bits = mad * weighted_bits_left / state->frame->remaining_icost;
|
avg_bits = mad * weighted_bits_left / state->frame->remaining_weight;
|
||||||
state->frame->remaining_icost -= mad;
|
state->frame->remaining_weight -= mad;
|
||||||
state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost;
|
state->frame->i_bits_left -= state->frame->cur_pic_target_bits * mad / state->frame->icost;
|
||||||
|
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
double total_weight = 0;
|
double total_weight = 0;
|
||||||
|
@ -500,16 +503,15 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) {
|
||||||
double taylor_e3 = 0.0;
|
double taylor_e3 = 0.0;
|
||||||
int iter = 0;
|
int iter = 0;
|
||||||
|
|
||||||
for (int i = index; i < num_ctu; i++) {
|
|
||||||
total_weight += state->frame->lcu_stats[i].weight;
|
|
||||||
}
|
|
||||||
|
|
||||||
int last_ctu = index + used_ctu_count;
|
int last_ctu = index + used_ctu_count;
|
||||||
for (int i = index; i < last_ctu; i++) {
|
for (int i = index; i < last_ctu; i++) {
|
||||||
target_bits += state->frame->lcu_stats[i].weight;
|
target_bits += state->frame->lcu_stats[i].weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10); //obtain the total bit-rate for the realInfluenceLCU (=4) CTUs
|
pthread_mutex_lock(&state->frame->rc_lock);
|
||||||
|
total_weight = state->frame->remaining_weight;
|
||||||
|
target_bits = MAX(target_bits + state->frame->cur_pic_target_bits - state->frame->cur_frame_bits_coded - (int)total_weight, 10);
|
||||||
|
pthread_mutex_unlock(&state->frame->rc_lock);
|
||||||
|
|
||||||
// Similar to the process at frame level; for details, refer to the function TEncRCPic::kvz_estimate_pic_lambda
|
// Similar to the process at frame level; for details, refer to the function TEncRCPic::kvz_estimate_pic_lambda
|
||||||
do {
|
do {
|
||||||
|
@ -582,12 +584,14 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
|
||||||
est_qp = lambda_to_qp(est_lambda);
|
est_qp = lambda_to_qp(est_lambda);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
// When WPP is used, the preceding CTUs in the rows above may not be ready yet, so only look back within the current row
|
||||||
|
const int ctu_limit = encoder->cfg.wpp ? pos.y * encoder->in.width_in_lcu : 0;
|
||||||
|
|
||||||
est_lambda = alpha * pow(bpp, beta);
|
est_lambda = alpha * pow(bpp, beta);
|
||||||
const double clip_lambda = state->frame->lambda;
|
const double clip_lambda = state->frame->lambda;
|
||||||
|
|
||||||
double clip_neighbor_lambda = -1;
|
double clip_neighbor_lambda = -1;
|
||||||
for(int temp_index = index - 1; temp_index >= 0; --temp_index) {
|
for(int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
|
||||||
if(state->frame->lcu_stats[temp_index].lambda > 0) {
|
if(state->frame->lcu_stats[temp_index].lambda > 0) {
|
||||||
clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda;
|
clip_neighbor_lambda = state->frame->lcu_stats[temp_index].lambda;
|
||||||
break;
|
break;
|
||||||
|
@ -616,7 +620,7 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) {
|
||||||
est_qp = lambda_to_qp(est_lambda);
|
est_qp = lambda_to_qp(est_lambda);
|
||||||
|
|
||||||
int clip_qp = -1;
|
int clip_qp = -1;
|
||||||
for (int temp_index = index - 1; temp_index >= 0; --temp_index) {
|
for (int temp_index = index - 1; temp_index >= ctu_limit; --temp_index) {
|
||||||
if (state->frame->lcu_stats[temp_index].qp > -1) {
|
if (state->frame->lcu_stats[temp_index].qp > -1) {
|
||||||
clip_qp = state->frame->lcu_stats[temp_index].qp;
|
clip_qp = state->frame->lcu_stats[temp_index].qp;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in a new issue