From c759c138ed4cddfe38ec57a63e7b280dfcca3c83 Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Wed, 13 Nov 2019 11:56:25 +0200 Subject: [PATCH] Prepare the rc data structure to be shared among all frame encoders --- src/encoder_state-ctors_dtors.c | 25 +------ src/encoderstate.c | 5 ++ src/encoderstate.h | 15 +--- src/kvazaar.c | 8 ++- src/rate_control.c | 120 +++++++++++++++++++++----------- src/rate_control.h | 16 +++++ 6 files changed, 113 insertions(+), 76 deletions(-) diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 1ba741b3..84c15940 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -34,6 +34,7 @@ #include "kvazaar.h" #include "threadqueue.h" #include "videoframe.h" +#include "rate_control.h" static int encoder_state_config_frame_init(encoder_state_t * const state) { @@ -66,22 +67,7 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) { pthread_mutex_init(&state->frame->rc_lock, NULL); - for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) { - state->frame->new_ratecontrol.c_para[i] = malloc(sizeof(double) * num_lcus); - state->frame->new_ratecontrol.k_para[i] = malloc(sizeof(double) * num_lcus); - state->frame->new_ratecontrol.pic_c_para[i] = 5.0; - state->frame->new_ratecontrol.pic_k_para[i] = -0.1; - for(int j = 0; j < num_lcus; j++) { - state->frame->new_ratecontrol.c_para[i][j] = 5.0; - state->frame->new_ratecontrol.k_para[i][j] = -0.1; - } - } - state->frame->new_ratecontrol.intra_bpp = calloc(num_lcus, sizeof(double)); - state->frame->new_ratecontrol.intra_dis = calloc(num_lcus, sizeof(double)); - memset(state->frame->new_ratecontrol.previous_lambdas, 0, sizeof(state->frame->new_ratecontrol.previous_lambdas)); - state->frame->new_ratecontrol.previous_frame_lambda = 0.0; - state->frame->new_ratecontrol.intra_pic_bpp = 0.0; - state->frame->new_ratecontrol.intra_pic_distortion = 0.0; + state->frame->new_ratecontrol = kvz_get_rc_data(NULL); // state->frame->bpp_d = 
fopen("bits.txt", "wb"); // state->frame->c_d = fopen("c.txt", "wb"); @@ -93,13 +79,6 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) { static void encoder_state_config_frame_finalize(encoder_state_t * const state) { if (state->frame == NULL) return; - FREE_POINTER(state->frame->new_ratecontrol.intra_bpp); - FREE_POINTER(state->frame->new_ratecontrol.intra_dis); - for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) { - FREE_POINTER(state->frame->new_ratecontrol.c_para[i]); - FREE_POINTER(state->frame->new_ratecontrol.k_para[i]); - } - pthread_mutex_destroy(&state->frame->rc_lock); // fclose(state->frame->bpp_d); // fclose(state->frame->c_d); diff --git a/src/encoderstate.c b/src/encoderstate.c index c932afc1..2301e4b2 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -822,6 +822,11 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) } kvz_threadqueue_job_dep_add(job[0], ref_state->tile->wf_jobs[dep_lcu->id]); + //TODO: Preparation for the lock free implementation of the new rc + if (ref_state->frame->slicetype == KVZ_SLICE_I && ref_state->frame->num != 0 && state->encoder_control->cfg.owf > 1 && true) { + kvz_threadqueue_job_dep_add(job[0], ref_state->previous_encoder_state->tile->wf_jobs[dep_lcu->id]); + } + // Very spesific bug that happens when owf length is longer than the // gop length. Takes care of that. 
if(!state->encoder_control->cfg.gop_lowdelay && diff --git a/src/encoderstate.h b/src/encoderstate.h index e256a32c..44d9fab5 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -39,6 +39,7 @@ #include "videoframe.h" #include "extras/crypto.h" +struct kvz_rc_data; typedef enum { ENCODER_STATE_TYPE_INVALID = 'i', @@ -155,19 +156,7 @@ typedef struct encoder_state_config_frame_t { pthread_mutex_t rc_lock; - struct - { - double *c_para[KVZ_MAX_GOP_LAYERS]; - double *k_para[KVZ_MAX_GOP_LAYERS]; - double pic_c_para[KVZ_MAX_GOP_LAYERS]; - double pic_k_para[KVZ_MAX_GOP_LAYERS]; - double previous_lambdas[KVZ_MAX_GOP_LAYERS+1]; - double previous_frame_lambda; - double *intra_bpp; - double *intra_dis; - double intra_pic_distortion; - double intra_pic_bpp; - } new_ratecontrol; + struct kvz_rc_data *new_ratecontrol; FILE * bpp_d; diff --git a/src/kvazaar.c b/src/kvazaar.c index 6c4369f2..741142a3 100644 --- a/src/kvazaar.c +++ b/src/kvazaar.c @@ -38,6 +38,7 @@ #include "strategyselector.h" #include "threadqueue.h" #include "videoframe.h" +#include "rate_control.h" static void kvazaar_close(kvz_encoder *encoder) @@ -64,6 +65,7 @@ static void kvazaar_close(kvz_encoder *encoder) } FREE_POINTER(encoder->states); + kvz_free_rc_data(); // Discard const from the pointer. 
kvz_encoder_control_free((void*) encoder->control); encoder->control = NULL; @@ -99,6 +101,11 @@ static kvz_encoder * kvazaar_open(const kvz_config *cfg) encoder->frames_started = 0; encoder->frames_done = 0; + // Assure that the rc data allocation was successful + if(!kvz_get_rc_data(encoder->control)) { + goto kvazaar_open_failure; + } + kvz_init_input_frame_buffer(&encoder->input_buffer); encoder->states = calloc(encoder->num_encoder_states, sizeof(encoder_state_t)); @@ -108,7 +115,6 @@ static kvz_encoder * kvazaar_open(const kvz_config *cfg) for (unsigned i = 0; i < encoder->num_encoder_states; ++i) { encoder->states[i].encoder_control = encoder->control; - if (!kvz_encoder_state_init(&encoder->states[i], NULL)) { goto kvazaar_open_failure; } diff --git a/src/rate_control.c b/src/rate_control.c index 13a61fb9..45466be5 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -31,6 +31,8 @@ static const double MIN_LAMBDA = 0.1; static const double MAX_LAMBDA = 10000; #define BETA1 1.2517 +static kvz_rc_data *data; + /** * \brief Clip lambda value to a valid range. 
*/ @@ -39,6 +41,79 @@ static double clip_lambda(double lambda) { return CLIP(MIN_LAMBDA, MAX_LAMBDA, lambda); }
+/**
+ * \brief Free the shared rate control data.
+ *
+ * Releases the per-LCU arrays and then the structure itself. Does nothing
+ * when the data has not been allocated.
+ */
+void kvz_free_rc_data(void) {
+  if (data == NULL) return;
+
+  FREE_POINTER(data->intra_bpp);
+  FREE_POINTER(data->intra_dis);
+  for (int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
+    FREE_POINTER(data->c_para[i]);
+    FREE_POINTER(data->k_para[i]);
+  }
+  FREE_POINTER(data);
+}
+
+/**
+ * \brief Get the shared rate control data, allocating it on the first call.
+ *
+ * \param encoder  encoder control whose LCU dimensions size the per-LCU
+ *                 arrays; with NULL the current pointer is returned as is
+ * \return the shared data, or NULL if nothing has been allocated yet or an
+ *         allocation failed
+ *
+ * NOTE(review): an already allocated structure is returned without looking
+ * at \p encoder, so the arrays keep the size of the first encoder -- confirm
+ * that only one encoder instance exists at a time.
+ */
+kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder) {
+  if (data != NULL || encoder == NULL) return data;
+
+  // calloc zero-fills the structure, so every array pointer starts out NULL
+  // and a partially built structure can be passed to kvz_free_rc_data().
+  data = calloc(1, sizeof(kvz_rc_data));
+  if (data == NULL) return NULL;
+
+  const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu;
+  // Request at least one element so that NULL always means failure.
+  const size_t count = num_lcus > 0 ? (size_t)num_lcus : 1;
+
+  for (int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
+    data->c_para[i] = malloc(sizeof(double) * count);
+    data->k_para[i] = malloc(sizeof(double) * count);
+    if (data->c_para[i] == NULL || data->k_para[i] == NULL) goto failure;
+
+    data->pic_c_para[i] = 5.0;
+    data->pic_k_para[i] = -0.1;
+    for (int j = 0; j < num_lcus; j++) {
+      data->c_para[i][j] = 5.0;
+      data->k_para[i][j] = -0.1;
+    }
+  }
+  data->intra_bpp = calloc(count, sizeof(double));
+  data->intra_dis = calloc(count, sizeof(double));
+  if (data->intra_bpp == NULL || data->intra_dis == NULL) goto failure;
+
+  memset(data->previous_lambdas, 0, sizeof(data->previous_lambdas));
+  data->previous_frame_lambda = 0.0;
+  data->intra_pic_bpp = 0.0;
+  data->intra_pic_distortion = 0.0;
+
+  return data;
+
+failure:
+  // Drop whatever was allocated so that a later call starts from scratch.
+  kvz_free_rc_data();
+  data = NULL;
+  return NULL;
+}
+
+
 /** * \brief Update alpha and beta parameters.
* @@ -297,10 +339,10 @@ static double solve_cubic_equation(const encoder_state_config_frame_t * const st double b = 0.0; double c = 0.0; double d = 0.0; - assert(!((state->new_ratecontrol.c_para[layer][i] <= 0) || (state->new_ratecontrol.k_para[layer][i] >= 0))); //Check C and K during each solution + assert(!((state->new_ratecontrol->c_para[layer][i] <= 0) || (state->new_ratecontrol->k_para[layer][i] >= 0))); //Check C and K during each solution - double CLCU = state->new_ratecontrol.c_para[layer][i]; - double KLCU = state->new_ratecontrol.k_para[layer][i]; + double CLCU = state->new_ratecontrol->c_para[layer][i]; + double KLCU = state->new_ratecontrol->k_para[layer][i]; a = -CLCU * KLCU / pow(state->lcu_stats[i].pixels, KLCU - 1.0); b = -1.0 / (KLCU - 1.0); d = est_lambda; @@ -355,8 +397,8 @@ static double solve_cubic_equation(const encoder_state_config_frame_t * const st static INLINE double calculate_weights(encoder_state_t* const state, const int layer, const int ctu_count, double estLambda) { double total_weight = 0; for(int i = 0; i < ctu_count; i++) { - double c_lcu = state->frame->new_ratecontrol.c_para[layer][i]; - double k_lcu = state->frame->new_ratecontrol.k_para[layer][i]; + double c_lcu = state->frame->new_ratecontrol->c_para[layer][i]; + double k_lcu = state->frame->new_ratecontrol->k_para[layer][i]; double a = -c_lcu * k_lcu / pow(state->frame->lcu_stats[i].pixels, k_lcu - 1.0); double b = -1.0 / (k_lcu - 1.0); state->frame->lcu_stats[i].original_weight = state->frame->lcu_stats[i].weight = pow(a / estLambda, b); @@ -395,9 +437,9 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) { beta = state->frame->rc_beta; } else { - alpha = -state->frame->new_ratecontrol.pic_c_para[layer] * - state->frame->new_ratecontrol.pic_k_para[layer]; - beta = state->frame->new_ratecontrol.pic_k_para[layer] - 1; + alpha = -state->frame->new_ratecontrol->pic_c_para[layer] * + state->frame->new_ratecontrol->pic_k_para[layer]; + beta = 
state->frame->new_ratecontrol->pic_k_para[layer] - 1; } double bits = pic_allocate_bits(state); // fprintf(state->frame->bpp_d, "Frame %d\tbits:\t%f\n", state->frame->num, bits); @@ -416,12 +458,12 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) { } double temp_lambda; - if ((temp_lambda = state->frame->new_ratecontrol.previous_lambdas[layer]) > 0.0) { + if ((temp_lambda = state->frame->new_ratecontrol->previous_lambdas[layer]) > 0.0) { temp_lambda = CLIP(0.1, 10000.0, temp_lambda); est_lambda = CLIP(temp_lambda * pow(2.0, -1), temp_lambda * 2, est_lambda); } - if((temp_lambda = state->frame->new_ratecontrol.previous_frame_lambda) > 0.0) { + if((temp_lambda = state->frame->new_ratecontrol->previous_frame_lambda) > 0.0) { temp_lambda = CLIP(0.1, 2000.0, temp_lambda); est_lambda = CLIP(temp_lambda * pow(2.0, -10.0 / 3.0), temp_lambda * pow(2.0, 10.0 / 3.0), est_lambda); } @@ -440,8 +482,8 @@ void kvz_estimate_pic_lambda(encoder_state_t * const state) { taylor_e3 = 0.0; best_lambda = temp_lambda = solve_cubic_equation(state->frame, 0, ctu_count, layer, temp_lambda, bits); for (int i = 0; i < ctu_count; ++i) { - double CLCU = state->frame->new_ratecontrol.c_para[layer][i]; - double KLCU = state->frame->new_ratecontrol.k_para[layer][i]; + double CLCU = state->frame->new_ratecontrol->c_para[layer][i]; + double KLCU = state->frame->new_ratecontrol->k_para[layer][i]; double a = -CLCU * KLCU / pow(state->frame->lcu_stats[i].pixels, KLCU - 1.0); double b = -1.0 / (KLCU - 1.0); taylor_e3 += pow(a / best_lambda, b); @@ -520,8 +562,8 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) { temp_lambda = best_lambda; for (int i = index; i < last_ctu; i++) { - double CLCU = state->frame->new_ratecontrol.c_para[layer][i]; - double KLCU = state->frame->new_ratecontrol.k_para[layer][i]; + double CLCU = state->frame->new_ratecontrol->c_para[layer][i]; + double KLCU = state->frame->new_ratecontrol->k_para[layer][i]; double a = -CLCU * KLCU / 
pow((double)state->frame->lcu_stats[i].pixels, KLCU - 1.0); double b = -1.0 / (KLCU - 1.0); taylor_e3 += pow(a / best_lambda, b); @@ -529,8 +571,8 @@ static double get_ctu_bits(encoder_state_t * const state, vector2d_t pos) { iter++; } while (fabs(taylor_e3 - target_bits) > 0.01 && iter < 5); - double c_ctu = state->frame->new_ratecontrol.c_para[layer][index]; - double k_ctu = state->frame->new_ratecontrol.k_para[layer][index]; + double c_ctu = state->frame->new_ratecontrol->c_para[layer][index]; + double k_ctu = state->frame->new_ratecontrol->k_para[layer][index]; double a = -c_ctu * k_ctu / pow(((double)state->frame->lcu_stats[index].pixels), k_ctu - 1.0); double b = -1.0 / (k_ctu - 1.0); @@ -565,9 +607,9 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) { beta = state->frame->rc_beta; } else { - alpha = -state->frame->new_ratecontrol.c_para[layer][index] * - state->frame->new_ratecontrol.k_para[layer][index]; - beta = state->frame->new_ratecontrol.k_para[layer][index] - 1; + alpha = -state->frame->new_ratecontrol->c_para[layer][index] * + state->frame->new_ratecontrol->k_para[layer][index]; + beta = state->frame->new_ratecontrol->k_para[layer][index] - 1; } double est_lambda; @@ -650,8 +692,8 @@ void kvz_set_ctu_qp_lambda(encoder_state_t * const state, vector2d_t pos) { static void update_pic_ck(encoder_state_t * const state, double bpp, double distortion, double lambda, int layer) { double new_k, new_c; if(state->frame->num == 1) { - new_k = log(distortion / state->frame->new_ratecontrol.intra_pic_distortion) / - log(bpp / state->frame->new_ratecontrol.intra_pic_bpp); + new_k = log(distortion / state->frame->new_ratecontrol->intra_pic_distortion) / + log(bpp / state->frame->new_ratecontrol->intra_pic_bpp); new_c = distortion / pow(bpp, new_k); } else { @@ -663,13 +705,13 @@ static void update_pic_ck(encoder_state_t * const state, double bpp, double dist if(state->frame->is_irap || state->frame->num <= (4 - 
state->encoder_control->cfg.frame_allocation)) { for(int i = 1; i < 5; i++) { - state->frame->new_ratecontrol.pic_c_para[i] = new_c; - state->frame->new_ratecontrol.pic_k_para[i] = new_k; + state->frame->new_ratecontrol->pic_c_para[i] = new_c; + state->frame->new_ratecontrol->pic_k_para[i] = new_k; } } else { - state->frame->new_ratecontrol.pic_c_para[layer] = new_c; - state->frame->new_ratecontrol.pic_k_para[layer] = new_k; + state->frame->new_ratecontrol->pic_c_para[layer] = new_c; + state->frame->new_ratecontrol->pic_k_para[layer] = new_k; } // fprintf(state->frame->c_d, "Frame %d\tC:\t%f\tbpp\t%f\tdistortion\t%f\tlambda\t%f\n", state->frame->num, new_c, bpp, distortion, lambda); // fprintf(state->frame->k_d, "Frame %d\tK:\t%f\tbpp\t%f\tdistortion\t%f\tlambda\t%f\n", state->frame->num, new_k, bpp, distortion, lambda); @@ -687,12 +729,12 @@ static void update_ck(encoder_state_t * const state, int ctu_index, int layer) distortion = MAX(distortion, 0.0001); if (state->frame->num == 1) { if (bpp < 0.001) { - new_k = state->frame->new_ratecontrol.pic_k_para[layer]; - new_c = state->frame->new_ratecontrol.intra_dis[ctu_index] / pow(state->frame->new_ratecontrol.intra_bpp[ctu_index], new_k); + new_k = state->frame->new_ratecontrol->pic_k_para[layer]; + new_c = state->frame->new_ratecontrol->intra_dis[ctu_index] / pow(state->frame->new_ratecontrol->intra_bpp[ctu_index], new_k); } else { - new_k = log(distortion / state->frame->new_ratecontrol.intra_pic_distortion) / - log(bpp / state->frame->new_ratecontrol.intra_pic_bpp); + new_k = log(distortion / state->frame->new_ratecontrol->intra_pic_distortion) / + log(bpp / state->frame->new_ratecontrol->intra_pic_bpp); new_c = distortion / pow(bpp, new_k); } } @@ -706,13 +748,13 @@ static void update_ck(encoder_state_t * const state, int ctu_index, int layer) if (state->frame->is_irap || state->frame->num <= (4 - state->encoder_control->cfg.frame_allocation)) { for (int i = 1; i < 5; i++) { - 
state->frame->new_ratecontrol.c_para[i][ctu_index] = new_c; - state->frame->new_ratecontrol.k_para[i][ctu_index] = new_k; + state->frame->new_ratecontrol->c_para[i][ctu_index] = new_c; + state->frame->new_ratecontrol->k_para[i][ctu_index] = new_k; } } else { - state->frame->new_ratecontrol.c_para[layer][ctu_index] = new_c; - state->frame->new_ratecontrol.k_para[layer][ctu_index] = new_k; + state->frame->new_ratecontrol->c_para[layer][ctu_index] = new_c; + state->frame->new_ratecontrol->k_para[layer][ctu_index] = new_k; } } // fprintf(state->frame->c_d, "CTU %d\tC:\t%f\tbpp\t%f\tdistortion\t%f\tlambda\t%f\n", ctu_index, new_c, bpp, distortion, lambda); @@ -760,18 +802,18 @@ void kvz_update_after_picture(encoder_state_t * const state) { for (int y_ctu = 0; y_ctu < state->encoder_control->in.height_in_lcu; y_ctu++) { for (int x_ctu = 0; x_ctu < state->encoder_control->in.width_in_lcu; x_ctu++) { lcu_stats_t *ctu = kvz_get_lcu_stats(state, x_ctu, y_ctu); - state->frame->new_ratecontrol.intra_dis[x_ctu + y_ctu * state->encoder_control->in.width_in_lcu] = + state->frame->new_ratecontrol->intra_dis[x_ctu + y_ctu * state->encoder_control->in.width_in_lcu] = ctu->distortion; - state->frame->new_ratecontrol.intra_bpp[x_ctu + y_ctu * state->encoder_control->in.width_in_lcu] = + state->frame->new_ratecontrol->intra_bpp[x_ctu + y_ctu * state->encoder_control->in.width_in_lcu] = ctu->bits / ctu->pixels; } } - state->frame->new_ratecontrol.intra_pic_distortion = total_distortion; - state->frame->new_ratecontrol.intra_pic_bpp = pic_bpp; + state->frame->new_ratecontrol->intra_pic_distortion = total_distortion; + state->frame->new_ratecontrol->intra_pic_bpp = pic_bpp; } - state->frame->new_ratecontrol.previous_frame_lambda = lambda; - state->frame->new_ratecontrol.previous_lambdas[layer] = lambda; + state->frame->new_ratecontrol->previous_frame_lambda = lambda; + state->frame->new_ratecontrol->previous_lambdas[layer] = lambda; update_pic_ck(state, pic_bpp, total_distortion, lambda, 
layer); for(int i = 0; i < state->encoder_control->in.width_in_lcu * state->encoder_control->in.height_in_lcu; i++) { diff --git a/src/rate_control.h b/src/rate_control.h index ddb4214b..3b9d7d40 100644 --- a/src/rate_control.h +++ b/src/rate_control.h @@ -30,6 +30,33 @@ #include "encoderstate.h"
+/**
+ * \brief Rate control model data handed out by kvz_get_rc_data().
+ */
+typedef struct kvz_rc_data {
+  double *c_para[KVZ_MAX_GOP_LAYERS];  //!< per-LCU C parameter array for each GOP layer
+  double *k_para[KVZ_MAX_GOP_LAYERS];  //!< per-LCU K parameter array for each GOP layer
+  double pic_c_para[KVZ_MAX_GOP_LAYERS];  //!< picture-level C parameter for each GOP layer
+  double pic_k_para[KVZ_MAX_GOP_LAYERS];  //!< picture-level K parameter for each GOP layer
+  double previous_lambdas[KVZ_MAX_GOP_LAYERS + 1];  //!< lambda stored per layer by kvz_update_after_picture()
+  double previous_frame_lambda;  //!< lambda stored by the latest kvz_update_after_picture() call
+  double *intra_bpp;  //!< per-LCU bits / pixels (presumably of the latest intra picture)
+  double *intra_dis;  //!< per-LCU distortion (presumably of the latest intra picture)
+  double intra_pic_distortion;  //!< picture distortion stored together with intra_dis
+  double intra_pic_bpp;  //!< picture bits per pixel stored together with intra_bpp
+} kvz_rc_data;
+
+/**
+ * \brief Get the shared rate control data; it is allocated on the first call
+ *        made with a non-NULL encoder.
+ */
+kvz_rc_data * kvz_get_rc_data(const encoder_control_t * const encoder);
+
+/**
+ * \brief Free the data allocated by kvz_get_rc_data().
+ */
+void kvz_free_rc_data(void);
+
 void kvz_set_picture_lambda_and_qp(encoder_state_t * const state); void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,