WIP picture lambda calculation

2024-11-24 02:24:07 +00:00 · 2019-09-03 10:57:13 +03:00 · 2019-09-03 10:57:13 +03:00 · 7d2737bdf6
parent 131c04f65c
commit 7d2737bdf6
4 changed files with 225 additions and 6 deletions
--- a/src/encoder_state-ctors_dtors.c
+++ b/src/encoder_state-ctors_dtors.c
@ -56,12 +56,41 @@ static int encoder_state_config_frame_init(encoder_state_t * const state) {
  const int num_lcus = encoder->in.width_in_lcu * encoder->in.height_in_lcu;
  state->frame->lcu_stats = MALLOC(lcu_stats_t, num_lcus);

+  for (int y = 0; y < encoder->in.height_in_lcu; y++) {
+    for (int x = 0; x < encoder->in.width_in_lcu; x++) {
+      state->frame->lcu_stats[x + y * encoder->in.width_in_lcu].pixels =
+        MIN(encoder->cfg.width - (x + 1)*64, 64) * MIN(encoder->cfg.height - (y + 1) * 64, 64);
+    }
+  }
+
+  for(int i = 0; i < KVZ_MAX_GOP_LAYERS; i++) {
+    state->frame->new_lookahead.c_para[i] = malloc(sizeof(double) * num_lcus);
+    state->frame->new_lookahead.k_para[i] = malloc(sizeof(double) * num_lcus);
+    state->frame->new_lookahead.pic_c_para[i] = 5.0;
+    state->frame->new_lookahead.pic_k_para[i] = -0.1;
+    for(int j = 0; j < num_lcus; j++) {
+      state->frame->new_lookahead.c_para[i][j] = 5.0;
+      state->frame->new_lookahead.k_para[i][j] = -0.1;
+    }
+  }
+  state->frame->new_lookahead.intra_slice_bpp = calloc(num_lcus, sizeof(double));
+  state->frame->new_lookahead.intra_slice_dis = calloc(num_lcus, sizeof(double));
+  memset(state->frame->new_lookahead.previous_lambdas, 0, sizeof(state->frame->new_lookahead.previous_lambdas));
+  state->frame->new_lookahead.last_frame_lambda = 0.0;
+
  return 1;
 }

 static void encoder_state_config_frame_finalize(encoder_state_t * const state) {
  if (state->frame == NULL) return;

+  FREE_POINTER(state->frame->new_lookahead.intra_slice_bpp);
+  FREE_POINTER(state->frame->new_lookahead.intra_slice_dis);
+  for(int i = 0; i < 6; i++) {
+    FREE_POINTER(state->frame->new_lookahead.c_para[i]);
+    FREE_POINTER(state->frame->new_lookahead.k_para[i]);
+  }
+
  kvz_image_list_destroy(state->frame->ref);
  FREE_POINTER(state->frame->lcu_stats);
 }
--- a/src/encoderstate.h
+++ b/src/encoderstate.h
@ -53,6 +53,8 @@ typedef struct lcu_stats_t {
  //! \brief Number of bits that were spent
  uint32_t bits;

+  uint32_t pixels;
+
  //! \brief Weight of the LCU for rate control
  double weight;

@ -141,6 +143,18 @@ typedef struct encoder_state_config_frame_t {
   */
  lcu_stats_t *lcu_stats;

+  struct
+  {
+    double *c_para[KVZ_MAX_GOP_LAYERS];
+    double *k_para[KVZ_MAX_GOP_LAYERS];
+    double pic_c_para[KVZ_MAX_GOP_LAYERS];
+    double pic_k_para[KVZ_MAX_GOP_LAYERS];
+    double *intra_slice_bpp;
+    double *intra_slice_dis;
+    double previous_lambdas[KVZ_MAX_GOP_LAYERS+1];
+    double last_frame_lambda;
+  } new_lookahead;
+
  /**
   * \brief Whether next NAL is the first NAL in the access unit.
   */
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -64,6 +64,11 @@ extern "C" {
 */
 #define KVZ_MAX_GOP_LENGTH 32

+
+/**
+* Maximum amount of GoP layers.
+*/
+#define KVZ_MAX_GOP_LAYERS 6
 /**
 * Size of data chunks.
 */
@ -389,7 +394,7 @@ typedef struct kvz_config

  /** \brief Enable Early Skip Mode Decision */
  uint8_t early_skip;
-
+  uint8_t frame_allocation;
 } kvz_config;

 /**
--- a/src/rate_control.c
+++ b/src/rate_control.c
@ -69,7 +69,7 @@ static void update_parameters(uint32_t bits,
 * \param state   the main encoder state
 * \return        target number of bits
 */
-static double gop_allocate_bits(encoder_state_t * const state, int gop_length)
+static double gop_allocate_bits(encoder_state_t * const state)
 {
  const encoder_control_t * const encoder = state->encoder_control;

@ -90,7 +90,7 @@ static double gop_allocate_bits(encoder_state_t * const state, int gop_length)
  // Equation 12 from https://doi.org/10.1109/TIP.2014.2336550
  double gop_target_bits =
    (encoder->target_avg_bppic * (pictures_coded + SMOOTHING_WINDOW) - bits_coded)
-    * MAX(1, gop_length) / SMOOTHING_WINDOW;
+    * MAX(1, encoder->cfg.gop_len) / SMOOTHING_WINDOW;
  // Allocate at least 200 bits for each GOP like HM does.
  return MAX(200, gop_target_bits);
 }
@ -148,7 +148,7 @@ static double pic_allocate_bits(encoder_state_t * const state)
      state->frame->num == 0)
  {
    // A new GOP starts at this frame.
-    state->frame->cur_gop_target_bits = gop_allocate_bits(state, state->frame->poc == 0 ? 1 : encoder->cfg.gop_len);
+    state->frame->cur_gop_target_bits = gop_allocate_bits(state);
    state->frame->cur_gop_bits_coded  = 0;
  } else {
    state->frame->cur_gop_target_bits =
@ -159,14 +159,185 @@ static double pic_allocate_bits(encoder_state_t * const state)
    return state->frame->cur_gop_target_bits;
  }

-  const double pic_weight = state->frame->poc != 0 ? encoder->gop_layer_weights[
-    encoder->cfg.gop[state->frame->gop_offset].layer - 1] : 1;
+  const double pic_weight = encoder->gop_layer_weights[
+    encoder->cfg.gop[state->frame->gop_offset].layer - 1];
  const double pic_target_bits =
    state->frame->cur_gop_target_bits * pic_weight - pic_header_bits(state);
  // Allocate at least 100 bits for each picture like HM does.
  return MAX(100, pic_target_bits);
 }

+
+static double solve_cubic_equation(const encoder_state_config_frame_t * const state,
+                            int ctu_index,
+                            int last_ctu,
+                            int layer,
+                            double est_lambda,
+                            double target_bits) 
+{
+  double bestlambda = 0.0;
+  double paraA = 0.0;
+  double paraB = 0.0;
+  double paraC = 0.0;
+  double paraD = 0.0;
+  double delta = 0.0;
+  double paraAA = 0.0;
+  double paraBB = 0.0;
+  double paraCC = 0.0;
+  for (int i = ctu_index; i < last_ctu; i++)
+  {
+    double a = 0.0;
+    double b = 0.0;
+    double c = 0.0;
+    double d = 0.0;
+    assert((state->new_lookahead.c_para[layer][i] <= 0) || (state->new_lookahead.k_para[layer][i] >= 0)); //Check C and K during each solution 
+
+    double CLCU = state->new_lookahead.c_para[layer][i];
+    double KLCU = state->new_lookahead.k_para[layer][i];
+    a = -CLCU * KLCU / pow(state->lcu_stats[i].pixels, KLCU - 1.0);
+    b = -1.0 / (KLCU - 1.0);
+    d = est_lambda;
+    c = pow(a / d, b);
+    paraA = paraA - c * pow(b, 3.0) / 6.0;
+    paraB = paraB + (pow(b, 2.0) / 2.0 + pow(b, 3.0)*log(d) / 2.0)*c;
+    paraC = paraC - (pow(b, 3.0) / 2.0*pow(log(d), 2.0) + pow(b, 2.0)*log(d) + b)*c;
+    paraD = paraD + c * (1 + b * log(d) + pow(b, 2.0) / 2 * pow(log(d), 2.0) + pow(b, 3.0) / 6 * pow(log(d), 3.0));
+  }
+
+  paraD = paraD - target_bits;
+  paraAA = paraB * paraB - 3 * paraA*paraC;
+  paraBB = paraB * paraC - 9 * paraA*paraD;
+  paraCC = paraC * paraC - 3 * paraB*paraD;
+
+  delta = paraBB * paraBB - 4 * paraAA*paraCC;
+
+  if (delta > 0.0)	//Check whether delta is right
+  {
+    double tempx = 0.0;
+    double part1 = 0.0;
+    double part2 = 0.0;
+    double flag1 = 0.0;
+    double flag2 = 0.0;
+    part1 = paraAA * paraB + 3 * paraA*(-paraBB - pow(delta, 0.5)) / 2.0;
+    part2 = paraAA * paraB + 3 * paraA*(-paraBB + pow(delta, 0.5)) / 2.0;
+    if (part1 < 0.0) {
+      part1 = -part1;
+      flag1 = -1.0;
+    }
+    else {
+      flag1 = 1.0;
+    }
+    if (part2 < 0.0) {
+      part2 = -part2;
+      flag2 = -1.0;
+    }
+    else {
+      flag2 = 1.0;
+    }
+    tempx = (-paraB - flag1 * pow(part1, 1.0 / 3.0) - flag2 * pow(part2, 1.0 / 3.0)) / 3 / paraA;
+    bestlambda = exp(tempx);
+  }
+  else {
+    bestlambda = est_lambda;		//Use the original picture estimated lambda for the current CTU
+  }
+  bestlambda = CLIP(0.001, 100000000.0, bestlambda);
+
+  return bestlambda;
+}
+
+static INLINE double calculate_weights(encoder_state_t* const state, const int layer, const int ctu_count, double estLambda) {
+  double total_weight = 0;
+  for(int i = 0; i < ctu_count; i++) {
+    double CLCU = state->frame->new_lookahead.c_para[layer][i];
+    double KLCU = state->frame->new_lookahead.k_para[layer][i];
+    double a = -CLCU * KLCU / pow(state->frame->lcu_stats[i].pixels, KLCU - 1.0);
+    double b = -1.0 / (KLCU - 1.0);
+    state->frame->lcu_stats[i].weight = pow(a / estLambda, b);
+    if (state->frame->lcu_stats[i].weight < 0.01) {
+      state->frame->lcu_stats[i].weight = 0.01;
+    }
+    total_weight += state->frame->lcu_stats[i].weight;
+  }
+  return total_weight;
+}
+
+void estimatePicLambda(encoder_state_t * const state) {
+  double bits = pic_allocate_bits(state);
+  const int layer = state->frame->gop_offset - (state->frame->is_irap ? 1 : 0);
+  const int ctu_count = state->tile->frame->height_in_lcu * state->tile->frame->width_in_lcu;
+
+  double alpha;
+  double beta;  
+  if(state->frame->poc == 0) {
+    alpha = state->frame->rc_alpha;
+    beta = state->frame->rc_beta;
+  }
+  else {
+    alpha = -state->frame->new_lookahead.pic_c_para[state->frame->gop_offset] *
+      state->frame->new_lookahead.pic_k_para[state->frame->gop_offset];
+    beta = state->frame->new_lookahead.pic_k_para[state->frame->gop_offset] - 1;
+  }
+  double estLambda;
+  double bpp = bits / (state->encoder_control->cfg.width * state->encoder_control->cfg.height);
+  if (state->frame->is_irap) {
+    // TODO: Intra
+    estLambda = alpha * pow(bpp, beta) * 0.5;
+  }
+  else {
+    estLambda = alpha * pow(bpp, beta);
+  }
+
+  double temp_lambda;
+  if ((temp_lambda = state->frame->new_lookahead.previous_lambdas[layer]) > 0.0) {
+    estLambda = CLIP(temp_lambda * pow(2.0, -1), temp_lambda * 2, estLambda);
+  }
+
+  if((temp_lambda = state->frame->new_lookahead.last_frame_lambda) > 0.0) {
+    estLambda = CLIP(temp_lambda * pow(2.0, -10.0 / 3.0), temp_lambda * pow(2.0, 10.0 / 3.0), estLambda);
+  }
+
+  estLambda = MIN(estLambda, 0.1);
+
+  double total_weight = 0;
+
+  if(!state->frame->is_irap) {
+    if(!state->encoder_control->cfg.frame_allocation) {
+      double best_lambda = 0.0;
+      temp_lambda = estLambda;
+      double taylor_e3;
+      int iteration_number = 0;
+      do {
+        taylor_e3 = 0.0;
+        best_lambda = temp_lambda = solve_cubic_equation(state->frame, 0, ctu_count, layer, temp_lambda, bits);
+        for (int i = 0; i < ctu_count; ++i) {
+          double CLCU = state->frame->new_lookahead.c_para[layer][i];
+          double KLCU = state->frame->new_lookahead.k_para[layer][i];
+          double a = -CLCU * KLCU / pow(state->frame->lcu_stats[i].pixels, KLCU - 1.0);
+          double b = -1.0 / (KLCU - 1.0);
+          taylor_e3 += pow(a / best_lambda, b);
+        }
+      }
+      while (fabs(taylor_e3 - bits) > 0.01 && iteration_number <= 11);
+    }
+    total_weight = calculate_weights(state, layer, ctu_count, estLambda);
+  }
+  else {
+    for (int i = 0; i < ctu_count; ++i) {
+      state->frame->lcu_stats[i].weight = MAX(0.01,
+        state->frame->lcu_stats[i].pixels * pow(estLambda / state->frame->rc_alpha,
+                                                1.0 / state->frame->rc_beta));
+      total_weight += state->frame->lcu_stats[i].weight;
+    }
+  }
+
+  for(int i = 0; i < ctu_count; ++i) {
+    state->frame->lcu_stats[i].weight = bits * state->frame->lcu_stats[i].weight / total_weight;
+  }
+
+  state->frame->lambda = estLambda;
+}
+
+
 static int8_t lambda_to_qp(const double lambda)
 {
  const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5;