Merge branch 'lowdelay_GOP'

Conflicts: README.md
2024-11-27 11:24:05 +00:00 · 2015-11-14 00:05:13 +02:00 · 2015-11-14 00:05:13 +02:00 · cfe834bb53
parent 0722f461c5 5ae97b46c6
commit cfe834bb53
8 changed files with 158 additions and 16 deletions
--- a/README.md
+++ b/README.md
@ -64,7 +64,10 @@ http://ultravideo.cs.tut.fi/#encoder for more information.
              --pu-depth-intra <int>-<int> : Range for sizes of intra prediction units to try.
                                         0: 64x64, 1: 32x32, 2: 16x16, 3: 8x8, 4: 4x4
              --no-info              : Don't add information about the encoder to settings.
-              --gop <int>            : Length of Group of Pictures, must be 8 or 0 [0]
+              --gop <string>         : Definition for GOP [0]
+                                         - 0 disabled
+                                         - 8 B-frame pyramid of length 8
+                                         - lp-gop syntax, defined below (example: lp-g8d4r3t2)
              --bipred               : Enable bi-prediction search
              --bitrate <integer>    : Target bitrate. [0]
                                         0: disable rate-control
@ -126,13 +129,25 @@ http://ultravideo.cs.tut.fi/#encoder for more information.
           -w, --width               : Width of input in pixels
           -h, --height              : Height of input in pixels

-For example:
+
+### For example:

    kvazaar -i BQMall_832x480_60.yuv --input-res 832x480 -o out.hevc -n 600 -q 32

 The only accepted input format so far is 8-bit YUV 4:2:0.


+### LP-GOP syntax
+The LP-GOP syntax is "lp-g(num)d(num)r(num)t(num)", where
+- g = GOP length.
+- d = Number of GOP layers.
+- r = Number of references, where one reference is always the previous picture,
+  unless temporal scaling is used. The others are key-frames.
+- t = How many references to skip for temporal scaling, where 4 means only
+  every fourth picture needs to be decoded.
+
+
+
 ##Presets
 The names of the presets are the same as with x264: ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow and placebo. The effects of the presets are listed in the following table, where the names have been abbreviated to fit the layout in GitHub.

--- a/src/config.c
+++ b/src/config.c
@ -565,8 +565,111 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
  else if OPT("info")
    cfg->add_encoder_info = atobool(value);
  else if OPT("gop") {
-    // TODO: Defining the whole GOP structure via parameters
-    if(atoi(value) == 8) {
+    if (!strncmp(value, "lp-", 3)) {  // Handle GOPs starting with "lp-".
+      struct {
+        unsigned g;  // length
+        unsigned d;  // depth
+        unsigned r;  // references 
+        unsigned t;  // temporal
+      } gop = { 0 };
+
+      if (sscanf(value, "lp-g%ud%ur%ut%u", &gop.g, &gop.d, &gop.r, &gop.t) != 4) {
+        fprintf(stderr, "Error in GOP syntax. Example: lp-g8d4r2t2\n");
+        return 0;
+      }
+
+      if (gop.g < 1 || gop.g > 32) {
+        fprintf(stderr, "gop.g must be between 1 and 32.\n");
+      }
+      if (gop.d < 1 || gop.d > 8) {
+        fprintf(stderr, "gop.d must be between 1 and 8.\n");
+      }
+      if (gop.r < 1 || gop.r > 15) {
+        fprintf(stderr, "gop.d must be between 1 and 15.\n");
+      }
+      if (gop.t < 1 || gop.t > 15) {
+        fprintf(stderr, "gop.t must be between 1 and 32.\n");
+      }
+      
+      // Initialize modulos for testing depth.
+      // The picture belongs to the lowest depth in which (poc % modulo) == 0.
+      unsigned depth_modulos[8] = { 0 };
+      for (int d = 0; d < gop.d; ++d) {
+        depth_modulos[gop.d - 1 - d] = 1 << d;
+      }
+      depth_modulos[0] = gop.g;
+
+      cfg->gop_lowdelay = 1;
+      cfg->gop_len = gop.g;
+      for (int g = 1; g <= gop.g; ++g) {
+        kvz_gop_config *gop_pic = &cfg->gop[g - 1];
+
+        // Find gop depth for picture.
+        int gop_layer = 0;
+        while (gop_layer < gop.d && (g % depth_modulos[gop_layer])) {
+          ++gop_layer;
+        }
+
+        gop_pic->poc_offset = g;
+        gop_pic->layer = gop_layer + 1;
+        gop_pic->qp_offset = gop_layer + 1;
+        gop_pic->ref_pos_count = 0;
+        gop_pic->ref_neg_count = gop.r;
+        gop_pic->is_ref = 0;
+
+        // Set first ref to point to previous frame, and the rest to previous
+        // key-frames.
+        // If gop.t > 1, have (poc % gop.t) == 0 point gop.t frames away,
+        // instead of the previous frame. Set the frames in between to
+        // point to the nearest frame with a lower gop-depth.
+        if (gop.t > 1) {
+          if (gop_pic->poc_offset % gop.t == 0) {
+            gop_pic->ref_neg[0] = gop.t;
+          } else {
+            int r = gop_pic->poc_offset - 1;
+            while (r > 0) {
+              if (cfg->gop[r].layer < gop_pic->layer) break;
+              --r;
+            }
+            // Var r is now 0 or index of the pic with layer < depth.
+            if (cfg->gop[r].layer < gop_pic->layer) {
+              gop_pic->ref_neg[0] = gop_pic->poc_offset - cfg->gop[r].poc_offset;
+              cfg->gop[r].is_ref = 1;
+            } else {
+              // No ref was found, just refer to the previous key-frame.
+              gop_pic->ref_neg[0] = gop_pic->poc_offset % gop.g;
+            }
+          }
+        } else {
+          gop_pic->ref_neg[0] = 1;
+          if (gop_pic->poc_offset >= 2) {
+            cfg->gop[gop_pic->poc_offset - 2].is_ref = 1;
+          }
+        }
+
+        int keyframe = gop_pic->poc_offset % gop.g;
+        for (int i = 1; i < gop_pic->ref_neg_count; ++i) {
+          while (keyframe == gop_pic->ref_neg[i - 1]) {
+            keyframe += gop.g;
+          }
+          gop_pic->ref_neg[i] = keyframe;
+        }
+
+        gop_pic->qp_factor = 0.4624;  // from HM
+      }
+
+      for (int g = 0; g < gop.g; ++g) {
+        kvz_gop_config *gop_pic = &cfg->gop[g];
+        if (!gop_pic->is_ref) {
+          gop_pic->qp_factor = 0.68 * 1.31;  // derived from HM
+        }
+      }
+
+      // Key-frame is always a reference.
+      cfg->gop[gop.g - 1].is_ref = 1;
+      cfg->gop[gop.g - 1].qp_factor = 0.578;  // from HM
+    } else if (atoi(value) == 8) {
+      cfg->gop_lowdelay = 0;
      // GOP
      cfg->gop_len = 8;
      cfg->gop[0].poc_offset = 8; cfg->gop[0].qp_offset = 1; cfg->gop[0].layer = 1; cfg->gop[0].qp_factor = 0.442;  cfg->gop[0].is_ref = 1;
--- a/src/encoder.c
+++ b/src/encoder.c
@ -547,6 +547,7 @@ static int encoder_control_init_gop_layer_weights(encoder_control_t * const enco
    case 0:
      break;

+    case 3:
    case 4:
      // These weights were copied from http://doi.org/10.1109/TIP.2014.2336550
      if (encoder->target_avg_bpp <= 0.05) {
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@ -329,8 +329,13 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
  WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag");

  //for each layer
+  if (encoder->cfg->gop_lowdelay) {
+    WRITE_UE(stream, encoder->cfg->ref_frames, "sps_max_dec_pic_buffering");
+    WRITE_UE(stream, 0, "sps_num_reorder_pics");
+  } else {
    WRITE_UE(stream, encoder->cfg->ref_frames + encoder->cfg->gop_len, "sps_max_dec_pic_buffering");
    WRITE_UE(stream, encoder->cfg->gop_len, "sps_num_reorder_pics");
+  }
  WRITE_UE(stream, 0, "sps_max_latency_increase");
  //end for

--- a/src/encoderstate.c
+++ b/src/encoderstate.c
@ -788,6 +788,12 @@ static void encoder_state_new_frame(encoder_state_t * const state) {
      state->global->pictype = KVZ_NAL_IDR_W_RADL;
    } else {
      state->global->slicetype = encoder->cfg->intra_period==1 ? KVZ_SLICE_I : (state->encoder_control->cfg->gop_len?KVZ_SLICE_B:KVZ_SLICE_P);
+
+      // Use P-slice for lowdelay.
+      if (state->global->slicetype == KVZ_SLICE_B && encoder->cfg->gop_lowdelay) {
+        state->global->slicetype = KVZ_SLICE_P;
+      }
+
      state->global->pictype = KVZ_NAL_TRAIL_R;
      if (state->encoder_control->cfg->gop_len) {
        if (encoder->cfg->intra_period > 1 && (state->global->poc % encoder->cfg->intra_period) == 0) {
--- a/src/input_frame_buffer.c
+++ b/src/input_frame_buffer.c
@ -64,7 +64,7 @@ int kvz_encoder_feed_frame(input_frame_buffer_t *buf,
  assert(frame->source == NULL);
  assert(frame->rec    != NULL);

-  if (cfg->gop_len == 0) {
+  if (cfg->gop_len == 0 || cfg->gop_lowdelay) {
    // GOP disabled, just return the input frame.

    if (img_in == NULL) return 0;
@ -73,7 +73,7 @@ int kvz_encoder_feed_frame(input_frame_buffer_t *buf,
    frame->source   = kvz_image_copy_ref(img_in);
    frame->rec->pts = img_in->pts;
    frame->rec->dts = img_in->dts;
-    state->global->gop_offset = 0;
+    state->global->gop_offset = cfg->gop_lowdelay ? (state->global->frame-1) % cfg->gop_len : 0;
    return 1;
  }

--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -172,6 +172,7 @@ typedef struct kvz_config

  int32_t add_encoder_info;
  int8_t gop_len;            /*!< \brief length of GOP for the video sequence */
+  int8_t gop_lowdelay;       /*!< \brief specifies that the GOP does not use future pictures */
  kvz_gop_config gop[KVZ_MAX_GOP_LENGTH];  /*!< \brief Array of GOP settings */

  int32_t target_bitrate;
--- a/src/rate_control.c
+++ b/src/rate_control.c
@ -157,19 +157,30 @@ int8_t kvz_lambda_to_QP(const double lambda)
 double kvz_select_picture_lambda_from_qp(encoder_state_t const * const state)
 {
  const int gop_len = state->encoder_control->cfg->gop_len;
-  const double qp_temp = state->global->QP - 12;
+  const int intra_period = state->encoder_control->cfg->intra_period;
+  const int keyframe_period = gop_len > 0 ? gop_len : intra_period;
+  
+  double lambda = pow(2.0, (state->global->QP - 12) / 3.0);

-  double qp_factor;
  if (state->global->slicetype == KVZ_SLICE_I) {
-    const double lambda_scale = 1.0 - CLIP(0.0, 0.5, 0.05 * gop_len);
-    qp_factor = 0.57 * lambda_scale;
-  } else if (gop_len > 0) {
-    qp_factor = 0.95 * state->global->QP_factor;
+    lambda *= 0.57;
+    
+    // Reduce lambda for I-frames according to the number of references.
+    if (keyframe_period == 0) {
+      lambda *= 0.5;
    } else {
-    // default QP factor from HM config
-    qp_factor = 0.95 * 0.4624;
+      lambda *= 1.0 - CLIP(0.0, 0.5, 0.05 * (keyframe_period - 1));
+    }
+  } else if (gop_len > 0) {
+    lambda *= state->global->QP_factor;
+  } else {
+    lambda *= 0.4624;
+  }
+
+  // Increase lambda if not key-frame.
+  if (keyframe_period > 0 && state->global->poc % keyframe_period != 0) {
+    lambda *= CLIP(2.0, 4.0, (state->global->QP - 12) / 6.0);
  }
  
-  const double lambda = qp_factor * pow(2.0, qp_temp / 3.0);
  return lambda;
 }