Merge branch 'TMVP'

2024-11-27 19:24:06 +00:00 · 2016-08-10 15:07:37 +03:00 · 2016-08-10 15:07:37 +03:00 · cdcbfb38aa
parent 25eb973c38 e61c9469a0
commit cdcbfb38aa
11 changed files with 232 additions and 36 deletions
--- a/README.md
+++ b/README.md
@ -105,6 +105,7 @@ Optional parameters:
                                     "on": Early termination is on
                                     "sensitive": Sensitive early termination is on
          --lossless             : Use lossless coding
+          --no-tmvp              : Disable Temporal Motion Vector Prediction

  Video Usability Information:
          --sar <width:height>   : Specify Sample Aspect Ratio
--- a/configure.ac
+++ b/configure.ac
@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
 #
 # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
 ver_major=3
-ver_minor=10
+ver_minor=11
 ver_release=0

 # Prevents configure from adding a lot of defines to the CFLAGS
--- a/doc/kvazaar.1
+++ b/doc/kvazaar.1
@ -175,6 +175,9 @@ Specify the me early termination behaviour
 .TP
 \fB\-\-lossless            
 Use lossless coding
+.TP
+\fB\-\-no\-tmvp            
+Disable Temporal Motion Vector Prediction

 .SS "Video Usability Information:"
 .TP
--- a/src/cfg.c
+++ b/src/cfg.c
@ -81,6 +81,7 @@ int kvz_config_init(kvz_config *cfg)
  cfg->target_bitrate  = 0;
  cfg->hash            = KVZ_HASH_CHECKSUM;
  cfg->lossless        = false;
+  cfg->tmvp_enable     = true;

  cfg->cu_split_termination = KVZ_CU_SPLIT_TERMINATION_ZERO;

@ -603,10 +604,22 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
    cfg->aud_enable = atobool(value);
  else if OPT("cqmfile")
    cfg->cqmfile = strdup(value);
-  else if OPT("tiles-width-split")
-    return parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split);
-  else if OPT("tiles-height-split")
-    return parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split);
+  else if OPT("tiles-width-split") {
+    int retval = parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split);
+    if (cfg->tiles_width_count > 1 && cfg->tmvp_enable) {
+      cfg->tmvp_enable = false;
+      fprintf(stderr, "Disabling TMVP because tiles are used.\n");
+    }
+    return retval;
+  }
+  else if OPT("tiles-height-split") {
+    int retval = parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split);
+    if (cfg->tiles_height_count > 1 && cfg->tmvp_enable) {
+      cfg->tmvp_enable = false;
+      fprintf(stderr, "Disabling TMVP because tiles are used.\n");
+    }
+    return retval;
+  }
  else if OPT("tiles")
  {
    // A simpler interface for setting tiles, accepting only uniform split.
@ -631,6 +644,12 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
    FREE_POINTER(cfg->tiles_height_split);
    cfg->tiles_width_count = width;
    cfg->tiles_height_count = height;
+
+    if (cfg->tmvp_enable) {
+      cfg->tmvp_enable = false;
+      fprintf(stderr, "Disabling TMVP because tiles are used.\n");
+    }
+
    return 1;
  }
  else if OPT("wpp")
@ -797,6 +816,10 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
      fprintf(stderr, "Input error: unsupported gop length, must be 0 or 8\n");
      return 0;
    }
+    if (cfg->gop_len && cfg->tmvp_enable) {
+      cfg->tmvp_enable = false;
+      fprintf(stderr, "Disabling TMVP because GOP is used.\n");
+    }
  }
  else if OPT("bipred")
    cfg->bipred = atobool(value);
@ -910,6 +933,17 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
  }
  else if OPT("lossless")
    cfg->lossless = (bool)atobool(value);
+  else if OPT("tmvp") {
+    cfg->tmvp_enable = atobool(value);
+    if (cfg->gop_len && cfg->tmvp_enable) {
+      fprintf(stderr, "Cannot enable TMVP because GOP is used.\n");
+      cfg->tmvp_enable = false;
+    }
+    if (cfg->tiles_width_count > 1 || cfg->tiles_height_count > 1) {
+      fprintf(stderr, "Cannot enable TMVP because tiles are used.\n");
+      cfg->tmvp_enable = false;
+    }
+  }
  else
    return 0;
 #undef OPT
--- a/src/cli.c
+++ b/src/cli.c
@ -110,6 +110,8 @@ static const struct option long_options[] = {
  { "me-early-termination",required_argument, NULL, 0 },
  { "lossless",                 no_argument, NULL, 0 },
  { "no-lossless",              no_argument, NULL, 0 },
+  { "tmvp",                     no_argument, NULL, 0 },
+  { "no-tmvp",                  no_argument, NULL, 0 },
  {0, 0, 0, 0}
 };

@ -391,6 +393,7 @@ void print_help(void)
    "                                     \"on\": Early termination is on\n"
    "                                     \"sensitive\": Sensitive early termination is on\n"
    "          --lossless             : Use lossless coding\n"
+    "          --no-tmvp              : Disable Temporal Motion Vector Prediction\n"
    "\n"
    "  Video Usability Information:\n"
    "          --sar <width:height>   : Specify Sample Aspect Ratio\n"
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@ -393,7 +393,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
  //IF long_term_ref_pics_present
  //ENDIF

-  WRITE_U(stream, ENABLE_TEMPORAL_MVP, 1,
+  WRITE_U(stream, state->encoder_control->cfg->tmvp_enable, 1,
          "sps_temporal_mvp_enable_flag");
  WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag");
  WRITE_U(stream, 1, 1, "vui_parameters_present_flag");
@ -747,10 +747,16 @@ void kvz_encoder_state_write_bitstream_slice_header(encoder_state_t * const stat
      WRITE_U(stream, 1, 1, "used_by_curr_pic_s1_flag");
    }
    //WRITE_UE(stream, 0, "short_term_ref_pic_set_idx");
+    
+    if (state->encoder_control->cfg->tmvp_enable) {
+      WRITE_U(stream, 1, 1, "slice_temporal_mvp_enabled_flag");
+    }
  }

    //end if
  //end if
+
+
  if (encoder->sao_enable) {
    WRITE_U(stream, 1, 1, "slice_sao_luma_flag");
    WRITE_U(stream, 1, 1, "slice_sao_chroma_flag");
@ -763,6 +769,12 @@ void kvz_encoder_state_write_bitstream_slice_header(encoder_state_t * const stat
          WRITE_UE(stream, ref_positive != 0 ? ref_positive - 1 : 0, "num_ref_idx_l1_active_minus1");
          WRITE_U(stream, 0, 1, "mvd_l1_zero_flag");
        }
+
+      // ToDo: handle B-frames with TMVP
+      if (state->encoder_control->cfg->tmvp_enable && ref_negative > 1) {
+        WRITE_UE(stream, 0, "collocated_ref_idx");
+      }
+
      WRITE_UE(stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand");
  }

--- a/src/encoderstate.c
+++ b/src/encoderstate.c
@ -982,6 +982,10 @@ void kvz_encoder_prepare(encoder_state_t *state)
                   prev_state->tile->frame->rec,
                   prev_state->tile->frame->cu_array,
                   prev_state->frame->poc);
+    kvz_cu_array_free(state->tile->frame->cu_array);
+    unsigned height = state->tile->frame->height_in_lcu * LCU_WIDTH;
+    unsigned width  = state->tile->frame->width_in_lcu  * LCU_WIDTH;
+    state->tile->frame->cu_array = kvz_cu_array_alloc(width, height);
  }

  // Remove source and reconstructed picture.
--- a/src/global.h
+++ b/src/global.h
@ -124,9 +124,6 @@ typedef int16_t coeff_t;
 //! spec: pcm_enabled_flag, Setting to 1 will enable using PCM blocks (current intra-search does not consider PCM)
 #define ENABLE_PCM 0

-//! Enable usage of temporal Motion Vector Prediction
-#define ENABLE_TEMPORAL_MVP 0
-
 //! skip residual coding when it's under _some_ threshold
 #define OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD 0

--- a/src/inter.c
+++ b/src/inter.c
@ -22,6 +22,7 @@

 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>

 #include "encoder.h"
 #include "imagelist.h"
@ -644,6 +645,89 @@ static bool is_b0_cand_coded(int x, int y, int width, int height)
  return true;
 }

+
+/**
+* \brief Get temporal candidates C3 and H from the reference closest in POC
+* \param state encoder state; reference list and input size are read from it
+* \param x block x position in pixels (divided by SCU_WIDTH when indexing)
+* \param y block y position in pixels (divided by SCU_WIDTH when indexing)
+* \param width current block width
+* \param height current block height
+* \param C3 out: center candidate, left NULL unless that CU is CU_INTER
+* \param H out: bottom-right candidate, left NULL unless usable and CU_INTER
+*/
+static void kvz_inter_get_temporal_merge_candidates(const encoder_state_t * const state,
+                                             int32_t x,
+                                             int32_t y,
+                                             int32_t width,
+                                             int32_t height,
+                                             cu_info_t **C3,
+                                             cu_info_t **H) {
+  /*
+  Predictor block locations
+  _________
+  |CurrentPU|
+  | |C0|__  |
+  |    |C3| |
+  |_________|_
+            |H|
+  */
+
+  *C3 = NULL;
+  *H  = NULL;
+
+  // Pick the reference with the smallest absolute POC distance to this frame
+  if (state->frame->ref->used_size) {
+    uint32_t poc_diff = UINT_MAX;
+    int32_t closest_ref = 0;
+
+    for (int temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) {
+      int td = state->frame->poc - state->frame->ref->pocs[temporal_cand];
+
+      td = td < 0 ? -td : td;
+      if (td < poc_diff) {
+        closest_ref = temporal_cand;
+        poc_diff = td;
+      }
+    }
+
+    cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[closest_ref];
+    int cu_per_width = ref_cu_array->width / SCU_WIDTH;
+
+    uint32_t xColBr = x + width;
+    uint32_t yColBr = y + height;
+
+    // H is only considered when the bottom-right position is inside in.width x in.height
+    if (xColBr < state->encoder_control->in.width &&
+        yColBr < state->encoder_control->in.height) {
+      int32_t H_offset = -1;
+
+      // Skip H when yColBr lands on an LCU row boundary; position is rounded down to a multiple of 16
+      if (yColBr % LCU_WIDTH != 0) {
+        H_offset = ((xColBr >> 4) << 4) / SCU_WIDTH +
+                  (((yColBr >> 4) << 4) / SCU_WIDTH) * cu_per_width;
+      }
+
+      if (H_offset >= 0) {
+        // Only use when it's inter block
+        if (ref_cu_array->data[H_offset].type == CU_INTER) {
+          *H = &ref_cu_array->data[H_offset];
+        }
+      }
+    }
+    uint32_t xColCtr = x + (width / 2);
+    uint32_t yColCtr = y + (height / 2);
+
+    // C3 is the center position of the current block; used only inside in.width x in.height and when inter
+    if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) {
+      uint32_t C3_offset = ((xColCtr >> 4) << 4) / SCU_WIDTH + ((((yColCtr >> 4) << 4) / SCU_WIDTH) * cu_per_width);
+      if (ref_cu_array->data[C3_offset].type == CU_INTER) {
+        *C3 = &ref_cu_array->data[C3_offset];
+      }
+    }
+  }
+}
+
 /**
 * \brief Get merge candidates for current block.
 *
@ -840,11 +924,17 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
 * \brief Pick two mv candidates from the spatial candidates.
 */
 static void get_mv_cand_from_spatial(const encoder_state_t * const state,
+                                     int32_t x,
+                                     int32_t y,
+                                     int32_t width,
+                                     int32_t height,
                                     const cu_info_t *b0,
                                     const cu_info_t *b1,
                                     const cu_info_t *b2,
                                     const cu_info_t *a0,
                                     const cu_info_t *a1,
+                                     const cu_info_t *c3,
+                                     const cu_info_t *h,
                                     const cu_info_t *cur_cu,
                                     int8_t reflist,
                                     int16_t mv_cand[2][2])
@ -1000,11 +1090,44 @@ static void get_mv_cand_from_spatial(const encoder_state_t * const state,
    candidates = 1;
  }

-#if ENABLE_TEMPORAL_MVP
-  if(candidates < AMVP_MAX_NUM_CANDS) {
-    //TODO: add temporal mv predictor
+  if (state->encoder_control->cfg->tmvp_enable) {
+    /*
+    Predictor block locations
+    _________
+    |CurrentPU|
+    | |C0|__  |
+    |    |C3| |
+    |_________|_
+              |H|
+    */
+
+    // Find temporal reference, closest POC
+    if (state->frame->poc > 1 && state->frame->ref->used_size && candidates < AMVP_MAX_NUM_CANDS) {
+      uint32_t poc_diff = UINT_MAX;
+
+      for (int temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) {
+        int td = state->frame->poc - state->frame->ref->pocs[temporal_cand];
+        td = td < 0 ? -td : td;
+        if (td < poc_diff) {
+          poc_diff = td;
+        }
+      }
+
+      const cu_info_t *selected_CU = (h != NULL) ? h : (c3 != NULL) ? c3 : NULL;
+
+      if (selected_CU) {
+        int td = selected_CU->inter.mv_ref[reflist] + 1;
+        int tb = cur_cu->inter.mv_ref[reflist] + 1;
+
+        int scale = CALCULATE_SCALE(NULL, tb, td);
+        mv_cand[candidates][0] = ((scale * selected_CU->inter.mv[0][0] + 127 + (scale * selected_CU->inter.mv[0][0] < 0)) >> 8);
+        mv_cand[candidates][1] = ((scale * selected_CU->inter.mv[0][1] + 127 + (scale * selected_CU->inter.mv[0][1] < 0)) >> 8);
+
+        candidates++;
+      }
+#undef CALCULATE_SCALE
+    }
  }
-#endif

  // Fill with (0,0)
  while (candidates < AMVP_MAX_NUM_CANDS) {
@ -1039,12 +1162,13 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
                           lcu_t *lcu,
                           int8_t reflist)
 {
-  cu_info_t *b0, *b1, *b2, *a0, *a1;
-  b0 = b1 = b2 = a0 = a1 = NULL;
+  cu_info_t *b0, *b1, *b2, *a0, *a1, *c3, *h;
+  b0 = b1 = b2 = a0 = a1 = c3 = h = NULL;
  get_spatial_merge_candidates(x, y, width, height,
                               state->tile->frame->width, state->tile->frame->height,
                               &b0, &b1, &b2, &a0, &a1, lcu);
-  get_mv_cand_from_spatial(state, b0, b1, b2, a0, a1, cur_cu, reflist, mv_cand);
+  kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3, &h);
+  get_mv_cand_from_spatial(state, x, y, width, height, b0, b1, b2, a0, a1, c3, h, cur_cu, reflist, mv_cand);
 }

 /**
@ -1069,14 +1193,16 @@ void kvz_inter_get_mv_cand_cua(const encoder_state_t * const state,
                               int8_t reflist)
 {
  const cu_info_t *b0, *b1, *b2, *a0, *a1;
-  b0 = b1 = b2 = a0 = a1 = NULL;
-
+  cu_info_t *c3, *h;
+  b0 = b1 = b2 = a0 = a1 = c3 = h = NULL;
+  
  const cu_array_t *cua = state->tile->frame->cu_array;
  get_spatial_merge_candidates_cua(cua,
                                   x, y, width, height,
                                   state->tile->frame->width, state->tile->frame->height,
                                   &b0, &b1, &b2, &a0, &a1);
-  get_mv_cand_from_spatial(state, b0, b1, b2, a0, a1, cur_cu, reflist, mv_cand);
+  kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3, &h);
+  get_mv_cand_from_spatial(state, x, y, width, height, b0, b1, b2, a0, a1, c3, h, cur_cu, reflist, mv_cand);
 }

 /**
@ -1195,12 +1321,39 @@ uint8_t kvz_inter_get_merge_cand(const encoder_state_t * const state,
      }
    }
  }
+  
+  if (state->encoder_control->cfg->tmvp_enable) {
+#define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)

-#if ENABLE_TEMPORAL_MVP
-  if(candidates < AMVP_MAX_NUM_CANDS) {
-    //TODO: add temporal mv predictor
+    if (candidates < MRG_MAX_NUM_CANDS && state->frame->ref->used_size) {
+
+      cu_info_t *c3 = NULL;
+      cu_info_t *h = NULL;
+
+      kvz_inter_get_temporal_merge_candidates(state, x, y, width, height, &c3, &h);
+
+      const cu_info_t *selected_CU = (h != NULL) ? h : (c3 != NULL) ? c3 : NULL;
+
+      if (selected_CU) {
+        int td = selected_CU->inter.mv_ref[0] + 1;
+        int tb = 1;
+
+        int scale = CALCULATE_SCALE(NULL, tb, td);
+        mv_cand[candidates].mv[0][0] = ((scale * selected_CU->inter.mv[0][0] + 127 + (scale * selected_CU->inter.mv[0][0] < 0)) >> 8);
+        mv_cand[candidates].mv[0][1] = ((scale * selected_CU->inter.mv[0][1] + 127 + (scale * selected_CU->inter.mv[0][1] < 0)) >> 8);
+
+        /*
+        ToDo: temporal prediction in B-pictures
+        mv_cand[candidates].mv[1][0] = selected_CU->inter.mv[1][0];
+        mv_cand[candidates].mv[1][1] = selected_CU->inter.mv[1][1];
+        */
+        mv_cand[candidates].dir = selected_CU->inter.mv_dir;
+        mv_cand[candidates].ref[0] = 0;
+        candidates++;
+      }
+    }
+#undef CALCULATE_SCALE
  }
-#endif

  if (candidates < MRG_MAX_NUM_CANDS && state->frame->slicetype == KVZ_SLICE_B) {
    #define NUM_PRIORITY_LIST 12;
--- a/src/inter.h
+++ b/src/inter.h
@ -61,19 +61,6 @@ void kvz_inter_recon_lcu_bipred(const encoder_state_t * const state,
                                int16_t mv_param[2][2],
                                lcu_t* lcu);

-void kvz_inter_get_spatial_merge_candidates(int32_t x,
-                                            int32_t y,
-                                            int32_t width,
-                                            int32_t height,
-                                            int32_t picture_width,
-                                            int32_t picture_height,
-                                            cu_info_t **b0,
-                                            cu_info_t **b1,
-                                            cu_info_t **b2,
-                                            cu_info_t **a0,
-                                            cu_info_t **a1,
-                                            lcu_t *lcu);
-
 void kvz_inter_get_mv_cand(const encoder_state_t * const state,
                           int32_t x,
                           int32_t y,
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -278,6 +278,8 @@ typedef struct kvz_config

  int32_t lossless; /*!< \brief Use lossless coding. */

+  int32_t tmvp_enable; /*!< \brief Use Temporal Motion Vector Predictors. */
+
 } kvz_config;

 /**