From 48ccc268395da2c35a5b7fbcd059da00a4408160 Mon Sep 17 00:00:00 2001
From: Ari Koivula <ari@koivu.la>
Date: Tue, 16 Aug 2016 19:03:21 +0300
Subject: [PATCH] Add --input-format and --input-bitdepth

Adds reading of 10-bit input for 10-bit encoding.
---
 configure.ac                  |  2 +-
 src/cfg.c                     | 29 ++++++++++++++
 src/cli.c                     |  4 ++
 src/encmain.c                 | 22 +++++++++--
 src/encoder.c                 |  5 ++-
 src/encoder.h                 |  6 +--
 src/encoder_state-bitstream.c |  6 +--
 src/kvazaar.h                 | 30 ++++++++++++++
 src/yuv_io.c                  | 73 ++++++++++++++++++++++++-----------
 src/yuv_io.h                  |  1 +
 10 files changed, 143 insertions(+), 35 deletions(-)

diff --git a/configure.ac b/configure.ac
index 5152ea7f..5d9e5ee1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
 #
 # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
 ver_major=3
-ver_minor=11
+ver_minor=12
 ver_release=0
 
 # Prevents configure from adding a lot of defines to the CFLAGS
diff --git a/src/cfg.c b/src/cfg.c
index a24e9746..4cbf55a7 100644
--- a/src/cfg.c
+++ b/src/cfg.c
@@ -115,6 +115,9 @@ int kvz_config_init(kvz_config *cfg)
 
   cfg->rdoq_skip = 0;
 
+  cfg->input_format = KVZ_FORMAT_P420;
+  cfg->input_bitdepth = 8;
+
   return 1;
 }
 
@@ -949,6 +952,32 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
   else if OPT("rdoq-skip"){
     cfg->rdoq_skip = atobool(value);
   }
+  else if OPT("input-format") {
+    static enum kvz_input_format const formats[] = { KVZ_FORMAT_P400, KVZ_FORMAT_P420 };
+    static const char * const format_names[] = { "P400", "P420", NULL };
+
+    int8_t format = 0;
+    if (!parse_enum(value, format_names, &format)) {
+      fprintf(stderr, "input-format not recognized.\n");
+      return 0;
+    }
+
+    cfg->input_format = formats[format];
+  }
+  else if OPT("input-bitdepth") {
+    cfg->input_bitdepth = atoi(value);
+    if (cfg->input_bitdepth < 8 || cfg->input_bitdepth > 16) {
+      fprintf(stderr, "input-bitdepth not between 8 and 16.\n");
+      return 0;
+    }
+    if (cfg->input_bitdepth > 8 && KVZ_BIT_DEPTH == 8) {
+      // Because the image is read straight into the reference buffers,
+      // reading >8 bit samples doesn't work when sizeof(kvz_pixel)==1.
+      fprintf(stderr, "input-bitdepth can't be set to larger than 8 because"
+                      " Kvazaar is compiled with KVZ_BIT_DEPTH=8.\n");
+      return 0;
+    }
+  }
   else
     return 0;
 #undef OPT
diff --git a/src/cli.c b/src/cli.c
index de839b85..436c2a8f 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -113,6 +113,8 @@ static const struct option long_options[] = {
   { "tmvp",                     no_argument, NULL, 0 },
   { "no-tmvp",                  no_argument, NULL, 0 },
   { "rdoq-skip",                no_argument, NULL, 0 },
+  { "input-bitdepth",     required_argument, NULL, 0 },
+  { "input-format",       required_argument, NULL, 0 },
   {0, 0, 0, 0}
 };
 
@@ -396,6 +398,8 @@ void print_help(void)
     "          --lossless             : Use lossless coding\n"
     "          --no-tmvp              : Disable Temporal Motion Vector Prediction\n"
     "          --rdoq-skip            : Skips unnecessary rdoq\n"
+    "          --input-format         : P420 or P400\n"
+    "          --input-bitdepth       : 8-16\n"
     "\n"
     "  Video Usability Information:\n"
     "          --sar <width:height>   : Specify Sample Aspect Ratio\n"
diff --git a/src/encmain.c b/src/encmain.c
index bdcd7637..362859e0 100644
--- a/src/encmain.c
+++ b/src/encmain.c
@@ -187,20 +187,36 @@ static void* input_read_thread(void* in_args)
       goto done;
     }
 
-    if (!yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, frame_in)) {
+    bool read_success = yuv_io_read(args->input, 
+                                    args->opts->config->width,
+                                    args->opts->config->height,
+                                    args->encoder->cfg->input_bitdepth,
+                                    args->encoder->bitdepth,
+                                    frame_in);
+    if (!read_success) {
       // reading failed
       if (feof(args->input)) {
         // When looping input, re-open the file and re-read data.
         if (args->opts->loop_input && args->input != stdin) {
           fclose(args->input);
           args->input = fopen(args->opts->input, "rb");
-          if (args->input == NULL ||
-              !yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, frame_in))
+          if (args->input == NULL)
           {
             fprintf(stderr, "Could not re-open input file, shutting down!\n");
             retval = RETVAL_FAILURE;
             goto done;
           }
+          bool read_success = yuv_io_read(args->input,
+                                          args->opts->config->width,
+                                          args->opts->config->height,
+                                          args->encoder->cfg->input_bitdepth,
+                                          args->encoder->bitdepth,
+                                          frame_in);
+          if (!read_success) {
+            fprintf(stderr, "Could not re-open input file, shutting down!\n");
+            retval = RETVAL_FAILURE;
+            goto done;
+          }
         } else {
           retval = RETVAL_EOF;
           goto done;
diff --git a/src/encoder.c b/src/encoder.c
index ecafed8e..3a90e315 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -128,6 +128,8 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {
 
   encoder->bitdepth = KVZ_BIT_DEPTH;
 
+  encoder->chroma_format = KVZ_FORMAT2CSP(cfg->input_format);
+
   // deblocking filter
   encoder->deblock_enable    = 1;
   encoder->beta_offset_div2  = 0;
@@ -394,7 +396,7 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {
   encoder->pu_depth_intra.max = cfg->pu_depth_intra.max;
 
   // input init (TODO: read from commandline / config)  
-  encoder->in.video_format    = FORMAT_420;
+  encoder->chroma_format      = KVZ_CSP_420;
 
   // deblocking filter
   encoder->deblock_enable     = (int8_t)  (encoder->cfg->deblock_enable &&
@@ -496,7 +498,6 @@ void kvz_encoder_control_input_init(encoder_control_t * const encoder,
   encoder->in.height = height;
   encoder->in.real_width = width;
   encoder->in.real_height = height;
-  encoder->in.bitdepth = encoder->bitdepth;
 
   // If input dimensions are not divisible by the smallest block size, add
   // pixels to the dimensions, so that they are. These extra pixels will be
diff --git a/src/encoder.h b/src/encoder.h
index 7be95eaf..ee9909bd 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -32,8 +32,6 @@
 #include "threadqueue.h"
 
 
-enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };
-
 /* Encoder control options, the main struct */
 typedef struct encoder_control_t
 {
@@ -48,8 +46,6 @@ typedef struct encoder_control_t
     int32_t height_in_lcu;
     int32_t real_width;  /*!< \brief real input picture width */
     int32_t real_height; /*!< \brief real input picture width */
-    int8_t video_format;
-    int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
     int64_t pixels_per_pic;
     int8_t source_scan_type;
   } in;
@@ -62,6 +58,8 @@ typedef struct encoder_control_t
   } me;
   
   int8_t bitdepth;
+  enum kvz_chroma_format chroma_format;
+
   int8_t tr_depth_intra;
 
   int8_t fme_level;
diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c
index 4c938165..e2d5aaae 100644
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@@ -243,7 +243,7 @@ static void encoder_state_write_bitstream_VUI(bitstream_t *stream,
       encoder->vui.colorprim != 2 || encoder->vui.transfer != 2 ||
       encoder->vui.colormatrix != 2) {
     WRITE_U(stream, 1, 1, "video_signal_type_present_flag");
-    WRITE_U(stream, encoder->vui.videoformat, 3, "video_format");
+    WRITE_U(stream, encoder->vui.videoformat, 3, "chroma_format");
     WRITE_U(stream, encoder->vui.fullrange, 1, "video_full_range_flag");
 
     if (encoder->vui.colorprim != 2 || encoder->vui.transfer != 2 ||
@@ -310,10 +310,10 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
   encoder_state_write_bitstream_PTL(stream, state);
 
   WRITE_UE(stream, 0, "sps_seq_parameter_set_id");
-  WRITE_UE(stream, encoder->in.video_format,
+  WRITE_UE(stream, encoder->chroma_format,
            "chroma_format_idc");
 
-  if (encoder->in.video_format == 3) {
+  if (encoder->chroma_format == 3) {
     WRITE_U(stream, 0, 1, "separate_colour_plane_flag");
   }
 
diff --git a/src/kvazaar.h b/src/kvazaar.h
index 7c65a65c..d228825b 100644
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@@ -163,6 +163,34 @@ enum kvz_me_early_termination
   KVZ_ME_EARLY_TERMINATION_SENSITIVE = 2
 };
 
+
+/**
+ * \brief Format the pixels are read in.
+ * This is separate from chroma subsampling, because we might want to read
+ * interleaved formats in the future.
+ * \since 3.12.0
+ */
+enum kvz_input_format {
+  KVZ_FORMAT_P400 = 0, /*!< \brief Maps to KVZ_CSP_400. */
+  KVZ_FORMAT_P420 = 1, /*!< \brief Maps to KVZ_CSP_420; default set by kvz_config_init(). */
+  KVZ_FORMAT_P422 = 2, /*!< \brief Maps to KVZ_CSP_422; not accepted by the "input-format" option. */
+  KVZ_FORMAT_P444 = 3, /*!< \brief Maps to KVZ_CSP_444; not accepted by the "input-format" option. */
+};
+
+/**
+ * \brief Chroma subsampling format used for encoding; written to the SPS as chroma_format_idc.
+ * \since 3.12.0
+ */
+enum kvz_chroma_format {
+  KVZ_CSP_400 = 0,
+  KVZ_CSP_420 = 1,
+  KVZ_CSP_422 = 2,
+  KVZ_CSP_444 = 3,
+};
+
+// Look up the chroma format (enum kvz_chroma_format) matching an input format (enum kvz_input_format).
+#define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)("\0\1\2\3"[(format)]))
+
 /**
  * \brief GoP picture configuration.
  */
@@ -282,6 +310,8 @@ typedef struct kvz_config
 
   int32_t rdoq_skip; /*!< \brief Mode of rdoq skip */
 
+  enum kvz_input_format input_format; /*!< \brief Format of the input pixels. */
+  int32_t input_bitdepth; /*!< \brief Bit depth of the input samples (8-16). */
 } kvz_config;
 
 /**
diff --git a/src/yuv_io.c b/src/yuv_io.c
index 8f562459..89222782 100644
--- a/src/yuv_io.c
+++ b/src/yuv_io.c
@@ -68,21 +68,37 @@ static int read_and_fill_frame_data(FILE *file,
 }
 
 
-/**
-* \brief Convert 8 bit (single byte per pixel) to 10bit (two bytes per pixel) array
-*
-* \param input   input/output buffer
-* \return        1
-*/
-int frame_8bit_to_10bit(kvz_pixel* input, int width, int height) {
-  uint8_t* temp_buffer = (uint8_t*)input;
-  const uint32_t pixels = width*height;
-  for (int i = pixels - 1; i >= 0; i--) {
-    input[i] = temp_buffer[i] << 2;
+static void swap_16b_buffer_bytes(kvz_pixel* input, int size)
+{
+  for (int n = 0; n < size; ++n) {  // Exchange the low and high byte of every sample.
+    input[n] = ((input[n] & 0xff) << 8) | ((input[n] >> 8) & 0xff);
   }
-  return 1;
 }
 
+
+static void shift_to_bitdepth(kvz_pixel* input, int size, int from_bitdepth, int to_bitdepth)
+{
+  const int shift = to_bitdepth - from_bitdepth;  // >0: widen samples, <0: narrow samples
+  for (int i = 0; i < size; ++i) {
+    // Shifting by a negative count is undefined, so the right shift uses -shift.
+    if (shift > 0) {
+      input[i] <<= shift;
+    } else {
+      input[i] >>= -shift;
+    }
+  }
+}
+
+
+bool machine_is_big_endian(void)
+{
+  // True on big-endian hosts: there the first byte of a 16-bit 1 is the zero high byte.
+  const uint16_t number = 1;
+  const char first_byte = *(const char*)&number;
+  return (first_byte == 0);
+}
+
+
 /**
  * \brief Read a single frame from a file.
  *
@@ -98,15 +114,20 @@ int frame_8bit_to_10bit(kvz_pixel* input, int width, int height) {
  */
 int yuv_io_read(FILE* file,
                 unsigned input_width, unsigned input_height,
+                unsigned input_bitdepth, unsigned to_bitdepth,
                 kvz_picture *img_out)
 {
   assert(input_width % 2 == 0);
   assert(input_height % 2 == 0);
 
+  unsigned bytes_per_sample = input_bitdepth > 8 ? 2 : 1;
+
   const unsigned y_size = input_width * input_height;
+  const unsigned y_bytes = y_size * bytes_per_sample;
   const unsigned uv_input_width  = input_width  / 2;
   const unsigned uv_input_height = input_height / 2;
   const unsigned uv_size = uv_input_width * uv_input_height;
+  const unsigned uv_bytes = uv_size * bytes_per_sample;
 
   const unsigned uv_array_width  = img_out->width  / 2;
   const unsigned uv_array_height = img_out->height  / 2;
@@ -114,9 +135,9 @@ int yuv_io_read(FILE* file,
   if (input_width == img_out->width) {
     // No need to extend pixels.
     const size_t pixel_size = sizeof(unsigned char);
-    if (fread(img_out->y, pixel_size, y_size,  file) != y_size)  return 0;
-    if (fread(img_out->u, pixel_size, uv_size, file) != uv_size) return 0;
-    if (fread(img_out->v, pixel_size, uv_size, file) != uv_size) return 0;
+    if (fread(img_out->y, pixel_size, y_bytes,  file) != y_bytes)  return 0;
+    if (fread(img_out->u, pixel_size, uv_bytes, file) != uv_bytes) return 0;
+    if (fread(img_out->v, pixel_size, uv_bytes, file) != uv_bytes) return 0;
   } else {
     // Need to copy pixels to fill the image in horizontal direction.
     if (!read_and_fill_frame_data(file, input_width,    input_height,    img_out->width, img_out->y)) return 0;
@@ -126,17 +147,25 @@ int yuv_io_read(FILE* file,
 
   if (input_height != img_out->height) {
     // Need to copy pixels to fill the image in vertical direction.
-    fill_after_frame(input_height,    img_out->width, img_out->height,    img_out->y);
+    fill_after_frame(input_height,    img_out->width, img_out->height, img_out->y);
     fill_after_frame(uv_input_height, uv_array_width, uv_array_height, img_out->u);
     fill_after_frame(uv_input_height, uv_array_width, uv_array_height, img_out->v);
   }
+  
+  if (bytes_per_sample == 2) {
+    if (machine_is_big_endian()) {
+      swap_16b_buffer_bytes(img_out->y, y_size);
+      swap_16b_buffer_bytes(img_out->u, uv_size);
+      swap_16b_buffer_bytes(img_out->v, uv_size);
+    }
 
-#if KVZ_BIT_DEPTH == 10
-  frame_8bit_to_10bit(img_out->y, img_out->width, img_out->height);
-	frame_8bit_to_10bit(img_out->u, img_out->width >> 1, img_out->height >> 1);
-	frame_8bit_to_10bit(img_out->v, img_out->width >> 1, img_out->height >> 1);
-#endif
-
+    if (input_bitdepth != to_bitdepth) {
+      shift_to_bitdepth(img_out->y, y_size, input_bitdepth, to_bitdepth);
+      shift_to_bitdepth(img_out->u, uv_size, input_bitdepth, to_bitdepth);
+      shift_to_bitdepth(img_out->v, uv_size, input_bitdepth, to_bitdepth);
+    }
+  }
+  
   return 1;
 }
 
diff --git a/src/yuv_io.h b/src/yuv_io.h
index 558a8846..fb31245f 100644
--- a/src/yuv_io.h
+++ b/src/yuv_io.h
@@ -33,6 +33,7 @@
 
 int yuv_io_read(FILE* file,
                 unsigned input_width, unsigned input_height,
+                unsigned from_bitdepth, unsigned to_bitdepth, // bit depth of samples in file / wanted in img_out
                 kvz_picture *img_out);
 
 int yuv_io_seek(FILE* file, unsigned frames,