Add --input-format and --input-bitdepth

Adds reading of 10 bit input for 10-bit encoding.
2024-11-27 11:24:05 +00:00 · 2016-08-16 19:03:21 +03:00 · 2016-08-16 19:03:21 +03:00 · 48ccc26839
parent cc08073615
commit 48ccc26839
10 changed files with 143 additions and 35 deletions
--- a/configure.ac
+++ b/configure.ac
@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
 #
 # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
 ver_major=3
-ver_minor=11
+ver_minor=12
 ver_release=0

 # Prevents configure from adding a lot of defines to the CFLAGS
--- a/src/cfg.c
+++ b/src/cfg.c
@ -115,6 +115,9 @@ int kvz_config_init(kvz_config *cfg)

  cfg->rdoq_skip = 0;

+  cfg->input_format = KVZ_FORMAT_P420;
+  cfg->input_bitdepth = 8;
+
  return 1;
 }

@ -949,6 +952,32 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
  else if OPT("rdoq-skip"){
    cfg->rdoq_skip = atobool(value);
  }
+  else if OPT("input-format") {
+    static enum kvz_input_format const formats[] = { KVZ_FORMAT_P400, KVZ_FORMAT_P420 };
+    static const char * const format_names[] = { "P400", "P420", NULL };
+
+    int8_t format = 0;
+    if (!parse_enum(value, format_names, &format)) {
+      fprintf(stderr, "input-format not recognized.\n");
+      return 0;
+    }
+
+    cfg->input_format = formats[format];
+  }
+  else if OPT("input-bitdepth") {
+    cfg->input_bitdepth = atoi(value);
+    if (cfg->input_bitdepth < 8 || cfg->input_bitdepth > 16) {
+      fprintf(stderr, "input-bitdepth not between 8 and 16.\n");
+      return 0;
+    }
+    if (cfg->input_bitdepth > 8 && KVZ_BIT_DEPTH == 8) {
+      // Because the image is read straight into the reference buffers,
+      // reading >8 bit samples doesn't work when sizeof(kvz_pixel)==1.
+      fprintf(stderr, "input-bitdepth can't be set to larger than 8 because"
+                      " Kvazaar is compiled with KVZ_BIT_DEPTH=8.\n");
+      return 0;
+    }
+  }
  else
    return 0;
 #undef OPT
--- a/src/cli.c
+++ b/src/cli.c
@ -113,6 +113,8 @@ static const struct option long_options[] = {
  { "tmvp",                     no_argument, NULL, 0 },
  { "no-tmvp",                  no_argument, NULL, 0 },
  { "rdoq-skip",                no_argument, NULL, 0 },
+  { "input-bitdepth",     required_argument, NULL, 0 },
+  { "input-format",       required_argument, NULL, 0 },
  {0, 0, 0, 0}
 };

@ -396,6 +398,8 @@ void print_help(void)
    "          --lossless             : Use lossless coding\n"
    "          --no-tmvp              : Disable Temporal Motion Vector Prediction\n"
    "          --rdoq-skip            : Skips unnecessary rdoq\n"
+    "          --input-format         : P420 or P400\n"
+    "          --input-bitdepth       : 8-16\n"
    "\n"
    "  Video Usability Information:\n"
    "          --sar <width:height>   : Specify Sample Aspect Ratio\n"
--- a/src/encmain.c
+++ b/src/encmain.c
@ -187,20 +187,36 @@ static void* input_read_thread(void* in_args)
      goto done;
    }

-    if (!yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, frame_in)) {
+    bool read_success = yuv_io_read(args->input, 
+                                    args->opts->config->width,
+                                    args->opts->config->height,
+                                    args->encoder->cfg->input_bitdepth,
+                                    args->encoder->bitdepth,
+                                    frame_in);
+    if (!read_success) {
      // reading failed
      if (feof(args->input)) {
        // When looping input, re-open the file and re-read data.
        if (args->opts->loop_input && args->input != stdin) {
          fclose(args->input);
          args->input = fopen(args->opts->input, "rb");
-          if (args->input == NULL ||
-              !yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, frame_in))
+          if (args->input == NULL)
          {
            fprintf(stderr, "Could not re-open input file, shutting down!\n");
            retval = RETVAL_FAILURE;
            goto done;
          }
+          bool read_success = yuv_io_read(args->input,
+                                          args->opts->config->width,
+                                          args->opts->config->height,
+                                          args->encoder->cfg->input_bitdepth,
+                                          args->encoder->bitdepth,
+                                          frame_in);
+          if (!read_success) {
+            fprintf(stderr, "Could not re-open input file, shutting down!\n");
+            retval = RETVAL_FAILURE;
+            goto done;
+          }
        } else {
          retval = RETVAL_EOF;
          goto done;
--- a/src/encoder.c
+++ b/src/encoder.c
@ -128,6 +128,8 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {

  encoder->bitdepth = KVZ_BIT_DEPTH;

+  encoder->chroma_format = KVZ_FORMAT2CSP(cfg->input_format);
+
  // deblocking filter
  encoder->deblock_enable    = 1;
  encoder->beta_offset_div2  = 0;
@ -394,7 +396,7 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) {
  encoder->pu_depth_intra.max = cfg->pu_depth_intra.max;

  // input init (TODO: read from commandline / config)  
-  encoder->in.video_format    = FORMAT_420;
+  encoder->chroma_format      = KVZ_CSP_420;

  // deblocking filter
  encoder->deblock_enable     = (int8_t)  (encoder->cfg->deblock_enable &&
@ -496,7 +498,6 @@ void kvz_encoder_control_input_init(encoder_control_t * const encoder,
  encoder->in.height = height;
  encoder->in.real_width = width;
  encoder->in.real_height = height;
-  encoder->in.bitdepth = encoder->bitdepth;

  // If input dimensions are not divisible by the smallest block size, add
  // pixels to the dimensions, so that they are. These extra pixels will be
--- a/src/encoder.h
+++ b/src/encoder.h
@ -32,8 +32,6 @@
 #include "threadqueue.h"


-enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };
-
 /* Encoder control options, the main struct */
 typedef struct encoder_control_t
 {
@ -48,8 +46,6 @@ typedef struct encoder_control_t
    int32_t height_in_lcu;
    int32_t real_width;  /*!< \brief real input picture width */
    int32_t real_height; /*!< \brief real input picture width */
-    int8_t video_format;
-    int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
    int64_t pixels_per_pic;
    int8_t source_scan_type;
  } in;
@ -62,6 +58,8 @@ typedef struct encoder_control_t
  } me;
  
  int8_t bitdepth;
+  enum kvz_chroma_format chroma_format;
+
  int8_t tr_depth_intra;

  int8_t fme_level;
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@ -243,7 +243,7 @@ static void encoder_state_write_bitstream_VUI(bitstream_t *stream,
      encoder->vui.colorprim != 2 || encoder->vui.transfer != 2 ||
      encoder->vui.colormatrix != 2) {
    WRITE_U(stream, 1, 1, "video_signal_type_present_flag");
-    WRITE_U(stream, encoder->vui.videoformat, 3, "video_format");
+    WRITE_U(stream, encoder->vui.videoformat, 3, "chroma_format");
    WRITE_U(stream, encoder->vui.fullrange, 1, "video_full_range_flag");

    if (encoder->vui.colorprim != 2 || encoder->vui.transfer != 2 ||
@ -310,10 +310,10 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
  encoder_state_write_bitstream_PTL(stream, state);

  WRITE_UE(stream, 0, "sps_seq_parameter_set_id");
-  WRITE_UE(stream, encoder->in.video_format,
+  WRITE_UE(stream, encoder->chroma_format,
           "chroma_format_idc");

-  if (encoder->in.video_format == 3) {
+  if (encoder->chroma_format == 3) {
    WRITE_U(stream, 0, 1, "separate_colour_plane_flag");
  }

--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@ -163,6 +163,34 @@ enum kvz_me_early_termination
  KVZ_ME_EARLY_TERMINATION_SENSITIVE = 2
 };

+
+/**
+ * \brief Format the pixels are read in.
+ * This is separate from chroma subsampling, because we might want to read
+ * interleaved formats in the future.
+ *
+ * The numeric values are used as indices by KVZ_FORMAT2CSP, so they have to
+ * stay within the range covered by its lookup table.
+ * \since 3.12.0
+ */
+enum kvz_input_format {
+  KVZ_FORMAT_P400 = 0,
+  KVZ_FORMAT_P420 = 1,
+  KVZ_FORMAT_P422 = 2,
+  KVZ_FORMAT_P444 = 3,
+};
+
+/**
+ * \brief Chroma subsampling format used for encoding.
+ *
+ * The encoder writes this value into the sequence parameter set as
+ * chroma_format_idc.
+ * \since 3.12.0
+ */
+enum kvz_chroma_format {
+  KVZ_CSP_400 = 0,
+  KVZ_CSP_420 = 1,
+  KVZ_CSP_422 = 2,
+  KVZ_CSP_444 = 3,
+};
+
+// Map from input format to chroma format.
+// The string literal acts as a lookup table: the byte at index `format` is
+// the numeric value of the matching kvz_chroma_format. No range checking is
+// done, so `format` must be one of the kvz_input_format values.
+#define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)"\0\1\2\3"[format])
+
 /**
 * \brief GoP picture configuration.
 */
@ -282,6 +310,8 @@ typedef struct kvz_config

  int32_t rdoq_skip; /*!< \brief Mode of rdoq skip */

+  enum kvz_input_format input_format; /*!< \brief Format the input pixels are read in. \since 3.12.0 */
+  int32_t input_bitdepth; /*!< \brief Bit depth of the input samples (8-16). \since 3.12.0 */
 } kvz_config;

 /**
--- a/src/yuv_io.c
+++ b/src/yuv_io.c
@ -68,21 +68,37 @@ static int read_and_fill_frame_data(FILE *file,
 }


-/**
-* \brief Convert 8 bit (single byte per pixel) to 10bit (two bytes per pixel) array
-*
-* \param input   input/output buffer
-* \return        1
-*/
-int frame_8bit_to_10bit(kvz_pixel* input, int width, int height) {
-  uint8_t* temp_buffer = (uint8_t*)input;
-  const uint32_t pixels = width*height;
-  for (int i = pixels - 1; i >= 0; i--) {
-    input[i] = temp_buffer[i] << 2;
+/**
+ * \brief Swap the low and high byte of every sample in a buffer, in place.
+ *
+ * The masks and shifts only make sense for 16-bit samples, so this
+ * presumably expects kvz_pixel to be 16 bits wide -- TODO confirm.
+ *
+ * \param input   buffer of samples, modified in place
+ * \param size    number of samples in the buffer
+ */
+static void swap_16b_buffer_bytes(kvz_pixel* input, int size)
+{
+  for (int i = 0; i < size; ++i) {
+    input[i] = ((input[i] & 0xff) << 8) + ((input[i] & 0xff00) >> 8);
   }
-  return 1;
 }

+
+/**
+ * \brief Rescale samples from one bit depth to another, in place.
+ *
+ * Samples are shifted left when to_bitdepth is larger than from_bitdepth
+ * and shifted right when it is smaller. Equal bit depths leave the buffer
+ * untouched.
+ *
+ * \param input          buffer of samples, modified in place
+ * \param size           number of samples in the buffer
+ * \param from_bitdepth  bit depth of the samples currently in the buffer
+ * \param to_bitdepth    bit depth the samples are converted to
+ */
+static void shift_to_bitdepth(kvz_pixel* input, int size, int from_bitdepth, int to_bitdepth)
+{
+  // Shifting by a negative number is undefined, so the direction is chosen
+  // first and the shift count is always positive.
+  if (to_bitdepth > from_bitdepth) {
+    const int shift = to_bitdepth - from_bitdepth;
+    for (int i = 0; i < size; ++i) {
+      input[i] <<= shift;
+    }
+  } else if (to_bitdepth < from_bitdepth) {
+    const int shift = from_bitdepth - to_bitdepth;
+    for (int i = 0; i < size; ++i) {
+      input[i] >>= shift;
+    }
+  }
+}
+
+
+/**
+ * \brief Check the byte order of the machine at run time.
+ *
+ * \return true when the most significant byte of a 16-bit integer is stored
+ *         first (big endian), false otherwise
+ */
+bool machine_is_big_endian(void)
+{
+  const uint16_t number = 1;
+  // A little-endian machine stores the low-order byte (1) first,
+  // a big-endian machine stores the high-order byte (0) first.
+  const unsigned char first_byte = *(const unsigned char*)&number;
+
+  return (first_byte == 0);
+}
+
+
 /**
 * \brief Read a single frame from a file.
 *
@ -98,15 +114,20 @@ int frame_8bit_to_10bit(kvz_pixel* input, int width, int height) {
 */
 int yuv_io_read(FILE* file,
                unsigned input_width, unsigned input_height,
+                unsigned input_bitdepth, unsigned to_bitdepth,
                kvz_picture *img_out)
 {
  assert(input_width % 2 == 0);
  assert(input_height % 2 == 0);

+  unsigned bytes_per_sample = input_bitdepth > 8 ? 2 : 1;
+
  const unsigned y_size = input_width * input_height;
+  const unsigned y_bytes = y_size * bytes_per_sample;
  const unsigned uv_input_width  = input_width  / 2;
  const unsigned uv_input_height = input_height / 2;
  const unsigned uv_size = uv_input_width * uv_input_height;
+  const unsigned uv_bytes = uv_size * bytes_per_sample;

  const unsigned uv_array_width  = img_out->width  / 2;
  const unsigned uv_array_height = img_out->height  / 2;
@ -114,9 +135,9 @@ int yuv_io_read(FILE* file,
  if (input_width == img_out->width) {
    // No need to extend pixels.
    const size_t pixel_size = sizeof(unsigned char);
-    if (fread(img_out->y, pixel_size, y_size,  file) != y_size)  return 0;
-    if (fread(img_out->u, pixel_size, uv_size, file) != uv_size) return 0;
-    if (fread(img_out->v, pixel_size, uv_size, file) != uv_size) return 0;
+    if (fread(img_out->y, pixel_size, y_bytes,  file) != y_bytes)  return 0;
+    if (fread(img_out->u, pixel_size, uv_bytes, file) != uv_bytes) return 0;
+    if (fread(img_out->v, pixel_size, uv_bytes, file) != uv_bytes) return 0;
  } else {
    // Need to copy pixels to fill the image in horizontal direction.
    if (!read_and_fill_frame_data(file, input_width,    input_height,    img_out->width, img_out->y)) return 0;
@ -126,17 +147,25 @@ int yuv_io_read(FILE* file,

  if (input_height != img_out->height) {
    // Need to copy pixels to fill the image in vertical direction.
-    fill_after_frame(input_height,    img_out->width, img_out->height,    img_out->y);
+    fill_after_frame(input_height,    img_out->width, img_out->height, img_out->y);
    fill_after_frame(uv_input_height, uv_array_width, uv_array_height, img_out->u);
    fill_after_frame(uv_input_height, uv_array_width, uv_array_height, img_out->v);
  }
+  
+  if (bytes_per_sample == 2) {
+    if (machine_is_big_endian()) {
+      swap_16b_buffer_bytes(img_out->y, y_size);
+      swap_16b_buffer_bytes(img_out->u, uv_size);
+      swap_16b_buffer_bytes(img_out->v, uv_size);
+    }

-#if KVZ_BIT_DEPTH == 10
-  frame_8bit_to_10bit(img_out->y, img_out->width, img_out->height);
-	frame_8bit_to_10bit(img_out->u, img_out->width >> 1, img_out->height >> 1);
-	frame_8bit_to_10bit(img_out->v, img_out->width >> 1, img_out->height >> 1);
-#endif
-
+    if (input_bitdepth != to_bitdepth) {
+      shift_to_bitdepth(img_out->y, y_size, input_bitdepth, to_bitdepth);
+      shift_to_bitdepth(img_out->u, uv_size, input_bitdepth, to_bitdepth);
+      shift_to_bitdepth(img_out->v, uv_size, input_bitdepth, to_bitdepth);
+    }
+  }
+  
  return 1;
 }

--- a/src/yuv_io.h
+++ b/src/yuv_io.h
@ -33,6 +33,7 @@

 int yuv_io_read(FILE* file,
                unsigned input_width, unsigned input_height,
+                unsigned from_bitdepth, unsigned to_bitdepth,
                kvz_picture *img_out);

 int yuv_io_seek(FILE* file, unsigned frames,