Small fixes all around to enable 10bit encoding

Conflicts: src/encmain.c src/encoder.c src/encoderstate.c src/global.h
2024-11-27 11:24:05 +00:00 · 2015-04-29 15:10:37 +03:00 · 2015-04-29 15:10:37 +03:00 · 57ab46f110
parent 7cd4f7a5c9
commit 57ab46f110
11 changed files with 53 additions and 32 deletions
--- a/src/encoder.c
+++ b/src/encoder.c
@ -141,7 +141,7 @@ encoder_control_t* encoder_control_init(const kvz_config *const cfg) {

  // Config pointer to config struct
  encoder->cfg = cfg;
-  encoder->bitdepth = 8;
+  encoder->bitdepth = BIT_DEPTH;

  // deblocking filter
  encoder->deblock_enable    = 1;
--- a/src/encoder_state-bitstream.c
+++ b/src/encoder_state-bitstream.c
@ -53,8 +53,8 @@ static void encoder_state_write_bitstream_PTL(encoder_state_t * const state)
  // Profile Tier
  WRITE_U(stream, 0, 2, "general_profile_space");
  WRITE_U(stream, 0, 1, "general_tier_flag");
-  // Main Profile == 1
-  WRITE_U(stream, 1, 5, "general_profile_idc");
+  // Main Profile == 1,  Main 10 profile == 2
+  WRITE_U(stream, (state->encoder_control->bitdepth == 8)?1:2, 5, "general_profile_idc");
  /* Compatibility flags should be set at general_profile_idc
   *  (so with general_profile_idc = 1, compatibility_flag[1] should be 1)
   * According to specification, when compatibility_flag[1] is set,
@ -327,8 +327,8 @@ static void encoder_state_write_bitstream_seq_parameter_set(encoder_state_t * co
  //IF window flag
  //END IF

-  WRITE_UE(stream, encoder->in.bitdepth-8, "bit_depth_luma_minus8");
-  WRITE_UE(stream, encoder->in.bitdepth-8, "bit_depth_chroma_minus8");
+  WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_luma_minus8");
+  WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_chroma_minus8");
  WRITE_UE(stream, 1, "log2_max_pic_order_cnt_lsb_minus4");
  WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag");

--- a/src/encoderstate.c
+++ b/src/encoderstate.c
@ -906,6 +906,17 @@ int encoder_feed_frame(encoder_state_t *const state, kvz_picture *const img_in)
    }
  }

+/* Widens the 8-bit samples packed at the start of input to pixel_t (value << 2), in place. Always returns 1. */
+int frame_8bit_to_10bit(pixel_t* input, int width, int height) {
+  const uint8_t *packed = (const uint8_t *)input;
+  if (input == NULL || width <= 0 || height <= 0) return 1;
+  // Go backwards: storing element i only touches bytes at offset >= i, so no temp copy is needed.
+  for (size_t i = (size_t)width * (size_t)height; i-- > 0; ) {
+    input[i] = (pixel_t)(packed[i] << 2);
+  }
+  return 1;
+}
+
  if (gop_pictures_available < cfg->gop_len) {
    if (img_in != NULL || gop_pictures_available == 0) {
      // Either start of the sequence with no full GOP available yet, or the
@ -940,6 +951,12 @@ int encoder_feed_frame(encoder_state_t *const state, kvz_picture *const img_in)
    gop_pictures_available = MAX(0, gop_pictures_available - cfg->gop_len);
    gop_buf_read_idx = (gop_buf_read_idx + cfg->gop_len) % gop_buf_size;
  }
+#if BIT_DEPTH == 10
+  frame_8bit_to_10bit(state->tile->frame->source->y, width, height);
+  
+  frame_8bit_to_10bit(state->tile->frame->source->u, width>>1, height>>1);
+  frame_8bit_to_10bit(state->tile->frame->source->v, width>>1, height>>1);
+#endif
  return 1;
 }

--- a/src/encoderstate.h
+++ b/src/encoderstate.h
@ -220,7 +220,7 @@ void encode_last_significant_xy(encoder_state_t *state,
                                uint8_t lastpos_x, uint8_t lastpos_y,
                                uint8_t width, uint8_t height,
                                uint8_t type, uint8_t scan);
-void encode_coeff_nxn(encoder_state_t *state, int16_t *coeff, uint8_t width,
+void encode_coeff_nxn(encoder_state_t *state, coeff_t *coeff, uint8_t width,
                      uint8_t type, int8_t scan_mode, int8_t tr_skip);
 void encode_transform_coeff(encoder_state_t *state, int32_t x_cu, int32_t y_cu,
                            int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
--- a/src/filter.c
+++ b/src/filter.c
@ -129,14 +129,14 @@ INLINE void filter_deblock_luma(const encoder_control_t * const encoder, kvz_pix
  }

  if(part_P_nofilter) {
-    src[-offset]   = (uint8_t)m3;
-    src[-offset*2] = (uint8_t)m2;
-    src[-offset*3] = (uint8_t)m1;
+    src[-offset]   = (pixel_t)m3;
+    src[-offset*2] = (pixel_t)m2;
+    src[-offset*3] = (pixel_t)m1;
  }
  if(part_Q_nofilter) {
-    src[0]        = (uint8_t)m4;
-    src[offset]   = (uint8_t)m5;
-    src[offset*2] = (uint8_t)m6;
+    src[0]        = (pixel_t)m4;
+    src[offset]   = (pixel_t)m5;
+    src[offset*2] = (pixel_t)m6;
  }
 }

--- a/src/rdo.c
+++ b/src/rdo.c
@ -208,7 +208,7 @@ uint32_t rdo_cost_intra(encoder_state_t * const state, kvz_pixel *pred, kvz_pixe
    // SSD between original and reconstructed
    for (i = 0; i < width*width; i++) {
      //int diff = temp_block[i]-block[i];
-      int diff = orig_block[i] - CLIP(0, 255, pred[i] + temp_block[i]);
+      int diff = orig_block[i] - CLIP(0, PIXEL_MAX, pred[i] + temp_block[i]);

      ssd += diff*diff;
    }
--- a/src/search.c
+++ b/src/search.c
@ -760,16 +760,16 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i
      // number of allocated pixels left.
      int x_max = MIN(LCU_REF_PX_WIDTH, pic_width - x);
      int x_min_in_lcu = (x>0) ? 0 : 1;
-      memcpy(&lcu->top_ref.y[x_min_in_lcu], &hor_buf->y[OFFSET_HOR_BUF(x, y, frame, x_min_in_lcu-1)], x_max + (1-x_min_in_lcu));
-      memcpy(&lcu->top_ref.u[x_min_in_lcu], &hor_buf->u[OFFSET_HOR_BUF_C(x, y, frame, x_min_in_lcu-1)], x_max / 2 + (1-x_min_in_lcu));
-      memcpy(&lcu->top_ref.v[x_min_in_lcu], &hor_buf->v[OFFSET_HOR_BUF_C(x, y, frame, x_min_in_lcu-1)], x_max / 2 + (1-x_min_in_lcu));
+      memcpy(&lcu->top_ref.y[x_min_in_lcu], &hor_buf->y[OFFSET_HOR_BUF(x, y, frame, x_min_in_lcu-1)], (x_max + (1-x_min_in_lcu))*sizeof(pixel_t));
+      memcpy(&lcu->top_ref.u[x_min_in_lcu], &hor_buf->u[OFFSET_HOR_BUF_C(x, y, frame, x_min_in_lcu-1)], (x_max / 2 + (1-x_min_in_lcu))*sizeof(pixel_t));
+      memcpy(&lcu->top_ref.v[x_min_in_lcu], &hor_buf->v[OFFSET_HOR_BUF_C(x, y, frame, x_min_in_lcu-1)], (x_max / 2 + (1-x_min_in_lcu))*sizeof(pixel_t));
    }
    // Copy left reference pixels.
    if (x > 0) {
      int y_min_in_lcu = (y>0) ? 0 : 1;
-      memcpy(&lcu->left_ref.y[y_min_in_lcu], &ver_buf->y[OFFSET_VER_BUF(x, y, frame, y_min_in_lcu-1)], LCU_WIDTH + (1-y_min_in_lcu));
-      memcpy(&lcu->left_ref.u[y_min_in_lcu], &ver_buf->u[OFFSET_VER_BUF_C(x, y, frame, y_min_in_lcu-1)], LCU_WIDTH / 2 + (1-y_min_in_lcu));
-      memcpy(&lcu->left_ref.v[y_min_in_lcu], &ver_buf->v[OFFSET_VER_BUF_C(x, y, frame, y_min_in_lcu-1)], LCU_WIDTH / 2 + (1-y_min_in_lcu));
+      memcpy(&lcu->left_ref.y[y_min_in_lcu], &ver_buf->y[OFFSET_VER_BUF(x, y, frame, y_min_in_lcu-1)], (LCU_WIDTH + (1-y_min_in_lcu))*sizeof(pixel_t));
+      memcpy(&lcu->left_ref.u[y_min_in_lcu], &ver_buf->u[OFFSET_VER_BUF_C(x, y, frame, y_min_in_lcu-1)], (LCU_WIDTH / 2 + (1-y_min_in_lcu))*sizeof(pixel_t));
+      memcpy(&lcu->left_ref.v[y_min_in_lcu], &ver_buf->v[OFFSET_VER_BUF_C(x, y, frame, y_min_in_lcu-1)], (LCU_WIDTH / 2 + (1-y_min_in_lcu))*sizeof(pixel_t));
    }
  }

--- a/src/strategies/generic/picture-generic.c
+++ b/src/strategies/generic/picture-generic.c
@ -35,7 +35,7 @@ kvz_pixel fast_clip_16bit_to_pixel(int16_t value)

  if (value & ~PIXEL_MAX) {
    int16_t temp = (-value) >> 15;
-#if BITDEPTH == 10
+#if BIT_DEPTH == 10
    temp &= PIXEL_MAX;
 #endif
    return temp;
@ -57,7 +57,7 @@ kvz_pixel fast_clip_32bit_to_pixel(int32_t value)

  if (value & ~PIXEL_MAX) {
    int32_t temp = (-value) >> 31;
-#if BITDEPTH == 10
+#if BIT_DEPTH == 10
    temp &= PIXEL_MAX;
 #endif
    return temp;
--- a/src/transform.c
+++ b/src/transform.c
@ -133,14 +133,14 @@ void itransform2d(const encoder_control_t * const encoder, int16_t *block, int16
 * \brief quantize transformed coefficents
 *
 */
-void quant(const encoder_state_t * const state, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
           int32_t height, int8_t type, int8_t scan_idx, int8_t block_type )
 {
  const encoder_control_t * const encoder = state->encoder_control;
  const uint32_t log2_block_size = g_convert_to_bit[ width ] + 2;
  const uint32_t * const scan = g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ];

-  int32_t qp_scaled = get_scaled_qp(type, state->global->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6);

  const uint32_t log2_tr_size = g_convert_to_bit[ width ] + 2;
  const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -163,7 +163,7 @@ void quant(const encoder_state_t * const state, int16_t *coef, int16_t *q_coef,
    ac_sum += level;

    level *= sign;
-    q_coef[n] = (int16_t)(CLIP( -32768, 32767, level));
+    q_coef[n] = (coeff_t)(CLIP( -32768, 32767, level));
  }

  if (!(encoder->sign_hiding && ac_sum >= 2)) return;
@ -262,14 +262,14 @@ void quant(const encoder_state_t * const state, int16_t *coef, int16_t *q_coef,
 * \brief inverse quantize transformed and quantized coefficents
 *
 */
-void dequant(const encoder_state_t * const state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
+void dequant(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
 {
  const encoder_control_t * const encoder = state->encoder_control;
  int32_t shift,add,coeff_q;
  int32_t n;
  int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);

-  int32_t qp_scaled = get_scaled_qp(type, state->global->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, state->global->QP, (encoder->bitdepth-8)*6);

  shift = 20 - QUANT_SHIFT - transform_shift;

@ -286,13 +286,13 @@ void dequant(const encoder_state_t * const state, int16_t *q_coef, int16_t *coef

      for (n = 0; n < width * height; n++) {
        coeff_q = ((q_coef[n] * dequant_coef[n]) + add ) >> (shift -  qp_scaled/6);
-        coef[n] = (int16_t)CLIP(-32768,32767,coeff_q);
+        coef[n] = (coeff_t)CLIP(-32768,32767,coeff_q);
      }
    } else {
      for (n = 0; n < width * height; n++) {
        // Clip to avoid possible overflow in following shift left operation
        coeff_q   = CLIP(-32768, 32767, q_coef[n] * dequant_coef[n]);
-        coef[n] = (int16_t)CLIP(-32768, 32767, coeff_q << (qp_scaled/6 - shift));
+        coef[n] = (coeff_t)CLIP(-32768, 32767, coeff_q << (qp_scaled/6 - shift));
      }
    }
  } else {
@ -301,7 +301,7 @@ void dequant(const encoder_state_t * const state, int16_t *q_coef, int16_t *coef

    for (n = 0; n < width*height; n++) {
      coeff_q   = (q_coef[n] * scale + add) >> shift;
-      coef[n] = (int16_t)CLIP(-32768, 32767, coeff_q);
+      coef[n] = (coeff_t)CLIP(-32768, 32767, coeff_q);
    }
  }
 }
@ -398,7 +398,7 @@ int quantize_residual(encoder_state_t *const state,
    for (y = 0; y < width; ++y) {
      for (x = 0; x < width; ++x) {
        int16_t val = residual[x + y * width] + pred_in[x + y * in_stride];
-        rec_out[x + y * out_stride] = (uint8_t)CLIP(0, 255, val);
+        rec_out[x + y * out_stride] = (pixel_t)CLIP(0, PIXEL_MAX, val);
      }
    }
  } else if (rec_out != pred_in) {
--- a/src/transform.h
+++ b/src/transform.h
@ -35,9 +35,9 @@ extern const int16_t g_inv_quant_scales[6];



-void quant(const encoder_state_t *state, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state_t *state, coeff_t *coef, coeff_t *q_coef, int32_t width,
           int32_t height, int8_t type, int8_t scan_idx, int8_t block_type);
-void dequant(const encoder_state_t *state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type);
+void dequant(const encoder_state_t *state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type);

 void transformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
 void itransformskip(const encoder_control_t *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
--- a/src/videoframe.c
+++ b/src/videoframe.c
@ -110,7 +110,11 @@ cu_info_t* videoframe_get_cu(videoframe_t * const frame, const unsigned int x_in
  return &frame->cu_array->data[x_in_scu + y_in_scu * (frame->width_in_lcu << MAX_DEPTH)];
 }

+#if BIT_DEPTH == 8
 #define PSNRMAX (255.0 * 255.0)
+#else
+  #define PSNRMAX ((double)PIXEL_MAX * (double)PIXEL_MAX)
+#endif

 /**
 * \brief Calculates image PSNR value