From b52a930bed2f4aea1009542b2f6dcdf08961e0cd Mon Sep 17 00:00:00 2001
From: Ari Lemmetti <ari.lemmetti@gmail.com>
Date: Sat, 4 Apr 2020 22:14:10 +0300
Subject: [PATCH] About working with generics

---
 src/image.c                              |  21 --
 src/image.h                              |  16 +-
 src/inter.c                              | 256 ++++++++++++-----------
 src/kvazaar.h                            |   2 +
 src/strategies/generic/ipol-generic.c    |  22 +-
 src/strategies/generic/picture-generic.c |  59 +++---
 src/strategies/strategies-ipol.h         |  22 ++
 src/strategies/strategies-picture.h      |  29 ++-
 8 files changed, 226 insertions(+), 201 deletions(-)

diff --git a/src/image.c b/src/image.c
index a48e5a4f..ddd58d47 100644
--- a/src/image.c
+++ b/src/image.c
@@ -218,27 +218,6 @@ void kvz_yuv_t_free(yuv_t *yuv)
   FREE_POINTER(yuv);
 }
 
-hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size)
-{
-  // Get buffers with separate mallocs in order to take advantage of
-  // automatic buffer overrun checks.
-  hi_prec_buf_t *yuv = (hi_prec_buf_t *)malloc(sizeof(*yuv));
-  yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y));
-  yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
-  yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
-  yuv->size = luma_size;
-
-  return yuv;
-}
-
-void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
-{
-  free(yuv->y);
-  free(yuv->u);
-  free(yuv->v);
-  free(yuv);
-}
-
 static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2,
                                   const int32_t width, const int32_t height, const uint32_t stride1,
                                   const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad)
diff --git a/src/image.h b/src/image.h
index ccac4553..a6a08b0c 100644
--- a/src/image.h
+++ b/src/image.h
@@ -51,13 +51,6 @@ typedef struct {
   enum kvz_chroma_format chroma_format;
 } lcu_yuv_t;
 
-typedef struct {
-  int size;
-  int16_t *y;
-  int16_t *u;
-  int16_t *v;
-} hi_prec_buf_t;
-
 typedef struct {
   int size;
   kvz_pixel *y;
@@ -65,6 +58,12 @@ typedef struct {
   kvz_pixel *v;
 } yuv_t;
 
+typedef struct {
+  int size;
+  kvz_pixel_ip *y;
+  kvz_pixel_ip *u;
+  kvz_pixel_ip *v;
+} yuv_ip_t;
 
 kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
 kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);
@@ -82,9 +81,6 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image,
 yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size);
 void kvz_yuv_t_free(yuv_t * yuv);
 
-hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size);
-void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv);
-
 
 //Algorithms
 unsigned kvz_image_calc_sad(const kvz_picture *pic,
diff --git a/src/inter.c b/src/inter.c
index 65a981dc..a311aecf 100644
--- a/src/inter.c
+++ b/src/inter.c
@@ -52,14 +52,15 @@ typedef struct {
 } merge_candidates_t;
 
 
-static void inter_recon_frac_luma(const encoder_state_t *const state,
-  const kvz_picture *const ref,
-  int32_t xpos,
-  int32_t ypos,
-  int32_t block_width,
-  int32_t block_height,
-  const int16_t mv_param[2],
-  lcu_t *lcu)
+static void inter_recon_frac_luma(const encoder_state_t * const state,
+                                  const kvz_picture * const ref,
+                                  int32_t xpos,
+                                  int32_t ypos,
+                                  int32_t block_width,
+                                  int32_t block_height,
+                                  const int16_t mv_param[2],
+                                  yuv_t *out,
+                                  unsigned out_stride)
 {
   int mv_frac_x = (mv_param[0] & 3);
   int mv_frac_y = (mv_param[1] & 3);
@@ -349,130 +350,130 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
  *
  * \param state          encoder state
  * \param ref            picture to copy the data from
- * \param xpos           PU x position
- * \param ypos           PU y position
+ * \param pu_x           PU x position
+ * \param pu_y           PU y position
  * \param width          PU width
  * \param height         PU height
  * \param mv_param       motion vector
- * \param lcu            destination lcu
- * \param hi_prec_out    destination of high precision output, or NULL if not needed
+ * \param yuv_px         destination of regular precision output (kvz_pixel samples)
+ * \param yuv_ip         destination of high precision output, or NULL if not needed
  * \param predict_luma   Enable or disable luma prediction for this call.
  * \param predict_chroma Enable or disable chroma prediction for this call.
 */
-static void inter_recon_unipred(const encoder_state_t * const state,
-                                const kvz_picture * const ref,
-                                int32_t xpos,
-                                int32_t ypos,
-                                int32_t width,
-                                int32_t height,
-                                const int16_t mv_param[2],
-                                lcu_t *lcu,
-                                hi_prec_buf_t *hi_prec_out,
-                                bool predict_luma,
-                                bool predict_chroma)
+static unsigned inter_recon_unipred(const encoder_state_t * const state,
+                                    const kvz_picture * const ref,
+                                    int32_t pu_x,
+                                    int32_t pu_y,
+                                    int32_t pu_w,
+                                    int32_t pu_h,
+                                    int32_t out_stride_luma,
+                                    const int16_t mv_param[2],
+                                    yuv_t *yuv_px,
+                                    yuv_ip_t *yuv_ip,
+                                    bool predict_luma,
+                                    bool predict_chroma)
 {
-  const vector2d_t pu_in_tile = { xpos, ypos };
-  const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH };
-
-  const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 };
-  const vector2d_t mv_in_frame = {
-    mv_in_pu.x + pu_in_tile.x + state->tile->offset_x,
-    mv_in_pu.y + pu_in_tile.y + state->tile->offset_y
+  const vector2d_t int_mv = { mv_param[0] >> 2, mv_param[1] >> 2 };
+  const vector2d_t int_mv_in_frame = {
+    int_mv.x + pu_x + state->tile->offset_x,
+    int_mv.y + pu_y + state->tile->offset_y
   };
 
-  const bool mv_is_outside_frame = mv_in_frame.x < 0 ||
-      mv_in_frame.y < 0 ||
-      mv_in_frame.x + width > ref->width ||
-      mv_in_frame.y + height > ref->height;
+  const bool int_mv_outside_frame = int_mv_in_frame.x < 0 ||
+    int_mv_in_frame.y < 0 ||
+    int_mv_in_frame.x + pu_w > ref->width ||
+    int_mv_in_frame.y + pu_h > ref->height;
 
   // With 420, odd coordinates need interpolation.
-  const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1);
-  const int8_t fractional_luma = ((mv_param[0] & 3) || (mv_param[1] & 3));
+  const bool fractional_chroma = (int_mv.x & 1) || (int_mv.y & 1);
+  const bool fractional_luma = (mv_param[0] & 3) || (mv_param[1] & 3);
 
   // Generate prediction for luma.
   if (predict_luma) {
     if (fractional_luma) {
       // With a fractional MV, do interpolation.
-      if (state->encoder_control->cfg.bipred && hi_prec_out) {
+      if (state->encoder_control->cfg.bipred && yuv_ip) {
         inter_recon_frac_luma_hi(state, ref,
-          pu_in_tile.x, pu_in_tile.y,
-          width, height,
-          mv_param, hi_prec_out);
+          pu_x, pu_y,
+          pu_w, pu_h,
+          mv_param, yuv_ip, out_stride_luma);
       }
       else {
         inter_recon_frac_luma(state, ref,
-          pu_in_tile.x, pu_in_tile.y,
-          width, height,
-          mv_param, lcu);
+          pu_x, pu_y,
+          pu_w, pu_h,
+          mv_param, yuv_px, out_stride_luma);
       }
     }
     else {
       // With an integer MV, copy pixels directly from the reference.
-      const int lcu_pu_index = pu_in_lcu.y * LCU_WIDTH + pu_in_lcu.x;
-      if (mv_is_outside_frame) {
+      if (int_mv_outside_frame) {
         inter_cp_with_ext_border(ref->y, ref->width,
           ref->width, ref->height,
-          &lcu->rec.y[lcu_pu_index], LCU_WIDTH,
-          width, height,
-          &mv_in_frame);
+          yuv_px->y, out_stride_luma,
+          pu_w, pu_h,
+          &int_mv_in_frame);
       }
       else {
-        const int frame_mv_index = mv_in_frame.y * ref->width + mv_in_frame.x;
+        const int frame_mv_index = int_mv_in_frame.y * ref->width + int_mv_in_frame.x;
         kvz_pixels_blit(&ref->y[frame_mv_index],
-          &lcu->rec.y[lcu_pu_index],
-          width, height,
-          ref->width, LCU_WIDTH);
+          yuv_px->y,
+          pu_w, pu_h,
+          ref->width, out_stride_luma);
       }
     }
   }
 
   if (!predict_chroma) {
-    return;
+    return fractional_luma;
   }
 
+  const unsigned out_stride_c = out_stride_luma / 2;
+
   // Generate prediction for chroma.
   if (fractional_luma || fractional_chroma) {
     // With a fractional MV, do interpolation.
-    if (state->encoder_control->cfg.bipred && hi_prec_out) {
+    if (state->encoder_control->cfg.bipred && yuv_ip) {
       inter_recon_frac_chroma_hi(state, ref,
-                                    pu_in_tile.x, pu_in_tile.y,
-                                    width, height,
-                                    mv_param, hi_prec_out);
+                                    pu_x, pu_y,
+                                    pu_w, pu_h,
+                                    mv_param, yuv_ip, out_stride_c);
     } else {
       inter_recon_frac_chroma(state, ref,
-                              pu_in_tile.x, pu_in_tile.y,
-                              width, height,
-                              mv_param, lcu);
+                              pu_x, pu_y,
+                              pu_w, pu_h,
+                              mv_param, yuv_px, out_stride_c);
     }
   } else {
     // With an integer MV, copy pixels directly from the reference.
-    const int lcu_pu_index_c = pu_in_lcu.y / 2 * LCU_WIDTH_C + pu_in_lcu.x / 2;
-    const vector2d_t mv_in_frame_c = { mv_in_frame.x / 2, mv_in_frame.y / 2 };
+    const vector2d_t int_mv_in_frame_c = { int_mv_in_frame.x / 2, int_mv_in_frame.y / 2 };
 
-    if (mv_is_outside_frame) {
+    if (int_mv_outside_frame) {
       inter_cp_with_ext_border(ref->u, ref->width / 2,
                                ref->width / 2, ref->height / 2,
-                               &lcu->rec.u[lcu_pu_index_c], LCU_WIDTH_C,
-                               width / 2, height / 2,
-                               &mv_in_frame_c);
+                               yuv_px->u, out_stride_c,
+                               pu_w / 2, pu_h / 2,
+                               &int_mv_in_frame_c);
       inter_cp_with_ext_border(ref->v, ref->width / 2,
                                ref->width / 2, ref->height / 2,
-                               &lcu->rec.v[lcu_pu_index_c], LCU_WIDTH_C,
-                               width / 2, height / 2,
-                               &mv_in_frame_c);
+                               yuv_px->v, out_stride_c,
+                               pu_w / 2, pu_h / 2,
+                               &int_mv_in_frame_c);
     } else {
-      const int frame_mv_index = mv_in_frame_c.y * ref->width / 2 + mv_in_frame_c.x;
+      const int frame_mv_index = int_mv_in_frame_c.y * ref->width / 2 + int_mv_in_frame_c.x;
 
       kvz_pixels_blit(&ref->u[frame_mv_index],
-                      &lcu->rec.u[lcu_pu_index_c],
-                      width / 2, height / 2,
-                      ref->width / 2, LCU_WIDTH_C);
+                      yuv_px->u,
+                      pu_w / 2, pu_h / 2,
+                      ref->width / 2, out_stride_c);
       kvz_pixels_blit(&ref->v[frame_mv_index],
-                      &lcu->rec.v[lcu_pu_index_c],
-                      width / 2, height / 2,
-                      ref->width / 2, LCU_WIDTH_C);
+                      yuv_px->v,
+                      pu_w / 2, pu_h / 2,
+                      ref->width / 2, out_stride_c);
     }
   }
+
+  return fractional_luma | ((fractional_luma || fractional_chroma) << 1);
 }
 /**
  * \brief Reconstruct bi-pred inter PU
@@ -480,8 +481,8 @@ static void inter_recon_unipred(const encoder_state_t * const state,
  * \param state          encoder state
  * \param ref1           reference picture to copy the data from
  * \param ref2           other reference picture to copy the data from
- * \param xpos           PU x position
- * \param ypos           PU y position
+ * \param pu_x           PU x position
+ * \param pu_y           PU y position
  * \param width          PU width
  * \param height         PU height
  * \param mv_param       motion vectors
@@ -489,56 +490,60 @@ static void inter_recon_unipred(const encoder_state_t * const state,
  * \param predict_luma   Enable or disable luma prediction for this call.
  * \param predict_chroma Enable or disable chroma prediction for this call.
  */
-void kvz_inter_recon_bipred(const encoder_state_t * const state,
-                            const kvz_picture * ref1,
-                            const kvz_picture * ref2,
-                            int32_t xpos,
-                            int32_t ypos,
-                            int32_t width,
-                            int32_t height,
-                            int16_t mv_param[2][2],
-                            lcu_t* lcu,
-                            bool predict_luma,
-                            bool predict_chroma)
+void kvz_inter_recon_bipred(const encoder_state_t *const state,
+  const kvz_picture *ref1,
+  const kvz_picture *ref2,
+  int32_t pu_x,
+  int32_t pu_y,
+  int32_t pu_w,
+  int32_t pu_h,
+  int16_t mv_param[2][2],
+  lcu_t *lcu,
+  bool predict_luma,
+  bool predict_chroma)
 {
-  kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH];
-  kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C];
-  kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C];
+  // Allocate maximum size arrays for interpolated and copied samples
+  ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
+  ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
+  ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
+  ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
 
-  const int hi_prec_luma_rec0 = mv_param[0][0] & 3 || mv_param[0][1] & 3;
-  const int hi_prec_luma_rec1 = mv_param[1][0] & 3 || mv_param[1][1] & 3;
+  yuv_t px_L0;
+  px_L0.size = pu_w * pu_h;
+  px_L0.y = &px_buf_L0[0];
+  px_L0.u = &px_buf_L0[LCU_LUMA_SIZE];
+  px_L0.v = &px_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
-  const int hi_prec_chroma_rec0 = mv_param[0][0] & 7 || mv_param[0][1] & 7;
-  const int hi_prec_chroma_rec1 = mv_param[1][0] & 7 || mv_param[1][1] & 7;
+  yuv_t px_L1;
+  px_L1.size = pu_w * pu_h;
+  px_L1.y = &px_buf_L1[0];
+  px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
+  px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
-  hi_prec_buf_t* high_precision_rec0 = 0;
-  hi_prec_buf_t* high_precision_rec1 = 0;
-  if (hi_prec_chroma_rec0) high_precision_rec0 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
-  if (hi_prec_chroma_rec1) high_precision_rec1 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
+  yuv_ip_t ip_L0;
+  ip_L0.size = pu_w * pu_h;
+  ip_L0.y = &ip_buf_L0[0];
+  ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
+  ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
+  yuv_ip_t ip_L1;
+  ip_L1.size = pu_w * pu_h;
+  ip_L1.y = &ip_buf_L1[0];
+  ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];
+  ip_L1.v = &ip_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
-  //Reconstruct both predictors
-  inter_recon_unipred(state, ref1, xpos, ypos, width, height, mv_param[0], lcu, high_precision_rec0,
-                      predict_luma, predict_chroma);
-  if (!hi_prec_luma_rec0){
-    memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); // copy to temp_lcu_y
-  }
-  if (!hi_prec_chroma_rec0){
-    memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_u
-    memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_v
-  }
-  inter_recon_unipred(state, ref2, xpos, ypos, width, height, mv_param[1], lcu, high_precision_rec1,
-                      predict_luma, predict_chroma);
+  // Sample blocks from both reference picture lists.
+  // Flags state if the outputs were written to high-precision / interpolated sample buffers.
+  unsigned ip_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
+                                             &px_L0, &ip_L0, predict_luma, predict_chroma);
+  unsigned ip_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
+                                             &px_L1, &ip_L1, predict_luma, predict_chroma);
 
   // After reconstruction, merge the predictors by taking an average of each pixel
-  kvz_inter_recon_bipred_blend(hi_prec_luma_rec0, hi_prec_luma_rec1, 
-                               hi_prec_chroma_rec0, hi_prec_chroma_rec1,
-                               height, width, ypos, xpos,
-                               high_precision_rec0, high_precision_rec1,
-                               lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v, predict_luma, predict_chroma);
- 
-  if (high_precision_rec0 != 0) kvz_hi_prec_buf_t_free(high_precision_rec0);
-  if (high_precision_rec1 != 0) kvz_hi_prec_buf_t_free(high_precision_rec1);
+  kvz_inter_recon_bipred_blend(lcu, &px_L0, &px_L1, &ip_L0, &ip_L1,
+                               pu_x, pu_y, pu_w, pu_h,
+                               ip_flags_L0, ip_flags_L1,
+                               predict_luma, predict_chroma);
 }
 
 
@@ -626,12 +631,21 @@ void kvz_inter_pred_pu(const encoder_state_t * const state,
         state->frame->ref_LX[mv_idx][
           pu->inter.mv_ref[mv_idx]]];
 
+    const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
+    const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
+    yuv_t lcu_adapter; // Plane pointers into lcu->rec at the PU offsets, so unipred writes via a yuv_t.
+    lcu_adapter.size = pu_w * pu_h;
+    lcu_adapter.y = lcu->rec.y + offset_luma;
+    lcu_adapter.u = lcu->rec.u + offset_chroma;
+    lcu_adapter.v = lcu->rec.v + offset_chroma;
+
     inter_recon_unipred(state,
       ref,
       pu_x, pu_y,
       pu_w, pu_h,
+      LCU_WIDTH,
       pu->inter.mv[mv_idx],
-      lcu,
+      &lcu_adapter,
       NULL,
       predict_luma, predict_chroma);
   }
diff --git a/src/kvazaar.h b/src/kvazaar.h
index 856ea6e8..9cd97188 100644
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@@ -97,6 +97,8 @@ typedef uint8_t kvz_pixel;
 typedef uint16_t kvz_pixel;
 #endif
 
+typedef int16_t kvz_pixel_ip;
+
 /**
  * \brief Opaque data structure representing one instance of the encoder.
  */
diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c
index 67db7db9..cdd8c040 100644
--- a/src/strategies/generic/ipol-generic.c
+++ b/src/strategies/generic/ipol-generic.c
@@ -131,7 +131,16 @@ int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int
   return temp;
 }
 
-void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
+void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
+  kvz_pixel *src,
+  int16_t src_stride,
+  int width,
+  int height,
+  kvz_pixel *dst,
+  int16_t dst_stride,
+  int8_t hor_flag,
+  int8_t ver_flag,
+  const int16_t mv[2])
 {
   //TODO: horizontal and vertical only filtering
   int32_t x, y;
@@ -669,7 +678,16 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
   }
 }
 
-void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
+void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder,
+  kvz_pixel *src,
+  int16_t src_stride,
+  int width,
+  int height,
+  kvz_pixel *dst,
+  int16_t dst_stride,
+  int8_t hor_flag,
+  int8_t ver_flag,
+  const int16_t mv[2])
 {
   //TODO: horizontal and vertical only filtering
   int32_t x, y;
diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c
index 16dde988..f5fcb033 100644
--- a/src/strategies/generic/picture-generic.c
+++ b/src/strategies/generic/picture-generic.c
@@ -547,55 +547,52 @@ static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pi
   return ssd >> (2*(KVZ_BIT_DEPTH-8));
 }
 
-static void inter_recon_bipred_generic(const int hi_prec_luma_rec0,
-  const int hi_prec_luma_rec1,
-  const int hi_prec_chroma_rec0,
-  const int hi_prec_chroma_rec1,
-  int32_t height,
-  int32_t width,
-  int32_t ypos,
-  int32_t xpos,
-  const hi_prec_buf_t*high_precision_rec0,
-  const hi_prec_buf_t*high_precision_rec1,
-  lcu_t* lcu,
-  kvz_pixel* temp_lcu_y,
-  kvz_pixel* temp_lcu_u,
-  kvz_pixel* temp_lcu_v,
-  bool predict_luma,
-  bool predict_chroma) {
+static void inter_recon_bipred_generic(lcu_t *const lcu,
+  const yuv_t *const px_L0,
+  const yuv_t *const px_L1,
+  const yuv_ip_t *const ip_L0,
+  const yuv_ip_t *const ip_L1,
+  const unsigned pu_x,
+  const unsigned pu_y,
+  const unsigned pu_w,
+  const unsigned pu_h,
+  const unsigned ip_flags_L0,
+  const unsigned ip_flags_L1,
+  const bool predict_luma,
+  const bool predict_chroma) {
 
   int shift = 15 - KVZ_BIT_DEPTH;
   int offset = 1 << (shift - 1);
 
+  const unsigned pu_w_c = pu_w >> 1;
+
   int y_in_lcu;
   int x_in_lcu;
 
   //After reconstruction, merge the predictors by taking an average of each pixel
-  for (int temp_y = 0; temp_y < height; ++temp_y) {
-
-
-    for (int temp_x = 0; temp_x < width; ++temp_x) {
-      y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
-      x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));
+  for (int y = 0; y < pu_h; ++y) {
+    for (int x = 0; x < pu_w; ++x) {
+      y_in_lcu = (pu_y + y) & (LCU_WIDTH-1);
+      x_in_lcu = (pu_x + x) & (LCU_WIDTH-1);
 
       if (predict_luma) {
-        int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
-        int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample0_y = ((ip_flags_L0 & 1) ? ip_L0->y[y * pu_w + x] : (px_L0->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample1_y = ((ip_flags_L1 & 1) ? ip_L1->y[y * pu_w + x] : (px_L1->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
 
         lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift);
       }
 
-      if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) {
+      if (predict_chroma && (x < (pu_w >> 1) && y < (pu_h >> 1))) {
 
-        y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
-        x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1));
+        y_in_lcu = SUB_SCU(pu_y) / 2 + y;
+        x_in_lcu = SUB_SCU(pu_x) / 2 + x;
 
-        int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
-        int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample0_u = ((ip_flags_L0 & 2) ? ip_L0->u[y * pu_w_c + x] : (px_L0->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample1_u = ((ip_flags_L1 & 2) ? ip_L1->u[y * pu_w_c + x] : (px_L1->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
         lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift);
 
-        int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
-        int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample0_v = ((ip_flags_L0 & 2) ? ip_L0->v[y * pu_w_c + x] : (px_L0->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
+        int16_t sample1_v = ((ip_flags_L1 & 2) ? ip_L1->v[y * pu_w_c + x] : (px_L1->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
         lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift);
       }
     }
diff --git a/src/strategies/strategies-ipol.h b/src/strategies/strategies-ipol.h
index 84d66820..7e02e73a 100644
--- a/src/strategies/strategies-ipol.h
+++ b/src/strategies/strategies-ipol.h
@@ -101,6 +101,28 @@ typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const enco
 typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
 typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
 
+typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder,
+  kvz_pixel *src,
+  int16_t src_stride,
+  int width,
+  int height,
+  int16_t *dst,
+  int16_t dst_stride,
+  int8_t hor_flag,
+  int8_t ver_flag,
+  const int16_t mv[2]);
+
+typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t *const encoder,
+  kvz_pixel *src,
+  int16_t src_stride,
+  int width,
+  int height,
+  int16_t *dst,
+  int16_t dst_stride,
+  int8_t hor_flag,
+  int8_t ver_flag,
+  const int16_t mv[2]);
+
 // Declare function pointers.
 extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma;
 extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma;
diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h
index 7b2b509c..85b16bfd 100644
--- a/src/strategies/strategies-picture.h
+++ b/src/strategies/strategies-picture.h
@@ -133,22 +133,19 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
                                 int32_t width, int32_t height, uint32_t pic_stride,
                                 uint32_t ref_stride, uint32_t left, uint32_t right);
 
-typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
-    const int hi_prec_luma_rec1,
-    const int hi_prec_chroma_rec0,
-    const int hi_prec_chroma_rec1,
-    int height,
-    int width,
-    int ypos,
-    int xpos,
-    const hi_prec_buf_t*high_precision_rec0,
-    const hi_prec_buf_t*high_precision_rec1,
-    lcu_t* lcu,
-    kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH],
-    kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C],
-    kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C],
-    bool predict_luma,
-    bool predict_chroma);  
+typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
+  const yuv_t *const px_L0,
+  const yuv_t *const px_L1,
+  const yuv_ip_t *const ip_L0,
+  const yuv_ip_t *const ip_L1,
+  const unsigned pu_x,
+  const unsigned pu_y,
+  const unsigned pu_w,
+  const unsigned pu_h,
+  const unsigned ip_flags_L0,
+  const unsigned ip_flags_L1,
+  const bool predict_luma,
+  const bool predict_chroma);
 
 typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len);