diff --git a/src/cu.h b/src/cu.h
index ecb7c695..1d49d347 100644
--- a/src/cu.h
+++ b/src/cu.h
@@ -77,55 +77,6 @@ typedef enum {
   MTS_TR_NUM    = 6,
 } mts_idx;
 
-extern const uint8_t uvg_part_mode_num_parts[];
-extern const uint8_t uvg_part_mode_offsets[][4][2];
-extern const uint8_t uvg_part_mode_sizes[][4][2];
-
-/**
- * \brief Get the x coordinate of a PU.
- *
- * \param part_mode   partition mode of the containing CU
- * \param cu_width    width of the containing CU
- * \param cu_x        x coordinate of the containing CU
- * \param i           number of the PU
- * \return            location of the left edge of the PU
- */
-#define PU_GET_X(part_mode, cu_width, cu_x, i) \
-  ((cu_x) + uvg_part_mode_offsets[(part_mode)][(i)][0] * (cu_width) / 4)
-
-/**
- * \brief Get the y coordinate of a PU.
- *
- * \param part_mode   partition mode of the containing CU
- * \param cu_width    width of the containing CU
- * \param cu_y        y coordinate of the containing CU
- * \param i           number of the PU
- * \return            location of the top edge of the PU
- */
-#define PU_GET_Y(part_mode, cu_width, cu_y, i) \
-  ((cu_y) + uvg_part_mode_offsets[(part_mode)][(i)][1] * (cu_width) / 4)
-
-/**
- * \brief Get the width of a PU.
- *
- * \param part_mode   partition mode of the containing CU
- * \param cu_width    width of the containing CU
- * \param i           number of the PU
- * \return            width of the PU
- */
-#define PU_GET_W(part_mode, cu_width, i) \
-  (uvg_part_mode_sizes[(part_mode)][(i)][0] * (cu_width) / 4)
-
-/**
- * \brief Get the height of a PU.
- *
- * \param part_mode   partition mode of the containing CU
- * \param cu_width    width of the containing CU
- * \param i           number of the PU
- * \return            height of the PU
- */
-#define PU_GET_H(part_mode, cu_width, i) \
-  (uvg_part_mode_sizes[(part_mode)][(i)][1] * (cu_width) / 4)
 
 //////////////////////////////////////////////////////////////////////////
 // TYPES
@@ -142,6 +93,25 @@ enum uvg_tree_type {
   UVG_CHROMA_T = 2
 };
 
+enum split_type {
+  NO_SPLIT = 0,
+  QT_SPLIT = 1,
+  BT_HOR_SPLIT = 2, // values 2..5 have bit 1 or bit 2 set; presumably the "x" bits that CAN_QT_SPLIT tests - TODO confirm
+  BT_VER_SPLIT = 3,
+  TT_HOR_SPLIT = 4,
+  TT_VER_SPLIT = 5,
+};
+
+typedef struct  {
+  uint32_t split_tree;   //!< \brief presumably one 3-bit split code per depth (see CAN_QT_SPLIT) - TODO confirm
+  uint8_t current_depth; //!< \brief presumably the depth this tree has been filled to - TODO confirm
+} split_tree_t;
+
+
+// Each depth's split takes three bits (xxy); either x bit set means an MTT split.
+// QT split is not allowed after any MTT split; 0x6DB6DB6 masks the x bits (110 repeated).
+#define CAN_QT_SPLIT(x) (((x) & 0x6DB6DB6) == 0)
+
 /**
  * \brief Struct for CU info
  */
@@ -149,7 +119,6 @@ typedef struct
 {
   uint8_t type        : 3; //!< \brief block type, one of cu_type_t values
   uint8_t depth       : 3; //!< \brief depth / size of this block
-  uint8_t part_size   : 3; //!< \brief partition mode, one of part_mode_t values
   uint8_t tr_depth    : 3; //!< \brief transform depth
   uint8_t skipped     : 1; //!< \brief flag to indicate this block is skipped
   uint8_t merged      : 1; //!< \brief flag to indicate this block is merged
diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c
index 46552a12..6f6fc9d8 100644
--- a/src/encode_coding_tree.c
+++ b/src/encode_coding_tree.c
@@ -825,11 +825,14 @@ static void encode_transform_coeff(
  * \param depth           Depth from LCU.
  * \return if non-zero mvd is coded
  */
-int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
-                                      cabac_data_t * const cabac,
-                                      const cu_info_t * const cur_cu,
-                                      int x, int y, int width, int height,
-                                      int depth, lcu_t* lcu, double* bits_out)
+int uvg_encode_inter_prediction_unit(
+  encoder_state_t * const state,
+  cabac_data_t * const cabac,
+  const cu_info_t * const cur_cu,
+  int depth,
+  lcu_t* lcu,
+  double* bits_out,
+  const cu_loc_t* const cu_loc)
 {
   // Mergeflag
   int16_t num_cand = 0;
@@ -864,8 +867,8 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
       // Code Inter Dir
       uint8_t inter_dir = cur_cu->inter.mv_dir;
 
-      if (cur_cu->part_size == SIZE_2Nx2N || (LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4
-        uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(width) + uvg_math_floor_log2(height) + 1) >> 1));
+      if ((LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4
+        uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(cu_loc->width) + uvg_math_floor_log2(cu_loc->height) + 1) >> 1));
 
         CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc");
       }
@@ -916,16 +919,14 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
         if (lcu) {
           uvg_inter_get_mv_cand(
             state, 
-            x, y, width, height,
-            mv_cand, cur_cu, 
-            lcu, ref_list_idx);
+            mv_cand, cur_cu, lcu, ref_list_idx,
+            cu_loc);
         }
         else {
           uvg_inter_get_mv_cand_cua(
             state,
-            x, y, width, height,
-            mv_cand, cur_cu, ref_list_idx
-          );
+            mv_cand, cur_cu, ref_list_idx, cu_loc
+            );
         }
 
         uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx);
@@ -1346,11 +1347,11 @@ bool uvg_write_split_flag(
   if (no_split && allow_split) {
     // Get left and top block split_flags and if they are present and true, increase model number
     // ToDo: should use height and width to increase model, PU_GET_W() ?
-    if (left_cu && PU_GET_H(left_cu->part_size, LCU_WIDTH >> left_cu->depth, 0) < LCU_WIDTH >> depth) {
+    if (left_cu && LCU_WIDTH >> left_cu->depth < LCU_WIDTH >> depth) {
       split_model++;
     }
 
-    if (above_cu && PU_GET_W(above_cu->part_size, LCU_WIDTH >> above_cu->depth, 0) < LCU_WIDTH >> depth) {
+    if (above_cu && LCU_WIDTH >> above_cu->depth < LCU_WIDTH >> depth) {
       split_model++;
     }
 
@@ -1625,22 +1626,15 @@ void uvg_encode_coding_tree(
 
   if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
     uint8_t imv_mode = UVG_IMV_OFF;
-    
-    const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
     bool non_zero_mvd = false;
+  
+    // TODO: height for non-square blocks
+    const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc.x, cu_loc.y);
 
-    for (int i = 0; i < num_pu; ++i) {
-      // TODO: height for non-square blocks
-      const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i);
-      const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i);
-      const int pu_w = PU_GET_W(cur_cu->part_size, cu_width, i);
-      const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i);
-      const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, pu_x, pu_y);
-
-      non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL, NULL);
-      DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu);
-      uvg_hmvp_add_mv(state, x, y, pu_w, pu_h, cur_pu);
-    }
+    non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, depth, NULL, NULL, &cu_loc);
+    DBG_PRINT_MV(state, cu_loc.x, cu_loc.y, cu_loc.width, cu_loc.height, cur_pu); // same cu_loc geometry as the call above
+    uvg_hmvp_add_mv(state, x, y, width, height, cur_pu);
+    
 
     // imv mode, select between fullpel, half-pel and quarter-pel resolutions
     // 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
@@ -1661,7 +1655,7 @@ void uvg_encode_coding_tree(
       int cbf = cbf_is_set_any(cur_cu->cbf, depth);
       // Only need to signal coded block flag if not skipped or merged
       // skip = no coded residual, merge = coded residual
-      if (cur_cu->part_size != SIZE_2Nx2N || !cur_cu->merged) {
+      if (!cur_cu->merged) {
         cabac->cur_ctx = &(cabac->ctx.cu_qt_root_cbf_model);
         CABAC_BIN(cabac, cbf, "rqt_root_cbf");
       }
@@ -1747,15 +1741,18 @@ end:
 double uvg_mock_encode_coding_unit(
   encoder_state_t* const state,
   cabac_data_t* cabac,
-  int x,
-  int y,
-  int depth,
+  const cu_loc_t* const cu_loc,
   lcu_t* lcu,
   cu_info_t* cur_cu,
   enum uvg_tree_type tree_type) {
   double bits = 0;
   const encoder_control_t* const ctrl = state->encoder_control;
 
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+  // Depth is derived from the width alone. NOTE(review): 6 is presumably log2 of the LCU width - confirm.
+  const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
+
   int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
   int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
 
@@ -1846,7 +1843,7 @@ double uvg_mock_encode_coding_unit(
   
   if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
     const uint8_t imv_mode = UVG_IMV_OFF;
-    const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits);
+    const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, depth, lcu, &bits, cu_loc);
     if (ctrl->cfg.amvr && non_zero_mvd) {
       CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.imv_flag[0]), imv_mode, bits, "imv_flag");
       if (imv_mode > UVG_IMV_OFF) {
diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h
index 575f4afd..231e22ff 100644
--- a/src/encode_coding_tree.h
+++ b/src/encode_coding_tree.h
@@ -78,20 +78,19 @@ void uvg_encode_mvd(encoder_state_t * const state,
 double uvg_mock_encode_coding_unit(
   encoder_state_t* const state,
   cabac_data_t* cabac,
-  int x,
-  int y,
-  int depth,
+  const cu_loc_t* const cu_loc,
   lcu_t* lcu,
   cu_info_t* cur_cu,
   enum uvg_tree_type tree_type);
 
-int uvg_encode_inter_prediction_unit(encoder_state_t* const state,
-                                      cabac_data_t* const cabac,
-                                      const cu_info_t* const cur_cu,
-                                      int x, int y, int width, int height,
-                                      int depth, 
-                                      lcu_t* lcu,
-                                      double* bits_out);
+int uvg_encode_inter_prediction_unit(
+  encoder_state_t* const state,
+  cabac_data_t* const cabac,
+  const cu_info_t* const cur_cu,
+  int depth,
+  lcu_t* lcu,
+  double* bits_out,
+  const cu_loc_t* const cu_loc);
 
 void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
   cabac_data_t* const cabac,
diff --git a/src/filter.c b/src/filter.c
index 2d51a17c..26a57100 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -855,13 +855,11 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
       uint8_t max_filter_length_P = 0;
       uint8_t max_filter_length_Q = 0;
       const int cu_size = LCU_WIDTH >> cu_q->depth;
-      const int pu_part_idx = (y + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ? 
-                               1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0)
-                            + (x + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
-      const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx)
-                                          : PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
-      const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx) 
-                                         : x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
+      // TODO: non-square blocks. Both arms yield cu_size for now; the removed code used PU height for EDGE_HOR and PU width otherwise.
+      const int pu_size = dir == EDGE_HOR ? cu_size
+                                          : cu_size;
+      const int pu_pos = dir == EDGE_HOR ? y_coord 
+                                         : x_coord;
       get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
                             dir, tu_boundary,
                             LCU_WIDTH >> cu_p->tr_depth,
@@ -1088,13 +1086,10 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
       }
 
       const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T));
-      const int pu_part_idx = ((y << (tree_type != UVG_CHROMA_T)) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ?
-                               1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0)
-                              + ((x << (tree_type != UVG_CHROMA_T)) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0);
-      const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx)
-                                          : PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
-      const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
-                                         : x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
+      // TODO: non-square blocks. Both arms yield cu_size for now; the removed code used PU height for EDGE_HOR and PU width otherwise.
+      const int pu_size = dir == EDGE_HOR ? cu_size : cu_size;
+      const int pu_pos = dir == EDGE_HOR ? y_coord
+                                         : x_coord;
       uint8_t max_filter_length_P = 0;
       uint8_t max_filter_length_Q = 0;
       
diff --git a/src/inter.c b/src/inter.c
index 3bbef427..be353506 100644
--- a/src/inter.c
+++ b/src/inter.c
@@ -375,23 +375,26 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride,
  * \param predict_luma   Enable or disable luma prediction for this call.
  * \param predict_chroma Enable or disable chroma prediction for this call.
 */
-static unsigned inter_recon_unipred(const encoder_state_t * const state,
-                                    const uvg_picture * const ref,
-                                    int32_t pu_x,
-                                    int32_t pu_y,
-                                    int32_t pu_w,
-                                    int32_t pu_h,
-                                    int32_t out_stride_luma,
-                                    const mv_t mv_param[2],
-                                    yuv_t *yuv_px,
-                                    yuv_im_t *yuv_im,
-                                    bool predict_luma,
-                                    bool predict_chroma)
+static unsigned inter_recon_unipred(
+  const encoder_state_t * const state,
+  const uvg_picture * const ref,
+  int32_t out_stride_luma,
+  const mv_t mv_param[2],
+  yuv_t *yuv_px,
+  yuv_im_t *yuv_im,
+  bool predict_luma,
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc)
 {
   vector2d_t int_mv = { mv_param[0], mv_param[1] };
 
   uvg_change_precision_vector2d(INTERNAL_MV_PREC, 0, &int_mv);
 
+  const int pu_x = cu_loc->x;
+  const int pu_y = cu_loc->y;
+  const int pu_w = cu_loc->width;
+  const int pu_h = cu_loc->height;
+
   const vector2d_t int_mv_in_frame = {
     int_mv.x + pu_x + state->tile->offset_x,
     int_mv.y + pu_y + state->tile->offset_y
@@ -507,17 +510,15 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
  * \param predict_luma   Enable or disable luma prediction for this call.
  * \param predict_chroma Enable or disable chroma prediction for this call.
  */
-void uvg_inter_recon_bipred(const encoder_state_t *const state,
+void uvg_inter_recon_bipred(
+  const encoder_state_t *const state,
   const uvg_picture *ref1,
   const uvg_picture *ref2,
-  int32_t pu_x,
-  int32_t pu_y,
-  int32_t pu_w,
-  int32_t pu_h,
   mv_t mv_param[2][2],
   lcu_t *lcu,
   bool predict_luma,
-  bool predict_chroma)
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc)
 {
   // Allocate maximum size arrays for interpolated and copied samples
   ALIGNED(64) uvg_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
@@ -525,6 +526,11 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
   ALIGNED(64) uvg_pixel_im im_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
   ALIGNED(64) uvg_pixel_im im_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
 
+  const int pu_x = cu_loc->x;
+  const int pu_y = cu_loc->y;
+  const int pu_w = cu_loc->width;
+  const int pu_h = cu_loc->height;
+
   yuv_t px_L0;
   px_L0.size = pu_w * pu_h;
   px_L0.y = &px_buf_L0[0];
@@ -551,10 +557,10 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
 
   // Sample blocks from both reference picture lists.
   // Flags state if the outputs were written to high-precision / interpolated sample buffers.
-  unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
-                                             &px_L0, &im_L0, predict_luma, predict_chroma);
-  unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
-                                             &px_L1, &im_L1, predict_luma, predict_chroma);
+  unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_w, mv_param[0], &px_L0, &im_L0, predict_luma, predict_chroma,
+                                             cu_loc);
+  unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_w, mv_param[1], &px_L1, &im_L1, predict_luma, predict_chroma,
+                                             cu_loc);
 
   // After reconstruction, merge the predictors by taking an average of each pixel
   uvg_bipred_average(lcu, &px_L0, &px_L1, &im_L0, &im_L1,
@@ -578,19 +584,14 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
  * \param predict_luma   Enable or disable luma prediction for this call.
  * \param predict_chroma Enable or disable chroma prediction for this call.
  */
-void uvg_inter_recon_cu(const encoder_state_t * const state,
-                        lcu_t *lcu,
-                        int32_t x,
-                        int32_t y,
-                        int32_t width,
-                        bool predict_luma,
-                        bool predict_chroma)
+void uvg_inter_recon_cu(
+  const encoder_state_t * const state,
+  lcu_t *lcu,
+  bool predict_luma,
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc)
 {
-  cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
-  const int num_pu = uvg_part_mode_num_parts[cu->part_size];
-  for (int i = 0; i < num_pu; ++i) {
-    uvg_inter_pred_pu(state, lcu, x, y, width, predict_luma, predict_chroma, i);
-  }
+  uvg_inter_pred_pu(state, lcu, predict_luma, predict_chroma, cu_loc);  
 }
 
 static void ibc_recon_cu(const encoder_state_t * const state,
@@ -599,8 +600,7 @@ static void ibc_recon_cu(const encoder_state_t * const state,
                          int32_t y,
                          int32_t width,
                          bool predict_luma,
-                         bool predict_chroma,
-                         int i_pu)
+                         bool predict_chroma)
 {
   const int x_scu    = SUB_SCU(x);
   const int y_scu    = SUB_SCU(y);
@@ -668,79 +668,63 @@ static void ibc_recon_cu(const encoder_state_t * const state,
  * \param predict_chroma Enable or disable chroma prediction for this call.
  * \param i_pu           Index of the PU. Always zero for 2Nx2N. Used for SMP+AMP.
  */
-void uvg_inter_pred_pu(const encoder_state_t * const state,
-                       lcu_t *lcu,
-                       int32_t x,
-                       int32_t y,
-                       int32_t width,
-                       bool predict_luma,
-                       bool predict_chroma,
-                       int i_pu)
+void uvg_inter_pred_pu(
+  const encoder_state_t * const state,
+  lcu_t *lcu,
+  bool predict_luma,
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc)
 
 {
-  const int x_scu = SUB_SCU(x);
-  const int y_scu = SUB_SCU(y);
-  cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
-  const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
-  const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
-  const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
-  const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
-  cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
+  const int x_scu = SUB_SCU(cu_loc->x);
+  const int y_scu = SUB_SCU(cu_loc->y);
+  cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
 
-  if (cu->type == CU_IBC) {
-    ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu);
-  } else {
+  // IBC is tested before mv_dir so that an IBC block can never reach the
+  // reference-picture lookups in the two branches below.
+  if (pu->type == CU_IBC) {
+    ibc_recon_cu(state, lcu, cu_loc->x, cu_loc->y, cu_loc->width, predict_luma, predict_chroma);
+  }
+  else if (pu->inter.mv_dir == 3) {
+    // mv_dir == 3: fetch one reference picture through each of ref_LX[0] and ref_LX[1].
+    const uvg_picture *const refs[2] = {
+      state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
+      state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
+    };
+    uvg_inter_recon_bipred(state,
+                           refs[0], refs[1],
+                           pu->inter.mv, lcu,
+                           predict_luma, predict_chroma,
+                           cu_loc);
+  } else{
+    // Single reference: mv_dir - 1 is the index into ref_LX and inter.mv.
+    const int mv_idx = pu->inter.mv_dir - 1;
+    const uvg_picture *const ref =
+      state->frame->ref->images[
+        state->frame->ref_LX[mv_idx][
+          pu->inter.mv_ref[mv_idx]]];
 
-    if (pu->inter.mv_dir == 3) {
-      const uvg_picture * const refs[2] = {
-        state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
-        state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
-      };
-      uvg_inter_recon_bipred(
-        state,
-        refs[0],
-        refs[1],
-        pu_x,
-        pu_y,
-        pu_w,
-        pu_h,
-        pu->inter.mv,
-        lcu,
-        predict_luma,
-        predict_chroma);
-    } else {
-      const int                 mv_idx = pu->inter.mv_dir - 1;
-      const uvg_picture * const ref = 
-        state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]];
+    const unsigned offset_luma = SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x);
+    const unsigned offset_chroma = SUB_SCU(cu_loc->y) / 2 * LCU_WIDTH_C + SUB_SCU(cu_loc->x) / 2;
+    yuv_t lcu_adapter; // points into lcu->rec at this block's offsets; passed to inter_recon_unipred below
+    lcu_adapter.size = cu_loc->width * cu_loc->height;
+    lcu_adapter.y = lcu->rec.y + offset_luma;
+    lcu_adapter.u = lcu->rec.u + offset_chroma;
+    lcu_adapter.v = lcu->rec.v + offset_chroma;
 
-      const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
-      const unsigned offset_chroma =
-        SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
-      yuv_t lcu_adapter;
-      lcu_adapter.size = pu_w * pu_h;
-      lcu_adapter.y    = lcu->rec.y + offset_luma,
-      lcu_adapter.u    = lcu->rec.u + offset_chroma,
-      lcu_adapter.v    = lcu->rec.v + offset_chroma,
-
-      inter_recon_unipred(
-        state,
-        ref,
-        pu_x,
-        pu_y,
-        pu_w,
-        pu_h,
-        LCU_WIDTH,
-        pu->inter.mv[mv_idx],
-        &lcu_adapter,
-        NULL,
-        predict_luma,
-        predict_chroma);
-    }
+    inter_recon_unipred(state,
+                        ref,
+                        LCU_WIDTH, pu->inter.mv[mv_idx],
+                        &lcu_adapter,
+                        NULL,
+                        predict_luma,
+                        predict_chroma,
+                        cu_loc);
   }
   if (predict_chroma && state->encoder_control->cfg.jccr) {
     const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
-    uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
-    uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
+    uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
+    uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
   }
 }
 
@@ -915,14 +899,12 @@ static bool is_b0_cand_coded(int x, int y, int width, int height)
  * \param ref_idx   index in the reference list
  * \param cand_out  will be filled with C0 and C1 candidates
  */
-static void get_temporal_merge_candidates(const encoder_state_t * const state,
-                                          int32_t x,
-                                          int32_t y,
-                                          int32_t width,
-                                          int32_t height,
-                                          uint8_t ref_list,
-                                          uint8_t ref_idx,
-                                          merge_candidates_t *cand_out)
+static void get_temporal_merge_candidates(
+  const encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
+  uint8_t ref_list,
+  uint8_t ref_idx,
+  merge_candidates_t *cand_out)
 {
   /*
   Predictor block locations
@@ -951,8 +933,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
     cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref];
     int cu_per_width = ref_cu_array->width / SCU_WIDTH;
 
-    int32_t xColBr = x + width;
-    int32_t yColBr = y + height;
+    int32_t xColBr = cu_loc->x + cu_loc->width;
+    int32_t yColBr = cu_loc->y + cu_loc->height;
 
     // C0 must be available
     if (xColBr < state->encoder_control->in.width &&
@@ -972,8 +954,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
         }
       }
     }
-    int32_t xColCtr = x + (width / 2);
-    int32_t yColCtr = y + (height / 2);
+    int32_t xColCtr = cu_loc->x + (cu_loc->width / 2);
+    int32_t yColCtr = cu_loc->y + (cu_loc->height / 2);
 
     // C1 must be inside the LCU, in the center position of current CU
     if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) {
@@ -1254,10 +1236,7 @@ static void get_ibc_merge_candidates(const encoder_state_t * const state,
  * \param lcu             current LCU
  * \param cand_out        will be filled with A and B candidates
  */
-static void get_spatial_merge_candidates(int32_t x,
-                                         int32_t y,
-                                         int32_t width,
-                                         int32_t height,
+static void get_spatial_merge_candidates(const cu_loc_t* const cu_loc,
                                          int32_t picture_width,
                                          int32_t picture_height,
                                          lcu_t *lcu,
@@ -1276,8 +1255,13 @@ static void get_spatial_merge_candidates(int32_t x,
   |A1|_________|
   |A0|
   */
-  int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
-  int32_t y_local = SUB_SCU(y);
+  const int32_t x_local = SUB_SCU(cu_loc->x); //!< coordinates from top-left of this LCU
+  const int32_t y_local = SUB_SCU(cu_loc->y);
+
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+  const int width = cu_loc->width;
+  const int height = cu_loc->height;
   // A0 and A1 availability testing
   if (x != 0) {
     cu_info_t *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1);
@@ -1350,15 +1334,13 @@ static void get_spatial_merge_candidates(int32_t x,
  * \param picture_height  tile height in pixels
  * \param cand_out        will be filled with A and B candidates
  */
-static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
-                                             int32_t x,
-                                             int32_t y,
-                                             int32_t width,
-                                             int32_t height,
-                                             int32_t picture_width,
-                                             int32_t picture_height,
-                                             merge_candidates_t *cand_out,
-                                             bool wpp)
+static void get_spatial_merge_candidates_cua(
+  const cu_array_t *cua,
+  int32_t picture_width,
+  int32_t picture_height,
+  merge_candidates_t *cand_out,
+  bool wpp,
+  const cu_loc_t* const cu_loc)
 {
   /*
   Predictor block locations
@@ -1370,8 +1352,12 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
   |A1|_________|
   |A0|
   */
-  int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
-  int32_t y_local = SUB_SCU(y);
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+  const int width = cu_loc->width;
+  const int height = cu_loc->height;
+  const int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
+  const int32_t y_local = SUB_SCU(y);
   // A0 and A1 availability testing
   if (x != 0) {
     const cu_info_t *a1 = uvg_cu_array_at_const(cua, x - 1, y + height - 1);
@@ -1484,15 +1470,13 @@ static bool add_temporal_candidate(const encoder_state_t *state,
 /**
  * \brief Pick two mv candidates from the spatial and temporal candidates.
  */
-static void get_mv_cand_from_candidates(const encoder_state_t * const state,
-                                        int32_t x,
-                                        int32_t y,
-                                        int32_t width,
-                                        int32_t height,
-                                        const merge_candidates_t *merge_cand,
-                                        const cu_info_t * const cur_cu,
-                                        int8_t reflist,
-                                        mv_t mv_cand[2][2])
+static void get_mv_cand_from_candidates(
+  const encoder_state_t * const state,
+  const merge_candidates_t *merge_cand,
+  const cu_info_t * const cur_cu,
+  int8_t reflist,
+  mv_t mv_cand[2][2],
+  int ctu_row)
 {
   const cu_info_t *const *a = merge_cand->a;
   const cu_info_t *const *b = merge_cand->b;
@@ -1552,7 +1536,6 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
 
   if (candidates < AMVP_MAX_NUM_CANDS)
   {
-    const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
     const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
     int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];
     for (int i = 0; i < MIN(/*MAX_NUM_HMVP_AVMPCANDS*/4,num_cand); i++) {
@@ -1595,32 +1578,30 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
  * \param lcu       current LCU
  * \param reflist   reflist index (either 0 or 1)
  */
-void uvg_inter_get_mv_cand(const encoder_state_t * const state,
-                           int32_t x,
-                           int32_t y,
-                           int32_t width,
-                           int32_t height,
-                           mv_t mv_cand[2][2],
-                           const cu_info_t  * const cur_cu,
-                           lcu_t *lcu,
-                           int8_t reflist)
+void uvg_inter_get_mv_cand(
+  const encoder_state_t * const state,
+  mv_t mv_cand[2][2],
+  const cu_info_t  * const cur_cu,
+  lcu_t *lcu,
+  int8_t reflist,
+  const cu_loc_t* const cu_loc)
 {
   merge_candidates_t merge_cand = { 0 };
   const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
   if (cur_cu->type == CU_IBC) {
     mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
-    get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
+    get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
     memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
     memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
-  } else {
-    get_spatial_merge_candidates(x, y, width, height,
-                                 state->tile->frame->width,
-                                 state->tile->frame->height,
-                                 lcu,
-                                 &merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp);
-    get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
-    get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
+  } else { 
+    get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
+                                 &merge_cand,
+                                 parallel_merge_level,
+                                 state->encoder_control->cfg.wpp);
+    get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
+    get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
   }
+    
   uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
   uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
 }
@@ -1637,31 +1618,29 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
  * \param cur_cu    current CU
  * \param reflist   reflist index (either 0 or 1)
  */
-void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
-                               int32_t x,
-                               int32_t y,
-                               int32_t width,
-                               int32_t height,
-                               mv_t mv_cand[2][2],
-                               const cu_info_t* cur_cu,
-                               int8_t reflist)
+void uvg_inter_get_mv_cand_cua(
+  const encoder_state_t * const state,
+  mv_t mv_cand[2][2],
+  const cu_info_t* cur_cu,
+  int8_t reflist,
+  const cu_loc_t* const cu_loc)
 {
   merge_candidates_t merge_cand = { 0 };
 
   const cu_array_t *cua = state->tile->frame->cu_array;
   if (cur_cu->type == CU_IBC) {
     mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
-    get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand);
+    get_ibc_merge_candidates(state, cur_cu, NULL,cua,cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
     memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
     memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);    
   } else {
     get_spatial_merge_candidates_cua(cua,
-                                     x, y, width, height,
-                                     state->tile->frame->width, state->tile->frame->height,
-                                     &merge_cand, state->encoder_control->cfg.wpp);
-    get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
-    get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
+                                     state->tile->frame->width, state->tile->frame->height, &merge_cand, state->encoder_control->cfg.wpp,
+                                     cu_loc);
+    get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
+    get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
   }
+
   uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
   uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
 }
@@ -1885,23 +1864,23 @@ void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv) {
  * \param lcu       lcu containing the block
  * \return          number of merge candidates
  */
-uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
-                                 int32_t x, int32_t y,
-                                 int32_t width, int32_t height,
-                                 bool use_a1, bool use_b1,
-                                 inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
-                                 lcu_t *lcu)
+uint8_t uvg_inter_get_merge_cand(
+  const encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
+  inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
+  lcu_t *lcu)
 {
   uint8_t candidates = 0;
   int8_t zero_idx = 0;
   const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
   merge_candidates_t merge_cand = { 0 };
   const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
+  // Current CU
+  cu_info_t         *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(cu_loc->x), SUB_SCU(cu_loc->y));
 
-  cu_info_t         *cur_cu        = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
   if(cur_cu->type == CU_IBC) {
     mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
-    get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand);
+    get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
     for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) {
       mv_cand[i].dir = 1;
       mv_cand[i].mv[0][0] = ibc_mv_cand[i][0];
@@ -1909,18 +1888,16 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
     }
     return IBC_MRG_MAX_NUM_CANDS;
   }
-
-  get_spatial_merge_candidates(x, y, width, height,
-                               state->tile->frame->width,
-                               state->tile->frame->height,
-                               lcu,
-                               &merge_cand, parallel_merge_level, state->encoder_control->cfg.wpp);
+  get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
+                               &merge_cand,
+                               parallel_merge_level,
+                               state->encoder_control->cfg.wpp);
 
   const cu_info_t **a = merge_cand.a;
   const cu_info_t **b = merge_cand.b;
 
-  if (!use_a1) a[1] = NULL;
-  if (!use_b1) b[1] = NULL;
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
 
   if (different_mer(x, y, x, y - 1, parallel_merge_level) && add_merge_candidate(b[1], NULL, NULL, &mv_cand[candidates])) candidates++;
   if (different_mer(x, y, x - 1, y, parallel_merge_level) && add_merge_candidate(a[1], b[1], NULL, &mv_cand[candidates])) candidates++;
@@ -1941,7 +1918,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
     for (int reflist = 0; reflist <= max_reflist; reflist++) {
       // Fetch temporal candidates for the current CU
       // ToDo: change collocated_from_l0_flag to allow L1 ref
-      get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand);
+      get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
       // TODO: enable L1 TMVP candidate
       // get_temporal_merge_candidates(state, x, y, width, height, 2, 0, &merge_cand);
 
@@ -1973,7 +1950,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
   if (candidates == max_num_cands) return candidates;
 
   if (candidates != max_num_cands - 1) {
-    const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
+    const uint32_t ctu_row = (cu_loc->y >> LOG2_LCU_WIDTH);
     const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
     int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];
 
diff --git a/src/inter.h b/src/inter.h
index 45f5e5ea..4d5fccd5 100644
--- a/src/inter.h
+++ b/src/inter.h
@@ -58,61 +58,51 @@ void uvg_change_precision_vector2d(int src, int dst, vector2d_t* mv);
 void uvg_round_precision(int src, int dst, mv_t* hor, mv_t* ver);
 void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv);
 
-void uvg_inter_recon_cu(const encoder_state_t * const state,
-                        lcu_t *lcu,
-                        int32_t x,
-                        int32_t y,
-                        int32_t width,
-                        bool predict_luma,
-                        bool predict_chroma);
-
-void uvg_inter_pred_pu(const encoder_state_t * const state,
+void uvg_inter_recon_cu(
+  const encoder_state_t * const state,
   lcu_t *lcu,
-  int32_t x,
-  int32_t y,
-  int32_t width,
   bool predict_luma,
   bool predict_chroma,
-  int i_pu);
+  const cu_loc_t* const cu_loc);
+
+void uvg_inter_pred_pu(
+  const encoder_state_t * const state,
+  lcu_t *lcu,
+  bool predict_luma,
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc);
 
 void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu);
 
-void uvg_inter_recon_bipred(const encoder_state_t * const state,
-                            const uvg_picture * ref1,
-                            const uvg_picture * ref2,
-                            int32_t xpos,
-                            int32_t ypos,
-                            int32_t width,
-                            int32_t height,
-                            mv_t mv_param[2][2],
-                            lcu_t* lcu,
-                            bool predict_luma,
-                            bool predict_chroma);
+void uvg_inter_recon_bipred(
+  const encoder_state_t * const state,
+  const uvg_picture * ref1,
+  const uvg_picture * ref2,
+  mv_t mv_param[2][2],
+  lcu_t* lcu,
+  bool predict_luma,
+  bool predict_chroma,
+  const cu_loc_t* const cu_loc);
 
 
-void uvg_inter_get_mv_cand(const encoder_state_t * const state,
-                           int32_t x,
-                           int32_t y,
-                           int32_t width,
-                           int32_t height,
-                           mv_t mv_cand[2][2],
-                           const cu_info_t* cur_cu,
-                           lcu_t *lcu,
-                           int8_t reflist);
+void uvg_inter_get_mv_cand(
+  const encoder_state_t * const state,
+  mv_t mv_cand[2][2],
+  const cu_info_t* cur_cu,
+  lcu_t *lcu,
+  int8_t reflist,
+  const cu_loc_t* const cu_loc);
 
-void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state,
-                               int32_t x,
-                               int32_t y,
-                               int32_t width,
-                               int32_t height,
-                               mv_t mv_cand[2][2],
-                               const cu_info_t* cur_cu,
-                               int8_t reflist);
+void uvg_inter_get_mv_cand_cua(
+  const encoder_state_t * const state,
+  mv_t mv_cand[2][2],
+  const cu_info_t* cur_cu,
+  int8_t reflist,
+  const cu_loc_t* const cu_loc);
 
-uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
-                                 int32_t x, int32_t y,
-                                 int32_t width, int32_t height,
-                                 bool use_a1, bool use_b1,
-                                 inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
-                                 lcu_t *lcu);
+uint8_t uvg_inter_get_merge_cand(
+  const encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
+  inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
+  lcu_t *lcu);
 #endif
diff --git a/src/search.c b/src/search.c
index 0b51412b..d61be039 100644
--- a/src/search.c
+++ b/src/search.c
@@ -166,7 +166,6 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
       cu_info_t *to = LCU_GET_CU_AT_PX(lcu, x, y);
       to->type      = cu->type;
       to->depth     = cu->depth;
-      to->part_size = cu->part_size;
       to->qp        = cu->qp;
       //to->tr_idx    = cu->tr_idx;
       to->lfnst_idx = cu->lfnst_idx;
@@ -191,22 +190,6 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
   }
 }
 
-static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type)
-{
-  const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
-  const int num_pu = uvg_part_mode_num_parts[part_mode];
-
-  for (int i = 0; i < num_pu; ++i) {
-    const int x_pu      = PU_GET_X(part_mode, cu_width, x_local, i);
-    const int y_pu      = PU_GET_Y(part_mode, cu_width, y_local, i);
-    const int width_pu  = PU_GET_W(part_mode, cu_width, i);
-    const int height_pu = PU_GET_H(part_mode, cu_width, i);
-
-    cu_info_t *pu  = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
-    pu->type = type;
-    lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
-  }
-}
 
 static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu)
 {
@@ -559,7 +542,7 @@ static double cu_rd_cost_tr_split_accurate(
     int cbf = cbf_is_set_any(pred_cu->cbf, depth);
     // Only need to signal coded block flag if not skipped or merged
     // skip = no coded residual, merge = coded residual
-    if (pred_cu->type != CU_INTRA && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) {
+    if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) {
       CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
     }
 
@@ -876,18 +859,20 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
  */
 static double search_cu(
   encoder_state_t* const state,
-  int x,
-  int y,
-  int depth,
+  const cu_loc_t* const cu_loc,
   lcu_t* work_tree,
   enum uvg_tree_type
-  tree_type)
+  tree_type,
+  const split_tree_t split_tree)
 {
+  const int depth = split_tree.current_depth;
   const encoder_control_t* ctrl = state->encoder_control;
   const videoframe_t * const frame = state->tile->frame;
-  const int cu_width = tree_type != UVG_CHROMA_T ? LCU_WIDTH >> depth : LCU_WIDTH_C >> depth;
-  const int cu_height = cu_width; // TODO: height
-  const int luma_width = LCU_WIDTH >> depth;
+  const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
+  const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+  const int luma_width = cu_loc->width;
   assert(cu_width >= 4);
   double cost = MAX_DOUBLE;
   double inter_zero_coeff_cost = MAX_DOUBLE;
@@ -896,7 +881,7 @@ static double search_cu(
   cabac_data_t pre_search_cabac;
   memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
 
-  const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
+  const uint32_t ctu_row = (cu_loc->y >> LOG2_LCU_WIDTH);
   const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
 
   cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS];
@@ -913,7 +898,7 @@ static double search_cu(
     int32_t max;
   } pu_depth_inter, pu_depth_intra;
 
-  lcu_t *const lcu = &work_tree[depth];
+  lcu_t *const lcu = &work_tree[split_tree.current_depth];
 
   int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
   int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
@@ -947,10 +932,9 @@ static double search_cu(
 
   cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
   // Assign correct depth
-  cur_cu->depth = (depth > MAX_DEPTH) ? MAX_DEPTH : depth;
-  cur_cu->tr_depth = (depth > 0) ? depth : 1;
+  cur_cu->depth = (split_tree.current_depth > MAX_DEPTH) ? MAX_DEPTH : split_tree.current_depth;
+  cur_cu->tr_depth = cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH ? 1 : split_tree.current_depth;
   cur_cu->type = CU_NOTSET;
-  cur_cu->part_size = SIZE_2Nx2N;
   cur_cu->qp = state->qp;
   cur_cu->bdpcmMode = 0;
   cur_cu->tr_idx = 0;
@@ -969,9 +953,9 @@ static double search_cu(
     int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max;
     bool can_use_inter =
       state->frame->slicetype != UVG_SLICE_I &&
-      depth <= MAX_DEPTH &&
+      split_tree.current_depth <= MAX_DEPTH &&
       (
-        WITHIN(depth, pu_depth_inter.min, pu_depth_inter.max) ||
+        WITHIN(split_tree.current_depth, pu_depth_inter.min, pu_depth_inter.max) ||
         // When the split was forced because the CTU is partially outside the
         // frame, we permit inter coding even if pu_depth_inter would
         // otherwise forbid it.
@@ -983,10 +967,9 @@ static double search_cu(
       double mode_cost;
       double mode_bitcost;
       uvg_search_cu_inter(state,
-                          x, y,
-                          depth,
-                          lcu,
-                          &mode_cost, &mode_bitcost);
+                          cu_loc, lcu,
+                          &mode_cost,
+                          &mode_bitcost);
       if (mode_cost < cost) {
         cost = mode_cost;
         inter_bitcost = mode_bitcost;
@@ -1004,7 +987,7 @@ static double search_cu(
 
     int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max;
     bool can_use_intra =
-      (WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) ||
+      (WITHIN(split_tree.current_depth, pu_depth_intra.min, pu_depth_intra.max) ||
         // When the split was forced because the CTU is partially outside
         // the frame, we permit intra coding even if pu_depth_intra would
         // otherwise forbid it.
@@ -1048,7 +1031,7 @@ static double search_cu(
         int8_t intra_mode = intra_search.pred_cu.intra.mode;
 
         // TODO: This heavily relies to square CUs
-        if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
+        if ((split_tree.current_depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
 
           intra_search.pred_cu.joint_cb_cr = 0;
           // There is almost no benefit to doing the chroma mode search for
@@ -1097,7 +1080,7 @@ static double search_cu(
         }
         intra_search.pred_cu.intra.mode = intra_mode;
         if(tree_type == UVG_CHROMA_T) {
-          uvg_lcu_fill_trdepth(lcu, x_local, y_local, depth, depth, tree_type);
+          uvg_lcu_fill_trdepth(lcu, x_local, y_local, split_tree.current_depth, split_tree.current_depth, tree_type);
         }
       }
       if (intra_cost < cost) {
@@ -1120,8 +1103,7 @@ static double search_cu(
       double mode_cost;
       double mode_bitcost;
       uvg_search_cu_ibc(state,
-                        x, y,
-                        depth,
+                        cu_loc,
                         lcu,
                         &mode_cost, &mode_bitcost);
       if (mode_cost < cost) {
@@ -1138,11 +1120,10 @@ static double search_cu(
     // Reconstruct best mode because we need the reconstructed pixels for
     // mode search of adjacent CUs.
     if (cur_cu->type == CU_INTRA) {
-      assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
 
       bool recon_chroma = true;
       bool recon_luma = tree_type != UVG_CHROMA_T;
-      if ((depth == 4) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
+      if ((split_tree.current_depth == 4) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
         recon_chroma = false; 
       }
       lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@@ -1153,7 +1134,7 @@ static double search_cu(
                          lcu, tree_type,recon_luma,recon_chroma);
 
 
-      if(depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
+      if(split_tree.current_depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
         intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
         uvg_intra_recon_cu(state,
                            x, y,
@@ -1168,8 +1149,8 @@ static double search_cu(
       const int split_type = intra_search.pred_cu.intra.isp_mode;
       const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
 
-      const int cbf_cb = cbf_is_set(cur_cu->cbf, depth, COLOR_U);
-      const int cbf_cr = cbf_is_set(cur_cu->cbf, depth, COLOR_V);
+      const int cbf_cb = cbf_is_set(cur_cu->cbf, split_tree.current_depth, COLOR_U);
+      const int cbf_cr = cbf_is_set(cur_cu->cbf, split_tree.current_depth, COLOR_V);
       const int jccr = cur_cu->joint_cb_cr;
       for (int i = 0; i < split_num; ++i) {
         cu_loc_t isp_loc;
@@ -1181,15 +1162,14 @@ static double search_cu(
         uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y);
         cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH);
         bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1;
-        // ISP_TODO: here, cbfs are also set for chroma for all ISP splits, is this behavior wanted?
-        cbf_clear(&split_cu->cbf, depth, COLOR_Y);
-        cbf_clear(&split_cu->cbf, depth, COLOR_U);
-        cbf_clear(&split_cu->cbf, depth, COLOR_V);
+        cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_Y);
+        cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_U);
+        cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_V);
         if (cur_cbf) {
-          cbf_set(&split_cu->cbf, depth, COLOR_Y);
+          cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_Y);
         }
-        if(cbf_cb) cbf_set(&split_cu->cbf, depth, COLOR_U);
-        if(cbf_cr) cbf_set(&split_cu->cbf, depth, COLOR_V);
+        if(cbf_cb) cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_U);
+        if(cbf_cr) cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_V);
         split_cu->joint_cb_cr = jccr;
       }
       lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@@ -1205,24 +1185,20 @@ static double search_cu(
         }
         // Reset transform depth because intra messes with them.
         // This will no longer be necessary if the transform depths are not shared.
-        int tr_depth = MAX(1, depth);
-        if (cur_cu->part_size != SIZE_2Nx2N) {
-          tr_depth = depth + 1;
-        }
+        int tr_depth = MAX(1, split_tree.current_depth);
+
         uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type);
 
         const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
-        uvg_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma);
+        uvg_inter_recon_cu(state, lcu, true, has_chroma, cu_loc);
 
         if (ctrl->cfg.zero_coeff_rdo && !ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
           //Calculate cost for zero coeffs
-          inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda;
+          inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, split_tree.current_depth) + inter_bitcost * state->lambda;
 
         }
         cu_loc_t loc;
-        const int width = LCU_WIDTH >> depth;
-        const int height = width; // TODO: height for non-square blocks
-        uvg_cu_loc_ctor(&loc, x, y, width, height);
+        uvg_cu_loc_ctor(&loc, x, y, cu_width, cu_height);
         uvg_quantize_lcu_residual(state,
                                   true, has_chroma && !cur_cu->joint_cb_cr,
                                   cur_cu->joint_cb_cr, &loc,
@@ -1232,9 +1208,9 @@ static double search_cu(
                                   false,
           tree_type);
 
-        int cbf = cbf_is_set_any(cur_cu->cbf, depth);
+        int cbf = cbf_is_set_any(cur_cu->cbf, split_tree.current_depth);
 
-        if (cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) {
+        if (cur_cu->merged && !cbf) {
           cur_cu->merged = 0;
           cur_cu->skipped = 1;
           // Selecting skip reduces bits needed to code the CU
@@ -1244,7 +1220,7 @@ static double search_cu(
           inter_bitcost += cur_cu->merge_idx;        
         }
       }
-      lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type);
+      lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
       lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
     }
   }
@@ -1253,19 +1229,13 @@ static double search_cu(
     double bits = 0;
     cabac_data_t* cabac  = &state->search_cabac;
     cabac->update = 1;
+
+    bits += uvg_mock_encode_coding_unit(
+      state,
+      cabac,
+      cu_loc, lcu, cur_cu,
+      tree_type);
 
-    if(cur_cu->type != CU_INTRA || cur_cu->part_size == SIZE_2Nx2N) {
-      bits += uvg_mock_encode_coding_unit(
-        state,
-        cabac,
-        x, y, depth,
-        lcu,
-        cur_cu,
-        tree_type);
-    }
-    else {
-      assert(0);
-    }
     
     cost = bits * state->lambda;
 
@@ -1275,15 +1245,15 @@ static double search_cu(
       cost = inter_zero_coeff_cost;
 
       // Restore saved pixels from lower level of the working tree.
-      copy_cu_pixels(x_local, y_local, cu_width, &work_tree[depth + 1], lcu, tree_type);
+      copy_cu_pixels(x_local, y_local, cu_width, &work_tree[split_tree.current_depth + 1], lcu, tree_type);
 
-      if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
+      if (cur_cu->merged) {
         cur_cu->merged = 0;
         cur_cu->skipped = 1;
         lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
       }
 
-      if (cur_cu->tr_depth != depth) {
+      if (cur_cu->tr_depth != 0) {
         // Reset transform depth since there are no coefficients. This
         // ensures that CBF is cleared for the whole area of the CU.
         uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type);
@@ -1299,12 +1269,12 @@ static double search_cu(
     // If the CU is partially outside the frame, we need to split it even
     // if pu_depth_intra and pu_depth_inter would not permit it.
     cur_cu->type == CU_NOTSET ||
-    (depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) ||
+    (split_tree.current_depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) ||
     (state->frame->slicetype != UVG_SLICE_I &&
-      depth < pu_depth_inter.max);
+      split_tree.current_depth < pu_depth_inter.max);
 
   if(state->encoder_control->cabac_debug_file) {
-    fprintf(state->encoder_control->cabac_debug_file, "S %4d %4d %d %d", x, y, depth, tree_type);
+    fprintf(state->encoder_control->cabac_debug_file, "S %4d %4d %d %d", x, y, split_tree.current_depth, tree_type);
     fwrite(&state->search_cabac.ctx, 1,  sizeof(state->search_cabac.ctx), state->encoder_control->cabac_debug_file);
   }
 
@@ -1312,7 +1282,7 @@ static double search_cu(
   if (can_split_cu) {
     int half_cu = cu_width >> (tree_type != UVG_CHROMA_T);
     double split_cost = 0.0;
-    int cbf = cbf_is_set_any(cur_cu->cbf, depth);
+    int cbf = cbf_is_set_any(cur_cu->cbf, split_tree.current_depth);
     cabac_data_t post_seach_cabac;
     memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
     memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
@@ -1320,7 +1290,7 @@ static double search_cu(
 
     double split_bits = 0;
 
-    if (depth < MAX_DEPTH) {
+    if (split_tree.current_depth < MAX_DEPTH) {
 
       state->search_cabac.update = 1;
       // Add cost of cu_split_flag.
@@ -1364,10 +1334,24 @@ static double search_cu(
     // It is ok to interrupt the search as soon as it is known that
     // the split costs at least as much as not splitting.
     if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
-      if (split_cost < cost) split_cost += search_cu(state, x,           y,           depth + 1, work_tree, tree_type);
-      if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y,           depth + 1, work_tree, tree_type);
-      if (split_cost < cost) split_cost += search_cu(state, x,           y + half_cu, depth + 1, work_tree, tree_type);
-      if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type);
+      const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << split_tree.current_depth, split_tree.current_depth + 1};
+      cu_loc_t new_cu_loc;
+      if (split_cost < cost) {
+        uvg_cu_loc_ctor(&new_cu_loc, x, y, half_cu, half_cu);
+        split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
+      }
+      if (split_cost < cost) {
+        uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_cu, half_cu);
+        split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
+      }
+      if (split_cost < cost) {
+        uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_cu, half_cu);
+        split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
+      }
+      if (split_cost < cost) {
+        uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_cu, half_cu);
+        split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
+      }
     } else {
       split_cost = INT_MAX;
     }
@@ -1401,7 +1385,6 @@ static double search_cu(
 
         cur_cu->intra = cu_d1->intra;
         cur_cu->type = CU_INTRA;
-        cur_cu->part_size = SIZE_2Nx2N;
 
         // Disable MRL in this case
         cur_cu->intra.multi_ref_idx = 0;
@@ -1687,14 +1670,17 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
 
   int tree_type = state->frame->slicetype == UVG_SLICE_I
   && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
+
+  cu_loc_t start;
+  uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH);
+  split_tree_t split_tree = { 0, 0 };
   // Start search from depth 0.
   double cost = search_cu(
-    state,
-    x,
-    y,
-    0,
+    state,
+    &start,
     work_tree,
-    tree_type);
+    tree_type,
+    split_tree);
 
   // Save squared cost for rate control.
   if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
@@ -1710,12 +1696,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
 
   if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
     cost = search_cu(
-      state,
-      x,
-      y,
-      0,
+      state, &start,
       work_tree,
-      UVG_CHROMA_T);
+      UVG_CHROMA_T, split_tree);
 
     if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
       uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost;
diff --git a/src/search_ibc.c b/src/search_ibc.c
index 44f9ac50..b7067c8c 100644
--- a/src/search_ibc.c
+++ b/src/search_ibc.c
@@ -109,8 +109,10 @@ static INLINE bool fracmv_within_ibc_range(const ibc_search_info_t *info, int x,
 }
 
 
-static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y)
+static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
 {  
+  const uint32_t x = loc->x;
+  const uint32_t y = loc->y;
   const int x_scu    = SUB_SCU(x);
   const int y_scu    = SUB_SCU(y);
 
@@ -132,9 +134,11 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
   cur_cu->inter.mv[0][0]                  = mv_x * (1 << INTERNAL_MV_PREC);;
   cur_cu->inter.mv[0][1]                  = mv_y * (1 << INTERNAL_MV_PREC);;
 
-  uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400);
+  uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
   
   *cur_cu = cu_backup;
+  uint32_t width = loc->width;
+  uint32_t height = loc->height;
 
   cost = uvg_satd_any_size(width,
                            width,
@@ -162,8 +166,10 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
 }
 
 
-static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y)
+static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
 {  
+  const uint32_t x = loc->x;
+  const uint32_t y = loc->y;
   cu_info_t *cur_cu    = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
 
   cu_info_t cu_backup  = *cur_cu;
@@ -173,6 +179,8 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
   const int y_scu    = SUB_SCU(y);
   const uint32_t offset = x_scu + y_scu * LCU_WIDTH;
   const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
+  const uint32_t width = loc->width;
+  const uint32_t height = loc->height;
 
   cur_cu->type    = CU_IBC;
   cur_cu->inter.mv_dir   = 1;
@@ -183,7 +191,7 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
   cur_cu->inter.mv[0][0]                  = mv_x * (1 << INTERNAL_MV_PREC);;
   cur_cu->inter.mv[0][1]                  = mv_y * (1 << INTERNAL_MV_PREC);;
 
-  uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400);
+  uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
   
   *cur_cu = cu_backup;
 
@@ -235,8 +243,11 @@ static bool check_mv_cost(ibc_search_info_t *info,
 
   double bitcost = 0;
   double cost    = MAX_DOUBLE;
+  cu_loc_t loc;
+  uvg_cu_loc_ctor(&loc, info->origin.x, info->origin.y, info->width, info->height);
 
-  cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, info->origin.x, info->origin.y, info->width, x, y);
+
+  cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, &loc, x, y);
 
   if (cost >= *best_cost) return false;
 
@@ -246,7 +257,7 @@ static bool check_mv_cost(ibc_search_info_t *info,
       info->mv_cand,
       NULL,
       0,
-      NULL,
+      0,
       &bitcost
   );
 
@@ -782,63 +793,46 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
  * \param amvp        Return searched AMVP PUs sorted by costs
  * \param merge       Return searched Merge PUs sorted by costs
  */
-static void search_pu_ibc(encoder_state_t * const state,
-  int x_cu, int y_cu,
-  int depth,
-  part_mode_t part_mode,
-  int i_pu,
-  unit_stats_map_t *amvp,
-  unit_stats_map_t *merge,
-  ibc_search_info_t *info)
+static void search_pu_ibc(
+  encoder_state_t * const state,
+  const cu_loc_t * const  cu_loc,
+  unit_stats_map_t       *amvp,
+  unit_stats_map_t       *merge,
+  ibc_search_info_t      *info)
 {
-  const uvg_config *cfg = &state->encoder_control->cfg;
-  const videoframe_t * const frame = state->tile->frame;
-  const int width_cu = LCU_WIDTH >> depth;
-  const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
-  const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
-  const int width = PU_GET_W(part_mode, width_cu, i_pu);
-  const int height = PU_GET_H(part_mode, width_cu, i_pu);
-
-  // Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
-  // nRx2N partitions.
-  const bool merge_a1 = i_pu == 0 || width >= height;
-  // Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
-  // 2NxnD partitions.
-  const bool merge_b1 = i_pu == 0 || width <= height;
-
+  const uvg_config          *cfg      = &state->encoder_control->cfg;
+  const videoframe_t * const frame    = state->tile->frame;
+  const int                  width_cu = cu_loc->width;
+  const int                  height_cu= cu_loc->height;
 
   lcu_t                     *lcu      = info->lcu;
-  const int x_local = SUB_SCU(x);
-  const int y_local = SUB_SCU(y);
-  cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
-  cur_pu->type = CU_IBC;
-  cur_pu->part_size = part_mode;
-  cur_pu->depth = depth;
-  cur_pu->tr_depth = depth;
-  cur_pu->qp = state->qp;
-  cur_pu->inter.mv_dir = 1;
+  const int                  x_local  = SUB_SCU(cu_loc->x);
+  const int                  y_local  = SUB_SCU(cu_loc->y);
+  cu_info_t                 *cur_pu   = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
+  cur_pu->type                        = CU_IBC;
+  cur_pu->qp                          = state->qp;
+  cur_pu->inter.mv_dir                = 1;
 
   // Default to candidate 0
   CU_SET_MV_CAND(cur_pu, 0, 0);
-  
+
   FILL(*info, 0);
 
-  info->state          = state;
-  info->pic            = frame->source;
-  info->origin.x       = x;
-  info->origin.y       = y;
-  info->width          = width;
-  info->height         = height;
-  info->mvd_cost_func  = cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
-  info->optimized_sad  = uvg_get_optimized_sad(width);
-  info->lcu            = lcu;
+  info->state    = state;
+  info->pic      = frame->source;
+  info->origin.x = cu_loc->x;
+  info->origin.y = cu_loc->y;
+  info->width    = width_cu;
+  info->height   = height_cu;
+  info->mvd_cost_func =
+    cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
+  info->optimized_sad = uvg_get_optimized_sad(width_cu);
+  info->lcu           = lcu;
 
   // Search for merge mode candidates
   info->num_merge_cand = uvg_inter_get_merge_cand(
                           state,
-                          x, y,
-                          width, height,
-                          merge_a1, merge_b1,
+                          cu_loc,
                           info->merge_cand,
                           lcu);
 
@@ -853,7 +847,7 @@ static void search_pu_ibc(encoder_state_t * const state,
 #ifdef COMPLETE_PRED_MODE_BITS
   // Technically counting these bits would be correct, however counting
   // them universally degrades quality so this block is disabled by default
-  const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0);
+  const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL)], 0);
 #else
   const double no_skip_flag = 0;
 #endif
@@ -875,7 +869,7 @@ static void search_pu_ibc(encoder_state_t * const state,
     {
       continue;
     }
-    uvg_inter_pred_pu(state, info->lcu, x_cu, y_cu, width_cu, true, false, i_pu);
+    uvg_inter_pred_pu(state, info->lcu, true, false, cu_loc);
     merge->unit[merge->size] = *cur_pu;
     merge->unit[merge->size].type = CU_IBC;
     merge->unit[merge->size].merge_idx = merge_idx;
@@ -883,11 +877,11 @@ static void search_pu_ibc(encoder_state_t * const state,
     merge->unit[merge->size].skipped = false;
 
     double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
-    if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
-      uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits);
+    if(state->encoder_control->cfg.rdo >= 2) {
+      uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
     }
     else {
-      merge->cost[merge->size] = uvg_satd_any_size(width, height,
+      merge->cost[merge->size] = uvg_satd_any_size(width_cu, height_cu,
         lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
         lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
       bits += no_skip_flag;
@@ -909,7 +903,7 @@ static void search_pu_ibc(encoder_state_t * const state,
     
   // Early Skip Mode Decision
   bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
-  if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) {
+  if (cfg->early_skip) {
     for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
       if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
         merge->size = 1;
@@ -919,6 +913,7 @@ static void search_pu_ibc(encoder_state_t * const state,
         merge->keys[0] = 0;
       }
       else if(cfg->rdo < 2) {
+        const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
         // Reconstruct blocks with merge candidate.
         // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
         // and chroma exists.
@@ -927,18 +922,18 @@ static void search_pu_ibc(encoder_state_t * const state,
         cur_pu->inter.mv_dir    = info->merge_cand[merge_idx].dir;
         cur_pu->inter.mv[0][0]  = info->merge_cand[merge_idx].mv[0][0];
         cur_pu->inter.mv[0][1]  = info->merge_cand[merge_idx].mv[0][1];
-        uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
-        uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
-        uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
+        uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T);
+        uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
+        uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
 
         if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
           continue;
         }
         else if (has_chroma) {
-          uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
+          uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
           uvg_quantize_lcu_residual(state, false, has_chroma, 
             false, /*we are only checking for lack of coeffs so no need to check jccr*/
-            x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
+            cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
           if (!cbf_is_set_any(cur_pu->cbf, depth)) {
             cur_pu->type = CU_IBC;
             cur_pu->merge_idx = merge_idx;
@@ -964,15 +959,12 @@ static void search_pu_ibc(encoder_state_t * const state,
 
   // Do the motion search
 
-  uvg_inter_get_mv_cand(info->state,
-    info->origin.x,
-    info->origin.y,
-    info->width,
-    info->height,
+  uvg_inter_get_mv_cand(info->state,    
     info->mv_cand,
     cur_pu,
     lcu,
-    NULL);
+    0,
+    cu_loc);
 
   vector2d_t best_mv = { 0, 0 };
 
@@ -1003,9 +995,7 @@ static void search_pu_ibc(encoder_state_t * const state,
     best_cost = calculate_ibc_cost_satd(
       info->state,
       lcu,
-      info->origin.x,
-      info->origin.y,
-      info->width,
+      cu_loc,
       (best_mv.x >> INTERNAL_MV_PREC),
       (best_mv.y >> INTERNAL_MV_PREC));
     best_cost += best_bits * info->state->lambda;
@@ -1052,16 +1042,16 @@ static void search_pu_ibc(encoder_state_t * const state,
   };
 
 
-  if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
-    if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);    
+  if (state->encoder_control->cfg.rdo >= 2) {
+    if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);    
   }
 
 
   if(cfg->rdo < 2) {
     int predmode_ctx;
 
-    const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1) * 3;
-    const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx);
+    const float ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1);
+    const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
     const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
 
     const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
@@ -1077,33 +1067,29 @@ static void search_pu_ibc(encoder_state_t * const state,
 #include "threads.h"
 
 static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
-  int x, int y, int depth,
+  const cu_loc_t* cu_loc,
   lcu_t* lcu,
   double* inter_cost,
   double* inter_bitcost)
 {
-  const int x_cu = x;
-  const int y_cu = y;
+  const int x_cu = cu_loc->x;
+  const int y_cu = cu_loc->y;
   const int part_mode = SIZE_2Nx2N;
   const uvg_config          *cfg      = &state->encoder_control->cfg;
   const videoframe_t * const frame    = state->tile->frame;
-  const int                  width_cu = LCU_WIDTH >> depth;
-  const int                  width    = PU_GET_W(part_mode, width_cu, 0);
-  const int                  height   = PU_GET_H(part_mode, width_cu, 0);
+  const int                  width_cu = cu_loc->width;
+  const int                  height_cu = cu_loc->height;
 
   const bool                 merge_a1  = true;
   const bool                 merge_b1  = true;
 
   ibc_search_info_t info;
 
-  const int  x_local = SUB_SCU(x);
-  const int  y_local = SUB_SCU(y);
+  const int  x_local = SUB_SCU(x_cu);
+  const int  y_local = SUB_SCU(y_cu);
   cu_info_t *cur_pu  = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
 
   cur_pu->type       = CU_IBC;
-  cur_pu->part_size  = part_mode;
-  cur_pu->depth      = depth;
-  cur_pu->tr_depth   = depth;
   cur_pu->qp         = state->qp;
 
   // Default to candidate 0
@@ -1113,22 +1099,17 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
 
   info.state    = state;
   info.pic      = frame->source;
-  info.origin.x = x;
-  info.origin.y = y;
-  info.width    = width;
-  info.height   = height;
+  info.origin.x = cu_loc->x;
+  info.origin.y = cu_loc->y;
+  info.width    = width_cu;
+  info.height   = height_cu;
   info.mvd_cost_func =
     cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
-  info.optimized_sad  = uvg_get_optimized_sad(width);
+  info.optimized_sad  = uvg_get_optimized_sad(width_cu);
   info.lcu            = lcu;
 
   // Search for merge mode candidates
   info.num_merge_cand = uvg_inter_get_merge_cand(
     state,
-    x,
-    y,
-    width,
-    height,
-    merge_a1,
-    merge_b1,
+    cu_loc,
     info.merge_cand,
@@ -1154,8 +1137,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
   UVG_CLOCK_T   hashmap_end_real_time;
   UVG_GET_TIME(&hashmap_start_real_time);
 
-  int           xx  = x;
-  int           yy  = y;
+  int           xx  = x_cu;
+  int           yy  = y_cu;
 
   int           best_mv_x    = INT_MAX>>2;
   int           best_mv_y    = INT_MAX>>2;
@@ -1185,12 +1168,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
       int pos_y = result->value & 0xffff;
       int mv_x = pos_x - xx;
       int mv_y = pos_y - yy;
-      if (pos_x <= xx - width && pos_y <= yy - height) {
+      if (pos_x <= xx - width_cu && pos_y <= yy - height_cu) {
         valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y);
         if (valid_mv) {
           bool full_block = true; // Is the full block covered by the IBC?
-          for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width; offset_x+=UVG_HASHMAP_BLOCKSIZE) {
-            for (int offset_y = 0; offset_y < height; offset_y += UVG_HASHMAP_BLOCKSIZE) {
+          for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width_cu; offset_x+=UVG_HASHMAP_BLOCKSIZE) {
+            for (int offset_y = 0; offset_y < height_cu; offset_y += UVG_HASHMAP_BLOCKSIZE) {
               uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[
                 ((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE];
 
@@ -1220,7 +1203,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
               best_mv_y              = mv_y;
               ibc_cost               = cost;
               ibc_bitcost            = bits;
-              fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y);
+              fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x_cu,y_cu, width_cu,height_cu, mv_x, mv_y);
               found_block = true;
               //break;
             }
@@ -1274,11 +1257,9 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
   uvg_inter_recon_cu(
     state,
     lcu,
-    x,
-    y,
-    CU_WIDTH_FROM_DEPTH(depth),
     true,
-    state->encoder_control->chroma_format != UVG_CSP_400);
+    state->encoder_control->chroma_format != UVG_CSP_400,
+    cu_loc);
 
   if (*inter_cost < MAX_DOUBLE) {
     assert(fracmv_within_ibc_range(
@@ -1305,17 +1286,18 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
  * \param inter_bitcost Return inter bitcost
  */
 void uvg_search_cu_ibc(encoder_state_t * const state,
-                         int x, int y, int depth,
-                         lcu_t *lcu,
-                         double   *inter_cost,
-                         double* inter_bitcost)
+                       const cu_loc_t * const  cu_loc,
+                       lcu_t *lcu,
+                       double   *inter_cost,
+                       double* inter_bitcost)
 {
   *inter_cost = MAX_DOUBLE;
   *inter_bitcost = MAX_INT;
+
    // Quick hashmap search
   /* uvg_search_hash_cu_ibc(
     state,
-                          x, y, depth,
+                          cu_loc,
                           lcu,
                           inter_cost,
                           inter_bitcost);
@@ -1330,7 +1312,6 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
   info.lcu = lcu;
 
   search_pu_ibc(state,
-                  x, y, depth,
-                  SIZE_2Nx2N, 0,
+                  cu_loc,
                   amvp,
                   &merge,
@@ -1374,14 +1356,14 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
     return;
   }
 
-  const int x_local = SUB_SCU(x);
-  const int y_local = SUB_SCU(y);
+  const int  x_local = SUB_SCU(cu_loc->x);
+  const int  y_local = SUB_SCU(cu_loc->y);
   cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
   *cur_pu = *best_inter_pu;
   cur_pu->type       = CU_IBC;
 
-  uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth),
-    true, state->encoder_control->chroma_format != UVG_CSP_400);   
+  uvg_inter_recon_cu(state, lcu, 
+    true, state->encoder_control->chroma_format != UVG_CSP_400, cu_loc);   
 
   if (*inter_cost < MAX_DOUBLE) {    
     assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));
diff --git a/src/search_ibc.h b/src/search_ibc.h
index 14ce3b6f..b3c4e544 100644
--- a/src/search_ibc.h
+++ b/src/search_ibc.h
@@ -46,7 +46,7 @@
 
 
 void uvg_search_cu_ibc(encoder_state_t * const state,
-                         int x, int y, int depth,
+                         const cu_loc_t * const  cu_loc,
                          lcu_t *lcu,
                          double *inter_cost,
                          double* inter_bitcost);
diff --git a/src/search_inter.c b/src/search_inter.c
index 93598ff2..53587b84 100644
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -1293,8 +1293,8 @@ static void apply_mv_scaling(int32_t current_poc,
 /**
  * \brief Perform inter search for a single reference frame.
  */
-static void search_pu_inter_ref(inter_search_info_t *info,
-  int depth,
+static void search_pu_inter_ref(
+  inter_search_info_t *info,
   lcu_t *lcu,
   cu_info_t *cur_cu,
   unit_stats_map_t *amvp)
@@ -1327,15 +1327,15 @@ static void search_pu_inter_ref(inter_search_info_t *info,
   // Get MV candidates
   cur_cu->inter.mv_ref[ref_list] = ref_list_idx[ref_list];
 
+  cu_loc_t cu_loc;
+  uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
+
   uvg_inter_get_mv_cand(info->state,
-    info->origin.x,
-    info->origin.y,
-    info->width,
-    info->height,
-    info->mv_cand,
-    cur_cu,
-    lcu,
-    ref_list);
+                        info->mv_cand,
+                        cur_cu,
+                        lcu,
+                        ref_list,
+                        &cu_loc);
 
   vector2d_t best_mv = { 0, 0 };
 
@@ -1498,11 +1498,13 @@ static void search_pu_inter_ref(inter_search_info_t *info,
 /**
  * \brief Search bipred modes for a PU.
  */
-static void search_pu_inter_bipred(inter_search_info_t *info,
-                                   int depth,
-                                   lcu_t *lcu,
-                                   unit_stats_map_t *amvp_bipred)
+static void search_pu_inter_bipred(
+  inter_search_info_t *info,
+  lcu_t *lcu,
+  unit_stats_map_t *amvp_bipred)
 {
+  cu_loc_t cu_loc;
+  uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
   const image_list_t *const ref = info->state->frame->ref;
   uint8_t (*ref_LX)[16] = info->state->frame->ref_LX;
   const videoframe_t * const frame = info->state->tile->frame;
@@ -1551,7 +1553,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
     bipred_pu->skipped = false;
 
     for (int reflist = 0; reflist < 2; reflist++) {
-      uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist);
+      uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, &cu_loc);
     }
 
     // Don't try merge candidates that don't satisfy mv constraints.
@@ -1564,13 +1566,11 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
     uvg_inter_recon_bipred(info->state,
                            ref->images[ref_LX[0][merge_cand[i].ref[0]]],
                            ref->images[ref_LX[1][merge_cand[j].ref[1]]],
-                           x, y,
-                           width,
-                           height,
                            mv,
                            lcu,
                            true,
-                           false);
+                           false,
+                           &cu_loc);
 
     const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
     const uvg_pixel *src = &frame->source->y[x + y * frame->source->stride];
@@ -1666,11 +1666,9 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
  * \param amvp        Return searched AMVP PUs sorted by costs
  * \param merge       Return searched Merge PUs sorted by costs
  */
-static void search_pu_inter(encoder_state_t * const state,
-  int x_cu, int y_cu,
-  int depth,
-  part_mode_t part_mode,
-  int i_pu,
+static void search_pu_inter(
+  encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
   lcu_t *lcu,
   unit_stats_map_t *amvp,
   unit_stats_map_t *merge,
@@ -1678,26 +1676,14 @@ static void search_pu_inter(encoder_state_t * const state,
 {
   const uvg_config *cfg = &state->encoder_control->cfg;
   const videoframe_t * const frame = state->tile->frame;
-  const int width_cu = LCU_WIDTH >> depth;
-  const int height_cu = width_cu; // TODO: non-square blocks
-  const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
-  const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
-  const int width = PU_GET_W(part_mode, width_cu, i_pu);
-  const int height = PU_GET_H(part_mode, width_cu, i_pu);
+  const int width_cu = cu_loc->width;
+  const int height_cu = cu_loc->height; 
 
-  // Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
-  // nRx2N partitions.
-  const bool merge_a1 = i_pu == 0 || width >= height;
-  // Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
-  // 2NxnD partitions.
-  const bool merge_b1 = i_pu == 0 || width <= height;
 
-  const int x_local = SUB_SCU(x);
-  const int y_local = SUB_SCU(y);
+  const int x_local = SUB_SCU(cu_loc->x);
+  const int y_local = SUB_SCU(cu_loc->y);
   cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
   cur_pu->type = CU_NOTSET;
-  cur_pu->part_size = part_mode;
-  cur_pu->depth = depth;
   cur_pu->qp = state->qp;
 
   // Default to candidate 0
@@ -1708,19 +1694,17 @@ static void search_pu_inter(encoder_state_t * const state,
 
   info->state          = state;
   info->pic            = frame->source;
-  info->origin.x       = x;
-  info->origin.y       = y;
-  info->width          = width;
-  info->height         = height;
+  info->origin.x       = cu_loc->x;
+  info->origin.y       = cu_loc->y;
+  info->width          = width_cu;
+  info->height         = height_cu;
   info->mvd_cost_func  = cfg->mv_rdo ? uvg_calc_mvd_cost_cabac : calc_mvd_cost;
-  info->optimized_sad  = uvg_get_optimized_sad(width);
+  info->optimized_sad  = uvg_get_optimized_sad(width_cu);
 
   // Search for merge mode candidates
   info->num_merge_cand = uvg_inter_get_merge_cand(
       state,
-      x, y,
-      width, height,
-      merge_a1, merge_b1,
+      cu_loc,
       info->merge_cand,
       lcu
   );
@@ -1755,7 +1739,7 @@ static void search_pu_inter(encoder_state_t * const state,
     // If bipred is not enabled, do not try candidates with mv_dir == 3.
     // Bipred is also forbidden for 4x8 and 8x4 blocks by the standard. 
     if (cur_pu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
-    if (cur_pu->inter.mv_dir == 3 && !(width + height > 12)) continue;
+    if (cur_pu->inter.mv_dir == 3 && !(cu_loc->width + cu_loc->height > 12)) continue;
 
     bool is_duplicate = merge_candidate_in_list(info->merge_cand, cur_cand, merge);
 
@@ -1769,7 +1753,7 @@ static void search_pu_inter(encoder_state_t * const state,
     {
       continue;
     }
-    uvg_inter_pred_pu(state, lcu, x_cu, y_cu, width_cu, true, false, i_pu);
+    uvg_inter_pred_pu(state, lcu, true, false, cu_loc);
     merge->unit[merge->size] = *cur_pu;
     merge->unit[merge->size].type = CU_INTER;
     merge->unit[merge->size].merge_idx = merge_idx;
@@ -1777,11 +1761,11 @@ static void search_pu_inter(encoder_state_t * const state,
     merge->unit[merge->size].skipped = false;
 
     double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
-    if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
-      uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits);
+    if(state->encoder_control->cfg.rdo >= 2) {
+      uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
     }
     else {
-      merge->cost[merge->size] = uvg_satd_any_size(width, height,
+      merge->cost[merge->size] = uvg_satd_any_size(cu_loc->width, cu_loc->height,
         lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
         lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
       bits += no_skip_flag;
@@ -1803,7 +1787,7 @@ static void search_pu_inter(encoder_state_t * const state,
     
   // Early Skip Mode Decision
   bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
-  if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) {
+  if (cfg->early_skip) {
     for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
       if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
         merge->size = 1;
@@ -1813,6 +1797,8 @@ static void search_pu_inter(encoder_state_t * const state,
         merge->keys[0] = 0;
       }
       else if(cfg->rdo < 2) {
+
+        const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
         // Reconstruct blocks with merge candidate.
         // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
         // and chroma exists.
@@ -1825,23 +1811,20 @@ static void search_pu_inter(encoder_state_t * const state,
         cur_pu->inter.mv[0][1]  = info->merge_cand[merge_idx].mv[0][1];
         cur_pu->inter.mv[1][0]  = info->merge_cand[merge_idx].mv[1][0];
         cur_pu->inter.mv[1][1]  = info->merge_cand[merge_idx].mv[1][1];
-        uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
-        uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
+        uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T);
+        uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
 
-        cu_loc_t loc;
-        uvg_cu_loc_ctor(&loc, x, y, width_cu, height_cu);
-
-        uvg_quantize_lcu_residual(state, true, false, false, &loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
+        uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
 
         if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
           continue;
         }
         else if (has_chroma) {
-          uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
+          uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
           uvg_quantize_lcu_residual(state,
                                     false, has_chroma,
                                     false, /*we are only checking for lack of coeffs so no need to check jccr*/
-                                    &loc, depth, cur_pu, lcu,
+                                    cu_loc, depth, cur_pu, lcu,
                                     true,
             UVG_BOTH_T);
           if (!cbf_is_set_any(cur_pu->cbf, depth)) {
@@ -1876,7 +1859,7 @@ static void search_pu_inter(encoder_state_t * const state,
     info->ref_idx = ref_idx;
     info->ref = state->frame->ref->images[ref_idx];
 
-    search_pu_inter_ref(info, depth, lcu, cur_pu, amvp);
+    search_pu_inter_ref(info, lcu, cur_pu, amvp);
   }
 
   assert(amvp[0].size <= MAX_UNIT_STATS_MAP_SIZE);
@@ -1941,14 +1924,11 @@ static void search_pu_inter(encoder_state_t * const state,
         info->ref = ref->images[info->ref_idx];
 
         uvg_inter_get_mv_cand(info->state,
-          info->origin.x,
-          info->origin.y,
-          info->width,
-          info->height,
-          info->mv_cand,
-          unipred_pu,
-          lcu,
-          list);
+                              info->mv_cand,
+                              unipred_pu,
+                              lcu,
+                              list,
+                              cu_loc);
 
         double     frac_cost = MAX_DOUBLE;
         double   frac_bits = MAX_INT;
@@ -1969,8 +1949,8 @@ static void search_pu_inter(encoder_state_t * const state,
           unipred_pu->inter.mv[list][1] = frac_mv.y;
           CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand);
 
-          if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
-            uvg_cu_cost_inter_rd2(state, x, y, depth, unipred_pu, lcu, &frac_cost, &frac_bits);
+          if (state->encoder_control->cfg.rdo >= 2) {
+            uvg_cu_cost_inter_rd2(state, unipred_pu, lcu, &frac_cost, &frac_bits, cu_loc);
           }
 
           amvp[list].cost[key] = frac_cost;
@@ -1992,15 +1972,15 @@ static void search_pu_inter(encoder_state_t * const state,
     amvp[list].size = n_best;
   }
 
-  if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N && cfg->fme_level == 0) {
-    if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]);
-    if (amvp[1].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]);
+  if (state->encoder_control->cfg.rdo >= 2 && cfg->fme_level == 0) {
+    if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);
+    if (amvp[1].size) uvg_cu_cost_inter_rd2(state, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]], cu_loc);
   }
 
   // Search bi-pred positions
   bool can_use_bipred = state->frame->slicetype == UVG_SLICE_B
     && cfg->bipred
-    && width + height >= 16; // 4x8 and 8x4 PBs are restricted to unipred
+    && cu_loc->width + cu_loc->height >= 16; // 4x8 and 8x4 PBs are restricted to unipred
 
   if (can_use_bipred) {
 
@@ -2031,25 +2011,23 @@ static void search_pu_inter(encoder_state_t * const state,
       bipred_pu->skipped = false;
 
       for (int reflist = 0; reflist < 2; reflist++) {
-        uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist);
+        uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, cu_loc);
       }
 
       uvg_inter_recon_bipred(info->state,
-        ref->images[ref_LX[0][bipred_pu->inter.mv_ref[0]]],
-        ref->images[ref_LX[1][bipred_pu->inter.mv_ref[1]]],
-        x, y,
-        width,
-        height,
-        mv,
-        lcu,
-        true,
-        false);
+                             ref->images[ref_LX[0][bipred_pu->inter.mv_ref[0]]],
+                             ref->images[ref_LX[1][bipred_pu->inter.mv_ref[1]]],
+                             mv, lcu,
+                             true,
+                             false,
+                             cu_loc
+        );
 
-      const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
-      const uvg_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
+      const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
+      const uvg_pixel *src = &lcu->ref.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
 
       best_bipred_cost =
-        uvg_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);
+        uvg_satd_any_size(cu_loc->width, cu_loc->height, rec, LCU_WIDTH, src, LCU_WIDTH);
 
       double bitcost[2] = { 0, 0 };
 
@@ -2096,17 +2074,17 @@ static void search_pu_inter(encoder_state_t * const state,
     }
 
     // TODO: this probably should have a separate command line option
-    if (cfg->rdo >= 3) search_pu_inter_bipred(info, depth, lcu, &amvp[2]);
+    if (cfg->rdo >= 3) search_pu_inter_bipred(info, lcu, &amvp[2]);
     
     assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE);
     uvg_sort_keys_by_cost(&amvp[2]);
-    if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) {
-      uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]);
+    if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2) {
+      uvg_cu_cost_inter_rd2(state, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]], cu_loc);
     }
   }
   if(cfg->rdo < 2) {
     int predmode_ctx;
-    const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx);
+    const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
     const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
 
     const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
@@ -2140,25 +2118,23 @@ static void search_pu_inter(encoder_state_t * const state,
 * \param inter_cost    Return inter cost
 * \param inter_bitcost Return inter bitcost
 */
-void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
-                           int x, int y, int depth,
-                           cu_info_t* cur_cu,
-                           lcu_t *lcu,
-                           double   *inter_cost,
-                           double* inter_bitcost){
-  
-  int tr_depth = MAX(1, depth);
-  if (cur_cu->part_size != SIZE_2Nx2N) {
-    tr_depth = depth + 1;
-  }
-  uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, UVG_BOTH_T);
+void uvg_cu_cost_inter_rd2(
+  encoder_state_t * const state,
+  cu_info_t* cur_cu,
+  lcu_t *lcu,
+  double   *inter_cost,
+  double* inter_bitcost,
+  const cu_loc_t* const cu_loc){
 
-  const int x_px = SUB_SCU(x);
-  const int y_px = SUB_SCU(y);
+  const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
+  int tr_depth = MAX(1, depth);
+
+  uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, tr_depth, UVG_BOTH_T);
+
+  const int x_px = SUB_SCU(cu_loc->x);
+  const int y_px = SUB_SCU(cu_loc->y);
   const int width = LCU_WIDTH >> depth;
   const int height = width; // TODO: non-square blocks
-  cu_loc_t loc;
-  uvg_cu_loc_ctor(&loc, x, y, width, height);
 
   cabac_data_t cabac_copy;
   memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
@@ -2169,7 +2145,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
   *cur_pu = *cur_cu;
 
   const bool reconstruct_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
-  uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, reconstruct_chroma);
+  uvg_inter_recon_cu(state, lcu, true, reconstruct_chroma, cu_loc);
 
   int index = y_px * LCU_WIDTH + x_px;
   double ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
@@ -2187,13 +2163,13 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
   }
   double no_cbf_bits;
   double bits = 0;
-  const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL);
-  if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
+  const int skip_context = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, NULL);
+  if (cur_cu->merged) {
     no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
-    bits += uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T);
+    bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
   }
   else {
-    no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T);
+    no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
     bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
   }
   double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
@@ -2207,7 +2183,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
     uvg_quantize_lcu_residual(state,
                               true,
                               false,
-                              false, &loc,
+                              false,
+                              cu_loc,
                               depth,
                               cur_cu,
                               lcu,
@@ -2243,7 +2220,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
       depth,
       lcu,
       &cabac_copy,
-      &loc,
+      cu_loc,
       index,
       0,
       cur_cu,
@@ -2274,7 +2251,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
     uvg_quantize_lcu_residual(state,
                               true, reconstruct_chroma,
                               reconstruct_chroma && state->encoder_control->cfg.jccr,
-                              &loc,
+                              cu_loc,
                               depth,
                               cur_cu,
                               lcu,
@@ -2308,7 +2285,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
 
   if(no_cbf_cost < *inter_cost) {
     cur_cu->cbf = 0;
-    if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
+    if (cur_cu->merged) {
       cur_cu->skipped = 1;
     }
     *inter_cost = no_cbf_cost;
@@ -2332,11 +2309,12 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
  * \param inter_cost    Return inter cost
  * \param inter_bitcost Return inter bitcost
  */
-void uvg_search_cu_inter(encoder_state_t * const state,
-                         int x, int y, int depth,
-                         lcu_t *lcu,
-                         double   *inter_cost,
-                         double* inter_bitcost)
+void uvg_search_cu_inter(
+  encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
+  lcu_t *lcu,
+  double   *inter_cost,
+  double* inter_bitcost)
 {
   *inter_cost = MAX_DOUBLE;
   *inter_bitcost = MAX_INT;
@@ -2349,12 +2327,8 @@ void uvg_search_cu_inter(encoder_state_t * const state,
   inter_search_info_t info;
 
   search_pu_inter(state,
-                  x, y, depth,
-                  SIZE_2Nx2N, 0,
-                  lcu,
-                  amvp,
-                  &merge,
-                  &info);
+                  cu_loc, lcu, amvp,
+                  &merge, &info);
 
   // Early Skip CU decision
   if (merge.size == 1 && merge.unit[0].skipped) {
@@ -2396,13 +2370,14 @@ void uvg_search_cu_inter(encoder_state_t * const state,
     return;
   }
 
-  const int x_local = SUB_SCU(x);
-  const int y_local = SUB_SCU(y);
+  const int x_local = SUB_SCU(cu_loc->x);
+  const int y_local = SUB_SCU(cu_loc->y);
   cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
   *cur_pu = *best_inter_pu;
 
-  uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth),
-    true, state->encoder_control->chroma_format != UVG_CSP_400);   
+  uvg_inter_recon_cu(state, lcu,
+                     true, state->encoder_control->chroma_format != UVG_CSP_400,
+                     cu_loc);   
 
   if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) {
     assert(fracmv_within_tile(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));
diff --git a/src/search_inter.h b/src/search_inter.h
index d76dd927..cdabd15a 100644
--- a/src/search_inter.h
+++ b/src/search_inter.h
@@ -73,11 +73,12 @@ typedef double uvg_mvd_cost_func(const encoder_state_t *state,
                                   int32_t ref_idx,
                                   double *bitcost);
 
-void uvg_search_cu_inter(encoder_state_t * const state,
-                         int x, int y, int depth,
-                         lcu_t *lcu,
-                         double *inter_cost,
-                         double* inter_bitcost);
+void uvg_search_cu_inter(
+  encoder_state_t * const state,
+  const cu_loc_t* const cu_loc,
+  lcu_t *lcu,
+  double *inter_cost,
+  double* inter_bitcost);
 
 
 
@@ -85,12 +86,13 @@ unsigned uvg_inter_satd_cost(const encoder_state_t* state,
                              const lcu_t *lcu,
                              int x,
                              int y);
-void uvg_cu_cost_inter_rd2(encoder_state_t* const state,
-  int x, int y, int depth,
+void uvg_cu_cost_inter_rd2(
+  encoder_state_t* const state,
   cu_info_t* cur_cu,
   lcu_t* lcu,
   double* inter_cost,
-  double* inter_bitcost);
+  double* inter_bitcost,
+  const cu_loc_t* const cu_loc);
 
 int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);
 
diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c
index 2783454d..1d3c117f 100644
--- a/src/strategies/avx2/intra-avx2.c
+++ b/src/strategies/avx2/intra-avx2.c
@@ -294,13 +294,6 @@ static void uvg_angular_pred_avx2(
               f[yy][2] = 16 + offset;
               f[yy][3] = offset;
             }
-            // Cubic must be used if ref line != 0 or if isp mode != 0
-            if (multi_ref_index || isp) {
-              use_cubic = true;
-            }
-            const int16_t filter_coeff[4] = { 16 - (delta_fract[yy] >> 1), 32 - (delta_fract[yy] >> 1), 16 + (delta_fract[yy] >> 1), delta_fract[yy] >> 1 };
-            const int16_t *temp_f = use_cubic ? cubic_filter[delta_fract[yy]] : filter_coeff;
-            memcpy(f[yy], temp_f, 4 * sizeof(*temp_f));
           }
 
           // Do 4-tap intra interpolation filtering
diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c
index b6d062b0..bc70daab 100644
--- a/src/strategies/avx2/quant-avx2.c
+++ b/src/strategies/avx2/quant-avx2.c
@@ -708,7 +708,6 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
       (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
   {
     int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
-    tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
     uvg_rdoq(state, coeff, coeff_out, width, height, color,
       scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index);
   }
diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c
index 4215fc81..be396a8b 100644
--- a/src/strategies/generic/quant-generic.c
+++ b/src/strategies/generic/quant-generic.c
@@ -316,7 +316,6 @@ int uvg_quant_cbcr_residual_generic(
     (width > 4 || !state->encoder_control->cfg.rdoq_skip))
   {
     int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
-    tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
     uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
              scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
       cur_cu->cr_lfnst_idx);
@@ -499,7 +498,6 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
       (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
   {
     int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
-    tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
     uvg_rdoq(state, coeff, coeff_out, width, height, color,
              scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
       lfnst_index);
diff --git a/src/transform.c b/src/transform.c
index b260eea1..a497003b 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -490,7 +490,7 @@ void uvg_chroma_transform_search(
   int depth,
   lcu_t* const lcu,
   cabac_data_t* temp_cabac,
-  cu_loc_t *cu_loc,
+  const cu_loc_t* const cu_loc,
   const int offset,
   const uint8_t mode,
   cu_info_t* pred_cu,
diff --git a/src/transform.h b/src/transform.h
index 6fdef411..a7427ea0 100644
--- a/src/transform.h
+++ b/src/transform.h
@@ -108,7 +108,7 @@ void uvg_chroma_transform_search(
   int depth,
   lcu_t* const lcu,
   cabac_data_t* temp_cabac,
-  cu_loc_t *cu_loc,
+  const cu_loc_t* const cu_loc,
   const int offset,
   const uint8_t mode,
   cu_info_t* pred_cu,
diff --git a/tests/mv_cand_tests.c b/tests/mv_cand_tests.c
index 84ab9328..849fec2d 100644
--- a/tests/mv_cand_tests.c
+++ b/tests/mv_cand_tests.c
@@ -46,8 +46,11 @@ TEST test_get_spatial_merge_cand(void)
 
   merge_candidates_t cand = { 0 };
 
-  get_spatial_merge_candidates(64 + 32, 64, // x, y
-                               32, 24,      // width, height
+  cu_loc_t cu_loc;
+  uvg_cu_loc_ctor(&cu_loc, 64 + 32, 64, // x, y
+    32, 24); // width, height
+
+  get_spatial_merge_candidates(&cu_loc,      
                                1920, 1080,  // picture size
                                &lcu,
                                &cand,