diff --git a/src/intra.c b/src/intra.c
index 51a53f15..3c02dac2 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -1075,13 +1075,7 @@ void uvg_intra_build_reference_any(
         px_available_left = height;
       }
       else {
-        // Left LCU edge has more pixels available
-        if (lcu_px.x > 0) {
-          px_available_left = cu_height - (pu_y - cu_y);
-        }
-        else {
-          px_available_left = LCU_WIDTH - lcu_px.y;
-        }
+        px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
       }
     }
     else {
@@ -1198,12 +1192,7 @@ void uvg_intra_build_reference_any(
         px_available_top = width;
       }
       else {
-        if (lcu_px.y > 0) {
-          px_available_top = LCU_WIDTH - lcu_px.x;
-        }
-        else {
-          px_available_top = LCU_WIDTH;
-        }
+        px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
       }
     }
     else {
@@ -1372,13 +1361,7 @@ void uvg_intra_build_reference_inner(
       px_available_left = height;
     }
     else {
-      // Left LCU edge has more pixels available
-      if (lcu_px.x > 0) {
-        px_available_left = cu_height - (pu_y - cu_y);
-      }
-      else {
-        px_available_left = LCU_WIDTH - lcu_px.y;
-      }
+      px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
     }
 
   }
@@ -1426,12 +1409,7 @@ void uvg_intra_build_reference_inner(
       px_available_top = width;
     }
     else {
-      if (lcu_px.y > 0) {
-        px_available_top = LCU_WIDTH - lcu_px.x;
-      }
-      else {
-        px_available_top = LCU_WIDTH;
-      }
+      px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
     }
   }
   else {
diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c
index d3d3bda4..1891ee5a 100644
--- a/src/strategies/generic/intra-generic.c
+++ b/src/strategies/generic/intra-generic.c
@@ -138,6 +138,10 @@ static void uvg_angular_pred_generic(
   // Pointer for the other reference.
   const uvg_pixel *ref_side;
 
+  const int cu_dim = MAX(width, height);  // Longer block side; extends the ISP reference lengths below.
+  const int top_ref_length  = isp_mode ? width + cu_dim  : width << 1;
+  const int left_ref_length = isp_mode ? height + cu_dim : height << 1;
+
   // Set ref_main and ref_side such that, when indexed with 0, they point to
   // index 0 in block coordinates.
   if (sample_disp < 0) {
@@ -192,8 +196,8 @@ static void uvg_angular_pred_generic(
       temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
       temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
     }*/
-    memcpy(&temp_above[0], &in_ref_above[0], ((width << 1) + 1 + multi_ref_index) * sizeof(uvg_pixel));
-    memcpy(&temp_left[0], &in_ref_left[0], ((height << 1) + 1 + multi_ref_index) * sizeof(uvg_pixel));
+    memcpy(&temp_above[0], &in_ref_above[0], (top_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
+    memcpy(&temp_left[0], &in_ref_left[0], (left_ref_length + 1 + multi_ref_index) * sizeof(uvg_pixel));
 
     ref_main = vertical_mode ? temp_above : temp_left;
     ref_side = vertical_mode ? temp_left : temp_above;
@@ -202,12 +206,17 @@ static void uvg_angular_pred_generic(
     const int log2_ratio = log2_width - log2_height;
     const int s = MAX(0, vertical_mode ? log2_ratio : -log2_ratio);
     const int max_index = (multi_ref_index << s) + 2;
-    const int ref_length = vertical_mode ? width << 1 : height << 1;
+    int ref_length;
+    if (isp_mode) {
+      ref_length = vertical_mode ? top_ref_length : left_ref_length;
+    }
+    else {
+      ref_length = vertical_mode ? width << 1 : height << 1;
+    }
     const uvg_pixel val = ref_main[ref_length + multi_ref_index];
     for (int j = 1; j <= max_index; j++) {
       ref_main[ref_length + multi_ref_index +  j] = val;
     }
-
     
     //// sample_disp >= 0 means we don't need to refer to negative indices,
     //// which means we can just use the references as is.
@@ -221,6 +230,14 @@ static void uvg_angular_pred_generic(
     //tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
   }
 
+  // Horizontal modes are predicted as vertical on a transposed block: swap the dimensions and write to a scratch buffer.
+  int tmp_width = vertical_mode ? width : height;
+  int tmp_height = vertical_mode ? height : width;
+
+  uvg_pixel tmp_dst[LCU_WIDTH * LCU_WIDTH];
+  uvg_pixel* dst_buf = vertical_mode ? dst : tmp_dst;
+
+
   // compensate for line offset in reference line buffers
   ref_main += multi_ref_index;
   ref_side += multi_ref_index;
@@ -228,7 +245,7 @@ static void uvg_angular_pred_generic(
   if (sample_disp != 0) {
     // The mode is not horizontal or vertical, we have to do interpolation.
 
-    for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < height; ++y, delta_pos += sample_disp) {
+    for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < tmp_height; ++y, delta_pos += sample_disp) {
       int_fast32_t delta_int = delta_pos >> 5;
       int_fast32_t delta_fract = delta_pos & (32 - 1);
 
@@ -255,36 +272,36 @@ static void uvg_angular_pred_generic(
           const int16_t filter_coeff[4] = { 16 - (delta_fract >> 1), 32 - (delta_fract >> 1), 16 + (delta_fract >> 1), delta_fract >> 1 };
           int16_t const * const f = use_cubic ? cubic_filter[delta_fract] : filter_coeff;
           // Do 4-tap intra interpolation filtering
-          for (int_fast32_t x = 0; x < width; x++, ref_main_index++) {
+          for (int_fast32_t x = 0; x < tmp_width; x++, ref_main_index++) {
             p[0] = ref_main[ref_main_index];
             p[1] = ref_main[ref_main_index + 1];
             p[2] = ref_main[ref_main_index + 2];
             p[3] = ref_main[ref_main_index + 3];
          
-            dst[y * width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
+            dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(((int32_t)(f[0] * p[0]) + (int32_t)(f[1] * p[1]) + (int32_t)(f[2] * p[2]) + (int32_t)(f[3] * p[3]) + 32) >> 6);
 
           }
         }
         else {
         
           // Do linear filtering
-          for (int_fast32_t x = 0; x < width; ++x) {
+          for (int_fast32_t x = 0; x < tmp_width; ++x) {
             uvg_pixel ref1 = ref_main[x + delta_int + 1];
             uvg_pixel ref2 = ref_main[x + delta_int + 2];
-            dst[y * width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
+            dst_buf[y * tmp_width + x] = ref1 + ((delta_fract * (ref2-ref1) + 16) >> 5);
           }
         }
       }
       else {
         // Just copy the integer samples
-        for (int_fast32_t x = 0; x < width; x++) {
-          dst[y * width + x] = ref_main[x + delta_int + 1];
+        for (int_fast32_t x = 0; x < tmp_width; x++) {
+          dst_buf[y * tmp_width + x] = ref_main[x + delta_int + 1];
         }
       }
 
      
       // PDPC
-      bool PDPC_filter = (width >= 4 || channel_type != 0);
+      bool PDPC_filter = (tmp_width >= 4 || channel_type != 0);
       if (pred_mode > 1 && pred_mode < 67) {
         if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
           PDPC_filter = false;
@@ -295,12 +312,12 @@ static void uvg_angular_pred_generic(
       }
       if(PDPC_filter) {
         int inv_angle_sum = 256;
-        for (int x = 0; x < MIN(3 << scale, width); x++) {
+        for (int x = 0; x < MIN(3 << scale, tmp_width); x++) {
           inv_angle_sum += modedisp2invsampledisp[abs(mode_disp)];
 
           int wL = 32 >> (2 * x >> scale);
           const uvg_pixel left = ref_side[y + (inv_angle_sum >> 9) + 1];
-          dst[y * width + x] = dst[y * width + x] + ((wL * (left - dst[y * width + x]) + 32) >> 6);
+          dst_buf[y * tmp_width + x] = dst_buf[y * tmp_width + x] + ((wL * (left - dst_buf[y * tmp_width + x]) + 32) >> 6);
         }
       }
 
@@ -342,32 +359,32 @@ static void uvg_angular_pred_generic(
     
     // Do not apply PDPC if multi ref line index is other than 0
     // TODO: do not do PDPC if block is in BDPCM mode
-    bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
+    bool do_pdpc = (((tmp_width >= 4 && tmp_height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/);
 
     if (do_pdpc) {
       int scale = (log2_width + log2_height - 2) >> 2;
       const uvg_pixel top_left = ref_main[0];
-      for (int_fast32_t y = 0; y < height; ++y) {
-        memcpy(&dst[y * width], &ref_main[1], width * sizeof(uvg_pixel));
+      for (int_fast32_t y = 0; y < tmp_height; ++y) {
+        memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel));
         const uvg_pixel left = ref_side[1 + y];
-        for (int_fast32_t x = 0; x < MIN(3 << scale, width); ++x) {
+        for (int_fast32_t x = 0; x < MIN(3 << scale, tmp_width); ++x) {
           const int wL = 32 >> (2 * x >> scale);
-          const uvg_pixel val = dst[y * width + x];
-          dst[y * width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
+          const uvg_pixel val = dst_buf[y * tmp_width + x];
+          dst_buf[y * tmp_width + x] = CLIP_TO_PIXEL(val + ((wL * (left - top_left) + 32) >> 6));
         }
       }
     } else {
-      for (int_fast32_t y = 0; y < height; ++y) {
-        memcpy(&dst[y * width], &ref_main[1], width * sizeof(uvg_pixel));
+      for (int_fast32_t y = 0; y < tmp_height; ++y) {
+        memcpy(&dst_buf[y * tmp_width], &ref_main[1], tmp_width * sizeof(uvg_pixel));
       }
     }
   }
 
   // Flip the block if this is was a horizontal mode.
   if (!vertical_mode) {
-    for (int_fast32_t y = 0; y < height - 1; ++y) {
-      for (int_fast32_t x = y + 1; x < width; ++x) {
-        SWAP(dst[y * width + x], dst[x * height + y], uvg_pixel);
+    for (int_fast32_t y = 0; y < tmp_height; ++y) {
+      for (int_fast32_t x = 0; x < tmp_width; ++x) {
+        dst[x * width + y] = tmp_dst[y * tmp_width + x];
       }
     }
   }