diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c
index 78c33dba..b53de2a3 100644
--- a/src/strategies/avx2/picture-avx2.c
+++ b/src/strategies/avx2/picture-avx2.c
@@ -922,15 +922,16 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
   kvz_pixel* temp_lcu_v)
 {
-  if (hi_prec_luma_rec0 == 0 && hi_prec_luma_rec1 == 0 && hi_prec_chroma_rec0 == 0 && hi_prec_chroma_rec1 == 0)
+  bool test = false;
+  if (test)//(hi_prec_luma_rec0 == 0 && hi_prec_luma_rec1 == 0 && hi_prec_chroma_rec0 == 0 && hi_prec_chroma_rec1 == 0)
   {
     inter_recon_bipred_no_mov_avx2(height, width, ypos, xpos, high_precision_rec0, high_precision_rec1, lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v);
   }
   else
   {
-    int y_in_lcu, x_in_lcu;
+    int y_in_lcu, x_in_lcu;
     int shift = 15 - KVZ_BIT_DEPTH;
     int offset = 1 << (shift - 1);
     int shift_left = 14 - KVZ_BIT_DEPTH;
@@ -977,9 +978,12 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
       temp_y_epi16 = _mm256_srai_epi16(temp_y_epi16, shift);
       temp_epi8 = _mm256_packus_epi16(temp_y_epi16, temp_y_epi16);
-
+
       pointer = (int32_t*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]);
       *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8));
+
+      //lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu] = _mm256_extract_epi32(temp_epi8, 0);
+
       break;
     case 8:
@@ -1024,7 +1028,7 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
       break;
-
+
     default:
       // Load total of 16 elements from memory to vector
@@ -1049,7 +1053,7 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
       }
      else
      {
-        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_y, temp_y_epi16), _MM_SHUFFLE(1, 3, 2, 0));
+        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_y, temp_y_epi16), _MM_SHUFFLE(3, 1, 2, 0));
         // Store 256-bits of integer data into memory
         _mm256_storeu_si256((__m256i*)&(lcu->rec.y[start_point]), temp_epi8);
@@ -1212,10 +1216,10 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
       temp_v_epi16 = _mm256_add_epi16(temp_v_epi16, offset_epi16);
       temp_v_epi16 = _mm256_srai_epi16(temp_v_epi16, shift);
-      temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_u_epi16, temp_u_epi16), _MM_SHUFFLE(1, 3, 2, 0));
+      temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_u_epi16, temp_u_epi16), _MM_SHUFFLE(3, 1, 2, 0));
       _mm_storeu_si128((__m128i*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm256_castsi256_si128(temp_epi8));
-      temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_v_epi16, temp_v_epi16), _MM_SHUFFLE(1, 3, 2, 0));
+      temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_v_epi16, temp_v_epi16), _MM_SHUFFLE(3, 1, 2, 0));
       _mm_storeu_si128((__m128i*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm256_castsi256_si128(temp_epi8));
       break;
@@ -1268,11 +1272,11 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
       }
      else
      {
-        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_u, temp_u_epi16), _MM_SHUFFLE(1, 3, 2, 0));
+        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_u, temp_u_epi16), _MM_SHUFFLE(3, 1, 2, 0));
         _mm256_storeu_si256((__m256i*)&(lcu->rec.u[start_point_uv]), temp_epi8);
-        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_v, temp_v_epi16), _MM_SHUFFLE(1, 3, 2, 0));
+        temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_v, temp_v_epi16), _MM_SHUFFLE(3, 1, 2, 0));
         _mm256_storeu_si256((__m256i*)&(lcu->rec.v[start_point_uv]), temp_epi8);
diff --git a/tests/inter_recon_bipred_tests.c b/tests/inter_recon_bipred_tests.c
index 9874a582..3fffa141 100644
--- a/tests/inter_recon_bipred_tests.c
+++ b/tests/inter_recon_bipred_tests.c
@@ -32,12 +32,13 @@ static lcu_t lcu1;
 int temp1, temp2, temp3, temp4;
-int16_t mv_param[2][2] = { { 7,7 },{ 7,7 } };
-int width = 16;
-int height = 16;
+int16_t mv_param[2][2] = { { 3,3 },{ 3,3 } };
+int width = 32;
+int height = 32;
 int xpos = 0;
 int ypos = 0;
+
 kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH];
 kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C];
 kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C];
@@ -124,7 +125,7 @@ TEST test_inter_recon_bipred()
   memcpy(result.rec.y, lcu1.rec.y, sizeof(kvz_pixel) * 64 * 64);
   memcpy(result.rec.u, lcu1.rec.u, sizeof(kvz_pixel) * 32 * 32);
   memcpy(result.rec.v, lcu1.rec.v, sizeof(kvz_pixel) * 32 * 32);
-
+  /*
   for (temp_y = 0; temp_y < height; ++temp_y) {
     int y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
     for (temp_x = 0; temp_x < width; temp_x += 1) {
@@ -132,8 +133,8 @@ TEST test_inter_recon_bipred()
       printf("%d ", expected_test_result.rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu]);
     }
   }
-  printf("\n");
-
+  printf("\n");*/
+  /*
   for (temp_y = 0; temp_y < height >> 1; ++temp_y) {
     int y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
     for (temp_x = 0; temp_x < width >> 1; ++temp_x) {
@@ -141,10 +142,10 @@ TEST test_inter_recon_bipred()
       printf("%d ", expected_test_result.rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu]);
     }
   }
-  printf("\n");
+  printf("\n");*/
   kvz_inter_recon_bipred_generic(hi_prec_luma_rec0, hi_prec_luma_rec1, hi_prec_chroma_rec0, hi_prec_chroma_rec1, width, height, xpos, ypos, high_precision_rec0, high_precision_rec1, &result, temp_lcu_y, temp_lcu_u, temp_lcu_v);
-
+  /*
   for (temp_y = 0; temp_y < height; ++temp_y) {
     int y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
     for (temp_x = 0; temp_x < width; temp_x += 1) {
@@ -153,7 +154,7 @@ TEST test_inter_recon_bipred()
       printf("%d ", result.rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu]);
       }
     }
   }
   printf("\n");
-
+  /*
   for (temp_y = 0; temp_y < height >> 1; ++temp_y) {
@@ -162,7 +163,7 @@ TEST test_inter_recon_bipred()
       printf("%d ", result.rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu]);
     }
   }
-  printf("\n");
+  printf("\n");*/
   for (temp_y = 0; temp_y < height; ++temp_y) {
diff --git a/tests/tests_main.c b/tests/tests_main.c
index a62784de..c6c94b6b 100644
--- a/tests/tests_main.c
+++ b/tests/tests_main.c
@@ -58,7 +58,7 @@ int main(int argc, char **argv)
   RUN_SUITE(mv_cand_tests);
-  //RUN_SUITE(inter_recon_bipred_tests);
+  RUN_SUITE(inter_recon_bipred_tests);
   GREATEST_MAIN_END();
 }
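
Note on the _MM_SHUFFLE change in picture-avx2.c: _mm256_packus_epi16 packs each 128-bit lane separately, so the packed bytes land in the destination as 64-bit chunks in the order a-low, b-low, a-high, b-high, and the qwords have to be reordered as 0, 2, 1, 3 before a sequential store. _MM_SHUFFLE(3, 1, 2, 0) encodes exactly that selection, while the previous _MM_SHUFFLE(1, 3, 2, 0) left the upper 16 bytes of each 256-bit store out of order. The program below is not part of the patch; it is a minimal standalone sketch (assumes an AVX2-capable compiler, e.g. gcc -mavx2) that demonstrates the reordering with recognizable values.

/* Standalone sketch (not from the patch): why the permute mask is _MM_SHUFFLE(3, 1, 2, 0). */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  int16_t a[16], b[16];
  for (int i = 0; i < 16; ++i) {
    a[i] = i;        // 0..15
    b[i] = 16 + i;   // 16..31
  }

  __m256i va = _mm256_loadu_si256((const __m256i *)a);
  __m256i vb = _mm256_loadu_si256((const __m256i *)b);

  // Lane-wise pack: the four qwords come out as [a0..7][b0..7][a8..15][b8..15].
  __m256i packed = _mm256_packus_epi16(va, vb);

  // Select qwords 0, 2, 1, 3; _MM_SHUFFLE(3, 1, 2, 0) == 0xD8 encodes that order.
  __m256i ordered = _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0));

  uint8_t out[32];
  _mm256_storeu_si256((__m256i *)out, ordered);

  for (int i = 0; i < 32; ++i)
    printf("%d ", out[i]);   // prints 0 1 2 ... 31 in order
  printf("\n");
  return 0;
}

With the old mask (qword order 0, 2, 3, 1) the low 128 bits happen to come out the same, which is why the _mm_storeu_si128 chroma stores already looked correct; only the full 256-bit stores in the default-width paths wrote their upper half scrambled, and those are the lines the patch changes.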