diff --git a/src/kvazaar.h b/src/kvazaar.h
index df4146bc..1b28397a 100644
--- a/src/kvazaar.h
+++ b/src/kvazaar.h
@@ -175,6 +175,8 @@ typedef struct kvz_config
   kvz_gop_config gop[KVZ_MAX_GOP_LENGTH];  /*!< \brief Array of GOP settings */
 
   int32_t target_bitrate;
+
+  int8_t mv_rdo;            /*!< \brief Selects the MV cost function used in inter search (0: estimation via calc_mvd_cost, 1: RDO via kvz_calc_mvd_cost_cabac). */
 } kvz_config;
 
 /**
diff --git a/src/search_inter.c b/src/search_inter.c
index d2f232c5..df5faadb 100644
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -141,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
   
   vector2d_t mv_best = { 0, 0 };
 
+
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
   assert(pattern_type < 4);
 
   //implemented search patterns
@@ -252,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
                             (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
                             block_width, block_width, max_lcu_below);
-      cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
       PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
         (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
@@ -293,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
   int k;
 
   vector2d_t mv_best = { 0, 0 };
+
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
   
   //compute SAD values for every point in the iRaster downsampled version of the current search area
   for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
@@ -309,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
           (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
           (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
           block_width, block_width, max_lcu_below);
-        cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+        cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
         PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
           (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
@@ -362,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
   unsigned best_index = num_cand;
   int max_lcu_below = -1;
 
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
   if (state->encoder_control->owf) {
     max_lcu_below = 1;
   }
@@ -377,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
                                         (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                         (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                         block_width, block_width, max_lcu_below);
-    best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
+    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
 
     PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@@ -402,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
                                    (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                    (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                    block_width, block_width, max_lcu_below);
-    cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
     PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@@ -531,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
   unsigned i;
   unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
   int max_lcu_below = -1;
+
+  int (*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
   
   if (state->encoder_control->owf) {
     max_lcu_below = 1;
@@ -554,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                                         (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                         (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                         block_width, block_width, max_lcu_below);
-    best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
     best_bitcost = bitcost;
     best_index = num_cand; 
 
@@ -578,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                                    (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
                                    (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
                                    block_width, block_width, max_lcu_below);
-    cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
     PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@@ -611,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, 
                              (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
                              block_width, block_width, max_lcu_below);
-      cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
       PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
                               (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, 
@@ -654,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                                (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                                (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
                                block_width, block_width, max_lcu_below);
-        cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+        cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
         PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
               (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
               (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, 
@@ -686,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
                              (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
                              (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
                              block_width, block_width, max_lcu_below);
-      cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+      cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
       PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, 
             (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, 
@@ -823,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state,
   kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
   kvz_pixel* dst_off = &dst[dst_stride*4+4];
 
+  int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+    int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+    int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+  if (state->encoder_control->cfg->mv_rdo) {
+    calc_mvd = kvz_calc_mvd_cost_cabac;
+  }
+
   kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
                 state->tile->lcu_offset_x * LCU_WIDTH,
                 state->tile->lcu_offset_y * LCU_WIDTH,
@@ -856,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state,
 
     cost = satd(tmp_pic,tmp_filtered);
 
-    cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
     if (cost < best_cost) {
       best_cost    = cost;
@@ -892,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state,
 
     cost = satd(tmp_pic,tmp_filtered);
 
-    cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
+    cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
 
     if (cost < best_cost) {
       best_cost    = cost;
@@ -1060,6 +1097,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
     static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
     static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
     uint8_t cutoff = num_cand;
+
+
+    int(*calc_mvd)(const encoder_state_t * const, int, int, int,
+      int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
+      int16_t, int32_t, uint32_t *) = calc_mvd_cost;
+    if (state->encoder_control->cfg->mv_rdo) {
+      calc_mvd = kvz_calc_mvd_cost_cabac;
+    }
+
     for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
       uint8_t i = priorityList0[idx];
       uint8_t j = priorityList1[idx];
@@ -1110,8 +1156,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
 
           cost = satd(tmp_pic, tmp_block);
 
-          cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
-          cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
+          cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
+          cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
 
           if (cost < cur_cu->inter.cost) {