Huge refactoring

Split some parts of encoder_control into encoder_state (idea: encoder_control is immutable). The goal is to allow multiple substreams in the future.
2024-11-23 18:14:06 +00:00 · 2014-04-17 14:42:20 +02:00 · 2014-04-17 14:42:20 +02:00 · 5fea5875a5
parent 88a67a4e49
commit 5fea5875a5
17 changed files with 588 additions and 537 deletions
--- a/src/encmain.c
+++ b/src/encmain.c
@ -60,7 +60,9 @@ int main(int argc, char *argv[])
  config *cfg  = NULL; //!< Global configuration
  FILE *input  = NULL; //!< input file (YUV)
  FILE *output = NULL; //!< output file (HEVC NAL stream)
-  encoder_control *encoder = NULL; //!< Encoder control struct
+  encoder_control encoder;
+  encoder_state encoder_state;
+  picture *cur_pic;
  double psnr[3] = { 0.0, 0.0, 0.0 };
  uint64_t curpos  = 0;
  uint64_t lastpos = 0;
@ -207,67 +209,78 @@ int main(int argc, char *argv[])
    }
  }
  
-  encoder = init_encoder_control(cfg);
-  if (!encoder)
+  //Allocate and init exp golomb table
+  if (!init_exp_golomb(4096*8)) {
+    fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n");
    goto exit_failure;
+  }
+
+  if (!encoder_control_init(&encoder, cfg)) {
+    goto exit_failure;
+  }
  
  // Set output file
+  encoder.out.file = output;
  
-  encoder->output = output;
-  encoder->stream.file.output = output;
  // input init (TODO: read from commandline / config)
-  encoder->bitdepth = 8;
-  encoder->frame    = 0;
-  encoder->QP       = (int8_t)encoder->cfg->qp;
-  encoder->in.video_format = FORMAT_420;
-  // deblocking filter
-  encoder->deblock_enable   = (int8_t)encoder->cfg->deblock_enable;
-  encoder->beta_offset_div2 = (int8_t)encoder->cfg->deblock_beta;
-  encoder->tc_offset_div2   = (int8_t)encoder->cfg->deblock_tc;
-  // SAO
-  encoder->sao_enable = (int8_t)encoder->cfg->sao_enable;
-  // RDO
-  encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable;
-  encoder->rdo         = (int8_t)encoder->cfg->rdo;
-  // TR SKIP
-  encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable;
-  // VUI
-  encoder->vui.sar_width   = (int16_t)encoder->cfg->vui.sar_width;
-  encoder->vui.sar_height  = (int16_t)encoder->cfg->vui.sar_height;
-  encoder->vui.overscan    = encoder->cfg->vui.overscan;
-  encoder->vui.videoformat = encoder->cfg->vui.videoformat;
-  encoder->vui.fullrange   = encoder->cfg->vui.fullrange;
-  encoder->vui.colorprim   = encoder->cfg->vui.colorprim;
-  encoder->vui.transfer    = encoder->cfg->vui.transfer;
-  encoder->vui.colormatrix = encoder->cfg->vui.colormatrix;
-  encoder->vui.chroma_loc  = (int8_t)encoder->cfg->vui.chroma_loc;
-  // AUD
-  encoder->aud_enable = (int8_t)encoder->cfg->aud_enable;
+  encoder.bitdepth = 8;
+  encoder.in.video_format = FORMAT_420;
  
-  init_encoder_input(&encoder->in, input, cfg->width, cfg->height);
+  // deblocking filter
+  encoder.deblock_enable   = (int8_t)encoder.cfg->deblock_enable;
+  encoder.beta_offset_div2 = (int8_t)encoder.cfg->deblock_beta;
+  encoder.tc_offset_div2   = (int8_t)encoder.cfg->deblock_tc;
+  // SAO
+  encoder.sao_enable = (int8_t)encoder.cfg->sao_enable;
+  // RDO
+  encoder.rdoq_enable = (int8_t)encoder.cfg->rdoq_enable;
+  encoder.rdo         = (int8_t)encoder.cfg->rdo;
+  // TR SKIP
+  encoder.trskip_enable = (int8_t)encoder.cfg->trskip_enable;
+  // VUI
+  encoder.vui.sar_width   = (int16_t)encoder.cfg->vui.sar_width;
+  encoder.vui.sar_height  = (int16_t)encoder.cfg->vui.sar_height;
+  encoder.vui.overscan    = encoder.cfg->vui.overscan;
+  encoder.vui.videoformat = encoder.cfg->vui.videoformat;
+  encoder.vui.fullrange   = encoder.cfg->vui.fullrange;
+  encoder.vui.colorprim   = encoder.cfg->vui.colorprim;
+  encoder.vui.transfer    = encoder.cfg->vui.transfer;
+  encoder.vui.colormatrix = encoder.cfg->vui.colormatrix;
+  encoder.vui.chroma_loc  = (int8_t)encoder.cfg->vui.chroma_loc;
+  // AUD
+  encoder.aud_enable = (int8_t)encoder.cfg->aud_enable;
+
+  encoder_control_input_init(&encoder, input, cfg->width, cfg->height);

  fprintf(stderr, "Input: %s, output: %s\n", cfg->input, cfg->output);
  fprintf(stderr, "  Video size: %dx%d (input=%dx%d)\n",
-         encoder->in.cur_pic->width, encoder->in.cur_pic->height,
-         encoder->in.real_width, encoder->in.real_height);
+         encoder.in.width, encoder.in.height,
+         encoder.in.real_width, encoder.in.real_height);
+  
+  if (!encoder_state_init(&encoder_state, &encoder)) {
+    goto exit_failure;
+  }
+  
+  encoder_state.frame    = 0;
+  encoder_state.QP       = (int8_t)encoder.cfg->qp;

  // Only the code that handles conformance window coding needs to know
  // the real dimensions. As a quick fix for broken non-multiple of 8 videos,
  // change the input values here to be the real values. For a real fix
  // encoder.in probably needs to be merged into cfg.
  // The real fix would be: never go dig in cfg
-  //cfg->width = encoder->in.width;
-  //cfg->height = encoder->in.height;
+  //cfg->width = encoder.in.width;
+  //cfg->height = encoder.in.height;

  // Start coding cycle while data on input and not on the last frame
-  while(!cfg->frames || encoder->frame < cfg->frames) {
+  while(!cfg->frames || encoder_state.frame < cfg->frames) {
    int32_t diff;
    double temp_psnr[3];

    // Skip '--seek' frames before input.
    // This block can be moved outside this while loop when there is a
    // mechanism to skip the while loop on error.
-    if (encoder->frame == 0 && cfg->seek > 0) {
+    if (encoder_state.frame == 0 && cfg->seek > 0) {
      int frame_bytes = cfg->width * cfg->height * 3 / 2;
      int error = 0;

@ -275,7 +288,7 @@ int main(int argc, char *argv[])
        // Input is stdin.
        int i;
        for (i = 0; !error && i < cfg->seek; ++i) {
-          error = !read_one_frame(input, encoder);
+          error = !read_one_frame(input, &encoder_state);
        }
      } else {
        // input is a file. We hope. Proper detection is OS dependent.
@ -288,25 +301,27 @@ int main(int argc, char *argv[])
    }

    // Read one frame from the input
-    if (!read_one_frame(input, encoder)) {
+    if (!read_one_frame(input, &encoder_state)) {
      if (!feof(input))
-        fprintf(stderr, "Failed to read a frame %d\n", encoder->frame);
+        fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame);
      break;
    }

    // The actual coding happens here, after this function we have a coded frame
-    encode_one_frame(encoder);
+    encode_one_frame(&encoder_state);
+    
+    cur_pic = encoder_state.cur_pic;

    if (cfg->debug != NULL) {
      // Write reconstructed frame out.
      // Use conformance-window dimensions instead of internal ones.
-      const int width = encoder->in.cur_pic->width;
-      const int out_width = encoder->in.real_width;
-      const int out_height = encoder->in.real_height;
+      const int width = cur_pic->width;
+      const int out_width = encoder.in.real_width;
+      const int out_height = encoder.in.real_height;
      int y;
-      const pixel *y_rec = encoder->in.cur_pic->y_recdata;
-      const pixel *u_rec = encoder->in.cur_pic->u_recdata;
-      const pixel *v_rec = encoder->in.cur_pic->v_recdata;
+      const pixel *y_rec = cur_pic->y_recdata;
+      const pixel *u_rec = cur_pic->u_recdata;
+      const pixel *v_rec = cur_pic->v_recdata;

      for (y = 0; y < out_height; ++y) {
        fwrite(&y_rec[y * width], sizeof(*y_rec), out_width, recout);
@ -325,12 +340,12 @@ int main(int argc, char *argv[])
    lastpos = curpos;

    // PSNR calculations
-    temp_psnr[0] = image_psnr(encoder->in.cur_pic->y_data, encoder->in.cur_pic->y_recdata, cfg->width, cfg->height);
-    temp_psnr[1] = image_psnr(encoder->in.cur_pic->u_data, encoder->in.cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
-    temp_psnr[2] = image_psnr(encoder->in.cur_pic->v_data, encoder->in.cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
+    temp_psnr[0] = image_psnr(cur_pic->y_data, cur_pic->y_recdata, cfg->width, cfg->height);
+    temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
+    temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);

-    fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder->frame,
-           "BPI"[encoder->in.cur_pic->slicetype%3], diff<<3,
+    fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame,
+           "BPI"[cur_pic->slicetype%3], diff<<3,
           temp_psnr[0], temp_psnr[1], temp_psnr[2]);

    // Increment total PSNR
@ -342,33 +357,33 @@ int main(int argc, char *argv[])
    // TODO: add more than one reference

    // Remove the ref pic (if present)
-    if (encoder->ref->used_size == (uint32_t)encoder->cfg->ref_frames) {
-      picture_list_rem(encoder->ref, encoder->ref->used_size-1, 1);
+    if (encoder_state.ref->used_size == (uint32_t)encoder.cfg->ref_frames) {
+      picture_list_rem(encoder_state.ref, encoder_state.ref->used_size-1, 1);
    }
    // Add current picture as reference
-    picture_list_add(encoder->ref, encoder->in.cur_pic);
+    picture_list_add(encoder_state.ref, cur_pic);
    // Allocate new memory to current picture
    // TODO: reuse memory from old reference
-    encoder->in.cur_pic = picture_init(encoder->in.cur_pic->width, encoder->in.cur_pic->height, encoder->in.cur_pic->width_in_lcu, encoder->in.cur_pic->height_in_lcu);
+    encoder_state.cur_pic = picture_init(encoder_state.cur_pic->width, encoder_state.cur_pic->height, encoder_state.cur_pic->width_in_lcu, encoder_state.cur_pic->height_in_lcu);

    // Copy pointer from the last cur_pic because we don't want to reallocate it
-    MOVE_POINTER(encoder->in.cur_pic->coeff_y,encoder->ref->pics[0]->coeff_y);
-    MOVE_POINTER(encoder->in.cur_pic->coeff_u,encoder->ref->pics[0]->coeff_u);
-    MOVE_POINTER(encoder->in.cur_pic->coeff_v,encoder->ref->pics[0]->coeff_v);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_y,encoder_state.ref->pics[0]->coeff_y);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_u,encoder_state.ref->pics[0]->coeff_u);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_v,encoder_state.ref->pics[0]->coeff_v);

-    MOVE_POINTER(encoder->in.cur_pic->pred_y,encoder->ref->pics[0]->pred_y);
-    MOVE_POINTER(encoder->in.cur_pic->pred_u,encoder->ref->pics[0]->pred_u);
-    MOVE_POINTER(encoder->in.cur_pic->pred_v,encoder->ref->pics[0]->pred_v);
+    MOVE_POINTER(encoder_state.cur_pic->pred_y,encoder_state.ref->pics[0]->pred_y);
+    MOVE_POINTER(encoder_state.cur_pic->pred_u,encoder_state.ref->pics[0]->pred_u);
+    MOVE_POINTER(encoder_state.cur_pic->pred_v,encoder_state.ref->pics[0]->pred_v);

-    encoder->frame++;
-    encoder->poc++;
+    encoder_state.frame++;
+    encoder_state.poc++;
  }
  // Coding finished
  fgetpos(output,(fpos_t*)&curpos);

  // Print statistics of the coding
-  fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, (long long unsigned int) curpos<<3,
-         psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame);
+  fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3,
+         psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame);
  fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC);

  fclose(input);
@ -377,12 +392,9 @@ int main(int argc, char *argv[])

  // Deallocating
  config_destroy(cfg);
-  scalinglist_destroy(&encoder->scaling_list);
-  picture_list_destroy(encoder->ref);
-  picture_destroy(encoder->in.cur_pic);
-  FREE_POINTER(encoder->in.cur_pic);
-  bitstream_finalize(&encoder->stream);
-  free(encoder);
+  encoder_state_finalize(&encoder_state);
+  encoder_control_finalize(&encoder);
+
  free_exp_golomb();

  return EXIT_SUCCESS;
--- a/src/encoder.c
+++ b/src/encoder.c
--- a/src/encoder.h
+++ b/src/encoder.h
@ -44,33 +44,33 @@ typedef struct

 enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };

-/* Input info struct */
-typedef struct
-{
-  FILE *file;
-  int32_t real_width;  /*!< \brief real input picture width */
-  int32_t real_height; /*!< \brief real input picture width */
-  picture *cur_pic;
-  int8_t video_format;
-  int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
-} encoder_input;
-
 /* Encoder control options, the main struct */
 typedef struct
 {
-  int32_t frame;
-  int32_t poc; /*!< \brief picture order count */
+  /* Configuration */
  const config *cfg;
-  encoder_input in;
+  
+  /* Input */
+  struct {
+    FILE *file;
+    int32_t width;
+    int32_t height;
+    int32_t width_in_lcu;
+    int32_t height_in_lcu;
+    int32_t real_width;  /*!< \brief real input picture width */
+    int32_t real_height; /*!< \brief real input picture width */
+    int8_t video_format;
+    int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
+  } in;
+  
+  /* Output */
+  struct {
+    FILE *file;
+  } out;
+  
  encoder_me me;
-  bitstream stream;
-  FILE *output;
-  picture_list *ref;
-  int8_t ref_list;
-  int8_t ref_idx_num[2];
-  int8_t QP;             // \brief Quantization parameter
+  
  int8_t bitdepth;
-  double cur_lambda_cost;

  /* Filtering */
  int8_t deblock_enable; // \brief Flag to enable deblocking filter
@ -101,20 +101,48 @@ typedef struct
  scaling_list scaling_list;
 } encoder_control;

-void init_lambda(encoder_control *encoder);
-encoder_control *init_encoder_control(config *cfg);
-void init_encoder_input(encoder_input *input, FILE* inputfile,
-                        int32_t width, int32_t height);
-void encode_one_frame(encoder_control *encoder);
-int read_one_frame(FILE *file, const encoder_control * const encoder);
+typedef struct encoder_state {
+  const encoder_control *encoder_control;
  
-void encode_seq_parameter_set(encoder_control * const encoder);
-void encode_pic_parameter_set(encoder_control * const encoder);
-void encode_vid_parameter_set(encoder_control * const encoder);
-void encode_slice_header(encoder_control * const encoder);
-void encode_access_unit_delimiter(encoder_control * const encoder);
-void encode_prefix_sei_version(encoder_control * const encoder);
-void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac, uint16_t x_ctb,
+  picture *cur_pic;
+  int32_t frame;
+  int32_t poc; /*!< \brief picture order count */
+  
+  bitstream stream;
+  
+  picture_list *ref;
+  int8_t ref_list;
+  int8_t ref_idx_num[2];
+  int8_t QP;             // \brief Quantization parameter
+  
+  double cur_lambda_cost;
+  
+  cabac_data cabac;
+  
+  struct encoder_state *children;
+} encoder_state;
+
+int encoder_control_init(encoder_control *encoder, const config *cfg);
+int encoder_control_finalize(encoder_control *encoder);
+
+void encoder_control_input_init(encoder_control *encoder, FILE *inputfile, int32_t width, int32_t height);
+
+int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder);
+int encoder_state_finalize(encoder_state *encoder_state);
+void encoder_state_init_lambda(encoder_state *encoder_state);
+
+void init_encoder_input(encoder_control *encoder, FILE* inputfile,
+                        int32_t width, int32_t height);
+void encode_one_frame(encoder_state *encoder_state);
+int read_one_frame(FILE* file, const encoder_state *encoder);
+
+void encode_seq_parameter_set(encoder_state *encoder);
+void encode_pic_parameter_set(encoder_state *encoder);
+void encode_vid_parameter_set(encoder_state *encoder);
+void encode_slice_header(encoder_state * encoder);
+void encode_access_unit_delimiter(encoder_state *encoder);
+void encode_prefix_sei_version(encoder_state *encoder);
+void encode_coding_tree(encoder_state *encoder, cabac_data *cabac, uint16_t x_ctb,
                        uint16_t y_ctb, uint8_t depth);

 void encode_last_significant_xy(cabac_data *cabac,
@ -123,8 +151,8 @@ void encode_last_significant_xy(cabac_data *cabac,
                                uint8_t type, uint8_t scan);
 void encode_coeff_nxn(const encoder_control * const encoder, cabac_data *cabac, int16_t *coeff, uint8_t width,
                      uint8_t type, int8_t scan_mode, int8_t tr_skip);
-void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
-void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
+void encode_transform_tree(encoder_state *encoder_state, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
+void encode_transform_coeff(encoder_state *encoder_state, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
                            int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
 void encode_block_residual(const encoder_control * const encoder,
                           uint16_t x_ctb, uint16_t y_ctb, uint8_t depth);
--- a/src/filter.c
+++ b/src/filter.c
@ -163,11 +163,13 @@ INLINE void filter_deblock_chroma(const encoder_control * const encoder, pixel *
 /**
 * \brief
 */
-void filter_deblock_edge_luma(const encoder_control * const encoder,
+void filter_deblock_edge_luma(encoder_state * const encoder_state,
                              int32_t xpos, int32_t ypos,
                              int8_t depth, int8_t dir)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  
  cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)];

  {
@ -192,7 +194,7 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
    int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
    int8_t strength = 0;

-    int32_t qp              = encoder->QP;
+    int32_t qp              = encoder_state->QP;
    int32_t bitdepth_scale  = 1 << (encoder->bitdepth - 8);
    int32_t b_index         = CLIP(0, 51, qp + (beta_offset_div2 << 1));
    int32_t beta            = g_beta_table_8x8[b_index] * bitdepth_scale;
@ -288,11 +290,12 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
 /**
 * \brief
 */
-void filter_deblock_edge_chroma(const encoder_control * const encoder,
+void filter_deblock_edge_chroma(encoder_state * const encoder_state,
                                int32_t x, int32_t y,
                                int8_t depth, int8_t dir)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  const picture * const cur_pic = encoder_state->cur_pic;
  cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)];

  // Chroma edges that do not lay on a 8x8 grid are not deblocked.
@ -324,7 +327,7 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
    int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
    int8_t strength = 2;

-    int32_t QP             = g_chroma_scale[encoder->QP];
+    int32_t QP             = g_chroma_scale[encoder_state->QP];
    int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
    int32_t TC_index       = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
    int32_t Tc             = g_tc_table_8x8[TC_index]*bitdepth_scale;
@ -384,9 +387,9 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
 * until the coded block size has been achived. Calls luma and chroma filtering
 * functions for each coded CU size.
 */
-void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int32_t edge)
+void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x + y*(cur_pic->width_in_lcu << MAX_DEPTH)];
  uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
  uint8_t border_x = (cur_pic->width  < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
@ -404,15 +407,15 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
    // Tell clang-analyzer that everything is ok.
    assert(depth >= 0 && depth < MAX_DEPTH);

-    filter_deblock_cu(encoder, x, y, depth + 1, edge);
+    filter_deblock_cu(encoder_state, x, y, depth + 1, edge);
    if(!border_x || border_split_x) {
-      filter_deblock_cu(encoder, x + change, y, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x + change, y, depth + 1, edge);
    }
    if(!border_y || border_split_y) {
-      filter_deblock_cu(encoder, x , y + change, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x , y + change, depth + 1, edge);
    }
    if((!border_x && !border_y) || (border_split_x && border_split_y)) {
-      filter_deblock_cu(encoder, x + change, y + change, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x + change, y + change, depth + 1, edge);
    }
    return;
  }
@ -421,8 +424,8 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
  if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return;

  // do the filtering for block edge
-  filter_deblock_edge_luma(encoder,   x*(LCU_WIDTH >> MAX_DEPTH),       y*(LCU_WIDTH >> MAX_DEPTH),       depth, edge);
-  filter_deblock_edge_chroma(encoder, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
+  filter_deblock_edge_luma(encoder_state,   x*(LCU_WIDTH >> MAX_DEPTH),       y*(LCU_WIDTH >> MAX_DEPTH),       depth, edge);
+  filter_deblock_edge_chroma(encoder_state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
 }

 /**
@ -433,9 +436,9 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
 * the Largest Coding Units (LCU) and call filter_deblock_cu with absolute
 * X and Y coordinates of the LCU.
 */
-void filter_deblock(const encoder_control * const encoder)
+void filter_deblock(encoder_state * const encoder_state)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  int16_t x, y;

  // TODO: Optimization: add thread for each LCU
@ -444,7 +447,7 @@ void filter_deblock(const encoder_control * const encoder)
  {
    for (x = 0; x < cur_pic->width_in_lcu; x++)
    {
-      filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
+      filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
    }
  }

@ -453,7 +456,7 @@ void filter_deblock(const encoder_control * const encoder)
  {
    for (x = 0; x < cur_pic->width_in_lcu; x++)
    {
-      filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
+      filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
    }
  }
 }
@ -469,11 +472,11 @@ void filter_deblock(const encoder_control * const encoder)
 * - After vertical filtering the left edge, filter the last 4 pixels of
 *   horizontal edges in the LCU to the left.
 */
-void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px)
+void filter_deblock_lcu(encoder_state * const encoder_state, int x_px, int y_px)
 {
  const vector2d lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH };

-  filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
+  filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);

  // Filter rightmost 4 pixels from last LCU now that they have been
  // finally deblocked vertically.
@ -481,15 +484,15 @@ void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_p
    int y;
    for (y = 0; y < 64; y += 8) {
      if (lcu.y + y == 0) continue;
-      filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
+      filter_deblock_edge_luma(encoder_state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
    }
    for (y = 0; y < 32; y += 8) {
      if (lcu.y + y == 0) continue;
-      filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
+      filter_deblock_edge_chroma(encoder_state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
    }
  }

-  filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
+  filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
 }


--- a/src/filter.h
+++ b/src/filter.h
@ -32,16 +32,16 @@
 //////////////////////////////////////////////////////////////////////////
 // FUNCTIONS
 // Deblocking
-void filter_deblock_cu(const encoder_control * const encoder, int32_t x_px, int32_t y_px,
+void filter_deblock_cu(encoder_state *encoder_state, int32_t x_px, int32_t y_px,
                       int8_t depth, int32_t edge);
-void filter_deblock_edge_luma(const encoder_control * const encoder,
+void filter_deblock_edge_luma(encoder_state *encoder_state,
                              int32_t x_pos, int32_t y_pos,
                              int8_t depth, int8_t dir);
-void filter_deblock_edge_chroma(const encoder_control * const encoder,
+void filter_deblock_edge_chroma(encoder_state *encoder_state,
                                int32_t xpos, int32_t ypos,
                                int8_t depth, int8_t dir);
-void filter_deblock(const encoder_control * const encoder);
-void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px);
+void filter_deblock(encoder_state *encoder_state);
+void filter_deblock_lcu(encoder_state *encoder_state, int x_px, int y_px);
 void filter_deblock_luma(const encoder_control * const encoder, pixel *src, int32_t offset, int32_t tc , int8_t sw,
                         int8_t part_p_nofilter, int8_t part_q_nofilter,
                         int32_t thr_cut,
--- a/src/inter.c
+++ b/src/inter.c
@ -322,7 +322,7 @@ void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_i
 * \param depth current block depth
 * \param mv_pred[2][2] 2x motion vector prediction
 */
-void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
+void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
 {
  uint8_t candidates = 0;
  uint8_t b_candidates = 0;
@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t
  inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu);

 #define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)
-#define APPLY_MV_SCALING(cu, cand) {int td = encoder->poc - encoder->ref->pics[(cu)->inter.mv_ref]->poc;\
-                                   int tb = encoder->poc - encoder->ref->pics[cur_cu->inter.mv_ref]->poc;\
+#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\
+                                   int tb = encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\
                                   if (td != tb) { \
                                      int scale = CALCULATE_SCALE(cu,tb,td); \
                                       mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \
--- a/src/inter.h
+++ b/src/inter.h
@ -35,6 +35,6 @@ void inter_recon_lcu(const encoder_control *encoder, picture* ref,int32_t xpos,

 void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1,
                                        cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
-void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
+void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
 uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu);
 #endif
--- a/src/intra.c
+++ b/src/intra.c
@ -327,7 +327,7 @@ static void intra_get_pred(const encoder_control * const encoder, pixel *rec[2],
 * \param sad_out sad value of best mode
 * \returns best intra mode
 */
-int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
+int16_t intra_prediction(const encoder_state * const encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
                         uint8_t width, uint32_t *sad_out,
                         int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac)
 {
@ -336,6 +336,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
  int16_t best_mode = 1;
  uint32_t best_bitcost = 0;
  int16_t mode;
+  int8_t rdo = encoder_state->encoder_control->rdo;

  // Check 8 modes for 4x4 and 8x8, 3 for others
  int8_t   rdo_modes_to_check = (width == 4 || width == 8)? 8 : 3;
@ -371,12 +372,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
  // Try all modes and select the best one.
  for (mode = 0; mode < 35; mode++) {
    uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds);
-    intra_get_pred(encoder, ref, recstride, pred, width, mode, 0);
+    intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0);

    sad = cost_func(pred, orig_block);
-    sad += mode_cost * (int)(encoder->cur_lambda_cost + 0.5);
+    sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5);
    // When rdo == 2, store best costs to an array and do full RDO later
-    if(encoder->rdo == 2) {
+    if(rdo == 2) {
      int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad);
      if(rdo_mode != -1) {
        rdo_modes[rdo_mode] = mode; rdo_costs[rdo_mode] = sad;
@ -390,7 +391,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
  }

  // Select from three best modes if using RDO
-  if(encoder->rdo == 2) {
+  if(rdo == 2) {
    int rdo_mode;
    int pred_mode;
    // Check that the predicted modes are in the RDO mode list
@ -413,12 +414,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
    for(rdo_mode = 0; rdo_mode < rdo_modes_to_check; rdo_mode ++) {
      int rdo_bitcost;
      // The reconstruction is calculated again here, it could be saved from before..
-      intra_recon(encoder, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
-      rdo_costs[rdo_mode] = rdo_cost_intra(encoder,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
+      intra_recon(encoder_state->encoder_control, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
+      rdo_costs[rdo_mode] = rdo_cost_intra(encoder_state,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
      // Bitcost also calculated again for this mode
      rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds);
      // Add bitcost * lambda
-      rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder->cur_lambda_cost + 0.5);
+      rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5);

      if(rdo_costs[rdo_mode] < best_sad) {
        best_sad = rdo_costs[rdo_mode];
@ -832,8 +833,9 @@ void intra_get_planar_pred(pixel* src, int32_t srcstride, uint32_t width, pixel*
  }
 }

-void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
+void intra_recon_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
  int x_local = (x&0x3f), y_local = (y&0x3f);
  cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];

@ -890,5 +892,5 @@ void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, i
                            rec_stride, width, width);
  }

-  encode_transform_tree(encoder, cabac, x, y, depth, lcu);
+  encode_transform_tree(encoder_state, cabac, x, y, depth, lcu);
 }
--- a/src/intra.h
+++ b/src/intra.h
@ -39,7 +39,7 @@ void intra_build_reference_border(const encoder_control *encoder, int32_t x_luma
 void intra_filter(pixel* ref, int32_t stride, int32_t width, int8_t mode);

 /* Predictions */
-int16_t intra_prediction(const encoder_control *encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
+int16_t intra_prediction(const encoder_state *encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
                         uint8_t width, uint32_t *sad_out,
                         int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac);

@ -49,6 +49,6 @@ void intra_get_angular_pred(const encoder_control *encoder, pixel* src, int32_t

 void intra_recon(const encoder_control *encoder, pixel* rec, int32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma);

-void intra_recon_lcu(const encoder_control *encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
+void intra_recon_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);

 #endif
--- a/src/rdo.c
+++ b/src/rdo.c
@ -63,8 +63,9 @@ const uint32_t entropy_bits[128] =

 ** Only for luma
 */
-uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
+uint32_t rdo_cost_intra(const encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
 {
+    const encoder_control * const encoder = encoder_state->encoder_control;
    coefficient pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2];
    int16_t block[LCU_WIDTH*LCU_WIDTH>>2];
    int16_t temp_block[LCU_WIDTH*LCU_WIDTH>>2];
@ -92,11 +93,11 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
    }
    transform2d(encoder, block,pre_quant_coeff,width,0);
    if(encoder->rdoq_enable) {
-      rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
+      rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
    } else {
-      quant(encoder, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
+      quant(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
    }
-    dequant(encoder, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
+    dequant(encoder_state, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
    itransform2d(encoder, temp_block,pre_quant_coeff,width,0);

    // SSD between original and reconstructed
@ -111,12 +112,12 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
      for (i = 0; i < width*width; i++) {
        coeffcost += abs((int)temp_coeff[i]);
      }
-      cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5);
+      cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5);
      // Full RDO
    } else if(encoder->rdo == 2) {
      coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, width, 0, luma_scan_mode);

-      cost  += coeffcost*((int)encoder->cur_lambda_cost+0.5);
+      cost  += coeffcost*((int)encoder_state->cur_lambda_cost+0.5);
    }
    return cost;
 }
@ -284,7 +285,7 @@ int32_t get_ic_rate( cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one
 * This method calculates the best quantized transform level for a given scan position.
 * From HM 12.0
 */
-uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
+uint32_t get_coded_level ( const encoder_state * const encoder_state, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
                           int32_t level_double, uint32_t max_abs_level,
                           uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                           uint16_t abs_go_rice,
@ -298,7 +299,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
  cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma);

  if( !last && max_abs_level < 3 ) {
-    *coded_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
+    *coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
    *coded_cost     = *coded_cost0 + *coded_cost_sig;
    if (max_abs_level == 0) return best_abs_level;
  } else {
@ -306,13 +307,13 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
  }

  if( !last ) {
-    cur_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
+    cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
  }

  min_abs_level    = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
  for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
    double err       = (double)(level_double - ( abs_level << q_bits ) );
-    double cur_cost  = err * err * temp + encoder->cur_lambda_cost *
+    double cur_cost  = err * err * temp + encoder_state->cur_lambda_cost *
                       get_ic_rate_cost( cabac, abs_level, ctx_num_one, ctx_num_abs,
                                         abs_go_rice, c1_idx, c2_idx, type);
    cur_cost        += cur_cost_sig;
@ -336,7 +337,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
 *
 * From HM 12.0
 */
-static double get_rate_last(const encoder_control * const encoder,
+static double get_rate_last(const encoder_state * const encoder_state,
                            const uint32_t  pos_x, const uint32_t pos_y,
                            int32_t* last_x_bits, int32_t* last_y_bits)
 {
@ -349,7 +350,7 @@ static double get_rate_last(const encoder_control * const encoder,
  if( ctx_y > 3 ) {
    uiCost += 32768.0 * ((ctx_y-2)>>1);
  }
-  return encoder->cur_lambda_cost*uiCost;
+  return encoder_state->cur_lambda_cost*uiCost;
 }

 static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int8_t type,
@ -388,9 +389,10 @@ static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int
 * coding engines using probability models like CABAC
 * From HM 12.0
 */
-void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
+void  rdoq(const encoder_state * const encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
           int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
  uint32_t log2_tr_size    = g_convert_to_bit[ width ] + 2;
  int32_t  transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size;  // Represents scaling through forward transform
  uint16_t go_rice_param   = 0;
@ -398,7 +400,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
  uint32_t max_num_coeff   = width * height;
  int32_t  scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);

-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);

  {
  int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
@ -511,7 +513,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
        uint16_t  abs_ctx = ctx_set + c2;

        if( scanpos == last_scanpos ) {
-          level            = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
+          level            = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                               level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param,
                                               c1_idx, c2_idx, q_bits, temp, 1, type );
        } else {
@ -519,7 +521,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
          uint32_t  pos_x    = blkpos - ( pos_y << log2_block_size );
          uint16_t  ctx_sig  = (uint16_t)context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y,
                                                       log2_block_size, type);
-          level              = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
+          level              = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                               level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param,
                                               c1_idx, c2_idx, q_bits, temp, 0, type );
          sig_rate_delta[ blkpos ] = CTX_ENTROPY_BITS(&baseCtx[ctx_sig],1) - CTX_ENTROPY_BITS(&baseCtx[ctx_sig],0);
@ -587,7 +589,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
        if (sig_coeffgroup_flag[ cg_blkpos ] == 0) {
          uint32_t ctx_sig  = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                          cg_pos_y, width);
-          cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+          cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
          base_cost += cost_coeffgroup_sig[ cg_scanpos ]  - rd_stats.sig_cost;
        } else {
          if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below.
@ -604,9 +606,9 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
            ctx_sig  = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                            cg_pos_y, width);
            if (cg_scanpos < cg_last_scanpos) {
-              cost_coeffgroup_sig[cg_scanpos] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
+              cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
              base_cost    += cost_coeffgroup_sig[cg_scanpos];
-              cost_zero_cg += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+              cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
            }

            // try to convert the current coeff group from non-zero to all-zero
@ -620,7 +622,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
              sig_coeffgroup_flag[ cg_blkpos ] = 0;
              base_cost = cost_zero_cg;
              if (cg_scanpos < cg_last_scanpos) {
-                cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+                cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
              }
              // reset coeffs to 0 in this block
              for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@ -648,13 +650,13 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient


  if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
-    best_cost  = block_uncoded_cost +   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
-    base_cost +=   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
+    best_cost  = block_uncoded_cost +   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
+    base_cost +=   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
  } else {
    cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma);
    ctx_cbf   = ( type ? tr_depth : !tr_depth);
-    best_cost  = block_uncoded_cost +  encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
-    base_cost +=   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
+    best_cost  = block_uncoded_cost +  encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
+    base_cost +=   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
  }

  for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@ -672,7 +674,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
          uint32_t   pos_y       = blkpos >> log2_block_size;
          uint32_t   pos_x       = blkpos - ( pos_y << log2_block_size );

-          double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder, pos_x, pos_y, last_x_bits,last_y_bits );
+          double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder_state, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder_state, pos_x, pos_y, last_x_bits,last_y_bits );
          double totalCost = base_cost + cost_last - cost_sig[ scanpos ];

          if( totalCost < best_cost ) {
@ -708,7 +710,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
  if(*abs_sum >= 2) {
    int64_t rd_factor = (int64_t) (
                     g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6)))
-                   /  encoder->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
+                   /  encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
                   + 0.5);
    int32_t lastCG = -1;
    int32_t absSum = 0;
--- a/src/rdo.h
+++ b/src/rdo.h
@ -42,10 +42,10 @@ extern const uint32_t g_go_rice_range[5];
 extern const uint32_t g_go_rice_prefix_len[5];


-void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
+void  rdoq(const encoder_state *encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
           int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);

-uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);
+uint32_t rdo_cost_intra(const encoder_state *encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);

 int32_t get_coeff_cost(const encoder_control * const encoder, cabac_data *cabac, coefficient *coeff, int32_t width, int32_t type, int8_t scan_mode);

@ -53,7 +53,7 @@ int32_t get_ic_rate(cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one,
                     uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
 double get_ic_rate_cost  (cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                          uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
-uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
+uint32_t get_coded_level ( const encoder_state * encoder_state, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
                           int32_t level_double, uint32_t max_abs_level,
                           uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                           uint16_t abs_go_rice,
--- a/src/sao.c
+++ b/src/sao.c
@ -54,12 +54,12 @@ static int sao_calc_eo_cat(pixel a, pixel b, pixel c)
 }


-int sao_band_ddistortion(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
+int sao_band_ddistortion(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
                         int block_width, int block_height,
                         int band_pos, int sao_bands[4])
 {
  int y, x;
-  int shift = encoder->bitdepth-5;
+  int shift = encoder_state->encoder_control->bitdepth-5;
  int sum = 0;

  for (y = 0; y < block_height; ++y) {
@ -345,12 +345,12 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4],
 * \param rec_data  Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
 * \param sao_bands an array of bands for original and reconstructed block
 */
-static void calc_sao_bands(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
+static void calc_sao_bands(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
                           int block_width, int block_height,
                           int sao_bands[2][32])
 {
  int y, x;
-  int shift = encoder->bitdepth-5;
+  int shift = encoder_state->encoder_control->bitdepth-5;

  //Loop pixels and take top 5 bits to classify different bands
  for (y = 0; y < block_height; ++y) {
@ -608,7 +608,7 @@ void sao_reconstruct(const encoder_control * const encoder, picture * pic, const



-static void sao_search_edge_sao(const encoder_control * const encoder, 
+static void sao_search_edge_sao(const encoder_state * const encoder_state, 
                                const pixel * data[], const pixel * recdata[],
                                int block_width, int block_height,
                                unsigned buf_cnt,
@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,

    {
      int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left);
-      sum_ddistortion += (int)((double)mode_bits*(encoder->cur_lambda_cost+0.5));
+      sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5));
    }
    // SAO is not applied for category 0.
    edge_offset[SAO_EO_CAT0] = 0;
@ -684,7 +684,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,
 }


-static void sao_search_band_sao(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
+static void sao_search_band_sao(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
                               int block_width, int block_height,
                               unsigned buf_cnt,
                               sao_info *sao_out, sao_info *sao_top,
@ -704,14 +704,14 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix

    memset(sao_bands, 0, 2 * 32 * sizeof(int));
    for (i = 0; i < buf_cnt; ++i) {
-      calc_sao_bands(encoder, data[i], recdata[i],block_width,
+      calc_sao_bands(encoder_state, data[i], recdata[i],block_width,
                     block_height,sao_bands);
    }

    ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position);

    temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left);
-    ddistortion += (int)((double)temp_rate*(encoder->cur_lambda_cost+0.5));
+    ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5));

    // Select band sao over edge sao when distortion is lower
    if (ddistortion < sao_out->ddistortion) {
@ -731,7 +731,7 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix
 * \param buf_cnt  Number of pointers data and recdata have.
 * \param sao_out  Output parameter for the best sao parameters.
 */
-static void sao_search_best_mode(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
+static void sao_search_best_mode(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
                                 int block_width, int block_height,
                                 unsigned buf_cnt,
                                 sao_info *sao_out, sao_info *sao_top,
@ -740,12 +740,12 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
  sao_info edge_sao;
  sao_info band_sao;

-  sao_search_edge_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
-  sao_search_band_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
+  sao_search_edge_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
+  sao_search_band_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);

  {
    int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left);
-    int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
+    int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
    unsigned buf_i;
    
    for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
@ -759,11 +759,11 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi

  {
    int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left);
-    int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
+    int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
    unsigned buf_i;
    
    for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
-      ddistortion += sao_band_ddistortion(encoder, data[buf_i], recdata[buf_i], 
+      ddistortion += sao_band_ddistortion(encoder_state, data[buf_i], recdata[buf_i], 
                                          block_width, block_height, 
                                          band_sao.band_position, &band_sao.offsets[1]);
    }
@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
  // Choose between SAO and doing nothing, taking into account the
  // rate-distortion cost of coding do nothing.
  {
-    int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder->cur_lambda_cost + 0.5);
+    int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5);
    if (sao_out->ddistortion >= cost_of_nothing) {
      sao_out->type = SAO_TYPE_NONE;
    }
@ -794,7 +794,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
  return;
 }

- void sao_search_chroma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
+ void sao_search_chroma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
 {
  int block_width  = (LCU_WIDTH / 2);
  int block_height = (LCU_WIDTH / 2);
@ -827,10 +827,10 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
  }

  // Calculate
-  sao_search_best_mode(encoder, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
+  sao_search_best_mode(encoder_state, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
 }

-void sao_search_luma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
+void sao_search_luma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
 {
  pixel orig[LCU_LUMA_SIZE];
  pixel rec[LCU_LUMA_SIZE];
@ -857,13 +857,13 @@ void sao_search_luma(const encoder_control * const encoder, const picture *pic,

  orig_list[0] = orig;
  rec_list[0] = rec;
-  sao_search_best_mode(encoder, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
+  sao_search_best_mode(encoder_state, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
 }

-void sao_reconstruct_frame(const encoder_control * const encoder)
+void sao_reconstruct_frame(encoder_state * const encoder_state)
 {
  vector2d lcu;
-  picture * const cur_pic = encoder->in.cur_pic;
+  picture * const cur_pic = encoder_state->cur_pic;

  // These are needed because SAO needs the pre-SAO pixels form left and
  // top LCUs. Single pixel wide buffers, like what search_lcu takes, would
@ -882,9 +882,9 @@ void sao_reconstruct_frame(const encoder_control * const encoder)
      sao_info *sao_chroma = &cur_pic->sao_chroma[lcu.y * stride + lcu.x];

      // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma);
-      sao_reconstruct(encoder, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
-      sao_reconstruct(encoder, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
-      sao_reconstruct(encoder, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
    }
  }

--- a/src/sao.h
+++ b/src/sao.h
@ -46,11 +46,11 @@ typedef struct sao_info_struct {


 void init_sao_info(sao_info *sao);
-void sao_search_chroma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
-void sao_search_luma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
+void sao_search_chroma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
+void sao_search_luma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
 void sao_reconstruct(const encoder_control * encoder, picture *pic, const pixel *old_rec,
                     unsigned x_ctb, unsigned y_ctb,
                     const sao_info *sao, color_index color_i);
-void sao_reconstruct_frame(const encoder_control * const encoder);
+void sao_reconstruct_frame(encoder_state *encoder_state);

 #endif
--- a/src/search.c
+++ b/src/search.c
@ -117,7 +117,7 @@ static uint32_t get_mvd_coding_cost(vector2d *mvd)
  return bitcost;
 }

-static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
+static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y,
                         int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
                         int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
 {
@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
    temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
  }
  *bitcost = temp_bitcost;
-  return temp_bitcost*(int32_t)(encoder->cur_lambda_cost+0.5);
+  return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5);
 }


@ -183,7 +183,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
 * the predicted motion vector is way off. In the future even more additional
 * points like 0,0 might be used, such as vectors from top or left.
 */
-static unsigned hexagon_search(const encoder_control * const encoder, unsigned depth,
+static unsigned hexagon_search(const encoder_state * const encoder_state, unsigned depth,
                               const picture *pic, const picture *ref,
                               const vector2d *orig, vector2d *mv_in_out,
                               int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
@ -203,7 +203,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
    unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
                             orig->x + mv.x + pattern->x, orig->y + mv.y + pattern->y,
                             block_width, block_width);
-    cost += calc_mvd_cost(encoder, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

    if (cost < best_cost) {
      best_cost    = cost;
@ -217,7 +217,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
    unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
                             orig->x, orig->y,
                             block_width, block_width);
-    cost += calc_mvd_cost(encoder, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

    // If the 0,0 is better, redo the hexagon around that point.
    if (cost < best_cost) {
@ -233,7 +233,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                                 orig->x + pattern->x,
                                 orig->y + pattern->y,
                                 block_width, block_width);
-        cost += calc_mvd_cost(encoder, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+        cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

        if (cost < best_cost) {
          best_cost    = cost;
@ -268,7 +268,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                               orig->x + mv.x + offset->x,
                               orig->y + mv.y + offset->y,
                               block_width, block_width);
-      cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+      cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

      if (cost < best_cost) {
        best_cost    = cost;
@ -291,7 +291,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                             orig->x + mv.x + offset->x,
                             orig->y + mv.y + offset->y,
                             block_width, block_width);
-    cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

    if (cost > 0 && cost < best_cost) {
      best_cost    = cost;
@ -370,9 +370,9 @@ static unsigned search_mv_full(unsigned depth,
 * Update lcu to have best modes at this depth.
 * \return Cost of best mode.
 */
-static int search_cu_inter(const encoder_control * const encoder, int x, int y, int depth, lcu_t *lcu)
+static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  uint32_t ref_idx = 0;
  int x_local = (x&0x3f), y_local = (y&0x3f);
  int x_cu = x>>3;
@ -392,8 +392,8 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,

  cur_cu->inter.cost = UINT_MAX;

-  for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
-    picture *ref_pic = encoder->ref->pics[ref_idx];
+  for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) {
+    picture *ref_pic = encoder_state->ref->pics[ref_idx];
    unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
    cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
    uint32_t temp_bitcost = 0;
@ -413,13 +413,13 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,
    }
    // Get MV candidates
    cur_cu->inter.mv_ref = ref_idx;
-    inter_get_mv_cand(encoder, x, y, depth, mv_cand, cur_cu, lcu);
+    inter_get_mv_cand(encoder_state, x, y, depth, mv_cand, cur_cu, lcu);
    cur_cu->inter.mv_ref = temp_ref_idx;

 #if SEARCH_MV_FULL_RADIUS
    temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
 #else
-    temp_cost += hexagon_search(encoder, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
+    temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
 #endif

    merged = 0;
@ -664,11 +664,11 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu
 * Update lcu to have best modes at this depth.
 * \return Cost of best mode.
 */
-static int search_cu_intra(const encoder_control * const encoder,
+static int search_cu_intra(const encoder_state * const encoder_state,
                           const int x_px, const int y_px,
                           const int depth, lcu_t *lcu, cabac_data *cabac)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f };
  const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
  const int8_t cu_width = (LCU_WIDTH >> (depth));
@ -696,7 +696,7 @@ static int search_cu_intra(const encoder_control * const encoder,
  intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);

  // Build reconstructed block to use in prediction with extrapolated borders
-  intra_build_reference_border(encoder, x_px, y_px, cu_width * 2 + 8,
+  intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8,
                               rec_buffer, cu_width * 2 + 8, 0,
                               cur_pic->width,
                               cur_pic->height,
@ -709,7 +709,7 @@ static int search_cu_intra(const encoder_control * const encoder,
    uint32_t bitcost = UINT32_MAX;
    pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
    unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
-    mode = intra_prediction(encoder,ref_pixels, LCU_WIDTH,
+    mode = intra_prediction(encoder_state,ref_pixels, LCU_WIDTH,
                            cu_in_rec_buffer, cu_width * 2 + 8, cu_width,
                            &cost, candidate_modes, &bitcost, cabac);
    cur_cu->intra[pu_index].mode = (int8_t)mode;
@ -728,7 +728,7 @@ static int search_cu_intra(const encoder_control * const encoder,
 * coding (bitcost * lambda) and cost for coding coefficients (estimated
 * here as (coefficient_sum * 1.5) * lambda)
 */
-static int lcu_get_final_cost(const encoder_control * const encoder,
+static int lcu_get_final_cost(const encoder_state * const encoder_state,
                              cabac_data *cabac,
                              const int x_px, const int y_px,
                              const int depth, lcu_t *lcu)
@ -737,6 +737,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
  int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
  int cost = 0;
  int coeff_cost = 0;
+  const int rdo = encoder_state->encoder_control->rdo;

  int width = LCU_WIDTH>>depth;
  int x,y;
@ -759,7 +760,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
    }
  }

-  if(encoder->rdo == 1) {
+  if(rdo == 1) {
    // sum of coeffs
    for (y = y_local; y < y_local+width; ++y) {
      for (x = x_local; x < x_local+width; ++x) {
@ -774,11 +775,11 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
      }
    }
    // Coefficient costs
-    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder->cur_lambda_cost+0.5);
+    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->cur_lambda_cost+0.5);

  // Calculate actual bit costs for coding the coeffs
  // RDO
-  } else if (encoder->rdo == 2) {
+  } else if (rdo == 2) {
    coefficient coeff_temp[32*32];
    coefficient coeff_temp_u[16*16];
    coefficient coeff_temp_v[16*16];
@ -817,7 +818,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,

      // Calculate luma coeff bit count
      picture_blit_coeffs(&lcu->coeff.y[(blk_y*LCU_WIDTH)+blk_x],coeff_temp,blockwidth,blockwidth,LCU_WIDTH,blockwidth);
-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);

      blk_y >>= 1;
      blk_x >>= 1;
@ -832,15 +833,15 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
      picture_blit_coeffs(&lcu->coeff.u[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_u,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
      picture_blit_coeffs(&lcu->coeff.v[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_v,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);

-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
    }
    // Multiply bit count with lambda to get RD-cost
-    cost += coeff_cost * (int32_t)(encoder->cur_lambda_cost+0.5);
+    cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5);
  }

  // Bitcost
-  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder->cur_lambda_cost+0.5);
+  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5);

  return cost;
 }
@ -855,9 +856,9 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
 * - All the final data for the LCU gets eventually copied to depth 0, which
 *   will be the final output of the recursion.
 */
-static int search_cu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
+static int search_cu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  int cu_width = LCU_WIDTH >> depth;
  int cost = MAX_INT;
  cu_info *cur_cu;
@ -885,7 +886,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
        depth >= MIN_INTER_SEARCH_DEPTH &&
        depth <= MAX_INTER_SEARCH_DEPTH)
    {
-      int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
+      int mode_cost = search_cu_inter(encoder_state, x, y, depth, &work_tree[depth]);
      if (mode_cost < cost) {
        cost = mode_cost;
        cur_cu->type = CU_INTER;
@ -895,7 +896,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
    if (depth >= MIN_INTRA_SEARCH_DEPTH &&
        depth <= MAX_INTRA_SEARCH_DEPTH)
    {
-      int mode_cost = search_cu_intra(encoder, x, y, depth, &work_tree[depth], cabac);
+      int mode_cost = search_cu_intra(encoder_state, x, y, depth, &work_tree[depth], cabac);
      if (mode_cost < cost) {
        cost = mode_cost;
        cur_cu->type = CU_INTRA;
@ -906,10 +907,10 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
    // mode search of adjacent CUs.
    if (cur_cu->type == CU_INTRA) {
      lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
-      intra_recon_lcu(encoder, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
+      intra_recon_lcu(encoder_state, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
    } else if (cur_cu->type == CU_INTER) {
-      inter_recon_lcu(encoder, encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
-      encode_transform_tree(encoder, cabac, x, y, depth, &work_tree[depth]);
+      inter_recon_lcu(encoder_state->encoder_control, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
+      encode_transform_tree(encoder_state, cabac, x, y, depth, &work_tree[depth]);

      if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
        cur_cu->merged = 0;
@ -922,23 +923,23 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
    }
  }
  if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
-    cost = lcu_get_final_cost(encoder, cabac, x, y, depth, &work_tree[depth]);
+    cost = lcu_get_final_cost(encoder_state, cabac, x, y, depth, &work_tree[depth]);
  }

  // Recursively split all the way to max search depth.
  if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
    int half_cu = cu_width / 2;
-    int split_cost = (int)(4.5 * encoder->cur_lambda_cost);
+    int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);

    // If skip mode was selected for the block, skip further search.
    // Skip mode means there's no coefficients in the block, so splitting
    // might not give any better results but takes more time to do.
    if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] ||
       cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
-      split_cost += search_cu(encoder, cabac, x,           y,           depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x + half_cu, y,           depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x,           y + half_cu, depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x,           y,           depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x + half_cu, y,           depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x,           y + half_cu, depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
    } else {
      split_cost = INT_MAX;
    }
@ -963,9 +964,9 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
 * - Copy reference pixels from neighbouring LCUs.
 * - Copy reference pixels from this LCU.
 */
-static void init_lcu_t(const encoder_control * const encoder, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
+static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
  
  // Copy reference cu_info structs from neighbouring LCUs.
  {
@ -1045,7 +1046,7 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const

  // Copy LCU pixels.
  {
-    const picture * const pic = encoder->in.cur_pic;
+    const picture * const pic = encoder_state->cur_pic;
    int pic_width = cur_pic->width;
    int x_max = MIN(x + LCU_WIDTH, pic_width) - x;
    int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y;
@ -1069,13 +1070,13 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const
 /**
 * Copy CU and pixel data to its place in picture data structure.
 */
-static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, int y_px, const lcu_t *lcu)
+static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x_px, int y_px, const lcu_t *lcu)
 {
  // Copy non-reference CUs to picture.
  {
    const int x_cu = x_px >> MAX_DEPTH;
    const int y_cu = y_px >> MAX_DEPTH;
-    const picture * const cur_pic = encoder->in.cur_pic;
+    const picture * const cur_pic = encoder_state->cur_pic;
    const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH;
    cu_info *const cu_array = cur_pic->cu_array[MAX_DEPTH];

@ -1095,7 +1096,7 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,

  // Copy pixels to picture.
  {
-    picture * const pic = encoder->in.cur_pic;
+    picture * const pic = encoder_state->cur_pic;
    const int pic_width = pic->width;
    const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px;
    const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px;
@ -1123,18 +1124,18 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,
 * Search LCU for modes.
 * - Best mode gets copied to current picture.
 */
-void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
+void search_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
 {
  lcu_t work_tree[MAX_PU_DEPTH + 1];
  int depth;
  // Initialize work tree.
  for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) {
    memset(&work_tree[depth], 0, sizeof(work_tree[depth]));
-    init_lcu_t(encoder, x, y, &work_tree[depth], hor_buf, ver_buf);
+    init_lcu_t(encoder_state, x, y, &work_tree[depth], hor_buf, ver_buf);
  }

  // Start search from depth 0.
-  search_cu(encoder, cabac, x, y, 0, work_tree);
+  search_cu(encoder_state, cabac, x, y, 0, work_tree);

-  copy_lcu_to_cu_data(encoder, x, y, &work_tree[0]);
+  copy_lcu_to_cu_data(encoder_state, x, y, &work_tree[0]);
 }
--- a/src/search.h
+++ b/src/search.h
@ -30,6 +30,6 @@
 #include "picture.h"


-void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);
+void search_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);

 #endif
--- a/src/transform.c
+++ b/src/transform.c
@ -623,9 +623,10 @@ void itransform2d(const encoder_control * const encoder,int16_t *block,int16_t *
 * \brief quantize transformed coefficients
 *
 */
-void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
           int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type )
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
  const uint32_t log2_block_size = g_convert_to_bit[ width ] + 2;
  const uint32_t * const scan = g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ];

@ -633,7 +634,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
  int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2];
  #endif

-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);

  //New block for variable definitions
  {
@ -645,7 +646,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef

  int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
  int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
-  int32_t add = ((encoder->in.cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
+  int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);

  int32_t q_bits8 = q_bits - 8;
  for (n = 0; n < width * height; n++) {
@ -754,13 +755,14 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
 * \brief inverse quantize transformed and quantized coefficients
 *
 */
-void dequant(const encoder_control * const encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
+void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
  int32_t shift,add,coeff_q,clip_q_coef;
  int32_t n;
  int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);

-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);

  shift = 20 - QUANT_SHIFT - transform_shift;

--- a/src/transform.h
+++ b/src/transform.h
@ -34,9 +34,9 @@ extern const uint8_t g_chroma_scale[58];
 extern const int16_t g_inv_quant_scales[6];


-void quant(const encoder_control *encoder, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state *encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
           int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type);
-void dequant(const encoder_control *encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);
+void dequant(const encoder_state *encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);

 void transformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
 void itransformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);