diff --git a/src/encmain.c b/src/encmain.c
index ede264bb..c8e730bc 100644
--- a/src/encmain.c
+++ b/src/encmain.c
@@ -60,7 +60,9 @@ int main(int argc, char *argv[])
   config *cfg  = NULL; //!< Global configuration
   FILE *input  = NULL; //!< input file (YUV)
   FILE *output = NULL; //!< output file (HEVC NAL stream)
-  encoder_control *encoder = NULL; //!< Encoder control struct
+  encoder_control encoder;
+  encoder_state encoder_state;
+  picture *cur_pic;
   double psnr[3] = { 0.0, 0.0, 0.0 };
   uint64_t curpos  = 0;
   uint64_t lastpos = 0;
@@ -206,68 +208,79 @@ int main(int argc, char *argv[])
       goto exit_failure;
     }
   }
-
-  encoder = init_encoder_control(cfg);
-  if (!encoder)
+  
+  //Allocate and init exp golomb table
+  if (!init_exp_golomb(4096*8)) {
+    fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n");
     goto exit_failure;
+  }
 
+  if (!encoder_control_init(&encoder, cfg)) {
+    goto exit_failure;
+  }
+  
   // Set output file
-
-  encoder->output = output;
-  encoder->stream.file.output = output;
+  encoder.out.file = output;
+  
   // input init (TODO: read from commandline / config)
-  encoder->bitdepth = 8;
-  encoder->frame    = 0;
-  encoder->QP       = (int8_t)encoder->cfg->qp;
-  encoder->in.video_format = FORMAT_420;
+  encoder.bitdepth = 8;
+  encoder.in.video_format = FORMAT_420;
+  
   // deblocking filter
-  encoder->deblock_enable   = (int8_t)encoder->cfg->deblock_enable;
-  encoder->beta_offset_div2 = (int8_t)encoder->cfg->deblock_beta;
-  encoder->tc_offset_div2   = (int8_t)encoder->cfg->deblock_tc;
+  encoder.deblock_enable   = (int8_t)encoder.cfg->deblock_enable;
+  encoder.beta_offset_div2 = (int8_t)encoder.cfg->deblock_beta;
+  encoder.tc_offset_div2   = (int8_t)encoder.cfg->deblock_tc;
   // SAO
-  encoder->sao_enable = (int8_t)encoder->cfg->sao_enable;
+  encoder.sao_enable = (int8_t)encoder.cfg->sao_enable;
   // RDO
-  encoder->rdoq_enable = (int8_t)encoder->cfg->rdoq_enable;
-  encoder->rdo         = (int8_t)encoder->cfg->rdo;
+  encoder.rdoq_enable = (int8_t)encoder.cfg->rdoq_enable;
+  encoder.rdo         = (int8_t)encoder.cfg->rdo;
   // TR SKIP
-  encoder->trskip_enable = (int8_t)encoder->cfg->trskip_enable;
+  encoder.trskip_enable = (int8_t)encoder.cfg->trskip_enable;
   // VUI
-  encoder->vui.sar_width   = (int16_t)encoder->cfg->vui.sar_width;
-  encoder->vui.sar_height  = (int16_t)encoder->cfg->vui.sar_height;
-  encoder->vui.overscan    = encoder->cfg->vui.overscan;
-  encoder->vui.videoformat = encoder->cfg->vui.videoformat;
-  encoder->vui.fullrange   = encoder->cfg->vui.fullrange;
-  encoder->vui.colorprim   = encoder->cfg->vui.colorprim;
-  encoder->vui.transfer    = encoder->cfg->vui.transfer;
-  encoder->vui.colormatrix = encoder->cfg->vui.colormatrix;
-  encoder->vui.chroma_loc  = (int8_t)encoder->cfg->vui.chroma_loc;
+  encoder.vui.sar_width   = (int16_t)encoder.cfg->vui.sar_width;
+  encoder.vui.sar_height  = (int16_t)encoder.cfg->vui.sar_height;
+  encoder.vui.overscan    = encoder.cfg->vui.overscan;
+  encoder.vui.videoformat = encoder.cfg->vui.videoformat;
+  encoder.vui.fullrange   = encoder.cfg->vui.fullrange;
+  encoder.vui.colorprim   = encoder.cfg->vui.colorprim;
+  encoder.vui.transfer    = encoder.cfg->vui.transfer;
+  encoder.vui.colormatrix = encoder.cfg->vui.colormatrix;
+  encoder.vui.chroma_loc  = (int8_t)encoder.cfg->vui.chroma_loc;
   // AUD
-  encoder->aud_enable = (int8_t)encoder->cfg->aud_enable;
+  encoder.aud_enable = (int8_t)encoder.cfg->aud_enable;
 
-  init_encoder_input(&encoder->in, input, cfg->width, cfg->height);
+  encoder_control_input_init(&encoder, input, cfg->width, cfg->height);
 
   fprintf(stderr, "Input: %s, output: %s\n", cfg->input, cfg->output);
   fprintf(stderr, "  Video size: %dx%d (input=%dx%d)\n",
-         encoder->in.cur_pic->width, encoder->in.cur_pic->height,
-         encoder->in.real_width, encoder->in.real_height);
+         encoder.in.width, encoder.in.height,
+         encoder.in.real_width, encoder.in.real_height);
+  
+  if (!encoder_state_init(&encoder_state, &encoder)) {
+    goto exit_failure;
+  }
+  
+  encoder_state.frame    = 0;
+  encoder_state.QP       = (int8_t)encoder.cfg->qp;
 
   // Only the code that handles conformance window coding needs to know
   // the real dimensions. As a quick fix for broken non-multiple of 8 videos,
   // change the input values here to be the real values. For a real fix
   // encoder.in probably needs to be merged into cfg.
   // The real fix would be: never go dig in cfg
-  //cfg->width = encoder->in.width;
-  //cfg->height = encoder->in.height;
+  //cfg->width = encoder.in.width;
+  //cfg->height = encoder.in.height;
 
   // Start coding cycle while data on input and not on the last frame
-  while(!cfg->frames || encoder->frame < cfg->frames) {
+  while(!cfg->frames || encoder_state.frame < cfg->frames) {
     int32_t diff;
     double temp_psnr[3];
 
     // Skip '--seek' frames before input.
     // This block can be moved outside this while loop when there is a
     // mechanism to skip the while loop on error.
-    if (encoder->frame == 0 && cfg->seek > 0) {
+    if (encoder_state.frame == 0 && cfg->seek > 0) {
       int frame_bytes = cfg->width * cfg->height * 3 / 2;
       int error = 0;
 
@@ -275,7 +288,7 @@ int main(int argc, char *argv[])
         // Input is stdin.
         int i;
         for (i = 0; !error && i < cfg->seek; ++i) {
-          error = !read_one_frame(input, encoder);
+          error = !read_one_frame(input, &encoder_state);
         }
       } else {
         // input is a file. We hope. Proper detection is OS dependent.
@@ -288,25 +301,27 @@ int main(int argc, char *argv[])
     }
 
     // Read one frame from the input
-    if (!read_one_frame(input, encoder)) {
+    if (!read_one_frame(input, &encoder_state)) {
       if (!feof(input))
-        fprintf(stderr, "Failed to read a frame %d\n", encoder->frame);
+        fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame);
       break;
     }
 
     // The actual coding happens here, after this function we have a coded frame
-    encode_one_frame(encoder);
+    encode_one_frame(&encoder_state);
+    
+    cur_pic = encoder_state.cur_pic;
 
     if (cfg->debug != NULL) {
       // Write reconstructed frame out.
       // Use conformance-window dimensions instead of internal ones.
-      const int width = encoder->in.cur_pic->width;
-      const int out_width = encoder->in.real_width;
-      const int out_height = encoder->in.real_height;
+      const int width = cur_pic->width;
+      const int out_width = encoder.in.real_width;
+      const int out_height = encoder.in.real_height;
       int y;
-      const pixel *y_rec = encoder->in.cur_pic->y_recdata;
-      const pixel *u_rec = encoder->in.cur_pic->u_recdata;
-      const pixel *v_rec = encoder->in.cur_pic->v_recdata;
+      const pixel *y_rec = cur_pic->y_recdata;
+      const pixel *u_rec = cur_pic->u_recdata;
+      const pixel *v_rec = cur_pic->v_recdata;
 
       for (y = 0; y < out_height; ++y) {
         fwrite(&y_rec[y * width], sizeof(*y_rec), out_width, recout);
@@ -325,12 +340,12 @@ int main(int argc, char *argv[])
     lastpos = curpos;
 
     // PSNR calculations
-    temp_psnr[0] = image_psnr(encoder->in.cur_pic->y_data, encoder->in.cur_pic->y_recdata, cfg->width, cfg->height);
-    temp_psnr[1] = image_psnr(encoder->in.cur_pic->u_data, encoder->in.cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
-    temp_psnr[2] = image_psnr(encoder->in.cur_pic->v_data, encoder->in.cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
+    temp_psnr[0] = image_psnr(cur_pic->y_data, cur_pic->y_recdata, cfg->width, cfg->height);
+    temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
+    temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
 
-    fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder->frame,
-           "BPI"[encoder->in.cur_pic->slicetype%3], diff<<3,
+    fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame,
+           "BPI"[cur_pic->slicetype%3], diff<<3,
            temp_psnr[0], temp_psnr[1], temp_psnr[2]);
 
     // Increment total PSNR
@@ -342,33 +357,33 @@ int main(int argc, char *argv[])
     // TODO: add more than one reference
 
     // Remove the ref pic (if present)
-    if (encoder->ref->used_size == (uint32_t)encoder->cfg->ref_frames) {
-      picture_list_rem(encoder->ref, encoder->ref->used_size-1, 1);
+    if (encoder_state.ref->used_size == (uint32_t)encoder.cfg->ref_frames) {
+      picture_list_rem(encoder_state.ref, encoder_state.ref->used_size-1, 1);
     }
     // Add current picture as reference
-    picture_list_add(encoder->ref, encoder->in.cur_pic);
+    picture_list_add(encoder_state.ref, cur_pic);
     // Allocate new memory to current picture
     // TODO: reuse memory from old reference
-    encoder->in.cur_pic = picture_init(encoder->in.cur_pic->width, encoder->in.cur_pic->height, encoder->in.cur_pic->width_in_lcu, encoder->in.cur_pic->height_in_lcu);
+    encoder_state.cur_pic = picture_init(encoder_state.cur_pic->width, encoder_state.cur_pic->height, encoder_state.cur_pic->width_in_lcu, encoder_state.cur_pic->height_in_lcu);
 
     // Copy pointer from the last cur_pic because we don't want to reallocate it
-    MOVE_POINTER(encoder->in.cur_pic->coeff_y,encoder->ref->pics[0]->coeff_y);
-    MOVE_POINTER(encoder->in.cur_pic->coeff_u,encoder->ref->pics[0]->coeff_u);
-    MOVE_POINTER(encoder->in.cur_pic->coeff_v,encoder->ref->pics[0]->coeff_v);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_y,encoder_state.ref->pics[0]->coeff_y);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_u,encoder_state.ref->pics[0]->coeff_u);
+    MOVE_POINTER(encoder_state.cur_pic->coeff_v,encoder_state.ref->pics[0]->coeff_v);
 
-    MOVE_POINTER(encoder->in.cur_pic->pred_y,encoder->ref->pics[0]->pred_y);
-    MOVE_POINTER(encoder->in.cur_pic->pred_u,encoder->ref->pics[0]->pred_u);
-    MOVE_POINTER(encoder->in.cur_pic->pred_v,encoder->ref->pics[0]->pred_v);
+    MOVE_POINTER(encoder_state.cur_pic->pred_y,encoder_state.ref->pics[0]->pred_y);
+    MOVE_POINTER(encoder_state.cur_pic->pred_u,encoder_state.ref->pics[0]->pred_u);
+    MOVE_POINTER(encoder_state.cur_pic->pred_v,encoder_state.ref->pics[0]->pred_v);
 
-    encoder->frame++;
-    encoder->poc++;
+    encoder_state.frame++;
+    encoder_state.poc++;
   }
   // Coding finished
   fgetpos(output,(fpos_t*)&curpos);
 
   // Print statistics of the coding
-  fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder->frame, (long long unsigned int) curpos<<3,
-         psnr[0] / encoder->frame, psnr[1] / encoder->frame, psnr[2] / encoder->frame);
+  fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3,
+         psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame);
   fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC);
 
   fclose(input);
@@ -377,12 +392,9 @@ int main(int argc, char *argv[])
 
   // Deallocating
   config_destroy(cfg);
-  scalinglist_destroy(&encoder->scaling_list);
-  picture_list_destroy(encoder->ref);
-  picture_destroy(encoder->in.cur_pic);
-  FREE_POINTER(encoder->in.cur_pic);
-  bitstream_finalize(&encoder->stream);
-  free(encoder);
+  encoder_state_finalize(&encoder_state);
+  encoder_control_finalize(&encoder);
+
   free_exp_golomb();
 
   return EXIT_SUCCESS;
diff --git a/src/encoder.c b/src/encoder.c
index 92253e5a..e81c79f7 100644
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -44,9 +44,9 @@
 #include "rdo.h"
 
 /* Local functions. */
-static void add_checksum(encoder_control * const encoder);
-static void encode_VUI(encoder_control * const encoder);
-static void encode_sao(encoder_control * const encoder,
+static void add_checksum(encoder_state * const encoder_state);  // per-frame state, not the shared encoder_control
+static void encode_VUI(encoder_state * const encoder_state);
+static void encode_sao(encoder_state * const encoder_state,
                        cabac_data *cabac,
                        unsigned x_lcu, uint16_t y_lcu,
                        sao_info *sao_luma, sao_info *sao_chroma);
@@ -57,10 +57,10 @@ static void encode_sao(encoder_control * const encoder,
   Implementation closer to HM (Used HM12 as reference)
    - Still missing functionality when GOP and B-pictures are used
  */
-void init_lambda(encoder_control * const encoder)
+void encoder_state_init_lambda(encoder_state * const encoder_state)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
-  double qp = encoder->QP;
+  const picture * const cur_pic = encoder_state->cur_pic;
+  double qp = encoder_state->QP;
   double lambda_scale = 1.0;
   double qp_temp      = qp - 12;
   double lambda;
@@ -78,180 +78,179 @@ void init_lambda(encoder_control * const encoder)
     lambda *= 0.95;
   }
 
-  encoder->cur_lambda_cost = lambda;
+  encoder_state->cur_lambda_cost = lambda;
 }
 
-encoder_control *init_encoder_control(config *cfg)
-{
-  encoder_control *enc_c    = NULL;
-  bitstream       *stream   = NULL;
-  picture_list    *pic_list = NULL;
-
+int encoder_control_init(encoder_control * const encoder, const config * const cfg) {
   if (!cfg) {
     fprintf(stderr, "Config object must not be null!\n");
-    goto init_failure;
+    return 0;
   }
-
-  // Allocate the main struct
-  enc_c = malloc(sizeof(encoder_control));
-  if(!enc_c){
-    fprintf(stderr, "Failed to allocate encoder_control!\n");
-    goto init_failure;
-  }
-
-  // Config pointer to encoder struct
-  enc_c->cfg = cfg;
-
-  // input init (TODO: read from commandline / config)
-  enc_c->bitdepth = 8;
-  enc_c->frame    = 0;
-  enc_c->QP       = (int8_t)enc_c->cfg->qp;
-  enc_c->in.video_format = FORMAT_420;
+  
+  // Config pointer to config struct
+  encoder->cfg = cfg;
+  encoder->bitdepth = 8;
+  
   // deblocking filter
-  enc_c->deblock_enable    = 1;
-  enc_c->beta_offset_div2  = 0;
-  enc_c->tc_offset_div2    = 0;
+  encoder->deblock_enable    = 1;
+  encoder->beta_offset_div2  = 0;
+  encoder->tc_offset_div2    = 0;
   // SAO
-  enc_c->sao_enable = 1;
+  encoder->sao_enable = 1;
   // Rate-distortion optimization level
-  enc_c->rdo        = 1;
-
-  // Allocate the bitstream struct
-  bitstream_init(&enc_c->stream, BITSTREAM_TYPE_FILE);
-
-  //Allocate and init exp golomb table
-  if (!init_exp_golomb(4096*8)) {
-    fprintf(stderr, "Failed to allocate the exp golomb code table, shutting down!\n");
-    goto init_failure;
-  }
-
+  encoder->rdo        = 1;
+  
   // Initialize the scaling list
-  scalinglist_init(&enc_c->scaling_list);
-
-  pic_list = picture_list_init(MAX_REF_PIC_COUNT);
-  if(!pic_list) {
-    fprintf(stderr, "Failed to allocate the picture list!\n");
-    goto init_failure;
-  }
-
-  enc_c->ref = pic_list;
-  enc_c->ref_list = REF_PIC_LIST_0;
+  scalinglist_init(&encoder->scaling_list);
   
   // CQM
   {
     FILE* cqmfile;
     cqmfile = cfg->cqmfile ? fopen(cfg->cqmfile, "rb") : NULL;
     if (cqmfile) {
-      scalinglist_parse(&enc_c->scaling_list, cqmfile);
+      scalinglist_parse(&encoder->scaling_list, cqmfile);
       fclose(cqmfile);
     }
   }
-  scalinglist_process(&enc_c->scaling_list, enc_c->bitdepth);
+  scalinglist_process(&encoder->scaling_list, encoder->bitdepth);
   
-  return enc_c;
-
-init_failure:
-  // Free everything allocated in this function
-  free(pic_list);
-  free(stream);
-  free(enc_c);
-
-  return NULL;
+  return 1;
 }
 
-void init_encoder_input(encoder_input *input, FILE *inputfile,
+int encoder_control_finalize(encoder_control * const encoder) {
+  scalinglist_destroy(&encoder->scaling_list);
+  // Only the scaling list is torn down here; the cfg pointer is left alone. Always returns 1.
+  return 1;
+}
+
+// Set up *encoder_state for encoding with the parameters in *encoder. Returns 1 on success;
+// on failure prints a diagnostic to stderr, releases what was acquired here and returns 0.
+int encoder_state_init(encoder_state * const encoder_state, const encoder_control * const encoder) {
+  const int luma_size = encoder->in.width * encoder->in.height;
+  picture_list *pic_list = NULL;
+  picture *pic = NULL;
+
+  encoder_state->encoder_control = encoder;
+  encoder_state->children = NULL;
+  encoder_state->ref_list = REF_PIC_LIST_0;
+  encoder_state->frame = 0;
+  encoder_state->poc = 0;
+
+  // Allocate the bitstream struct
+  if (!bitstream_init(&encoder_state->stream, BITSTREAM_TYPE_FILE)) {
+    fprintf(stderr, "Could not initialize stream!\n");
+    return 0;
+  }
+  encoder_state->stream.file.output = encoder->out.file;
+  encoder_state->cabac.stream = &encoder_state->stream;  // Set CABAC output bitstream
+
+  pic_list = picture_list_init(MAX_REF_PIC_COUNT);
+  if (!pic_list) {
+    fprintf(stderr, "Failed to allocate the picture list!\n");
+    goto init_failure;
+  }
+  encoder_state->ref = pic_list;
+  // Allocate the picture and CU array
+  pic = picture_init(encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu);
+  encoder_state->cur_pic = pic;
+  if (!pic) {
+    fprintf(stderr, "Error allocating picture!\n");
+    goto init_failure;
+  }
+  // Coeff and predicted data tables: one entry per luma sample, a quarter of that per chroma plane
+  pic->coeff_y = MALLOC(coefficient, luma_size);
+  pic->coeff_u = MALLOC(coefficient, luma_size >> 2);
+  pic->coeff_v = MALLOC(coefficient, luma_size >> 2);
+  pic->pred_y = MALLOC(pixel, luma_size);
+  pic->pred_u = MALLOC(pixel, luma_size >> 2);
+  pic->pred_v = MALLOC(pixel, luma_size >> 2);
+  if (pic->coeff_y && pic->coeff_u && pic->coeff_v && pic->pred_y && pic->pred_u && pic->pred_v) {
+    return 1;
+  }
+  fprintf(stderr, "Failed to allocate the coefficient/prediction tables!\n");
+init_failure:  // same calls as encoder_state_finalize(); assumes picture_destroy() copes with NULL tables - TODO confirm
+  if (pic) { picture_destroy(pic); FREE_POINTER(encoder_state->cur_pic); }
+  if (pic_list) { picture_list_destroy(pic_list); encoder_state->ref = NULL; }
+  bitstream_finalize(&encoder_state->stream);
+  return 0;
+}
+
+int encoder_state_finalize(encoder_state * const encoder_state) {  // Counterpart of encoder_state_init()
+  picture_destroy(encoder_state->cur_pic);
+  FREE_POINTER(encoder_state->cur_pic);
+  // After the current picture: release the reference picture list, then the bitstream. Always returns 1.
+  picture_list_destroy(encoder_state->ref);
+  bitstream_finalize(&encoder_state->stream);
+  return 1;
+}
+
+void encoder_control_input_init(encoder_control * const encoder, FILE *inputfile,
                         const int32_t width, const int32_t height)
 {
-  int32_t i_width = width; /*!< \brief input picture width (divisible by the minimum block size)*/
-  int32_t i_height = height; /*!< \brief input picture height (divisible by the minimum block size) */
-  int32_t i_width_in_lcu; /*!< \brief input picture width in LCU*/
-  int32_t i_height_in_lcu;  /*!< \brief input picture height in LCU */
-  input->file = inputfile;
-  i_width = width;
-  i_height = height;
-  input->real_width = width;
-  input->real_height = height;
+  encoder->in.file = inputfile;
+  encoder->in.width = width;
+  encoder->in.height = height;
+  encoder->in.real_width = width;
+  encoder->in.real_height = height;
 
   // If input dimensions are not divisible by the smallest block size, add
   // pixels to the dimensions, so that they are. These extra pixels will be
   // compressed along with the real ones but they will be cropped out before
   // rendering.
-  if (i_width % CU_MIN_SIZE_PIXELS) {
-    i_width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS);
+  if (encoder->in.width % CU_MIN_SIZE_PIXELS) {
+    encoder->in.width += CU_MIN_SIZE_PIXELS - (width % CU_MIN_SIZE_PIXELS);
   }
 
-  if (i_height % CU_MIN_SIZE_PIXELS) {
-    i_height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS);
+  if (encoder->in.height % CU_MIN_SIZE_PIXELS) {
+    encoder->in.height += CU_MIN_SIZE_PIXELS - (height % CU_MIN_SIZE_PIXELS);
   }
 
-  i_height_in_lcu = i_height / LCU_WIDTH;
-  i_width_in_lcu  = i_width / LCU_WIDTH;
+  encoder->in.height_in_lcu = encoder->in.height / LCU_WIDTH;
+  encoder->in.width_in_lcu  = encoder->in.width / LCU_WIDTH;
 
   // Add one extra LCU when image not divisible by LCU_WIDTH
-  if (i_height_in_lcu * LCU_WIDTH < height) {
-    i_height_in_lcu++;
+  if (encoder->in.height_in_lcu * LCU_WIDTH < height) {
+    encoder->in.height_in_lcu++;
   }
 
-  if (i_width_in_lcu * LCU_WIDTH < width) {
-    i_width_in_lcu++;
+  if (encoder->in.width_in_lcu * LCU_WIDTH < width) {
+    encoder->in.width_in_lcu++;
   }
 
-  // Allocate the picture and CU array
-  input->cur_pic = picture_init(i_width, i_height,
-                                i_width_in_lcu,
-                                i_height_in_lcu);
 
-  if (!input->cur_pic) {
-    printf("Error allocating picture!\r\n");
-    exit(1);
-  }
 
   #ifdef _DEBUG
-  if (width != i_width || height != i_height) {
+  // The padded dimensions now live in encoder->in; the i_width/i_height locals no longer exist.
+  if (width != encoder->in.width || height != encoder->in.height) {
     printf("Picture buffer has been extended to be a multiple of the smallest block size:\r\n");
-    printf("  Width = %d (%d), Height = %d (%d)\r\n", width, i_width, height,
-           i_height);
+    printf("  Width = %d (%d), Height = %d (%d)\r\n", width, encoder->in.width, height, encoder->in.height);
   }
   #endif
-  
-  // Init coeff data table
-  input->cur_pic->coeff_y = MALLOC(coefficient, i_width * i_height);
-  input->cur_pic->coeff_u = MALLOC(coefficient, (i_width * i_height) >> 2);
-  input->cur_pic->coeff_v = MALLOC(coefficient, (i_width * i_height) >> 2);
-
-  // Init predicted data table
-  input->cur_pic->pred_y = MALLOC(pixel, i_width * i_height);
-  input->cur_pic->pred_u = MALLOC(pixel, (i_width * i_height) >> 2);
-  input->cur_pic->pred_v = MALLOC(pixel, (i_width * i_height) >> 2);
 }
 
-static void write_aud(encoder_control * const encoder)
+static void write_aud(encoder_state * const encoder_state)  // Emits an access unit delimiter NAL into the state's bitstream.
 {
-  bitstream * const stream = &encoder->stream;
-  encode_access_unit_delimiter(encoder);
+  bitstream * const stream = &encoder_state->stream;
+  encode_access_unit_delimiter(encoder_state);  // NOTE(review): payload is written before nal_write() here, whereas the VPS/SPS/PPS code in encode_one_frame calls nal_write() first - confirm this order is intended
   nal_write(stream, AUD_NUT, 0, 1);
   bitstream_align(stream);
 }
 
-void encode_one_frame(encoder_control* encoder)
+void encode_one_frame(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
-  picture * const cur_pic = encoder->in.cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  bitstream * const stream = &encoder_state->stream;
   
-  yuv_t *hor_buf = alloc_yuv_t(cur_pic->width);
+  yuv_t *hor_buf = alloc_yuv_t(encoder_state->cur_pic->width);
   // Allocate 2 extra luma pixels so we get 1 extra chroma pixel for the
   // for the extra pixel on the top right.
   yuv_t *ver_buf = alloc_yuv_t(LCU_WIDTH + 2);
 
-  const int is_first_frame = (encoder->frame == 0);
-  const int is_i_radl = (encoder->cfg->intra_period == 1 && encoder->frame % 2 == 0);
-  const int is_p_radl = (encoder->cfg->intra_period > 1 && (encoder->frame % encoder->cfg->intra_period) == 0);
+  const int is_first_frame = (encoder_state->frame == 0);
+  const int is_i_radl = (encoder->cfg->intra_period == 1 && encoder_state->frame % 2 == 0);
+  const int is_p_radl = (encoder->cfg->intra_period > 1 && (encoder_state->frame % encoder->cfg->intra_period) == 0);
   const int is_radl_frame = is_first_frame || is_i_radl || is_p_radl;
 
-  
-
-  cabac_data cabac;
-
 
   /** IDR picture when: period == 0 and frame == 0
    *                    period == 1 && frame%2 == 0
@@ -259,48 +258,48 @@ void encode_one_frame(encoder_control* encoder)
    **/
   if (is_radl_frame) {
     // Clear the reference list
-    while (encoder->ref->used_size) {
-      picture_list_rem(encoder->ref, encoder->ref->used_size - 1, 1);
+    while (encoder_state->ref->used_size) {
+      picture_list_rem(encoder_state->ref, encoder_state->ref->used_size - 1, 1);
     }
 
-    encoder->poc = 0;
+    encoder_state->poc = 0;
 
-    cur_pic->slicetype = SLICE_I;
-    cur_pic->type = NAL_IDR_W_RADL;
+    encoder_state->cur_pic->slicetype = SLICE_I;
+    encoder_state->cur_pic->type = NAL_IDR_W_RADL;
 
     // Access Unit Delimiter (AUD)
     if (encoder->aud_enable)
-      write_aud(encoder);
+      write_aud(encoder_state);
 
     // Video Parameter Set (VPS)
     nal_write(stream, NAL_VPS_NUT, 0, 1);
-    encode_vid_parameter_set(encoder);
+    encode_vid_parameter_set(encoder_state);
     bitstream_align(stream);
 
     // Sequence Parameter Set (SPS)
     nal_write(stream, NAL_SPS_NUT, 0, 1);
-    encode_seq_parameter_set(encoder);
+    encode_seq_parameter_set(encoder_state);
     bitstream_align(stream);
 
     // Picture Parameter Set (PPS)
     nal_write(stream, NAL_PPS_NUT, 0, 1);
-    encode_pic_parameter_set(encoder);
+    encode_pic_parameter_set(encoder_state);
     bitstream_align(stream);
 
-    if (encoder->frame == 0) {
+    if (encoder_state->frame == 0) {
       // Prefix SEI
       nal_write(stream, PREFIX_SEI_NUT, 0, 0);
-      encode_prefix_sei_version(encoder);
+      encode_prefix_sei_version(encoder_state);
       bitstream_align(stream);
     }
   } else {
     // When intra period == 1, all pictures are intra
-    cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P;
-    cur_pic->type = NAL_TRAIL_R;
+    encoder_state->cur_pic->slicetype = encoder->cfg->intra_period==1 ? SLICE_I : SLICE_P;
+    encoder_state->cur_pic->type = NAL_TRAIL_R;
 
     // Access Unit Delimiter (AUD)
     if (encoder->aud_enable)
-      write_aud(encoder);
+      write_aud(encoder_state);
   }
 
   {
@@ -312,18 +311,16 @@ void encode_one_frame(encoder_control* encoder)
               is_radl_frame ? NAL_IDR_W_RADL : NAL_TRAIL_R, 0, long_start_code);
   }
 
-  // Set CABAC output bitstream
-  cabac.stream = stream;
-
-  cabac_start(&cabac);
-  init_contexts(&cabac, encoder->QP, cur_pic->slicetype);
-  encode_slice_header(encoder);
+  cabac_start(&encoder_state->cabac);
+  init_contexts(&encoder_state->cabac, encoder_state->QP, encoder_state->cur_pic->slicetype);
+  encode_slice_header(encoder_state);
   bitstream_align(stream);
 
   // Initialize lambda value(s) to use in search
-  init_lambda(encoder);
+  encoder_state_init_lambda(encoder_state);
 
   {
+    picture* const cur_pic = encoder_state->cur_pic;
     vector2d lcu;
     const vector2d size = { cur_pic->width, cur_pic->height };
     const vector2d size_lcu = { cur_pic->width_in_lcu, cur_pic->height_in_lcu };
@@ -339,7 +336,7 @@ void encode_one_frame(encoder_control* encoder)
         const int right = px.x + lcu_dim.x;
         const int bottom = px.y + lcu_dim.y;
 
-        search_lcu(encoder, &cabac, px.x, px.y, hor_buf, ver_buf);
+        search_lcu(encoder_state, &encoder_state->cabac, px.x, px.y, hor_buf, ver_buf);
 
         // Take the bottom right pixel from the LCU above and put it as the
         // first pixel in this LCUs rightmost pixels.
@@ -371,7 +368,7 @@ void encode_one_frame(encoder_control* encoder)
                             1, lcu_dim.y / 2, size.x / 2, 1);
 
         if (encoder->deblock_enable) {
-          filter_deblock_lcu(encoder, px.x, px.y);
+          filter_deblock_lcu(encoder_state, px.x, px.y);
         }
 
         if (encoder->sao_enable) {
@@ -384,43 +381,43 @@ void encode_one_frame(encoder_control* encoder)
           {
             sao_info *sao_top = lcu. y != 0 ? &cur_pic->sao_luma[(lcu.y - 1) * stride + lcu.x] : NULL;
             sao_info *sao_left = lcu.x != 0 ? &cur_pic->sao_luma[lcu.y * stride + lcu.x -1] : NULL;
-            sao_search_luma(encoder, cur_pic, lcu.x, lcu.y, sao_luma, sao_top, sao_left);
+            sao_search_luma(encoder_state, cur_pic, lcu.x, lcu.y, sao_luma, sao_top, sao_left);
           }
 
           {
             sao_info *sao_top = lcu.y != 0 ? &cur_pic->sao_chroma[(lcu.y - 1) * stride + lcu.x] : NULL;
             sao_info *sao_left = lcu.x != 0 ? &cur_pic->sao_chroma[lcu.y * stride + lcu.x - 1] : NULL;
-            sao_search_chroma(encoder, cur_pic, lcu.x, lcu.y, sao_chroma, sao_top, sao_left);
+            sao_search_chroma(encoder_state, cur_pic, lcu.x, lcu.y, sao_chroma, sao_top, sao_left);
           }
 
           // Merge only if both luma and chroma can be merged
           sao_luma->merge_left_flag = sao_luma->merge_left_flag & sao_chroma->merge_left_flag;
           sao_luma->merge_up_flag = sao_luma->merge_up_flag & sao_chroma->merge_up_flag;
 
-          encode_sao(encoder, &cabac, lcu.x, lcu.y, sao_luma, sao_chroma);
+          encode_sao(encoder_state, &encoder_state->cabac, lcu.x, lcu.y, sao_luma, sao_chroma);
         }
-
-        encode_coding_tree(encoder, &cabac, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0);
+        
+        encode_coding_tree(encoder_state, &encoder_state->cabac, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0);
 
         {
           const int last_lcu = (lcu.x == size_lcu.x - 1 && lcu.y == size_lcu.y - 1);
-          cabac_encode_bin_trm(&cabac, last_lcu ? 1 : 0);  // end_of_slice_segment_flag
+          cabac_encode_bin_trm(&encoder_state->cabac, last_lcu ? 1 : 0);  // end_of_slice_segment_flag
         }
       }
     }
   }
 
-  cabac_flush(&cabac);
+  cabac_flush(&encoder_state->cabac);
   bitstream_align(stream);
 
   if (encoder->sao_enable) {
-    sao_reconstruct_frame(encoder);
+    sao_reconstruct_frame(encoder_state);
   }
 
   // Calculate checksum
-  add_checksum(encoder);
+  add_checksum(encoder_state);
 
-  cur_pic->poc = encoder->poc;
+  encoder_state->cur_pic->poc = encoder_state->poc;
 
   dealloc_yuv_t(hor_buf);
   dealloc_yuv_t(ver_buf);
@@ -465,43 +462,42 @@ static int read_and_fill_frame_data(FILE *file,
   return 1;
 }
 
-int read_one_frame(FILE* file, const encoder_control * const encoder)
+int read_one_frame(FILE* file, const encoder_state * const encoder_state)
 {
-  const encoder_input* const in = &encoder->in;
-  unsigned width = in->real_width;
-  unsigned height = in->real_height;
-  unsigned array_width = in->cur_pic->width;
-  unsigned array_height = in->cur_pic->height;
+  unsigned width = encoder_state->encoder_control->in.real_width;
+  unsigned height = encoder_state->encoder_control->in.real_height;
+  unsigned array_width = encoder_state->cur_pic->width;
+  unsigned array_height = encoder_state->cur_pic->height;
 
   if (width != array_width) {
     // In the case of frames not being aligned on 8 bit borders, bits need to be copied to fill them in.
     if (!read_and_fill_frame_data(file, width, height, array_width,
-                                  in->cur_pic->y_data) ||
+                                  encoder_state->cur_pic->y_data) ||
         !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1,
-                                  in->cur_pic->u_data) ||
+                                  encoder_state->cur_pic->u_data) ||
         !read_and_fill_frame_data(file, width >> 1, height >> 1, array_width >> 1,
-                                  in->cur_pic->v_data))
+                                  encoder_state->cur_pic->v_data))
       return 0;
   } else {
     // Otherwise the data can be read directly to the array.
     unsigned y_size = width * height;
     unsigned uv_size = (width >> 1) * (height >> 1);
-    if (y_size  != fread(in->cur_pic->y_data, sizeof(unsigned char),
+    if (y_size  != fread(encoder_state->cur_pic->y_data, sizeof(unsigned char),
                          y_size, file) ||
-        uv_size != fread(in->cur_pic->u_data, sizeof(unsigned char),
+        uv_size != fread(encoder_state->cur_pic->u_data, sizeof(unsigned char),
                          uv_size, file) ||
-        uv_size != fread(in->cur_pic->v_data, sizeof(unsigned char),
+        uv_size != fread(encoder_state->cur_pic->v_data, sizeof(unsigned char),
                          uv_size, file))
       return 0;
   }
 
   if (height != array_height) {
     fill_after_frame(height, array_width, array_height,
-                     in->cur_pic->y_data);
+                     encoder_state->cur_pic->y_data);
     fill_after_frame(height >> 1, array_width >> 1, array_height >> 1,
-                     in->cur_pic->u_data);
+                     encoder_state->cur_pic->u_data);
     fill_after_frame(height >> 1, array_width >> 1, array_height >> 1,
-                     in->cur_pic->v_data);
+                     encoder_state->cur_pic->v_data);
   }
   return 1;
 }
@@ -511,10 +507,10 @@ int read_one_frame(FILE* file, const encoder_control * const encoder)
- * \param encoder The encoder.
+ * \param encoder_state The encoder state.
  * \returns Void
  */
-static void add_checksum(encoder_control * const encoder)
+static void add_checksum(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
-  const picture * const cur_pic = encoder->in.cur_pic;
+  bitstream * const stream = &encoder_state->stream;
+  const picture * const cur_pic = encoder_state->cur_pic;
   unsigned char checksum[3][SEI_HASH_MAX_LENGTH];
   uint32_t checksum_val;
   unsigned int i;
@@ -538,24 +534,24 @@ static void add_checksum(encoder_control * const encoder)
   bitstream_align(stream);
 }
 
-void encode_access_unit_delimiter(encoder_control * const encoder)
+void encode_access_unit_delimiter(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
-  const picture * const cur_pic = encoder->in.cur_pic;
+  bitstream * const stream = &encoder_state->stream;
+  const picture * const cur_pic = encoder_state->cur_pic;
   uint8_t pic_type = cur_pic->slicetype == SLICE_I ? 0
                    : cur_pic->slicetype == SLICE_P ? 1
                    :                                             2;
   WRITE_U(stream, pic_type, 3, "pic_type");
 }
 
-void encode_prefix_sei_version(encoder_control * const encoder)
+void encode_prefix_sei_version(encoder_state * const encoder_state)
 {
 #define STR_BUF_LEN 1000
-  bitstream * const stream = &encoder->stream;
+  bitstream * const stream = &encoder_state->stream;
   int i, length;
   char buf[STR_BUF_LEN] = { 0 };
   char *s = buf + 16;
-  const config *cfg = encoder->cfg;
+  const config * const cfg = encoder_state->encoder_control->cfg;
 
   // random uuid_iso_iec_11578 generated with www.famkruithof.net/uuid/uuidgen
   static const uint8_t uuid[16] = {
@@ -595,9 +591,9 @@ void encode_prefix_sei_version(encoder_control * const encoder)
 #undef STR_BUF_LEN
 }
 
-void encode_pic_parameter_set(encoder_control * const encoder)
+void encode_pic_parameter_set(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
+  bitstream * const stream = &encoder_state->stream;
 #ifdef _DEBUG
   printf("=========== Picture Parameter Set ID: 0 ===========\n");
 #endif
@@ -611,9 +607,9 @@ void encode_pic_parameter_set(encoder_control * const encoder)
 
   WRITE_UE(stream, 0, "num_ref_idx_l0_default_active_minus1");
   WRITE_UE(stream, 0, "num_ref_idx_l1_default_active_minus1");
-  WRITE_SE(stream, ((int8_t)encoder->QP)-26, "pic_init_qp_minus26");
+  WRITE_SE(stream, ((int8_t)encoder_state->QP)-26, "pic_init_qp_minus26");
   WRITE_U(stream, 0, 1, "constrained_intra_pred_flag");
-  WRITE_U(stream, encoder->trskip_enable, 1, "transform_skip_enabled_flag");
+  WRITE_U(stream, encoder_state->encoder_control->trskip_enable, 1, "transform_skip_enabled_flag");
   WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag");
   //if cu_qp_delta_enabled_flag
   //WRITE_UE(stream, 0, "diff_cu_qp_delta_depth");
@@ -636,13 +632,13 @@ void encode_pic_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 1, 1, "deblocking_filter_control_present_flag");
   //IF deblocking_filter
     WRITE_U(stream, 0, 1, "deblocking_filter_override_enabled_flag");
-  WRITE_U(stream, encoder->deblock_enable ? 0 : 1, 1,
+  WRITE_U(stream, encoder_state->encoder_control->deblock_enable ? 0 : 1, 1,
           "pps_disable_deblocking_filter_flag");
 
     //IF !disabled
-  if (encoder->deblock_enable) {
-     WRITE_SE(stream, encoder->beta_offset_div2, "beta_offset_div2");
-     WRITE_SE(stream, encoder->tc_offset_div2, "tc_offset_div2");
+  if (encoder_state->encoder_control->deblock_enable) {
+     WRITE_SE(stream, encoder_state->encoder_control->beta_offset_div2, "beta_offset_div2");
+     WRITE_SE(stream, encoder_state->encoder_control->tc_offset_div2, "tc_offset_div2");
     }
 
     //ENDIF
@@ -656,9 +652,9 @@ void encode_pic_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 0, 1, "pps_extension_flag");
 }
 
-static void encode_PTL(encoder_control * const encoder)
+static void encode_PTL(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
+  bitstream * const stream = &encoder_state->stream;
   int i;
   // PTL
   // Profile Tier
@@ -696,9 +692,10 @@ static void encode_PTL(encoder_control * const encoder)
   // end PTL
 }
 
-static void encode_scaling_list(encoder_control * const encoder)
+static void encode_scaling_list(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  bitstream * const stream = &encoder_state->stream;
   uint32_t size_id;
   for (size_id = 0; size_id < SCALING_LIST_SIZE_NUM; size_id++) {
     int32_t list_id;
@@ -752,11 +749,10 @@ static void encode_scaling_list(encoder_control * const encoder)
   }
 }
 
-void encode_seq_parameter_set(encoder_control * const encoder)
+void encode_seq_parameter_set(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
-  const picture * const cur_pic = encoder->in.cur_pic;
-  const encoder_input* const in = &encoder->in;
+  bitstream * const stream = &encoder_state->stream;
+  const picture * const cur_pic = encoder_state->cur_pic;
 
 #ifdef _DEBUG
   printf("=========== Sequence Parameter Set ID: 0 ===========\n");
@@ -767,20 +763,20 @@ void encode_seq_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 1, 3, "sps_max_sub_layers_minus1");
   WRITE_U(stream, 0, 1, "sps_temporal_id_nesting_flag");
 
-  encode_PTL(encoder);
+  encode_PTL(encoder_state);
 
   WRITE_UE(stream, 0, "sps_seq_parameter_set_id");
-  WRITE_UE(stream, encoder->in.video_format,
+  WRITE_UE(stream, encoder_state->encoder_control->in.video_format,
            "chroma_format_idc");
 
-  if (encoder->in.video_format == 3) {
+  if (encoder_state->encoder_control->in.video_format == 3) {
     WRITE_U(stream, 0, 1, "separate_colour_plane_flag");
   }
 
   WRITE_UE(stream, cur_pic->width, "pic_width_in_luma_samples");
   WRITE_UE(stream, cur_pic->height, "pic_height_in_luma_samples");
 
-  if (cur_pic->width != in->real_width || cur_pic->height != in->real_height) {
+  if (cur_pic->width != encoder_state->encoder_control->in.real_width || cur_pic->height != encoder_state->encoder_control->in.real_height) {
     // The standard does not seem to allow setting conf_win values such that
     // the number of luma samples is not a multiple of 2. Options are to either
     // hide one line or show an extra line of non-video. Neither seems like a
@@ -788,10 +784,10 @@ void encode_seq_parameter_set(encoder_control * const encoder)
     assert(!(cur_pic->width % 2));
     WRITE_U(stream, 1, 1, "conformance_window_flag");
     WRITE_UE(stream, 0, "conf_win_left_offset");
-    WRITE_UE(stream, (cur_pic->width - in->real_width) >> 1,
+    WRITE_UE(stream, (cur_pic->width - encoder_state->encoder_control->in.real_width) >> 1,
              "conf_win_right_offset");
     WRITE_UE(stream, 0, "conf_win_top_offset");
-    WRITE_UE(stream, (cur_pic->height - in->real_height) >> 1,
+    WRITE_UE(stream, (cur_pic->height - encoder_state->encoder_control->in.real_height) >> 1,
              "conf_win_bottom_offset");
   } else {
     WRITE_U(stream, 0, 1, "conformance_window_flag");
@@ -800,8 +796,8 @@ void encode_seq_parameter_set(encoder_control * const encoder)
   //IF window flag
   //END IF
 
-  WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_luma_minus8");
-  WRITE_UE(stream, encoder->bitdepth-8, "bit_depth_chroma_minus8");
+  WRITE_UE(stream, encoder_state->encoder_control->bitdepth-8, "bit_depth_luma_minus8");
+  WRITE_UE(stream, encoder_state->encoder_control->bitdepth-8, "bit_depth_chroma_minus8");
   WRITE_UE(stream, 0, "log2_max_pic_order_cnt_lsb_minus4");
   WRITE_U(stream, 0, 1, "sps_sub_layer_ordering_info_present_flag");
 
@@ -819,14 +815,14 @@ void encode_seq_parameter_set(encoder_control * const encoder)
   WRITE_UE(stream, TR_DEPTH_INTRA, "max_transform_hierarchy_depth_intra");
 
   // scaling list
-  WRITE_U(stream, encoder->scaling_list.enable, 1, "scaling_list_enable_flag");
-  if (encoder->scaling_list.enable) {
+  WRITE_U(stream, encoder_state->encoder_control->scaling_list.enable, 1, "scaling_list_enable_flag");
+  if (encoder_state->encoder_control->scaling_list.enable) {
     WRITE_U(stream, 1, 1, "sps_scaling_list_data_present_flag");
-    encode_scaling_list(encoder);
+    encode_scaling_list(encoder_state);
   }
 
   WRITE_U(stream, 0, 1, "amp_enabled_flag");
-  WRITE_U(stream, encoder->sao_enable ? 1 : 0, 1,
+  WRITE_U(stream, encoder_state->encoder_control->sao_enable ? 1 : 0, 1,
           "sample_adaptive_offset_enabled_flag");
   WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag");
   #if ENABLE_PCM == 1
@@ -852,14 +848,14 @@ void encode_seq_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 0, 1, "sps_strong_intra_smoothing_enable_flag");
   WRITE_U(stream, 1, 1, "vui_parameters_present_flag");
 
-  encode_VUI(encoder);
+  encode_VUI(encoder_state);
 
   WRITE_U(stream, 0, 1, "sps_extension_flag");
 }
 
-void encode_vid_parameter_set(encoder_control * const encoder)
+void encode_vid_parameter_set(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
+  bitstream * const stream = &encoder_state->stream;
   int i;
 #ifdef _DEBUG
   printf("=========== Video Parameter Set ID: 0 ===========\n");
@@ -872,7 +868,7 @@ void encode_vid_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 0, 1, "vps_temporal_id_nesting_flag");
   WRITE_U(stream, 0xffff, 16, "vps_reserved_ffff_16bits");
 
-  encode_PTL(encoder);
+  encode_PTL(encoder_state);
 
   WRITE_U(stream, 0, 1, "vps_sub_layer_ordering_info_present_flag");
 
@@ -893,9 +889,10 @@ void encode_vid_parameter_set(encoder_control * const encoder)
   WRITE_U(stream, 0, 1, "vps_extension_flag");
 }
 
-static void encode_VUI(encoder_control * const encoder)
+static void encode_VUI(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
+  bitstream * const stream = &encoder_state->stream;
+  const encoder_control * const encoder = encoder_state->encoder_control;
 #ifdef _DEBUG
   printf("=========== VUI Set ID: 0 ===========\n");
 #endif
@@ -993,10 +990,11 @@ static void encode_VUI(encoder_control * const encoder)
   //ENDIF
 }
 
-void encode_slice_header(encoder_control * const encoder)
+void encode_slice_header(encoder_state * const encoder_state)
 {
-  bitstream * const stream = &encoder->stream;
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  bitstream * const stream = &encoder_state->stream;
+  const picture * const cur_pic = encoder_state->cur_pic;
 
 #ifdef _DEBUG
   printf("=========== Slice ===========\n");
@@ -1024,9 +1022,9 @@ void encode_slice_header(encoder_control * const encoder)
   if (cur_pic->type != NAL_IDR_W_RADL
       && cur_pic->type != NAL_IDR_N_LP) {
       int j;
-      int ref_negative = encoder->ref->used_size;
+      int ref_negative = encoder_state->ref->used_size;
       int ref_positive = 0;
-      WRITE_U(stream, encoder->poc&0xf, 4, "pic_order_cnt_lsb");
+      WRITE_U(stream, encoder_state->poc&0xf, 4, "pic_order_cnt_lsb");
       WRITE_U(stream, 0, 1, "short_term_ref_pic_set_sps_flag");
       WRITE_UE(stream, ref_negative, "num_negative_pics");
       WRITE_UE(stream, ref_positive, "num_positive_pics");
@@ -1049,7 +1047,7 @@ void encode_slice_header(encoder_control * const encoder)
 
   if (cur_pic->slicetype != SLICE_I) {
       WRITE_U(stream, 1, 1, "num_ref_idx_active_override_flag");
-        WRITE_UE(stream, encoder->ref->used_size-1, "num_ref_idx_l0_active_minus1");
+        WRITE_UE(stream, encoder_state->ref->used_size-1, "num_ref_idx_l0_active_minus1");
       WRITE_UE(stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand");
   }
 
@@ -1064,10 +1062,10 @@ void encode_slice_header(encoder_control * const encoder)
 }
 
 
-static void encode_sao_color(const encoder_control * const encoder, cabac_data *cabac, sao_info *sao,
+static void encode_sao_color(const encoder_state * const encoder_state, cabac_data *cabac, sao_info *sao,
                              color_index color_i)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   sao_eo_cat i;
 
   // Skip colors with no SAO.
@@ -1131,7 +1129,7 @@ static void encode_sao_merge_flags(sao_info *sao, cabac_data *cabac,
 /**
  * \brief Encode SAO information.
  */
-static void encode_sao(encoder_control * const encoder,
+static void encode_sao(encoder_state * const encoder_state,
                        cabac_data *cabac,
                        unsigned x_lcu, uint16_t y_lcu,
                        sao_info *sao_luma, sao_info *sao_chroma)
@@ -1141,17 +1139,17 @@ static void encode_sao(encoder_control * const encoder,
 
   // If SAO is merged, nothing else needs to be coded.
   if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) {
-    encode_sao_color(encoder, cabac, sao_luma, COLOR_Y);
-    encode_sao_color(encoder, cabac, sao_chroma, COLOR_U);
-    encode_sao_color(encoder, cabac, sao_chroma, COLOR_V);
+    encode_sao_color(encoder_state, cabac, sao_luma, COLOR_Y);
+    encode_sao_color(encoder_state, cabac, sao_chroma, COLOR_U);
+    encode_sao_color(encoder_state, cabac, sao_chroma, COLOR_V);
   }
 }
 
 
-void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac,
+void encode_coding_tree(encoder_state * const encoder_state, cabac_data *cabac,
                         uint16_t x_ctb, uint16_t y_ctb, uint8_t depth)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x_ctb + y_ctb * (cur_pic->width_in_lcu << MAX_DEPTH)];
   uint8_t split_flag = GET_SPLITDATA(cur_cu, depth);
   uint8_t split_model = 0;
@@ -1184,17 +1182,17 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
     if (split_flag || border) {
       // Split blocks and remember to change x and y block positions
       uint8_t change = 1<<(MAX_DEPTH-1-depth);
-      encode_coding_tree(encoder, cabac, x_ctb, y_ctb, depth + 1); // x,y
+      encode_coding_tree(encoder_state, cabac, x_ctb, y_ctb, depth + 1); // x,y
 
       // TODO: fix when other half of the block would not be completely over the border
       if (!border_x || border_split_x) {
-        encode_coding_tree(encoder, cabac, x_ctb + change, y_ctb, depth + 1);
+        encode_coding_tree(encoder_state, cabac, x_ctb + change, y_ctb, depth + 1);
       }
       if (!border_y || border_split_y) {
-        encode_coding_tree(encoder, cabac, x_ctb, y_ctb + change, depth + 1);
+        encode_coding_tree(encoder_state, cabac, x_ctb, y_ctb + change, depth + 1);
       }
       if (!border || (border_split_x && border_split_y)) {
-        encode_coding_tree(encoder, cabac, x_ctb + change, y_ctb + change, depth + 1);
+        encode_coding_tree(encoder_state, cabac, x_ctb + change, y_ctb + change, depth + 1);
       }
       return;
     }
@@ -1307,10 +1305,10 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
       */
 
       for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) {
-            //if(encoder->ref_idx_num[uiRefListIdx] > 0)
+            //if(encoder_state->ref_idx_num[uiRefListIdx] > 0)
             {
           if (cur_cu->inter.mv_dir & (1 << ref_list_idx)) {
-            if (encoder->ref->used_size != 1) { //encoder->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1)
+            if (encoder_state->ref->used_size != 1) { //encoder_state->ref_idx_num[uiRefListIdx] != 1)//NumRefIdx != 1)
               // parseRefFrmIdx
               int32_t ref_frame = cur_cu->inter.mv_ref;
 
@@ -1319,7 +1317,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
 
               if (ref_frame > 0) {
                 int32_t i;
-                int32_t ref_num = encoder->ref->used_size - 2;
+                int32_t ref_num = encoder_state->ref->used_size - 2;
 
                 cabac->ctx = &(cabac->ctx_cu_ref_pic_model[1]);
                 ref_frame--;
@@ -1337,7 +1335,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
               }
             }
 
-            if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) {
+            if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder_state->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) {
               const int32_t mvd_hor = cur_cu->inter.mvd[0];
               const int32_t mvd_ver = cur_cu->inter.mvd[1];
               const int8_t hor_abs_gr0 = mvd_hor != 0;
@@ -1394,7 +1392,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
     // Code (possible) coeffs to bitstream
 
     if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) {
-      encode_transform_coeff(encoder, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
+      encode_transform_coeff(encoder_state, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
     }
 
 
@@ -1516,7 +1514,7 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
       }
     }  // end intra chroma pred mode coding
 
-    encode_transform_coeff(encoder, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
+    encode_transform_coeff(encoder_state, cabac, x_ctb * 2, y_ctb * 2, depth, 0, 0, 0);
   }
 
     #if ENABLE_PCM == 1
@@ -1567,11 +1565,12 @@ void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac
   /* end coding_unit */
 }
 
-static void transform_chroma(const encoder_control * const encoder, cabac_data *cabac, cu_info *cur_cu,
+static void transform_chroma(encoder_state * const encoder_state, cabac_data *cabac, cu_info *cur_cu,
                              int depth, pixel *base_u, pixel *pred_u,
                              coefficient *coeff_u, int8_t scan_idx_chroma,
                              coefficient *pre_quant_coeff, coefficient *block)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   int base_stride = LCU_WIDTH;
   int pred_stride = LCU_WIDTH;
 
@@ -1592,15 +1591,15 @@ static void transform_chroma(const encoder_control * const encoder, cabac_data *
 
   transform2d(encoder, block, pre_quant_coeff, width_c, 65535);
   if (encoder->rdoq_enable) {
-    rdoq(encoder, cabac, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2,
+    rdoq(encoder_state, cabac, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2,
          scan_idx_chroma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth);
   } else {
-    quant(encoder, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2,
+    quant(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2,
           scan_idx_chroma, cur_cu->type);
   }
 }
 
-static void reconstruct_chroma(const encoder_control * const encoder, cu_info *cur_cu,
+static void reconstruct_chroma(const encoder_state * const encoder_state, cu_info *cur_cu,
                                int depth, int has_coeffs, coefficient *coeff_u,
                                pixel *recbase_u, pixel *pred_u, int color_type,
                                coefficient *pre_quant_coeff, coefficient *block)
@@ -1613,8 +1612,8 @@ static void reconstruct_chroma(const encoder_control * const encoder, cu_info *c
 
   if (has_coeffs) {
     // RECONSTRUCT for predictions
-    dequant(encoder, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type);
-    itransform2d(encoder, block, pre_quant_coeff, width_c, 65535);
+    dequant(encoder_state, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type);
+    itransform2d(encoder_state->encoder_control, block, pre_quant_coeff, width_c, 65535);
 
     i = 0;
 
@@ -1637,8 +1636,9 @@ static void reconstruct_chroma(const encoder_control * const encoder, cu_info *c
   }
 }
 
-void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu)
+void encode_transform_tree(encoder_state * const encoder_state, cabac_data* cabac, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   // we have 64>>depth transform size
   int x_local = (x&0x3f), y_local = (y&0x3f);
   cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
@@ -1654,10 +1654,10 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
   // Split transform and increase depth
   if (depth == 0 || cur_cu->tr_depth > depth) {
     int offset = width_c;
-    encode_transform_tree(encoder, cabac, x,          y,          depth+1, lcu);
-    encode_transform_tree(encoder, cabac, x + offset, y,          depth+1, lcu);
-    encode_transform_tree(encoder, cabac, x,          y + offset, depth+1, lcu);
-    encode_transform_tree(encoder, cabac, x + offset, y + offset, depth+1, lcu);
+    encode_transform_tree(encoder_state, cabac, x,          y,          depth+1, lcu);
+    encode_transform_tree(encoder_state, cabac, x + offset, y,          depth+1, lcu);
+    encode_transform_tree(encoder_state, cabac, x,          y + offset, depth+1, lcu);
+    encode_transform_tree(encoder_state, cabac, x + offset, y + offset, depth+1, lcu);
 
     // Derive coded coeff flags from the next depth
     if (depth == MAX_DEPTH) {
@@ -1796,20 +1796,20 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
       // Test for transform skip
       transformskip(encoder, block,pre_quant_coeff,width);
       if (encoder->rdoq_enable) {
-        rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0);
+        rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0);
       } else {
-        quant(encoder, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
+        quant(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
       }
-      dequant(encoder, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type);
+      dequant(encoder_state, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type);
       itransformskip(encoder, temp_block,pre_quant_coeff,width);
 
       transform2d(encoder, block,pre_quant_coeff,width,0);
       if (encoder->rdoq_enable) {
-        rdoq(encoder, cabac, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0);
+        rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type,0);
       } else {
-        quant(encoder, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
+        quant(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, &ac_sum, 0, scan_idx_luma, cur_cu->type);
       }
-      dequant(encoder, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type);
+      dequant(encoder_state, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type);
       itransform2d(encoder, temp_block2,pre_quant_coeff,width,0);
 
       // SSD between original and reconstructed
@@ -1828,15 +1828,15 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
           coeffcost += abs((int)temp_coeff[i]);
           coeffcost2 += abs((int)temp_coeff2[i]);
         }
-        cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5);
-        cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder->cur_lambda_cost+0.5);
+        cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5);
+        cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->cur_lambda_cost+0.5);
         // Full RDO
       } else if(encoder->rdo == 2) {
         coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, 4, 0, scan_idx_luma);
         coeffcost2 = get_coeff_cost(encoder, cabac, temp_coeff2, 4, 0, scan_idx_luma);
 
-        cost  += coeffcost*((int)encoder->cur_lambda_cost+0.5);
-        cost2 += coeffcost2*((int)encoder->cur_lambda_cost+0.5);
+        cost  += coeffcost*((int)encoder_state->cur_lambda_cost+0.5);
+        cost2 += coeffcost2*((int)encoder_state->cur_lambda_cost+0.5);
       }
 
       cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip = (cost < cost2);
@@ -1850,10 +1850,10 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
     }
 
     if (encoder->rdoq_enable) {
-      rdoq(encoder, cabac, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0,
+      rdoq(encoder_state, cabac, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0,
            scan_idx_luma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth);
     } else {
-      quant(encoder, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type);
+      quant(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type);
     }
 
     // Check for non-zero coeffs
@@ -1891,7 +1891,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
         }
       }
 
-      dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type);
+      dequant(encoder_state, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type);
       if(width == 4 && cur_cu->intra[PU_INDEX(x_pu, y_pu)].tr_skip) {
         itransformskip(encoder, block,pre_quant_coeff,width);
       } else {
@@ -1934,7 +1934,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
         }
       }
 
-      transform_chroma(encoder, cabac, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
+      transform_chroma(encoder_state, cabac, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
       for (i = 0; i < chroma_size; i++) {
         if (coeff_u[i] != 0) {
           int d;
@@ -1944,7 +1944,7 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
           break;
         }
       }
-      transform_chroma(encoder, cabac, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
+      transform_chroma(encoder_state, cabac, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
       for (i = 0; i < chroma_size; i++) {
         if (coeff_v[i] != 0) {
           int d;
@@ -1967,11 +1967,11 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
         }
       }
 
-      reconstruct_chroma(encoder, cur_cu, chroma_depth,
+      reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
                          cur_cu->coeff_top_u[depth],
                          coeff_u, recbase_u, pred_u, color_type_u,
                          pre_quant_coeff, block);
-      reconstruct_chroma(encoder, cur_cu, chroma_depth,
+      reconstruct_chroma(encoder_state, cur_cu, chroma_depth,
                          cur_cu->coeff_top_v[depth],
                          coeff_v, recbase_v, pred_v, color_type_v,
                          pre_quant_coeff, block);
@@ -1983,10 +1983,11 @@ void encode_transform_tree(const encoder_control * const encoder, cabac_data* ca
   // end Residual Coding
 }
 
-static void encode_transform_unit(const encoder_control * const encoder, cabac_data *cabac,
+static void encode_transform_unit(encoder_state * const encoder_state, cabac_data *cabac,
                                   int x_pu, int y_pu, int depth, int tr_depth)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  const picture * const cur_pic = encoder_state->cur_pic;
   uint8_t width = LCU_WIDTH >> depth;
   uint8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
 
@@ -2134,12 +2135,12 @@ static void encode_transform_unit(const encoder_control * const encoder, cabac_d
  * \param parent_coeff_u  What was signaled at previous level for cbf_cb.
- * \param parent_coeff_v  What was signlaed at previous level for cbf_cr.
+ * \param parent_coeff_v  What was signaled at previous level for cbf_cr.
  */
-void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_pu,int32_t y_pu,
+void encode_transform_coeff(encoder_state * const encoder_state, cabac_data *cabac, int32_t x_pu,int32_t y_pu,
                             int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v)
 {
   int32_t x_cu = x_pu / 2;
   int32_t y_cu = y_pu / 2;
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (cur_pic->width_in_lcu << MAX_DEPTH)];
 
   // NxN signifies implicit transform split at the first transform level.
@@ -2193,10 +2194,10 @@ void encode_transform_coeff(const encoder_control * const encoder, cabac_data *c
 
   if (split) {
     uint8_t pu_offset = 1 << (MAX_PU_DEPTH - (depth + 1));
-    encode_transform_coeff(encoder, cabac, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
-    encode_transform_coeff(encoder, cabac, x_pu + pu_offset, y_pu,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
-    encode_transform_coeff(encoder, cabac, x_pu, y_pu + pu_offset,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
-    encode_transform_coeff(encoder, cabac, x_pu + pu_offset, y_pu + pu_offset,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
+    encode_transform_coeff(encoder_state, cabac, x_pu, y_pu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
+    encode_transform_coeff(encoder_state, cabac, x_pu + pu_offset, y_pu,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
+    encode_transform_coeff(encoder_state, cabac, x_pu, y_pu + pu_offset,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
+    encode_transform_coeff(encoder_state, cabac, x_pu + pu_offset, y_pu + pu_offset,  depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v);
     return;
   }
 
@@ -2211,7 +2212,7 @@ void encode_transform_coeff(const encoder_control * const encoder, cabac_data *c
   }
 
   if (cb_flag_y | cb_flag_u | cb_flag_v) {
-    encode_transform_unit(encoder, cabac, x_pu, y_pu, depth, tr_depth);
+    encode_transform_unit(encoder_state, cabac, x_pu, y_pu, depth, tr_depth);
   }
 }
 
diff --git a/src/encoder.h b/src/encoder.h
index 80d4deeb..f0b41634 100644
--- a/src/encoder.h
+++ b/src/encoder.h
@@ -44,33 +44,33 @@ typedef struct
 
 enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };
 
-/* Input info struct */
-typedef struct
-{
-  FILE *file;
-  int32_t real_width;  /*!< \brief real input picture width */
-  int32_t real_height; /*!< \brief real input picture width */
-  picture *cur_pic;
-  int8_t video_format;
-  int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
-} encoder_input;
-
 /* Encoder control options, the main struct */
 typedef struct
 {
-  int32_t frame;
-  int32_t poc; /*!< \brief picture order count */
+  /* Configuration */
   const config *cfg;
-  encoder_input in;
+  
+  /* Input */
+  struct {
+    FILE *file;
+    int32_t width;
+    int32_t height;
+    int32_t width_in_lcu;
+    int32_t height_in_lcu;
+    int32_t real_width;  /*!< \brief real input picture width */
+    int32_t real_height; /*!< \brief real input picture height */
+    int8_t video_format;
+    int8_t bitdepth;  /*!< \brief input bit depth (8,10) */
+  } in;
+  
+  /* Output */
+  struct {
+    FILE *file;
+  } out;
+  
   encoder_me me;
-  bitstream stream;
-  FILE *output;
-  picture_list *ref;
-  int8_t ref_list;
-  int8_t ref_idx_num[2];
-  int8_t QP;             // \brief Quantization parameter
+  
   int8_t bitdepth;
-  double cur_lambda_cost;
 
   /* Filtering */
   int8_t deblock_enable; // \brief Flag to enable deblocking filter
@@ -101,20 +101,48 @@ typedef struct
   scaling_list scaling_list;
 } encoder_control;
 
-void init_lambda(encoder_control *encoder);
-encoder_control *init_encoder_control(config *cfg);
-void init_encoder_input(encoder_input *input, FILE* inputfile,
-                        int32_t width, int32_t height);
-void encode_one_frame(encoder_control *encoder);
-int read_one_frame(FILE *file, const encoder_control * const encoder);
+typedef struct encoder_state { /* Mutable encoding state split out of encoder_control */
+  const encoder_control *encoder_control; /*!< \brief read-only encoder-wide settings (rdo, bitdepth, ...) */
+  
+  picture *cur_pic; /*!< \brief picture being processed; read by the deblocking filter */
+  int32_t frame;
+  int32_t poc; /*!< \brief picture order count */
+  
+  bitstream stream;
+  
+  picture_list *ref; /*!< \brief reference pictures; ref->pics[] is indexed by a CU's inter.mv_ref */
+  int8_t ref_list;
+  int8_t ref_idx_num[2];
+  int8_t QP;             // \brief Quantization parameter
+  
+  double cur_lambda_cost; /*!< \brief lambda multiplied with bit costs in intra mode search and RDOQ */
+  
+  cabac_data cabac;
+  
+  struct encoder_state *children; /* NOTE(review): ownership and element count are not visible here, confirm */
+} encoder_state;
 
-void encode_seq_parameter_set(encoder_control * const encoder);
-void encode_pic_parameter_set(encoder_control * const encoder);
-void encode_vid_parameter_set(encoder_control * const encoder);
-void encode_slice_header(encoder_control * const encoder);
-void encode_access_unit_delimiter(encoder_control * const encoder);
-void encode_prefix_sei_version(encoder_control * const encoder);
-void encode_coding_tree(const encoder_control * const encoder, cabac_data *cabac, uint16_t x_ctb,
+int encoder_control_init(encoder_control *encoder, const config *cfg);
+int encoder_control_finalize(encoder_control *encoder);
+
+void encoder_control_input_init(encoder_control *encoder, FILE *inputfile, int32_t width, int32_t height);
+
+int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder);
+int encoder_state_finalize(encoder_state *encoder_state);
+void encoder_state_init_lambda(encoder_state *encoder_state);
+
+void init_encoder_input(encoder_control *encoder, FILE* inputfile,
+                        int32_t width, int32_t height);
+void encode_one_frame(encoder_state *encoder_state);
+int read_one_frame(FILE* file, const encoder_state *encoder);
+
+void encode_seq_parameter_set(encoder_state *encoder);
+void encode_pic_parameter_set(encoder_state *encoder);
+void encode_vid_parameter_set(encoder_state *encoder);
+void encode_slice_header(encoder_state * encoder);
+void encode_access_unit_delimiter(encoder_state *encoder);
+void encode_prefix_sei_version(encoder_state *encoder);
+void encode_coding_tree(encoder_state *encoder, cabac_data *cabac, uint16_t x_ctb,
                         uint16_t y_ctb, uint8_t depth);
 
 void encode_last_significant_xy(cabac_data *cabac,
@@ -123,8 +151,8 @@ void encode_last_significant_xy(cabac_data *cabac,
                                 uint8_t type, uint8_t scan);
 void encode_coeff_nxn(const encoder_control * const encoder, cabac_data *cabac, int16_t *coeff, uint8_t width,
                       uint8_t type, int8_t scan_mode, int8_t tr_skip);
-void encode_transform_tree(const encoder_control * const encoder, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
-void encode_transform_coeff(const encoder_control * const encoder, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
+void encode_transform_tree(encoder_state *encoder_state, cabac_data* cabac, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu );
+void encode_transform_coeff(encoder_state *encoder_state, cabac_data *cabac, int32_t x_cu, int32_t y_cu,
                             int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v);
 void encode_block_residual(const encoder_control * const encoder,
                            uint16_t x_ctb, uint16_t y_ctb, uint8_t depth);
diff --git a/src/filter.c b/src/filter.c
index 03f03a53..b82ca6df 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -163,11 +163,13 @@ INLINE void filter_deblock_chroma(const encoder_control * const encoder, pixel *
 /**
  * \brief
  */
-void filter_deblock_edge_luma(const encoder_control * const encoder,
+void filter_deblock_edge_luma(encoder_state * const encoder_state,
                               int32_t xpos, int32_t ypos,
                               int8_t depth, int8_t dir)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  
   cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)];
 
   {
@@ -192,7 +194,7 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
     int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
     int8_t strength = 0;
 
-    int32_t qp              = encoder->QP;
+    int32_t qp              = encoder_state->QP;
     int32_t bitdepth_scale  = 1 << (encoder->bitdepth - 8);
     int32_t b_index         = CLIP(0, 51, qp + (beta_offset_div2 << 1));
     int32_t beta            = g_beta_table_8x8[b_index] * bitdepth_scale;
@@ -288,11 +290,12 @@ void filter_deblock_edge_luma(const encoder_control * const encoder,
 /**
  * \brief
  */
-void filter_deblock_edge_chroma(const encoder_control * const encoder,
+void filter_deblock_edge_chroma(encoder_state * const encoder_state,
                                 int32_t x, int32_t y,
                                 int8_t depth, int8_t dir)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const encoder_control * const encoder = encoder_state->encoder_control;
+  const picture * const cur_pic = encoder_state->cur_pic;
   cu_info *cu_q = &cur_pic->cu_array[MAX_DEPTH][(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)];
 
   // Chroma edges that do not lay on a 8x8 grid are not deblocked.
@@ -324,7 +327,7 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
     int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
     int8_t strength = 2;
 
-    int32_t QP             = g_chroma_scale[encoder->QP];
+    int32_t QP             = g_chroma_scale[encoder_state->QP];
     int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
     int32_t TC_index       = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
     int32_t Tc             = g_tc_table_8x8[TC_index]*bitdepth_scale;
@@ -384,9 +387,9 @@ void filter_deblock_edge_chroma(const encoder_control * const encoder,
  * until the coded block size has been achived. Calls luma and chroma filtering
  * functions for each coded CU size.
  */
-void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int32_t edge)
+void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   cu_info *cur_cu = &cur_pic->cu_array[MAX_DEPTH][x + y*(cur_pic->width_in_lcu << MAX_DEPTH)];
   uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
   uint8_t border_x = (cur_pic->width  < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
@@ -404,15 +407,15 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
     // Tell clang-analyzer that everything is ok.
     assert(depth >= 0 && depth < MAX_DEPTH);
 
-    filter_deblock_cu(encoder, x, y, depth + 1, edge);
+    filter_deblock_cu(encoder_state, x, y, depth + 1, edge);
     if(!border_x || border_split_x) {
-      filter_deblock_cu(encoder, x + change, y, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x + change, y, depth + 1, edge);
     }
     if(!border_y || border_split_y) {
-      filter_deblock_cu(encoder, x , y + change, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x , y + change, depth + 1, edge);
     }
     if((!border_x && !border_y) || (border_split_x && border_split_y)) {
-      filter_deblock_cu(encoder, x + change, y + change, depth + 1, edge);
+      filter_deblock_cu(encoder_state, x + change, y + change, depth + 1, edge);
     }
     return;
   }
@@ -421,8 +424,8 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
   if ((x == 0 && edge == EDGE_VER) || (y == 0 && edge == EDGE_HOR)) return;
 
   // do the filtering for block edge
-  filter_deblock_edge_luma(encoder,   x*(LCU_WIDTH >> MAX_DEPTH),       y*(LCU_WIDTH >> MAX_DEPTH),       depth, edge);
-  filter_deblock_edge_chroma(encoder, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
+  filter_deblock_edge_luma(encoder_state,   x*(LCU_WIDTH >> MAX_DEPTH),       y*(LCU_WIDTH >> MAX_DEPTH),       depth, edge);
+  filter_deblock_edge_chroma(encoder_state, x*(LCU_WIDTH >> (MAX_DEPTH + 1)), y*(LCU_WIDTH >> (MAX_DEPTH + 1)), depth, edge);
 }
 
 /**
@@ -433,9 +436,9 @@ void filter_deblock_cu(const encoder_control * const encoder, int32_t x, int32_t
  * the Largest Coding Units (LCU) and call filter_deblock_cu with absolute
  * X and Y coordinates of the LCU.
  */
-void filter_deblock(const encoder_control * const encoder)
+void filter_deblock(encoder_state * const encoder_state)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   int16_t x, y;
 
   // TODO: Optimization: add thread for each LCU
@@ -444,7 +447,7 @@ void filter_deblock(const encoder_control * const encoder)
   {
     for (x = 0; x < cur_pic->width_in_lcu; x++)
     {
-      filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
+      filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_VER);
     }
   }
 
@@ -453,7 +456,7 @@ void filter_deblock(const encoder_control * const encoder)
   {
     for (x = 0; x < cur_pic->width_in_lcu; x++)
     {
-      filter_deblock_cu(encoder, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
+      filter_deblock_cu(encoder_state, x << MAX_DEPTH, y << MAX_DEPTH, 0, EDGE_HOR);
     }
   }
 }
@@ -469,11 +472,11 @@ void filter_deblock(const encoder_control * const encoder)
  * - After vertical filtering the left edge, filter the last 4 pixels of
  *   horizontal edges in the LCU to the left.
  */
-void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px)
+void filter_deblock_lcu(encoder_state * const encoder_state, int x_px, int y_px)
 {
   const vector2d lcu = { x_px / LCU_WIDTH, y_px / LCU_WIDTH };
 
-  filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
+  filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_VER);
 
   // Filter rightmost 4 pixels from last LCU now that they have been
   // finally deblocked vertically.
@@ -481,15 +484,15 @@ void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_p
     int y;
     for (y = 0; y < 64; y += 8) {
       if (lcu.y + y == 0) continue;
-      filter_deblock_edge_luma(encoder, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
+      filter_deblock_edge_luma(encoder_state, lcu.x * 64 - 4, lcu.y * 64 + y, 4, EDGE_HOR);
     }
     for (y = 0; y < 32; y += 8) {
       if (lcu.y + y == 0) continue;
-      filter_deblock_edge_chroma(encoder, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
+      filter_deblock_edge_chroma(encoder_state, lcu.x * 32 - 4, lcu.y * 32 + y, 4, EDGE_HOR);
     }
   }
 
-  filter_deblock_cu(encoder, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
+  filter_deblock_cu(encoder_state, lcu.x << MAX_DEPTH, lcu.y << MAX_DEPTH, 0, EDGE_HOR);
 }
 
 
diff --git a/src/filter.h b/src/filter.h
index e7448c29..b7a51fa5 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -32,16 +32,16 @@
 //////////////////////////////////////////////////////////////////////////
 // FUNCTIONS
 // Deblocking
-void filter_deblock_cu(const encoder_control * const encoder, int32_t x_px, int32_t y_px,
+void filter_deblock_cu(encoder_state *encoder_state, int32_t x_px, int32_t y_px,
                        int8_t depth, int32_t edge);
-void filter_deblock_edge_luma(const encoder_control * const encoder,
+void filter_deblock_edge_luma(encoder_state *encoder_state,
                               int32_t x_pos, int32_t y_pos,
                               int8_t depth, int8_t dir);
-void filter_deblock_edge_chroma(const encoder_control * const encoder,
+void filter_deblock_edge_chroma(encoder_state *encoder_state,
                                 int32_t xpos, int32_t ypos,
                                 int8_t depth, int8_t dir);
-void filter_deblock(const encoder_control * const encoder);
-void filter_deblock_lcu(const encoder_control * const encoder, int x_px, int y_px);
+void filter_deblock(encoder_state *encoder_state);
+void filter_deblock_lcu(encoder_state *encoder_state, int x_px, int y_px);
 void filter_deblock_luma(const encoder_control * const encoder, pixel *src, int32_t offset, int32_t tc , int8_t sw,
                          int8_t part_p_nofilter, int8_t part_q_nofilter,
                          int32_t thr_cut,
diff --git a/src/inter.c b/src/inter.c
index 020d7e72..0b45cffe 100644
--- a/src/inter.c
+++ b/src/inter.c
@@ -322,7 +322,7 @@ void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_i
  * \param depth current block depth
  * \param mv_pred[2][2] 2x motion vector prediction
  */
-void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
+void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu)
 {
   uint8_t candidates = 0;
   uint8_t b_candidates = 0;
@@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t
   inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu);
 
  #define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)
-#define APPLY_MV_SCALING(cu, cand) {int td = encoder->poc - encoder->ref->pics[(cu)->inter.mv_ref]->poc;\
-                                   int tb = encoder->poc - encoder->ref->pics[cur_cu->inter.mv_ref]->poc;\
+#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\
+                                   int tb = encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\
                                    if (td != tb) { \
                                       int scale = CALCULATE_SCALE(cu,tb,td); \
                                        mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \
diff --git a/src/inter.h b/src/inter.h
index 5020cb38..8613f8b9 100644
--- a/src/inter.h
+++ b/src/inter.h
@@ -35,6 +35,6 @@ void inter_recon_lcu(const encoder_control *encoder, picture* ref,int32_t xpos,
 
 void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1,
                                         cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
-void inter_get_mv_cand(const encoder_control * const encoder, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
+void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
 uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu);
 #endif
diff --git a/src/intra.c b/src/intra.c
index ab08ed93..72ca69d9 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -327,7 +327,7 @@ static void intra_get_pred(const encoder_control * const encoder, pixel *rec[2],
  * \param sad_out sad value of best mode
  * \returns best intra mode
 */
-int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
+int16_t intra_prediction(const encoder_state * const encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
                          uint8_t width, uint32_t *sad_out,
                          int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac)
 {
@@ -336,6 +336,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
   int16_t best_mode = 1;
   uint32_t best_bitcost = 0;
   int16_t mode;
+  int8_t rdo = encoder_state->encoder_control->rdo;
 
   // Check 8 modes for 4x4 and 8x8, 3 for others
   int8_t   rdo_modes_to_check = (width == 4 || width == 8)? 8 : 3;
@@ -371,12 +372,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
   // Try all modes and select the best one.
   for (mode = 0; mode < 35; mode++) {
     uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds);
-    intra_get_pred(encoder, ref, recstride, pred, width, mode, 0);
+    intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0);
 
     sad = cost_func(pred, orig_block);
-    sad += mode_cost * (int)(encoder->cur_lambda_cost + 0.5);
+    sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5);
     // When rdo == 2, store best costs to an array and do full RDO later
-    if(encoder->rdo == 2) {
+    if(rdo == 2) {
       int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad);
       if(rdo_mode != -1) {
         rdo_modes[rdo_mode] = mode; rdo_costs[rdo_mode] = sad;
@@ -390,7 +391,7 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
   }
 
   // Select from three best modes if using RDO
-  if(encoder->rdo == 2) {
+  if(rdo == 2) {
     int rdo_mode;
     int pred_mode;
     // Check that the predicted modes are in the RDO mode list
@@ -413,12 +414,12 @@ int16_t intra_prediction(const encoder_control * const encoder, pixel *orig, int
     for(rdo_mode = 0; rdo_mode < rdo_modes_to_check; rdo_mode ++) {
       int rdo_bitcost;
       // The reconstruction is calculated again here, it could be saved from before..
-      intra_recon(encoder, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
-      rdo_costs[rdo_mode] = rdo_cost_intra(encoder,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
+      intra_recon(encoder_state->encoder_control, rec, recstride, width, pred, width, rdo_modes[rdo_mode], 0);
+      rdo_costs[rdo_mode] = rdo_cost_intra(encoder_state,pred,orig_block,width,cabac,rdo_modes[rdo_mode]);
       // Bitcost also calculated again for this mode
       rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds);
       // Add bitcost * lambda
-      rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder->cur_lambda_cost + 0.5);
+      rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5);
 
       if(rdo_costs[rdo_mode] < best_sad) {
         best_sad = rdo_costs[rdo_mode];
@@ -832,8 +833,9 @@ void intra_get_planar_pred(pixel* src, int32_t srcstride, uint32_t width, pixel*
   }
 }
 
-void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
+void intra_recon_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   int x_local = (x&0x3f), y_local = (y&0x3f);
   cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
 
@@ -890,5 +892,5 @@ void intra_recon_lcu(const encoder_control * const encoder, cabac_data *cabac, i
                             rec_stride, width, width);
   }
 
-  encode_transform_tree(encoder, cabac, x, y, depth, lcu);
+  encode_transform_tree(encoder_state, cabac, x, y, depth, lcu);
 }
\ No newline at end of file
diff --git a/src/intra.h b/src/intra.h
index 1a0378c5..43537c36 100644
--- a/src/intra.h
+++ b/src/intra.h
@@ -39,7 +39,7 @@ void intra_build_reference_border(const encoder_control *encoder, int32_t x_luma
 void intra_filter(pixel* ref, int32_t stride, int32_t width, int8_t mode);
 
 /* Predictions */
-int16_t intra_prediction(const encoder_control *encoder, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
+int16_t intra_prediction(const encoder_state *encoder_state, pixel *orig, int32_t origstride, pixel *rec, int16_t recstride,
                          uint8_t width, uint32_t *sad_out,
                          int8_t *intra_preds, uint32_t *bitcost_out, cabac_data *cabac);
 
@@ -49,6 +49,6 @@ void intra_get_angular_pred(const encoder_control *encoder, pixel* src, int32_t
 
 void intra_recon(const encoder_control *encoder, pixel* rec, int32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma);
 
-void intra_recon_lcu(const encoder_control *encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
+void intra_recon_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);
 
 #endif
diff --git a/src/rdo.c b/src/rdo.c
index 107db134..4115f379 100644
--- a/src/rdo.c
+++ b/src/rdo.c
@@ -63,8 +63,9 @@ const uint32_t entropy_bits[128] =
 
  ** Only for luma
  */
-uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
+uint32_t rdo_cost_intra(const encoder_state * const encoder_state, pixel *pred, pixel *orig_block, int width, cabac_data *cabac, int8_t mode)
 {
+    const encoder_control * const encoder = encoder_state->encoder_control;
     coefficient pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2];
     int16_t block[LCU_WIDTH*LCU_WIDTH>>2];
     int16_t temp_block[LCU_WIDTH*LCU_WIDTH>>2];
@@ -92,11 +93,11 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
     }
     transform2d(encoder, block,pre_quant_coeff,width,0);
     if(encoder->rdoq_enable) {
-      rdoq(encoder, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
+      rdoq(encoder_state, cabac, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA,0);
     } else {
-      quant(encoder, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
+      quant(encoder_state, pre_quant_coeff, temp_coeff, width, width, &ac_sum, 0, luma_scan_mode, CU_INTRA);
     }
-    dequant(encoder, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
+    dequant(encoder_state, temp_coeff, pre_quant_coeff, width, width, 0, CU_INTRA);
     itransform2d(encoder, temp_block,pre_quant_coeff,width,0);
 
     // SSD between original and reconstructed
@@ -111,12 +112,12 @@ uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel *pred, pixe
       for (i = 0; i < width*width; i++) {
         coeffcost += abs((int)temp_coeff[i]);
       }
-      cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder->cur_lambda_cost+0.5);
+      cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5);
       // Full RDO
     } else if(encoder->rdo == 2) {
       coeffcost = get_coeff_cost(encoder, cabac, temp_coeff, width, 0, luma_scan_mode);
 
-      cost  += coeffcost*((int)encoder->cur_lambda_cost+0.5);
+      cost  += coeffcost*((int)encoder_state->cur_lambda_cost+0.5);
     }
     return cost;
 }
@@ -284,7 +285,7 @@ int32_t get_ic_rate( cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one
  * This method calculates the best quantized transform level for a given scan position.
  * From HM 12.0
  */
-uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
+uint32_t get_coded_level ( const encoder_state * const encoder_state, cabac_data *cabac, double *coded_cost, double *coded_cost0, double *coded_cost_sig,
                            int32_t level_double, uint32_t max_abs_level,
                            uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                            uint16_t abs_go_rice,
@@ -298,7 +299,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
   cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma);
 
   if( !last && max_abs_level < 3 ) {
-    *coded_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
+    *coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
     *coded_cost     = *coded_cost0 + *coded_cost_sig;
     if (max_abs_level == 0) return best_abs_level;
   } else {
@@ -306,13 +307,13 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
   }
 
   if( !last ) {
-    cur_cost_sig = encoder->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
+    cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
   }
 
   min_abs_level    = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
   for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
     double err       = (double)(level_double - ( abs_level << q_bits ) );
-    double cur_cost  = err * err * temp + encoder->cur_lambda_cost *
+    double cur_cost  = err * err * temp + encoder_state->cur_lambda_cost *
                        get_ic_rate_cost( cabac, abs_level, ctx_num_one, ctx_num_abs,
                                          abs_go_rice, c1_idx, c2_idx, type);
     cur_cost        += cur_cost_sig;
@@ -336,7 +337,7 @@ uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *ca
  *
  * From HM 12.0
 */
-static double get_rate_last(const encoder_control * const encoder,
+static double get_rate_last(const encoder_state * const encoder_state,
                             const uint32_t  pos_x, const uint32_t pos_y,
                             int32_t* last_x_bits, int32_t* last_y_bits)
 {
@@ -349,7 +350,7 @@ static double get_rate_last(const encoder_control * const encoder,
   if( ctx_y > 3 ) {
     uiCost += 32768.0 * ((ctx_y-2)>>1);
   }
-  return encoder->cur_lambda_cost*uiCost;
+  return encoder_state->cur_lambda_cost*uiCost;
 }
 
 static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int8_t type,
@@ -388,9 +389,10 @@ static void calc_last_bits(cabac_data *cabac, int32_t width, int32_t height, int
  * coding engines using probability models like CABAC
  * From HM 12.0
  */
-void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
+void  rdoq(const encoder_state * const encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
            int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   uint32_t log2_tr_size    = g_convert_to_bit[ width ] + 2;
   int32_t  transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size;  // Represents scaling through forward transform
   uint16_t go_rice_param   = 0;
@@ -398,7 +400,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
   uint32_t max_num_coeff   = width * height;
   int32_t  scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
 
-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
 
   {
   int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
@@ -511,7 +513,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
         uint16_t  abs_ctx = ctx_set + c2;
 
         if( scanpos == last_scanpos ) {
-          level            = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
+          level            = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                                level_double, max_abs_level, 0, one_ctx, abs_ctx, go_rice_param,
                                                c1_idx, c2_idx, q_bits, temp, 1, type );
         } else {
@@ -519,7 +521,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
           uint32_t  pos_x    = blkpos - ( pos_y << log2_block_size );
           uint16_t  ctx_sig  = (uint16_t)context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y,
                                                        log2_block_size, type);
-          level              = get_coded_level(encoder, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
+          level              = get_coded_level(encoder_state, cabac, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
                                                level_double, max_abs_level, ctx_sig, one_ctx, abs_ctx, go_rice_param,
                                                c1_idx, c2_idx, q_bits, temp, 0, type );
           sig_rate_delta[ blkpos ] = CTX_ENTROPY_BITS(&baseCtx[ctx_sig],1) - CTX_ENTROPY_BITS(&baseCtx[ctx_sig],0);
@@ -587,7 +589,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
         if (sig_coeffgroup_flag[ cg_blkpos ] == 0) {
           uint32_t ctx_sig  = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                           cg_pos_y, width);
-          cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+          cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
           base_cost += cost_coeffgroup_sig[ cg_scanpos ]  - rd_stats.sig_cost;
         } else {
           if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below.
@@ -604,9 +606,9 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
             ctx_sig  = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
                                                             cg_pos_y, width);
             if (cg_scanpos < cg_last_scanpos) {
-              cost_coeffgroup_sig[cg_scanpos] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
+              cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
               base_cost    += cost_coeffgroup_sig[cg_scanpos];
-              cost_zero_cg += encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+              cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
             }
 
             // try to convert the current coeff group from non-zero to all-zero
@@ -620,7 +622,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
               sig_coeffgroup_flag[ cg_blkpos ] = 0;
               base_cost = cost_zero_cg;
               if (cg_scanpos < cg_last_scanpos) {
-                cost_coeffgroup_sig[ cg_scanpos ] = encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
+                cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
               }
               // reset coeffs to 0 in this block
               for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@@ -648,13 +650,13 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
 
 
   if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
-    best_cost  = block_uncoded_cost +   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
-    base_cost +=   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
+    best_cost  = block_uncoded_cost +   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
+    base_cost +=   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
   } else {
     cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma);
     ctx_cbf   = ( type ? tr_depth : !tr_depth);
-    best_cost  = block_uncoded_cost +  encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
-    base_cost +=   encoder->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
+    best_cost  = block_uncoded_cost +  encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
+    base_cost +=   encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
   }
 
   for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@@ -672,7 +674,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
           uint32_t   pos_y       = blkpos >> log2_block_size;
           uint32_t   pos_x       = blkpos - ( pos_y << log2_block_size );
 
-          double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder, pos_x, pos_y, last_x_bits,last_y_bits );
+          double cost_last = (scan_mode == SCAN_VER) ? get_rate_last(encoder_state, pos_y, pos_x,last_x_bits,last_y_bits) : get_rate_last(encoder_state, pos_x, pos_y, last_x_bits,last_y_bits );
           double totalCost = base_cost + cost_last - cost_sig[ scanpos ];
 
           if( totalCost < best_cost ) {
@@ -708,7 +710,7 @@ void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient
   if(*abs_sum >= 2) {
     int64_t rd_factor = (int64_t) (
                      g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6)))
-                   /  encoder->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
+                   /  encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
                    + 0.5);
     int32_t lastCG = -1;
     int32_t absSum = 0;
diff --git a/src/rdo.h b/src/rdo.h
index d89a9f81..1392a9d8 100644
--- a/src/rdo.h
+++ b/src/rdo.h
@@ -42,10 +42,10 @@ extern const uint32_t g_go_rice_range[5];
 extern const uint32_t g_go_rice_prefix_len[5];
 
 
-void  rdoq(const encoder_control * const encoder, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
+void  rdoq(const encoder_state *encoder_state, cabac_data *cabac, coefficient *coef, coefficient *dest_coeff, int32_t width,
            int32_t height, uint32_t *abs_sum, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth);
 
-uint32_t rdo_cost_intra(const encoder_control * const encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);
+uint32_t rdo_cost_intra(const encoder_state *encoder, pixel* pred, pixel* orig_block, int width, cabac_data* cabac, int8_t mode);
 
 int32_t get_coeff_cost(const encoder_control * const encoder, cabac_data *cabac, coefficient *coeff, int32_t width, int32_t type, int8_t scan_mode);
 
@@ -53,7 +53,7 @@ int32_t get_ic_rate(cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one,
                      uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
 double get_ic_rate_cost  (cabac_data *cabac, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                           uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type);
-uint32_t get_coded_level ( const encoder_control * const encoder, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
+uint32_t get_coded_level ( const encoder_state * encoder_state, cabac_data *cabac, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
                            int32_t level_double, uint32_t max_abs_level,
                            uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs,
                            uint16_t abs_go_rice,
diff --git a/src/sao.c b/src/sao.c
index b75b9e83..aff4cf6f 100644
--- a/src/sao.c
+++ b/src/sao.c
@@ -54,12 +54,12 @@ static int sao_calc_eo_cat(pixel a, pixel b, pixel c)
 }
 
 
-int sao_band_ddistortion(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
+int sao_band_ddistortion(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
                          int block_width, int block_height,
                          int band_pos, int sao_bands[4])
 {
   int y, x;
-  int shift = encoder->bitdepth-5;
+  int shift = encoder_state->encoder_control->bitdepth-5;
   int sum = 0;
 
   for (y = 0; y < block_height; ++y) {
@@ -345,12 +345,12 @@ static int calc_sao_band_offsets(int sao_bands[2][32], int offsets[4],
  * \param rec_data  Reconstructed pixel data. 64x64 for luma, 32x32 for chroma.
  * \param sao_bands an array of bands for original and reconstructed block
  */
-static void calc_sao_bands(const encoder_control * const encoder, const pixel *orig_data, const pixel *rec_data,
+static void calc_sao_bands(const encoder_state * const encoder_state, const pixel *orig_data, const pixel *rec_data,
                            int block_width, int block_height,
                            int sao_bands[2][32])
 {
   int y, x;
-  int shift = encoder->bitdepth-5;
+  int shift = encoder_state->encoder_control->bitdepth-5;
 
   //Loop pixels and take top 5 bits to classify different bands
   for (y = 0; y < block_height; ++y) {
@@ -608,7 +608,7 @@ void sao_reconstruct(const encoder_control * const encoder, picture * pic, const
 
 
 
-static void sao_search_edge_sao(const encoder_control * const encoder, 
+static void sao_search_edge_sao(const encoder_state * const encoder_state, 
                                 const pixel * data[], const pixel * recdata[],
                                 int block_width, int block_height,
                                 unsigned buf_cnt,
@@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,
 
     {
       int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left);
-      sum_ddistortion += (int)((double)mode_bits*(encoder->cur_lambda_cost+0.5));
+      sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5));
     }
     // SAO is not applied for category 0.
     edge_offset[SAO_EO_CAT0] = 0;
@@ -684,7 +684,7 @@ static void sao_search_edge_sao(const encoder_control * const encoder,
 }
 
 
-static void sao_search_band_sao(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
+static void sao_search_band_sao(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
                                int block_width, int block_height,
                                unsigned buf_cnt,
                                sao_info *sao_out, sao_info *sao_top,
@@ -704,14 +704,14 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix
 
     memset(sao_bands, 0, 2 * 32 * sizeof(int));
     for (i = 0; i < buf_cnt; ++i) {
-      calc_sao_bands(encoder, data[i], recdata[i],block_width,
+      calc_sao_bands(encoder_state, data[i], recdata[i],block_width,
                      block_height,sao_bands);
     }
 
     ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position);
 
     temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left);
-    ddistortion += (int)((double)temp_rate*(encoder->cur_lambda_cost+0.5));
+    ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5));
 
     // Select band sao over edge sao when distortion is lower
     if (ddistortion < sao_out->ddistortion) {
@@ -731,7 +731,7 @@ static void sao_search_band_sao(const encoder_control * const encoder, const pix
  * \param buf_cnt  Number of pointers data and recdata have.
  * \param sao_out  Output parameter for the best sao parameters.
  */
-static void sao_search_best_mode(const encoder_control * const encoder, const pixel * data[], const pixel * recdata[],
+static void sao_search_best_mode(const encoder_state * const encoder_state, const pixel * data[], const pixel * recdata[],
                                  int block_width, int block_height,
                                  unsigned buf_cnt,
                                  sao_info *sao_out, sao_info *sao_top,
@@ -740,12 +740,12 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
   sao_info edge_sao;
   sao_info band_sao;
 
-  sao_search_edge_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
-  sao_search_band_sao(encoder, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
+  sao_search_edge_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &edge_sao, sao_top, sao_left);
+  sao_search_band_sao(encoder_state, data, recdata, block_width, block_height, buf_cnt, &band_sao, sao_top, sao_left);
 
   {
     int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left);
-    int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
+    int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
     unsigned buf_i;
     
     for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
@@ -759,11 +759,11 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
 
   {
     int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left);
-    int ddistortion = mode_bits * (int)(encoder->cur_lambda_cost + 0.5);
+    int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
     unsigned buf_i;
     
     for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
-      ddistortion += sao_band_ddistortion(encoder, data[buf_i], recdata[buf_i], 
+      ddistortion += sao_band_ddistortion(encoder_state, data[buf_i], recdata[buf_i], 
                                           block_width, block_height, 
                                           band_sao.band_position, &band_sao.offsets[1]);
     }
@@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
   // Choose between SAO and doing nothing, taking into account the
   // rate-distortion cost of coding do nothing.
   {
-    int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder->cur_lambda_cost + 0.5);
+    int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5);
     if (sao_out->ddistortion >= cost_of_nothing) {
       sao_out->type = SAO_TYPE_NONE;
     }
@@ -794,7 +794,7 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
   return;
 }
 
- void sao_search_chroma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
+ void sao_search_chroma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
 {
   int block_width  = (LCU_WIDTH / 2);
   int block_height = (LCU_WIDTH / 2);
@@ -827,10 +827,10 @@ static void sao_search_best_mode(const encoder_control * const encoder, const pi
   }
 
   // Calculate
-  sao_search_best_mode(encoder, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
+  sao_search_best_mode(encoder_state, orig_list, rec_list, block_width / 2, block_height / 2, 2, sao, sao_top, sao_left);
 }
 
-void sao_search_luma(const encoder_control * const encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
+void sao_search_luma(const encoder_state * const encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left)
 {
   pixel orig[LCU_LUMA_SIZE];
   pixel rec[LCU_LUMA_SIZE];
@@ -857,13 +857,13 @@ void sao_search_luma(const encoder_control * const encoder, const picture *pic,
 
   orig_list[0] = orig;
   rec_list[0] = rec;
-  sao_search_best_mode(encoder, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
+  sao_search_best_mode(encoder_state, orig_list, rec_list, block_width, block_height, 1, sao, sao_top, sao_left);
 }
 
-void sao_reconstruct_frame(const encoder_control * const encoder)
+void sao_reconstruct_frame(encoder_state * const encoder_state)
 {
   vector2d lcu;
-  picture * const cur_pic = encoder->in.cur_pic;
+  picture * const cur_pic = encoder_state->cur_pic;
 
   // These are needed because SAO needs the pre-SAO pixels form left and
   // top LCUs. Single pixel wide buffers, like what search_lcu takes, would
@@ -882,9 +882,9 @@ void sao_reconstruct_frame(const encoder_control * const encoder)
       sao_info *sao_chroma = &cur_pic->sao_chroma[lcu.y * stride + lcu.x];
 
       // sao_do_rdo(encoder, lcu.x, lcu.y, sao_luma, sao_chroma);
-      sao_reconstruct(encoder, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
-      sao_reconstruct(encoder, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
-      sao_reconstruct(encoder, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_y_data, lcu.x, lcu.y, sao_luma, COLOR_Y);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_u_data, lcu.x, lcu.y, sao_chroma, COLOR_U);
+      sao_reconstruct(encoder_state->encoder_control, cur_pic, new_v_data, lcu.x, lcu.y, sao_chroma, COLOR_V);
     }
   }
 
diff --git a/src/sao.h b/src/sao.h
index 365bb5f2..93fd7967 100644
--- a/src/sao.h
+++ b/src/sao.h
@@ -46,11 +46,11 @@ typedef struct sao_info_struct {
 
 
 void init_sao_info(sao_info *sao);
-void sao_search_chroma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
-void sao_search_luma(const encoder_control * encoder, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
+void sao_search_chroma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
+void sao_search_luma(const encoder_state * encoder_state, const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao, sao_info *sao_top, sao_info *sao_left);
 void sao_reconstruct(const encoder_control * encoder, picture *pic, const pixel *old_rec,
                      unsigned x_ctb, unsigned y_ctb,
                      const sao_info *sao, color_index color_i);
-void sao_reconstruct_frame(const encoder_control * const encoder);
+void sao_reconstruct_frame(encoder_state *encoder_state);
 
 #endif
diff --git a/src/search.c b/src/search.c
index f272c6ec..b0584dc0 100644
--- a/src/search.c
+++ b/src/search.c
@@ -117,7 +117,7 @@ static uint32_t get_mvd_coding_cost(vector2d *mvd)
   return bitcost;
 }
 
-static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
+static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y,
                          int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
                          int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
 {
@@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
     temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
   }
   *bitcost = temp_bitcost;
-  return temp_bitcost*(int32_t)(encoder->cur_lambda_cost+0.5);
+  return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5);
 }
 
 
@@ -183,7 +183,7 @@ static int calc_mvd_cost(const encoder_control * const encoder, int x, int y,
  * the predicted motion vector is way off. In the future even more additional
  * points like 0,0 might be used, such as vectors from top or left.
  */
-static unsigned hexagon_search(const encoder_control * const encoder, unsigned depth,
+static unsigned hexagon_search(const encoder_state * const encoder_state, unsigned depth,
                                const picture *pic, const picture *ref,
                                const vector2d *orig, vector2d *mv_in_out,
                                int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
@@ -203,7 +203,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
     unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
                              orig->x + mv.x + pattern->x, orig->y + mv.y + pattern->y,
                              block_width, block_width);
-    cost += calc_mvd_cost(encoder, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
 
     if (cost < best_cost) {
       best_cost    = cost;
@@ -217,7 +217,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
     unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
                              orig->x, orig->y,
                              block_width, block_width);
-    cost += calc_mvd_cost(encoder, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
 
     // If the 0,0 is better, redo the hexagon around that point.
     if (cost < best_cost) {
@@ -233,7 +233,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                                  orig->x + pattern->x,
                                  orig->y + pattern->y,
                                  block_width, block_width);
-        cost += calc_mvd_cost(encoder, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+        cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
 
         if (cost < best_cost) {
           best_cost    = cost;
@@ -268,7 +268,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                                orig->x + mv.x + offset->x,
                                orig->y + mv.y + offset->y,
                                block_width, block_width);
-      cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+      cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
 
       if (cost < best_cost) {
         best_cost    = cost;
@@ -291,7 +291,7 @@ static unsigned hexagon_search(const encoder_control * const encoder, unsigned d
                              orig->x + mv.x + offset->x,
                              orig->y + mv.y + offset->y,
                              block_width, block_width);
-    cost += calc_mvd_cost(encoder, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
+    cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
 
     if (cost > 0 && cost < best_cost) {
       best_cost    = cost;
@@ -370,9 +370,9 @@ static unsigned search_mv_full(unsigned depth,
  * Update lcu to have best modes at this depth.
  * \return Cost of best mode.
  */
-static int search_cu_inter(const encoder_control * const encoder, int x, int y, int depth, lcu_t *lcu)
+static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   uint32_t ref_idx = 0;
   int x_local = (x&0x3f), y_local = (y&0x3f);
   int x_cu = x>>3;
@@ -392,8 +392,8 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,
 
   cur_cu->inter.cost = UINT_MAX;
 
-  for (ref_idx = 0; ref_idx < encoder->ref->used_size; ref_idx++) {
-    picture *ref_pic = encoder->ref->pics[ref_idx];
+  for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) {
+    picture *ref_pic = encoder_state->ref->pics[ref_idx];
     unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
     cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_cu * width_in_scu + x_cu];
     uint32_t temp_bitcost = 0;
@@ -413,13 +413,13 @@ static int search_cu_inter(const encoder_control * const encoder, int x, int y,
     }
     // Get MV candidates
     cur_cu->inter.mv_ref = ref_idx;
-    inter_get_mv_cand(encoder, x, y, depth, mv_cand, cur_cu, lcu);
+    inter_get_mv_cand(encoder_state, x, y, depth, mv_cand, cur_cu, lcu);
     cur_cu->inter.mv_ref = temp_ref_idx;
 
 #if SEARCH_MV_FULL_RADIUS
     temp_cost += search_mv_full(depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
 #else
-    temp_cost += hexagon_search(encoder, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
+    temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
 #endif
 
     merged = 0;
@@ -664,11 +664,11 @@ static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info *cu
  * Update lcu to have best modes at this depth.
  * \return Cost of best mode.
  */
-static int search_cu_intra(const encoder_control * const encoder,
+static int search_cu_intra(const encoder_state * const encoder_state,
                            const int x_px, const int y_px,
                            const int depth, lcu_t *lcu, cabac_data *cabac)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f };
   const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
   const int8_t cu_width = (LCU_WIDTH >> (depth));
@@ -696,7 +696,7 @@ static int search_cu_intra(const encoder_control * const encoder,
   intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
 
   // Build reconstructed block to use in prediction with extrapolated borders
-  intra_build_reference_border(encoder, x_px, y_px, cu_width * 2 + 8,
+  intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8,
                                rec_buffer, cu_width * 2 + 8, 0,
                                cur_pic->width,
                                cur_pic->height,
@@ -709,7 +709,7 @@ static int search_cu_intra(const encoder_control * const encoder,
     uint32_t bitcost = UINT32_MAX;
     pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
     unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
-    mode = intra_prediction(encoder,ref_pixels, LCU_WIDTH,
+    mode = intra_prediction(encoder_state,ref_pixels, LCU_WIDTH,
                             cu_in_rec_buffer, cu_width * 2 + 8, cu_width,
                             &cost, candidate_modes, &bitcost, cabac);
     cur_cu->intra[pu_index].mode = (int8_t)mode;
@@ -728,7 +728,7 @@ static int search_cu_intra(const encoder_control * const encoder,
  * coding (bitcost * lambda) and cost for coding coefficients (estimated
  * here as (coefficient_sum * 1.5) * lambda)
  */
-static int lcu_get_final_cost(const encoder_control * const encoder,
+static int lcu_get_final_cost(const encoder_state * const encoder_state,
                               cabac_data *cabac,
                               const int x_px, const int y_px,
                               const int depth, lcu_t *lcu)
@@ -737,6 +737,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
   int x_local = (x_px&0x3f), y_local = (y_px&0x3f);
   int cost = 0;
   int coeff_cost = 0;
+  const int rdo = encoder_state->encoder_control->rdo;
 
   int width = LCU_WIDTH>>depth;
   int x,y;
@@ -759,7 +760,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
     }
   }
 
-  if(encoder->rdo == 1) {
+  if(rdo == 1) {
     // sum of coeffs
     for (y = y_local; y < y_local+width; ++y) {
       for (x = x_local; x < x_local+width; ++x) {
@@ -774,11 +775,11 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
       }
     }
     // Coefficient costs
-    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder->cur_lambda_cost+0.5);
+    cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->cur_lambda_cost+0.5);
 
   // Calculate actual bit costs for coding the coeffs
   // RDO
-  } else if (encoder->rdo == 2) {
+  } else if (rdo == 2) {
     coefficient coeff_temp[32*32];
     coefficient coeff_temp_u[16*16];
     coefficient coeff_temp_v[16*16];
@@ -817,7 +818,7 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
 
       // Calculate luma coeff bit count
       picture_blit_coeffs(&lcu->coeff.y[(blk_y*LCU_WIDTH)+blk_x],coeff_temp,blockwidth,blockwidth,LCU_WIDTH,blockwidth);
-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp, blockwidth, 0, luma_scan_mode);
 
       blk_y >>= 1;
       blk_x >>= 1;
@@ -832,15 +833,15 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
       picture_blit_coeffs(&lcu->coeff.u[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_u,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
       picture_blit_coeffs(&lcu->coeff.v[(blk_y*(LCU_WIDTH>>1))+blk_x],coeff_temp_v,blockwidth,blockwidth,LCU_WIDTH>>1,blockwidth);
 
-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
-      coeff_cost += get_coeff_cost(encoder, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_u, blockwidth, 2, chroma_scan_mode);
+      coeff_cost += get_coeff_cost(encoder_state->encoder_control, cabac, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
     }
     // Multiply bit count with lambda to get RD-cost
-    cost += coeff_cost * (int32_t)(encoder->cur_lambda_cost+0.5);
+    cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5);
   }
 
   // Bitcost
-  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder->cur_lambda_cost+0.5);
+  cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5);
 
   return cost;
 }
@@ -855,9 +856,9 @@ static int lcu_get_final_cost(const encoder_control * const encoder,
  * - All the final data for the LCU gets eventually copied to depth 0, which
  *   will be the final output of the recursion.
  */
-static int search_cu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
+static int search_cu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   int cu_width = LCU_WIDTH >> depth;
   int cost = MAX_INT;
   cu_info *cur_cu;
@@ -885,7 +886,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
         depth >= MIN_INTER_SEARCH_DEPTH &&
         depth <= MAX_INTER_SEARCH_DEPTH)
     {
-      int mode_cost = search_cu_inter(encoder, x, y, depth, &work_tree[depth]);
+      int mode_cost = search_cu_inter(encoder_state, x, y, depth, &work_tree[depth]);
       if (mode_cost < cost) {
         cost = mode_cost;
         cur_cu->type = CU_INTER;
@@ -895,7 +896,7 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
     if (depth >= MIN_INTRA_SEARCH_DEPTH &&
         depth <= MAX_INTRA_SEARCH_DEPTH)
     {
-      int mode_cost = search_cu_intra(encoder, x, y, depth, &work_tree[depth], cabac);
+      int mode_cost = search_cu_intra(encoder_state, x, y, depth, &work_tree[depth], cabac);
       if (mode_cost < cost) {
         cost = mode_cost;
         cur_cu->type = CU_INTRA;
@@ -906,10 +907,10 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
     // mode search of adjacent CUs.
     if (cur_cu->type == CU_INTRA) {
       lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
-      intra_recon_lcu(encoder, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
+      intra_recon_lcu(encoder_state, cabac, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
     } else if (cur_cu->type == CU_INTER) {
-      inter_recon_lcu(encoder, encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
-      encode_transform_tree(encoder, cabac, x, y, depth, &work_tree[depth]);
+      inter_recon_lcu(encoder_state->encoder_control, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
+      encode_transform_tree(encoder_state, cabac, x, y, depth, &work_tree[depth]);
 
       if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
         cur_cu->merged = 0;
@@ -922,23 +923,23 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
     }
   }
   if (cur_cu->type == CU_INTRA || cur_cu->type == CU_INTER) {
-    cost = lcu_get_final_cost(encoder, cabac, x, y, depth, &work_tree[depth]);
+    cost = lcu_get_final_cost(encoder_state, cabac, x, y, depth, &work_tree[depth]);
   }
 
   // Recursively split all the way to max search depth.
   if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
     int half_cu = cu_width / 2;
-    int split_cost = (int)(4.5 * encoder->cur_lambda_cost);
+    int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);
 
     // If skip mode was selected for the block, skip further search.
     // Skip mode means there's no coefficients in the block, so splitting
     // might not give any better results but takes more time to do.
     if(cur_cu->type == CU_NOTSET || cur_cu->coeff_top_y[depth] ||
        cur_cu->coeff_top_u[depth] || cur_cu->coeff_top_v[depth]) {
-      split_cost += search_cu(encoder, cabac, x,           y,           depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x + half_cu, y,           depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x,           y + half_cu, depth + 1, work_tree);
-      split_cost += search_cu(encoder, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x,           y,           depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x + half_cu, y,           depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x,           y + half_cu, depth + 1, work_tree);
+      split_cost += search_cu(encoder_state, cabac, x + half_cu, y + half_cu, depth + 1, work_tree);
     } else {
       split_cost = INT_MAX;
     }
@@ -963,9 +964,9 @@ static int search_cu(const encoder_control * const encoder, cabac_data *cabac, i
  * - Copy reference pixels from neighbouring LCUs.
  * - Copy reference pixels from this LCU.
  */
-static void init_lcu_t(const encoder_control * const encoder, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
+static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
 {
-  const picture * const cur_pic = encoder->in.cur_pic;
+  const picture * const cur_pic = encoder_state->cur_pic;
   
   // Copy reference cu_info structs from neighbouring LCUs.
   {
@@ -1045,7 +1046,7 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const
 
   // Copy LCU pixels.
   {
-    const picture * const pic = encoder->in.cur_pic;
+    const picture * const pic = encoder_state->cur_pic;
     int pic_width = cur_pic->width;
     int x_max = MIN(x + LCU_WIDTH, pic_width) - x;
     int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y;
@@ -1069,13 +1070,13 @@ static void init_lcu_t(const encoder_control * const encoder, const int x, const
 /**
  * Copy CU and pixel data to it's place in picture datastructure.
  */
-static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px, int y_px, const lcu_t *lcu)
+static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x_px, int y_px, const lcu_t *lcu)
 {
   // Copy non-reference CUs to picture.
   {
     const int x_cu = x_px >> MAX_DEPTH;
     const int y_cu = y_px >> MAX_DEPTH;
-    const picture * const cur_pic = encoder->in.cur_pic;
+    const picture * const cur_pic = encoder_state->cur_pic;
     const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH;
     cu_info *const cu_array = cur_pic->cu_array[MAX_DEPTH];
 
@@ -1095,7 +1096,7 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,
 
   // Copy pixels to picture.
   {
-    picture * const pic = encoder->in.cur_pic;
+    picture * const pic = encoder_state->cur_pic;
     const int pic_width = pic->width;
     const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px;
     const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px;
@@ -1123,18 +1124,18 @@ static void copy_lcu_to_cu_data(const encoder_control * const encoder, int x_px,
  * Search LCU for modes.
  * - Best mode gets copied to current picture.
  */
-void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
+void search_lcu(encoder_state * const encoder_state, cabac_data *cabac, int x, int y, yuv_t* hor_buf, yuv_t* ver_buf)
 {
   lcu_t work_tree[MAX_PU_DEPTH + 1];
   int depth;
   // Initialize work tree.
   for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) {
     memset(&work_tree[depth], 0, sizeof(work_tree[depth]));
-    init_lcu_t(encoder, x, y, &work_tree[depth], hor_buf, ver_buf);
+    init_lcu_t(encoder_state, x, y, &work_tree[depth], hor_buf, ver_buf);
   }
 
   // Start search from depth 0.
-  search_cu(encoder, cabac, x, y, 0, work_tree);
+  search_cu(encoder_state, cabac, x, y, 0, work_tree);
 
-  copy_lcu_to_cu_data(encoder, x, y, &work_tree[0]);
+  copy_lcu_to_cu_data(encoder_state, x, y, &work_tree[0]);
 }
diff --git a/src/search.h b/src/search.h
index 6f9f69d3..68642b84 100644
--- a/src/search.h
+++ b/src/search.h
@@ -30,6 +30,6 @@
 #include "picture.h"
 
 
-void search_lcu(const encoder_control * const encoder, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);
+void search_lcu(encoder_state *encoder_state, cabac_data *cabac, int x, int y, yuv_t *hor_buf, yuv_t *ver_buf);
 
 #endif
diff --git a/src/transform.c b/src/transform.c
index 2d6bc148..4cc1d26d 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -623,9 +623,10 @@ void itransform2d(const encoder_control * const encoder,int16_t *block,int16_t *
  * \brief quantize transformed coefficents
  *
  */
-void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
            int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type )
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   const uint32_t log2_block_size = g_convert_to_bit[ width ] + 2;
   const uint32_t * const scan = g_sig_last_scan[ scan_idx ][ log2_block_size - 1 ];
 
@@ -633,7 +634,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
   int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2];
   #endif
 
-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
 
   //New block for variable definitions
   {
@@ -645,7 +646,7 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
 
   int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
   int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
-  int32_t add = ((encoder->in.cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
+  int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
 
   int32_t q_bits8 = q_bits - 8;
   for (n = 0; n < width * height; n++) {
@@ -754,13 +755,14 @@ void quant(const encoder_control * const encoder, int16_t *coef, int16_t *q_coef
  * \brief inverse quantize transformed and quantized coefficents
  *
  */
-void dequant(const encoder_control * const encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
+void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type)
 {
+  const encoder_control * const encoder = encoder_state->encoder_control;
   int32_t shift,add,coeff_q,clip_q_coef;
   int32_t n;
   int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);
 
-  int32_t qp_scaled = get_scaled_qp(type, encoder->QP, 0);
+  int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
 
   shift = 20 - QUANT_SHIFT - transform_shift;
 
diff --git a/src/transform.h b/src/transform.h
index 95c2d1eb..4cc8870a 100644
--- a/src/transform.h
+++ b/src/transform.h
@@ -34,9 +34,9 @@ extern const uint8_t g_chroma_scale[58];
 extern const int16_t g_inv_quant_scales[6];
 
 
-void quant(const encoder_control *encoder, int16_t *coef, int16_t *q_coef, int32_t width,
+void quant(const encoder_state *encoder_state, int16_t *coef, int16_t *q_coef, int32_t width,
            int32_t height, uint32_t *ac_sum, int8_t type, int8_t scan_idx, int8_t block_type);
-void dequant(const encoder_control *encoder, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);
+void dequant(const encoder_state *encoder_state, int16_t *q_coef, int16_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type);
 
 void transformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);
 void itransformskip(const encoder_control *encoder, int16_t *block,int16_t *coeff, int8_t block_size);