Started digging into the intra prediction stuff

2024-11-23 18:14:06 +00:00 · 2012-06-11 18:43:29 +03:00 · 2012-06-11 18:43:29 +03:00 · 9bfd542b73
parent bf11aad827
commit 9bfd542b73
10 changed files with 505 additions and 71 deletions
--- a/build/VS2010/HEVC_encoder.vcxproj
+++ b/build/VS2010/HEVC_encoder.vcxproj
@ -84,8 +84,10 @@
    <ClCompile Include="..\..\src\config.c" />
    <ClCompile Include="..\..\src\encmain.c" />
    <ClCompile Include="..\..\src\encoder.c" />
+    <ClCompile Include="..\..\src\intra.c" />
    <ClCompile Include="..\..\src\nal.c" />
    <ClCompile Include="..\..\src\picture.c" />
+    <ClCompile Include="..\..\src\transform.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\src\bitstream.h" />
@ -93,8 +95,10 @@
    <ClInclude Include="..\..\src\config.h" />
    <ClInclude Include="..\..\src\encoder.h" />
    <ClInclude Include="..\..\src\global.h" />
+    <ClInclude Include="..\..\src\intra.h" />
    <ClInclude Include="..\..\src\nal.h" />
    <ClInclude Include="..\..\src\picture.h" />
+    <ClInclude Include="..\..\src\transform.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
--- a/build/VS2010/HEVC_encoder.vcxproj.filters
+++ b/build/VS2010/HEVC_encoder.vcxproj.filters
@ -36,6 +36,12 @@
    <ClCompile Include="..\..\src\nal.c">
      <Filter>Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="..\..\src\intra.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\transform.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\src\global.h">
@ -59,5 +65,11 @@
    <ClInclude Include="..\..\src\nal.h">
      <Filter>Header Files</Filter>
    </ClInclude>
+    <ClInclude Include="..\..\src\intra.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\transform.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
  </ItemGroup>
 </Project>
--- a/src/encoder.c
+++ b/src/encoder.c
@ -52,6 +52,11 @@ void init_encoder_input(encoder_input* input,FILE* inputfile, uint32_t width, ui
  input->cur_pic.uData = (uint8_t *)malloc((width*height)>>2);
  input->cur_pic.vData = (uint8_t *)malloc((width*height)>>2);

+  /* Reconstruction buffers */
+  input->cur_pic.yRecData = (uint8_t *)malloc(width*height);
+  input->cur_pic.uRecData = (uint8_t *)malloc((width*height)>>2);
+  input->cur_pic.vRecData = (uint8_t *)malloc((width*height)>>2);
+
  /* Allocate memory for CU info 2D array */
  //ToDo: we don't need this much space on LCU...MAX_DEPTH-1
  input->cur_pic.CU = (CU_info**)malloc((MAX_DEPTH+1)*sizeof(CU_info*));
@ -68,12 +73,14 @@ void encode_one_frame(encoder_control* encoder)
  /* output parameters before first frame */
  if(encoder->frame == 0)
  {
+    /* Sequence Parameter Set (SPS) */
    encode_seq_parameter_set(encoder);
    bitstream_align(encoder->stream);
    bitstream_flush(encoder->stream);
    nal_write(encoder->output, encoder->stream->buffer, encoder->stream->buffer_pos, 1, NAL_SEQ_PARAMETER_SET, 1);
    bitstream_clear_buffer(encoder->stream);

+    /* Picture Parameter Set (PPS) */
    encode_pic_parameter_set(encoder);
    bitstream_align(encoder->stream);
    bitstream_flush(encoder->stream);
@ -94,6 +101,7 @@ void encode_one_frame(encoder_control* encoder)
  }
  else if(encoder->frame < 3)
  {
+    /* Non-IDR slice */
    cabac_start(&cabac);
    encoder->in.cur_pic.type = NAL_NONIDR_SLICE;
    encode_slice_header(encoder);
@ -143,6 +151,7 @@ void encode_seq_parameter_set(encoder_control* encoder)
 #ifdef _DEBUG
  printf("=========== Sequence Parameter Set ID: 0 ===========\n");
 #endif
+  /* ToDo: profile IDC and level IDC should be defined later on */
  WRITE_U(encoder->stream, 0, 8, "profile_idc");
  WRITE_U(encoder->stream, 0, 8, "reserved_zero_8bits");
  WRITE_U(encoder->stream, 0, 8, "level_idc");
@ -152,12 +161,13 @@ void encode_seq_parameter_set(encoder_control* encoder)
  WRITE_UE(encoder->stream, encoder->in.width, "pic_width_in_luma_samples");
  WRITE_UE(encoder->stream, encoder->in.height, "pic_height_in_luma_samples");
  WRITE_U(encoder->stream, 0, 1, "pic_cropping_flag");
+  /* ToDo: 10bit support? */
  WRITE_UE(encoder->stream, 0, "bit_depth_luma_minus8");
  WRITE_UE(encoder->stream, 0, "bit_depth_chroma_minus8");
  WRITE_U(encoder->stream, ENABLE_PCM, 1, "pcm_enabled_flag");
  #if ENABLE_PCM == 1
-  WRITE_U(encoder->stream, 7, 4, "pcm_bit_depth_luma_minus1");
-  WRITE_U(encoder->stream, 7, 4, "pcm_bit_depth_chroma_minus1");
+    WRITE_U(encoder->stream, 7, 4, "pcm_bit_depth_luma_minus1");
+    WRITE_U(encoder->stream, 7, 4, "pcm_bit_depth_chroma_minus1");
  #endif
  WRITE_U(encoder->stream, 0, 1, "qpprime_y_zero_transquant_bypass_flag");
  WRITE_UE(encoder->stream, 4, "log2_max_pic_order_cnt_lsb_minus4");
@ -166,7 +176,7 @@ void encode_seq_parameter_set(encoder_control* encoder)
  WRITE_UE(encoder->stream, 0, "max_latency_increase");
  WRITE_U(encoder->stream, 0, 1, "restricted_ref_pic_lists_flag");
  WRITE_UE(encoder->stream, 1, "log2_min_coding_block_size_minus3");
-  WRITE_UE(encoder->stream, 2, "log2_diff_max_min_coding_block_size");
+  WRITE_UE(encoder->stream, MAX_DEPTH, "log2_diff_max_min_coding_block_size");
  WRITE_UE(encoder->stream, 0, "log2_min_transform_block_size_minus2");
  WRITE_UE(encoder->stream, 3, "log2_diff_max_min_transform_block_size");

@ -198,8 +208,7 @@ void encode_seq_parameter_set(encoder_control* encoder)
  WRITE_UE(encoder->stream, 0, "num_short_term_ref_pic_sets");
  WRITE_U(encoder->stream, 0, 1, "long_term_ref_pics_present_flag");
  WRITE_U(encoder->stream, 0, 2, "tiles_or_entropy_coding_sync_idc");  
-	WRITE_U(encoder->stream, 0, 1, "sps_extension_flag");
-  //WRITE_U(encoder->stream, 0, 8, "stuffing");
+	WRITE_U(encoder->stream, 0, 1, "sps_extension_flag");  
 }

 void encode_slice_header(encoder_control* encoder)
@ -228,34 +237,53 @@ void encode_slice_header(encoder_control* encoder)
      WRITE_U(encoder->stream, encoder->frame, 8, "pic_order_cnt_lsb");
      WRITE_U(encoder->stream, 1, 1, "short_term_ref_pic_set_sps_flag");
      WRITE_UE(encoder->stream, 0, "short_term_ref_pic_set_idx");
-
    }
    //end if
  //end if
-  /*
-   Skip unpresent flags */
+  /* Skip flags that are not present */
  // if !entropy_slice_flag
    WRITE_UE(encoder->stream, 0, "slice_qp_delta");
    WRITE_UE(encoder->stream, 0, "5_minus_max_num_merge_cand");
 }
  
+/* CONTEXTS */
+/* ToDo: move somewhere else */
 cabac_ctx *SplitFlagSCModel;
-cabac_ctx g_SplitFlagSCModel[3];
-cabac_ctx PCMFlagSCModel;
+cabac_ctx g_SplitFlagSCModel[3]; /*!< \brief split flag context models */
+cabac_ctx g_IntraModeSCModel;    /*!< \brief intra mode context model */
+cabac_ctx g_ChromaPredSCModel[2]; /*!< \brief chroma prediction mode context models */
+cabac_ctx g_TransSubdivSCModel[4];    /*!< \brief transform subdivision flag context models */
+cabac_ctx g_QtCbfSCModel[8]; /*!< \brief QT CBF context models (seeded from INIT_QT_CBF) */
 cabac_ctx PartSizeSCModel;

 void encode_slice_data(encoder_control* encoder)
 {
-  uint16_t xCtb,yCtb;
+  uint16_t xCtb,yCtb,i;
  /* Initialize contexts */
+  /* ToDo: add P/B slice */
  cxt_init(&g_SplitFlagSCModel[0], encoder->QP, INIT_SPLIT_FLAG[SLICE_I][0]);
  cxt_init(&g_SplitFlagSCModel[1], encoder->QP, INIT_SPLIT_FLAG[SLICE_I][1]);
  cxt_init(&g_SplitFlagSCModel[2], encoder->QP, INIT_SPLIT_FLAG[SLICE_I][2]);

-  cxt_init(&PartSizeSCModel, encoder->QP, 154);
-  //g_SplitFlagSCModel[1].ucState = 47;
-  //g_SplitFlagSCModel[2].ucState = 36;
+  cxt_init(&g_IntraModeSCModel, encoder->QP, INIT_INTRA_PRED_MODE[SLICE_I]);

+  cxt_init(&g_ChromaPredSCModel[0], encoder->QP, INIT_CHROMA_PRED_MODE[SLICE_I][0]);
+  cxt_init(&g_ChromaPredSCModel[1], encoder->QP, INIT_CHROMA_PRED_MODE[SLICE_I][1]);
+  
+
+  for(i = 0; i < 4; i++)
+  {
+    cxt_init(&g_TransSubdivSCModel[i], encoder->QP, INIT_TRANS_SUBDIV_FLAG[SLICE_I][i]);
+  }
+  for(i = 0; i < 8; i++)
+  {
+    cxt_init(&g_QtCbfSCModel[i], encoder->QP, INIT_QT_CBF[SLICE_I][i]);
+  }
+
+  encoder->in.cur_pic.CU[1][0].type = CU_INTRA;
+  encoder->in.cur_pic.CU[1][2].type = CU_INTRA;  
+  
+  /* Loop through every LCU in the slice */
  for(yCtb = 0; yCtb < encoder->in.height_in_LCU; yCtb++)
  {
    uint8_t lastCUy = (yCtb == (encoder->in.height_in_LCU-1))?1:0;
@ -263,17 +291,15 @@ void encode_slice_data(encoder_control* encoder)
    {
      uint8_t lastCUx = (xCtb == (encoder->in.width_in_LCU-1))?1:0;
      uint8_t depth = 0;
- 
-      encode_coding_tree(encoder, xCtb<<2,yCtb<<2, depth);
-      //Terminating bit
+
+      /* Recursive function for looping through all the sub-blocks */
+      encode_coding_tree(encoder, xCtb<<MAX_DEPTH,yCtb<<MAX_DEPTH, depth);
+
+      /* signal Terminating bit */
      if(!lastCUx || !lastCUy)
      {
        cabac_encodeBinTrm(&cabac, 0);
      }
-      else
-      {
-        cabac_encodeBinTrm(&cabac, 1);
-      }
    }
  }
 }
@ -283,6 +309,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
  int i,x,y;
  uint8_t split_flag = (depth!=1)?1:0;
  uint8_t split_model = 0;
+
  /* Get left and top block split_flags and if they are present and true, increase model number */
  if(xCtb > 0 && GET_SPLITDATA(&(encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))-1+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<<MAX_DEPTH)])) == 1)
  {
@ -301,11 +328,13 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
    CABAC_BIN(&cabac, split_flag, "SplitFlag");
    if(split_flag)
    {
+      /* Split blocks and remember to change x and y block positions */
      uint8_t change = 1<<(MAX_DEPTH-1-depth);
      encode_coding_tree(encoder,xCtb,yCtb,depth+1);
      encode_coding_tree(encoder,xCtb+change,yCtb,depth+1);
      encode_coding_tree(encoder,xCtb,yCtb+change,depth+1);
      encode_coding_tree(encoder,xCtb+change,yCtb+change,depth+1);
+      /* We don't need to do anything else here */
      return;
    }
  }
@ -314,7 +343,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
    //if !intra PREDMODE
    /* if depth = MAX_DEPTH */
     //PartSize
-     if(depth == 2)
+     if(depth == MAX_DEPTH)
     {
       cabac.ctx = &PartSizeSCModel;
       CABAC_BIN(&cabac, 1, "PartSize");
@ -322,47 +351,70 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
   /*end partsize*/
   //If MODE_INTRA
    //cabac.ctx = &PCMFlagSCModel;
-    cabac_encodeBinTrm(&cabac, 1);
-    printf("\tIPCMFlag = 1\n");
-    cabac_finish(&cabac);
-    WRITE_U(cabac.stream, 1, 1, "stop_bit");
-    WRITE_U(cabac.stream, 0, 1, "numSubseqIPCM_flag");    
-    bitstream_align(cabac.stream);
-     /* PCM sample */
-    {
-      uint8_t *base = &encoder->in.cur_pic.yData[xCtb*16 + (yCtb*16)*encoder->in.width];
-      uint8_t *baseCb = &encoder->in.cur_pic.uData[(xCtb*8 + (yCtb*8)*encoder->in.width/2)];
-      uint8_t *baseCr = &encoder->in.cur_pic.vData[(xCtb*8 + (yCtb*8)*encoder->in.width/2)];
-      for(y = 0; y < 32; y++)
+     /* Code IPCM block */
+     if(encoder->in.cur_pic.CU[depth][(xCtb>>(MAX_DEPTH-depth))+(yCtb>>(MAX_DEPTH-depth))*(encoder->in.width_in_LCU<<MAX_DEPTH)].type <= CU_PCM)
+     {
+      cabac_encodeBinTrm(&cabac, 1);
+      //printf("\tIPCMFlag = 1\n");
+      cabac_finish(&cabac);
+      WRITE_U(cabac.stream, 1, 1, "stop_bit");
+      WRITE_U(cabac.stream, 0, 1, "numSubseqIPCM_flag");    
+      bitstream_align(cabac.stream);
+       /* PCM sample */
      {
-        for(x = 0; x < 32; x++)
-        {          
-          bitstream_put(cabac.stream, base[x+y*encoder->in.width], 8);
-        }
-      }
-      //Cb
-      for(y = 0; y < 16; y++)
-      {
-        for(x = 0; x < 16; x++)
+        uint8_t *base = &encoder->in.cur_pic.yData[xCtb*(LCU_WIDTH>>(depth+1)) + (yCtb*(LCU_WIDTH>>(depth+1)))*encoder->in.width];
+        uint8_t *baseCb = &encoder->in.cur_pic.uData[(xCtb*(LCU_WIDTH>>(depth+2)) + (yCtb*(LCU_WIDTH>>(depth+2)))*encoder->in.width/2)];
+        uint8_t *baseCr = &encoder->in.cur_pic.vData[(xCtb*(LCU_WIDTH>>(depth+2)) + (yCtb*(LCU_WIDTH>>(depth+2)))*encoder->in.width/2)];
+        for(y = 0; y < LCU_WIDTH>>depth; y++)
        {
-          bitstream_put(cabac.stream, baseCb[x+y*(encoder->in.width>>1)], 8);
+          for(x = 0; x < LCU_WIDTH>>depth; x++)
+          {          
+            bitstream_put(cabac.stream, base[x+y*encoder->in.width], 8);
+          }
+        }
+        //Cb
+        for(y = 0; y < LCU_WIDTH>>(depth+1); y++)
+        {
+          for(x = 0; x < LCU_WIDTH>>(depth+1); x++)
+          {
+            bitstream_put(cabac.stream, baseCb[x+y*(encoder->in.width>>1)], 8);
+          }
        }
-      }

-      //Cr
-      for(y = 0; y < 16; y++)
-      {
-        for(x = 0; x < 16; x++)
+        //Cr
+        for(y = 0; y < LCU_WIDTH>>(depth+1); y++)
        {
-          bitstream_put(cabac.stream, baseCr[x+y*(encoder->in.width>>1)], 8);
+          for(x = 0; x < LCU_WIDTH>>(depth+1); x++)
+          {
+            bitstream_put(cabac.stream, baseCr[x+y*(encoder->in.width>>1)], 8);
+          }
        }
      }
-    }
-    /* end PCM sample */
-    cabac_start(&cabac);
-   //endif
-   
-   /* end prediction unit */  
+      /* end PCM sample */
+      cabac_start(&cabac);
+
+     } /* end Code IPCM block */
+     else
+     {
+       cabac_encodeBinTrm(&cabac, 0); /* IPCMFlag == 0 */
+       
+       cabac.ctx = &g_IntraModeSCModel;
+       CABAC_BIN(&cabac,0,"IntraPred");
+
+       cabac.ctx = &g_ChromaPredSCModel[0];
+       CABAC_BIN(&cabac,0,"IntraPredChroma");
+
+       cabac.ctx = &g_TransSubdivSCModel[1]; /* //uiLog2TransformBlockSize */
+       CABAC_BIN(&cabac,0,"TransformSubdivFlag");
+
+       /* Transform tree */
+
+       /* end Transform tree */
+
+     }
+   //endif   
+   /* end prediction unit */
+
   //cabac_encodeBin(&cabac, 0); //prev_intra_luma_pred_flag

   //cabac_encodeBin(&cabac, 1); //rem_intra_luma_pred_mode
--- a/src/encoder.h
+++ b/src/encoder.h
@ -10,8 +10,8 @@
    
    Structures for encoding
 */
-#ifndef _ENCODER_H
-#define _ENCODER_H
+#ifndef __ENCODER_H
+#define __ENCODER_H

 #include "picture.h"
 #include "bitstream.h"
@ -62,12 +62,28 @@ void encode_slice_header(encoder_control* encoder);
 void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth);


-static const uint8_t 
-      INIT_SPLIT_FLAG[3][3] =  
-      {
-        { 107,  139,  126, },
-        { 107,  139,  126, }, 
-        { 139,  141,  157, }
-      };
+static const uint8_t  INIT_SPLIT_FLAG[3][3] =  
+                       { { 107,  139,  126 }, { 107,  139,  126 },  { 139,  141,  157 } }; /* split flag context init values, [slice type][context] */
+
+static const uint8_t INIT_INTRA_PRED_MODE[3] = { 183,154,184 }; /* intra mode context init value, [slice type] */
+
+static const uint8_t INIT_CHROMA_PRED_MODE[3][2] = { { 152,  139 }, { 152,  139 }, {  63,  139 } }; /* chroma prediction mode context init values, [slice type][context] */
+
+#define CNU 154 /* NOTE(review): presumably the "context not used" placeholder init value - confirm */
+static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][4] = /* transform subdivision flag context init values, [slice type][context] */
+{
+  { CNU,  153,  138,  138 }, 
+  { CNU,  124,  138,   94 }, 
+  { CNU,  224,  167,  122 }
+};
+
+static const uint8_t INIT_QT_CBF[3][8] =  /* QT CBF context init values, [slice type][context] */
+{
+  { 153,  111,  CNU,  CNU,  CNU,  149,   92,  167 }, 
+  { 153,  111,  CNU,  CNU,  CNU,  149,  107,  167 }, 
+  { 111,  141,  CNU,  CNU,  CNU,   94,  138,  182 }
+};
+
+

 #endif
--- a/src/intra.c
+++ b/src/intra.c
@ -0,0 +1,13 @@
+/**
+ *  HEVC Encoder
+ *  - Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file intra.c
+    \brief Intra functions
+    \author Marko Viitanen
+    \date 2012-06
+    
+    Intra functions
+*/
+
--- a/src/intra.h
+++ b/src/intra.h
@ -0,0 +1,17 @@
+/**
+ *  HEVC Encoder
+ *  - Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file intra.h
+    \brief Intra functions
+    \author Marko Viitanen
+    \date 2012-06
+    
+    Intra functions
+*/
+#ifndef __INTRA_H
+#define __INTRA_H
+
+
+#endif
--- a/src/nal.c
+++ b/src/nal.c
@ -30,21 +30,22 @@ void nal_write(FILE* output, uint8_t* buffer, uint32_t buffer_len, uint8_t nal_r
  uint8_t byte;
  uint32_t i;
  uint8_t zerocount=0;
+
+  /* Some useful constants */
  const uint8_t emulation_prevention_three_byte = 0x03;
  const uint8_t start_code_prefix_one_3bytes = 0x01;
  const uint8_t zero = 0x00;

-  //start_code_prefix_one_3bytes
-  //fwrite(&zero, 1, 1, output);
+  /*start_code_prefix_one_3bytes */  
  fwrite(&zero, 1, 1, output);
  fwrite(&zero, 1, 1, output);
  fwrite(&start_code_prefix_one_3bytes, 1, 1, output);

-  //forbidden_zero_flag(1) + nal_ref_flag(1) + nal_unit_type(6)
+  /* forbidden_zero_flag(1) + nal_ref_flag(1) + nal_unit_type(6) */
  byte = nal_ref<<6 | nal_type;
  fwrite(&byte, 1, 1, output);

-  //Temporal_id(3) + reserved_one_5bits(5)
+  /* Temporal_id(3) + reserved_one_5bits(5) */
  byte = temporal_id << 5 | 1;
  fwrite(&byte, 1, 1, output);

@ -58,16 +59,22 @@ void nal_write(FILE* output, uint8_t* buffer, uint32_t buffer_len, uint8_t nal_r
      zerocount = 0;
    }
    if(buffer[i] == 0)
+    {
      zerocount++;
+    }
    else
+    {
      zerocount = 0;
+    }

    /* Write the actual data */
    fwrite(&buffer[i], 1, 1, output);
  }

-  //If last byte was 0, add emulation_prevention_three_byte
+  /* If last byte was 0, add emulation_prevention_three_byte */
  if(buffer[buffer_len-1] == 0)
+  {
    fwrite(&emulation_prevention_three_byte, 1, 1, output);
+  }

 }
--- a/src/picture.h
+++ b/src/picture.h
@ -20,7 +20,7 @@
 *  @{
 */

-enum { CU_NOTSET = 0, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER};
+enum { CU_NOTSET = 0,CU_PCM, CU_SKIP, CU_SPLIT, CU_INTRA, CU_INTER };

 #define GET_SPLITDATA(CU) ((CU)->split)
 #define SET_SPLITDATA(CU,flag) { (CU)->split=(flag); }
@ -53,6 +53,11 @@ typedef struct
  uint8_t* yData;     /*!< \brief Pointer to Y-data  */
  uint8_t* uData;     /*!< \brief Pointer to U-data  */
  uint8_t* vData;     /*!< \brief Pointer to V-data  */
+
+  uint8_t* yRecData;     /*!< \brief Pointer to reconstructed Y-data  */
+  uint8_t* uRecData;     /*!< \brief Pointer to reconstructed U-data  */
+  uint8_t* vRecData;     /*!< \brief Pointer to reconstructed V-data  */
+
  int width;          /*!< \brief Picture width */
  int height;         /*!< \brief Picture height  */
  uint8_t referenced; /*!< \brief Is this picture referenced */
--- a/src/transform.c
+++ b/src/transform.c
@ -0,0 +1,291 @@
+/**
+ *  HEVC Encoder
+ *  - Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file transform.c
+    \brief Transform functions
+    \author Marko Viitanen
+    \date 2012-06
+    
+    Transform functions
+*/
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "global.h"
+
+
+const int16_t g_aiT4[4][4] = /* 4-point transform coefficients; partialButterfly4() reads columns 0-1 of every row */
+{
+  { 64, 64, 64, 64},
+  { 83, 36,-36,-83},
+  { 64,-64,-64, 64},
+  { 36,-83, 83,-36}
+};
+
+const int16_t g_aiT8[8][8] = /* 8-point transform coefficients; partialButterfly8() reads columns 0-1 of the even rows and columns 0-3 of the odd rows */
+{
+  { 64, 64, 64, 64, 64, 64, 64, 64},
+  { 89, 75, 50, 18,-18,-50,-75,-89},
+  { 83, 36,-36,-83,-83,-36, 36, 83},
+  { 75,-18,-89,-50, 50, 89, 18,-75},
+  { 64,-64,-64, 64, 64,-64,-64, 64},
+  { 50,-89, 18, 75,-75,-18, 89,-50},
+  { 36,-83, 83,-36,-36, 83,-83, 36},
+  { 18,-50, 75,-89, 89,-75, 50,-18}
+};
+
+const int16_t g_aiT16[16][16] = /* 16-point transform coefficients; partialButterfly16() reads at most the first 8 columns of each row */
+{
+  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+  { 90, 87, 80, 70, 57, 43, 25,  9, -9,-25,-43,-57,-70,-80,-87,-90},
+  { 89, 75, 50, 18,-18,-50,-75,-89,-89,-75,-50,-18, 18, 50, 75, 89},
+  { 87, 57,  9,-43,-80,-90,-70,-25, 25, 70, 90, 80, 43, -9,-57,-87},
+  { 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83},
+  { 80,  9,-70,-87,-25, 57, 90, 43,-43,-90,-57, 25, 87, 70, -9,-80},
+  { 75,-18,-89,-50, 50, 89, 18,-75,-75, 18, 89, 50,-50,-89,-18, 75},
+  { 70,-43,-87,  9, 90, 25,-80,-57, 57, 80,-25,-90, -9, 87, 43,-70},
+  { 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64},
+  { 57,-80,-25, 90, -9,-87, 43, 70,-70,-43, 87,  9,-90, 25, 80,-57},
+  { 50,-89, 18, 75,-75,-18, 89,-50,-50, 89,-18,-75, 75, 18,-89, 50},
+  { 43,-90, 57, 25,-87, 70,  9,-80, 80, -9,-70, 87,-25,-57, 90,-43},
+  { 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36},
+  { 25,-70, 90,-80, 43,  9,-57, 87,-87, 57, -9,-43, 80,-90, 70,-25},
+  { 18,-50, 75,-89, 89,-75, 50,-18,-18, 50,-75, 89,-89, 75,-50, 18},
+  {  9,-25, 43,-57, 70,-80, 87,-90, 90,-87, 80,-70, 57,-43, 25, -9}
+};
+
+const int16_t g_aiT32[32][32] = /* 32-point transform coefficients; partialButterfly32() reads at most the first 16 columns of each row */
+{
+  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
+  { 90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13,  4, -4,-13,-22,-31,-38,-46,-54,-61,-67,-73,-78,-82,-85,-88,-90,-90},
+  { 90, 87, 80, 70, 57, 43, 25,  9, -9,-25,-43,-57,-70,-80,-87,-90,-90,-87,-80,-70,-57,-43,-25, -9,  9, 25, 43, 57, 70, 80, 87, 90},
+  { 90, 82, 67, 46, 22, -4,-31,-54,-73,-85,-90,-88,-78,-61,-38,-13, 13, 38, 61, 78, 88, 90, 85, 73, 54, 31,  4,-22,-46,-67,-82,-90},
+  { 89, 75, 50, 18,-18,-50,-75,-89,-89,-75,-50,-18, 18, 50, 75, 89, 89, 75, 50, 18,-18,-50,-75,-89,-89,-75,-50,-18, 18, 50, 75, 89},
+  { 88, 67, 31,-13,-54,-82,-90,-78,-46, -4, 38, 73, 90, 85, 61, 22,-22,-61,-85,-90,-73,-38,  4, 46, 78, 90, 82, 54, 13,-31,-67,-88},
+  { 87, 57,  9,-43,-80,-90,-70,-25, 25, 70, 90, 80, 43, -9,-57,-87,-87,-57, -9, 43, 80, 90, 70, 25,-25,-70,-90,-80,-43,  9, 57, 87},
+  { 85, 46,-13,-67,-90,-73,-22, 38, 82, 88, 54, -4,-61,-90,-78,-31, 31, 78, 90, 61,  4,-54,-88,-82,-38, 22, 73, 90, 67, 13,-46,-85},
+  { 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83},
+  { 82, 22,-54,-90,-61, 13, 78, 85, 31,-46,-90,-67,  4, 73, 88, 38,-38,-88,-73, -4, 67, 90, 46,-31,-85,-78,-13, 61, 90, 54,-22,-82},
+  { 80,  9,-70,-87,-25, 57, 90, 43,-43,-90,-57, 25, 87, 70, -9,-80,-80, -9, 70, 87, 25,-57,-90,-43, 43, 90, 57,-25,-87,-70,  9, 80},
+  { 78, -4,-82,-73, 13, 85, 67,-22,-88,-61, 31, 90, 54,-38,-90,-46, 46, 90, 38,-54,-90,-31, 61, 88, 22,-67,-85,-13, 73, 82,  4,-78},
+  { 75,-18,-89,-50, 50, 89, 18,-75,-75, 18, 89, 50,-50,-89,-18, 75, 75,-18,-89,-50, 50, 89, 18,-75,-75, 18, 89, 50,-50,-89,-18, 75},
+  { 73,-31,-90,-22, 78, 67,-38,-90,-13, 82, 61,-46,-88, -4, 85, 54,-54,-85,  4, 88, 46,-61,-82, 13, 90, 38,-67,-78, 22, 90, 31,-73},
+  { 70,-43,-87,  9, 90, 25,-80,-57, 57, 80,-25,-90, -9, 87, 43,-70,-70, 43, 87, -9,-90,-25, 80, 57,-57,-80, 25, 90,  9,-87,-43, 70},
+  { 67,-54,-78, 38, 85,-22,-90,  4, 90, 13,-88,-31, 82, 46,-73,-61, 61, 73,-46,-82, 31, 88,-13,-90, -4, 90, 22,-85,-38, 78, 54,-67},
+  { 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64},
+  { 61,-73,-46, 82, 31,-88,-13, 90, -4,-90, 22, 85,-38,-78, 54, 67,-67,-54, 78, 38,-85,-22, 90,  4,-90, 13, 88,-31,-82, 46, 73,-61},
+  { 57,-80,-25, 90, -9,-87, 43, 70,-70,-43, 87,  9,-90, 25, 80,-57,-57, 80, 25,-90,  9, 87,-43,-70, 70, 43,-87, -9, 90,-25,-80, 57},
+  { 54,-85, -4, 88,-46,-61, 82, 13,-90, 38, 67,-78,-22, 90,-31,-73, 73, 31,-90, 22, 78,-67,-38, 90,-13,-82, 61, 46,-88,  4, 85,-54},
+  { 50,-89, 18, 75,-75,-18, 89,-50,-50, 89,-18,-75, 75, 18,-89, 50, 50,-89, 18, 75,-75,-18, 89,-50,-50, 89,-18,-75, 75, 18,-89, 50},
+  { 46,-90, 38, 54,-90, 31, 61,-88, 22, 67,-85, 13, 73,-82,  4, 78,-78, -4, 82,-73,-13, 85,-67,-22, 88,-61,-31, 90,-54,-38, 90,-46},
+  { 43,-90, 57, 25,-87, 70,  9,-80, 80, -9,-70, 87,-25,-57, 90,-43,-43, 90,-57,-25, 87,-70, -9, 80,-80,  9, 70,-87, 25, 57,-90, 43},
+  { 38,-88, 73, -4,-67, 90,-46,-31, 85,-78, 13, 61,-90, 54, 22,-82, 82,-22,-54, 90,-61,-13, 78,-85, 31, 46,-90, 67,  4,-73, 88,-38},
+  { 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36},
+  { 31,-78, 90,-61,  4, 54,-88, 82,-38,-22, 73,-90, 67,-13,-46, 85,-85, 46, 13,-67, 90,-73, 22, 38,-82, 88,-54, -4, 61,-90, 78,-31},
+  { 25,-70, 90,-80, 43,  9,-57, 87,-87, 57, -9,-43, 80,-90, 70,-25,-25, 70,-90, 80,-43, -9, 57,-87, 87,-57,  9, 43,-80, 90,-70, 25},
+  { 22,-61, 85,-90, 73,-38, -4, 46,-78, 90,-82, 54,-13,-31, 67,-88, 88,-67, 31, 13,-54, 82,-90, 78,-46,  4, 38,-73, 90,-85, 61,-22},
+  { 18,-50, 75,-89, 89,-75, 50,-18,-18, 50,-75, 89,-89, 75,-50, 18, 18,-50, 75,-89, 89,-75, 50,-18,-18, 50,-75, 89,-89, 75,-50, 18},
+  { 13,-38, 61,-78, 88,-90, 85,-73, 54,-31,  4, 22,-46, 67,-82, 90,-90, 82,-67, 46,-22, -4, 31,-54, 73,-85, 90,-88, 78,-61, 38,-13},
+  {  9,-25, 43,-57, 70,-80, 87,-90, 90,-87, 80,-70, 57,-43, 25, -9, -9, 25,-43, 57,-70, 80,-87, 90,-90, 87,-80, 70,-57, 43,-25,  9},
+  {  4,-13, 22,-31, 38,-46, 54,-61, 67,-73, 78,-82, 85,-88, 90,-90, 90,-90, 88,-85, 82,-78, 73,-67, 61,-54, 46,-38, 31,-22, 13, -4}
+};
+
+/*! \brief Forward 4-point partial butterfly applied to `line` rows of 4 samples.
+ *
+ *  For each j in [0, line) the function reads src[4*j .. 4*j+3] and writes four
+ *  results to dst[j], dst[line+j], dst[2*line+j] and dst[3*line+j], i.e. the
+ *  output is stored transposed with a stride of `line`. Every result is rounded
+ *  by adding 1<<(shift-1) and then right-shifted by `shift`.
+ *
+ *  \param src   input samples, 4*line values are read
+ *  \param dst   output buffer, 4*line values are written
+ *  \param shift right shift applied to each result; must be >= 1 because the
+ *               rounding offset is computed as 1<<(shift-1)
+ *  \param line  number of 4-sample rows to process
+ */
+void partialButterfly4(short *src,short *dst,int shift, int line)
+{
+  int j;  
+  int E[2],O[2];
+  int add = 1<<(shift-1); /* rounding offset; undefined for shift == 0 */
+
+  for (j=0; j<line; j++)
+  {    
+    /* E and O: sums and differences of the mirrored sample pairs (0,3) and (1,2) */
+    E[0] = src[0] + src[3];
+    O[0] = src[0] - src[3];
+    E[1] = src[1] + src[2];
+    O[1] = src[1] - src[2];
+
+    dst[0] = (g_aiT4[0][0]*E[0] + g_aiT4[0][1]*E[1] + add)>>shift; /* even rows 0 and 2 use the sums */
+    dst[2*line] = (g_aiT4[2][0]*E[0] + g_aiT4[2][1]*E[1] + add)>>shift;
+    dst[line] = (g_aiT4[1][0]*O[0] + g_aiT4[1][1]*O[1] + add)>>shift; /* odd rows 1 and 3 use the differences */
+    dst[3*line] = (g_aiT4[3][0]*O[0] + g_aiT4[3][1]*O[1] + add)>>shift;
+
+    src += 4; /* next input row */
+    dst ++;   /* next output column */
+  }
+}
+
+// Fast forward DST on a 4x4 block. Per the original note, full matrix multiplication for DST and this fast
+// algorithm give identical results. Reads block[0..15], writes coeff[0..15]; shift must be >= 1 (rounding uses 1<<(shift-1)).
+void fastForwardDst(short *block,short *coeff,int shift)  // block: 16 input samples, coeff: 16 output coefficients
+{
+  int i, c[4];
+  int rnd_factor = 1<<(shift-1); // rounding offset; undefined for shift == 0
+  for (i=0; i<4; i++)
+  {
+    // Intermediate variables shared by the four outputs of input row i (block[4*i .. 4*i+3])
+    c[0] = block[4*i+0] + block[4*i+3];
+    c[1] = block[4*i+1] + block[4*i+3];
+    c[2] = block[4*i+0] - block[4*i+1];
+    c[3] = 74* block[4*i+2];
+
+    // The four results of input row i are stored with stride 4, i.e. as column i of coeff
+    coeff[   i] =  ( 29 * c[0] + 55 * c[1]         + c[3]               + rnd_factor ) >> shift;
+    coeff[ 4+i] =  ( 74 * (block[4*i+0]+ block[4*i+1] - block[4*i+3])   + rnd_factor ) >> shift;
+    coeff[ 8+i] =  ( 29 * c[2] + 55 * c[0]         - c[3]               + rnd_factor ) >> shift;
+    coeff[12+i] =  ( 55 * c[2] - 29 * c[1]         + c[3]               + rnd_factor ) >> shift;
+  }
+}
+
+
+/*! \brief Forward 8-point partial butterfly applied to `line` rows of 8 samples.
+ *
+ *  For each j in [0, line) the function reads src[8*j .. 8*j+7] and writes eight
+ *  results to dst[k*line + j], k = 0..7 (transposed output, stride `line`).
+ *  Every result is rounded by adding 1<<(shift-1) and right-shifted by `shift`.
+ *
+ *  \param src   input samples, 8*line values are read
+ *  \param dst   output buffer, 8*line values are written
+ *  \param shift right shift applied to each result; must be >= 1 because the
+ *               rounding offset is computed as 1<<(shift-1)
+ *  \param line  number of 8-sample rows to process
+ */
+void partialButterfly8(short *src,short *dst,int shift, int line)
+{
+  int j,k;  
+  int E[4],O[4];
+  int EE[2],EO[2];
+  int add = 1<<(shift-1); /* rounding offset; undefined for shift == 0 */
+
+  for (j=0; j<line; j++)
+  {  
+    /* E and O: sums and differences of the mirrored sample pairs (k, 7-k) */
+    for (k=0;k<4;k++)
+    {
+      E[k] = src[k] + src[7-k];
+      O[k] = src[k] - src[7-k];
+    }    
+    /* EE and EO: second folding stage, applied to the sums E */
+    EE[0] = E[0] + E[3];    
+    EO[0] = E[0] - E[3];
+    EE[1] = E[1] + E[2];
+    EO[1] = E[1] - E[2];
+
+    dst[0] = (g_aiT8[0][0]*EE[0] + g_aiT8[0][1]*EE[1] + add)>>shift; /* rows 0 and 4 from EE */
+    dst[4*line] = (g_aiT8[4][0]*EE[0] + g_aiT8[4][1]*EE[1] + add)>>shift; 
+    dst[2*line] = (g_aiT8[2][0]*EO[0] + g_aiT8[2][1]*EO[1] + add)>>shift; /* rows 2 and 6 from EO */
+    dst[6*line] = (g_aiT8[6][0]*EO[0] + g_aiT8[6][1]*EO[1] + add)>>shift; 
+
+    dst[line] = (g_aiT8[1][0]*O[0] + g_aiT8[1][1]*O[1] + g_aiT8[1][2]*O[2] + g_aiT8[1][3]*O[3] + add)>>shift; /* odd rows from O */
+    dst[3*line] = (g_aiT8[3][0]*O[0] + g_aiT8[3][1]*O[1] + g_aiT8[3][2]*O[2] + g_aiT8[3][3]*O[3] + add)>>shift;
+    dst[5*line] = (g_aiT8[5][0]*O[0] + g_aiT8[5][1]*O[1] + g_aiT8[5][2]*O[2] + g_aiT8[5][3]*O[3] + add)>>shift;
+    dst[7*line] = (g_aiT8[7][0]*O[0] + g_aiT8[7][1]*O[1] + g_aiT8[7][2]*O[2] + g_aiT8[7][3]*O[3] + add)>>shift;
+
+    src += 8; /* next input row */
+    dst ++;   /* next output column */
+  }
+}
+
+
+/*! \brief Forward 16-point partial butterfly applied to `line` rows of 16 samples.
+ *
+ *  For each j in [0, line) the function reads src[16*j .. 16*j+15] and writes
+ *  sixteen results to dst[k*line + j], k = 0..15 (transposed output, stride
+ *  `line`). Every result is rounded by adding 1<<(shift-1) and right-shifted
+ *  by `shift`.
+ *
+ *  \param src   input samples, 16*line values are read
+ *  \param dst   output buffer, 16*line values are written
+ *  \param shift right shift applied to each result; must be >= 1 because the
+ *               rounding offset is computed as 1<<(shift-1)
+ *  \param line  number of 16-sample rows to process
+ */
+void partialButterfly16(short *src,short *dst,int shift, int line)
+{
+  int j,k;
+  int E[8],O[8];
+  int EE[4],EO[4];
+  int EEE[2],EEO[2];
+  int add = 1<<(shift-1); /* rounding offset; undefined for shift == 0 */
+
+  for (j=0; j<line; j++) 
+  {    
+    /* E and O: sums and differences of the mirrored sample pairs (k, 15-k) */
+    for (k=0;k<8;k++)
+    {
+      E[k] = src[k] + src[15-k];
+      O[k] = src[k] - src[15-k];
+    } 
+    /* EE and EO: second folding stage, applied to the sums E */
+    for (k=0;k<4;k++)
+    {
+      EE[k] = E[k] + E[7-k];
+      EO[k] = E[k] - E[7-k];
+    }
+    /* EEE and EEO: third folding stage, applied to the sums EE */
+    EEE[0] = EE[0] + EE[3];    
+    EEO[0] = EE[0] - EE[3];
+    EEE[1] = EE[1] + EE[2];
+    EEO[1] = EE[1] - EE[2];
+
+    dst[ 0      ] = (g_aiT16[ 0][0]*EEE[0] + g_aiT16[ 0][1]*EEE[1] + add)>>shift;        
+    dst[ 8*line ] = (g_aiT16[ 8][0]*EEE[0] + g_aiT16[ 8][1]*EEE[1] + add)>>shift;    
+    dst[ 4*line ] = (g_aiT16[ 4][0]*EEO[0] + g_aiT16[ 4][1]*EEO[1] + add)>>shift;        
+    dst[ 12*line] = (g_aiT16[12][0]*EEO[0] + g_aiT16[12][1]*EEO[1] + add)>>shift;
+
+    for (k=2;k<16;k+=4) /* rows 2, 6, 10, 14 from EO */
+    {
+      dst[ k*line ] = (g_aiT16[k][0]*EO[0] + g_aiT16[k][1]*EO[1] + g_aiT16[k][2]*EO[2] + g_aiT16[k][3]*EO[3] + add)>>shift;      
+    }
+
+    for (k=1;k<16;k+=2) /* odd rows from O */
+    {
+      dst[ k*line ] = (g_aiT16[k][0]*O[0] + g_aiT16[k][1]*O[1] + g_aiT16[k][2]*O[2] + g_aiT16[k][3]*O[3] + 
+        g_aiT16[k][4]*O[4] + g_aiT16[k][5]*O[5] + g_aiT16[k][6]*O[6] + g_aiT16[k][7]*O[7] + add)>>shift;
+    }
+
+    src += 16; /* next input row */
+    dst ++; /* next output column */
+
+  }
+}
+
+
+/*! \brief Forward 32-point partial butterfly applied to `line` rows of 32 samples.
+ *
+ *  For each j in [0, line) the function reads src[32*j .. 32*j+31] and writes
+ *  thirty-two results to dst[k*line + j], k = 0..31 (transposed output, stride
+ *  `line`). Every result is rounded by adding 1<<(shift-1) and right-shifted
+ *  by `shift`.
+ *
+ *  \param src   input samples, 32*line values are read
+ *  \param dst   output buffer, 32*line values are written
+ *  \param shift right shift applied to each result; must be >= 1 because the
+ *               rounding offset is computed as 1<<(shift-1)
+ *  \param line  number of 32-sample rows to process
+ */
+void partialButterfly32(short *src,short *dst,int shift, int line)
+{
+  int j,k;
+  int E[16],O[16];
+  int EE[8],EO[8];
+  int EEE[4],EEO[4];
+  int EEEE[2],EEEO[2];
+  int add = 1<<(shift-1); /* rounding offset; undefined for shift == 0 */
+
+  for (j=0; j<line; j++)
+  {    
+    /* E and O: sums and differences of the mirrored sample pairs (k, 31-k) */
+    for (k=0;k<16;k++)
+    {
+      E[k] = src[k] + src[31-k];
+      O[k] = src[k] - src[31-k];
+    } 
+    /* EE and EO: second folding stage, applied to the sums E */
+    for (k=0;k<8;k++)
+    {
+      EE[k] = E[k] + E[15-k];
+      EO[k] = E[k] - E[15-k];
+    }
+    /* EEE and EEO: third folding stage, applied to the sums EE */
+    for (k=0;k<4;k++)
+    {
+      EEE[k] = EE[k] + EE[7-k];
+      EEO[k] = EE[k] - EE[7-k];
+    }
+    /* EEEE and EEEO: fourth folding stage, applied to the sums EEE */
+    EEEE[0] = EEE[0] + EEE[3];    
+    EEEO[0] = EEE[0] - EEE[3];
+    EEEE[1] = EEE[1] + EEE[2];
+    EEEO[1] = EEE[1] - EEE[2];
+
+    dst[ 0       ] = (g_aiT32[ 0][0]*EEEE[0] + g_aiT32[ 0][1]*EEEE[1] + add)>>shift;
+    dst[ 16*line ] = (g_aiT32[16][0]*EEEE[0] + g_aiT32[16][1]*EEEE[1] + add)>>shift;
+    dst[ 8*line  ] = (g_aiT32[ 8][0]*EEEO[0] + g_aiT32[ 8][1]*EEEO[1] + add)>>shift; 
+    dst[ 24*line ] = (g_aiT32[24][0]*EEEO[0] + g_aiT32[24][1]*EEEO[1] + add)>>shift;
+    for (k=4;k<32;k+=8) /* rows 4, 12, 20, 28 from EEO */
+    {
+      dst[ k*line ] = (g_aiT32[k][0]*EEO[0] + g_aiT32[k][1]*EEO[1] + g_aiT32[k][2]*EEO[2] + g_aiT32[k][3]*EEO[3] + add)>>shift;
+    }       
+    for (k=2;k<32;k+=4) /* rows 2, 6, ..., 30 from EO */
+    {
+      dst[ k*line ] = (g_aiT32[k][0]*EO[0] + g_aiT32[k][1]*EO[1] + g_aiT32[k][2]*EO[2] + g_aiT32[k][3]*EO[3] + 
+        g_aiT32[k][4]*EO[4] + g_aiT32[k][5]*EO[5] + g_aiT32[k][6]*EO[6] + g_aiT32[k][7]*EO[7] + add)>>shift;
+    }       
+    for (k=1;k<32;k+=2) /* odd rows from O */
+    {
+      dst[ k*line ] = (g_aiT32[k][ 0]*O[ 0] + g_aiT32[k][ 1]*O[ 1] + g_aiT32[k][ 2]*O[ 2] + g_aiT32[k][ 3]*O[ 3] + 
+        g_aiT32[k][ 4]*O[ 4] + g_aiT32[k][ 5]*O[ 5] + g_aiT32[k][ 6]*O[ 6] + g_aiT32[k][ 7]*O[ 7] +
+        g_aiT32[k][ 8]*O[ 8] + g_aiT32[k][ 9]*O[ 9] + g_aiT32[k][10]*O[10] + g_aiT32[k][11]*O[11] + 
+        g_aiT32[k][12]*O[12] + g_aiT32[k][13]*O[13] + g_aiT32[k][14]*O[14] + g_aiT32[k][15]*O[15] + add)>>shift;
+    }
+    src += 32; /* next input row */
+    dst ++;    /* next output column */
+  }
+}
--- a/src/transform.h
+++ b/src/transform.h
@ -0,0 +1,17 @@
+/**
+ *  HEVC Encoder
+ *  - Marko Viitanen ( fador at iki.fi ), Tampere University of Technology, Department of Computer Systems.
+ */
+
+/*! \file transform.h
+    \brief Transform functions
+    \author Marko Viitanen
+    \date 2012-06
+    
+    Transform functions
+*/
+#ifndef __TRANSFORM_H
+#define __TRANSFORM_H
+
+
+#endif