Fixed dequantization and made preparations for smaller blocks. This version produces bit-perfect output.

2024-11-23 18:14:06 +00:00 · 2013-03-21 17:12:55 +02:00 · 2013-03-21 17:12:55 +02:00 · 78894b05d1
parent fa4c4acbe5
commit 78894b05d1
5 changed files with 46 additions and 45 deletions
--- a/build/VS2010/HEVC_encoder.vcxproj
+++ b/build/VS2010/HEVC_encoder.vcxproj
@@ -71,6 +71,9 @@
      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <CompileAs>CompileAsC</CompileAs>
      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
+      <InlineFunctionExpansion>Default</InlineFunctionExpansion>
+      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+      <AdditionalOptions>/MP %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
--- a/src/encoder.c
+++ b/src/encoder.c
@@ -199,13 +199,19 @@ void init_encoder_input(encoder_input* input,FILE* inputfile, uint32_t width, ui
  input->height = height;

  input->height_in_LCU = height / LCU_WIDTH;
-  input->width_in_LCU =  width / LCU_WIDTH;
-  if(input->height_in_LCU * LCU_WIDTH < height)
-    input->height_in_LCU++;
-  if(input->width_in_LCU * LCU_WIDTH < width)
-    input->width_in_LCU++;
+  input->width_in_LCU  =  width / LCU_WIDTH;

-  input->cur_pic.width = width;
+  /* Add one extra LCU when image not divisible by LCU_WIDTH */
+  if(input->height_in_LCU * LCU_WIDTH < height)
+  {
+    input->height_in_LCU++;
+  }
+  if(input->width_in_LCU * LCU_WIDTH < width)
+  {
+    input->width_in_LCU++;
+  }
+
+  input->cur_pic.width  = width;
  input->cur_pic.height = height;
  input->cur_pic.referenced = 0;
  /* Allocate buffers */
@@ -629,7 +635,6 @@ void encode_slice_data(encoder_control* encoder)

 void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, uint8_t depth)
 {    
-  int x,y;
  uint8_t split_flag = (depth<2)?1:0; /* ToDo: get from CU data */
  uint8_t split_model = 0;

@@ -729,15 +734,23 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
      intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth))*2+8, rec, (LCU_WIDTH>>(depth))*2+8, 0);

      intraPredMode = (uint8_t)intra_prediction(encoder->in.cur_pic.yData,encoder->in.width,recShift,(LCU_WIDTH>>(depth))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH)),yCtb*(LCU_WIDTH>>(MAX_DEPTH)),width,pred,width,&bestSAD);
-
+      
+      /* Filter DC-prediction */
+      if(intraPredMode == 1 && (LCU_WIDTH>>depth) < 32 && xCtb && yCtb)
+      {
+        intra_DCPredFiltering(recbase,encoder->in.width,recbase,encoder->in.width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
+      }
+      
+      /* ToDo: separate chroma prediction(?) */
+      /* intraPredModeChroma = 1; */
+      intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
+      intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
+      intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
+      intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predV,width>>1,intraPredModeChroma!=36?intraPredModeChroma:intraPredMode,1);
+      
+      /* This affects reconstruction, do after that */
      intra_setBlockMode(&encoder->in.cur_pic, xCtb, yCtb, depth, intraPredMode);

-      /* ToDo: separate chroma prediction(?) */     
-      intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 1);
-      intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predU,width>>1,intraPredMode,1);
-      intra_buildReferenceBorder(&encoder->in.cur_pic, xCtb, yCtb,(LCU_WIDTH>>(depth+1))*2+8, rec, (LCU_WIDTH>>(depth+1))*2+8, 2);
-      intra_recon(recShiftU,(LCU_WIDTH>>(depth+1))*2+8,xCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),yCtb*(LCU_WIDTH>>(MAX_DEPTH+1)),width>>1,predV,width>>1,intraPredMode,1);
-      
      /*
        PREDINFO CODING
        If intra prediction mode is found from the predictors,
@@ -866,7 +879,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui

        /* Transform and quant residual to coeffs */          
        transform2d(block,pre_quant_coeff,width,0);
-        quant(encoder,pre_quant_coeff,coeff, width, width, 0);
+        quant(encoder,pre_quant_coeff,coeff,width, width,0, 0, SCAN_DIAG);

        /* Check for non-zero coeffs */
        for(i = 0; (uint32_t)i < width*width; i++)
@@ -911,10 +924,9 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
          }
        }

-
        if(encoder->in.video_format != FORMAT_400)
        {
-          /* U */ 
+          /* U */
          i = 0;
          for(y = 0; y < LCU_WIDTH>>(depth+1); y++)
          {
@@ -924,7 +936,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
            }
          }
          transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),0);
-          quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, 2);                    
+          quant(encoder,pre_quant_coeff,coeffU, width>>1, width>>1, 0,2,SCAN_DIAG);
          for(i = 0; (uint32_t)i < width*width>>2; i++)
          {
            if(coeffU[i] != 0)
@@ -945,7 +957,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
            }
          }
          transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),0);
-          quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, 3);
+          quant(encoder,pre_quant_coeff,coeffV, width>>1, width>>1, 0,3,SCAN_DIAG);
          for(i = 0; (uint32_t)i < width*width>>2; i++)
          {
            if(coeffV[i] != 0)
@@ -984,7 +996,7 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
                recbaseU[x+y*(encoder->in.width>>1)] = (uint8_t)CLIP(0,255,predU[x+y*(LCU_WIDTH>>(depth+1))]);
              }
            }
-          }           
+          }
        
          if(CbV)
          {
@@ -1015,17 +1027,8 @@ void encode_coding_tree(encoder_control* encoder,uint16_t xCtb,uint16_t yCtb, ui
              }
            }
          }
-
        }

-        /* Filter DC-prediction */
-        /*
-        if(intraPredMode == 1 && (LCU_WIDTH>>depth) < 32 && xCtb && yCtb)
-        {
-          intra_DCPredFiltering(recbase,encoder->in.width,recbase,encoder->in.width,LCU_WIDTH>>depth,LCU_WIDTH>>depth);
-        }
-        */
-        
        
        /* END INTRAPREDICTION */

--- a/src/intra.c
+++ b/src/intra.c
@@ -236,7 +236,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
  int16_t pred[LCU_WIDTH*LCU_WIDTH>>2];
  int16_t origBlock[LCU_WIDTH*LCU_WIDTH>>2];
  uint8_t *origShift = &orig[xpos+ypos*origstride];  
-  int8_t filter = (width<32);
+  int8_t filter = (width<32); //ToDo: chroma support
  SADfunction SADarray[4] = {&SAD4x4,&SAD8x8,&SAD16x16,&SAD32x32};
  uint8_t threshold = intraHorVerDistThres[g_toBits[width]]; /*!< Intra filtering threshold */
  #define COPY_PRED_TO_DST() for(y = 0; y < (int32_t)width; y++)  {   for(x = 0; x < (int32_t)width; x++)  {  dst[x+y*dststride] = pred[x+y*width];  }   }
@@ -261,8 +261,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
    }
  }

-
-  /* Test DC */  
+  /* Test DC */
  /*
  x = intra_getDCPred(rec, recstride, xpos, ypos, width);
  for(i = 0; i < (int32_t)(width*width); i++)
@@ -272,7 +271,6 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
  CHECK_FOR_BEST(1);
  */
  /* Check angular not requiring filtering */
-  
  for(i = 2; i < 35; i++)
  {
    if(MIN(abs(i-26),abs(i-10)) <= threshold)
@@ -280,13 +278,11 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
      intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
      CHECK_FOR_BEST(i);
    }
-  }
-  
+  }  
  /*Apply filter*/
  intra_filter(rec,recstride,width,0);

  /* Test planar */  
-  
  intra_getPlanarPred(rec, recstride, xpos, ypos, width, pred, width);
  CHECK_FOR_BEST(0);
  
@@ -295,8 +291,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
  /* ToDo: add conditions to skip some modes on borders */
  
  //chroma can use only 26 and 10
-  /* Test angular predictions which require filtered samples */
-  
+  /* Test angular predictions which require filtered samples */  
  for(i = 2; i < 35; i++)
  {
    if(MIN(abs(i-26),abs(i-10)) > threshold)
@@ -304,8 +299,7 @@ int16_t intra_prediction(uint8_t* orig,uint32_t origstride,int16_t* rec,uint32_t
      intra_getAngularPred(rec,recstride,pred, width,width,width,i, xpos?1:0, ypos?1:0, filter);
      CHECK_FOR_BEST(i);
    }
-  }
-  
+  }  
  *sad = bestSAD;
  #undef COPY_PRED_TO_DST
  #undef CHECK_FOR_BEST
--- a/src/transform.c
+++ b/src/transform.c
@@ -694,8 +694,8 @@ void itransform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode

 #define QUANT_SHIFT 14

-void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, /*int32_t** pArlDes,*/ int32_t iWidth,
-           int32_t iHeight, /*uint32_t *uiAcSum,*/ int8_t eTType/*, uint32_t uiAbsPartIdx*/ )
+void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidth,
+           int32_t iHeight, uint32_t *uiAcSum, int8_t eTType, int8_t scanIdx )
 {
  int16_t*   piCoef    = pSrc;
  int16_t*   piQCoef   = pDes;
@@ -704,7 +704,7 @@ void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, /*int32_t** p
  int8_t useRDOQForTransformSkip = 0;
  uint32_t log2BlockSize = g_aucConvertToBit[ iWidth ] + 2;

-  uint32_t scanIdx = SCAN_DIAG;
+  //uint32_t scanIdx = SCAN_DIAG;

  scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];  
  {
@@ -804,7 +804,7 @@ void dequant(encoder_control* encoder, int16_t* piQCoef, int16_t* piCoef, int32_
  iShift += 4;
  

-  if(iShift >qpScaled/5)
+  if(iShift >qpScaled/6)
  {
    iAdd = 1 << (iShift - qpScaled/6 - 1);
      
--- a/src/transform.h
+++ b/src/transform.h
@@ -16,7 +16,8 @@
 extern int32_t* g_quant_coeff[4][6][6];
 extern const int32_t g_quantIntraDefault8x8[64];

-void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes,int32_t iWidth,int32_t iHeight, int8_t eTType );
+void quant(encoder_control* encoder, int16_t* pSrc, int16_t* pDes, int32_t iWidth,
+           int32_t iHeight, uint32_t *uiAcSum, int8_t eTType, int8_t scanIdx );
 void dequant(encoder_control* encoder, int16_t* piQCoef, int16_t* piCoef, int32_t iWidth, int32_t iHeight,int8_t eTType);

 void transform2d(int16_t *block,int16_t *coeff, int8_t blockSize, int8_t uiMode);