Merge branch 'master' into inter_residual

Conflicts:
	src/encoder.c
This commit is contained in:
Marko Viitanen 2013-10-08 14:59:23 +03:00
commit ef0bea32e1
12 changed files with 1342 additions and 1210 deletions

View file

@ -19,7 +19,9 @@
*/
FILE * open_cu_file(char *filename) {
  // Opens (creating or truncating) `filename` for writing and emits the XHTML
  // prologue up to and including the opening <body> tag.
  //
  // Returns the open stream, or NULL when the file could not be opened.
  // Callers must check for NULL before passing the stream on.
  FILE *fp = fopen(filename, "w");

  if (fp == NULL) {
    // Writing the header through a NULL stream would be undefined behaviour.
    return NULL;
  }

  // The XML declaration has to be the very first thing in the document, so
  // only the XHTML prologue is written; no plain <html> header precedes it.
  fputs("<?xml version='1.0' encoding='UTF-8' ?>\r\n"
        "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en'>\r\n"
        "<head><link rel='stylesheet' type='text/css' href='cu_style.css' /></head><body>",
        fp);

  return fp;
}
@ -31,23 +33,74 @@ void close_cu_file(FILE *fp) {
fclose(fp);
}
void yuv2rgb(unsigned char yuv[3], unsigned char rgb[3])
{
  // Converts one {Y, U, V} sample triplet into an {R, G, B} triplet.
  //
  // yuv: input samples, read as yuv[0]=Y, yuv[1]=U, yuv[2]=V.
  // rgb: output samples, written as rgb[0]=R, rgb[1]=G, rgb[2]=B.
  //
  // NOTE(review): green scales luma by 1.165 while red and blue use 1.164,
  // and no offset is subtracted from luma -- confirm both are intended.

  // All inputs are read before any output is written.
  const int luma = yuv[0];
  // Chroma is re-centred around zero before scaling.
  const int cb = yuv[1] - 128;
  const int cr = yuv[2] - 128;

  // The floating-point sums are truncated when converted to int.
  const int red = 1.164 * luma + 1.596 * cr;
  const int green = 1.165 * luma - 0.392 * cb - 0.813 * cr;
  const int blue = 1.164 * luma + 2.017 * cb;

  rgb[0] = CLIP(0, 255, red);
  rgb[1] = CLIP(0, 255, green);
  rgb[2] = CLIP(0, 255, blue);
}
/**
* Print information about the Coding Unit (CU) into the FILE* provided by open_cu_file.
*/
unsigned render_cu_file(encoder_control *encoder, unsigned depth, uint16_t xCtb, uint16_t yCtb, FILE *fp)
unsigned render_cu_file(encoder_control *encoder, picture *pic,
unsigned depth, uint16_t xCtb, uint16_t yCtb, FILE *fp)
{
cu_info *cu = &encoder->in.cur_pic->cu_array[depth][xCtb + yCtb * (encoder->in.width_in_lcu<<MAX_DEPTH)];
cu_info *cu = &pic->cu_array[depth][xCtb + yCtb * (pic->width_in_lcu<<MAX_DEPTH)];
cu_info *final_cu = &pic->cu_array[MAX_DEPTH][xCtb + yCtb * (pic->width_in_lcu<<MAX_DEPTH)];
unsigned lambda_cost = (4 * g_lambda_cost[encoder->QP]) << 4;
unsigned sum = 0;
unsigned best_cost = -1;
char type = cu->type == CU_INTRA ? 'I' : 'P';
unsigned x = xCtb * CU_MIN_SIZE_PIXELS;
unsigned y = yCtb * CU_MIN_SIZE_PIXELS;
unsigned luma = y * pic->width + x;
unsigned chroma = (y >> 1) * (pic->width >> 1) + (x >> 1);
unsigned char yuv[3] = { 0, 0, 0 };
unsigned char rgb[3] = { 0, 0, 0 };
if (x >= pic->width || y >= pic->height) {
// Don't output anything for CUs completely outside the borders.
return 0;
}
if (encoder->ref->used_size > 0) {
const picture *ref_pic = encoder->ref->pics[0];
yuv[0] = ref_pic->y_recdata[luma];
yuv[1] = ref_pic->u_recdata[chroma];
yuv[2] = ref_pic->v_recdata[chroma];
yuv2rgb(yuv, rgb);
}
// Enclose everything in a table with the assumption that this function is
// called from left to right and from top to down.
if (depth == 0) {
if (yCtb == 0 && xCtb == 0) {
fprintf(fp, "<table><tr><td>");
} else if (xCtb == 0) {
fprintf(fp, "</td></tr><tr><td>");
} else if (xCtb == NO_SCU_IN_LCU(pic->width_in_lcu)
&& yCtb == NO_SCU_IN_LCU(pic->height_in_lcu)) {
fprintf(fp, "</td></tr></table>");
} else {
fprintf(fp, "</td><td>");
}
}
fprintf(fp,
"\n<table class=d%u><tr><td colspan=2>"
"%u (%u, %u), %d, %c, "
"\n<table class='d%u' bgcolor='#%02x%02x%02x'><tr><td colspan='2'>"
"%u (%u, %u), %c, "
"c=%u, mv=(%d, %d)</td></tr>\n",
depth,
depth, xCtb, yCtb, cu->split, (cu->type == CU_INTRA ? 'I' : 'P'),
depth, rgb[0], rgb[1], rgb[2],
depth, xCtb, yCtb, (cu->type == CU_INTRA ? 'I' : 'P'),
cu->inter.cost, cu->inter.mv[0], cu->inter.mv[1]);
@ -57,18 +110,18 @@ unsigned render_cu_file(encoder_control *encoder, unsigned depth, uint16_t xCtb,
uint8_t change = 1<<(MAX_DEPTH-1-depth);
fprintf(fp, "<tr><td>");
sum += render_cu_file(encoder, depth + 1, xCtb, yCtb, fp);
sum += render_cu_file(encoder, pic, depth + 1, xCtb, yCtb, fp);
fprintf(fp, "</td><td>");
sum += render_cu_file(encoder, depth + 1, xCtb + change, yCtb, fp);
sum += render_cu_file(encoder, pic, depth + 1, xCtb + change, yCtb, fp);
fprintf(fp, "</td></tr>");
fprintf(fp, "<tr><td>");
sum += render_cu_file(encoder, depth + 1, xCtb, yCtb + change, fp);
sum += render_cu_file(encoder, pic, depth + 1, xCtb, yCtb + change, fp);
fprintf(fp, "</td><td>");
sum += render_cu_file(encoder, depth + 1, xCtb + change, yCtb + change, fp);
sum += render_cu_file(encoder, pic, depth + 1, xCtb + change, yCtb + change, fp);
fprintf(fp, "</td></tr>");
fprintf(fp, "<tr><td colspan=2>sum=%u, sum+lambda=%u</td></tr>",
fprintf(fp, "<tr><td colspan='2'>sum=%u, sum+lambda=%u</td></tr>",
sum, sum + lambda_cost);
if (sum + lambda_cost < cu->inter.cost) {
best_cost = sum + lambda_cost;
@ -79,6 +132,16 @@ unsigned render_cu_file(encoder_control *encoder, unsigned depth, uint16_t xCtb,
best_cost = cu->inter.cost;
}
if (depth == 0) {
fprintf(fp,
"<tr><td colspan='2'>"
"best depth=%u, %c, "
"c=%u, mv=(%d, %d)</td></tr>\n"
"</td></tr>",
final_cu->depth, (final_cu->type == CU_INTRA ? 'I' : 'P'),
final_cu->inter.cost, final_cu->inter.mv[0], final_cu->inter.mv[1]);
}
fprintf(fp, "</table>");
return best_cost;
}

View file

@ -21,6 +21,6 @@
FILE * open_cu_file(char *filename);
void close_cu_file(FILE *fp);
unsigned render_cu_file(encoder_control *encoder, unsigned depth, uint16_t x_cu, uint16_t y_cu, FILE *fp);
unsigned render_cu_file(encoder_control *encoder, picture *pic, unsigned depth, uint16_t x_cu, uint16_t y_cu, FILE *fp);
#endif

View file

@ -146,7 +146,7 @@ int main(int argc, char *argv[])
encoder->QP = 32;
encoder->in.video_format = FORMAT_420;
// deblocking filter
encoder->deblock_enable = 0;
encoder->deblock_enable = 1;
encoder->beta_offset_div2 = 0;
encoder->tc_offset_div2 = 0;
// SAO

File diff suppressed because it is too large Load diff

View file

@ -33,7 +33,7 @@ enum { FORMAT_400 = 0, FORMAT_420, FORMAT_422, FORMAT_444 };
/* Input info struct */
typedef struct
{
FILE* file;
FILE *file;
int32_t width; /*!< \brief input picture width */
int32_t height; /*!< \brief input picture height */
int32_t real_width; /*!< \brief real input picture width */
@ -52,19 +52,19 @@ typedef struct
config *cfg;
encoder_input in;
encoder_me me;
bitstream* stream;
bitstream *stream;
FILE *output;
picture_list *ref;
int8_t ref_list;
int8_t ref_idx_num[2];
int8_t QP; /*!< \brief Quantization parameter */
int8_t QP; // \brief Quantization parameter
int8_t bitdepth;
/* Filtering */
int8_t deblock_enable; /*!< \brief Flag to enable deblocking filter */
int8_t sao_enable; /*!< \brief Flag to enable sample adaptive offset filter */
int8_t beta_offset_div2; /*!< \brief (deblocking) beta offset (div 2), range -6...6 */
int8_t tc_offset_div2; /*!< \brief (deblocking)tc offset (div 2), range -6...6 */
int8_t deblock_enable; // \brief Flag to enable deblocking filter
int8_t sao_enable; // \brief Flag to enable sample adaptive offset filter
int8_t beta_offset_div2; // \brief (deblocking) beta offset (div 2), range -6...6
int8_t tc_offset_div2; // \brief (deblocking)tc offset (div 2), range -6...6
} encoder_control;
typedef struct
@ -86,7 +86,7 @@ typedef struct
int32_t recbase_stride;
int32_t pred_stride;
/* TODO: unify luma+chroma arrays */
// TODO: unify luma+chroma arrays
int16_t *coeff[3];
int8_t cb_top[3];
int8_t cb[4];
@ -101,51 +101,64 @@ typedef struct
} transform_info;
void init_tables(void);
void init_encoder_control(encoder_control* control,bitstream* output);
void init_encoder_input(encoder_input* input,FILE* inputfile, int32_t width, int32_t height);
void encode_one_frame(encoder_control* encoder);
void read_one_frame(FILE* file, encoder_control* encoder);
void init_encoder_control(encoder_control *control, bitstream *output);
void init_encoder_input(encoder_input *input, FILE* inputfile,
int32_t width, int32_t height);
void encode_one_frame(encoder_control *encoder);
void read_one_frame(FILE *file, encoder_control *encoder);
void encode_seq_parameter_set(encoder_control* encoder);
void encode_pic_parameter_set(encoder_control* encoder);
void encode_vid_parameter_set(encoder_control* encoder);
void encode_slice_data(encoder_control* encoder);
void encode_slice_header(encoder_control* encoder);
void encode_coding_tree(encoder_control* encoder,uint16_t x_ctb,uint16_t y_ctb, uint8_t depth);
void encode_lastSignificantXY(encoder_control* encoder,uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, uint8_t type, uint8_t scan);
void encode_CoeffNxN(encoder_control* encoder,int16_t* coeff, uint8_t width, uint8_t type, int8_t scan_mode);
void encode_transform_tree(encoder_control* encoder,transform_info* ti,uint8_t depth);
void encode_transform_coeff(encoder_control* encoder,transform_info* ti,int8_t depth, int8_t tr_depth);
void encode_seq_parameter_set(encoder_control *encoder);
void encode_pic_parameter_set(encoder_control *encoder);
void encode_vid_parameter_set(encoder_control *encoder);
void encode_slice_data(encoder_control *encoder);
void encode_slice_header(encoder_control *encoder);
void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb,
uint16_t y_ctb, uint8_t depth);
void encode_last_significant_xy(encoder_control *encoder, uint8_t lastpos_x,
uint8_t lastpos_y, uint8_t width, uint8_t height,
uint8_t type, uint8_t scan);
void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width,
uint8_t type, int8_t scan_mode);
void encode_transform_tree(encoder_control *encoder, transform_info *ti,
uint8_t depth);
void encode_transform_coeff(encoder_control *encoder, transform_info *ti,
int8_t depth, int8_t tr_depth);
extern int16_t g_lambda_cost[55];
extern uint32_t* g_sig_last_scan[3][7];
int8_t g_convert_to_bit[LCU_WIDTH+1];
int8_t g_convert_to_bit[LCU_WIDTH + 1];
static int8_t g_bitdepth = 8;
static int8_t g_bit_increment = 0;
#define MAX_NUM_SPU_W ((1<<(MAX_DEPTH))/4)
static uint32_t g_z_scan_to_raster [ MAX_NUM_SPU_W*MAX_NUM_SPU_W ] = { 0, };
static uint32_t g_raster_to_z_scan [ MAX_NUM_SPU_W*MAX_NUM_SPU_W ] = { 0, };
static const uint8_t g_group_idx[ 32 ] = {0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9};
static const uint8_t g_min_in_group[ 10 ] = {0,1,2,3,4,6,8,12,16,24};
static uint32_t g_sig_last_scan_32x32[ 64 ] =
{ 0, 8, 1,16, 9, 2,24,17,
10, 3,32,25,18,11, 4,40,
33,26,19,12, 5,48,41,34,
27,20,13, 6,56,49,42,35,
28,21,14, 7,57,50,43,36,
29,22,15,58,51,44,37,30,
23,59,52,45,38,31,60,53,
46,39,61,54,47,62,55,63 };
#define MAX_NUM_SPU_W ((1 << (MAX_DEPTH)) / 4)
static uint32_t g_z_scan_to_raster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
static uint32_t g_raster_to_z_scan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, };
static const uint32_t g_sig_last_scan_8x8[ 3 ][ 4 ] =
static const uint8_t g_group_idx[32] = {
0, 1, 2, 3, 4, 4, 5, 5, 6, 6,
6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9, 9,
9, 9 };
static const uint8_t g_min_in_group[10] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
static uint32_t g_sig_last_scan_32x32[64] = {
0, 8, 1, 16, 9, 2, 24, 17, 10, 3,
32, 25, 18, 11, 4, 40, 33, 26, 19, 12,
5, 48, 41, 34, 27, 20, 13, 6, 56, 49,
42, 35, 28, 21, 14, 7, 57, 50, 43, 36,
29, 22, 15, 58, 51, 44, 37, 30, 23, 59,
52, 45, 38, 31, 60, 53, 46, 39, 61, 54,
47, 62, 55, 63 };
static const uint32_t g_sig_last_scan_8x8[3][4] =
{ {0, 2, 1, 3},
{0, 1, 2, 3},
{0, 2, 1, 3}
};
//
//4 8 16 32 64 128
//0 1 2 3 4 5
static const uint8_t g_to_bits[129] =
@ -156,20 +169,21 @@ static const uint8_t g_to_bits[129] =
0,0,0,0,0,0,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
};
#define TOBITS(len) g_to_bits[len]
#define C1FLAG_NUMBER 8 /*!< maximum number of largerThan1 flag coded in one chunk */
#define C2FLAG_NUMBER 1 /*!< maximum number of largerThan2 flag coded in one chunk */
#define C1FLAG_NUMBER 8 // maximum number of largerThan1 flag coded in one chunk
#define C2FLAG_NUMBER 1 // maximum number of largerThan2 flag coded in one chunk
// Coefficient scan orders. Each enumerator is declared exactly once;
// the values are SCAN_DIAG = 0, SCAN_HOR = 1, SCAN_VER = 2.
enum COEFF_SCAN_TYPE
{
  SCAN_DIAG = 0, // up-right diagonal scan
  SCAN_HOR,      // horizontal first scan
  SCAN_VER       // vertical first scan
};
#endif
#endif

View file

@ -157,13 +157,16 @@ void filter_deblock_edge_luma(encoder_control *encoder,
int32_t stride = encoder->in.cur_pic->width;
int32_t offset = stride;
int32_t beta_offset_div2 = encoder->beta_offset_div2;
int32_t tc_offset_div2 = encoder->tc_offset_div2;
int8_t strength = 2; // Filter strength
int32_t tc_offset_div2 = encoder->tc_offset_div2;
// TODO: support 10+bits
uint8_t *orig_src = &encoder->in.cur_pic->y_recdata[xpos + ypos*stride];
uint8_t *src = orig_src;
int32_t step = 1;
cu_info *cu_q = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (encoder->in.width_in_lcu << MAX_DEPTH)];
cu_info *cu_p = 0;
int8_t strength = 0;
if(dir == EDGE_VER) {
offset = 1;
step = stride;
@ -171,23 +174,31 @@ void filter_deblock_edge_luma(encoder_control *encoder,
{
int32_t qp = encoder->QP;
int32_t bitdepth_scale = 1 << (g_bitdepth - 8);
int32_t tc_index = CLIP(0, 51 + 2, (int32_t)(qp + 2*(strength - 1) + (tc_offset_div2 << 1)));
int32_t bitdepth_scale = 1 << (g_bitdepth - 8);
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
int32_t tc = g_tc_table_8x8[tc_index] * bitdepth_scale;
int32_t beta = g_beta_table_8x8[b_index] * bitdepth_scale;
int32_t side_threshold = (beta + (beta >>1 )) >> 3;
int32_t thr_cut = tc * 10;
uint32_t blocks_in_part = (LCU_WIDTH >> depth) / 4;
uint32_t block_idx;
int32_t tc_index,tc,thr_cut;
// TODO: add CU based QP calculation
// For each 4-pixel part in the edge
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx)
{
for (block_idx = 0; block_idx < blocks_in_part; ++block_idx) {
int32_t dp0, dq0, dp3, dq3, d0, d3, dp, dq, d;
if((block_idx & 1) == 0)
{
// CU in the side we are filtering, update every 8-pixels
cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][((xpos>>MIN_SIZE)-(dir == EDGE_VER)+(dir == EDGE_HOR?block_idx/2:0)) +
((ypos>>MIN_SIZE)-(dir == EDGE_HOR)+(dir == EDGE_VER?block_idx/2:0)) * (encoder->in.width_in_lcu << MAX_DEPTH)];
// Filter strength
strength = ((cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) ? 2 :
(((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) ? 1 : 0));
tc_index = CLIP(0, 51 + 2, (int32_t)(qp + 2*(strength - 1) + (tc_offset_div2 << 1)));
tc = g_tc_table_8x8[tc_index] * bitdepth_scale;
thr_cut = tc * 10;
}
if(!strength) continue;
// Check conditions for filtering
// TODO: Get rid of these inline defines.
#define calc_DP(s,o) abs( (int16_t)s[-o*3] - (int16_t)2*s[-o*2] + (int16_t)s[-o] )
@ -242,9 +253,13 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
// Init offset and step to EDGE_HOR
int32_t offset = stride;
int32_t step = 1;
cu_info *cu_q = &encoder->in.cur_pic->cu_array[MAX_DEPTH][(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (encoder->in.width_in_lcu << MAX_DEPTH)];
cu_info *cu_p = &encoder->in.cur_pic->cu_array[MAX_DEPTH][((x>>(MIN_SIZE-1))-(dir == EDGE_VER)) +
((y>>(MIN_SIZE-1))-(dir == EDGE_HOR)) * (encoder->in.width_in_lcu << MAX_DEPTH)];
int8_t strength = (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) ? 2 : 0; // Filter strength
// We cannot filter edges not on 8x8 grid
if(depth == MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) )
if(strength != 2 || (depth == MAX_DEPTH && (( (y & 0x7) && dir == EDGE_HOR ) || ( (x & 0x7) && dir == EDGE_VER ) ) ))
{
return;
}
@ -259,7 +274,7 @@ void filter_deblock_edge_chroma(encoder_control *encoder,
{
int32_t QP = g_chroma_scale[encoder->QP];
int32_t bitdepth_scale = 1 << (g_bitdepth-8);
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2 + (tc_offset_div2 << 1)));
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
uint32_t blocks_in_part= (LCU_WIDTH>>(depth+1)) / 4;
uint32_t blk_idx;
@ -386,7 +401,9 @@ void filter_inter_halfpel_chroma(int16_t *src, int16_t src_stride, int width, in
int32_t shift1 = g_bitdepth-8;
int32_t shift2 = 6;
int32_t shift3 = 14-g_bitdepth;
int32_t offset = 1<<(shift2-1); //!< offset for rounding purposes
int32_t offset = 1 << (shift2 - 1); //!< offset for rounding purposes
int32_t offset3 = 1 << (shift3 - 1);
int32_t offset23 = 1 << (shift2 + shift3 - 1);
// Loop source pixels and generate four filtered half-pel pixels on each round
for (y = 0; y < height; y++) {
@ -406,12 +423,12 @@ void filter_inter_halfpel_chroma(int16_t *src, int16_t src_stride, int width, in
// ae0,0 - We need this only when hor_flag and for ee0,0
if (hor_flag) {
ae_temp = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1) + offset; // ae0,0
ae_temp = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1); // ae0,0
}
// ea0,0 - needed only when ver_flag
if(ver_flag) {
dst[dst_pos + 1*dst_stride] = (((-4*src[src_pos - src_stride] + 36*src[src_pos] + 36*src[src_pos + src_stride]
- 4*src[src_pos + 2*src_stride] ) >> shift1) + offset) >> shift3; // ea0,0
- 4*src[src_pos + 2*src_stride] ) >> shift1) + (1<<(shift3-1))) >> shift3; // ea0,0
}
// When both flags, we use _only_ this pixel (but still need ae0,0 for it)
@ -419,17 +436,17 @@ void filter_inter_halfpel_chroma(int16_t *src, int16_t src_stride, int width, in
// Calculate temporary values..
//TODO: optimization, store these values
src_pos -= src_stride; //0,-1
ae_temp1 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1) + offset; // ae0,-1
src_pos += src_stride; //0,1
ae_temp2 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1) + offset; // ae0,1
ae_temp1 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1); // ae0,-1
src_pos += 2*src_stride; //0,1
ae_temp2 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1); // ae0,1
src_pos += src_stride; //0,2
ae_temp3 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1) + offset; // ae0,2
ae_temp3 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2] ) >> shift1); // ae0,2
dst[dst_pos + 1*dst_stride + 1] = ((( -4*ae_temp1 + 36*dst[dst_pos + 1] + 36*ae_temp2 - 4*ae_temp3 ) >> shift2) + offset) >> shift3; // ee0,0
dst[dst_pos + 1*dst_stride + 1] = (((-4*ae_temp1 + 36*ae_temp + 36*ae_temp2 - 4*ae_temp3 ) + offset23) >> shift2) >> shift3; // ee0,0
}
if(hor_flag) {
dst[dst_pos + 1] = ae_temp >> shift3;
dst[dst_pos + 1] = (ae_temp + offset3) >> shift3;
}
}
}

View file

@ -34,7 +34,7 @@
/* CONFIG VARIABLES */
#define LCU_WIDTH 64 /*!< Largest Coding Unit (IT'S 64x64, DO NOT TOUCH!) */
#define MAX_INTER_SEARCH_DEPTH 2
#define MAX_INTER_SEARCH_DEPTH 3
#define MIN_INTER_SEARCH_DEPTH 0
#define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */

View file

@ -58,9 +58,10 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
* \param dst destination picture
* \returns Void
*/
void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t mv[2], picture *dst)
void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], picture *dst)
{
int x,y,coord_x,coord_y;
int16_t mv[2] = { mv_param[0], mv_param[1] };
int32_t dst_width_c = dst->width>>1; //!< Destination picture width in chroma pixels
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
@ -84,8 +85,8 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t
int16_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)
// TODO: Fractional pixel support
mv[0] = mv[0]>>2;
mv[1] = mv[1]>>2;
mv[0] >>= 2;
mv[1] >>= 2;
// Chroma half-pel
// get half-pel interpolated block and push it to output
@ -265,8 +266,11 @@ void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int
// B0, B1 and B2 availability testing
if (y_cu != 0) {
b0 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu + (y_cu - 1) * (encoder->in.width_in_lcu<<MAX_DEPTH)];
if (!b0->coded) b0 = NULL;
if (x_cu + cur_block_in_scu < encoder->in.width_in_lcu<<MAX_DEPTH) {
b0 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu + (y_cu - 1) * (encoder->in.width_in_lcu<<MAX_DEPTH)];
if (!b0->coded) b0 = NULL;
}
b1 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu - 1 + (y_cu - 1) * (encoder->in.width_in_lcu<<MAX_DEPTH)];
if (!b1->coded) b1 = NULL;

View file

@ -19,7 +19,7 @@
void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, int16_t mv[2], picture* dst);
void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv[2], picture* dst);
void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[2][2]);

View file

@ -20,29 +20,6 @@
#define PSNRMAX (255.0 * 255.0)
/**
* \brief Set block splitflag
* \param pic picture to use
* \param x_scu x SCU position (smallest CU)
* \param y_scu y SCU position (smallest CU)
* \param depth current CU depth
* \param mode mode to set
*/
void picture_set_block_split(picture *pic, uint32_t x_scu, uint32_t y_scu,
uint8_t depth, int8_t split)
{
uint32_t x, y;
int width_in_scu = pic->width_in_lcu << MAX_DEPTH;
int block_scu_width = (LCU_WIDTH >> depth) / (LCU_WIDTH >> MAX_DEPTH);
for (y = y_scu; y < y_scu + block_scu_width; ++y) {
int cu_row = y * width_in_scu;
for (x = x_scu; x < x_scu + block_scu_width; ++x) {
pic->cu_array[depth][cu_row + x].split = split;
}
}
}
/**
* \brief Set block coded status
* \param pic picture to use

View file

@ -57,7 +57,6 @@ typedef struct
int8_t coded;
cu_info_intra intra;
cu_info_inter inter;
int8_t split;
} cu_info;
/**

View file

@ -26,26 +26,76 @@
// Temporarily for debugging.
#define USE_INTRA_IN_P 0
//#define RENDER_CU encoder->frame==2
#define RENDER_CU 0
#define USE_FULL_SEARCH 0
#define USE_CHROMA_IN_MV_SEARCH 0
#define IN_FRAME(x, y, width, height, block) ((x) >= 0 && (y) >= 0 && (x) + (block) <= (width) && (y) + (block) <= (height))
#define IN_FRAME(x, y, width, height, block_width, block_height) \
((x) >= 0 && (y) >= 0 \
&& (x) + (block_width) <= (width) \
&& (y) + (block_height) <= (height))
unsigned get_sad(int x, int y, int width, int height, int block, uint8_t *pic_data, uint8_t *ref_data)
/**
 * \brief Get Sum of Absolute Differences (SAD) between two blocks in two
 *        different frames.
 * \param pic First frame.
 * \param ref Second frame.
 * \param pic_x X coordinate of the first block.
 * \param pic_y Y coordinate of the first block.
 * \param ref_x X coordinate of the second block.
 * \param ref_y Y coordinate of the second block.
 * \param block_width Width of the blocks.
 * \param block_height Height of the blocks.
 * \returns 0 if the second block is not completely inside the frame
 *          (0 means invalid, for now); otherwise 1 plus the value(s)
 *          returned by sad(), so a valid result is never 0.
 */
unsigned get_block_sad(picture *pic, picture *ref,
                       int pic_x, int pic_y, int ref_x, int ref_y,
                       int block_width, int block_height)
{
  uint8_t *pic_data, *ref_data;
  // Both frames are indexed with the dimensions of pic.
  // NOTE(review): assumes ref has the same dimensions as pic -- confirm.
  int width = pic->width;
  int height = pic->height;

  unsigned result = 1; // Start from 1 so result is never 0.

  // Only the block in the second frame is range checked here.
  if (!IN_FRAME(ref_x, ref_y, width, height, block_width, block_height)) {
    return 0;
  }

  pic_data = &pic->y_data[pic_y * width + pic_x];
  ref_data = &ref->y_data[ref_y * width + ref_x];
  result += sad(pic_data, ref_data, block_width, block_height, width);

#if USE_CHROMA_IN_MV_SEARCH
  // Halve everything because chroma is half the resolution. A shift by one
  // matches the (width >> 1) chroma addressing used elsewhere in this code.
  width >>= 1;
  pic_x >>= 1;
  pic_y >>= 1;
  ref_x >>= 1;
  ref_y >>= 1;
  block_width >>= 1;
  block_height >>= 1;

  pic_data = &pic->u_data[pic_y * width + pic_x];
  ref_data = &ref->u_data[ref_y * width + ref_x];
  result += sad(pic_data, ref_data, block_width, block_height, width);

  pic_data = &pic->v_data[pic_y * width + pic_x];
  ref_data = &ref->v_data[ref_y * width + ref_x];
  result += sad(pic_data, ref_data, block_width, block_height, width);
#endif

  return result;
}
void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
void search_mv(picture *pic, picture *ref,
cu_info *cur_cu, int orig_x, int orig_y, int x, int y,
unsigned depth)
{
int block_width = CU_WIDTH_FROM_DEPTH(depth);
unsigned cost = get_sad(orig_x + x, orig_y + y, pic->width, pic->height, block_width, pic_data, ref_data);
// Get cost for the predicted motion vector.
unsigned cost = get_block_sad(pic, ref, orig_x, orig_y, orig_x + x, orig_y + y,
block_width, block_width);
unsigned best_cost = -1;
unsigned step = 8;
@ -55,15 +105,15 @@ void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
cur_cu->inter.mv[1] = y;
}
// If initial vector is farther away than the step, try the (0, 0) vector
// in addition to the initial vector.
if (abs(x) > step || abs(y) > step) {
cost = get_sad(orig_x, orig_y, pic->width, pic->height, block_width, pic_data, ref_data);
// If the initial vector is non-zero, also try the (0, 0) vector just in case.
if (x != 0 || y != 0) {
cost = get_block_sad(pic, ref, orig_x, orig_y, orig_x, orig_y,
block_width, block_width);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
cur_cu->inter.mv[0] = x;
cur_cu->inter.mv[1] = y - step;
cur_cu->inter.mv[0] = 0;
cur_cu->inter.mv[1] = 0;
}
}
@ -75,16 +125,20 @@ void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
// due to quantization. Its value is just a guess based on the first
// blocks of the BQMall sequence, which don't move.
// TODO: Quantization factor probably affects what the constant should be.
/*
if (best_cost <= block_width * block_width * 1.8) {
break;
}
*/
// Change center of search to the current best point.
x = cur_cu->inter.mv[0];
y = cur_cu->inter.mv[1];
// above
cost = get_sad(orig_x + x, orig_y + y - step, pic->width, pic->height, block_width, pic_data, ref_data);
cost = get_block_sad(pic, ref, orig_x, orig_y,
orig_x + x, orig_y + y - step,
block_width, block_width);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
cur_cu->inter.mv[0] = x;
@ -92,7 +146,9 @@ void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
}
// left
cost = get_sad(orig_x + x - step, orig_y + y, pic->width, pic->height, block_width, pic_data, ref_data);
cost = get_block_sad(pic, ref, orig_x, orig_y,
orig_x + x - step, orig_y + y,
block_width, block_width);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
cur_cu->inter.mv[0] = x - step;
@ -100,7 +156,9 @@ void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
}
// right
cost = get_sad(orig_x + x + step, orig_y + y, pic->width, pic->height, block_width, pic_data, ref_data);
cost = get_block_sad(pic, ref, orig_x, orig_y,
orig_x + x + step, orig_y + y,
block_width, block_width);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
cur_cu->inter.mv[0] = x + step;
@ -108,7 +166,9 @@ void search_mv(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
}
// below
cost = get_sad(orig_x + x, orig_y + y + step, pic->width, pic->height, block_width, pic_data, ref_data);
cost = get_block_sad(pic, ref, orig_x, orig_y,
orig_x + x, orig_y + y + step,
block_width, block_width);
if (cost > 0 && cost < best_cost) {
best_cost = cost;
cur_cu->inter.mv[0] = x;
@ -157,13 +217,13 @@ void search_mv_full(picture *pic, uint8_t *pic_data, uint8_t *ref_data,
step /= 2;
if (step > 0) {
search_mv_full(pic, pic_data, ref_data, cur_cu, step, orig_x, orig_y,
x, y - step, depth);
x, y - step, depth);
search_mv_full(pic, pic_data, ref_data, cur_cu, step, orig_x, orig_y,
x - step, y, depth);
x - step, y, depth);
search_mv_full(pic, pic_data, ref_data, cur_cu, step, orig_x, orig_y,
x + step, y, depth);
x + step, y, depth);
search_mv_full(pic, pic_data, ref_data, cur_cu, step, orig_x, orig_y,
x, y + step, depth);
x, y + step, depth);
}
}
@ -268,7 +328,6 @@ void search_tree(encoder_control *encoder,
if (border) {
// Split blocks and remember to change x and y block positions
uint8_t change = 1 << (MAX_DEPTH - 1 - depth);
SET_SPLITDATA(cur_cu, 1);
search_tree(encoder, x_ctb, y_ctb, depth + 1);
if (!border_x || border_split_x) {
search_tree(encoder, x_ctb + change, y_ctb, depth + 1);
@ -305,8 +364,8 @@ void search_tree(encoder_control *encoder,
int start_y = 0;
// Convert from sub-pixel accuracy.
if (ref_cu->type == CU_INTER) {
int start_x = ref_cu->inter.mv[0] >> 2;
int start_y = ref_cu->inter.mv[1] >> 2;
start_x = ref_cu->inter.mv[0] >> 2;
start_y = ref_cu->inter.mv[1] >> 2;
}
if (USE_FULL_SEARCH) {
@ -314,7 +373,7 @@ void search_tree(encoder_control *encoder,
cur_cu, 8, x, y,
start_x, start_y, depth);
} else {
search_mv(cur_pic, cur_data, ref_pic->y_data,
search_mv(cur_pic, ref_pic,
cur_cu, x, y,
start_x, start_y, depth);
}
@ -385,26 +444,24 @@ uint32_t search_best_mode(encoder_control *encoder,
cost += search_best_mode(encoder, x_ctb + change, y_ctb + change, depth + 1);
// We split if the cost is better (0 cost -> not checked)
if (cost != 0
if ( (encoder->in.cur_pic->slicetype == SLICE_I && depth < MIN_INTRA_SEARCH_DEPTH) ||
(cost != 0
&& (best_intra_cost != 0 && cost + lambdaCost < best_intra_cost)
&& (best_inter_cost != 0
&& cost + lambdaCost < best_inter_cost
&& encoder->in.cur_pic->slicetype != SLICE_I))
&& encoder->in.cur_pic->slicetype != SLICE_I)))
{
// Set split to 1
picture_set_block_split(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1);
best_cost = cost + lambdaCost;
} else if (best_inter_cost != 0 // Else, check if inter cost is smaller or the same as intra
&& (best_inter_cost <= best_intra_cost || best_intra_cost == 0)
&& encoder->in.cur_pic->slicetype != SLICE_I)
{
// Set split to 0 and mode to inter.mode
picture_set_block_split(encoder->in.cur_pic, x_ctb, y_ctb, depth, 0);
inter_set_block(encoder->in.cur_pic, x_ctb, y_ctb, depth, cur_cu);
best_cost = best_inter_cost;
} else { // Else, dont split and recursively set block mode
// Set split to 0 and mode to intra.mode
picture_set_block_split(encoder->in.cur_pic, x_ctb, y_ctb, depth, 0);
intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth,
cur_cu->intra.mode);
best_cost = best_intra_cost;
@ -414,12 +471,10 @@ uint32_t search_best_mode(encoder_control *encoder,
&& encoder->in.cur_pic->slicetype != SLICE_I)
{
// Set split to 0 and mode to inter.mode
picture_set_block_split(encoder->in.cur_pic, x_ctb, y_ctb, depth, 0);
inter_set_block(encoder->in.cur_pic, x_ctb, y_ctb, depth, cur_cu);
best_cost = best_inter_cost;
} else {
// Set split to 0 and mode to intra.mode
picture_set_block_split(encoder->in.cur_pic, x_ctb, y_ctb, depth, 0);
intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth,
cur_cu->intra.mode);
best_cost = best_intra_cost;
@ -436,7 +491,7 @@ void search_slice_data(encoder_control *encoder)
int16_t x_lcu, y_lcu;
FILE *fp = 0, *fp2 = 0;
if (RENDER_CU && encoder->frame == 1) {
if (RENDER_CU) {
fp = open_cu_file("cu_search.html");
fp2 = open_cu_file("cu_best.html");
}
@ -447,14 +502,14 @@ void search_slice_data(encoder_control *encoder)
uint8_t depth = 0;
// Recursive function for looping through all the sub-blocks
search_tree(encoder, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, depth);
if (RENDER_CU && encoder->frame == 1) {
render_cu_file(encoder, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp);
if (RENDER_CU) {
render_cu_file(encoder, encoder->in.cur_pic, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp);
}
// Decide actual coding modes
search_best_mode(encoder, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, depth);
if (RENDER_CU && encoder->frame == 1) {
render_cu_file(encoder, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp2);
if (RENDER_CU) {
render_cu_file(encoder, encoder->in.cur_pic, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp2);
}
}
}