2013-04-16 08:23:03 +00:00
/**
 * \file
 *
 * \author Marko Viitanen ( fador@iki.fi ),
 *         Tampere University of Technology,
 *         Department of Pervasive Computing.
 * \author Ari Koivula ( ari@koivu.la ),
 *         Tampere University of Technology,
 *         Department of Pervasive Computing.
 */
2013-09-18 09:16:03 +00:00
# include "search.h"
2013-04-16 08:23:03 +00:00
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
2013-09-18 09:16:03 +00:00
2013-04-16 08:23:03 +00:00
# include "config.h"
# include "bitstream.h"
# include "picture.h"
2013-04-16 12:10:43 +00:00
# include "intra.h"
2013-09-05 12:02:53 +00:00
# include "inter.h"
2013-04-16 08:23:03 +00:00
# include "filter.h"
2013-09-18 08:07:48 +00:00
# include "debug.h"
2013-04-16 12:10:43 +00:00
2013-09-16 14:34:20 +00:00
2013-09-16 16:18:24 +00:00
// Temporarily for debugging.

// When non-zero, search_tree() runs the intra search on non-I slices as well.
#define USE_INTRA_IN_P 0

// When non-zero, search_slice_data() renders the CU tree into HTML files.
// It may be an expression, for example to render a single frame only:
//#define RENDER_CU encoder->frame==2
#define RENDER_CU 0

// NOTE(review): no user of this switch is visible in the search functions
// of this file - confirm it is still needed.
#define USE_CHROMA_IN_MV_SEARCH 0

/**
 * \brief Test whether a block lies completely inside a frame.
 *
 * Evaluates to non-zero when the block whose top-left corner is (x, y) and
 * whose size is block_width x block_height does not extend past any edge of
 * a frame of size width x height.
 *
 * x, y, block_width and block_height are each evaluated more than once, so
 * the arguments must not have side effects.
 */
#define IN_FRAME(x, y, width, height, block_width, block_height) \
  ((x) >= 0 && (y) >= 0 \
  && (x) + (block_width) <= (width) \
  && (y) + (block_height) <= (height))
2013-09-25 14:47:40 +00:00
2013-10-08 09:30:25 +00:00
/**
 * \brief Integer 2D offset.
 */
typedef struct {
  int x;
  int y;
} vector2d;

/**
 * This is used in the hexagon_search to select 3 points to search.
 *
 * The start of the hexagonal pattern has been repeated at the end so that
 * the indices between 1-6 can be used as the start of a 3-point list of new
 * points to search.
 *
 *   6 o-o 1 / 7
 *    /   \
 *  5 o  0  o 2 / 8
 *    \   /
 *   4 o-o 3
 *
 * Only indices 0-8 are initialised explicitly; the declared size of 10 is
 * kept as it was, which leaves one trailing zero-initialised entry.
 */
const vector2d large_hexbs[10] = {
  { 0, 0 },
  { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }, { -1, -2 },
  { 1, -2 }, { 2, 0 }
};

/**
 * This is used as the last step of the hexagon search.
 *
 * Index 0 is the centre. Indices 1-4 are the four direct neighbours of the
 * centre (up, left, right, down), so that the final refinement covers both
 * the horizontal and the vertical direction symmetrically.
 */
const vector2d small_hexbs[5] = {
  { 0, 0 },
  { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 }
};
/**
 * \brief Hexagon pattern motion search for a single CU.
 *
 * Starting from the motion vector guess (x, y), the large hexagon pattern is
 * moved towards the cheapest candidate until the centre is the best point,
 * after which the small pattern is tried once around the final centre.
 * Candidate costs come from calc_sad().
 *
 * A candidate is accepted only when its cost is non-zero and lower than the
 * best cost so far.
 * NOTE(review): presumably calc_sad() returns 0 for candidates it cannot
 * evaluate, which is why a zero cost is rejected - confirm.
 *
 * \param pic     Picture containing the block being searched for.
 * \param ref     Picture the candidates are taken from.
 * \param cur_cu  CU that receives the result in inter.cost and inter.mv.
 * \param orig_x  x position of the block, passed unchanged to calc_sad().
 * \param orig_y  y position of the block, passed unchanged to calc_sad().
 * \param x       Initial x offset of the search centre from orig_x.
 * \param y       Initial y offset of the search centre from orig_y.
 * \param depth   CU depth; the block is CU_WIDTH_FROM_DEPTH(depth) wide and high.
 */
void hexagon_search(picture *pic, picture *ref,
                    cu_info *cur_cu, int orig_x, int orig_y, int x, int y,
                    unsigned depth)
{
  int block_width = CU_WIDTH_FROM_DEPTH(depth);
  unsigned best_cost = -1;  // Wraps to the largest unsigned value.
  unsigned best_index = 0;  // in large_hexbs[]
  unsigned i;

  // Search the initial 7 points of the hexagon.
  for (i = 0; i < 7; ++i) {
    const vector2d *pattern = large_hexbs + i;
    unsigned cost = calc_sad(pic, ref, orig_x, orig_y,
                             orig_x + x + pattern->x, orig_y + y + pattern->y,
                             block_width, block_width);

    if (cost > 0 && cost < best_cost) {
      best_cost = cost;
      best_index = i;
    }
  }

  // Try the 0,0 vector.
  if (!(x == 0 && y == 0)) {
    unsigned zero_cost = calc_sad(pic, ref, orig_x, orig_y,
                                  orig_x, orig_y,
                                  block_width, block_width);

    if (zero_cost > 0 && zero_cost < best_cost) {
      best_cost = zero_cost;
      best_index = 0;
      x = 0;
      y = 0;

      // Redo the search around the 0,0 point.
      for (i = 1; i < 7; ++i) {
        const vector2d *pattern = large_hexbs + i;
        unsigned cost = calc_sad(pic, ref, orig_x, orig_y,
                                 orig_x + pattern->x, orig_y + pattern->y,
                                 block_width, block_width);

        if (cost > 0 && cost < best_cost) {
          best_cost = cost;
          best_index = i;
        }
      }
    }
  }

  // Iteratively search the 3 new points around the best match, until the best
  // match is in the center.
  while (best_index != 0) {
    unsigned start;  // Starting point of the 3 offsets to be searched.

    if (best_index == 1) {
      start = 6;
    } else if (best_index == 8) {
      start = 1;
    } else {
      start = best_index - 1;
    }

    // Move the center to the best match.
    x += large_hexbs[best_index].x;
    y += large_hexbs[best_index].y;
    best_index = 0;

    // Iterate through the next 3 points.
    for (i = 0; i < 3; ++i) {
      const vector2d *offset = large_hexbs + start + i;
      unsigned cost = calc_sad(pic, ref, orig_x, orig_y,
                               orig_x + x + offset->x, orig_y + y + offset->y,
                               block_width, block_width);

      if (cost > 0 && cost < best_cost) {
        best_cost = cost;
        best_index = start + i;
      }
    }
  }

  // Do the final step of the search with a small pattern. best_index is 0
  // here, so the centre is kept unless one of the small offsets is cheaper.
  for (i = 1; i < 5; ++i) {
    const vector2d *offset = small_hexbs + i;
    unsigned cost = calc_sad(pic, ref, orig_x, orig_y,
                             orig_x + x + offset->x, orig_y + y + offset->y,
                             block_width, block_width);

    if (cost > 0 && cost < best_cost) {
      best_cost = cost;
      best_index = i;
    }
  }

  x += small_hexbs[best_index].x;
  y += small_hexbs[best_index].y;

  cur_cu->inter.cost = best_cost;
  // The vector is stored scaled by 4. Multiplication is used instead of a
  // left shift because x and y may be negative and shifting a negative value
  // left is undefined behaviour.
  cur_cu->inter.mv[0] = x * 4;
  cur_cu->inter.mv[1] = y * 4;
}
2013-09-20 09:17:13 +00:00
/**
 * \brief Fill the top row and left column of a reference buffer.
 *
 * The buffer layout is: dst[0] is the top-left corner, dst[1..outwidth-1] is
 * the row above the block and dst[k * dststride] for k = 1..outwidth-1 is the
 * column to the left of the block. Samples are copied from the picture for
 * as many SCUs as lie inside the picture and have a CU type other than
 * CU_NOTSET; the remainder is filled by repeating one sample. When a whole
 * side is missing it is filled from the other side, or with
 * 1 << (g_bitdepth - 1) when neither side exists.
 *
 * \param pic        Picture to read samples and CU types from.
 * \param x_ctb      x position of the block in SCU units.
 * \param y_ctb      y position of the block in SCU units.
 * \param outwidth   Number of entries to produce per side.
 * \param dst        Output buffer.
 * \param dststride  Distance between consecutive rows of dst.
 * \param chroma     0 selects y_data, 1 selects u_data, any other value
 *                   selects v_data; non-zero also halves the dimensions.
 */
void search_buildReferenceBorder(picture *pic, int32_t x_ctb, int32_t y_ctb,
                                 int16_t outwidth, int16_t *dst,
                                 int32_t dststride, int8_t chroma)
{
  int16_t dc_val = 1 << (g_bitdepth - 1);  // default predictor value
  int32_t src_width = (pic->width >> (chroma ? 1 : 0));
  int32_t src_height = (pic->height >> (chroma ? 1 : 0));
  int16_t scu_width = LCU_WIDTH >> (MAX_DEPTH + (chroma ? 1 : 0));
  int32_t width_in_scu = pic->width_in_lcu << MAX_DEPTH;
  int32_t scu_count = outwidth / scu_width;  // SCUs per side of the border
  pixel *plane;
  pixel *block;  // plane shifted to the top-left corner of the current block
  int16_t fill;  // value used for extrapolation
  int32_t pos;

  if (!chroma) {
    plane = pic->y_data;
  } else if (chroma == 1) {
    plane = pic->u_data;
  } else {
    plane = pic->v_data;
  }
  block = &plane[x_ctb * scu_width + (y_ctb * scu_width) * src_width];

  // Left column.
  if (x_ctb) {
    // Count usable SCUs; the first one is taken without any check.
    int32_t usable = 1;
    while (usable < scu_count
           && (y_ctb + usable) * scu_width < src_height
           && pic->cu_array[MAX_DEPTH][x_ctb - 1 + (y_ctb + usable) * width_in_scu].type != CU_NOTSET) {
      usable++;
    }

    // Output row r receives the picture sample of block row r - 1.
    for (pos = 1; pos < usable * scu_width; pos++) {
      dst[pos * dststride] = block[(pos - 1) * src_width - 1];
    }

    // Repeat the last usable sample when the column was cut short.
    if (usable != scu_count) {
      fill = block[(usable * scu_width - 1) * src_width - 1];
      for (pos = usable * scu_width; pos < outwidth; pos++) {
        dst[pos * dststride] = fill;
      }
    }
  } else {
    // No left neighbour: use the sample above the block or the default.
    if (y_ctb) {
      fill = block[-src_width];
    } else {
      fill = dc_val;
    }
    for (pos = 0; pos < outwidth; pos++) {
      dst[pos * dststride] = fill;
    }
  }

  // Top row.
  if (y_ctb) {
    int32_t usable = 1;
    while (usable < scu_count
           && (x_ctb + usable) * scu_width < src_width
           && pic->cu_array[MAX_DEPTH][x_ctb + usable + (y_ctb - 1) * width_in_scu].type != CU_NOTSET) {
      usable++;
    }

    // Output column c receives the sample above block column c - 1.
    for (pos = 1; pos < usable * scu_width; pos++) {
      dst[pos] = block[pos - 1 - src_width];
    }

    if (usable != scu_count) {
      fill = block[(usable * scu_width) - src_width - 1];
      for (pos = usable * scu_width; pos < outwidth; pos++) {
        dst[pos] = fill;
      }
    }
  } else {
    // No top neighbour: use the sample left of the block or the default.
    if (x_ctb) {
      fill = block[-1];
    } else {
      fill = dc_val;
    }
    for (pos = 1; pos < outwidth; pos++) {
      dst[pos] = fill;
    }
  }

  // Topleft corner
  if (x_ctb && y_ctb) {
    dst[0] = block[-src_width - 1];
  } else {
    dst[0] = dst[dststride];
  }
}
2013-09-20 09:17:13 +00:00
/**
 * \brief Recursively compute inter and intra costs for a CU and its sub-CUs.
 *
 * Both costs of the CU at (x_ctb, y_ctb, depth) are first reset to
 * 0xffffffff. A CU that extends past the picture edge is not searched
 * itself when it can still be split; only its sub-CUs are visited.
 * Otherwise the inter search and the intra search are run when the depth
 * and slice type allow them, and the four sub-CUs are searched afterwards
 * while depth is below both maximum search depths.
 *
 * \param encoder  Encoder state; the current picture and reference list are
 *                 read from it and the CU array of the current picture is
 *                 updated.
 * \param x_ctb    x position of the CU in SCU units.
 * \param y_ctb    y position of the CU in SCU units.
 * \param depth    Depth of the CU; its width is LCU_WIDTH >> depth.
 */
void search_tree(encoder_control *encoder,
                 uint16_t x_ctb, uint16_t y_ctb, uint8_t depth)
{
  // 1 when the CU extends past the right / bottom edge of the picture.
  uint8_t border_x = ((encoder->in.width) < (x_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0;
  uint8_t border_y = ((encoder->in.height) < (y_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0;
  // 1 when the picture reaches at least one SCU past the middle of the CU in
  // the given direction.
  uint8_t border_split_x = ((encoder->in.width) < ((x_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1;
  uint8_t border_split_y = ((encoder->in.height) < ((y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1;
  uint8_t border = border_x | border_y;  // are we in any border CU

  picture *cur_pic = encoder->in.cur_pic;
  cu_info *cur_cu = &cur_pic->cu_array[depth][x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)];

  cur_cu->intra.cost = 0xffffffff;
  cur_cu->inter.cost = 0xffffffff;

  // Force split on border
  if (depth != MAX_DEPTH && border) {
    uint8_t change = 1 << (MAX_DEPTH - 1 - depth);

    search_tree(encoder, x_ctb, y_ctb, depth + 1);
    if (!border_x || border_split_x) {
      search_tree(encoder, x_ctb + change, y_ctb, depth + 1);
    }
    if (!border_y || border_split_y) {
      search_tree(encoder, x_ctb, y_ctb + change, depth + 1);
    }
    // border is known to be non-zero here, so only the split flags decide.
    if (border_split_x && border_split_y) {
      search_tree(encoder, x_ctb + change, y_ctb + change, depth + 1);
    }
    return;
  }

  // INTER SEARCH
  if (cur_pic->slicetype != SLICE_I
      && depth >= MIN_INTER_SEARCH_DEPTH && depth <= MAX_INTER_SEARCH_DEPTH) {
    picture *ref_pic = encoder->ref->pics[0];
    unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
    cu_info *ref_cu = &ref_pic->cu_array[MAX_DEPTH][y_ctb * width_in_scu + x_ctb];
    int x = x_ctb * CU_MIN_SIZE_PIXELS;
    int y = y_ctb * CU_MIN_SIZE_PIXELS;
    int start_x = 0;
    int start_y = 0;

    // Use the vector of the co-located CU in the reference picture as the
    // starting point. Convert from sub-pixel accuracy.
    if (ref_cu->type == CU_INTER) {
      start_x = ref_cu->inter.mv[0] >> 2;
      start_y = ref_cu->inter.mv[1] >> 2;
    }

    hexagon_search(cur_pic, ref_pic,
                   cur_cu, x, y,
                   start_x, start_y, depth);

    cur_cu->inter.mv_dir = 1;
  }

  // INTRA SEARCH
  if (depth >= MIN_INTRA_SEARCH_DEPTH && depth <= MAX_INTRA_SEARCH_DEPTH
      && (cur_pic->slicetype == SLICE_I || USE_INTRA_IN_P)) {
    uint32_t width = LCU_WIDTH >> depth;
    // Side length of the reference buffer: twice the CU width plus 8.
    int rec_stride = (LCU_WIDTH >> (depth)) * 2 + 8;

    // INTRAPREDICTION
    int16_t pred[LCU_WIDTH * LCU_WIDTH + 1];
    int16_t rec[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8)];
    // Skip the first row and the first column of the reference buffer.
    int16_t *rec_shift = &rec[rec_stride + 1];

    // Build reconstructed block to use in prediction with extrapolated borders
    search_buildReferenceBorder(cur_pic, x_ctb, y_ctb,
                                rec_stride, rec, rec_stride, 0);
    cur_cu->intra.mode = (uint8_t)intra_prediction(cur_pic->y_data,
                           encoder->in.width, rec_shift, rec_stride,
                           x_ctb * (LCU_WIDTH >> (MAX_DEPTH)), y_ctb * (LCU_WIDTH >> (MAX_DEPTH)),
                           width, pred, width, &cur_cu->intra.cost);
  }

  // Split and search to max_depth
  if (depth < MAX_INTRA_SEARCH_DEPTH && depth < MAX_INTER_SEARCH_DEPTH) {
    // Split blocks and remember to change x and y block positions
    uint8_t change = 1 << (MAX_DEPTH - 1 - depth);

    search_tree(encoder, x_ctb, y_ctb, depth + 1);
    search_tree(encoder, x_ctb + change, y_ctb, depth + 1);
    search_tree(encoder, x_ctb, y_ctb + change, depth + 1);
    search_tree(encoder, x_ctb + change, y_ctb + change, depth + 1);
  }
}
2013-09-20 09:17:13 +00:00
/**
 * \brief Choose between splitting a CU and coding it as inter or intra.
 *
 * The costs stored for the CU are compared with the summed best costs of
 * its four sub-CUs plus a split penalty. When the split is cheaper than
 * both stored costs, the decisions already made for the sub-CUs are left in
 * place. Otherwise the CU is written as inter or intra at this depth,
 * inter being preferred when the two costs are equal.
 *
 * \param encoder  Encoder state; the CU array of the current picture is
 *                 read and updated.
 * \param x_ctb    x position of the CU in SCU units.
 * \param y_ctb    y position of the CU in SCU units.
 * \param depth    Depth of the CU.
 * \return Cost of the chosen alternative.
 */
uint32_t search_best_mode(encoder_control *encoder,
                          uint16_t x_ctb, uint16_t y_ctb, uint8_t depth)
{
  cu_info *cur_cu = &encoder->in.cur_pic->cu_array[depth]
                       [x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)];
  uint32_t best_intra_cost = cur_cu->intra.cost;
  uint32_t best_inter_cost = cur_cu->inter.cost;
  uint32_t lambda_cost = (4 * g_lambda_cost[encoder->QP]) << 4;  // TODO: Correct cost calculation

  if (depth < MAX_INTRA_SEARCH_DEPTH && depth < MAX_INTER_SEARCH_DEPTH) {
    // Start from the split penalty and accumulate all four sub-CUs on top of
    // it; assigning the first sub-CU cost would discard the penalty.
    // NOTE(review): the sum is 32 bits wide and wraps around if sub-CUs
    // report very large costs such as 0xffffffff - confirm that cannot
    // happen for the sub-CUs visited here.
    uint32_t cost = lambda_cost;
    uint8_t change = 1 << (MAX_DEPTH - 1 - depth);

    cost += search_best_mode(encoder, x_ctb, y_ctb, depth + 1);
    cost += search_best_mode(encoder, x_ctb + change, y_ctb, depth + 1);
    cost += search_best_mode(encoder, x_ctb, y_ctb + change, depth + 1);
    cost += search_best_mode(encoder, x_ctb + change, y_ctb + change, depth + 1);

    if (cost < best_intra_cost && cost < best_inter_cost) {
      // Better value was found at a lower level.
      return cost;
    }
  }

  // If search hasn't been performed at all for this block, the cost will be
  // max value, so it is safe to just compare costs. It just has to be made
  // sure that no value overflows.
  if (best_inter_cost <= best_intra_cost) {
    inter_set_block(encoder->in.cur_pic, x_ctb, y_ctb, depth, cur_cu);
    return best_inter_cost;
  } else {
    intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth,
                         cur_cu->intra.mode);
    return best_intra_cost;
  }
}
2013-09-20 09:17:13 +00:00
/**
 * \brief Run the mode search for every LCU of the current picture.
 *
 * For each LCU, in raster order, search_tree() computes the costs of the
 * whole CU tree and search_best_mode() then decides the coding modes. When
 * RENDER_CU is non-zero the CU tree is also rendered after each of the two
 * steps, into "cu_search.html" and "cu_best.html" respectively.
 *
 * \param encoder  Encoder state holding the current picture.
 */
void search_slice_data(encoder_control *encoder)
{
  int16_t x_lcu, y_lcu;
  FILE *fp = NULL, *fp2 = NULL;

  if (RENDER_CU) {
    fp = open_cu_file("cu_search.html");
    fp2 = open_cu_file("cu_best.html");
  }

  // Loop through every LCU in the slice
  for (y_lcu = 0; y_lcu < encoder->in.height_in_lcu; y_lcu++) {
    for (x_lcu = 0; x_lcu < encoder->in.width_in_lcu; x_lcu++) {
      uint8_t depth = 0;

      // Recursive function for looping through all the sub-blocks
      search_tree(encoder, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, depth);
      // Rendering is skipped when the file handle is null, matching the
      // checks made before closing the files.
      // NOTE(review): presumably open_cu_file() returns a null pointer on
      // failure - confirm.
      if (RENDER_CU && fp) {
        render_cu_file(encoder, encoder->in.cur_pic, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp);
      }

      // Decide actual coding modes
      search_best_mode(encoder, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, depth);
      if (RENDER_CU && fp2) {
        render_cu_file(encoder, encoder->in.cur_pic, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp2);
      }
    }
  }

  if (RENDER_CU && fp) {
    close_cu_file(fp);
  }
  if (RENDER_CU && fp2) {
    close_cu_file(fp2);
  }
}