Reimplement intra_build_reference_border to support NxN.

The new implementation uses precalculated tables to look up the number of
reference pixels available in coded CUs. Otherwise it works just like the
previous version.

- NxN mostly works. Prediction appears to be almost correct but there is a
  slight error in the exact values, probably related to filtering.
This commit is contained in:
Ari Koivula 2014-01-22 15:57:19 +02:00
parent 71a996f50f
commit 81b51f17d8
2 changed files with 254 additions and 69 deletions

View file

@ -427,95 +427,159 @@ void intra_recon(pixel* rec,uint32_t recstride, uint32_t xpos, uint32_t ypos,uin
} }
/** /**
* \brief this functions build a reference block (only borders) used for intra predictions * \brief Build top and left borders for a reference block.
* \param pic picture to use as a source, should contain full CU-data * \param pic picture to use as a source
* \param outwidth width of the prediction block * \param outwidth width of the prediction block
* \param chroma signaling if chroma is used, 0 = luma, 1 = U and 2 = V * \param chroma signaling if chroma is used, 0 = luma, 1 = U and 2 = V
* *
* The end result is 2*width+8 x 2*width+8 array, with only the top and left * The end result is 2*width+8 x 2*width+8 array, with only the top and left
* edge pixels filled with the reconstructed pixels. * edge pixels filled with the reconstructed pixels.
*/ */
void intra_build_reference_border(picture *pic, int32_t x, int32_t y,int16_t outwidth, pixel *dst, int32_t dststride, int8_t chroma) void intra_build_reference_border(picture *pic, int32_t x_luma, int32_t y_luma, int16_t outwidth,
pixel *dst, int32_t dststride, int8_t chroma)
{ {
int32_t left_column; //!< left column iterator // Some other function might make use of the arrays num_ref_pixels_top and
pixel val; //!< variable to store extrapolated value // num_ref_pixels_left in the future, but until that happens lets leave
int32_t i; //!< index iterator // them here.
pixel dc_val = 1<<(g_bitdepth-1); //!< default predictor value
int32_t top_row; //!< top row iterator
int32_t src_width = (pic->width>>(chroma?1:0)); //!< source picture width
int32_t src_height = (pic->height>>(chroma?1:0));//!< source picture height
pixel *src = (!chroma) ? pic->y_recdata : ((chroma == 1) ? pic->u_recdata : pic->v_recdata); //!< input picture pointer
int16_t scu_width = LCU_WIDTH>>(MAX_DEPTH+(chroma?1:0)); //!< Smallest Coding Unit width
int32_t x_cu = x >> MIN_SIZE;
int32_t y_cu = y >> MIN_SIZE;
int xx = chroma ? x / 2 : x; /**
int yy = chroma ? y / 2 : y; * \brief Table for looking up the number of intra reference pixels based on
* prediction units coordinate within an LCU.
*
* This table was generated by "tools/generate_ref_pixel_tables.py".
*/
static const uint8_t num_ref_pixels_top[16][16] = {
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 32, 28, 24, 20, 16, 12, 8, 4, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 32, 28, 24, 20, 16, 12, 8, 4, 32, 28, 24, 20, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4, 16, 12, 8, 4 },
{ 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 }
};
pixel *src_shifted = &src[xx + yy * src_width]; //!< input picture pointer shifted to start from the left-top corner of the current block /**
int width_in_scu = pic->width_in_lcu<<MAX_DEPTH; //!< picture width in smallest CU * \brief Table for looking up the number of intra reference pixels based on
* prediction units coordinate within an LCU.
*
* This table was generated by "tools/generate_ref_pixel_tables.py".
*/
static const uint8_t num_ref_pixels_left[16][16] = {
{ 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 },
{ 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 },
{ 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 },
{ 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 },
{ 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 },
{ 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
{ 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 },
{ 64, 4, 4, 4, 12, 4, 4, 4, 28, 4, 4, 4, 12, 4, 4, 4 },
{ 64, 4, 8, 4, 8, 4, 8, 4, 24, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 4, 4, 4, 4, 4, 4, 4, 20, 4, 4, 4, 4, 4, 4, 4 },
{ 64, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4, 16, 4, 8, 4 },
{ 64, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4, 12, 4, 4, 4 },
{ 64, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4, 8, 4 },
{ 64, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
};
// Gather reference pixels from the left. const pixel dc_val = 1 << (g_bitdepth - 1);
if (x_cu > 0) { const int is_chroma = chroma ? 1 : 0;
int num_ref_scu = outwidth / scu_width; const int src_width = pic->width >> is_chroma;
// Check CUs needed for reference until one that isn't coded is found.
// Afterwards left_column refers to the first CU that is not coded. // input picture pointer
for (left_column = 1; left_column < num_ref_scu; left_column++) { const pixel * const src = (!chroma) ? pic->y_recdata : ((chroma == 1) ? pic->u_recdata : pic->v_recdata);
if ((y_cu + left_column) * scu_width >= src_height || !pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu + left_column) * width_in_scu].coded) {
break; // Convert luma coordinates to chroma coordinates for chroma.
} const int x = chroma ? x_luma / 2 : x_luma;
const int y = chroma ? y_luma / 2 : y_luma;
// input picture pointer shifted to start from the left-top corner of the current block
const pixel *const src_shifted = &src[x + y * src_width];
const int y_in_lcu = y_luma % LCU_WIDTH;
const int x_in_lcu = x_luma % LCU_WIDTH;
// Copy pixels for left edge.
if (x > 0) {
// Get the number of reference pixels based on the PU coordinate within the LCU.
int num_ref_pixels = num_ref_pixels_left[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma;
int i;
pixel nearest_pixel;
// Max pixel we can copy from src is yy + outwidth - 1 because the dst
// extends one pixel to the left.
num_ref_pixels = MIN(num_ref_pixels, outwidth - 1);
// There are no coded pixels below the bottom of the LCU due to raster scan order.
if ((num_ref_pixels << is_chroma) + y_in_lcu > LCU_WIDTH) {
num_ref_pixels = (LCU_WIDTH - y_in_lcu) >> is_chroma;
} }
// Copy pixels from coded CUs. // Copy pixels from coded CUs.
for (i = 0; i < left_column*scu_width - 1; i ++) { for (i = 0; i < num_ref_pixels; ++i) {
dst[(i + 1) * dststride] = src_shifted[i*src_width - 1]; dst[(i + 1) * dststride] = src_shifted[i*src_width - 1];
} }
// Extend the last pixel for the rest of the reference values.
// Extrapolate the rest from nearest pixel. nearest_pixel = dst[i * dststride];
if (left_column != num_ref_scu) { for (i = num_ref_pixels; i < outwidth - 1; ++i) {
val = src_shifted[(left_column * scu_width - 1) * src_width - 1]; dst[i * dststride] = nearest_pixel;
for (i = (left_column * scu_width); i < outwidth; i++) {
dst[i * dststride] = val;
}
}
} else { // If left column not available, copy from toprow or use the default predictor
val = y_cu ? src_shifted[-src_width] : dc_val;
for (i = 0; i < outwidth; i++) {
dst[i * dststride] = val;
}
}
// Gather reference pixels from the left.
if(y_cu) {
int num_ref_scu = outwidth / scu_width;
for(top_row = 1; top_row < num_ref_scu; top_row++) {
// If over the picture width or block not yet coded, stop
if ((x_cu + top_row) * scu_width >= src_width || !pic->cu_array[MAX_DEPTH][x_cu + top_row+(y_cu - 1) * width_in_scu].coded) {
break;
}
}
// Copy the pixels to output
for(i = 0; i < top_row * scu_width - 1; i++) {
dst[i + 1] = src_shifted[i - src_width];
}
if(top_row != num_ref_scu) {
val = src_shifted[(top_row * scu_width) - src_width - 1];
for(i = (top_row * scu_width); i < outwidth; i++) {
dst[i] = val;
}
} }
} else { } else {
val = x_cu ? src_shifted[-1] : dc_val; // If we are on the left edge, extend the first pixel of the top row.
pixel nearest_pixel = y > 0 ? src_shifted[-src_width] : dc_val;
int i;
for (i = 0; i < outwidth - 1; i++) {
dst[i * dststride] = nearest_pixel;
}
}
// Copy pixels for top edge.
if (y > 0) {
// Get the number of reference pixels based on the PU coordinate within the LCU.
int num_ref_pixels = num_ref_pixels_top[y_in_lcu / 4][x_in_lcu / 4] >> is_chroma;
int i;
pixel nearest_pixel;
// Max pixel we can copy from src is yy + outwidth - 1 because the dst
// extends one pixel to the left.
num_ref_pixels = MIN(num_ref_pixels, outwidth - 1);
// All LCUs in the row above have been coded.
if (x + num_ref_pixels > src_width) {
num_ref_pixels = src_width - x;
}
// Copy pixels from coded CUs.
// For some reason copying the all the refe
for (i = 0; i < num_ref_pixels; ++i) {
dst[i + 1] = src_shifted[i - src_width];
}
// Extend the last pixel for the rest of the reference values.
nearest_pixel = src_shifted[num_ref_pixels - src_width - 1];
for (; i < outwidth - 1; ++i) {
dst[i + 1] = nearest_pixel;
}
} else {
// Extend nearest pixel.
pixel nearest_pixel = x > 0 ? src_shifted[-1] : dc_val;
int i;
for(i = 1; i < outwidth; i++) for(i = 1; i < outwidth; i++)
{ {
dst[i] = val; dst[i] = nearest_pixel;
} }
} }
// Topleft corner sample // Topleft corner sample
dst[0] = (x_cu && y_cu) ? src_shifted[-src_width - 1] : dst[dststride]; dst[0] = (x > 0 && y > 0) ? src_shifted[-src_width - 1] : dst[dststride];
} }

View file

@ -0,0 +1,121 @@
"""This is a script that generates tables for Kvazaar HEVC encoder.
This script is provided as reference, in case we ever need to change the tables
or generate more similar tables.
Because the CUs are coded in Z-order a particular index in the LCU will always
have the same number of coded reference pixels, except if the PU is on the very
top or left edge of the LCU.
"""
import numpy
def make_z_order_table(width, coord=None, zid=0, min_width=4, result=None):
    """Return a table with the quadtree z-order.

    The area is split recursively into four quadrants, visited in the order
    top-left, top-right, bottom-left, bottom-right. Recursion stops when
    half of the area's width would be smaller than min_width, at which
    point the running z-order index is stored in the table.

    Args:
        width: width of the area (LCU)
        coord: numpy.array with index 0 as x and 1 as y
        zid: z-order index of the first PU inside the area
        min_width: width at which the recursion is stopped
        result: numpy.array with the current table

    Returns: numpy.array with the quadtree z-order.
    """
    if coord is None:
        coord = numpy.array([0, 0])
    if result is None:
        # Floor division keeps the array shape integral on Python 3 too.
        num_pu = width // min_width
        result = numpy.zeros((num_pu, num_pu), numpy.int16)

    offset = width // 2
    if offset >= min_width:
        # Number of PUs inside one quadrant, i.e. the z-index step between
        # consecutive quadrants.
        num_pu = offset ** 2 // min_width ** 2
        # Recurse in quadtree z-order.
        quadrants = [[0, 0], [1, 0], [0, 1], [1, 1]]
        for num, quadrant in enumerate(quadrants):
            result = make_z_order_table(offset,
                                        coord + numpy.array(quadrant) * offset,
                                        zid + num * num_pu, min_width, result)
    else:
        # Integer PU coordinate; true division would give a float index.
        pu = coord // min_width
        result[pu[1]][pu[0]] = zid
    return result
def num_lessed_zid_on_left(table, x, y):
    """Z-order table + coord -> number of ref PUs on the left.

    Counts, starting at row y and going down, how many consecutive PUs in
    the column x - 1 have a z-order index that is not greater than the
    index of the PU at (x, y).

    Args:
        table: square 2d table of z-order indices, indexed as table[y][x]
        x: PU column
        y: PU row

    Returns:
        Number of PUs counted. For x == 0 the full table size is returned,
        because there is no column to the left inside the table.
    """
    # Derive the size from the table instead of hard-coding 16.
    size = len(table)
    if x == 0:
        return size

    i = 0
    while y + i < size and table[y + i][x - 1] <= table[y][x]:
        i += 1
    return i
def num_lessed_zid_on_top(table, x, y):
    """Z-order table + coord -> number of ref PUs on the top.

    Counts, starting at column x and going right, how many consecutive PUs
    in the row y - 1 have a z-order index that is not greater than the
    index of the PU at (x, y).

    Args:
        table: square 2d table of z-order indices, indexed as table[y][x]
        x: PU column
        y: PU row

    Returns:
        Number of PUs counted. For y == 0 the full table size is returned,
        because there is no row above inside the table.
    """
    # Derive the size from the table instead of hard-coding 16.
    size = len(table)
    if y == 0:
        return size

    i = 0
    while x + i < size and table[y - 1][x + i] <= table[y][x]:
        i += 1
    return i
def matrix_to_initializer_list(table):
    """Format a 2d table as the rows of a C initializer list.

    Every number is right-aligned in a field at least two characters wide
    so that the columns of the printed table line up.

    Args:
        table: list(list(int)) representing 2d array

    Returns:
        str with one "{ a, b, ... }" row per table row, the rows separated
        by a comma and a newline.
    """
    rows = []
    for line in table:
        # Pad each number to two characters and separate them with commas.
        cells = ["{0: >2}".format(value) for value in line]
        rows.append(", ".join(cells))

    # Close the current row, start the next one on a new line.
    joined_rows = " },\n{ ".join(rows)
    return "{ %s }" % joined_rows
def main():
    """Build the reference pixel tables for a 64x64 LCU and print them.

    Prints the z-order table and the left/top tables in PU units first,
    then the left and top tables in pixels formatted as C initializer rows.
    """
    num_pu = 16
    zid_table = make_z_order_table(64)

    left_table = numpy.zeros((num_pu, num_pu), numpy.int16)
    top_table = numpy.zeros((num_pu, num_pu), numpy.int16)
    for y in range(num_pu):
        for x in range(num_pu):
            left_table[y][x] = num_lessed_zid_on_left(zid_table, x, y)
            top_table[y][x] = num_lessed_zid_on_top(zid_table, x, y)

    # Single-argument print calls behave the same on Python 2 and 3.
    print(zid_table)
    print(left_table)
    print(top_table)

    # Multiply by number of pixels in a PU
    left_table = left_table * 4
    top_table = top_table * 4

    print("")
    print("left")
    print(matrix_to_initializer_list(left_table))
    print("")
    print("top")
    print(matrix_to_initializer_list(top_table))
    print("")


if __name__ == '__main__':
    main()