mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 20:54:07 +00:00
Merge branch 'tiles_working_with_p_frames'
This commit is contained in:
commit
f348532bae
|
@ -5,7 +5,7 @@ ifeq (, $(ARCH))
|
||||||
endif
|
endif
|
||||||
SYSTEM = $(shell uname -s)
|
SYSTEM = $(shell uname -s)
|
||||||
ASMFLAGS =
|
ASMFLAGS =
|
||||||
DFLAGS = -O2 -g -Werror
|
DFLAGS = -O2 -g -Werror -DUSE_TILES=1
|
||||||
|
|
||||||
# ARCH related flags
|
# ARCH related flags
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
|
@ -44,10 +44,10 @@ else
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CC = gcc
|
CC = gcc -fopenmp
|
||||||
CCFLAGS = $(DFLAGS) -I. -I/usr/local/include -L/usr/local/lib -Wall
|
CCFLAGS = $(DFLAGS) -I. -I/usr/local/include -L/usr/local/lib -Wall
|
||||||
LDFLAGS += -lm
|
LDFLAGS += -lm
|
||||||
LD = gcc
|
LD = gcc -fopenmp
|
||||||
YASM = yasm
|
YASM = yasm
|
||||||
ASMOBJS = cpu.o
|
ASMOBJS = cpu.o
|
||||||
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o extras/getopt.o
|
OBJS = interface_main.o encmain.o bitstream.o cabac.o config.o context.o encoder.o filter.o inter.o intra.o nal.o picture.o rdo.o sao.o scalinglist.o search.o tables.o transform.o extras/getopt.o
|
||||||
|
|
|
@ -715,8 +715,9 @@ void encode_one_frame(encoder_state * const encoder_state)
|
||||||
|
|
||||||
if (USE_TILES && encoder->tiles_enable) {
|
if (USE_TILES && encoder->tiles_enable) {
|
||||||
#if USE_TILES
|
#if USE_TILES
|
||||||
int i,x,y;
|
int x,y;
|
||||||
//This can be parallelized
|
//This can be parallelized
|
||||||
|
#pragma omp parallel for private(x,y) collapse(2)
|
||||||
for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
|
for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
|
||||||
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
|
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
|
||||||
const int tile_width_in_lcu = encoder->tiles_col_bd[x+1]-encoder->tiles_col_bd[x];
|
const int tile_width_in_lcu = encoder->tiles_col_bd[x+1]-encoder->tiles_col_bd[x];
|
||||||
|
@ -727,7 +728,7 @@ void encode_one_frame(encoder_state * const encoder_state)
|
||||||
const int tile_height = MIN(tile_height_in_lcu * LCU_WIDTH, encoder->in.height - tile_offset_y);
|
const int tile_height = MIN(tile_height_in_lcu * LCU_WIDTH, encoder->in.height - tile_offset_y);
|
||||||
const int tile_offset_full = tile_offset_x+tile_offset_y*encoder_state->cur_pic->width;
|
const int tile_offset_full = tile_offset_x+tile_offset_y*encoder_state->cur_pic->width;
|
||||||
const int tile_offset_half = tile_offset_x/2+tile_offset_y/2*encoder_state->cur_pic->width/2;
|
const int tile_offset_half = tile_offset_x/2+tile_offset_y/2*encoder_state->cur_pic->width/2;
|
||||||
i = y * encoder->tiles_num_tile_columns + x;
|
const int i = y * encoder->tiles_num_tile_columns + x;
|
||||||
|
|
||||||
//TODO: ref frames
|
//TODO: ref frames
|
||||||
|
|
||||||
|
@ -751,7 +752,7 @@ void encode_one_frame(encoder_state * const encoder_state)
|
||||||
//This has to be serial
|
//This has to be serial
|
||||||
for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
|
for (y=0; y < encoder->tiles_num_tile_rows; ++y) {
|
||||||
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
|
for (x=0; x < encoder->tiles_num_tile_columns; ++x) {
|
||||||
i = y * encoder->tiles_num_tile_columns + x;
|
const int i = y * encoder->tiles_num_tile_columns + x;
|
||||||
|
|
||||||
if (x == (encoder->tiles_num_tile_columns-1) && y == (encoder->tiles_num_tile_rows-1)) {
|
if (x == (encoder->tiles_num_tile_columns-1) && y == (encoder->tiles_num_tile_rows-1)) {
|
||||||
//Last tile
|
//Last tile
|
||||||
|
|
36
src/inter.c
36
src/inter.c
|
@ -74,7 +74,7 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
|
||||||
* \param lcu destination lcu
|
* \param lcu destination lcu
|
||||||
* \returns Void
|
* \returns Void
|
||||||
*/
|
*/
|
||||||
void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu)
|
void inter_recon_lcu(const encoder_state * const encoder_state, const picture * const ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu)
|
||||||
{
|
{
|
||||||
int x,y,coord_x,coord_y;
|
int x,y,coord_x,coord_y;
|
||||||
int16_t mv[2] = { mv_param[0], mv_param[1] };
|
int16_t mv[2] = { mv_param[0], mv_param[1] };
|
||||||
|
@ -83,12 +83,12 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
|
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
|
||||||
|
|
||||||
// negative overflow flag
|
// negative overflow flag
|
||||||
int8_t overflow_neg_x = (xpos + (mv[0]>>2) < 0)?1:0;
|
int8_t overflow_neg_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) < 0)?1:0;
|
||||||
int8_t overflow_neg_y = (ypos + (mv[1]>>2) < 0)?1:0;
|
int8_t overflow_neg_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) < 0)?1:0;
|
||||||
|
|
||||||
// positive overflow flag
|
// positive overflow flag
|
||||||
int8_t overflow_pos_x = (xpos + (mv[0]>>2) + width > ref->width )?1:0;
|
int8_t overflow_pos_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) + width > ref->width )?1:0;
|
||||||
int8_t overflow_pos_y = (ypos + (mv[1]>>2) + width > ref->height)?1:0;
|
int8_t overflow_pos_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) + width > ref->height)?1:0;
|
||||||
|
|
||||||
// Chroma half-pel
|
// Chroma half-pel
|
||||||
#define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
|
#define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
|
||||||
|
@ -114,7 +114,7 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
// Fill source blocks with data from reference, -4...width+4
|
// Fill source blocks with data from reference, -4...width+4
|
||||||
for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) {
|
for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) {
|
||||||
// calculate y-pixel offset
|
// calculate y-pixel offset
|
||||||
coord_y = y + (mv[1]>>1);
|
coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1);
|
||||||
|
|
||||||
// On y-overflow set coord_y accordingly
|
// On y-overflow set coord_y accordingly
|
||||||
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
||||||
|
@ -124,7 +124,7 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
coord_y *= ref_width_c;
|
coord_y *= ref_width_c;
|
||||||
|
|
||||||
for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) {
|
for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) {
|
||||||
coord_x = x + (mv[0]>>1);
|
coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1);
|
||||||
|
|
||||||
// On x-overflow set coord_x accordingly
|
// On x-overflow set coord_x accordingly
|
||||||
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
||||||
|
@ -139,8 +139,8 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter the block to half-pel resolution
|
// Filter the block to half-pel resolution
|
||||||
filter_inter_halfpel_chroma(encoder, halfpel_src_off_u, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_u, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
filter_inter_halfpel_chroma(encoder_state->encoder_control, halfpel_src_off_u, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_u, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
||||||
filter_inter_halfpel_chroma(encoder, halfpel_src_off_v, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_v, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
filter_inter_halfpel_chroma(encoder_state->encoder_control, halfpel_src_off_v, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_v, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
||||||
|
|
||||||
// Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
|
// Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
|
||||||
for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
|
for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
|
||||||
|
@ -161,8 +161,8 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||||
|
|
||||||
coord_x = x + mv[0];
|
coord_x = (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0];
|
||||||
coord_y = y + mv[1];
|
coord_y = (y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1];
|
||||||
overflow_neg_x = (coord_x < 0)?1:0;
|
overflow_neg_x = (coord_x < 0)?1:0;
|
||||||
overflow_neg_y = (coord_y < 0)?1:0;
|
overflow_neg_y = (coord_y < 0)?1:0;
|
||||||
|
|
||||||
|
@ -196,8 +196,8 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||||
|
|
||||||
coord_x = x + (mv[0]>>1);
|
coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH >> 1)) + (mv[0]>>1);
|
||||||
coord_y = y + (mv[1]>>1);
|
coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH >> 1)) + (mv[1]>>1);
|
||||||
|
|
||||||
overflow_neg_x = (coord_x < 0)?1:0;
|
overflow_neg_x = (coord_x < 0)?1:0;
|
||||||
overflow_neg_y = (y + (mv[1]>>1) < 0)?1:0;
|
overflow_neg_y = (y + (mv[1]>>1) < 0)?1:0;
|
||||||
|
@ -229,11 +229,11 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
// Copy Luma
|
// Copy Luma
|
||||||
for (y = ypos; y < ypos + width; y++) {
|
for (y = ypos; y < ypos + width; y++) {
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||||
coord_y = (y + mv[1]) * ref->width; // pre-calculate
|
coord_y = ((y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate
|
||||||
for (x = xpos; x < xpos + width; x++) {
|
for (x = xpos; x < xpos + width; x++) {
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||||
|
|
||||||
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + x + mv[0]];
|
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,11 +242,11 @@ void inter_recon_lcu(const encoder_control * const encoder, picture* ref,int32_t
|
||||||
// TODO: chroma fractional pixel interpolation
|
// TODO: chroma fractional pixel interpolation
|
||||||
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||||
coord_y = (y + (mv[1]>>1)) * ref_width_c; // pre-calculate
|
coord_y = ((y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1)) * ref_width_c; // pre-calculate
|
||||||
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||||
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + x + (mv[0]>>1)];
|
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
||||||
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + x + (mv[0]>>1)];
|
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
|
|
||||||
|
|
||||||
void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
|
void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu);
|
||||||
void inter_recon_lcu(const encoder_control *encoder, picture* ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv_param[2], lcu_t *lcu);
|
void inter_recon_lcu(const encoder_state*const encoder_state, const picture * ref, int32_t xpos, int32_t ypos, int32_t width, const int16_t mv_param[2], lcu_t* lcu);
|
||||||
|
|
||||||
void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1,
|
void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_info **b0, cu_info **b1,
|
||||||
cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
|
cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
|
||||||
|
|
|
@ -910,7 +910,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
|
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
|
||||||
intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
|
intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
|
||||||
} else if (cur_cu->type == CU_INTER) {
|
} else if (cur_cu->type == CU_INTER) {
|
||||||
inter_recon_lcu(encoder_state->encoder_control, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
|
inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
|
||||||
encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);
|
encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);
|
||||||
|
|
||||||
if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
|
if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) {
|
||||||
|
|
Loading…
Reference in a new issue