mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Merge branch 'refactor_encoder_state'
This commit is contained in:
commit
5890dd5350
|
@ -14,7 +14,8 @@
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||||
<PreprocessorDefinitions>WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<DisableSpecificWarnings>4244;4204;4206</DisableSpecificWarnings>
|
<DisableSpecificWarnings>4244;4204;4206;4028</DisableSpecificWarnings>
|
||||||
|
<OpenMPSupport>true</OpenMPSupport>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<AdditionalDependencies>Ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>Ws2_32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
|
|
77
src/config.c
77
src/config.c
|
@ -86,6 +86,12 @@ int config_init(config *cfg)
|
||||||
cfg->tiles_width_split = NULL;
|
cfg->tiles_width_split = NULL;
|
||||||
cfg->tiles_height_split = NULL;
|
cfg->tiles_height_split = NULL;
|
||||||
|
|
||||||
|
cfg->wpp = 0;
|
||||||
|
cfg->slice_count = 1;
|
||||||
|
cfg->slice_addresses_in_ts = MALLOC(int32_t, 1);
|
||||||
|
cfg->slice_addresses_in_ts[0] = 0;
|
||||||
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,6 +107,7 @@ int config_destroy(config *cfg)
|
||||||
FREE_POINTER(cfg->cqmfile);
|
FREE_POINTER(cfg->cqmfile);
|
||||||
FREE_POINTER(cfg->tiles_width_split);
|
FREE_POINTER(cfg->tiles_width_split);
|
||||||
FREE_POINTER(cfg->tiles_height_split);
|
FREE_POINTER(cfg->tiles_height_split);
|
||||||
|
FREE_POINTER(cfg->slice_addresses_in_ts);
|
||||||
free(cfg);
|
free(cfg);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -155,7 +162,7 @@ static int parse_enum(const char *arg, const char * const *names, int8_t *dst)
|
||||||
static int parse_tiles_specification(const char* const arg, int32_t * const ntiles, int32_t** const array) {
|
static int parse_tiles_specification(const char* const arg, int32_t * const ntiles, int32_t** const array) {
|
||||||
const char* current_arg = NULL;
|
const char* current_arg = NULL;
|
||||||
int32_t current_value;
|
int32_t current_value;
|
||||||
int32_t values[256];
|
int32_t values[MAX_TILES_PER_DIM];
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -189,6 +196,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
|
||||||
if (current_arg) ++current_arg;
|
if (current_arg) ++current_arg;
|
||||||
values[*ntiles] = current_value;
|
values[*ntiles] = current_value;
|
||||||
++(*ntiles);
|
++(*ntiles);
|
||||||
|
if (MAX_TILES_PER_DIM <= *ntiles) break;
|
||||||
} while (current_arg);
|
} while (current_arg);
|
||||||
|
|
||||||
if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) {
|
if (MAX_TILES_PER_DIM <= *ntiles || 0 >= *ntiles) {
|
||||||
|
@ -210,6 +218,67 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int parse_slice_specification(const char* const arg, int32_t * const nslices, int32_t** const array) {
|
||||||
|
const char* current_arg = NULL;
|
||||||
|
int32_t current_value;
|
||||||
|
int32_t values[MAX_SLICES];
|
||||||
|
|
||||||
|
int i;
|
||||||
|
|
||||||
|
//Free pointer in any case
|
||||||
|
if (*array) {
|
||||||
|
FREE_POINTER(*array);
|
||||||
|
}
|
||||||
|
|
||||||
|
//If the arg starts with u, we want an uniform split
|
||||||
|
if (arg[0]=='u') {
|
||||||
|
*nslices = atoi(arg+1);
|
||||||
|
if (MAX_SLICES <= *nslices || 0 >= *nslices) {
|
||||||
|
fprintf(stderr, "Invalid number of tiles (0 < %d <= %d = MAX_SLICES)!\n", *nslices + 1, MAX_SLICES);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
//Done with parsing
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
//We have a comma-separated list of int for the split...
|
||||||
|
current_arg = arg;
|
||||||
|
//We always have a slice starting at 0
|
||||||
|
values[0] = 0;
|
||||||
|
*nslices = 1;
|
||||||
|
do {
|
||||||
|
int ret = sscanf(current_arg, "%d", &current_value);
|
||||||
|
if (ret != 1) {
|
||||||
|
fprintf(stderr, "Could not parse integer \"%s\"!\n", current_arg);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
current_arg = strchr(current_arg, ',');
|
||||||
|
//Skip the , if we found one
|
||||||
|
if (current_arg) ++current_arg;
|
||||||
|
values[*nslices] = current_value;
|
||||||
|
++(*nslices);
|
||||||
|
if (MAX_SLICES <= *nslices) break;
|
||||||
|
} while (current_arg);
|
||||||
|
|
||||||
|
if (MAX_SLICES <= *nslices || 0 >= *nslices) {
|
||||||
|
fprintf(stderr, "Invalid number of slices (0 < %d <= %d = MAX_SLICES)!\n", *nslices, MAX_SLICES);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
*array = MALLOC(int32_t, *nslices);
|
||||||
|
if (!*array) {
|
||||||
|
fprintf(stderr, "Could not allocate array for slices\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: memcpy?
|
||||||
|
for (i = 0; i < *nslices; ++i) {
|
||||||
|
(*array)[i] = values[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
static int config_parse(config *cfg, const char *name, const char *value)
|
static int config_parse(config *cfg, const char *name, const char *value)
|
||||||
{
|
{
|
||||||
static const char * const overscan_names[] = { "undef", "show", "crop", NULL };
|
static const char * const overscan_names[] = { "undef", "show", "crop", NULL };
|
||||||
|
@ -340,6 +409,10 @@ static int config_parse(config *cfg, const char *name, const char *value)
|
||||||
error = !parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split);
|
error = !parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split);
|
||||||
else if OPT("tiles-height-split")
|
else if OPT("tiles-height-split")
|
||||||
error = !parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split);
|
error = !parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split);
|
||||||
|
else if OPT("wpp")
|
||||||
|
cfg->wpp = atobool(value);
|
||||||
|
else if OPT("slice-addresses")
|
||||||
|
error = !parse_slice_specification(value, &cfg->slice_count, &cfg->slice_addresses_in_ts);
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
#undef OPT
|
#undef OPT
|
||||||
|
@ -388,6 +461,8 @@ int config_read(config *cfg,int argc, char *argv[])
|
||||||
{ "seek", required_argument, NULL, 0 },
|
{ "seek", required_argument, NULL, 0 },
|
||||||
{ "tiles-width-split", required_argument, NULL, 0 },
|
{ "tiles-width-split", required_argument, NULL, 0 },
|
||||||
{ "tiles-height-split", required_argument, NULL, 0 },
|
{ "tiles-height-split", required_argument, NULL, 0 },
|
||||||
|
{ "wpp", no_argument, NULL, 0 },
|
||||||
|
{ "slice-addresses", required_argument, NULL, 0 },
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -69,6 +69,11 @@ typedef struct
|
||||||
int32_t tiles_height_count; /*!< \brief number of tiles separation in y direction */
|
int32_t tiles_height_count; /*!< \brief number of tiles separation in y direction */
|
||||||
int32_t* tiles_width_split; /*!< \brief tiles split x coordinates (dimension: tiles_width_count) */
|
int32_t* tiles_width_split; /*!< \brief tiles split x coordinates (dimension: tiles_width_count) */
|
||||||
int32_t* tiles_height_split; /*!< \brief tiles split y coordinates (dimension: tiles_height_count) */
|
int32_t* tiles_height_split; /*!< \brief tiles split y coordinates (dimension: tiles_height_count) */
|
||||||
|
|
||||||
|
int wpp;
|
||||||
|
|
||||||
|
int32_t slice_count;
|
||||||
|
int32_t* slice_addresses_in_ts;
|
||||||
} config;
|
} config;
|
||||||
|
|
||||||
/* Function definitions */
|
/* Function definitions */
|
||||||
|
|
|
@ -151,6 +151,16 @@ int main(int argc, char *argv[])
|
||||||
" Can also be u followed by and a single int n,\n"
|
" Can also be u followed by and a single int n,\n"
|
||||||
" in which case it produces rows of uniform height.\n"
|
" in which case it produces rows of uniform height.\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
" Wpp:\n"
|
||||||
|
" --wpp: Enable wavefront parallel processing\n"
|
||||||
|
"\n"
|
||||||
|
" Slices:\n"
|
||||||
|
" --slice-addresses <string>|u<int>: \n"
|
||||||
|
" Specifies a comma separated list of LCU\n"
|
||||||
|
" positions in tile scan order of tile separations.\n"
|
||||||
|
" Can also be u followed by and a single int n,\n"
|
||||||
|
" in which case it produces uniform slice length.\n"
|
||||||
|
"\n"
|
||||||
" Deprecated parameters: (might be removed at some point)\n"
|
" Deprecated parameters: (might be removed at some point)\n"
|
||||||
" Use --input-res:\n"
|
" Use --input-res:\n"
|
||||||
" -w, --width : Width of input in pixels\n"
|
" -w, --width : Width of input in pixels\n"
|
||||||
|
@ -256,12 +266,13 @@ int main(int argc, char *argv[])
|
||||||
encoder.in.width, encoder.in.height,
|
encoder.in.width, encoder.in.height,
|
||||||
encoder.in.real_width, encoder.in.real_height);
|
encoder.in.real_width, encoder.in.real_height);
|
||||||
|
|
||||||
if (!encoder_state_init(&encoder_state, &encoder)) {
|
encoder_state.encoder_control = &encoder;
|
||||||
|
if (!encoder_state_init(&encoder_state, NULL)) {
|
||||||
goto exit_failure;
|
goto exit_failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
encoder_state.frame = 0;
|
encoder_state.global->frame = 0;
|
||||||
encoder_state.QP = (int8_t)encoder.cfg->qp;
|
encoder_state.global->QP = (int8_t)encoder.cfg->qp;
|
||||||
|
|
||||||
// Only the code that handles conformance window coding needs to know
|
// Only the code that handles conformance window coding needs to know
|
||||||
// the real dimensions. As a quick fix for broken non-multiple of 8 videos,
|
// the real dimensions. As a quick fix for broken non-multiple of 8 videos,
|
||||||
|
@ -272,14 +283,14 @@ int main(int argc, char *argv[])
|
||||||
//cfg->height = encoder.in.height;
|
//cfg->height = encoder.in.height;
|
||||||
|
|
||||||
// Start coding cycle while data on input and not on the last frame
|
// Start coding cycle while data on input and not on the last frame
|
||||||
while(!cfg->frames || encoder_state.frame < cfg->frames) {
|
while(!cfg->frames || encoder_state.global->frame < cfg->frames) {
|
||||||
int32_t diff;
|
int32_t diff;
|
||||||
double temp_psnr[3];
|
double temp_psnr[3];
|
||||||
|
|
||||||
// Skip '--seek' frames before input.
|
// Skip '--seek' frames before input.
|
||||||
// This block can be moved outside this while loop when there is a
|
// This block can be moved outside this while loop when there is a
|
||||||
// mechanism to skip the while loop on error.
|
// mechanism to skip the while loop on error.
|
||||||
if (encoder_state.frame == 0 && cfg->seek > 0) {
|
if (encoder_state.global->frame == 0 && cfg->seek > 0) {
|
||||||
int frame_bytes = cfg->width * cfg->height * 3 / 2;
|
int frame_bytes = cfg->width * cfg->height * 3 / 2;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
|
@ -302,14 +313,14 @@ int main(int argc, char *argv[])
|
||||||
// Read one frame from the input
|
// Read one frame from the input
|
||||||
if (!read_one_frame(input, &encoder_state)) {
|
if (!read_one_frame(input, &encoder_state)) {
|
||||||
if (!feof(input))
|
if (!feof(input))
|
||||||
fprintf(stderr, "Failed to read a frame %d\n", encoder_state.frame);
|
fprintf(stderr, "Failed to read a frame %d\n", encoder_state.global->frame);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The actual coding happens here, after this function we have a coded frame
|
// The actual coding happens here, after this function we have a coded frame
|
||||||
encode_one_frame(&encoder_state);
|
encode_one_frame(&encoder_state);
|
||||||
|
|
||||||
cur_pic = encoder_state.cur_pic;
|
cur_pic = encoder_state.tile->cur_pic;
|
||||||
|
|
||||||
if (cfg->debug != NULL) {
|
if (cfg->debug != NULL) {
|
||||||
// Write reconstructed frame out.
|
// Write reconstructed frame out.
|
||||||
|
@ -343,8 +354,8 @@ int main(int argc, char *argv[])
|
||||||
temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
|
temp_psnr[1] = image_psnr(cur_pic->u_data, cur_pic->u_recdata, cfg->width>>1, cfg->height>>1);
|
||||||
temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
|
temp_psnr[2] = image_psnr(cur_pic->v_data, cur_pic->v_recdata, cfg->width>>1, cfg->height>>1);
|
||||||
|
|
||||||
fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame,
|
fprintf(stderr, "POC %4d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f\n", encoder_state.global->frame,
|
||||||
"BPI"[cur_pic->slicetype%3], diff<<3,
|
"BPI"[encoder_state.global->slicetype%3], diff<<3,
|
||||||
temp_psnr[0], temp_psnr[1], temp_psnr[2]);
|
temp_psnr[0], temp_psnr[1], temp_psnr[2]);
|
||||||
|
|
||||||
// Increment total PSNR
|
// Increment total PSNR
|
||||||
|
@ -361,8 +372,8 @@ int main(int argc, char *argv[])
|
||||||
fgetpos(output,(fpos_t*)&curpos);
|
fgetpos(output,(fpos_t*)&curpos);
|
||||||
|
|
||||||
// Print statistics of the coding
|
// Print statistics of the coding
|
||||||
fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.frame, (long long unsigned int) curpos<<3,
|
fprintf(stderr, " Processed %d frames, %10llu bits AVG PSNR: %2.4f %2.4f %2.4f\n", encoder_state.global->frame, (long long unsigned int) curpos<<3,
|
||||||
psnr[0] / encoder_state.frame, psnr[1] / encoder_state.frame, psnr[2] / encoder_state.frame);
|
psnr[0] / encoder_state.global->frame, psnr[1] / encoder_state.global->frame, psnr[2] / encoder_state.global->frame);
|
||||||
fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC);
|
fprintf(stderr, " Total time: %.3f s.\n", ((float)(clock() - start_time)) / CLOCKS_PER_SEC);
|
||||||
|
|
||||||
fclose(input);
|
fclose(input);
|
||||||
|
|
1285
src/encoder.c
1285
src/encoder.c
File diff suppressed because it is too large
Load diff
|
@ -120,32 +120,91 @@ typedef struct
|
||||||
|
|
||||||
const int32_t *tiles_tile_id; /*!<spec: TileId (6.5.1); dimension: PicSizeInCtbsY */
|
const int32_t *tiles_tile_id; /*!<spec: TileId (6.5.1); dimension: PicSizeInCtbsY */
|
||||||
|
|
||||||
|
//WPP
|
||||||
|
int wpp;
|
||||||
|
|
||||||
|
//Slices
|
||||||
|
int slice_count;
|
||||||
|
const int* slice_addresses_in_ts;
|
||||||
|
|
||||||
} encoder_control;
|
} encoder_control;
|
||||||
|
|
||||||
typedef struct encoder_state {
|
typedef enum {
|
||||||
const encoder_control *encoder_control;
|
ENCODER_STATE_TYPE_INVALID = 'i',
|
||||||
|
ENCODER_STATE_TYPE_MAIN = 'M',
|
||||||
|
ENCODER_STATE_TYPE_SLICE = 'S',
|
||||||
|
ENCODER_STATE_TYPE_TILE = 'T',
|
||||||
|
ENCODER_STATE_TYPE_WAVEFRONT_ROW = 'W',
|
||||||
|
} encoder_state_type;
|
||||||
|
|
||||||
int32_t lcu_offset_x;
|
|
||||||
int32_t lcu_offset_y;
|
|
||||||
|
|
||||||
picture *cur_pic;
|
|
||||||
|
typedef struct {
|
||||||
|
double cur_lambda_cost;
|
||||||
|
|
||||||
int32_t frame;
|
int32_t frame;
|
||||||
int32_t poc; /*!< \brief picture order count */
|
int32_t poc; /*!< \brief picture order count */
|
||||||
|
|
||||||
bitstream stream;
|
int8_t QP; //!< \brief Quantization parameter
|
||||||
|
|
||||||
|
//Current picture available references
|
||||||
picture_list *ref;
|
picture_list *ref;
|
||||||
int8_t ref_list;
|
int8_t ref_list;
|
||||||
int8_t ref_idx_num[2];
|
//int8_t ref_idx_num[2];
|
||||||
int8_t QP; // \brief Quantization parameter
|
|
||||||
|
|
||||||
double cur_lambda_cost;
|
int is_radl_frame;
|
||||||
|
uint8_t pictype;
|
||||||
|
uint8_t slicetype;
|
||||||
|
|
||||||
cabac_data cabac;
|
} encoder_state_config_global;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
//Current picture to encode
|
||||||
|
picture *cur_pic;
|
||||||
|
|
||||||
|
int32_t id;
|
||||||
|
|
||||||
|
//Tile: offset in LCU for current encoder_state in global coordinates
|
||||||
|
int32_t lcu_offset_x;
|
||||||
|
int32_t lcu_offset_y;
|
||||||
|
|
||||||
|
//Position of the first element in tile scan in global coordinates
|
||||||
|
int32_t lcu_offset_in_ts;
|
||||||
|
} encoder_state_config_tile;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int32_t id;
|
||||||
|
|
||||||
|
//Global coordinates
|
||||||
|
int32_t start_in_ts;
|
||||||
|
int32_t end_in_ts;
|
||||||
|
|
||||||
|
//Global coordinates
|
||||||
|
int32_t start_in_rs;
|
||||||
|
int32_t end_in_rs;
|
||||||
|
} encoder_state_config_slice;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
//Row in image coordinates of the wavefront
|
||||||
|
int32_t lcu_offset_y;
|
||||||
|
} encoder_state_config_wfrow;
|
||||||
|
|
||||||
|
typedef struct encoder_state {
|
||||||
|
const encoder_control *encoder_control;
|
||||||
|
encoder_state_type type;
|
||||||
|
|
||||||
//List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL
|
//List of children, the last item of this list is a pseudo-encoder with encoder_control = NULL
|
||||||
//Use do { } while (encoder_state->children[++i].encoder_control)
|
//Use for (i = 0; encoder_state->children[i].encoder_control; ++i) {
|
||||||
struct encoder_state *children;
|
struct encoder_state *children;
|
||||||
|
struct encoder_state *parent;
|
||||||
|
|
||||||
|
encoder_state_config_global *global;
|
||||||
|
encoder_state_config_tile *tile;
|
||||||
|
encoder_state_config_slice *slice;
|
||||||
|
encoder_state_config_wfrow *wfrow;
|
||||||
|
|
||||||
|
bitstream stream;
|
||||||
|
cabac_data cabac;
|
||||||
} encoder_state;
|
} encoder_state;
|
||||||
|
|
||||||
int encoder_control_init(encoder_control *encoder, const config *cfg);
|
int encoder_control_init(encoder_control *encoder, const config *cfg);
|
||||||
|
@ -153,8 +212,8 @@ int encoder_control_finalize(encoder_control *encoder);
|
||||||
|
|
||||||
void encoder_control_input_init(encoder_control *encoder, int32_t width, int32_t height);
|
void encoder_control_input_init(encoder_control *encoder, int32_t width, int32_t height);
|
||||||
|
|
||||||
int encoder_state_init(encoder_state *encoder_state, const encoder_control * encoder);
|
int encoder_state_init(encoder_state * child_state, encoder_state * parent_state);
|
||||||
int encoder_state_finalize(encoder_state *encoder_state);
|
void encoder_state_finalize(encoder_state *encoder_state);
|
||||||
void encoder_state_init_lambda(encoder_state *encoder_state);
|
void encoder_state_init_lambda(encoder_state *encoder_state);
|
||||||
|
|
||||||
void encode_one_frame(encoder_state *encoder_state);
|
void encode_one_frame(encoder_state *encoder_state);
|
||||||
|
|
12
src/filter.c
12
src/filter.c
|
@ -167,7 +167,7 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state,
|
||||||
int32_t xpos, int32_t ypos,
|
int32_t xpos, int32_t ypos,
|
||||||
int8_t depth, int8_t dir)
|
int8_t depth, int8_t dir)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
const encoder_control * const encoder = encoder_state->encoder_control;
|
const encoder_control * const encoder = encoder_state->encoder_control;
|
||||||
|
|
||||||
cu_info *cu_q = &cur_pic->cu_array[(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)];
|
cu_info *cu_q = &cur_pic->cu_array[(xpos>>MIN_SIZE) + (ypos>>MIN_SIZE) * (cur_pic->width_in_lcu << MAX_DEPTH)];
|
||||||
|
@ -194,7 +194,7 @@ void filter_deblock_edge_luma(encoder_state * const encoder_state,
|
||||||
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
|
int16_t x_cu = xpos>>MIN_SIZE,y_cu = ypos>>MIN_SIZE;
|
||||||
int8_t strength = 0;
|
int8_t strength = 0;
|
||||||
|
|
||||||
int32_t qp = encoder_state->QP;
|
int32_t qp = encoder_state->global->QP;
|
||||||
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
|
int32_t bitdepth_scale = 1 << (encoder->bitdepth - 8);
|
||||||
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
|
int32_t b_index = CLIP(0, 51, qp + (beta_offset_div2 << 1));
|
||||||
int32_t beta = g_beta_table_8x8[b_index] * bitdepth_scale;
|
int32_t beta = g_beta_table_8x8[b_index] * bitdepth_scale;
|
||||||
|
@ -295,7 +295,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state,
|
||||||
int8_t depth, int8_t dir)
|
int8_t depth, int8_t dir)
|
||||||
{
|
{
|
||||||
const encoder_control * const encoder = encoder_state->encoder_control;
|
const encoder_control * const encoder = encoder_state->encoder_control;
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
cu_info *cu_q = &cur_pic->cu_array[(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)];
|
cu_info *cu_q = &cur_pic->cu_array[(x>>(MIN_SIZE-1)) + (y>>(MIN_SIZE-1)) * (cur_pic->width_in_lcu << MAX_DEPTH)];
|
||||||
|
|
||||||
// Chroma edges that do not lay on a 8x8 grid are not deblocked.
|
// Chroma edges that do not lay on a 8x8 grid are not deblocked.
|
||||||
|
@ -327,7 +327,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state,
|
||||||
int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
|
int16_t x_cu = x>>(MIN_SIZE-1),y_cu = y>>(MIN_SIZE-1);
|
||||||
int8_t strength = 2;
|
int8_t strength = 2;
|
||||||
|
|
||||||
int32_t QP = g_chroma_scale[encoder_state->QP];
|
int32_t QP = g_chroma_scale[encoder_state->global->QP];
|
||||||
int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
|
int32_t bitdepth_scale = 1 << (encoder->bitdepth-8);
|
||||||
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
|
int32_t TC_index = CLIP(0, 51+2, (int32_t)(QP + 2*(strength-1) + (tc_offset_div2 << 1)));
|
||||||
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
|
int32_t Tc = g_tc_table_8x8[TC_index]*bitdepth_scale;
|
||||||
|
@ -389,7 +389,7 @@ void filter_deblock_edge_chroma(encoder_state * const encoder_state,
|
||||||
*/
|
*/
|
||||||
void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge)
|
void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y, int8_t depth, int32_t edge)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
cu_info *cur_cu = &cur_pic->cu_array[x + y*(cur_pic->width_in_lcu << MAX_DEPTH)];
|
cu_info *cur_cu = &cur_pic->cu_array[x + y*(cur_pic->width_in_lcu << MAX_DEPTH)];
|
||||||
uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
|
uint8_t split_flag = (cur_cu->depth > depth) ? 1 : 0;
|
||||||
uint8_t border_x = (cur_pic->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
|
uint8_t border_x = (cur_pic->width < x*(LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth)) ? 1 : 0;
|
||||||
|
@ -437,7 +437,7 @@ void filter_deblock_cu(encoder_state * const encoder_state, int32_t x, int32_t y
|
||||||
*/
|
*/
|
||||||
void filter_deblock(encoder_state * const encoder_state)
|
void filter_deblock(encoder_state * const encoder_state)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
int16_t x, y;
|
int16_t x, y;
|
||||||
|
|
||||||
// TODO: Optimization: add thread for each LCU
|
// TODO: Optimization: add thread for each LCU
|
||||||
|
|
|
@ -128,6 +128,7 @@ typedef int16_t coefficient;
|
||||||
#define SIZE_NONE 15
|
#define SIZE_NONE 15
|
||||||
|
|
||||||
#define MAX_TILES_PER_DIM 16
|
#define MAX_TILES_PER_DIM 16
|
||||||
|
#define MAX_SLICES 16
|
||||||
|
|
||||||
/* Inlining functions */
|
/* Inlining functions */
|
||||||
#ifdef _MSC_VER /* Visual studio */
|
#ifdef _MSC_VER /* Visual studio */
|
||||||
|
|
34
src/inter.c
34
src/inter.c
|
@ -83,12 +83,12 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
|
int32_t ref_width_c = ref->width>>1; //!< Reference picture width in chroma pixels
|
||||||
|
|
||||||
// negative overflow flag
|
// negative overflow flag
|
||||||
int8_t overflow_neg_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) < 0)?1:0;
|
int8_t overflow_neg_x = (encoder_state->tile->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) < 0)?1:0;
|
||||||
int8_t overflow_neg_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) < 0)?1:0;
|
int8_t overflow_neg_y = (encoder_state->tile->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) < 0)?1:0;
|
||||||
|
|
||||||
// positive overflow flag
|
// positive overflow flag
|
||||||
int8_t overflow_pos_x = (encoder_state->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) + width > ref->width )?1:0;
|
int8_t overflow_pos_x = (encoder_state->tile->lcu_offset_x * LCU_WIDTH + xpos + (mv[0]>>2) + width > ref->width )?1:0;
|
||||||
int8_t overflow_pos_y = (encoder_state->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) + width > ref->height)?1:0;
|
int8_t overflow_pos_y = (encoder_state->tile->lcu_offset_y * LCU_WIDTH + ypos + (mv[1]>>2) + width > ref->height)?1:0;
|
||||||
|
|
||||||
// Chroma half-pel
|
// Chroma half-pel
|
||||||
#define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
|
#define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
|
||||||
|
@ -114,7 +114,7 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
// Fill source blocks with data from reference, -4...width+4
|
// Fill source blocks with data from reference, -4...width+4
|
||||||
for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) {
|
for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) {
|
||||||
// calculate y-pixel offset
|
// calculate y-pixel offset
|
||||||
coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1);
|
coord_y = (y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1);
|
||||||
|
|
||||||
// On y-overflow set coord_y accordingly
|
// On y-overflow set coord_y accordingly
|
||||||
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
||||||
|
@ -124,7 +124,7 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
coord_y *= ref_width_c;
|
coord_y *= ref_width_c;
|
||||||
|
|
||||||
for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) {
|
for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) {
|
||||||
coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1);
|
coord_x = (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1);
|
||||||
|
|
||||||
// On x-overflow set coord_x accordingly
|
// On x-overflow set coord_x accordingly
|
||||||
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
||||||
|
@ -161,8 +161,8 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||||
|
|
||||||
coord_x = (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0];
|
coord_x = (x + encoder_state->tile->lcu_offset_x * LCU_WIDTH) + mv[0];
|
||||||
coord_y = (y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1];
|
coord_y = (y + encoder_state->tile->lcu_offset_y * LCU_WIDTH) + mv[1];
|
||||||
overflow_neg_x = (coord_x < 0)?1:0;
|
overflow_neg_x = (coord_x < 0)?1:0;
|
||||||
overflow_neg_y = (coord_y < 0)?1:0;
|
overflow_neg_y = (coord_y < 0)?1:0;
|
||||||
|
|
||||||
|
@ -196,8 +196,8 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||||
|
|
||||||
coord_x = (x + encoder_state->lcu_offset_x * (LCU_WIDTH >> 1)) + (mv[0]>>1);
|
coord_x = (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH >> 1)) + (mv[0]>>1);
|
||||||
coord_y = (y + encoder_state->lcu_offset_y * (LCU_WIDTH >> 1)) + (mv[1]>>1);
|
coord_y = (y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH >> 1)) + (mv[1]>>1);
|
||||||
|
|
||||||
overflow_neg_x = (coord_x < 0)?1:0;
|
overflow_neg_x = (coord_x < 0)?1:0;
|
||||||
overflow_neg_y = (y + (mv[1]>>1) < 0)?1:0;
|
overflow_neg_y = (y + (mv[1]>>1) < 0)?1:0;
|
||||||
|
@ -229,11 +229,11 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
// Copy Luma
|
// Copy Luma
|
||||||
for (y = ypos; y < ypos + width; y++) {
|
for (y = ypos; y < ypos + width; y++) {
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH)-1));
|
||||||
coord_y = ((y + encoder_state->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate
|
coord_y = ((y + encoder_state->tile->lcu_offset_y * LCU_WIDTH) + mv[1]) * ref->width; // pre-calculate
|
||||||
for (x = xpos; x < xpos + width; x++) {
|
for (x = xpos; x < xpos + width; x++) {
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH)-1));
|
||||||
|
|
||||||
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + (x + encoder_state->lcu_offset_x * LCU_WIDTH) + mv[0]];
|
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = ref->y_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * LCU_WIDTH) + mv[0]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,11 +242,11 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
// TODO: chroma fractional pixel interpolation
|
// TODO: chroma fractional pixel interpolation
|
||||||
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
for (y = ypos>>1; y < (ypos + width)>>1; y++) {
|
||||||
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
int y_in_lcu = (y & ((LCU_WIDTH>>1)-1));
|
||||||
coord_y = ((y + encoder_state->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1)) * ref_width_c; // pre-calculate
|
coord_y = ((y + encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1)) + (mv[1]>>1)) * ref_width_c; // pre-calculate
|
||||||
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
for (x = xpos>>1; x < (xpos + width)>>1; x++) {
|
||||||
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
int x_in_lcu = (x & ((LCU_WIDTH>>1)-1));
|
||||||
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
lcu->rec.u[y_in_lcu*dst_width_c + x_in_lcu] = ref->u_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
||||||
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + (x + encoder_state->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
lcu->rec.v[y_in_lcu*dst_width_c + x_in_lcu] = ref->v_recdata[coord_y + (x + encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1)) + (mv[0]>>1)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -332,8 +332,8 @@ void inter_get_mv_cand(const encoder_state * const encoder_state, int32_t x, int
|
||||||
inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu);
|
inter_get_spatial_merge_candidates(x, y, depth, &b0, &b1, &b2, &a0, &a1, lcu);
|
||||||
|
|
||||||
#define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)
|
#define CALCULATE_SCALE(cu,tb,td) ((tb * ((0x4000 + (abs(td)>>1))/td) + 32) >> 6)
|
||||||
#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->poc - encoder_state->ref->pics[(cu)->inter.mv_ref]->poc;\
|
#define APPLY_MV_SCALING(cu, cand) {int td = encoder_state->global->poc - encoder_state->global->ref->pics[(cu)->inter.mv_ref]->poc;\
|
||||||
int tb = encoder_state->poc - encoder_state->ref->pics[cur_cu->inter.mv_ref]->poc;\
|
int tb = encoder_state->global->poc - encoder_state->global->ref->pics[cur_cu->inter.mv_ref]->poc;\
|
||||||
if (td != tb) { \
|
if (td != tb) { \
|
||||||
int scale = CALCULATE_SCALE(cu,tb,td); \
|
int scale = CALCULATE_SCALE(cu,tb,td); \
|
||||||
mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \
|
mv_cand[cand][0] = ((scale * (cu)->inter.mv[0] + 127 + (scale * (cu)->inter.mv[0] < 0)) >> 8 ); \
|
||||||
|
|
|
@ -375,7 +375,7 @@ int16_t intra_prediction(encoder_state * const encoder_state, pixel *orig, int32
|
||||||
intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0);
|
intra_get_pred(encoder_state->encoder_control, ref, recstride, pred, width, mode, 0);
|
||||||
|
|
||||||
sad = cost_func(pred, orig_block);
|
sad = cost_func(pred, orig_block);
|
||||||
sad += mode_cost * (int)(encoder_state->cur_lambda_cost + 0.5);
|
sad += mode_cost * (int)(encoder_state->global->cur_lambda_cost + 0.5);
|
||||||
// When rdo == 2, store best costs to an array and do full RDO later
|
// When rdo == 2, store best costs to an array and do full RDO later
|
||||||
if(rdo == 2) {
|
if(rdo == 2) {
|
||||||
int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad);
|
int rdo_mode = intra_rdo_cost_compare(rdo_costs, rdo_modes_to_check, sad);
|
||||||
|
@ -419,7 +419,7 @@ int16_t intra_prediction(encoder_state * const encoder_state, pixel *orig, int32
|
||||||
// Bitcost also calculated again for this mode
|
// Bitcost also calculated again for this mode
|
||||||
rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds);
|
rdo_bitcost = intra_pred_ratecost(rdo_modes[rdo_mode],intra_preds);
|
||||||
// Add bitcost * lambda
|
// Add bitcost * lambda
|
||||||
rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->cur_lambda_cost + 0.5);
|
rdo_costs[rdo_mode] += rdo_bitcost * (int)(encoder_state->global->cur_lambda_cost + 0.5);
|
||||||
|
|
||||||
if(rdo_costs[rdo_mode] < best_sad) {
|
if(rdo_costs[rdo_mode] < best_sad) {
|
||||||
best_sad = rdo_costs[rdo_mode];
|
best_sad = rdo_costs[rdo_mode];
|
||||||
|
|
|
@ -302,8 +302,6 @@ picture *picture_alloc(const int32_t width, const int32_t height,
|
||||||
|
|
||||||
pic->coeff_y = NULL; pic->coeff_u = NULL; pic->coeff_v = NULL;
|
pic->coeff_y = NULL; pic->coeff_u = NULL; pic->coeff_v = NULL;
|
||||||
|
|
||||||
pic->slice_sao_luma_flag = 1;
|
|
||||||
pic->slice_sao_chroma_flag = 1;
|
|
||||||
pic->sao_luma = MALLOC(sao_info, width_in_lcu * height_in_lcu);
|
pic->sao_luma = MALLOC(sao_info, width_in_lcu * height_in_lcu);
|
||||||
pic->sao_chroma = MALLOC(sao_info, width_in_lcu * height_in_lcu);
|
pic->sao_chroma = MALLOC(sao_info, width_in_lcu * height_in_lcu);
|
||||||
|
|
||||||
|
|
|
@ -126,10 +126,6 @@ typedef struct picture_struct
|
||||||
uint8_t referenced; //!< \brief Whether this picture is referenced.
|
uint8_t referenced; //!< \brief Whether this picture is referenced.
|
||||||
int32_t refcount; //!< \brief Number of references in reflist to the picture
|
int32_t refcount; //!< \brief Number of references in reflist to the picture
|
||||||
cu_info* cu_array; //!< \brief Info for each CU at each depth.
|
cu_info* cu_array; //!< \brief Info for each CU at each depth.
|
||||||
uint8_t type;
|
|
||||||
uint8_t slicetype;
|
|
||||||
uint8_t slice_sao_luma_flag;
|
|
||||||
uint8_t slice_sao_chroma_flag;
|
|
||||||
struct sao_info_struct *sao_luma; //!< \brief Array of sao parameters for every LCU.
|
struct sao_info_struct *sao_luma; //!< \brief Array of sao parameters for every LCU.
|
||||||
struct sao_info_struct *sao_chroma; //!< \brief Array of sao parameters for every LCU.
|
struct sao_info_struct *sao_chroma; //!< \brief Array of sao parameters for every LCU.
|
||||||
int32_t poc; //!< \brief Picture order count
|
int32_t poc; //!< \brief Picture order count
|
||||||
|
|
32
src/rdo.c
32
src/rdo.c
|
@ -112,12 +112,12 @@ uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel
|
||||||
for (i = 0; i < width*width; i++) {
|
for (i = 0; i < width*width; i++) {
|
||||||
coeffcost += abs((int)temp_coeff[i]);
|
coeffcost += abs((int)temp_coeff[i]);
|
||||||
}
|
}
|
||||||
cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->cur_lambda_cost+0.5);
|
cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5);
|
||||||
// Full RDO
|
// Full RDO
|
||||||
} else if(encoder->rdo == 2) {
|
} else if(encoder->rdo == 2) {
|
||||||
coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode);
|
coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode);
|
||||||
|
|
||||||
cost += coeffcost*((int)encoder_state->cur_lambda_cost+0.5);
|
cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5);
|
||||||
}
|
}
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
@ -299,7 +299,7 @@ uint32_t get_coded_level ( encoder_state * const encoder_state, double *coded_co
|
||||||
cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma);
|
cabac_ctx* base_sig_model = type?(cabac->ctx_cu_sig_model_chroma):(cabac->ctx_cu_sig_model_luma);
|
||||||
|
|
||||||
if( !last && max_abs_level < 3 ) {
|
if( !last && max_abs_level < 3 ) {
|
||||||
*coded_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
|
*coded_cost_sig = encoder_state->global->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
|
||||||
*coded_cost = *coded_cost0 + *coded_cost_sig;
|
*coded_cost = *coded_cost0 + *coded_cost_sig;
|
||||||
if (max_abs_level == 0) return best_abs_level;
|
if (max_abs_level == 0) return best_abs_level;
|
||||||
} else {
|
} else {
|
||||||
|
@ -307,13 +307,13 @@ uint32_t get_coded_level ( encoder_state * const encoder_state, double *coded_co
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !last ) {
|
if( !last ) {
|
||||||
cur_cost_sig = encoder_state->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
|
cur_cost_sig = encoder_state->global->cur_lambda_cost * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
|
min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
|
||||||
for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
|
for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
|
||||||
double err = (double)(level_double - ( abs_level << q_bits ) );
|
double err = (double)(level_double - ( abs_level << q_bits ) );
|
||||||
double cur_cost = err * err * temp + encoder_state->cur_lambda_cost *
|
double cur_cost = err * err * temp + encoder_state->global->cur_lambda_cost *
|
||||||
get_ic_rate_cost( encoder_state, abs_level, ctx_num_one, ctx_num_abs,
|
get_ic_rate_cost( encoder_state, abs_level, ctx_num_one, ctx_num_abs,
|
||||||
abs_go_rice, c1_idx, c2_idx, type);
|
abs_go_rice, c1_idx, c2_idx, type);
|
||||||
cur_cost += cur_cost_sig;
|
cur_cost += cur_cost_sig;
|
||||||
|
@ -350,7 +350,7 @@ static double get_rate_last(const encoder_state * const encoder_state,
|
||||||
if( ctx_y > 3 ) {
|
if( ctx_y > 3 ) {
|
||||||
uiCost += 32768.0 * ((ctx_y-2)>>1);
|
uiCost += 32768.0 * ((ctx_y-2)>>1);
|
||||||
}
|
}
|
||||||
return encoder_state->cur_lambda_cost*uiCost;
|
return encoder_state->global->cur_lambda_cost*uiCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void calc_last_bits(encoder_state * const encoder_state, int32_t width, int32_t height, int8_t type,
|
static void calc_last_bits(encoder_state * const encoder_state, int32_t width, int32_t height, int8_t type,
|
||||||
|
@ -402,7 +402,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
uint32_t max_num_coeff = width * height;
|
uint32_t max_num_coeff = width * height;
|
||||||
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
|
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
|
||||||
|
|
||||||
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
|
int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0);
|
||||||
|
|
||||||
{
|
{
|
||||||
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
|
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
|
||||||
|
@ -591,7 +591,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
if (sig_coeffgroup_flag[ cg_blkpos ] == 0) {
|
if (sig_coeffgroup_flag[ cg_blkpos ] == 0) {
|
||||||
uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
uint32_t ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
||||||
cg_pos_y, width);
|
cg_pos_y, width);
|
||||||
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
||||||
base_cost += cost_coeffgroup_sig[ cg_scanpos ] - rd_stats.sig_cost;
|
base_cost += cost_coeffgroup_sig[ cg_scanpos ] - rd_stats.sig_cost;
|
||||||
} else {
|
} else {
|
||||||
if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below.
|
if (cg_scanpos < cg_last_scanpos) {//skip the last coefficient group, which will be handled together with last position below.
|
||||||
|
@ -608,9 +608,9 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
ctx_sig = context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
|
||||||
cg_pos_y, width);
|
cg_pos_y, width);
|
||||||
if (cg_scanpos < cg_last_scanpos) {
|
if (cg_scanpos < cg_last_scanpos) {
|
||||||
cost_coeffgroup_sig[cg_scanpos] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
|
cost_coeffgroup_sig[cg_scanpos] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],1);
|
||||||
base_cost += cost_coeffgroup_sig[cg_scanpos];
|
base_cost += cost_coeffgroup_sig[cg_scanpos];
|
||||||
cost_zero_cg += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
cost_zero_cg += encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to convert the current coeff group from non-zero to all-zero
|
// try to convert the current coeff group from non-zero to all-zero
|
||||||
|
@ -624,7 +624,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
sig_coeffgroup_flag[ cg_blkpos ] = 0;
|
sig_coeffgroup_flag[ cg_blkpos ] = 0;
|
||||||
base_cost = cost_zero_cg;
|
base_cost = cost_zero_cg;
|
||||||
if (cg_scanpos < cg_last_scanpos) {
|
if (cg_scanpos < cg_last_scanpos) {
|
||||||
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
cost_coeffgroup_sig[ cg_scanpos ] = encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
|
||||||
}
|
}
|
||||||
// reset coeffs to 0 in this block
|
// reset coeffs to 0 in this block
|
||||||
for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
for (scanpos_in_cg = cg_size-1; scanpos_in_cg >= 0; scanpos_in_cg--) {
|
||||||
|
@ -652,13 +652,13 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
|
|
||||||
|
|
||||||
if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
|
if( block_type != CU_INTRA && !type/* && pcCU->getTransformIdx( uiAbsPartIdx ) == 0*/ ) {
|
||||||
best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
|
best_cost = block_uncoded_cost + encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),0);
|
||||||
base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
|
base_cost += encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&(cabac->ctx_cu_qt_root_cbf_model),1);
|
||||||
} else {
|
} else {
|
||||||
cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma);
|
cabac_ctx* base_cbf_model = type?(cabac->ctx_qt_cbf_model_chroma):(cabac->ctx_qt_cbf_model_luma);
|
||||||
ctx_cbf = ( type ? tr_depth : !tr_depth);
|
ctx_cbf = ( type ? tr_depth : !tr_depth);
|
||||||
best_cost = block_uncoded_cost + encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
|
best_cost = block_uncoded_cost + encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
|
||||||
base_cost += encoder_state->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
|
base_cost += encoder_state->global->cur_lambda_cost*CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
|
for (cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
|
||||||
|
@ -712,7 +712,7 @@ void rdoq(encoder_state * const encoder_state, coefficient *coef, coefficient *
|
||||||
if(*abs_sum >= 2) {
|
if(*abs_sum >= 2) {
|
||||||
int64_t rd_factor = (int64_t) (
|
int64_t rd_factor = (int64_t) (
|
||||||
g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6)))
|
g_inv_quant_scales[qp_scaled%6] * g_inv_quant_scales[qp_scaled%6] * (1<<(2*(qp_scaled/6)))
|
||||||
/ encoder_state->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
|
/ encoder_state->global->cur_lambda_cost / 16 / (1<<(2*(encoder->bitdepth-8)))
|
||||||
+ 0.5);
|
+ 0.5);
|
||||||
int32_t lastCG = -1;
|
int32_t lastCG = -1;
|
||||||
int32_t absSum = 0;
|
int32_t absSum = 0;
|
||||||
|
|
12
src/sao.c
12
src/sao.c
|
@ -669,7 +669,7 @@ static void sao_search_edge_sao(const encoder_state * const encoder_state,
|
||||||
|
|
||||||
{
|
{
|
||||||
int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left);
|
int mode_bits = sao_mode_bits_edge(edge_class, edge_offset, sao_top, sao_left);
|
||||||
sum_ddistortion += (int)((double)mode_bits*(encoder_state->cur_lambda_cost+0.5));
|
sum_ddistortion += (int)((double)mode_bits*(encoder_state->global->cur_lambda_cost+0.5));
|
||||||
}
|
}
|
||||||
// SAO is not applied for category 0.
|
// SAO is not applied for category 0.
|
||||||
edge_offset[SAO_EO_CAT0] = 0;
|
edge_offset[SAO_EO_CAT0] = 0;
|
||||||
|
@ -711,7 +711,7 @@ static void sao_search_band_sao(const encoder_state * const encoder_state, const
|
||||||
ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position);
|
ddistortion = calc_sao_band_offsets(sao_bands, temp_offsets, &sao_out->band_position);
|
||||||
|
|
||||||
temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left);
|
temp_rate = sao_mode_bits_band(sao_out->band_position, temp_offsets, sao_top, sao_left);
|
||||||
ddistortion += (int)((double)temp_rate*(encoder_state->cur_lambda_cost+0.5));
|
ddistortion += (int)((double)temp_rate*(encoder_state->global->cur_lambda_cost+0.5));
|
||||||
|
|
||||||
// Select band sao over edge sao when distortion is lower
|
// Select band sao over edge sao when distortion is lower
|
||||||
if (ddistortion < sao_out->ddistortion) {
|
if (ddistortion < sao_out->ddistortion) {
|
||||||
|
@ -745,7 +745,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons
|
||||||
|
|
||||||
{
|
{
|
||||||
int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left);
|
int mode_bits = sao_mode_bits_edge(edge_sao.eo_class, edge_sao.offsets, sao_top, sao_left);
|
||||||
int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
|
int ddistortion = mode_bits * (int)(encoder_state->global->cur_lambda_cost + 0.5);
|
||||||
unsigned buf_i;
|
unsigned buf_i;
|
||||||
|
|
||||||
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
||||||
|
@ -759,7 +759,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons
|
||||||
|
|
||||||
{
|
{
|
||||||
int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left);
|
int mode_bits = sao_mode_bits_band(band_sao.band_position, &band_sao.offsets[1], sao_top, sao_left);
|
||||||
int ddistortion = mode_bits * (int)(encoder_state->cur_lambda_cost + 0.5);
|
int ddistortion = mode_bits * (int)(encoder_state->global->cur_lambda_cost + 0.5);
|
||||||
unsigned buf_i;
|
unsigned buf_i;
|
||||||
|
|
||||||
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
for (buf_i = 0; buf_i < buf_cnt; ++buf_i) {
|
||||||
|
@ -780,7 +780,7 @@ static void sao_search_best_mode(const encoder_state * const encoder_state, cons
|
||||||
// Choose between SAO and doing nothing, taking into account the
|
// Choose between SAO and doing nothing, taking into account the
|
||||||
// rate-distortion cost of coding do nothing.
|
// rate-distortion cost of coding do nothing.
|
||||||
{
|
{
|
||||||
int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->cur_lambda_cost + 0.5);
|
int cost_of_nothing = sao_mode_bits_none(sao_top, sao_left) * (int)(encoder_state->global->cur_lambda_cost + 0.5);
|
||||||
if (sao_out->ddistortion >= cost_of_nothing) {
|
if (sao_out->ddistortion >= cost_of_nothing) {
|
||||||
sao_out->type = SAO_TYPE_NONE;
|
sao_out->type = SAO_TYPE_NONE;
|
||||||
}
|
}
|
||||||
|
@ -863,7 +863,7 @@ void sao_search_luma(const encoder_state * const encoder_state, const picture *p
|
||||||
void sao_reconstruct_frame(encoder_state * const encoder_state)
|
void sao_reconstruct_frame(encoder_state * const encoder_state)
|
||||||
{
|
{
|
||||||
vector2d lcu;
|
vector2d lcu;
|
||||||
picture * const cur_pic = encoder_state->cur_pic;
|
picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
|
|
||||||
// These are needed because SAO needs the pre-SAO pixels from left and
|
// These are needed because SAO needs the pre-SAO pixels from left and
|
||||||
// top LCUs. Single pixel wide buffers, like what search_lcu takes, would
|
// top LCUs. Single pixel wide buffers, like what search_lcu takes, would
|
||||||
|
|
52
src/search.c
52
src/search.c
|
@ -159,7 +159,7 @@ static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y
|
||||||
temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
|
temp_bitcost += cur_mv_cand ? cand2_cost : cand1_cost;
|
||||||
}
|
}
|
||||||
*bitcost = temp_bitcost;
|
*bitcost = temp_bitcost;
|
||||||
return temp_bitcost*(int32_t)(encoder_state->cur_lambda_cost+0.5);
|
return temp_bitcost*(int32_t)(encoder_state->global->cur_lambda_cost+0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -201,8 +201,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
for (i = 0; i < 7; ++i) {
|
for (i = 0; i < 7; ++i) {
|
||||||
const vector2d *pattern = &large_hexbs[i];
|
const vector2d *pattern = &large_hexbs[i];
|
||||||
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
|
@ -216,8 +216,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
// Try the 0,0 vector.
|
// Try the 0,0 vector.
|
||||||
if (!(mv.x == 0 && mv.y == 0)) {
|
if (!(mv.x == 0 && mv.y == 0)) {
|
||||||
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x,
|
||||||
(encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
|
@ -232,8 +232,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
for (i = 1; i < 7; ++i) {
|
for (i = 1; i < 7; ++i) {
|
||||||
const vector2d *pattern = &large_hexbs[i];
|
const vector2d *pattern = &large_hexbs[i];
|
||||||
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
||||||
(encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
|
@ -267,8 +267,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
for (i = 0; i < 3; ++i) {
|
for (i = 0; i < 3; ++i) {
|
||||||
const vector2d *offset = &large_hexbs[start + i];
|
const vector2d *offset = &large_hexbs[start + i];
|
||||||
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
|
@ -290,8 +290,8 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
for (i = 1; i < 5; ++i) {
|
for (i = 1; i < 5; ++i) {
|
||||||
const vector2d *offset = &small_hexbs[i];
|
const vector2d *offset = &small_hexbs[i];
|
||||||
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
unsigned cost = calc_sad(pic, ref, orig->x, orig->y,
|
||||||
(encoder_state->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(encoder_state->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
|
@ -374,7 +374,7 @@ static unsigned search_mv_full(unsigned depth,
|
||||||
*/
|
*/
|
||||||
static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu)
|
static int search_cu_inter(const encoder_state * const encoder_state, int x, int y, int depth, lcu_t *lcu)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
uint32_t ref_idx = 0;
|
uint32_t ref_idx = 0;
|
||||||
int x_local = (x&0x3f), y_local = (y&0x3f);
|
int x_local = (x&0x3f), y_local = (y&0x3f);
|
||||||
int x_cu = x>>3;
|
int x_cu = x>>3;
|
||||||
|
@ -394,8 +394,8 @@ static int search_cu_inter(const encoder_state * const encoder_state, int x, int
|
||||||
|
|
||||||
cur_cu->inter.cost = UINT_MAX;
|
cur_cu->inter.cost = UINT_MAX;
|
||||||
|
|
||||||
for (ref_idx = 0; ref_idx < encoder_state->ref->used_size; ref_idx++) {
|
for (ref_idx = 0; ref_idx < encoder_state->global->ref->used_size; ref_idx++) {
|
||||||
picture *ref_pic = encoder_state->ref->pics[ref_idx];
|
picture *ref_pic = encoder_state->global->ref->pics[ref_idx];
|
||||||
unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
|
unsigned width_in_scu = NO_SCU_IN_LCU(ref_pic->width_in_lcu);
|
||||||
cu_info *ref_cu = &ref_pic->cu_array[y_cu * width_in_scu + x_cu];
|
cu_info *ref_cu = &ref_pic->cu_array[y_cu * width_in_scu + x_cu];
|
||||||
uint32_t temp_bitcost = 0;
|
uint32_t temp_bitcost = 0;
|
||||||
|
@ -670,7 +670,7 @@ static int search_cu_intra(encoder_state * const encoder_state,
|
||||||
const int x_px, const int y_px,
|
const int x_px, const int y_px,
|
||||||
const int depth, lcu_t *lcu)
|
const int depth, lcu_t *lcu)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
const vector2d lcu_px = { x_px & 0x3f, y_px & 0x3f };
|
||||||
const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
|
const vector2d lcu_cu = { lcu_px.x >> 3, lcu_px.y >> 3 };
|
||||||
const int8_t cu_width = (LCU_WIDTH >> (depth));
|
const int8_t cu_width = (LCU_WIDTH >> (depth));
|
||||||
|
@ -776,7 +776,7 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Coefficient costs
|
// Coefficient costs
|
||||||
cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->cur_lambda_cost+0.5);
|
cost += (coeff_cost + (coeff_cost>>1)) * (int32_t)(encoder_state->global->cur_lambda_cost+0.5);
|
||||||
|
|
||||||
// Calculate actual bit costs for coding the coeffs
|
// Calculate actual bit costs for coding the coeffs
|
||||||
// RDO
|
// RDO
|
||||||
|
@ -838,11 +838,11 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state,
|
||||||
coeff_cost += get_coeff_cost(encoder_state, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
|
coeff_cost += get_coeff_cost(encoder_state, coeff_temp_v, blockwidth, 2, chroma_scan_mode);
|
||||||
}
|
}
|
||||||
// Multiply bit count with lambda to get RD-cost
|
// Multiply bit count with lambda to get RD-cost
|
||||||
cost += coeff_cost * (int32_t)(encoder_state->cur_lambda_cost+0.5);
|
cost += coeff_cost * (int32_t)(encoder_state->global->cur_lambda_cost+0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bitcost
|
// Bitcost
|
||||||
cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->cur_lambda_cost+0.5);
|
cost += (cur_cu->type == CU_INTER ? cur_cu->inter.bitcost : cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost)*(int32_t)(encoder_state->global->cur_lambda_cost+0.5);
|
||||||
|
|
||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
@ -859,7 +859,7 @@ static int lcu_get_final_cost(const encoder_state * const encoder_state,
|
||||||
*/
|
*/
|
||||||
static int search_cu(encoder_state * const encoder_state, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
|
static int search_cu(encoder_state * const encoder_state, int x, int y, int depth, lcu_t work_tree[MAX_PU_DEPTH])
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
int cu_width = LCU_WIDTH >> depth;
|
int cu_width = LCU_WIDTH >> depth;
|
||||||
int cost = MAX_INT;
|
int cost = MAX_INT;
|
||||||
cu_info *cur_cu;
|
cu_info *cur_cu;
|
||||||
|
@ -883,7 +883,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
y + cu_width <= cur_pic->height)
|
y + cu_width <= cur_pic->height)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (cur_pic->slicetype != SLICE_I &&
|
if (encoder_state->global->slicetype != SLICE_I &&
|
||||||
depth >= MIN_INTER_SEARCH_DEPTH &&
|
depth >= MIN_INTER_SEARCH_DEPTH &&
|
||||||
depth <= MAX_INTER_SEARCH_DEPTH)
|
depth <= MAX_INTER_SEARCH_DEPTH)
|
||||||
{
|
{
|
||||||
|
@ -911,7 +911,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
|
intra_recon_lcu(encoder_state, x, y, depth,&work_tree[depth], cur_pic->width, cur_pic->height);
|
||||||
} else if (cur_cu->type == CU_INTER) {
|
} else if (cur_cu->type == CU_INTER) {
|
||||||
int cbf;
|
int cbf;
|
||||||
inter_recon_lcu(encoder_state, encoder_state->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
|
inter_recon_lcu(encoder_state, encoder_state->global->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
|
||||||
encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);
|
encode_transform_tree(encoder_state, x, y, depth, &work_tree[depth]);
|
||||||
|
|
||||||
cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
|
cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
|
||||||
|
@ -933,7 +933,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
// Recursively split all the way to max search depth.
|
// Recursively split all the way to max search depth.
|
||||||
if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
|
if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) {
|
||||||
int half_cu = cu_width / 2;
|
int half_cu = cu_width / 2;
|
||||||
int split_cost = (int)(4.5 * encoder_state->cur_lambda_cost);
|
int split_cost = (int)(4.5 * encoder_state->global->cur_lambda_cost);
|
||||||
int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
|
int cbf = cbf_is_set(cur_cu->cbf.y, depth) || cbf_is_set(cur_cu->cbf.u, depth) || cbf_is_set(cur_cu->cbf.v, depth);
|
||||||
|
|
||||||
// If skip mode was selected for the block, skip further search.
|
// If skip mode was selected for the block, skip further search.
|
||||||
|
@ -970,7 +970,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
|
||||||
*/
|
*/
|
||||||
static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
|
static void init_lcu_t(const encoder_state * const encoder_state, const int x, const int y, lcu_t *lcu, const yuv_t *hor_buf, const yuv_t *ver_buf)
|
||||||
{
|
{
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
|
|
||||||
// Copy reference cu_info structs from neighbouring LCUs.
|
// Copy reference cu_info structs from neighbouring LCUs.
|
||||||
{
|
{
|
||||||
|
@ -1050,7 +1050,7 @@ static void init_lcu_t(const encoder_state * const encoder_state, const int x, c
|
||||||
|
|
||||||
// Copy LCU pixels.
|
// Copy LCU pixels.
|
||||||
{
|
{
|
||||||
const picture * const pic = encoder_state->cur_pic;
|
const picture * const pic = encoder_state->tile->cur_pic;
|
||||||
int pic_width = cur_pic->width;
|
int pic_width = cur_pic->width;
|
||||||
int x_max = MIN(x + LCU_WIDTH, pic_width) - x;
|
int x_max = MIN(x + LCU_WIDTH, pic_width) - x;
|
||||||
int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y;
|
int y_max = MIN(y + LCU_WIDTH, cur_pic->height) - y;
|
||||||
|
@ -1080,7 +1080,7 @@ static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x
|
||||||
{
|
{
|
||||||
const int x_cu = x_px >> MAX_DEPTH;
|
const int x_cu = x_px >> MAX_DEPTH;
|
||||||
const int y_cu = y_px >> MAX_DEPTH;
|
const int y_cu = y_px >> MAX_DEPTH;
|
||||||
const picture * const cur_pic = encoder_state->cur_pic;
|
const picture * const cur_pic = encoder_state->tile->cur_pic;
|
||||||
const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH;
|
const int cu_array_width = cur_pic->width_in_lcu << MAX_DEPTH;
|
||||||
cu_info *const cu_array = cur_pic->cu_array;
|
cu_info *const cu_array = cur_pic->cu_array;
|
||||||
|
|
||||||
|
@ -1100,7 +1100,7 @@ static void copy_lcu_to_cu_data(const encoder_state * const encoder_state, int x
|
||||||
|
|
||||||
// Copy pixels to picture.
|
// Copy pixels to picture.
|
||||||
{
|
{
|
||||||
picture * const pic = encoder_state->cur_pic;
|
picture * const pic = encoder_state->tile->cur_pic;
|
||||||
const int pic_width = pic->width;
|
const int pic_width = pic->width;
|
||||||
const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px;
|
const int x_max = MIN(x_px + LCU_WIDTH, pic_width) - x_px;
|
||||||
const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px;
|
const int y_max = MIN(y_px + LCU_WIDTH, pic->height) - y_px;
|
||||||
|
|
|
@ -634,7 +634,7 @@ void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_
|
||||||
int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2];
|
int32_t delta_u[LCU_WIDTH*LCU_WIDTH>>2];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
|
int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0);
|
||||||
|
|
||||||
//New block for variable definitions
|
//New block for variable definitions
|
||||||
{
|
{
|
||||||
|
@ -646,7 +646,7 @@ void quant(const encoder_state * const encoder_state, int16_t *coef, int16_t *q_
|
||||||
|
|
||||||
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
|
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; //!< Represents scaling through forward transform
|
||||||
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
|
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
|
||||||
int32_t add = ((encoder_state->cur_pic->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
|
int32_t add = ((encoder_state->global->slicetype == SLICE_I) ? 171 : 85) << (q_bits - 9);
|
||||||
|
|
||||||
int32_t q_bits8 = q_bits - 8;
|
int32_t q_bits8 = q_bits - 8;
|
||||||
for (n = 0; n < width * height; n++) {
|
for (n = 0; n < width * height; n++) {
|
||||||
|
@ -762,7 +762,7 @@ void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t
|
||||||
int32_t n;
|
int32_t n;
|
||||||
int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);
|
int32_t transform_shift = 15 - encoder->bitdepth - (g_convert_to_bit[ width ] + 2);
|
||||||
|
|
||||||
int32_t qp_scaled = get_scaled_qp(type, encoder_state->QP, 0);
|
int32_t qp_scaled = get_scaled_qp(type, encoder_state->global->QP, 0);
|
||||||
|
|
||||||
shift = 20 - QUANT_SHIFT - transform_shift;
|
shift = 20 - QUANT_SHIFT - transform_shift;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue