diff --git a/README.md b/README.md index 2daa0fae..5d36012e 100644 --- a/README.md +++ b/README.md @@ -156,11 +156,20 @@ Video structure: - frametile: Constrain within the tile. - frametilemargin: Constrain even more. --roi : Use a delta QP map for region of interest. - Reads an array of delta QP values from a text - file. The file format is: width and height of - the QP delta map followed by width*height delta - QP values in raster order. The map can be of any - size and will be scaled to the video size. + Reads an array of delta QP values from a file. + Text and binary files are supported and detected + from the file extension (.txt/.bin). If a known + extension is not found, the file is treated as + a text file. The file can include one or many + ROI frames each in the following format: + width and height of the QP delta map followed + by width * height delta QP values in raster + order. In binary format, width and height are + 32-bit integers whereas the delta QP values are + signed 8-bit values. The map can be of any size + and will be scaled to the video size. The file + reading will loop if end of the file is reached. + See roi.txt in the examples folder. --set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26. in PPS and slice_qp_delta in slize header zero. --(no-)erp-aqp : Use adaptive QP for 360 degree video with diff --git a/configure.ac b/configure.ac index 832b584d..178a9b3d 100644 --- a/configure.ac +++ b/configure.ac @@ -22,8 +22,8 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # - Increment when making new releases and major or minor was not changed since last release. 
# # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html -ver_major=6 -ver_minor=6 +ver_major=7 +ver_minor=0 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index 93def73f..c5883b84 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "October 2021" "kvazaar v2.1.0" "User Commands" +.TH KVAZAAR "1" "February 2022" "kvazaar v2.1.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -180,11 +180,20 @@ Constrain movement vectors. [none] .TP \fB\-\-roi Use a delta QP map for region of interest. -Reads an array of delta QP values from a text -file. The file format is: width and height of -the QP delta map followed by width*height delta -QP values in raster order. The map can be of any -size and will be scaled to the video size. +Reads an array of delta QP values from a file. +Text and binary files are supported and detected +from the file extension (.txt/.bin). If a known +extension is not found, the file is treated as +a text file. The file can include one or many +ROI frames each in the following format: +width and height of the QP delta map followed +by width * height delta QP values in raster +order. In binary format, width and height are +32\-bit integers whereas the delta QP values are +signed 8\-bit values. The map can be of any size +and will be scaled to the video size. The file +reading will loop if end of the file is reached. +See roi.txt in the examples folder. .TP \fB\-\-set\-qp\-in\-cu Set QP at CU level keeping pic_init_qp_minus26. 
diff --git a/src/cfg.c b/src/cfg.c index 6a1fcf40..69745a96 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -139,11 +139,9 @@ int kvz_config_init(kvz_config *cfg) cfg->gop_lp_definition.t = 1; cfg->open_gop = true; - cfg->roi.width = 0; - cfg->roi.height = 0; - cfg->roi.dqps = NULL; - - cfg->roi_file = NULL; + cfg->roi.file_path = NULL; + cfg->roi.format = KVZ_ROI_TXT; + cfg->set_qp_in_cu = false; cfg->erp_aqp = false; @@ -192,12 +190,11 @@ int kvz_config_destroy(kvz_config *cfg) { if (cfg) { FREE_POINTER(cfg->cqmfile); - FREE_POINTER(cfg->roi_file); + FREE_POINTER(cfg->roi.file_path); FREE_POINTER(cfg->fast_coeff_table_fn); FREE_POINTER(cfg->tiles_width_split); FREE_POINTER(cfg->tiles_height_split); FREE_POINTER(cfg->slice_addresses_in_ts); - FREE_POINTER(cfg->roi.dqps); FREE_POINTER(cfg->optional_key); FREE_POINTER(cfg->fastrd_learning_outdir_fn); } @@ -1244,70 +1241,29 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) } else if OPT("implicit-rdpcm") cfg->implicit_rdpcm = (bool)atobool(value); + else if OPT("roi") { - // The ROI description is as follows: - // First number is width, second number is height, - // then follows width * height number of dqp values. 
- FILE* f = fopen(value, "rb"); - if (!f) { - fprintf(stderr, "Could not open ROI file.\n"); + static enum kvz_roi_format const formats[] = { KVZ_ROI_TXT, KVZ_ROI_BIN }; + static const char * const format_names[] = { "txt", "bin", NULL }; + + char *roi_file = strdup(value); + if (!roi_file) { + fprintf(stderr, "Failed to allocate memory for ROI file name.\n"); return 0; } + FREE_POINTER(cfg->roi.file_path); + cfg->roi.file_path = roi_file; - int width = 0; - int height = 0; - if (!fscanf(f, "%d", &width) || !fscanf(f, "%d", &height)) { - fprintf(stderr, "Failed to read ROI size.\n"); - fclose(f); - return 0; + // Get file extension or the substring after the last dot + char *maybe_extension = strrchr(cfg->roi.file_path, '.'); + if (!maybe_extension) { + cfg->roi.format = KVZ_ROI_TXT; + } else { + maybe_extension++; + int8_t format; + bool unknown_format = !parse_enum(maybe_extension, format_names, &format); + cfg->roi.format = unknown_format ? KVZ_ROI_TXT : formats[format]; } - - if (width <= 0 || height <= 0) { - fprintf(stderr, "Invalid ROI size: %dx%d.\n", width, height); - fclose(f); - return 0; - } - - if (width > 10000 || height > 10000) { - fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n"); - fclose(f); - return 0; - } - - const unsigned size = width * height; - int8_t *dqp_array = calloc((size_t)size, sizeof(cfg->roi.dqps[0])); - if (!dqp_array) { - fprintf(stderr, "Failed to allocate memory for ROI table.\n"); - fclose(f); - return 0; - } - - FREE_POINTER(cfg->roi.dqps); - cfg->roi.dqps = dqp_array; - cfg->roi.width = width; - cfg->roi.height = height; - - for (int i = 0; i < size; ++i) { - int number; // Need a pointer to int for fscanf - if (fscanf(f, "%d", &number) != 1) { - fprintf(stderr, "Reading ROI file failed.\n"); - fclose(f); - return 0; - } - dqp_array[i] = CLIP(-51, 51, number); - } - - fclose(f); - } - else if OPT("roi-file") - { - char* roifile = strdup(value); - if (!roifile) { - fprintf(stderr, "Failed to allocate memory 
for roi file name.\n"); - return 0; - } - FREE_POINTER(cfg->roi_file); - cfg->roi_file = roifile; } else if OPT("set-qp-in-cu") { cfg->set_qp_in_cu = (bool)atobool(value); diff --git a/src/cli.c b/src/cli.c index 4aa86794..b32c10c0 100644 --- a/src/cli.c +++ b/src/cli.c @@ -501,11 +501,20 @@ void print_help(void) " - frametile: Constrain within the tile.\n" " - frametilemargin: Constrain even more.\n" " --roi : Use a delta QP map for region of interest.\n" - " Reads an array of delta QP values from a text\n" - " file. The file format is: width and height of\n" - " the QP delta map followed by width*height delta\n" - " QP values in raster order. The map can be of any\n" - " size and will be scaled to the video size.\n" + " Reads an array of delta QP values from a file.\n" + " Text and binary files are supported and detected\n" + " from the file extension (.txt/.bin). If a known\n" + " extension is not found, the file is treated as\n" + " a text file. The file can include one or many\n" + " ROI frames each in the following format:\n" + " width and height of the QP delta map followed\n" + " by width * height delta QP values in raster\n" + " order. In binary format, width and height are\n" + " 32-bit integers whereas the delta QP values are\n" + " signed 8-bit values. The map can be of any size\n" + " and will be scaled to the video size. The file\n" + " reading will loop if end of the file is reached.\n" + " See roi.txt in the examples folder.\n" " --set-qp-in-cu : Set QP at CU level keeping pic_init_qp_minus26.\n" " in PPS and slice_qp_delta in slize header zero.\n" " --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n" diff --git a/src/encmain.c b/src/encmain.c index 37f1c121..6d172442 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -147,7 +147,6 @@ typedef struct { // Parameters passed from main thread to input thread. 
FILE* input; - FILE* roi_file; const kvz_api *api; const cmdline_opts_t *opts; const encoder_control_t *encoder; @@ -249,21 +248,6 @@ static void* input_read_thread(void* in_args) } } - if(args->roi_file) { - if (fread(&frame_in->roi, 4, 2, args->roi_file) != 2) { - fprintf(stderr, "Failed to read roi matrix size for frame: %d. Shutting down.\n", frames_read); - retval = RETVAL_FAILURE; - goto done; - } - const size_t roi_size = frame_in->roi.height*frame_in->roi.width; - frame_in->roi.roi_array = malloc(roi_size); - if(fread(frame_in->roi.roi_array, 1, roi_size, args->roi_file) != roi_size) { - fprintf(stderr, "Failed to read roi matrix for frame: %d. Shutting down.\n", frames_read); - retval = RETVAL_FAILURE; - goto done; - } - } - frames_read++; if (args->encoder->cfg.source_scan_type != 0) { @@ -510,14 +494,6 @@ int main(int argc, char *argv[]) goto exit_failure; } - if(opts->config->roi_file) { - roifile = fopen(opts->config->roi_file, "rb"); - if(roifile == NULL) { - fprintf(stderr, "Could not open roi file although it was required. Shutting down!\n"); - goto exit_failure; - } - } - #ifdef _WIN32 // Set stdin and stdout to binary for pipes. 
if (input == stdin) { @@ -594,7 +570,6 @@ int main(int argc, char *argv[]) .filled_input_slots = filled_input_slots, .input = input, - .roi_file = roifile, .api = api, .opts = opts, .encoder = encoder, diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 90df4dd1..03b04943 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -758,7 +758,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state, bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu; bool border = border_x || border_y; /*!< are we in any border CU */ - if (depth <= ctrl->max_qp_delta_depth) { + if (depth <= state->frame->max_qp_delta_depth) { state->must_code_qp_delta = true; } diff --git a/src/encoder.c b/src/encoder.c index dd485e6a..67751f56 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -32,9 +32,6 @@ #include "encoder.h" -// This define is required for M_PI on Windows. -#define _USE_MATH_DEFINES -#include #include #include @@ -45,14 +42,6 @@ #include "kvz_math.h" #include "fast_coeff_cost.h" -/** - * \brief Strength of QP adjustments when using adaptive QP for 360 video. - * - * Determined empirically. - */ -static const double ERP_AQP_STRENGTH = 3.0; - - static int encoder_control_init_gop_layer_weights(encoder_control_t * const); static unsigned cfg_num_threads(void) @@ -136,82 +125,6 @@ static int get_max_parallelism(const encoder_control_t *const encoder) } -/** - * \brief Return weight for 360 degree ERP video - * - * Returns the scaling factor of area from equirectangular projection to - * spherical surface. - * - * \param y y-coordinate of the pixel - * \param h height of the picture - */ -static double ws_weight(int y, int h) -{ - return cos((y - 0.5 * h + 0.5) * (M_PI / h)); -} - - - -/** - * \brief Update ROI QPs for 360 video with equirectangular projection. - * - * Writes updated ROI parameters to encoder->cfg.roi. 
- * - * \param encoder encoder control - * \param orig_roi original delta QPs or NULL - * \param orig_width width of orig_roi - * \param orig_height height of orig_roi - */ -static void init_erp_aqp_roi(encoder_control_t* encoder, - int8_t *orig_roi, - int32_t orig_width, - int32_t orig_height) -{ - // Update ROI with WS-PSNR delta QPs. - int height = encoder->in.height_in_lcu; - int width = orig_roi ? orig_width : 1; - - int frame_height = encoder->in.real_height; - - encoder->cfg.roi.width = width; - encoder->cfg.roi.height = height; - encoder->cfg.roi.dqps = calloc(width * height, sizeof(orig_roi[0])); - - double total_weight = 0.0; - for (int y = 0; y < frame_height; y++) { - total_weight += ws_weight(y, frame_height); - } - - for (int y_lcu = 0; y_lcu < height; y_lcu++) { - int y_orig = LCU_WIDTH * y_lcu; - int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig); - - double lcu_weight = 0.0; - for (int y = y_orig; y < y_orig + lcu_height; y++) { - lcu_weight += ws_weight(y, frame_height); - } - // Normalize. - lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height); - - int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight)); - - if (orig_roi) { - // If a ROI array already exists, we copy the existing values to the - // new array while adding qp_delta to each. - int y_roi = y_lcu * orig_height / height; - for (int x = 0; x < width; x++) { - encoder->cfg.roi.dqps[x + y_lcu * width] = - CLIP(-51, 51, orig_roi[x + y_roi * width] + qp_delta); - } - - } else { - // Otherwise, simply write qp_delta to the ROI array. - encoder->cfg.roi.dqps[y_lcu] = qp_delta; - } - } -} - - /** * \brief Allocate and initialize an encoder control structure. 
* @@ -353,6 +266,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) encoder->scaling_list.use_default_list = 1; } + // ROI / delta QP + if (cfg->roi.file_path) { + const char *mode[2] = { "r", "rb" }; + encoder->roi_file = fopen(cfg->roi.file_path, mode[cfg->roi.format]); + if (!encoder->roi_file) { + fprintf(stderr, "Could not open ROI file.\n"); + goto init_failed; + } + } + if (cfg->fast_coeff_table_fn) { FILE *fast_coeff_table_f = fopen(cfg->fast_coeff_table_fn, "rb"); if (fast_coeff_table_f == NULL) { @@ -396,32 +319,10 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) goto init_failed; } - if (cfg->erp_aqp) { - init_erp_aqp_roi(encoder, - cfg->roi.dqps, - cfg->roi.width, - cfg->roi.height); - - } else if (cfg->roi.dqps) { - // Copy delta QP array for ROI coding. - const size_t roi_size = encoder->cfg.roi.width * encoder->cfg.roi.height; - encoder->cfg.roi.dqps = calloc(roi_size, sizeof(cfg->roi.dqps[0])); - memcpy(encoder->cfg.roi.dqps, - cfg->roi.dqps, - roi_size * sizeof(*cfg->roi.dqps)); - - } - // NOTE: When tr_depth_inter is equal to 0, the transform is still split // for SMP and AMP partition units. 
encoder->tr_depth_inter = 0; - if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps || encoder->cfg.roi_file || encoder->cfg.set_qp_in_cu || encoder->cfg.vaq) { - encoder->max_qp_delta_depth = 0; - } else { - encoder->max_qp_delta_depth = -1; - } - //Tiles encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 || encoder->cfg.tiles_height_count > 1; @@ -724,7 +625,7 @@ void kvz_encoder_control_free(encoder_control_t *const encoder) FREE_POINTER(encoder->tiles_tile_id); - FREE_POINTER(encoder->cfg.roi.dqps); + FREE_POINTER(encoder->cfg.roi.file_path); FREE_POINTER(encoder->cfg.optional_key); kvz_scalinglist_destroy(&encoder->scaling_list); @@ -734,6 +635,10 @@ void kvz_encoder_control_free(encoder_control_t *const encoder) kvz_close_rdcost_outfiles(); + if (encoder->roi_file) { + fclose(encoder->roi_file); + } + free(encoder); } diff --git a/src/encoder.h b/src/encoder.h index 89f6b3a2..24a93f86 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -130,7 +130,7 @@ typedef struct encoder_control_t //! Picture weights when GOP is used. double gop_layer_weights[MAX_GOP_LAYERS]; - int8_t max_qp_delta_depth; + FILE *roi_file; int tr_depth_inter; diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 707103ad..05b934d4 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -503,10 +503,10 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder->cfg.trskip_enable, 1, "transform_skip_enabled_flag"); - if (encoder->max_qp_delta_depth >= 0) { + if (state->frame->max_qp_delta_depth >= 0) { // Use separate QP for each LCU when rate control is enabled. 
WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag"); - WRITE_UE(stream, encoder->max_qp_delta_depth, "diff_cu_qp_delta_depth"); + WRITE_UE(stream, state->frame->max_qp_delta_depth, "diff_cu_qp_delta_depth"); } else { WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); } diff --git a/src/encoderstate.c b/src/encoderstate.c index 6bcce76b..6e3cf0b4 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -32,6 +32,9 @@ #include "encoderstate.h" + // This define is required for M_PI on Windows. +#define _USE_MATH_DEFINES +#include #include #include #include @@ -51,6 +54,13 @@ #include "strategies/strategies-picture.h" +/** + * \brief Strength of QP adjustments when using adaptive QP for 360 video. + * + * Determined empirically. + */ +static const double ERP_AQP_STRENGTH = 3.0; + int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) { int i; @@ -570,7 +580,7 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y); const int cu_width = LCU_WIDTH >> depth; - if (depth <= state->encoder_control->max_qp_delta_depth) { + if (depth <= state->frame->max_qp_delta_depth) { *prev_qp = -1; } @@ -650,7 +660,7 @@ static void encoder_state_worker_encode_lcu(void * opaque) encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); - if (encoder->max_qp_delta_depth >= 0) { + if (state->frame->max_qp_delta_depth >= 0) { int last_qp = state->last_qp; int prev_qp = -1; set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp); @@ -1252,6 +1262,154 @@ static bool edge_lcu(int id, int lcus_x, int lcus_y, bool xdiv64, bool ydiv64) } } + +/** + * \brief Return weight for 360 degree ERP video + * + * Returns the scaling factor of area from equirectangular projection to + * spherical surface. 
+ * + * \param y y-coordinate of the pixel + * \param h height of the picture + */ +static double ws_weight(int y, int h) +{ + return cos((y - 0.5 * h + 0.5) * (M_PI / h)); +} + + +/** + * \brief Update ROI QPs for 360 video with equirectangular projection. + * + * Updates the ROI parameters in frame->roi. + * + * \param encoder encoder control + * \param frame frame that will have the ROI map + */ +static void init_erp_aqp_roi(const encoder_control_t *encoder, kvz_picture *frame) +{ + int8_t *orig_roi = frame->roi.roi_array; + int32_t orig_width = frame->roi.width; + int32_t orig_height = frame->roi.height; + + // Update ROI with WS-PSNR delta QPs. + int new_height = encoder->in.height_in_lcu; + int new_width = orig_roi ? orig_width : 1; + int8_t *new_array = calloc(new_width * new_height, sizeof(orig_roi[0])); + + int frame_height = encoder->in.real_height; + + double total_weight = 0.0; + for (int y = 0; y < frame_height; y++) { + total_weight += ws_weight(y, frame_height); + } + + for (int y_lcu = 0; y_lcu < new_height; y_lcu++) { + int y_orig = LCU_WIDTH * y_lcu; + int lcu_height = MIN(LCU_WIDTH, frame_height - y_orig); + + double lcu_weight = 0.0; + for (int y = y_orig; y < y_orig + lcu_height; y++) { + lcu_weight += ws_weight(y, frame_height); + } + // Normalize. + lcu_weight = (lcu_weight * frame_height) / (total_weight * lcu_height); + + int8_t qp_delta = round(-ERP_AQP_STRENGTH * log2(lcu_weight)); + + if (orig_roi) { + // If a ROI array already exists, we copy the existing values to the + // new array while adding qp_delta to each. + int y_roi = y_lcu * orig_height / new_height; + for (int x = 0; x < new_width; x++) { + new_array[x + y_lcu * new_width] = + CLIP(-51, 51, orig_roi[x + y_roi * new_width] + qp_delta); + } + + } else { + // Otherwise, simply write qp_delta to the ROI array. 
+ new_array[y_lcu] = qp_delta; + } + } + + // Update new values + frame->roi.width = new_width; + frame->roi.height = new_height; + frame->roi.roi_array = new_array; + FREE_POINTER(orig_roi); +} + + +static void next_roi_frame_from_file(kvz_picture *frame, FILE *file, enum kvz_roi_format format) { + // The ROI description is as follows: + // First number is width, second number is height, + // then follows width * height number of dqp values. + + // Rewind the (seekable) ROI file when end of file is reached. + // Allows a single ROI frame to be used for a whole sequence + // and looping with --loop-input. Skips possible whitespace. + if (ftell(file) != -1L) { + int c = fgetc(file); + while (format == KVZ_ROI_TXT && isspace(c)) c = fgetc(file); + ungetc(c, file); + if (c == EOF) rewind(file); + } + + int *width = &frame->roi.width; + int *height = &frame->roi.height; + + bool failed = false; + + if (format == KVZ_ROI_TXT) failed = fscanf(file, "%d", width) != 1 || fscanf(file, "%d", height) != 1; // fscanf returns EOF (non-zero) on end of input, so compare against the conversion count + if (format == KVZ_ROI_BIN) failed = fread(&frame->roi, 4, 2, file) != 2; + + if (failed) { + fprintf(stderr, "Failed to read ROI size.\n"); + fclose(file); + assert(0); + } + + if (*width <= 0 || *height <= 0) { + fprintf(stderr, "Invalid ROI size: %dx%d.\n", *width, *height); + fclose(file); + assert(0); + } + + if (*width > 10000 || *height > 10000) { + fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n"); + fclose(file); + assert(0); + } + + const unsigned size = (*width) * (*height); + int8_t *dqp_array = calloc((size_t)size, sizeof(frame->roi.roi_array[0])); + if (!dqp_array) { + fprintf(stderr, "Failed to allocate memory for ROI table.\n"); + fclose(file); + assert(0); + } + + FREE_POINTER(frame->roi.roi_array); + frame->roi.roi_array = dqp_array; + + if (format == KVZ_ROI_TXT) { + for (int i = 0; i < size; ++i) { + int number; // Need a pointer to int for fscanf + if (fscanf(file, "%d", &number) != 1) { + fprintf(stderr, "Reading ROI file failed.\n"); +
fclose(file); + assert(0); + } + dqp_array[i] = CLIP(-51, 51, number); + } + } else if (format == KVZ_ROI_BIN) { + if (fread(dqp_array, 1, size, file) != size) { + fprintf(stderr, "Reading ROI file failed.\n"); + assert(0); + } + } +} + static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_picture* frame) { assert(state->type == ENCODER_STATE_TYPE_MAIN); @@ -1265,6 +1423,21 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict state->tile->frame->height ); + // ROI / delta QP maps + if (frame->roi.roi_array && cfg->roi.file_path) { + assert(0 && "Conflict: Other ROI data was supplied when a ROI file was specified."); + } + + // Read frame from the file. If no file is specified, + // ROI data should be already set by the application. + if (cfg->roi.file_path) { + next_roi_frame_from_file(frame, state->encoder_control->roi_file, cfg->roi.format); + } + + if (cfg->erp_aqp) { + init_erp_aqp_roi(state->encoder_control, state->tile->frame->source); + } + // Variance adaptive quantization if (cfg->vaq) { const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400; @@ -1351,6 +1524,12 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict } // Variance adaptive quantization - END + if (cfg->target_bitrate > 0 || frame->roi.roi_array || cfg->set_qp_in_cu || cfg->vaq) { + state->frame->max_qp_delta_depth = 0; + } else { + state->frame->max_qp_delta_depth = -1; + } + // Use this flag to handle closed gop irap picture selection.
// If set to true, irap is already set and we avoid // setting it based on the intra period @@ -1603,10 +1782,9 @@ lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y) int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp) { - const encoder_control_t *ctrl = state->encoder_control; const cu_array_t *cua = state->tile->frame->cu_array; // Quantization group width - const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth); + const int qg_width = LCU_WIDTH >> MIN(state->frame->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth); // Coordinates of the top-left corner of the quantization group const int x_qg = x & ~(qg_width - 1); diff --git a/src/encoderstate.h b/src/encoderstate.h index a65e8b35..00885aa4 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -180,6 +180,8 @@ typedef struct encoder_state_config_frame_t { */ double *aq_offsets; + int8_t max_qp_delta_depth; + /** * \brief Whether next NAL is the first NAL in the access unit. 
*/ @@ -380,10 +382,10 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state) */ static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth) { - if (state->encoder_control->max_qp_delta_depth < 0) return false; + if (state->frame->max_qp_delta_depth < 0) return false; const int cu_width = LCU_WIDTH >> depth; - const int qg_width = LCU_WIDTH >> state->encoder_control->max_qp_delta_depth; + const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth; const int right = x + cu_width; const int bottom = y + cu_width; return (right % qg_width == 0 || right >= state->tile->frame->width) && diff --git a/src/filter.c b/src/filter.c index d3bdfb7b..510b9ea6 100644 --- a/src/filter.c +++ b/src/filter.c @@ -274,7 +274,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir) static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) { - if (state->encoder_control->max_qp_delta_depth < 0) { + if (state->frame->max_qp_delta_depth < 0) { return state->qp; } diff --git a/src/kvazaar.h b/src/kvazaar.h index 967a3c67..73c7538d 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -250,6 +250,11 @@ enum kvz_file_format KVZ_FORMAT_YUV = 2 }; +enum kvz_roi_format +{ + KVZ_ROI_TXT = 0, + KVZ_ROI_BIN = 1 +}; // Map from input format to chroma format. #define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)"\0\1\2\3"[format]) @@ -388,12 +393,9 @@ typedef struct kvz_config int32_t implicit_rdpcm; /*!< \brief Enable implicit residual DPCM. */ struct { - int32_t width; - int32_t height; - int8_t *dqps; - } roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */ - - char *roi_file; + char *file_path; + enum kvz_roi_format format; + } roi; /*!< \brief Specify delta QPs for region of interest coding. */ unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. 
*/ @@ -764,6 +766,9 @@ typedef struct kvz_api { * the bitstream, length of the bitstream, the reconstructed frame, the * original frame and frame info in data_out, len_out, pic_out, src_out and * info_out, respectively. Otherwise, set the output parameters to NULL. + * + * Region of interest (ROI) / delta QP map can be specified in the input + * picture's ROI field but only when a ROI file is not used. * * After passing all of the input frames, the caller should keep calling this * function with pic_in set to NULL, until no more data is returned in the diff --git a/src/rate_control.c b/src/rate_control.c index e5620fb0..64983ec1 100644 --- a/src/rate_control.c +++ b/src/rate_control.c @@ -1085,7 +1085,7 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, const encoder_control_t * const ctrl = state->encoder_control; lcu_stats_t *lcu = kvz_get_lcu_stats(state, pos.x, pos.y); - if (ctrl->cfg.roi.dqps != NULL || state->tile->frame->source->roi.roi_array) { + if (state->tile->frame->source->roi.roi_array) { vector2d_t lcu_vec = { pos.x + state->tile->lcu_offset_x, pos.y + state->tile->lcu_offset_y @@ -1101,26 +1101,7 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state, } state->qp = CLIP_TO_QP(state->frame->QP + dqp); state->lambda = qp_to_lambda(state, state->qp); - state->lambda_sqrt = sqrt(state->frame->lambda); - } - else if (ctrl->cfg.roi.dqps != NULL) { - vector2d_t lcu = { - pos.x + state->tile->lcu_offset_x, - pos.y + state->tile->lcu_offset_y - }; - vector2d_t roi = { - lcu.x * ctrl->cfg.roi.width / ctrl->in.width_in_lcu, - lcu.y * ctrl->cfg.roi.height / ctrl->in.height_in_lcu - }; - int roi_index = roi.x + roi.y * ctrl->cfg.roi.width; - int dqp = ctrl->cfg.roi.dqps[roi_index]; - if (dqp != 0) { - pos.x = 0; - } - state->qp = CLIP_TO_QP(state->frame->QP + dqp); - state->lambda = qp_to_lambda(state, state->qp); state->lambda_sqrt = sqrt(state->lambda); - } else if (ctrl->cfg.target_bitrate > 0) { const uint32_t pixels = MIN(LCU_WIDTH, 
state->tile->frame->width - LCU_WIDTH * pos.x) *