diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..6110e3a0 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,47 @@ +# Use Kvazaar CI base image which includes the build tools and ffmpeg + hmdec in ${HOME}/bin +image: ultravideo/kvazaar_ci_base:latest + +# Build and test kvazaar +test-kvazaar: &test-template + stage: test + script: + - export PATH="${HOME}/bin:${PATH}" + - ./autogen.sh + - ./configure --enable-werror || (cat config.log && false) + - make --jobs=8 + - make check --jobs=8 VERBOSE=1 + artifacts: + paths: + - src/kvazaar + - src/.libs + expire_in: 1 week + +test-asan: + <<: *test-template + variables: + CFLAGS: '-fsanitize=address' + # LeakSanitizer doesn't work inside the container because it requires + # ptrace so we disable it. + ASAN_OPTIONS: 'detect_leaks=0' + # AddressSanitizer adds some extra symbols so we expect a failure from + # the external symbols test. + XFAIL_TESTS: test_external_symbols.sh + +test-tsan: + <<: *test-template + variables: + CFLAGS: '-fsanitize=thread' + +test-ubsan: + <<: *test-template + variables: + CFLAGS: '-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment' + +test-valgrind: + <<: *test-template + variables: + KVAZAAR_OVERRIDE_angular_pred: generic + KVAZAAR_OVERRIDE_sao_band_ddistortion: generic + KVAZAAR_OVERRIDE_sao_edge_ddistortion: generic + KVAZAAR_OVERRIDE_calc_sao_edge_dir: generic + KVZ_TEST_VALGRIND: 1 diff --git a/.travis.yml b/.travis.yml index db8faa38..d54c9d98 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,16 @@ matrix: include: - compiler: clang + env: KVZ_TEST_VALGRIND=1 + + - compiler: clang + env: CFLAGS='-fsanitize=thread' + + - compiler: clang + env: CFLAGS='-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment' + - compiler: gcc-4.8 + env: CFLAGS='-fsanitize=address' # We have some Mac specific code and Mac sometimes has odd build issues. 
- os: osx @@ -27,14 +36,15 @@ matrix: install: true script: - ./autogen.sh - - ./configure --enable-werror + - ./configure --enable-werror || (cat config.log && false) - make --jobs=2 V=1 + - make check TESTS=kvazaar_tests install: bash .travis-install.bash script: - ./autogen.sh - - ./configure --enable-werror + - ./configure --enable-werror || (cat config.log && false) - make --jobs=2 V=1 - make check VERBOSE=1 diff --git a/README.md b/README.md index e48f277d..968e98ea 100644 --- a/README.md +++ b/README.md @@ -53,14 +53,14 @@ Usage: kvazaar -i --input-res x -o Required: - -i, --input : Input file + -i, --input : Input file --input-res : Input resolution [auto] - auto: detect from file name - x: width times height - -o, --output : Output file + - auto: Detect from file name. + - x: width times height + -o, --output : Output file Presets: - --preset= : Set options to a preset [medium] + --preset : Set options to a preset [medium] - ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow placebo @@ -68,160 +68,175 @@ Presets: Input: -n, --frames : Number of frames to code [all] --seek : First frame to code [0] - --input-fps [/] : Framerate of the input video [25.0] - --source-scan-type : Set source scan type [progressive]. - - progressive: progressive scan - - tff: top field first - - bff: bottom field first - --input-format : P420 or P400 - --input-bitdepth : 8-16 - --loop-input : Re-read input file forever + --input-fps [/] : Frame rate of the input video [25] + --source-scan-type : Source scan type [progressive] + - progressive: Progressive scan + - tff: Top field first + - bff: Bottom field first + --input-format : P420 or P400 [P420] + --input-bitdepth : 8-16 [8] + --loop-input : Re-read input file forever. Options: - --help : Print this help message and exit - --version : Print version information and exit - --aud : Use access unit delimiters - --debug : Output encoders reconstruction. 
- --cpuid : Disable runtime cpu optimizations with value 0. - --hash : Decoded picture hash [checksum] + --help : Print this help message and exit. + --version : Print version information and exit. + --(no-)aud : Use access unit delimiters. [disabled] + --debug : Output internal reconstruction. + --(no-)cpuid : Enable runtime CPU optimizations. [enabled] + --hash : Decoded picture hash [checksum] - none: 0 bytes - checksum: 18 bytes - md5: 56 bytes - --no-psnr : Don't calculate PSNR for frames - --no-info : Don't add encoder info SEI. + --(no-)psnr : Calculate PSNR for frames. [enabled] + --(no-)info : Add encoder info SEI. [enabled] + --crypto : Selective encryption. Crypto support must be + enabled at compile-time. Can be 'on' or 'off' or + a list of features separated with a '+'. [off] + - on: Enable all encryption features. + - off: Disable selective encryption. + - mvs: Motion vector magnitudes. + - mv_signs: Motion vector signs. + - trans_coeffs: Coefficient magnitudes. + - trans_coeff_signs: Coefficient signs. + - intra_pred_modes: Intra prediction modes. + --key : Encryption key [16,213,27,56,255,127,242,112, + 97,126,197,204,25,59,38,30] Video structure: - -q, --qp : Quantization Parameter [32] - -p, --period : Period of intra pictures [0] - - 0: only first picture is intra - - 1: all pictures are intra - - 2-N: every Nth picture is intra - --vps-period : Specify how often the video parameter set is - re-sent. [0] - - 0: only send VPS with the first frame - - N: send VPS with every Nth intra frame - -r, --ref : Reference frames, range 1..15 [3] - --gop : Definition of GOP structure [0] - - 0: disabled + -q, --qp : Quantization parameter [22] + -p, --period : Period of intra pictures [64] + - 0: Only first picture is intra. + - 1: All pictures are intra. + - N: Every Nth picture is intra. + --vps-period : How often the video parameter set is re-sent [0] + - 0: Only send VPS with the first frame. + - N: Send VPS with every Nth intra frame. 
+ -r, --ref : Number of reference frames, in range 1..15 [4] + --gop : GOP structure [8] + - 0: Disabled - 8: B-frame pyramid of length 8 - - lp-: lp-gop definition - (e.g. lp-g8d4t2, see README) - --cqmfile : Custom Quantization Matrices from a file - --bitrate : Target bitrate. [0] - - 0: disable rate-control - - N: target N bits per second - --lossless : Use lossless coding - --mv-constraint : Constrain movement vectors - - none: no constraint - - frametile: constrain within the tile - - frametilemargin: constrain even more - --roi : Use a delta QP map for region of interest - Read an array of delta QP values from - a file, where the first two values are the - width and height, followed by width*height - delta QP values in raster order. - The delta QP map can be any size or aspect - ratio, and will be mapped to LCU's. - --(no-)erp-aqp : Use adaptive QP for 360 video with - equirectangular projection + - lp-: Low-delay P-frame GOP + (e.g. lp-g8d4t2, see README) + --cqmfile : Read custom quantization matrices from a file. + --bitrate : Target bitrate [0] + - 0: Disable rate control. + - N: Target N bits per second. + --(no-)lossless : Use lossless coding. [disabled] + --mv-constraint : Constrain movement vectors. [none] + - none: No constraint + - frametile: Constrain within the tile. + - frametilemargin: Constrain even more. + --roi : Use a delta QP map for region of interest. + Reads an array of delta QP values from a text + file. The file format is: width and height of + the QP delta map followed by width*height delta + QP values in raster order. The map can be of any + size and will be scaled to the video size. + --(no-)erp-aqp : Use adaptive QP for 360 degree video with + equirectangular projection. [disabled] --level : Use the given HEVC level in the output and give - an error if the input doesn't fit to it's - limits [6.2] - Allowed values are 1, 2, 2.1, 3, 3.1, 4, 4.1 - 5, 5.1, 5.2, 6, 6.1 and 6.2. The dot is - optional. 
- --force-level : Same as --level, except instead of errors you - get warnings - --high-tier : Used with --level. Tells the encoder to use - high-tier bitrate limits instead of the - main-tier limits during encoding. + an error if level limits are exceeded. [6.2] + - 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, + 6.1, 6.2 + --force-level : Same as --level but warnings instead of errors. + --high-tier : Used with --level. Use high tier bitrate limits + instead of the main tier limits during encoding. + High tier requires level 4 or higher. Compression tools: - --deblock [] : Deblocking - - beta: between -6 and 6 - - tc: between -6 and 6 - --(no-)sao : Sample Adaptive Offset - --(no-)rdoq : Rate-Distortion Optimized Quantization - --(no-)signhide : Sign Hiding - --(no-)smp : Symmetric Motion Partition - --(no-)amp : Asymmetric Motion Partition - --rd : Intra mode search complexity - - 0: skip intra if inter is good enough - - 1: rough intra mode search with SATD - - 2: refine intra mode search with SSE - --(no-)mv-rdo : Rate-Distortion Optimized motion vector costs - --(no-)full-intra-search - : Try all intra modes during rough search. - --(no-)transform-skip : Transform skip - --me : Integer motion estimation + --(no-)deblock : Deblocking filter. [0:0] + - beta: Between -6 and 6 + - tc: Between -6 and 6 + --sao : Sample Adaptive Offset [full] + - off: SAO disabled + - band: Band offset only + - edge: Edge offset only + - full: Full SAO + --(no-)rdoq : Rate-distortion optimized quantization [enabled] + --(no-)rdoq-skip : Skip RDOQ for 4x4 blocks. [disabled] + --(no-)signhide : Sign hiding [disabled] + --(no-)smp : Symmetric motion partition [disabled] + --(no-)amp : Asymmetric motion partition [disabled] + --rd : Intra mode search complexity [0] + - 0: Skip intra if inter is good enough. + - 1: Rough intra mode search with SATD. + - 2: Refine intra mode search with SSE. + - 3: Try all intra modes and enable intra + chroma mode search. 
+ --(no-)mv-rdo : Rate-distortion optimized motion vector costs + [disabled] + --(no-)full-intra-search : Try all intra modes during rough search. + [disabled] + --(no-)transform-skip : Try transform skip [disabled] + --me : Integer motion estimation algorithm [hexbs] - hexbs: Hexagon Based Search - tz: Test Zone Search - full: Full Search - full8, full16, full32, full64 - dia: Diamond Search - --me-steps : How many search steps does the motion estimation - do before cutting off [-1] - Has effect only for 'hexbs' and 'dia' - --subme : Set fractional pixel motion estimation level - - 0: only integer motion estimation + --me-steps : Motion estimation search step limit. Only + affects 'hexbs' and 'dia'. [-1] + --subme : Fractional pixel motion estimation level [4] + - 0: Integer motion estimation only - 1: + 1/2-pixel horizontal and vertical - 2: + 1/2-pixel diagonal - 3: + 1/4-pixel horizontal and vertical - 4: + 1/4-pixel diagonal - --pu-depth-inter - - : Range for sizes for inter predictions + --pu-depth-inter - : Inter prediction units sizes [0-3] - 0, 1, 2, 3: from 64x64 to 8x8 - --pu-depth-intra - : Range for sizes for intra predictions + --pu-depth-intra - : Intra prediction units sizes [1-4] - 0, 1, 2, 3, 4: from 64x64 to 4x4 - --(no-)bipred : Bi-prediction - --(no-)cu-split-termination - : CU split search termination condition - - off: Never terminate cu-split search - - zero: Terminate with zero residual - --(no-)me-early-termination : ME early termination condition - - off: Don't terminate early - - on: Terminate early - - sensitive: Terminate even earlier - --(no-)implicit-rdpcm : Implicit residual DPCM - Currently only supported with lossless coding. - --(no-)tmvp : Temporal Motion Vector Prediction - --(no-)rdoq-skip : Skips RDOQ for 4x4 blocks + --tr-depth-intra : Transform split depth for intra blocks [0] + --(no-)bipred : Bi-prediction [disabled] + --cu-split-termination : CU split search termination [zero] + - off: Don't terminate early. 
+ - zero: Terminate when residual is zero. + --me-early-termination : Motion estimation termination [on] + - off: Don't terminate early. + - on: Terminate early. + - sensitive: Terminate even earlier. + --(no-)intra-rdo-et : Check intra modes in rdo stage only until + a zero coefficient CU is found. [disabled] + --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported + with lossless coding. [disabled] + --(no-)tmvp : Temporal motion vector prediction [enabled] Parallel processing: --threads : Number of threads to use [auto] - - 0: process everything with main thread - - N: use N threads for encoding - - auto: select based on number of cores - --owf : Frame parallelism [auto] - - N: Process N-1 frames at a time - - auto: Select automatically - --(no-)wpp : Wavefront parallel processing [enabled] + - 0: Process everything with main thread. + - N: Use N threads for encoding. + - auto: Select automatically. + --owf : Frame-level parallelism [auto] + - N: Process N+1 frames at a time. + - auto: Select automatically. + --(no-)wpp : Wavefront parallel processing. [enabled] Enabling tiles automatically disables WPP. To enable WPP with tiles, re-enable it after - enabling tiles. + enabling tiles. Enabling wpp with tiles is, + however, an experimental feature since it is + not supported in any HEVC profile. --tiles x : Split picture into width x height uniform tiles. --tiles-width-split |u : - Specifies a comma separated list of pixel - positions of tiles columns separation - coordinates. - Can also be u, followed by a single int n, in - which case it produces columns of uniform width. + - : A comma-separated list of tile + column pixel coordinates. + - u: Number of tile columns of uniform + width. --tiles-height-split |u : - Specifies a comma separated list of pixel - positions of tiles rows separation coordinates. - Can also be u followed by and a single int n, in - which case it produces rows of uniform height. 
- --slices : Control how slices are used - tiles: put tiles in independent slices - wpp: put rows in dependent slices - tiles+wpp: do both + - : A comma-separated list of tile row + pixel coordinates. + - u: Number of tile rows of uniform + height. + --slices : Control how slices are used. + - tiles: Put tiles in independent slices. + - wpp: Put rows in dependent slices. + - tiles+wpp: Do both. Video Usability Information: - --sar : Specify Sample Aspect Ratio + --sar : Specify sample aspect ratio --overscan : Specify crop overscan setting [undef] - undef, show, crop --videoformat : Specify video format [undef] - - component, pal, ntsc, secam, mac, undef + - undef, component, pal, ntsc, secam, mac --range : Specify color range [tv] - tv, pc --colorprim : Specify color primaries [undef] @@ -238,8 +253,8 @@ Video Usability Information: --chromaloc : Specify chroma sample location (0 to 5) [0] Deprecated parameters: (might be removed at some point) - -w, --width : Use --input-res - -h, --height : Use --input-res + -w, --width : Use --input-res. + -h, --height : Use --input-res. ``` [comment]: # (END KVAZAAR HELP MESSAGE) @@ -268,24 +283,27 @@ where the names have been abbreviated to fit the layout in GitHub. 
| | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p | | -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | -| rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | -| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 | -| pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | +| rd | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 2 | +| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-4 | 1-4 | 1-4 | 1-4 | 1-4 | +| pu-depth-inter | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | 0-3 | 0-3 | | me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz | -| ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 | +| gop | g4d4t1| g4d4t1| g4d4t1| g4d4t1| g4d4t1| 8 | 8 | 8 | 8 | 8 | +| ref | 1 | 1 | 1 | 1 | 2 | 4 | 4 | 4 | 4 | 4 | +| bipred | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | | deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | -| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | -| subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 | -| sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | +| subme | 2 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | +| sao | off | full | full | full | full | full | full | full | full | full | | rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | -| rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | +| rdoq-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | -| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | | amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off | -| me-early-termination | sens. | sens. | sens. | sens. | on | on | on | on | on | off | +| me-early-termination | sens. | sens. | sens. | sens. 
| sens. | on | on | off | off | off | +| intra-rdo-et | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ## Kvazaar library @@ -352,11 +370,11 @@ For other examples, see [Dockerfile](./Dockerfile) ### Visualization (Windows only) -Branch `visualizer` has a visual studio project, which can be compiled to enable visualization feature in Kvazaar. +Compiling `kvazaar_cli` project in the `visualizer` branch results in a Kvazaar executable with visualization enabled. Additional Requirements: [`SDL2`](https://www.libsdl.org/download-2.0.php), [`SDL2-ttf`](https://www.libsdl.org/projects/SDL_ttf/). -Directory `visualizer_extras` is expected to be found from the same directory level as the kvazaar project directory. Inside should be directories `include` and `lib` found from the development library zip packages. +Directory `visualizer_extras` has to be added into the same directory level as the kvazaar project directory. Inside should be directories `include` and `lib` found from the development library zip packages. `SDL2.dll`, `SDL2_ttf.dll`, `libfreetype-6.dll`, `zlib1.dll`, and `pthreadVC2.dll` should be placed in the working directory (i.e. the folder the `kvazaar.exe` is in after compiling the `kvazaar_cli` project/solution) when running the visualizer. The required `.dll` can be found in the aforementioned `lib`-folder (`lib\x64`) and the dll folder inside the pthreads folder (see `Required libraries`). 
diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj b/build/kvazaar_lib/kvazaar_lib.vcxproj index a0581833..d49c477b 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj @@ -84,7 +84,11 @@ ARCH_X86_64=1;%(Defines) +<<<<<<< HEAD $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm; +======= + $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm; +>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd $(SolutionDir)..\..\pthreads.2\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories);$(SolutionDir)..\src\strategies; @@ -93,7 +97,11 @@ ARCH_X86_64=0;PREFIX +<<<<<<< HEAD $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm; +======= + $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm; +>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd $(SolutionDir)..\..\pthreads.2\lib\x86 @@ -108,7 +116,11 @@ ARCH_X86_64=0;PREFIX +<<<<<<< HEAD $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm; +======= + $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm; +>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd $(SolutionDir)..\..\pthreads.2\lib\x86 @@ -123,7 +135,11 @@ ARCH_X86_64=1;%(Defines) +<<<<<<< HEAD $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm; +======= + $(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm; +>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd $(SolutionDir)..\..\pthreads.2\lib\x64 diff --git a/configure.ac b/configure.ac index 78791a32..c481bb03 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=4 -ver_minor=0 +ver_minor=1 ver_release=0 # Prevents configure from adding a lot of 
defines to the CFLAGS @@ -45,15 +45,17 @@ AC_SYS_LARGEFILE LT_INIT([win32-dll]) +AX_CHECK_COMPILE_FLAG([-maltivec],[flag_altivec="true"]) AX_CHECK_COMPILE_FLAG([-mavx2], [flag_avx2="true"]) AX_CHECK_COMPILE_FLAG([-msse4.1], [flag_sse4_1="true"]) AX_CHECK_COMPILE_FLAG([-msse2], [flag_sse2="true"]) +AM_CONDITIONAL([HAVE_ALTIVEC], [test x"$flag_altivec" = x"true"]) AM_CONDITIONAL([HAVE_AVX2], [test x"$flag_avx2" = x"true"]) AM_CONDITIONAL([HAVE_SSE4_1], [test x"$flag_sse4_1" = x"true"]) AM_CONDITIONAL([HAVE_SSE2], [test x"$flag_sse2" = x"true"]) -KVZ_CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden" +KVZ_CFLAGS="-Wall -Wextra -Wvla -Wno-sign-compare -Wno-unused-parameter -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden" CFLAGS="$KVZ_CFLAGS $CFLAGS" AC_SEARCH_LIBS([log], [m c], [], [exit 1]) diff --git a/doc/kvazaar.1 b/doc/kvazaar.1 index 7b234b9b..c936d27c 100644 --- a/doc/kvazaar.1 +++ b/doc/kvazaar.1 @@ -1,24 +1,24 @@ -.TH KVAZAAR "1" "November 2017" "kvazaar v1.2.0" "User Commands" +.TH KVAZAAR "1" "June 2018" "kvazaar v1.2.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS \fBkvazaar \fR\-i \-\-input\-res x \-o .SH DESCRIPTION .TP -\fB\-i\fR, \fB\-\-input +\fB\-i\fR, \fB\-\-input Input file .TP \fB\-\-input\-res Input resolution [auto] -auto: detect from file name -x: width times height + \- auto: Detect from file name. + \- x: width times height .TP -\fB\-o\fR, \fB\-\-output +\fB\-o\fR, \fB\-\-output Output file .SS "Presets:" .TP -\fB\-\-preset= +\fB\-\-preset Set options to a preset [medium] \- ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow @@ -32,241 +32,288 @@ Number of frames to code [all] \fB\-\-seek First frame to code [0] .TP -\fB\-\-input\-fps / -Framerate of the input video [25.0] +\fB\-\-input\-fps [/] +Frame rate of the input video [25] .TP \fB\-\-source\-scan\-type -Set source scan type [progressive]. 
- \- progressive: progressive scan - \- tff: top field first - \- bff: bottom field first +Source scan type [progressive] + \- progressive: Progressive scan + \- tff: Top field first + \- bff: Bottom field first .TP -\fB\-\-input\-format -P420 or P400 +\fB\-\-input\-format +P420 or P400 [P420] .TP -\fB\-\-input\-bitdepth -8\-16 +\fB\-\-input\-bitdepth +8\-16 [8] .TP \fB\-\-loop\-input -Re\-read input file forever +Re\-read input file forever. .SS "Options:" .TP \fB\-\-help -Print this help message and exit +Print this help message and exit. .TP \fB\-\-version -Print version information and exit +Print version information and exit. .TP -\fB\-\-aud -Use access unit delimiters +\fB\-\-(no\-)aud +Use access unit delimiters. [disabled] .TP -\fB\-\-debug -Output encoders reconstruction. +\fB\-\-debug +Output internal reconstruction. .TP -\fB\-\-cpuid -Disable runtime cpu optimizations with value 0. +\fB\-\-(no\-)cpuid +Enable runtime CPU optimizations. [enabled] .TP -\fB\-\-hash +\fB\-\-hash Decoded picture hash [checksum] \- none: 0 bytes \- checksum: 18 bytes \- md5: 56 bytes .TP -\fB\-\-no\-psnr -Don't calculate PSNR for frames +\fB\-\-(no\-)psnr +Calculate PSNR for frames. [enabled] .TP -\fB\-\-no\-info -Don't add encoder info SEI. +\fB\-\-(no\-)info +Add encoder info SEI. [enabled] +.TP +\fB\-\-crypto +Selective encryption. Crypto support must be +enabled at compile\-time. Can be 'on' or 'off' or +a list of features separated with a '+'. [off] + \- on: Enable all encryption features. + \- off: Disable selective encryption. + \- mvs: Motion vector magnitudes. + \- mv_signs: Motion vector signs. + \- trans_coeffs: Coefficient magnitudes. + \- trans_coeff_signs: Coefficient signs. + \- intra_pred_modes: Intra prediction modes. 
+.TP +\fB\-\-key +Encryption key [16,213,27,56,255,127,242,112, + 97,126,197,204,25,59,38,30] .SS "Video structure:" .TP \fB\-q\fR, \fB\-\-qp -Quantization Parameter [32] +Quantization parameter [22] .TP \fB\-p\fR, \fB\-\-period -Period of intra pictures [0] -\- 0: only first picture is intra -\- 1: all pictures are intra -\- 2\-N: every Nth picture is intra +Period of intra pictures [64] + \- 0: Only first picture is intra. + \- 1: All pictures are intra. + \- N: Every Nth picture is intra. .TP \fB\-\-vps\-period -Specify how often the video parameter set is -re\-sent. [0] - \- 0: only send VPS with the first frame - \- N: send VPS with every Nth intra frame +How often the video parameter set is re\-sent [0] + \- 0: Only send VPS with the first frame. + \- N: Send VPS with every Nth intra frame. .TP \fB\-r\fR, \fB\-\-ref -Reference frames, range 1..15 [3] +Number of reference frames, in range 1..15 [4] .TP \fB\-\-gop -Definition of GOP structure [0] - \- 0: disabled +GOP structure [8] + \- 0: Disabled \- 8: B\-frame pyramid of length 8 - \- lp\-: lp\-gop definition - (e.g. lp\-g8d4t2, see README) + \- lp\-: Low\-delay P\-frame GOP + (e.g. lp\-g8d4t2, see README) .TP -\fB\-\-cqmfile -Custom Quantization Matrices from a file +\fB\-\-cqmfile +Read custom quantization matrices from a file. .TP \fB\-\-bitrate -Target bitrate. [0] - \- 0: disable rate\-control - \- N: target N bits per second +Target bitrate [0] + \- 0: Disable rate control. + \- N: Target N bits per second. .TP -\fB\-\-lossless -Use lossless coding +\fB\-\-(no\-)lossless +Use lossless coding. [disabled] .TP -\fB\-\-mv\-constraint -Constrain movement vectors - \- none: no constraint - \- frametile: constrain within the tile - \- frametilemargin: constrain even more +\fB\-\-mv\-constraint +Constrain movement vectors. [none] + \- none: No constraint + \- frametile: Constrain within the tile. + \- frametilemargin: Constrain even more. 
.TP -\fB\-\-roi -Use a delta QP map for region of interest - Read an array of delta QP values from - a file, where the first two values are the - width and height, followed by width*height - delta QP values in raster order. - The delta QP map can be any size or aspect - ratio, and will be mapped to LCU's. +\fB\-\-roi +Use a delta QP map for region of interest. +Reads an array of delta QP values from a text +file. The file format is: width and height of +the QP delta map followed by width*height delta +QP values in raster order. The map can be of any +size and will be scaled to the video size. .TP \fB\-\-(no\-)erp\-aqp -Use adaptive QP for 360 video with -equirectangular projection +Use adaptive QP for 360 degree video with +equirectangular projection. [disabled] +.TP +\fB\-\-level +Use the given HEVC level in the output and give +an error if level limits are exceeded. [6.2] + \- 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6, + 6.1, 6.2 +.TP +\fB\-\-force\-level +Same as \-\-level but warnings instead of errors. +.TP +\fB\-\-high\-tier +Used with \-\-level. Use high tier bitrate limits +instead of the main tier limits during encoding. +High tier requires level 4 or higher. .SS "Compression tools:" .TP -\fB\-\-deblock [] -Deblocking - \- beta: between \-6 and 6 - \- tc: between \-6 and 6 +\fB\-\-(no\-)deblock +Deblocking filter. [0:0] + \- beta: Between \-6 and 6 + \- tc: Between \-6 and 6 .TP -\fB\-\-(no\-)sao -Sample Adaptive Offset +\fB\-\-sao +Sample Adaptive Offset [full] + \- off: SAO disabled + \- band: Band offset only + \- edge: Edge offset only + \- full: Full SAO .TP \fB\-\-(no\-)rdoq -Rate\-Distortion Optimized Quantization +Rate\-distortion optimized quantization [enabled] +.TP +\fB\-\-(no\-)rdoq\-skip +Skip RDOQ for 4x4 blocks. 
[disabled] .TP \fB\-\-(no\-)signhide -Sign Hiding +Sign hiding [disabled] .TP \fB\-\-(no\-)smp -Symmetric Motion Partition +Symmetric motion partition [disabled] .TP \fB\-\-(no\-)amp -Asymmetric Motion Partition +Asymmetric motion partition [disabled] .TP \fB\-\-rd -Intra mode search complexity - \- 0: skip intra if inter is good enough - \- 1: rough intra mode search with SATD - \- 2: refine intra mode search with SSE +Intra mode search complexity [0] + \- 0: Skip intra if inter is good enough. + \- 1: Rough intra mode search with SATD. + \- 2: Refine intra mode search with SSE. + \- 3: Try all intra modes and enable intra + chroma mode search. .TP \fB\-\-(no\-)mv\-rdo -Rate\-Distortion Optimized motion vector costs +Rate\-distortion optimized motion vector costs +[disabled] .TP \fB\-\-(no\-)full\-intra\-search - Try all intra modes during rough search. +[disabled] .TP \fB\-\-(no\-)transform\-skip -Transform skip +Try transform skip [disabled] .TP \fB\-\-me -Integer motion estimation +Integer motion estimation algorithm [hexbs] \- hexbs: Hexagon Based Search \- tz: Test Zone Search \- full: Full Search \- full8, full16, full32, full64 + \- dia: Diamond Search +.TP +\fB\-\-me\-steps +Motion estimation search step limit. Only +affects 'hexbs' and 'dia'. 
[\-1] .TP \fB\-\-subme -Set fractional pixel motion estimation level - \- 0: only integer motion estimation +Fractional pixel motion estimation level [4] + \- 0: Integer motion estimation only \- 1: + 1/2\-pixel horizontal and vertical \- 2: + 1/2\-pixel diagonal \- 3: + 1/4\-pixel horizontal and vertical \- 4: + 1/4\-pixel diagonal .TP \fB\-\-pu\-depth\-inter \- - -Range for sizes for inter predictions +Inter prediction units sizes [0\-3] \- 0, 1, 2, 3: from 64x64 to 8x8 .TP \fB\-\-pu\-depth\-intra \- -Range for sizes for intra predictions +Intra prediction units sizes [1\-4] \- 0, 1, 2, 3, 4: from 64x64 to 4x4 .TP +\fB\-\-tr\-depth\-intra +Transform split depth for intra blocks [0] +.TP \fB\-\-(no\-)bipred -Bi\-prediction +Bi\-prediction [disabled] .TP -\fB\-\-(no\-)cu\-split\-termination - -CU split search termination condition - \- off: Never terminate cu\-split search - \- zero: Terminate with zero residual +\fB\-\-cu\-split\-termination +CU split search termination [zero] + \- off: Don't terminate early. + \- zero: Terminate when residual is zero. .TP -\fB\-\-(no\-)me\-early\-termination -ME early termination condition - \- off: Don't terminate early - \- on: Terminate early - \- sensitive: Terminate even earlier +\fB\-\-me\-early\-termination +Motion estimation termination [on] + \- off: Don't terminate early. + \- on: Terminate early. + \- sensitive: Terminate even earlier. +.TP +\fB\-\-(no\-)intra\-rdo\-et +Check intra modes in rdo stage only until +a zero coefficient CU is found. [disabled] .TP \fB\-\-(no\-)implicit\-rdpcm -Implicit residual DPCM -Currently only supported with lossless coding. +Implicit residual DPCM. Currently only supported +with lossless coding. 
[disabled] .TP \fB\-\-(no\-)tmvp -Temporal Motion Vector Prediction -.TP -\fB\-\-(no\-)rdoq\-skip -Skips RDOQ for 4x4 blocks +Temporal motion vector prediction [enabled] .SS "Parallel processing:" .TP \fB\-\-threads Number of threads to use [auto] - \- 0: process everything with main thread - \- N: use N threads for encoding - \- auto: select based on number of cores + \- 0: Process everything with main thread. + \- N: Use N threads for encoding. + \- auto: Select automatically. .TP \fB\-\-owf -Frame parallelism [auto] - \- N: Process N\-1 frames at a time - \- auto: Select automatically +Frame\-level parallelism [auto] + \- N: Process N+1 frames at a time. + \- auto: Select automatically. .TP \fB\-\-(no\-)wpp -Wavefront parallel processing [enabled] +Wavefront parallel processing. [enabled] Enabling tiles automatically disables WPP. To enable WPP with tiles, re\-enable it after -enabling tiles. +enabling tiles. Enabling wpp with tiles is, +however, an experimental feature since it is +not supported in any HEVC profile. .TP \fB\-\-tiles x Split picture into width x height uniform tiles. .TP \fB\-\-tiles\-width\-split |u -Specifies a comma separated list of pixel -positions of tiles columns separation coordinates. -Can also be u followed by and a single int n, -in which case it produces columns of uniform width. + \- : A comma\-separated list of tile + column pixel coordinates. + \- u: Number of tile columns of uniform + width. .TP \fB\-\-tiles\-height\-split |u -Specifies a comma separated list of pixel -positions of tiles rows separation coordinates. -Can also be u followed by and a single int n, -in which case it produces rows of uniform height. + \- : A comma\-separated list of tile row + pixel coordinates. + \- u: Number of tile rows of uniform + height. .TP \fB\-\-slices -Control how slices are used - \- tiles: put tiles in independent slices - \- wpp: put rows in dependent slices - \- tiles+wpp: do both +Control how slices are used. 
+ \- tiles: Put tiles in independent slices. + \- wpp: Put rows in dependent slices. + \- tiles+wpp: Do both. .SS "Video Usability Information:" .TP \fB\-\-sar -Specify Sample Aspect Ratio +Specify sample aspect ratio .TP \fB\-\-overscan Specify crop overscan setting [undef] @@ -274,7 +321,7 @@ Specify crop overscan setting [undef] .TP \fB\-\-videoformat Specify video format [undef] - \- component, pal, ntsc, secam, mac, undef + \- undef, component, pal, ntsc, secam, mac .TP \fB\-\-range Specify color range [tv] diff --git a/src/Makefile.am b/src/Makefile.am index 5e884401..55d6d115 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -197,9 +197,13 @@ libsse41_la_SOURCES = \ strategies/sse41/picture-sse41.h if HAVE_PPC + +if HAVE_ALTIVEC libaltivec_la_CFLAGS = -maltivec endif +endif #HAVE_PPC + if HAVE_X86 if HAVE_AVX2 diff --git a/src/cfg.c b/src/cfg.c index 831b7b43..1701826b 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -86,13 +86,13 @@ int kvz_config_init(kvz_config *cfg) cfg->tiles_height_count = 1; cfg->tiles_width_split = NULL; cfg->tiles_height_split = NULL; - + cfg->wpp = 1; cfg->owf = -1; cfg->slice_count = 1; cfg->slice_addresses_in_ts = MALLOC(int32_t, 1); cfg->slice_addresses_in_ts[0] = 0; - + cfg->threads = -1; cfg->cpuid = 1; @@ -109,6 +109,7 @@ int kvz_config_init(kvz_config *cfg) cfg->crypto_features = KVZ_CRYPTO_OFF; cfg->me_early_termination = 1; + cfg->intra_rdo_et = 0; cfg->input_format = KVZ_FORMAT_P420; cfg->input_bitdepth = 8; @@ -185,14 +186,14 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil const char* current_arg = NULL; int32_t current_value; int32_t values[MAX_TILES_PER_DIM]; - + int i; - + //Free pointer in any case if (*array) { FREE_POINTER(*array); } - + //If the arg starts with u, we want an uniform split if (arg[0]=='u') { *ntiles = atoi(arg + 1); @@ -203,7 +204,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil //Done with parsing return 1; } - + //We have a 
comma-separated list of int for the split... current_arg = arg; *ntiles = 1; @@ -220,27 +221,27 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil ++(*ntiles); if (MAX_TILES_PER_DIM <= *ntiles) break; } while (current_arg); - + if (MAX_TILES_PER_DIM <= *ntiles || 1 >= *ntiles) { fprintf(stderr, "Invalid number of tiles (1 <= %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles, MAX_TILES_PER_DIM); return 0; } - + *array = MALLOC(int32_t, *ntiles - 1); if (!*array) { fprintf(stderr, "Could not allocate array for tiles\n"); return 0; } - + //TODO: memcpy? for (i = 0; i < *ntiles - 1; ++i) { (*array)[i] = values[i]; } - + return 1; } -static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) +static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) { char *tail; int d = strtol(numstr, &tail, 10); @@ -292,14 +293,14 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli const char* current_arg = NULL; int32_t current_value; int32_t values[MAX_SLICES]; - + int i; - + //Free pointer in any case if (*array) { FREE_POINTER(*array); } - + //If the arg starts with u, we want an uniform split if (arg[0]=='u') { *nslices = atoi(arg+1); @@ -310,7 +311,7 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli //Done with parsing return 1; } - + //We have a comma-separated list of int for the split... current_arg = arg; //We always have a slice starting at 0 @@ -329,23 +330,23 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli ++(*nslices); if (MAX_SLICES <= *nslices) break; } while (current_arg); - + if (MAX_SLICES <= *nslices || 0 >= *nslices) { fprintf(stderr, "Invalid number of slices (0 < %d <= %d = MAX_SLICES)!\n", *nslices, MAX_SLICES); return 0; } - + *array = MALLOC(int32_t, *nslices); if (!*array) { fprintf(stderr, "Could not allocate array for slices\n"); return 0; } - + //TODO: memcpy? 
for (i = 0; i < *nslices; ++i) { (*array)[i] = values[i]; } - + return 1; } @@ -375,221 +376,241 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) static const char * const sao_names[] = { "off", "edge", "band", "full", NULL }; - static const char * const preset_values[11][20*2] = { - { - "ultrafast", + static const char * const preset_values[11][22*2] = { + { + "ultrafast", + "rd", "0", "pu-depth-intra", "2-3", "pu-depth-inter", "2-3", - "rd", "0", "me", "hexbs", + "gop", "lp-g4d4t1", "ref", "1", + "bipred", "0", "deblock", "0:0", "signhide", "0", - "subme", "0", + "subme", "2", "sao", "off", "rdoq", "0", - "rdoq-skip", "1", - "transform-skip", "0", - "full-intra-search", "0", + "rdoq-skip", "0", + "transform-skip", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", - "gop", "lp-g4d3t1", - NULL + "intra-rdo-et", "0", + NULL }, - { + { "superfast", + "rd", "0", "pu-depth-intra", "2-3", "pu-depth-inter", "2-3", - "rd", "0", "me", "hexbs", + "gop", "lp-g4d4t1", "ref", "1", + "bipred", "0", "deblock", "0:0", "signhide", "0", - "subme", "0", + "subme", "2", "sao", "full", "rdoq", "0", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", - "gop", "lp-g4d3t1", + "intra-rdo-et", "0", NULL }, { "veryfast", - "pu-depth-intra", "2-3", - "pu-depth-inter", "2-3", "rd", "0", + "pu-depth-intra", "2-3", + "pu-depth-inter", "1-3", "me", "hexbs", + "gop", "lp-g4d4t1", "ref", "1", + "bipred", "0", "deblock", "0:0", "signhide", "0", "subme", "2", "sao", "full", "rdoq", "0", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", 
- "gop", "lp-g4d3t1", + "intra-rdo-et", "0", NULL }, { "faster", + "rd", "0", "pu-depth-intra", "2-3", "pu-depth-inter", "1-3", - "rd", "1", "me", "hexbs", + "gop", "lp-g4d4t1", "ref", "1", + "bipred", "0", "deblock", "0:0", "signhide", "0", - "subme", "2", + "subme", "4", "sao", "full", "rdoq", "0", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "sensitive", - "gop", "lp-g4d3t1", + "intra-rdo-et", "0", NULL }, { "fast", - "pu-depth-intra", "2-3", + "rd", "0", + "pu-depth-intra", "1-3", "pu-depth-inter", "1-3", - "rd", "1", "me", "hexbs", - "ref", "1", + "gop", "lp-g4d4t1", + "ref", "2", + "bipred", "0", "deblock", "0:0", "signhide", "0", "subme", "4", "sao", "full", "rdoq", "0", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", - "me-early-termination", "on", - "gop", "lp-g4d3t1", + "me-early-termination", "sensitive", + "intra-rdo-et", "0", NULL }, { "medium", - "pu-depth-intra", "1-3", - "pu-depth-inter", "1-3", - "rd", "1", + "rd", "0", + "pu-depth-intra", "1-4", + "pu-depth-inter", "0-3", "me", "hexbs", - "ref", "1", + "gop", "8", + "ref", "4", + "bipred", "0", "deblock", "0:0", "signhide", "0", "subme", "4", "sao", "full", "rdoq", "1", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "on", - "gop", "lp-g4d3t1", + "intra-rdo-et", "0", NULL }, { "slow", - "pu-depth-intra", "1-3", - "pu-depth-inter", "1-3", - "rd", "1", + "rd", "0", + "pu-depth-intra", "1-4", + "pu-depth-inter", "0-3", "me", "hexbs", - "ref", "2", + "gop", "8", + "ref", "4", + "bipred", "1", "deblock", "0:0", - "signhide", "1", + 
"signhide", "0", "subme", "4", "sao", "full", "rdoq", "1", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", "me-early-termination", "on", - "gop", "lp-g4d2t1", + "intra-rdo-et", "0", NULL }, { "slower", - "pu-depth-intra", "1-3", + "rd", "2", + "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "1", "me", "hexbs", - "ref", "2", + "gop", "8", + "ref", "4", + "bipred", "1", "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "full", "rdoq", "1", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", + "full-intra-search", "0", "smp", "0", "amp", "0", "cu-split-termination", "zero", - "me-early-termination", "on", - "gop", "lp-g4d2t1", + "me-early-termination", "off", + "intra-rdo-et", "0", NULL }, { "veryslow", + "rd", "2", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "1", "me", "hexbs", - "ref", "3", + "gop", "8", + "ref", "4", + "bipred", "1", "deblock", "0:0", "signhide", "1", "subme", "4", "sao", "full", "rdoq", "1", - "rdoq-skip", "1", + "rdoq-skip", "0", "transform-skip", "0", - "full-intra-search", "0", "mv-rdo", "0", - "smp", "0", + "full-intra-search", "0", + "smp", "1", "amp", "0", "cu-split-termination", "zero", - "me-early-termination", "on", - "gop", "lp-g4d2t1", + "me-early-termination", "off", + "intra-rdo-et", "0", NULL }, { "placebo", + "rd", "2", "pu-depth-intra", "1-4", "pu-depth-inter", "0-3", - "rd", "1", "me", "tz", + "gop", "8", "ref", "4", + "bipred", "1", "deblock", "0:0", "signhide", "1", "subme", "4", @@ -597,13 +618,13 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) "rdoq", "1", "rdoq-skip", "0", "transform-skip", "1", - "full-intra-search", "0", "mv-rdo", "1", + "full-intra-search", "0", "smp", "1", "amp", "1", "cu-split-termination", "off", "me-early-termination", "off", - "gop", "lp-g4d2t1", + 
"intra-rdo-et", "0", NULL }, { NULL } @@ -727,7 +748,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) } else if OPT("tiles-width-split") { int retval = parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split); - + if (cfg->tiles_width_count > 1 && cfg->tmvp_enable) { cfg->tmvp_enable = false; fprintf(stderr, "Disabling TMVP because tiles are used.\n"); @@ -742,7 +763,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) } else if OPT("tiles-height-split") { int retval = parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split); - + if (cfg->tiles_height_count > 1 && cfg->tmvp_enable) { cfg->tmvp_enable = false; fprintf(stderr, "Disabling TMVP because tiles are used.\n"); @@ -822,7 +843,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) } } else if OPT("cpuid") - cfg->cpuid = atoi(value); + cfg->cpuid = atobool(value); else if OPT("pu-depth-inter") return sscanf(value, "%d-%d", &cfg->pu_depth_inter.min, &cfg->pu_depth_inter.max) == 2; else if OPT("pu-depth-intra") @@ -1022,6 +1043,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) cfg->me_early_termination = mode; return result; } + else if OPT("intra-rdo-et") + cfg->intra_rdo_et = (bool)atobool(value); else if OPT("lossless") cfg->lossless = (bool)atobool(value); else if OPT("tmvp") { @@ -1088,6 +1111,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) if (width > 10000 || height > 10000) { fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n"); + fclose(f); return 0; } @@ -1330,7 +1354,9 @@ int kvz_config_validate(const kvz_config *const cfg) error = 1; } - if (cfg->gop_len && cfg->intra_period && !cfg->gop_lowdelay && + if (cfg->gop_len && + cfg->intra_period > 1 && + !cfg->gop_lowdelay && cfg->intra_period % cfg->gop_len != 0) { fprintf(stderr, @@ -1391,7 +1417,7 @@ int kvz_config_validate(const kvz_config 
*const cfg) } if (!WITHIN(cfg->pu_depth_inter.min, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX) || - !WITHIN(cfg->pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)) + !WITHIN(cfg->pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX)) { fprintf(stderr, "Input error: illegal value for --pu-depth-inter (%d-%d)\n", cfg->pu_depth_inter.min, cfg->pu_depth_inter.max); @@ -1506,7 +1532,7 @@ static int validate_hevc_level(kvz_config *const cfg) { }; int level_error = 0; - + const char* level_err_prefix; if (cfg->force_level) { level_err_prefix = "Level warning"; diff --git a/src/cli.c b/src/cli.c index e742a3a0..22c78b35 100644 --- a/src/cli.c +++ b/src/cli.c @@ -36,9 +36,9 @@ static const struct option long_options[] = { { "input", required_argument, NULL, 'i' }, { "output", required_argument, NULL, 'o' }, { "debug", required_argument, NULL, 'd' }, - { "width", required_argument, NULL, 'w' }, + { "width", required_argument, NULL, 'w' }, // deprecated { "height", required_argument, NULL, 'h' }, // deprecated - { "frames", required_argument, NULL, 'n' }, // deprecated + { "frames", required_argument, NULL, 'n' }, { "qp", required_argument, NULL, 'q' }, { "period", required_argument, NULL, 'p' }, { "ref", required_argument, NULL, 'r' }, @@ -86,7 +86,8 @@ static const struct option long_options[] = { { "owf", required_argument, NULL, 0 }, { "slices", required_argument, NULL, 0 }, { "threads", required_argument, NULL, 0 }, - { "cpuid", required_argument, NULL, 0 }, + { "cpuid", optional_argument, NULL, 0 }, + { "no-cpuid", no_argument, NULL, 0 }, { "pu-depth-inter", required_argument, NULL, 0 }, { "pu-depth-intra", required_argument, NULL, 0 }, { "info", no_argument, NULL, 0 }, @@ -109,6 +110,8 @@ static const struct option long_options[] = { { "crypto", required_argument, NULL, 0 }, { "key", required_argument, NULL, 0 }, { "me-early-termination",required_argument, NULL, 0 }, + { "intra-rdo-et", no_argument, NULL, 0 }, + { "no-intra-rdo-et", no_argument, NULL, 0 }, 
{ "lossless", no_argument, NULL, 0 }, { "no-lossless", no_argument, NULL, 0 }, { "tmvp", no_argument, NULL, 0 }, @@ -322,15 +325,15 @@ void print_help(void) "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Required:\n" - " -i, --input : Input file\n" + " -i, --input : Input file\n" " --input-res : Input resolution [auto]\n" - " auto: detect from file name\n" - " x: width times height\n" - " -o, --output : Output file\n" + " - auto: Detect from file name.\n" + " - x: width times height\n" + " -o, --output : Output file\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Presets:\n" - " --preset= : Set options to a preset [medium]\n" + " --preset : Set options to a preset [medium]\n" " - ultrafast, superfast, veryfast, faster,\n" " fast, medium, slow, slower, veryslow\n" " placebo\n" @@ -339,160 +342,176 @@ void print_help(void) "Input:\n" " -n, --frames : Number of frames to code [all]\n" " --seek : First frame to code [0]\n" - " --input-fps [/] : Framerate of the input video [25.0]\n" - " --source-scan-type : Set source scan type [progressive].\n" - " - progressive: progressive scan\n" - " - tff: top field first\n" - " - bff: bottom field first\n" - " --input-format : P420 or P400\n" - " --input-bitdepth : 8-16\n" - " --loop-input : Re-read input file forever\n" + " --input-fps [/] : Frame rate of the input video [25]\n" + " --source-scan-type : Source scan type [progressive]\n" + " - progressive: Progressive scan\n" + " - tff: Top field first\n" + " - bff: Bottom field first\n" + " --input-format : P420 or P400 [P420]\n" + " --input-bitdepth : 8-16 [8]\n" + " --loop-input : Re-read input file forever.\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Options:\n" - " --help : Print this help message and exit\n" - " --version : Print version information and exit\n" - " --aud : Use access unit delimiters\n" - " --debug : Output encoders 
reconstruction.\n" - " --cpuid : Disable runtime cpu optimizations with value 0.\n" - " --hash : Decoded picture hash [checksum]\n" + " --help : Print this help message and exit.\n" + " --version : Print version information and exit.\n" + " --(no-)aud : Use access unit delimiters. [disabled]\n" + " --debug : Output internal reconstruction.\n" + " --(no-)cpuid : Enable runtime CPU optimizations. [enabled]\n" + " --hash : Decoded picture hash [checksum]\n" " - none: 0 bytes\n" " - checksum: 18 bytes\n" " - md5: 56 bytes\n" - " --no-psnr : Don't calculate PSNR for frames\n" - " --no-info : Don't add encoder info SEI.\n" + " --(no-)psnr : Calculate PSNR for frames. [enabled]\n" + " --(no-)info : Add encoder info SEI. [enabled]\n" + " --crypto : Selective encryption. Crypto support must be\n" + " enabled at compile-time. Can be 'on' or 'off' or\n" + " a list of features separated with a '+'. [off]\n" + " - on: Enable all encryption features.\n" + " - off: Disable selective encryption.\n" + " - mvs: Motion vector magnitudes.\n" + " - mv_signs: Motion vector signs.\n" + " - trans_coeffs: Coefficient magnitudes.\n" + " - trans_coeff_signs: Coefficient signs.\n" + " - intra_pred_modes: Intra prediction modes.\n" + " --key : Encryption key [16,213,27,56,255,127,242,112,\n" + " 97,126,197,204,25,59,38,30]\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Video structure:\n" - " -q, --qp : Quantization Parameter [32]\n" - " -p, --period : Period of intra pictures [0]\n" - " - 0: only first picture is intra\n" - " - 1: all pictures are intra\n" - " - 2-N: every Nth picture is intra\n" - " --vps-period : Specify how often the video parameter set is\n" - " re-sent. 
[0]\n" - " - 0: only send VPS with the first frame\n" - " - N: send VPS with every Nth intra frame\n" - " -r, --ref : Reference frames, range 1..15 [3]\n" - " --gop : Definition of GOP structure [0]\n" - " - 0: disabled\n" + " -q, --qp : Quantization parameter [22]\n" + " -p, --period : Period of intra pictures [64]\n" + " - 0: Only first picture is intra.\n" + " - 1: All pictures are intra.\n" + " - N: Every Nth picture is intra.\n" + " --vps-period : How often the video parameter set is re-sent [0]\n" + " - 0: Only send VPS with the first frame.\n" + " - N: Send VPS with every Nth intra frame.\n" + " -r, --ref : Number of reference frames, in range 1..15 [4]\n" + " --gop : GOP structure [8]\n" + " - 0: Disabled\n" " - 8: B-frame pyramid of length 8\n" - " - lp-: lp-gop definition\n" - " (e.g. lp-g8d4t2, see README)\n" - " --cqmfile : Custom Quantization Matrices from a file\n" - " --bitrate : Target bitrate. [0]\n" - " - 0: disable rate-control\n" - " - N: target N bits per second\n" - " --lossless : Use lossless coding\n" - " --mv-constraint : Constrain movement vectors\n" - " - none: no constraint\n" - " - frametile: constrain within the tile\n" - " - frametilemargin: constrain even more\n" - " --roi : Use a delta QP map for region of interest\n" - " Read an array of delta QP values from\n" - " a file, where the first two values are the\n" - " width and height, followed by width*height\n" - " delta QP values in raster order.\n" - " The delta QP map can be any size or aspect\n" - " ratio, and will be mapped to LCU's.\n" - " --(no-)erp-aqp : Use adaptive QP for 360 video with\n" - " equirectangular projection\n" + " - lp-: Low-delay P-frame GOP\n" + " (e.g. lp-g8d4t2, see README)\n" + " --cqmfile : Read custom quantization matrices from a file.\n" + " --bitrate : Target bitrate [0]\n" + " - 0: Disable rate control.\n" + " - N: Target N bits per second.\n" + " --(no-)lossless : Use lossless coding. [disabled]\n" + " --mv-constraint : Constrain movement vectors. 
[none]\n" + " - none: No constraint\n" + " - frametile: Constrain within the tile.\n" + " - frametilemargin: Constrain even more.\n" + " --roi : Use a delta QP map for region of interest.\n" + " Reads an array of delta QP values from a text\n" + " file. The file format is: width and height of\n" + " the QP delta map followed by width*height delta\n" + " QP values in raster order. The map can be of any\n" + " size and will be scaled to the video size.\n" + " --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n" + " equirectangular projection. [disabled]\n" " --level : Use the given HEVC level in the output and give\n" - " an error if the input doesn't fit to it's\n" - " limits [6.2]\n" - " Allowed values are 1, 2, 2.1, 3, 3.1, 4, 4.1\n" - " 5, 5.1, 5.2, 6, 6.1 and 6.2. The dot is\n" - " optional.\n" - " --force-level : Same as --level, except instead of errors you\n" - " get warnings\n" - " --high-tier : Used with --level. Use high-tier bitrate limits\n" - " instead of the main-tier limits during encoding.\n" + " an error if level limits are exceeded. [6.2]\n" + " - 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6,\n" + " 6.1, 6.2\n" + " --force-level : Same as --level but warnings instead of errors.\n" + " --high-tier : Used with --level. 
Use high tier bitrate limits\n" + " instead of the main tier limits during encoding.\n" + " High tier requires level 4 or higher.\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Compression tools:\n" - " --deblock [] : Deblocking\n" - " - beta: between -6 and 6\n" - " - tc: between -6 and 6\n" - " --(no-)sao : Sample Adaptive Offset\n" - " --(no-)rdoq : Rate-Distortion Optimized Quantization\n" - " --(no-)signhide : Sign Hiding\n" - " --(no-)smp : Symmetric Motion Partition\n" - " --(no-)amp : Asymmetric Motion Partition\n" - " --rd : Intra mode search complexity\n" - " - 0: skip intra if inter is good enough\n" - " - 1: rough intra mode search with SATD\n" - " - 2: refine intra mode search with SSE\n" - " --(no-)mv-rdo : Rate-Distortion Optimized motion vector costs\n" - " --(no-)full-intra-search\n" - " : Try all intra modes during rough search.\n" - " --(no-)transform-skip : Transform skip\n" - " --me : Integer motion estimation\n" + " --(no-)deblock : Deblocking filter. [0:0]\n" + " - beta: Between -6 and 6\n" + " - tc: Between -6 and 6\n" + " --sao : Sample Adaptive Offset [full]\n" + " - off: SAO disabled\n" + " - band: Band offset only\n" + " - edge: Edge offset only\n" + " - full: Full SAO\n" + " --(no-)rdoq : Rate-distortion optimized quantization [enabled]\n" + " --(no-)rdoq-skip : Skip RDOQ for 4x4 blocks. 
[disabled]\n" + " --(no-)signhide : Sign hiding [disabled]\n" + " --(no-)smp : Symmetric motion partition [disabled]\n" + " --(no-)amp : Asymmetric motion partition [disabled]\n" + " --rd : Intra mode search complexity [0]\n" + " - 0: Skip intra if inter is good enough.\n" + " - 1: Rough intra mode search with SATD.\n" + " - 2: Refine intra mode search with SSE.\n" + " - 3: Try all intra modes and enable intra\n" + " chroma mode search.\n" + " --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n" + " [disabled]\n" + " --(no-)full-intra-search : Try all intra modes during rough search.\n" + " [disabled]\n" + " --(no-)transform-skip : Try transform skip [disabled]\n" + " --me : Integer motion estimation algorithm [hexbs]\n" " - hexbs: Hexagon Based Search\n" " - tz: Test Zone Search\n" " - full: Full Search\n" " - full8, full16, full32, full64\n" " - dia: Diamond Search\n" - " --me-steps : How many search steps does the motion estimation\n" - " do before cutting off [-1]\n" - " Has effect only for 'hexbs' and 'dia'\n" - " --subme : Set fractional pixel motion estimation level\n" - " - 0: only integer motion estimation\n" + " --me-steps : Motion estimation search step limit. Only\n" + " affects 'hexbs' and 'dia'. 
[-1]\n" + " --subme : Fractional pixel motion estimation level [4]\n" + " - 0: Integer motion estimation only\n" " - 1: + 1/2-pixel horizontal and vertical\n" " - 2: + 1/2-pixel diagonal\n" " - 3: + 1/4-pixel horizontal and vertical\n" " - 4: + 1/4-pixel diagonal\n" - " --pu-depth-inter -\n" - " : Range for sizes for inter predictions\n" + " --pu-depth-inter - : Inter prediction units sizes [0-3]\n" " - 0, 1, 2, 3: from 64x64 to 8x8\n" - " --pu-depth-intra - : Range for sizes for intra predictions\n" + " --pu-depth-intra - : Intra prediction units sizes [1-4]\n" " - 0, 1, 2, 3, 4: from 64x64 to 4x4\n" - " --(no-)bipred : Bi-prediction\n" - " --(no-)cu-split-termination\n" - " : CU split search termination condition\n" - " - off: Never terminate cu-split search\n" - " - zero: Terminate with zero residual\n" - " --(no-)me-early-termination : ME early termination condition\n" - " - off: Don't terminate early\n" - " - on: Terminate early\n" - " - sensitive: Terminate even earlier\n" - " --(no-)implicit-rdpcm : Implicit residual DPCM\n" - " Currently only supported with lossless coding.\n" - " --(no-)tmvp : Temporal Motion Vector Prediction\n" - " --(no-)rdoq-skip : Skips RDOQ for 4x4 blocks\n" + " --tr-depth-intra : Transform split depth for intra blocks [0]\n" + " --(no-)bipred : Bi-prediction [disabled]\n" + " --cu-split-termination : CU split search termination [zero]\n" + " - off: Don't terminate early.\n" + " - zero: Terminate when residual is zero.\n" + " --me-early-termination : Motion estimation termination [on]\n" + " - off: Don't terminate early.\n" + " - on: Terminate early.\n" + " - sensitive: Terminate even earlier.\n" + " --(no-)intra-rdo-et : Check intra modes in rdo stage only until\n" + " a zero coefficient CU is found. [disabled]\n" + " --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n" + " with lossless coding. 
[disabled]\n" + " --(no-)tmvp : Temporal motion vector prediction [enabled]\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Parallel processing:\n" " --threads : Number of threads to use [auto]\n" - " - 0: process everything with main thread\n" - " - N: use N threads for encoding\n" - " - auto: select based on number of cores\n" - " --owf : Frame parallelism [auto]\n" - " - N: Process N-1 frames at a time\n" - " - auto: Select automatically\n" - " --(no-)wpp : Wavefront parallel processing [enabled]\n" + " - 0: Process everything with main thread.\n" + " - N: Use N threads for encoding.\n" + " - auto: Select automatically.\n" + " --owf : Frame-level parallelism [auto]\n" + " - N: Process N+1 frames at a time.\n" + " - auto: Select automatically.\n" + " --(no-)wpp : Wavefront parallel processing. [enabled]\n" " Enabling tiles automatically disables WPP.\n" " To enable WPP with tiles, re-enable it after\n" - " enabling tiles.\n" + " enabling tiles. 
Enabling wpp with tiles is,\n" + " however, an experimental feature since it is\n" + " not supported in any HEVC profile.\n" " --tiles x : Split picture into width x height uniform tiles.\n" " --tiles-width-split |u :\n" - " Specifies a comma separated list of pixel\n" - " positions of tiles columns separation\n" - " coordinates.\n" - " Can also be u, followed by a single int n, in\n" - " which case it produces columns of uniform width.\n" + " - : A comma-separated list of tile\n" + " column pixel coordinates.\n" + " - u: Number of tile columns of uniform\n" + " width.\n" " --tiles-height-split |u :\n" - " Specifies a comma separated list of pixel\n" - " positions of tiles rows separation coordinates.\n" - " Can also be u followed by and a single int n, in\n" - " which case it produces rows of uniform height.\n" - " --slices : Control how slices are used\n" - " - tiles: put tiles in independent slices\n" - " - wpp: put rows in dependent slices\n" - " - tiles+wpp: do both\n" + " - : A comma-separated list of tile row\n" + " column pixel coordinates.\n" + " - u: Number of tile rows of uniform\n" + " height.\n" + " --slices : Control how slices are used.\n" + " - tiles: Put tiles in independent slices.\n" + " - wpp: Put rows in dependent slices.\n" + " - tiles+wpp: Do both.\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Video Usability Information:\n" - " --sar : Specify Sample Aspect Ratio\n" + " --sar : Specify sample aspect ratio\n" " --overscan : Specify crop overscan setting [undef]\n" " - undef, show, crop\n" " --videoformat : Specify video format [undef]\n" @@ -514,8 +533,8 @@ void print_help(void) "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Deprecated parameters: (might be removed at some point)\n" - " -w, --width : Use --input-res\n" - " -h, --height : Use --input-res\n"); + " -w, --width : Use --input-res.\n" + " -h, --height : Use --input-res.\n"); } diff --git 
a/src/cu.c b/src/cu.c index c5d70407..a9b5d8da 100644 --- a/src/cu.c +++ b/src/cu.c @@ -184,9 +184,10 @@ void kvz_cu_array_free(cu_array_t **cua_ptr) */ cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua) { - // The caller should have had another reference. - assert(cua->refcount > 0); - KVZ_ATOMIC_INC(&cua->refcount); + int32_t new_refcount = KVZ_ATOMIC_INC(&cua->refcount); + // The caller should have had another reference and we added one + // reference so refcount should be at least 2. + assert(new_refcount >= 2); return cua; } diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 1ad4aa50..5c6552b3 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -435,7 +435,9 @@ static void encode_transform_coeff(encoder_state_t * const state, const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); // Round coordinates down to a multiple of 8 to get the location of the // containing CU. - const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x & ~7, y & ~7); + const int x_cu = 8 * (x / 8); + const int y_cu = 8 * (y / 8); + const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x_cu, y_cu); // NxN signifies implicit transform split at the first transform level. 
// There is a similar implicit split for inter, but it is only used when @@ -508,9 +510,10 @@ static void encode_transform_coeff(encoder_state_t * const state, if (cb_flag_y | cb_flag_u | cb_flag_v) { if (state->must_code_qp_delta) { - const int qp_delta = state->qp - state->ref_qp; - const int qp_delta_abs = ABS(qp_delta); - cabac_data_t* cabac = &state->cabac; + const int qp_pred = kvz_get_cu_ref_qp(state, x_cu, y_cu, state->last_qp); + const int qp_delta = cur_cu->qp - qp_pred; + const int qp_delta_abs = ABS(qp_delta); + cabac_data_t* cabac = &state->cabac; // cu_qp_delta_abs prefix cabac->cur_ctx = &cabac->ctx.cu_qp_delta_abs[0]; @@ -526,7 +529,6 @@ static void encode_transform_coeff(encoder_state_t * const state, } state->must_code_qp_delta = false; - state->ref_qp = state->qp; } encode_transform_unit(state, x, y, depth); @@ -957,6 +959,9 @@ void kvz_encode_coding_tree(encoder_state_t * const state, const videoframe_t * const frame = state->tile->frame; const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x, y); + const int cu_width = LCU_WIDTH >> depth; + const int half_cu = cu_width >> 1; + const cu_info_t *left_cu = NULL; if (x > 0) { left_cu = kvz_cu_array_at_const(frame->cu_array, x - 1, y); @@ -973,13 +978,17 @@ void kvz_encode_coding_tree(encoder_state_t * const state, uint16_t abs_x = x + state->tile->offset_x; uint16_t abs_y = y + state->tile->offset_y; - // Check for slice border FIXME - bool border_x = ctrl->in.width < abs_x + (LCU_WIDTH >> depth); - bool border_y = ctrl->in.height < abs_y + (LCU_WIDTH >> depth); - bool border_split_x = ctrl->in.width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)); - bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)); + // Check for slice border + bool border_x = ctrl->in.width < abs_x + cu_width; + bool border_y = ctrl->in.height < abs_y + cu_width; + bool border_split_x = ctrl->in.width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + half_cu; 
+ bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu; bool border = border_x || border_y; /*!< are we in any border CU */ + if (depth <= ctrl->max_qp_delta_depth) { + state->must_code_qp_delta = true; + } + // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH) { // Implisit split flag when on border @@ -999,25 +1008,22 @@ void kvz_encode_coding_tree(encoder_state_t * const state, if (split_flag || border) { // Split blocks and remember to change x and y block positions - int offset = LCU_WIDTH >> (depth + 1); - kvz_encode_coding_tree(state, x, y, depth + 1); - // TODO: fix when other half of the block would not be completely over the border if (!border_x || border_split_x) { - kvz_encode_coding_tree(state, x + offset, y, depth + 1); + kvz_encode_coding_tree(state, x + half_cu, y, depth + 1); } if (!border_y || border_split_y) { - kvz_encode_coding_tree(state, x, y + offset, depth + 1); + kvz_encode_coding_tree(state, x, y + half_cu, depth + 1); } if (!border || (border_split_x && border_split_y)) { - kvz_encode_coding_tree(state, x + offset, y + offset, depth + 1); + kvz_encode_coding_tree(state, x + half_cu, y + half_cu, depth + 1); } return; } } - if (state->encoder_control->cfg.lossless) { + if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; CABAC_BIN(cabac, 1, "cu_transquant_bypass_flag"); } @@ -1053,7 +1059,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state, } } } - return; + goto end; } } @@ -1068,7 +1074,6 @@ void kvz_encode_coding_tree(encoder_state_t * const state, if (cur_cu->type == CU_INTER) { const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size]; - const int cu_width = LCU_WIDTH >> depth; for (int i = 0; i < num_pu; ++i) { const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i); @@ -1139,6 +1144,12 @@ void kvz_encode_coding_tree(encoder_state_t * const state, assert(0); exit(1); } + +end: + + if (is_last_cu_in_qg(state, x, y, 
depth)) { + state->last_qp = cur_cu->qp; + } } diff --git a/src/encoder.c b/src/encoder.c index 2abf057c..5830285c 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -347,12 +347,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg) } - encoder->lcu_dqp_enabled = cfg->target_bitrate > 0 || encoder->cfg.roi.dqps; - // NOTE: When tr_depth_inter is equal to 0, the transform is still split // for SMP and AMP partition units. encoder->tr_depth_inter = 0; + if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps) { + encoder->max_qp_delta_depth = 0; + } else { + encoder->max_qp_delta_depth = -1; + } + //Tiles encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 || encoder->cfg.tiles_height_count > 1; diff --git a/src/encoder.h b/src/encoder.h index 00d698d8..87896e29 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -118,7 +118,7 @@ typedef struct encoder_control_t //! Picture weights when GOP is used. double gop_layer_weights[MAX_GOP_LAYERS]; - bool lcu_dqp_enabled; + int8_t max_qp_delta_depth; int tr_depth_inter; diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index a9c086b5..9c90be5c 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -60,7 +60,7 @@ static void encoder_state_write_bitstream_PTL(bitstream_t *stream, // PTL // Profile Tier WRITE_U(stream, 0, 2, "general_profile_space"); - WRITE_U(stream, 0, 1, "general_tier_flag"); + WRITE_U(stream, state->encoder_control->cfg.high_tier, 1, "general_tier_flag"); // Main Profile == 1, Main 10 profile == 2 WRITE_U(stream, (state->encoder_control->bitdepth == 8)?1:2, 5, "general_profile_idc"); /* Compatibility flags should be set at general_profile_idc @@ -455,12 +455,12 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder->cfg.trskip_enable, 1, "transform_skip_enabled_flag"); - if (encoder->lcu_dqp_enabled) { + if 
(encoder->max_qp_delta_depth >= 0) { // Use separate QP for each LCU when rate control is enabled. WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag"); - WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); + WRITE_UE(stream, encoder->max_qp_delta_depth, "diff_cu_qp_delta_depth"); } else { - WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); + WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag"); } //TODO: add QP offsets diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index 4dcafb6f..93569c1a 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -312,6 +312,7 @@ int kvz_encoder_state_init(encoder_state_t * const child_state, encoder_state_t child_state->children = MALLOC(encoder_state_t, 1); child_state->children[0].encoder_control = NULL; child_state->crypto_hdl = NULL; + child_state->must_code_qp_delta = false; child_state->tqj_bitstream_written = NULL; child_state->tqj_recon_done = NULL; diff --git a/src/encoderstate.c b/src/encoderstate.c index 0a4a18bc..25acb88a 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -37,9 +37,6 @@ #include "tables.h" #include "threadqueue.h" -#define SAO_BUF_WIDTH (LCU_WIDTH + SAO_DELAY_PX + 2) -#define SAO_BUF_WIDTH_C (SAO_BUF_WIDTH / 2) - int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) { int i; @@ -250,10 +247,18 @@ static void encoder_sao_reconstruct(const encoder_state_t *const state, { videoframe_t *const frame = state->tile->frame; - // Temporary buffers for SAO input pixels. - kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH]; - kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; - kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; + + // Temporary buffers for SAO input pixels. The buffers cover the pixels + // inside the LCU (LCU_WIDTH x LCU_WIDTH), SAO_DELAY_PX wide bands to the + // left and above the LCU, and one pixel border on the left and top + // sides. 
We add two extra pixels to the buffers because the AVX2 SAO + // reconstruction reads up to two extra bytes when using edge SAO in the + // horizontal direction. +#define SAO_BUF_WIDTH (1 + SAO_DELAY_PX + LCU_WIDTH) +#define SAO_BUF_WIDTH_C (1 + SAO_DELAY_PX/2 + LCU_WIDTH_C) + kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH + 2]; + kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2]; + kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2]; // Pointers to the top-left pixel of the LCU in the buffers. kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)]; @@ -526,68 +531,81 @@ static void encode_sao(encoder_state_t * const state, /** * \brief Sets the QP for each CU in state->tile->frame->cu_array. * - * The QPs are used in deblocking. + * The QPs are used in deblocking and QP prediction. * - * The delta QP for an LCU is coded when the first CU with coded block flag - * set is encountered. Hence, for the purposes of deblocking, all CUs - * before the first one with cbf set use state->ref_qp and all CUs after - * that use state->qp. + * The QP delta for a quantization group is coded when the first CU with + * coded block flag set is encountered. Hence, for the purposes of + * deblocking and QP prediction, all CUs before the first one that has + * cbf set use the QP predictor and all CUs after that use (QP predictor + * + QP delta). * * \param state encoder state * \param x x-coordinate of the left edge of the root CU * \param y y-coordinate of the top edge of the root CU * \param depth depth in the CU quadtree - * \param coeffs_coded Used for tracking whether a CU with a residual - * has been encountered. Should be set to false at - * the top level. - * \return Whether there were any CUs with residual or not. 
+ * \param last_qp QP of the last CU in the last quantization group + * \param prev_qp -1 if QP delta has not been coded in current QG, + * otherwise the QP of the current QG */ -static bool set_cu_qps(encoder_state_t *state, int x, int y, int depth, bool coeffs_coded) +static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp) { - if (state->qp == state->ref_qp) { - // If the QPs are equal there is no need to care about the residuals. - coeffs_coded = true; - } + + // Stop recursion if the CU is completely outside the frame. + if (x >= state->tile->frame->width || y >= state->tile->frame->height) return; cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y); const int cu_width = LCU_WIDTH >> depth; - coeffs_coded = coeffs_coded || cbf_is_set_any(cu->cbf, cu->depth); - if (!coeffs_coded && cu->depth > depth) { + if (depth <= state->encoder_control->max_qp_delta_depth) { + *prev_qp = -1; + } + + if (cu->depth > depth) { // Recursively process sub-CUs. const int d = cu_width >> 1; - coeffs_coded = set_cu_qps(state, x, y, depth + 1, coeffs_coded); - coeffs_coded = set_cu_qps(state, x + d, y, depth + 1, coeffs_coded); - coeffs_coded = set_cu_qps(state, x, y + d, depth + 1, coeffs_coded); - coeffs_coded = set_cu_qps(state, x + d, y + d, depth + 1, coeffs_coded); + set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp); + set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp); + set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp); + set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp); } else { - if (!coeffs_coded && cu->tr_depth > depth) { + bool cbf_found = *prev_qp >= 0; + + if (cu->tr_depth > depth) { // The CU is split into smaller transform units. Check whether coded // block flag is set for any of the TUs. 
const int tu_width = LCU_WIDTH >> cu->tr_depth; - for (int y_scu = y; y_scu < y + cu_width; y_scu += tu_width) { - for (int x_scu = x; x_scu < x + cu_width; x_scu += tu_width) { + for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) { + for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) { cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu); if (cbf_is_set_any(tu->cbf, cu->depth)) { - coeffs_coded = true; + cbf_found = true; } } } + } else if (cbf_is_set_any(cu->cbf, cu->depth)) { + cbf_found = true; + } + + int8_t qp; + if (cbf_found) { + *prev_qp = qp = cu->qp; + } else { + qp = kvz_get_cu_ref_qp(state, x, y, *last_qp); } // Set the correct QP for all state->tile->frame->cu_array elements in // the area covered by the CU. - const int8_t qp = coeffs_coded ? state->qp : state->ref_qp; - for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) { for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) { kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp; } } - } - return coeffs_coded; + if (is_last_cu_in_qg(state, x, y, depth)) { + *last_qp = cu->qp; + } + } } @@ -608,11 +626,13 @@ static void encoder_state_worker_encode_lcu(void * opaque) encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search); - if (encoder->cfg.deblock_enable) { - if (encoder->lcu_dqp_enabled) { - set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false); - } + if (encoder->max_qp_delta_depth >= 0) { + int last_qp = state->last_qp; + int prev_qp = -1; + set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp); + } + if (encoder->cfg.deblock_enable) { kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } @@ -635,9 +655,6 @@ static void encoder_state_worker_encode_lcu(void * opaque) encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], 
&frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]); } - // QP delta is not used when rate control is turned off. - state->must_code_qp_delta = encoder->lcu_dqp_enabled; - //Encode coding tree kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0); @@ -709,7 +726,7 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) const encoder_control_t *ctrl = state->encoder_control; const kvz_config *cfg = &ctrl->cfg; - state->ref_qp = state->frame->QP; + state->last_qp = state->frame->QP; if (cfg->crypto_features) { state->crypto_hdl = kvz_crypto_create(cfg); @@ -1362,3 +1379,27 @@ lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y) state->encoder_control->in.width_in_lcu; return &state->frame->lcu_stats[index]; } + +int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp) +{ + const encoder_control_t *ctrl = state->encoder_control; + const cu_array_t *cua = state->tile->frame->cu_array; + // Quantization group width + const int qg_width = LCU_WIDTH >> MIN(ctrl->max_qp_delta_depth, kvz_cu_array_at_const(cua, x, y)->depth); + + // Coordinates of the top-left corner of the quantization group + const int x_qg = x & ~(qg_width - 1); + const int y_qg = y & ~(qg_width - 1); + + int qp_pred_a = last_qp; + if (x_qg % LCU_WIDTH > 0) { + qp_pred_a = kvz_cu_array_at_const(cua, x_qg - 1, y_qg)->qp; + } + + int qp_pred_b = last_qp; + if (y_qg % LCU_WIDTH > 0) { + qp_pred_b = kvz_cu_array_at_const(cua, x_qg, y_qg - 1)->qp; + } + + return ((qp_pred_a + qp_pred_b + 1) >> 1); +} diff --git a/src/encoderstate.h b/src/encoderstate.h index bdfb490f..6ea5e28e 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -268,10 +268,17 @@ typedef struct encoder_state_t { bool must_code_qp_delta; /** - * \brief Reference for computing QP delta for the next LCU that is coded - * next. Updated whenever a QP delta is coded. 
+ * \brief QP value of the last CU in the last coded quantization group. + * + * A quantization group is a square of width + * (LCU_WIDTH >> encoder_control->max_qp_delta_depth). All CUs in the + * same quantization group share the QP predictor value, but may have + * different QP values. + * + * Set to the frame QP at the beginning of a wavefront row or a tile and + * updated when the last CU of a quantization group is coded. */ - int8_t ref_qp; + int8_t last_qp; /** * \brief Coeffs for the LCU. @@ -297,6 +304,8 @@ void kvz_encoder_create_ref_lists(const encoder_state_t *const state); lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y); +int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp); + /** * Whether the parameter sets should be written with the current frame. */ @@ -309,6 +318,30 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state) (vps_period >= 0 && frame == 0); } + +/** + * \brief Returns true if the CU is the last CU in its containing + * quantization group. 
+ * + * \param state encoder state + * \param x x-coordinate of the left edge of the CU + * \param y y-coordinate of the top edge of the CU + * \param depth depth in the CU tree + * \return true, if it's the last CU in its QG, otherwise false + */ +static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth) +{ + if (state->encoder_control->max_qp_delta_depth < 0) return false; + + const int cu_width = LCU_WIDTH >> depth; + const int qg_width = LCU_WIDTH >> state->encoder_control->max_qp_delta_depth; + const int right = x + cu_width; + const int bottom = y + cu_width; + return (right % qg_width == 0 || right >= state->tile->frame->width) && + (bottom % qg_width == 0 || bottom >= state->tile->frame->height); +} + + static const uint8_t g_group_idx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, diff --git a/src/filter.c b/src/filter.c index 233e6f00..6926147d 100644 --- a/src/filter.c +++ b/src/filter.c @@ -262,7 +262,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir) static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) { - if (!state->encoder_control->lcu_dqp_enabled) { + if (state->encoder_control->max_qp_delta_depth < 0) { return state->qp; } diff --git a/src/image.c b/src/image.c index b95b579e..a0df339f 100644 --- a/src/image.c +++ b/src/image.c @@ -128,10 +128,10 @@ void kvz_image_free(kvz_picture *const im) */ kvz_picture *kvz_image_copy_ref(kvz_picture *im) { - // The caller should have had another reference. - assert(im->refcount > 0); - KVZ_ATOMIC_INC(&(im->refcount)); - + int32_t new_refcount = KVZ_ATOMIC_INC(&im->refcount); + // The caller should have had another reference and we added one + // reference so refcount should be at least 2. 
+ assert(new_refcount >= 2); return im; } diff --git a/src/kvazaar.c b/src/kvazaar.c index 2e14ec0a..6c4369f2 100644 --- a/src/kvazaar.c +++ b/src/kvazaar.c @@ -142,8 +142,8 @@ static void set_frame_info(kvz_frame_info *const info, const encoder_state_t *co info->nal_unit_type = state->frame->pictype; info->slice_type = state->frame->slicetype; - memset(info->ref_list[0], 0, 16); - memset(info->ref_list[1], 0, 16); + memset(info->ref_list[0], 0, 16 * sizeof(int)); + memset(info->ref_list[1], 0, 16 * sizeof(int)); for (size_t i = 0; i < state->frame->ref_LX_size[0]; i++) { info->ref_list[0][i] = state->frame->ref->pocs[state->frame->ref_LX[0][i]]; diff --git a/src/kvazaar.h b/src/kvazaar.h index b7f5e445..acc0a37d 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -323,6 +323,7 @@ typedef struct kvz_config uint8_t *optional_key; enum kvz_me_early_termination me_early_termination; /*!< \since 3.8.0 \brief Mode of me early termination. */ + int32_t intra_rdo_et; /*!< \since 4.1.0 \brief Use early termination in intra rdo. */ int32_t lossless; /*!< \brief Use lossless coding. */ diff --git a/src/search.c b/src/search.c index c02aeebe..12c76237 100644 --- a/src/search.c +++ b/src/search.c @@ -138,6 +138,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in to->type = cu->type; to->depth = cu->depth; to->part_size = cu->part_size; + to->qp = cu->qp; if (cu->type == CU_INTRA) { to->intra.mode = cu->intra.mode; @@ -413,6 +414,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, cur_cu->tr_depth = depth > 0 ? depth : 1; cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N; + cur_cu->qp = state->qp; // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. 
diff --git a/src/search_inter.c b/src/search_inter.c index d493a43d..5f1ff512 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1747,6 +1747,7 @@ void kvz_search_cu_smp(encoder_state_t * const state, cur_pu->type = CU_INTER; cur_pu->part_size = part_mode; cur_pu->depth = depth; + cur_pu->qp = state->qp; double cost = MAX_INT; uint32_t bitcost = MAX_INT; diff --git a/src/search_intra.c b/src/search_intra.c index f3d01454..9c31d95c 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -621,8 +621,17 @@ static int8_t search_intra_rdo(encoder_state_t * const state, double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu); costs[rdo_mode] += mode_cost; + + // Early termination if no coefficients have to be coded + if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(pred_cu.cbf, depth)) { + modes_to_check = rdo_mode + 1; + break; + } } + // Update order according to new costs + sort_modes(modes, costs, modes_to_check); + // The best transform split hierarchy is not saved anywhere, so to get the // transform split hierarchy the search has to be performed again with the // best mode. 
diff --git a/src/strategyselector.c b/src/strategyselector.c index e3a49e5f..d531dc89 100644 --- a/src/strategyselector.c +++ b/src/strategyselector.c @@ -26,9 +26,6 @@ #ifdef _WIN32 #include -#elif MACOS -#include -#include #else #include #endif @@ -372,6 +369,7 @@ static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info #endif // COMPILE_INTEL #if COMPILE_POWERPC +# if defined(__linux__) #include #include #include @@ -406,6 +404,49 @@ out_close: close(fd); return result; } +# elif defined(__FreeBSD__) +#include +#include +#include + +static int altivec_available(void) +{ + u_long cpu_features = 0; + size_t len = sizeof(cpu_features); + + sysctlbyname("hw.cpu_features", &cpu_features, &len, NULL, 0); + return !!(cpu_features & PPC_FEATURE_HAS_ALTIVEC); +} +# elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__) +#include +#include +#ifndef __APPLE__ +#include +#endif + +static int altivec_available(void) +{ + int cpu_altivec = 0; + size_t len = sizeof(cpu_altivec); +#ifdef HW_VECTORUNIT + int mib[] = { CTL_HW, HW_VECTORUNIT }; +#else + int mib[] = { CTL_MACHDEP, CPU_ALTIVEC }; +#endif + + sysctl(mib, sizeof(mib)/sizeof(mib[0]), &cpu_altivec, &len, NULL, 0); + return cpu_altivec; +} +# else +static int altivec_available(void) +{ +#if COMPILE_POWERPC_ALTIVEC + return 1; +#else + return 0; +#endif +} +# endif #endif //COMPILE_POWERPC static void set_hardware_flags(int32_t cpuid) { diff --git a/src/threadqueue.c b/src/threadqueue.c index 152ee0c3..ce402ee8 100644 --- a/src/threadqueue.c +++ b/src/threadqueue.c @@ -500,9 +500,10 @@ int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depen */ threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job) { - // The caller should have had another reference. 
- assert(job->refcount > 0); - KVZ_ATOMIC_INC(&job->refcount); + int32_t new_refcount = KVZ_ATOMIC_INC(&job->refcount); + // The caller should have had another reference and we added one + // reference so refcount should be at least 2. + assert(new_refcount >= 2); return job; } diff --git a/tests/dct_tests.c b/tests/dct_tests.c index 0ce2cb5d..d27085dc 100644 --- a/tests/dct_tests.c +++ b/tests/dct_tests.c @@ -186,7 +186,7 @@ SUITE(dct_tests) // Loop through all strategies picking out the intra sad ones and run // select strategies though all tests - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const strategy_t * strategy = &strategies.strategies[i]; // Select buffer width according to function name for dct function. diff --git a/tests/sad_tests.c b/tests/sad_tests.c index 031ece47..1ee2ab36 100644 --- a/tests/sad_tests.c +++ b/tests/sad_tests.c @@ -329,7 +329,7 @@ SUITE(sad_tests) setup_tests(); - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { if (strcmp(strategies.strategies[i].type, "reg_sad") != 0) { continue; } diff --git a/tests/speed_tests.c b/tests/speed_tests.c index 2a0d245c..e9b3d5ef 100644 --- a/tests/speed_tests.c +++ b/tests/speed_tests.c @@ -355,7 +355,7 @@ SUITE(speed_tests) // Loop through all strategies picking out the intra sad ones and run // selectec strategies though all tests - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const strategy_t * strategy = &strategies.strategies[i]; // Select buffer width according to function name. 
diff --git a/tests/test_gop.sh b/tests/test_gop.sh index c8905fe9..b5c6059a 100755 --- a/tests/test_gop.sh +++ b/tests/test_gop.sh @@ -9,4 +9,5 @@ common_args='-p0 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-s valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=1 valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=4 valgrind_test 264x130 20 $common_args --gop=8 -p16 --owf=0 +valgrind_test 264x130 10 $common_args --gop=8 -p1 --owf=4 valgrind_test 264x130 10 $common_args --gop=lp-g4d3t1 -p5 --owf=4 diff --git a/tests/util.sh b/tests/util.sh index 89a1e0fc..bc12767d 100644 --- a/tests/util.sh +++ b/tests/util.sh @@ -34,9 +34,18 @@ valgrind_test() { prepare "${dimensions}" "${frames}" + # If $KVZ_TEST_VALGRIND is defined and equal to "1", run the test with + # valgrind. Otherwise, run without valgrind. + if [ "${KVZ_TEST_VALGRIND:-0}" = '1' ]; then + valgrind='valgrind --leak-check=full --error-exitcode=1 --' + else + valgrind='' + fi + + # No quotes for $valgrind because it expands to multiple (or zero) + # arguments. print_and_run \ - libtool execute \ - valgrind --leak-check=full --error-exitcode=1 -- \ + libtool execute $valgrind \ ../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@" print_and_run \