Merge branch 'bipred_recon' of https://gitlab.tut.fi/TIE/ultravideo/kvazaar into HEAD

# Conflicts:
#	build/kvazaar_lib/kvazaar_lib.vcxproj
This commit is contained in:
Reima Hyvönen 2018-07-10 16:21:04 +03:00
commit 9a339eef89
31 changed files with 925 additions and 580 deletions

47
.gitlab-ci.yml Normal file
View file

@ -0,0 +1,47 @@
# Use Kvazaar CI base image which includes the build tools and ffmpeg + hmdec in ${HOME}/bin
image: ultravideo/kvazaar_ci_base:latest
# Build and test kvazaar
test-kvazaar: &test-template
stage: test
script:
- export PATH="${HOME}/bin:${PATH}"
- ./autogen.sh
- ./configure --enable-werror || (cat config.log && false)
- make --jobs=8
- make check --jobs=8 VERBOSE=1
artifacts:
paths:
- src/kvazaar
- src/.libs
expire_in: 1 week
test-asan:
<<: *test-template
variables:
CFLAGS: '-fsanitize=address'
# LeakSanitizer doesn't work inside the container because it requires
# ptrace so we disable it.
ASAN_OPTIONS: 'detect_leaks=0'
# AddressSanitizer adds some extra symbols so we expect a failure from
# the external symbols test.
XFAIL_TESTS: test_external_symbols.sh
test-tsan:
<<: *test-template
variables:
CFLAGS: '-fsanitize=thread'
test-ubsan:
<<: *test-template
variables:
CFLAGS: '-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment'
test-valgrind:
<<: *test-template
variables:
KVAZAAR_OVERRIDE_angular_pred: generic
KVAZAAR_OVERRIDE_sao_band_ddistortion: generic
KVAZAAR_OVERRIDE_sao_edge_ddistortion: generic
KVAZAAR_OVERRIDE_calc_sao_edge_dir: generic
KVZ_TEST_VALGRIND: 1

View file

@ -19,7 +19,16 @@ matrix:
include:
- compiler: clang
env: KVZ_TEST_VALGRIND=1
- compiler: clang
env: CFLAGS='-fsanitize=thread'
- compiler: clang
env: CFLAGS='-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=alignment'
- compiler: gcc-4.8
env: CFLAGS='-fsanitize=address'
# We have some Mac specific code and Mac sometimes has odd build issues.
- os: osx
@ -27,14 +36,15 @@ matrix:
install: true
script:
- ./autogen.sh
- ./configure --enable-werror
- ./configure --enable-werror || (cat config.log && false)
- make --jobs=2 V=1
- make check TESTS=kvazaar_tests
install: bash .travis-install.bash
script:
- ./autogen.sh
- ./configure --enable-werror
- ./configure --enable-werror || (cat config.log && false)
- make --jobs=2 V=1
- make check VERBOSE=1

294
README.md
View file

@ -53,14 +53,14 @@ Usage:
kvazaar -i <input> --input-res <width>x<height> -o <output>
Required:
-i, --input : Input file
-i, --input <filename> : Input file
--input-res <res> : Input resolution [auto]
auto: detect from file name
<int>x<int>: width times height
-o, --output : Output file
- auto: Detect from file name.
- <int>x<int>: width times height
-o, --output <filename> : Output file
Presets:
--preset=<preset> : Set options to a preset [medium]
--preset <preset> : Set options to a preset [medium]
- ultrafast, superfast, veryfast, faster,
fast, medium, slow, slower, veryslow
placebo
@ -68,160 +68,175 @@ Presets:
Input:
-n, --frames <integer> : Number of frames to code [all]
--seek <integer> : First frame to code [0]
--input-fps <num>[/<denom>] : Framerate of the input video [25.0]
--source-scan-type <string> : Set source scan type [progressive].
- progressive: progressive scan
- tff: top field first
- bff: bottom field first
--input-format : P420 or P400
--input-bitdepth : 8-16
--loop-input : Re-read input file forever
--input-fps <num>[/<denom>] : Frame rate of the input video [25]
--source-scan-type <string> : Source scan type [progressive]
- progressive: Progressive scan
- tff: Top field first
- bff: Bottom field first
--input-format <string> : P420 or P400 [P420]
--input-bitdepth <int> : 8-16 [8]
--loop-input : Re-read input file forever.
Options:
--help : Print this help message and exit
--version : Print version information and exit
--aud : Use access unit delimiters
--debug <string> : Output encoder's reconstruction.
--cpuid <integer> : Disable runtime cpu optimizations with value 0.
--hash : Decoded picture hash [checksum]
--help : Print this help message and exit.
--version : Print version information and exit.
--(no-)aud : Use access unit delimiters. [disabled]
--debug <filename> : Output internal reconstruction.
--(no-)cpuid : Enable runtime CPU optimizations. [enabled]
--hash <string> : Decoded picture hash [checksum]
- none: 0 bytes
- checksum: 18 bytes
- md5: 56 bytes
--no-psnr : Don't calculate PSNR for frames
--no-info : Don't add encoder info SEI.
--(no-)psnr : Calculate PSNR for frames. [enabled]
--(no-)info : Add encoder info SEI. [enabled]
--crypto <string> : Selective encryption. Crypto support must be
enabled at compile-time. Can be 'on' or 'off' or
a list of features separated with a '+'. [off]
- on: Enable all encryption features.
- off: Disable selective encryption.
- mvs: Motion vector magnitudes.
- mv_signs: Motion vector signs.
- trans_coeffs: Coefficient magnitudes.
- trans_coeff_signs: Coefficient signs.
- intra_pred_modes: Intra prediction modes.
--key <string> : Encryption key [16,213,27,56,255,127,242,112,
97,126,197,204,25,59,38,30]
Video structure:
-q, --qp <integer> : Quantization Parameter [32]
-p, --period <integer> : Period of intra pictures [0]
- 0: only first picture is intra
- 1: all pictures are intra
- 2-N: every Nth picture is intra
--vps-period <integer> : Specify how often the video parameter set is
re-sent. [0]
- 0: only send VPS with the first frame
- N: send VPS with every Nth intra frame
-r, --ref <integer> : Reference frames, range 1..15 [3]
--gop <string> : Definition of GOP structure [0]
- 0: disabled
-q, --qp <integer> : Quantization parameter [22]
-p, --period <integer> : Period of intra pictures [64]
- 0: Only first picture is intra.
- 1: All pictures are intra.
- N: Every Nth picture is intra.
--vps-period <integer> : How often the video parameter set is re-sent [0]
- 0: Only send VPS with the first frame.
- N: Send VPS with every Nth intra frame.
-r, --ref <integer> : Number of reference frames, in range 1..15 [4]
--gop <string> : GOP structure [8]
- 0: Disabled
- 8: B-frame pyramid of length 8
- lp-<string>: lp-gop definition
(e.g. lp-g8d4t2, see README)
--cqmfile <string> : Custom Quantization Matrices from a file
--bitrate <integer> : Target bitrate. [0]
- 0: disable rate-control
- N: target N bits per second
--lossless : Use lossless coding
--mv-constraint : Constrain movement vectors
- none: no constraint
- frametile: constrain within the tile
- frametilemargin: constrain even more
--roi <string> : Use a delta QP map for region of interest
Read an array of delta QP values from
a file, where the first two values are the
width and height, followed by width*height
delta QP values in raster order.
The delta QP map can be any size or aspect
ratio, and will be mapped to LCU's.
--(no-)erp-aqp : Use adaptive QP for 360 video with
equirectangular projection
- lp-<string>: Low-delay P-frame GOP
(e.g. lp-g8d4t2, see README)
--cqmfile <filename> : Read custom quantization matrices from a file.
--bitrate <integer> : Target bitrate [0]
- 0: Disable rate control.
- N: Target N bits per second.
--(no-)lossless : Use lossless coding. [disabled]
--mv-constraint <string> : Constrain movement vectors. [none]
- none: No constraint
- frametile: Constrain within the tile.
- frametilemargin: Constrain even more.
--roi <filename> : Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
--(no-)erp-aqp : Use adaptive QP for 360 degree video with
equirectangular projection. [disabled]
--level <number> : Use the given HEVC level in the output and give
an error if the input doesn't fit to its
limits [6.2]
Allowed values are 1, 2, 2.1, 3, 3.1, 4, 4.1
5, 5.1, 5.2, 6, 6.1 and 6.2. The dot is
optional.
--force-level <number> : Same as --level, except instead of errors you
get warnings
--high-tier : Used with --level. Tells the encoder to use
high-tier bitrate limits instead of the
main-tier limits during encoding.
an error if level limits are exceeded. [6.2]
- 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6,
6.1, 6.2
--force-level <number> : Same as --level but warnings instead of errors.
--high-tier : Used with --level. Use high tier bitrate limits
instead of the main tier limits during encoding.
High tier requires level 4 or higher.
Compression tools:
--deblock [<beta:tc>] : Deblocking
- beta: between -6 and 6
- tc: between -6 and 6
--(no-)sao : Sample Adaptive Offset
--(no-)rdoq : Rate-Distortion Optimized Quantization
--(no-)signhide : Sign Hiding
--(no-)smp : Symmetric Motion Partition
--(no-)amp : Asymmetric Motion Partition
--rd <integer> : Intra mode search complexity
- 0: skip intra if inter is good enough
- 1: rough intra mode search with SATD
- 2: refine intra mode search with SSE
--(no-)mv-rdo : Rate-Distortion Optimized motion vector costs
--(no-)full-intra-search
: Try all intra modes during rough search.
--(no-)transform-skip : Transform skip
--me <string> : Integer motion estimation
--(no-)deblock <beta:tc> : Deblocking filter. [0:0]
- beta: Between -6 and 6
- tc: Between -6 and 6
--sao <string> : Sample Adaptive Offset [full]
- off: SAO disabled
- band: Band offset only
- edge: Edge offset only
- full: Full SAO
--(no-)rdoq : Rate-distortion optimized quantization [enabled]
--(no-)rdoq-skip : Skip RDOQ for 4x4 blocks. [disabled]
--(no-)signhide : Sign hiding [disabled]
--(no-)smp : Symmetric motion partition [disabled]
--(no-)amp : Asymmetric motion partition [disabled]
--rd <integer> : Intra mode search complexity [0]
- 0: Skip intra if inter is good enough.
- 1: Rough intra mode search with SATD.
- 2: Refine intra mode search with SSE.
- 3: Try all intra modes and enable intra
chroma mode search.
--(no-)mv-rdo : Rate-distortion optimized motion vector costs
[disabled]
--(no-)full-intra-search : Try all intra modes during rough search.
[disabled]
--(no-)transform-skip : Try transform skip [disabled]
--me <string> : Integer motion estimation algorithm [hexbs]
- hexbs: Hexagon Based Search
- tz: Test Zone Search
- full: Full Search
- full8, full16, full32, full64
- dia: Diamond Search
--me-steps <integer> : How many search steps does the motion estimation
do before cutting off [-1]
Has effect only for 'hexbs' and 'dia'
--subme <integer> : Set fractional pixel motion estimation level
- 0: only integer motion estimation
--me-steps <integer> : Motion estimation search step limit. Only
affects 'hexbs' and 'dia'. [-1]
--subme <integer> : Fractional pixel motion estimation level [4]
- 0: Integer motion estimation only
- 1: + 1/2-pixel horizontal and vertical
- 2: + 1/2-pixel diagonal
- 3: + 1/4-pixel horizontal and vertical
- 4: + 1/4-pixel diagonal
--pu-depth-inter <int>-<int>
: Range for sizes for inter predictions
--pu-depth-inter <int>-<int> : Inter prediction units sizes [0-3]
- 0, 1, 2, 3: from 64x64 to 8x8
--pu-depth-intra <int>-<int> : Range for sizes for intra predictions
--pu-depth-intra <int>-<int> : Intra prediction units sizes [1-4]
- 0, 1, 2, 3, 4: from 64x64 to 4x4
--(no-)bipred : Bi-prediction
--(no-)cu-split-termination
: CU split search termination condition
- off: Never terminate cu-split search
- zero: Terminate with zero residual
--(no-)me-early-termination : ME early termination condition
- off: Don't terminate early
- on: Terminate early
- sensitive: Terminate even earlier
--(no-)implicit-rdpcm : Implicit residual DPCM
Currently only supported with lossless coding.
--(no-)tmvp : Temporal Motion Vector Prediction
--(no-)rdoq-skip : Skips RDOQ for 4x4 blocks
--tr-depth-intra <int> : Transform split depth for intra blocks [0]
--(no-)bipred : Bi-prediction [disabled]
--cu-split-termination <string> : CU split search termination [zero]
- off: Don't terminate early.
- zero: Terminate when residual is zero.
--me-early-termination <string> : Motion estimation termination [on]
- off: Don't terminate early.
- on: Terminate early.
- sensitive: Terminate even earlier.
--(no-)intra-rdo-et : Check intra modes in rdo stage only until
a zero coefficient CU is found. [disabled]
--(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported
with lossless coding. [disabled]
--(no-)tmvp : Temporal motion vector prediction [enabled]
Parallel processing:
--threads <integer> : Number of threads to use [auto]
- 0: process everything with main thread
- N: use N threads for encoding
- auto: select based on number of cores
--owf <integer> : Frame parallelism [auto]
- N: Process N-1 frames at a time
- auto: Select automatically
--(no-)wpp : Wavefront parallel processing [enabled]
- 0: Process everything with main thread.
- N: Use N threads for encoding.
- auto: Select automatically.
--owf <integer> : Frame-level parallelism [auto]
- N: Process N+1 frames at a time.
- auto: Select automatically.
--(no-)wpp : Wavefront parallel processing. [enabled]
Enabling tiles automatically disables WPP.
To enable WPP with tiles, re-enable it after
enabling tiles.
enabling tiles. Enabling WPP with tiles is,
however, an experimental feature since it is
not supported in any HEVC profile.
--tiles <int>x<int> : Split picture into width x height uniform tiles.
--tiles-width-split <string>|u<int> :
Specifies a comma separated list of pixel
positions of tiles columns separation
coordinates.
Can also be u, followed by a single int n, in
which case it produces columns of uniform width.
- <string>: A comma-separated list of tile
column pixel coordinates.
- u<int>: Number of tile columns of uniform
width.
--tiles-height-split <string>|u<int> :
Specifies a comma separated list of pixel
positions of tiles rows separation coordinates.
Can also be u followed by a single int n, in
which case it produces rows of uniform height.
--slices <string> : Control how slices are used
- tiles: put tiles in independent slices
- wpp: put rows in dependent slices
- tiles+wpp: do both
- <string>: A comma-separated list of tile row
pixel coordinates.
- u<int>: Number of tile rows of uniform
height.
--slices <string> : Control how slices are used.
- tiles: Put tiles in independent slices.
- wpp: Put rows in dependent slices.
- tiles+wpp: Do both.
Video Usability Information:
--sar <width:height> : Specify Sample Aspect Ratio
--sar <width:height> : Specify sample aspect ratio
--overscan <string> : Specify crop overscan setting [undef]
- undef, show, crop
--videoformat <string> : Specify video format [undef]
- component, pal, ntsc, secam, mac, undef
- undef, component, pal, ntsc, secam, mac
--range <string> : Specify color range [tv]
- tv, pc
--colorprim <string> : Specify color primaries [undef]
@ -238,8 +253,8 @@ Video Usability Information:
--chromaloc <integer> : Specify chroma sample location (0 to 5) [0]
Deprecated parameters: (might be removed at some point)
-w, --width : Use --input-res
-h, --height : Use --input-res
-w, --width <integer> : Use --input-res.
-h, --height <integer> : Use --input-res.
```
[comment]: # (END KVAZAAR HELP MESSAGE)
@ -268,24 +283,27 @@ where the names have been abbreviated to fit the layout in GitHub.
| | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p |
| -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- |
| rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 |
| pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 |
| rd | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 2 |
| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-4 | 1-4 | 1-4 | 1-4 | 1-4 |
| pu-depth-inter | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | 0-3 | 0-3 |
| me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz |
| ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 |
| gop | g4d4t1| g4d4t1| g4d4t1| g4d4t1| g4d4t1| 8 | 8 | 8 | 8 | 8 |
| ref | 1 | 1 | 1 | 1 | 2 | 4 | 4 | 4 | 4 | 4 |
| bipred | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
| deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 |
| subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 |
| sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 |
| subme | 2 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
| sao | off | full | full | full | full | full | full | full | full | full |
| rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |
| rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 |
| rdoq-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off |
| me-early-termination | sens. | sens. | sens. | sens. | on | on | on | on | on | off |
| me-early-termination | sens. | sens. | sens. | sens. | sens. | on | on | off | off | off |
| intra-rdo-et | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
## Kvazaar library
@ -352,11 +370,11 @@ For other examples, see [Dockerfile](./Dockerfile)
### Visualization (Windows only)
Branch `visualizer` has a visual studio project, which can be compiled to enable visualization feature in Kvazaar.
Compiling `kvazaar_cli` project in the `visualizer` branch results in a Kvazaar executable with visualization enabled.
Additional Requirements: [`SDL2`](https://www.libsdl.org/download-2.0.php), [`SDL2-ttf`](https://www.libsdl.org/projects/SDL_ttf/).
Directory `visualizer_extras` is expected to be found from the same directory level as the kvazaar project directory. Inside should be directories `include` and `lib` found from the development library zip packages.
Directory `visualizer_extras` has to be added into the same directory level as the kvazaar project directory. Inside should be directories `include` and `lib` found from the development library zip packages.
`SDL2.dll`, `SDL2_ttf.dll`, `libfreetype-6.dll`, `zlib1.dll`, and `pthreadVC2.dll` should be placed in the working directory (i.e. the folder the `kvazaar.exe` is in after compiling the `kvazaar_cli` project/solution) when running the visualizer. The required `.dll` can be found in the aforementioned `lib`-folder (`lib\x64`) and the dll folder inside the pthreads folder (see `Required libraries`).

View file

@ -84,7 +84,11 @@
</Lib>
<YASM>
<Defines>ARCH_X86_64=1;%(Defines)</Defines>
<<<<<<< HEAD
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
=======
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
</YASM>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)..\..\pthreads.2\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories);$(SolutionDir)..\src\strategies;</AdditionalIncludeDirectories>
@ -93,7 +97,11 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<YASM>
<Defines>ARCH_X86_64=0;PREFIX</Defines>
<<<<<<< HEAD
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
=======
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
</YASM>
<Lib>
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x86</AdditionalLibraryDirectories>
@ -108,7 +116,11 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<YASM>
<Defines>ARCH_X86_64=0;PREFIX</Defines>
<<<<<<< HEAD
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
=======
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
</YASM>
<Lib>
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x86</AdditionalLibraryDirectories>
@ -123,7 +135,11 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<YASM>
<Defines>ARCH_X86_64=1;%(Defines)</Defines>
<<<<<<< HEAD
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
=======
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
</YASM>
<Lib>
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x64</AdditionalLibraryDirectories>

View file

@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
#
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
ver_major=4
ver_minor=0
ver_minor=1
ver_release=0
# Prevents configure from adding a lot of defines to the CFLAGS
@ -45,15 +45,17 @@ AC_SYS_LARGEFILE
LT_INIT([win32-dll])
AX_CHECK_COMPILE_FLAG([-maltivec],[flag_altivec="true"])
AX_CHECK_COMPILE_FLAG([-mavx2], [flag_avx2="true"])
AX_CHECK_COMPILE_FLAG([-msse4.1], [flag_sse4_1="true"])
AX_CHECK_COMPILE_FLAG([-msse2], [flag_sse2="true"])
AM_CONDITIONAL([HAVE_ALTIVEC], [test x"$flag_altivec" = x"true"])
AM_CONDITIONAL([HAVE_AVX2], [test x"$flag_avx2" = x"true"])
AM_CONDITIONAL([HAVE_SSE4_1], [test x"$flag_sse4_1" = x"true"])
AM_CONDITIONAL([HAVE_SSE2], [test x"$flag_sse2" = x"true"])
KVZ_CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden"
KVZ_CFLAGS="-Wall -Wextra -Wvla -Wno-sign-compare -Wno-unused-parameter -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden"
CFLAGS="$KVZ_CFLAGS $CFLAGS"
AC_SEARCH_LIBS([log], [m c], [], [exit 1])

View file

@ -1,24 +1,24 @@
.TH KVAZAAR "1" "November 2017" "kvazaar v1.2.0" "User Commands"
.TH KVAZAAR "1" "June 2018" "kvazaar v1.2.0" "User Commands"
.SH NAME
kvazaar \- open source HEVC encoder
.SH SYNOPSIS
\fBkvazaar \fR\-i <input> \-\-input\-res <width>x<height> \-o <output>
.SH DESCRIPTION
.TP
\fB\-i\fR, \fB\-\-input
\fB\-i\fR, \fB\-\-input <filename>
Input file
.TP
\fB\-\-input\-res <res>
Input resolution [auto]
auto: detect from file name
<int>x<int>: width times height
\- auto: Detect from file name.
\- <int>x<int>: width times height
.TP
\fB\-o\fR, \fB\-\-output
\fB\-o\fR, \fB\-\-output <filename>
Output file
.SS "Presets:"
.TP
\fB\-\-preset=<preset>
\fB\-\-preset <preset>
Set options to a preset [medium]
\- ultrafast, superfast, veryfast, faster,
fast, medium, slow, slower, veryslow
@ -32,241 +32,288 @@ Number of frames to code [all]
\fB\-\-seek <integer>
First frame to code [0]
.TP
\fB\-\-input\-fps <num>/<denom>
Framerate of the input video [25.0]
\fB\-\-input\-fps <num>[/<denom>]
Frame rate of the input video [25]
.TP
\fB\-\-source\-scan\-type <string>
Set source scan type [progressive].
\- progressive: progressive scan
\- tff: top field first
\- bff: bottom field first
Source scan type [progressive]
\- progressive: Progressive scan
\- tff: Top field first
\- bff: Bottom field first
.TP
\fB\-\-input\-format
P420 or P400
\fB\-\-input\-format <string>
P420 or P400 [P420]
.TP
\fB\-\-input\-bitdepth
8\-16
\fB\-\-input\-bitdepth <int>
8\-16 [8]
.TP
\fB\-\-loop\-input
Re\-read input file forever
Re\-read input file forever.
.SS "Options:"
.TP
\fB\-\-help
Print this help message and exit
Print this help message and exit.
.TP
\fB\-\-version
Print version information and exit
Print version information and exit.
.TP
\fB\-\-aud
Use access unit delimiters
\fB\-\-(no\-)aud
Use access unit delimiters. [disabled]
.TP
\fB\-\-debug <string>
Output encoder's reconstruction.
\fB\-\-debug <filename>
Output internal reconstruction.
.TP
\fB\-\-cpuid <integer>
Disable runtime cpu optimizations with value 0.
\fB\-\-(no\-)cpuid
Enable runtime CPU optimizations. [enabled]
.TP
\fB\-\-hash
\fB\-\-hash <string>
Decoded picture hash [checksum]
\- none: 0 bytes
\- checksum: 18 bytes
\- md5: 56 bytes
.TP
\fB\-\-no\-psnr
Don't calculate PSNR for frames
\fB\-\-(no\-)psnr
Calculate PSNR for frames. [enabled]
.TP
\fB\-\-no\-info
Don't add encoder info SEI.
\fB\-\-(no\-)info
Add encoder info SEI. [enabled]
.TP
\fB\-\-crypto <string>
Selective encryption. Crypto support must be
enabled at compile\-time. Can be 'on' or 'off' or
a list of features separated with a '+'. [off]
\- on: Enable all encryption features.
\- off: Disable selective encryption.
\- mvs: Motion vector magnitudes.
\- mv_signs: Motion vector signs.
\- trans_coeffs: Coefficient magnitudes.
\- trans_coeff_signs: Coefficient signs.
\- intra_pred_modes: Intra prediction modes.
.TP
\fB\-\-key <string>
Encryption key [16,213,27,56,255,127,242,112,
97,126,197,204,25,59,38,30]
.SS "Video structure:"
.TP
\fB\-q\fR, \fB\-\-qp <integer>
Quantization Parameter [32]
Quantization parameter [22]
.TP
\fB\-p\fR, \fB\-\-period <integer>
Period of intra pictures [0]
\- 0: only first picture is intra
\- 1: all pictures are intra
\- 2\-N: every Nth picture is intra
Period of intra pictures [64]
\- 0: Only first picture is intra.
\- 1: All pictures are intra.
\- N: Every Nth picture is intra.
.TP
\fB\-\-vps\-period <integer>
Specify how often the video parameter set is
re\-sent. [0]
\- 0: only send VPS with the first frame
\- N: send VPS with every Nth intra frame
How often the video parameter set is re\-sent [0]
\- 0: Only send VPS with the first frame.
\- N: Send VPS with every Nth intra frame.
.TP
\fB\-r\fR, \fB\-\-ref <integer>
Reference frames, range 1..15 [3]
Number of reference frames, in range 1..15 [4]
.TP
\fB\-\-gop <string>
Definition of GOP structure [0]
\- 0: disabled
GOP structure [8]
\- 0: Disabled
\- 8: B\-frame pyramid of length 8
\- lp\-<string>: lp\-gop definition
(e.g. lp\-g8d4t2, see README)
\- lp\-<string>: Low\-delay P\-frame GOP
(e.g. lp\-g8d4t2, see README)
.TP
\fB\-\-cqmfile <string>
Custom Quantization Matrices from a file
\fB\-\-cqmfile <filename>
Read custom quantization matrices from a file.
.TP
\fB\-\-bitrate <integer>
Target bitrate. [0]
\- 0: disable rate\-control
\- N: target N bits per second
Target bitrate [0]
\- 0: Disable rate control.
\- N: Target N bits per second.
.TP
\fB\-\-lossless
Use lossless coding
\fB\-\-(no\-)lossless
Use lossless coding. [disabled]
.TP
\fB\-\-mv\-constraint
Constrain movement vectors
\- none: no constraint
\- frametile: constrain within the tile
\- frametilemargin: constrain even more
\fB\-\-mv\-constraint <string>
Constrain movement vectors. [none]
\- none: No constraint
\- frametile: Constrain within the tile.
\- frametilemargin: Constrain even more.
.TP
\fB\-\-roi <string>
Use a delta QP map for region of interest
Read an array of delta QP values from
a file, where the first two values are the
width and height, followed by width*height
delta QP values in raster order.
The delta QP map can be any size or aspect
ratio, and will be mapped to LCU's.
\fB\-\-roi <filename>
Use a delta QP map for region of interest.
Reads an array of delta QP values from a text
file. The file format is: width and height of
the QP delta map followed by width*height delta
QP values in raster order. The map can be of any
size and will be scaled to the video size.
.TP
\fB\-\-(no\-)erp\-aqp
Use adaptive QP for 360 video with
equirectangular projection
Use adaptive QP for 360 degree video with
equirectangular projection. [disabled]
.TP
\fB\-\-level <number>
Use the given HEVC level in the output and give
an error if level limits are exceeded. [6.2]
\- 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6,
6.1, 6.2
.TP
\fB\-\-force\-level <number>
Same as \-\-level but warnings instead of errors.
.TP
\fB\-\-high\-tier
Used with \-\-level. Use high tier bitrate limits
instead of the main tier limits during encoding.
High tier requires level 4 or higher.
.SS "Compression tools:"
.TP
\fB\-\-deblock [<beta:tc>]
Deblocking
\- beta: between \-6 and 6
\- tc: between \-6 and 6
\fB\-\-(no\-)deblock <beta:tc>
Deblocking filter. [0:0]
\- beta: Between \-6 and 6
\- tc: Between \-6 and 6
.TP
\fB\-\-(no\-)sao
Sample Adaptive Offset
\fB\-\-sao <string>
Sample Adaptive Offset [full]
\- off: SAO disabled
\- band: Band offset only
\- edge: Edge offset only
\- full: Full SAO
.TP
\fB\-\-(no\-)rdoq
Rate\-Distortion Optimized Quantization
Rate\-distortion optimized quantization [enabled]
.TP
\fB\-\-(no\-)rdoq\-skip
Skip RDOQ for 4x4 blocks. [disabled]
.TP
\fB\-\-(no\-)signhide
Sign Hiding
Sign hiding [disabled]
.TP
\fB\-\-(no\-)smp
Symmetric Motion Partition
Symmetric motion partition [disabled]
.TP
\fB\-\-(no\-)amp
Asymmetric Motion Partition
Asymmetric motion partition [disabled]
.TP
\fB\-\-rd <integer>
Intra mode search complexity
\- 0: skip intra if inter is good enough
\- 1: rough intra mode search with SATD
\- 2: refine intra mode search with SSE
Intra mode search complexity [0]
\- 0: Skip intra if inter is good enough.
\- 1: Rough intra mode search with SATD.
\- 2: Refine intra mode search with SSE.
\- 3: Try all intra modes and enable intra
chroma mode search.
.TP
\fB\-\-(no\-)mv\-rdo
Rate\-Distortion Optimized motion vector costs
Rate\-distortion optimized motion vector costs
[disabled]
.TP
\fB\-\-(no\-)full\-intra\-search
Try all intra modes during rough search.
[disabled]
.TP
\fB\-\-(no\-)transform\-skip
Transform skip
Try transform skip [disabled]
.TP
\fB\-\-me <string>
Integer motion estimation
Integer motion estimation algorithm [hexbs]
\- hexbs: Hexagon Based Search
\- tz: Test Zone Search
\- full: Full Search
\- full8, full16, full32, full64
\- dia: Diamond Search
.TP
\fB\-\-me\-steps <integer>
Motion estimation search step limit. Only
affects 'hexbs' and 'dia'. [\-1]
.TP
\fB\-\-subme <integer>
Set fractional pixel motion estimation level
\- 0: only integer motion estimation
Fractional pixel motion estimation level [4]
\- 0: Integer motion estimation only
\- 1: + 1/2\-pixel horizontal and vertical
\- 2: + 1/2\-pixel diagonal
\- 3: + 1/4\-pixel horizontal and vertical
\- 4: + 1/4\-pixel diagonal
.TP
\fB\-\-pu\-depth\-inter <int>\-<int>
Range for sizes for inter predictions
Inter prediction units sizes [0\-3]
\- 0, 1, 2, 3: from 64x64 to 8x8
.TP
\fB\-\-pu\-depth\-intra <int>\-<int>
Range for sizes for intra predictions
Intra prediction units sizes [1\-4]
\- 0, 1, 2, 3, 4: from 64x64 to 4x4
.TP
\fB\-\-tr\-depth\-intra <int>
Transform split depth for intra blocks [0]
.TP
\fB\-\-(no\-)bipred
Bi\-prediction
Bi\-prediction [disabled]
.TP
\fB\-\-(no\-)cu\-split\-termination
CU split search termination condition
\- off: Never terminate cu\-split search
\- zero: Terminate with zero residual
\fB\-\-cu\-split\-termination <string>
CU split search termination [zero]
\- off: Don't terminate early.
\- zero: Terminate when residual is zero.
.TP
\fB\-\-(no\-)me\-early\-termination
ME early termination condition
\- off: Don't terminate early
\- on: Terminate early
\- sensitive: Terminate even earlier
\fB\-\-me\-early\-termination <string>
Motion estimation termination [on]
\- off: Don't terminate early.
\- on: Terminate early.
\- sensitive: Terminate even earlier.
.TP
\fB\-\-(no\-)intra\-rdo\-et
Check intra modes in rdo stage only until
a zero coefficient CU is found. [disabled]
.TP
\fB\-\-(no\-)implicit\-rdpcm
Implicit residual DPCM
Currently only supported with lossless coding.
Implicit residual DPCM. Currently only supported
with lossless coding. [disabled]
.TP
\fB\-\-(no\-)tmvp
Temporal Motion Vector Prediction
.TP
\fB\-\-(no\-)rdoq\-skip
Skips RDOQ for 4x4 blocks
Temporal motion vector prediction [enabled]
.SS "Parallel processing:"
.TP
\fB\-\-threads <integer>
Number of threads to use [auto]
\- 0: process everything with main thread
\- N: use N threads for encoding
\- auto: select based on number of cores
\- 0: Process everything with main thread.
\- N: Use N threads for encoding.
\- auto: Select automatically.
.TP
\fB\-\-owf <integer>
Frame parallelism [auto]
\- N: Process N\-1 frames at a time
\- auto: Select automatically
Frame\-level parallelism [auto]
\- N: Process N+1 frames at a time.
\- auto: Select automatically.
.TP
\fB\-\-(no\-)wpp
Wavefront parallel processing [enabled]
Wavefront parallel processing. [enabled]
Enabling tiles automatically disables WPP.
To enable WPP with tiles, re\-enable it after
enabling tiles.
enabling tiles. Enabling WPP with tiles is,
however, an experimental feature since it is
not supported in any HEVC profile.
.TP
\fB\-\-tiles <int>x<int>
Split picture into width x height uniform tiles.
.TP
\fB\-\-tiles\-width\-split <string>|u<int>
Specifies a comma separated list of pixel
positions of tiles columns separation coordinates.
Can also be u followed by a single int n,
in which case it produces columns of uniform width.
\- <string>: A comma\-separated list of tile
column pixel coordinates.
\- u<int>: Number of tile columns of uniform
width.
.TP
\fB\-\-tiles\-height\-split <string>|u<int>
Specifies a comma separated list of pixel
positions of tiles rows separation coordinates.
Can also be u followed by a single int n,
in which case it produces rows of uniform height.
\- <string>: A comma\-separated list of tile row
pixel coordinates.
\- u<int>: Number of tile rows of uniform
height.
.TP
\fB\-\-slices <string>
Control how slices are used
\- tiles: put tiles in independent slices
\- wpp: put rows in dependent slices
\- tiles+wpp: do both
Control how slices are used.
\- tiles: Put tiles in independent slices.
\- wpp: Put rows in dependent slices.
\- tiles+wpp: Do both.
.SS "Video Usability Information:"
.TP
\fB\-\-sar <width:height>
Specify Sample Aspect Ratio
Specify sample aspect ratio
.TP
\fB\-\-overscan <string>
Specify crop overscan setting [undef]
@ -274,7 +321,7 @@ Specify crop overscan setting [undef]
.TP
\fB\-\-videoformat <string>
Specify video format [undef]
\- component, pal, ntsc, secam, mac, undef
\- undef, component, pal, ntsc, secam, mac
.TP
\fB\-\-range <string>
Specify color range [tv]

View file

@ -197,9 +197,13 @@ libsse41_la_SOURCES = \
strategies/sse41/picture-sse41.h
if HAVE_PPC
if HAVE_ALTIVEC
libaltivec_la_CFLAGS = -maltivec
endif
endif #HAVE_PPC
if HAVE_X86
if HAVE_AVX2

206
src/cfg.c
View file

@ -86,13 +86,13 @@ int kvz_config_init(kvz_config *cfg)
cfg->tiles_height_count = 1;
cfg->tiles_width_split = NULL;
cfg->tiles_height_split = NULL;
cfg->wpp = 1;
cfg->owf = -1;
cfg->slice_count = 1;
cfg->slice_addresses_in_ts = MALLOC(int32_t, 1);
cfg->slice_addresses_in_ts[0] = 0;
cfg->threads = -1;
cfg->cpuid = 1;
@ -109,6 +109,7 @@ int kvz_config_init(kvz_config *cfg)
cfg->crypto_features = KVZ_CRYPTO_OFF;
cfg->me_early_termination = 1;
cfg->intra_rdo_et = 0;
cfg->input_format = KVZ_FORMAT_P420;
cfg->input_bitdepth = 8;
@ -185,14 +186,14 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
const char* current_arg = NULL;
int32_t current_value;
int32_t values[MAX_TILES_PER_DIM];
int i;
//Free pointer in any case
if (*array) {
FREE_POINTER(*array);
}
//If the arg starts with u, we want a uniform split
if (arg[0]=='u') {
*ntiles = atoi(arg + 1);
@ -203,7 +204,7 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
//Done with parsing
return 1;
}
//We have a comma-separated list of int for the split...
current_arg = arg;
*ntiles = 1;
@ -220,27 +221,27 @@ static int parse_tiles_specification(const char* const arg, int32_t * const ntil
++(*ntiles);
if (MAX_TILES_PER_DIM <= *ntiles) break;
} while (current_arg);
if (MAX_TILES_PER_DIM <= *ntiles || 1 >= *ntiles) {
fprintf(stderr, "Invalid number of tiles (1 <= %d <= %d = MAX_TILES_PER_DIM)!\n", *ntiles, MAX_TILES_PER_DIM);
return 0;
}
*array = MALLOC(int32_t, *ntiles - 1);
if (!*array) {
fprintf(stderr, "Could not allocate array for tiles\n");
return 0;
}
//TODO: memcpy?
for (i = 0; i < *ntiles - 1; ++i) {
(*array)[i] = values[i];
}
return 1;
}
static int parse_uint8(const char *numstr,uint8_t* number,int min, int max)
static int parse_uint8(const char *numstr,uint8_t* number,int min, int max)
{
char *tail;
int d = strtol(numstr, &tail, 10);
@ -292,14 +293,14 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli
const char* current_arg = NULL;
int32_t current_value;
int32_t values[MAX_SLICES];
int i;
//Free pointer in any case
if (*array) {
FREE_POINTER(*array);
}
//If the arg starts with u, we want a uniform split
if (arg[0]=='u') {
*nslices = atoi(arg+1);
@ -310,7 +311,7 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli
//Done with parsing
return 1;
}
//We have a comma-separated list of int for the split...
current_arg = arg;
//We always have a slice starting at 0
@ -329,23 +330,23 @@ static int parse_slice_specification(const char* const arg, int32_t * const nsli
++(*nslices);
if (MAX_SLICES <= *nslices) break;
} while (current_arg);
if (MAX_SLICES <= *nslices || 0 >= *nslices) {
fprintf(stderr, "Invalid number of slices (0 < %d <= %d = MAX_SLICES)!\n", *nslices, MAX_SLICES);
return 0;
}
*array = MALLOC(int32_t, *nslices);
if (!*array) {
fprintf(stderr, "Could not allocate array for slices\n");
return 0;
}
//TODO: memcpy?
for (i = 0; i < *nslices; ++i) {
(*array)[i] = values[i];
}
return 1;
}
@ -375,221 +376,241 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
static const char * const sao_names[] = { "off", "edge", "band", "full", NULL };
static const char * const preset_values[11][20*2] = {
{
"ultrafast",
static const char * const preset_values[11][22*2] = {
{
"ultrafast",
"rd", "0",
"pu-depth-intra", "2-3",
"pu-depth-inter", "2-3",
"rd", "0",
"me", "hexbs",
"gop", "lp-g4d4t1",
"ref", "1",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "0",
"subme", "2",
"sao", "off",
"rdoq", "0",
"rdoq-skip", "1",
"transform-skip", "0",
"full-intra-search", "0",
"rdoq-skip", "0",
"transform-skip", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "sensitive",
"gop", "lp-g4d3t1",
NULL
"intra-rdo-et", "0",
NULL
},
{
{
"superfast",
"rd", "0",
"pu-depth-intra", "2-3",
"pu-depth-inter", "2-3",
"rd", "0",
"me", "hexbs",
"gop", "lp-g4d4t1",
"ref", "1",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "0",
"subme", "2",
"sao", "full",
"rdoq", "0",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "sensitive",
"gop", "lp-g4d3t1",
"intra-rdo-et", "0",
NULL
},
{
"veryfast",
"pu-depth-intra", "2-3",
"pu-depth-inter", "2-3",
"rd", "0",
"pu-depth-intra", "2-3",
"pu-depth-inter", "1-3",
"me", "hexbs",
"gop", "lp-g4d4t1",
"ref", "1",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "2",
"sao", "full",
"rdoq", "0",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "sensitive",
"gop", "lp-g4d3t1",
"intra-rdo-et", "0",
NULL
},
{
"faster",
"rd", "0",
"pu-depth-intra", "2-3",
"pu-depth-inter", "1-3",
"rd", "1",
"me", "hexbs",
"gop", "lp-g4d4t1",
"ref", "1",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "2",
"subme", "4",
"sao", "full",
"rdoq", "0",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "sensitive",
"gop", "lp-g4d3t1",
"intra-rdo-et", "0",
NULL
},
{
"fast",
"pu-depth-intra", "2-3",
"rd", "0",
"pu-depth-intra", "1-3",
"pu-depth-inter", "1-3",
"rd", "1",
"me", "hexbs",
"ref", "1",
"gop", "lp-g4d4t1",
"ref", "2",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "4",
"sao", "full",
"rdoq", "0",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "on",
"gop", "lp-g4d3t1",
"me-early-termination", "sensitive",
"intra-rdo-et", "0",
NULL
},
{
"medium",
"pu-depth-intra", "1-3",
"pu-depth-inter", "1-3",
"rd", "1",
"rd", "0",
"pu-depth-intra", "1-4",
"pu-depth-inter", "0-3",
"me", "hexbs",
"ref", "1",
"gop", "8",
"ref", "4",
"bipred", "0",
"deblock", "0:0",
"signhide", "0",
"subme", "4",
"sao", "full",
"rdoq", "1",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "on",
"gop", "lp-g4d3t1",
"intra-rdo-et", "0",
NULL
},
{
"slow",
"pu-depth-intra", "1-3",
"pu-depth-inter", "1-3",
"rd", "1",
"rd", "0",
"pu-depth-intra", "1-4",
"pu-depth-inter", "0-3",
"me", "hexbs",
"ref", "2",
"gop", "8",
"ref", "4",
"bipred", "1",
"deblock", "0:0",
"signhide", "1",
"signhide", "0",
"subme", "4",
"sao", "full",
"rdoq", "1",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "on",
"gop", "lp-g4d2t1",
"intra-rdo-et", "0",
NULL
},
{
"slower",
"pu-depth-intra", "1-3",
"rd", "2",
"pu-depth-intra", "1-4",
"pu-depth-inter", "0-3",
"rd", "1",
"me", "hexbs",
"ref", "2",
"gop", "8",
"ref", "4",
"bipred", "1",
"deblock", "0:0",
"signhide", "1",
"subme", "4",
"sao", "full",
"rdoq", "1",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"full-intra-search", "0",
"smp", "0",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "on",
"gop", "lp-g4d2t1",
"me-early-termination", "off",
"intra-rdo-et", "0",
NULL
},
{
"veryslow",
"rd", "2",
"pu-depth-intra", "1-4",
"pu-depth-inter", "0-3",
"rd", "1",
"me", "hexbs",
"ref", "3",
"gop", "8",
"ref", "4",
"bipred", "1",
"deblock", "0:0",
"signhide", "1",
"subme", "4",
"sao", "full",
"rdoq", "1",
"rdoq-skip", "1",
"rdoq-skip", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
"smp", "0",
"full-intra-search", "0",
"smp", "1",
"amp", "0",
"cu-split-termination", "zero",
"me-early-termination", "on",
"gop", "lp-g4d2t1",
"me-early-termination", "off",
"intra-rdo-et", "0",
NULL
},
{
"placebo",
"rd", "2",
"pu-depth-intra", "1-4",
"pu-depth-inter", "0-3",
"rd", "1",
"me", "tz",
"gop", "8",
"ref", "4",
"bipred", "1",
"deblock", "0:0",
"signhide", "1",
"subme", "4",
@ -597,13 +618,13 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "1",
"rdoq-skip", "0",
"transform-skip", "1",
"full-intra-search", "0",
"mv-rdo", "1",
"full-intra-search", "0",
"smp", "1",
"amp", "1",
"cu-split-termination", "off",
"me-early-termination", "off",
"gop", "lp-g4d2t1",
"intra-rdo-et", "0",
NULL
},
{ NULL }
@ -727,7 +748,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
}
else if OPT("tiles-width-split") {
int retval = parse_tiles_specification(value, &cfg->tiles_width_count, &cfg->tiles_width_split);
if (cfg->tiles_width_count > 1 && cfg->tmvp_enable) {
cfg->tmvp_enable = false;
fprintf(stderr, "Disabling TMVP because tiles are used.\n");
@ -742,7 +763,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
}
else if OPT("tiles-height-split") {
int retval = parse_tiles_specification(value, &cfg->tiles_height_count, &cfg->tiles_height_split);
if (cfg->tiles_height_count > 1 && cfg->tmvp_enable) {
cfg->tmvp_enable = false;
fprintf(stderr, "Disabling TMVP because tiles are used.\n");
@ -822,7 +843,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
}
}
else if OPT("cpuid")
cfg->cpuid = atoi(value);
cfg->cpuid = atobool(value);
else if OPT("pu-depth-inter")
return sscanf(value, "%d-%d", &cfg->pu_depth_inter.min, &cfg->pu_depth_inter.max) == 2;
else if OPT("pu-depth-intra")
@ -1022,6 +1043,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
cfg->me_early_termination = mode;
return result;
}
else if OPT("intra-rdo-et")
cfg->intra_rdo_et = (bool)atobool(value);
else if OPT("lossless")
cfg->lossless = (bool)atobool(value);
else if OPT("tmvp") {
@ -1088,6 +1111,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
if (width > 10000 || height > 10000) {
fprintf(stderr, "ROI dimensions exceed arbitrary value of 10000.\n");
fclose(f);
return 0;
}
@ -1330,7 +1354,9 @@ int kvz_config_validate(const kvz_config *const cfg)
error = 1;
}
if (cfg->gop_len && cfg->intra_period && !cfg->gop_lowdelay &&
if (cfg->gop_len &&
cfg->intra_period > 1 &&
!cfg->gop_lowdelay &&
cfg->intra_period % cfg->gop_len != 0)
{
fprintf(stderr,
@ -1391,7 +1417,7 @@ int kvz_config_validate(const kvz_config *const cfg)
}
if (!WITHIN(cfg->pu_depth_inter.min, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX) ||
!WITHIN(cfg->pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX))
!WITHIN(cfg->pu_depth_inter.max, PU_DEPTH_INTER_MIN, PU_DEPTH_INTER_MAX))
{
fprintf(stderr, "Input error: illegal value for --pu-depth-inter (%d-%d)\n",
cfg->pu_depth_inter.min, cfg->pu_depth_inter.max);
@ -1506,7 +1532,7 @@ static int validate_hevc_level(kvz_config *const cfg) {
};
int level_error = 0;
const char* level_err_prefix;
if (cfg->force_level) {
level_err_prefix = "Level warning";

273
src/cli.c
View file

@ -36,9 +36,9 @@ static const struct option long_options[] = {
{ "input", required_argument, NULL, 'i' },
{ "output", required_argument, NULL, 'o' },
{ "debug", required_argument, NULL, 'd' },
{ "width", required_argument, NULL, 'w' },
{ "width", required_argument, NULL, 'w' }, // deprecated
{ "height", required_argument, NULL, 'h' }, // deprecated
{ "frames", required_argument, NULL, 'n' }, // deprecated
{ "frames", required_argument, NULL, 'n' },
{ "qp", required_argument, NULL, 'q' },
{ "period", required_argument, NULL, 'p' },
{ "ref", required_argument, NULL, 'r' },
@ -86,7 +86,8 @@ static const struct option long_options[] = {
{ "owf", required_argument, NULL, 0 },
{ "slices", required_argument, NULL, 0 },
{ "threads", required_argument, NULL, 0 },
{ "cpuid", required_argument, NULL, 0 },
{ "cpuid", optional_argument, NULL, 0 },
{ "no-cpuid", no_argument, NULL, 0 },
{ "pu-depth-inter", required_argument, NULL, 0 },
{ "pu-depth-intra", required_argument, NULL, 0 },
{ "info", no_argument, NULL, 0 },
@ -109,6 +110,8 @@ static const struct option long_options[] = {
{ "crypto", required_argument, NULL, 0 },
{ "key", required_argument, NULL, 0 },
{ "me-early-termination",required_argument, NULL, 0 },
{ "intra-rdo-et", no_argument, NULL, 0 },
{ "no-intra-rdo-et", no_argument, NULL, 0 },
{ "lossless", no_argument, NULL, 0 },
{ "no-lossless", no_argument, NULL, 0 },
{ "tmvp", no_argument, NULL, 0 },
@ -322,15 +325,15 @@ void print_help(void)
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Required:\n"
" -i, --input : Input file\n"
" -i, --input <filename> : Input file\n"
" --input-res <res> : Input resolution [auto]\n"
" auto: detect from file name\n"
" <int>x<int>: width times height\n"
" -o, --output : Output file\n"
" - auto: Detect from file name.\n"
" - <int>x<int>: width times height\n"
" -o, --output <filename> : Output file\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Presets:\n"
" --preset=<preset> : Set options to a preset [medium]\n"
" --preset <preset> : Set options to a preset [medium]\n"
" - ultrafast, superfast, veryfast, faster,\n"
" fast, medium, slow, slower, veryslow\n"
" placebo\n"
@ -339,160 +342,176 @@ void print_help(void)
"Input:\n"
" -n, --frames <integer> : Number of frames to code [all]\n"
" --seek <integer> : First frame to code [0]\n"
" --input-fps <num>[/<denom>] : Framerate of the input video [25.0]\n"
" --source-scan-type <string> : Set source scan type [progressive].\n"
" - progressive: progressive scan\n"
" - tff: top field first\n"
" - bff: bottom field first\n"
" --input-format : P420 or P400\n"
" --input-bitdepth : 8-16\n"
" --loop-input : Re-read input file forever\n"
" --input-fps <num>[/<denom>] : Frame rate of the input video [25]\n"
" --source-scan-type <string> : Source scan type [progressive]\n"
" - progressive: Progressive scan\n"
" - tff: Top field first\n"
" - bff: Bottom field first\n"
" --input-format <string> : P420 or P400 [P420]\n"
" --input-bitdepth <int> : 8-16 [8]\n"
" --loop-input : Re-read input file forever.\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Options:\n"
" --help : Print this help message and exit\n"
" --version : Print version information and exit\n"
" --aud : Use access unit delimiters\n"
" --debug <string> : Output encoders reconstruction.\n"
" --cpuid <integer> : Disable runtime cpu optimizations with value 0.\n"
" --hash : Decoded picture hash [checksum]\n"
" --help : Print this help message and exit.\n"
" --version : Print version information and exit.\n"
" --(no-)aud : Use access unit delimiters. [disabled]\n"
" --debug <filename> : Output internal reconstruction.\n"
" --(no-)cpuid : Enable runtime CPU optimizations. [enabled]\n"
" --hash <string> : Decoded picture hash [checksum]\n"
" - none: 0 bytes\n"
" - checksum: 18 bytes\n"
" - md5: 56 bytes\n"
" --no-psnr : Don't calculate PSNR for frames\n"
" --no-info : Don't add encoder info SEI.\n"
" --(no-)psnr : Calculate PSNR for frames. [enabled]\n"
" --(no-)info : Add encoder info SEI. [enabled]\n"
" --crypto <string> : Selective encryption. Crypto support must be\n"
" enabled at compile-time. Can be 'on' or 'off' or\n"
" a list of features separated with a '+'. [off]\n"
" - on: Enable all encryption features.\n"
" - off: Disable selective encryption.\n"
" - mvs: Motion vector magnitudes.\n"
" - mv_signs: Motion vector signs.\n"
" - trans_coeffs: Coefficient magnitudes.\n"
" - trans_coeff_signs: Coefficient signs.\n"
" - intra_pred_modes: Intra prediction modes.\n"
" --key <string> : Encryption key [16,213,27,56,255,127,242,112,\n"
" 97,126,197,204,25,59,38,30]\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Video structure:\n"
" -q, --qp <integer> : Quantization Parameter [32]\n"
" -p, --period <integer> : Period of intra pictures [0]\n"
" - 0: only first picture is intra\n"
" - 1: all pictures are intra\n"
" - 2-N: every Nth picture is intra\n"
" --vps-period <integer> : Specify how often the video parameter set is\n"
" re-sent. [0]\n"
" - 0: only send VPS with the first frame\n"
" - N: send VPS with every Nth intra frame\n"
" -r, --ref <integer> : Reference frames, range 1..15 [3]\n"
" --gop <string> : Definition of GOP structure [0]\n"
" - 0: disabled\n"
" -q, --qp <integer> : Quantization parameter [22]\n"
" -p, --period <integer> : Period of intra pictures [64]\n"
" - 0: Only first picture is intra.\n"
" - 1: All pictures are intra.\n"
" - N: Every Nth picture is intra.\n"
" --vps-period <integer> : How often the video parameter set is re-sent [0]\n"
" - 0: Only send VPS with the first frame.\n"
" - N: Send VPS with every Nth intra frame.\n"
" -r, --ref <integer> : Number of reference frames, in range 1..15 [4]\n"
" --gop <string> : GOP structure [8]\n"
" - 0: Disabled\n"
" - 8: B-frame pyramid of length 8\n"
" - lp-<string>: lp-gop definition\n"
" (e.g. lp-g8d4t2, see README)\n"
" --cqmfile <string> : Custom Quantization Matrices from a file\n"
" --bitrate <integer> : Target bitrate. [0]\n"
" - 0: disable rate-control\n"
" - N: target N bits per second\n"
" --lossless : Use lossless coding\n"
" --mv-constraint : Constrain movement vectors\n"
" - none: no constraint\n"
" - frametile: constrain within the tile\n"
" - frametilemargin: constrain even more\n"
" --roi <string> : Use a delta QP map for region of interest\n"
" Read an array of delta QP values from\n"
" a file, where the first two values are the\n"
" width and height, followed by width*height\n"
" delta QP values in raster order.\n"
" The delta QP map can be any size or aspect\n"
" ratio, and will be mapped to LCU's.\n"
" --(no-)erp-aqp : Use adaptive QP for 360 video with\n"
" equirectangular projection\n"
" - lp-<string>: Low-delay P-frame GOP\n"
" (e.g. lp-g8d4t2, see README)\n"
" --cqmfile <filename> : Read custom quantization matrices from a file.\n"
" --bitrate <integer> : Target bitrate [0]\n"
" - 0: Disable rate control.\n"
" - N: Target N bits per second.\n"
" --(no-)lossless : Use lossless coding. [disabled]\n"
" --mv-constraint <string> : Constrain movement vectors. [none]\n"
" - none: No constraint\n"
" - frametile: Constrain within the tile.\n"
" - frametilemargin: Constrain even more.\n"
" --roi <filename> : Use a delta QP map for region of interest.\n"
" Reads an array of delta QP values from a text\n"
" file. The file format is: width and height of\n"
" the QP delta map followed by width*height delta\n"
" QP values in raster order. The map can be of any\n"
" size and will be scaled to the video size.\n"
" --(no-)erp-aqp : Use adaptive QP for 360 degree video with\n"
" equirectangular projection. [disabled]\n"
" --level <number> : Use the given HEVC level in the output and give\n"
" an error if the input doesn't fit to it's\n"
" limits [6.2]\n"
" Allowed values are 1, 2, 2.1, 3, 3.1, 4, 4.1\n"
" 5, 5.1, 5.2, 6, 6.1 and 6.2. The dot is\n"
" optional.\n"
" --force-level <number> : Same as --level, except instead of errors you\n"
" get warnings\n"
" --high-tier : Used with --level. Use high-tier bitrate limits\n"
" instead of the main-tier limits during encoding.\n"
" an error if level limits are exceeded. [6.2]\n"
" - 1, 2, 2.1, 3, 3.1, 4, 4.1, 5, 5.1, 5.2, 6,\n"
" 6.1, 6.2\n"
" --force-level <number> : Same as --level but warnings instead of errors.\n"
" --high-tier : Used with --level. Use high tier bitrate limits\n"
" instead of the main tier limits during encoding.\n"
" High tier requires level 4 or higher.\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Compression tools:\n"
" --deblock [<beta:tc>] : Deblocking\n"
" - beta: between -6 and 6\n"
" - tc: between -6 and 6\n"
" --(no-)sao : Sample Adaptive Offset\n"
" --(no-)rdoq : Rate-Distortion Optimized Quantization\n"
" --(no-)signhide : Sign Hiding\n"
" --(no-)smp : Symmetric Motion Partition\n"
" --(no-)amp : Asymmetric Motion Partition\n"
" --rd <integer> : Intra mode search complexity\n"
" - 0: skip intra if inter is good enough\n"
" - 1: rough intra mode search with SATD\n"
" - 2: refine intra mode search with SSE\n"
" --(no-)mv-rdo : Rate-Distortion Optimized motion vector costs\n"
" --(no-)full-intra-search\n"
" : Try all intra modes during rough search.\n"
" --(no-)transform-skip : Transform skip\n"
" --me <string> : Integer motion estimation\n"
" --(no-)deblock <beta:tc> : Deblocking filter. [0:0]\n"
" - beta: Between -6 and 6\n"
" - tc: Between -6 and 6\n"
" --sao <string> : Sample Adaptive Offset [full]\n"
" - off: SAO disabled\n"
" - band: Band offset only\n"
" - edge: Edge offset only\n"
" - full: Full SAO\n"
" --(no-)rdoq : Rate-distortion optimized quantization [enabled]\n"
" --(no-)rdoq-skip : Skip RDOQ for 4x4 blocks. [disabled]\n"
" --(no-)signhide : Sign hiding [disabled]\n"
" --(no-)smp : Symmetric motion partition [disabled]\n"
" --(no-)amp : Asymmetric motion partition [disabled]\n"
" --rd <integer> : Intra mode search complexity [0]\n"
" - 0: Skip intra if inter is good enough.\n"
" - 1: Rough intra mode search with SATD.\n"
" - 2: Refine intra mode search with SSE.\n"
" - 3: Try all intra modes and enable intra\n"
" chroma mode search.\n"
" --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n"
" [disabled]\n"
" --(no-)full-intra-search : Try all intra modes during rough search.\n"
" [disabled]\n"
" --(no-)transform-skip : Try transform skip [disabled]\n"
" --me <string> : Integer motion estimation algorithm [hexbs]\n"
" - hexbs: Hexagon Based Search\n"
" - tz: Test Zone Search\n"
" - full: Full Search\n"
" - full8, full16, full32, full64\n"
" - dia: Diamond Search\n"
" --me-steps <integer> : How many search steps does the motion estimation\n"
" do before cutting off [-1]\n"
" Has effect only for 'hexbs' and 'dia'\n"
" --subme <integer> : Set fractional pixel motion estimation level\n"
" - 0: only integer motion estimation\n"
" --me-steps <integer> : Motion estimation search step limit. Only\n"
" affects 'hexbs' and 'dia'. [-1]\n"
" --subme <integer> : Fractional pixel motion estimation level [4]\n"
" - 0: Integer motion estimation only\n"
" - 1: + 1/2-pixel horizontal and vertical\n"
" - 2: + 1/2-pixel diagonal\n"
" - 3: + 1/4-pixel horizontal and vertical\n"
" - 4: + 1/4-pixel diagonal\n"
" --pu-depth-inter <int>-<int>\n"
" : Range for sizes for inter predictions\n"
" --pu-depth-inter <int>-<int> : Inter prediction units sizes [0-3]\n"
" - 0, 1, 2, 3: from 64x64 to 8x8\n"
" --pu-depth-intra <int>-<int> : Range for sizes for intra predictions\n"
" --pu-depth-intra <int>-<int> : Intra prediction units sizes [1-4]\n"
" - 0, 1, 2, 3, 4: from 64x64 to 4x4\n"
" --(no-)bipred : Bi-prediction\n"
" --(no-)cu-split-termination\n"
" : CU split search termination condition\n"
" - off: Never terminate cu-split search\n"
" - zero: Terminate with zero residual\n"
" --(no-)me-early-termination : ME early termination condition\n"
" - off: Don't terminate early\n"
" - on: Terminate early\n"
" - sensitive: Terminate even earlier\n"
" --(no-)implicit-rdpcm : Implicit residual DPCM\n"
" Currently only supported with lossless coding.\n"
" --(no-)tmvp : Temporal Motion Vector Prediction\n"
" --(no-)rdoq-skip : Skips RDOQ for 4x4 blocks\n"
" --tr-depth-intra <int> : Transform split depth for intra blocks [0]\n"
" --(no-)bipred : Bi-prediction [disabled]\n"
" --cu-split-termination <string> : CU split search termination [zero]\n"
" - off: Don't terminate early.\n"
" - zero: Terminate when residual is zero.\n"
" --me-early-termination <string> : Motion estimation termination [on]\n"
" - off: Don't terminate early.\n"
" - on: Terminate early.\n"
" - sensitive: Terminate even earlier.\n"
" --(no-)intra-rdo-et : Check intra modes in rdo stage only until\n"
" a zero coefficient CU is found. [disabled]\n"
" --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n"
" with lossless coding. [disabled]\n"
" --(no-)tmvp : Temporal motion vector prediction [enabled]\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Parallel processing:\n"
" --threads <integer> : Number of threads to use [auto]\n"
" - 0: process everything with main thread\n"
" - N: use N threads for encoding\n"
" - auto: select based on number of cores\n"
" --owf <integer> : Frame parallelism [auto]\n"
" - N: Process N-1 frames at a time\n"
" - auto: Select automatically\n"
" --(no-)wpp : Wavefront parallel processing [enabled]\n"
" - 0: Process everything with main thread.\n"
" - N: Use N threads for encoding.\n"
" - auto: Select automatically.\n"
" --owf <integer> : Frame-level parallelism [auto]\n"
" - N: Process N+1 frames at a time.\n"
" - auto: Select automatically.\n"
" --(no-)wpp : Wavefront parallel processing. [enabled]\n"
" Enabling tiles automatically disables WPP.\n"
" To enable WPP with tiles, re-enable it after\n"
" enabling tiles.\n"
" enabling tiles. Enabling wpp with tiles is,\n"
" however, an experimental feature since it is\n"
" not supported in any HEVC profile.\n"
" --tiles <int>x<int> : Split picture into width x height uniform tiles.\n"
" --tiles-width-split <string>|u<int> :\n"
" Specifies a comma separated list of pixel\n"
" positions of tiles columns separation\n"
" coordinates.\n"
" Can also be u, followed by a single int n, in\n"
" which case it produces columns of uniform width.\n"
" - <string>: A comma-separated list of tile\n"
" column pixel coordinates.\n"
" - u<int>: Number of tile columns of uniform\n"
" width.\n"
" --tiles-height-split <string>|u<int> :\n"
" Specifies a comma separated list of pixel\n"
" positions of tiles rows separation coordinates.\n"
" Can also be u followed by and a single int n, in\n"
" which case it produces rows of uniform height.\n"
" --slices <string> : Control how slices are used\n"
" - tiles: put tiles in independent slices\n"
" - wpp: put rows in dependent slices\n"
" - tiles+wpp: do both\n"
" - <string>: A comma-separated list of tile row\n"
" column pixel coordinates.\n"
" - u<int>: Number of tile rows of uniform\n"
" height.\n"
" --slices <string> : Control how slices are used.\n"
" - tiles: Put tiles in independent slices.\n"
" - wpp: Put rows in dependent slices.\n"
" - tiles+wpp: Do both.\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Video Usability Information:\n"
" --sar <width:height> : Specify Sample Aspect Ratio\n"
" --sar <width:height> : Specify sample aspect ratio\n"
" --overscan <string> : Specify crop overscan setting [undef]\n"
" - undef, show, crop\n"
" --videoformat <string> : Specify video format [undef]\n"
@ -514,8 +533,8 @@ void print_help(void)
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Deprecated parameters: (might be removed at some point)\n"
" -w, --width : Use --input-res\n"
" -h, --height : Use --input-res\n");
" -w, --width <integer> : Use --input-res.\n"
" -h, --height <integer> : Use --input-res.\n");
}

View file

@ -184,9 +184,10 @@ void kvz_cu_array_free(cu_array_t **cua_ptr)
*/
cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua)
{
// The caller should have had another reference.
assert(cua->refcount > 0);
KVZ_ATOMIC_INC(&cua->refcount);
int32_t new_refcount = KVZ_ATOMIC_INC(&cua->refcount);
// The caller should have had another reference and we added one
// reference so refcount should be at least 2.
assert(new_refcount >= 2);
return cua;
}

View file

@ -435,7 +435,9 @@ static void encode_transform_coeff(encoder_state_t * const state,
const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y);
// Round coordinates down to a multiple of 8 to get the location of the
// containing CU.
const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x & ~7, y & ~7);
const int x_cu = 8 * (x / 8);
const int y_cu = 8 * (y / 8);
const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x_cu, y_cu);
// NxN signifies implicit transform split at the first transform level.
// There is a similar implicit split for inter, but it is only used when
@ -508,9 +510,10 @@ static void encode_transform_coeff(encoder_state_t * const state,
if (cb_flag_y | cb_flag_u | cb_flag_v) {
if (state->must_code_qp_delta) {
const int qp_delta = state->qp - state->ref_qp;
const int qp_delta_abs = ABS(qp_delta);
cabac_data_t* cabac = &state->cabac;
const int qp_pred = kvz_get_cu_ref_qp(state, x_cu, y_cu, state->last_qp);
const int qp_delta = cur_cu->qp - qp_pred;
const int qp_delta_abs = ABS(qp_delta);
cabac_data_t* cabac = &state->cabac;
// cu_qp_delta_abs prefix
cabac->cur_ctx = &cabac->ctx.cu_qp_delta_abs[0];
@ -526,7 +529,6 @@ static void encode_transform_coeff(encoder_state_t * const state,
}
state->must_code_qp_delta = false;
state->ref_qp = state->qp;
}
encode_transform_unit(state, x, y, depth);
@ -957,6 +959,9 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
const videoframe_t * const frame = state->tile->frame;
const cu_info_t *cur_cu = kvz_cu_array_at_const(frame->cu_array, x, y);
const int cu_width = LCU_WIDTH >> depth;
const int half_cu = cu_width >> 1;
const cu_info_t *left_cu = NULL;
if (x > 0) {
left_cu = kvz_cu_array_at_const(frame->cu_array, x - 1, y);
@ -973,13 +978,17 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
uint16_t abs_x = x + state->tile->offset_x;
uint16_t abs_y = y + state->tile->offset_y;
// Check for slice border FIXME
bool border_x = ctrl->in.width < abs_x + (LCU_WIDTH >> depth);
bool border_y = ctrl->in.height < abs_y + (LCU_WIDTH >> depth);
bool border_split_x = ctrl->in.width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1));
bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1));
// Check for slice border
bool border_x = ctrl->in.width < abs_x + cu_width;
bool border_y = ctrl->in.height < abs_y + cu_width;
bool border_split_x = ctrl->in.width >= abs_x + (LCU_WIDTH >> MAX_DEPTH) + half_cu;
bool border_split_y = ctrl->in.height >= abs_y + (LCU_WIDTH >> MAX_DEPTH) + half_cu;
bool border = border_x || border_y; /*!< are we in any border CU */
if (depth <= ctrl->max_qp_delta_depth) {
state->must_code_qp_delta = true;
}
// When not in MAX_DEPTH, insert split flag and split the blocks if needed
if (depth != MAX_DEPTH) {
// Implicit split flag when on border
@ -999,25 +1008,22 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
if (split_flag || border) {
// Split blocks and remember to change x and y block positions
int offset = LCU_WIDTH >> (depth + 1);
kvz_encode_coding_tree(state, x, y, depth + 1);
// TODO: fix when other half of the block would not be completely over the border
if (!border_x || border_split_x) {
kvz_encode_coding_tree(state, x + offset, y, depth + 1);
kvz_encode_coding_tree(state, x + half_cu, y, depth + 1);
}
if (!border_y || border_split_y) {
kvz_encode_coding_tree(state, x, y + offset, depth + 1);
kvz_encode_coding_tree(state, x, y + half_cu, depth + 1);
}
if (!border || (border_split_x && border_split_y)) {
kvz_encode_coding_tree(state, x + offset, y + offset, depth + 1);
kvz_encode_coding_tree(state, x + half_cu, y + half_cu, depth + 1);
}
return;
}
}
if (state->encoder_control->cfg.lossless) {
if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;
CABAC_BIN(cabac, 1, "cu_transquant_bypass_flag");
}
@ -1053,7 +1059,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
}
}
}
return;
goto end;
}
}
@ -1068,7 +1074,6 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
if (cur_cu->type == CU_INTER) {
const int num_pu = kvz_part_mode_num_parts[cur_cu->part_size];
const int cu_width = LCU_WIDTH >> depth;
for (int i = 0; i < num_pu; ++i) {
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i);
@ -1139,6 +1144,12 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
assert(0);
exit(1);
}
end:
if (is_last_cu_in_qg(state, x, y, depth)) {
state->last_qp = cur_cu->qp;
}
}

View file

@ -347,12 +347,16 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
}
encoder->lcu_dqp_enabled = cfg->target_bitrate > 0 || encoder->cfg.roi.dqps;
// NOTE: When tr_depth_inter is equal to 0, the transform is still split
// for SMP and AMP partition units.
encoder->tr_depth_inter = 0;
if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps) {
encoder->max_qp_delta_depth = 0;
} else {
encoder->max_qp_delta_depth = -1;
}
//Tiles
encoder->tiles_enable = encoder->cfg.tiles_width_count > 1 ||
encoder->cfg.tiles_height_count > 1;

View file

@ -118,7 +118,7 @@ typedef struct encoder_control_t
//! Picture weights when GOP is used.
double gop_layer_weights[MAX_GOP_LAYERS];
bool lcu_dqp_enabled;
int8_t max_qp_delta_depth;
int tr_depth_inter;

View file

@ -60,7 +60,7 @@ static void encoder_state_write_bitstream_PTL(bitstream_t *stream,
// PTL
// Profile Tier
WRITE_U(stream, 0, 2, "general_profile_space");
WRITE_U(stream, 0, 1, "general_tier_flag");
WRITE_U(stream, state->encoder_control->cfg.high_tier, 1, "general_tier_flag");
// Main Profile == 1, Main 10 profile == 2
WRITE_U(stream, (state->encoder_control->bitdepth == 8)?1:2, 5, "general_profile_idc");
/* Compatibility flags should be set at general_profile_idc
@ -455,12 +455,12 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "constrained_intra_pred_flag");
WRITE_U(stream, encoder->cfg.trskip_enable, 1, "transform_skip_enabled_flag");
if (encoder->lcu_dqp_enabled) {
if (encoder->max_qp_delta_depth >= 0) {
// Use separate QP for each LCU when rate control is enabled.
WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag");
WRITE_UE(stream, 0, "diff_cu_qp_delta_depth");
WRITE_UE(stream, encoder->max_qp_delta_depth, "diff_cu_qp_delta_depth");
} else {
WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag");
WRITE_U(stream, 0, 1, "cu_qp_delta_enabled_flag");
}
//TODO: add QP offsets

View file

@ -312,6 +312,7 @@ int kvz_encoder_state_init(encoder_state_t * const child_state, encoder_state_t
child_state->children = MALLOC(encoder_state_t, 1);
child_state->children[0].encoder_control = NULL;
child_state->crypto_hdl = NULL;
child_state->must_code_qp_delta = false;
child_state->tqj_bitstream_written = NULL;
child_state->tqj_recon_done = NULL;

View file

@ -37,9 +37,6 @@
#include "tables.h"
#include "threadqueue.h"
#define SAO_BUF_WIDTH (LCU_WIDTH + SAO_DELAY_PX + 2)
#define SAO_BUF_WIDTH_C (SAO_BUF_WIDTH / 2)
int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) {
int i;
@ -250,10 +247,18 @@ static void encoder_sao_reconstruct(const encoder_state_t *const state,
{
videoframe_t *const frame = state->tile->frame;
// Temporary buffers for SAO input pixels.
kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH];
kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C];
kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C];
// Temporary buffers for SAO input pixels. The buffers cover the pixels
// inside the LCU (LCU_WIDTH x LCU_WIDTH), SAO_DELAY_PX wide bands to the
// left and above the LCU, and one pixel border on the left and top
// sides. We add two extra pixels to the buffers because the AVX2 SAO
// reconstruction reads up to two extra bytes when using edge SAO in the
// horizontal direction.
#define SAO_BUF_WIDTH (1 + SAO_DELAY_PX + LCU_WIDTH)
#define SAO_BUF_WIDTH_C (1 + SAO_DELAY_PX/2 + LCU_WIDTH_C)
kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH + 2];
kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];
kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C + 2];
// Pointers to the top-left pixel of the LCU in the buffers.
kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)];
@ -526,68 +531,81 @@ static void encode_sao(encoder_state_t * const state,
/**
* \brief Sets the QP for each CU in state->tile->frame->cu_array.
*
* The QPs are used in deblocking.
* The QPs are used in deblocking and QP prediction.
*
* The delta QP for an LCU is coded when the first CU with coded block flag
* set is encountered. Hence, for the purposes of deblocking, all CUs
* before the first one with cbf set use state->ref_qp and all CUs after
* that use state->qp.
* The QP delta for a quantization group is coded when the first CU with
* coded block flag set is encountered. Hence, for the purposes of
* deblocking and QP prediction, all CUs before the first one that has
* cbf set use the QP predictor and all CUs after that use (QP predictor
* + QP delta).
*
* \param state encoder state
* \param x x-coordinate of the left edge of the root CU
* \param y y-coordinate of the top edge of the root CU
* \param depth depth in the CU quadtree
* \param coeffs_coded Used for tracking whether a CU with a residual
* has been encountered. Should be set to false at
* the top level.
* \return Whether there were any CUs with residual or not.
* \param last_qp QP of the last CU in the last quantization group
* \param prev_qp -1 if QP delta has not been coded in current QG,
* otherwise the QP of the current QG
*/
static bool set_cu_qps(encoder_state_t *state, int x, int y, int depth, bool coeffs_coded)
static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp)
{
if (state->qp == state->ref_qp) {
// If the QPs are equal there is no need to care about the residuals.
coeffs_coded = true;
}
// Stop recursion if the CU is completely outside the frame.
if (x >= state->tile->frame->width || y >= state->tile->frame->height) return;
cu_info_t *cu = kvz_cu_array_at(state->tile->frame->cu_array, x, y);
const int cu_width = LCU_WIDTH >> depth;
coeffs_coded = coeffs_coded || cbf_is_set_any(cu->cbf, cu->depth);
if (!coeffs_coded && cu->depth > depth) {
if (depth <= state->encoder_control->max_qp_delta_depth) {
*prev_qp = -1;
}
if (cu->depth > depth) {
// Recursively process sub-CUs.
const int d = cu_width >> 1;
coeffs_coded = set_cu_qps(state, x, y, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x + d, y, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x, y + d, depth + 1, coeffs_coded);
coeffs_coded = set_cu_qps(state, x + d, y + d, depth + 1, coeffs_coded);
set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp);
set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp);
set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp);
set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp);
} else {
if (!coeffs_coded && cu->tr_depth > depth) {
bool cbf_found = *prev_qp >= 0;
if (cu->tr_depth > depth) {
// The CU is split into smaller transform units. Check whether coded
// block flag is set for any of the TUs.
const int tu_width = LCU_WIDTH >> cu->tr_depth;
for (int y_scu = y; y_scu < y + cu_width; y_scu += tu_width) {
for (int x_scu = x; x_scu < x + cu_width; x_scu += tu_width) {
for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) {
for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) {
cu_info_t *tu = kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu);
if (cbf_is_set_any(tu->cbf, cu->depth)) {
coeffs_coded = true;
cbf_found = true;
}
}
}
} else if (cbf_is_set_any(cu->cbf, cu->depth)) {
cbf_found = true;
}
int8_t qp;
if (cbf_found) {
*prev_qp = qp = cu->qp;
} else {
qp = kvz_get_cu_ref_qp(state, x, y, *last_qp);
}
// Set the correct QP for all state->tile->frame->cu_array elements in
// the area covered by the CU.
const int8_t qp = coeffs_coded ? state->qp : state->ref_qp;
for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) {
for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) {
kvz_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp;
}
}
}
return coeffs_coded;
if (is_last_cu_in_qg(state, x, y, depth)) {
*last_qp = cu->qp;
}
}
}
@ -608,11 +626,13 @@ static void encoder_state_worker_encode_lcu(void * opaque)
encoder_state_recdata_to_bufs(state, lcu, state->tile->hor_buf_search, state->tile->ver_buf_search);
if (encoder->cfg.deblock_enable) {
if (encoder->lcu_dqp_enabled) {
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, false);
}
if (encoder->max_qp_delta_depth >= 0) {
int last_qp = state->last_qp;
int prev_qp = -1;
set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp);
}
if (encoder->cfg.deblock_enable) {
kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y);
}
@ -635,9 +655,6 @@ static void encoder_state_worker_encode_lcu(void * opaque)
encode_sao(state, lcu->position.x, lcu->position.y, &frame->sao_luma[lcu->position.y * frame->width_in_lcu + lcu->position.x], &frame->sao_chroma[lcu->position.y * frame->width_in_lcu + lcu->position.x]);
}
// QP delta is not used when rate control is turned off.
state->must_code_qp_delta = encoder->lcu_dqp_enabled;
//Encode coding tree
kvz_encode_coding_tree(state, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, 0);
@ -709,7 +726,7 @@ static void encoder_state_encode_leaf(encoder_state_t * const state)
const encoder_control_t *ctrl = state->encoder_control;
const kvz_config *cfg = &ctrl->cfg;
state->ref_qp = state->frame->QP;
state->last_qp = state->frame->QP;
if (cfg->crypto_features) {
state->crypto_hdl = kvz_crypto_create(cfg);
@ -1362,3 +1379,27 @@ lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y)
state->encoder_control->in.width_in_lcu;
return &state->frame->lcu_stats[index];
}
/**
 * \brief Compute the QP predictor for the quantization group that contains
 *        the CU at (x, y).
 *
 * The predictor is the rounded average of two candidates: the QP stored at
 * the position immediately left of the group's top-left corner and the QP
 * stored at the position immediately above it. When the group's top-left
 * corner lies on an LCU boundary in a given direction, last_qp is used as
 * the candidate for that direction instead of reading the CU array.
 *
 * \param state    encoder state
 * \param x        x-coordinate of the CU
 * \param y        y-coordinate of the CU
 * \param last_qp  fallback QP used for a candidate that is not read from
 *                 the CU array
 * \return         the QP predictor
 */
int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp)
{
  const cu_array_t *const cu_array = state->tile->frame->cu_array;

  // The group depth is the smaller of the configured maximum QP delta depth
  // and the depth of the CU at (x, y).
  const int qg_depth = MIN(state->encoder_control->max_qp_delta_depth,
                           kvz_cu_array_at_const(cu_array, x, y)->depth);

  // Clearing the low bits snaps (x, y) to the top-left corner of the group.
  const int qg_mask = (LCU_WIDTH >> qg_depth) - 1;
  const int qg_left = x & ~qg_mask;
  const int qg_top  = y & ~qg_mask;

  // Left candidate.
  const int qp_left = (qg_left % LCU_WIDTH > 0)
    ? kvz_cu_array_at_const(cu_array, qg_left - 1, qg_top)->qp
    : last_qp;

  // Above candidate.
  const int qp_above = (qg_top % LCU_WIDTH > 0)
    ? kvz_cu_array_at_const(cu_array, qg_left, qg_top - 1)->qp
    : last_qp;

  return (qp_left + qp_above + 1) >> 1;
}

View file

@ -268,10 +268,17 @@ typedef struct encoder_state_t {
bool must_code_qp_delta;
/**
* \brief Reference for computing QP delta for the next LCU that is coded
* next. Updated whenever a QP delta is coded.
* \brief QP value of the last CU in the last coded quantization group.
*
* A quantization group is a square of width
* (LCU_WIDTH >> encoder_control->max_qp_delta_depth). All CUs in the
* same quantization group share the QP predictor value, but may have
* different QP values.
*
* Set to the frame QP at the beginning of a wavefront row or a tile and
* updated when the last CU of a quantization group is coded.
*/
int8_t ref_qp;
int8_t last_qp;
/**
* \brief Coeffs for the LCU.
@ -297,6 +304,8 @@ void kvz_encoder_create_ref_lists(const encoder_state_t *const state);
lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y);
int kvz_get_cu_ref_qp(const encoder_state_t *state, int x, int y, int last_qp);
/**
* Whether the parameter sets should be written with the current frame.
*/
@ -309,6 +318,30 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state)
(vps_period >= 0 && frame == 0);
}
/**
 * \brief Tells whether a CU is the last CU of the quantization group it
 *        belongs to.
 *
 * A CU counts as last when both its right edge and its bottom edge either
 * fall on a quantization group boundary or reach the frame edge. The
 * function always returns false when max_qp_delta_depth is negative.
 *
 * \param state   encoder state
 * \param x       x-coordinate of the left edge of the CU
 * \param y       y-coordinate of the top edge of the CU
 * \param depth   depth in the CU tree
 * \return        true, if it's the last CU in its QG, otherwise false
 */
static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth)
{
  const int qg_depth = state->encoder_control->max_qp_delta_depth;
  if (qg_depth < 0) {
    return false;
  }

  const int qg_width = LCU_WIDTH >> qg_depth;
  const int right    = x + (LCU_WIDTH >> depth);
  const int bottom   = y + (LCU_WIDTH >> depth);

  // Right edge is strictly inside both the group and the frame.
  if (right % qg_width != 0 && right < state->tile->frame->width) {
    return false;
  }

  // Bottom edge is strictly inside both the group and the frame.
  if (bottom % qg_width != 0 && bottom < state->tile->frame->height) {
    return false;
  }

  return true;
}
static const uint8_t g_group_idx[32] = {
0, 1, 2, 3, 4, 4, 5, 5, 6, 6,
6, 6, 7, 7, 7, 7, 8, 8, 8, 8,

View file

@ -262,7 +262,7 @@ static bool is_on_8x8_grid(int x, int y, edge_dir dir)
static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir)
{
if (!state->encoder_control->lcu_dqp_enabled) {
if (state->encoder_control->max_qp_delta_depth < 0) {
return state->qp;
}

View file

@ -128,10 +128,10 @@ void kvz_image_free(kvz_picture *const im)
*/
kvz_picture *kvz_image_copy_ref(kvz_picture *im)
{
// The caller should have had another reference.
assert(im->refcount > 0);
KVZ_ATOMIC_INC(&(im->refcount));
int32_t new_refcount = KVZ_ATOMIC_INC(&im->refcount);
// The caller should have had another reference and we added one
// reference so refcount should be at least 2.
assert(new_refcount >= 2);
return im;
}

View file

@ -142,8 +142,8 @@ static void set_frame_info(kvz_frame_info *const info, const encoder_state_t *co
info->nal_unit_type = state->frame->pictype;
info->slice_type = state->frame->slicetype;
memset(info->ref_list[0], 0, 16);
memset(info->ref_list[1], 0, 16);
memset(info->ref_list[0], 0, 16 * sizeof(int));
memset(info->ref_list[1], 0, 16 * sizeof(int));
for (size_t i = 0; i < state->frame->ref_LX_size[0]; i++) {
info->ref_list[0][i] = state->frame->ref->pocs[state->frame->ref_LX[0][i]];

View file

@ -323,6 +323,7 @@ typedef struct kvz_config
uint8_t *optional_key;
enum kvz_me_early_termination me_early_termination; /*!< \since 3.8.0 \brief Mode of me early termination. */
int32_t intra_rdo_et; /*!< \since 4.1.0 \brief Use early termination in intra rdo. */
int32_t lossless; /*!< \brief Use lossless coding. */

View file

@ -138,6 +138,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
to->type = cu->type;
to->depth = cu->depth;
to->part_size = cu->part_size;
to->qp = cu->qp;
if (cu->type == CU_INTRA) {
to->intra.mode = cu->intra.mode;
@ -413,6 +414,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->tr_depth = depth > 0 ? depth : 1;
cur_cu->type = CU_NOTSET;
cur_cu->part_size = SIZE_2Nx2N;
cur_cu->qp = state->qp;
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.

View file

@ -1747,6 +1747,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
cur_pu->type = CU_INTER;
cur_pu->part_size = part_mode;
cur_pu->depth = depth;
cur_pu->qp = state->qp;
double cost = MAX_INT;
uint32_t bitcost = MAX_INT;

View file

@ -621,8 +621,17 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu);
costs[rdo_mode] += mode_cost;
// Early termination if no coefficients has to be coded
if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(pred_cu.cbf, depth)) {
modes_to_check = rdo_mode + 1;
break;
}
}
// Update order according to new costs
sort_modes(modes, costs, modes_to_check);
// The best transform split hierarchy is not saved anywhere, so to get the
// transform split hierarchy the search has to be performed again with the
// best mode.

View file

@ -26,9 +26,6 @@
#ifdef _WIN32
#include <windows.h>
#elif MACOS
#include <sys/param.h>
#include <sys/sysctl.h>
#else
#include <unistd.h>
#endif
@ -372,6 +369,7 @@ static INLINE int get_cpuid(unsigned level, unsigned sublevel, cpuid_t *cpu_info
#endif // COMPILE_INTEL
#if COMPILE_POWERPC
# if defined(__linux__)
#include <fcntl.h>
#include <unistd.h>
#include <linux/auxvec.h>
@ -406,6 +404,49 @@ out_close:
close(fd);
return result;
}
# elif defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
/**
 * \brief Query the "hw.cpu_features" sysctl for AltiVec support.
 *
 * The result of sysctlbyname is not checked; if the query does not fill in
 * the value, the zero-initialized feature mask makes this return 0.
 *
 * \return 1 if the PPC_FEATURE_HAS_ALTIVEC bit is set, otherwise 0
 */
static int altivec_available(void)
{
  u_long feature_mask = 0;
  size_t mask_size = sizeof(feature_mask);

  sysctlbyname("hw.cpu_features", &feature_mask, &mask_size, NULL, 0);

  return (feature_mask & PPC_FEATURE_HAS_ALTIVEC) != 0;
}
# elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
#ifndef __APPLE__
#include <machine/cpu.h>
#endif
/**
 * \brief Query sysctl for AltiVec support.
 *
 * Uses the { CTL_HW, HW_VECTORUNIT } MIB when HW_VECTORUNIT is defined and
 * { CTL_MACHDEP, CPU_ALTIVEC } otherwise. The result of sysctl is not
 * checked; if the query does not fill in the value, the zero-initialized
 * default is returned.
 *
 * \return the value reported by sysctl, or 0 if nothing was written
 */
static int altivec_available(void)
{
#ifdef HW_VECTORUNIT
  int name[] = { CTL_HW, HW_VECTORUNIT };
#else
  int name[] = { CTL_MACHDEP, CPU_ALTIVEC };
#endif
  int has_altivec = 0;
  size_t value_size = sizeof(has_altivec);

  sysctl(name, sizeof(name) / sizeof(name[0]), &has_altivec, &value_size, NULL, 0);

  return has_altivec;
}
# else
/**
 * \brief Report AltiVec support as fixed at compile time.
 *
 * \return 1 if COMPILE_POWERPC_ALTIVEC is non-zero, otherwise 0
 */
static int altivec_available(void)
{
#if COMPILE_POWERPC_ALTIVEC
  const int compiled_with_altivec = 1;
#else
  const int compiled_with_altivec = 0;
#endif
  return compiled_with_altivec;
}
# endif
#endif //COMPILE_POWERPC
static void set_hardware_flags(int32_t cpuid) {

View file

@ -500,9 +500,10 @@ int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depen
*/
threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job)
{
// The caller should have had another reference.
assert(job->refcount > 0);
KVZ_ATOMIC_INC(&job->refcount);
int32_t new_refcount = KVZ_ATOMIC_INC(&job->refcount);
// The caller should have had another reference and we added one
// reference so refcount should be at least 2.
assert(new_refcount >= 2);
return job;
}

View file

@ -186,7 +186,7 @@ SUITE(dct_tests)
// Loop through all strategies picking out the intra sad ones and run
// selected strategies through all tests
for (unsigned i = 0; i < strategies.count; ++i) {
for (volatile unsigned i = 0; i < strategies.count; ++i) {
const strategy_t * strategy = &strategies.strategies[i];
// Select buffer width according to function name for dct function.

View file

@ -329,7 +329,7 @@ SUITE(sad_tests)
setup_tests();
for (unsigned i = 0; i < strategies.count; ++i) {
for (volatile unsigned i = 0; i < strategies.count; ++i) {
if (strcmp(strategies.strategies[i].type, "reg_sad") != 0) {
continue;
}

View file

@ -355,7 +355,7 @@ SUITE(speed_tests)
// Loop through all strategies picking out the intra sad ones and run
// selected strategies through all tests
for (unsigned i = 0; i < strategies.count; ++i) {
for (volatile unsigned i = 0; i < strategies.count; ++i) {
const strategy_t * strategy = &strategies.strategies[i];
// Select buffer width according to function name.

View file

@ -9,4 +9,5 @@ common_args='-p0 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-s
valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=1
valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=4
valgrind_test 264x130 20 $common_args --gop=8 -p16 --owf=0
valgrind_test 264x130 10 $common_args --gop=8 -p1 --owf=4
valgrind_test 264x130 10 $common_args --gop=lp-g4d3t1 -p5 --owf=4

View file

@ -34,9 +34,18 @@ valgrind_test() {
prepare "${dimensions}" "${frames}"
# If $KVZ_TEST_VALGRIND is defined and equal to "1", run the test with
# valgrind. Otherwise, run without valgrind.
if [ "${KVZ_TEST_VALGRIND:-0}" = '1' ]; then
valgrind='valgrind --leak-check=full --error-exitcode=1 --'
else
valgrind=''
fi
# No quotes for $valgrind because it expands to multiple (or zero)
# arguments.
print_and_run \
libtool execute \
valgrind --leak-check=full --error-exitcode=1 -- \
libtool execute $valgrind \
../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@"
print_and_run \