better documentation for the rdcost extraction

mirror of https://github.com/ultravideo/uvg266.git
parent 4d1eb1aab9
commit 379cbd8901
(deleted file, 33 lines)

@@ -1,33 +0,0 @@
Build Kvazaar as usual with make, then edit extract_rdcosts.py so that the
parameters suit your usage (the directories, number of threads, and Kvazaar
parameters), and then run extract_rdcosts.py. It will run many Kvazaar
instances in parallel to encode a set of videos and collect all the coeff
groups for which they measure RD cost. The coeff groups are written,
gzip-compressed, into the relevant data file in the following format:

Size (B)  | Description
----------+------------
4         | size: Coeff group size, in int16's
4         | ccc: Coeff group's coding cost
size * 2  | coeffs: Coeff group data

You can roll your own filter_rdcosts.c program to analyze the data the way
you want, and run it like:

$ gzip -d < /path/to/compressed_datafile.gz | ./filter_rdcosts | less

Maybe one day there will be a multithreaded script like extract_rdcosts.py to
automate and parallelize the processing of a massive heap of data files.

EDIT:
It is now possible to do OLS regression by streaming the source data twice
and using Octave to invert the intermediate result matrix; that is what
run_filter.py does in parallel. To run it on data gathered by
extract_rdcosts.py:

$ gcc filter_rdcosts.c -o frcosts_matrix
$ gcc ols_2ndpart.c -o ols_2ndpart
$ ./run_filter.py

You should probably adjust the run_filter.py parameters before actually
running it, though.
rdcost-weight-tool/README.txt (new file, 35 lines)
@@ -0,0 +1,35 @@
To extract the block costs, build Kvazaar as usual, and edit the relevant
parameters at the beginning of extract_rdcosts.py and run_filter.py, most
importantly the number of cores and the set of video sequences you want to
encode to extract the costs. Run extract_rdcosts.py; it will use Kvazaar to
encode each sequence and extract the costs measured for the quantized blocks.
The costs are stored compressed and sorted by block QP, in the following
format:

Size (B)  | Description
----------+------------
4         | size: Coeff group size, in int16's
4         | ccc: Coeff group's coding cost
size * 2  | coeffs: Coeff group data
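
As an illustration of the format, records could be decoded in Python roughly
like this (a sketch: the field signedness and byte order are assumptions, so
check filter_rdcosts.c if they matter to you):

    import gzip
    import struct

    def read_records(path):
        # Each record: two 4-byte header fields (assumed unsigned,
        # little-endian), then `size` int16 coefficients.
        with gzip.open(path, "rb") as f:
            while True:
                header = f.read(8)
                if len(header) < 8:
                    break
                size, ccc = struct.unpack("<II", header)
                coeffs = struct.unpack("<%dh" % size, f.read(size * 2))
                yield size, ccc, coeffs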

To analyze the costs by running a linear regression over them, build the two
tools using:

$ gcc filter_rdcosts.c -O2 -o frcosts_matrix
$ gcc ols_2ndpart.c -O2 -o ols_2ndpart

Then run the regression in parallel by running run_filter.py. The reason for
this arrangement is that the data is stored compressed, so there is no way to
mmap it in Matlab or Octave, and the data sets are huge (larger than a
reasonable amount of RAM in a decent workstation). Keeping the data compressed
and processing it as a stream needs only O(1) memory, so the work can be
parallelized across as many CPU cores as you have. The result files each
consist of 4 numbers, which represent an approximate linear solution to the
corresponding set of costs: the price in bits of a coefficient whose absolute
value is a) 0, b) 1, c) 2, d) 3 or higher.
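
To make the regression concrete: the O(1)-memory trick is to accumulate the
normal equations while streaming. A rough Python/NumPy equivalent of what the
two C tools compute together (an illustration of the idea, not their exact
algorithm), using read_records() from the sketch above:

    import numpy as np

    def ols_stream(records):
        # Each coeff group is one observation: x counts the coefficients
        # with absolute value 0, 1, 2 and 3-or-more, y is the coding cost.
        A = np.zeros((4, 4))  # running X^T X
        b = np.zeros(4)       # running X^T y
        for size, ccc, coeffs in records:
            x = np.zeros(4)
            for c in coeffs:
                x[min(abs(c), 3)] += 1
            A += np.outer(x, x)
            b += ccc * x
        # Solving the normal equations gives the four bit prices.
        return np.linalg.solve(A, b)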

After that, run rdcost_do_avg.py. It calculates a per-QP average of the costs
over the set of sequences that were run (i.e. for each QP, it takes the
results for that QP from each sequence and averages them). This data is what
you can use to fill in the default_fast_coeff_cost_wts table in
src/fast_coeff_cost.h.
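
The averaging itself is straightforward; a sketch of the idea (the file
layout that rdcost_do_avg.py actually reads is not shown here):

    from collections import defaultdict

    def average_weights(results):
        # results: iterable of (qp, (w0, w1, w2, w3)) pairs, one per sequence.
        sums = defaultdict(lambda: [0.0] * 4)
        counts = defaultdict(int)
        for qp, weights in results:
            for i, w in enumerate(weights):
                sums[qp][i] += w
            counts[qp] += 1
        return {qp: tuple(w / counts[qp] for w in sums[qp]) for qp in sums}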

extract_rdcosts.py

@@ -7,14 +7,20 @@ import subprocess
 import threading
 import time
 
-logdir = os.path.join("/tmp", "rdcost", "logs")
-ofdir = os.path.join("/tmp", "rdcost", "data")
+# Where logs and sampled data will wind up, and where the sequences are read.
+# Do note that the sequences variable is supposed to be a tuple, because you
+# could have multiple sets of sequences.
+logdir = "/tmp/rdcost/logs"
+ofdir = "/tmp/rdcost/data"
+sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 # Note that n_kvazaars * len(dest_qps) has to be less than the max number of
 # fd's that a process can have (check it out: ulimit -a, likely 1024)
 smt_threads = 8 # Kinda lazy, but just match this to your cpu
 n_kvz_threads = 1 # How many threads each kvz instance is running?
 n_kvazaars = smt_threads // n_kvz_threads
 
 # You likely will not need to change anything below this line
 kvz_srcdir = lambda path: os.path.join(
     os.path.dirname(
         os.path.dirname(

@@ -25,7 +31,6 @@ kvz_srcdir = lambda path: os.path.join(
 
 dest_qps = tuple(range(51))
 base_qps = tuple(range(12, 43))
-sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 kvzargs = [kvz_srcdir("kvazaar"), "--threads", str(n_kvz_threads), "--preset=ultrafast", "--fastrd-sampling", "--fast-residual-cost=0"]
 kvzenv = {"LD_LIBRARY_PATH": kvz_srcdir(".libs/")}
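
The fd-limit note in the first hunk above can be turned into an explicit
check; a sketch of one (POSIX-only, not part of the actual script):

    import resource

    # One data-file fd stays open per (kvazaar instance, QP) pair, so the
    # product must stay under the per-process fd limit.
    soft, _hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    assert n_kvazaars * len(dest_qps) < soft, "raise ulimit -n or lower n_kvazaars"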

@@ -144,6 +149,9 @@ def threadfunc(joblist):
 
 def main():
+    assert(isinstance(sequences, tuple))
     for d in (logdir, ofdir):
         os.makedirs(d, exist_ok=True)
 
     jobs = combinations(chain(map(glob.glob, sequences)), base_qps)
     joblist = MTSafeIterable(jobs)

run_filter.py

@@ -10,13 +10,15 @@ import tempfile
 import threading
 import time
 
+# You should change these to your liking
 n_threads = 8
 datadirs = "/tmp/rdcost/data/"
+resultdir = "/tmp/rdcost/coeff_buckets"
 
 gzargs = ["gzip", "-d"]
 filtargs = ["./frcosts_matrix"]
 octargs = ["octave-cli", "invert_matrix.m"]
 filt2args = ["./ols_2ndpart"]
-resultdir = os.path.join("/tmp", "rdcost", "coeff_buckets")
 
 class MultiPipeManager:
     pipe_fn_template = "%02i.txt"

@@ -135,6 +137,9 @@ def scan_datadirs(path):
         yield job_name, glob.glob(os.path.join(seq_glob, qp_fn))
 
 def main():
+    for d in (datadirs, resultdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = scan_datadirs(datadirs)
     joblist = MTSafeIterable(iter(jobs))
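
For context, the four argv lists defined above (gzargs, filtargs, octargs,
filt2args) form the per-job pipeline: decompress, first streaming pass to
build the matrix, Octave inversion, then a second streaming pass. Wiring one
job up could look roughly like this (a sketch; the file-name handling and the
arguments of the two C tools are assumptions, and the real run_filter.py
plumbing may differ):

    import subprocess

    def run_job(gz_path, matrix_path, result_path):
        # Pass 1: gzip -d | frcosts_matrix, accumulating the matrix.
        with open(gz_path, "rb") as src, open(matrix_path, "wb") as mat:
            gz = subprocess.Popen(gzargs, stdin=src, stdout=subprocess.PIPE)
            filt = subprocess.Popen(filtargs, stdin=gz.stdout, stdout=mat)
            gz.stdout.close()
            filt.wait()
        # Invert the matrix with Octave (invert_matrix.m is assumed to read
        # and write fixed temporary files).
        subprocess.run(octargs, check=True)
        # Pass 2: stream the same data through ols_2ndpart for the solution.
        with open(gz_path, "rb") as src, open(result_path, "wb") as out:
            gz = subprocess.Popen(gzargs, stdin=src, stdout=subprocess.PIPE)
            ols = subprocess.Popen(filt2args, stdin=gz.stdout, stdout=out)
            gz.stdout.close()
            ols.wait()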