mirror of https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00

better documentation for the rdcost extraction

This commit is contained in:
parent 4d1eb1aab9
commit 379cbd8901
(old documentation file, deleted)
@ -1,33 +0,0 @@

Build Kvazaar as usual with make, then edit extract_rdcosts.py so that the
parameters suit your usage (the directories, number of threads and Kvazaar
parameters), and then run extract_rdcosts.py. It will run a lot of Kvazaar
instances in parallel to encode a lot of videos and sift off all the coeff
groups they measure RD cost for. The coeff groups will be written into the
relevant data file in the following format (gzip-compressed):

Size (B)  | Description
----------+------------
4         | size: Coeff group size, in int16's
4         | ccc: Coeff group's coding cost
size * 2  | coeffs: Coeff group data

You can roll your own filter_rdcosts.c program to analyze the data the way
you want, and run it like:

$ gzip -d < /path/to/compressed_datafile.gz | ./filter_rdcosts | less
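
For a quick look at the data without writing C, a stand-in filter can be
sketched in Python and dropped into the same pipeline in place of
./filter_rdcosts. This is a minimal sketch, assuming the two header fields
are little-endian int32 (if ccc turns out to be a float32, adjust the format
string); my_filter.py is a made-up name:

# my_filter.py: hypothetical stand-in for a custom filter_rdcosts.c.
# Reads the record stream described above from stdin and prints one line
# per coeff group: its size, coding cost, and peak |coefficient|.
import struct
import sys

buf = sys.stdin.buffer
while True:
    header = buf.read(8)
    if len(header) < 8:
        break  # end of stream
    size, ccc = struct.unpack("<ii", header)
    coeffs = struct.unpack("<%dh" % size, buf.read(size * 2))
    print(size, ccc, max(map(abs, coeffs), default=0))

$ gzip -d < /path/to/compressed_datafile.gz | python3 my_filter.py | less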

Maybe one day, there'll be a multithreaded script like extract_rdcosts.py to
automate and parallelize processing of a massive heap of data files.

EDIT:
It's now possible to do OLS regression by streaming the source data twice
and using Octave to invert the intermediate result matrix; that is what
run_filter.py does in parallel. To do this on data you've gathered with
extract_rdcosts.py:

$ gcc filter_rdcosts.c -o frcosts_matrix
$ gcc ols_2ndpart.c -o ols_2ndpart
$ ./run_filter.py

You should probably adjust the run_filter.py parameters before actually
running it, though.

rdcost-weight-tool/README.txt (new file, 35 lines)
@ -0,0 +1,35 @@

To extract the block costs, build Kvazaar as usual, and edit the relevant
parameters at the beginning of extract_rdcosts.py and run_filter.py, most
importantly the number of cores and the set of video sequences you want to
encode to extract costs. Run extract_rdcosts.py; it will use Kvazaar to encode
each sequence and extract the costs measured there for the quantized blocks.
The costs are stored compressed and sorted by block QP, in the following
format:

Size (B)  | Description
----------+------------
4         | size: Coeff group size, in int16's
4         | ccc: Coeff group's coding cost
size * 2  | coeffs: Coeff group data
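
As a reading aid, records in this format can be decoded with a few lines of
Python. A minimal sketch, assuming both header fields are little-endian
int32 and using a made-up file path:

import gzip
import struct

# Walk one compressed cost file record by record.
with gzip.open("/tmp/rdcost/data/example/22.gz", "rb") as f:
    while True:
        header = f.read(8)
        if len(header) < 8:
            break  # end of stream
        size, ccc = struct.unpack("<ii", header)
        coeffs = struct.unpack("<%dh" % size, f.read(size * 2))
        # ... analyze (size, ccc, coeffs) here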

To analyze the costs by running a linear regression over them, build the two
tools using:

$ gcc filter_rdcosts.c -O2 -o frcosts_matrix
$ gcc ols_2ndpart.c -O2 -o ols_2ndpart

Then run the regression in parallel by running run_filter.py. It has to be
done this way because the data is stored compressed, so there is no way to
mmap it in Matlab/Octave/something; the data sets are huge (larger than
reasonable amounts of RAM in a decent workstation), but this way the data can
be stored compressed and processed in O(1) memory, parallelized over as many
CPU cores as you have. The result files each consist of 4 numbers, which
represent an approximate linear solution to the corresponding set of costs:
the price in bits of a coefficient whose absolute value is a) 0, b) 1, c) 2,
d) 3 or higher.
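
To make the fit concrete: the model prices each coefficient by which of the
four classes (|c| = 0, 1, 2, >= 3) it falls into, so the normal equations can
be accumulated in one streaming pass. The numpy sketch below only illustrates
the math; the actual tools split the work into two passes (frcosts_matrix, an
Octave inversion, then ols_2ndpart), and the record encoding is assumed as
above:

import gzip
import struct

import numpy as np

# Accumulate X'X and X'y in O(1) memory over one record stream, then solve
# the 4-parameter least squares for the per-class bit prices.
xtx = np.zeros((4, 4))
xty = np.zeros(4)
with gzip.open("/tmp/rdcost/data/example/22.gz", "rb") as f:  # made-up path
    while True:
        header = f.read(8)
        if len(header) < 8:
            break
        size, ccc = struct.unpack("<ii", header)
        a = np.abs(np.frombuffer(f.read(size * 2), dtype=np.int16))
        # Feature vector: counts of coefficients with |c| == 0, 1, 2, >= 3.
        x = np.array([(a == 0).sum(), (a == 1).sum(),
                      (a == 2).sum(), (a >= 3).sum()], dtype=float)
        xtx += np.outer(x, x)
        xty += ccc * x
weights = np.linalg.solve(xtx, xty)  # approximate bit price per class
print(weights)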

After that, run rdcost_do_avg.py. It will calculate a per-QP average of the
costs over the set of sequences that were run (i.e. for each QP, take the
results for that QP from each sequence and calculate their average). This data
is what you can use to fill in the default_fast_coeff_cost_wts table in
src/fast_coeff_cost.h.
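
The averaging step itself is simple; as an illustration only (the result-file
naming and layout here are invented, rdcost_do_avg.py defines the real ones):

import glob
from collections import defaultdict

# Group result files by QP and average the 4 weights across sequences.
# The "<sequence>-qp<NN>.result" naming is an assumption for this sketch.
per_qp = defaultdict(list)
for path in glob.glob("/tmp/rdcost/coeff_buckets/*-qp*.result"):
    qp = int(path.rsplit("-qp", 1)[1].split(".")[0])
    with open(path) as f:
        per_qp[qp].append([float(tok) for tok in f.read().split()])

for qp in sorted(per_qp):
    avg = [sum(col) / len(col) for col in zip(*per_qp[qp])]
    print(qp, avg)  # one row of default_fast_coeff_cost_wts per QP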

extract_rdcosts.py:
@ -7,14 +7,20 @@ import subprocess
 import threading
 import time
 
-logdir = os.path.join("/tmp", "rdcost", "logs")
-ofdir = os.path.join("/tmp", "rdcost", "data")
+# Where logs and sampled data will wind up, and where the sequences are read.
+# Do note that the sequences variable is supposed to be a tuple, because you
+# could have multiple sets of sequences.
+logdir = "/tmp/rdcost/logs"
+ofdir = "/tmp/rdcost/data"
+sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 # Note that n_kvazaars * len(dest_qps) has to be less than the max number of
 # fd's that a process can have (check it out: ulimit -a, likely 1024)
 smt_threads = 8 # Kinda lazy, but just match this to your cpu
 n_kvz_threads = 1 # How many threads each kvz instance is running?
 n_kvazaars = smt_threads // n_kvz_threads
 
+# You likely will not need to change anything below this line
 kvz_srcdir = lambda path: os.path.join(
     os.path.dirname(
         os.path.dirname(
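
As a side note on the fd-limit comment above: the constraint can be checked
at startup with the standard resource module. A small sketch, not part of
the script:

import resource

# Each (kvazaar instance, QP) pair keeps an output file open, so the product
# must stay below the per-process file descriptor limit (see: ulimit -a).
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
assert n_kvazaars * len(dest_qps) < soft_limit, \
    "too many open files; raise ulimit -n or lower n_kvazaars"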
@ -25,7 +31,6 @@ kvz_srcdir = lambda path: os.path.join(
 
 dest_qps = tuple(range(51))
 base_qps = tuple(range(12, 43))
-sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 kvzargs = [kvz_srcdir("kvazaar"), "--threads", str(n_kvz_threads), "--preset=ultrafast", "--fastrd-sampling", "--fast-residual-cost=0"]
 kvzenv = {"LD_LIBRARY_PATH": kvz_srcdir(".libs/")}
@ -144,6 +149,9 @@ def threadfunc(joblist):
 
 def main():
     assert(isinstance(sequences, tuple))
+    for d in (logdir, ofdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = combinations(chain(map(glob.glob, sequences)), base_qps)
     joblist = MTSafeIterable(jobs)

run_filter.py:
@ -10,13 +10,15 @@ import tempfile
 import threading
 import time
 
+# You should change these to your liking
 n_threads = 8
 datadirs = "/tmp/rdcost/data/"
+resultdir = "/tmp/rdcost/coeff_buckets"
 
 gzargs = ["gzip", "-d"]
 filtargs = ["./frcosts_matrix"]
 octargs = ["octave-cli", "invert_matrix.m"]
 filt2args = ["./ols_2ndpart"]
-resultdir = os.path.join("/tmp", "rdcost", "coeff_buckets")
 
 class MultiPipeManager:
     pipe_fn_template = "%02i.txt"
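
For orientation, the argument lists above describe a per-file pipeline:
gzip -d feeds the filter, whose output Octave inverts for the second pass.
A rough sketch of the first pass only, using subprocess; run_filter.py's
actual plumbing (including the second pass through ols_2ndpart, which streams
the data again) is more involved:

import subprocess

def first_pass(datafile_path, matrix_out_path):
    # gzip -d < data | ./frcosts_matrix | octave-cli invert_matrix.m
    # gzargs, filtargs and octargs are the config lists defined above.
    with open(datafile_path, "rb") as inf, open(matrix_out_path, "wb") as outf:
        gz = subprocess.Popen(gzargs, stdin=inf, stdout=subprocess.PIPE)
        filt = subprocess.Popen(filtargs, stdin=gz.stdout, stdout=subprocess.PIPE)
        octv = subprocess.Popen(octargs, stdin=filt.stdout, stdout=outf)
        gz.stdout.close()    # allow SIGPIPE to propagate downstream
        filt.stdout.close()
        return octv.wait()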
@ -135,6 +137,9 @@ def scan_datadirs(path):
         yield job_name, glob.glob(os.path.join(seq_glob, qp_fn))
 
 def main():
+    for d in (datadirs, resultdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = scan_datadirs(datadirs)
     joblist = MTSafeIterable(iter(jobs))