From 379cbd8901ed2a719bca0fa964fff4d3b084f0b0 Mon Sep 17 00:00:00 2001
From: Pauli Oikkonen
Date: Mon, 11 Jan 2021 18:21:17 +0200
Subject: [PATCH] Better documentation for the rdcost extraction

---
 rdcost-weight-tool/README-rdcost-thingy.txt | 33 -------------------
 rdcost-weight-tool/README.txt               | 35 +++++++++++++++++++++
 rdcost-weight-tool/extract_rdcosts.py       | 14 +++++++--
 rdcost-weight-tool/run_filter.py            |  7 ++++-
 4 files changed, 52 insertions(+), 37 deletions(-)
 delete mode 100644 rdcost-weight-tool/README-rdcost-thingy.txt
 create mode 100644 rdcost-weight-tool/README.txt

diff --git a/rdcost-weight-tool/README-rdcost-thingy.txt b/rdcost-weight-tool/README-rdcost-thingy.txt
deleted file mode 100644
index 55e9392c..00000000
--- a/rdcost-weight-tool/README-rdcost-thingy.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Build Kvazaar as usual with make, then edit extract_rdcosts.py so that the
-parameters suit your usage (the directories, num of threads and Kvazaar
-params) and then run extract_rdcosts.py. It will run a lot of Kvazaar
-instances in parallel to encode a lot of videos and sift off all the coeff
-groups they measure RD cost for. The coeff groups will be written into the
-relevant data file in the following format (although through GZIP):
-
-Size (B)  | Description
-----------+------------
-4         | size: Coeff group size, in int16's
-4         | ccc: Coeff group's coding cost
-size * 2  | coeffs: Coeff group data
-
-You can roll your own filter_rdcosts.c program to analyze the data the way
-you want, and run it like:
-
-$ gzip -d < /path/to/compressed_datafile.gz | ./filter_rdcosts | less
-
-Maybe one day, there'll be a multithreaded script like extract_rdcosts.py to
-automate and parallelize processing of a massive heap of data files.
-
-EDIT:
-It's now possible to do OLS regression by streaming the source data twice
-from source and using Octave to invert the temporary result matrix, and
-that's what run_filter.py does in parallel. To do this on data you've
-gathered by extract_rdcosts.py:
-
-$ gcc filter_rdcosts.c -o frcosts_matrix
-$ gcc ols_2ndpart.c -o ols_2ndpart
-$ ./run_filter.py
-
-Although you should probably adjust the run_filter.py params before actually
-running it
diff --git a/rdcost-weight-tool/README.txt b/rdcost-weight-tool/README.txt
new file mode 100644
index 00000000..090bef97
--- /dev/null
+++ b/rdcost-weight-tool/README.txt
@@ -0,0 +1,35 @@
+To extract the block costs, build Kvazaar as usual, then edit the relevant
+parameters at the beginning of extract_rdcosts.py and run_filter.py, most
+importantly the number of CPU cores and the set of video sequences to encode.
+Run extract_rdcosts.py; it will use Kvazaar to encode each sequence and
+extract the costs measured there for the quantized blocks. The costs are
+stored compressed and sorted by block QP, in the following format:
+
+Size (B)  | Description
+----------+------------
+4         | size: Coeff group size, in int16's
+4         | ccc: Coeff group's coding cost
+size * 2  | coeffs: Coeff group data
+
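+Should you want to inspect the data by hand, the records can be parsed with
+a short Python sketch along these lines (illustration only, not part of the
+tool; it assumes the two header fields are little-endian int32's):
+
+import gzip
+import struct
+
+def read_costs(path):
+    # Sketch only: parse records laid out as in the table above
+    with gzip.open(path, "rb") as f:
+        while True:
+            header = f.read(8)
+            if len(header) < 8:
+                break
+            size, ccc = struct.unpack("<ii", header)  # group size, coding cost
+            coeffs = struct.unpack("<%dh" % size, f.read(size * 2))
+            yield ccc, coeffs
+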
+To analyze the costs by running a linear regression over them, build the two
+filter tools:
+
+$ gcc filter_rdcosts.c -O2 -o frcosts_matrix
+$ gcc ols_2ndpart.c -O2 -o ols_2ndpart
+
+Then run the regression in parallel by running run_filter.py. The reason for
+this design is that the data is stored compressed, so there is no way to
+mmap it in Matlab or Octave, and the data sets are huge (larger than any
+reasonable amount of RAM in a decent workstation). This way the data can be
+kept compressed and processed in O(1) memory, so the work can be parallelized
+as widely as you have CPU cores. Each result file consists of 4 numbers,
+which represent an approximate linear solution to the corresponding set of
+costs: the price in bits of a coefficient whose absolute value is a) 0,
+b) 1, c) 2, d) 3 or higher.
+
+After that, run rdcost_do_avg.py. It calculates a per-QP average of the costs
+over the set of sequences that were run (i.e. for each QP, it takes each
+sequence's results for that QP and averages them). This data is what you can
+use to fill in the default_fast_coeff_cost_wts table in
+src/fast_coeff_cost.h.
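+
+In other words, the averaging is conceptually the following (a Python sketch
+for illustration only; the dict layout is made up, and the real script reads
+the result files written by run_filter.py):
+
+def per_qp_average(results):
+    # Illustrative sketch: results maps sequence -> {qp: (w0, w1, w2, w3)},
+    # i.e. the four per-QP weights from each sequence's result file
+    qps = sorted({qp for per_seq in results.values() for qp in per_seq})
+    for qp in qps:
+        weights = [per_seq[qp] for per_seq in results.values() if qp in per_seq]
+        yield qp, tuple(sum(w) / len(w) for w in zip(*weights))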
diff --git a/rdcost-weight-tool/extract_rdcosts.py b/rdcost-weight-tool/extract_rdcosts.py
index a7a73fcb..a02ea038 100755
--- a/rdcost-weight-tool/extract_rdcosts.py
+++ b/rdcost-weight-tool/extract_rdcosts.py
@@ -7,14 +7,20 @@ import subprocess
 import threading
 import time
 
-logdir = os.path.join("/tmp", "rdcost", "logs")
-ofdir = os.path.join("/tmp", "rdcost", "data")
+# Where logs and sampled data will wind up, and where the sequences are read.
+# Do note that the sequences variable is supposed to be a tuple, because you
+# could have multiple sets of sequences.
+logdir = "/tmp/rdcost/logs"
+ofdir = "/tmp/rdcost/data"
+sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 # Note that n_kvazaars * len(dest_qps) has to be less than the max number of
 # fd's that a process can have (check it out: ulimit -a, likely 1024)
 smt_threads = 8 # Kinda lazy, but just match this to your cpu
 n_kvz_threads = 1 # How many threads each kvz instance is running?
 n_kvazaars = smt_threads // n_kvz_threads
+
+# You likely will not need to change anything below this line
 kvz_srcdir = lambda path: os.path.join(
     os.path.dirname(
         os.path.dirname(
@@ -25,7 +31,6 @@ kvz_srcdir = lambda path: os.path.join(
 dest_qps = tuple(range(51))
 base_qps = tuple(range(12, 43))
 
-sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 kvzargs = [kvz_srcdir("kvazaar"), "--threads", str(n_kvz_threads), "--preset=ultrafast", "--fastrd-sampling", "--fast-residual-cost=0"]
 kvzenv = {"LD_LIBRARY_PATH": kvz_srcdir(".libs/")}
 
@@ -144,6 +149,9 @@ def threadfunc(joblist):
 def main():
     assert(isinstance(sequences, tuple))
+    for d in (logdir, ofdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = combinations(chain(map(glob.glob, sequences)), base_qps)
     joblist = MTSafeIterable(jobs)
 
diff --git a/rdcost-weight-tool/run_filter.py b/rdcost-weight-tool/run_filter.py
index 5d5dd92f..693f9783 100755
--- a/rdcost-weight-tool/run_filter.py
+++ b/rdcost-weight-tool/run_filter.py
@@ -10,13 +10,15 @@ import tempfile
 import threading
 import time
 
+# You should change these to your liking
 n_threads = 8
 datadirs = "/tmp/rdcost/data/"
+resultdir = "/tmp/rdcost/coeff_buckets"
+
 gzargs = ["gzip", "-d"]
 filtargs = ["./frcosts_matrix"]
 octargs = ["octave-cli", "invert_matrix.m"]
 filt2args = ["./ols_2ndpart"]
-resultdir = os.path.join("/tmp", "rdcost", "coeff_buckets")
 
 class MultiPipeManager:
     pipe_fn_template = "%02i.txt"
@@ -135,6 +137,9 @@ def scan_datadirs(path):
         yield job_name, glob.glob(os.path.join(seq_glob, qp_fn))
 
 def main():
+    for d in (datadirs, resultdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = scan_datadirs(datadirs)
     joblist = MTSafeIterable(iter(jobs))
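+    # The MTSafeIterable wrapper above lets the n_threads worker threads
+    # share the job iterator safely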