diff --git a/rdcost-weight-tool/README-rdcost-thingy.txt b/rdcost-weight-tool/README-rdcost-thingy.txt
deleted file mode 100644
index 55e9392c..00000000
--- a/rdcost-weight-tool/README-rdcost-thingy.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Build Kvazaar as usual with make, then edit extract_rdcosts.py so that the
-parameters suit your usage (the directories, num of threads and Kvazaar
-params) and then run extract_rdcosts.py. It will run a lot of Kvazaar
-instances in parallel to encode a lot of videos and sift off all the coeff
-groups they measure RD cost for. The coeff groups will be written into the
-relevant data file in the following format (although through GZIP):
-
-Size (B)  | Description
-----------+------------
-4         | size: Coeff group size, in int16's
-4         | ccc: Coeff group's coding cost
-size * 2  | coeffs: Coeff group data
-
-You can roll your own filter_rdcosts.c program to analyze the data the way
-you want, and run it like:
-
-$ gzip -d < /path/to/compressed_datafile.gz | ./filter_rdcosts | less
-
-Maybe one day, there'll be a multithreaded script like extract_rdcosts.py to
-automate and parallelize processing of a massive heap of data files.
-
-EDIT:
-It's now possible to do OLS regression by streaming the source data twice
-from source and using Octave to invert the temporary result matrix, and
-that's what run_filter.py does in parallel. To do this on data you've
-gathered by extract_rdcosts.py:
-
-$ gcc filter_rdcosts.c -o frcosts_matrix
-$ gcc ols_2ndpart.c -o ols_2ndpart
-$ ./run_filter.py
-
-Although you should probably adjust the run_filter.py params before actually
-running it
diff --git a/rdcost-weight-tool/README.txt b/rdcost-weight-tool/README.txt
new file mode 100644
index 00000000..090bef97
--- /dev/null
+++ b/rdcost-weight-tool/README.txt
@@ -0,0 +1,35 @@
+To extract the block costs, build Kvazaar as usual, then edit the relevant
+parameters at the beginning of extract_rdcosts.py and run_filter.py, most
+importantly the number of cores and the set of video sequences you want to
+encode to extract costs. Run extract_rdcosts.py; it will use Kvazaar to encode
+each sequence and extract the costs measured there for the quantized blocks.
+The costs are stored compressed and sorted by block QP, in the following
+format:
+
+Size (B)  | Description
+----------+------------
+4         | size: Coeff group size, in int16's
+4         | ccc: Coeff group's coding cost
+size * 2  | coeffs: Coeff group data
+
+To analyze the costs by running a linear regression over them, first build
+the two tools using:
+
+$ gcc filter_rdcosts.c -O2 -o frcosts_matrix
+$ gcc ols_2ndpart.c -O2 -o ols_2ndpart
+
+Then run the regression in parallel by running run_filter.py. The reason to
+do it this way is that the data is stored compressed, so there is no way to
+mmap it in Matlab/Octave/something; the data sets are absolutely huge (larger
+than reasonable amounts of RAM in a decent workstation), but this way we can
+store the data compressed and process it in O(1) memory complexity,
+parallelized as widely as you have CPU cores. The result files each consist
+of 4 numbers, which represent an approximate linear solution to the
+corresponding set of costs: the price in bits of a coefficient whose absolute
+value is a) 0, b) 1, c) 2, d) 3 or higher.
+
+After that, run rdcost_do_avg.py. It will calculate a per-QP average of the
+costs over the set of sequences that were run (i.e. for each QP, take the
+results for that QP from each sequence and calculate their average). This
+data is what you can use to fill in the default_fast_coeff_cost_wts table in
+src/fast_coeff_cost.h.
diff --git a/rdcost-weight-tool/extract_rdcosts.py b/rdcost-weight-tool/extract_rdcosts.py
index a7a73fcb..a02ea038 100755
--- a/rdcost-weight-tool/extract_rdcosts.py
+++ b/rdcost-weight-tool/extract_rdcosts.py
@@ -7,14 +7,20 @@ import subprocess
 import threading
 import time
 
-logdir = os.path.join("/tmp", "rdcost", "logs")
-ofdir = os.path.join("/tmp", "rdcost", "data")
+# Where logs and sampled data will wind up, and where the sequences are read.
+# Do note that the sequences variable is supposed to be a tuple, because you
+# could have multiple sets of sequences.
+logdir = "/tmp/rdcost/logs"
+ofdir = "/tmp/rdcost/data"
+sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 # Note that n_kvazaars * len(dest_qps) has to be less than the max number of
 # fd's that a process can have (check it out: ulimit -a, likely 1024)
 smt_threads = 8 # Kinda lazy, but just match this to your cpu
 n_kvz_threads = 1 # How many threads each kvz instance is running?
 n_kvazaars = smt_threads // n_kvz_threads
+
+# You likely will not need to change anything below this line
 kvz_srcdir = lambda path: os.path.join(
     os.path.dirname(
         os.path.dirname(
@@ -25,7 +31,6 @@ kvz_srcdir = lambda path: os.path.join(
 
 dest_qps = tuple(range(51))
 base_qps = tuple(range(12, 43))
-sequences = ("/opt/test_seqs/custom_seqs/*/*.yuv",)
 
 kvzargs = [kvz_srcdir("kvazaar"), "--threads", str(n_kvz_threads), "--preset=ultrafast", "--fastrd-sampling", "--fast-residual-cost=0"]
 kvzenv = {"LD_LIBRARY_PATH": kvz_srcdir(".libs/")}
@@ -144,6 +149,9 @@ def threadfunc(joblist):
 def main():
     assert(isinstance(sequences, tuple))
 
+    for d in (logdir, ofdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = combinations(chain(map(glob.glob, sequences)), base_qps)
     joblist = MTSafeIterable(jobs)
 
diff --git a/rdcost-weight-tool/run_filter.py b/rdcost-weight-tool/run_filter.py
index 5d5dd92f..693f9783 100755
--- a/rdcost-weight-tool/run_filter.py
+++ b/rdcost-weight-tool/run_filter.py
@@ -10,13 +10,15 @@ import tempfile
 import threading
 import time
 
+# You should change these to your liking
 n_threads = 8
 datadirs = "/tmp/rdcost/data/"
+resultdir = "/tmp/rdcost/coeff_buckets"
+
 gzargs = ["gzip", "-d"]
 filtargs = ["./frcosts_matrix"]
 octargs = ["octave-cli", "invert_matrix.m"]
 filt2args = ["./ols_2ndpart"]
-resultdir = os.path.join("/tmp", "rdcost", "coeff_buckets")
 
 class MultiPipeManager:
     pipe_fn_template = "%02i.txt"
@@ -135,6 +137,9 @@ def scan_datadirs(path):
         yield job_name, glob.glob(os.path.join(seq_glob, qp_fn))
 
 def main():
+    for d in (datadirs, resultdir):
+        os.makedirs(d, exist_ok=True)
+
     jobs = scan_datadirs(datadirs)
     joblist = MTSafeIterable(iter(jobs))
 
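A note on consuming the data files: the record stream described by the format
table in README.txt above is straightforward to walk in Python. The following
is a minimal sketch, not part of the tool; the table does not specify byte
order or the exact type of ccc, so native byte order and an unsigned 32-bit
ccc are assumptions here, and read_records is a hypothetical helper name.

#!/usr/bin/env python3
# Minimal sketch (not part of the tool): iterate over the coeff group
# records of one gzipped data file written by extract_rdcosts.py.
# Assumptions: native byte order, ccc read as an unsigned 32-bit integer;
# adjust if your data says otherwise.
import gzip
import struct
import sys

def read_records(path):
    with gzip.open(path, "rb") as f:
        while True:
            header = f.read(8)
            if len(header) < 8:
                break  # end of stream
            size, ccc = struct.unpack("=II", header)
            coeffs = struct.unpack("=%dh" % size, f.read(size * 2))
            yield ccc, coeffs

if __name__ == "__main__":
    for ccc, coeffs in read_records(sys.argv[1]):
        print(ccc, coeffs)

Point it at one of the gzipped files under ofdir, i.e. the same files the old
README piped through "gzip -d" into filter_rdcosts.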
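And a note on the regression itself: the description in README.txt amounts to
ordinary least squares where each coeff group is one observation, the target
is ccc, and (by one plausible reading, not checked against filter_rdcosts.c)
the four features are the counts of coefficients in the group whose absolute
value is 0, 1, 2, or 3-and-up. A rough numpy sketch of that fit, accumulating
the normal equations in O(1) memory just like the two-pass pipeline:

import numpy as np

def fit(records):
    # records: iterable of (ccc, coeffs) pairs, e.g. from read_records()
    # in the sketch above.
    xtx = np.zeros((4, 4))  # running sum of x * x^T, constant memory
    xty = np.zeros(4)       # running sum of x * y
    for ccc, coeffs in records:
        x = np.zeros(4)
        for c in coeffs:
            x[min(abs(c), 3)] += 1.0  # bucket by |coeff|: 0, 1, 2, >= 3
        xtx += np.outer(x, x)
        xty += x * ccc
    # Solve (X^T X) w = X^T y; w approximates the four per-coefficient
    # bit prices that each result file stores.
    return np.linalg.solve(xtx, xty)

The real pipeline reaches the same result without loading anything into RAM
by streaming the data twice, per the old README and the pipe order in
run_filter.py: frcosts_matrix apparently accumulates the matrix on the first
pass, invert_matrix.m inverts it in Octave, and ols_2ndpart applies it on the
second pass.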