448 lines
13 KiB
Python
Executable file
448 lines
13 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
#
|
|
# updateDocumentToC.py
|
|
#
|
|
# Insert table of contents at top of Catch markdown documents.
|
|
#
|
|
# This script is distributed under the GNU General Public License v3.0
|
|
#
|
|
# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
|
|
# https://github.com/rasbt/markdown-toclify
|
|
#
|
|
|
|
import argparse
|
|
import glob
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from scriptCommon import catchPath
|
|
|
|
# Configuration:
|
|
|
|
minTocEntries = 4
|
|
|
|
headingExcludeDefault = [1,3,4,5] # use level 2 headers for at default
|
|
headingExcludeRelease = [1,3,4,5] # use level 1 headers for release-notes.md
|
|
|
|
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
|
|
releaseNotesName = 'release-notes.md'
|
|
|
|
contentTitle = '**Contents**'
|
|
contentLineNo = 4
|
|
contentLineNdx = contentLineNo - 1
|
|
|
|
# End configuration
|
|
|
|
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
|
|
|
|
def readLines(in_file):
|
|
"""Returns a list of lines from a input markdown file."""
|
|
|
|
with open(in_file, 'r') as inf:
|
|
in_contents = inf.read().split('\n')
|
|
return in_contents
|
|
|
|
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
|
|
"""Removes existing [back to top] links and <a id> tags."""
|
|
|
|
if not remove:
|
|
return lines[:]
|
|
|
|
out = []
|
|
for l in lines:
|
|
if l.startswith(remove):
|
|
continue
|
|
out.append(l)
|
|
return out
|
|
|
|
def removeToC(lines):
|
|
"""Removes existing table of contents starting at index contentLineNdx."""
|
|
if not lines[contentLineNdx ].startswith(contentTitle):
|
|
return lines[:]
|
|
|
|
result_top = lines[:contentLineNdx]
|
|
|
|
pos = contentLineNdx + 1
|
|
while lines[pos].startswith('['):
|
|
pos = pos + 1
|
|
|
|
result_bottom = lines[pos + 1:]
|
|
|
|
return result_top + result_bottom
|
|
|
|
def dashifyHeadline(line):
|
|
"""
|
|
Takes a header line from a Markdown document and
|
|
returns a tuple of the
|
|
'#'-stripped version of the head line,
|
|
a string version for <a id=''></a> anchor tags,
|
|
and the level of the headline as integer.
|
|
E.g.,
|
|
>>> dashifyHeadline('### some header lvl3')
|
|
('Some header lvl3', 'some-header-lvl3', 3)
|
|
|
|
"""
|
|
stripped_right = line.rstrip('#')
|
|
stripped_both = stripped_right.lstrip('#')
|
|
level = len(stripped_right) - len(stripped_both)
|
|
stripped_wspace = stripped_both.strip()
|
|
|
|
# GitHub's sluggification works in an interesting way
|
|
# 1) '+', '/', '(', ')' and so on are just removed
|
|
# 2) spaces are converted into '-' directly
|
|
# 3) multiple -- are not collapsed
|
|
|
|
dashified = ''
|
|
for c in stripped_wspace:
|
|
if c in VALIDS:
|
|
dashified += c.lower()
|
|
elif c.isspace():
|
|
dashified += '-'
|
|
else:
|
|
# Unknown symbols are just removed
|
|
continue
|
|
|
|
return [stripped_wspace, dashified, level]
|
|
|
|
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
|
|
"""
|
|
Gets headlines from the markdown document and creates anchor tags.
|
|
|
|
Keyword arguments:
|
|
lines: a list of sublists where every sublist
|
|
represents a line from a Markdown document.
|
|
id_tag: if true, creates inserts a the <a id> tags (not req. by GitHub)
|
|
back_links: if true, adds "back to top" links below each headline
|
|
exclude_h: header levels to exclude. E.g., [2, 3]
|
|
excludes level 2 and 3 headings.
|
|
|
|
Returns a tuple of 2 lists:
|
|
1st list:
|
|
A modified version of the input list where
|
|
<a id="some-header"></a> anchor tags where inserted
|
|
above the header lines (if github is False).
|
|
|
|
2nd list:
|
|
A list of 3-value sublists, where the first value
|
|
represents the heading, the second value the string
|
|
that was inserted assigned to the IDs in the anchor tags,
|
|
and the third value is an integer that represents the headline level.
|
|
E.g.,
|
|
[['some header lvl3', 'some-header-lvl3', 3], ...]
|
|
|
|
"""
|
|
out_contents = []
|
|
headlines = []
|
|
for l in lines:
|
|
saw_headline = False
|
|
|
|
orig_len = len(l)
|
|
l_stripped = l.lstrip()
|
|
|
|
if l_stripped.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')):
|
|
|
|
# comply with new markdown standards
|
|
|
|
# not a headline if '#' not followed by whitespace '##no-header':
|
|
if not l.lstrip('#').startswith(' '):
|
|
continue
|
|
# not a headline if more than 6 '#':
|
|
if len(l) - len(l.lstrip('#')) > 6:
|
|
continue
|
|
# headers can be indented by at most 3 spaces:
|
|
if orig_len - len(l_stripped) > 3:
|
|
continue
|
|
|
|
# ignore empty headers
|
|
if not set(l) - {'#', ' '}:
|
|
continue
|
|
|
|
saw_headline = True
|
|
dashified = dashifyHeadline(l)
|
|
|
|
if not exclude_h or not dashified[-1] in exclude_h:
|
|
if id_tag:
|
|
id_tag = '<a class="mk-toclify" id="%s"></a>'\
|
|
% (dashified[1])
|
|
out_contents.append(id_tag)
|
|
headlines.append(dashified)
|
|
|
|
out_contents.append(l)
|
|
if back_links and saw_headline:
|
|
out_contents.append('[[back to top](#table-of-contents)]')
|
|
return out_contents, headlines
|
|
|
|
def positioningHeadlines(headlines):
|
|
"""
|
|
Strips unnecessary whitespaces/tabs if first header is not left-aligned
|
|
"""
|
|
left_just = False
|
|
for row in headlines:
|
|
if row[-1] == 1:
|
|
left_just = True
|
|
break
|
|
if not left_just:
|
|
for row in headlines:
|
|
row[-1] -= 1
|
|
return headlines
|
|
|
|
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
|
|
"""
|
|
Creates the table of contents from the headline list
|
|
that was returned by the tagAndCollect function.
|
|
|
|
Keyword Arguments:
|
|
headlines: list of lists
|
|
e.g., ['Some header lvl3', 'some-header-lvl3', 3]
|
|
hyperlink: Creates hyperlinks in Markdown format if True,
|
|
e.g., '- [Some header lvl1](#some-header-lvl1)'
|
|
top_link: if True, add a id tag for linking the table
|
|
of contents itself (for the back-to-top-links)
|
|
no_toc_header: suppresses TOC header if True.
|
|
|
|
Returns a list of headlines for a table of contents
|
|
in Markdown format,
|
|
e.g., [' - [Some header lvl3](#some-header-lvl3)', ...]
|
|
|
|
"""
|
|
processed = []
|
|
if not no_toc_header:
|
|
if top_link:
|
|
processed.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
|
|
processed.append(contentTitle + '<br>')
|
|
|
|
for line in headlines:
|
|
if hyperlink:
|
|
item = '[%s](#%s)' % (line[0], line[1])
|
|
else:
|
|
item = '%s- %s' % ((line[2]-1)*' ', line[0])
|
|
processed.append(item + '<br>')
|
|
processed.append('\n')
|
|
return processed
|
|
|
|
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
|
|
"""
|
|
Returns a string with the Markdown output contents incl.
|
|
the table of contents.
|
|
|
|
Keyword arguments:
|
|
toc_headlines: lines for the table of contents
|
|
as created by the createToc function.
|
|
body: contents of the Markdown file including
|
|
ID-anchor tags as returned by the
|
|
tagAndCollect function.
|
|
spacer: Adds vertical space after the table
|
|
of contents. Height in pixels.
|
|
placeholder: If a placeholder string is provided, the placeholder
|
|
will be replaced by the TOC instead of inserting the TOC at
|
|
the top of the document
|
|
|
|
"""
|
|
if spacer:
|
|
spacer_line = ['\n<div style="height:%spx;"></div>\n' % (spacer)]
|
|
toc_markdown = "\n".join(toc_headlines + spacer_line)
|
|
else:
|
|
toc_markdown = "\n".join(toc_headlines)
|
|
|
|
if placeholder:
|
|
body_markdown = "\n".join(body)
|
|
markdown = body_markdown.replace(placeholder, toc_markdown)
|
|
else:
|
|
body_markdown_p1 = "\n".join(body[:contentLineNdx ]) + '\n'
|
|
body_markdown_p2 = "\n".join(body[ contentLineNdx:])
|
|
markdown = body_markdown_p1 + toc_markdown + body_markdown_p2
|
|
|
|
return markdown
|
|
|
|
def outputMarkdown(markdown_cont, output_file):
|
|
"""
|
|
Writes to an output file if `outfile` is a valid path.
|
|
|
|
"""
|
|
if output_file:
|
|
with open(output_file, 'w') as out:
|
|
out.write(markdown_cont)
|
|
|
|
def markdownToclify(
|
|
input_file,
|
|
output_file=None,
|
|
min_toc_len=2,
|
|
github=False,
|
|
back_to_top=False,
|
|
nolink=False,
|
|
no_toc_header=False,
|
|
spacer=0,
|
|
placeholder=None,
|
|
exclude_h=None):
|
|
""" Function to add table of contents to markdown files.
|
|
|
|
Parameters
|
|
-----------
|
|
input_file: str
|
|
Path to the markdown input file.
|
|
|
|
output_file: str (default: None)
|
|
Path to the markdown output file.
|
|
|
|
min_toc_len: int (default: 2)
|
|
Minimum number of entries to create a table of contents for.
|
|
|
|
github: bool (default: False)
|
|
Uses GitHub TOC syntax if True.
|
|
|
|
back_to_top: bool (default: False)
|
|
Inserts back-to-top links below headings if True.
|
|
|
|
nolink: bool (default: False)
|
|
Creates the table of contents without internal links if True.
|
|
|
|
no_toc_header: bool (default: False)
|
|
Suppresses the Table of Contents header if True
|
|
|
|
spacer: int (default: 0)
|
|
Inserts horizontal space (in pixels) after the table of contents.
|
|
|
|
placeholder: str (default: None)
|
|
Inserts the TOC at the placeholder string instead
|
|
of inserting the TOC at the top of the document.
|
|
|
|
exclude_h: list (default None)
|
|
Excludes header levels, e.g., if [2, 3], ignores header
|
|
levels 2 and 3 in the TOC.
|
|
|
|
Returns
|
|
-----------
|
|
changed: Boolean
|
|
True if the file has been updated, False otherwise.
|
|
|
|
"""
|
|
cleaned_contents = removeLines(
|
|
removeToC(readLines(input_file)),
|
|
remove=('[[back to top]', '<a class="mk-toclify"'))
|
|
|
|
processed_contents, raw_headlines = tagAndCollect(
|
|
cleaned_contents,
|
|
id_tag=not github,
|
|
back_links=back_to_top,
|
|
exclude_h=exclude_h)
|
|
|
|
# add table of contents?
|
|
if len(raw_headlines) < min_toc_len:
|
|
processed_headlines = []
|
|
else:
|
|
leftjustified_headlines = positioningHeadlines(raw_headlines)
|
|
|
|
processed_headlines = createToc(
|
|
leftjustified_headlines,
|
|
hyperlink=not nolink,
|
|
top_link=not nolink and not github,
|
|
no_toc_header=no_toc_header)
|
|
|
|
if nolink:
|
|
processed_contents = cleaned_contents
|
|
|
|
cont = buildMarkdown(
|
|
toc_headlines=processed_headlines,
|
|
body=processed_contents,
|
|
spacer=spacer,
|
|
placeholder=placeholder)
|
|
|
|
if output_file:
|
|
outputMarkdown(cont, output_file)
|
|
|
|
def isReleaseNotes(f):
|
|
return os.path.basename(f) == releaseNotesName
|
|
|
|
def excludeHeadingsFor(f):
|
|
return headingExcludeRelease if isReleaseNotes(f) else headingExcludeDefault
|
|
|
|
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
|
|
"""Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise."""
|
|
if verbose :
|
|
print( 'file: {}'.format(input_file))
|
|
|
|
output_file = input_file + '.tmp'
|
|
|
|
markdownToclify(
|
|
input_file=input_file,
|
|
output_file=output_file,
|
|
min_toc_len=min_toc_len,
|
|
github=True,
|
|
back_to_top=False,
|
|
nolink=False,
|
|
no_toc_header=False,
|
|
spacer=False,
|
|
placeholder=False,
|
|
exclude_h=excludeHeadingsFor(input_file))
|
|
|
|
# prevent race-condition (Python 3.3):
|
|
if sys.version_info >= (3, 3):
|
|
os.replace(output_file, input_file)
|
|
else:
|
|
os.remove(input_file)
|
|
os.rename(output_file, input_file)
|
|
|
|
return 1
|
|
|
|
def updateDocumentToC(paths, min_toc_len, verbose):
|
|
"""Add or update table of contents to specified paths. Return number of changed files"""
|
|
n = 0
|
|
for g in paths:
|
|
for f in glob.glob(g):
|
|
if os.path.isfile(f):
|
|
n = n + updateSingleDocumentToC(input_file=f, min_toc_len=min_toc_len, verbose=verbose)
|
|
return n
|
|
|
|
def updateDocumentToCMain():
|
|
"""Add or update table of contents to specified paths."""
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description='Add or update table of contents in markdown documents.',
|
|
epilog="""""",
|
|
formatter_class=argparse.RawTextHelpFormatter)
|
|
|
|
parser.add_argument(
|
|
'Input',
|
|
metavar='file',
|
|
type=str,
|
|
nargs=argparse.REMAINDER,
|
|
help='files to process, at default: docs/*.md')
|
|
|
|
parser.add_argument(
|
|
'-v', '--verbose',
|
|
action='store_true',
|
|
help='report the name of the file being processed')
|
|
|
|
parser.add_argument(
|
|
'--min-toc-entries',
|
|
dest='minTocEntries',
|
|
default=minTocEntries,
|
|
type=int,
|
|
metavar='N',
|
|
help='the minimum number of entries to create a table of contents for [{default}]'.format(default=minTocEntries))
|
|
|
|
parser.add_argument(
|
|
'--remove-toc',
|
|
action='store_const',
|
|
dest='minTocEntries',
|
|
const=99,
|
|
help='remove all tables of contents')
|
|
|
|
args = parser.parse_args()
|
|
|
|
paths = args.Input if args.Input else [documentsDefault]
|
|
|
|
changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)
|
|
|
|
if changedFiles > 0:
|
|
print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
|
|
else:
|
|
print( "No table of contents added or updated" )
|
|
|
|
if __name__ == '__main__':
|
|
updateDocumentToCMain()
|
|
|
|
# end of file
|