Rename to updateDocumentToC.py and adapt for use with Catch

adding missing GPL 3.0 license (thanks for noting @horenmar).
This commit is contained in:
Martin Moene 2017-08-29 21:34:43 +02:00 committed by Martin Hořeňovský
parent 7e9b53e40c
commit 61280e6d0a

View File

@ -1,44 +1,51 @@
#!/usr/bin/env python #!/usr/bin/env python
# #
# Sebastian Raschka 2014-2015 # updateDocumentToC.py
# #
# Python script that inserts a table of contents # Insert table of contents at top of Catch markdown documents.
# into markdown documents and creates the required
# internal links.
# #
# For more information about how internal links # This script is distributed under the GNU General Public License v3.0
# in HTML and Markdown documents work, please see
# #
# Creating a table of contents with internal links in # It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
# IPython Notebooks and Markdown documents:
# http://sebastianraschka.com/Articles/2014_ipython_internal_links.html
#
# Updates for this script will be available at
# https://github.com/rasbt/markdown-toclify # https://github.com/rasbt/markdown-toclify
# #
# for more information about the usage:
# markdown-toclify.py --help from __future__ import print_function
# from scriptCommon import catchPath
import argparse import argparse
import glob
import os
import re import re
import sys
# Configuration:
__version__ = '1.7.1' minTocEntries = 4
headingExcludeDefault = [1,3,4,5] # use level 2 headers for at default
headingExcludeRelease = [2,3,4,5] # use level 1 headers for release-notes.md
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
releaseNotesName = 'release-notes.md'
contentTitle = '**Contents** '
contentLineNo = 4
contentLineNdx = contentLineNo - 1
# End configuration
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&' VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
def readLines(in_file):
def read_lines(in_file):
"""Returns a list of lines from a input markdown file.""" """Returns a list of lines from a input markdown file."""
with open(in_file, 'r') as inf: with open(in_file, 'r') as inf:
in_contents = inf.read().split('\n') in_contents = inf.read().split('\n')
return in_contents return in_contents
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
def remove_lines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
"""Removes existing [back to top] links and <a id> tags.""" """Removes existing [back to top] links and <a id> tags."""
if not remove: if not remove:
@ -51,8 +58,22 @@ def remove_lines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
out.append(l) out.append(l)
return out return out
def removeToC(lines):
"""Removes existing table of contents starting at index contentLineNdx."""
if not lines[contentLineNdx ].startswith(contentTitle):
return lines[:]
def dashify_headline(line): result_top = lines[:contentLineNdx]
pos = contentLineNdx + 1
while lines[pos].startswith('['):
pos = pos + 1
result_bottom = lines[pos + 1:]
return result_top + result_bottom
def dashifyHeadline(line):
""" """
Takes a header line from a Markdown document and Takes a header line from a Markdown document and
returns a tuple of the returns a tuple of the
@ -60,7 +81,7 @@ def dashify_headline(line):
a string version for <a id=''></a> anchor tags, a string version for <a id=''></a> anchor tags,
and the level of the headline as integer. and the level of the headline as integer.
E.g., E.g.,
>>> dashify_headline('### some header lvl3') >>> dashifyHeadline('### some header lvl3')
('Some header lvl3', 'some-header-lvl3', 3) ('Some header lvl3', 'some-header-lvl3', 3)
""" """
@ -84,8 +105,7 @@ def dashify_headline(line):
return [stripped_wspace, dashified, level] return [stripped_wspace, dashified, level]
def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
def tag_and_collect(lines, id_tag=True, back_links=False, exclude_h=None):
""" """
Gets headlines from the markdown document and creates anchor tags. Gets headlines from the markdown document and creates anchor tags.
@ -118,9 +138,9 @@ def tag_and_collect(lines, id_tag=True, back_links=False, exclude_h=None):
saw_headline = False saw_headline = False
orig_len = len(l) orig_len = len(l)
l = l.lstrip() l_stripped = l.lstrip()
if l.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')): if l_stripped.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')):
# comply with new markdown standards # comply with new markdown standards
@ -131,7 +151,7 @@ def tag_and_collect(lines, id_tag=True, back_links=False, exclude_h=None):
if len(l) - len(l.lstrip('#')) > 6: if len(l) - len(l.lstrip('#')) > 6:
continue continue
# headers can be indented by at most 3 spaces: # headers can be indented by at most 3 spaces:
if orig_len - len(l) > 3: if orig_len - len(l_stripped) > 3:
continue continue
# ignore empty headers # ignore empty headers
@ -139,7 +159,7 @@ def tag_and_collect(lines, id_tag=True, back_links=False, exclude_h=None):
continue continue
saw_headline = True saw_headline = True
dashified = dashify_headline(l) dashified = dashifyHeadline(l)
if not exclude_h or not dashified[-1] in exclude_h: if not exclude_h or not dashified[-1] in exclude_h:
if id_tag: if id_tag:
@ -153,8 +173,7 @@ def tag_and_collect(lines, id_tag=True, back_links=False, exclude_h=None):
out_contents.append('[[back to top](#table-of-contents)]') out_contents.append('[[back to top](#table-of-contents)]')
return out_contents, headlines return out_contents, headlines
def positioningHeadlines(headlines):
def positioning_headlines(headlines):
""" """
Strips unnecessary whitespaces/tabs if first header is not left-aligned Strips unnecessary whitespaces/tabs if first header is not left-aligned
""" """
@ -168,11 +187,10 @@ def positioning_headlines(headlines):
row[-1] -= 1 row[-1] -= 1
return headlines return headlines
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
def create_toc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
""" """
Creates the table of contents from the headline list Creates the table of contents from the headline list
that was returned by the tag_and_collect function. that was returned by the tagAndCollect function.
Keyword Arguments: Keyword Arguments:
headlines: list of lists headlines: list of lists
@ -192,29 +210,28 @@ def create_toc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
if not no_toc_header: if not no_toc_header:
if top_link: if top_link:
processed.append('<a class="mk-toclify" id="table-of-contents"></a>\n') processed.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
processed.append('# Table of Contents') processed.append(contentTitle)
for line in headlines: for line in headlines:
if hyperlink: if hyperlink:
item = '%s- [%s](#%s)' % ((line[2]-1)*' ', line[0], line[1]) item = '[%s](#%s) ' % (line[0], line[1])
else: else:
item = '%s- %s' % ((line[2]-1)*' ', line[0]) item = '%s- %s' % ((line[2]-1)*' ', line[0])
processed.append(item) processed.append(item)
processed.append('\n') processed.append('\n')
return processed return processed
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
def build_markdown(toc_headlines, body, spacer=0, placeholder=None):
""" """
Returns a string with the Markdown output contents incl. Returns a string with the Markdown output contents incl.
the table of contents. the table of contents.
Keyword arguments: Keyword arguments:
toc_headlines: lines for the table of contents toc_headlines: lines for the table of contents
as created by the create_toc function. as created by the createToc function.
body: contents of the Markdown file including body: contents of the Markdown file including
ID-anchor tags as returned by the ID-anchor tags as returned by the
tag_and_collect function. tagAndCollect function.
spacer: Adds vertical space after the table spacer: Adds vertical space after the table
of contents. Height in pixels. of contents. Height in pixels.
placeholder: If a placeholder string is provided, the placeholder placeholder: If a placeholder string is provided, the placeholder
@ -228,17 +245,17 @@ def build_markdown(toc_headlines, body, spacer=0, placeholder=None):
else: else:
toc_markdown = "\n".join(toc_headlines) toc_markdown = "\n".join(toc_headlines)
body_markdown = "\n".join(body).strip()
if placeholder: if placeholder:
body_markdown = "\n".join(body)
markdown = body_markdown.replace(placeholder, toc_markdown) markdown = body_markdown.replace(placeholder, toc_markdown)
else: else:
markdown = toc_markdown + body_markdown body_markdown_p1 = "\n".join(body[:contentLineNdx ]) + '\n'
body_markdown_p2 = "\n".join(body[ contentLineNdx:])
markdown = body_markdown_p1 + toc_markdown + body_markdown_p2
return markdown return markdown
def outputMarkdown(markdown_cont, output_file):
def output_markdown(markdown_cont, output_file):
""" """
Writes to an output file if `outfile` is a valid path. Writes to an output file if `outfile` is a valid path.
@ -247,11 +264,17 @@ def output_markdown(markdown_cont, output_file):
with open(output_file, 'w') as out: with open(output_file, 'w') as out:
out.write(markdown_cont) out.write(markdown_cont)
def markdownToclify(
def markdown_toclify(input_file, output_file=None, github=False, input_file,
back_to_top=False, nolink=False, output_file=None,
no_toc_header=False, spacer=0, placeholder=None, min_toc_len=2,
exclude_h=None): github=False,
back_to_top=False,
nolink=False,
no_toc_header=False,
spacer=0,
placeholder=None,
exclude_h=None):
""" Function to add table of contents to markdown files. """ Function to add table of contents to markdown files.
Parameters Parameters
@ -262,6 +285,9 @@ def markdown_toclify(input_file, output_file=None, github=False,
output_file: str (defaul: None) output_file: str (defaul: None)
Path to the markdown output file. Path to the markdown output file.
min_toc_len: int (default: 2)
Miniumum number of entries to create a table of contents for.
github: bool (default: False) github: bool (default: False)
Uses GitHub TOC syntax if True. Uses GitHub TOC syntax if True.
@ -287,114 +313,134 @@ def markdown_toclify(input_file, output_file=None, github=False,
Returns Returns
----------- -----------
cont: str changed: Boolean
Markdown contents including the TOC. True if the file has been updated, False otherwise.
""" """
raw_contents = read_lines(input_file) cleaned_contents = removeLines(
cleaned_contents = remove_lines(raw_contents, remove=('[[back to top]', '<a class="mk-toclify"')) removeToC(readLines(input_file)),
processed_contents, raw_headlines = tag_and_collect( remove=('[[back to top]', '<a class="mk-toclify"'))
cleaned_contents,
id_tag=not github,
back_links=back_to_top,
exclude_h=exclude_h,
)
leftjustified_headlines = positioning_headlines(raw_headlines) processed_contents, raw_headlines = tagAndCollect(
processed_headlines = create_toc(leftjustified_headlines, cleaned_contents,
hyperlink=not nolink, id_tag=not github,
top_link=not nolink and not github, back_links=back_to_top,
no_toc_header=no_toc_header) exclude_h=exclude_h)
# add table of contents?
if len(raw_headlines) < min_toc_len:
processed_headlines = []
else:
leftjustified_headlines = positioningHeadlines(raw_headlines)
processed_headlines = createToc(
leftjustified_headlines,
hyperlink=not nolink,
top_link=not nolink and not github,
no_toc_header=no_toc_header)
if nolink: if nolink:
processed_contents = cleaned_contents processed_contents = cleaned_contents
cont = build_markdown(toc_headlines=processed_headlines, cont = buildMarkdown(
body=processed_contents, toc_headlines=processed_headlines,
spacer=spacer, body=processed_contents,
placeholder=placeholder) spacer=spacer,
placeholder=placeholder)
if output_file: if output_file:
output_markdown(cont, output_file) outputMarkdown(cont, output_file)
return cont
def isReleaseNotes(f):
return os.path.basename(f) == releaseNotesName
def commandline(): def excludeHeadingsFor(f):
return headingExcludeRelease if isReleaseNotes(f) else headingExcludeDefault
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
"""Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise."""
if verbose :
print( 'file: {}'.format(input_file))
output_file = input_file + '.tmp'
markdownToclify(
input_file=input_file,
output_file=output_file,
min_toc_len=min_toc_len,
github=True,
back_to_top=False,
nolink=False,
no_toc_header=False,
spacer=False,
placeholder=False,
exclude_h=excludeHeadingsFor(input_file))
# prevent race-condition (Python 3.3):
if sys.version_info >= (3, 3):
os.replace(output_file, input_file)
else:
os.remove(input_file)
os.rename(output_file, input_file)
return 1
def updateDocumentToC(paths, min_toc_len, verbose):
"""Add or update table of contents to specified paths. Return number of changed files"""
n = 0
for g in paths:
for f in glob.glob(g):
if os.path.isfile(f):
n = n + updateSingleDocumentToC(input_file=f, min_toc_len=min_toc_len, verbose=verbose)
return n
def updateDocumentToCMain():
"""Add or update table of contents to specified paths."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='Python script that inserts a table of contents\n'\ description='Add or update table of contents in markdown documents.',
'into markdown documents and creates the required internal links.', epilog="""""",
epilog=""" Example: formatter_class=argparse.RawTextHelpFormatter)
markdown-toclify.py ~/Desktop/input.md -o ~/Desktop/output.md
For more information about how internal links in parser.add_argument(
HTML and Markdown documents work 'Input',
please see: metavar='file',
"Creating a table of contents with internal type=str,
links in IPython Notebooks and Markdown documents" nargs=argparse.REMAINDER,
(http://sebastianraschka.com/Articles/2014_ipython_internal_links.html) help='files to process, at default: docs/*.md')
Updates for this script will be available at parser.add_argument(
https://github.com/rasbt/markdown-toclify '-v', '--verbose',
action='store_true',
help='report the name of the file being processed')
""", parser.add_argument(
formatter_class=argparse.RawTextHelpFormatter '--min-toc-entries',
) dest='minTocEntries',
default=minTocEntries,
type=int,
metavar='N',
help='the minimum number of entries to create a table of contents for [{deflt}]'.format(deflt=minTocEntries))
parser.add_argument('InputFile', parser.add_argument(
metavar='input.md', '--remove-toc',
help='path to the Markdown input file') action='store_const',
parser.add_argument('-o', '--output', dest='minTocEntries',
metavar='output.md', const=99,
default=None, help='remove all tables of contents')
help='path to the Markdown output file')
parser.add_argument('-b', '--back_to_top',
action='store_true',
help='add [back to top] links.')
parser.add_argument('-g', '--github',
action='store_true',
help='omits id-anchor tags (recommended for GitHub)')
parser.add_argument('-s', '--spacer',
default=0,
type=int,
metavar='pixels',
help='add horizontal space (in pixels) after the table of contents')
parser.add_argument('-n', '--nolink',
action='store_true',
help='create the table of contents without internal links')
parser.add_argument('-e', '--exclude_h',
type=str,
default='',
help='exclude eading levels, e.g., "2,3" to exclude all level 2 and 3 headings')
parser.add_argument('--placeholder',
type=str,
help='inserts TOC at the placeholder string instead of inserting it on top of the document')
parser.add_argument('--no_toc_header',
action='store_true',
help='suppresses the Table of Contents header')
parser.add_argument('-v', '--version',
action='version',
version='%s' % __version__)
args = parser.parse_args() args = parser.parse_args()
if args.exclude_h: paths = args.Input if len(args.Input) > 0 else [documentsDefault]
exclude_h = [int(i) for i in args.exclude_h.split(',')]
changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)
if changedFiles > 0:
print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
else: else:
exclude_h = None print( "No table of contents added or updated" )
cont = markdown_toclify(input_file=args.InputFile,
output_file=args.output,
github=args.github,
back_to_top=args.back_to_top,
nolink=args.nolink,
no_toc_header=args.no_toc_header,
spacer=args.spacer,
placeholder=args.placeholder,
exclude_h=exclude_h)
if not args.output:
print(cont)
if __name__ == '__main__': if __name__ == '__main__':
commandline() updateDocumentToCMain()
# end of file