Reorganize parser and style pass

2021-06-09 15:57:48 -07:00 · 2021-06-09 15:57:48 -07:00 · 40b031e198
commit 40b031e198
parent 80bb7a3d28
6 changed files with 280 additions and 234 deletions
--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@@ -2,13 +2,14 @@
 Module encapsulating all markdown parsing functionality.
 """

-from .core import normalize_title
-from .helpers import titlesort, filesafe_title
+from .core import RenderableVisitor
+from .helpers import normalize_title, filesafe_title, titlesort
 from .parsing import parse_raw_markdown

 __all__ = [
-	normalize_title.__name__,
-	titlesort.__name__,
-	filesafe_title.__name__,
-	parse_raw_markdown.__name__,
+    "RenderableVisitor",
+    "normalize_title",
+    "filesafe_title",
+    "titlesort",
+    "parse_raw_markdown",
 ]
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@@ -5,32 +5,26 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """

-import re
 from typing import Callable, Any, Sequence

-RenderHook = Callable[['Renderable'], Any]
-Spans = Sequence['Renderable']
+from .helpers import normalize_title


-def normalize_title(title: str) -> str:
-	"""
-	Normalizes strings as titles:
-	- Strips leading and trailing whitespace
-	- Merges internal whitespace into a single space
-	- Capitalizes the first word
-	"""
-	cleaned = re.sub(r'\s+', " ", title.strip())
-	return cleaned[:1].capitalize() + cleaned[1:]
+RenderHook = Callable[["Renderable"], Any]
+Spans = Sequence["Renderable"]


-class Renderable():
+class Renderable:
    """
    Base class for parsed markdown. Provides the `render()` method for
    visiting the token tree.
    """
-	def render(self: 'Renderable', renderer: 'RenderableVisitor'):
+
+    def render(self: "Renderable", renderer: "RenderableVisitor"):
        """
        Execute the appropriate visitor method on this Renderable.
+        Visitors implement hooks by declaring methods whose names are
+        the name of a Renderable class.
        """
        hook: RenderHook = getattr(renderer, type(self).__name__, None)
        if hook:
@@ -39,7 +33,8 @@ class Renderable():


 class TextSpan(Renderable):
-	"""An unstyled length of text."""
+    """A length of text."""
+
    def __init__(self, innertext: str):
        self.innertext = innertext

@@ -49,20 +44,24 @@ class TextSpan(Renderable):

 class LineBreak(Renderable):
    """A line break within a paragraph."""
+
    def __str__(self):
        return "<break>"


 class SpanContainer(Renderable):
    """A formatting element that wraps some amount of text."""
+
    def __init__(self, spans: Spans):
        self.spans: Spans = spans

    def __str__(self):
-		return (f'[{type(self).__name__} '
-			+ f'{" ".join([str(span) for span in self.spans])}]')
+        return (
+            f"[{type(self).__name__} "
+            + f'{" ".join([str(span) for span in self.spans])}]'
+        )

-	def recurse(self, renderer: 'RenderableVisitor'):
+    def recurse(self, renderer: "RenderableVisitor"):
        return [child.render(renderer) for child in self.spans]


@@ -88,22 +87,26 @@ class ItalicSpan(SpanContainer):

 class CitationSpan(SpanContainer):
    """A citation to another article."""
+
    def __init__(self, spans: Spans, cite_target: str):
        super().__init__(spans)
        # Normalize citation target on parse, since we don't want
        # abnormal title strings lying around causing trouble.
        self.cite_target: str = normalize_title(cite_target)

-	def __str__(self):
-		return (f'{{{" ".join([str(span) for span in self.spans])}'
-			+ f':{self.cite_target}}}')
+    def __str__(self) -> str:
+        return (
+            f'{{{" ".join([str(span) for span in self.spans])}'
+            + f":{self.cite_target}}}"
+        )


-class RenderableVisitor():
+class RenderableVisitor:
    """
    Default implementation of the visitor pattern. Executes once on
    each token in the tree and returns itself.
    """
+
    def TextSpan(self, span: TextSpan):
        return self

--- a/amanuensis/parser/helpers.py
+++ b/amanuensis/parser/helpers.py
@@ -1,7 +1,22 @@
+"""
+Helper functions for manipulating titles during parsing
+"""
+
 import re
 import urllib.parse


+def normalize_title(title: str) -> str:
+    """
+    Normalizes strings as titles:
+    - Strips leading and trailing whitespace
+    - Merges internal whitespace into a single space
+    - Capitalizes the first word
+    """
+    cleaned = re.sub(r"\s+", " ", title.strip())
+    return cleaned[:1].capitalize() + cleaned[1:]
+
+
 def titlesort(title: str) -> str:
    """
    Strips articles off of titles for alphabetical sorting purposes
@@ -20,9 +35,19 @@ def filesafe_title(title: str) -> str:
    """
    Makes an article title filename-safe.
    """
-	s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
-	s = re.sub(r"~", '-', s)        # parse.quote doesn't catch ~
-	s = urllib.parse.quote(s)       # Encode all other characters
-	s = re.sub(r"%", "", s)         # Strip encoding %s
-	s = s[:64]                  	# Limit to 64 characters
+    # Replace whitespace with _
+    s = re.sub(r"\s+", "_", title)
+
+    # parse.quote doesn't catch ~
+    s = re.sub(r"~", "-", s)
+
+    # Encode all other characters
+    s = urllib.parse.quote(s)
+
+    # Strip encoding %s
+    s = re.sub(r"%", "", s)
+
+    # Limit to 64 characters
+    s = s[:64]
+
    return s
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -16,7 +16,7 @@ from .core import (
    ItalicSpan,
    CitationSpan,
    Renderable,
-	SpanContainer
+    SpanContainer,
 )

 Spans = Sequence[Renderable]
@@ -28,7 +28,7 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
    """
    # Parse each paragraph individually, as no formatting applies
    # across paragraphs
-	paragraphs = re.split(r'\n\n+', text)
+    paragraphs = re.split(r"\n\n+", text)
    parse_results = list(map(parse_paragraph, paragraphs))
    return ParsedArticle(parse_results)

@@ -36,7 +36,7 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
 def parse_paragraph(text: str) -> SpanContainer:
    # Parse the paragraph as a span of text
    text = text.strip()
-	if text and text[0] == '~':
+    if text and text[0] == "~":
        return SignatureParagraph(parse_paired_formatting(text[1:]))
    else:
        return BodyParagraph(parse_paired_formatting(text))
@@ -46,7 +46,8 @@ def parse_paired_formatting(
    text: str,
    cite: bool = True,
    bold: bool = True,
-		italic: bool = True) -> Spans:
+    italic: bool = True,
+) -> Spans:
    # Find positions of any paired formatting
    first_cite = find_pair(text, "[[", "]]", cite)
    first_bold = find_pair(text, "**", "**", bold)
@@ -68,7 +69,8 @@ def find_pair(
    text: str,
    open_tag: str,
    close_tag: str,
-		valid: bool) -> int:
+    valid: bool,
+) -> int:
    # If skipping, return -1
    if not valid:
        return -1
@@ -84,7 +86,11 @@ def find_pair(
    return first


-def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
+def parse_citation(
+    text: str,
+    bold: bool = True,
+    italic: bool = True,
+) -> Spans:
    cite_open = text.find("[[")
    if cite_open > -1:
        cite_close = text.find("]]", cite_open + 2)
@@ -94,51 +100,62 @@ def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
        spans_before = parse_breaks(text[:cite_open])
        # Continue parsing pair formatting after this one closes with all
        # three as valid choices
-		spans_after = parse_paired_formatting(text[cite_close + 2:])
+        spans_after = parse_paired_formatting(text[cite_close + 2 :])
        # Parse inner text and skip parsing for this format pair
-		text_inner = text[cite_open + 2:cite_close]
+        text_inner = text[cite_open + 2 : cite_close]
        # For citations specifically, we may need to split off a citation
        # target from the alias text
        inner_split = text_inner.split("|", 1)
        text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-		spans_inner = parse_paired_formatting(text_inner_actual,
-			cite=False, bold=bold, italic=italic)
+        spans_inner = parse_paired_formatting(
+            text_inner_actual, cite=False, bold=bold, italic=italic
+        )
        citation = CitationSpan(spans_inner, cite_target)
        return [*spans_before, citation, *spans_after]
    # Should never happen
    return parse_breaks(text)


-def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
+def parse_bold(
+    text: str,
+    cite: bool = True,
+    italic: bool = True,
+) -> Spans:
    bold_open = text.find("**")
    if bold_open > -1:
        bold_close = text.find("**", bold_open + 2)
        # Should be no formatting behind us
        spans_before = parse_breaks(text[:bold_open])
        # Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[bold_close + 2:])
+        spans_after = parse_paired_formatting(text[bold_close + 2 :])
        # Parse inner text minus bold parsing
-		text_inner = text[bold_open + 2:bold_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=False, italic=italic)
+        text_inner = text[bold_open + 2 : bold_close]
+        spans_inner = parse_paired_formatting(
+            text_inner, cite=cite, bold=False, italic=italic
+        )
        bold = BoldSpan(spans_inner)
        return [*spans_before, bold, *spans_after]
    # Should never happen
    return parse_italic(text)


-def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
+def parse_italic(
+    text: str,
+    cite: bool = True,
+    bold: bool = True,
+) -> Spans:
    italic_open = text.find("//")
    if italic_open > -1:
        italic_close = text.find("//", italic_open + 2)
        # Should be no formatting behind us
        spans_before = parse_breaks(text[:italic_open])
        # Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[italic_close + 2:])
+        spans_after = parse_paired_formatting(text[italic_close + 2 :])
        # Parse inner text minus italic parsing
-		text_inner = text[italic_open + 2:italic_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=bold, italic=False)
+        text_inner = text[italic_open + 2 : italic_close]
+        spans_inner = parse_paired_formatting(
+            text_inner, cite=cite, bold=bold, italic=False
+        )
        italic = ItalicSpan(spans_inner)
        return [*spans_before, italic, *spans_after]
    # Should never happen
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,4 +1,4 @@
 [mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 ; mypy stable doesn't support pyproject.toml yet
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"

 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"

 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"

 [tool.pytest.ini_options]
 addopts = "--show-capture=log"