Reorganize parser and style pass

2021-06-09 15:57:48 -07:00 · 2021-06-09 15:57:48 -07:00 · 1c55d866a8
commit 1c55d866a8
parent ffa27be86d
6 changed files with 280 additions and 234 deletions
--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@@ -2,13 +2,14 @@
 Module encapsulating all markdown parsing functionality.
 """
-from .core import normalize_title
+from .core import RenderableVisitor
-from .helpers import titlesort, filesafe_title
+from .helpers import normalize_title, filesafe_title, titlesort
 from .parsing import parse_raw_markdown
 __all__ = [
-	normalize_title.__name__,
+    "RenderableVisitor",
-	titlesort.__name__,
+    "normalize_title",
-	filesafe_title.__name__,
+    "filesafe_title",
-	parse_raw_markdown.__name__,
+    "titlesort",
    "parse_raw_markdown",
 ]
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@@ -5,32 +5,26 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """
 import re
 from typing import Callable, Any, Sequence
-RenderHook = Callable[['Renderable'], Any]
+from .helpers import normalize_title
 Spans = Sequence['Renderable']
-def normalize_title(title: str) -> str:
+RenderHook = Callable[["Renderable"], Any]
-	"""
+Spans = Sequence["Renderable"]
 	Normalizes strings as titles:
 	- Strips leading and trailing whitespace
 	- Merges internal whitespace into a single space
 	- Capitalizes the first word
 	"""
 	cleaned = re.sub(r'\s+', " ", title.strip())
 	return cleaned[:1].capitalize() + cleaned[1:]
-class Renderable():
+class Renderable:
    """
    Base class for parsed markdown. Provides the `render()` method for
    visiting the token tree.
    """
-	def render(self: 'Renderable', renderer: 'RenderableVisitor'):
+
    def render(self: "Renderable", renderer: "RenderableVisitor"):
        """
        Execute the apppropriate visitor method on this Renderable.
        Visitors implement hooks by declaring methods whose names are
        the name of a Renderable class.
        """
        hook: RenderHook = getattr(renderer, type(self).__name__, None)
        if hook:
@@ -39,7 +33,8 @@ class Renderable():
 class TextSpan(Renderable):
-	"""An unstyled length of text."""
+    """A length of text."""
    def __init__(self, innertext: str):
        self.innertext = innertext
@@ -49,20 +44,24 @@ class TextSpan(Renderable):
 class LineBreak(Renderable):
    """A line break within a paragraph."""
    def __str__(self):
        return "<break>"
 class SpanContainer(Renderable):
    """A formatting element that wraps some amount of text."""
    def __init__(self, spans: Spans):
        self.spans: Spans = spans
    def __str__(self):
-		return (f'[{type(self).__name__} '
+        return (
-			+ f'{" ".join([str(span) for span in self.spans])}]')
+            f"[{type(self).__name__} "
            + f'{" ".join([str(span) for span in self.spans])}]'
        )
-	def recurse(self, renderer: 'RenderableVisitor'):
+    def recurse(self, renderer: "RenderableVisitor"):
        return [child.render(renderer) for child in self.spans]
@@ -88,22 +87,26 @@ class ItalicSpan(SpanContainer):
 class CitationSpan(SpanContainer):
    """A citation to another article."""
    def __init__(self, spans: Spans, cite_target: str):
        super().__init__(spans)
        # Normalize citation target on parse, since we don't want
        # abnormal title strings lying around causing trouble.
        self.cite_target: str = normalize_title(cite_target)
-	def __str__(self):
+    def __str__(self) -> str:
-		return (f'{{{" ".join([str(span) for span in self.spans])}'
+        return (
-			+ f':{self.cite_target}}}')
+            f'{{{" ".join([str(span) for span in self.spans])}'
            + f":{self.cite_target}}}"
        )
-class RenderableVisitor():
+class RenderableVisitor:
    """
    Default implementation of the visitor pattern. Executes once on
    each token in the tree and returns itself.
    """
    def TextSpan(self, span: TextSpan):
        return self
--- a/amanuensis/parser/helpers.py
+++ b/amanuensis/parser/helpers.py
@@ -1,7 +1,22 @@
 """
 Helper functions for manipulating titles during parsing
 """
 import re
 import urllib.parse
 def normalize_title(title: str) -> str:
    """
    Normalizes strings as titles:
    - Strips leading and trailing whitespace
    - Merges internal whitespace into a single space
    - Capitalizes the first word
    """
    cleaned = re.sub(r"\s+", " ", title.strip())
    return cleaned[:1].capitalize() + cleaned[1:]
 def titlesort(title: str) -> str:
    """
    Strips articles off of titles for alphabetical sorting purposes
@@ -20,9 +35,19 @@ def filesafe_title(title: str) -> str:
    """
    Makes an article title filename-safe.
    """
-	s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
+    # Replace whitespace with _
-	s = re.sub(r"~", '-', s)        # parse.quote doesn't catch ~
+    s = re.sub(r"\s+", "_", title)
-	s = urllib.parse.quote(s)       # Encode all other characters
+
-	s = re.sub(r"%", "", s)         # Strip encoding %s
+    # parse.quote doesn't catch ~
-	s = s[:64]                  	# Limit to 64 characters
+    s = re.sub(r"~", "-", s)
    # Encode all other characters
    s = urllib.parse.quote(s)
    # Strip encoding %s
    s = re.sub(r"%", "", s)
    # Limit to 64 characters
    s = s[:64]
    return s
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -16,7 +16,7 @@ from .core import (
    ItalicSpan,
    CitationSpan,
    Renderable,
-	SpanContainer
+    SpanContainer,
 )
 Spans = Sequence[Renderable]
@@ -28,7 +28,7 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
    """
    # Parse each paragraph individually, as no formatting applies
    # across paragraphs
-	paragraphs = re.split(r'\n\n+', text)
+    paragraphs = re.split(r"\n\n+", text)
    parse_results = list(map(parse_paragraph, paragraphs))
    return ParsedArticle(parse_results)
@@ -36,7 +36,7 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
 def parse_paragraph(text: str) -> SpanContainer:
    # Parse the paragraph as a span of text
    text = text.strip()
-	if text and text[0] == '~':
+    if text and text[0] == "~":
        return SignatureParagraph(parse_paired_formatting(text[1:]))
    else:
        return BodyParagraph(parse_paired_formatting(text))
@@ -46,7 +46,8 @@ def parse_paired_formatting(
    text: str,
    cite: bool = True,
    bold: bool = True,
-		italic: bool = True) -> Spans:
+    italic: bool = True,
 ) -> Spans:
    # Find positions of any paired formatting
    first_cite = find_pair(text, "[[", "]]", cite)
    first_bold = find_pair(text, "**", "**", bold)
@@ -68,7 +69,8 @@ def find_pair(
    text: str,
    open_tag: str,
    close_tag: str,
-		valid: bool) -> int:
+    valid: bool,
 ) -> int:
    # If skipping, return -1
    if not valid:
        return -1
@@ -84,7 +86,11 @@ def find_pair(
    return first
-def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
+def parse_citation(
    text: str,
    bold: bool = True,
    italic: bool = True,
 ) -> Spans:
    cite_open = text.find("[[")
    if cite_open > -1:
        cite_close = text.find("]]", cite_open + 2)
@@ -101,15 +107,20 @@ def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
        # target from the alias text
        inner_split = text_inner.split("|", 1)
        text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-		spans_inner = parse_paired_formatting(text_inner_actual,
+        spans_inner = parse_paired_formatting(
-			cite=False, bold=bold, italic=italic)
+            text_inner_actual, cite=False, bold=bold, italic=italic
        )
        citation = CitationSpan(spans_inner, cite_target)
        return [*spans_before, citation, *spans_after]
    # Should never happen
    return parse_breaks(text)
-def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
+def parse_bold(
    text: str,
    cite: bool = True,
    italic: bool = True,
 ) -> Spans:
    bold_open = text.find("**")
    if bold_open > -1:
        bold_close = text.find("**", bold_open + 2)
@@ -119,15 +130,20 @@ def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
        spans_after = parse_paired_formatting(text[bold_close + 2 :])
        # Parse inner text minus bold parsing
        text_inner = text[bold_open + 2 : bold_close]
-		spans_inner = parse_paired_formatting(text_inner,
+        spans_inner = parse_paired_formatting(
-			cite=cite, bold=False, italic=italic)
+            text_inner, cite=cite, bold=False, italic=italic
        )
        bold = BoldSpan(spans_inner)
        return [*spans_before, bold, *spans_after]
    # Should never happen
    return parse_italic(text)
-def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
+def parse_italic(
    text: str,
    cite: bool = True,
    bold: bool = True,
 ) -> Spans:
    italic_open = text.find("//")
    if italic_open > -1:
        italic_close = text.find("//", italic_open + 2)
@@ -137,8 +153,9 @@ def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
        spans_after = parse_paired_formatting(text[italic_close + 2 :])
        # Parse inner text minus italic parsing
        text_inner = text[italic_open + 2 : italic_close]
-		spans_inner = parse_paired_formatting(text_inner,
+        spans_inner = parse_paired_formatting(
-			cite=cite, bold=bold, italic=False)
+            text_inner, cite=cite, bold=bold, italic=False
        )
        italic = ItalicSpan(spans_inner)
        return [*spans_before, italic, *spans_after]
    # Should never happen
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,4 +1,4 @@
 [mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 ; mypy stable doesn't support pyproject.toml yet
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"
 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 [tool.pytest.ini_options]
 addopts = "--show-capture=log"