Add renderable visitor unit test

Add paragraph and article parser tests
Add citation parsing unit tests
2021-06-11 23:55:52 -07:00 · 2021-06-11 23:55:52 -07:00 · 2021-06-11 23:55:52 -07:00 · 2021-06-11 23:55:52 -07:00 · 2021-06-11 23:55:52 -07:00 · 2021-06-11 23:55:52 -07:00
13 changed files with 920 additions and 405 deletions
--- a/amanuensis/lexicon/gameloop.py
+++ b/amanuensis/lexicon/gameloop.py
@ -9,11 +9,94 @@ from amanuensis.config import ReadOnlyOrderedDict
 from amanuensis.models import LexiconModel, UserModel
 from amanuensis.parser import (
 	parse_raw_markdown,
 	GetCitations,
 	HtmlRenderer,
 	titlesort,
-	filesafe_title,
+	filesafe_title)
-	ConstraintAnalysis)
+from amanuensis.parser.core import RenderableVisitor
 class GetCitations(RenderableVisitor):
 	"""
 	Visitor that collects the citation target of each CitationSpan it
 	visits, in the order the spans are visited.
 	"""
 	def __init__(self):
 		# Citation targets appended by the CitationSpan hook
 		self.citations = []
 	def ParsedArticle(self, span):
 		# Visit the article's children, then hand back the collected
 		# targets instead of the visitor itself
 		span.recurse(self)
 		return self.citations
 	def CitationSpan(self, span):
 		# Record the target; the citation's inner spans are not visited
 		self.citations.append(span.cite_target)
 		return self
 class ConstraintAnalysis(RenderableVisitor):
 	"""
 	Visitor that tallies the words, citation targets, and signature
 	paragraphs of an article as it walks the token tree.
 	"""
 	def __init__(self, lexicon: LexiconModel):
 		# NOTE(review): `lexicon` is not read anywhere in this class;
 		# the parameter is kept so existing callers keep working.
 		self.info: List[str] = []
 		self.warning: List[str] = []
 		self.error: List[str] = []
 		self.word_count: int = 0
 		self.citations: list = []
 		self.signatures: int = 0
 	def TextSpan(self, span):
 		# str.split() with no arguments splits on runs of whitespace and
 		# yields nothing for empty or whitespace-only text. The previous
 		# re.split(r'\s+', text.strip()) produced [''] for such text and
 		# so counted every blank text span as one word.
 		self.word_count += len(span.innertext.split())
 		return self
 	def SignatureParagraph(self, span):
 		# Count the signature, then keep walking so that its text and
 		# citations are tallied too
 		self.signatures += 1
 		span.recurse(self)
 		return self
 	def CitationSpan(self, span):
 		# Record the target, then visit the citation's inner spans
 		self.citations.append(span.cite_target)
 		span.recurse(self)
 		return self
 class HtmlRenderer(RenderableVisitor):
 	"""
 	Visitor that turns an article token tree into the HTML of a
 	published article.
 	"""
 	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
 		self.lexicon_name: str = lexicon_name
 		self.written_articles: Iterable[str] = written_articles
 	def _inner(self, span) -> str:
 		# Render a container's children and concatenate the results
 		return "".join(span.recurse(self))
 	def TextSpan(self, span):
 		# NOTE(review): text is emitted as-is, without HTML escaping
 		return span.innertext
 	def LineBreak(self, span):
 		return '<br>'
 	def ParsedArticle(self, span):
 		# One rendered paragraph per line
 		paragraphs = span.recurse(self)
 		return '\n'.join(paragraphs)
 	def BodyParagraph(self, span):
 		return f'<p>{self._inner(span)}</p>'
 	def SignatureParagraph(self, span):
 		opening = '<hr><span class="signature"><p>'
 		closing = '</p></span>'
 		return opening + self._inner(span) + closing
 	def BoldSpan(self, span):
 		return f'<b>{self._inner(span)}</b>'
 	def ItalicSpan(self, span):
 		return f'<i>{self._inner(span)}</i>'
 	def CitationSpan(self, span):
 		# Citations whose target is not among the written articles get
 		# the "phantom" class
 		is_written = span.cite_target in self.written_articles
 		link_class = '' if is_written else ' class="phantom"'
 		# The url_for('lexicon.article', ...) lookup is disabled; the
 		# path is assembled by hand instead
 		article_slug = filesafe_title(span.cite_target)
 		link = f'/lexicon/{self.lexicon_name}' + f'/article/{article_slug}'
 		return f'<a href="{link}"{link_class}>{self._inner(span)}</a>'
 def get_player_characters(
--- a/amanuensis/lexicon/manage.py
+++ b/amanuensis/lexicon/manage.py
@ -13,7 +13,7 @@
 # from amanuensis.config.loader import AttrOrderedDict
 # from amanuensis.errors import ArgumentError
 # from amanuensis.lexicon import LexiconModel
-# from amanuensis.parser import parse_raw_markdown, GetCitations, HtmlRenderer, filesafe_title, titlesort
+# from amanuensis.parser import parse_raw_markdown, filesafe_title, titlesort
 # from amanuensis.resources import get_stream
--- a/amanuensis/parser/init.py
+++ b/amanuensis/parser/init.py
@ -2,19 +2,14 @@
 Module encapsulating all markdown parsing functionality.
 """
-from .analyze import ConstraintAnalysis, GetCitations
+from .core import RenderableVisitor
-from .core import normalize_title
+from .helpers import normalize_title, filesafe_title, titlesort
 from .helpers import titlesort, filesafe_title
 from .parsing import parse_raw_markdown
 from .render import PreviewHtmlRenderer, HtmlRenderer
 __all__ = [
-	ConstraintAnalysis.__name__,
+    "RenderableVisitor",
-	GetCitations.__name__,
+    "normalize_title",
-	normalize_title.__name__,
+    "filesafe_title",
-	titlesort.__name__,
+    "titlesort",
-	filesafe_title.__name__,
+    "parse_raw_markdown",
 	parse_raw_markdown.__name__,
 	PreviewHtmlRenderer.__name__,
 	HtmlRenderer.__name__,
 ]
--- a/amanuensis/parser/analyze.py
+++ b/amanuensis/parser/analyze.py
@ -1,49 +0,0 @@
 """
 Internal module encapsulating visitors that compute metrics on articles
 for verification against constraints.
 """
 import re
 from typing import List
 from amanuensis.models import LexiconModel
 from .core import RenderableVisitor
 class GetCitations(RenderableVisitor):
 	def __init__(self):
 		self.citations = []
 	def ParsedArticle(self, span):
 		span.recurse(self)
 		return self.citations
 	def CitationSpan(self, span):
 		self.citations.append(span.cite_target)
 		return self
 class ConstraintAnalysis(RenderableVisitor):
 	def __init__(self, lexicon: LexiconModel):
 		self.info: List[str] = []
 		self.warning: List[str] = []
 		self.error: List[str] = []
 		self.word_count: int = 0
 		self.citations: list = []
 		self.signatures: int = 0
 	def TextSpan(self, span):
 		self.word_count += len(re.split(r'\s+', span.innertext.strip()))
 		return self
 	def SignatureParagraph(self, span):
 		self.signatures += 1
 		span.recurse(self)
 		return self
 	def CitationSpan(self, span):
 		self.citations.append(span.cite_target)
 		span.recurse(self)
 		return self
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@ -5,32 +5,26 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """
 import re
 from typing import Callable, Any, Sequence
-RenderHook = Callable[['Renderable'], Any]
+from .helpers import normalize_title
 Spans = Sequence['Renderable']
-def normalize_title(title: str) -> str:
+RenderHook = Callable[["Renderable"], Any]
-	"""
+Spans = Sequence["Renderable"]
 	Normalizes strings as titles:
 	- Strips leading and trailing whitespace
 	- Merges internal whitespace into a single space
 	- Capitalizes the first word
 	"""
 	cleaned = re.sub(r'\s+', " ", title.strip())
 	return cleaned[:1].capitalize() + cleaned[1:]
-class Renderable():
+class Renderable:
    """
    Base class for parsed markdown. Provides the `render()` method for
    visiting the token tree.
    """
-	def render(self: 'Renderable', renderer: 'RenderableVisitor'):
+
    def render(self: "Renderable", renderer: "RenderableVisitor"):
        """
        Execute the appropriate visitor method on this Renderable.
        Visitors implement hooks by declaring methods whose names are
        the name of a Renderable class.
        """
        hook: RenderHook = getattr(renderer, type(self).__name__, None)
        if hook:
@ -39,30 +33,35 @@ class Renderable():
 class TextSpan(Renderable):
-	"""An unstyled length of text."""
+    """A length of text."""
    def __init__(self, innertext: str):
        self.innertext = innertext
-	def __str__(self):
+    def __repr__(self):
-		return f"[{self.innertext}]"
+        return f"<{self.innertext}>"
 class LineBreak(Renderable):
    """A line break within a paragraph."""
-	def __str__(self):
+
    def __repr__(self):
        return "<break>"
 class SpanContainer(Renderable):
    """A formatting element that wraps some amount of text."""
    def __init__(self, spans: Spans):
        self.spans: Spans = spans
-	def __str__(self):
+    def __repr__(self):
-		return (f'[{type(self).__name__} '
+        return (
-			+ f'{" ".join([str(span) for span in self.spans])}]')
+            f"<{type(self).__name__} "
            + f'{" ".join([repr(span) for span in self.spans])}>'
        )
-	def recurse(self, renderer: 'RenderableVisitor'):
+    def recurse(self, renderer: "RenderableVisitor"):
        return [child.render(renderer) for child in self.spans]
@ -88,22 +87,26 @@ class ItalicSpan(SpanContainer):
 class CitationSpan(SpanContainer):
    """A citation to another article."""
    def __init__(self, spans: Spans, cite_target: str):
        super().__init__(spans)
        # Normalize citation target on parse, since we don't want
        # abnormal title strings lying around causing trouble.
        self.cite_target: str = normalize_title(cite_target)
-	def __str__(self):
+    def __repr__(self) -> str:
-		return (f'{{{" ".join([str(span) for span in self.spans])}'
+        return (
-			+ f':{self.cite_target}}}')
+            f'{{{" ".join([repr(span) for span in self.spans])}'
            + f":{self.cite_target}}}"
        )
-class RenderableVisitor():
+class RenderableVisitor:
    """
    Default implementation of the visitor pattern. Executes once on
    each token in the tree and returns itself.
    """
    def TextSpan(self, span: TextSpan):
        return self
--- a/amanuensis/parser/helpers.py
+++ b/amanuensis/parser/helpers.py
@ -1,7 +1,22 @@
 """
 Helper functions for manipulating titles during parsing
 """
 import re
 import urllib.parse
 def normalize_title(title: str) -> str:
    """
    Puts a string into normal title form:
    - Leading and trailing whitespace is removed
    - Each internal run of whitespace becomes a single space
    - The first character is capitalized
    """
    collapsed = re.sub(r"\s+", " ", title.strip())
    head, tail = collapsed[:1], collapsed[1:]
    return head.capitalize() + tail
 def titlesort(title: str) -> str:
    """
    Strips articles off of titles for alphabetical sorting purposes
@ -20,9 +35,19 @@ def filesafe_title(title: str) -> str:
    """
    Makes an article title filename-safe.
    """
-	s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
+    # Replace whitespace with _
-	s = re.sub(r"~", '-', s)        # parse.quote doesn't catch ~
+    s = re.sub(r"\s+", "_", title)
-	s = urllib.parse.quote(s)       # Encode all other characters
+
-	s = re.sub(r"%", "", s)         # Strip encoding %s
+    # parse.quote doesn't catch ~
-	s = s[:64]                  	# Limit to 64 characters
+    s = re.sub(r"~", "-", s)
    # Encode all other characters
    s = urllib.parse.quote(s)
    # Strip encoding %s
    s = re.sub(r"%", "", s)
    # Limit to 64 characters
    s = s[:64]
    return s
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@ -16,7 +16,7 @@ from .core import (
    ItalicSpan,
    CitationSpan,
    Renderable,
-	SpanContainer
+    SpanContainer,
 )
 Spans = Sequence[Renderable]
@ -28,15 +28,18 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
    """
    # Parse each paragraph individually, as no formatting applies
    # across paragraphs
-	paragraphs = re.split(r'\n\n+', text)
+    paragraphs = re.split(r"\n\n+", text)
    parse_results = list(map(parse_paragraph, paragraphs))
    return ParsedArticle(parse_results)
 def parse_paragraph(text: str) -> SpanContainer:
    """
    Parses a block of text into a paragraph object.
    """
    # Parse the paragraph as a span of text
    text = text.strip()
-	if text and text[0] == '~':
+    if text and text[0] == "~":
        return SignatureParagraph(parse_paired_formatting(text[1:]))
    else:
        return BodyParagraph(parse_paired_formatting(text))
@ -44,34 +47,44 @@ def parse_paragraph(text: str) -> SpanContainer:
 def parse_paired_formatting(
    text: str,
-		cite: bool = True,
+    in_cite: bool = False,
-		bold: bool = True,
+    in_bold: bool = False,
-		italic: bool = True) -> Spans:
+    in_italic: bool = False,
 ) -> Spans:
    """
    Parses citations, bolds, and italics, which can be nested inside each other.
    A single type cannot nest inside itself, which is controlled by setting the
    matching `in_*` flag parameter to True while parsing inside that type.
    """
    # Find positions of any paired formatting
-	first_cite = find_pair(text, "[[", "]]", cite)
+    next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
-	first_bold = find_pair(text, "**", "**", bold)
+    next_bold = find_pair(text, "**", "**") if not in_bold else -1
-	first_italic = find_pair(text, "//", "//", italic)
+    next_italic = find_pair(text, "//", "//") if not in_italic else -1
-	# Load the possible parse handlers into the map
+    # Create a map from a formatting mark's distance to its parse handler
    handlers = {}
-	handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
+    handlers[next_cite] = lambda: parse_citation(
-	handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
+        text, in_bold=in_bold, in_italic=in_italic
-	handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
+    )
-	# If nothing was found, move on to the next parsing step
+    handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
    handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
    # Map the next parsing step at -1. If we're currently inside a formatting
    # mark pair, skip parsing line breaks, which are not allowed inside paired
    # marks.
    if in_cite or in_bold or in_italic:
        handlers[-1] = lambda: parse_text(text)
    else:
        handlers[-1] = lambda: parse_breaks(text)
-	# Choose a handler based on the earliest found result
+    # Choose the handler for the earliest found pair, or the default handler
-	finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
+    # at -1 if nothing was found.
    finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
    first = min(finds) if finds else -1
    return handlers[first]()
-def find_pair(
+def find_pair(text: str, open_tag: str, close_tag: str) -> int:
-		text: str,
+    """
-		open_tag: str,
+    Finds the beginning of a pair of formatting marks.
-		close_tag: str,
+    """
 		valid: bool) -> int:
 	# If skipping, return -1
 	if not valid:
 		return -1
    # If the open tag wasn't found, return -1
    first = text.find(open_tag)
    if first < 0:
@ -84,7 +97,14 @@ def find_pair(
    return first
-def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
+def parse_citation(
    text: str,
    in_bold: bool = False,
    in_italic: bool = False,
 ) -> Spans:
    """
    Parses text into a citation span.
    """
    cite_open = text.find("[[")
    if cite_open > -1:
        cite_close = text.find("]]", cite_open + 2)
@ -94,51 +114,69 @@ def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
        spans_before = parse_breaks(text[:cite_open])
        # Continue parsing pair formatting after this one closes with all
        # three as valid choices
-		spans_after = parse_paired_formatting(text[cite_close + 2:])
+        spans_after = parse_paired_formatting(text[cite_close + 2 :])
        # Parse inner text and skip parsing for this format pair
-		text_inner = text[cite_open + 2:cite_close]
+        text_inner = text[cite_open + 2 : cite_close]
-		# For citations specifically, we may need to split off a citation
+        # For citations specifically, try to split off a citation target.
-		# target from the alias text
+        # If there's no citation target to split, use the same text as the
        # citation text and the target.
        inner_split = text_inner.split("|", 1)
        text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-		spans_inner = parse_paired_formatting(text_inner_actual,
+        spans_inner = parse_paired_formatting(
-			cite=False, bold=bold, italic=italic)
+            text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
        )
        citation = CitationSpan(spans_inner, cite_target)
        return [*spans_before, citation, *spans_after]
    # Should never happen
    return parse_breaks(text)
-def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
+def parse_bold(
    text: str,
    in_cite: bool = False,
    in_italic: bool = False,
 ) -> Spans:
    """
    Parses text into a bold span.
    """
    bold_open = text.find("**")
    if bold_open > -1:
        bold_close = text.find("**", bold_open + 2)
        # Should be no formatting behind us
        spans_before = parse_breaks(text[:bold_open])
        # Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[bold_close + 2:])
+        spans_after = parse_paired_formatting(text[bold_close + 2 :])
        # Parse inner text minus bold parsing
-		text_inner = text[bold_open + 2:bold_close]
+        text_inner = text[bold_open + 2 : bold_close]
-		spans_inner = parse_paired_formatting(text_inner,
+        spans_inner = parse_paired_formatting(
-			cite=cite, bold=False, italic=italic)
+            text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
        )
        bold = BoldSpan(spans_inner)
        return [*spans_before, bold, *spans_after]
    # Should never happen
    return parse_italic(text)
-def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
+def parse_italic(
    text: str,
    in_cite: bool = False,
    in_bold: bool = False,
 ) -> Spans:
    """
    Parses text into an italic span.
    """
    italic_open = text.find("//")
    if italic_open > -1:
        italic_close = text.find("//", italic_open + 2)
        # Should be no formatting behind us
        spans_before = parse_breaks(text[:italic_open])
        # Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[italic_close + 2:])
+        spans_after = parse_paired_formatting(text[italic_close + 2 :])
        # Parse inner text minus italic parsing
-		text_inner = text[italic_open + 2:italic_close]
+        text_inner = text[italic_open + 2 : italic_close]
-		spans_inner = parse_paired_formatting(text_inner,
+        spans_inner = parse_paired_formatting(
-			cite=cite, bold=bold, italic=False)
+            text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
        )
        italic = ItalicSpan(spans_inner)
        return [*spans_before, italic, *spans_after]
    # Should never happen
@ -146,11 +184,26 @@ def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
 def parse_breaks(text: str) -> Spans:
    """
    Splits text into spans at intra-paragraph line break marks.
    """
    # Empty text produces no spans at all
    if not text:
        return []
    # The break mark is two backslashes directly followed by a newline
    pieces = text.split("\\\\\n")
    # Every piece after the first is preceded by the LineBreak that
    # separated it from the previous piece
    following: Spans = [
        span
        for piece in pieces[1:]
        for span in (LineBreak(), TextSpan(piece))
    ]
    return [TextSpan(pieces[0]), *following]
 def parse_text(text: str) -> Spans:
    """
    Wraps text that has no parseable marks left in a single text span.
    Empty text yields no spans.
    """
    return [TextSpan(text)] if text else []
--- a/amanuensis/parser/render.py
+++ b/amanuensis/parser/render.py
@ -1,104 +0,0 @@
 """
 Internal module encapsulating visitors that render articles into
 readable formats.
 """
 from typing import Iterable
 from .core import RenderableVisitor
 from .helpers import filesafe_title
 class HtmlRenderer(RenderableVisitor):
 	"""
 	Renders an article token tree into published article HTML.
 	"""
 	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
 		self.lexicon_name: str = lexicon_name
 		self.written_articles: Iterable[str] = written_articles
 	def TextSpan(self, span):
 		return span.innertext
 	def LineBreak(self, span):
 		return '<br>'
 	def ParsedArticle(self, span):
 		return '\n'.join(span.recurse(self))
 	def BodyParagraph(self, span):
 		return f'<p>{"".join(span.recurse(self))}</p>'
 	def SignatureParagraph(self, span):
 		return (
 			'<hr><span class="signature"><p>'
 			f'{"".join(span.recurse(self))}'
 			'</p></span>'
 		)
 	def BoldSpan(self, span):
 		return f'<b>{"".join(span.recurse(self))}</b>'
 	def ItalicSpan(self, span):
 		return f'<i>{"".join(span.recurse(self))}</i>'
 	def CitationSpan(self, span):
 		if span.cite_target in self.written_articles:
 			link_class = ''
 		else:
 			link_class = ' class="phantom"'
 		# link = url_for(
 		# 	'lexicon.article',
 		# 	name=self.lexicon_name,
 		# 	title=filesafe_title(span.cite_target))
 		link = (f'/lexicon/{self.lexicon_name}'
 			+ f'/article/{filesafe_title(span.cite_target)}')
 		return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
 class PreviewHtmlRenderer(RenderableVisitor):
 	def __init__(self, lexicon):
 		with lexicon.ctx.read('info') as info:
 			self.article_map = {
 				title: article.character
 				for title, article in info.items()
 			}
 		self.citations = []
 		self.contents = ""
 	def TextSpan(self, span):
 		return span.innertext
 	def LineBreak(self, span):
 		return '<br>'
 	def ParsedArticle(self, span):
 		self.contents = '\n'.join(span.recurse(self))
 		return self
 	def BodyParagraph(self, span):
 		return f'<p>{"".join(span.recurse(self))}</p>'
 	def SignatureParagraph(self, span):
 		return (
 			'<hr><span class="signature"><p>'
 			f'{"".join(span.recurse(self))}'
 			'</p></span>'
 		)
 	def BoldSpan(self, span):
 		return f'<b>{"".join(span.recurse(self))}</b>'
 	def ItalicSpan(self, span):
 		return f'<i>{"".join(span.recurse(self))}</i>'
 	def CitationSpan(self, span):
 		if span.cite_target in self.article_map:
 			if self.article_map.get(span.cite_target):
 				link_class = '[extant]'
 			else:
 				link_class = '[phantom]'
 		else:
 			link_class = '[new]'
 		self.citations.append(f'{span.cite_target} {link_class}')
 		return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
--- a/amanuensis/server/session/init.py
+++ b/amanuensis/server/session/init.py
@ -15,9 +15,7 @@ from amanuensis.lexicon import (
 	create_character_in_lexicon,
 	get_draft)
 from amanuensis.models import LexiconModel
-from amanuensis.parser import (
+from amanuensis.parser import parse_raw_markdown
 	parse_raw_markdown,
 	PreviewHtmlRenderer)
 from amanuensis.server.helpers import (
 	lexicon_param,
 	player_required,
@ -29,7 +27,7 @@ from .forms import (
 	LexiconPublishTurnForm,
 	LexiconConfigForm)
-from .editor import load_editor, new_draft, update_draft
+from .editor import load_editor, new_draft, update_draft, PreviewHtmlRenderer
 bp_session = Blueprint('session', __name__,
--- a/amanuensis/server/session/editor.py
+++ b/amanuensis/server/session/editor.py
@ -17,8 +17,56 @@ from amanuensis.lexicon import (
 from amanuensis.models import LexiconModel
 from amanuensis.parser import (
 	normalize_title,
-	parse_raw_markdown,
+	parse_raw_markdown)
-	PreviewHtmlRenderer)
+from amanuensis.parser.core import RenderableVisitor
 class PreviewHtmlRenderer(RenderableVisitor):
 	"""
 	Visitor that renders an article token tree into preview HTML and
 	records a status label for each citation it encounters.
 	"""
 	def __init__(self, lexicon):
 		# Map every title in the lexicon's 'info' context to the
 		# character recorded for that article
 		with lexicon.ctx.read('info') as info:
 			self.article_map = {
 				title: article.character
 				for title, article in info.items()
 			}
 		self.citations = []
 		self.contents = ""
 	def _inner(self, span) -> str:
 		# Render a container's children and concatenate the results
 		return "".join(span.recurse(self))
 	def TextSpan(self, span):
 		return span.innertext
 	def LineBreak(self, span):
 		return '<br>'
 	def ParsedArticle(self, span):
 		# Store the rendered article on the visitor and return the
 		# visitor, so callers can read both contents and citations
 		paragraphs = span.recurse(self)
 		self.contents = '\n'.join(paragraphs)
 		return self
 	def BodyParagraph(self, span):
 		return f'<p>{self._inner(span)}</p>'
 	def SignatureParagraph(self, span):
 		opening = '<hr><span class="signature"><p>'
 		closing = '</p></span>'
 		return opening + self._inner(span) + closing
 	def BoldSpan(self, span):
 		return f'<b>{self._inner(span)}</b>'
 	def ItalicSpan(self, span):
 		return f'<i>{self._inner(span)}</i>'
 	def CitationSpan(self, span):
 		target = span.cite_target
 		# Unknown titles are new; known titles are extant when a
 		# character is recorded for them and phantom otherwise
 		if target not in self.article_map:
 			link_class = '[new]'
 		elif self.article_map.get(target):
 			link_class = '[extant]'
 		else:
 			link_class = '[phantom]'
 		self.citations.append(f'{target} {link_class}')
 		# The bracketed number is this citation's 1-based position in
 		# the citation list
 		inner = self._inner(span)
 		return f'<u>{inner}</u>[{len(self.citations)}]'
 def load_editor(lexicon: LexiconModel, aid: str):
--- a/mypy.ini
+++ b/mypy.ini
@ -1,4 +1,4 @@
 [mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 ; mypy stable doesn't support pyproject.toml yet
--- a/pyproject.toml
+++ b/pyproject.toml
@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"
 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 [tool.pytest.ini_options]
 addopts = "--show-capture=log"
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@ -0,0 +1,463 @@
 from typing import Sequence
 from amanuensis.parser.core import (
    TextSpan,
    LineBreak,
    ParsedArticle,
    BodyParagraph,
    SignatureParagraph,
    BoldSpan,
    ItalicSpan,
    CitationSpan,
    Renderable,
    SpanContainer,
    RenderableVisitor,
    Spans,
 )
 from amanuensis.parser.helpers import normalize_title, filesafe_title, titlesort
 from amanuensis.parser.parsing import (
    parse_breaks,
    parse_paired_formatting,
    parse_paragraph,
    parse_raw_markdown,
 )
 def assert_types(spans: Spans, types: Sequence, loc=None):
    """
    Asserts that a span list matches the given type structure.
    Every entry of `types` is either a span type or a list. In a list, the
    first element is the container type and the rest describe the types of
    the container's contents.
    """
    total = max(len(spans), len(types))
    for index in range(total):
        here = f"{loc}.{index}" if loc else f"{index}"
        # Both sequences must reach this index
        assert index < len(spans), f"Span list unexpectedly short at {here}"
        assert index < len(types), f"Type list unexpectedly short at {here}"
        actual = spans[index]
        expected = types[index]
        if not isinstance(expected, list):
            # A plain type describes a single span
            assert isinstance(actual, Renderable), f"Expected a span at loc {here}"
            assert isinstance(actual, expected), f"Unexpected span type at loc {here}"
            continue
        # A list describes a container followed by its contents
        assert isinstance(
            actual, SpanContainer
        ), f"Expected a span container at loc {here}"
        assert (
            len(actual.spans) == len(expected) - 1
        ), f"Unexpected container size at loc {here}"
        assert isinstance(
            actual, expected[0]
        ), f"Unexpected container type at loc {here}"
        assert_types(actual.spans, expected[1:], loc=here)
 def assert_text(spans: Spans, texts: Sequence, loc=None):
    """
    Asserts that a span list has the given inner text structure.
    Every entry of `texts` is a string (for a text span), a list of such
    entries (for a span container), or anything else (for a line break).
    """
    where = loc if loc else "root"
    assert len(spans) == len(
        texts
    ), f"Unexpected text sequence length at loc {where}"
    for index, (span, expected) in enumerate(zip(spans, texts)):
        here = f"{loc}.{index}" if loc else f"{index}"
        if isinstance(expected, str):
            assert isinstance(span, TextSpan), f"Expected a text span at loc {here}"
            assert span.innertext == expected, f"Unexpected text at loc {here}"
            continue
        if isinstance(expected, list):
            assert isinstance(
                span, SpanContainer
            ), f"Expected a span container at loc {here}"
            assert_text(span.spans, expected, loc=here)
            continue
        # Any other placeholder stands for a line break
        assert isinstance(span, LineBreak), f"Expected a line break at loc {here}"
 def test_parse_breaks():
    """Test parsing for intra-paragraph line breaks"""
    # Declare the locals once; re-annotating a name on every assignment is
    # redundant and is reported as a redefinition by type checkers.
    text: str
    spans: Spans
    # A bare newline without the break mark does not split the text
    text = "One\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan])
    assert_text(spans, [text])
    # Having the mark causes the text to be split across it
    text = r"One\\" + "\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan, LineBreak, TextSpan])
    assert_text(spans, ["One", None, "Two"])
    # Multiple lines can be broken
    text = r"One\\" + "\n" + r"Two\\" + "\nThree"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan, LineBreak, TextSpan, LineBreak, TextSpan])
    assert_text(spans, ["One", None, "Two", None, "Three"])
    # The mark must be at the end of the line; a trailing space after it
    # keeps the text in one span
    text = r"One\\ " + "\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan])
    assert_text(spans, [text])
 def test_parse_pairs_single():
    """Test parsing for bold and italic marks"""
    # Each case is (input text, expected span types, expected inner text).
    # An inner text of None means only the types are checked.
    cases = [
        # Empty pair marks should parse
        ("****", [[BoldSpan]], None),
        ("////", [[ItalicSpan]], None),
        # Pair marks with text inside should parse
        ("**hello**", [[BoldSpan, TextSpan]], [["hello"]]),
        ("//hello//", [[ItalicSpan, TextSpan]], [["hello"]]),
        # Text outside of pair marks should parse on the same level
        ("**hello** world", [[BoldSpan, TextSpan], TextSpan], [["hello"], " world"]),
        ("//hello// world", [[ItalicSpan, TextSpan], TextSpan], [["hello"], " world"]),
        # Text before, between, and after pair marks should parse
        (
            "In the **beginning** was //the// Word",
            [TextSpan, [BoldSpan, TextSpan], TextSpan, [ItalicSpan, TextSpan], TextSpan],
            ["In the ", ["beginning"], " was ", ["the"], " Word"],
        ),
    ]
    for text, types, texts in cases:
        spans = parse_paired_formatting(text)
        assert_types(spans, types)
        if texts is not None:
            assert_text(spans, texts)
 def test_parse_pairs_break():
    """Test pair marks with breaks"""
    # Each case is (input text, expected span types, expected inner text).
    # Break marks inside a pair stay in the text instead of splitting it.
    cases = [
        (
            r"**glory\\" + "\nhammer**",
            [[BoldSpan, TextSpan]],
            [["glory\\\\\nhammer"]],
        ),
        (
            r"//glory\\" + "\nhammer//",
            [[ItalicSpan, TextSpan]],
            [["glory\\\\\nhammer"]],
        ),
        (
            r"**glory\\" + "\n**hammer**",
            [[BoldSpan, TextSpan], TextSpan],
            [["glory\\\\\n"], "hammer**"],
        ),
        (
            r"//glory\\" + "\n//hammer//",
            [[ItalicSpan, TextSpan], TextSpan],
            [["glory\\\\\n"], "hammer//"],
        ),
    ]
    for text, types, texts in cases:
        spans = parse_paired_formatting(text)
        assert_types(spans, types)
        assert_text(spans, texts)
 def test_parse_pairs_nested():
    """Test parsing for nesting bold and italic"""
    # Each case is (input text, expected span types, expected inner text)
    cases = [
        # Simple nested test cases
        ("**//hello//**", [[BoldSpan, [ItalicSpan, TextSpan]]], [[["hello"]]]),
        ("//**world**//", [[ItalicSpan, [BoldSpan, TextSpan]]], [[["world"]]]),
        # Overlap should only parse the first
        (
            "**Hello//world**//",
            [[BoldSpan, TextSpan], TextSpan],
            [["Hello//world"], "//"],
        ),
    ]
    for text, types, texts in cases:
        spans = parse_paired_formatting(text)
        assert_types(spans, types)
        assert_text(spans, texts)
 def test_normalize_title():
    """Check normalize_title, the title normalization used by the citation parser"""
    # (raw title, expected normalized title)
    expectations = [
        ("hello", "Hello"),
        ("  world  ", "World"),
        ("Waiting for           Godot", "Waiting for Godot"),
        ("lowercase letters", "Lowercase letters"),
    ]
    for raw_title, normalized in expectations:
        assert normalize_title(raw_title) == normalized
 def test_parse_citation_single():
    """Test parsing citations, which have internal formatting

    Every case yields a CitationSpan as the first span; the case table lists
    the expected span types, the expected span text, and the expected
    ``cite_target`` of that citation.
    """
    # Each case is (raw text, expected span types, expected text, expected target)
    cases = [
        # With no pipe, the citation text is also the (normalized) target
        ("[[hello]]", [[CitationSpan, TextSpan]], [["hello"]], "Hello"),
        # Text before the pipe is displayed, text after it is the target
        ("[[hello|world]]", [[CitationSpan, TextSpan]], [["hello"]], "World"),
        # Only the first pipe splits; later pipes belong to the target
        ("[[hello||world]]", [[CitationSpan, TextSpan]], [["hello"]], "|world"),
        # Displayed text keeps its whitespace while the target is normalized
        (
            "[[  hello  |  world  ]]",
            [[CitationSpan, TextSpan]],
            [["  hello  "]],
            "World",
        ),
        ("[[faith|hope|love]]", [[CitationSpan, TextSpan]], [["faith"]], "Hope|love"),
        # The first closing mark ends the citation; the remainder is plain text
        (
            "[[ [[|]] ]]",
            [[CitationSpan, TextSpan], TextSpan],
            [[" [["], " ]]"],
            "",
        ),
    ]
    for text, expected_types, expected_text, expected_target in cases:
        spans = parse_paired_formatting(text)
        assert_types(spans, expected_types)
        assert_text(spans, expected_text)
        # Annotated at a single site so the name is not re-declared per case
        citation: CitationSpan = spans[0]
        assert citation.cite_target == expected_target
 def test_parse_citation_break():
    """Test citations with breaks

    A line break inside a citation is kept verbatim in the citation text,
    while the expected ``cite_target`` has the newline replaced by a space.
    """
    # Each case is (raw text, expected span text, expected target)
    cases = [
        # Break in a citation without a pipe: text and target both contain it
        ("[[hello\\\\\nworld]]", [["hello\\\\\nworld"]], "Hello\\\\ world"),
        # Break after the pipe only affects the target
        ("[[one|two\\\\\nthree]]", [["one"]], "Two\\\\ three"),
    ]
    for text, expected_text, expected_target in cases:
        spans = parse_paired_formatting(text)
        assert_types(spans, [[CitationSpan, TextSpan]])
        assert_text(spans, expected_text)
        # Annotated at a single site so the name is not re-declared per case
        citation: CitationSpan = spans[0]
        assert citation.cite_target == expected_target
 def test_parse_citation_nested():
    """Test nesting with citations"""
    text: str
    spans: Spans
    # Declared once, like text and spans, instead of re-annotating the name
    # at every assignment below
    citation: CitationSpan

    # Bold inside a citation nests as a span; the target keeps the raw marks
    text = "[[**hello world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, [BoldSpan, TextSpan]]])
    assert_text(spans, [[["hello world"]]])
    citation = spans[0]
    assert citation.cite_target == "**hello world**"

    # A pipe between the bold marks splits them, so no bold span is produced
    text = "[[**hello|world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["**hello"]])
    citation = spans[0]
    assert citation.cite_target == "World**"

    # A citation inside bold nests; the citation is the bold span's first child
    text = "**[[hello world]]**"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[BoldSpan, [CitationSpan, TextSpan]]])
    assert_text(spans, [[["hello world"]]])
    citation = spans[0].spans[0]
    assert citation.cite_target == "Hello world"

    # Overlap starting with bold: the bold pair parses, the brackets stay text
    text = "**[[hello world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
    assert_text(spans, [["[[hello world"], "]]"])

    # Overlap starting with a citation: the citation parses, the marks stay text
    text = "[[**hello world]]**"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
    assert_text(spans, [["**hello world"], "**"])
    citation = spans[0]
    assert citation.cite_target == "**hello world"
 def test_parse_paragraphs():
    """Check that parse_paragraph yields the right paragraph container"""
    # Each case is (raw paragraph, expected paragraph type, expected inner text)
    cases = [
        # Body paragraph
        (
            "\tIn the beginning was the Word.",
            BodyParagraph,
            "In the beginning was the Word.",
        ),
        # Signature paragraph
        (
            "~Ersatz Scrivener, scholar extraordinaire",
            SignatureParagraph,
            "Ersatz Scrivener, scholar extraordinaire",
        ),
    ]
    for raw, paragraph_type, inner_text in cases:
        paragraph = parse_paragraph(raw)
        assert_types([paragraph], [[paragraph_type, TextSpan]])
        assert_text([paragraph], [[inner_text]])
 def test_parse_article():
    """Run the full article parser over two body paragraphs and a signature"""
    # Paragraphs are joined with a blank line between them
    paragraphs = [
        "Writing a **unit test** requires having test //content//.",
        "This content, of course, must be [[created|Writing test collateral]].",
        "~Bucky\\\\\nUnit test writer",
    ]
    article: str = "\n\n".join(paragraphs)
    parsed: ParsedArticle = parse_raw_markdown(article)

    # Expected span types, one list per paragraph
    formatted_body_types = [
        BodyParagraph,
        TextSpan,
        [BoldSpan, TextSpan],
        TextSpan,
        [ItalicSpan, TextSpan],
        TextSpan,
    ]
    citing_body_types = [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan]
    signature_types = [SignatureParagraph, TextSpan, LineBreak, TextSpan]
    assert_types(
        [parsed],
        [[ParsedArticle, formatted_body_types, citing_body_types, signature_types]],
    )

    # Expected span text, one list per paragraph
    formatted_body_text = [
        "Writing a ",
        ["unit test"],
        " requires having test ",
        ["content"],
        ".",
    ]
    citing_body_text = ["This content, of course, must be ", ["created"], "."]
    # None is the expected entry for the LineBreak span
    signature_text = ["Bucky", None, "Unit test writer"]
    assert_text(
        [parsed],
        [[formatted_body_text, citing_body_text, signature_text]],
    )
 def test_visitor():
    """Test that a visitor dispatches to hooks correctly

    Renders a parsed article with a visitor whose hooks record every span
    they receive, then checks the recorded spans against the expected
    sequence of span types.
    """

    class TestVisitor(RenderableVisitor):
        """Visitor that records each visited span and checks its hook's type"""

        def __init__(self):
            # Spans in the order the hooks received them
            self.visited = []

        def TextSpan(self, span: TextSpan):
            assert isinstance(span, TextSpan)
            self.visited.append(span)

        def LineBreak(self, span: LineBreak):
            assert isinstance(span, LineBreak)
            self.visited.append(span)

        # The hooks below call span.recurse(self) after recording the span

        def ParsedArticle(self, span: ParsedArticle):
            assert isinstance(span, ParsedArticle)
            self.visited.append(span)
            span.recurse(self)

        def BodyParagraph(self, span: BodyParagraph):
            assert isinstance(span, BodyParagraph)
            self.visited.append(span)
            span.recurse(self)

        def SignatureParagraph(self, span: SignatureParagraph):
            assert isinstance(span, SignatureParagraph)
            self.visited.append(span)
            span.recurse(self)

        def BoldSpan(self, span: BoldSpan):
            assert isinstance(span, BoldSpan)
            self.visited.append(span)
            span.recurse(self)

        def ItalicSpan(self, span: ItalicSpan):
            assert isinstance(span, ItalicSpan)
            self.visited.append(span)
            span.recurse(self)

        def CitationSpan(self, span: CitationSpan):
            assert isinstance(span, CitationSpan)
            self.visited.append(span)
            span.recurse(self)

    article: str = (
        "Writing a **unit test** requires having test //content//.\n\n"
        "This content, of course, must be [[created|Writing test collateral]].\n\n"
        "~Bucky\\\\\nUnit test writer"
    )
    parsed: ParsedArticle = parse_raw_markdown(article)

    visitor = TestVisitor()
    # All the typecheck asserts pass
    parsed.render(visitor)

    # The test article should parse into these spans and visit in this (arbitrary) order
    type_order = [
        ParsedArticle,
        BodyParagraph,
        TextSpan,
        BoldSpan,
        TextSpan,
        TextSpan,
        ItalicSpan,
        TextSpan,
        TextSpan,
        BodyParagraph,
        TextSpan,
        CitationSpan,
        TextSpan,
        TextSpan,
        SignatureParagraph,
        TextSpan,
        LineBreak,
        TextSpan,
    ]
    assert len(visitor.visited) == len(type_order), (
        f"visited {len(visitor.visited)} spans, expected {len(type_order)}"
    )
    # The loop variable is named expected_type so the builtin type() is not shadowed
    for index, (span, expected_type) in enumerate(zip(visitor.visited, type_order)):
        assert isinstance(span, expected_type), (
            f"span {index} is {span.__class__.__name__}, "
            f"expected {expected_type.__name__}"
        )
Author	SHA1	Message	Date
Tim Van Baak	00738b5a45	Add renderable visitor unit test	2021-06-11 23:55:52 -07:00
Tim Van Baak	fde12ac818	Add paragraph and article parser tests	2021-06-11 23:55:52 -07:00
Tim Van Baak	2c294f7f12	Add citation parsing unit tests	2021-06-11 23:55:52 -07:00
Tim Van Baak	6f380bd495	Fix parsing pair marks with line breaks inside	2021-06-11 23:55:52 -07:00
Tim Van Baak	7a847e96d3	Add unit tests for line breaks and simple pairs	2021-06-11 23:55:52 -07:00
Tim Van Baak	1c55d866a8	Reorganize parser and style pass	2021-06-11 23:55:52 -07:00
Tim Van Baak	ffa27be86d	Move render visitors out of parser module	2021-06-11 23:55:52 -07:00
Tim Van Baak	7eadaa0db4	Move analysis visitors out of parser module	2021-06-11 23:55:52 -07:00