From 7eadaa0db47641783bb917781ae7738f0414c738 Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 15:39:14 -0700
Subject: [PATCH 1/8] Move analysis visitors out of parser module

---
 amanuensis/lexicon/gameloop.py | 43 ++++++++++++++++++++++++++---
 amanuensis/parser/__init__.py  |  3 ---
 amanuensis/parser/analyze.py   | 49 ----------------------------------
 3 files changed, 40 insertions(+), 55 deletions(-)
 delete mode 100644 amanuensis/parser/analyze.py

diff --git a/amanuensis/lexicon/gameloop.py b/amanuensis/lexicon/gameloop.py
index 1ce2072..3e51150 100644
--- a/amanuensis/lexicon/gameloop.py
+++ b/amanuensis/lexicon/gameloop.py
@@ -9,11 +9,48 @@ from amanuensis.config import ReadOnlyOrderedDict
 from amanuensis.models import LexiconModel, UserModel
 from amanuensis.parser import (
 	parse_raw_markdown,
-	GetCitations,
 	HtmlRenderer,
 	titlesort,
-	filesafe_title,
-	ConstraintAnalysis)
+	filesafe_title)
+from amanuensis.parser.core import RenderableVisitor
+
+
+class GetCitations(RenderableVisitor):
+	def __init__(self):
+		self.citations = []
+
+	def ParsedArticle(self, span):
+		span.recurse(self)
+		return self.citations
+
+	def CitationSpan(self, span):
+		self.citations.append(span.cite_target)
+		return self
+
+
+class ConstraintAnalysis(RenderableVisitor):
+	def __init__(self, lexicon: LexiconModel):
+		self.info: List[str] = []
+		self.warning: List[str] = []
+		self.error: List[str] = []
+
+		self.word_count: int = 0
+		self.citations: list = []
+		self.signatures: int = 0
+
+	def TextSpan(self, span):
+		self.word_count += len(re.split(r'\s+', span.innertext.strip()))
+		return self
+
+	def SignatureParagraph(self, span):
+		self.signatures += 1
+		span.recurse(self)
+		return self
+
+	def CitationSpan(self, span):
+		self.citations.append(span.cite_target)
+		span.recurse(self)
+		return self
 
 
 def get_player_characters(
diff --git a/amanuensis/parser/__init__.py b/amanuensis/parser/__init__.py
index 1de2c5d..5ef2072 100644
--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@@ -2,15 +2,12 @@
 Module encapsulating all markdown parsing functionality.
 """
 
-from .analyze import ConstraintAnalysis, GetCitations
 from .core import normalize_title
 from .helpers import titlesort, filesafe_title
 from .parsing import parse_raw_markdown
 from .render import PreviewHtmlRenderer, HtmlRenderer
 
 __all__ = [
-	ConstraintAnalysis.__name__,
-	GetCitations.__name__,
 	normalize_title.__name__,
 	titlesort.__name__,
 	filesafe_title.__name__,
diff --git a/amanuensis/parser/analyze.py b/amanuensis/parser/analyze.py
deleted file mode 100644
index bf52354..0000000
--- a/amanuensis/parser/analyze.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""
-Internal module encapsulating visitors that compute metrics on articles
-for verification against constraints.
-"""
-
-import re
-from typing import List
-
-from amanuensis.models import LexiconModel
-
-from .core import RenderableVisitor
-
-
-class GetCitations(RenderableVisitor):
-	def __init__(self):
-		self.citations = []
-
-	def ParsedArticle(self, span):
-		span.recurse(self)
-		return self.citations
-
-	def CitationSpan(self, span):
-		self.citations.append(span.cite_target)
-		return self
-
-
-class ConstraintAnalysis(RenderableVisitor):
-	def __init__(self, lexicon: LexiconModel):
-		self.info: List[str] = []
-		self.warning: List[str] = []
-		self.error: List[str] = []
-
-		self.word_count: int = 0
-		self.citations: list = []
-		self.signatures: int = 0
-
-	def TextSpan(self, span):
-		self.word_count += len(re.split(r'\s+', span.innertext.strip()))
-		return self
-
-	def SignatureParagraph(self, span):
-		self.signatures += 1
-		span.recurse(self)
-		return self
-
-	def CitationSpan(self, span):
-		self.citations.append(span.cite_target)
-		span.recurse(self)
-		return self
-- 
2.44.1


From ffa27be86dcb7392f055daf05d9889d492a4585d Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 15:47:54 -0700
Subject: [PATCH 2/8] Move render visitors out of parser module

---
 amanuensis/lexicon/gameloop.py        |  48 +++++++++++-
 amanuensis/lexicon/manage.py          |   2 +-
 amanuensis/parser/__init__.py         |   3 -
 amanuensis/parser/render.py           | 104 --------------------------
 amanuensis/server/session/__init__.py |   6 +-
 amanuensis/server/session/editor.py   |  52 ++++++++++++-
 6 files changed, 100 insertions(+), 115 deletions(-)
 delete mode 100644 amanuensis/parser/render.py

diff --git a/amanuensis/lexicon/gameloop.py b/amanuensis/lexicon/gameloop.py
index 3e51150..d606d57 100644
--- a/amanuensis/lexicon/gameloop.py
+++ b/amanuensis/lexicon/gameloop.py
@@ -9,7 +9,6 @@ from amanuensis.config import ReadOnlyOrderedDict
 from amanuensis.models import LexiconModel, UserModel
 from amanuensis.parser import (
 	parse_raw_markdown,
-	HtmlRenderer,
 	titlesort,
 	filesafe_title)
 from amanuensis.parser.core import RenderableVisitor
@@ -53,6 +52,53 @@ class ConstraintAnalysis(RenderableVisitor):
 		return self
 
 
+class HtmlRenderer(RenderableVisitor):
+	"""
+	Renders an article token tree into published article HTML.
+	"""
+	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
+		self.lexicon_name: str = lexicon_name
+		self.written_articles: Iterable[str] = written_articles
+
+	def TextSpan(self, span):
+		return span.innertext
+
+	def LineBreak(self, span):
+		return '<br>'
+
+	def ParsedArticle(self, span):
+		return '\n'.join(span.recurse(self))
+
+	def BodyParagraph(self, span):
+		return f'<p>{"".join(span.recurse(self))}</p>'
+
+	def SignatureParagraph(self, span):
+		return (
+			'<hr><span class="signature"><p>'
+			f'{"".join(span.recurse(self))}'
+			'</p></span>'
+		)
+
+	def BoldSpan(self, span):
+		return f'<b>{"".join(span.recurse(self))}</b>'
+
+	def ItalicSpan(self, span):
+		return f'<i>{"".join(span.recurse(self))}</i>'
+
+	def CitationSpan(self, span):
+		if span.cite_target in self.written_articles:
+			link_class = ''
+		else:
+			link_class = ' class="phantom"'
+		# link = url_for(
+		# 	'lexicon.article',
+		# 	name=self.lexicon_name,
+		# 	title=filesafe_title(span.cite_target))
+		link = (f'/lexicon/{self.lexicon_name}'
+			+ f'/article/{filesafe_title(span.cite_target)}')
+		return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
+
+
 def get_player_characters(
 	lexicon: LexiconModel,
 	uid: str) -> Iterable[ReadOnlyOrderedDict]:
diff --git a/amanuensis/lexicon/manage.py b/amanuensis/lexicon/manage.py
index bdfbeb0..eb7844b 100644
--- a/amanuensis/lexicon/manage.py
+++ b/amanuensis/lexicon/manage.py
@@ -13,7 +13,7 @@
 # from amanuensis.config.loader import AttrOrderedDict
 # from amanuensis.errors import ArgumentError
 # from amanuensis.lexicon import LexiconModel
-# from amanuensis.parser import parse_raw_markdown, GetCitations, HtmlRenderer, filesafe_title, titlesort
+# from amanuensis.parser import parse_raw_markdown, filesafe_title, titlesort
 # from amanuensis.resources import get_stream
 
 
diff --git a/amanuensis/parser/__init__.py b/amanuensis/parser/__init__.py
index 5ef2072..aff1bd4 100644
--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@@ -5,13 +5,10 @@ Module encapsulating all markdown parsing functionality.
 from .core import normalize_title
 from .helpers import titlesort, filesafe_title
 from .parsing import parse_raw_markdown
-from .render import PreviewHtmlRenderer, HtmlRenderer
 
 __all__ = [
 	normalize_title.__name__,
 	titlesort.__name__,
 	filesafe_title.__name__,
 	parse_raw_markdown.__name__,
-	PreviewHtmlRenderer.__name__,
-	HtmlRenderer.__name__,
 ]
diff --git a/amanuensis/parser/render.py b/amanuensis/parser/render.py
deleted file mode 100644
index 9313c07..0000000
--- a/amanuensis/parser/render.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-Internal module encapsulating visitors that render articles into
-readable formats.
-"""
-
-from typing import Iterable
-
-from .core import RenderableVisitor
-from .helpers import filesafe_title
-
-
-class HtmlRenderer(RenderableVisitor):
-	"""
-	Renders an article token tree into published article HTML.
-	"""
-	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
-		self.lexicon_name: str = lexicon_name
-		self.written_articles: Iterable[str] = written_articles
-
-	def TextSpan(self, span):
-		return span.innertext
-
-	def LineBreak(self, span):
-		return '<br>'
-
-	def ParsedArticle(self, span):
-		return '\n'.join(span.recurse(self))
-
-	def BodyParagraph(self, span):
-		return f'<p>{"".join(span.recurse(self))}</p>'
-
-	def SignatureParagraph(self, span):
-		return (
-			'<hr><span class="signature"><p>'
-			f'{"".join(span.recurse(self))}'
-			'</p></span>'
-		)
-
-	def BoldSpan(self, span):
-		return f'<b>{"".join(span.recurse(self))}</b>'
-
-	def ItalicSpan(self, span):
-		return f'<i>{"".join(span.recurse(self))}</i>'
-
-	def CitationSpan(self, span):
-		if span.cite_target in self.written_articles:
-			link_class = ''
-		else:
-			link_class = ' class="phantom"'
-		# link = url_for(
-		# 	'lexicon.article',
-		# 	name=self.lexicon_name,
-		# 	title=filesafe_title(span.cite_target))
-		link = (f'/lexicon/{self.lexicon_name}'
-			+ f'/article/{filesafe_title(span.cite_target)}')
-		return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
-
-
-class PreviewHtmlRenderer(RenderableVisitor):
-	def __init__(self, lexicon):
-		with lexicon.ctx.read('info') as info:
-			self.article_map = {
-				title: article.character
-				for title, article in info.items()
-			}
-		self.citations = []
-		self.contents = ""
-
-	def TextSpan(self, span):
-		return span.innertext
-
-	def LineBreak(self, span):
-		return '<br>'
-
-	def ParsedArticle(self, span):
-		self.contents = '\n'.join(span.recurse(self))
-		return self
-
-	def BodyParagraph(self, span):
-		return f'<p>{"".join(span.recurse(self))}</p>'
-
-	def SignatureParagraph(self, span):
-		return (
-			'<hr><span class="signature"><p>'
-			f'{"".join(span.recurse(self))}'
-			'</p></span>'
-		)
-
-	def BoldSpan(self, span):
-		return f'<b>{"".join(span.recurse(self))}</b>'
-
-	def ItalicSpan(self, span):
-		return f'<i>{"".join(span.recurse(self))}</i>'
-
-	def CitationSpan(self, span):
-		if span.cite_target in self.article_map:
-			if self.article_map.get(span.cite_target):
-				link_class = '[extant]'
-			else:
-				link_class = '[phantom]'
-		else:
-			link_class = '[new]'
-		self.citations.append(f'{span.cite_target} {link_class}')
-		return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
diff --git a/amanuensis/server/session/__init__.py b/amanuensis/server/session/__init__.py
index 4c27787..743754d 100644
--- a/amanuensis/server/session/__init__.py
+++ b/amanuensis/server/session/__init__.py
@@ -15,9 +15,7 @@ from amanuensis.lexicon import (
 	create_character_in_lexicon,
 	get_draft)
 from amanuensis.models import LexiconModel
-from amanuensis.parser import (
-	parse_raw_markdown,
-	PreviewHtmlRenderer)
+from amanuensis.parser import parse_raw_markdown
 from amanuensis.server.helpers import (
 	lexicon_param,
 	player_required,
@@ -29,7 +27,7 @@ from .forms import (
 	LexiconPublishTurnForm,
 	LexiconConfigForm)
 
-from .editor import load_editor, new_draft, update_draft
+from .editor import load_editor, new_draft, update_draft, PreviewHtmlRenderer
 
 
 bp_session = Blueprint('session', __name__,
diff --git a/amanuensis/server/session/editor.py b/amanuensis/server/session/editor.py
index 8492966..79a3cb3 100644
--- a/amanuensis/server/session/editor.py
+++ b/amanuensis/server/session/editor.py
@@ -17,8 +17,56 @@ from amanuensis.lexicon import (
 from amanuensis.models import LexiconModel
 from amanuensis.parser import (
 	normalize_title,
-	parse_raw_markdown,
-	PreviewHtmlRenderer)
+	parse_raw_markdown)
+from amanuensis.parser.core import RenderableVisitor
+
+
+class PreviewHtmlRenderer(RenderableVisitor):
+	def __init__(self, lexicon):
+		with lexicon.ctx.read('info') as info:
+			self.article_map = {
+				title: article.character
+				for title, article in info.items()
+			}
+		self.citations = []
+		self.contents = ""
+
+	def TextSpan(self, span):
+		return span.innertext
+
+	def LineBreak(self, span):
+		return '<br>'
+
+	def ParsedArticle(self, span):
+		self.contents = '\n'.join(span.recurse(self))
+		return self
+
+	def BodyParagraph(self, span):
+		return f'<p>{"".join(span.recurse(self))}</p>'
+
+	def SignatureParagraph(self, span):
+		return (
+			'<hr><span class="signature"><p>'
+			f'{"".join(span.recurse(self))}'
+			'</p></span>'
+		)
+
+	def BoldSpan(self, span):
+		return f'<b>{"".join(span.recurse(self))}</b>'
+
+	def ItalicSpan(self, span):
+		return f'<i>{"".join(span.recurse(self))}</i>'
+
+	def CitationSpan(self, span):
+		if span.cite_target in self.article_map:
+			if self.article_map.get(span.cite_target):
+				link_class = '[extant]'
+			else:
+				link_class = '[phantom]'
+		else:
+			link_class = '[new]'
+		self.citations.append(f'{span.cite_target} {link_class}')
+		return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
 
 
 def load_editor(lexicon: LexiconModel, aid: str):
-- 
2.44.1


From 1c55d866a8054cc5627a342f453046a1fd7f007e Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 15:57:48 -0700
Subject: [PATCH 3/8] Reorganize parser and style pass

---
 amanuensis/parser/__init__.py |  13 +-
 amanuensis/parser/core.py     | 167 ++++++++++-----------
 amanuensis/parser/helpers.py  |  65 ++++++---
 amanuensis/parser/parsing.py  | 263 ++++++++++++++++++----------------
 mypy.ini                      |   2 +-
 pyproject.toml                |   4 +-
 6 files changed, 280 insertions(+), 234 deletions(-)

diff --git a/amanuensis/parser/__init__.py b/amanuensis/parser/__init__.py
index aff1bd4..7aa5bd7 100644
--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@@ -2,13 +2,14 @@
 Module encapsulating all markdown parsing functionality.
 """
 
-from .core import normalize_title
-from .helpers import titlesort, filesafe_title
+from .core import RenderableVisitor
+from .helpers import normalize_title, filesafe_title, titlesort
 from .parsing import parse_raw_markdown
 
 __all__ = [
-	normalize_title.__name__,
-	titlesort.__name__,
-	filesafe_title.__name__,
-	parse_raw_markdown.__name__,
+    "RenderableVisitor",
+    "normalize_title",
+    "filesafe_title",
+    "titlesort",
+    "parse_raw_markdown",
 ]
diff --git a/amanuensis/parser/core.py b/amanuensis/parser/core.py
index 76f15de..d50049a 100644
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@@ -5,131 +5,134 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """
 
-import re
 from typing import Callable, Any, Sequence
 
-RenderHook = Callable[['Renderable'], Any]
-Spans = Sequence['Renderable']
+from .helpers import normalize_title
 
 
-def normalize_title(title: str) -> str:
-	"""
-	Normalizes strings as titles:
-	- Strips leading and trailing whitespace
-	- Merges internal whitespace into a single space
-	- Capitalizes the first word
-	"""
-	cleaned = re.sub(r'\s+', " ", title.strip())
-	return cleaned[:1].capitalize() + cleaned[1:]
+RenderHook = Callable[["Renderable"], Any]
+Spans = Sequence["Renderable"]
 
 
-class Renderable():
-	"""
-	Base class for parsed markdown. Provides the `render()` method for
-	visiting the token tree.
-	"""
-	def render(self: 'Renderable', renderer: 'RenderableVisitor'):
-		"""
-		Execute the apppropriate visitor method on this Renderable.
-		"""
-		hook: RenderHook = getattr(renderer, type(self).__name__, None)
-		if hook:
-			return hook(self)
-		return None
+class Renderable:
+    """
+    Base class for parsed markdown. Provides the `render()` method for
+    visiting the token tree.
+    """
+
+    def render(self: "Renderable", renderer: "RenderableVisitor"):
+        """
+        Execute the appropriate visitor method on this Renderable.
+        Visitors implement hooks by declaring methods whose names are
+        the name of a Renderable class.
+        """
+        hook: RenderHook = getattr(renderer, type(self).__name__, None)
+        if hook:
+            return hook(self)
+        return None
 
 
 class TextSpan(Renderable):
-	"""An unstyled length of text."""
-	def __init__(self, innertext: str):
-		self.innertext = innertext
+    """A length of text."""
 
-	def __str__(self):
-		return f"[{self.innertext}]"
+    def __init__(self, innertext: str):
+        self.innertext = innertext
+
+    def __str__(self):
+        return f"[{self.innertext}]"
 
 
 class LineBreak(Renderable):
-	"""A line break within a paragraph."""
-	def __str__(self):
-		return "<break>"
+    """A line break within a paragraph."""
+
+    def __str__(self):
+        return "<break>"
 
 
 class SpanContainer(Renderable):
-	"""A formatting element that wraps some amount of text."""
-	def __init__(self, spans: Spans):
-		self.spans: Spans = spans
+    """A formatting element that wraps some amount of text."""
 
-	def __str__(self):
-		return (f'[{type(self).__name__} '
-			+ f'{" ".join([str(span) for span in self.spans])}]')
+    def __init__(self, spans: Spans):
+        self.spans: Spans = spans
 
-	def recurse(self, renderer: 'RenderableVisitor'):
-		return [child.render(renderer) for child in self.spans]
+    def __str__(self):
+        return (
+            f"[{type(self).__name__} "
+            + f'{" ".join([str(span) for span in self.spans])}]'
+        )
+
+    def recurse(self, renderer: "RenderableVisitor"):
+        return [child.render(renderer) for child in self.spans]
 
 
 class ParsedArticle(SpanContainer):
-	"""Token tree root node, containing some number of paragraph tokens."""
+    """Token tree root node, containing some number of paragraph tokens."""
 
 
 class BodyParagraph(SpanContainer):
-	"""A normal paragraph."""
+    """A normal paragraph."""
 
 
 class SignatureParagraph(SpanContainer):
-	"""A paragraph preceded by a signature mark."""
+    """A paragraph preceded by a signature mark."""
 
 
 class BoldSpan(SpanContainer):
-	"""A span of text inside bold marks."""
+    """A span of text inside bold marks."""
 
 
 class ItalicSpan(SpanContainer):
-	"""A span of text inside italic marks."""
+    """A span of text inside italic marks."""
 
 
 class CitationSpan(SpanContainer):
-	"""A citation to another article."""
-	def __init__(self, spans: Spans, cite_target: str):
-		super().__init__(spans)
-		# Normalize citation target on parse, since we don't want
-		# abnormal title strings lying around causing trouble.
-		self.cite_target: str = normalize_title(cite_target)
+    """A citation to another article."""
 
-	def __str__(self):
-		return (f'{{{" ".join([str(span) for span in self.spans])}'
-			+ f':{self.cite_target}}}')
+    def __init__(self, spans: Spans, cite_target: str):
+        super().__init__(spans)
+        # Normalize citation target on parse, since we don't want
+        # abnormal title strings lying around causing trouble.
+        self.cite_target: str = normalize_title(cite_target)
+
+    def __str__(self) -> str:
+        return (
+            f'{{{" ".join([str(span) for span in self.spans])}'
+            + f":{self.cite_target}}}"
+        )
 
 
-class RenderableVisitor():
-	"""
-	Default implementation of the visitor pattern. Executes once on
-	each token in the tree and returns itself.
-	"""
-	def TextSpan(self, span: TextSpan):
-		return self
+class RenderableVisitor:
+    """
+    Default implementation of the visitor pattern. Executes once on
+    each token in the tree and returns itself.
+    """
 
-	def LineBreak(self, span: LineBreak):
-		return self
+    def TextSpan(self, span: TextSpan):
+        return self
 
-	def ParsedArticle(self, span: ParsedArticle):
-		span.recurse(self)
-		return self
+    def LineBreak(self, span: LineBreak):
+        return self
 
-	def BodyParagraph(self, span: BodyParagraph):
-		span.recurse(self)
-		return self
+    def ParsedArticle(self, span: ParsedArticle):
+        span.recurse(self)
+        return self
 
-	def SignatureParagraph(self, span: SignatureParagraph):
-		span.recurse(self)
-		return self
+    def BodyParagraph(self, span: BodyParagraph):
+        span.recurse(self)
+        return self
 
-	def BoldSpan(self, span: BoldSpan):
-		span.recurse(self)
-		return self
+    def SignatureParagraph(self, span: SignatureParagraph):
+        span.recurse(self)
+        return self
 
-	def ItalicSpan(self, span: ItalicSpan):
-		span.recurse(self)
-		return self
+    def BoldSpan(self, span: BoldSpan):
+        span.recurse(self)
+        return self
 
-	def CitationSpan(self, span: CitationSpan):
-		span.recurse(self)
-		return self
+    def ItalicSpan(self, span: ItalicSpan):
+        span.recurse(self)
+        return self
+
+    def CitationSpan(self, span: CitationSpan):
+        span.recurse(self)
+        return self
diff --git a/amanuensis/parser/helpers.py b/amanuensis/parser/helpers.py
index e6eabfb..7f2123d 100644
--- a/amanuensis/parser/helpers.py
+++ b/amanuensis/parser/helpers.py
@@ -1,28 +1,53 @@
+"""
+Helper functions for manipulating titles during parsing
+"""
+
 import re
 import urllib.parse
 
 
+def normalize_title(title: str) -> str:
+    """
+    Normalizes strings as titles:
+    - Strips leading and trailing whitespace
+    - Merges internal whitespace into a single space
+    - Capitalizes the first word
+    """
+    cleaned = re.sub(r"\s+", " ", title.strip())
+    return cleaned[:1].capitalize() + cleaned[1:]
+
+
 def titlesort(title: str) -> str:
-	"""
-	Strips articles off of titles for alphabetical sorting purposes
-	"""
-	lower = title.lower()
-	if lower.startswith("the "):
-		return lower[4:]
-	if lower.startswith("an "):
-		return lower[3:]
-	if lower.startswith("a "):
-		return lower[2:]
-	return lower
+    """
+    Strips articles off of titles for alphabetical sorting purposes
+    """
+    lower = title.lower()
+    if lower.startswith("the "):
+        return lower[4:]
+    if lower.startswith("an "):
+        return lower[3:]
+    if lower.startswith("a "):
+        return lower[2:]
+    return lower
 
 
 def filesafe_title(title: str) -> str:
-	"""
-	Makes an article title filename-safe.
-	"""
-	s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
-	s = re.sub(r"~", '-', s)        # parse.quote doesn't catch ~
-	s = urllib.parse.quote(s)       # Encode all other characters
-	s = re.sub(r"%", "", s)         # Strip encoding %s
-	s = s[:64]                  	# Limit to 64 characters
-	return s
+    """
+    Makes an article title filename-safe.
+    """
+    # Replace whitespace with _
+    s = re.sub(r"\s+", "_", title)
+
+    # parse.quote doesn't catch ~
+    s = re.sub(r"~", "-", s)
+
+    # Encode all other characters
+    s = urllib.parse.quote(s)
+
+    # Strip encoding %s
+    s = re.sub(r"%", "", s)
+
+    # Limit to 64 characters
+    s = s[:64]
+
+    return s
diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py
index 4e0695f..c6bb50b 100644
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -7,150 +7,167 @@ import re
 from typing import Sequence
 
 from .core import (
-	TextSpan,
-	LineBreak,
-	ParsedArticle,
-	BodyParagraph,
-	SignatureParagraph,
-	BoldSpan,
-	ItalicSpan,
-	CitationSpan,
-	Renderable,
-	SpanContainer
+    TextSpan,
+    LineBreak,
+    ParsedArticle,
+    BodyParagraph,
+    SignatureParagraph,
+    BoldSpan,
+    ItalicSpan,
+    CitationSpan,
+    Renderable,
+    SpanContainer,
 )
 
 Spans = Sequence[Renderable]
 
 
 def parse_raw_markdown(text: str) -> ParsedArticle:
-	"""
-	Parses a body of Lexipython markdown into a Renderable tree.
-	"""
-	# Parse each paragraph individually, as no formatting applies
-	# across paragraphs
-	paragraphs = re.split(r'\n\n+', text)
-	parse_results = list(map(parse_paragraph, paragraphs))
-	return ParsedArticle(parse_results)
+    """
+    Parses a body of Lexipython markdown into a Renderable tree.
+    """
+    # Parse each paragraph individually, as no formatting applies
+    # across paragraphs
+    paragraphs = re.split(r"\n\n+", text)
+    parse_results = list(map(parse_paragraph, paragraphs))
+    return ParsedArticle(parse_results)
 
 
 def parse_paragraph(text: str) -> SpanContainer:
-	# Parse the paragraph as a span of text
-	text = text.strip()
-	if text and text[0] == '~':
-		return SignatureParagraph(parse_paired_formatting(text[1:]))
-	else:
-		return BodyParagraph(parse_paired_formatting(text))
+    # Parse the paragraph as a span of text
+    text = text.strip()
+    if text and text[0] == "~":
+        return SignatureParagraph(parse_paired_formatting(text[1:]))
+    else:
+        return BodyParagraph(parse_paired_formatting(text))
 
 
 def parse_paired_formatting(
-		text: str,
-		cite: bool = True,
-		bold: bool = True,
-		italic: bool = True) -> Spans:
-	# Find positions of any paired formatting
-	first_cite = find_pair(text, "[[", "]]", cite)
-	first_bold = find_pair(text, "**", "**", bold)
-	first_italic = find_pair(text, "//", "//", italic)
-	# Load the possible parse handlers into the map
-	handlers = {}
-	handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
-	handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
-	handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
-	# If nothing was found, move on to the next parsing step
-	handlers[-1] = lambda: parse_breaks(text)
-	# Choose a handler based on the earliest found result
-	finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
-	first = min(finds) if finds else -1
-	return handlers[first]()
+    text: str,
+    cite: bool = True,
+    bold: bool = True,
+    italic: bool = True,
+) -> Spans:
+    # Find positions of any paired formatting
+    first_cite = find_pair(text, "[[", "]]", cite)
+    first_bold = find_pair(text, "**", "**", bold)
+    first_italic = find_pair(text, "//", "//", italic)
+    # Load the possible parse handlers into the map
+    handlers = {}
+    handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
+    handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
+    handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
+    # If nothing was found, move on to the next parsing step
+    handlers[-1] = lambda: parse_breaks(text)
+    # Choose a handler based on the earliest found result
+    finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
+    first = min(finds) if finds else -1
+    return handlers[first]()
 
 
 def find_pair(
-		text: str,
-		open_tag: str,
-		close_tag: str,
-		valid: bool) -> int:
-	# If skipping, return -1
-	if not valid:
-		return -1
-	# If the open tag wasn't found, return -1
-	first = text.find(open_tag)
-	if first < 0:
-		return -1
-	# If the close tag wasn't found after the open tag, return -1
-	second = text.find(close_tag, first + len(open_tag))
-	if second < 0:
-		return -1
-	# Otherwise, the pair exists
-	return first
+    text: str,
+    open_tag: str,
+    close_tag: str,
+    valid: bool,
+) -> int:
+    # If skipping, return -1
+    if not valid:
+        return -1
+    # If the open tag wasn't found, return -1
+    first = text.find(open_tag)
+    if first < 0:
+        return -1
+    # If the close tag wasn't found after the open tag, return -1
+    second = text.find(close_tag, first + len(open_tag))
+    if second < 0:
+        return -1
+    # Otherwise, the pair exists
+    return first
 
 
-def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
-	cite_open = text.find("[[")
-	if cite_open > -1:
-		cite_close = text.find("]]", cite_open + 2)
-		# Since we searched for pairs from the beginning, there should be no
-		# undetected pair formatting before this one, so move to the next
-		# level of parsing
-		spans_before = parse_breaks(text[:cite_open])
-		# Continue parsing pair formatting after this one closes with all
-		# three as valid choices
-		spans_after = parse_paired_formatting(text[cite_close + 2:])
-		# Parse inner text and skip parsing for this format pair
-		text_inner = text[cite_open + 2:cite_close]
-		# For citations specifically, we may need to split off a citation
-		# target from the alias text
-		inner_split = text_inner.split("|", 1)
-		text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-		spans_inner = parse_paired_formatting(text_inner_actual,
-			cite=False, bold=bold, italic=italic)
-		citation = CitationSpan(spans_inner, cite_target)
-		return [*spans_before, citation, *spans_after]
-	# Should never happen
-	return parse_breaks(text)
+def parse_citation(
+    text: str,
+    bold: bool = True,
+    italic: bool = True,
+) -> Spans:
+    cite_open = text.find("[[")
+    if cite_open > -1:
+        cite_close = text.find("]]", cite_open + 2)
+        # Since we searched for pairs from the beginning, there should be no
+        # undetected pair formatting before this one, so move to the next
+        # level of parsing
+        spans_before = parse_breaks(text[:cite_open])
+        # Continue parsing pair formatting after this one closes with all
+        # three as valid choices
+        spans_after = parse_paired_formatting(text[cite_close + 2 :])
+        # Parse inner text and skip parsing for this format pair
+        text_inner = text[cite_open + 2 : cite_close]
+        # For citations specifically, we may need to split off a citation
+        # target from the alias text
+        inner_split = text_inner.split("|", 1)
+        text_inner_actual, cite_target = inner_split[0], inner_split[-1]
+        spans_inner = parse_paired_formatting(
+            text_inner_actual, cite=False, bold=bold, italic=italic
+        )
+        citation = CitationSpan(spans_inner, cite_target)
+        return [*spans_before, citation, *spans_after]
+    # Should never happen
+    return parse_breaks(text)
 
 
-def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
-	bold_open = text.find("**")
-	if bold_open > -1:
-		bold_close = text.find("**", bold_open + 2)
-		# Should be no formatting behind us
-		spans_before = parse_breaks(text[:bold_open])
-		# Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[bold_close + 2:])
-		# Parse inner text minus bold parsing
-		text_inner = text[bold_open + 2:bold_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=False, italic=italic)
-		bold = BoldSpan(spans_inner)
-		return [*spans_before, bold, *spans_after]
-	# Should never happen
-	return parse_italic(text)
+def parse_bold(
+    text: str,
+    cite: bool = True,
+    italic: bool = True,
+) -> Spans:
+    bold_open = text.find("**")
+    if bold_open > -1:
+        bold_close = text.find("**", bold_open + 2)
+        # Should be no formatting behind us
+        spans_before = parse_breaks(text[:bold_open])
+        # Freely parse formatting after us
+        spans_after = parse_paired_formatting(text[bold_close + 2 :])
+        # Parse inner text minus bold parsing
+        text_inner = text[bold_open + 2 : bold_close]
+        spans_inner = parse_paired_formatting(
+            text_inner, cite=cite, bold=False, italic=italic
+        )
+        bold = BoldSpan(spans_inner)
+        return [*spans_before, bold, *spans_after]
+    # Should never happen
+    return parse_italic(text)
 
 
-def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
-	italic_open = text.find("//")
-	if italic_open > -1:
-		italic_close = text.find("//", italic_open + 2)
-		# Should be no formatting behind us
-		spans_before = parse_breaks(text[:italic_open])
-		# Freely parse formatting after us
-		spans_after = parse_paired_formatting(text[italic_close + 2:])
-		# Parse inner text minus italic parsing
-		text_inner = text[italic_open + 2:italic_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=bold, italic=False)
-		italic = ItalicSpan(spans_inner)
-		return [*spans_before, italic, *spans_after]
-	# Should never happen
-	return parse_breaks(text)
+def parse_italic(
+    text: str,
+    cite: bool = True,
+    bold: bool = True,
+) -> Spans:
+    italic_open = text.find("//")
+    if italic_open > -1:
+        italic_close = text.find("//", italic_open + 2)
+        # Should be no formatting behind us
+        spans_before = parse_breaks(text[:italic_open])
+        # Freely parse formatting after us
+        spans_after = parse_paired_formatting(text[italic_close + 2 :])
+        # Parse inner text minus italic parsing
+        text_inner = text[italic_open + 2 : italic_close]
+        spans_inner = parse_paired_formatting(
+            text_inner, cite=cite, bold=bold, italic=False
+        )
+        italic = ItalicSpan(spans_inner)
+        return [*spans_before, italic, *spans_after]
+    # Should never happen
+    return parse_breaks(text)
 
 
 def parse_breaks(text: str) -> Spans:
-	if not text:
-		return []
-	splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
-	spans: Spans = [
-		splits[i // 2] if i % 2 == 0 else LineBreak()
-		for i in range(0, 2 * len(splits) - 1)
-	]
-	return spans
+    if not text:
+        return []
+    splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
+    spans: Spans = [
+        splits[i // 2] if i % 2 == 0 else LineBreak()
+        for i in range(0, 2 * len(splits) - 1)
+    ]
+    return spans
diff --git a/mypy.ini b/mypy.ini
index 0d8ecb7..febf6cd 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,4 +1,4 @@
 [mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 ; mypy stable doesn't support pyproject.toml yet
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 0f28f9a..1070144 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"
 
 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
 
 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 
 [tool.pytest.ini_options]
 addopts = "--show-capture=log"
-- 
2.44.1


From 7a847e96d3403b91c67abd4d02bd893c008f7d52 Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 18:20:23 -0700
Subject: [PATCH 4/8] Add unit tests for line breaks and simple pairs

---
 amanuensis/parser/core.py    |  16 ++--
 amanuensis/parser/parsing.py |  76 +++++++++------
 tests/test_parser.py         | 176 +++++++++++++++++++++++++++++++++++
 3 files changed, 233 insertions(+), 35 deletions(-)
 create mode 100644 tests/test_parser.py

diff --git a/amanuensis/parser/core.py b/amanuensis/parser/core.py
index d50049a..cd1b6a1 100644
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@@ -38,14 +38,14 @@ class TextSpan(Renderable):
     def __init__(self, innertext: str):
         self.innertext = innertext
 
-    def __str__(self):
-        return f"[{self.innertext}]"
+    def __repr__(self):
+        return f"<{self.innertext}>"
 
 
 class LineBreak(Renderable):
     """A line break within a paragraph."""
 
-    def __str__(self):
+    def __repr__(self):
         return "<break>"
 
 
@@ -55,10 +55,10 @@ class SpanContainer(Renderable):
     def __init__(self, spans: Spans):
         self.spans: Spans = spans
 
-    def __str__(self):
+    def __repr__(self):
         return (
-            f"[{type(self).__name__} "
-            + f'{" ".join([str(span) for span in self.spans])}]'
+            f"<{type(self).__name__} "
+            + f'{" ".join([repr(span) for span in self.spans])}>'
         )
 
     def recurse(self, renderer: "RenderableVisitor"):
@@ -94,9 +94,9 @@ class CitationSpan(SpanContainer):
         # abnormal title strings lying around causing trouble.
         self.cite_target: str = normalize_title(cite_target)
 
-    def __str__(self) -> str:
+    def __repr__(self) -> str:
         return (
-            f'{{{" ".join([str(span) for span in self.spans])}'
+            f'{{{" ".join([repr(span) for span in self.spans])}'
             + f":{self.cite_target}}}"
         )
 
diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py
index c6bb50b..a16afae 100644
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -34,6 +34,9 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
 
 
 def parse_paragraph(text: str) -> SpanContainer:
+    """
+    Parses a block of text into a paragraph object.
+    """
     # Parse the paragraph as a span of text
     text = text.strip()
     if text and text[0] == "~":
@@ -44,19 +47,28 @@ def parse_paragraph(text: str) -> SpanContainer:
 
 def parse_paired_formatting(
     text: str,
-    cite: bool = True,
-    bold: bool = True,
-    italic: bool = True,
+    can_cite: bool = True,
+    can_bold: bool = True,
+    can_italic: bool = True,
 ) -> Spans:
+    """
+    Parses citations, bolds, and italics, which can be nested inside each other.
+    """
     # Find positions of any paired formatting
-    first_cite = find_pair(text, "[[", "]]", cite)
-    first_bold = find_pair(text, "**", "**", bold)
-    first_italic = find_pair(text, "//", "//", italic)
+    first_cite = find_pair(text, "[[", "]]") if can_cite else -1
+    first_bold = find_pair(text, "**", "**") if can_bold else -1
+    first_italic = find_pair(text, "//", "//") if can_italic else -1
     # Load the possible parse handlers into the map
     handlers = {}
-    handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
-    handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
-    handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
+    handlers[first_cite] = lambda: parse_citation(
+        text, can_bold=can_bold, can_italic=can_italic
+    )
+    handlers[first_bold] = lambda: parse_bold(
+        text, can_cite=can_cite, can_italic=can_italic
+    )
+    handlers[first_italic] = lambda: parse_italic(
+        text, can_cite=can_cite, can_bold=can_bold
+    )
     # If nothing was found, move on to the next parsing step
     handlers[-1] = lambda: parse_breaks(text)
     # Choose a handler based on the earliest found result
@@ -65,15 +77,10 @@ def parse_paired_formatting(
     return handlers[first]()
 
 
-def find_pair(
-    text: str,
-    open_tag: str,
-    close_tag: str,
-    valid: bool,
-) -> int:
-    # If skipping, return -1
-    if not valid:
-        return -1
+def find_pair(text: str, open_tag: str, close_tag: str) -> int:
+    """
+    Finds the beginning of a pair of formatting marks.
+    """
     # If the open tag wasn't found, return -1
     first = text.find(open_tag)
     if first < 0:
@@ -88,9 +95,12 @@ def find_pair(
 
 def parse_citation(
     text: str,
-    bold: bool = True,
-    italic: bool = True,
+    can_bold: bool = True,
+    can_italic: bool = True,
 ) -> Spans:
+    """
+    Parses text into a citation span.
+    """
     cite_open = text.find("[[")
     if cite_open > -1:
         cite_close = text.find("]]", cite_open + 2)
@@ -108,7 +118,7 @@ def parse_citation(
         inner_split = text_inner.split("|", 1)
         text_inner_actual, cite_target = inner_split[0], inner_split[-1]
         spans_inner = parse_paired_formatting(
-            text_inner_actual, cite=False, bold=bold, italic=italic
+            text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic
         )
         citation = CitationSpan(spans_inner, cite_target)
         return [*spans_before, citation, *spans_after]
@@ -118,9 +128,12 @@ def parse_citation(
 
 def parse_bold(
     text: str,
-    cite: bool = True,
-    italic: bool = True,
+    can_cite: bool = True,
+    can_italic: bool = True,
 ) -> Spans:
+    """
+    Parses text into a bold span.
+    """
     bold_open = text.find("**")
     if bold_open > -1:
         bold_close = text.find("**", bold_open + 2)
@@ -131,7 +144,7 @@ def parse_bold(
         # Parse inner text minus bold parsing
         text_inner = text[bold_open + 2 : bold_close]
         spans_inner = parse_paired_formatting(
-            text_inner, cite=cite, bold=False, italic=italic
+            text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic
         )
         bold = BoldSpan(spans_inner)
         return [*spans_before, bold, *spans_after]
@@ -141,9 +154,12 @@ def parse_bold(
 
 def parse_italic(
     text: str,
-    cite: bool = True,
-    bold: bool = True,
+    can_cite: bool = True,
+    can_bold: bool = True,
 ) -> Spans:
+    """
+    Parses text into an italic span.
+    """
     italic_open = text.find("//")
     if italic_open > -1:
         italic_close = text.find("//", italic_open + 2)
@@ -154,7 +170,7 @@ def parse_italic(
         # Parse inner text minus italic parsing
         text_inner = text[italic_open + 2 : italic_close]
         spans_inner = parse_paired_formatting(
-            text_inner, cite=cite, bold=bold, italic=False
+            text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False
         )
         italic = ItalicSpan(spans_inner)
         return [*spans_before, italic, *spans_after]
@@ -163,9 +179,15 @@ def parse_italic(
 
 
 def parse_breaks(text: str) -> Spans:
+    """
+    Parses intra-paragraph line breaks.
+    """
+    # Parse empty text into nothing
     if not text:
         return []
+    # Split on the line break mark appearing at the end of the line
     splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
+    # Put a LineBreak between each TextSpan
     spans: Spans = [
         splits[i // 2] if i % 2 == 0 else LineBreak()
         for i in range(0, 2 * len(splits) - 1)
diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 0000000..5a27765
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,176 @@
+from typing import Sequence
+
+from amanuensis.parser.core import (
+    TextSpan,
+    LineBreak,
+    ParsedArticle,
+    BodyParagraph,
+    SignatureParagraph,
+    BoldSpan,
+    ItalicSpan,
+    CitationSpan,
+    Renderable,
+    SpanContainer,
+    RenderableVisitor,
+    Spans,
+)
+from amanuensis.parser.helpers import normalize_title, filesafe_title, titlesort
+from amanuensis.parser.parsing import (
+    parse_breaks,
+    parse_paired_formatting,
+    parse_paragraph,
+    parse_raw_markdown,
+)
+
+
+def assert_types(spans: Spans, types: Sequence, loc=None):
+    """
+    Asserts that  a span list has the types specified.
+    Each element in `types` should be either a span type or a list. The first
+    element of the list is the container type and the remaining elements are the
+    content types.
+    """
+    assert len(spans) == len(
+        types
+    ), f"Unexpected type sequence length at loc {loc if loc else 'root'}"
+    i = -1
+    for span, span_type in zip(spans, types):
+        i += 1
+        i_loc = f"{loc}.{i}" if loc else f"{i}"
+        if isinstance(span_type, list):
+            assert isinstance(
+                span, SpanContainer
+            ), f"Expected a span container at loc {i_loc}"
+            assert (
+                len(span.spans) == len(span_type) - 1
+            ), f"Unexpected container size at loc {i_loc}"
+            assert isinstance(
+                span, span_type[0]
+            ), f"Unexpected container type at loc {i_loc}"
+            assert_types(span.spans, span_type[1:], loc=i_loc)
+        else:
+            assert isinstance(span, Renderable), f"Expected a span at loc {i_loc}"
+            assert isinstance(span, span_type), f"Unexpected span type at loc {i_loc}"
+
+
+def assert_text(spans: Spans, texts: Sequence, loc=None):
+    """
+    Asserts that a span list has the inner text structure specified.
+    Each element in `texts` should be either a string or a list of the same.
+    """
+    assert len(spans) == len(
+        texts
+    ), f"Unexpected text sequence length at loc {loc if loc else 'root'}"
+    i = -1
+    for span, text in zip(spans, texts):
+        i += 1
+        i_loc = f"{loc}.{i}" if loc else f"{i}"
+        if isinstance(text, str):
+            assert isinstance(span, TextSpan), f"Expected a text span at loc {i_loc}"
+            assert span.innertext == text, f"Unexpected text at loc {i_loc}"
+        elif isinstance(text, list):
+            assert isinstance(
+                span, SpanContainer
+            ), f"Expected a span container at loc {i_loc}"
+            assert_text(span.spans, text, loc=i_loc)
+        else:
+            assert isinstance(span, LineBreak), f"Expected a line break at loc {i_loc}"
+
+
+def test_parse_breaks():
+    """Test parsing for intra-paragraph line breaks"""
+    text: str
+    spans: Spans
+
+    # Only having a line break does nothing
+    text = "One\nTwo"
+    spans: Spans = parse_breaks(text)
+    assert_types(spans, [TextSpan])
+    assert_text(spans, [text])
+
+    # Having the mark causes the text to be split across it
+    text = r"One\\" + "\nTwo"
+    spans: Spans = parse_breaks(text)
+    assert_types(spans, [TextSpan, LineBreak, TextSpan])
+    assert_text(spans, ["One", None, "Two"])
+
+    # Multiple lines can be broken
+    text = r"One\\" + "\n" + r"Two\\" + "\nThree"
+    spans: Spans = parse_breaks(text)
+    assert_types(spans, [TextSpan, LineBreak, TextSpan, LineBreak, TextSpan])
+    assert_text(spans, ["One", None, "Two", None, "Three"])
+
+    # The mark must be at the end of the line
+    text = r"One\\ " + "\nTwo"
+    spans: Spans = parse_breaks(text)
+    assert_types(spans, (TextSpan,))
+    assert_text(spans, [text])
+
+
+def test_simple_single_parse_pairs():
+    """Test parsing for bold and italic marks"""
+    text: str
+    spans: Spans
+
+    # Empty pair marks should parse
+    text = "****"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan]])
+
+    text = "////"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan]])
+
+    # Pair marks with text inside should parse
+    text = "**hello**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+
+    text = "//hello//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+
+    # Text outside of pair marks should parse on the same level
+    text = "**hello** world"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["hello"], " world"])
+
+    text = "//hello// world"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
+    assert_text(spans, [["hello"], " world"])
+
+    # Text before, between, and after pair marks should parse
+    text = "In the **beginning** was //the// Word"
+    spans = parse_paired_formatting(text)
+    assert_types(
+        spans,
+        [TextSpan, [BoldSpan, TextSpan], TextSpan, [ItalicSpan, TextSpan], TextSpan],
+    )
+    assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
+
+
+def test_simple_nested_parse_pairs():
+    """Test parsing for nesting bold and italic"""
+    text: str
+    spans: Spans
+
+    # Simple nested test cases
+    text = "**//hello//**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, [ItalicSpan, TextSpan]]])
+    assert_text(spans, [[["hello"]]])
+
+    text = "//**world**//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, [BoldSpan, TextSpan]]])
+    assert_text(spans, [[["world"]]])
+
+    # Overlap should only parse the first
+    text = "**Hello//world**//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["Hello//world"], "//"])
-- 
2.44.1


From 6f380bd49565907536088be1963b5a60bee9194b Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 20:14:32 -0700
Subject: [PATCH 5/8] Fix parsing pair marks with line breaks inside

---
 amanuensis/parser/parsing.py | 69 ++++++++++++++++++++++--------------
 tests/test_parser.py         | 26 ++++++++++++++
 2 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py
index a16afae..e2d7b1c 100644
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -47,32 +47,40 @@ def parse_paragraph(text: str) -> SpanContainer:
 
 def parse_paired_formatting(
     text: str,
-    can_cite: bool = True,
-    can_bold: bool = True,
-    can_italic: bool = True,
+    in_cite: bool = False,
+    in_bold: bool = False,
+    in_italic: bool = False,
 ) -> Spans:
     """
     Parses citations, bolds, and italics, which can be nested inside each other.
+    A single type cannot nest inside itself, which is controlled by setting the
+    matching in_* flag parameter to True.
     """
     # Find positions of any paired formatting
-    first_cite = find_pair(text, "[[", "]]") if can_cite else -1
-    first_bold = find_pair(text, "**", "**") if can_bold else -1
-    first_italic = find_pair(text, "//", "//") if can_italic else -1
-    # Load the possible parse handlers into the map
+    next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
+    next_bold = find_pair(text, "**", "**") if not in_bold else -1
+    next_italic = find_pair(text, "//", "//") if not in_italic else -1
+    # Create a map from a formatting mark's distance to its parse handler
     handlers = {}
-    handlers[first_cite] = lambda: parse_citation(
-        text, can_bold=can_bold, can_italic=can_italic
+    handlers[next_cite] = lambda: parse_citation(
+        text, in_bold=in_bold, in_italic=in_italic
     )
-    handlers[first_bold] = lambda: parse_bold(
-        text, can_cite=can_cite, can_italic=can_italic
+    handlers[next_bold] = lambda: parse_bold(
+        text, in_cite=in_cite, in_italic=in_italic
     )
-    handlers[first_italic] = lambda: parse_italic(
-        text, can_cite=can_cite, can_bold=can_bold
+    handlers[next_italic] = lambda: parse_italic(
+        text, in_cite=in_cite, in_bold=in_bold
     )
-    # If nothing was found, move on to the next parsing step
-    handlers[-1] = lambda: parse_breaks(text)
-    # Choose a handler based on the earliest found result
-    finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
+    # Map the next parsing step at -1. If we're currently inside a formatting
+    # mark pair, skip parsing line breaks, which are not allowed inside paired
+    # marks.
+    if in_cite or in_bold or in_italic:
+        handlers[-1] = lambda: parse_text(text)
+    else:
+        handlers[-1] = lambda: parse_breaks(text)
+    # Choose the handler for the earliest found pair, or the default handler
+    # at -1 if nothing was found.
+    finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
     first = min(finds) if finds else -1
     return handlers[first]()
 
@@ -95,8 +103,8 @@ def find_pair(text: str, open_tag: str, close_tag: str) -> int:
 
 def parse_citation(
     text: str,
-    can_bold: bool = True,
-    can_italic: bool = True,
+    in_bold: bool = False,
+    in_italic: bool = False,
 ) -> Spans:
     """
     Parses text into a citation span.
@@ -118,7 +126,7 @@ def parse_citation(
         inner_split = text_inner.split("|", 1)
         text_inner_actual, cite_target = inner_split[0], inner_split[-1]
         spans_inner = parse_paired_formatting(
-            text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic
+            text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
         )
         citation = CitationSpan(spans_inner, cite_target)
         return [*spans_before, citation, *spans_after]
@@ -128,8 +136,8 @@ def parse_citation(
 
 def parse_bold(
     text: str,
-    can_cite: bool = True,
-    can_italic: bool = True,
+    in_cite: bool = False,
+    in_italic: bool = False,
 ) -> Spans:
     """
     Parses text into a bold span.
@@ -144,7 +152,7 @@ def parse_bold(
         # Parse inner text minus bold parsing
         text_inner = text[bold_open + 2 : bold_close]
         spans_inner = parse_paired_formatting(
-            text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic
+            text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
         )
         bold = BoldSpan(spans_inner)
         return [*spans_before, bold, *spans_after]
@@ -154,8 +162,8 @@ def parse_bold(
 
 def parse_italic(
     text: str,
-    can_cite: bool = True,
-    can_bold: bool = True,
+    in_cite: bool = False,
+    in_bold: bool = False,
 ) -> Spans:
     """
     Parses text into an italic span.
@@ -170,7 +178,7 @@ def parse_italic(
         # Parse inner text minus italic parsing
         text_inner = text[italic_open + 2 : italic_close]
         spans_inner = parse_paired_formatting(
-            text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False
+            text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
         )
         italic = ItalicSpan(spans_inner)
         return [*spans_before, italic, *spans_after]
@@ -193,3 +201,12 @@ def parse_breaks(text: str) -> Spans:
         for i in range(0, 2 * len(splits) - 1)
     ]
     return spans
+
+
+def parse_text(text: str) -> Spans:
+    """
+    Parses text with no remaining parseable marks.
+    """
+    if not text:
+        return []
+    return [TextSpan(text)]
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 5a27765..ec9ade8 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -153,6 +153,32 @@ def test_simple_single_parse_pairs():
     assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
 
 
+def test_simple_parse_pairs_with_break():
+    """Test pair marks with breaks"""
+    text: str
+    spans: Spans
+
+    text = r"**glory\\" + "\nhammer**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan]])
+    assert_text(spans, [["glory\\\\\nhammer"]])
+
+    text = r"//glory\\" + "\nhammer//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan]])
+    assert_text(spans, [["glory\\\\\nhammer"]])
+
+    text = r"**glory\\" + "\n**hammer**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["glory\\\\\n"], "hammer**"])
+
+    text = r"//glory\\" + "\n//hammer//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
+    assert_text(spans, [["glory\\\\\n"], "hammer//"])
+
+
 def test_simple_nested_parse_pairs():
     """Test parsing for nesting bold and italic"""
     text: str
-- 
2.44.1


From 2c294f7f1284b48e0cb2969b9f72e0bf9d71cf7d Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Wed, 9 Jun 2021 21:32:29 -0700
Subject: [PATCH 6/8] Add citation parsing unit tests

---
 amanuensis/parser/parsing.py |   5 +-
 tests/test_parser.py         | 123 ++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py
index e2d7b1c..3dd08c8 100644
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -121,8 +121,9 @@ def parse_citation(
         spans_after = parse_paired_formatting(text[cite_close + 2 :])
         # Parse inner text and skip parsing for this format pair
         text_inner = text[cite_open + 2 : cite_close]
-        # For citations specifically, we may need to split off a citation
-        # target from the alias text
+        # For citations specifically, try to split off a citation target.
+        # If there's no citation target to split, use the same text as the
+        # citation text and the target.
         inner_split = text_inner.split("|", 1)
         text_inner_actual, cite_target = inner_split[0], inner_split[-1]
         spans_inner = parse_paired_formatting(
diff --git a/tests/test_parser.py b/tests/test_parser.py
index ec9ade8..269c815 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -107,7 +107,7 @@ def test_parse_breaks():
     assert_text(spans, [text])
 
 
-def test_simple_single_parse_pairs():
+def test_parse_pairs_single():
     """Test parsing for bold and italic marks"""
     text: str
     spans: Spans
@@ -153,7 +153,7 @@ def test_simple_single_parse_pairs():
     assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
 
 
-def test_simple_parse_pairs_with_break():
+def test_parse_pairs_break():
     """Test pair marks with breaks"""
     text: str
     spans: Spans
@@ -179,7 +179,7 @@ def test_simple_parse_pairs_with_break():
     assert_text(spans, [["glory\\\\\n"], "hammer//"])
 
 
-def test_simple_nested_parse_pairs():
+def test_parse_pairs_nested():
     """Test parsing for nesting bold and italic"""
     text: str
     spans: Spans
@@ -200,3 +200,120 @@ def test_simple_nested_parse_pairs():
     spans = parse_paired_formatting(text)
     assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
     assert_text(spans, [["Hello//world"], "//"])
+
+
+def test_normalize_title():
+    """Test the title normalization used by the citation parser"""
+    nt = normalize_title
+    assert nt("hello") == "Hello"
+    assert nt("  world  ") == "World"
+    assert nt("Waiting for           Godot") == "Waiting for Godot"
+    assert nt("lowercase letters") == "Lowercase letters"
+
+
+def test_parse_citation_single():
+    """Test parsing citations, which have internal formatting"""
+    text: str
+    spans: Spans
+
+    # Simple test cases
+    text = "[[hello]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Hello"
+
+    text = "[[hello|world]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "World"
+
+    text = "[[hello||world]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "|world"
+
+    text = "[[  hello  |  world  ]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["  hello  "]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "World"
+
+    text = "[[faith|hope|love]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["faith"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Hope|love"
+
+    text = "[[ [[|]] ]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
+    assert_text(spans, [[" [["], " ]]"])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == ""
+
+
+def test_parse_citation_break():
+    """Test citations with breaks"""
+    text: str
+    spans: Spans
+
+    text = "[[hello\\\\\nworld]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello\\\\\nworld"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Hello\\\\ world"
+
+    text = "[[one|two\\\\\nthree]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["one"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Two\\\\ three"
+
+
+def test_parse_citation_nested():
+    """Test nesting with citations"""
+    text: str
+    spans: Spans
+
+    text = "[[**hello world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, [BoldSpan, TextSpan]]])
+    assert_text(spans, [[["hello world"]]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "**hello world**"
+
+    text = "[[**hello|world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["**hello"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "World**"
+
+    text = "**[[hello world]]**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, [CitationSpan, TextSpan]]])
+    assert_text(spans, [[["hello world"]]])
+    citation: CitationSpan = spans[0].spans[0]
+    assert citation.cite_target == "Hello world"
+
+    text = "**[[hello world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["[[hello world"], "]]"])
+
+    text = "[[**hello world]]**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
+    assert_text(spans, [["**hello world"], "**"])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "**hello world"
-- 
2.44.1


From fde12ac818368e195b2f1faafb9029d0964acae7 Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Fri, 11 Jun 2021 23:26:28 -0700
Subject: [PATCH 7/8] Add paragraph and article parser tests

---
 amanuensis/parser/parsing.py |  8 +---
 tests/test_parser.py         | 77 ++++++++++++++++++++++++++++++++----
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py
index 3dd08c8..da800e8 100644
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@@ -65,12 +65,8 @@ def parse_paired_formatting(
     handlers[next_cite] = lambda: parse_citation(
         text, in_bold=in_bold, in_italic=in_italic
     )
-    handlers[next_bold] = lambda: parse_bold(
-        text, in_cite=in_cite, in_italic=in_italic
-    )
-    handlers[next_italic] = lambda: parse_italic(
-        text, in_cite=in_cite, in_bold=in_bold
-    )
+    handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
+    handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
     # Map the next parsing step at -1. If we're currently inside a formatting
     # mark pair, skip parsing line breaks, which are not allowed inside paired
     # marks.
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 269c815..6be240b 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -25,18 +25,18 @@ from amanuensis.parser.parsing import (
 
 def assert_types(spans: Spans, types: Sequence, loc=None):
     """
-    Asserts that  a span list has the types specified.
+    Asserts that a span list has the types specified.
     Each element in `types` should be either a span type or a list. The first
     element of the list is the container type and the remaining elements are the
     content types.
     """
-    assert len(spans) == len(
-        types
-    ), f"Unexpected type sequence length at loc {loc if loc else 'root'}"
-    i = -1
-    for span, span_type in zip(spans, types):
-        i += 1
+    for i in range(max(len(spans), len(types))):
         i_loc = f"{loc}.{i}" if loc else f"{i}"
+        # Check lengths are equal
+        assert i < len(spans), f"Span list unexpectedly short at {i_loc}"
+        assert i < len(types), f"Type list unexpectedly short at {i_loc}"
+        # Check types are equal
+        span, span_type = spans[i], types[i]
         if isinstance(span_type, list):
             assert isinstance(
                 span, SpanContainer
@@ -317,3 +317,66 @@ def test_parse_citation_nested():
     assert_text(spans, [["**hello world"], "**"])
     citation: CitationSpan = spans[0]
     assert citation.cite_target == "**hello world"
+
+
+def test_parse_paragraphs():
+    """Test that parse_paragraph yields body and signature paragraphs"""
+    body_source: str = "\tIn the beginning was the Word."
+    signature_source: str = "~Ersatz Scrivener, scholar extraordinaire"
+
+    # Body paragraph: a single text span whose expected text
+    # does not include the tab that starts the source
+    body: SpanContainer = parse_paragraph(body_source)
+    assert_types([body], [[BodyParagraph, TextSpan]])
+    assert_text([body], [["In the beginning was the Word."]])
+
+    # Signature paragraph: a single text span whose expected text
+    # does not include the tilde that starts the source
+    signature: SpanContainer = parse_paragraph(signature_source)
+    assert_types([signature], [[SignatureParagraph, TextSpan]])
+    assert_text([signature], [["Ersatz Scrivener, scholar extraordinaire"]])
+
+
+def test_parse_article():
+    """Test that the full article parser produces the expected span tree and text"""
+    article: str = (
+        "Writing a **unit test** requires having test //content//.\n\n"
+        "This content, of course, must be [[created|Writing test collateral]].\n\n"
+        "~Bucky, unit test writer"
+    )
+    parsed: ParsedArticle = parse_raw_markdown(article)
+
+    assert_types(
+        [parsed],
+        [
+            [
+                ParsedArticle,
+                [
+                    BodyParagraph,
+                    TextSpan,
+                    [BoldSpan, TextSpan],
+                    TextSpan,
+                    [ItalicSpan, TextSpan],
+                    TextSpan,
+                ],
+                [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan],
+                [SignatureParagraph, TextSpan],
+            ]
+        ],
+    )
+    assert_text(
+        [parsed],
+        [
+            [
+                [
+                    "Writing a ",
+                    ["unit test"],
+                    " requires having test ",
+                    ["content"],
+                    ".",
+                ],
+                ["This content, of course, must be ", ["created"], "."],
+                ["Bucky, unit test writer"],
+            ]
+        ],
+    )
-- 
2.44.1


From 00738b5a45b358d7cad4833ba8a5df17a467ac15 Mon Sep 17 00:00:00 2001
From: Tim Van Baak <tim.vanbaak@gmail.com>
Date: Fri, 11 Jun 2021 23:45:28 -0700
Subject: [PATCH 8/8] Add renderable visitor unit test

---
 tests/test_parser.py | 87 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 84 insertions(+), 3 deletions(-)

diff --git a/tests/test_parser.py b/tests/test_parser.py
index 6be240b..3409cb1 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -342,7 +342,7 @@ def test_parse_article():
     article: str = (
         "Writing a **unit test** requires having test //content//.\n\n"
         "This content, of course, must be [[created|Writing test collateral]].\n\n"
-        "~Bucky, unit test writer"
+        "~Bucky\\\\\nUnit test writer"
     )
     parsed: ParsedArticle = parse_raw_markdown(article)
 
@@ -360,7 +360,7 @@ def test_parse_article():
                     TextSpan,
                 ],
                 [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan],
-                [SignatureParagraph, TextSpan],
+                [SignatureParagraph, TextSpan, LineBreak, TextSpan],
             ]
         ],
     )
@@ -376,7 +376,88 @@ def test_parse_article():
                     ".",
                 ],
                 ["This content, of course, must be ", ["created"], "."],
-                ["Bucky, unit test writer"],
+                ["Bucky", None, "Unit test writer"],
             ]
         ],
     )
+
+
+def test_visitor():
+    """Test that render() dispatches each span to the hook matching its type"""
+
+    class TestVisitor(RenderableVisitor):
+        def __init__(self):
+            self.visited = []
+
+        def TextSpan(self, span: TextSpan):
+            assert isinstance(span, TextSpan)
+            self.visited.append(span)
+
+        def LineBreak(self, span: LineBreak):
+            assert isinstance(span, LineBreak)
+            self.visited.append(span)
+
+        def ParsedArticle(self, span: ParsedArticle):
+            assert isinstance(span, ParsedArticle)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def BodyParagraph(self, span: BodyParagraph):
+            assert isinstance(span, BodyParagraph)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def SignatureParagraph(self, span: SignatureParagraph):
+            assert isinstance(span, SignatureParagraph)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def BoldSpan(self, span: BoldSpan):
+            assert isinstance(span, BoldSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def ItalicSpan(self, span: ItalicSpan):
+            assert isinstance(span, ItalicSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def CitationSpan(self, span: CitationSpan):
+            assert isinstance(span, CitationSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+    article: str = (
+        "Writing a **unit test** requires having test //content//.\n\n"
+        "This content, of course, must be [[created|Writing test collateral]].\n\n"
+        "~Bucky\\\\\nUnit test writer"
+    )
+    parsed: ParsedArticle = parse_raw_markdown(article)
+
+    visitor = TestVisitor()
+    # Each hook asserts the type of the span it receives while rendering
+    parsed.render(visitor)
+    # Hooks record a span before recursing, so containers precede their contents
+    type_order = [
+        ParsedArticle,
+        BodyParagraph,
+        TextSpan,
+        BoldSpan,
+        TextSpan,
+        TextSpan,
+        ItalicSpan,
+        TextSpan,
+        TextSpan,
+        BodyParagraph,
+        TextSpan,
+        CitationSpan,
+        TextSpan,
+        TextSpan,
+        SignatureParagraph,
+        TextSpan,
+        LineBreak,
+        TextSpan,
+    ]
+    assert len(visitor.visited) == len(type_order)
+    for span, expected_type in zip(visitor.visited, type_order):
+        assert isinstance(span, expected_type)
-- 
2.44.1