2021-06-12 17:28:19 +00:00
13 changed files with 920 additions and 405 deletions
--- a/amanuensis/lexicon/gameloop.py
+++ b/amanuensis/lexicon/gameloop.py
@ -9,11 +9,94 @@ from amanuensis.config import ReadOnlyOrderedDict
 from amanuensis.models import LexiconModel, UserModel
 from amanuensis.parser import (
 	parse_raw_markdown,
-	GetCitations,
-	HtmlRenderer,
 	titlesort,
-	filesafe_title,
-	ConstraintAnalysis)
+	filesafe_title)
+from amanuensis.parser.core import RenderableVisitor
+
+
+class GetCitations(RenderableVisitor):  # visitor that gathers the cite_target of each CitationSpan
+	def __init__(self):
+		self.citations = []  # cite targets, appended in the order the CitationSpan hook fires
+
+	def ParsedArticle(self, span):
+		span.recurse(self)  # render every child span with this visitor
+		return self.citations  # the article-level hook hands back the list instead of the visitor
+
+	def CitationSpan(self, span):
+		self.citations.append(span.cite_target)
+		return self  # no recurse() here, so spans inside the citation are not visited
+
+
+class ConstraintAnalysis(RenderableVisitor):  # visitor tallying words, citations and signatures
+	def __init__(self, lexicon: LexiconModel):  # `lexicon` is accepted but not read in this constructor
+		self.info: List[str] = []
+		self.warning: List[str] = []
+		self.error: List[str] = []
+
+		self.word_count: int = 0
+		self.citations: list = []
+		self.signatures: int = 0
+
+	def TextSpan(self, span):
+		self.word_count += len(span.innertext.split())  # split() is [] for blank text, so it adds 0 words
+		return self
+
+	def SignatureParagraph(self, span):
+		self.signatures += 1
+		span.recurse(self)  # keep visiting so text and citations inside the signature are counted
+		return self
+
+	def CitationSpan(self, span):
+		self.citations.append(span.cite_target)
+		span.recurse(self)  # the citation's display text contributes to word_count
+		return self
+
+
+class HtmlRenderer(RenderableVisitor):
+	"""
+	Renders an article token tree into published article HTML. Each hook returns a string.
+	"""
+	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
+		self.lexicon_name: str = lexicon_name  # interpolated into citation hrefs
+		self.written_articles: Iterable[str] = written_articles  # titles rendered as non-phantom links
+
+	def TextSpan(self, span):
+		return span.innertext  # NOTE(review): emitted without HTML escaping; confirm it is sanitized elsewhere
+
+	def LineBreak(self, span):
+		return '<br>'
+
+	def ParsedArticle(self, span):
+		return '\n'.join(span.recurse(self))  # one rendered child per line
+
+	def BodyParagraph(self, span):
+		return f'<p>{"".join(span.recurse(self))}</p>'
+
+	def SignatureParagraph(self, span):
+		return (
+			'<hr><span class="signature"><p>'
+			f'{"".join(span.recurse(self))}'
+			'</p></span>'
+		)
+
+	def BoldSpan(self, span):
+		return f'<b>{"".join(span.recurse(self))}</b>'
+
+	def ItalicSpan(self, span):
+		return f'<i>{"".join(span.recurse(self))}</i>'
+
+	def CitationSpan(self, span):
+		if span.cite_target in self.written_articles:  # NOTE(review): `in` on a one-shot iterator would consume it
+			link_class = ''
+		else:
+			link_class = ' class="phantom"'  # target not among written_articles
+		# link = url_for(
+		# 	'lexicon.article',
+		# 	name=self.lexicon_name,
+		# 	title=filesafe_title(span.cite_target))
+		link = (f'/lexicon/{self.lexicon_name}'
+			+ f'/article/{filesafe_title(span.cite_target)}')
+		return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'


 def get_player_characters(
--- a/amanuensis/lexicon/manage.py
+++ b/amanuensis/lexicon/manage.py
@ -13,7 +13,7 @@
 # from amanuensis.config.loader import AttrOrderedDict
 # from amanuensis.errors import ArgumentError
 # from amanuensis.lexicon import LexiconModel
-# from amanuensis.parser import parse_raw_markdown, GetCitations, HtmlRenderer, filesafe_title, titlesort
+# from amanuensis.parser import parse_raw_markdown, filesafe_title, titlesort
 # from amanuensis.resources import get_stream


--- a/amanuensis/parser/__init__.py
+++ b/amanuensis/parser/__init__.py
@ -2,19 +2,14 @@
 Module encapsulating all markdown parsing functionality.
 """

-from .analyze import ConstraintAnalysis, GetCitations
-from .core import normalize_title
-from .helpers import titlesort, filesafe_title
+from .core import RenderableVisitor
+from .helpers import normalize_title, filesafe_title, titlesort
 from .parsing import parse_raw_markdown
-from .render import PreviewHtmlRenderer, HtmlRenderer

 __all__ = [
-	ConstraintAnalysis.__name__,
-	GetCitations.__name__,
-	normalize_title.__name__,
-	titlesort.__name__,
-	filesafe_title.__name__,
-	parse_raw_markdown.__name__,
-	PreviewHtmlRenderer.__name__,
-	HtmlRenderer.__name__,
+    "RenderableVisitor",
+    "normalize_title",
+    "filesafe_title",
+    "titlesort",
+    "parse_raw_markdown",
 ]
--- a/amanuensis/parser/analyze.py
+++ b/amanuensis/parser/analyze.py
@ -1,49 +0,0 @@
-"""
-Internal module encapsulating visitors that compute metrics on articles
-for verification against constraints.
-"""
-
-import re
-from typing import List
-
-from amanuensis.models import LexiconModel
-
-from .core import RenderableVisitor
-
-
-class GetCitations(RenderableVisitor):
-	def __init__(self):
-		self.citations = []
-
-	def ParsedArticle(self, span):
-		span.recurse(self)
-		return self.citations
-
-	def CitationSpan(self, span):
-		self.citations.append(span.cite_target)
-		return self
-
-
-class ConstraintAnalysis(RenderableVisitor):
-	def __init__(self, lexicon: LexiconModel):
-		self.info: List[str] = []
-		self.warning: List[str] = []
-		self.error: List[str] = []
-
-		self.word_count: int = 0
-		self.citations: list = []
-		self.signatures: int = 0
-
-	def TextSpan(self, span):
-		self.word_count += len(re.split(r'\s+', span.innertext.strip()))
-		return self
-
-	def SignatureParagraph(self, span):
-		self.signatures += 1
-		span.recurse(self)
-		return self
-
-	def CitationSpan(self, span):
-		self.citations.append(span.cite_target)
-		span.recurse(self)
-		return self
--- a/amanuensis/parser/core.py
+++ b/amanuensis/parser/core.py
@ -5,32 +5,26 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """

-import re
 from typing import Callable, Any, Sequence

-RenderHook = Callable[['Renderable'], Any]
-Spans = Sequence['Renderable']
+from .helpers import normalize_title


-def normalize_title(title: str) -> str:
-	"""
-	Normalizes strings as titles:
-	- Strips leading and trailing whitespace
-	- Merges internal whitespace into a single space
-	- Capitalizes the first word
-	"""
-	cleaned = re.sub(r'\s+', " ", title.strip())
-	return cleaned[:1].capitalize() + cleaned[1:]
+RenderHook = Callable[["Renderable"], Any]
+Spans = Sequence["Renderable"]


-class Renderable():
+class Renderable:
    """
    Base class for parsed markdown. Provides the `render()` method for
    visiting the token tree.
    """
-	def render(self: 'Renderable', renderer: 'RenderableVisitor'):
+
+    def render(self: "Renderable", renderer: "RenderableVisitor"):
        """
        Execute the apppropriate visitor method on this Renderable.
+        Visitors implement hooks by declaring methods whose names are
+        the name of a Renderable class.
        """
        hook: RenderHook = getattr(renderer, type(self).__name__, None)
        if hook:
@ -39,30 +33,35 @@ class Renderable():


 class TextSpan(Renderable):
-	"""An unstyled length of text."""
+    """A length of text."""
+
    def __init__(self, innertext: str):
        self.innertext = innertext

-	def __str__(self):
-		return f"[{self.innertext}]"
+    def __repr__(self):
+        return f"<{self.innertext}>"


 class LineBreak(Renderable):
    """A line break within a paragraph."""
-	def __str__(self):
+
+    def __repr__(self):
        return "<break>"


 class SpanContainer(Renderable):
    """A formatting element that wraps some amount of text."""
+
    def __init__(self, spans: Spans):
        self.spans: Spans = spans

-	def __str__(self):
-		return (f'[{type(self).__name__} '
-			+ f'{" ".join([str(span) for span in self.spans])}]')
+    def __repr__(self):
+        return (
+            f"<{type(self).__name__} "
+            + f'{" ".join([repr(span) for span in self.spans])}>'
+        )

-	def recurse(self, renderer: 'RenderableVisitor'):
+    def recurse(self, renderer: "RenderableVisitor"):
        return [child.render(renderer) for child in self.spans]


@ -88,22 +87,26 @@ class ItalicSpan(SpanContainer):

 class CitationSpan(SpanContainer):
    """A citation to another article."""
+
    def __init__(self, spans: Spans, cite_target: str):
        super().__init__(spans)
        # Normalize citation target on parse, since we don't want
        # abnormal title strings lying around causing trouble.
        self.cite_target: str = normalize_title(cite_target)

-	def __str__(self):
-		return (f'{{{" ".join([str(span) for span in self.spans])}'
-			+ f':{self.cite_target}}}')
+    def __repr__(self) -> str:
+        return (
+            f'{{{" ".join([repr(span) for span in self.spans])}'
+            + f":{self.cite_target}}}"
+        )


-class RenderableVisitor():
+class RenderableVisitor:
    """
    Default implementation of the visitor pattern. Executes once on
    each token in the tree and returns itself.
    """
+
    def TextSpan(self, span: TextSpan):
        return self

--- a/amanuensis/parser/helpers.py
+++ b/amanuensis/parser/helpers.py
@ -1,7 +1,22 @@
+"""
+Helper functions for manipulating titles during parsing
+"""
+
 import re
 import urllib.parse


+def normalize_title(title: str) -> str:
+    """
+    Puts a string into canonical title form:
+    - Leading and trailing whitespace is removed
+    - Every internal run of whitespace becomes one space
+    - The first character is capitalized; the rest is left as written
+    """
+    collapsed = re.sub(r"\s+", " ", title.strip())
+    return f"{collapsed[:1].capitalize()}{collapsed[1:]}"
+
+
 def titlesort(title: str) -> str:
    """
    Strips articles off of titles for alphabetical sorting purposes
@ -20,9 +35,19 @@ def filesafe_title(title: str) -> str:
    """
    Makes an article title filename-safe.
    """
-	s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
-	s = re.sub(r"~", '-', s)        # parse.quote doesn't catch ~
-	s = urllib.parse.quote(s)       # Encode all other characters
-	s = re.sub(r"%", "", s)         # Strip encoding %s
-	s = s[:64]                  	# Limit to 64 characters
+    # Replace whitespace with _
+    s = re.sub(r"\s+", "_", title)
+
+    # parse.quote doesn't catch ~
+    s = re.sub(r"~", "-", s)
+
+    # Encode all other characters
+    s = urllib.parse.quote(s)
+
+    # Strip encoding %s
+    s = re.sub(r"%", "", s)
+
+    # Limit to 64 characters
+    s = s[:64]
+
    return s
--- a/amanuensis/parser/parsing.py
+++ b/amanuensis/parser/parsing.py
@ -16,7 +16,7 @@ from .core import (
    ItalicSpan,
    CitationSpan,
    Renderable,
-	SpanContainer
+    SpanContainer,
 )

 Spans = Sequence[Renderable]
@ -28,15 +28,18 @@ def parse_raw_markdown(text: str) -> ParsedArticle:
    """
    # Parse each paragraph individually, as no formatting applies
    # across paragraphs
-	paragraphs = re.split(r'\n\n+', text)
+    paragraphs = re.split(r"\n\n+", text)
    parse_results = list(map(parse_paragraph, paragraphs))
    return ParsedArticle(parse_results)


 def parse_paragraph(text: str) -> SpanContainer:
+    """
+    Parses a block of text into a paragraph object.
+    """
    # Parse the paragraph as a span of text
    text = text.strip()
-	if text and text[0] == '~':
+    if text and text[0] == "~":
        return SignatureParagraph(parse_paired_formatting(text[1:]))
    else:
        return BodyParagraph(parse_paired_formatting(text))
@ -44,34 +47,44 @@ def parse_paragraph(text: str) -> SpanContainer:

 def parse_paired_formatting(
    text: str,
-		cite: bool = True,
-		bold: bool = True,
-		italic: bool = True) -> Spans:
+    in_cite: bool = False,
+    in_bold: bool = False,
+    in_italic: bool = False,
+) -> Spans:
+    """
+    Parses citations, bolds, and italics, which can be nested inside each other.
+    A single type cannot nest inside itself, which is controlled by setting the
+    flag parameters to False.
+    """
    # Find positions of any paired formatting
-	first_cite = find_pair(text, "[[", "]]", cite)
-	first_bold = find_pair(text, "**", "**", bold)
-	first_italic = find_pair(text, "//", "//", italic)
-	# Load the possible parse handlers into the map
+    next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
+    next_bold = find_pair(text, "**", "**") if not in_bold else -1
+    next_italic = find_pair(text, "//", "//") if not in_italic else -1
+    # Create a map from a formatting mark's distance to its parse handler
    handlers = {}
-	handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
-	handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
-	handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
-	# If nothing was found, move on to the next parsing step
+    handlers[next_cite] = lambda: parse_citation(
+        text, in_bold=in_bold, in_italic=in_italic
+    )
+    handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
+    handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
+    # Map the next parsing step at -1. If we're currently inside a formatting
+    # mark pair, skip parsing line breaks, which are not allowed inside paired
+    # marks.
+    if in_cite or in_bold or in_italic:
+        handlers[-1] = lambda: parse_text(text)
+    else:
        handlers[-1] = lambda: parse_breaks(text)
-	# Choose a handler based on the earliest found result
-	finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
+    # Choose the handler for the earliest found pair, or the default handler
+    # at -1 if nothing was found.
+    finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
    first = min(finds) if finds else -1
    return handlers[first]()


-def find_pair(
-		text: str,
-		open_tag: str,
-		close_tag: str,
-		valid: bool) -> int:
-	# If skipping, return -1
-	if not valid:
-		return -1
+def find_pair(text: str, open_tag: str, close_tag: str) -> int:
+    """
+    Finds the beginning of a pair of formatting marks.
+    """
    # If the open tag wasn't found, return -1
    first = text.find(open_tag)
    if first < 0:
@ -84,7 +97,14 @@ def find_pair(
    return first


-def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
+def parse_citation(
+    text: str,
+    in_bold: bool = False,
+    in_italic: bool = False,
+) -> Spans:
+    """
+    Parses text into a citation span.
+    """
    cite_open = text.find("[[")
    if cite_open > -1:
        cite_close = text.find("]]", cite_open + 2)
@ -97,19 +117,28 @@ def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
        spans_after = parse_paired_formatting(text[cite_close + 2 :])
        # Parse inner text and skip parsing for this format pair
        text_inner = text[cite_open + 2 : cite_close]
-		# For citations specifically, we may need to split off a citation
-		# target from the alias text
+        # For citations specifically, try to split off a citation target.
+        # If there's no citation target to split, use the same text as the
+        # citation text and the target.
        inner_split = text_inner.split("|", 1)
        text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-		spans_inner = parse_paired_formatting(text_inner_actual,
-			cite=False, bold=bold, italic=italic)
+        spans_inner = parse_paired_formatting(
+            text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
+        )
        citation = CitationSpan(spans_inner, cite_target)
        return [*spans_before, citation, *spans_after]
    # Should never happen
    return parse_breaks(text)


-def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
+def parse_bold(
+    text: str,
+    in_cite: bool = False,
+    in_italic: bool = False,
+) -> Spans:
+    """
+    Parses text into a bold span.
+    """
    bold_open = text.find("**")
    if bold_open > -1:
        bold_close = text.find("**", bold_open + 2)
@ -119,15 +148,23 @@ def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
        spans_after = parse_paired_formatting(text[bold_close + 2 :])
        # Parse inner text minus bold parsing
        text_inner = text[bold_open + 2 : bold_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=False, italic=italic)
+        spans_inner = parse_paired_formatting(
+            text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
+        )
        bold = BoldSpan(spans_inner)
        return [*spans_before, bold, *spans_after]
    # Should never happen
    return parse_italic(text)


-def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
+def parse_italic(
+    text: str,
+    in_cite: bool = False,
+    in_bold: bool = False,
+) -> Spans:
+    """
+    Parses text into an italic span.
+    """
    italic_open = text.find("//")
    if italic_open > -1:
        italic_close = text.find("//", italic_open + 2)
@ -137,8 +174,9 @@ def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
        spans_after = parse_paired_formatting(text[italic_close + 2 :])
        # Parse inner text minus italic parsing
        text_inner = text[italic_open + 2 : italic_close]
-		spans_inner = parse_paired_formatting(text_inner,
-			cite=cite, bold=bold, italic=False)
+        spans_inner = parse_paired_formatting(
+            text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
+        )
        italic = ItalicSpan(spans_inner)
        return [*spans_before, italic, *spans_after]
    # Should never happen
@ -146,11 +184,26 @@ def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:


 def parse_breaks(text: str) -> Spans:
+    """
+    Parses intra-paragraph line breaks.
+    """
+    # Parse empty text into nothing
    if not text:
        return []
+    # Split on the line break mark appearing at the end of the line
    splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
+    # Put a LineBreak between each TextSpan
    spans: Spans = [
        splits[i // 2] if i % 2 == 0 else LineBreak()
        for i in range(0, 2 * len(splits) - 1)
    ]
    return spans
+
+
+def parse_text(text: str) -> Spans:
+    """
+    Wraps mark-free text in a single TextSpan; empty text yields no spans.
+    """
+    if text:
+        return [TextSpan(text)]
+    return []
--- a/amanuensis/parser/render.py
+++ b/amanuensis/parser/render.py
@ -1,104 +0,0 @@
-"""
-Internal module encapsulating visitors that render articles into
-readable formats.
-"""
-
-from typing import Iterable
-
-from .core import RenderableVisitor
-from .helpers import filesafe_title
-
-
-class HtmlRenderer(RenderableVisitor):
-	"""
-	Renders an article token tree into published article HTML.
-	"""
-	def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
-		self.lexicon_name: str = lexicon_name
-		self.written_articles: Iterable[str] = written_articles
-
-	def TextSpan(self, span):
-		return span.innertext
-
-	def LineBreak(self, span):
-		return '<br>'
-
-	def ParsedArticle(self, span):
-		return '\n'.join(span.recurse(self))
-
-	def BodyParagraph(self, span):
-		return f'<p>{"".join(span.recurse(self))}</p>'
-
-	def SignatureParagraph(self, span):
-		return (
-			'<hr><span class="signature"><p>'
-			f'{"".join(span.recurse(self))}'
-			'</p></span>'
-		)
-
-	def BoldSpan(self, span):
-		return f'<b>{"".join(span.recurse(self))}</b>'
-
-	def ItalicSpan(self, span):
-		return f'<i>{"".join(span.recurse(self))}</i>'
-
-	def CitationSpan(self, span):
-		if span.cite_target in self.written_articles:
-			link_class = ''
-		else:
-			link_class = ' class="phantom"'
-		# link = url_for(
-		# 	'lexicon.article',
-		# 	name=self.lexicon_name,
-		# 	title=filesafe_title(span.cite_target))
-		link = (f'/lexicon/{self.lexicon_name}'
-			+ f'/article/{filesafe_title(span.cite_target)}')
-		return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
-
-
-class PreviewHtmlRenderer(RenderableVisitor):
-	def __init__(self, lexicon):
-		with lexicon.ctx.read('info') as info:
-			self.article_map = {
-				title: article.character
-				for title, article in info.items()
-			}
-		self.citations = []
-		self.contents = ""
-
-	def TextSpan(self, span):
-		return span.innertext
-
-	def LineBreak(self, span):
-		return '<br>'
-
-	def ParsedArticle(self, span):
-		self.contents = '\n'.join(span.recurse(self))
-		return self
-
-	def BodyParagraph(self, span):
-		return f'<p>{"".join(span.recurse(self))}</p>'
-
-	def SignatureParagraph(self, span):
-		return (
-			'<hr><span class="signature"><p>'
-			f'{"".join(span.recurse(self))}'
-			'</p></span>'
-		)
-
-	def BoldSpan(self, span):
-		return f'<b>{"".join(span.recurse(self))}</b>'
-
-	def ItalicSpan(self, span):
-		return f'<i>{"".join(span.recurse(self))}</i>'
-
-	def CitationSpan(self, span):
-		if span.cite_target in self.article_map:
-			if self.article_map.get(span.cite_target):
-				link_class = '[extant]'
-			else:
-				link_class = '[phantom]'
-		else:
-			link_class = '[new]'
-		self.citations.append(f'{span.cite_target} {link_class}')
-		return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
--- a/amanuensis/server/session/__init__.py
+++ b/amanuensis/server/session/__init__.py
@ -15,9 +15,7 @@ from amanuensis.lexicon import (
 	create_character_in_lexicon,
 	get_draft)
 from amanuensis.models import LexiconModel
-from amanuensis.parser import (
-	parse_raw_markdown,
-	PreviewHtmlRenderer)
+from amanuensis.parser import parse_raw_markdown
 from amanuensis.server.helpers import (
 	lexicon_param,
 	player_required,
@ -29,7 +27,7 @@ from .forms import (
 	LexiconPublishTurnForm,
 	LexiconConfigForm)

-from .editor import load_editor, new_draft, update_draft
+from .editor import load_editor, new_draft, update_draft, PreviewHtmlRenderer


 bp_session = Blueprint('session', __name__,
--- a/amanuensis/server/session/editor.py
+++ b/amanuensis/server/session/editor.py
@ -17,8 +17,56 @@ from amanuensis.lexicon import (
 from amanuensis.models import LexiconModel
 from amanuensis.parser import (
 	normalize_title,
-	parse_raw_markdown,
-	PreviewHtmlRenderer)
+	parse_raw_markdown)
+from amanuensis.parser.core import RenderableVisitor
+
+
+class PreviewHtmlRenderer(RenderableVisitor):  # renders preview HTML and records a numbered citation list
+	def __init__(self, lexicon):
+		with lexicon.ctx.read('info') as info:
+			self.article_map = {  # title -> article.character, as read from 'info'
+				title: article.character
+				for title, article in info.items()
+			}
+		self.citations = []  # one "<target> [status]" entry per citation, in visit order
+		self.contents = ""  # filled in by the ParsedArticle hook
+
+	def TextSpan(self, span):
+		return span.innertext  # NOTE(review): emitted without HTML escaping; confirm it is sanitized elsewhere
+
+	def LineBreak(self, span):
+		return '<br>'
+
+	def ParsedArticle(self, span):
+		self.contents = '\n'.join(span.recurse(self))
+		return self  # callers read .contents and .citations off the returned visitor
+
+	def BodyParagraph(self, span):
+		return f'<p>{"".join(span.recurse(self))}</p>'
+
+	def SignatureParagraph(self, span):
+		return (
+			'<hr><span class="signature"><p>'
+			f'{"".join(span.recurse(self))}'
+			'</p></span>'
+		)
+
+	def BoldSpan(self, span):
+		return f'<b>{"".join(span.recurse(self))}</b>'
+
+	def ItalicSpan(self, span):
+		return f'<i>{"".join(span.recurse(self))}</i>'
+
+	def CitationSpan(self, span):
+		if span.cite_target in self.article_map:
+			if self.article_map.get(span.cite_target):  # truthy character value -> extant
+				link_class = '[extant]'
+			else:
+				link_class = '[phantom]'  # title is known but its character value is falsy
+		else:
+			link_class = '[new]'  # title is absent from article_map
+		self.citations.append(f'{span.cite_target} {link_class}')
+		return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'  # suffix is the 1-based citation number


 def load_editor(lexicon: LexiconModel, aid: str):
--- a/mypy.ini
+++ b/mypy.ini
@ -1,4 +1,4 @@
 [mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 ; mypy stable doesn't support pyproject.toml yet
--- a/pyproject.toml
+++ b/pyproject.toml
@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"

 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"

 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"

 [tool.pytest.ini_options]
 addopts = "--show-capture=log"
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@ -0,0 +1,463 @@
+from typing import Sequence
+
+from amanuensis.parser.core import (
+    TextSpan,
+    LineBreak,
+    ParsedArticle,
+    BodyParagraph,
+    SignatureParagraph,
+    BoldSpan,
+    ItalicSpan,
+    CitationSpan,
+    Renderable,
+    SpanContainer,
+    RenderableVisitor,
+    Spans,
+)
+from amanuensis.parser.helpers import normalize_title, filesafe_title, titlesort
+from amanuensis.parser.parsing import (
+    parse_breaks,
+    parse_paired_formatting,
+    parse_paragraph,
+    parse_raw_markdown,
+)
+
+
+def assert_types(spans: Spans, types: Sequence, loc=None):
+    """
+    Asserts that a span list has the types specified.
+    Each element in `types` is either a span type or a list whose first element
+    is the container type and whose remaining elements are the content types.
+    `loc` is the dotted index path used to locate failures in nested containers.
+    """
+    for i in range(max(len(spans), len(types))):  # walk the longer length so a mismatch trips an assert
+        i_loc = f"{loc}.{i}" if loc else f"{i}"
+        # Check lengths are equal: whichever sequence is shorter fails its bound check
+        assert i < len(spans), f"Span list unexpectedly short at {i_loc}"
+        assert i < len(types), f"Type list unexpectedly short at {i_loc}"
+        # Check types are equal
+        span, span_type = spans[i], types[i]
+        if isinstance(span_type, list):
+            assert isinstance(
+                span, SpanContainer
+            ), f"Expected a span container at loc {i_loc}"
+            assert (
+                len(span.spans) == len(span_type) - 1
+            ), f"Unexpected container size at loc {i_loc}"
+            assert isinstance(
+                span, span_type[0]
+            ), f"Unexpected container type at loc {i_loc}"
+            assert_types(span.spans, span_type[1:], loc=i_loc)  # recurse into the container's children
+        else:
+            assert isinstance(span, Renderable), f"Expected a span at loc {i_loc}"
+            assert isinstance(span, span_type), f"Unexpected span type at loc {i_loc}"
+
+
+def assert_text(spans: Spans, texts: Sequence, loc=None):
+    """
+    Asserts that a span list has the inner text structure specified.
+    Each element in `texts` should be either a string or a list of the same.
+    Any other element (the tests pass None) stands for a LineBreak.
+    `loc` is the dotted index path used to locate failures in nested containers.
+    """
+    assert len(spans) == len(
+        texts
+    ), f"Unexpected text sequence length at loc {loc if loc else 'root'}"
+    for i, (span, text) in enumerate(zip(spans, texts)):
+        i_loc = f"{loc}.{i}" if loc else f"{i}"
+        if isinstance(text, str):
+            assert isinstance(span, TextSpan), f"Expected a text span at loc {i_loc}"
+            assert span.innertext == text, f"Unexpected text at loc {i_loc}"
+        elif isinstance(text, list):
+            assert isinstance(
+                span, SpanContainer
+            ), f"Expected a span container at loc {i_loc}"
+            assert_text(span.spans, text, loc=i_loc)
+        else:
+            assert isinstance(span, LineBreak), f"Expected a line break at loc {i_loc}"
+
+
+def test_parse_breaks():
+    """Test parsing for intra-paragraph line breaks"""
+    text: str
+    spans: Spans
+
+    # A bare newline without the break mark is not a line break
+    text = "One\nTwo"
+    spans = parse_breaks(text)
+    assert_types(spans, [TextSpan])
+    assert_text(spans, [text])
+
+    # Having the mark causes the text to be split across it
+    text = r"One\\" + "\nTwo"
+    spans = parse_breaks(text)
+    assert_types(spans, [TextSpan, LineBreak, TextSpan])
+    assert_text(spans, ["One", None, "Two"])
+
+    # Multiple lines can be broken
+    text = r"One\\" + "\n" + r"Two\\" + "\nThree"
+    spans = parse_breaks(text)
+    assert_types(spans, [TextSpan, LineBreak, TextSpan, LineBreak, TextSpan])
+    assert_text(spans, ["One", None, "Two", None, "Three"])
+
+    # The mark must be at the end of the line
+    text = r"One\\ " + "\nTwo"
+    spans = parse_breaks(text)
+    assert_types(spans, [TextSpan])
+    assert_text(spans, [text])
+
+
+def test_parse_pairs_single():
+    """Test parsing for a single level of bold and italic marks"""
+    text: str
+    spans: Spans
+
+    # Empty pair marks should parse into a container with no children
+    text = "****"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan]])
+
+    text = "////"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan]])
+
+    # Pair marks with text inside should parse
+    text = "**hello**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+
+    text = "//hello//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+
+    # Text outside of pair marks should parse on the same level
+    text = "**hello** world"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["hello"], " world"])
+
+    text = "//hello// world"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
+    assert_text(spans, [["hello"], " world"])
+
+    # Text before, between, and after pair marks should parse
+    text = "In the **beginning** was //the// Word"
+    spans = parse_paired_formatting(text)
+    assert_types(
+        spans,
+        [TextSpan, [BoldSpan, TextSpan], TextSpan, [ItalicSpan, TextSpan], TextSpan],
+    )
+    assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
+
+
+def test_parse_pairs_break():
+    """Test that break marks inside pair marks stay literal text"""
+    text: str
+    spans: Spans
+
+    text = r"**glory\\" + "\nhammer**"  # break mark inside a bold pair
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan]])  # one TextSpan, no LineBreak
+    assert_text(spans, [["glory\\\\\nhammer"]])
+
+    text = r"//glory\\" + "\nhammer//"  # break mark inside an italic pair
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan]])
+    assert_text(spans, [["glory\\\\\nhammer"]])
+
+    text = r"**glory\\" + "\n**hammer**"  # three bold marks: the first two pair up
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["glory\\\\\n"], "hammer**"])  # the unpaired trailing mark stays in the text
+
+    text = r"//glory\\" + "\n//hammer//"  # same shape with italic marks
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
+    assert_text(spans, [["glory\\\\\n"], "hammer//"])
+
+
+def test_parse_pairs_nested():
+    """Test parsing for bold and italic marks nested inside each other"""
+    text: str
+    spans: Spans
+
+    # Simple nested test cases: the outer pair contains the inner pair
+    text = "**//hello//**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, [ItalicSpan, TextSpan]]])
+    assert_text(spans, [[["hello"]]])
+
+    text = "//**world**//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[ItalicSpan, [BoldSpan, TextSpan]]])
+    assert_text(spans, [[["world"]]])
+
+    # Overlap should only parse the first pair; the other marks remain text
+    text = "**Hello//world**//"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["Hello//world"], "//"])
+
+
+def test_normalize_title():
+    """Check that normalize_title trims, collapses whitespace, and capitalizes"""
+    # Each assertion pairs a raw title with its expected normalized form
+    assert normalize_title("hello") == "Hello"
+    assert normalize_title("  world  ") == "World"
+    assert normalize_title("Waiting for           Godot") == "Waiting for Godot"
+    assert normalize_title("lowercase letters") == "Lowercase letters"
+
+
+def test_parse_citation_single():
+    """Test parsing a lone citation: displayed text, cite target, and pipe handling"""
+    text: str
+    spans: Spans
+
+    # Simple test cases; `citation` is annotated once and then only reassigned
+    text = "[[hello]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Hello"  # no pipe: target derives from the text
+
+    text = "[[hello|world]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation = spans[0]
+    assert citation.cite_target == "World"  # text before the pipe, target after it
+
+    text = "[[hello||world]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello"]])
+    citation = spans[0]
+    assert citation.cite_target == "|world"  # only the first pipe splits
+
+    text = "[[  hello  |  world  ]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["  hello  "]])  # displayed text keeps its padding
+    citation = spans[0]
+    assert citation.cite_target == "World"  # target comes back trimmed
+
+    text = "[[faith|hope|love]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["faith"]])
+    citation = spans[0]
+    assert citation.cite_target == "Hope|love"  # later pipes stay in the target
+
+    text = "[[ [[|]] ]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
+    assert_text(spans, [[" [["], " ]]"])  # the first ]] closes the citation
+    citation = spans[0]
+    assert citation.cite_target == ""  # nothing between the pipe and the close
+
+
+def test_parse_citation_break():
+    """Test citations whose text or target contains a backslash line break"""
+    text: str
+    spans: Spans
+
+    text = "[[hello\\\\\nworld]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["hello\\\\\nworld"]])  # displayed text keeps the raw break
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "Hello\\\\ world"  # newline becomes a space
+
+    text = "[[one|two\\\\\nthree]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["one"]])
+    citation = spans[0]  # annotated above; re-annotating would redefine the name
+    assert citation.cite_target == "Two\\\\ three"
+
+
+def test_parse_citation_nested():
+    """Test citations combined with bold marks, nested and overlapping"""
+    text: str
+    spans: Spans
+
+    text = "[[**hello world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, [BoldSpan, TextSpan]]])
+    assert_text(spans, [[["hello world"]]])
+    citation: CitationSpan = spans[0]
+    assert citation.cite_target == "**hello world**"  # target keeps the raw marks
+
+    text = "[[**hello|world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan]])
+    assert_text(spans, [["**hello"]])  # the pipe splits the pair, so no bold span
+    citation = spans[0]
+    assert citation.cite_target == "World**"
+
+    text = "**[[hello world]]**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, [CitationSpan, TextSpan]]])
+    assert_text(spans, [[["hello world"]]])
+    citation = spans[0].spans[0]  # the citation is the bold span's only child
+    assert citation.cite_target == "Hello world"
+
+    text = "**[[hello world**]]"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
+    assert_text(spans, [["[[hello world"], "]]"])  # bold opened first, so it wins
+
+    text = "[[**hello world]]**"
+    spans = parse_paired_formatting(text)
+    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
+    assert_text(spans, [["**hello world"], "**"])  # citation opened first, so it wins
+    citation = spans[0]
+    assert citation.cite_target == "**hello world"
+
+
+def test_parse_paragraphs():
+    """Check that single paragraphs parse into the right paragraph container"""
+    line: str
+    paragraph: SpanContainer
+
+    # Body paragraph: the leading tab does not appear in the parsed text
+    line = "\tIn the beginning was the Word."
+    paragraph = parse_paragraph(line)
+    assert_types([paragraph], [[BodyParagraph, TextSpan]])
+    assert_text([paragraph], [["In the beginning was the Word."]])
+
+    # Signature paragraph: the leading tilde does not appear in the parsed text
+    line = "~Ersatz Scrivener, scholar extraordinaire"
+    paragraph = parse_paragraph(line)
+    assert_types([paragraph], [[SignatureParagraph, TextSpan]])
+    assert_text([paragraph], [["Ersatz Scrivener, scholar extraordinaire"]])
+
+
+def test_parse_article():
+    """Parse a three-paragraph article and check the full span tree"""
+    source: str = (
+        "Writing a **unit test** requires having test //content//.\n\n"
+        "This content, of course, must be [[created|Writing test collateral]].\n\n"
+        "~Bucky\\\\\nUnit test writer"
+    )
+    tree: ParsedArticle = parse_raw_markdown(source)
+
+    expected_types = [
+        [
+            ParsedArticle,
+            # First paragraph: bold and italic spans between plain text
+            [
+                BodyParagraph,
+                TextSpan,
+                [BoldSpan, TextSpan],
+                TextSpan,
+                [ItalicSpan, TextSpan],
+                TextSpan,
+            ],
+            [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan],
+            # Signature: the backslash break shows up as a LineBreak
+            [SignatureParagraph, TextSpan, LineBreak, TextSpan],
+        ]
+    ]
+    assert_types([tree], expected_types)
+    expected_text = [
+        [
+            [
+                "Writing a ",
+                ["unit test"],
+                " requires having test ",
+                ["content"],
+                ".",
+            ],
+            # Only the citation's displayed text appears, not the target after the pipe
+            ["This content, of course, must be ", ["created"], "."],
+            # None sits in the LineBreak position (presumably skipped by assert_text)
+            ["Bucky", None, "Unit test writer"],
+        ]
+    ]
+    assert_text([tree], expected_text)
+
+
+def test_visitor():
+    """Test that rendering dispatches each span to the visitor hook named for its type"""
+
+    class TestVisitor(RenderableVisitor):
+        def __init__(self):
+            self.visited = []  # every span seen, in visit order
+
+        def TextSpan(self, span: TextSpan):
+            assert isinstance(span, TextSpan)
+            self.visited.append(span)
+
+        def LineBreak(self, span: LineBreak):
+            assert isinstance(span, LineBreak)
+            self.visited.append(span)
+
+        def ParsedArticle(self, span: ParsedArticle):
+            assert isinstance(span, ParsedArticle)
+            self.visited.append(span)
+            span.recurse(self)  # containers record themselves, then descend
+
+        def BodyParagraph(self, span: BodyParagraph):
+            assert isinstance(span, BodyParagraph)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def SignatureParagraph(self, span: SignatureParagraph):
+            assert isinstance(span, SignatureParagraph)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def BoldSpan(self, span: BoldSpan):
+            assert isinstance(span, BoldSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def ItalicSpan(self, span: ItalicSpan):
+            assert isinstance(span, ItalicSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+        def CitationSpan(self, span: CitationSpan):
+            assert isinstance(span, CitationSpan)
+            self.visited.append(span)
+            span.recurse(self)
+
+    article: str = (
+        "Writing a **unit test** requires having test //content//.\n\n"
+        "This content, of course, must be [[created|Writing test collateral]].\n\n"
+        "~Bucky\\\\\nUnit test writer"
+    )
+    parsed: ParsedArticle = parse_raw_markdown(article)
+
+    visitor = TestVisitor()
+    # Each hook asserts it received its own span type, so a mis-dispatch fails here
+    parsed.render(visitor)
+    # Hooks record a span before recursing, so the expected order is parent-first
+    type_order = [
+        ParsedArticle,
+        BodyParagraph,
+        TextSpan,
+        BoldSpan,
+        TextSpan,
+        TextSpan,
+        ItalicSpan,
+        TextSpan,
+        TextSpan,
+        BodyParagraph,
+        TextSpan,
+        CitationSpan,
+        TextSpan,
+        TextSpan,
+        SignatureParagraph,
+        TextSpan,
+        LineBreak,
+        TextSpan,
+    ]
+    assert len(visitor.visited) == len(type_order)  # guards zip() against truncation
+    for span, expected_type in zip(visitor.visited, type_order):
+        assert isinstance(span, expected_type)  # loop name no longer shadows builtin type