Compare commits

..

No commits in common. "00738b5a45b358d7cad4833ba8a5df17a467ac15" and "06ec2e168e9fc21cc2879e4cac6e4a1290255064" have entirely different histories.

13 changed files with 405 additions and 920 deletions

View File

@ -9,94 +9,11 @@ from amanuensis.config import ReadOnlyOrderedDict
from amanuensis.models import LexiconModel, UserModel from amanuensis.models import LexiconModel, UserModel
from amanuensis.parser import ( from amanuensis.parser import (
parse_raw_markdown, parse_raw_markdown,
GetCitations,
HtmlRenderer,
titlesort, titlesort,
filesafe_title) filesafe_title,
from amanuensis.parser.core import RenderableVisitor ConstraintAnalysis)
class GetCitations(RenderableVisitor):
    """
    Visitor that gathers the citation targets found in an article.

    Rendering a ParsedArticle with this visitor returns the list of
    collected targets, in the order the citations were visited.
    """

    def __init__(self):
        # Citation targets collected so far, in visit order
        self.citations = list()

    def ParsedArticle(self, span):
        """Walk the whole tree, then hand back the collected targets."""
        span.recurse(self)
        collected = self.citations
        return collected

    def CitationSpan(self, span):
        """Record the target of this citation without descending into it."""
        target = span.cite_target
        self.citations.append(target)
        return self
class ConstraintAnalysis(RenderableVisitor):
    """
    Visitor that computes article metrics (word count, citation targets,
    signature count) for checking an article against constraints.
    """

    def __init__(self, lexicon: LexiconModel):
        # NOTE(review): `lexicon` is accepted but not read by anything in
        # this class; it is kept so existing callers continue to work.
        # Message buckets; nothing in this class fills them.
        self.info: List[str] = []
        self.warning: List[str] = []
        self.error: List[str] = []

        # Metrics accumulated while visiting the token tree
        self.word_count: int = 0
        self.citations: List[str] = []
        self.signatures: int = 0

    def TextSpan(self, span):
        """Add the number of whitespace-separated words in this span."""
        # str.split() with no separator drops empty strings, so an empty or
        # whitespace-only span adds 0 words. re.split(r'\s+', '') returns
        # [''] and would have counted such a span as one word.
        self.word_count += len(span.innertext.split())
        return self

    def SignatureParagraph(self, span):
        """Count the signature paragraph, then visit its contents."""
        self.signatures += 1
        span.recurse(self)
        return self

    def CitationSpan(self, span):
        """Record the citation target, then visit the citation text."""
        self.citations.append(span.cite_target)
        span.recurse(self)
        return self
class HtmlRenderer(RenderableVisitor):
    """
    Visitor that turns an article token tree into the HTML of a
    published article.
    """

    def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
        # Name interpolated into the /lexicon/<name>/article/<title> links
        self.lexicon_name: str = lexicon_name
        # Citations to titles outside this collection get the phantom class
        self.written_articles: Iterable[str] = written_articles

    def _inner(self, span) -> str:
        """Render the children of a container span and concatenate them."""
        return "".join(span.recurse(self))

    def TextSpan(self, span):
        """Text is emitted as-is."""
        return span.innertext

    def LineBreak(self, span):
        """A line break becomes a <br> tag."""
        return '<br>'

    def ParsedArticle(self, span):
        """Rendered paragraphs are joined with newlines."""
        paragraphs = span.recurse(self)
        return '\n'.join(paragraphs)

    def BodyParagraph(self, span):
        """A body paragraph becomes a <p> element."""
        return f'<p>{self._inner(span)}</p>'

    def SignatureParagraph(self, span):
        """A signature is a rule followed by a signature-classed span."""
        body = self._inner(span)
        return '<hr><span class="signature"><p>' + f'{body}' + '</p></span>'

    def BoldSpan(self, span):
        """Bold text becomes a <b> element."""
        return f'<b>{self._inner(span)}</b>'

    def ItalicSpan(self, span):
        """Italic text becomes an <i> element."""
        return f'<i>{self._inner(span)}</i>'

    def CitationSpan(self, span):
        """A citation becomes a link to the cited article's page."""
        target = span.cite_target
        is_written = target in self.written_articles
        link_class = '' if is_written else ' class="phantom"'
        # The URL is assembled by hand here rather than through
        # url_for('lexicon.article', name=..., title=...).
        link = f'/lexicon/{self.lexicon_name}' + f'/article/{filesafe_title(target)}'
        return f'<a href="{link}"{link_class}>{self._inner(span)}</a>'
def get_player_characters( def get_player_characters(

View File

@ -13,7 +13,7 @@
# from amanuensis.config.loader import AttrOrderedDict # from amanuensis.config.loader import AttrOrderedDict
# from amanuensis.errors import ArgumentError # from amanuensis.errors import ArgumentError
# from amanuensis.lexicon import LexiconModel # from amanuensis.lexicon import LexiconModel
# from amanuensis.parser import parse_raw_markdown, filesafe_title, titlesort # from amanuensis.parser import parse_raw_markdown, GetCitations, HtmlRenderer, filesafe_title, titlesort
# from amanuensis.resources import get_stream # from amanuensis.resources import get_stream

View File

@ -2,14 +2,19 @@
Module encapsulating all markdown parsing functionality. Module encapsulating all markdown parsing functionality.
""" """
from .core import RenderableVisitor from .analyze import ConstraintAnalysis, GetCitations
from .helpers import normalize_title, filesafe_title, titlesort from .core import normalize_title
from .helpers import titlesort, filesafe_title
from .parsing import parse_raw_markdown from .parsing import parse_raw_markdown
from .render import PreviewHtmlRenderer, HtmlRenderer
__all__ = [ __all__ = [
"RenderableVisitor", ConstraintAnalysis.__name__,
"normalize_title", GetCitations.__name__,
"filesafe_title", normalize_title.__name__,
"titlesort", titlesort.__name__,
"parse_raw_markdown", filesafe_title.__name__,
parse_raw_markdown.__name__,
PreviewHtmlRenderer.__name__,
HtmlRenderer.__name__,
] ]

View File

@ -0,0 +1,49 @@
"""
Internal module encapsulating visitors that compute metrics on articles
for verification against constraints.
"""
import re
from typing import List
from amanuensis.models import LexiconModel
from .core import RenderableVisitor
class GetCitations(RenderableVisitor):
    """
    Visitor that gathers the citation targets found in an article.

    Rendering a ParsedArticle with this visitor returns the list of
    collected targets, in the order the citations were visited.
    """

    def __init__(self):
        # Citation targets collected so far, in visit order
        self.citations = list()

    def ParsedArticle(self, span):
        """Walk the whole tree, then hand back the collected targets."""
        span.recurse(self)
        collected = self.citations
        return collected

    def CitationSpan(self, span):
        """Record the target of this citation without descending into it."""
        target = span.cite_target
        self.citations.append(target)
        return self
class ConstraintAnalysis(RenderableVisitor):
    """
    Visitor that computes article metrics (word count, citation targets,
    signature count) for checking an article against constraints.
    """

    def __init__(self, lexicon: LexiconModel):
        # NOTE(review): `lexicon` is accepted but not read by anything in
        # this class; it is kept so existing callers continue to work.
        # Message buckets; nothing in this class fills them.
        self.info: List[str] = []
        self.warning: List[str] = []
        self.error: List[str] = []

        # Metrics accumulated while visiting the token tree
        self.word_count: int = 0
        self.citations: List[str] = []
        self.signatures: int = 0

    def TextSpan(self, span):
        """Add the number of whitespace-separated words in this span."""
        # str.split() with no separator drops empty strings, so an empty or
        # whitespace-only span adds 0 words. re.split(r'\s+', '') returns
        # [''] and would have counted such a span as one word.
        self.word_count += len(span.innertext.split())
        return self

    def SignatureParagraph(self, span):
        """Count the signature paragraph, then visit its contents."""
        self.signatures += 1
        span.recurse(self)
        return self

    def CitationSpan(self, span):
        """Record the citation target, then visit the citation text."""
        self.citations.append(span.cite_target)
        span.recurse(self)
        return self

View File

@ -5,134 +5,131 @@ which can be operated on by a visitor defining functions that hook off
of the different token types. of the different token types.
""" """
import re
from typing import Callable, Any, Sequence from typing import Callable, Any, Sequence
from .helpers import normalize_title RenderHook = Callable[['Renderable'], Any]
Spans = Sequence['Renderable']
RenderHook = Callable[["Renderable"], Any] def normalize_title(title: str) -> str:
Spans = Sequence["Renderable"] """
Normalizes strings as titles:
- Strips leading and trailing whitespace
- Merges internal whitespace into a single space
- Capitalizes the first word
"""
cleaned = re.sub(r'\s+', " ", title.strip())
return cleaned[:1].capitalize() + cleaned[1:]
class Renderable: class Renderable():
""" """
Base class for parsed markdown. Provides the `render()` method for Base class for parsed markdown. Provides the `render()` method for
visiting the token tree. visiting the token tree.
""" """
def render(self: 'Renderable', renderer: 'RenderableVisitor'):
def render(self: "Renderable", renderer: "RenderableVisitor"): """
""" Execute the appropriate visitor method on this Renderable.
Execute the appropriate visitor method on this Renderable. """
Visitors implement hooks by declaring methods whose names are hook: RenderHook = getattr(renderer, type(self).__name__, None)
the name of a Renderable class. if hook:
""" return hook(self)
hook: RenderHook = getattr(renderer, type(self).__name__, None) return None
if hook:
return hook(self)
return None
class TextSpan(Renderable): class TextSpan(Renderable):
"""A length of text.""" """An unstyled length of text."""
def __init__(self, innertext: str):
self.innertext = innertext
def __init__(self, innertext: str): def __str__(self):
self.innertext = innertext return f"[{self.innertext}]"
def __repr__(self):
return f"<{self.innertext}>"
class LineBreak(Renderable): class LineBreak(Renderable):
"""A line break within a paragraph.""" """A line break within a paragraph."""
def __str__(self):
def __repr__(self): return "<break>"
return "<break>"
class SpanContainer(Renderable): class SpanContainer(Renderable):
"""A formatting element that wraps some amount of text.""" """A formatting element that wraps some amount of text."""
def __init__(self, spans: Spans):
self.spans: Spans = spans
def __init__(self, spans: Spans): def __str__(self):
self.spans: Spans = spans return (f'[{type(self).__name__} '
+ f'{" ".join([str(span) for span in self.spans])}]')
def __repr__(self): def recurse(self, renderer: 'RenderableVisitor'):
return ( return [child.render(renderer) for child in self.spans]
f"<{type(self).__name__} "
+ f'{" ".join([repr(span) for span in self.spans])}>'
)
def recurse(self, renderer: "RenderableVisitor"):
return [child.render(renderer) for child in self.spans]
class ParsedArticle(SpanContainer): class ParsedArticle(SpanContainer):
"""Token tree root node, containing some number of paragraph tokens.""" """Token tree root node, containing some number of paragraph tokens."""
class BodyParagraph(SpanContainer): class BodyParagraph(SpanContainer):
"""A normal paragraph.""" """A normal paragraph."""
class SignatureParagraph(SpanContainer): class SignatureParagraph(SpanContainer):
"""A paragraph preceded by a signature mark.""" """A paragraph preceded by a signature mark."""
class BoldSpan(SpanContainer): class BoldSpan(SpanContainer):
"""A span of text inside bold marks.""" """A span of text inside bold marks."""
class ItalicSpan(SpanContainer): class ItalicSpan(SpanContainer):
"""A span of text inside italic marks.""" """A span of text inside italic marks."""
class CitationSpan(SpanContainer): class CitationSpan(SpanContainer):
"""A citation to another article.""" """A citation to another article."""
def __init__(self, spans: Spans, cite_target: str):
super().__init__(spans)
# Normalize citation target on parse, since we don't want
# abnormal title strings lying around causing trouble.
self.cite_target: str = normalize_title(cite_target)
def __init__(self, spans: Spans, cite_target: str): def __str__(self):
super().__init__(spans) return (f'{{{" ".join([str(span) for span in self.spans])}'
# Normalize citation target on parse, since we don't want + f':{self.cite_target}}}')
# abnormal title strings lying around causing trouble.
self.cite_target: str = normalize_title(cite_target)
def __repr__(self) -> str:
return (
f'{{{" ".join([repr(span) for span in self.spans])}'
+ f":{self.cite_target}}}"
)
class RenderableVisitor: class RenderableVisitor():
""" """
Default implementation of the visitor pattern. Executes once on Default implementation of the visitor pattern. Executes once on
each token in the tree and returns itself. each token in the tree and returns itself.
""" """
def TextSpan(self, span: TextSpan):
return self
def TextSpan(self, span: TextSpan): def LineBreak(self, span: LineBreak):
return self return self
def LineBreak(self, span: LineBreak): def ParsedArticle(self, span: ParsedArticle):
return self span.recurse(self)
return self
def ParsedArticle(self, span: ParsedArticle): def BodyParagraph(self, span: BodyParagraph):
span.recurse(self) span.recurse(self)
return self return self
def BodyParagraph(self, span: BodyParagraph): def SignatureParagraph(self, span: SignatureParagraph):
span.recurse(self) span.recurse(self)
return self return self
def SignatureParagraph(self, span: SignatureParagraph): def BoldSpan(self, span: BoldSpan):
span.recurse(self) span.recurse(self)
return self return self
def BoldSpan(self, span: BoldSpan): def ItalicSpan(self, span: ItalicSpan):
span.recurse(self) span.recurse(self)
return self return self
def ItalicSpan(self, span: ItalicSpan): def CitationSpan(self, span: CitationSpan):
span.recurse(self) span.recurse(self)
return self return self
def CitationSpan(self, span: CitationSpan):
span.recurse(self)
return self

View File

@ -1,53 +1,28 @@
"""
Helper functions for manipulating titles during parsing
"""
import re import re
import urllib.parse import urllib.parse
def normalize_title(title: str) -> str:
"""
Normalizes strings as titles:
- Strips leading and trailing whitespace
- Merges internal whitespace into a single space
- Capitalizes the first word
"""
cleaned = re.sub(r"\s+", " ", title.strip())
return cleaned[:1].capitalize() + cleaned[1:]
def titlesort(title: str) -> str: def titlesort(title: str) -> str:
""" """
Strips articles off of titles for alphabetical sorting purposes Strips articles off of titles for alphabetical sorting purposes
""" """
lower = title.lower() lower = title.lower()
if lower.startswith("the "): if lower.startswith("the "):
return lower[4:] return lower[4:]
if lower.startswith("an "): if lower.startswith("an "):
return lower[3:] return lower[3:]
if lower.startswith("a "): if lower.startswith("a "):
return lower[2:] return lower[2:]
return lower return lower
def filesafe_title(title: str) -> str: def filesafe_title(title: str) -> str:
""" """
Makes an article title filename-safe. Makes an article title filename-safe.
""" """
# Replace whitespace with _ s = re.sub(r"\s+", '_', title) # Replace whitespace with _
s = re.sub(r"\s+", "_", title) s = re.sub(r"~", '-', s) # parse.quote doesn't catch ~
s = urllib.parse.quote(s) # Encode all other characters
# parse.quote doesn't catch ~ s = re.sub(r"%", "", s) # Strip encoding %s
s = re.sub(r"~", "-", s) s = s[:64] # Limit to 64 characters
return s
# Encode all other characters
s = urllib.parse.quote(s)
# Strip encoding %s
s = re.sub(r"%", "", s)
# Limit to 64 characters
s = s[:64]
return s

View File

@ -7,203 +7,150 @@ import re
from typing import Sequence from typing import Sequence
from .core import ( from .core import (
TextSpan, TextSpan,
LineBreak, LineBreak,
ParsedArticle, ParsedArticle,
BodyParagraph, BodyParagraph,
SignatureParagraph, SignatureParagraph,
BoldSpan, BoldSpan,
ItalicSpan, ItalicSpan,
CitationSpan, CitationSpan,
Renderable, Renderable,
SpanContainer, SpanContainer
) )
Spans = Sequence[Renderable] Spans = Sequence[Renderable]
def parse_raw_markdown(text: str) -> ParsedArticle: def parse_raw_markdown(text: str) -> ParsedArticle:
""" """
Parses a body of Lexipython markdown into a Renderable tree. Parses a body of Lexipython markdown into a Renderable tree.
""" """
# Parse each paragraph individually, as no formatting applies # Parse each paragraph individually, as no formatting applies
# across paragraphs # across paragraphs
paragraphs = re.split(r"\n\n+", text) paragraphs = re.split(r'\n\n+', text)
parse_results = list(map(parse_paragraph, paragraphs)) parse_results = list(map(parse_paragraph, paragraphs))
return ParsedArticle(parse_results) return ParsedArticle(parse_results)
def parse_paragraph(text: str) -> SpanContainer: def parse_paragraph(text: str) -> SpanContainer:
""" # Parse the paragraph as a span of text
Parses a block of text into a paragraph object. text = text.strip()
""" if text and text[0] == '~':
# Parse the paragraph as a span of text return SignatureParagraph(parse_paired_formatting(text[1:]))
text = text.strip() else:
if text and text[0] == "~": return BodyParagraph(parse_paired_formatting(text))
return SignatureParagraph(parse_paired_formatting(text[1:]))
else:
return BodyParagraph(parse_paired_formatting(text))
def parse_paired_formatting( def parse_paired_formatting(
text: str, text: str,
in_cite: bool = False, cite: bool = True,
in_bold: bool = False, bold: bool = True,
in_italic: bool = False, italic: bool = True) -> Spans:
) -> Spans: # Find positions of any paired formatting
""" first_cite = find_pair(text, "[[", "]]", cite)
Parses citations, bolds, and italics, which can be nested inside each other. first_bold = find_pair(text, "**", "**", bold)
A single type cannot nest inside itself, which is controlled by setting the first_italic = find_pair(text, "//", "//", italic)
flag parameters to False. # Load the possible parse handlers into the map
""" handlers = {}
# Find positions of any paired formatting handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
next_cite = find_pair(text, "[[", "]]") if not in_cite else -1 handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
next_bold = find_pair(text, "**", "**") if not in_bold else -1 handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
next_italic = find_pair(text, "//", "//") if not in_italic else -1 # If nothing was found, move on to the next parsing step
# Create a map from a formatting mark's distance to its parse handler handlers[-1] = lambda: parse_breaks(text)
handlers = {} # Choose a handler based on the earliest found result
handlers[next_cite] = lambda: parse_citation( finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
text, in_bold=in_bold, in_italic=in_italic first = min(finds) if finds else -1
) return handlers[first]()
handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
# Map the next parsing step at -1. If we're currently inside a formatting
# mark pair, skip parsing line breaks, which are not allowed inside paired
# marks.
if in_cite or in_bold or in_italic:
handlers[-1] = lambda: parse_text(text)
else:
handlers[-1] = lambda: parse_breaks(text)
# Choose the handler for the earliest found pair, or the default handler
# at -1 if nothing was found.
finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
first = min(finds) if finds else -1
return handlers[first]()
def find_pair(text: str, open_tag: str, close_tag: str) -> int: def find_pair(
""" text: str,
Finds the beginning of a pair of formatting marks. open_tag: str,
""" close_tag: str,
# If the open tag wasn't found, return -1 valid: bool) -> int:
first = text.find(open_tag) # If skipping, return -1
if first < 0: if not valid:
return -1 return -1
# If the close tag wasn't found after the open tag, return -1 # If the open tag wasn't found, return -1
second = text.find(close_tag, first + len(open_tag)) first = text.find(open_tag)
if second < 0: if first < 0:
return -1 return -1
# Otherwise, the pair exists # If the close tag wasn't found after the open tag, return -1
return first second = text.find(close_tag, first + len(open_tag))
if second < 0:
return -1
# Otherwise, the pair exists
return first
def parse_citation( def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
text: str, cite_open = text.find("[[")
in_bold: bool = False, if cite_open > -1:
in_italic: bool = False, cite_close = text.find("]]", cite_open + 2)
) -> Spans: # Since we searched for pairs from the beginning, there should be no
""" # undetected pair formatting before this one, so move to the next
Parses text into a citation span. # level of parsing
""" spans_before = parse_breaks(text[:cite_open])
cite_open = text.find("[[") # Continue parsing pair formatting after this one closes with all
if cite_open > -1: # three as valid choices
cite_close = text.find("]]", cite_open + 2) spans_after = parse_paired_formatting(text[cite_close + 2:])
# Since we searched for pairs from the beginning, there should be no # Parse inner text and skip parsing for this format pair
# undetected pair formatting before this one, so move to the next text_inner = text[cite_open + 2:cite_close]
# level of parsing # For citations specifically, we may need to split off a citation
spans_before = parse_breaks(text[:cite_open]) # target from the alias text
# Continue parsing pair formatting after this one closes with all inner_split = text_inner.split("|", 1)
# three as valid choices text_inner_actual, cite_target = inner_split[0], inner_split[-1]
spans_after = parse_paired_formatting(text[cite_close + 2 :]) spans_inner = parse_paired_formatting(text_inner_actual,
# Parse inner text and skip parsing for this format pair cite=False, bold=bold, italic=italic)
text_inner = text[cite_open + 2 : cite_close] citation = CitationSpan(spans_inner, cite_target)
# For citations specifically, try to split off a citation target. return [*spans_before, citation, *spans_after]
# If there's no citation target to split, use the same text as the # Should never happen
# citation text and the target. return parse_breaks(text)
inner_split = text_inner.split("|", 1)
text_inner_actual, cite_target = inner_split[0], inner_split[-1]
spans_inner = parse_paired_formatting(
text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
)
citation = CitationSpan(spans_inner, cite_target)
return [*spans_before, citation, *spans_after]
# Should never happen
return parse_breaks(text)
def parse_bold( def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
text: str, bold_open = text.find("**")
in_cite: bool = False, if bold_open > -1:
in_italic: bool = False, bold_close = text.find("**", bold_open + 2)
) -> Spans: # Should be no formatting behind us
""" spans_before = parse_breaks(text[:bold_open])
Parses text into a bold span. # Freely parse formatting after us
""" spans_after = parse_paired_formatting(text[bold_close + 2:])
bold_open = text.find("**") # Parse inner text minus bold parsing
if bold_open > -1: text_inner = text[bold_open + 2:bold_close]
bold_close = text.find("**", bold_open + 2) spans_inner = parse_paired_formatting(text_inner,
# Should be no formatting behind us cite=cite, bold=False, italic=italic)
spans_before = parse_breaks(text[:bold_open]) bold = BoldSpan(spans_inner)
# Freely parse formatting after us return [*spans_before, bold, *spans_after]
spans_after = parse_paired_formatting(text[bold_close + 2 :]) # Should never happen
# Parse inner text minus bold parsing return parse_italic(text)
text_inner = text[bold_open + 2 : bold_close]
spans_inner = parse_paired_formatting(
text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
)
bold = BoldSpan(spans_inner)
return [*spans_before, bold, *spans_after]
# Should never happen
return parse_italic(text)
def parse_italic( def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
text: str, italic_open = text.find("//")
in_cite: bool = False, if italic_open > -1:
in_bold: bool = False, italic_close = text.find("//", italic_open + 2)
) -> Spans: # Should be no formatting behind us
""" spans_before = parse_breaks(text[:italic_open])
Parses text into an italic span. # Freely parse formatting after us
""" spans_after = parse_paired_formatting(text[italic_close + 2:])
italic_open = text.find("//") # Parse inner text minus italic parsing
if italic_open > -1: text_inner = text[italic_open + 2:italic_close]
italic_close = text.find("//", italic_open + 2) spans_inner = parse_paired_formatting(text_inner,
# Should be no formatting behind us cite=cite, bold=bold, italic=False)
spans_before = parse_breaks(text[:italic_open]) italic = ItalicSpan(spans_inner)
# Freely parse formatting after us return [*spans_before, italic, *spans_after]
spans_after = parse_paired_formatting(text[italic_close + 2 :]) # Should never happen
# Parse inner text minus italic parsing return parse_breaks(text)
text_inner = text[italic_open + 2 : italic_close]
spans_inner = parse_paired_formatting(
text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
)
italic = ItalicSpan(spans_inner)
return [*spans_before, italic, *spans_after]
# Should never happen
return parse_breaks(text)
def parse_breaks(text: str) -> Spans: def parse_breaks(text: str) -> Spans:
""" if not text:
Parses intra-paragraph line breaks. return []
""" splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
# Parse empty text into nothing spans: Spans = [
if not text: splits[i // 2] if i % 2 == 0 else LineBreak()
return [] for i in range(0, 2 * len(splits) - 1)
# Split on the line break mark appearing at the end of the line ]
splits: Spans = list(map(TextSpan, text.split("\\\\\n"))) return spans
# Put a LineBreak between each TextSpan
spans: Spans = [
splits[i // 2] if i % 2 == 0 else LineBreak()
for i in range(0, 2 * len(splits) - 1)
]
return spans
def parse_text(text: str) -> Spans:
"""
Parses text with no remaining parseable marks.
"""
if not text:
return []
return [TextSpan(text)]

104
amanuensis/parser/render.py Normal file
View File

@ -0,0 +1,104 @@
"""
Internal module encapsulating visitors that render articles into
readable formats.
"""
from typing import Iterable
from .core import RenderableVisitor
from .helpers import filesafe_title
class HtmlRenderer(RenderableVisitor):
    """
    Visitor that turns an article token tree into the HTML of a
    published article.
    """

    def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
        # Name interpolated into the /lexicon/<name>/article/<title> links
        self.lexicon_name: str = lexicon_name
        # Citations to titles outside this collection get the phantom class
        self.written_articles: Iterable[str] = written_articles

    def _inner(self, span) -> str:
        """Render the children of a container span and concatenate them."""
        return "".join(span.recurse(self))

    def TextSpan(self, span):
        """Text is emitted as-is."""
        return span.innertext

    def LineBreak(self, span):
        """A line break becomes a <br> tag."""
        return '<br>'

    def ParsedArticle(self, span):
        """Rendered paragraphs are joined with newlines."""
        paragraphs = span.recurse(self)
        return '\n'.join(paragraphs)

    def BodyParagraph(self, span):
        """A body paragraph becomes a <p> element."""
        return f'<p>{self._inner(span)}</p>'

    def SignatureParagraph(self, span):
        """A signature is a rule followed by a signature-classed span."""
        body = self._inner(span)
        return '<hr><span class="signature"><p>' + f'{body}' + '</p></span>'

    def BoldSpan(self, span):
        """Bold text becomes a <b> element."""
        return f'<b>{self._inner(span)}</b>'

    def ItalicSpan(self, span):
        """Italic text becomes an <i> element."""
        return f'<i>{self._inner(span)}</i>'

    def CitationSpan(self, span):
        """A citation becomes a link to the cited article's page."""
        target = span.cite_target
        is_written = target in self.written_articles
        link_class = '' if is_written else ' class="phantom"'
        # The URL is assembled by hand here rather than through
        # url_for('lexicon.article', name=..., title=...).
        link = f'/lexicon/{self.lexicon_name}' + f'/article/{filesafe_title(target)}'
        return f'<a href="{link}"{link_class}>{self._inner(span)}</a>'
class PreviewHtmlRenderer(RenderableVisitor):
    """
    Visitor that renders an article token tree into preview HTML.

    Rendering a ParsedArticle stores the HTML in `contents` and returns
    the visitor itself. Each citation is underlined, numbered, and
    described by an entry in `citations`.
    """

    def __init__(self, lexicon):
        # Map every title in the lexicon's 'info' to that entry's character
        with lexicon.ctx.read('info') as info:
            self.article_map = dict(
                (title, article.character)
                for title, article in info.items()
            )
        # One "<target> <status>" string per citation, in visit order
        self.citations = []
        # Rendered HTML, filled in by ParsedArticle
        self.contents = ""

    def _inner(self, span) -> str:
        """Render the children of a container span and concatenate them."""
        return "".join(span.recurse(self))

    def TextSpan(self, span):
        """Text is emitted as-is."""
        return span.innertext

    def LineBreak(self, span):
        """A line break becomes a <br> tag."""
        return '<br>'

    def ParsedArticle(self, span):
        """Store the newline-joined paragraphs and return the visitor."""
        paragraphs = span.recurse(self)
        self.contents = '\n'.join(paragraphs)
        return self

    def BodyParagraph(self, span):
        """A body paragraph becomes a <p> element."""
        return f'<p>{self._inner(span)}</p>'

    def SignatureParagraph(self, span):
        """A signature is a rule followed by a signature-classed span."""
        body = self._inner(span)
        return '<hr><span class="signature"><p>' + f'{body}' + '</p></span>'

    def BoldSpan(self, span):
        """Bold text becomes a <b> element."""
        return f'<b>{self._inner(span)}</b>'

    def ItalicSpan(self, span):
        """Italic text becomes an <i> element."""
        return f'<i>{self._inner(span)}</i>'

    def CitationSpan(self, span):
        """Underline the citation text and append its 1-based index."""
        target = span.cite_target
        # Unknown titles are [new]; known titles are [extant] when their
        # mapped character value is truthy and [phantom] otherwise.
        if target not in self.article_map:
            link_class = '[new]'
        elif self.article_map.get(target):
            link_class = '[extant]'
        else:
            link_class = '[phantom]'
        self.citations.append(f'{target} {link_class}')
        # Render the inner text before reading the citation count
        inner = self._inner(span)
        index = len(self.citations)
        return f'<u>{inner}</u>[{index}]'

View File

@ -15,7 +15,9 @@ from amanuensis.lexicon import (
create_character_in_lexicon, create_character_in_lexicon,
get_draft) get_draft)
from amanuensis.models import LexiconModel from amanuensis.models import LexiconModel
from amanuensis.parser import parse_raw_markdown from amanuensis.parser import (
parse_raw_markdown,
PreviewHtmlRenderer)
from amanuensis.server.helpers import ( from amanuensis.server.helpers import (
lexicon_param, lexicon_param,
player_required, player_required,
@ -27,7 +29,7 @@ from .forms import (
LexiconPublishTurnForm, LexiconPublishTurnForm,
LexiconConfigForm) LexiconConfigForm)
from .editor import load_editor, new_draft, update_draft, PreviewHtmlRenderer from .editor import load_editor, new_draft, update_draft
bp_session = Blueprint('session', __name__, bp_session = Blueprint('session', __name__,

View File

@ -17,56 +17,8 @@ from amanuensis.lexicon import (
from amanuensis.models import LexiconModel from amanuensis.models import LexiconModel
from amanuensis.parser import ( from amanuensis.parser import (
normalize_title, normalize_title,
parse_raw_markdown) parse_raw_markdown,
from amanuensis.parser.core import RenderableVisitor PreviewHtmlRenderer)
class PreviewHtmlRenderer(RenderableVisitor):
    """
    Visitor that renders an article token tree into preview HTML.

    Rendering a ParsedArticle stores the HTML in `contents` and returns
    the visitor itself. Each citation is underlined, numbered, and
    described by an entry in `citations`.
    """

    def __init__(self, lexicon):
        # Map every title in the lexicon's 'info' to that entry's character
        with lexicon.ctx.read('info') as info:
            self.article_map = dict(
                (title, article.character)
                for title, article in info.items()
            )
        # One "<target> <status>" string per citation, in visit order
        self.citations = []
        # Rendered HTML, filled in by ParsedArticle
        self.contents = ""

    def _inner(self, span) -> str:
        """Render the children of a container span and concatenate them."""
        return "".join(span.recurse(self))

    def TextSpan(self, span):
        """Text is emitted as-is."""
        return span.innertext

    def LineBreak(self, span):
        """A line break becomes a <br> tag."""
        return '<br>'

    def ParsedArticle(self, span):
        """Store the newline-joined paragraphs and return the visitor."""
        paragraphs = span.recurse(self)
        self.contents = '\n'.join(paragraphs)
        return self

    def BodyParagraph(self, span):
        """A body paragraph becomes a <p> element."""
        return f'<p>{self._inner(span)}</p>'

    def SignatureParagraph(self, span):
        """A signature is a rule followed by a signature-classed span."""
        body = self._inner(span)
        return '<hr><span class="signature"><p>' + f'{body}' + '</p></span>'

    def BoldSpan(self, span):
        """Bold text becomes a <b> element."""
        return f'<b>{self._inner(span)}</b>'

    def ItalicSpan(self, span):
        """Italic text becomes an <i> element."""
        return f'<i>{self._inner(span)}</i>'

    def CitationSpan(self, span):
        """Underline the citation text and append its 1-based index."""
        target = span.cite_target
        # Unknown titles are [new]; known titles are [extant] when their
        # mapped character value is truthy and [phantom] otherwise.
        if target not in self.article_map:
            link_class = '[new]'
        elif self.article_map.get(target):
            link_class = '[extant]'
        else:
            link_class = '[phantom]'
        self.citations.append(f'{target} {link_class}')
        # Render the inner text before reading the citation count
        inner = self._inner(span)
        index = len(self.citations)
        return f'<u>{inner}</u>[{index}]'
def load_editor(lexicon: LexiconModel, aid: str): def load_editor(lexicon: LexiconModel, aid: str):

View File

@ -1,4 +1,4 @@
[mypy] [mypy]
ignore_missing_imports = true ignore_missing_imports = true
exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py" exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
; mypy stable doesn't support pyproject.toml yet ; mypy stable doesn't support pyproject.toml yet

View File

@ -17,11 +17,11 @@ black = "^21.5b2"
mypy = "^0.812" mypy = "^0.812"
[tool.black] [tool.black]
extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py" extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
[tool.mypy] [tool.mypy]
ignore_missing_imports = true ignore_missing_imports = true
exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py" exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
[tool.pytest.ini_options] [tool.pytest.ini_options]
addopts = "--show-capture=log" addopts = "--show-capture=log"

View File

@ -1,463 +0,0 @@
from typing import Sequence
from amanuensis.parser.core import (
TextSpan,
LineBreak,
ParsedArticle,
BodyParagraph,
SignatureParagraph,
BoldSpan,
ItalicSpan,
CitationSpan,
Renderable,
SpanContainer,
RenderableVisitor,
Spans,
)
from amanuensis.parser.helpers import normalize_title, filesafe_title, titlesort
from amanuensis.parser.parsing import (
parse_breaks,
parse_paired_formatting,
parse_paragraph,
parse_raw_markdown,
)
def assert_types(spans: Spans, types: Sequence, loc=None):
    """
    Check that a span list matches an expected type structure.

    An entry of `types` is either a span type, or a list whose head is a
    container type and whose tail describes the container's contents.
    `loc` is the dotted position prefix used in failure messages.
    """
    count = max(len(spans), len(types))
    for index in range(count):
        here = f"{loc}.{index}" if loc else f"{index}"

        # Running off the end of either sequence means the lengths differ
        assert index < len(spans), f"Span list unexpectedly short at {here}"
        assert index < len(types), f"Type list unexpectedly short at {here}"

        span = spans[index]
        expected = types[index]

        if not isinstance(expected, list):
            # Leaf expectation: a plain span of the given type
            assert isinstance(span, Renderable), f"Expected a span at loc {here}"
            assert isinstance(span, expected), f"Unexpected span type at loc {here}"
            continue

        # Container expectation: check the container, then its contents
        assert isinstance(span, SpanContainer), f"Expected a span container at loc {here}"
        assert len(span.spans) == len(expected) - 1, f"Unexpected container size at loc {here}"
        assert isinstance(span, expected[0]), f"Unexpected container type at loc {here}"
        assert_types(span.spans, expected[1:], loc=here)
def assert_text(spans: Spans, texts: Sequence, loc=None):
    """
    Check that a span list has the expected inner text structure.

    An entry of `texts` is a string (a TextSpan with that text), a list
    (a container whose contents are checked recursively), or anything
    else (a LineBreak). `loc` is the dotted position prefix used in
    failure messages.
    """
    where = loc if loc else 'root'
    assert len(spans) == len(texts), f"Unexpected text sequence length at loc {where}"

    for index, (span, text) in enumerate(zip(spans, texts)):
        here = f"{loc}.{index}" if loc else f"{index}"
        if isinstance(text, str):
            assert isinstance(span, TextSpan), f"Expected a text span at loc {here}"
            assert span.innertext == text, f"Unexpected text at loc {here}"
            continue
        if isinstance(text, list):
            assert isinstance(span, SpanContainer), f"Expected a span container at loc {here}"
            assert_text(span.spans, text, loc=here)
            continue
        # Any other placeholder (e.g. None) stands for a line break
        assert isinstance(span, LineBreak), f"Expected a line break at loc {here}"
def test_parse_breaks():
    """Test parsing for intra-paragraph line breaks"""
    # Declared once here; re-annotating on each assignment is a redefinition
    text: str
    spans: Spans

    # Only having a line break does nothing
    text = "One\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan])
    assert_text(spans, [text])

    # Having the mark causes the text to be split across it
    text = r"One\\" + "\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan, LineBreak, TextSpan])
    assert_text(spans, ["One", None, "Two"])

    # Multiple lines can be broken
    text = r"One\\" + "\n" + r"Two\\" + "\nThree"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan, LineBreak, TextSpan, LineBreak, TextSpan])
    assert_text(spans, ["One", None, "Two", None, "Three"])

    # The mark must be at the end of the line
    text = r"One\\ " + "\nTwo"
    spans = parse_breaks(text)
    assert_types(spans, [TextSpan])
    assert_text(spans, [text])
def test_parse_pairs_single():
    """Check that bold (**) and italic (//) pair marks parse on their own"""
    # Each case: (source text, expected types, expected texts or None to skip)
    cases = [
        # Empty pair marks should parse
        ("****", [[BoldSpan]], None),
        ("////", [[ItalicSpan]], None),
        # Pair marks with text inside should parse
        ("**hello**", [[BoldSpan, TextSpan]], [["hello"]]),
        ("//hello//", [[ItalicSpan, TextSpan]], [["hello"]]),
        # Text outside of pair marks should parse on the same level
        ("**hello** world", [[BoldSpan, TextSpan], TextSpan], [["hello"], " world"]),
        ("//hello// world", [[ItalicSpan, TextSpan], TextSpan], [["hello"], " world"]),
        # Text before, between, and after pair marks should parse
        (
            "In the **beginning** was //the// Word",
            [
                TextSpan,
                [BoldSpan, TextSpan],
                TextSpan,
                [ItalicSpan, TextSpan],
                TextSpan,
            ],
            ["In the ", ["beginning"], " was ", ["the"], " Word"],
        ),
    ]
    for source, expected_types, expected_texts in cases:
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        if expected_texts is not None:
            assert_text(parsed, expected_texts)
def test_parse_pairs_break():
    """Check pair marks whose content contains a break mark and newline"""

    def check(source, expected_types, expected_texts):
        # Parse once, then verify both the type shape and the text shape
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        assert_text(parsed, expected_texts)

    # A pair spanning the break keeps the mark and newline as plain text
    check(
        r"**glory\\" + "\nhammer**",
        [[BoldSpan, TextSpan]],
        [["glory\\\\\nhammer"]],
    )
    check(
        r"//glory\\" + "\nhammer//",
        [[ItalicSpan, TextSpan]],
        [["glory\\\\\nhammer"]],
    )

    # A second opening mark after the break closes the pair instead,
    # leaving the remainder as trailing text
    check(
        r"**glory\\" + "\n**hammer**",
        [[BoldSpan, TextSpan], TextSpan],
        [["glory\\\\\n"], "hammer**"],
    )
    check(
        r"//glory\\" + "\n//hammer//",
        [[ItalicSpan, TextSpan], TextSpan],
        [["glory\\\\\n"], "hammer//"],
    )
def test_parse_pairs_nested():
    """Check bold and italic marks nested inside one another"""

    def check(source, expected_types, expected_texts):
        # Parse once, then verify both the type shape and the text shape
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        assert_text(parsed, expected_texts)

    # Simple nested test cases
    check("**//hello//**", [[BoldSpan, [ItalicSpan, TextSpan]]], [[["hello"]]])
    check("//**world**//", [[ItalicSpan, [BoldSpan, TextSpan]]], [[["world"]]])

    # Overlap should only parse the first
    check(
        "**Hello//world**//",
        [[BoldSpan, TextSpan], TextSpan],
        [["Hello//world"], "//"],
    )
def test_normalize_title():
    """Check the title normalization that the citation parser relies on"""
    # (raw title, expected normalized title)
    expectations = (
        ("hello", "Hello"),
        (" world ", "World"),
        ("Waiting for Godot", "Waiting for Godot"),
        ("lowercase letters", "Lowercase letters"),
    )
    for raw_title, normalized in expectations:
        assert normalize_title(raw_title) == normalized
def test_parse_citation_single():
    """Test parsing citations, which have internal formatting"""
    # Declared once here; re-annotating on each assignment is a redefinition
    text: str
    spans: Spans
    citation: CitationSpan

    # Without a pipe, the citation text doubles as the (normalized) target
    text = "[[hello]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["hello"]])
    citation = spans[0]
    assert citation.cite_target == "Hello"

    # With a pipe, the left side is the text and the right side the target
    text = "[[hello|world]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["hello"]])
    citation = spans[0]
    assert citation.cite_target == "World"

    # Only the first pipe splits; a second pipe stays in the target
    text = "[[hello||world]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["hello"]])
    citation = spans[0]
    assert citation.cite_target == "|world"

    # Whitespace is kept in the text but stripped from the target
    text = "[[ hello | world ]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [[" hello "]])
    citation = spans[0]
    assert citation.cite_target == "World"

    # Everything after the first pipe belongs to the target
    text = "[[faith|hope|love]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["faith"]])
    citation = spans[0]
    assert citation.cite_target == "Hope|love"

    # The first closing mark ends the citation; the rest is trailing text
    text = "[[ [[|]] ]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
    assert_text(spans, [[" [["], " ]]"])
    citation = spans[0]
    assert citation.cite_target == ""
def test_parse_citation_break():
    """Test citations with breaks"""
    # Declared once here; re-annotating on each assignment is a redefinition
    text: str
    spans: Spans
    citation: CitationSpan

    # The break stays verbatim in the text; in the target the newline
    # becomes a space while the backslashes are kept
    text = "[[hello\\\\\nworld]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["hello\\\\\nworld"]])
    citation = spans[0]
    assert citation.cite_target == "Hello\\\\ world"

    # The same holds when the break sits on the target side of the pipe
    text = "[[one|two\\\\\nthree]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["one"]])
    citation = spans[0]
    assert citation.cite_target == "Two\\\\ three"
def test_parse_citation_nested():
    """Test nesting with citations"""
    # Declared once here; re-annotating on each assignment is a redefinition
    text: str
    spans: Spans
    citation: CitationSpan

    # Bold inside a citation parses, but the target keeps the raw marks
    text = "[[**hello world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, [BoldSpan, TextSpan]]])
    assert_text(spans, [[["hello world"]]])
    citation = spans[0]
    assert citation.cite_target == "**hello world**"

    # A pipe between the bold marks separates them, so neither side is bold
    text = "[[**hello|world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan]])
    assert_text(spans, [["**hello"]])
    citation = spans[0]
    assert citation.cite_target == "World**"

    # A citation inside bold parses as a child of the bold span
    text = "**[[hello world]]**"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[BoldSpan, [CitationSpan, TextSpan]]])
    assert_text(spans, [[["hello world"]]])
    citation = spans[0].spans[0]
    assert citation.cite_target == "Hello world"

    # Overlapping marks: the bold opened first wins, brackets stay as text
    text = "**[[hello world**]]"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
    assert_text(spans, [["[[hello world"], "]]"])

    # Overlapping marks: the citation opened first wins, bold stays as text
    text = "[[**hello world]]**"
    spans = parse_paired_formatting(text)
    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
    assert_text(spans, [["**hello world"], "**"])
    citation = spans[0]
    assert citation.cite_target == "**hello world"
def test_parse_paragraphs():
    """Check that paragraphs parse into body or signature containers"""
    # (raw paragraph, expected container type, expected inner text)
    cases = (
        # Body paragraph
        (
            "\tIn the beginning was the Word.",
            BodyParagraph,
            "In the beginning was the Word.",
        ),
        # Signature paragraph
        (
            "~Ersatz Scrivener, scholar extraordinaire",
            SignatureParagraph,
            "Ersatz Scrivener, scholar extraordinaire",
        ),
    )
    for raw, container_type, inner_text in cases:
        paragraph = parse_paragraph(raw)
        # Wrap in a list so the helpers can treat the paragraph as a span list
        assert_types([paragraph], [[container_type, TextSpan]])
        assert_text([paragraph], [[inner_text]])
def test_parse_article():
    """Run the full article parser over a three-paragraph article"""
    source = (
        "Writing a **unit test** requires having test //content//.\n\n"
        "This content, of course, must be [[created|Writing test collateral]].\n\n"
        "~Bucky\\\\\nUnit test writer"
    )
    document = parse_raw_markdown(source)

    # Expected type structure, one entry per paragraph
    formatted_types = [
        BodyParagraph,
        TextSpan,
        [BoldSpan, TextSpan],
        TextSpan,
        [ItalicSpan, TextSpan],
        TextSpan,
    ]
    cited_types = [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan]
    signature_types = [SignatureParagraph, TextSpan, LineBreak, TextSpan]
    assert_types(
        [document],
        [[ParsedArticle, formatted_types, cited_types, signature_types]],
    )

    # Expected text structure; None marks the line break in the signature
    formatted_texts = [
        "Writing a ",
        ["unit test"],
        " requires having test ",
        ["content"],
        ".",
    ]
    cited_texts = ["This content, of course, must be ", ["created"], "."]
    signature_texts = ["Bucky", None, "Unit test writer"]
    assert_text([document], [[formatted_texts, cited_texts, signature_texts]])
def test_visitor():
    """Test that a visitor dispatches to hooks correctly"""

    class TestVisitor(RenderableVisitor):
        """
        Records every span it is handed, in visit order. Each hook asserts
        that it received the span type it is named for; container hooks
        then recurse so that child spans are visited too.
        """

        def __init__(self):
            self.visited = []

        def TextSpan(self, span: TextSpan):
            assert isinstance(span, TextSpan)
            self.visited.append(span)

        def LineBreak(self, span: LineBreak):
            assert isinstance(span, LineBreak)
            self.visited.append(span)

        def ParsedArticle(self, span: ParsedArticle):
            assert isinstance(span, ParsedArticle)
            self.visited.append(span)
            span.recurse(self)

        def BodyParagraph(self, span: BodyParagraph):
            assert isinstance(span, BodyParagraph)
            self.visited.append(span)
            span.recurse(self)

        def SignatureParagraph(self, span: SignatureParagraph):
            assert isinstance(span, SignatureParagraph)
            self.visited.append(span)
            span.recurse(self)

        def BoldSpan(self, span: BoldSpan):
            assert isinstance(span, BoldSpan)
            self.visited.append(span)
            span.recurse(self)

        def ItalicSpan(self, span: ItalicSpan):
            assert isinstance(span, ItalicSpan)
            self.visited.append(span)
            span.recurse(self)

        def CitationSpan(self, span: CitationSpan):
            assert isinstance(span, CitationSpan)
            self.visited.append(span)
            span.recurse(self)

    article: str = (
        "Writing a **unit test** requires having test //content//.\n\n"
        "This content, of course, must be [[created|Writing test collateral]].\n\n"
        "~Bucky\\\\\nUnit test writer"
    )
    parsed: ParsedArticle = parse_raw_markdown(article)

    visitor = TestVisitor()
    # All the typecheck asserts pass
    parsed.render(visitor)

    # The test article should parse into these spans and visit in this (arbitrary) order
    type_order = [
        ParsedArticle,
        BodyParagraph,
        TextSpan,
        BoldSpan,
        TextSpan,
        TextSpan,
        ItalicSpan,
        TextSpan,
        TextSpan,
        BodyParagraph,
        TextSpan,
        CitationSpan,
        TextSpan,
        TextSpan,
        SignatureParagraph,
        TextSpan,
        LineBreak,
        TextSpan,
    ]
    assert len(visitor.visited) == len(type_order)
    # Named expected_type rather than type to avoid shadowing the builtin
    for span, expected_type in zip(visitor.visited, type_order):
        assert isinstance(span, expected_type)