Compare commits
No commits in common. "00738b5a45b358d7cad4833ba8a5df17a467ac15" and "06ec2e168e9fc21cc2879e4cac6e4a1290255064" have entirely different histories.
00738b5a45 ... 06ec2e168e
@@ -9,94 +9,11 @@ from amanuensis.config import ReadOnlyOrderedDict
 from amanuensis.models import LexiconModel, UserModel
 from amanuensis.parser import (
     parse_raw_markdown,
+    GetCitations,
+    HtmlRenderer,
     titlesort,
-    filesafe_title)
-from amanuensis.parser.core import RenderableVisitor
+    filesafe_title,
+    ConstraintAnalysis)
 
 
-class GetCitations(RenderableVisitor):
-    def __init__(self):
-        self.citations = []
-
-    def ParsedArticle(self, span):
-        span.recurse(self)
-        return self.citations
-
-    def CitationSpan(self, span):
-        self.citations.append(span.cite_target)
-        return self
-
-
-class ConstraintAnalysis(RenderableVisitor):
-    def __init__(self, lexicon: LexiconModel):
-        self.info: List[str] = []
-        self.warning: List[str] = []
-        self.error: List[str] = []
-
-        self.word_count: int = 0
-        self.citations: list = []
-        self.signatures: int = 0
-
-    def TextSpan(self, span):
-        self.word_count += len(re.split(r'\s+', span.innertext.strip()))
-        return self
-
-    def SignatureParagraph(self, span):
-        self.signatures += 1
-        span.recurse(self)
-        return self
-
-    def CitationSpan(self, span):
-        self.citations.append(span.cite_target)
-        span.recurse(self)
-        return self
-
-
-class HtmlRenderer(RenderableVisitor):
-    """
-    Renders an article token tree into published article HTML.
-    """
-    def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
-        self.lexicon_name: str = lexicon_name
-        self.written_articles: Iterable[str] = written_articles
-
-    def TextSpan(self, span):
-        return span.innertext
-
-    def LineBreak(self, span):
-        return '<br>'
-
-    def ParsedArticle(self, span):
-        return '\n'.join(span.recurse(self))
-
-    def BodyParagraph(self, span):
-        return f'<p>{"".join(span.recurse(self))}</p>'
-
-    def SignatureParagraph(self, span):
-        return (
-            '<hr><span class="signature"><p>'
-            f'{"".join(span.recurse(self))}'
-            '</p></span>'
-        )
-
-    def BoldSpan(self, span):
-        return f'<b>{"".join(span.recurse(self))}</b>'
-
-    def ItalicSpan(self, span):
-        return f'<i>{"".join(span.recurse(self))}</i>'
-
-    def CitationSpan(self, span):
-        if span.cite_target in self.written_articles:
-            link_class = ''
-        else:
-            link_class = ' class="phantom"'
-        # link = url_for(
-        #     'lexicon.article',
-        #     name=self.lexicon_name,
-        #     title=filesafe_title(span.cite_target))
-        link = (f'/lexicon/{self.lexicon_name}'
-            + f'/article/{filesafe_title(span.cite_target)}')
-        return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
-
-
 def get_player_characters(
@@ -13,7 +13,7 @@
 # from amanuensis.config.loader import AttrOrderedDict
 # from amanuensis.errors import ArgumentError
 # from amanuensis.lexicon import LexiconModel
-# from amanuensis.parser import parse_raw_markdown, filesafe_title, titlesort
+# from amanuensis.parser import parse_raw_markdown, GetCitations, HtmlRenderer, filesafe_title, titlesort
 # from amanuensis.resources import get_stream
 
 
@@ -2,14 +2,19 @@
 Module encapsulating all markdown parsing functionality.
 """
 
-from .core import RenderableVisitor
-from .helpers import normalize_title, filesafe_title, titlesort
+from .analyze import ConstraintAnalysis, GetCitations
+from .core import normalize_title
+from .helpers import titlesort, filesafe_title
 from .parsing import parse_raw_markdown
+from .render import PreviewHtmlRenderer, HtmlRenderer
 
 __all__ = [
-    "RenderableVisitor",
-    "normalize_title",
-    "filesafe_title",
-    "titlesort",
-    "parse_raw_markdown",
+    ConstraintAnalysis.__name__,
+    GetCitations.__name__,
+    normalize_title.__name__,
+    titlesort.__name__,
+    filesafe_title.__name__,
+    parse_raw_markdown.__name__,
+    PreviewHtmlRenderer.__name__,
+    HtmlRenderer.__name__,
 ]
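For orientation, a minimal sketch of how the package-level API exported on the 06ec2e168e side fits together; the article text is an invented placeholder, everything else comes from the imports above:

    from amanuensis.parser import parse_raw_markdown, HtmlRenderer, GetCitations

    raw = "Writing a **unit test** requires having test //content//."
    article = parse_raw_markdown(raw)  # ParsedArticle token tree
    html = article.render(HtmlRenderer("lexicon-name", written_articles=[]))
    citations = article.render(GetCitations())  # list of normalized cite targets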
@@ -0,0 +1,49 @@
+"""
+Internal module encapsulating visitors that compute metrics on articles
+for verification against constraints.
+"""
+
+import re
+from typing import List
+
+from amanuensis.models import LexiconModel
+
+from .core import RenderableVisitor
+
+
+class GetCitations(RenderableVisitor):
+    def __init__(self):
+        self.citations = []
+
+    def ParsedArticle(self, span):
+        span.recurse(self)
+        return self.citations
+
+    def CitationSpan(self, span):
+        self.citations.append(span.cite_target)
+        return self
+
+
+class ConstraintAnalysis(RenderableVisitor):
+    def __init__(self, lexicon: LexiconModel):
+        self.info: List[str] = []
+        self.warning: List[str] = []
+        self.error: List[str] = []
+
+        self.word_count: int = 0
+        self.citations: list = []
+        self.signatures: int = 0
+
+    def TextSpan(self, span):
+        self.word_count += len(re.split(r'\s+', span.innertext.strip()))
+        return self
+
+    def SignatureParagraph(self, span):
+        self.signatures += 1
+        span.recurse(self)
+        return self
+
+    def CitationSpan(self, span):
+        self.citations.append(span.cite_target)
+        span.recurse(self)
+        return self
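ConstraintAnalysis only overrides the hooks it needs and inherits the recursing defaults from RenderableVisitor, so driving it is a single render call. A sketch, assuming `lexicon` is an existing LexiconModel and `draft_text` holds the raw article markdown:

    from amanuensis.parser import parse_raw_markdown
    from amanuensis.parser.analyze import ConstraintAnalysis

    analysis = ConstraintAnalysis(lexicon)  # lexicon: assumed LexiconModel
    parse_raw_markdown(draft_text).render(analysis)
    print(analysis.word_count, analysis.signatures, analysis.citations)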
@@ -5,134 +5,131 @@ which can be operated on by a visitor defining functions that hook off
 of the different token types.
 """
 
+import re
 from typing import Callable, Any, Sequence
 
-from .helpers import normalize_title
+RenderHook = Callable[['Renderable'], Any]
+Spans = Sequence['Renderable']
 
 
-RenderHook = Callable[["Renderable"], Any]
-Spans = Sequence["Renderable"]
+def normalize_title(title: str) -> str:
+    """
+    Normalizes strings as titles:
+    - Strips leading and trailing whitespace
+    - Merges internal whitespace into a single space
+    - Capitalizes the first word
+    """
+    cleaned = re.sub(r'\s+', " ", title.strip())
+    return cleaned[:1].capitalize() + cleaned[1:]
 
 
-class Renderable:
+class Renderable():
     """
     Base class for parsed markdown. Provides the `render()` method for
     visiting the token tree.
     """
-
-    def render(self: "Renderable", renderer: "RenderableVisitor"):
+    def render(self: 'Renderable', renderer: 'RenderableVisitor'):
         """
         Execute the apppropriate visitor method on this Renderable.
-        Visitors implement hooks by declaring methods whose names are
-        the name of a Renderable class.
         """
         hook: RenderHook = getattr(renderer, type(self).__name__, None)
         if hook:
             return hook(self)
         return None
 
 
 class TextSpan(Renderable):
-    """A length of text."""
-
+    """An unstyled length of text."""
     def __init__(self, innertext: str):
         self.innertext = innertext
 
-    def __repr__(self):
-        return f"<{self.innertext}>"
+    def __str__(self):
+        return f"[{self.innertext}]"
 
 
 class LineBreak(Renderable):
     """A line break within a paragraph."""
-
-    def __repr__(self):
+    def __str__(self):
         return "<break>"
 
 
 class SpanContainer(Renderable):
     """A formatting element that wraps some amount of text."""
-
     def __init__(self, spans: Spans):
         self.spans: Spans = spans
 
-    def __repr__(self):
-        return (
-            f"<{type(self).__name__} "
-            + f'{" ".join([repr(span) for span in self.spans])}>'
-        )
+    def __str__(self):
+        return (f'[{type(self).__name__} '
+            + f'{" ".join([str(span) for span in self.spans])}]')
 
-    def recurse(self, renderer: "RenderableVisitor"):
+    def recurse(self, renderer: 'RenderableVisitor'):
         return [child.render(renderer) for child in self.spans]
 
 
 class ParsedArticle(SpanContainer):
     """Token tree root node, containing some number of paragraph tokens."""
 
 
 class BodyParagraph(SpanContainer):
     """A normal paragraph."""
 
 
 class SignatureParagraph(SpanContainer):
     """A paragraph preceded by a signature mark."""
 
 
 class BoldSpan(SpanContainer):
     """A span of text inside bold marks."""
 
 
 class ItalicSpan(SpanContainer):
     """A span of text inside italic marks."""
 
 
 class CitationSpan(SpanContainer):
     """A citation to another article."""
-
     def __init__(self, spans: Spans, cite_target: str):
         super().__init__(spans)
         # Normalize citation target on parse, since we don't want
         # abnormal title strings lying around causing trouble.
         self.cite_target: str = normalize_title(cite_target)
 
-    def __repr__(self) -> str:
-        return (
-            f'{{{" ".join([repr(span) for span in self.spans])}'
-            + f":{self.cite_target}}}"
-        )
+    def __str__(self):
+        return (f'{{{" ".join([str(span) for span in self.spans])}'
+            + f':{self.cite_target}}}')
 
 
-class RenderableVisitor:
+class RenderableVisitor():
     """
     Default implementation of the visitor pattern. Executes once on
     each token in the tree and returns itself.
     """
-
     def TextSpan(self, span: TextSpan):
         return self
 
     def LineBreak(self, span: LineBreak):
         return self
 
     def ParsedArticle(self, span: ParsedArticle):
         span.recurse(self)
         return self
 
     def BodyParagraph(self, span: BodyParagraph):
         span.recurse(self)
         return self
 
     def SignatureParagraph(self, span: SignatureParagraph):
         span.recurse(self)
         return self
 
     def BoldSpan(self, span: BoldSpan):
         span.recurse(self)
         return self
 
     def ItalicSpan(self, span: ItalicSpan):
         span.recurse(self)
         return self
 
     def CitationSpan(self, span: CitationSpan):
         span.recurse(self)
         return self
@@ -1,53 +1,28 @@
-"""
-Helper functions for manipulating titles during parsing
-"""
-
 import re
 import urllib.parse
 
 
-def normalize_title(title: str) -> str:
-    """
-    Normalizes strings as titles:
-    - Strips leading and trailing whitespace
-    - Merges internal whitespace into a single space
-    - Capitalizes the first word
-    """
-    cleaned = re.sub(r"\s+", " ", title.strip())
-    return cleaned[:1].capitalize() + cleaned[1:]
-
-
 def titlesort(title: str) -> str:
     """
     Strips articles off of titles for alphabetical sorting purposes
     """
     lower = title.lower()
     if lower.startswith("the "):
         return lower[4:]
     if lower.startswith("an "):
         return lower[3:]
     if lower.startswith("a "):
         return lower[2:]
     return lower
 
 
 def filesafe_title(title: str) -> str:
     """
     Makes an article title filename-safe.
     """
-    # Replace whitespace with _
-    s = re.sub(r"\s+", "_", title)
-
-    # parse.quote doesn't catch ~
-    s = re.sub(r"~", "-", s)
-
-    # Encode all other characters
-    s = urllib.parse.quote(s)
-
-    # Strip encoding %s
-    s = re.sub(r"%", "", s)
-
-    # Limit to 64 characters
-    s = s[:64]
-
-    return s
+    s = re.sub(r"\s+", '_', title)  # Replace whitespace with _
+    s = re.sub(r"~", '-', s)  # parse.quote doesn't catch ~
+    s = urllib.parse.quote(s)  # Encode all other characters
+    s = re.sub(r"%", "", s)  # Strip encoding %s
+    s = s[:64]  # Limit to 64 characters
+    return s
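The title helpers are easiest to pin down with a couple of expected values; these follow directly from the function bodies on both sides of the diff (the example titles are invented):

    from amanuensis.parser.helpers import titlesort, filesafe_title

    assert titlesort("The Prodigal Stone") == "prodigal stone"
    assert titlesort("A Night Ride") == "night ride"
    assert filesafe_title("Hello world") == "Hello_world"
    assert filesafe_title("Tilde~Title") == "Tilde-Title"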
@@ -7,203 +7,150 @@ import re
 from typing import Sequence
 
 from .core import (
     TextSpan,
     LineBreak,
     ParsedArticle,
     BodyParagraph,
     SignatureParagraph,
     BoldSpan,
     ItalicSpan,
     CitationSpan,
     Renderable,
-    SpanContainer,
+    SpanContainer
 )
 
 Spans = Sequence[Renderable]
 
 
 def parse_raw_markdown(text: str) -> ParsedArticle:
     """
     Parses a body of Lexipython markdown into a Renderable tree.
     """
     # Parse each paragraph individually, as no formatting applies
     # across paragraphs
-    paragraphs = re.split(r"\n\n+", text)
+    paragraphs = re.split(r'\n\n+', text)
     parse_results = list(map(parse_paragraph, paragraphs))
     return ParsedArticle(parse_results)
 
 
 def parse_paragraph(text: str) -> SpanContainer:
-    """
-    Parses a block of text into a paragraph object.
-    """
     # Parse the paragraph as a span of text
     text = text.strip()
-    if text and text[0] == "~":
+    if text and text[0] == '~':
         return SignatureParagraph(parse_paired_formatting(text[1:]))
     else:
         return BodyParagraph(parse_paired_formatting(text))
 
 
 def parse_paired_formatting(
     text: str,
-    in_cite: bool = False,
-    in_bold: bool = False,
-    in_italic: bool = False,
-) -> Spans:
-    """
-    Parses citations, bolds, and italics, which can be nested inside each other.
-    A single type cannot nest inside itself, which is controlled by setting the
-    flag parameters to False.
-    """
+    cite: bool = True,
+    bold: bool = True,
+    italic: bool = True) -> Spans:
     # Find positions of any paired formatting
-    next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
-    next_bold = find_pair(text, "**", "**") if not in_bold else -1
-    next_italic = find_pair(text, "//", "//") if not in_italic else -1
-    # Create a map from a formatting mark's distance to its parse handler
+    first_cite = find_pair(text, "[[", "]]", cite)
+    first_bold = find_pair(text, "**", "**", bold)
+    first_italic = find_pair(text, "//", "//", italic)
+    # Load the possible parse handlers into the map
     handlers = {}
-    handlers[next_cite] = lambda: parse_citation(
-        text, in_bold=in_bold, in_italic=in_italic
-    )
-    handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
-    handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
-    # Map the next parsing step at -1. If we're currently inside a formatting
-    # mark pair, skip parsing line breaks, which are not allowed inside paired
-    # marks.
-    if in_cite or in_bold or in_italic:
-        handlers[-1] = lambda: parse_text(text)
-    else:
-        handlers[-1] = lambda: parse_breaks(text)
-    # Choose the handler for the earliest found pair, or the default handler
-    # at -1 if nothing was found.
-    finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
+    handlers[first_cite] = lambda: parse_citation(text, bold=bold, italic=italic)
+    handlers[first_bold] = lambda: parse_bold(text, cite=cite, italic=italic)
+    handlers[first_italic] = lambda: parse_italic(text, cite=cite, bold=bold)
+    # If nothing was found, move on to the next parsing step
+    handlers[-1] = lambda: parse_breaks(text)
+    # Choose a handler based on the earliest found result
+    finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
     first = min(finds) if finds else -1
     return handlers[first]()
 
 
-def find_pair(text: str, open_tag: str, close_tag: str) -> int:
-    """
-    Finds the beginning of a pair of formatting marks.
-    """
+def find_pair(
+    text: str,
+    open_tag: str,
+    close_tag: str,
+    valid: bool) -> int:
+    # If skipping, return -1
+    if not valid:
+        return -1
     # If the open tag wasn't found, return -1
     first = text.find(open_tag)
     if first < 0:
         return -1
     # If the close tag wasn't found after the open tag, return -1
     second = text.find(close_tag, first + len(open_tag))
     if second < 0:
         return -1
     # Otherwise, the pair exists
     return first
 
 
-def parse_citation(
-    text: str,
-    in_bold: bool = False,
-    in_italic: bool = False,
-) -> Spans:
-    """
-    Parses text into a citation span.
-    """
+def parse_citation(text: str, bold: bool = True, italic: bool = True) -> Spans:
     cite_open = text.find("[[")
     if cite_open > -1:
         cite_close = text.find("]]", cite_open + 2)
         # Since we searched for pairs from the beginning, there should be no
         # undetected pair formatting before this one, so move to the next
         # level of parsing
         spans_before = parse_breaks(text[:cite_open])
         # Continue parsing pair formatting after this one closes with all
         # three as valid choices
-        spans_after = parse_paired_formatting(text[cite_close + 2 :])
+        spans_after = parse_paired_formatting(text[cite_close + 2:])
         # Parse inner text and skip parsing for this format pair
-        text_inner = text[cite_open + 2 : cite_close]
-        # For citations specifically, try to split off a citation target.
-        # If there's no citation target to split, use the same text as the
-        # citation text and the target.
+        text_inner = text[cite_open + 2:cite_close]
+        # For citations specifically, we may need to split off a citation
+        # target from the alias text
         inner_split = text_inner.split("|", 1)
         text_inner_actual, cite_target = inner_split[0], inner_split[-1]
-        spans_inner = parse_paired_formatting(
-            text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
-        )
+        spans_inner = parse_paired_formatting(text_inner_actual,
+            cite=False, bold=bold, italic=italic)
         citation = CitationSpan(spans_inner, cite_target)
         return [*spans_before, citation, *spans_after]
     # Should never happen
     return parse_breaks(text)
 
 
-def parse_bold(
-    text: str,
-    in_cite: bool = False,
-    in_italic: bool = False,
-) -> Spans:
-    """
-    Parses text into a bold span.
-    """
+def parse_bold(text: str, cite: bool = True, italic: bool = True) -> Spans:
     bold_open = text.find("**")
     if bold_open > -1:
         bold_close = text.find("**", bold_open + 2)
         # Should be no formatting behind us
         spans_before = parse_breaks(text[:bold_open])
         # Freely parse formatting after us
-        spans_after = parse_paired_formatting(text[bold_close + 2 :])
+        spans_after = parse_paired_formatting(text[bold_close + 2:])
         # Parse inner text minus bold parsing
-        text_inner = text[bold_open + 2 : bold_close]
-        spans_inner = parse_paired_formatting(
-            text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
-        )
+        text_inner = text[bold_open + 2:bold_close]
+        spans_inner = parse_paired_formatting(text_inner,
+            cite=cite, bold=False, italic=italic)
         bold = BoldSpan(spans_inner)
         return [*spans_before, bold, *spans_after]
     # Should never happen
     return parse_italic(text)
 
 
-def parse_italic(
-    text: str,
-    in_cite: bool = False,
-    in_bold: bool = False,
-) -> Spans:
-    """
-    Parses text into an italic span.
-    """
+def parse_italic(text: str, cite: bool = True, bold: bool = True) -> Spans:
     italic_open = text.find("//")
     if italic_open > -1:
         italic_close = text.find("//", italic_open + 2)
         # Should be no formatting behind us
         spans_before = parse_breaks(text[:italic_open])
         # Freely parse formatting after us
-        spans_after = parse_paired_formatting(text[italic_close + 2 :])
+        spans_after = parse_paired_formatting(text[italic_close + 2:])
         # Parse inner text minus italic parsing
-        text_inner = text[italic_open + 2 : italic_close]
-        spans_inner = parse_paired_formatting(
-            text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
-        )
+        text_inner = text[italic_open + 2:italic_close]
+        spans_inner = parse_paired_formatting(text_inner,
+            cite=cite, bold=bold, italic=False)
         italic = ItalicSpan(spans_inner)
         return [*spans_before, italic, *spans_after]
     # Should never happen
     return parse_breaks(text)
 
 
 def parse_breaks(text: str) -> Spans:
-    """
-    Parses intra-paragraph line breaks.
-    """
-    # Parse empty text into nothing
     if not text:
         return []
-    # Split on the line break mark appearing at the end of the line
     splits: Spans = list(map(TextSpan, text.split("\\\\\n")))
-    # Put a LineBreak between each TextSpan
     spans: Spans = [
         splits[i // 2] if i % 2 == 0 else LineBreak()
         for i in range(0, 2 * len(splits) - 1)
     ]
     return spans
-
-
-def parse_text(text: str) -> Spans:
-    """
-    Parses text with no remaining parseable marks.
-    """
-    if not text:
-        return []
-    return [TextSpan(text)]
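From the caller's side, both versions of parse_paired_formatting behave the same with default arguments: the earliest pair mark is parsed first, and a mark type cannot nest inside itself. A sketch of the observable results, matching the expectations in the removed test module at the end of this diff:

    from amanuensis.parser.parsing import parse_paired_formatting

    spans = parse_paired_formatting("In the **beginning** was //the// Word")
    # [TextSpan, BoldSpan([TextSpan]), TextSpan, ItalicSpan([TextSpan]), TextSpan]

    nested = parse_paired_formatting("**//hello//**")
    # [BoldSpan([ItalicSpan([TextSpan])])]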
@@ -0,0 +1,104 @@
+"""
+Internal module encapsulating visitors that render articles into
+readable formats.
+"""
+
+from typing import Iterable
+
+from .core import RenderableVisitor
+from .helpers import filesafe_title
+
+
+class HtmlRenderer(RenderableVisitor):
+    """
+    Renders an article token tree into published article HTML.
+    """
+    def __init__(self, lexicon_name: str, written_articles: Iterable[str]):
+        self.lexicon_name: str = lexicon_name
+        self.written_articles: Iterable[str] = written_articles
+
+    def TextSpan(self, span):
+        return span.innertext
+
+    def LineBreak(self, span):
+        return '<br>'
+
+    def ParsedArticle(self, span):
+        return '\n'.join(span.recurse(self))
+
+    def BodyParagraph(self, span):
+        return f'<p>{"".join(span.recurse(self))}</p>'
+
+    def SignatureParagraph(self, span):
+        return (
+            '<hr><span class="signature"><p>'
+            f'{"".join(span.recurse(self))}'
+            '</p></span>'
+        )
+
+    def BoldSpan(self, span):
+        return f'<b>{"".join(span.recurse(self))}</b>'
+
+    def ItalicSpan(self, span):
+        return f'<i>{"".join(span.recurse(self))}</i>'
+
+    def CitationSpan(self, span):
+        if span.cite_target in self.written_articles:
+            link_class = ''
+        else:
+            link_class = ' class="phantom"'
+        # link = url_for(
+        #     'lexicon.article',
+        #     name=self.lexicon_name,
+        #     title=filesafe_title(span.cite_target))
+        link = (f'/lexicon/{self.lexicon_name}'
+            + f'/article/{filesafe_title(span.cite_target)}')
+        return f'<a href="{link}"{link_class}>{"".join(span.recurse(self))}</a>'
+
+
+class PreviewHtmlRenderer(RenderableVisitor):
+    def __init__(self, lexicon):
+        with lexicon.ctx.read('info') as info:
+            self.article_map = {
+                title: article.character
+                for title, article in info.items()
+            }
+        self.citations = []
+        self.contents = ""
+
+    def TextSpan(self, span):
+        return span.innertext
+
+    def LineBreak(self, span):
+        return '<br>'
+
+    def ParsedArticle(self, span):
+        self.contents = '\n'.join(span.recurse(self))
+        return self
+
+    def BodyParagraph(self, span):
+        return f'<p>{"".join(span.recurse(self))}</p>'
+
+    def SignatureParagraph(self, span):
+        return (
+            '<hr><span class="signature"><p>'
+            f'{"".join(span.recurse(self))}'
+            '</p></span>'
+        )
+
+    def BoldSpan(self, span):
+        return f'<b>{"".join(span.recurse(self))}</b>'
+
+    def ItalicSpan(self, span):
+        return f'<i>{"".join(span.recurse(self))}</i>'
+
+    def CitationSpan(self, span):
+        if span.cite_target in self.article_map:
+            if self.article_map.get(span.cite_target):
+                link_class = '[extant]'
+            else:
+                link_class = '[phantom]'
+        else:
+            link_class = '[new]'
+        self.citations.append(f'{span.cite_target} {link_class}')
+        return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
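A sketch of driving the preview renderer added here; `lexicon` and `draft_text` are assumed to exist (PreviewHtmlRenderer reads lexicon.ctx.read('info') in its constructor, as shown above):

    from amanuensis.parser import parse_raw_markdown, PreviewHtmlRenderer

    preview = PreviewHtmlRenderer(lexicon)  # lexicon: assumed LexiconModel
    parse_raw_markdown(draft_text).render(preview)
    html = preview.contents    # rendered preview HTML
    cites = preview.citations  # entries like "Some title [extant]"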
@@ -15,7 +15,9 @@ from amanuensis.lexicon import (
     create_character_in_lexicon,
     get_draft)
 from amanuensis.models import LexiconModel
-from amanuensis.parser import parse_raw_markdown
+from amanuensis.parser import (
+    parse_raw_markdown,
+    PreviewHtmlRenderer)
 from amanuensis.server.helpers import (
     lexicon_param,
     player_required,
@@ -27,7 +29,7 @@ from .forms import (
     LexiconPublishTurnForm,
     LexiconConfigForm)
 
-from .editor import load_editor, new_draft, update_draft, PreviewHtmlRenderer
+from .editor import load_editor, new_draft, update_draft
 
 
 bp_session = Blueprint('session', __name__,
@@ -17,56 +17,8 @@ from amanuensis.lexicon import (
 from amanuensis.models import LexiconModel
 from amanuensis.parser import (
     normalize_title,
-    parse_raw_markdown)
-from amanuensis.parser.core import RenderableVisitor
+    parse_raw_markdown,
+    PreviewHtmlRenderer)
 
 
-class PreviewHtmlRenderer(RenderableVisitor):
-    def __init__(self, lexicon):
-        with lexicon.ctx.read('info') as info:
-            self.article_map = {
-                title: article.character
-                for title, article in info.items()
-            }
-        self.citations = []
-        self.contents = ""
-
-    def TextSpan(self, span):
-        return span.innertext
-
-    def LineBreak(self, span):
-        return '<br>'
-
-    def ParsedArticle(self, span):
-        self.contents = '\n'.join(span.recurse(self))
-        return self
-
-    def BodyParagraph(self, span):
-        return f'<p>{"".join(span.recurse(self))}</p>'
-
-    def SignatureParagraph(self, span):
-        return (
-            '<hr><span class="signature"><p>'
-            f'{"".join(span.recurse(self))}'
-            '</p></span>'
-        )
-
-    def BoldSpan(self, span):
-        return f'<b>{"".join(span.recurse(self))}</b>'
-
-    def ItalicSpan(self, span):
-        return f'<i>{"".join(span.recurse(self))}</i>'
-
-    def CitationSpan(self, span):
-        if span.cite_target in self.article_map:
-            if self.article_map.get(span.cite_target):
-                link_class = '[extant]'
-            else:
-                link_class = '[phantom]'
-        else:
-            link_class = '[new]'
-        self.citations.append(f'{span.cite_target} {link_class}')
-        return f'<u>{"".join(span.recurse(self))}</u>[{len(self.citations)}]'
-
-
 def load_editor(lexicon: LexiconModel, aid: str):
mypy.ini
2
mypy.ini
|
@ -1,4 +1,4 @@
|
||||||
[mypy]
|
[mypy]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
|
exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
|
||||||
; mypy stable doesn't support pyproject.toml yet
|
; mypy stable doesn't support pyproject.toml yet
|
|
@@ -17,11 +17,11 @@ black = "^21.5b2"
 mypy = "^0.812"
 
 [tool.black]
-extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
+extend-exclude = "^/amanuensis/cli/.*|^/amanuensis/config/.*|^/amanuensis/lexicon/.*|^/amanuensis/log/.*|^/amanuensis/models/.*|^/amanuensis/parser/.*|^/amanuensis/resources/.*|^/amanuensis/server/.*|^/amanuensis/user/.*|^/amanuensis/__main__.py"
 
 [tool.mypy]
 ignore_missing_imports = true
-exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
+exclude = "amanuensis/cli/.*|amanuensis/config/.*|amanuensis/lexicon/.*|amanuensis/log/.*|amanuensis/models/.*|amanuensis/parser/.*|amanuensis/resources/.*|amanuensis/server/.*|amanuensis/user/.*|amanuensis/__main__.py"
 
 [tool.pytest.ini_options]
 addopts = "--show-capture=log"
@@ -1,463 +0,0 @@
-from typing import Sequence
-
-from amanuensis.parser.core import (
-    TextSpan,
-    LineBreak,
-    ParsedArticle,
-    BodyParagraph,
-    SignatureParagraph,
-    BoldSpan,
-    ItalicSpan,
-    CitationSpan,
-    Renderable,
-    SpanContainer,
-    RenderableVisitor,
-    Spans,
-)
-from amanuensis.parser.helpers import normalize_title, filesafe_title, titlesort
-from amanuensis.parser.parsing import (
-    parse_breaks,
-    parse_paired_formatting,
-    parse_paragraph,
-    parse_raw_markdown,
-)
-
-
-def assert_types(spans: Spans, types: Sequence, loc=None):
-    """
-    Asserts that a span list has the types specified.
-    Each element in `types` should be either a span type or a list. The first
-    element of the list is the container type and the remaining elements are the
-    content types.
-    """
-    for i in range(max(len(spans), len(types))):
-        i_loc = f"{loc}.{i}" if loc else f"{i}"
-        # Check lengths are equal
-        assert i < len(spans), f"Span list unexpectedly short at {i_loc}"
-        assert i < len(types), f"Type list unexpectedly short at {i_loc}"
-        # Check types are equal
-        span, span_type = spans[i], types[i]
-        if isinstance(span_type, list):
-            assert isinstance(
-                span, SpanContainer
-            ), f"Expected a span container at loc {i_loc}"
-            assert (
-                len(span.spans) == len(span_type) - 1
-            ), f"Unexpected container size at loc {i_loc}"
-            assert isinstance(
-                span, span_type[0]
-            ), f"Unexpected container type at loc {i_loc}"
-            assert_types(span.spans, span_type[1:], loc=i_loc)
-        else:
-            assert isinstance(span, Renderable), f"Expected a span at loc {i_loc}"
-            assert isinstance(span, span_type), f"Unexpected span type at loc {i_loc}"
-
-
-def assert_text(spans: Spans, texts: Sequence, loc=None):
-    """
-    Asserts that a span list has the inner text structure specified.
-    Each element in `texts` should be either a string or a list of the same.
-    """
-    assert len(spans) == len(
-        texts
-    ), f"Unexpected text sequence length at loc {loc if loc else 'root'}"
-    i = -1
-    for span, text in zip(spans, texts):
-        i += 1
-        i_loc = f"{loc}.{i}" if loc else f"{i}"
-        if isinstance(text, str):
-            assert isinstance(span, TextSpan), f"Expected a text span at loc {i_loc}"
-            assert span.innertext == text, f"Unexpected text at loc {i_loc}"
-        elif isinstance(text, list):
-            assert isinstance(
-                span, SpanContainer
-            ), f"Expected a span container at loc {i_loc}"
-            assert_text(span.spans, text, loc=i_loc)
-        else:
-            assert isinstance(span, LineBreak), f"Expected a line break at loc {i_loc}"
-
-
-def test_parse_breaks():
-    """Test parsing for intra-pragraph line break"""
-    text: str
-    spans: Spans
-
-    # Only having a line break does nothing
-    text = "One\nTwo"
-    spans: Spans = parse_breaks(text)
-    assert_types(spans, [TextSpan])
-    assert_text(spans, [text])
-
-    # Having the mark causes the text to be split across it
-    text = r"One\\" + "\nTwo"
-    spans: Spans = parse_breaks(text)
-    assert_types(spans, [TextSpan, LineBreak, TextSpan])
-    assert_text(spans, ["One", None, "Two"])
-
-    # Multiple lines can be broken
-    text = r"One\\" + "\n" + r"Two\\" + "\nThree"
-    spans: Spans = parse_breaks(text)
-    assert_types(spans, [TextSpan, LineBreak, TextSpan, LineBreak, TextSpan])
-    assert_text(spans, ["One", None, "Two", None, "Three"])
-
-    # The mark must be at the end of the line
-    text = r"One\\ " + "\nTwo"
-    spans: Spans = parse_breaks(text)
-    assert_types(spans, (TextSpan,))
-    assert_text(spans, [text])
-
-
-def test_parse_pairs_single():
-    """Test parsing for bold and italic marks"""
-    text: str
-    spans: Spans
-
-    # Empty pair marks should parse
-    text = "****"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan]])
-
-    text = "////"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan]])
-
-    # Pair marks with text inside should parse
-    text = "**hello**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan]])
-    assert_text(spans, [["hello"]])
-
-    text = "//hello//"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan, TextSpan]])
-    assert_text(spans, [["hello"]])
-
-    # Text outside of pair marks should parse on the same level
-    text = "**hello** world"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
-    assert_text(spans, [["hello"], " world"])
-
-    text = "//hello// world"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
-    assert_text(spans, [["hello"], " world"])
-
-    # Text before, between, and after pair marks should parse
-    text = "In the **beginning** was //the// Word"
-    spans = parse_paired_formatting(text)
-    assert_types(
-        spans,
-        [TextSpan, [BoldSpan, TextSpan], TextSpan, [ItalicSpan, TextSpan], TextSpan],
-    )
-    assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
-
-
-def test_parse_pairs_break():
-    """Test pair marks with breaks"""
-    text: str
-    spans: Spans
-
-    text = r"**glory\\" + "\nhammer**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan]])
-    assert_text(spans, [["glory\\\\\nhammer"]])
-
-    text = r"//glory\\" + "\nhammer//"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan, TextSpan]])
-    assert_text(spans, [["glory\\\\\nhammer"]])
-
-    text = r"**glory\\" + "\n**hammer**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
-    assert_text(spans, [["glory\\\\\n"], "hammer**"])
-
-    text = r"//glory\\" + "\n//hammer//"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
-    assert_text(spans, [["glory\\\\\n"], "hammer//"])
-
-
-def test_parse_pairs_nested():
-    """Test parsing for nesting bold and italic"""
-    text: str
-    spans: Spans
-
-    # Simple nested test cases
-    text = "**//hello//**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, [ItalicSpan, TextSpan]]])
-    assert_text(spans, [[["hello"]]])
-
-    text = "//**world**//"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[ItalicSpan, [BoldSpan, TextSpan]]])
-    assert_text(spans, [[["world"]]])
-
-    # Overlap should only parse the first
-    text = "**Hello//world**//"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
-    assert_text(spans, [["Hello//world"], "//"])
-
-
-def test_normalize_title():
-    """Test the title normalization used by the citation parser"""
-    nt = normalize_title
-    assert nt("hello") == "Hello"
-    assert nt(" world ") == "World"
-    assert nt("Waiting for Godot") == "Waiting for Godot"
-    assert nt("lowercase letters") == "Lowercase letters"
-
-
-def test_parse_citation_single():
-    """Test parsing citations, which have internal formatting"""
-    text: str
-    spans: Spans
-
-    # Simple test cases
-    text = "[[hello]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["hello"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "Hello"
-
-    text = "[[hello|world]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["hello"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "World"
-
-    text = "[[hello||world]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["hello"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "|world"
-
-    text = "[[ hello | world ]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [[" hello "]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "World"
-
-    text = "[[faith|hope|love]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["faith"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "Hope|love"
-
-    text = "[[ [[|]] ]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
-    assert_text(spans, [[" [["], " ]]"])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == ""
-
-
-def test_parse_citation_break():
-    """Test citations with breaks"""
-    text: str
-    spans: Spans
-
-    text = "[[hello\\\\\nworld]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["hello\\\\\nworld"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "Hello\\\\ world"
-
-    text = "[[one|two\\\\\nthree]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["one"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "Two\\\\ three"
-
-
-def test_parse_citation_nested():
-    """Test nesting with citations"""
-    text: str
-    spans: Spans
-
-    text = "[[**hello world**]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, [BoldSpan, TextSpan]]])
-    assert_text(spans, [[["hello world"]]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "**hello world**"
-
-    text = "[[**hello|world**]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan]])
-    assert_text(spans, [["**hello"]])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "World**"
-
-    text = "**[[hello world]]**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, [CitationSpan, TextSpan]]])
-    assert_text(spans, [[["hello world"]]])
-    citation: CitationSpan = spans[0].spans[0]
-    assert citation.cite_target == "Hello world"
-
-    text = "**[[hello world**]]"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
-    assert_text(spans, [["[[hello world"], "]]"])
-
-    text = "[[**hello world]]**"
-    spans = parse_paired_formatting(text)
-    assert_types(spans, [[CitationSpan, TextSpan], TextSpan])
-    assert_text(spans, [["**hello world"], "**"])
-    citation: CitationSpan = spans[0]
-    assert citation.cite_target == "**hello world"
-
-
-def test_parse_paragraphs():
-    """Test parsing paragraphs"""
-    para: str
-    span: SpanContainer
-
-    # Body paragraph
-    para = "\tIn the beginning was the Word."
-    span = parse_paragraph(para)
-    assert_types([span], [[BodyParagraph, TextSpan]])
-    assert_text([span], [["In the beginning was the Word."]])
-
-    # Signature paragraph
-    para = "~Ersatz Scrivener, scholar extraordinaire"
-    span = parse_paragraph(para)
-    assert_types([span], [[SignatureParagraph, TextSpan]])
-    assert_text([span], [["Ersatz Scrivener, scholar extraordinaire"]])
-
-
-def test_parse_article():
-    """Test the full article parser"""
-    article: str = (
-        "Writing a **unit test** requires having test //content//.\n\n"
-        "This content, of course, must be [[created|Writing test collateral]].\n\n"
-        "~Bucky\\\\\nUnit test writer"
-    )
-    parsed: ParsedArticle = parse_raw_markdown(article)
-
-    assert_types(
-        [parsed],
-        [
-            [
-                ParsedArticle,
-                [
-                    BodyParagraph,
-                    TextSpan,
-                    [BoldSpan, TextSpan],
-                    TextSpan,
-                    [ItalicSpan, TextSpan],
-                    TextSpan,
-                ],
-                [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan],
-                [SignatureParagraph, TextSpan, LineBreak, TextSpan],
-            ]
-        ],
-    )
-    assert_text(
-        [parsed],
-        [
-            [
-                [
-                    "Writing a ",
-                    ["unit test"],
-                    " requires having test ",
-                    ["content"],
-                    ".",
-                ],
-                ["This content, of course, must be ", ["created"], "."],
-                ["Bucky", None, "Unit test writer"],
-            ]
-        ],
-    )
-
-
-def test_visitor():
-    """Test that a visitor dispatches to hooks correctly"""
-
-    class TestVisitor(RenderableVisitor):
-        def __init__(self):
-            self.visited = []
-
-        def TextSpan(self, span: TextSpan):
-            assert isinstance(span, TextSpan)
-            self.visited.append(span)
-
-        def LineBreak(self, span: LineBreak):
-            assert isinstance(span, LineBreak)
-            self.visited.append(span)
-
-        def ParsedArticle(self, span: ParsedArticle):
-            assert isinstance(span, ParsedArticle)
-            self.visited.append(span)
-            span.recurse(self)
-
-        def BodyParagraph(self, span: BodyParagraph):
-            assert isinstance(span, BodyParagraph)
-            self.visited.append(span)
-            span.recurse(self)
-
-        def SignatureParagraph(self, span: SignatureParagraph):
-            assert isinstance(span, SignatureParagraph)
-            self.visited.append(span)
-            span.recurse(self)
-
-        def BoldSpan(self, span: BoldSpan):
-            assert isinstance(span, BoldSpan)
-            self.visited.append(span)
-            span.recurse(self)
-
-        def ItalicSpan(self, span: ItalicSpan):
-            assert isinstance(span, ItalicSpan)
-            self.visited.append(span)
-            span.recurse(self)
-
-        def CitationSpan(self, span: CitationSpan):
-            assert isinstance(span, CitationSpan)
-            self.visited.append(span)
-            span.recurse(self)
-
-    article: str = (
-        "Writing a **unit test** requires having test //content//.\n\n"
-        "This content, of course, must be [[created|Writing test collateral]].\n\n"
-        "~Bucky\\\\\nUnit test writer"
-    )
-    parsed: ParsedArticle = parse_raw_markdown(article)
-
-    visitor = TestVisitor()
-    # All the typecheck asserts pass
-    parsed.render(visitor)
-    # The test article should parse into these spans and visit in this (arbitrary) order
-    type_order = [
-        ParsedArticle,
-        BodyParagraph,
-        TextSpan,
-        BoldSpan,
-        TextSpan,
-        TextSpan,
-        ItalicSpan,
-        TextSpan,
-        TextSpan,
-        BodyParagraph,
-        TextSpan,
-        CitationSpan,
-        TextSpan,
-        TextSpan,
-        SignatureParagraph,
-        TextSpan,
-        LineBreak,
-        TextSpan,
-    ]
-    assert len(visitor.visited) == len(type_order)
-    for span, type in zip(visitor.visited, type_order):
-        assert isinstance(span, type)