Add parser module
This commit is contained in:
parent
b22df32f04
commit
f7688429a8
|
@ -0,0 +1,6 @@
|
||||||
|
"""
|
||||||
|
Module encapsulating all markdown parsing functionality
|
||||||
|
"""
|
||||||
|
|
||||||
|
from amanuensis.parser.text import parse_raw_markdown
|
||||||
|
from amanuensis.parser.render import PreviewHtmlRenderer
|
|
@ -0,0 +1,25 @@
|
||||||
|
"""
|
||||||
|
Internal module encapsulating the render logic for parsed articles. Rendering
|
||||||
|
is done via a rough approximation of the visitor pattern.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class PreviewHtmlRenderer():
    """Renderer that turns a parsed article tree into preview HTML.

    Every public method is named after the node class it handles and
    takes that node as its only argument, returning an HTML string.
    Container nodes render their children by calling ``child.render(self)``.

    NOTE(review): text is emitted verbatim, with no HTML escaping; confirm
    that callers sanitise article text before or after rendering.
    """

    def _join_children(self, span, separator=''):
        """Render each child of *span* with this renderer and join them."""
        return separator.join(child.render(self) for child in span.spans)

    def TextSpan(self, span):
        """Return the span's raw text unchanged."""
        return span.innertext

    def LineBreak(self, span):
        """Return an HTML line break."""
        return '<br>'

    def ParsedArticle(self, span):
        """Render the article's children, one per line."""
        return self._join_children(span, separator='\n')

    def BodyParagraph(self, span):
        """Wrap the rendered children in a paragraph element."""
        inner = self._join_children(span)
        return f'<p>{inner}</p>'

    def SignatureParagraph(self, span):
        """Render a signature: a rule followed by a signature-classed span."""
        inner = self._join_children(span)
        return f'<hr><span class="signature"><p>{inner}</p></span>'

    def BoldSpan(self, span):
        """Wrap the rendered children in a bold element."""
        inner = self._join_children(span)
        return f'<b>{inner}</b>'

    def ItalicSpan(self, span):
        """Wrap the rendered children in an italic element."""
        inner = self._join_children(span)
        return f'<i>{inner}</i>'

    def CitationSpan(self, span):
        """Render the citation text as a placeholder link (target unused)."""
        return f'<a href="#">{span.cite_text}</a>'
|
|
@ -0,0 +1,118 @@
|
||||||
|
"""
|
||||||
|
Internal module encapsulating the parsing logic for Lexipython
|
||||||
|
markdown. Parse results are represented as a hierarchy of tokens, which
|
||||||
|
can be rendered by a renderer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class Renderable():
    """Base class for parse-tree nodes that a renderer can visit."""

    def render(self, renderer):
        """Call the renderer method named after this node's class.

        The method is looked up on *renderer* by this instance's class
        name and is called with the instance as its only argument; its
        return value is passed back to the caller.
        """
        handler_name = type(self).__name__
        handler = getattr(renderer, handler_name)
        return handler(self)
|
||||||
|
|
||||||
|
class TextSpan(Renderable):
    """A run of plain text carrying no formatting of its own."""

    def __init__(self, innertext):
        """Store the text this span holds."""
        self.innertext = innertext

    def __str__(self):
        """Return the text wrapped in square brackets, for debugging."""
        return "[{}]".format(self.innertext)
|
||||||
|
|
||||||
|
class LineBreak(Renderable):
    """An explicit line break inside a paragraph."""

    def __str__(self):
        """Return a fixed debug marker for the break."""
        marker = "<break>"
        return marker
|
||||||
|
|
||||||
|
class SpanContainer(Renderable):
    """A node that holds a list of child spans."""

    def __init__(self, spans):
        """Store the child spans this container wraps."""
        self.spans = spans

    def __str__(self):
        """Return ``[ClassName child child ...]`` for debugging."""
        label = type(self).__name__
        children = ' '.join(map(str, self.spans))
        return f"[{label} {children}]"
|
||||||
|
|
||||||
|
class ParsedArticle(SpanContainer):
    """Root of a parse result; its spans are the article's paragraphs."""
|
||||||
|
|
||||||
|
class BodyParagraph(SpanContainer):
    """An ordinary paragraph of article text."""
|
||||||
|
|
||||||
|
class SignatureParagraph(SpanContainer):
    """A paragraph that was introduced by a signature mark."""
|
||||||
|
|
||||||
|
class BoldSpan(SpanContainer):
    """The spans enclosed by a pair of bold marks."""
|
||||||
|
|
||||||
|
class ItalicSpan(SpanContainer):
    """The spans enclosed by a pair of italic marks."""
|
||||||
|
|
||||||
|
class CitationSpan(Renderable):
    """A reference to another article."""

    def __init__(self, cite_text, cite_target):
        """Store the displayed text and the target of the citation."""
        self.cite_text = cite_text
        self.cite_target = cite_target

    def __str__(self):
        """Return ``{text:target}`` for debugging."""
        return "{{{}:{}}}".format(self.cite_text, self.cite_target)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_raw_markdown(text):
    """Parse raw article text into a ParsedArticle.

    The text is split on runs of two or more newlines and each resulting
    piece is parsed as a paragraph on its own, because no formatting
    carries across a paragraph boundary.
    """
    article_paragraphs = [
        parse_paragraph(chunk)
        for chunk in re.split(r'\n\n+', text)
    ]
    return ParsedArticle(article_paragraphs)
|
||||||
|
|
||||||
|
def parse_paragraph(text):
    """Parse one paragraph of text into a paragraph node.

    Surrounding whitespace is stripped first. A paragraph whose first
    character is ``~`` becomes a SignatureParagraph (with the mark
    removed); anything else becomes a BodyParagraph.
    """
    stripped = text.strip()
    if stripped.startswith('~'):
        # Drop the signature mark before parsing the remainder.
        return SignatureParagraph(parse_citations(stripped[1:]))
    return BodyParagraph(parse_citations(stripped))
|
||||||
|
|
||||||
|
def parse_citations(text):
    """Split text into spans, extracting ``[[...]]`` citations.

    A citation is written ``[[text]]`` or ``[[text|target]]``. The part
    before the first ``|`` becomes the citation text and the part after
    it the target; without a ``|`` both are the same string. The contents
    of a citation are not parsed for further formatting.

    Text before the first citation is handed to ``parse_bold``; text
    after it is parsed recursively for more citations.

    Returns a list of spans.
    """
    cite_open = text.find("[[")
    if cite_open == -1:
        # No citations, just parse the regular formatting
        return parse_bold(text)

    cite_close = text.find("]]", cite_open + 2)
    if cite_close == -1:
        # An opening mark with no closing mark is not a citation. Without
        # this guard the -1 would be used as a slice index and produce
        # garbled, partly duplicated output, so treat it as plain text.
        return parse_bold(text)

    spans_before = parse_bold(text[:cite_open])
    spans_after = parse_citations(text[cite_close + 2:])
    text_inner = text[cite_open + 2:cite_close]
    # With no "|" the split yields one element, so [0] and [-1] coincide.
    alias_split = text_inner.split("|", 1)
    citation = CitationSpan(alias_split[0], alias_split[-1])
    return spans_before + [citation] + spans_after
|
||||||
|
|
||||||
|
def parse_bold(text):
    """Split text into spans, extracting ``**...**`` bold runs.

    Text before the first bold run and the contents of the run are
    handed to ``parse_italic``; text after the run is parsed recursively
    for more bold runs.

    Returns a list of spans.
    """
    bold_open = text.find("**")
    if bold_open == -1:
        # No bold marks, fall through to the next formatting level.
        return parse_italic(text)

    bold_close = text.find("**", bold_open + 2)
    if bold_close == -1:
        # An opening mark with no closing mark is not a bold run. Without
        # this guard the -1 would be used as a slice index and produce
        # garbled, partly duplicated output, so treat it as plain text.
        return parse_italic(text)

    spans_before = parse_italic(text[:bold_open])
    spans_inner = parse_italic(text[bold_open + 2:bold_close])
    spans_after = parse_bold(text[bold_close + 2:])
    return spans_before + [BoldSpan(spans_inner)] + spans_after
|
||||||
|
|
||||||
|
def parse_italic(text):
    """Split text into spans, extracting ``//...//`` italic runs.

    Text before the first italic run and the contents of the run are
    handed to ``parse_breaks``; text after the run is parsed recursively
    for more italic runs.

    Returns a list of spans.
    """
    italic_open = text.find("//")
    if italic_open == -1:
        # No italic marks, only line breaks remain to be handled.
        return parse_breaks(text)

    italic_close = text.find("//", italic_open + 2)
    if italic_close == -1:
        # A lone "//" (for instance inside a URL) is not an italic run.
        # Without this guard the -1 would be used as a slice index and
        # produce garbled, partly duplicated output, so treat the text
        # as plain.
        return parse_breaks(text)

    spans_before = parse_breaks(text[:italic_open])
    spans_inner = parse_breaks(text[italic_open + 2:italic_close])
    spans_after = parse_italic(text[italic_close + 2:])
    return spans_before + [ItalicSpan(spans_inner)] + spans_after
|
||||||
|
|
||||||
|
def parse_breaks(text):
    """Split text into TextSpans separated by LineBreaks.

    A break is marked by two backslashes immediately followed by a
    newline. Empty text yields an empty list; otherwise the result
    alternates TextSpan, LineBreak, TextSpan, ... and always starts and
    ends with a TextSpan.
    """
    if not text:
        return []
    first, *rest = text.split("\\\\\n")
    spans = [TextSpan(first)]
    for segment in rest:
        spans += [LineBreak(), TextSpan(segment)]
    return spans
|
Loading…
Reference in New Issue