Incorporate parser into new code #12

Merged
Jaculabilis merged 8 commits from tvb/parser into develop 2021-06-12 17:28:19 +00:00
2 changed files with 72 additions and 13 deletions
Showing only changes of commit fde12ac818 - Show all commits

View File

@@ -65,12 +65,8 @@ def parse_paired_formatting(
handlers[next_cite] = lambda: parse_citation( handlers[next_cite] = lambda: parse_citation(
text, in_bold=in_bold, in_italic=in_italic text, in_bold=in_bold, in_italic=in_italic
) )
handlers[next_bold] = lambda: parse_bold( handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
text, in_cite=in_cite, in_italic=in_italic handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
)
handlers[next_italic] = lambda: parse_italic(
text, in_cite=in_cite, in_bold=in_bold
)
# Map the next parsing step at -1. If we're currently inside a formatting # Map the next parsing step at -1. If we're currently inside a formatting
# mark pair, skip parsing line breaks, which are not allowed inside paired # mark pair, skip parsing line breaks, which are not allowed inside paired
# marks. # marks.

View File

@@ -30,13 +30,13 @@ def assert_types(spans: Spans, types: Sequence, loc=None):
element of the list is the container type and the remaining elements are the element of the list is the container type and the remaining elements are the
content types. content types.
""" """
assert len(spans) == len( for i in range(max(len(spans), len(types))):
types
), f"Unexpected type sequence length at loc {loc if loc else 'root'}"
i = -1
for span, span_type in zip(spans, types):
i += 1
i_loc = f"{loc}.{i}" if loc else f"{i}" i_loc = f"{loc}.{i}" if loc else f"{i}"
# Check lengths are equal
assert i < len(spans), f"Span list unexpectedly short at {i_loc}"
assert i < len(types), f"Type list unexpectedly short at {i_loc}"
# Check types are equal
span, span_type = spans[i], types[i]
if isinstance(span_type, list): if isinstance(span_type, list):
assert isinstance( assert isinstance(
span, SpanContainer span, SpanContainer
@@ -317,3 +317,66 @@ def test_parse_citation_nested():
assert_text(spans, [["**hello world"], "**"]) assert_text(spans, [["**hello world"], "**"])
citation: CitationSpan = spans[0] citation: CitationSpan = spans[0]
assert citation.cite_target == "**hello world" assert citation.cite_target == "**hello world"
def test_parse_paragraphs():
    """Check parse_paragraph on a body paragraph and a signature paragraph.

    Each case supplies the raw paragraph source, the container type the
    parser is expected to produce, and the text expected in its single
    TextSpan child.
    """
    cases = (
        # Body paragraph: the expected text omits the leading tab.
        (
            "\tIn the beginning was the Word.",
            BodyParagraph,
            "In the beginning was the Word.",
        ),
        # Signature paragraph: the expected text omits the leading tilde.
        (
            "~Ersatz Scrivener, scholar extraordinaire",
            SignatureParagraph,
            "Ersatz Scrivener, scholar extraordinaire",
        ),
    )
    for raw, container_type, expected_text in cases:
        parsed = parse_paragraph(raw)
        # The helpers take a list of spans, so wrap the lone paragraph.
        assert_types([parsed], [[container_type, TextSpan]])
        assert_text([parsed], [[expected_text]])
def test_parse_article():
    """Run parse_raw_markdown on a three-paragraph article and check the tree.

    The article has a paragraph with bold and italic marks, a paragraph with
    a citation, and a signature line. Both the span types and the span text
    of the parsed result are compared against nested expectations.
    """
    source: str = (
        "Writing a **unit test** requires having test //content//.\n\n"
        "This content, of course, must be [[created|Writing test collateral]].\n\n"
        "~Bucky, unit test writer"
    )
    result: ParsedArticle = parse_raw_markdown(source)

    # In each nested list below, the first element is the container type and
    # the remaining elements describe its children.
    first_paragraph_types = [
        BodyParagraph,
        TextSpan,
        [BoldSpan, TextSpan],
        TextSpan,
        [ItalicSpan, TextSpan],
        TextSpan,
    ]
    second_paragraph_types = [
        BodyParagraph,
        TextSpan,
        [CitationSpan, TextSpan],
        TextSpan,
    ]
    signature_types = [SignatureParagraph, TextSpan]
    expected_types = [
        [
            ParsedArticle,
            first_paragraph_types,
            second_paragraph_types,
            signature_types,
        ]
    ]
    assert_types([result], expected_types)

    # Text expectations mirror the same nesting, without the container types.
    first_paragraph_text = [
        "Writing a ",
        ["unit test"],
        " requires having test ",
        ["content"],
        ".",
    ]
    second_paragraph_text = [
        "This content, of course, must be ",
        ["created"],
        ".",
    ]
    signature_text = ["Bucky, unit test writer"]
    expected_text = [
        [
            first_paragraph_text,
            second_paragraph_text,
            signature_text,
        ]
    ]
    assert_text([result], expected_text)