Add paragraph and article parser tests

This commit is contained in:
Tim Van Baak 2021-06-11 23:26:28 -07:00
parent 2c294f7f12
commit fde12ac818
2 changed files with 72 additions and 13 deletions

View File

@ -65,12 +65,8 @@ def parse_paired_formatting(
handlers[next_cite] = lambda: parse_citation( handlers[next_cite] = lambda: parse_citation(
text, in_bold=in_bold, in_italic=in_italic text, in_bold=in_bold, in_italic=in_italic
) )
handlers[next_bold] = lambda: parse_bold( handlers[next_bold] = lambda: parse_bold(text, in_cite=in_cite, in_italic=in_italic)
text, in_cite=in_cite, in_italic=in_italic handlers[next_italic] = lambda: parse_italic(text, in_cite=in_cite, in_bold=in_bold)
)
handlers[next_italic] = lambda: parse_italic(
text, in_cite=in_cite, in_bold=in_bold
)
# Map the next parsing step at -1. If we're currently inside a formatting # Map the next parsing step at -1. If we're currently inside a formatting
# mark pair, skip parsing line breaks, which are not allowed inside paired # mark pair, skip parsing line breaks, which are not allowed inside paired
# marks. # marks.

View File

@ -25,18 +25,18 @@ from amanuensis.parser.parsing import (
def assert_types(spans: Spans, types: Sequence, loc=None): def assert_types(spans: Spans, types: Sequence, loc=None):
""" """
Asserts that a span list has the types specified. Asserts that a span list has the types specified.
Each element in `types` should be either a span type or a list. The first Each element in `types` should be either a span type or a list. The first
element of the list is the container type and the remaining elements are the element of the list is the container type and the remaining elements are the
content types. content types.
""" """
assert len(spans) == len( for i in range(max(len(spans), len(types))):
types
), f"Unexpected type sequence length at loc {loc if loc else 'root'}"
i = -1
for span, span_type in zip(spans, types):
i += 1
i_loc = f"{loc}.{i}" if loc else f"{i}" i_loc = f"{loc}.{i}" if loc else f"{i}"
# Check lengths are equal
assert i < len(spans), f"Span list unexpectedly short at {i_loc}"
assert i < len(types), f"Type list unexpectedly short at {i_loc}"
# Check types are equal
span, span_type = spans[i], types[i]
if isinstance(span_type, list): if isinstance(span_type, list):
assert isinstance( assert isinstance(
span, SpanContainer span, SpanContainer
@ -317,3 +317,66 @@ def test_parse_citation_nested():
assert_text(spans, [["**hello world"], "**"]) assert_text(spans, [["**hello world"], "**"])
citation: CitationSpan = spans[0] citation: CitationSpan = spans[0]
assert citation.cite_target == "**hello world" assert citation.cite_target == "**hello world"
def test_parse_paragraphs():
    """
    Check parse_paragraph against one body and one signature paragraph.

    Each case gives the raw paragraph, the container type the parser is
    expected to return, and the text expected inside it. In both cases the
    expected text omits the leading marker character (tab or tilde).
    """
    cases = (
        # Body paragraph
        (
            "\tIn the beginning was the Word.",
            BodyParagraph,
            "In the beginning was the Word.",
        ),
        # Signature paragraph
        (
            "~Ersatz Scrivener, scholar extraordinaire",
            SignatureParagraph,
            "Ersatz Scrivener, scholar extraordinaire",
        ),
    )
    for raw, paragraph_type, expected_text in cases:
        parsed = parse_paragraph(raw)
        # Wrap the single container in a list so the span-list helpers apply.
        assert_types([parsed], [[paragraph_type, TextSpan]])
        assert_text([parsed], [[expected_text]])
def test_parse_article():
    """
    Run parse_raw_markdown on a three-paragraph article and check the result.

    The article has a body paragraph with bold and italic marks, a body
    paragraph with a citation, and a signature paragraph. The parsed tree is
    checked for both its span types and its text.
    """
    raw_article = (
        "Writing a **unit test** requires having test //content//.\n\n"
        "This content, of course, must be [[created|Writing test collateral]].\n\n"
        "~Bucky, unit test writer"
    )
    article_tree = parse_raw_markdown(raw_article)

    # Expected span types, one nested list per paragraph; the first element
    # of each list is the container type.
    formatted_paragraph = [
        BodyParagraph,
        TextSpan,
        [BoldSpan, TextSpan],
        TextSpan,
        [ItalicSpan, TextSpan],
        TextSpan,
    ]
    cited_paragraph = [BodyParagraph, TextSpan, [CitationSpan, TextSpan], TextSpan]
    signature_paragraph = [SignatureParagraph, TextSpan]
    expected_types = [
        [ParsedArticle, formatted_paragraph, cited_paragraph, signature_paragraph]
    ]
    assert_types([article_tree], expected_types)

    # Expected text, mirroring the nesting above without the container types.
    expected_text = [
        [
            ["Writing a ", ["unit test"], " requires having test ", ["content"], "."],
            ["This content, of course, must be ", ["created"], "."],
            ["Bucky, unit test writer"],
        ]
    ]
    assert_text([article_tree], expected_text)