Fix parsing pair marks with line breaks inside

This commit is contained in:
Tim Van Baak 2021-06-09 20:14:32 -07:00
parent 7a847e96d3
commit 6f380bd495
2 changed files with 69 additions and 26 deletions

View File

@ -47,32 +47,40 @@ def parse_paragraph(text: str) -> SpanContainer:
def parse_paired_formatting(
text: str,
can_cite: bool = True,
can_bold: bool = True,
can_italic: bool = True,
in_cite: bool = False,
in_bold: bool = False,
in_italic: bool = False,
) -> Spans:
"""
Parses citations, bolds, and italics, which can be nested inside each other.
A single type cannot nest inside itself, which is controlled by setting the
flag parameters to False.
"""
# Find positions of any paired formatting
first_cite = find_pair(text, "[[", "]]") if can_cite else -1
first_bold = find_pair(text, "**", "**") if can_bold else -1
first_italic = find_pair(text, "//", "//") if can_italic else -1
# Load the possible parse handlers into the map
next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
next_bold = find_pair(text, "**", "**") if not in_bold else -1
next_italic = find_pair(text, "//", "//") if not in_italic else -1
# Create a map from a formatting mark's distance to its parse handler
handlers = {}
handlers[first_cite] = lambda: parse_citation(
text, can_bold=can_bold, can_italic=can_italic
handlers[next_cite] = lambda: parse_citation(
text, in_bold=in_bold, in_italic=in_italic
)
handlers[first_bold] = lambda: parse_bold(
text, can_cite=can_cite, can_italic=can_italic
handlers[next_bold] = lambda: parse_bold(
text, in_cite=in_cite, in_italic=in_italic
)
handlers[first_italic] = lambda: parse_italic(
text, can_cite=can_cite, can_bold=can_bold
handlers[next_italic] = lambda: parse_italic(
text, in_cite=in_cite, in_bold=in_bold
)
# If nothing was found, move on to the next parsing step
handlers[-1] = lambda: parse_breaks(text)
# Choose a handler based on the earliest found result
finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
# Map the next parsing step at -1. If we're currently inside a formatting
# mark pair, skip parsing line breaks, which are not allowed inside paired
# marks.
if in_cite or in_bold or in_italic:
handlers[-1] = lambda: parse_text(text)
else:
handlers[-1] = lambda: parse_breaks(text)
# Choose the handler for the earliest found pair, or the default handler
# at -1 if nothing was found.
finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
first = min(finds) if finds else -1
return handlers[first]()
@ -95,8 +103,8 @@ def find_pair(text: str, open_tag: str, close_tag: str) -> int:
def parse_citation(
text: str,
can_bold: bool = True,
can_italic: bool = True,
in_bold: bool = False,
in_italic: bool = False,
) -> Spans:
"""
Parses text into a citation span.
@ -118,7 +126,7 @@ def parse_citation(
inner_split = text_inner.split("|", 1)
text_inner_actual, cite_target = inner_split[0], inner_split[-1]
spans_inner = parse_paired_formatting(
text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic
text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
)
citation = CitationSpan(spans_inner, cite_target)
return [*spans_before, citation, *spans_after]
@ -128,8 +136,8 @@ def parse_citation(
def parse_bold(
text: str,
can_cite: bool = True,
can_italic: bool = True,
in_cite: bool = False,
in_italic: bool = False,
) -> Spans:
"""
Parses text into a bold span.
@ -144,7 +152,7 @@ def parse_bold(
# Parse inner text minus bold parsing
text_inner = text[bold_open + 2 : bold_close]
spans_inner = parse_paired_formatting(
text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic
text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
)
bold = BoldSpan(spans_inner)
return [*spans_before, bold, *spans_after]
@ -154,8 +162,8 @@ def parse_bold(
def parse_italic(
text: str,
can_cite: bool = True,
can_bold: bool = True,
in_cite: bool = False,
in_bold: bool = False,
) -> Spans:
"""
Parses text into an italic span.
@ -170,7 +178,7 @@ def parse_italic(
# Parse inner text minus italic parsing
text_inner = text[italic_open + 2 : italic_close]
spans_inner = parse_paired_formatting(
text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False
text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
)
italic = ItalicSpan(spans_inner)
return [*spans_before, italic, *spans_after]
@ -193,3 +201,12 @@ def parse_breaks(text: str) -> Spans:
for i in range(0, 2 * len(splits) - 1)
]
return spans
def parse_text(text: str) -> Spans:
"""
Parses text with no remaining parseable marks.
"""
if not text:
return []
return [TextSpan(text)]

View File

@ -153,6 +153,32 @@ def test_simple_single_parse_pairs():
assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
def test_simple_parse_pairs_with_break():
"""Test pair marks with breaks"""
text: str
spans: Spans
text = r"**glory\\" + "\nhammer**"
spans = parse_paired_formatting(text)
assert_types(spans, [[BoldSpan, TextSpan]])
assert_text(spans, [["glory\\\\\nhammer"]])
text = r"//glory\\" + "\nhammer//"
spans = parse_paired_formatting(text)
assert_types(spans, [[ItalicSpan, TextSpan]])
assert_text(spans, [["glory\\\\\nhammer"]])
text = r"**glory\\" + "\n**hammer**"
spans = parse_paired_formatting(text)
assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
assert_text(spans, [["glory\\\\\n"], "hammer**"])
text = r"//glory\\" + "\n//hammer//"
spans = parse_paired_formatting(text)
assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
assert_text(spans, [["glory\\\\\n"], "hammer//"])
def test_simple_nested_parse_pairs():
"""Test parsing for nesting bold and italic"""
text: str