Fix parsing pair marks with line breaks inside
This commit is contained in:
parent
7a847e96d3
commit
6f380bd495
|
@ -47,32 +47,40 @@ def parse_paragraph(text: str) -> SpanContainer:
|
|||
|
||||
def parse_paired_formatting(
|
||||
text: str,
|
||||
can_cite: bool = True,
|
||||
can_bold: bool = True,
|
||||
can_italic: bool = True,
|
||||
in_cite: bool = False,
|
||||
in_bold: bool = False,
|
||||
in_italic: bool = False,
|
||||
) -> Spans:
|
||||
"""
|
||||
Parses citations, bolds, and italics, which can be nested inside each other.
|
||||
A single type cannot nest inside itself, which is controlled by setting the
|
||||
flag parameters to False.
|
||||
"""
|
||||
# Find positions of any paired formatting
|
||||
first_cite = find_pair(text, "[[", "]]") if can_cite else -1
|
||||
first_bold = find_pair(text, "**", "**") if can_bold else -1
|
||||
first_italic = find_pair(text, "//", "//") if can_italic else -1
|
||||
# Load the possible parse handlers into the map
|
||||
next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
|
||||
next_bold = find_pair(text, "**", "**") if not in_bold else -1
|
||||
next_italic = find_pair(text, "//", "//") if not in_italic else -1
|
||||
# Create a map from a formatting mark's distance to its parse handler
|
||||
handlers = {}
|
||||
handlers[first_cite] = lambda: parse_citation(
|
||||
text, can_bold=can_bold, can_italic=can_italic
|
||||
handlers[next_cite] = lambda: parse_citation(
|
||||
text, in_bold=in_bold, in_italic=in_italic
|
||||
)
|
||||
handlers[first_bold] = lambda: parse_bold(
|
||||
text, can_cite=can_cite, can_italic=can_italic
|
||||
handlers[next_bold] = lambda: parse_bold(
|
||||
text, in_cite=in_cite, in_italic=in_italic
|
||||
)
|
||||
handlers[first_italic] = lambda: parse_italic(
|
||||
text, can_cite=can_cite, can_bold=can_bold
|
||||
handlers[next_italic] = lambda: parse_italic(
|
||||
text, in_cite=in_cite, in_bold=in_bold
|
||||
)
|
||||
# If nothing was found, move on to the next parsing step
|
||||
handlers[-1] = lambda: parse_breaks(text)
|
||||
# Choose a handler based on the earliest found result
|
||||
finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
|
||||
# Map the next parsing step at -1. If we're currently inside a formatting
|
||||
# mark pair, skip parsing line breaks, which are not allowed inside paired
|
||||
# marks.
|
||||
if in_cite or in_bold or in_italic:
|
||||
handlers[-1] = lambda: parse_text(text)
|
||||
else:
|
||||
handlers[-1] = lambda: parse_breaks(text)
|
||||
# Choose the handler for the earliest found pair, or the default handler
|
||||
# at -1 if nothing was found.
|
||||
finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
|
||||
first = min(finds) if finds else -1
|
||||
return handlers[first]()
|
||||
|
||||
|
@ -95,8 +103,8 @@ def find_pair(text: str, open_tag: str, close_tag: str) -> int:
|
|||
|
||||
def parse_citation(
|
||||
text: str,
|
||||
can_bold: bool = True,
|
||||
can_italic: bool = True,
|
||||
in_bold: bool = False,
|
||||
in_italic: bool = False,
|
||||
) -> Spans:
|
||||
"""
|
||||
Parses text into a citation span.
|
||||
|
@ -118,7 +126,7 @@ def parse_citation(
|
|||
inner_split = text_inner.split("|", 1)
|
||||
text_inner_actual, cite_target = inner_split[0], inner_split[-1]
|
||||
spans_inner = parse_paired_formatting(
|
||||
text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic
|
||||
text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
|
||||
)
|
||||
citation = CitationSpan(spans_inner, cite_target)
|
||||
return [*spans_before, citation, *spans_after]
|
||||
|
@ -128,8 +136,8 @@ def parse_citation(
|
|||
|
||||
def parse_bold(
|
||||
text: str,
|
||||
can_cite: bool = True,
|
||||
can_italic: bool = True,
|
||||
in_cite: bool = False,
|
||||
in_italic: bool = False,
|
||||
) -> Spans:
|
||||
"""
|
||||
Parses text into a bold span.
|
||||
|
@ -144,7 +152,7 @@ def parse_bold(
|
|||
# Parse inner text minus bold parsing
|
||||
text_inner = text[bold_open + 2 : bold_close]
|
||||
spans_inner = parse_paired_formatting(
|
||||
text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic
|
||||
text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
|
||||
)
|
||||
bold = BoldSpan(spans_inner)
|
||||
return [*spans_before, bold, *spans_after]
|
||||
|
@ -154,8 +162,8 @@ def parse_bold(
|
|||
|
||||
def parse_italic(
|
||||
text: str,
|
||||
can_cite: bool = True,
|
||||
can_bold: bool = True,
|
||||
in_cite: bool = False,
|
||||
in_bold: bool = False,
|
||||
) -> Spans:
|
||||
"""
|
||||
Parses text into an italic span.
|
||||
|
@ -170,7 +178,7 @@ def parse_italic(
|
|||
# Parse inner text minus italic parsing
|
||||
text_inner = text[italic_open + 2 : italic_close]
|
||||
spans_inner = parse_paired_formatting(
|
||||
text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False
|
||||
text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
|
||||
)
|
||||
italic = ItalicSpan(spans_inner)
|
||||
return [*spans_before, italic, *spans_after]
|
||||
|
@ -193,3 +201,12 @@ def parse_breaks(text: str) -> Spans:
|
|||
for i in range(0, 2 * len(splits) - 1)
|
||||
]
|
||||
return spans
|
||||
|
||||
|
||||
def parse_text(text: str) -> Spans:
|
||||
"""
|
||||
Parses text with no remaining parseable marks.
|
||||
"""
|
||||
if not text:
|
||||
return []
|
||||
return [TextSpan(text)]
|
||||
|
|
|
@ -153,6 +153,32 @@ def test_simple_single_parse_pairs():
|
|||
assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
|
||||
|
||||
|
||||
def test_simple_parse_pairs_with_break():
|
||||
"""Test pair marks with breaks"""
|
||||
text: str
|
||||
spans: Spans
|
||||
|
||||
text = r"**glory\\" + "\nhammer**"
|
||||
spans = parse_paired_formatting(text)
|
||||
assert_types(spans, [[BoldSpan, TextSpan]])
|
||||
assert_text(spans, [["glory\\\\\nhammer"]])
|
||||
|
||||
text = r"//glory\\" + "\nhammer//"
|
||||
spans = parse_paired_formatting(text)
|
||||
assert_types(spans, [[ItalicSpan, TextSpan]])
|
||||
assert_text(spans, [["glory\\\\\nhammer"]])
|
||||
|
||||
text = r"**glory\\" + "\n**hammer**"
|
||||
spans = parse_paired_formatting(text)
|
||||
assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
|
||||
assert_text(spans, [["glory\\\\\n"], "hammer**"])
|
||||
|
||||
text = r"//glory\\" + "\n//hammer//"
|
||||
spans = parse_paired_formatting(text)
|
||||
assert_types(spans, [[ItalicSpan, TextSpan], TextSpan])
|
||||
assert_text(spans, [["glory\\\\\n"], "hammer//"])
|
||||
|
||||
|
||||
def test_simple_nested_parse_pairs():
|
||||
"""Test parsing for nesting bold and italic"""
|
||||
text: str
|
||||
|
|
Loading…
Reference in New Issue