Fix parsing of paired marks that contain line breaks
This commit is contained in:
parent
7a847e96d3
commit
6f380bd495
|
@ -47,32 +47,40 @@ def parse_paragraph(text: str) -> SpanContainer:
|
||||||
|
|
||||||
def parse_paired_formatting(
|
def parse_paired_formatting(
|
||||||
text: str,
|
text: str,
|
||||||
can_cite: bool = True,
|
in_cite: bool = False,
|
||||||
can_bold: bool = True,
|
in_bold: bool = False,
|
||||||
can_italic: bool = True,
|
in_italic: bool = False,
|
||||||
) -> Spans:
|
) -> Spans:
|
||||||
"""
|
"""
|
||||||
Parses citations, bolds, and italics, which can be nested inside each other.
|
Parses citations, bolds, and italics, which can be nested inside each other.
|
||||||
|
A single type cannot nest inside itself, which is controlled by setting the
|
||||||
|
corresponding in_* flag parameter to True.
|
||||||
"""
|
"""
|
||||||
# Find positions of any paired formatting
|
# Find positions of any paired formatting
|
||||||
first_cite = find_pair(text, "[[", "]]") if can_cite else -1
|
next_cite = find_pair(text, "[[", "]]") if not in_cite else -1
|
||||||
first_bold = find_pair(text, "**", "**") if can_bold else -1
|
next_bold = find_pair(text, "**", "**") if not in_bold else -1
|
||||||
first_italic = find_pair(text, "//", "//") if can_italic else -1
|
next_italic = find_pair(text, "//", "//") if not in_italic else -1
|
||||||
# Load the possible parse handlers into the map
|
# Create a map from each formatting mark's position in the text to its parse handler
|
||||||
handlers = {}
|
handlers = {}
|
||||||
handlers[first_cite] = lambda: parse_citation(
|
handlers[next_cite] = lambda: parse_citation(
|
||||||
text, can_bold=can_bold, can_italic=can_italic
|
text, in_bold=in_bold, in_italic=in_italic
|
||||||
)
|
)
|
||||||
handlers[first_bold] = lambda: parse_bold(
|
handlers[next_bold] = lambda: parse_bold(
|
||||||
text, can_cite=can_cite, can_italic=can_italic
|
text, in_cite=in_cite, in_italic=in_italic
|
||||||
)
|
)
|
||||||
handlers[first_italic] = lambda: parse_italic(
|
handlers[next_italic] = lambda: parse_italic(
|
||||||
text, can_cite=can_cite, can_bold=can_bold
|
text, in_cite=in_cite, in_bold=in_bold
|
||||||
)
|
)
|
||||||
# If nothing was found, move on to the next parsing step
|
# Map the next parsing step at -1. If we're currently inside a formatting
|
||||||
handlers[-1] = lambda: parse_breaks(text)
|
# mark pair, skip parsing line breaks, which are not allowed inside paired
|
||||||
# Choose a handler based on the earliest found result
|
# marks.
|
||||||
finds = [i for i in (first_cite, first_bold, first_italic) if i > -1]
|
if in_cite or in_bold or in_italic:
|
||||||
|
handlers[-1] = lambda: parse_text(text)
|
||||||
|
else:
|
||||||
|
handlers[-1] = lambda: parse_breaks(text)
|
||||||
|
# Choose the handler for the earliest found pair, or the default handler
|
||||||
|
# at -1 if nothing was found.
|
||||||
|
finds = [i for i in (next_cite, next_bold, next_italic) if i > -1]
|
||||||
first = min(finds) if finds else -1
|
first = min(finds) if finds else -1
|
||||||
return handlers[first]()
|
return handlers[first]()
|
||||||
|
|
||||||
|
@ -95,8 +103,8 @@ def find_pair(text: str, open_tag: str, close_tag: str) -> int:
|
||||||
|
|
||||||
def parse_citation(
|
def parse_citation(
|
||||||
text: str,
|
text: str,
|
||||||
can_bold: bool = True,
|
in_bold: bool = False,
|
||||||
can_italic: bool = True,
|
in_italic: bool = False,
|
||||||
) -> Spans:
|
) -> Spans:
|
||||||
"""
|
"""
|
||||||
Parses text into a citation span.
|
Parses text into a citation span.
|
||||||
|
@ -118,7 +126,7 @@ def parse_citation(
|
||||||
inner_split = text_inner.split("|", 1)
|
inner_split = text_inner.split("|", 1)
|
||||||
text_inner_actual, cite_target = inner_split[0], inner_split[-1]
|
text_inner_actual, cite_target = inner_split[0], inner_split[-1]
|
||||||
spans_inner = parse_paired_formatting(
|
spans_inner = parse_paired_formatting(
|
||||||
text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic
|
text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic
|
||||||
)
|
)
|
||||||
citation = CitationSpan(spans_inner, cite_target)
|
citation = CitationSpan(spans_inner, cite_target)
|
||||||
return [*spans_before, citation, *spans_after]
|
return [*spans_before, citation, *spans_after]
|
||||||
|
@ -128,8 +136,8 @@ def parse_citation(
|
||||||
|
|
||||||
def parse_bold(
|
def parse_bold(
|
||||||
text: str,
|
text: str,
|
||||||
can_cite: bool = True,
|
in_cite: bool = False,
|
||||||
can_italic: bool = True,
|
in_italic: bool = False,
|
||||||
) -> Spans:
|
) -> Spans:
|
||||||
"""
|
"""
|
||||||
Parses text into a bold span.
|
Parses text into a bold span.
|
||||||
|
@ -144,7 +152,7 @@ def parse_bold(
|
||||||
# Parse inner text minus bold parsing
|
# Parse inner text minus bold parsing
|
||||||
text_inner = text[bold_open + 2 : bold_close]
|
text_inner = text[bold_open + 2 : bold_close]
|
||||||
spans_inner = parse_paired_formatting(
|
spans_inner = parse_paired_formatting(
|
||||||
text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic
|
text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic
|
||||||
)
|
)
|
||||||
bold = BoldSpan(spans_inner)
|
bold = BoldSpan(spans_inner)
|
||||||
return [*spans_before, bold, *spans_after]
|
return [*spans_before, bold, *spans_after]
|
||||||
|
@ -154,8 +162,8 @@ def parse_bold(
|
||||||
|
|
||||||
def parse_italic(
|
def parse_italic(
|
||||||
text: str,
|
text: str,
|
||||||
can_cite: bool = True,
|
in_cite: bool = False,
|
||||||
can_bold: bool = True,
|
in_bold: bool = False,
|
||||||
) -> Spans:
|
) -> Spans:
|
||||||
"""
|
"""
|
||||||
Parses text into an italic span.
|
Parses text into an italic span.
|
||||||
|
@ -170,7 +178,7 @@ def parse_italic(
|
||||||
# Parse inner text minus italic parsing
|
# Parse inner text minus italic parsing
|
||||||
text_inner = text[italic_open + 2 : italic_close]
|
text_inner = text[italic_open + 2 : italic_close]
|
||||||
spans_inner = parse_paired_formatting(
|
spans_inner = parse_paired_formatting(
|
||||||
text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False
|
text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True
|
||||||
)
|
)
|
||||||
italic = ItalicSpan(spans_inner)
|
italic = ItalicSpan(spans_inner)
|
||||||
return [*spans_before, italic, *spans_after]
|
return [*spans_before, italic, *spans_after]
|
||||||
|
@ -193,3 +201,12 @@ def parse_breaks(text: str) -> Spans:
|
||||||
for i in range(0, 2 * len(splits) - 1)
|
for i in range(0, 2 * len(splits) - 1)
|
||||||
]
|
]
|
||||||
return spans
|
return spans
|
||||||
|
|
||||||
|
|
||||||
|
def parse_text(text: str) -> Spans:
    """
    Wraps text that has no remaining parseable marks in a single text span.

    Empty text produces no spans at all rather than an empty text span.
    """
    return [TextSpan(text)] if text else []
|
||||||
|
|
|
@ -153,6 +153,32 @@ def test_simple_single_parse_pairs():
|
||||||
assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
|
assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple_parse_pairs_with_break():
    """Test that a break marker inside a pair mark is kept as text in the pair"""
    # Each case is (input text, expected span type tree, expected span text tree).
    # The first two cases close the pair after the break; the last two close it
    # right after the break and leave a dangling, unpaired mark behind.
    cases = [
        (
            r"**glory\\" + "\nhammer**",
            [[BoldSpan, TextSpan]],
            [["glory\\\\\nhammer"]],
        ),
        (
            r"//glory\\" + "\nhammer//",
            [[ItalicSpan, TextSpan]],
            [["glory\\\\\nhammer"]],
        ),
        (
            r"**glory\\" + "\n**hammer**",
            [[BoldSpan, TextSpan], TextSpan],
            [["glory\\\\\n"], "hammer**"],
        ),
        (
            r"//glory\\" + "\n//hammer//",
            [[ItalicSpan, TextSpan], TextSpan],
            [["glory\\\\\n"], "hammer//"],
        ),
    ]

    for source, expected_types, expected_text in cases:
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        assert_text(parsed, expected_text)
|
||||||
|
|
||||||
|
|
||||||
def test_simple_nested_parse_pairs():
|
def test_simple_nested_parse_pairs():
|
||||||
"""Test parsing for nesting bold and italic"""
|
"""Test parsing for nesting bold and italic"""
|
||||||
text: str
|
text: str
|
||||||
|
|
Loading…
Reference in New Issue