From 6f380bd49565907536088be1963b5a60bee9194b Mon Sep 17 00:00:00 2001 From: Tim Van Baak Date: Wed, 9 Jun 2021 20:14:32 -0700 Subject: [PATCH] Fix parsing pair marks with line breaks inside --- amanuensis/parser/parsing.py | 69 ++++++++++++++++++++++-------------- tests/test_parser.py | 26 ++++++++++++++ 2 files changed, 69 insertions(+), 26 deletions(-) diff --git a/amanuensis/parser/parsing.py b/amanuensis/parser/parsing.py index a16afae..e2d7b1c 100644 --- a/amanuensis/parser/parsing.py +++ b/amanuensis/parser/parsing.py @@ -47,32 +47,40 @@ def parse_paragraph(text: str) -> SpanContainer: def parse_paired_formatting( text: str, - can_cite: bool = True, - can_bold: bool = True, - can_italic: bool = True, + in_cite: bool = False, + in_bold: bool = False, + in_italic: bool = False, ) -> Spans: """ Parses citations, bolds, and italics, which can be nested inside each other. + A single type cannot nest inside itself, which is controlled by setting the + flag parameters to True. """ # Find positions of any paired formatting - first_cite = find_pair(text, "[[", "]]") if can_cite else -1 - first_bold = find_pair(text, "**", "**") if can_bold else -1 - first_italic = find_pair(text, "//", "//") if can_italic else -1 - # Load the possible parse handlers into the map + next_cite = find_pair(text, "[[", "]]") if not in_cite else -1 + next_bold = find_pair(text, "**", "**") if not in_bold else -1 + next_italic = find_pair(text, "//", "//") if not in_italic else -1 + # Create a map from a formatting mark's distance to its parse handler handlers = {} - handlers[first_cite] = lambda: parse_citation( - text, can_bold=can_bold, can_italic=can_italic + handlers[next_cite] = lambda: parse_citation( + text, in_bold=in_bold, in_italic=in_italic ) - handlers[first_bold] = lambda: parse_bold( - text, can_cite=can_cite, can_italic=can_italic + handlers[next_bold] = lambda: parse_bold( + text, in_cite=in_cite, in_italic=in_italic ) - handlers[first_italic] = lambda: parse_italic( - 
text, can_cite=can_cite, can_bold=can_bold + handlers[next_italic] = lambda: parse_italic( + text, in_cite=in_cite, in_bold=in_bold ) - # If nothing was found, move on to the next parsing step - handlers[-1] = lambda: parse_breaks(text) - # Choose a handler based on the earliest found result - finds = [i for i in (first_cite, first_bold, first_italic) if i > -1] + # Map the next parsing step at -1. If we're currently inside a formatting + # mark pair, skip parsing line breaks, which are not allowed inside paired + # marks. + if in_cite or in_bold or in_italic: + handlers[-1] = lambda: parse_text(text) + else: + handlers[-1] = lambda: parse_breaks(text) + # Choose the handler for the earliest found pair, or the default handler + # at -1 if nothing was found. + finds = [i for i in (next_cite, next_bold, next_italic) if i > -1] first = min(finds) if finds else -1 return handlers[first]() @@ -95,8 +103,8 @@ def find_pair(text: str, open_tag: str, close_tag: str) -> int: def parse_citation( text: str, - can_bold: bool = True, - can_italic: bool = True, + in_bold: bool = False, + in_italic: bool = False, ) -> Spans: """ Parses text into a citation span. @@ -118,7 +126,7 @@ def parse_citation( inner_split = text_inner.split("|", 1) text_inner_actual, cite_target = inner_split[0], inner_split[-1] spans_inner = parse_paired_formatting( - text_inner_actual, can_cite=False, can_bold=can_bold, can_italic=can_italic + text_inner_actual, in_cite=True, in_bold=in_bold, in_italic=in_italic ) citation = CitationSpan(spans_inner, cite_target) return [*spans_before, citation, *spans_after] @@ -128,8 +136,8 @@ def parse_citation( def parse_bold( text: str, - can_cite: bool = True, - can_italic: bool = True, + in_cite: bool = False, + in_italic: bool = False, ) -> Spans: """ Parses text into a bold span. 
@@ -144,7 +152,7 @@ def parse_bold( # Parse inner text minus bold parsing text_inner = text[bold_open + 2 : bold_close] spans_inner = parse_paired_formatting( - text_inner, can_cite=can_cite, can_bold=False, can_italic=can_italic + text_inner, in_cite=in_cite, in_bold=True, in_italic=in_italic ) bold = BoldSpan(spans_inner) return [*spans_before, bold, *spans_after] @@ -154,8 +162,8 @@ def parse_bold( def parse_italic( text: str, - can_cite: bool = True, - can_bold: bool = True, + in_cite: bool = False, + in_bold: bool = False, ) -> Spans: """ Parses text into an italic span. @@ -170,7 +178,7 @@ def parse_italic( # Parse inner text minus italic parsing text_inner = text[italic_open + 2 : italic_close] spans_inner = parse_paired_formatting( - text_inner, can_cite=can_cite, can_bold=can_bold, can_italic=False + text_inner, in_cite=in_cite, in_bold=in_bold, in_italic=True ) italic = ItalicSpan(spans_inner) return [*spans_before, italic, *spans_after] @@ -193,3 +201,12 @@ def parse_breaks(text: str) -> Spans: for i in range(0, 2 * len(splits) - 1) ] return spans + + +def parse_text(text: str) -> Spans: + """ + Parses text with no remaining parseable marks. 
+ """ + if not text: + return [] + return [TextSpan(text)] diff --git a/tests/test_parser.py b/tests/test_parser.py index 5a27765..ec9ade8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -153,6 +153,32 @@ def test_simple_single_parse_pairs(): assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"]) +def test_simple_parse_pairs_with_break(): + """Test pair marks with breaks""" + text: str + spans: Spans + + text = r"**glory\\" + "\nhammer**" + spans = parse_paired_formatting(text) + assert_types(spans, [[BoldSpan, TextSpan]]) + assert_text(spans, [["glory\\\\\nhammer"]]) + + text = r"//glory\\" + "\nhammer//" + spans = parse_paired_formatting(text) + assert_types(spans, [[ItalicSpan, TextSpan]]) + assert_text(spans, [["glory\\\\\nhammer"]]) + + text = r"**glory\\" + "\n**hammer**" + spans = parse_paired_formatting(text) + assert_types(spans, [[BoldSpan, TextSpan], TextSpan]) + assert_text(spans, [["glory\\\\\n"], "hammer**"]) + + text = r"//glory\\" + "\n//hammer//" + spans = parse_paired_formatting(text) + assert_types(spans, [[ItalicSpan, TextSpan], TextSpan]) + assert_text(spans, [["glory\\\\\n"], "hammer//"]) + + def test_simple_nested_parse_pairs(): """Test parsing for nesting bold and italic""" text: str