Incorporate parser into new code #12

Merged
Jaculabilis merged 8 commits from tvb/parser into develop 2021-06-12 17:28:19 +00:00
2 changed files with 123 additions and 5 deletions
Showing only changes of commit 2c294f7f12 - Show all commits

View File

@ -121,8 +121,9 @@ def parse_citation(
spans_after = parse_paired_formatting(text[cite_close + 2 :]) spans_after = parse_paired_formatting(text[cite_close + 2 :])
# Parse inner text and skip parsing for this format pair # Parse inner text and skip parsing for this format pair
text_inner = text[cite_open + 2 : cite_close] text_inner = text[cite_open + 2 : cite_close]
# For citations specifically, we may need to split off a citation # For citations specifically, try to split off a citation target.
# target from the alias text # If there's no citation target to split, use the same text as the
# citation text and the target.
inner_split = text_inner.split("|", 1) inner_split = text_inner.split("|", 1)
text_inner_actual, cite_target = inner_split[0], inner_split[-1] text_inner_actual, cite_target = inner_split[0], inner_split[-1]
spans_inner = parse_paired_formatting( spans_inner = parse_paired_formatting(

View File

@ -107,7 +107,7 @@ def test_parse_breaks():
assert_text(spans, [text]) assert_text(spans, [text])
def test_simple_single_parse_pairs(): def test_parse_pairs_single():
"""Test parsing for bold and italic marks""" """Test parsing for bold and italic marks"""
text: str text: str
spans: Spans spans: Spans
@ -153,7 +153,7 @@ def test_simple_single_parse_pairs():
assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"]) assert_text(spans, ["In the ", ["beginning"], " was ", ["the"], " Word"])
def test_simple_parse_pairs_with_break(): def test_parse_pairs_break():
"""Test pair marks with breaks""" """Test pair marks with breaks"""
text: str text: str
spans: Spans spans: Spans
@ -179,7 +179,7 @@ def test_simple_parse_pairs_with_break():
assert_text(spans, [["glory\\\\\n"], "hammer//"]) assert_text(spans, [["glory\\\\\n"], "hammer//"])
def test_simple_nested_parse_pairs(): def test_parse_pairs_nested():
"""Test parsing for nesting bold and italic""" """Test parsing for nesting bold and italic"""
text: str text: str
spans: Spans spans: Spans
@ -200,3 +200,120 @@ def test_simple_nested_parse_pairs():
spans = parse_paired_formatting(text) spans = parse_paired_formatting(text)
assert_types(spans, [[BoldSpan, TextSpan], TextSpan]) assert_types(spans, [[BoldSpan, TextSpan], TextSpan])
assert_text(spans, [["Hello//world"], "//"]) assert_text(spans, [["Hello//world"], "//"])
def test_normalize_title():
    """Test the title normalization used by the citation parser"""
    # Maps each raw title to the normalized form the test expects back.
    expected_by_input = {
        "hello": "Hello",
        " world ": "World",
        "Waiting for Godot": "Waiting for Godot",
        "lowercase letters": "Lowercase letters",
    }
    for raw_title, expected in expected_by_input.items():
        assert normalize_title(raw_title) == expected
def test_parse_citation_single():
    """Test parsing citations, which have internal formatting"""
    citation: CitationSpan

    # Each case: (source text, expected span types, expected span text,
    # expected cite_target of the first span).
    cases = [
        # Simple test cases
        ("[[hello]]", [[CitationSpan, TextSpan]], [["hello"]], "Hello"),
        ("[[hello|world]]", [[CitationSpan, TextSpan]], [["hello"]], "World"),
        # Only the first "|" is expected to act as the separator; later
        # ones are expected to stay in the target.
        ("[[hello||world]]", [[CitationSpan, TextSpan]], [["hello"]], "|world"),
        (
            "[[ hello | world ]]",
            [[CitationSpan, TextSpan]],
            [[" hello "]],
            "World",
        ),
        (
            "[[faith|hope|love]]",
            [[CitationSpan, TextSpan]],
            [["faith"]],
            "Hope|love",
        ),
        # The citation is expected to end at the first "]]", leaving
        # trailing plain text and an empty target.
        (
            "[[ [[|]] ]]",
            [[CitationSpan, TextSpan], TextSpan],
            [[" [["], " ]]"],
            "",
        ),
    ]

    for source, expected_types, expected_text, expected_target in cases:
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        assert_text(parsed, expected_text)
        citation = parsed[0]
        assert citation.cite_target == expected_target
def test_parse_citation_break():
    """Test citations with breaks"""
    citation: CitationSpan

    # Each case: (source text, expected span text, expected cite_target).
    # Every case is expected to parse to a single citation wrapping one
    # text span.
    cases = [
        (
            "[[hello\\\\\nworld]]",
            [["hello\\\\\nworld"]],
            "Hello\\\\ world",
        ),
        (
            "[[one|two\\\\\nthree]]",
            [["one"]],
            "Two\\\\ three",
        ),
    ]

    for source, expected_text, expected_target in cases:
        parsed = parse_paired_formatting(source)
        assert_types(parsed, [[CitationSpan, TextSpan]])
        assert_text(parsed, expected_text)
        citation = parsed[0]
        assert citation.cite_target == expected_target
def test_parse_citation_nested():
    """Test nesting with citations"""

    def parse_and_check(source, expected_types, expected_text):
        """Parse source, assert its span types and text, return the spans."""
        parsed = parse_paired_formatting(source)
        assert_types(parsed, expected_types)
        assert_text(parsed, expected_text)
        return parsed

    citation: CitationSpan

    # Bold pair fully inside the citation.
    parsed = parse_and_check(
        "[[**hello world**]]",
        [[CitationSpan, [BoldSpan, TextSpan]]],
        [[["hello world"]]],
    )
    citation = parsed[0]
    assert citation.cite_target == "**hello world**"

    # A "|" between the bold marks: no BoldSpan is expected in the result.
    parsed = parse_and_check(
        "[[**hello|world**]]",
        [[CitationSpan, TextSpan]],
        [["**hello"]],
    )
    citation = parsed[0]
    assert citation.cite_target == "World**"

    # Citation fully inside a bold pair: the citation is the bold span's
    # first child.
    parsed = parse_and_check(
        "**[[hello world]]**",
        [[BoldSpan, [CitationSpan, TextSpan]]],
        [[["hello world"]]],
    )
    citation = parsed[0].spans[0]
    assert citation.cite_target == "Hello world"

    # Overlapping pairs, bold opened first: no citation is expected.
    parse_and_check(
        "**[[hello world**]]",
        [[BoldSpan, TextSpan], TextSpan],
        [["[[hello world"], "]]"],
    )

    # Overlapping pairs, citation opened first: no bold is expected.
    parsed = parse_and_check(
        "[[**hello world]]**",
        [[CitationSpan, TextSpan], TextSpan],
        [["**hello world"], "**"],
    )
    citation = parsed[0]
    assert citation.cite_target == "**hello world"