From a9a025094a2254bbdbb413f583f0d6e2eede7d4d Mon Sep 17 00:00:00 2001 From: Tim Van Baak Date: Sat, 11 Apr 2020 11:45:08 -0700 Subject: [PATCH] Normalize parsed cite targets --- amanuensis/parser/tokenizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/amanuensis/parser/tokenizer.py b/amanuensis/parser/tokenizer.py index 1be4d9a..ca8f6b3 100644 --- a/amanuensis/parser/tokenizer.py +++ b/amanuensis/parser/tokenizer.py @@ -54,7 +54,8 @@ class CitationSpan(SpanContainer): """A citation to another article""" def __init__(self, spans, cite_target): super().__init__(spans) - self.cite_target = cite_target + # Normalize citation target by eliminating most whitespace + self.cite_target = re.sub(r'\s+', " ", cite_target.strip()) def __str__(self): return f"{{{' '.join([str(span) for span in self.spans])}:{self.cite_target}}}"