Refactor title normalization
This commit is contained in:
parent
3832a18d63
commit
49ed06fcb0
|
@ -0,0 +1,23 @@
|
|||
def normalize_title(title):
    """
    Normalize a string for use as a title.

    - Strips leading and trailing whitespace
    - Merges each run of internal whitespace into a single space
    - Uppercases the first character, leaving the rest unchanged

    Returns the empty string when the title is empty or all whitespace.
    """
    # str.split() with no separator discards leading/trailing whitespace and
    # splits on whitespace runs, so no regex (and no `re` import) is needed.
    cleaned = " ".join(title.split())
    # Slicing instead of indexing keeps the empty string from raising.
    return cleaned[:1].upper() + cleaned[1:]
|
||||
|
||||
def titlesort(title):
    """
    Build the key used to sort a title alphabetically.

    The title is lower-cased, and a single leading article
    ("the ", "an " or "a ") is dropped if present.
    """
    key = title.lower()
    for article in ("the ", "an ", "a "):
        if key.startswith(article):
            return key[len(article):]
    return key
|
|
@ -6,6 +6,7 @@ can be rendered by a renderer.
|
|||
|
||||
import re
|
||||
|
||||
from amanuensis.parser.helpers import normalize_title
|
||||
|
||||
class Renderable():
|
||||
def render(self, renderer):
|
||||
|
@ -54,8 +55,8 @@ class CitationSpan(SpanContainer):
|
|||
"""A citation to another article"""
|
||||
def __init__(self, spans, cite_target):
    """
    Store the child spans and the normalized citation target.

    spans is passed unchanged to the parent initializer.
    cite_target is passed through normalize_title before being stored.
    """
    super().__init__(spans)
    # Normalize citation target
    self.cite_target = normalize_title(cite_target)
|
||||
def __str__(self):
    """
    Return the citation as "{<spans joined by spaces>:<cite target>}".
    """
    joined_spans = ' '.join(map(str, self.spans))
    return f"{{{joined_spans}:{self.cite_target}}}"
|
||||
|
||||
|
|
Loading…
Reference in New Issue