Refactor title normalization
This commit is contained in:
parent
3832a18d63
commit
49ed06fcb0
|
@ -0,0 +1,23 @@
|
||||||
|
def normalize_title(title):
    """
    Normalize a string for use as a title.

    - Strips leading and trailing whitespace
    - Merges each internal run of whitespace into a single space
    - Upper-cases the first character (the rest of the string is left as-is)

    Returns the normalized string; an empty or all-whitespace input yields
    the empty string.
    """
    # str.split() with no arguments drops leading/trailing whitespace and
    # splits on runs of whitespace, so re-joining with a single space both
    # trims and collapses in one pass, with no dependency on the `re` module.
    cleaned = " ".join(title.split())
    # Slicing (rather than indexing) keeps the empty string safe.
    return cleaned[:1].upper() + cleaned[1:]
|
||||||
|
|
||||||
|
def titlesort(title):
    """
    Build the key used to sort a title alphabetically.

    The title is lower-cased, and a single leading English article
    ("the", "an" or "a") followed by a space is dropped.
    """
    lowered = title.lower()
    # The prefixes are mutually exclusive, so at most one can match.
    for article in ("the ", "an ", "a "):
        if lowered.startswith(article):
            return lowered[len(article):]
    return lowered
|
|
@ -6,6 +6,7 @@ can be rendered by a renderer.
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from amanuensis.parser.helpers import normalize_title
|
||||||
|
|
||||||
class Renderable():
|
class Renderable():
|
||||||
def render(self, renderer):
|
def render(self, renderer):
|
||||||
|
@ -54,8 +55,8 @@ class CitationSpan(SpanContainer):
|
||||||
"""A citation to another article"""
|
"""A citation to another article"""
|
||||||
def __init__(self, spans, cite_target):
|
def __init__(self, spans, cite_target):
|
||||||
super().__init__(spans)
|
super().__init__(spans)
|
||||||
# Normalize citation target by eliminating most whitespace
|
# Normalize citation target
|
||||||
self.cite_target = re.sub(r'\s+', " ", cite_target.strip())
|
self.cite_target = normalize_title(cite_target)
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f"{{{' '.join([str(span) for span in self.spans])}:{self.cite_target}}}"
|
return f"{{{' '.join([str(span) for span in self.spans])}:{self.cite_target}}}"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue