Move utility functions to utils.py
This commit is contained in:
parent
1e71ff0061
commit
deeaeafb39
|
@ -8,31 +8,6 @@ import re # For parsing lex content
|
||||||
import io # For writing pages out as UTF-8
|
import io # For writing pages out as UTF-8
|
||||||
import networkx # For pagerank analytics
|
import networkx # For pagerank analytics
|
||||||
from collections import defaultdict # For rank inversion in statistics
|
from collections import defaultdict # For rank inversion in statistics
|
||||||
from urllib import parse
|
|
||||||
|
|
||||||
# Short utility functions for handling titles
|
|
||||||
|
|
||||||
def titlecase(s):
    """Trims surrounding whitespace and capitalizes the first character of a title."""
    trimmed = s.strip()
    # Only the leading character is touched; the rest of the title keeps its case
    head, tail = trimmed[:1], trimmed[1:]
    return head.capitalize() + tail
|
|
||||||
|
|
||||||
def titleescape(s):
    """Makes an article title filename-safe.

    Surrounding whitespace is dropped, inner whitespace runs become "_",
    every character outside the unreserved URL set is percent-encoded and
    the "%" signs are then removed. If the result is longer than 64
    characters it is replaced by a 16-digit hex digest of itself.
    """
    # Local import keeps this block self-contained
    import hashlib

    # split() discards leading/trailing whitespace and collapses inner runs
    s = "_".join(s.split())
    # safe="" so that "/" is encoded too; quote() leaves it alone by default,
    # which would let a title escape into a subdirectory
    s = parse.quote(s, safe="")
    s = s.replace("%", "")  # Strip encoding %s
    if len(s) > 64:
        # The built-in hash() is salted per process for strings, so it would
        # yield a different filename on every run; use a stable digest instead
        s = hashlib.sha256(s.encode("utf8")).hexdigest()[:16]
    return s
|
|
||||||
|
|
||||||
def titlestrip(s):
    """Drops a leading "The ", "An " or "A " so titles sort by the following word."""
    for article in ("The ", "An ", "A "):
        if s.startswith(article):
            return s[len(article):]
    # No recognized leading article: the title is returned unchanged
    return s
|
|
||||||
|
|
||||||
# Main article class
|
# Main article class
|
||||||
|
|
||||||
|
@ -227,41 +202,6 @@ def populate(lexicon_articles):
|
||||||
article_by_title[target].citedby.add(article.title)
|
article_by_title[target].citedby.add(article.title)
|
||||||
return list(article_by_title.values())
|
return list(article_by_title.values())
|
||||||
|
|
||||||
def load_resource(filename, cache={}):
    """Loads files from the resources directory with caching.

    filename is looked up under "resources/" and read as UTF-8 text.
    cache maps filenames to their contents; the mutable default is
    intentional so that it acts as a memo shared between calls.
    """
    if filename not in cache:
        # Use a context manager so the file handle is closed right after reading
        with open("resources/" + filename, "r", encoding="utf8") as f:
            cache[filename] = f.read()
    return cache[filename]
|
|
||||||
|
|
||||||
def load_config():
    """Loads values from the config file.

    Reads "lexicon.cfg" as UTF-8. A value starts on a line of the form
    ">>>NAME>>>" and runs until a line beginning with "<<<NAME<<<"; the
    lines in between, stripped of surrounding whitespace, become
    config[NAME]. Lines outside such a pair are ignored.

    Returns a dict mapping value names to their text.
    Raises SystemExit if a value is never terminated or if one of
    LEXICON_TITLE, PROMPT, SESSION_PAGE or INDEX_LIST is not defined.
    """
    start_pattern = re.compile(r">>>([^>]+)>>>\s+")
    config = {}
    with open("lexicon.cfg", "r", encoding="utf8") as f:
        lines = iter(f)
        for line in lines:
            # Skim lines until a value definition begins
            start_match = start_pattern.match(line)
            if not start_match:
                continue
            conf = start_match.group(1)
            # The name is escaped because it may contain regex metacharacters.
            # "(?:\s|$)" also accepts a terminator on the last line of a file
            # that has no trailing newline.
            end_pattern = re.compile(
                r"<<<{}<<<(?:\s|$)".format(re.escape(conf)))
            # Accumulate the conf value until the value ends
            value_lines = []
            for line in lines:
                if end_pattern.match(line):
                    break
                value_lines.append(line)
            else:
                # The file ran out before the terminator was seen
                raise SystemExit(
                    "Reached EOF while reading config value {}".format(conf))
            config[conf] = "".join(value_lines).strip()
    # Check that all necessary values were configured
    for config_value in ['LEXICON_TITLE', 'PROMPT', 'SESSION_PAGE', "INDEX_LIST"]:
        if config_value not in config:
            raise SystemExit(
                "Error: {} not set in lexicon.cfg".format(config_value))
    return config
|
|
||||||
|
|
||||||
# Build functions
|
# Build functions
|
||||||
|
|
||||||
def build_contents_page(articles, config):
|
def build_contents_page(articles, config):
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
import os
import re
from urllib import parse
|
||||||
|
|
||||||
|
# Short utility functions for handling titles
|
||||||
|
|
||||||
|
def titlecase(s):
    """
    Strips surrounding whitespace and capitalizes the first character.
    """
    trimmed = s.strip()
    # Everything after the first character is left exactly as written
    first, remainder = trimmed[:1], trimmed[1:]
    return first.capitalize() + remainder
|
||||||
|
|
||||||
|
def titleescape(s):
    """
    Makes an article title filename-safe.

    Surrounding whitespace is dropped, inner whitespace runs become "_",
    every character outside the unreserved URL set is percent-encoded and
    the "%" signs are then removed. If the result is longer than 64
    characters it is replaced by a 16-digit hex digest of itself.
    """
    # Local import keeps this block self-contained
    import hashlib

    # split() discards leading/trailing whitespace and collapses inner runs;
    # it also avoids relying on the re module here
    s = "_".join(s.split())
    # safe="" so that "/" is encoded too; quote() leaves it alone by default,
    # which would let a title escape into a subdirectory
    s = parse.quote(s, safe="")
    s = s.replace("%", "")  # Strip encoding %s
    if len(s) > 64:
        # The built-in hash() is salted per process for strings, so it would
        # yield a different filename on every run; use a stable digest instead
        s = hashlib.sha256(s.encode("utf8")).hexdigest()[:16]
    return s
|
||||||
|
|
||||||
|
def titlestrip(s):
    """
    Removes a leading "The ", "An " or "A " for title sorting.
    """
    for article in ("The ", "An ", "A "):
        if s.startswith(article):
            return s[len(article):]
    # No recognized leading article: the title is returned unchanged
    return s
|
||||||
|
|
||||||
|
# Load functions
|
||||||
|
|
||||||
|
def load_resource(filename, cache={}):
    """
    Returns the text of a file under src/resources, reading it at most once.

    The mutable default for cache is intentional: it is the memo that
    persists between calls.
    """
    if filename in cache:
        return cache[filename]
    resource_path = os.path.join("src", "resources", filename)
    with open(resource_path, "r", encoding="utf8") as resource_file:
        cache[filename] = resource_file.read()
    return cache[filename]
|
||||||
|
|
||||||
|
def load_config(name):
    """
    Loads values from a Lexicon's config file.

    Reads lexicon/<name>/lexicon.cfg as UTF-8. A value starts on a line of
    the form ">>>KEY>>>" and runs until a line beginning with "<<<KEY<<<";
    the lines in between, stripped of surrounding whitespace, become
    config[KEY]. Lines outside such a pair are ignored.

    Returns a dict mapping value names to their text.
    Raises SystemExit if a value is never terminated or if one of
    LEXICON_TITLE, PROMPT, SESSION_PAGE or INDEX_LIST is not defined.
    """
    start_pattern = re.compile(r">>>([^>]+)>>>\s+")
    config = {}
    with open(os.path.join("lexicon", name, "lexicon.cfg"), "r", encoding="utf8") as f:
        lines = iter(f)
        for line in lines:
            # Skim lines until a value definition begins
            start_match = start_pattern.match(line)
            if not start_match:
                continue
            conf = start_match.group(1)
            # The key is escaped because it may contain regex metacharacters.
            # "(?:\s|$)" also accepts a terminator on the last line of a file
            # that has no trailing newline.
            end_pattern = re.compile(
                r"<<<{}<<<(?:\s|$)".format(re.escape(conf)))
            # Accumulate the conf value until the value ends
            value_lines = []
            for line in lines:
                if end_pattern.match(line):
                    break
                value_lines.append(line)
            else:
                # The file ran out before the terminator was seen
                # TODO Replace SystemExit with a dedicated exception
                raise SystemExit(
                    "Reached EOF while reading config value {}".format(conf))
            config[conf] = "".join(value_lines).strip()
    # Check that all necessary values were configured
    for config_value in ['LEXICON_TITLE', 'PROMPT', 'SESSION_PAGE', "INDEX_LIST"]:
        if config_value not in config:
            # TODO Replace SystemExit with a dedicated exception
            raise SystemExit(
                "Error: {} not set in lexicon.cfg".format(config_value))
    return config
|
Loading…
Reference in New Issue