Move utility functions to utils.py

This commit is contained in:
Jaculabilis 2018-05-29 14:27:44 -07:00
parent 1e71ff0061
commit deeaeafb39
2 changed files with 74 additions and 60 deletions

View File

@ -8,31 +8,6 @@ import re # For parsing lex content
import io # For writing pages out as UTF-8 import io # For writing pages out as UTF-8
import networkx # For pagerank analytics import networkx # For pagerank analytics
from collections import defaultdict # For rank inversion in statistics from collections import defaultdict # For rank inversion in statistics
from urllib import parse
# Short utility functions for handling titles
def titlecase(s):
    """Strips surrounding whitespace and capitalizes the first character of a title."""
    trimmed = s.strip()
    head, tail = trimmed[:1], trimmed[1:]
    # Only the first character is touched; the rest of the title keeps its casing
    return head.capitalize() + tail
def titleescape(s):
    """Makes an article title filename-safe.

    Whitespace runs become underscores, every other character outside the
    URL "unreserved" set is percent-encoded, and the percent signs are then
    dropped. Results longer than 64 characters are replaced by a 16-digit
    hex digest so filenames stay short.
    """
    import hashlib  # Local import: only needed for the long-title fallback
    s = s.strip()
    s = re.sub(r"\s+", '_', s)  # Replace whitespace with _
    # safe="" so that "/" is encoded too; the default leaves it in place,
    # which would turn the title into a path with a directory component
    s = parse.quote(s, safe="")
    s = s.replace("%", "")  # Strip encoding %s
    if len(s) > 64:  # If the result is unreasonably long,
        # Replace it with a hex digest. The builtin hash() is salted per
        # process for strings, so it would yield a different filename on
        # every run; a real digest is stable.
        s = hashlib.sha256(s.encode("utf8")).hexdigest()[:16]
    return s
def titlestrip(s):
    """Drops a leading "The ", "An " or "A " so titles sort by their first real word."""
    for article in ("The ", "An ", "A "):
        if s.startswith(article):
            return s[len(article):]
    # No leading article: the title is returned untouched
    return s
# Main article class # Main article class
@ -227,41 +202,6 @@ def populate(lexicon_articles):
article_by_title[target].citedby.add(article.title) article_by_title[target].citedby.add(article.title)
return list(article_by_title.values()) return list(article_by_title.values())
def load_resource(filename, cache={}):
    """Loads files from the resources directory with caching.

    The mutable default for `cache` is intentional: it is shared between
    calls and acts as the memo table, so each file is read at most once.
    Returns the file's text content, decoded as UTF-8.
    """
    if filename not in cache:
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector
        with open("resources/" + filename, "r", encoding="utf8") as f:
            cache[filename] = f.read()
    return cache[filename]
def load_config():
    """Loads values from the config file.

    Reads lexicon.cfg from the current directory. A value starts on a line
    beginning with ">>>NAME>>>" and ends on a line beginning with
    "<<<NAME<<<"; each marker must be followed by whitespace (a newline is
    enough). Everything between the two markers, stripped of surrounding
    whitespace, becomes config[NAME]. Lines outside of a value are ignored.

    Raises SystemExit if the file ends inside a value or if any of
    LEXICON_TITLE, PROMPT, SESSION_PAGE or INDEX_LIST is missing.
    """
    config = {}
    with open("lexicon.cfg", "r", encoding="utf8") as f:
        line = f.readline()
        while line:
            # Skim lines until a value definition begins
            conf_match = re.match(r">>>([^>]+)>>>\s+", line)
            if not conf_match:
                line = f.readline()
                continue
            # Accumulate the conf value until the value ends
            conf = conf_match.group(1)
            # Escape the name so characters such as "(" or "." in it are
            # matched literally instead of being read as regex syntax
            end_marker = re.compile(r"<<<{0}<<<\s+".format(re.escape(conf)))
            value_lines = []
            line = f.readline()
            while line and not end_marker.match(line):
                value_lines.append(line)
                line = f.readline()
            if not line:
                raise SystemExit("Reached EOF while reading config value {}".format(conf))
            config[conf] = "".join(value_lines).strip()
    # Check that all necessary values were configured
    for config_value in ['LEXICON_TITLE', 'PROMPT', 'SESSION_PAGE', "INDEX_LIST"]:
        if config_value not in config:
            # The message names the file that was actually read above
            raise SystemExit("Error: {} not set in lexicon.cfg".format(config_value))
    return config
# Build functions # Build functions
def build_contents_page(articles, config): def build_contents_page(articles, config):

74
src/utils.py Normal file
View File

@ -0,0 +1,74 @@
import hashlib
import os
import re
from urllib import parse
# Short utility functions for handling titles
def titlecase(s):
    """
    Trims surrounding whitespace and capitalizes the first character.
    """
    trimmed = s.strip()
    first, rest = trimmed[:1], trimmed[1:]
    # The remainder is appended as-is so existing casing is preserved
    return first.capitalize() + rest
def titleescape(s):
    """
    Makes an article title filename-safe.

    Whitespace runs become underscores, every other character outside the
    URL "unreserved" set is percent-encoded, and the percent signs are then
    dropped. Results longer than 64 characters are replaced by a 16-digit
    hex digest so filenames stay short.
    """
    s = s.strip()
    s = re.sub(r"\s+", '_', s)  # Replace whitespace with _
    # safe="" so that "/" is encoded too; the default leaves it in place,
    # which would turn the title into a path with a directory component
    s = parse.quote(s, safe="")
    s = s.replace("%", "")  # Strip encoding %s
    if len(s) > 64:  # If the result is unreasonably long,
        # Replace it with a hex digest. The builtin hash() is salted per
        # process for strings, so it would yield a different filename on
        # every run; a real digest is stable.
        s = hashlib.sha256(s.encode("utf8")).hexdigest()[:16]
    return s
def titlestrip(s):
    """
    Removes a leading "The ", "An " or "A " for title sorting.
    """
    leading_articles = ("The ", "An ", "A ")
    for prefix in leading_articles:
        if s.startswith(prefix):
            return s[len(prefix):]
    # Titles without a leading article are returned unchanged
    return s
# Load functions
def load_resource(filename, cache={}):
    """
    Returns the text of a file under src/resources, reading it at most once.

    The mutable default for `cache` is shared between calls and serves as
    the memo table; callers may pass their own dict instead.
    """
    try:
        return cache[filename]
    except KeyError:
        pass
    # Cache miss: read the file as UTF-8 and remember its content
    resource_path = os.path.join("src", "resources", filename)
    with open(resource_path, "r", encoding="utf8") as handle:
        content = handle.read()
    cache[filename] = content
    return content
def load_config(name):
    """
    Loads values from a Lexicon's config file.

    Reads lexicon/<name>/lexicon.cfg. A value starts on a line beginning
    with ">>>KEY>>>" and ends on a line beginning with "<<<KEY<<<"; each
    marker must be followed by whitespace (a newline is enough). Everything
    between the two markers, stripped of surrounding whitespace, becomes
    config[KEY]. Lines outside of a value are ignored.

    Raises SystemExit if the file ends inside a value or if any of
    LEXICON_TITLE, PROMPT, SESSION_PAGE or INDEX_LIST is missing.
    """
    config = {}
    with open(os.path.join("lexicon", name, "lexicon.cfg"), "r", encoding="utf8") as f:
        line = f.readline()
        while line:
            # Skim lines until a value definition begins
            conf_match = re.match(r">>>([^>]+)>>>\s+", line)
            if not conf_match:
                line = f.readline()
                continue
            # Accumulate the conf value until the value ends
            conf = conf_match.group(1)
            # Escape the key so characters such as "(" or "." in it are
            # matched literally instead of being read as regex syntax
            end_marker = re.compile(r"<<<{0}<<<\s+".format(re.escape(conf)))
            value_lines = []
            line = f.readline()
            while line and not end_marker.match(line):
                value_lines.append(line)
                line = f.readline()
            if not line:
                # TODO Not this
                raise SystemExit("Reached EOF while reading config value {}".format(conf))
            config[conf] = "".join(value_lines).strip()
    # Check that all necessary values were configured
    for config_value in ['LEXICON_TITLE', 'PROMPT', 'SESSION_PAGE', "INDEX_LIST"]:
        if config_value not in config:
            # TODO Not this either
            # The message names the file that was actually read above
            raise SystemExit("Error: {} not set in lexicon.cfg".format(config_value))
    return config