2017-08-27 05:14:16 +00:00
|
|
|
###############################
|
|
|
|
## Lexipython Lexicon engine ##
|
|
|
|
###############################
|
|
|
|
|
|
|
|
import sys # For argv and stderr
|
|
|
|
import os # For reading directories
|
|
|
|
import re # For parsing lex content
|
2017-11-06 07:53:25 +00:00
|
|
|
import io # For writing pages out as UTF-8
|
2017-08-27 05:14:16 +00:00
|
|
|
import networkx # For pagerank analytics
|
|
|
|
from collections import defaultdict # For rank inversion in statistics
|
2017-12-11 05:21:46 +00:00
|
|
|
from urllib import parse
|
2017-08-27 05:14:16 +00:00
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
# Short utility functions for handling titles
|
2017-08-27 05:14:16 +00:00
|
|
|
|
|
|
|
def titlecase(s):
    """
    Returns the title with surrounding whitespace removed and its first
    character capitalized. The rest of the title is left as written.
    """
    stripped = s.strip()
    head, tail = stripped[:1], stripped[1:]
    return head.capitalize() + tail
|
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
def titleescape(s):
    """
    Makes an article title filename-safe.

    Whitespace runs become underscores, every other character outside
    letters, digits and "_.-~" is percent-encoded, and the percent signs are
    then dropped. If the result is longer than 64 characters, it is replaced
    by a hex digest of itself.

    Input:  s, the article title
    Output: a string usable as a file name (without extension)
    """
    import hashlib  # Local import: only this function needs it

    s = s.strip()
    s = re.sub(r"\s+", "_", s)  # Replace whitespace with _
    # Encode all other characters. safe="" so that "/" is encoded as well;
    # otherwise a slash in a title would act as a directory separator.
    s = parse.quote(s, safe="")
    s = s.replace("%", "")  # Strip encoding %s
    if len(s) > 64:  # If the result is unreasonably long,
        # replace it with a digest. The builtin hash() is salted per process
        # for strings, so it would name the same article differently on
        # every build; a real digest keeps the file name stable.
        s = hashlib.sha1(s.encode("utf8")).hexdigest()
    return s
|
|
|
|
|
|
|
|
def titlestrip(s):
    """
    Removes a leading "The ", "An " or "A " from a title, for title sorting.
    Titles without one of these prefixes are returned unchanged.
    """
    for prefix in ("The ", "An ", "A "):
        if s.startswith(prefix):
            return s[len(prefix):]
    return s
|
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
# Main article class
|
2017-08-27 05:14:16 +00:00
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
class LexiconArticle:
    """
    A Lexicon article and its metadata.

    Members:
    author          string: the author of the article
    turn            integer: the turn the article was written for
    title           string: the article title
    title_filesafe  string: the title, escaped, used for filenames
    content         string: the HTML content, with citations replaced by
                    format hooks; literal braces are doubled so the string
                    is a valid str.format template
    citations       dict from format hook string to tuple of link alias and
                    link target title
    wcites          set: titles of written articles cited
    pcites          set: titles of phantom articles cited
    citedby         set: titles of articles that cite this
    The last three are filled in by populate().
    """

    # Matches [[Target]] and [[alias|Target]] citations
    _CITATION_RE = re.compile(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]")

    def __init__(self, author, turn, title, content, citations):
        """
        Creates a LexiconArticle object with the given parameters.
        """
        self.author = author
        self.turn = turn
        self.title = title
        self.title_filesafe = titleescape(title)
        self.content = content
        self.citations = citations
        self.wcites = set()
        self.pcites = set()
        self.citedby = set()

    @staticmethod
    def _escape_braces(text):
        """Doubles literal braces so str.format emits them unchanged."""
        return text.replace("{", "{{").replace("}", "}}")

    @staticmethod
    def from_file_raw(raw_content):
        """
        Parses the contents of a Lexipython source file into a LexiconArticle
        object. If the source file is malformed, returns None.

        The first three lines must be the "# Author:", "# Turn:" and
        "# Title:" headers, in that order; everything after them is the
        article body, split into paragraphs on blank lines.
        """
        headers = raw_content.split('\n', 3)
        if len(headers) != 4:
            print("Header read error")
            return None
        author_header, turn_header, title_header, content_raw = headers
        # Validate and sanitize the author header
        if not author_header.startswith("# Author:"):
            print("Author header missing")
            return None
        author = author_header[9:].strip()
        # Validate and sanitize the turn header
        if not turn_header.startswith("# Turn:"):
            print("Turn header missing")
            return None
        try:
            turn = int(turn_header[7:].strip())
        except ValueError:
            print("Turn header error")
            return None
        # Validate and sanitize the title header
        if not title_header.startswith("# Title:"):
            print("Title header missing")
            return None
        title = titlecase(title_header[8:])
        # Parse the content and extract citations
        content_raw = content_raw.strip()
        if not content_raw:
            # re.split never returns an empty list, so test the text itself
            print("No content")
        content = ""
        citations = {}
        format_id = 1
        for para in re.split("\n\n+", content_raw):
            # Escape angle brackets
            para = para.replace("<", "&lt;").replace(">", "&gt;")
            # Replace bold and italic marks with tags
            para = re.sub(r"//([^/]+)//", r"<i>\1</i>", para)
            para = re.sub(r"\*\*([^*]+)\*\*", r"<b>\1</b>", para)
            # Replace \\LF with <br>LF
            para = re.sub(r"\\\\\n", "<br>\n", para)
            # Abstract citations into the citation record. Text between
            # citations gets its braces doubled so that only the format
            # hooks are live when build_page_content() calls str.format.
            pieces = []
            cursor = 0
            for link_match in LexiconArticle._CITATION_RE.finditer(para):
                # Identify the citation text and cited article
                cite_text = link_match.group(2) or link_match.group(3)
                cite_title = titlecase(link_match.group(3))
                # Record the citation
                hook = "c" + str(format_id)
                citations[hook] = (cite_text, cite_title)
                # Stitch the format id in place of the citation
                pieces.append(LexiconArticle._escape_braces(
                    para[cursor:link_match.start(0)]))
                pieces.append("{" + hook + "}")
                cursor = link_match.end(0)
                format_id += 1  # Increment to the next format citation
            pieces.append(LexiconArticle._escape_braces(para[cursor:]))
            para = "".join(pieces)
            # Convert signature to right-aligned
            if para[:1] == '~':
                para = "<hr><span class=\"signature\"><p>" + para[1:] + "</p></span>\n"
            else:
                para = "<p>" + para + "</p>\n"
            content += para
        return LexiconArticle(author, turn, title, content, citations)

    def build_page_content(self):
        """
        Formats citations into the article content as normal HTML links
        and returns the result. Links whose target is not in wcites get
        the "phantom" class.
        """
        format_map = {
            format_id: "<a href=\"{1}.html\"{2}>{0}</a>".format(
                cite_tuple[0], titleescape(cite_tuple[1]),
                "" if cite_tuple[1] in self.wcites else " class=\"phantom\"")
            for format_id, cite_tuple in self.citations.items()
        }
        return self.content.format(**format_map)

    def build_page_citeblock(self, prev_target, next_target):
        """
        Builds the citeblock content HTML for use in regular article pages.
        For each defined target, links the target page as Previous or Next.

        Input:  prev_target, next_target: article titles, or None
        Output: the citeblock HTML as a string
        """
        citeblock = "<div class=\"content citeblock\">\n"
        # Prev/next links
        if next_target is not None:
            citeblock += "<p style=\"float:right\"><a href=\"{}.html\">Next →</a></p>\n".format(titleescape(next_target))
        if prev_target is not None:
            citeblock += "<p><a href=\"{}.html\">← Previous</a></p>\n".format(titleescape(prev_target))
        elif next_target is not None:
            # Placeholder paragraph beside the floated Next link. It holds a
            # non-breaking space because a whitespace-only <p> renders empty.
            citeblock += "<p>&nbsp;</p>\n"
        # Citations
        cites_links = [
            "<a href=\"{1}.html\"{2}>{0}</a>".format(
                title, titleescape(title),
                "" if title in self.wcites else " class=\"phantom\"")
            for title in sorted(self.wcites | self.pcites)]
        cites_str = " | ".join(cites_links)
        if not cites_str:
            cites_str = "--"
        citeblock += "<p>Citations: {}</p>\n".format(cites_str)
        # Citedby, sorted like the citations so the output is stable
        citedby_links = [
            "<a href=\"{1}.html\">{0}</a>".format(
                title, titleescape(title))
            for title in sorted(self.citedby)]
        citedby_str = " | ".join(citedby_links)
        if not citedby_str:
            citedby_str = "--"
        citeblock += "<p>Cited by: {}</p>\n</div>\n".format(citedby_str)
        return citeblock
|
2017-08-27 05:14:16 +00:00
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
# Parsing functions for source intake
|
|
|
|
|
|
|
|
def parse_from_directory(directory):
    """
    Reads and parses each source file in the given directory.

    Only files ending in ".txt" are read. Files that fail to parse are
    reported on stdout and skipped.

    Input:  directory, the path to the folder to read
    Output: a list of parsed articles
    """
    articles = []
    print("Reading source files from", directory)
    # Sorted so that the parse order does not depend on the file system
    for filename in sorted(os.listdir(directory)):
        # Read only .txt files
        if not filename.endswith(".txt"):
            continue
        print(" Parsing", filename)
        # os.path.join works whether or not directory ends in a separator
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf8") as src_file:
            raw = src_file.read()
        article = LexiconArticle.from_file_raw(raw)
        if article is None:
            print(" ERROR")
        else:
            print(" success:", article.title)
            articles.append(article)
    return articles
|
|
|
|
|
|
|
|
def populate(lexicon_articles):
    """
    Given a list of lexicon articles, fills out citation information
    for each article and creates phantom pages for missing articles.

    Each citation target is added to the citing article's wcites or pcites,
    depending on whether the target has an author, and the citing article's
    title is added to the target's citedby.

    Input:  lexicon_articles, a list of written articles
    Output: a list of the written articles plus one authorless article
            for every cited title that has no written article
    """
    article_by_title = {article.title: article for article in lexicon_articles}
    # Interlink all citations
    for article in lexicon_articles:
        for _, target in article.citations.values():
            # Create article objects for phantom citations
            if target not in article_by_title:
                article_by_title[target] = LexiconArticle(None, sys.maxsize, target, "<p><i>This entry hasn't been written yet.</i></p>", {})
            cited = article_by_title[target]
            # Interlink citations
            if cited.author is None:
                article.pcites.add(target)
            else:
                article.wcites.add(target)
            cited.citedby.add(article.title)
    return list(article_by_title.values())
|
2017-08-27 05:14:16 +00:00
|
|
|
|
|
|
|
def load_resource(filename, cache={}):
    """
    Loads files from the resources directory with caching.

    The mutable default for cache is deliberate: it is the store that is
    shared between calls, so each resource is read from disk only once.

    Input:  filename, the name of a file inside resources/
            cache, dict from filename to file contents
    Output: the contents of the file as a string
    """
    if filename not in cache:
        # Use a context manager so the file handle is closed right away
        with open("resources/" + filename, "r", encoding="utf8") as resource:
            cache[filename] = resource.read()
    return cache[filename]
|
|
|
|
|
|
|
|
def load_config():
    """
    Loads values from the config file.

    Reads lexicon.cfg from the working directory. A value starts on the line
    after a ">>>NAME>>>" marker and runs until a "<<<NAME<<<" marker; lines
    outside such a pair are ignored. Values are stripped of surrounding
    whitespace.

    Output: dict from value name to value
    Raises: SystemExit if a value is never closed or a required value
            is missing
    """
    config = {}
    with open("lexicon.cfg", "r", encoding="utf8") as f:
        line = f.readline()
        while line:
            # Skim lines until a value definition begins
            conf_match = re.match(r">>>([^>]+)>>>\s+", line)
            if not conf_match:
                line = f.readline()
                continue
            # Accumulate the conf value until the value ends
            conf = conf_match.group(1)
            # The name is escaped because it may contain regex metacharacters
            end_marker = re.compile(r"<<<{0}<<<\s+".format(re.escape(conf)))
            value_lines = []
            line = f.readline()
            while line and not end_marker.match(line):
                value_lines.append(line)
                line = f.readline()
            if not line:
                raise SystemExit("Reached EOF while reading config value {}".format(conf))
            config[conf] = "".join(value_lines).strip()
    # Check that all necessary values were configured. LOGO_FILENAME is
    # included because every page builder reads it.
    for config_value in ['LEXICON_TITLE', 'PROMPT', 'SESSION_PAGE', "INDEX_LIST", "LOGO_FILENAME"]:
        if config_value not in config:
            raise SystemExit("Error: {} not set in lexicon.cfg".format(config_value))
    return config
|
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
# Build functions
|
2017-08-27 05:14:16 +00:00
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
def build_contents_page(articles, config):
    """
    Builds the full HTML of the contents page.

    The page lists every article twice: grouped by the index groups in
    config["INDEX_LIST"] (one group per line, matched against the first
    character of the title after titlestrip), and grouped by turn.

    Input:  articles, a list of articles, written and phantom
            config, the dict of config values
    Output: the page HTML as a string
    """
    parts = []
    # Article counts
    phantom_count = sum(1 for article in articles if article.author is None)
    if phantom_count == 0:
        parts.append("<p>There are <b>{0}</b> entries in this lexicon.</p>\n".format(len(articles)))
    else:
        parts.append("<p>There are <b>{0}</b> entries, <b>{1}</b> written and <b>{2}</b> phantom.</p>\n".format(
            len(articles), len(articles) - phantom_count, phantom_count))
    # Prepare article links
    link_by_title = {article.title: "<a href=\"../article/{1}.html\"{2}>{0}</a>".format(
            article.title, article.title_filesafe,
            "" if article.author is not None else " class=\"phantom\"")
        for article in articles}

    def list_item(article):
        return "<li>" + link_by_title[article.title] + "</li>\n"

    # Write the articles in alphabetical order
    parts.append(load_resource("contents.html"))
    parts.append("<div id=\"index-order\" style=\"display:block\">\n<ul>\n")
    indices = config["INDEX_LIST"].split("\n")
    unlisted = sorted(articles, key=lambda a: a.title)
    for index_str in indices:
        parts.append("<h3>{0}</h3>\n".format(index_str))
        # Only articles not yet listed are considered, so an article that
        # matches several index groups appears under the first one only.
        leftover = []
        for article in unlisted:
            # [:1] rather than [0] so an empty title cannot raise
            initial = titlestrip(article.title)[:1].upper()
            if initial and initial in index_str:
                parts.append(list_item(article))
            else:
                leftover.append(article)
        unlisted = leftover
    if unlisted:
        parts.append("<h3>&c.</h3>\n")
        for article in unlisted:
            parts.append(list_item(article))
    parts.append("</ul>\n</div>\n")
    # Write the articles in turn order
    parts.append("<div id=\"turn-order\" style=\"display:none\">\n<ul>\n")
    # default=0 keeps the page buildable before any article is written
    latest_turn = max(
        (article.turn for article in articles if article.author is not None),
        default=0)
    turn_order = sorted(articles, key=lambda a: (a.turn, a.title))
    for turn_num in range(1, latest_turn + 1):
        parts.append("<h3>Turn {0}</h3>\n".format(turn_num))
        for article in turn_order:
            if article.turn == turn_num:
                parts.append(list_item(article))
    # Everything whose turn was not covered by a heading above
    unwritten = [article for article in turn_order
        if not 1 <= article.turn <= latest_turn]
    if unwritten:
        parts.append("<h3>Unwritten</h3>\n")
        for article in unwritten:
            parts.append(list_item(article))
    parts.append("</ul>\n</div>\n")
    content = "".join(parts)
    # Fill in the page skeleton
    entry_skeleton = load_resource("entry-page.html")
    css = load_resource("lexicon.css")
    return entry_skeleton.format(
        title="Index of " + config["LEXICON_TITLE"],
        lexicon=config["LEXICON_TITLE"],
        css=css,
        logo=config["LOGO_FILENAME"],
        prompt=config["PROMPT"],
        content=content,
        citeblock="")
|
|
|
|
|
|
|
|
def build_rules_page(config):
    """
    Builds the full HTML of the rules page: the rules.html resource
    placed inside the entry page skeleton, with an empty citeblock.
    """
    rules_html = load_resource("rules.html")
    # Fill in the entry skeleton
    skeleton = load_resource("entry-page.html")
    stylesheet = load_resource("lexicon.css")
    fields = {
        "title": "Rules",
        "lexicon": config["LEXICON_TITLE"],
        "css": stylesheet,
        "logo": config["LOGO_FILENAME"],
        "prompt": config["PROMPT"],
        "content": rules_html,
        "citeblock": "",
    }
    return skeleton.format(**fields)
|
|
|
|
|
|
|
|
def build_formatting_page(config):
    """
    Builds the full HTML of the formatting page: the formatting.html
    resource placed inside the entry page skeleton, with an empty citeblock.
    """
    formatting_html = load_resource("formatting.html")
    # Fill in the entry skeleton
    skeleton = load_resource("entry-page.html")
    stylesheet = load_resource("lexicon.css")
    fields = {
        "title": "Formatting",
        "lexicon": config["LEXICON_TITLE"],
        "css": stylesheet,
        "logo": config["LOGO_FILENAME"],
        "prompt": config["PROMPT"],
        "content": formatting_html,
        "citeblock": "",
    }
    return skeleton.format(**fields)
|
|
|
|
|
|
|
|
def build_session_page(config):
    """
    Builds the full HTML of the session page: the SESSION_PAGE config
    value placed inside the entry page skeleton, with an empty citeblock.
    """
    # Fill in the entry skeleton
    skeleton = load_resource("entry-page.html")
    stylesheet = load_resource("lexicon.css")
    fields = {
        "title": config["LEXICON_TITLE"],
        "lexicon": config["LEXICON_TITLE"],
        "css": stylesheet,
        "logo": config["LOGO_FILENAME"],
        "prompt": config["PROMPT"],
        "content": config["SESSION_PAGE"],
        "citeblock": "",
    }
    return skeleton.format(**fields)
|
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
def _statistics_block(heading, lines):
    """Wraps a heading and its result lines in one moveable statistics box."""
    return ("<div class=\"moveable\">\n"
        + "<p><u>" + heading + "</u><br>\n"
        + "<br>\n".join(lines)
        + "</p>\n"
        + "</div>\n")


def build_statistics_page(articles, config):
    """
    Builds the full HTML of the statistics page.

    Input:  articles, a list of articles, written and phantom, with the
            citation sets filled in
            config, the dict of config values
    Output: the page HTML as a string
    """
    content = ""
    cite_map = {
        article.title: [cite_tuple[1] for cite_tuple in article.citations.values()]
        for article in articles}

    # Pages by pagerank
    # NOTE(review): the graph is undirected, so a citation counts equally for
    # the citing and the cited page. Confirm whether DiGraph was intended.
    G = networkx.Graph()
    for citer, citeds in cite_map.items():
        for cited in citeds:
            G.add_edge(citer, cited)
    ranks = networkx.pagerank(G)
    sranks = sorted(ranks.items(), key=lambda x: x[1], reverse=True)
    content += _statistics_block(
        "Top 10 pages by page rank:",
        ["{0} – {1}".format(place, title)
            for place, (title, _) in enumerate(sranks[:10], 1)])

    # Top number of citations made
    citation_count = defaultdict(list)
    for title, cites in cite_map.items():
        citation_count[len(cites)].append(title)
    content += _statistics_block(
        "Most citations made from:",
        ["{0} – {1}".format(count, "; ".join(titles))
            for count, titles in sorted(citation_count.items(), reverse=True)[:3]])

    # Top number of times cited, counting each citing article once
    cited_by_map = defaultdict(list)
    for citer, cites in cite_map.items():
        # sorted() so the grouping below comes out in a stable order
        for cited in sorted(set(cites)):
            cited_by_map[cited].append(citer)
    cited_count = defaultdict(list)
    for title, citers in cited_by_map.items():
        cited_count[len(citers)].append(title)
    content += _statistics_block(
        "Most citations made to:",
        ["{0} – {1}".format(count, "; ".join(titles))
            for count, titles in sorted(cited_count.items(), reverse=True)[:3]])

    # Author pageranks
    authors = sorted(set(
        article.author for article in articles if article.author is not None))
    articles_by = {
        author: [a for a in articles if a.author == author]
        for author in authors}
    # An article with no citation edges never enters the graph and so has
    # no rank entry; count it as 0 instead of raising KeyError.
    author_rank = {
        author: sum(ranks.get(a.title, 0) for a in written)
        for author, written in articles_by.items()}
    content += _statistics_block(
        "Author total page rank:",
        ["{0} – {1}".format(author, round(rank, 3))
            for author, rank in sorted(author_rank.items(), key=lambda t: -t[1])])

    # Author citations made
    author_cite_count = {
        author: sum(len(a.wcites | a.pcites) for a in written)
        for author, written in articles_by.items()}
    content += _statistics_block(
        "Citations made by author",
        ["{0} – {1}".format(author, count)
            for author, count in sorted(author_cite_count.items(), key=lambda t: -t[1])])

    # Author cited count
    cited_times = {author: 0 for author in authors}
    for article in articles:
        if article.author is not None:
            cited_times[article.author] += len(article.citedby)
    content += _statistics_block(
        "Citations made to author",
        ["{0} – {1}".format(author, count)
            for author, count in sorted(cited_times.items(), key=lambda t: -t[1])])

    # Fill in the entry skeleton
    entry_skeleton = load_resource("entry-page.html")
    css = load_resource("lexicon.css")
    return entry_skeleton.format(
        title="Statistics",
        lexicon=config["LEXICON_TITLE"],
        css=css,
        logo=config["LOGO_FILENAME"],
        prompt=config["PROMPT"],
        content=content,
        citeblock="")
|
|
|
|
|
2017-12-11 05:21:46 +00:00
|
|
|
def build_graphviz_file(cite_map):
    """
    Builds a citation graph in dot format for Graphviz.

    Every title gets its own node, named n0, n1, ... in order: first the
    keys of cite_map, then the cited titles that are not keys, sorted.
    Labels are the titles cut to 20 characters.

    Input:  cite_map, dict from article title to the list of titles it cites
    Output: the dot source as a string
    """
    written_entries = list(cite_map)
    written = set(written_entries)
    phantom_entries = sorted(set(
        title for cites in cite_map.values() for title in cites
        if title not in written))
    all_entries = written_entries + phantom_entries
    # Node names are keyed on the full title. Hashing the shortened label
    # merged titles that share their first 20 characters, and hash() of a
    # string changes between runs.
    node_name = {title: "n{}".format(i) for i, title in enumerate(all_entries)}
    result = ["digraph G {\n"]
    # Node labeling
    for title in all_entries:
        # Escape backslashes and quotes so the label stays one dot string
        label = title[:20].replace("\\", "\\\\").replace("\"", "\\\"")
        result.append("{} [label=\"{}\"];\n".format(node_name[title], label))
    # Edges
    for citer in written_entries:
        for cited in cite_map[citer]:
            result.append("{}->{};\n".format(node_name[citer], node_name[cited]))
    # Return result
    result.append("overlap=false;\n}\n")
    return "".join(result)
|
|
|
|
|
2017-08-27 05:14:16 +00:00
|
|
|
# Summative functions
|
|
|
|
|
|
|
|
def command_build(argv):
    """
    Builds the lexicon: parses the sources in raw/, then writes the
    redirect page, one page per article and the default pages under out/.

    Input:  argv, the command line; argv[2], if given, must be "partial"
            or "full", otherwise an error is printed and nothing is built
    """
    if len(argv) >= 3 and argv[2] not in ("partial", "full"):
        print("unknown build type: " + argv[2])
        return
    # Load content
    config = load_config()
    entry_skeleton = load_resource("entry-page.html")
    css = load_resource("lexicon.css")
    articles = sorted(
        populate(parse_from_directory("raw/")),
        key=lambda a: (a.turn, a.title))

    # Make sure every output directory exists before writing into it
    for subdir in ("article", "contents", "rules", "formatting", "session", "statistics"):
        os.makedirs(os.path.join("out", subdir), exist_ok=True)

    # Write the redirect page
    print("Writing redirect page...")
    with open("out/index.html", "w", encoding="utf8") as f:
        f.write(load_resource("redirect.html").format(lexicon=config["LEXICON_TITLE"]))

    # Write the article pages
    print("Deleting old article pages...")
    for filename in os.listdir("out/article/"):
        if filename.endswith(".html"):
            os.remove("out/article/" + filename)
    print("Writing article pages...")
    last_idx = len(articles) - 1
    for idx, article in enumerate(articles):
        content = article.build_page_content()
        citeblock = article.build_page_citeblock(
            None if idx == 0 else articles[idx - 1].title,
            None if idx == last_idx else articles[idx + 1].title)
        article_html = entry_skeleton.format(
            title=article.title,
            lexicon=config["LEXICON_TITLE"],
            css=css,
            logo=config["LOGO_FILENAME"],
            prompt=config["PROMPT"],
            content=content,
            citeblock=citeblock)
        # The page is built before the file is opened, so a formatting
        # error does not leave an empty file behind
        with open("out/article/" + article.title_filesafe + ".html", "w", encoding="utf8") as f:
            f.write(article_html)
        print(" Wrote " + article.title)

    # Write default pages
    print("Writing default pages...")
    with open("out/contents/index.html", "w", encoding="utf8") as f:
        f.write(build_contents_page(articles, config))
    print(" Wrote Contents")
    with open("out/rules/index.html", "w", encoding="utf8") as f:
        f.write(build_rules_page(config))
    print(" Wrote Rules")
    with open("out/formatting/index.html", "w", encoding="utf8") as f:
        f.write(build_formatting_page(config))
    print(" Wrote Formatting")
    with open("out/session/index.html", "w", encoding="utf8") as f:
        f.write(build_session_page(config))
    print(" Wrote Session")
    with open("out/statistics/index.html", "w", encoding="utf8") as f:
        f.write(build_statistics_page(articles, config))
    print(" Wrote Statistics")

    # Write auxiliary files
    # TODO: write graphviz file
    # TODO: write compiled lexicon page
|
|
|
|
|
2017-08-27 05:14:16 +00:00
|
|
|
def main():
    """
    Entry point: runs the command named by the first command-line argument,
    or prints the list of commands when none is given.
    """
    args = sys.argv
    if len(args) < 2:
        for usage_line in (
                "Available commands:",
                " - build [partial] : Build the lexicon and generate phantom stubs for all unwritten articles.",
                " - build full : Build the lexicon and generate Ersatz pages for all unwritten articles."):
            print(usage_line)
        return
    command = args[1]
    if command == "build":
        command_build(args)
        return
    print("Unknown command: " + command)


if __name__ == "__main__":
    main()
|