import sys # For argv and stderr
import os # For reading directories
import re # For parsing lex content
import io # For writing pages out as UTF-8
import networkx # For pagerank analytics
from collections import defaultdict # For rank inversion in statistics
from src import utils
from src.article import LexiconArticle
class LexiconPage:
    """
    Wraps a page skeleton (a str.format template) together with a set of
    stored keyword arguments, so values shared by every page only need to be
    supplied once.
    """

    def __init__(self, skeleton=None, page=None):
        """
        Create a page from a skeleton string, or as a copy of another page.

        When page is given, its skeleton and a shallow copy of its kwargs are
        used and the skeleton argument is ignored.
        """
        if page is None:
            self.skeleton = skeleton
            self.kwargs = {}
        else:
            self.skeleton = page.skeleton
            self.kwargs = dict(page.kwargs)

    def add_kwargs(self, **kwargs):
        """Store kwargs for later format() calls, overwriting existing keys."""
        for name, value in kwargs.items():
            self.kwargs[name] = value

    def format(self, **kwargs):
        """
        Format the skeleton with the stored kwargs plus the ones given here.

        Call-time kwargs take precedence over stored ones; the stored kwargs
        are not modified.
        """
        merged = dict(self.kwargs)
        merged.update(kwargs)
        return self.skeleton.format(**merged)
def build_contents_page(page, articles, index_list):
"""
Builds the full HTML of the contents page.

page is formatted with title="Index" and the generated content. articles is
iterated several times and passed to len(); each article is read for its
title, title_filesafe, player and turn attributes. index_list is a string
that is split on newlines into index headings.

The articles are listed twice: once alphabetically under the index
headings, and once grouped by turn number.
"""
# NOTE(review): several string literals in this function are split across
# physical lines and appear to have lost their HTML markup; confirm the
# intended markup against version control before editing them.
content = "
"
# Head the contents page with counts of written and phantom articles
# An article whose player is None is counted as a phantom
phantom_count = len([article for article in articles if article.player is None])
if phantom_count == 0:
content += "
There are {0} entries in this lexicon.
\n".format(len(articles))
else:
content += "
There are {0} entries, {1} written and {2} phantom.
\n".format(
len(articles), len(articles) - phantom_count, phantom_count)
# Prepare article links
# NOTE(review): format() is given three arguments (title, filesafe title and
# a phantom class attribute) but the visible template only uses {0};
# presumably the lost markup referenced {1} and {2} -- confirm.
link_by_title = {article.title : "
{0}".format(
article.title, article.title_filesafe,
" class=\"phantom\"" if article.player is None else "")
for article in articles}
# Write the articles in alphabetical order
content += utils.load_resource("contents.html")
content += "
\n
\n"
indices = index_list.split("\n")
alphabetical_order = sorted(
articles,
key=lambda a: utils.titlesort(a.title))
# Articles are removed from check_off as they are listed under an index;
# whatever remains is listed under the "&c." heading below
check_off = list(alphabetical_order)
for index_str in indices:
content += "{0}
\n".format(index_str)
for article in alphabetical_order:
# An article belongs to an index when the uppercased first character of
# its sort key occurs in the index string
# NOTE(review): if the same character occurs in two index strings, the
# second check_off.remove() for that article raises ValueError -- confirm
# that index strings never overlap.
if (utils.titlesort(article.title)[0].upper() in index_str):
check_off.remove(article)
content += "- {}
\n".format(link_by_title[article.title])
if len(check_off) > 0:
content += "&c.
\n"
for article in check_off:
content += "- {}
\n".format(link_by_title[article.title])
content += "
\n
\n"
# Write the articles in turn order
content += "
\n
\n"
# Only written articles (player is not None) determine the turn range
# NOTE(review): min()/max() raise ValueError when no article has a player.
turn_numbers = [article.turn for article in articles if article.player is not None]
first_turn, last_turn = min(turn_numbers), max(turn_numbers)
# NOTE(review): this sort compares the turn of every article, phantoms
# included; presumably phantom articles carry a comparable turn value -- confirm.
turn_order = sorted(
articles,
key=lambda a: (a.turn, utils.titlesort(a.title)))
# Articles whose turn falls outside first_turn..last_turn stay in check_off
# and are listed under the "Unwritten" heading
check_off = list(turn_order)
for turn_num in range(first_turn, last_turn + 1):
content += "Turn {0}
\n".format(turn_num)
for article in turn_order:
if article.turn == turn_num:
check_off.remove(article)
content += "- {}
\n".format(link_by_title[article.title])
if len(check_off) > 0:
content += "Unwritten
\n"
for article in check_off:
content += "- {}
\n".format(link_by_title[article.title])
content += "
\n
\n"
# Fill in the page skeleton
return page.format(title="Index", content=content)
def build_rules_page(page):
    """
    Returns the rules page: the "rules.html" resource placed into the page
    skeleton under the title "Rules".
    """
    rules_html = utils.load_resource("rules.html")
    return page.format(title="Rules", content=rules_html)
def build_formatting_page(page):
    """
    Returns the formatting page: the "formatting.html" resource placed into
    the page skeleton under the title "Formatting".
    """
    formatting_html = utils.load_resource("formatting.html")
    return page.format(title="Formatting", content=formatting_html)
def build_session_page(page, session_content):
"""
Builds the full HTML of the session page.

session_content is substituted into a wrapper template, and the result is
placed into the page skeleton under the title "Session".
"""
# Fill in the entry skeleton
# NOTE(review): the wrapper literal below is split across physical lines and
# appears to have lost its HTML markup; confirm against version control.
content = "
{}
".format(session_content)
return page.format(title="Session", content=content)
def reverse_statistics_dict(stats, reverse=True):
    """
    Inverts a title -> value mapping into a list of (value, titles) pairs.

    Titles sharing a value are grouped together and ordered by
    utils.titlesort. The pairs are ordered by value, descending when reverse
    is true (the default) and ascending otherwise.
    """
    titles_by_value = {}
    for title, value in stats.items():
        titles_by_value.setdefault(value, []).append(title)
    grouped = [
        (value, sorted(titles, key=utils.titlesort))
        for value, titles in titles_by_value.items()
    ]
    grouped.sort(key=lambda pair: pair[0], reverse=reverse)
    return grouped
def itemize(stats_list):
    """
    Lazily renders each entry of stats_list as a display string.

    Every entry is indexed as entry[0] (the heading, formatted with
    str.format) and entry[1] (an iterable of strings, joined with "; ").
    Returns a map iterator, so the result can be consumed only once.
    """
    def render(entry):
        heading = entry[0]
        joined_titles = "; ".join(entry[1])
        return "{0} – {1}".format(heading, joined_titles)

    return map(render, stats_list)
def build_statistics_page(page, articles):
"""
Builds the full HTML of the statistics page.

page is formatted with title="Statistics" and the generated content.
articles is iterated many times; each article is read for its title,
player, turn, content, citations, citedby and addendums attributes.

The page reports, in order: top articles by pagerank, by citations made,
by times cited and by length; the aggregate word count by turn; per-player
pagerank, citations made and citations received; the lowest-ranked written
articles; and articles cited at most once.
"""
# NOTE(review): many string literals in this function are split across
# physical lines and appear to have lost their HTML markup; confirm the
# intended markup against version control before editing them.
content = ""
# Top pages by pagerank
# Compute pagerank for each article
# NOTE(review): citations have a direction (article -> target) but the graph
# is built as networkx.Graph rather than DiGraph -- confirm this is intended.
G = networkx.Graph()
for article in articles:
for citation in article.citations:
G.add_edge(article.title, citation.target)
rank_by_article = networkx.pagerank(G)
# Get the top ten articles by pagerank
# reverse_statistics_dict groups titles by value, so this keeps the ten
# highest distinct pagerank values, each with every title sharing it
top_pageranks = reverse_statistics_dict(rank_by_article)[:10]
# Replace the pageranks with ordinals
top_ranked = enumerate(map(lambda x: x[1], top_pageranks), start=1)
# Format the ranks into strings
top_ranked_items = itemize(top_ranked)
# Write the statistics to the page
content += "
\n"
content += "Top 10 articles by page rank:
\n"
content += "
\n".join(top_ranked_items)
content += "
\n"
# Top number of citations made
citations_made = {article.title : len(article.citations) for article in articles}
top_citations = reverse_statistics_dict(citations_made)[:3]
top_citations_items = itemize(top_citations)
content += "
\n"
content += "Top articles by citations made:
\n"
content += "
\n".join(top_citations_items)
content += "
\n"
# Top number of times cited
citations_to = {article.title : len(article.citedby) for article in articles}
top_cited = reverse_statistics_dict(citations_to)[:3]
top_cited_items = itemize(top_cited)
content += "
\n"
content += "Most cited articles:
\n"
content += "
\n".join(top_cited_items)
content += "
\n"
# Top article length, roughly by words
article_length = {}
for article in articles:
# Substitute each citation placeholder ("c" + id) with the citation's text
# so the count covers the words a reader would see
format_map = {
"c"+str(c.id): c.text
for c in article.citations
}
plain_content = article.content.format(**format_map)
article_length[article.title] = len(plain_content.split())
top_length = reverse_statistics_dict(article_length)[:3]
top_length_items = itemize(top_length)
content += "
\n"
content += "Longest articles:
\n"
content += "
\n".join(top_length_items)
content += "
\n"
# Total word count
# Flatten articles and their addendums into one list
all_articles = []
for article in articles:
all_articles.append(article)
all_articles.extend(article.addendums)
turn_numbers = set([a.turn for a in articles if a.player is not None])
aggregate = {num: 0 for num in turn_numbers}
# The count for a turn is cumulative: it includes every article and addendum
# whose turn is at or before that turn
for turn_num in turn_numbers:
for article in all_articles:
if article.turn <= turn_num:
format_map = {
"c"+str(c.id): c.text
for c in article.citations
}
plain_content = article.content.format(**format_map)
aggregate[turn_num] += len(plain_content.split())
# NOTE(review): the rows follow the iteration order of the turn_numbers set,
# which is not explicitly sorted -- confirm the displayed order is acceptable.
aggr_list = [(str(k), [str(v)]) for k,v in aggregate.items()]
content += "
\n"
content += "Aggregate word count by turn:
\n"
content += "
\n".join(itemize(aggr_list))
content += "
\n"
# Player pageranks
# Add addendums and recompute pagerank
# The addendum nodes are added to the same graph G; rank_by_article was
# computed above and is not affected
for article in articles:
for addendum in article.addendums:
for citation in addendum.citations:
addendum_title = "{0.title}-T{0.turn}".format(addendum)
G.add_edge(addendum_title, citation.target)
rank_by_article_all = networkx.pagerank(G)
players = sorted(set([article.player for article in articles if article.player is not None]))
pagerank_by_player = {player: 0 for player in players}
for article in articles:
if article.player is not None:
# Titles absent from the graph (no citations in either direction)
# contribute 0
pagerank_by_player[article.player] += (rank_by_article_all[article.title]
if article.title in rank_by_article_all else 0)
# NOTE(review): players is derived from top-level articles only; an addendum
# written by a player with no article of their own would raise KeyError
# here -- confirm that cannot happen.
for addendum in article.addendums:
addendum_title = "{0.title}-T{0.turn}".format(addendum)
pagerank_by_player[addendum.player] += (rank_by_article_all[addendum_title]
if addendum_title in rank_by_article_all else 0)
for player in players:
pagerank_by_player[player] = round(pagerank_by_player[player], 3)
player_rank = reverse_statistics_dict(pagerank_by_player)
player_rank_items = itemize(player_rank)
content += "
\n"
content += "Player total page rank:
\n"
content += "
\n".join(player_rank_items)
content += "
\n"
# Player citations made
# NOTE(review): article citations are counted by unique target, while
# addendum citations are counted without deduplication -- confirm intended.
cite_count_by_player = {player: 0 for player in players}
for article in articles:
if article.player is not None:
unique_citations = set([a.target for a in article.citations])
cite_count_by_player[article.player] += len(unique_citations)
for addendum in article.addendums:
cite_count_by_player[addendum.player] += len(addendum.citations)
player_cites_made_ranks = reverse_statistics_dict(cite_count_by_player)
player_cites_made_items = itemize(player_cites_made_ranks)
content += "
\n"
content += "Citations made by player:
\n"
content += "
\n".join(player_cites_made_items)
content += "
\n"
# Player cited count
cited_times = {player : 0 for player in players}
for article in articles:
if article.player is not None:
cited_times[article.player] += len(article.citedby)
cited_times_ranked = reverse_statistics_dict(cited_times)
cited_times_items = itemize(cited_times_ranked)
content += "
\n"
content += "Citations made to player:
\n"
content += "
\n".join(cited_times_items)
content += "
\n"
# Lowest pagerank of written articles
# Phantom titles (player is None) are excluded; this uses the pagerank
# computed before the addendums were added to the graph
exclude = [a.title for a in articles if a.player is None]
rank_by_written_only = {k:v for k,v in rank_by_article.items() if k not in exclude}
pageranks = reverse_statistics_dict(rank_by_written_only)
# Number every rank group from the top, then keep the last ten
bot_ranked = list(enumerate(map(lambda x: x[1], pageranks), start=1))[-10:]
# Format the ranks into strings
bot_ranked_items = itemize(bot_ranked)
content += "
\n"
content += "Bottom 10 articles by pagerank:
\n"
content += "
\n".join(bot_ranked_items)
content += "
\n"
# Undercited articles
# An article is undercited when it is cited at most once
undercited = {a.title: len(a.citedby) for a in articles if len(a.citedby) <= 1}
undercited_items = itemize(reverse_statistics_dict(undercited))
content += "
\n"
content += "Undercited articles:
\n"
content += "
\n".join(undercited_items)
content += "
\n"
# Fill in the entry skeleton
return page.format(title="Statistics", content=content)
def build_graphviz_file(cite_map):
    """
    Builds a citation graph in dot format for Graphviz.

    cite_map maps the title of each written entry to an iterable of the
    titles it cites. Cited titles that are not keys of cite_map are treated
    as phantom entries and still receive a node. Returns the dot source as a
    single string.

    Each distinct title gets its own sequential node id ("n0", "n1", ...),
    written entries first in cite_map order and then phantom entries in the
    order they are first cited. This keeps the output identical between runs
    and keeps titles that share their first 20 characters as separate nodes.
    Node labels are the titles truncated to 20 characters.
    """
    # Collect every title exactly once, in a deterministic order
    titles = list(cite_map)
    known_titles = set(titles)
    for cites in cite_map.values():
        for title in cites:
            if title not in known_titles:
                known_titles.add(title)
                titles.append(title)
    node_ids = {title: "n{}".format(index) for index, title in enumerate(titles)}

    result = ["digraph G {\n"]
    # Node labeling
    for title in titles:
        # Escape after truncating so an escape sequence is never cut in half;
        # backslashes first so the ones added for quotes are not doubled
        label = title[:20].replace("\\", "\\\\").replace("\"", "\\\"")
        result.append("{} [label=\"{}\"];\n".format(node_ids[title], label))
    # Edges
    for citer in cite_map:
        for cited in cite_map[citer]:
            result.append("{}->{};\n".format(node_ids[citer], node_ids[cited]))
    # Return result
    result.append("overlap=false;\n}\n")
    return "".join(result)
def build_compiled_page(articles, config):
"""
Builds a page compiling all articles in the Lexicon.

articles are sorted by utils.titlesort of their title before being written;
each is read for its title, content, citations and addendums attributes.
config is read for the "LEXICON_TITLE" key. Returns the compiled page as a
string.
"""
# NOTE(review): several string literals in this function are split across
# physical lines and appear to have lost their HTML markup; confirm the
# intended markup against version control before editing them.
articles = sorted(
articles,
key=lambda a: (utils.titlesort(a.title)))
# Write the header
content = "
{}"\
""\
"\n".format(config["LEXICON_TITLE"])
# Write each article
for article in articles:
# Article title
content += "
{0.title}
".format(article)
# Article content
# Each citation placeholder ("c" + id) is filled with the citation rendered
# through its own format() method
format_map = {
"c"+str(c.id) : c.format("{text}{id}")
for c in article.citations
}
article_content = article.content.format(**format_map)
# NOTE(review): as written this replaces the empty string with the empty
# string and changes nothing; both arguments presumably held markup that has
# been lost -- confirm.
article_content = article_content.replace("", "", 1)
content += article_content
# Article citations
cite_list = "
".join(
c.format("{id}. {target}")
for c in article.citations)
cite_block = "
{}
".format(cite_list)
content += cite_block
# Addendums
# Each addendum is appended after its parent article, with its own content
# and citation list
for addendum in article.addendums:
# Addendum content
format_map = {
"c"+str(c.id) : c.format("
{text}{id}")
for c in addendum.citations
}
article_content = addendum.content.format(**format_map)
content += article_content
# Addendum citations
cite_list = "
".join(
c.format("{id}. {target}")
for c in addendum.citations)
cite_block = "
{}
".format(cite_list)
content += cite_block
content += ""
return content
def build_all(path_prefix, lexicon_name):
    """
    Builds all browsable articles and pages in the Lexicon.

    The Lexicon lives in the directory path_prefix/lexicon_name. Its config
    is loaded by name, its articles are parsed from the "src" subdirectory,
    and the following files are written beneath the Lexicon directory:

    - index.html (redirect page)
    - article/<title_filesafe>.html for every article, after deleting the
      existing .html files in article/
    - contents/, rules/, formatting/, session/ and statistics/ index.html
    - the compiled page, when config["PRINTABLE_FILE"] is set and non-empty
    - editor.html

    The output subdirectories are not created here, so they must already
    exist. Progress messages and self-citation warnings are printed to
    stdout. Returns None.
    """
    lex_path = os.path.join(path_prefix, lexicon_name)

    def pathto(*els):
        """Joins path elements onto the Lexicon's directory."""
        return os.path.join(lex_path, *els)

    # Load the Lexicon's peripherals
    config = utils.load_config(lexicon_name)
    page_skeleton = utils.load_resource("page-skeleton.html")
    page = LexiconPage(skeleton=page_skeleton)
    page.add_kwargs(
        lexicon=config["LEXICON_TITLE"],
        logo=config["LOGO_FILENAME"],
        prompt=config["PROMPT"],
        sort=config["DEFAULT_SORT"])

    # Parse the written articles
    articles = LexiconArticle.parse_from_directory(os.path.join(lex_path, "src"))
    # Once they've been populated, the articles list has the titles of all articles
    # Sort this by turn before title so prev/next links run in turn order
    articles = sorted(
        LexiconArticle.interlink(articles),
        key=lambda a: (a.turn, utils.titlesort(a.title)))

    # Write the redirect page
    print("Writing redirect page...")
    redirect_html = utils.load_resource("redirect.html").format(
        lexicon=config["LEXICON_TITLE"], sort=config["DEFAULT_SORT"])
    with open(pathto("index.html"), "w", encoding="utf-8") as f:
        f.write(redirect_html)

    # Write the article pages
    print("Deleting old article pages...")
    for filename in os.listdir(pathto("article")):
        if filename.endswith(".html"):
            os.remove(pathto("article", filename))
    print("Writing article pages...")
    for article in articles:
        article_html = page.format(
            title=article.title,
            content=article.build_default_content())
        article_path = pathto("article", article.title_filesafe + ".html")
        with open(article_path, "w", encoding="utf-8") as f:
            f.write(article_html)
        print(" Wrote " + article.title)

    # Write default pages
    print("Writing default pages...")
    with open(pathto("contents", "index.html"), "w", encoding="utf-8") as f:
        f.write(build_contents_page(page, articles, config["INDEX_LIST"]))
    print(" Wrote Contents")
    with open(pathto("rules", "index.html"), "w", encoding="utf-8") as f:
        f.write(build_rules_page(page))
    print(" Wrote Rules")
    with open(pathto("formatting", "index.html"), "w", encoding="utf-8") as f:
        f.write(build_formatting_page(page))
    print(" Wrote Formatting")
    with open(pathto("session", "index.html"), "w", encoding="utf-8") as f:
        f.write(build_session_page(page, config["SESSION_PAGE"]))
    print(" Wrote Session")
    with open(pathto("statistics", "index.html"), "w", encoding="utf-8") as f:
        f.write(build_statistics_page(page, articles))
    print(" Wrote Statistics")

    # Write auxiliary pages
    if "PRINTABLE_FILE" in config and config["PRINTABLE_FILE"]:
        with open(pathto(config["PRINTABLE_FILE"]), "w", encoding="utf-8") as f:
            f.write(build_compiled_page(articles, config))
        print(" Wrote compiled page to " + config["PRINTABLE_FILE"])

    # Build the editor page fully before opening the file, so a failure while
    # building it does not leave a truncated editor.html behind
    written_entries = []
    phantom_entries = []
    for article in articles:
        escaped_title = article.title.replace("\"", "\\\"")
        if article.player is None:
            # The quoted names of the players citing this phantom, deduplicated
            citers = sorted({'"' + citer.player + '"' for citer in article.citedby})
            phantom_entries.append("{{title: \"{0}\", citedby: [{1}]}},".format(
                escaped_title, ",".join(citers)))
        else:
            written_entries.append("{{title: \"{0}\", author: \"{1.player}\"}},".format(
                escaped_title, article))
    # The next turn follows the latest written turn; it is 0 when nothing has
    # been written yet, including when every article is a phantom
    written_turns = [article.turn for article in articles if article.player is not None]
    next_turn = max(written_turns) + 1 if written_turns else 0
    editor = utils.load_resource("editor.html")
    editor = editor.replace("//writtenArticles", "".join(written_entries))
    editor = editor.replace("//phantomArticles", "".join(phantom_entries))
    editor = editor.replace("TURNNUMBER", str(next_turn))
    with open(pathto("editor.html"), "w", encoding="utf-8") as f:
        f.write(editor)

    # Check that authors aren't citing themselves
    print("Running citation checks...")
    for parent in articles:
        for article in [parent] + parent.addendums:
            for citation in article.citations:
                if article.player == citation.article.player:
                    print(" {2}: {0} cites {1}".format(
                        article.title, citation.target, article.player))
    print()