import sys # For argv and stderr
import os # For reading directories
import re # For parsing lex content
import io # For writing pages out as UTF-8
import networkx # For pagerank analytics
from collections import defaultdict # For rank inversion in statistics

import src.utils as utils

# NOTE(review): several string literals in this module consist only of line
# breaks and plain text. They look like they once carried markup that is not
# present in this copy of the file; they are reproduced as found. Confirm
# against the intended page templates.


def _fill_entry_skeleton(title, content, config):
    """
    Fills the shared entry-page skeleton with a page title and body.

    title   -- text for the skeleton's ``title`` field
    content -- body text for the skeleton's ``content`` field
    config  -- mapping providing LEXICON_TITLE, LOGO_FILENAME and PROMPT

    Returns the formatted page as a string. The cite block is always empty
    for the pages built in this module.
    """
    entry_skeleton = utils.load_resource("entry-page.html")
    css = utils.load_resource("lexicon.css")
    return entry_skeleton.format(
        title=title,
        lexicon=config["LEXICON_TITLE"],
        css=css,
        logo=config["LOGO_FILENAME"],
        prompt=config["PROMPT"],
        content=content,
        citeblock="")


def build_contents_page(articles, config):
    """
    Builds the full HTML of the contents page.

    articles -- iterable of article objects; this function reads their
                ``title``, ``title_filesafe`` and ``player`` attributes
    config   -- mapping providing LEXICON_TITLE, LOGO_FILENAME and PROMPT

    Returns the filled-in entry skeleton as a string.
    """
    # Article counts: an article without a player is a phantom
    phantom_count = sum(1 for article in articles if article.player is None)
    if phantom_count == 0:
        content = "\n\nThere are {0} entries in this lexicon.\n\n\n".format(
            len(articles))
    else:
        content = "\n\nThere are {0} entries, {1} written and {2} phantom.\n\n\n".format(
            len(articles), len(articles) - phantom_count, phantom_count)
    # Prepare article links
    # NOTE(review): the template as found only references {0}, and the mapping
    # is not used further down in the visible code. It is kept because the
    # attribute reads still happen and it is presumably consumed by listing
    # code that is not visible here -- confirm.
    link_by_title = {
        article.title: "{0}".format(
            article.title,
            article.title_filesafe,
            "" if article.player is not None else " class=\"phantom\"")
        for article in articles}
    # Write the articles in alphabetical order
    content += utils.load_resource("contents.html")
    content += "\n\n\n\n\n"
    # Write the articles in turn order
    content += "\n\n\n\n\n"
    # Fill in the page skeleton
    return _fill_entry_skeleton(
        "Index of " + config["LEXICON_TITLE"], content, config)


def build_rules_page(config):
    """
    Builds the full HTML of the rules page.

    config -- mapping providing LEXICON_TITLE, LOGO_FILENAME and PROMPT

    Returns the entry skeleton filled with the ``rules.html`` resource.
    """
    content = utils.load_resource("rules.html")
    return _fill_entry_skeleton("Rules", content, config)


def build_formatting_page(config):
    """
    Builds the full HTML of the formatting page.

    config -- mapping providing LEXICON_TITLE, LOGO_FILENAME and PROMPT

    Returns the entry skeleton filled with the ``formatting.html`` resource.
    """
    content = utils.load_resource("formatting.html")
    return _fill_entry_skeleton("Formatting", content, config)


def build_session_page(config):
    """
    Builds the full HTML of the session page.

    config -- mapping providing LEXICON_TITLE, LOGO_FILENAME, PROMPT and
              SESSION_PAGE (used verbatim as the page body)

    Returns the entry skeleton titled with the lexicon title.
    """
    return _fill_entry_skeleton(
        config["LEXICON_TITLE"], config["SESSION_PAGE"], config)


def _stat_section(heading, rows):
    """
    Assembles one statistics section from a heading and its row strings.
    """
    return "".join([
        "\n\n",
        "\n\n" + heading + "\n\n",
        "\n\n".join(rows),
        "\n\n\n",
        "\n\n",
    ])


def _top_count_rows(tally, limit=3):
    """
    Groups (title, count) pairs by count and formats the highest counts.

    Returns at most ``limit`` rows, highest count first, each listing every
    title that shares that count.
    """
    titles_by_count = defaultdict(list)
    for title, count in tally:
        titles_by_count[count].append(title)
    top = sorted(titles_by_count.items(), key=lambda kv: kv[0], reverse=True)
    return ["{0} – {1}".format(count, "; ".join(titles))
            for count, titles in top[:limit]]


def _descending_rows(totals, render=lambda value: value):
    """
    Formats a {name: value} mapping as rows ordered by descending value.
    """
    ordered = sorted(totals.items(), key=lambda kv: kv[1], reverse=True)
    return ["{0} – {1}".format(name, render(value)) for name, value in ordered]


def build_statistics_page(articles, config):
    """
    Builds the full HTML of the statistics page.

    articles -- iterable of article objects; this function reads their
                ``title``, ``player``, ``citations`` (a mapping whose values
                are indexable, element 1 being the cited title), ``wcites``,
                ``pcites`` and ``citedby`` attributes
    config   -- mapping providing LEXICON_TITLE, LOGO_FILENAME and PROMPT

    Returns the filled-in entry skeleton as a string.
    """
    cite_map = {
        article.title: [cite_tuple[1] for cite_tuple in article.citations.values()]
        for article in articles}

    # Pages by pagerank
    # NOTE(review): the graph is undirected, so a citation counts in both
    # directions for ranking purposes. Kept as found; confirm whether a
    # directed graph was intended.
    G = networkx.Graph()
    for citer, citeds in cite_map.items():
        for cited in citeds:
            G.add_edge(citer, cited)
    ranks = networkx.pagerank(G)
    sranks = sorted(ranks.items(), key=lambda kv: kv[1], reverse=True)
    content = _stat_section(
        "Top 10 pages by page rank:",
        ["{0} – {1}".format(position, title)
         for position, (title, _) in enumerate(sranks[:10], start=1)])

    # Top number of citations made
    content += _stat_section(
        "Most citations made from:",
        _top_count_rows((title, len(cites)) for title, cites in cite_map.items()))

    # Top number of times cited: count distinct citing articles per title.
    # dict.fromkeys dedupes repeated citations within one article while
    # keeping first-seen order, so the output order is reproducible.
    cited_by_total = defaultdict(int)
    for citeds in cite_map.values():
        for cited in dict.fromkeys(citeds):
            cited_by_total[cited] += 1
    content += _stat_section(
        "Most citations made to:",
        _top_count_rows(cited_by_total.items()))

    # Player pageranks
    players = sorted({article.player for article in articles
                      if article.player is not None})
    articles_by = {player: [a for a in articles if a.player == player]
                   for player in players}
    # An article that neither cites nor is cited never enters the graph and
    # so has no rank entry; treat it as contributing nothing instead of
    # raising KeyError.
    player_rank = {player: sum(ranks.get(a.title, 0) for a in written)
                   for player, written in articles_by.items()}
    content += _stat_section(
        "Player total page rank:",
        _descending_rows(player_rank, render=lambda rank: round(rank, 3)))

    # Player citations made
    player_cite_count = {
        player: sum(len(a.wcites | a.pcites) for a in written)
        for player, written in articles_by.items()}
    content += _stat_section(
        "Citations made by player",
        _descending_rows(player_cite_count))

    # Player cited count
    cited_times = {player: sum(len(a.citedby) for a in written)
                   for player, written in articles_by.items()}
    content += _stat_section(
        "Citations made to player",
        _descending_rows(cited_times))

    # Fill in the entry skeleton
    return _fill_entry_skeleton("Statistics", content, config)


def build_graphviz_file(cite_map):
    """
    Builds a citation graph in dot format for Graphviz.

    cite_map -- mapping of a written article's title to the list of titles
                it cites; cited titles that are not keys are phantom entries

    Returns the dot source as a string. Each distinct title gets its own
    node with a sequential name (n0, n1, ...), so the output is identical
    from run to run and titles sharing their first 20 characters stay
    separate nodes. Labels are the first 20 characters of the title.
    """
    # Node naming: written entries first, then phantoms in first-cited order
    node_names = {}
    for title in cite_map:
        node_names.setdefault(title, "n{}".format(len(node_names)))
    for cites in cite_map.values():
        for title in cites:
            node_names.setdefault(title, "n{}".format(len(node_names)))

    result = ["digraph G {\n"]
    # Node labeling
    for title, name in node_names.items():
        # Escape after truncating so a cut can never split an escape pair
        label = title[:20].replace("\\", "\\\\").replace("\"", "\\\"")
        result.append("{} [label=\"{}\"];\n".format(name, label))
    # Edges
    for citer, cites in cite_map.items():
        for cited in cites:
            result.append("{}->{};\n".format(node_names[citer], node_names[cited]))
    # Return result
    result.append("overlap=false;\n}\n")
    return "".join(result)


# Summative functions

# Write auxiliary files
# TODO: write graphviz file
# TODO: write compiled lexicon page