Statistics refactor and config hookup
This commit is contained in:
parent
8773f6b58f
commit
b3875fc7da
|
@ -1,12 +1,12 @@
|
||||||
import sys # For argv and stderr
|
# Standard library imports
|
||||||
import os # For reading directories
|
import os # For reading directories
|
||||||
import re # For parsing lex content
|
import re # For parsing lex content
|
||||||
import io # For writing pages out as UTF-8
|
|
||||||
import networkx # For pagerank analytics
|
|
||||||
from collections import defaultdict # For rank inversion in statistics
|
|
||||||
|
|
||||||
|
# Application imports
|
||||||
import utils
|
import utils
|
||||||
from article import LexiconArticle
|
from article import LexiconArticle
|
||||||
|
from statistics import LexiconStatistics
|
||||||
|
|
||||||
|
|
||||||
class LexiconPage:
|
class LexiconPage:
|
||||||
"""
|
"""
|
||||||
|
@ -181,191 +181,24 @@ def build_session_page(page, session_content):
|
||||||
content = "<div class=\"contentblock\">{}</div>".format(session_content)
|
content = "<div class=\"contentblock\">{}</div>".format(session_content)
|
||||||
return page.format(title="Session", content=content)
|
return page.format(title="Session", content=content)
|
||||||
|
|
||||||
def reverse_statistics_dict(stats, reverse=True):
|
def build_statistics_page(config, page, articles):
    """
    Builds the full HTML of the statistics page.

    Which statistics blocks are rendered, and in what order, is driven by
    the STATISTICS section of the config: one "<name> on|off" pair per
    line. Each enabled name is dispatched to the matching stat_<name>
    method on LexiconStatistics.
    """
    # Read the config file for which stats to publish.
    lines = config['STATISTICS'].split("\n")
    stats = []
    for line in lines:
        # Tolerate blank lines and comment lines in the config section;
        # a bare line.split() would raise ValueError on them.
        if not line.strip() or line.lstrip().startswith("#"):
            continue
        stat, toggle = line.split()
        if toggle == "on":
            stats.append("stat_" + stat)

    # Create all the stats blocks.
    lexicon_stats = LexiconStatistics(articles)
    stats_blocks = []
    for stat in stats:
        if hasattr(lexicon_stats, stat):
            stats_blocks.append(getattr(lexicon_stats, stat)())
        else:
            # Misconfigured stat names are reported to stderr so they don't
            # pollute the build's normal stdout progress output.
            print("ERROR: Bad stat {}".format(stat), file=sys.stderr)
    content = "\n".join(stats_blocks)

    # Fill in the entry skeleton
    return page.format(title="Statistics", content=content)
|
||||||
|
@ -620,7 +453,7 @@ def build_all(path_prefix, lexicon_name):
|
||||||
f.write(build_session_page(page, config["SESSION_PAGE"]))
|
f.write(build_session_page(page, config["SESSION_PAGE"]))
|
||||||
print(" Wrote Session")
|
print(" Wrote Session")
|
||||||
with open(pathto("statistics", "index.html"), "w", encoding="utf-8") as f:
|
with open(pathto("statistics", "index.html"), "w", encoding="utf-8") as f:
|
||||||
f.write(build_statistics_page(page, articles))
|
f.write(build_statistics_page(config, page, articles))
|
||||||
print(" Wrote Statistics")
|
print(" Wrote Statistics")
|
||||||
|
|
||||||
# Write auxiliary pages
|
# Write auxiliary pages
|
||||||
|
|
|
@ -48,6 +48,21 @@ char:WXYZ
|
||||||
etc:&c.
|
etc:&c.
|
||||||
<<<INDEX_LIST<<<
|
<<<INDEX_LIST<<<
|
||||||
|
|
||||||
|
# Toggles and order for which statistics to display.
|
||||||
|
# Pagerank-based statistics require networkx to be installed.
|
||||||
|
>>>STATISTICS>>>
|
||||||
|
top_pagerank on
|
||||||
|
most_citations_made on
|
||||||
|
most_citations_to on
|
||||||
|
longest_article on
|
||||||
|
cumulative_wordcount off
|
||||||
|
player_pagerank on
|
||||||
|
player_citations_made on
|
||||||
|
player_citations_to on
|
||||||
|
bottom_pagerank off
|
||||||
|
undercited off
|
||||||
|
<<<STATISTICS<<<
|
||||||
|
|
||||||
# The default sorting to use on the contents page.
|
# The default sorting to use on the contents page.
|
||||||
# Allowed values are "index", "turn", and "player"
|
# Allowed values are "index", "turn", and "player"
|
||||||
>>>DEFAULT_SORT>>>
|
>>>DEFAULT_SORT>>>
|
||||||
|
|
|
@ -0,0 +1,317 @@
|
||||||
|
# Third party imports
try:
    import networkx  # For pagerank analytics
    NETWORKX_ENABLED = True
except ImportError:
    # Catch only ImportError: a bare except would also swallow
    # SystemExit/KeyboardInterrupt raised during import.
    # Pagerank-based statistics are skipped when networkx is missing.
    NETWORKX_ENABLED = False

# Application imports
from utils import titlesort
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_statistics_dict(stats, reverse=True):
    """
    Transforms a dictionary mapping titles to a value into a list of values
    and lists of titles. The list is sorted by the value, and the titles are
    sorted alphabetically.
    """
    # Group titles under their shared value.
    by_value = {}
    for title, value in stats.items():
        by_value.setdefault(value, []).append(title)
    # Alphabetize each group of titles (per titlesort's ordering).
    for value in by_value:
        by_value[value] = sorted(by_value[value], key=titlesort)
    # Order the (value, titles) pairs by value, descending by default.
    return sorted(by_value.items(), key=lambda pair: pair[0], reverse=reverse)
|
||||||
|
|
||||||
|
|
||||||
|
def itemize(stats_list):
    """
    Formats a list consisting of tuples of ranks and lists of ranked items.
    """
    # Render each (rank, titles) pair as "rank – title; title; ...".
    return ("{0} – {1}".format(rank, "; ".join(ranked))
            for rank, ranked in stats_list)
|
||||||
|
|
||||||
|
|
||||||
|
class LexiconStatistics():
    """
    A wrapper for a persistent statistics context with some precomputed
    values around for convenience.

    The existence of addendum articles complicates how some statistics are
    computed. An addendum is an article, with its own author, body, and
    citations, but in a Lexicon it exists appended to another article. To handle
    this, we distinguish an _article_ from a _page_. An article is a unit parsed
    from a single source file. A page is a main article and all addendums under
    the same title.
    """

    def __init__(self, articles):
        """
        Precomputes the page/article maps, the turn range, and the player
        set from the given list of main articles.
        """
        self.articles = articles
        self.min_turn = 0
        self.max_turn = 0
        self.players = set()
        # Maps "Title (Tn)" disambiguated titles to individual articles.
        self.title_to_article = {}
        # Maps a page title to [main_article, *addendums].
        self.title_to_page = {}
        # Shared HTML skeleton for each rendered statistics block.
        self.stat_block = "<div class=\"contentblock\"><u>{0}</u><br>{1}</div>\n"
        # Pagerank may not be computable if networkx isn't installed.
        self.title_to_pagerank = None

        for main_article in articles:
            page_title = main_article.title
            self.title_to_page[page_title] = [main_article]
            self.title_to_page[page_title].extend(main_article.addendums)
            for article in self.title_to_page[page_title]:
                # Disambiguate articles by appending turn number to the title
                key = "{0.title} (T{0.turn})".format(article)
                self.title_to_article[key] = article
                if article.player is not None:
                    # Phantoms have turn MAXINT by convention
                    self.min_turn = min(self.min_turn, article.turn)
                    self.max_turn = max(self.max_turn, article.turn)
                    self.players.add(article.player)

    @staticmethod
    def _article_word_count(article):
        """
        Computes an article's word count as written, with citation aliases
        substituted into the text so cited titles count as their link text.
        """
        format_map = {
            "c" + str(c.id): c.text
            for c in article.citations
        }
        plain_content = article.content.format(**format_map)
        return len(plain_content.split())

    def _try_populate_pagerank(self):
        """Computes pagerank if networkx is imported."""
        if NETWORKX_ENABLED and self.title_to_pagerank is None:
            # Create a citation graph linking page titles.
            G = networkx.Graph()
            for page_title, articles in self.title_to_page.items():
                for article in articles:
                    for citation in article.citations:
                        G.add_edge(page_title, citation.target)

            # Compute pagerank on the page citation graph.
            self.title_to_pagerank = networkx.pagerank(G)
            # Any article with no links in the citation graph has no
            # pagerank. Assign these pagerank 0 to avoid key errors or
            # missing pages in the stats.
            for page_title, articles in self.title_to_page.items():
                if page_title not in self.title_to_pagerank:
                    self.title_to_pagerank[page_title] = 0

    def stat_top_pagerank(self):
        """Computes the top 10 pages by pagerank."""
        self._try_populate_pagerank()

        if not self.title_to_pagerank:
            # If networkx was not successfully imported, skip the pagerank.
            top_ranked_items = "networkx must be installed to compute pageranks."
        else:
            # Get the top ten articles by pagerank.
            top_pageranks = reverse_statistics_dict(self.title_to_pagerank)[:10]
            # Replace the pageranks with ordinals.
            top_ranked = enumerate(map(lambda x: x[1], top_pageranks), start=1)
            # Format the ranks into strings.
            top_ranked_items = itemize(top_ranked)

        # Format the statistics block.
        return self.stat_block.format(
            "Top 10 articles by page rank:",
            "<br>".join(top_ranked_items))

    def stat_most_citations_made(self):
        """Computes the top 3 ranks for citations made FROM a page."""
        # Determine which pages are cited from all articles on a page.
        pages_cited = {
            page_title: set()
            for page_title in self.title_to_page.keys()}
        for page_title, articles in self.title_to_page.items():
            for article in articles:
                for citation in article.citations:
                    pages_cited[page_title].add(citation.target)
        # Compute the number of unique articles cited by a page.
        for page_title, cite_titles in pages_cited.items():
            pages_cited[page_title] = len(cite_titles)

        # Reverse and itemize the citation counts.
        top_citations = reverse_statistics_dict(pages_cited)[:3]
        top_citations_items = itemize(top_citations)

        # Format the statistics block.
        return self.stat_block.format(
            "Cited the most pages:",
            "<br>".join(top_citations_items))

    def stat_most_citations_to(self):
        """Computes the top 3 ranks for citations made TO a page."""
        # Determine which pages cite a page.
        pages_cited_by = {
            page_title: set()
            for page_title in self.title_to_page.keys()}
        for page_title, articles in self.title_to_page.items():
            for article in articles:
                for citation in article.citations:
                    pages_cited_by[citation.target].add(page_title)
        # Compute the number of unique articles that cite a page.
        for page_title, cite_titles in pages_cited_by.items():
            pages_cited_by[page_title] = len(cite_titles)

        # Reverse and itemize the citation counts.
        top_cited = reverse_statistics_dict(pages_cited_by)[:3]
        top_cited_items = itemize(top_cited)

        # Format the statistics block.
        return self.stat_block.format(
            "Cited by the most pages:",
            "<br>".join(top_cited_items))

    def stat_longest_article(self):
        """Computes the top 3 longest articles."""
        # Compute the length of each article (not page).
        title_to_article_length = {}
        for article_title, article in self.title_to_article.items():
            title_to_article_length[article_title] = (
                self._article_word_count(article))

        # Reverse and itemize the article lengths.
        top_length = reverse_statistics_dict(title_to_article_length)[:3]
        top_length_items = itemize(top_length)

        # Format the statistics block.
        return self.stat_block.format(
            "Longest articles:",
            "<br>".join(top_length_items))

    def stat_cumulative_wordcount(self):
        """Computes the cumulative word count of the lexicon."""
        # Initialize all extant turns to 0.
        turn_to_cumulative_wordcount = {
            turn_num: 0
            for turn_num in range(self.min_turn, self.max_turn + 1)
        }
        for article_title, article in self.title_to_article.items():
            word_count = self._article_word_count(article)
            # Add the word count to each turn the article exists in.
            for turn_num in range(self.min_turn, self.max_turn + 1):
                if article.turn <= turn_num:
                    turn_to_cumulative_wordcount[turn_num] += word_count

        # Format the statistics block.
        len_list = [(str(k), [str(v)]) for k,v in turn_to_cumulative_wordcount.items()]
        return self.stat_block.format(
            "Aggregate word count by turn:",
            "<br>".join(itemize(len_list)))

    def stat_player_pagerank(self):
        """Computes each player's share of the lexicon's pagerank scores."""
        self._try_populate_pagerank()

        if not self.title_to_pagerank:
            # If networkx was not successfully imported, skip the pagerank.
            player_rank_items = "networkx must be installed to compute pageranks."
        else:
            player_to_pagerank = {
                player: 0
                for player in self.players}
            # Accumulate page pagerank to the main article's author.
            for page_title, articles in self.title_to_page.items():
                page_author = articles[0].player
                if page_author is not None:
                    player_to_pagerank[page_author] += self.title_to_pagerank[page_title]
            # Round pageranks off to 3 decimal places.
            for player, pagerank in player_to_pagerank.items():
                player_to_pagerank[player] = round(pagerank, 3)

            # Reverse and itemize the aggregated pageranks.
            player_rank = reverse_statistics_dict(player_to_pagerank)
            player_rank_items = itemize(player_rank)

        # Format the statistics block.
        return self.stat_block.format(
            "Player aggregate page rank:",
            "<br>".join(player_rank_items))

    def stat_player_citations_made(self):
        """Computes the total number of citations made BY each player."""
        pages_cited_by_player = {
            player: 0
            for player in self.players}
        # Add the number of citations from each authored article (not page).
        for article_title, article in self.title_to_article.items():
            if article.player is not None:
                pages_cited_by_player[article.player] += len(article.citations)

        # Reverse and itemize the counts.
        player_cites_made_ranks = reverse_statistics_dict(pages_cited_by_player)
        player_cites_made_items = itemize(player_cites_made_ranks)

        # Format the statistics block.
        return self.stat_block.format(
            "Citations made by player:",
            "<br>".join(player_cites_made_items))

    def stat_player_citations_to(self):
        """Computes the total number of citations made TO each player's
        authored pages."""
        pages_cited_by_by_player = {
            player: 0
            for player in self.players}
        # Add the number of citations made to each page (not article).
        for page_title, articles in self.title_to_page.items():
            page_author = articles[0].player
            if page_author is not None:
                pages_cited_by_by_player[page_author] += len(articles[0].citedby)

        # Reverse and itemize the results.
        cited_times_ranked = reverse_statistics_dict(pages_cited_by_by_player)
        cited_times_items = itemize(cited_times_ranked)

        # Format the statistics block.
        return self.stat_block.format(
            "Citations made to article by player:",
            "<br>".join(cited_times_items))

    def stat_bottom_pagerank(self):
        """Computes the bottom 10 pages by pagerank."""
        self._try_populate_pagerank()

        if not self.title_to_pagerank:
            # If networkx was not successfully imported, skip the pagerank.
            bot_ranked_items = "networkx must be installed to compute pageranks."
        else:
            # Phantoms have no pagerank, because they don't cite anything.
            exclude = [
                a.title
                for a in self.articles
                if a.player is None]
            rank_by_written_only = {
                k:v
                for k,v in self.title_to_pagerank.items()
                if k not in exclude}

            # Reverse, enumerate, and itemize the bottom 10 by pagerank.
            pageranks = reverse_statistics_dict(rank_by_written_only)
            bot_ranked = list(enumerate(map(lambda x: x[1], pageranks), start=1))[-10:]
            bot_ranked_items = itemize(bot_ranked)

        # Format the statistics block.
        return self.stat_block.format(
            "Bottom 10 articles by page rank:",
            "<br>".join(bot_ranked_items))

    def stat_undercited(self):
        """Computes which articles have 0 or 1 citations made to them."""
        undercited = {
            page_title: len(articles[0].citedby)
            for page_title, articles in self.title_to_page.items()
            if len(articles[0].citedby) < 2}
        undercited_items = itemize(reverse_statistics_dict(undercited))
        return self.stat_block.format(
            "Undercited articles:",
            "<br>".join(undercited_items))
|
Loading…
Reference in New Issue