lexipython/lexipython/article.py

310 lines
11 KiB
Python

import os
import sys
import re
import utils
class LexiconCitation:
"""
Represents information about a single citation in a Lexicon article.
Members:
id int: citation id within the article, corresponding to a "{cN}"
format hook
text string: alias text linked to the citation target
target string: title of the article being cited
article LexiconArticle: article cited, None until interlink
"""
def __init__(self, id, citation_text, citation_target, article=None):
self.id = id
self.text = citation_text
self.target = citation_target
self.article = article
def __repr__(self):
return "<LexiconCitation(id={0.id}, text=\"{0.text}\", target=\"{0.target}\")>".format(self)
def __str__(self):
return "<[{0.id}]:[[{0.text}|{0.target}]]>".format(self)
def format(self, format_str):
return format_str.format(**self.__dict__)
class LexiconArticle:
"""
A Lexicon article and its metadata.
Members defined by __init__:
player string: player who wrote the article
turn integer: turn the article was written for
title string: article title
title_filesafe string: title, escaped, used for filenames
content string: HTML content, with citations replaced by format hooks
citations list of LexiconCitations: citations made by the article
link_class string: CSS class to interpolate (for styling phantoms)
Members undefined until interlink:
addendums list of LexiconArticles: addendum articles to this article
citedby set of LexiconArticles: articles that cite this article
prev_article LexiconArticle: the previous article in read order
next_article LexiconArticle: the next article in read order
"""
def __init__(self, player, turn, title, content, citations):
"""
Creates a LexiconArticle object with the given parameters.
"""
self.player = player
self.turn = turn
self.title = title
self.title_filesafe = utils.titleescape(title)
self.content = content
self.citations = citations
self.link_class = "class=\"phantom\"" if player is None else ""
self.addendums = []
self.citedby = set()
self.prev_article = None
self.next_article = None
def __repr__(self):
return "<LexiconArticle(title={0.title}, turn={0.turn}, player={0.player})>".format(self)
def __str__(self):
return "<\"{0.title}\", {0.player} turn {0.turn}>".format(self)
@staticmethod
def from_file_raw(raw_content):
"""
Parses the contents of a Lexipython source file into a LexiconArticle
object. If the source file is malformed, returns None.
"""
headers = raw_content.split('\n', 3)
if len(headers) != 4:
print("Header read error")
return None
player_header, turn_header, title_header, content_raw = headers
# Validate and sanitize the player header
if not player_header.startswith("# Player:"):
print("Player header missing or corrupted")
return None
player = player_header[9:].strip()
# Validate and sanitize the turn header
if not turn_header.startswith("# Turn:"):
print("Turn header missing or corrupted")
return None
turn = None
try:
turn = int(turn_header[7:].strip())
except:
print("Turn header error")
return None
# Validate and sanitize the title header
if not title_header.startswith("# Title:"):
print("Title header missing or corrupted")
return None
title = utils.titlecase(title_header[8:])
# Parse the content and extract citations
paras = re.split("\n\n+", content_raw.strip())
content = ""
citations = []
format_id = 1
if not paras:
print("No content")
for para in paras:
# Escape angle brackets
para = re.sub("<", "&lt;", para)
para = re.sub(">", "&gt;", para)
# Escape curly braces
para = re.sub("{", "&#123;", para)
para = re.sub("}", "&#125;", para)
# Replace bold and italic marks with tags
para = re.sub(r"//([^/]+)//", r"<i>\1</i>", para)
para = re.sub(r"\*\*([^*]+)\*\*", r"<b>\1</b>", para)
# Replace \\LF with <br>LF
para = re.sub(r"\\\\\n", "<br>\n", para)
# Abstract citations into the citation record
link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para)
while link_match:
# Identify the citation text and cited article
cite_text = link_match.group(2) if link_match.group(2) else link_match.group(3)
cite_title = utils.titlecase(re.sub(r"\s+", " ", link_match.group(3)))
# Record the citation
cite = LexiconCitation(format_id, cite_text, cite_title)
citations.append(cite)
# Stitch the format id in place of the citation
para = para[:link_match.start(0)] + "{c"+str(format_id)+"}" + para[link_match.end(0):]
format_id += 1 # Increment to the next format citation
link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para)
# Convert signature to right-aligned
if para[:1] == '~':
para = "<hr><span class=\"signature\"><p>" + para[1:] + "</p></span>\n"
else:
para = "<p>" + para + "</p>\n"
content += para
return LexiconArticle(player, turn, title, content, citations)
@staticmethod
def parse_from_directory(directory):
"""
Reads and parses each source file in the given directory.
Input: directory, the path to the folder to read
Output: a list of parsed articles
"""
articles = []
print("Reading source files from", directory)
for filename in os.listdir(directory):
path = os.path.join(directory, filename)
# Read only .txt files
if filename[-4:] == ".txt":
print(" Parsing", filename)
with open(path, "r", encoding="utf8") as src_file:
raw = src_file.read()
article = LexiconArticle.from_file_raw(raw)
if article is None:
print(" ERROR")
else:
print(" success:", article.title)
articles.append(article)
return articles
@staticmethod
def interlink(lexicon_articles):
"""
Fills out fields on articles that require other articles for context.
Creates phantom articles.
"""
# Preliminary assertion that title/turn is unique
keys = set()
for article in lexicon_articles:
if (article.title, article.turn) in keys:
raise ValueError("Found two articles with title '{}' and turn '{}'".format(
article.title, article.turn))
keys.add((article.title, article.turn))
# Sort out which articles are addendums and which titles are phantoms
written_titles = set()
cited_titles = set()
article_by_title = {}
written_articles_ordered = sorted(lexicon_articles, key=lambda a: (a.turn, a.title))
for written_article in written_articles_ordered:
# Track main articles by title
if written_article.title not in written_titles:
article_by_title[written_article.title] = written_article
written_titles.add(written_article.title)
# Append addendums to their parents
else:
parent = article_by_title[written_article.title]
parent.addendums.append(written_article)
# Collect all cited titles
for citation in written_article.citations:
cited_titles.add(citation.target)
# Create articles for each phantom title
for title in cited_titles - written_titles:
phantom_article = LexiconArticle(
None, sys.maxsize, title,
"<p><i>This entry hasn't been written yet.</i></p>", {})
article_by_title[title] = phantom_article
# To interlink the articles, each citation needs to have its .article
# filled in, and that article needs its citedby updated.
for parent in article_by_title.values():
under_title = [parent] + parent.addendums
for citing_article in under_title:
for citation in citing_article.citations:
target_article = article_by_title[citation.target]
citation.article = target_article
target_article.citedby.add(citing_article)
# Sort the articles by turn and title, then fill in prev/next fields
articles_ordered = sorted(article_by_title.values(), key=lambda a: (a.turn, utils.titlesort(a.title)))
for i in range(len(articles_ordered)):
articles_ordered[i].prev_article = articles_ordered[i-1] if i != 0 else None
articles_ordered[i].next_article = articles_ordered[i+1] if i != len(articles_ordered)-1 else None
return articles_ordered
def build_default_content(self):
"""
Builds the contents of the content div for an article page.
"""
content = ""
# Build the main article content block
main_body = self.build_default_article_body()
content += "<div class=\"contentblock\"><h1>{}</h1>{}</div>\n".format(
self.title, main_body)
# Build the main citation content block
main_citations = self.build_default_citeblock()
if main_citations:
content += "<div class=\"contentblock citeblock\">{}</div>\n".format(
main_citations)
# Build any addendum content blocks
for addendum in self.addendums:
add_body = addendum.build_default_article_body()
content += "<div class=\"contentblock\">{}</div>\n".format(add_body)
add_citations = addendum.build_default_citeblock()
if add_citations:
content += "<div class=\"contentblock\">{}</div>\n".format(
add_citations)
# Build the prev/next block
prev_next = self.build_prev_next_block(
self.prev_article, self.next_article)
if prev_next:
content += "<div class=\"contentblock citeblock\">{}</div>\n".format(
prev_next)
return content
def build_default_article_body(self):
"""
Formats citations into the article text and returns the article body.
"""
format_map = {
"c"+str(c.id) : c.format("<a {article.link_class} "\
"href=\"{article.title_filesafe}.html\">{text}</a>")
for c in self.citations
}
return self.content.format(**format_map)
def build_default_citeblock(self):
"""
Builds the contents of a citation contentblock. Skips sections with no
content.
"""
content = ""
# Citations
cites_titles = set()
cites_links = []
for citation in sorted(self.citations, key=lambda c: (utils.titlesort(c.target), c.id)):
if citation.target not in cites_titles:
cites_titles.add(citation.target)
cites_links.append(
citation.format(
"<a {article.link_class} href=\"{article.title_filesafe}.html\">{article.title}</a>"))
cites_str = " / ".join(cites_links)
if len(cites_str) > 0:
content += "<p>Citations: {}</p>\n".format(cites_str)
# Citedby
citedby_titles = set()
citedby_links = []
for article in sorted(self.citedby, key=lambda a: (utils.titlesort(a.title), a.turn)):
if article.title not in citedby_titles:
citedby_titles.add(article.title)
citedby_links.append(
"<a {0.link_class} href=\"{0.title_filesafe}.html\">{0.title}</a>".format(article))
citedby_str = " / ".join(citedby_links)
if len(citedby_str) > 0:
content += "<p>Cited by: {}</p>\n".format(citedby_str)
return content
def build_prev_next_block(self, prev_article, next_article):
"""
For each defined target, links the target page as Previous or Next.
"""
content = ""
# Prev/next links:
if next_article is not None or prev_article is not None:
prev_link = ("<a {0.link_class} href=\"{0.title_filesafe}.html\">&#8592; Previous</a>".format(
prev_article)
if prev_article is not None else "")
next_link = ("<a {0.link_class} href=\"{0.title_filesafe}.html\">Next &#8594;</a>".format(
next_article)
if next_article is not None else "")
content += "<table><tr>\n<td>{}</td>\n<td>{}</td>\n</table></tr>\n".format(
prev_link, next_link)
return content