310 lines
11 KiB
Python
310 lines
11 KiB
Python
import os
|
|
import sys
|
|
import re
|
|
import utils
|
|
|
|
class LexiconCitation:
    """
    One citation made from inside a Lexicon article.

    Members:
        id      int: index of the citation within its article; pairs with
                the "{cN}" format hook left in the article content
        text    string: the alias text shown for the link
        target  string: title of the article that is cited
        article LexiconArticle: the cited article object; remains None
                until interlink fills it in
    """

    def __init__(self, id, citation_text, citation_target, article=None):
        # Assignment order is kept (id, text, target, article) so that the
        # instance __dict__ looks the same as before.
        self.id, self.text = id, citation_text
        self.target, self.article = citation_target, article

    def __repr__(self):
        template = "<LexiconCitation(id={0.id}, text=\"{0.text}\", target=\"{0.target}\")>"
        return template.format(self)

    def __str__(self):
        template = "<[{0.id}]:[[{0.text}|{0.target}]]>"
        return template.format(self)

    def format(self, format_str):
        """
        Interpolates this citation's members (id, text, target, article)
        into format_str by keyword and returns the result.
        """
        members = vars(self)
        return format_str.format(**members)
|
|
|
|
class LexiconArticle:
    """
    A Lexicon article and its metadata.

    Members defined by __init__:
        player          string: player who wrote the article
        turn            integer: turn the article was written for
        title           string: article title
        title_filesafe  string: title, escaped, used for filenames
        content         string: HTML content, with citations replaced by
                        "{cN}" format hooks
        citations       list of LexiconCitations: citations made by the
                        article
        link_class      string: CSS class attribute to interpolate into
                        links (for styling phantoms)

    Members undefined until interlink:
        addendums       list of LexiconArticles: addendum articles to this
                        article
        citedby         set of LexiconArticles: articles that cite this
                        article
        prev_article    LexiconArticle: the previous article in read order
        next_article    LexiconArticle: the next article in read order
    """

    # Matches [[Target]] and [[alias text|Target]]. Group 2 is the optional
    # alias, group 3 is the cited title.
    _CITATION_RE = re.compile(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]")

    def __init__(self, player, turn, title, content, citations):
        """
        Creates a LexiconArticle object with the given parameters.

        A player of None marks the article as a phantom, which only affects
        the link_class used when linking to it.
        """
        self.player = player
        self.turn = turn
        self.title = title
        self.title_filesafe = utils.titleescape(title)
        self.content = content
        self.citations = citations
        self.link_class = "class=\"phantom\"" if player is None else ""
        self.addendums = []
        self.citedby = set()
        self.prev_article = None
        self.next_article = None

    def __repr__(self):
        return "<LexiconArticle(title={0.title}, turn={0.turn}, player={0.player})>".format(self)

    def __str__(self):
        return "<\"{0.title}\", {0.player} turn {0.turn}>".format(self)

    @staticmethod
    def from_file_raw(raw_content):
        """
        Parses the contents of a Lexipython source file into a LexiconArticle
        object. If the source file is malformed, prints a diagnostic and
        returns None.

        The first three lines must be "# Player:", "# Turn:" and "# Title:"
        headers, in that order; everything after them is the article body.
        """
        headers = raw_content.split('\n', 3)
        if len(headers) != 4:
            print("Header read error")
            return None
        player_header, turn_header, title_header, content_raw = headers

        # Validate and sanitize the player header
        player_prefix = "# Player:"
        if not player_header.startswith(player_prefix):
            print("Player header missing or corrupted")
            return None
        player = player_header[len(player_prefix):].strip()

        # Validate and sanitize the turn header
        turn_prefix = "# Turn:"
        if not turn_header.startswith(turn_prefix):
            print("Turn header missing or corrupted")
            return None
        try:
            turn = int(turn_header[len(turn_prefix):].strip())
        except ValueError:
            # Only a non-integer turn is an expected failure here; anything
            # else should surface instead of being swallowed.
            print("Turn header error")
            return None

        # Validate and sanitize the title header
        title_prefix = "# Title:"
        if not title_header.startswith(title_prefix):
            print("Title header missing or corrupted")
            return None
        title = utils.titlecase(title_header[len(title_prefix):])

        # Parse the content and extract citations
        body = content_raw.strip()
        if not body:
            # re.split never returns an empty list, so emptiness has to be
            # detected on the body text itself. Parsing continues regardless.
            print("No content")
        rendered = []
        citations = []
        format_id = 1
        for para in re.split("\n\n+", body):
            # Escape angle brackets so author text cannot inject markup
            para = para.replace("<", "&lt;").replace(">", "&gt;")
            # Escape curly braces: the content is later passed through
            # str.format, where a stray brace would be read as a format hook
            para = para.replace("{", "&#123;").replace("}", "&#125;")
            # Replace bold and italic marks with tags
            para = re.sub(r"//([^/]+)//", r"<i>\1</i>", para)
            para = re.sub(r"\*\*([^*]+)\*\*", r"<b>\1</b>", para)
            # Replace \\LF with <br>LF
            para = re.sub(r"\\\\\n", "<br>\n", para)
            # Abstract citations into the citation record. The paragraph is
            # rescanned from the start after every substitution.
            link_match = LexiconArticle._CITATION_RE.search(para)
            while link_match:
                # Identify the citation text and cited article
                cite_text = link_match.group(2) or link_match.group(3)
                cite_title = utils.titlecase(
                    re.sub(r"\s+", " ", link_match.group(3)))
                # Record the citation
                citations.append(
                    LexiconCitation(format_id, cite_text, cite_title))
                # Stitch the format hook in place of the citation
                para = "{}{{c{}}}{}".format(
                    para[:link_match.start(0)],
                    format_id,
                    para[link_match.end(0):])
                format_id += 1  # Increment to the next format citation
                link_match = LexiconArticle._CITATION_RE.search(para)
            # Convert signature to right-aligned
            if para.startswith('~'):
                para = "<hr><span class=\"signature\"><p>" + para[1:] + "</p></span>\n"
            else:
                para = "<p>" + para + "</p>\n"
            rendered.append(para)
        content = "".join(rendered)
        return LexiconArticle(player, turn, title, content, citations)

    @staticmethod
    def parse_from_directory(directory):
        """
        Reads and parses each source file in the given directory.

        Input:  directory, the path to the folder to read
        Output: a list of parsed articles; files that fail to parse are
                reported and skipped
        """
        articles = []
        print("Reading source files from", directory)
        for filename in os.listdir(directory):
            # Read only .txt files
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(directory, filename)
            print("    Parsing", filename)
            with open(path, "r", encoding="utf8") as src_file:
                raw = src_file.read()
            article = LexiconArticle.from_file_raw(raw)
            if article is None:
                print("        ERROR")
            else:
                print("        success:", article.title)
                articles.append(article)
        return articles

    @staticmethod
    def interlink(lexicon_articles):
        """
        Fills out fields on articles that require other articles for context.
        Creates phantom articles for titles that are cited but not written.

        Returns the main (non-addendum) articles, phantoms included, sorted
        by turn and then title, with prev_article/next_article filled in.
        Raises ValueError if two articles share both title and turn.
        """
        # Preliminary check that title/turn is unique
        keys = set()
        for article in lexicon_articles:
            key = (article.title, article.turn)
            if key in keys:
                raise ValueError("Found two articles with title '{}' and turn '{}'".format(
                    article.title, article.turn))
            keys.add(key)

        # Sort out which articles are addendums and which titles are phantoms
        written_titles = set()
        cited_titles = set()
        article_by_title = {}
        written_articles_ordered = sorted(
            lexicon_articles, key=lambda a: (a.turn, a.title))
        for written_article in written_articles_ordered:
            if written_article.title not in written_titles:
                # The earliest article under a title is the main article
                article_by_title[written_article.title] = written_article
                written_titles.add(written_article.title)
            else:
                # Later articles with the same title are addendums
                parent = article_by_title[written_article.title]
                parent.addendums.append(written_article)
            # Collect all cited titles
            for citation in written_article.citations:
                cited_titles.add(citation.target)

        # Create articles for each phantom title. The turn is sys.maxsize so
        # phantoms sort after every written article.
        for title in cited_titles - written_titles:
            article_by_title[title] = LexiconArticle(
                None, sys.maxsize, title,
                "<p><i>This entry hasn't been written yet.</i></p>", [])

        # To interlink the articles, each citation needs to have its .article
        # filled in, and that article needs its citedby updated.
        for parent in article_by_title.values():
            for citing_article in [parent] + parent.addendums:
                for citation in citing_article.citations:
                    target_article = article_by_title[citation.target]
                    citation.article = target_article
                    target_article.citedby.add(citing_article)

        # Sort the articles by turn and title, then fill in prev/next fields
        articles_ordered = sorted(
            article_by_title.values(),
            key=lambda a: (a.turn, utils.titlesort(a.title)))
        last_index = len(articles_ordered) - 1
        for i, article in enumerate(articles_ordered):
            article.prev_article = articles_ordered[i - 1] if i > 0 else None
            article.next_article = (
                articles_ordered[i + 1] if i < last_index else None)
        return articles_ordered

    def build_default_content(self):
        """
        Builds the contents of the content div for an article page: the
        article body, its citation block, the same pair for each addendum,
        and finally the prev/next navigation block.
        """
        content = ""
        # Build the main article content block
        main_body = self.build_default_article_body()
        content += "<div class=\"contentblock\"><h1>{}</h1>{}</div>\n".format(
            self.title, main_body)
        # Build the main citation content block
        main_citations = self.build_default_citeblock()
        if main_citations:
            content += "<div class=\"contentblock citeblock\">{}</div>\n".format(
                main_citations)
        # Build any addendum content blocks
        for addendum in self.addendums:
            add_body = addendum.build_default_article_body()
            content += "<div class=\"contentblock\">{}</div>\n".format(add_body)
            add_citations = addendum.build_default_citeblock()
            if add_citations:
                content += "<div class=\"contentblock\">{}</div>\n".format(
                    add_citations)
        # Build the prev/next block
        prev_next = self.build_prev_next_block(
            self.prev_article, self.next_article)
        if prev_next:
            content += "<div class=\"contentblock citeblock\">{}</div>\n".format(
                prev_next)
        return content

    def build_default_article_body(self):
        """
        Formats citations into the article text and returns the article body.

        Each "{cN}" hook in the content is replaced by a link to the cited
        article, so every citation's .article must already be set (i.e.
        interlink must have run).
        """
        format_map = {
            "c" + str(c.id): c.format(
                "<a {article.link_class} "
                "href=\"{article.title_filesafe}.html\">{text}</a>")
            for c in self.citations
        }
        return self.content.format(**format_map)

    def build_default_citeblock(self):
        """
        Builds the contents of a citation contentblock: a "Citations" line
        listing each distinct cited article and a "Cited by" line listing
        each distinct citing title. Skips sections with no content.
        """
        content = ""
        # Citations
        cites_titles = set()
        cites_links = []
        for citation in sorted(
                self.citations,
                key=lambda c: (utils.titlesort(c.target), c.id)):
            if citation.target not in cites_titles:
                cites_titles.add(citation.target)
                cites_links.append(
                    citation.format(
                        "<a {article.link_class} href=\"{article.title_filesafe}.html\">{article.title}</a>"))
        cites_str = " / ".join(cites_links)
        if cites_str:
            content += "<p>Citations: {}</p>\n".format(cites_str)
        # Citedby
        citedby_titles = set()
        citedby_links = []
        for article in sorted(
                self.citedby,
                key=lambda a: (utils.titlesort(a.title), a.turn)):
            # An article and its addendums share a title; link it only once
            if article.title not in citedby_titles:
                citedby_titles.add(article.title)
                citedby_links.append(
                    "<a {0.link_class} href=\"{0.title_filesafe}.html\">{0.title}</a>".format(article))
        citedby_str = " / ".join(citedby_links)
        if citedby_str:
            content += "<p>Cited by: {}</p>\n".format(citedby_str)

        return content

    def build_prev_next_block(self, prev_article, next_article):
        """
        For each defined target, links the target page as Previous or Next.

        Returns a one-row table with the two links (an empty cell for a
        missing side), or an empty string when both targets are None.
        """
        content = ""
        # Prev/next links:
        if next_article is not None or prev_article is not None:
            prev_link = ("<a {0.link_class} href=\"{0.title_filesafe}.html\">← Previous</a>".format(
                prev_article)
                if prev_article is not None else "")
            next_link = ("<a {0.link_class} href=\"{0.title_filesafe}.html\">Next →</a>".format(
                next_article)
                if next_article is not None else "")
            # Close the row before the table so the markup is well nested
            content += "<table><tr>\n<td>{}</td>\n<td>{}</td>\n</tr></table>\n".format(
                prev_link, next_link)
        return content
|