205 lines
7.4 KiB
Python
205 lines
7.4 KiB
Python
import os
|
|
import sys
|
|
import re
|
|
import src.utils as utils
|
|
|
|
class LexiconArticle:
|
|
"""
|
|
A Lexicon article and its metadata.
|
|
|
|
Members:
|
|
player string: the player of the article
|
|
turn integer: the turn the article was written for
|
|
title string: the article title
|
|
title_filesafe string: the title, escaped, used for filenames
|
|
content string: the HTML content, with citations replaced by format hooks
|
|
citations dict mapping format hook string to tuple of link alias and link target title
|
|
wcites list: titles of written articles cited
|
|
pcites list: titles of phantom articles cited
|
|
citedby list: titles of articles that cite this
|
|
The last three are filled in by populate().
|
|
"""
|
|
|
|
def __init__(self, player, turn, title, content, citations):
|
|
"""
|
|
Creates a LexiconArticle object with the given parameters.
|
|
"""
|
|
self.player = player
|
|
self.turn = turn
|
|
self.title = title
|
|
self.title_filesafe = utils.titleescape(title)
|
|
self.content = content
|
|
self.citations = citations
|
|
self.wcites = set()
|
|
self.pcites = set()
|
|
self.citedby = set()
|
|
|
|
@staticmethod
|
|
def from_file_raw(raw_content):
|
|
"""
|
|
Parses the contents of a Lexipython source file into a LexiconArticle
|
|
object. If the source file is malformed, returns None.
|
|
"""
|
|
headers = raw_content.split('\n', 3)
|
|
if len(headers) != 4:
|
|
print("Header read error")
|
|
return None
|
|
player_header, turn_header, title_header, content_raw = headers
|
|
# Validate and sanitize the player header
|
|
if not player_header.startswith("# Player:"):
|
|
print("Player header missing or corrupted")
|
|
return None
|
|
player = player_header[9:].strip()
|
|
# Validate and sanitize the turn header
|
|
if not turn_header.startswith("# Turn:"):
|
|
print("Turn header missing or corrupted")
|
|
return None
|
|
turn = None
|
|
try:
|
|
turn = int(turn_header[7:].strip())
|
|
except:
|
|
print("Turn header error")
|
|
return None
|
|
# Validate and sanitize the title header
|
|
if not title_header.startswith("# Title:"):
|
|
print("Title header missing or corrupted")
|
|
return None
|
|
title = utils.titlecase(title_header[8:])
|
|
# Parse the content and extract citations
|
|
paras = re.split("\n\n+", content_raw.strip())
|
|
content = ""
|
|
citations = {}
|
|
format_id = 1
|
|
if not paras:
|
|
print("No content")
|
|
for para in paras:
|
|
# Escape angle brackets
|
|
para = re.sub("<", "<", para)
|
|
para = re.sub(">", ">", para)
|
|
# Escape curly braces
|
|
para = re.sub("{", "{", para)
|
|
para = re.sub("}", "}", para)
|
|
# Replace bold and italic marks with tags
|
|
para = re.sub(r"//([^/]+)//", r"<i>\1</i>", para)
|
|
para = re.sub(r"\*\*([^*]+)\*\*", r"<b>\1</b>", para)
|
|
# Replace \\LF with <br>LF
|
|
para = re.sub(r"\\\\\n", "<br>\n", para)
|
|
# Abstract citations into the citation record
|
|
link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para)
|
|
while link_match:
|
|
# Identify the citation text and cited article
|
|
cite_text = link_match.group(2) if link_match.group(2) else link_match.group(3)
|
|
cite_title = utils.titlecase(re.sub(r"\s+", " ", link_match.group(3)))
|
|
# Record the citation
|
|
citations["c"+str(format_id)] = (cite_text, cite_title)
|
|
# Stitch the format id in place of the citation
|
|
para = para[:link_match.start(0)] + "{c"+str(format_id)+"}" + para[link_match.end(0):]
|
|
format_id += 1 # Increment to the next format citation
|
|
link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para)
|
|
# Convert signature to right-aligned
|
|
if para[:1] == '~':
|
|
para = "<hr><span class=\"signature\"><p>" + para[1:] + "</p></span>\n"
|
|
else:
|
|
para = "<p>" + para + "</p>\n"
|
|
content += para
|
|
return LexiconArticle(player, turn, title, content, citations)
|
|
|
|
@staticmethod
|
|
def parse_from_directory(directory):
|
|
"""
|
|
Reads and parses each source file in the given directory.
|
|
Input: directory, the path to the folder to read
|
|
Output: a list of parsed articles
|
|
"""
|
|
articles = []
|
|
print("Reading source files from", directory)
|
|
for filename in os.listdir(directory):
|
|
path = os.path.join(directory, filename)
|
|
# Read only .txt files
|
|
if filename[-4:] == ".txt":
|
|
print(" Parsing", filename)
|
|
with open(path, "r", encoding="utf8") as src_file:
|
|
raw = src_file.read()
|
|
article = LexiconArticle.from_file_raw(raw)
|
|
if article is None:
|
|
print(" ERROR")
|
|
else:
|
|
print(" success:", article.title)
|
|
articles.append(article)
|
|
return articles
|
|
|
|
@staticmethod
|
|
def populate(lexicon_articles):
|
|
"""
|
|
Given a list of lexicon articles, fills out citation information
|
|
for each article and creates phantom pages for missing articles.
|
|
"""
|
|
article_by_title = {article.title : article for article in lexicon_articles}
|
|
# Determine all articles that exist or should exist
|
|
extant_titles = set([citation[1] for article in lexicon_articles for citation in article.citations])
|
|
# Interlink all citations
|
|
for article in lexicon_articles:
|
|
for cite_tuple in article.citations.values():
|
|
target = cite_tuple[1]
|
|
# Create article objects for phantom citations
|
|
if target not in article_by_title:
|
|
article_by_title[target] = LexiconArticle(None, sys.maxsize, target, "<p><i>This entry hasn't been written yet.</i></p>", {})
|
|
# Interlink citations
|
|
if article_by_title[target].player is None:
|
|
article.pcites.add(target)
|
|
else:
|
|
article.wcites.add(target)
|
|
article_by_title[target].citedby.add(article.title)
|
|
return list(article_by_title.values())
|
|
|
|
def build_default_content(self):
|
|
"""
|
|
Formats citations into the article content as normal HTML links
|
|
and returns the result.
|
|
"""
|
|
format_map = {
|
|
format_id: "<a href=\"{1}.html\"{2}>{0}</a>".format(
|
|
cite_tuple[0], utils.titleescape(cite_tuple[1]),
|
|
"" if cite_tuple[1] in self.wcites else " class=\"phantom\"")
|
|
for format_id, cite_tuple in self.citations.items()
|
|
}
|
|
return self.content.format(**format_map)
|
|
|
|
def build_default_citeblock(self, prev_article, next_article):
|
|
"""
|
|
Builds the citeblock content HTML for use in regular article pages.
|
|
For each defined target, links the target page as Previous or Next.
|
|
"""
|
|
citeblock = "<div class=\"content citeblock\">\n"
|
|
# Prev/next links
|
|
if next_article is not None:
|
|
citeblock += "<p style=\"float:right\"><a href=\"{}.html\"{}>Next →</a></p>\n".format(
|
|
next_article.title_filesafe, " class=\"phantom\"" if next_article.player is None else "")
|
|
if prev_article is not None:
|
|
citeblock += "<p><a href=\"{}.html\"{}>← Previous</a></p>\n".format(
|
|
prev_article.title_filesafe, " class=\"phantom\"" if prev_article.player is None else "")
|
|
if next_article is None and prev_article is None:
|
|
citeblock += "<p> </p>\n"
|
|
# Citations
|
|
cites_links = [
|
|
"<a href=\"{1}.html\"{2}>{0}</a>".format(
|
|
title, utils.titleescape(title),
|
|
"" if title in self.wcites else " class=\"phantom\"")
|
|
for title in sorted(
|
|
self.wcites | self.pcites,
|
|
key=lambda t: utils.titlesort(t))]
|
|
cites_str = " | ".join(cites_links)
|
|
if len(cites_str) < 1: cites_str = "—"
|
|
citeblock += "<p>Citations: {}</p>\n".format(cites_str)
|
|
# Citedby
|
|
citedby_links = [
|
|
"<a href=\"{1}.html\">{0}</a>".format(
|
|
title, utils.titleescape(title))
|
|
for title in sorted(
|
|
self.citedby,
|
|
key=lambda t: utils.titlesort(t))]
|
|
citedby_str = " | ".join(citedby_links)
|
|
if len(citedby_str) < 1: citedby_str = "—"
|
|
citeblock += "<p>Cited by: {}</p>\n</div>\n".format(citedby_str)
|
|
return citeblock
|