import os
import sys
import re

import src.utils as utils

# NOTE(review): the copy of this file under review had the HTML tags and
# character entities stripped out of its string literals, which left the file
# unparseable and the escaping/link templates inert. The markup below was
# reconstructed from the surrounding comments and from the number and meaning
# of the arguments passed to each format string. The ".html" link extension
# and the "signature", "contentblock" and "citeblock" class names are
# assumptions -- confirm them against the page template and stylesheet.

# Matches [[target]] or [[alias|target]]. Group 2 is the optional alias and
# group 3 is the cited title.
_CITATION_PATTERN = re.compile(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]")


class LexiconArticle:
    """
    A Lexicon article and its metadata.

    Members:
        player          string: the player of the article; None marks a
                        phantom article created by populate()
        turn            integer: the turn the article was written for
        title           string: the article title
        title_filesafe  string: the title, escaped, used for filenames
        content         string: the HTML content, with citations replaced by
                        format hooks
        citations       dict mapping format hook string to tuple of link
                        alias and link target title
        wcites          set: titles of written articles cited
        pcites          set: titles of phantom articles cited
        citedby         set: titles of articles that cite this
    The last three are filled in by populate().
    """

    def __init__(self, player, turn, title, content, citations):
        """
        Creates a LexiconArticle object with the given parameters.
        """
        self.player = player
        self.turn = turn
        self.title = title
        self.title_filesafe = utils.titleescape(title)
        self.content = content
        self.citations = citations
        self.wcites = set()
        self.pcites = set()
        self.citedby = set()

    @staticmethod
    def from_file_raw(raw_content):
        """
        Parses the contents of a Lexipython source file into a LexiconArticle
        object. If the source file is malformed, returns None.

        The first three lines must be the "# Player:", "# Turn:" and
        "# Title:" headers, in that order; everything after them is the
        article body. A message describing the problem is printed before
        None is returned.
        """
        headers = raw_content.split('\n', 3)
        if len(headers) != 4:
            print("Header read error")
            return None
        player_header, turn_header, title_header, content_raw = headers
        # Validate and sanitize the player header
        if not player_header.startswith("# Player:"):
            print("Player header missing or corrupted")
            return None
        player = player_header[9:].strip()
        # Validate and sanitize the turn header
        if not turn_header.startswith("# Turn:"):
            print("Turn header missing or corrupted")
            return None
        try:
            turn = int(turn_header[7:].strip())
        except ValueError:
            # int() raises only ValueError for a str argument; catching it
            # alone keeps KeyboardInterrupt and SystemExit from being eaten.
            print("Turn header error")
            return None
        # Validate and sanitize the title header
        if not title_header.startswith("# Title:"):
            print("Title header missing or corrupted")
            return None
        title = utils.titlecase(title_header[8:])
        # Parse the content and extract citations
        body = content_raw.strip()
        if not body:
            # re.split always returns at least one element, so emptiness has
            # to be checked on the text itself. Parsing still continues.
            print("No content")
        paras = re.split(r"\n\n+", body)
        rendered = []
        citations = {}
        format_id = 1
        for para in paras:
            # Escape angle brackets so article text cannot inject markup
            para = para.replace("<", "&lt;")
            para = para.replace(">", "&gt;")
            # Escape curly braces; the content is later passed through
            # str.format(), where a bare brace would be read as a hook
            para = para.replace("{", "&#123;")
            para = para.replace("}", "&#125;")
            # Replace bold and italic marks with tags
            para = re.sub(r"//([^/]+)//", r"<i>\1</i>", para)
            para = re.sub(r"\*\*([^*]+)\*\*", r"<b>\1</b>", para)
            # Replace \\LF with <br>LF
            para = para.replace("\\\\\n", "<br>\n")
            # Abstract citations into the citation record. The search is
            # restarted from the beginning after every replacement, exactly
            # as before, so the order of hook numbering is unchanged.
            link_match = _CITATION_PATTERN.search(para)
            while link_match:
                # Identify the citation text and cited article
                alias = link_match.group(2)
                target = link_match.group(3)
                cite_text = alias if alias else target
                cite_title = utils.titlecase(re.sub(r"\s+", " ", target))
                # Record the citation
                hook = "c" + str(format_id)
                citations[hook] = (cite_text, cite_title)
                # Stitch the format id in place of the citation
                para = (para[:link_match.start(0)]
                        + "{" + hook + "}"
                        + para[link_match.end(0):])
                format_id += 1
                link_match = _CITATION_PATTERN.search(para)
            # Convert signature to right-aligned
            # NOTE(review): the "signature" class is assumed to be what
            # right-aligns the paragraph -- confirm against the stylesheet.
            if para[:1] == '~':
                para = ("<hr><span class=\"signature\"><p>"
                        + para[1:] + "</p></span>\n")
            else:
                para = "<p>" + para + "</p>\n"
            rendered.append(para)
        content = "".join(rendered)
        return LexiconArticle(player, turn, title, content, citations)

    @staticmethod
    def parse_from_directory(directory):
        """
        Reads and parses each source file in the given directory.
        Input:  directory, the path to the folder to read
        Output: a list of parsed articles; files that fail to parse are
                reported and skipped
        """
        articles = []
        print("Reading source files from", directory)
        for filename in os.listdir(directory):
            # Read only .txt files
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(directory, filename)
            print("    Parsing", filename)
            with open(path, "r", encoding="utf8") as src_file:
                raw = src_file.read()
            article = LexiconArticle.from_file_raw(raw)
            if article is None:
                print("        ERROR")
            else:
                print("        success:", article.title)
                articles.append(article)
        return articles

    @staticmethod
    def populate(lexicon_articles):
        """
        Given a list of lexicon articles, fills out citation information
        for each article and creates phantom pages for missing articles.

        Returns a list holding every given article plus one phantom article
        (player None, turn sys.maxsize) per cited title that has no article.
        """
        article_by_title = {
            article.title: article for article in lexicon_articles}
        # Interlink all citations
        for article in lexicon_articles:
            for cite_tuple in article.citations.values():
                target = cite_tuple[1]
                # Create article objects for phantom citations
                if target not in article_by_title:
                    article_by_title[target] = LexiconArticle(
                        None, sys.maxsize, target,
                        "<p><i>This entry hasn't been written yet.</i></p>",
                        {})
                # Interlink citations
                cited = article_by_title[target]
                if cited.player is None:
                    article.pcites.add(target)
                else:
                    article.wcites.add(target)
                cited.citedby.add(article.title)
        return list(article_by_title.values())

    def build_default_content(self):
        """
        Formats citations into the article content as normal HTML links
        and returns the result.

        Links to titles not in self.wcites get class="phantom".
        """
        # NOTE(review): the ".html" extension on link targets is assumed from
        # title_filesafe being "used for filenames" -- confirm.
        format_map = {
            format_id: "<a href=\"{1}.html\"{2}>{0}</a>".format(
                cite_tuple[0],
                utils.titleescape(cite_tuple[1]),
                "" if cite_tuple[1] in self.wcites else " class=\"phantom\"")
            for format_id, cite_tuple in self.citations.items()
        }
        return self.content.format(**format_map)

    def build_default_citeblock(self, prev_article, next_article):
        """
        Builds the citeblock content HTML for use in regular article pages.
        For each defined target, links the target page as Previous or Next.

        Either of prev_article and next_article may be None, in which case
        that link is omitted.
        """
        # NOTE(review): wrapper class names are assumed -- confirm against
        # the stylesheet.
        citeblock = "<div class=\"contentblock citeblock\">\n"
        # Prev/next links. Next is emitted first and floated right so that it
        # shares a line with Previous.
        if next_article is not None:
            citeblock += (
                "<p style=\"float:right\">"
                "<a href=\"{}.html\"{}>Next &#8594;</a></p>\n").format(
                    next_article.title_filesafe,
                    " class=\"phantom\""
                    if next_article.player is None else "")
        if prev_article is not None:
            citeblock += (
                "<p><a href=\"{}.html\"{}>&#8592; Previous</a></p>\n").format(
                    prev_article.title_filesafe,
                    " class=\"phantom\""
                    if prev_article.player is None else "")
        if next_article is None and prev_article is None:
            # Keep the row's height when there is nothing to link to
            citeblock += "<p>&nbsp;</p>\n"
        # Citations
        cites_links = [
            "<a href=\"{1}.html\"{2}>{0}</a>".format(
                title,
                utils.titleescape(title),
                "" if title in self.wcites else " class=\"phantom\"")
            for title in sorted(
                self.wcites | self.pcites, key=utils.titlesort)]
        cites_str = " | ".join(cites_links)
        if not cites_str:
            cites_str = "&mdash;"
        citeblock += "<p>Citations: {}</p>\n".format(cites_str)
        # Citedby
        citedby_links = [
            "<a href=\"{1}.html\">{0}</a>".format(
                title, utils.titleescape(title))
            for title in sorted(self.citedby, key=utils.titlesort)]
        citedby_str = " | ".join(citedby_links)
        if not citedby_str:
            citedby_str = "&mdash;"
        citeblock += "<p>Cited by: {}</p>\n</div>\n".format(citedby_str)
        return citeblock