LF para = re.sub(r"\\\\\n", "
\n", para) # Abstract citations into the citation record link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para) while link_match: # Identify the citation text and cited article cite_text = link_match.group(2) if link_match.group(2) else link_match.group(3) cite_title = titlecase(link_match.group(3).strip()) # Record the citation parsed["citations"]["c"+str(format_id)] = (cite_text, cite_title) # Stitch the format id in place of the citation para = para[:link_match.start(0)] + "{c"+str(format_id)+"}" + para[link_match.end(0):] format_id += 1 # Increment to the next format citation link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para) # Convert signature to right-aligned if para[:1] == '~': para = "
" + para[1:] + "
\n" else: para = "" + para + "
\n" parsed["content"] += para if not parsed["content"]: return {"error": "No content parsed"} return parsed def parse_lex(lex_contents): """ Parses the contents of a lex file into HTML and abstracts citations. Input: lex_contents, the read contents of a lex file Output: A dictionary in the following format: {"title":Citations: {cites}
\n"\ "Cited by: {citedby}
\n"\ "This entry hasn't been written yet.
" # Build the stub citeblock cites, citedby = citation_lists(title, cite_map) citedby_str = " | ".join([lf(None, title, title) for title in citedby]) citeblock = ""\ "Cited by: {citedby}
\n"\ "[The handwriting is completely illegible.]
\n"\ "Ersatz Scrivener
\n" # Build the stub citeblock citedby = [citer_title for citer_title, cited_titles in cite_map.items() if title in cited_titles] citedby_str = " | ".join([lf(None, title, title) for title in citedby]) citeblock = ""\ "Citations: [Illegible]
\n"\ "Cited by: {citedby}
\n"\ "There are {0} entries in this lexicon.
\n- \n".format(len(titles))
else:
content = "
- " content += lf(None, title, title) content += " \n" if len(titles) > 0: content += "
- " content += lf(None, title, title) content += " \n" content += "
There are {0} entries, {1} written and {2} phantom.
\n- \n".format(
len(titles), len(cite_map.keys()), len(titles) - len(cite_map.keys()))
# Write all of the entries out as links under their indices
lf = link_formatter(cite_map.keys())
indices = config["INDEX_LIST"].split("\n")
for index_str in indices:
content += "
{0}
".format(index_str) index_titles = [] for c in index_str.upper(): for title in titles: if (titlestrip(title)[0] == c): index_titles.append(title) for title in index_titles: titles.remove(title) content += "&c.
".format(index_str) for title in titles: content += "Top 10 by page rank:
\n"
# Build an undirected graph with one edge per (citing entry, cited entry)
# pair found in cite_map, then score every node with PageRank.
G = networkx.Graph()
for citer, citeds in cite_map.items():
    for cited in citeds:
        G.add_edge(citer, cited)
ranks = networkx.pagerank(G)
# Highest score first; sorted() is stable, so ties keep the order in which
# networkx returned them.
sranks = sorted(ranks.items(), key=lambda item: item[1], reverse=True)
# (zero-based position, title) pairs; the score itself is dropped here.
ranking = list(enumerate(ranked_title for ranked_title, _ in sranks))
content += "
\n".join(map(lambda x: "{0} - {1}".format(x[0]+1, x[1]), ranking[:10]))
content += "
Most citations made from:
\n"
# Pair every key of cite_map with the number of citations listed under it.
citation_tally = [(citer, len(cited)) for citer, cited in cite_map.items()]
# Group titles by that number: count -> titles, in cite_map iteration order.
citation_count = defaultdict(list)
for title, count in citation_tally:
    citation_count[count].append(title)
content += "
\n".join(map(
lambda kv: "{0} - {1}".format(kv[0], "; ".join(kv[1])),
sorted(citation_count.items(), reverse=True)[:3]))
content += "
Most citations made to:
\n"
# Every title that is cited by at least one entry in cite_map.
all_cited = set(title for cites in cite_map.values() for title in cites)
# Invert cite_map in a single pass: cited title -> entries that cite it.
# set(cites) ensures an entry is recorded once per cited title even if it
# cites that title more than once; citers stay in cite_map iteration order.
citers_of = defaultdict(list)
for citer, cites in cite_map.items():
    for cited in set(cites):
        citers_of[cited].append(citer)
# Build the map in sorted title order. Iterating the set directly would make
# the order of tied titles depend on string hash randomization, so the
# generated page could differ between otherwise identical runs.
cited_by_map = {cited: citers_of[cited] for cited in sorted(all_cited)}
cited_tally = [(cited, len(citers)) for cited, citers in cited_by_map.items()]
# Group cited titles by how many entries cite them: count -> titles.
cited_count = defaultdict(list)
for title, count in cited_tally:
    cited_count[count].append(title)
content += "
\n".join(map(
lambda kv: "{0} - {1}".format(kv[0], "; ".join(kv[1])),
sorted(cited_count.items(), reverse=True)[:3]))
#cited_count = map(lambda kv: (kv[0], len(kv[1])), cited_by_map.items())
#cited_count_sort = sorted(cited_count, key=lambda x: x[1], reverse=True)
#top_cited_count = [kv for kv in cited_count_sort if kv[1] >= cited_count_sort[:5][-1][1]]
#content += "
\n".join(map(lambda x: "{0} - {1}".format(x[1], x[0]), top_cited_count))
content += "