LF + para = re.sub(r"\\\\\n", "
\n", para) + # Abstract citations into the citation record + link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para) + while link_match: + # Identify the citation text and cited article + cite_text = link_match.group(2) if link_match.group(2) else link_match.group(3) + cite_title = titlecase(link_match.group(3).strip()) + # Record the citation + parsed["citations"]["c"+str(format_id)] = (cite_text, cite_title) + # Stitch the format id in place of the citation + para = para[:link_match.start(0)] + "{c"+str(format_id)+"}" + para[link_match.end(0):] + format_id += 1 # Increment to the next format citation + link_match = re.search(r"\[\[(([^|\[\]]+)\|)?([^|\[\]]+)\]\]", para) + # Convert signature to right-aligned + if para[:1] == '~': + para = "
" + para[1:] + "
\n" + else: + para = "" + para + "
\n" + parsed["content"] += para + if not parsed["content"]: + return {"error": "No content parsed"} + return parsed + +def parse_lex(lex_contents): + """ + Parses the contents of a lex file into HTML and abstracts citations. + Input: lex_contents, the read contents of a lex file + Output: A dictionary in the following format: + {"title":Citations: {cites}
\n"\ + "Cited by: {citedby}
\n"\ + "This entry hasn't been written yet.
" + # Build the stub citeblock + cites, citedby = citation_lists(title, cite_map) + citedby_str = " | ".join([lf(None, title, title) for title in citedby]) + citeblock = ""\ + "Cited by: {citedby}
\n"\ + "[The handwriting is completely illegible.]
\n"\ + "Ersatz Scrivener
\n" + # Build the stub citeblock + cites, citedby = citation_lists(title, cite_map) + citedby_str = " | ".join([lf(None, title, title) for title in citedby]) + citeblock = ""\ + "Citations: [Illegible]
\n"\ + "Cited by: {citedby}
\n"\ + "There are {0} entries in this lexicon.
\n- \n".format(len(titles))
+ else:
+ content = "
- " + content += lf(None, title, title) + content += " \n" + if len(titles) > 0: + content += "
- " + content += lf(None, title, title) + content += " \n" + content += "
There are {0} entries, {1} written and {2} phantom.
\n- \n".format(
+ len(titles), len(cite_map.keys()), len(titles) - len(cite_map.keys()))
+ # Write all of the entries out as links under their indices
+ lf = link_formatter(cite_map.keys())
+ indices = config["INDEX_LIST"].split("\n")
+ for index_str in indices:
+ content += "
{0}
".format(index_str) + index_titles = [] + for c in index_str.upper(): + for title in titles: + if (titlestrip(title)[0] == c): + index_titles.append(title) + for title in index_titles: + titles.remove(title) + content += "&c.
".format(index_str) + for title in titles: + content += "Top 10 by page rank:
\n"
+ G = networkx.Graph()  # Citation graph for PageRank. NOTE(review): Graph is undirected, so the citer -> cited direction is not preserved -- confirm DiGraph was not intended
+ for citer, citeds in cite_map.items():  # each cite_map key is paired with an iterable of the titles it cites (per the citer/citeds naming)
+ for cited in citeds:
+ G.add_edge(citer, cited)  # one edge per (citing, cited) pair; add_edge also creates any missing node
+ ranks = networkx.pagerank(G)  # node -> PageRank score
+ sranks = sorted(ranks.items(), key=lambda x: x[1], reverse=True)  # (node, score) pairs, highest score first
+ ranking = list(enumerate(map(lambda x: x[0], sranks)))  # (0-based position, node) pairs; scores are dropped here
+ content += "
\n".join(map(lambda x: "{0} - {1}".format(x[0]+1, x[1]), ranking[:10]))
+ content += "
Most citations made from:
\n"
+ citation_tally = [(kv[0], len(kv[1])) for kv in cite_map.items()]  # (cite_map key, number of entries it cites)
+ citation_count = defaultdict(list)  # number of citations made -> titles that made exactly that many
+ for title, count in citation_tally: citation_count[count].append(title)  # group titles by their outgoing-citation count
+ content += "
\n".join(map(
+ lambda kv: "{0} - {1}".format(kv[0], "; ".join(kv[1])),
+ sorted(citation_count.items(), reverse=True)[:3]))
+ content += "
Most citations made to:
\n"
+ all_cited = set([title for cites in cite_map.values() for title in cites])  # every distinct title that appears in any cite_map value
+ cited_by_map = { cited: [citer for citer in cite_map.keys() if cited in cite_map[citer]] for cited in all_cited }  # cited title -> cite_map keys whose value contains it (scans all of cite_map once per cited title)
+ cited_tally = [(kv[0], len(kv[1])) for kv in cited_by_map.items()]  # (cited title, number of entries citing it)
+ cited_count = defaultdict(list)  # number of citers -> cited titles with exactly that many citers
+ for title, count in cited_tally: cited_count[count].append(title)  # group cited titles by their incoming-citation count
+ content += "
\n".join(map(
+ lambda kv: "{0} - {1}".format(kv[0], "; ".join(kv[1])),
+ sorted(cited_count.items(), reverse=True)[:3]))
+ #cited_count = map(lambda kv: (kv[0], len(kv[1])), cited_by_map.items())
+ #cited_count_sort = sorted(cited_count, key=lambda x: x[1], reverse=True)
+ #top_cited_count = [kv for kv in cited_count_sort if kv[1] >= cited_count_sort[:5][-1][1]]
+ #content += "
\n".join(map(lambda x: "{0} - {1}".format(x[1], x[0]), top_cited_count))
+ content += "