2023-09-11 22:38:24 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
import argparse
|
2023-09-21 16:16:43 +00:00
|
|
|
from datetime import datetime, timezone
|
2023-09-11 22:38:24 +00:00
|
|
|
import copy
|
|
|
|
import os
|
|
|
|
import pathlib
|
|
|
|
import shutil
|
|
|
|
|
|
|
|
import bs4
|
2023-09-21 16:16:43 +00:00
|
|
|
from feedgen.feed import FeedGenerator
|
2023-09-11 22:38:24 +00:00
|
|
|
import markdown
|
|
|
|
|
|
|
|
|
|
|
|
def _metadata_paragraph(page, text):
    """Return a new ``<p class="metadata">`` tag created by *page* holding *text*."""
    p = page.new_tag("p")
    p["class"] = "metadata"
    p.string = text
    return p


def _write_feeds(feeds, out, build_date):
    """Write one RSS ``feed.xml`` per collected feed below *out*.

    *feeds* maps a feed name to a list of item dicts with the keys
    ``title``, ``link``, ``description`` and ``pubdate``. *build_date* is
    used as each feed's last build date.
    """
    for feed, items in feeds.items():
        fg = FeedGenerator()
        fg.title(f"alogoulogoi /{feed}/")
        fg.link(href=f"https://www.alogoulogoi.com/{feed}/feed.xml")
        fg.description("Blog posts from alogoulogoi")
        fg.language("en-us")
        fg.lastBuildDate(build_date)

        # Entries are added in ascending publication order.
        for item in sorted(items, key=lambda i: i["pubdate"]):
            entry = fg.add_entry()
            entry.title(item["title"])
            entry.link(href=item["link"])
            entry.description(item["description"])
            entry.published(item["pubdate"])

        rss_path = out / feed / "feed.xml"
        rss_path.parent.mkdir(parents=True, exist_ok=True)
        rss_path.write_bytes(fg.rss_str(pretty=True))


def main():
    """Build the site from ``src`` into the output directory given as ``out``.

    Every non-dotfile under ``src`` is mirrored into the output directory.
    Files with a ``.md`` suffix are converted to HTML, injected into a copy
    of ``src/.template.html`` and decorated with their metadata (title,
    date, author, source, comment). Pages that declare both ``feed`` and
    ``pubdate`` metadata are additionally collected into per-feed RSS files
    written to ``<out>/<feed>/feed.xml``. All other files are copied as-is.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="output directory")
    args = parser.parse_args()

    src = pathlib.Path("src")
    out = pathlib.Path(args.out)

    md = markdown.Markdown(extensions=["attr_list", "footnotes", "md_in_html", "meta"])
    comment_md = markdown.Markdown()

    # Map of feed name -> list of item dicts (title, link, description, pubdate)
    feeds = {}
    build_date = datetime.now(timezone.utc)

    # Load the template
    template = bs4.BeautifulSoup(
        (src / ".template.html").read_text(encoding="utf8"),
        features="html.parser")

    count = 0
    for dirpath, _, filenames in os.walk(src):
        dirpath = pathlib.Path(dirpath).relative_to(src)
        for filename in filenames:
            if filename.startswith("."):
                continue  # Skip dotfiles
            count += 1

            path = src / dirpath / filename
            dest = out / dirpath / filename
            dest.parent.mkdir(parents=True, exist_ok=True)

            # Copy any file types without content processing. Checking the
            # suffix (not the text after the last dot) keeps an extensionless
            # file that happens to be named "md" from being treated as markdown.
            if path.suffix != ".md":
                print("Copying ", path)
                shutil.copyfile(path, dest)
                continue

            # Preprocess markdown into html
            print("Converting", path)
            md.reset()
            dest = dest.with_suffix(".html")
            content = md.convert(path.read_text(encoding="utf8"))
            meta = md.Meta or {}

            # Inject content into the template
            page_content = bs4.BeautifulSoup(content, features="html.parser")
            page = copy.copy(template)
            article = page.new_tag("article")
            article.append(page_content)
            page.article.replace_with(article)

            # Rewrite links with markdown extensions
            for a in page.css.select("a[href]"):
                if a["href"].endswith(".md"):
                    a["href"] = a["href"][:-3] + ".html"

            # Inject path into the nav: one link per ancestor directory,
            # each followed by a "/" separator
            for depth, part in enumerate(dirpath.parts, start=1):
                a = page.new_tag("a")
                a["href"] = "/" + "/".join(dirpath.parts[:depth]) + "/"
                a.string = part
                page.nav.append(a)
                page.nav.append(page.new_string("/"))

            # Apply metadata to the template
            meta_title = meta.get("title")
            if meta_title:
                page.title.string = meta_title[0]
                page.header.h1.string = meta_title[0]

            if meta_date := meta.get("date"):
                page.header.append(_metadata_paragraph(page, "Date: " + meta_date[0]))

            if meta_author := meta.get("author"):
                page.header.append(_metadata_paragraph(page, "Author: " + meta_author[0]))

            for source_url in meta.get("source") or ():
                a = page.new_tag("a")
                a["href"] = source_url
                a.string = source_url
                p = _metadata_paragraph(page, "URL: ")
                p.append(a)
                page.header.append(p)

            for comment in meta.get("comment") or ():
                aside = page.new_tag("aside")
                comment_md.reset()
                comment_html = bs4.BeautifulSoup(
                    comment_md.convert(comment), features="html.parser")
                # Use the first paragraph's contents; fall back to the whole
                # fragment when the comment rendered without a <p>.
                container = comment_html.p if comment_html.p is not None else comment_html
                # Snapshot the children: extend() moves them out of `container`.
                aside.extend(list(container.contents))
                page.header.append(aside)

            # RSS metadata
            if "feed" in meta and "pubdate" in meta:
                pubdate = datetime.fromisoformat(meta["pubdate"][0])
                if pubdate.tzinfo is None:
                    # feedgen rejects naive datetimes, and naive values cannot
                    # be sorted against aware ones; assume UTC when no offset
                    # was given.
                    pubdate = pubdate.replace(tzinfo=timezone.utc)
                link = f"https://www.alogoulogoi.com/{dest.relative_to(out).as_posix()}"
                # A page may be in a feed without declaring a title.
                item_title = meta_title[0] if meta_title else dest.stem
                for feed in meta["feed"]:
                    feeds.setdefault(feed, []).append({
                        "title": item_title,
                        "link": link,
                        "description": "",
                        "pubdate": pubdate,
                    })

            # Write the fully templated page (UTF-8, matching how sources are read)
            print("Writing ", dest)
            dest.write_text(str(page), encoding="utf8")

    _write_feeds(feeds, out, build_date)

    print("Processed", count, "files")
|
|
|
|
|
2023-09-11 22:38:24 +00:00
|
|
|
|
|
|
|
# Script entry point: build the site only when this file is executed directly.
if __name__ == "__main__":
    main()
|