1
0
Fork 0
www/build.py

146 lines
4.8 KiB
Python
Raw Normal View History

2023-09-11 22:38:24 +00:00
#!/usr/bin/env python
import argparse
2023-09-21 16:16:43 +00:00
from datetime import datetime, timezone
2023-09-11 22:38:24 +00:00
import copy
import os
import pathlib
import shutil
import subprocess
2023-09-11 22:38:24 +00:00
import bs4
2023-09-21 16:16:43 +00:00
from feedgen.feed import FeedGenerator
2023-09-11 22:38:24 +00:00
import markdown
def get_pubdate(src_path):
    """Return the timezone-aware datetime at which *src_path* was first
    added to the git repository.

    Uses ``git log --follow --diff-filter=A --format=%aI`` so renames are
    traced back through history.  With ``--follow`` git may print several
    "add" dates (one per rename), newest first, so the *last* line is the
    original creation date.

    Raises:
        subprocess.CalledProcessError: if git exits non-zero.
        ValueError: if git reports no add commit for the path.
    """
    result = subprocess.run(
        ["git", "log", "--follow", "--diff-filter=A", "--format=%aI", "--", src_path],
        capture_output=True,
        text=True,
        check=True,
    )
    # --follow can emit multiple ISO-8601 dates; taking only the last line
    # also prevents fromisoformat() from choking on a multi-line string.
    dates = result.stdout.strip().splitlines()
    if not dates:
        raise ValueError(f"no git history found for {src_path!r}")
    return datetime.fromisoformat(dates[-1].strip())
2023-09-11 22:38:24 +00:00
def main():
    """Build the static site.

    Converts every ``src/**/*.md`` into a templated HTML page, copies all
    other (non-dot) files verbatim into the output directory, and emits one
    RSS feed per feed name listed in page frontmatter metadata.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="output directory")
    args = parser.parse_args()

    src = pathlib.Path("src")
    out = pathlib.Path(args.out)

    md = markdown.Markdown(extensions=[
        # Set HTML attributes with {#id}
        "attr_list",
        # Footnotes [^1]
        "footnotes",
        # Parse markdown within HTML[markdown] blocks
        "md_in_html",
        # "YAML" frontmatter metadata
        "meta",
    ])

    # Map of feed name -> list of item dicts (title/link/description/pubdate)
    feeds = {}
    build_date = datetime.now(timezone.utc)

    # Load the template; each converted page starts as a copy of this soup.
    template = bs4.BeautifulSoup(
        (src / ".template.html").read_text(encoding="utf8"),
        features="html.parser")

    count = 0
    for dirpath, _, filenames in os.walk(src):
        dirpath = pathlib.Path(dirpath).relative_to(src)
        for filename in filenames:
            if filename.startswith("."):
                continue  # Skip dotfiles (including the template itself)
            count += 1
            path = src / dirpath / filename
            dest = out / dirpath / filename
            os.makedirs(dest.parent, exist_ok=True)

            # Copy any non-markdown file without content processing
            if path.suffix != ".md":
                print("Copying ", path)
                shutil.copyfile(path, dest)
                continue

            # Preprocess markdown into an HTML fragment
            print("Converting", path)
            md.reset()  # Clear footnote/metadata state from the previous file
            dest = dest.with_suffix(".html")
            content = md.convert(path.read_text(encoding="utf8"))
            meta = md.Meta or {}
            # "meta" extension values are lists of lines; title may be absent.
            # Resolve it per-file so a title-less page never reuses the
            # previous iteration's (stale) title binding.
            title = meta["title"][0] if meta.get("title") else None

            # Inject content into the template
            page_content = bs4.BeautifulSoup(content, features="html.parser")
            page = copy.copy(template)
            article = page.new_tag("article")
            article.append(page_content)
            page.article.replace_with(article)

            # Rewrite internal links that still point at .md sources
            for a in page.css.select("a[href]"):
                if a["href"].endswith(".md"):
                    a["href"] = a["href"][:-3] + ".html"

            # Inject breadcrumb path into the nav
            for i, part in enumerate(dirpath.parts):
                a = page.new_tag("a")
                a["href"] = "/" + "/".join(dirpath.parts[:i + 1]) + "/"
                a.string = part
                page.nav.append(a)
            page.nav.append(page.new_string("/"))

            # Apply metadata to the template
            if title:
                page.title.string = title
                page.header.h1.string = title

            # RSS metadata: a page may list one or more feed names
            if "feed" in meta:
                pubdate = get_pubdate(path.as_posix())
                link = f"https://www.alogoulogoi.com/{dest.relative_to(out).as_posix()}"
                for feed in meta["feed"]:
                    feeds.setdefault(feed, []).append({
                        # Fall back to the file stem for title-less pages
                        "title": title or dest.stem,
                        "link": link,
                        "description": "",
                        "pubdate": pubdate,
                    })

            # Write the fully templated page
            print("Writing ", dest)
            dest.write_text(str(page), encoding="utf8")

    # Emit one RSS feed per feed name, entries oldest-first
    for feed, items in feeds.items():
        fg = FeedGenerator()
        fg.title(f"alogoulogoi /{feed}/")
        fg.link(href=f"https://www.alogoulogoi.com/{feed}/feed.xml")
        fg.description("Posts from alogoulogoi")
        fg.language("en-us")
        fg.lastBuildDate(build_date)
        for item in sorted(items, key=lambda i: i["pubdate"]):
            entry = fg.add_entry()
            entry.title(item["title"])
            entry.link(href=item["link"])
            entry.description(item["description"])
            entry.published(item["pubdate"])
        rss_path = out / feed / "feed.xml"
        os.makedirs(rss_path.parent, exist_ok=True)
        rss_path.write_bytes(fg.rss_str(pretty=True))

    print("Processed", count, "files")
2023-09-11 22:38:24 +00:00
# Run the build only when executed as a script, so importing this module
# (e.g. for testing) has no side effects.
if __name__ == "__main__":
    main()