#!/usr/bin/env python
import argparse
import copy
import os
import pathlib
import shutil
import subprocess
from datetime import datetime, timezone

import bs4
from feedgen.feed import FeedGenerator
import markdown


def get_pubdate(src_path):
    # Ask git for the ISO 8601 author date of the commit that added the file
    pubdate = subprocess.run(
        ["git", "log", "--follow", "--diff-filter=A", "--format=%aI", "--", src_path],
        capture_output=True,
        text=True,
        check=True,
    )
    return datetime.fromisoformat(pubdate.stdout.strip())


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="output directory")
    args = parser.parse_args()

    src = pathlib.Path("src")
    out = pathlib.Path(args.out)

    md = markdown.Markdown(extensions=[
        # Set HTML attributes with {#id}
        "attr_list",
        # Footnotes [^1]
        "footnotes",
        # Parse markdown within HTML blocks marked markdown="1"
        "md_in_html",
        # "YAML" frontmatter metadata
        "meta",
    ])

    # Map of feed name -> list of entry dicts, rendered to RSS at the end
    feeds = {}
    build_date = datetime.now(timezone.utc)

    # Load the template
    template = bs4.BeautifulSoup(
        (src / ".template.html").read_text(encoding="utf8"),
        features="html.parser")

    count = 0
    for dirpath, _, filenames in os.walk(src):
        dirpath = pathlib.Path(dirpath).relative_to(src)
        for filename in filenames:
            if filename[0] == ".":
                continue  # Skip dotfiles
            count += 1
            path = src / dirpath / filename
            dest = out / dirpath / filename
            os.makedirs(dest.parent, exist_ok=True)

            # Copy any file types without content processing
            if filename.rsplit(".")[-1] not in ("md",):
                print("Copying   ", path)
                shutil.copyfile(path, dest)
                continue

            # Only markdown files reach this point; convert them to HTML
            print("Converting", path)
            md.reset()  # Clear per-document state (footnotes, metadata)
            dest = dest.with_suffix(".html")
            content = md.convert(path.read_text(encoding="utf8"))
            meta = md.Meta or {}

            # Inject content into the template
            page_content = bs4.BeautifulSoup(content, features="html.parser")
            page = copy.copy(template)
            article = page.new_tag("article")
            article.append(page_content)
            page.article.replace_with(article)

            # Rewrite links to markdown sources so they point at the
            # generated pages instead
            for a in page.css.select("a[href]"):
                if a["href"].endswith(".md"):
                    a["href"] = a["href"][:-3] + ".html"

            # Inject the breadcrumb path into the nav, one link per directory
            for i in range(len(dirpath.parts)):
                a = page.new_tag("a")
                a["href"] = "/" + "/".join(dirpath.parts[:i + 1]) + "/"
                a.string = dirpath.parts[i]
                page.nav.append(a)
                page.nav.append(page.new_string("/"))

            # Apply metadata to the template
            if meta_title := meta.get("title"):
                page.title.string = meta_title[0]
                page.header.h1.string = meta_title[0]

            # RSS metadata (assumes any post with a "feed:" key also has a title)
            if "feed" in meta:
                pubdate = get_pubdate(path.as_posix())
                link = f"https://www.alogoulogoi.com/{dest.relative_to(out).as_posix()}"
                for feed in meta["feed"]:
                    feeds.setdefault(feed, []).append({
                        "title": meta_title[0],
                        "link": link,
                        "description": "",
                        "pubdate": pubdate,
                    })

            # Write the fully templated page
            print("Writing   ", dest)
            dest.write_text(str(page), encoding="utf8")

    for feed, items in feeds.items():
        fg = FeedGenerator()
        fg.title(f"alogoulogoi /{feed}/")
        fg.link(href=f"https://www.alogoulogoi.com/{feed}/feed.xml")
        fg.description("Posts from alogoulogoi")
        fg.language("en-us")
        fg.lastBuildDate(build_date)
        for item in sorted(items, key=lambda i: i["pubdate"]):
            entry = fg.add_entry()
            entry.title(item["title"])
            entry.link(href=item["link"])
            entry.description(item["description"])
            entry.published(item["pubdate"])
        rss_path = out / feed / "feed.xml"
        os.makedirs(rss_path.parent, exist_ok=True)
        rss_path.write_bytes(fg.rss_str(pretty=True))

    print("Processed", count, "files")


if __name__ == "__main__":
    main()
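
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the template and frontmatter below are
# assumptions inferred from the lookups above (page.article, page.nav,
# meta["title"], meta["feed"]); the real files may differ in detail.
#
#   $ ./build.py out
#
#   src/.template.html -- a minimal form the code can fill in:
#       <html>
#         <head><title></title></head>
#         <body>
#           <header><h1></h1></header>
#           <nav><a href="/">home</a></nav>
#           <article></article>
#         </body>
#       </html>
#
#   src/essays/example.md -- "meta" extension frontmatter, read via md.Meta:
#       title: An example post
#       feed: essays
#
#       Post body in markdown...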