www/build.py

#!/usr/bin/env python

import argparse
from datetime import datetime, timezone
import copy
import os
import pathlib
import shutil
import subprocess

import bs4
from feedgen.feed import FeedGenerator
import markdown


def get_pubdate(src_path):
    """Return the date on which a source file was first added to git.

    Runs ``git log --follow --diff-filter=A --format=%aI -- src_path`` and
    parses the author date of the oldest commit that added the file.

    Args:
        src_path: Path of the file, passed to ``git log`` as the pathspec.

    Returns:
        A ``datetime`` parsed from git's strict ISO 8601 author date.

    Raises:
        subprocess.CalledProcessError: If ``git log`` exits non-zero.
        ValueError: If git reports no commit adding the file (for example
            because it has not been committed yet).
    """
    result = subprocess.run(
        ["git", "log", "--follow", "--diff-filter=A", "--format=%aI", "--", src_path],
        capture_output=True,
        text=True,
        check=True,
    )
    # One date per matching commit. A path can be added more than once
    # (e.g. deleted and later re-added); git log prints newest first, so the
    # last entry is the original publication date.
    dates = result.stdout.split()
    if not dates:
        raise ValueError(
            f"no git commit adds {src_path!r}; "
            "commit the file before building its feed entry"
        )
    return datetime.fromisoformat(dates[-1])


def _write_feeds(out, feeds, build_date):
    """Write one RSS ``feed.xml`` per collected feed under ``out/<feed>/``.

    Args:
        out: Output directory as a ``pathlib.Path``.
        feeds: Map of feed name -> list of item dicts with the keys
            ``title``, ``link``, ``description`` and ``pubdate``.
        build_date: Timestamp recorded as each feed's last build date.
    """
    for feed, items in feeds.items():
        fg = FeedGenerator()
        fg.title(f"alogoulogoi /{feed}/")
        fg.link(href=f"https://www.alogoulogoi.com/{feed}/feed.xml")
        fg.description("Posts from alogoulogoi")
        fg.language("en-us")
        fg.lastBuildDate(build_date)
        for item in sorted(items, key=lambda i: i["pubdate"]):
            entry = fg.add_entry()
            entry.title(item["title"])
            entry.link(href=item["link"])
            entry.description(item["description"])
            entry.published(item["pubdate"])
        rss_path = out / feed / "feed.xml"
        os.makedirs(rss_path.parent, exist_ok=True)
        rss_path.write_bytes(fg.rss_str(pretty=True))


def main():
    """Build the site from ``src/`` into the output directory given on argv.

    Walks ``src/``, skipping dotfiles. Markdown files (``.md``) are converted
    to HTML, injected into ``src/.template.html`` and written with an
    ``.html`` suffix; every other file is copied unchanged. Pages whose
    metadata contains a ``feed`` key are also collected into RSS feeds,
    which are written once all pages have been processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("out", help="output directory")
    args = parser.parse_args()

    src = pathlib.Path("src")
    out = pathlib.Path(args.out)

    md = markdown.Markdown(extensions=[
        # Set HTML attributes with {#id}
        "attr_list",
        # Footnotes [^1]
        "footnotes",
        # Parse markdown within HTML[markdown] blocks
        "md_in_html",
        # "YAML" frontmatter metadata
        "meta",
    ])

    # Map of feed name -> list of item dicts (title, link, description, pubdate)
    feeds = {}
    build_date = datetime.now(timezone.utc)

    # Load the template
    template = bs4.BeautifulSoup(
        (src / ".template.html").read_text(encoding="utf8"),
        features="html.parser")

    count = 0
    for dirpath, _, filenames in os.walk(src):
        dirpath = pathlib.Path(dirpath).relative_to(src)
        for filename in filenames:
            if filename.startswith("."):
                continue  # Skip dotfiles
            count += 1

            path = src / dirpath / filename
            dest = out / dirpath / filename
            os.makedirs(dest.parent, exist_ok=True)

            # Copy any file types without content processing. Checking the
            # real suffix means an extensionless file that happens to be
            # named "md" is copied instead of being silently dropped.
            if path.suffix != ".md":
                print("Copying   ", path)
                shutil.copyfile(path, dest)
                continue

            # Preprocess markdown into html
            print("Converting", path)
            md.reset()  # Clear state left over from the previous document
            dest = dest.with_suffix(".html")
            content = md.convert(path.read_text(encoding="utf8"))
            meta = md.Meta or {}

            # Inject content into the template
            page_content = bs4.BeautifulSoup(content, features="html.parser")
            page = copy.copy(template)
            article = page.new_tag("article")
            article.append(page_content)
            page.article.replace_with(article)

            # Rewrite links with markdown extensions
            for a in page.css.select("a[href]"):
                if a["href"].endswith(".md"):
                    a["href"] = a["href"][:-3] + ".html"

            # Inject path into the nav: one link per ancestor directory
            for depth, part in enumerate(dirpath.parts, start=1):
                a = page.new_tag("a")
                a["href"] = "/" + "/".join(dirpath.parts[:depth]) + "/"
                a.string = part
                page.nav.append(a)
                page.nav.append(page.new_string("/"))

            # Apply metadata to the template
            if meta_title := meta.get("title"):
                page.title.string = meta_title[0]
                page.header.h1.string = meta_title[0]

            # RSS metadata
            if "feed" in meta:
                pubdate = get_pubdate(path.as_posix())
                link = f"https://www.alogoulogoi.com/{dest.relative_to(out).as_posix()}"
                # A page may join a feed without declaring a title; fall
                # back to the file stem rather than crashing on None.
                feed_title = meta_title[0] if meta_title else path.stem
                for feed in meta["feed"]:
                    feeds.setdefault(feed, []).append({
                        "title": feed_title,
                        "link": link,
                        "description": "",
                        "pubdate": pubdate,
                    })

            # Write the fully templated page. Sources are read as UTF-8, so
            # write UTF-8 too instead of relying on the locale encoding.
            print("Writing   ", dest)
            dest.write_text(str(page), encoding="utf8")

    _write_feeds(out, feeds, build_date)

    print("Processed", count, "files")


# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Initial commit 2023-09-11 22:38:24 +00:00			`#!/usr/bin/env python`

			`import argparse`
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`from datetime import datetime, timezone`
Initial commit 2023-09-11 22:38:24 +00:00			`import copy`
			`import os`
			`import pathlib`
			`import shutil`
Derive blog post pubdate from git instead of metadata 2024-06-25 20:59:10 +00:00			`import subprocess`
Initial commit 2023-09-11 22:38:24 +00:00
			`import bs4`
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`from feedgen.feed import FeedGenerator`
Initial commit 2023-09-11 22:38:24 +00:00			`import markdown`


Derive blog post pubdate from git instead of metadata 2024-06-25 20:59:10 +00:00			`def get_pubdate(src_path):`
			`pubdate = subprocess.run(`
Move /blog to /post 2024-08-30 16:38:18 +00:00			`["git", "log", "--follow", "--diff-filter=A", "--format=%aI", "--", src_path],`
Derive blog post pubdate from git instead of metadata 2024-06-25 20:59:10 +00:00			`capture_output=True,`
			`text=True,`
			`check=True,`
			`)`
			`return datetime.fromisoformat(pubdate.stdout.strip())`


Initial commit 2023-09-11 22:38:24 +00:00			`def main():`
			`parser = argparse.ArgumentParser()`
Build to publish in a separate path 2023-09-20 04:29:41 +00:00			`parser.add_argument("out", help="output directory")`
Initial commit 2023-09-11 22:38:24 +00:00			`args = parser.parse_args()`

Build to publish in a separate path 2023-09-20 04:29:41 +00:00			`src = pathlib.Path("src")`
use relative paths in build 2023-09-12 00:15:40 +00:00			`out = pathlib.Path(args.out)`
Initial commit 2023-09-11 22:38:24 +00:00
Describe pymarkdown extensions 2023-12-18 17:50:25 +00:00			`md = markdown.Markdown(extensions=[`
			`# Set HTML attributes with {#id}`
			`"attr_list",`
			`# Footnotes [^1]`
			`"footnotes",`
			`# Parse markdown within HTML[markdown] blocks`
			`"md_in_html",`
			`# "YAML" frontmatter metadata`
			`"meta",`
			`])`
Support basic markdown in meta.comment 2023-09-13 00:31:04 +00:00			`comment_md = markdown.Markdown()`
Initial commit 2023-09-11 22:38:24 +00:00
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`# Map of feed url -> FeedGenerator object`
			`feeds = {}`
			`build_date = datetime.now(timezone.utc)`

Initial commit 2023-09-11 22:38:24 +00:00			`# Load the template`
			`template = bs4.BeautifulSoup(`
			`(src / ".template.html").read_text(encoding="utf8"),`
			`features="html.parser")`

Add footnote extension support 2023-09-11 23:17:14 +00:00			`count = 0`
use relative paths in build 2023-09-12 00:15:40 +00:00			`for dirpath, _, filenames in os.walk(src):`
			`dirpath = pathlib.Path(dirpath).relative_to(src)`
Initial commit 2023-09-11 22:38:24 +00:00			`for filename in filenames:`
			`if filename[0] == ".":`
			`continue # Skip dotfiles`
Add footnote extension support 2023-09-11 23:17:14 +00:00			`count += 1`
Initial commit 2023-09-11 22:38:24 +00:00
			`path = src / dirpath / filename`
			`dest = out / dirpath / filename`
			`os.makedirs(dest.parent, exist_ok=True)`

Rewrite horse philosophy page to markdown 2023-09-12 02:02:00 +00:00			`# Copy any file types without content processing`
Add LD and SL3 pages 2023-09-12 17:18:53 +00:00			`if filename.rsplit(".")[-1] not in ("md",):`
Rewrite horse philosophy page to markdown 2023-09-12 02:02:00 +00:00			`print("Copying ", path)`
			`shutil.copyfile(path, dest)`
			`continue`

Initial commit 2023-09-11 22:38:24 +00:00			`# Preprocess markdown into html`
			`if dest.name.endswith(".md"):`
			`print("Converting", path)`
Add footnote extension support 2023-09-11 23:17:14 +00:00			`md.reset()`
Initial commit 2023-09-11 22:38:24 +00:00			`dest = dest.with_suffix(".html")`
Add LD and SL3 pages 2023-09-12 17:18:53 +00:00			`content = md.convert(path.read_text(encoding="utf8"))`
			`meta = md.Meta or {}`
Initial commit 2023-09-11 22:38:24 +00:00
Allow txt content and deny robots 2023-09-12 00:48:03 +00:00			`# Inject content into the template`
			`page_content = bs4.BeautifulSoup(content, features="html.parser")`
			`page = copy.copy(template)`
			`article = page.new_tag("article")`
			`article.append(page_content)`
			`page.article.replace_with(article)`

Rewrite md->html links automatically 2023-09-12 22:14:31 +00:00			`# Rewrite links with markdown extensions`
			`for a in page.css.select("a[href]"):`
			`if a["href"].endswith(".md"):`
			`a["href"] = a["href"][:-3] + ".html"`

Allow txt content and deny robots 2023-09-12 00:48:03 +00:00			`# Inject path into the nav`
			`for i in range(len(dirpath.parts)):`
			`a = page.new_tag("a")`
			`a["href"] = "/" + "/".join(dirpath.parts[:i+1]) + "/"`
			`a.string = dirpath.parts[i]`
			`page.nav.append(a)`
			`page.nav.append(page.new_string("/"))`

			`# Apply metadata to the template`
			`if meta_title := meta.get("title"):`
Surface metadata in header 2023-09-12 04:55:35 +00:00			`page.title.string = meta_title[0]`
			`page.header.h1.string = meta_title[0]`

Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`# RSS metadata`
Derive blog post pubdate from git instead of metadata 2024-06-25 20:59:10 +00:00			`if "feed" in meta:`
			`pubdate = get_pubdate(path.as_posix())`
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`link = f"https://www.alogoulogoi.com/{dest.relative_to(out).as_posix()}"`
			`for feed in meta["feed"]:`
			`if feed not in feeds:`
			`feeds[feed] = []`
			`feeds[feed].append({`
			`"title": meta_title[0],`
			`"link": link,`
			`"description": "",`
			`"pubdate": pubdate,`
			`})`

Add LD and SL3 pages 2023-09-12 17:18:53 +00:00			`# Write the fully templated page`
			`print("Writing ", dest)`
			`dest.write_text(str(page))`
Initial commit 2023-09-11 22:38:24 +00:00
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`for feed, items in feeds.items():`
			`fg = FeedGenerator()`
			`fg.title(f"alogoulogoi /{feed}/")`
			`fg.link(href=f"https://www.alogoulogoi.com/{feed}/feed.xml")`
Move /blog to /post 2024-08-30 16:38:18 +00:00			`fg.description("Posts from alogoulogoi")`
Add RSS feed functionality 2023-09-21 16:16:43 +00:00			`fg.language("en-us")`
			`fg.lastBuildDate(build_date)`
			`for item in sorted(items, key=lambda i: i["pubdate"]):`
			`entry = fg.add_entry()`
			`entry.title(item["title"])`
			`entry.link(href=item["link"])`
			`entry.description(item["description"])`
			`entry.published(item["pubdate"])`
			`rss_path = (out / feed / "feed.xml")`
			`os.makedirs(rss_path.parent, exist_ok=True)`
			`rss_path.write_bytes(fg.rss_str(pretty=True))`

Add footnote extension support 2023-09-11 23:17:14 +00:00			`print("Processed", count, "files")`

Initial commit 2023-09-11 22:38:24 +00:00
			`if __name__ == "__main__":`
			`main()`