www/build.py

#!/usr/bin/env python

import argparse
import copy
import os
import pathlib
import shutil

import bs4
import markdown


def _apply_metadata(page, meta):
    """Surface document metadata in the templated page's title and header.

    ``meta`` maps metadata keys to sequences of strings (as produced by the
    Markdown "meta" extension); it is empty for plain html sources, in which
    case the page is left untouched.  The template is expected to provide
    ``<title>`` and ``<header>`` (containing an ``<h1>``) elements whenever
    the corresponding metadata keys are present.
    """
    if meta_title := meta.get("title"):
        page.title.string = meta_title[0]
        page.header.h1.string = meta_title[0]

    # Single-valued fields: only the first value is shown.
    for key, label in (("date", "Date: "), ("author", "Author: ")):
        if values := meta.get(key):
            p = page.new_tag("p")
            p["class"] = "metadata"
            p.string = label + values[0]
            page.header.append(p)

    # Every source URL gets its own linked metadata line.
    for source_url in meta.get("source") or ():
        a = page.new_tag("a")
        a["href"] = source_url
        a.string = source_url
        p = page.new_tag("p")
        p["class"] = "metadata"
        p.string = "URL: "
        p.append(a)
        page.header.append(p)


def _render_page(template, content, meta, dirpath):
    """Return the full html for one page as a string.

    A copy of ``template`` is used so the shared template is never mutated.
    The template's ``<article>`` element is replaced by one holding
    ``content``, one breadcrumb link per component of ``dirpath`` (relative
    to the source root) is appended to its ``<nav>`` element, and ``meta``
    is applied to the title and header.
    """
    page_content = bs4.BeautifulSoup(content, features="html.parser")
    page = copy.copy(template)
    article = page.new_tag("article")
    article.append(page_content)
    page.article.replace_with(article)

    # Inject path into the nav: each link points at the cumulative directory
    for depth, part in enumerate(dirpath.parts, start=1):
        a = page.new_tag("a")
        a["href"] = "/" + "/".join(dirpath.parts[:depth]) + "/"
        a.string = part
        page.nav.append(a)
        page.nav.append(page.new_string("/"))

    _apply_metadata(page, meta)
    return str(page)


def main():
    """Build the site from a source directory into an output directory.

    Command-line arguments (both optional positionals):
        src: source directory, default ``./src``
        out: output directory, default ``./out``

    The output directory is removed if it exists.  Every non-dotfile found
    under the source directory is then either copied verbatim or, for
    ``.html`` and ``.md`` files, rendered into ``src/.template.html`` and
    written as ``.html``.  Markdown files are converted to html first and
    their metadata is surfaced in the page header.  Pages are read and
    written as UTF-8.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("src", help="source directory", nargs="?", default="./src")
    parser.add_argument("out", help="output directory", nargs="?", default="./out")
    args = parser.parse_args()

    src = pathlib.Path(args.src)
    out = pathlib.Path(args.out)

    md = markdown.Markdown(extensions=["attr_list", "footnotes", "md_in_html", "meta"])

    # Clean the output directory
    if out.exists():
        print("Removing  ", out)
        shutil.rmtree(out)

    # Load the template
    template = bs4.BeautifulSoup(
        (src / ".template.html").read_text(encoding="utf8"),
        features="html.parser")

    count = 0
    for dirpath, _, filenames in os.walk(src):
        dirpath = pathlib.Path(dirpath).relative_to(src)
        for filename in filenames:
            if filename.startswith("."):
                continue  # Skip dotfiles
            count += 1

            path = src / dirpath / filename
            dest = out / dirpath / filename
            dest.parent.mkdir(parents=True, exist_ok=True)

            # Copy any file types without content processing.  Comparing the
            # real suffix keeps extensionless files named "html" or "md" from
            # being mistaken for pages.
            suffix = path.suffix
            if suffix not in (".html", ".md"):
                print("Copying   ", path)
                shutil.copyfile(path, dest)
                continue

            content = path.read_text(encoding="utf8")
            meta = {}

            # Preprocess markdown into html
            if suffix == ".md":
                print("Converting", path)
                md.reset()  # Drop footnotes/metadata left over from the previous file
                dest = dest.with_suffix(".html")
                content = md.convert(content)
                meta = md.Meta

            # The fully templated page is the new content
            content = _render_page(template, content, meta, dirpath)

            # Write the destination file with the same encoding it was read
            # in, rather than the platform's locale default.
            print("Writing   ", dest)
            dest.write_text(content, encoding="utf8")

    print("Processed", count, "files")


# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Initial commit 2023-09-11 22:38:24 +00:00			`#!/usr/bin/env python`

			`import argparse`
			`import copy`
			`import os`
			`import pathlib`
			`import shutil`

			`import bs4`
			`import markdown`


			`def main():`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument("src", help="source directory", nargs="?", default="./src")`
			`parser.add_argument("out", help="output directory", nargs="?", default="./out")`
			`args = parser.parse_args()`

use relative paths in build 2023-09-12 00:15:40 +00:00			`src = pathlib.Path(args.src)`
			`out = pathlib.Path(args.out)`
Initial commit 2023-09-11 22:38:24 +00:00
Rewrite horse philosophy page to markdown 2023-09-12 02:02:00 +00:00			`md = markdown.Markdown(extensions=["attr_list", "footnotes", "md_in_html", "meta"])`
Initial commit 2023-09-11 22:38:24 +00:00
			`# Clean the output directory`
			`if out.exists():`
			`print("Removing ", out)`
			`shutil.rmtree(out)`

			`# Load the template`
			`template = bs4.BeautifulSoup(`
			`(src / ".template.html").read_text(encoding="utf8"),`
			`features="html.parser")`

Add footnote extension support 2023-09-11 23:17:14 +00:00			`count = 0`
use relative paths in build 2023-09-12 00:15:40 +00:00			`for dirpath, _, filenames in os.walk(src):`
			`dirpath = pathlib.Path(dirpath).relative_to(src)`
Initial commit 2023-09-11 22:38:24 +00:00			`for filename in filenames:`
			`if filename[0] == ".":`
			`continue # Skip dotfiles`
Add footnote extension support 2023-09-11 23:17:14 +00:00			`count += 1`
Initial commit 2023-09-11 22:38:24 +00:00
			`path = src / dirpath / filename`
			`dest = out / dirpath / filename`
			`os.makedirs(dest.parent, exist_ok=True)`

Rewrite horse philosophy page to markdown 2023-09-12 02:02:00 +00:00			`# Copy any file types without content processing`
			`if filename.rsplit(".")[-1] not in ("html", "md"):`
			`print("Copying ", path)`
			`shutil.copyfile(path, dest)`
			`continue`

Initial commit 2023-09-11 22:38:24 +00:00			`content = path.read_text(encoding="utf8")`
			`meta = {}`

			`# Preprocess markdown into html`
			`if dest.name.endswith(".md"):`
			`print("Converting", path)`
Add footnote extension support 2023-09-11 23:17:14 +00:00			`md.reset()`
Initial commit 2023-09-11 22:38:24 +00:00			`dest = dest.with_suffix(".html")`
			`content = md.convert(content)`
			`meta = md.Meta`

Allow txt content and deny robots 2023-09-12 00:48:03 +00:00			`if dest.name.endswith("html"):`
			`# Inject content into the template`
			`page_content = bs4.BeautifulSoup(content, features="html.parser")`
			`page = copy.copy(template)`
			`article = page.new_tag("article")`
			`article.append(page_content)`
			`page.article.replace_with(article)`

			`# Inject path into the nav`
			`for i in range(len(dirpath.parts)):`
			`a = page.new_tag("a")`
			`a["href"] = "/" + "/".join(dirpath.parts[:i+1]) + "/"`
			`a.string = dirpath.parts[i]`
			`page.nav.append(a)`
			`page.nav.append(page.new_string("/"))`

			`# Apply metadata to the template`
			`if meta_title := meta.get("title"):`
Surface metadata in header 2023-09-12 04:55:35 +00:00			`page.title.string = meta_title[0]`
			`page.header.h1.string = meta_title[0]`

			`if meta_date := meta.get("date"):`
			`p = page.new_tag("p")`
			`p["class"] = "metadata"`
			`p.string = "Date: " + meta_date[0]`
			`page.header.append(p)`

			`if meta_author := meta.get("author"):`
			`p = page.new_tag("p")`
			`p["class"] = "metadata"`
			`p.string = "Author: " + meta_author[0]`
			`page.header.append(p)`

			`if meta_source := meta.get("source"):`
			`for source_url in meta_source:`
			`a = page.new_tag("a")`
			`a["href"] = source_url`
			`a.string = source_url`
			`p = page.new_tag("p")`
			`p["class"] = "metadata"`
			`p.string = "URL: "`
			`p.append(a)`
			`page.header.append(p)`
Allow txt content and deny robots 2023-09-12 00:48:03 +00:00
			`# The fully templated page is the new content`
			`content = str(page)`
Initial commit 2023-09-11 22:38:24 +00:00
			`# Write the destination file`
			`print("Writing ", dest)`
Allow txt content and deny robots 2023-09-12 00:48:03 +00:00			`dest.write_text(content)`
Initial commit 2023-09-11 22:38:24 +00:00
Add footnote extension support 2023-09-11 23:17:14 +00:00			`print("Processed", count, "files")`

Initial commit 2023-09-11 22:38:24 +00:00
			`if __name__ == "__main__":`
			`main()`