Finish index document building

This commit is contained in:
Tim Van Baak 2021-02-11 18:09:29 -08:00
parent 1cf820377d
commit aac4c8477f
2 changed files with 69 additions and 14 deletions

View File

@ -2,7 +2,6 @@
Logic for operations that depend on a whole collection of documents. Logic for operations that depend on a whole collection of documents.
""" """
import os import os
from pathlib import Path
from redstring.parser import load, TagOptions, DocumentTag, TabOptions, DocumentTab, Document from redstring.parser import load, TagOptions, DocumentTag, TabOptions, DocumentTab, Document
@ -11,16 +10,60 @@ def generate_index_document(directory: str) -> Document:
""" """
Generate a document describing a document collection. Generate a document describing a document collection.
""" """
dirpath = Path(directory) categories: dict = {}
document_info: list = []
for filename in os.listdir(dirpath):
with open(dirpath / filename) as f:
document = load(f)
document_id = document.get('id')
title = document.get('title')
document_info.append((document_id, title))
tag: DocumentTag = DocumentTag('Unsorted', str(document_info), TagOptions(), []) for filename in os.listdir(directory):
tab: DocumentTab = DocumentTab('index', [tag], TabOptions()) with open(os.path.join(directory, filename)) as f:
doc: Document = Document([tab]) document: Document = load(f)
return doc
# Work out which tab this document is filed under. get_tag returns the tag
# object (or None), so the tab name has to be read from the tag's value
# before it is used as a dictionary key and, later, as the tab name.
category_tag = document.get_tag('category')
category = category_tag.value if category_tag is not None else None
if not category:
    category = 'index'
category_tab = categories.setdefault(category, {})

# Work out the topic the same way; documents without a topic are grouped
# under 'uncategorized'.
topic_tag = document.get_tag('topic')
topic = topic_tag.value if topic_tag is not None else None
if not topic:
    topic = 'uncategorized'
# A dotted topic has the form "topic.subtopic". Only the part before the
# first dot is used for grouping; the subtopic is not used by the index.
topic = topic.partition('.')[0]
topic_docs = category_tab.setdefault(topic, [])

# Save the title and id.
# NOTE(review): this assumes every document carries an 'id' tag; a document
# without one fails on the attribute access below -- confirm that is intended.
doc_id = document.get_tag('id').value
doc_title_tag = document.get_tag('title')
doc_title = doc_title_tag.value if doc_title_tag is not None else None
topic_docs.append((doc_id, doc_title))
# Build an index document
def document_link(info):
    """
    Render one (doc_id, doc_title) pair as an HTML link to the document.

    The link text is "title (id)" when a title is present and just the id
    otherwise. Both values come from document content, so they are
    HTML-escaped before being placed in the attribute and the link text.
    """
    # Imported locally so the helper carries its own dependency.
    from html import escape

    doc_id, doc_title = info
    safe_id = escape(str(doc_id))
    if doc_title:
        label = f'{escape(str(doc_title))} ({safe_id})'
    else:
        label = safe_id
    return f'<a href="/doc/{safe_id}">{label}</a>'
# One tab per category and one tag per topic, both in sorted name order.
built_tabs: list = []
for tab_name in sorted(categories):
    topics_in_tab = categories[tab_name]
    tab_tags: list = []
    for topic_name in sorted(topics_in_tab):
        # Entries are (doc_id, doc_title) pairs, ordered by id.
        ordered_entries = sorted(topics_in_tab[topic_name], key=lambda entry: entry[0])
        # Each topic's value is a '- ' bulleted list joined with <br>.
        bullet_list = '- ' + '<br>- '.join(
            document_link(entry) for entry in ordered_entries
        )
        tab_tags.append(DocumentTag(topic_name, bullet_list, TagOptions(), []))
    built_tabs.append(DocumentTab(tab_name, tab_tags, TabOptions()))
return Document(built_tabs)

View File

@ -130,6 +130,12 @@ class DocumentTab:
self.tags: List[DocumentTag] = tags self.tags: List[DocumentTag] = tags
self.options: TabOptions = options self.options: TabOptions = options
def get_tag(self, name: str):
    """
    Return the first tag in this tab whose name equals `name`, or None
    when no tag matches.
    """
    matches = (candidate for candidate in self.tags if candidate.name == name)
    return next(matches, None)
class Document: class Document:
""" """
@ -141,7 +147,13 @@ class Document:
def __iter__(self): def __iter__(self):
return self.tabs.__iter__() return self.tabs.__iter__()
def get(self, name: str): def get_tab(self, name: str):
for tab in self.tabs:
if tab.name == name:
return tab
return None
def get_tag(self, name: str):
for tab in self.tabs: for tab in self.tabs:
for tag in tab.tags: for tag in tab.tags:
if tag.name == name: if tag.name == name: