Finish index document building

This commit is contained in:
Tim Van Baak 2021-02-11 18:09:29 -08:00
parent 1cf820377d
commit aac4c8477f
2 changed files with 69 additions and 14 deletions

View File

@ -2,7 +2,6 @@
Logic for operations that depend on a whole collection of documents.
"""
import os
from pathlib import Path
from redstring.parser import load, TagOptions, DocumentTag, TabOptions, DocumentTab, Document
def _tag_value(document: Document, name: str, default=None):
    """
    Return the value of the first tag named `name` in `document`.

    Falls back to `default` when the tag is missing or its value is empty.
    """
    tag = document.get_tag(name)
    if tag is None or not tag.value:
        return default
    return tag.value


def generate_index_document(directory: str) -> Document:
    """
    Generate a document describing a document collection.

    Every regular file in `directory` is loaded as a document and filed
    under the tab named by its 'category' tag (default 'index') and the tag
    named by the top-level part of its 'topic' tag (default
    'uncategorized'). Each generated tag holds one link per document,
    sorted by document id.

    Raises ValueError if a document has no 'id' tag.
    """
    from html import escape

    # category name -> topic name -> [(doc_id, doc_title), ...]
    categories: dict = {}
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        # os.listdir also yields sub-directories, which cannot be opened.
        if not os.path.isfile(path):
            continue
        with open(path) as f:
            document: Document = load(f)

        # Group on the tag *values*; get_tag returns the tag object itself,
        # which is not usable as a name or as a sortable dictionary key.
        category = _tag_value(document, 'category', 'index')
        # Only the part before the first '.' selects the topic.
        topic = _tag_value(document, 'topic', 'uncategorized')
        topic = topic.split('.', maxsplit=1)[0]

        doc_id = _tag_value(document, 'id')
        if doc_id is None:
            raise ValueError(f'Document {path!r} has no id tag')
        doc_title = _tag_value(document, 'title')

        topics = categories.setdefault(category, {})
        topics.setdefault(topic, []).append((doc_id, doc_title))

    def document_link(info):
        doc_id, doc_title = info
        # Ids and titles come from document content and are embedded in
        # HTML markup here, so they are escaped.
        href = escape(f'/doc/{doc_id}')
        label = f'{doc_title} ({doc_id})' if doc_title else f'{doc_id}'
        return f'<a href="{href}">{escape(label)}</a>'

    # Build an index document
    built_tabs: list = []
    for category in sorted(categories):
        built_tags: list = []
        for topic in sorted(categories[category]):
            docs = sorted(categories[category][topic], key=lambda info: info[0])
            value = '- ' + '<br>- '.join(map(document_link, docs))
            built_tags.append(DocumentTag(topic, value, TagOptions(), []))
        built_tabs.append(DocumentTab(category, built_tags, TabOptions()))
    return Document(built_tabs)

View File

@ -130,6 +130,12 @@ class DocumentTab:
self.tags: List[DocumentTag] = tags
self.options: TabOptions = options
def get_tag(self, name: str):
    """
    Look up a tag of this tab by name.

    Returns the first tag in `self.tags` whose name equals `name`, or None
    when no tag matches.
    """
    return next(
        (candidate for candidate in self.tags if candidate.name == name),
        None,
    )
class Document:
"""
@ -141,7 +147,13 @@ class Document:
def __iter__(self):
return self.tabs.__iter__()
def get(self, name: str):
def get_tab(self, name: str):
    """
    Look up a tab of this document by name.

    Returns the first tab in `self.tabs` whose name equals `name`, or None
    when no tab matches.
    """
    matching_tabs = (entry for entry in self.tabs if entry.name == name)
    return next(matching_tabs, None)
def get_tag(self, name: str):
for tab in self.tabs:
for tag in tab.tags:
if tag.name == name: