Finish index document building
This commit is contained in:
parent
1cf820377d
commit
aac4c8477f
|
@ -2,7 +2,6 @@
|
||||||
Logic for operations that depend on a whole collection of documents.
|
Logic for operations that depend on a whole collection of documents.
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from redstring.parser import load, TagOptions, DocumentTag, TabOptions, DocumentTab, Document
|
from redstring.parser import load, TagOptions, DocumentTag, TabOptions, DocumentTab, Document
|
||||||
|
|
||||||
|
@ -11,16 +10,60 @@ def generate_index_document(directory: str) -> Document:
|
||||||
"""
|
"""
|
||||||
Generate a document describing a document collection.
|
Generate a document describing a document collection.
|
||||||
"""
|
"""
|
||||||
dirpath = Path(directory)
|
categories: dict = {}
|
||||||
document_info: list = []
|
|
||||||
for filename in os.listdir(dirpath):
|
|
||||||
with open(dirpath / filename) as f:
|
|
||||||
document = load(f)
|
|
||||||
document_id = document.get('id')
|
|
||||||
title = document.get('title')
|
|
||||||
document_info.append((document_id, title))
|
|
||||||
|
|
||||||
tag: DocumentTag = DocumentTag('Unsorted', str(document_info), TagOptions(), [])
|
for filename in os.listdir(directory):
|
||||||
tab: DocumentTab = DocumentTab('index', [tag], TabOptions())
|
with open(os.path.join(directory, filename)) as f:
|
||||||
doc: Document = Document([tab])
|
document: Document = load(f)
|
||||||
return doc
|
|
||||||
|
# Check if this document specifies a tab, and create it if necessary.
|
||||||
|
category = document.get_tag('category')
|
||||||
|
if not category:
|
||||||
|
category = 'index'
|
||||||
|
if category not in categories:
|
||||||
|
categories[category] = {}
|
||||||
|
category_tab = categories[category]
|
||||||
|
|
||||||
|
# Check if this document specifies a topic, and create it if necessary.
|
||||||
|
topic = document.get_tag('topic')
|
||||||
|
if not topic:
|
||||||
|
topic = 'uncategorized'
|
||||||
|
if '.' in topic:
|
||||||
|
topic, subtopic = topic.split('.', maxsplit=1)
|
||||||
|
else:
|
||||||
|
subtopic = None
|
||||||
|
if topic not in category_tab:
|
||||||
|
category_tab[topic] = []
|
||||||
|
topic_tag = category_tab[topic]
|
||||||
|
|
||||||
|
# Save the title and id.
|
||||||
|
doc_id = document.get_tag('id').value
|
||||||
|
if doc_title_tag := document.get_tag('title'):
|
||||||
|
doc_title = doc_title_tag.value
|
||||||
|
else:
|
||||||
|
doc_title = None
|
||||||
|
|
||||||
|
topic_tag.append((doc_id, doc_title))
|
||||||
|
|
||||||
|
# Build an index document
|
||||||
|
def document_link(info):
|
||||||
|
doc_id, doc_title = info
|
||||||
|
return (
|
||||||
|
f'<a href="/doc/{doc_id}">{doc_title} ({doc_id})</a>'
|
||||||
|
if doc_title else
|
||||||
|
f'<a href="/doc/{doc_id}">{doc_id}</a>'
|
||||||
|
)
|
||||||
|
|
||||||
|
built_tabs: list = []
|
||||||
|
for category in sorted(categories.keys()):
|
||||||
|
built_tags: list = []
|
||||||
|
|
||||||
|
for topic in sorted(categories[category].keys()):
|
||||||
|
docs = sorted(categories[category][topic], key=lambda x: x[0])
|
||||||
|
doc_links = map(document_link, docs)
|
||||||
|
value = '- ' + '<br>- '.join(doc_links)
|
||||||
|
built_tags.append(DocumentTag(topic, value, TagOptions(), []))
|
||||||
|
|
||||||
|
built_tabs.append(DocumentTab(category, built_tags, TabOptions()))
|
||||||
|
|
||||||
|
return Document(built_tabs)
|
||||||
|
|
|
@ -130,6 +130,12 @@ class DocumentTab:
|
||||||
self.tags: List[DocumentTag] = tags
|
self.tags: List[DocumentTag] = tags
|
||||||
self.options: TabOptions = options
|
self.options: TabOptions = options
|
||||||
|
|
||||||
|
def get_tag(self, name: str):
|
||||||
|
for tag in self.tags:
|
||||||
|
if tag.name == name:
|
||||||
|
return tag
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class Document:
|
class Document:
|
||||||
"""
|
"""
|
||||||
|
@ -141,7 +147,13 @@ class Document:
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return self.tabs.__iter__()
|
return self.tabs.__iter__()
|
||||||
|
|
||||||
def get(self, name: str):
|
def get_tab(self, name: str):
|
||||||
|
for tab in self.tabs:
|
||||||
|
if tab.name == name:
|
||||||
|
return tab
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_tag(self, name: str):
|
||||||
for tab in self.tabs:
|
for tab in self.tabs:
|
||||||
for tag in tab.tags:
|
for tag in tab.tags:
|
||||||
if tag.name == name:
|
if tag.name == name:
|
||||||
|
|
Loading…
Reference in New Issue