Add import workflow with deflated object model
parent 7c9c2232e2
commit ecd9d67881
13 inquisitor/configs.py Normal file
@@ -0,0 +1,13 @@
import os
import logging

DUNGEON_PATH = os.path.abspath(os.environ.get("INQUISITOR_DUNGEON") or "./dungeon")
SOURCES_PATH = os.path.abspath(os.environ.get("INQUISITOR_SOURCES") or "./sources")

logger = logging.getLogger("inquisitor")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
formatter = logging.Formatter('[{levelname}] {message}', style="{")
handler.setFormatter(formatter)
logger.addHandler(handler)
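For reference, a minimal sketch of overriding these defaults; the directory paths are placeholders, not part of the commit, and the environment variables must be set before configs is first imported:

# Hypothetical usage: point the dungeon and sources at custom directories.
import os
os.environ["INQUISITOR_DUNGEON"] = "/tmp/inquisitor/dungeon"   # placeholder path
os.environ["INQUISITOR_SOURCES"] = "/tmp/inquisitor/sources"   # placeholder path

import configs
print(configs.DUNGEON_PATH)   # absolute path to the dungeon directory
print(configs.SOURCES_PATH)   # absolute path to the sources directory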
22 inquisitor/error.py Normal file
@@ -0,0 +1,22 @@
import os
import logging
import json
import random

from configs import DUNGEON_PATH, logger

logger = logging.getLogger("inquisitor")

def as_item(title, body=None):
    iid = '{:x}'.format(random.getrandbits(16 * 4))
    item = {
        'id': iid,
        'source': 'inquisitor',
        'title': title,
    }
    if body is not None:
        item['body'] = '<pre>{}</pre>'.format(body)
    path = os.path.join(DUNGEON_PATH, 'inquisitor', iid + ".item")
    logger.error(json.dumps(item))
    with open(path, 'w') as f:
        f.write(json.dumps(item, indent=2))
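This helper is how importer.py (below) reports failures: the traceback is wrapped into an ordinary item in the built-in 'inquisitor' cell, so errors are stored alongside regular items. A minimal sketch, assuming the DUNGEON_PATH/inquisitor directory already exists:

# Sketch of the error-reporting path used by importer.update_sources.
import traceback
import error

try:
    raise ValueError("example failure")   # stand-in for a failing source
except Exception:
    error.as_item("Error updating source 'example'", traceback.format_exc())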
124 inquisitor/importer.py Normal file
@@ -0,0 +1,124 @@
import os
import traceback
import importlib.util
import json

import error
from configs import SOURCES_PATH, DUNGEON_PATH, logger
import loader
import timestamp

def update_sources(*source_names):
    for source_name in source_names:
        try:
            source_module = load_source(source_name)
        except Exception as e:
            error.as_item("Error importing source '{}'".format(source_name), traceback.format_exc())
            continue

        try:
            logger.info("Updating source '{}'".format(source_name))
            new_count, del_count = update_source(source_name, source_module.fetch_new)
            logger.info("{} new item{}, {} deleted item{}".format(
                new_count, "s" if new_count != 1 else "",
                del_count, "s" if del_count != 1 else ""))
        except Exception as e:
            error.as_item("Error updating source '{}'".format(source_name), traceback.format_exc())

def load_source(source_name):
    """
    Attempts to load the source module with the given name. Raises an exception on failure.
    """
    # Push the sources directory
    cwd = os.getcwd()
    os.chdir(SOURCES_PATH)
    # Check if the named source is present.
    source_file_name = source_name + ".py"
    if not os.path.isfile(source_file_name):
        os.chdir(cwd)
        raise FileNotFoundError("Missing '{}' in '{}'".format(source_name, SOURCES_PATH))
    # Try to import the source module.
    logger.debug("Loading module {}".format(source_file_name))
    spec = importlib.util.spec_from_file_location("itemsource", source_file_name)
    itemsource = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(itemsource)
    if not hasattr(itemsource, 'fetch_new'):
        raise ImportError("Missing fetch_new in '{}'".format(source_file_name))
    # Since the source is valid, get or create the source cell.
    os.chdir(cwd)
    cell_path = os.path.join(DUNGEON_PATH, source_name)
    return itemsource

def update_source(source_name, fetch_new):
    """
    Attempts to update the given source. Raises an exception if the source raises one.
    """
    cell_path = os.path.join(DUNGEON_PATH, source_name)

    # Get the existing items.
    prior_items, errors = loader.load_items(source_name)
    logger.debug("Found {} prior items".format(len(prior_items)))

    # Get the new items.
    state = loader.load_state(source_name)
    new_items = fetch_new(state)
    logger.debug("Fetched {} items".format(len(new_items)))
    state.flush()

    new_count = 0
    del_count = 0
    for item in new_items:
        populate_new(item)

        if item['id'] not in prior_items:
            # If the item is new, write it.
            new_count += 1
            s = json.dumps(item)
            path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + ".item")
            with open(path, 'w', encoding="utf8") as f:
                f.write(s)

        else:
            # If the item is extant and still active, overwrite its values.
            prior_item = prior_items[item['id']]
            if prior_item['active']:
                populate_old(prior_item, item)
            # Remove the id from the map of prior items to track its continued
            # presence in the source's queue of new items.
            del prior_items[item['id']]

    # Any remaining extant items are considered old. Old items are removed
    # when they are both inactive and past their ttl date.
    now = timestamp.now()
    for prior_id, prior_item in prior_items.items():
        ttl_date = prior_item['created'] + prior_item['ttl']
        if not prior_item['active'] and ttl_date < now:
            del_count += 1
            file_path = os.path.join(DUNGEON_PATH, prior_item['source'], prior_item['id'] + ".item")
            os.remove(file_path)

    # Return counts
    return new_count, del_count

def populate_new(item):
    # id and source are required fields
    item['active'] = True
    if 'created' not in item: item['created'] = timestamp.now()
    if 'title' not in item: item['title'] = item['id']
    if 'link' not in item: item['link'] = None
    if 'time' not in item: item['time'] = None
    if 'author' not in item: item['author'] = None
    if 'body' not in item: item['body'] = None
    if 'tags' not in item: item['tags'] = [item['source']]
    if 'ttl' not in item: item['ttl'] = 0

def populate_old(prior, new):
    prior.set({
        'title': new['title'],
        'link': new['link'],
        'time': new['time'],
        'author': new['author'],
        'body': new['body'],
        'tags': new['tags'],
        'ttl': new['ttl'],
    })
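To make the source contract concrete, here is a hedged sketch of a source module and of running the importer against it. The module name example.py, its item fields, and the bootstrap step are illustrative, not part of the commit: the importer only requires a module in SOURCES_PATH that exposes fetch_new(state) and returns a list of dicts carrying at least 'id' and 'source', with populate_new supplying the remaining defaults. Note that load_source computes cell_path but does not create it in this commit, so the sketch creates the cell directory and an empty state file by hand before calling update_sources.

# sources/example.py -- hypothetical source module.
def fetch_new(state):
    # state is the source's persistent WritethroughDict; writes flush to disk.
    count = state['count'] + 1 if 'count' in state else 1
    state['count'] = count
    return [{
        'id': 'example-{:04d}'.format(count),    # required
        'source': 'example',                     # required; matches the cell name
        'title': 'Example item {}'.format(count),
        'ttl': 60 * 60 * 24,                     # keep one day past deactivation
    }]

# Hypothetical bootstrap and run, executed from the inquisitor package directory.
import os
import json
import importer
from configs import DUNGEON_PATH

cell = os.path.join(DUNGEON_PATH, "example")
if not os.path.isdir(cell):
    os.makedirs(cell)                            # create the source's cell
    with open(os.path.join(cell, "state"), 'w') as f:
        f.write(json.dumps({}))                  # empty state for load_state

importer.update_sources("example")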
61 inquisitor/loader.py Normal file
@@ -0,0 +1,61 @@
import os
import json

from configs import DUNGEON_PATH
import error

class WritethroughDict():
    """A wrapper for a dictionary saved to the disk."""
    def __init__(self, path):
        if not os.path.isfile(path):
            raise FileNotFoundError(path)
        self.path = path
        with open(path) as f:
            self.item = json.loads(f.read())

    def __getitem__(self, key):
        return self.item[key]

    def __setitem__(self, key, value):
        self.item[key] = value
        self.flush()

    def set(self, dict):
        for key, value in dict.items():
            self.item[key] = value
        self.flush()

    def __contains__(self, key):
        return key in self.item

    def __repr__(self):
        return repr(self.item)

    def __str__(self):
        return str(self.item)

    def flush(self):
        s = json.dumps(self.item, indent=2)
        with open(self.path, 'w', encoding="utf8") as f:
            f.write(s)

def load_state(source_name):
    """Loads the state dictionary for a source."""
    state_path = os.path.join(DUNGEON_PATH, source_name, "state")
    return WritethroughDict(state_path)

def load_items(source_name):
    """
    Returns a map of ids to items and a list of unreadable files.
    """
    cell_path = os.path.join(DUNGEON_PATH, source_name)
    items = {}
    errors = []
    for filename in os.listdir(cell_path):
        try:
            path = os.path.join(cell_path, filename)
            item = WritethroughDict(path)
            items[item['id']] = item
        except Exception as e:
            errors.append(filename)
    return items, errors
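A short sketch of the write-through behavior: assignments persist to disk immediately, which is how source state survives between runs. The source name and key below are placeholders, and the state file must already exist, since the constructor refuses to create one:

# Hypothetical usage of the write-through state dictionary.
import loader

state = loader.load_state("example")   # opens DUNGEON_PATH/example/state
state['cursor'] = "page-2"             # __setitem__ writes the file at once
if 'cursor' in state:                  # __contains__ checks the backing dict
    print(state['cursor'])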
9 inquisitor/timestamp.py Normal file
@@ -0,0 +1,9 @@
import time
import datetime

def now():
    return int(time.time())

def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
    dt = datetime.datetime.fromtimestamp(ts)
    return dt.strftime(formatstr)
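Both helpers work in whole epoch seconds, which is also the unit update_source assumes when comparing created + ttl against now(). A small sketch with an illustrative one-day ttl:

import timestamp

created = timestamp.now()         # integer epoch seconds
ttl = 60 * 60 * 24                # one day, in the same unit
expires = created + ttl           # comparable directly with timestamp.now()
print(timestamp.stamp_to_readable(expires))   # formatted as "%Y-%m-%d %H:%M:%S"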