Clean up source update code
This commit is contained in:
parent
024d81336d
commit
c116476487
|
@ -107,23 +107,17 @@ def command_add(args):
|
|||
|
||||
source = args.source or 'inquisitor'
|
||||
cell_path = os.path.join(DUNGEON_PATH, source)
|
||||
if not os.path.isdir(cell_path):
|
||||
if args.create:
|
||||
os.mkdir(cell_path)
|
||||
state_path = os.path.join(cell_path, "state")
|
||||
with open(state_path, 'w', encoding='utf8') as f:
|
||||
f.write(json.dumps({}))
|
||||
else:
|
||||
from inquisitor.sources import ensure_cell
|
||||
ensure_cell(source)
|
||||
elif not os.path.isdir(cell_path):
|
||||
logger.error("Source '{}' does not exist".format(source))
|
||||
return -1
|
||||
|
||||
from inquisitor.sources import populate_new
|
||||
item = {
|
||||
'id': '{:x}'.format(random.getrandbits(16 * 4)),
|
||||
'source': 'inquisitor'
|
||||
'id': args.id or '{:x}'.format(random.getrandbits(16 * 4)),
|
||||
'source': source,
|
||||
}
|
||||
if args.id: item['id'] = str(args.id)
|
||||
if args.source: item['source'] = str(args.source)
|
||||
if args.title: item['title'] = str(args.title)
|
||||
if args.link: item['link'] = str(args.link)
|
||||
if args.time: item['time'] = int(args.time)
|
||||
|
@ -133,13 +127,10 @@ def command_add(args):
|
|||
if args.ttl: item['ttl'] = int(args.ttl)
|
||||
if args.ttd: item['ttd'] = int(args.ttd)
|
||||
if args.tts: item['tts'] = int(args.tts)
|
||||
populate_new(item['source'], item)
|
||||
|
||||
s = json.dumps(item, indent=2)
|
||||
path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + '.item')
|
||||
with open(path, 'w', encoding='utf8') as f:
|
||||
f.write(s)
|
||||
logger.info(item)
|
||||
from inquisitor.loader import new_item
|
||||
saved_item = new_item(source, item)
|
||||
logger.info(saved_item)
|
||||
|
||||
|
||||
def command_feed(args):
|
||||
|
|
|
@ -45,11 +45,6 @@ class WritethroughDict():
|
|||
self.item[key] = value
|
||||
self.flush()
|
||||
|
||||
def set(self, dict):
|
||||
for key, value in dict.items():
|
||||
self.item[key] = value
|
||||
self.flush()
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.item
|
||||
|
||||
|
@ -76,6 +71,60 @@ def load_item(source_name, item_id):
|
|||
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
|
||||
return WritethroughDict.load(item_path)
|
||||
|
||||
|
||||
def item_exists(source_name, item_id):
|
||||
"""
|
||||
Checks for the existence of an item.
|
||||
"""
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
|
||||
return os.path.isfile(item_path)
|
||||
|
||||
|
||||
def get_item_ids(cell_name):
|
||||
"""
|
||||
Returns a list of item ids in the given cell.
|
||||
"""
|
||||
cell_path = os.path.join(DUNGEON_PATH, cell_name)
|
||||
return [
|
||||
filename[:-5]
|
||||
for filename in os.listdir(cell_path)
|
||||
if filename.endswith('.item')
|
||||
]
|
||||
|
||||
|
||||
def new_item(source_name, item):
|
||||
"""
|
||||
Creates a new item with the fields in the provided dictionary.
|
||||
Initializes other fields to their default values.
|
||||
"""
|
||||
# id is required
|
||||
if 'id' not in item:
|
||||
raise Exception(f'Cannot create item with no id. Value = {item}')
|
||||
|
||||
# source must be filled in, so if it is absent it is auto-populated with
|
||||
# source_name. Note: this allows sources to fill in a different source.
|
||||
if 'source' not in item:
|
||||
item['source'] = source_name
|
||||
|
||||
# active is forced to True for new items
|
||||
item['active'] = True
|
||||
|
||||
# created is forced to the current timestamp
|
||||
item['created'] = timestamp.now()
|
||||
|
||||
# title is auto-populated with the id if missing
|
||||
if 'title' not in item:
|
||||
item['title'] = item['id']
|
||||
|
||||
# tags is auto-populated if missing (not if empty!)
|
||||
if 'tags' not in item:
|
||||
item['tags'] = [source_name]
|
||||
|
||||
# All other fields are optional.
|
||||
item_path = os.path.join(DUNGEON_PATH, item['source'], f'{item["id"]}.item')
|
||||
return WritethroughDict.create(item_path, item)
|
||||
|
||||
|
||||
def load_items(source_name):
|
||||
"""
|
||||
Returns a map of ids to items and a list of unreadable files.
|
||||
|
|
|
@ -9,6 +9,19 @@ from inquisitor import loader, timestamp, error
|
|||
from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
|
||||
|
||||
|
||||
USE_NEWEST = (
|
||||
'title',
|
||||
'tags',
|
||||
'link',
|
||||
'time'
|
||||
'author',
|
||||
'body',
|
||||
'ttl',
|
||||
'ttd',
|
||||
'tts',
|
||||
)
|
||||
|
||||
|
||||
def ensure_cell(name):
|
||||
"""
|
||||
Creates a cell in the dungeon. Idempotent.
|
||||
|
@ -92,54 +105,59 @@ def update_source(source_name, fetch_new):
|
|||
"""
|
||||
Attempts to update the given source. Raises an exception if the source does.
|
||||
"""
|
||||
# Get the existing items from the source's cell.
|
||||
prior_items, errors = loader.load_items(source_name)
|
||||
if any(errors):
|
||||
raise Exception(f'Can\'t update source "{source_name}", some items are corrupt')
|
||||
logger.debug("Found {} prior items".format(len(prior_items)))
|
||||
# Get a list of item ids that already existed in this source's cell.
|
||||
prior_ids = loader.get_item_ids(source_name)
|
||||
logger.debug(f'Found {len(prior_ids)} prior items')
|
||||
|
||||
# Get the feed items from the source's fetch method.
|
||||
state = loader.load_state(source_name)
|
||||
fetched = fetch_new(state)
|
||||
fetched_items = {item['id']: item for item in fetched}
|
||||
state.flush()
|
||||
logger.debug(f'Fetched {len(fetched)} items')
|
||||
fetched_items = {item['id']: item for item in fetched}
|
||||
|
||||
# Populate all the fetched items with required or auto-generated fields.
|
||||
# This also provides an opportunity to throw if the source isn't returning
|
||||
# valid items.
|
||||
for item in fetched_items.values():
|
||||
populate_new(source_name, item)
|
||||
logger.debug("Fetched {} items".format(len(fetched_items)))
|
||||
# Determine which items are new and which are updates.
|
||||
# We query the file system here instead of checking against this source's
|
||||
# item ids from above because sources are allowed to generate in other
|
||||
# sources' cells.
|
||||
new_items = []
|
||||
updated_items = []
|
||||
for item in fetched:
|
||||
item_source = item.get('source', source_name)
|
||||
if loader.item_exists(item_source, item['id']):
|
||||
updated_items.append(item)
|
||||
else:
|
||||
new_items.append(item)
|
||||
|
||||
# Write all the new fetched items to the source's cell.
|
||||
new_items = [
|
||||
item for item in fetched_items.values()
|
||||
if item['id'] not in prior_items]
|
||||
# Write all the new items to the source's cell.
|
||||
for item in new_items:
|
||||
s = json.dumps(item)
|
||||
path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + ".item")
|
||||
with open(path, 'w', encoding='utf8') as f:
|
||||
f.write(s)
|
||||
item_source = item.get('source', source_name)
|
||||
loader.new_item(item_source, item)
|
||||
|
||||
# Update the extant items using the fetched item's values.
|
||||
extant_items = [
|
||||
item for item in fetched_items.values()
|
||||
if item['id'] in prior_items]
|
||||
for item in extant_items:
|
||||
# The items in prior_items are writethrough dicts.
|
||||
prior_item = prior_items[item['id']]
|
||||
# Only bother updating active items.
|
||||
if prior_item['active']:
|
||||
populate_old(prior_item, item)
|
||||
# Update the other items using the fetched items' values.
|
||||
for new_item in updated_items:
|
||||
old_item = loader.load_item(new_item['source'], new_item['id'])
|
||||
for field in USE_NEWEST:
|
||||
if field in new_item and old_item[field] != new_item[field]:
|
||||
old_item[field] = new_item[field]
|
||||
if 'callback' in new_item:
|
||||
old_callback = old_item.get('callback', {})
|
||||
# Because of the way this update happens, any fields that are set
|
||||
# in the callback when the item is new will keep their original
|
||||
# values, as those values reappear in new_item on subsequent
|
||||
# updates.
|
||||
old_item['callback'] = {**old_item['callback'], **new_item['callback']}
|
||||
|
||||
# In general, items are removed when they are old (not found in the last
|
||||
# fetch) and inactive. Some item fields can change this basic behavior.
|
||||
del_count = 0
|
||||
now = timestamp.now()
|
||||
old_items = [
|
||||
item for item in prior_items.values()
|
||||
if item['id'] not in fetched_items]
|
||||
for item in old_items:
|
||||
fetched_ids = [item['id'] for item in updated_items]
|
||||
old_item_ids = [
|
||||
item_id for item_id in prior_ids
|
||||
if item_id not in fetched_ids]
|
||||
for item_id in old_item_ids:
|
||||
item = loader.load_item(source_name, item_id)
|
||||
remove = not item['active']
|
||||
# The time-to-live field protects an item from removal until expiry.
|
||||
# This is mainly used to avoid old items resurfacing when their source
|
||||
|
@ -170,35 +188,6 @@ def update_source(source_name, fetch_new):
|
|||
len(new_items), "s" if len(new_items) != 1 else "",
|
||||
del_count, "s" if del_count != 1 else ""))
|
||||
|
||||
def populate_new(source_name, item):
|
||||
# id is required
|
||||
if 'id' not in item:
|
||||
raise Exception(f'Source "{source_name}" returned an item with no id')
|
||||
# source is auto-populated with the source name if missing
|
||||
# Note: this allows sources to create items in other cells!
|
||||
if 'source' not in item: item['source'] = source_name
|
||||
# active is forced to True for new items
|
||||
item['active'] = True
|
||||
# created is forced to the current timestamp
|
||||
item['created'] = timestamp.now()
|
||||
# title is auto-populated with the id if missing
|
||||
if 'title' not in item: item['title'] = item['id']
|
||||
# tags is auto-populated if missing (not if empty!)
|
||||
if 'tags' not in item: item['tags'] = [source_name]
|
||||
# link, time, author, body, ttl, ttd, tts, callback are optional
|
||||
|
||||
def populate_old(prior, new):
|
||||
# Not updated: id, source, active, created
|
||||
if 'title' in new: prior['title'] = new['title']
|
||||
if 'tags' in new: prior['tags'] = new['tags']
|
||||
if 'link' in new: prior['link'] = new['link']
|
||||
if 'time' in new: prior['time'] = new['time']
|
||||
if 'author' in new: prior['author'] = new['author']
|
||||
if 'body' in new: prior['body'] = new['body']
|
||||
if 'ttl' in new: prior['ttl'] = new['ttl']
|
||||
if 'ttd' in new: prior['ttd'] = new['ttd']
|
||||
if 'tts' in new: prior['tts'] = new['tts']
|
||||
if 'callback' in new: prior['callback'] = new['callback']
|
||||
|
||||
def item_callback(source_name, itemid):
|
||||
try:
|
||||
|
|
Loading…
Reference in New Issue