Initial black linting pass

This commit is contained in:
Tim Van Baak 2022-12-28 12:44:48 -08:00
parent 52e32917d5
commit 42544bd30b
11 changed files with 1099 additions and 1022 deletions

View File

@ -1,2 +1,2 @@
from inquisitor.configs import CACHE_PATH from inquisitor.configs import CACHE_PATH
from inquisitor.templates import cache_image, LinearCrawler, RedditScraper from inquisitor.templates import cache_image, LinearCrawler, RedditScraper

View File

@ -1,2 +1,3 @@
# Script entry point: import the CLI dispatcher and run it immediately.
# NOTE(review): presumably this is the package's __main__ module -- confirm.
from cli import main

main()

View File

@ -8,13 +8,14 @@ from flask import Flask, render_template, request, jsonify, abort, redirect, url
# Application imports # Application imports
from inquisitor.configs import ( from inquisitor.configs import (
DUNGEON_PATH, DUNGEON_PATH,
SOURCES_PATH, SOURCES_PATH,
CACHE_PATH, CACHE_PATH,
subfeeds, subfeeds,
get_subfeed_overrides, get_subfeed_overrides,
logger, logger,
init_default_logging) init_default_logging,
)
from inquisitor import sources, loader, timestamp from inquisitor import sources, loader, timestamp
# Globals # Globals
@ -22,173 +23,202 @@ app = Flask(__name__)
def make_query_link(text, wl, bl):
    """
    Build an HTML anchor that links to the current page with tag filters.

    text: the link label; it is HTML-escaped before being embedded.
    wl: tags for the "only" query parameter.
    bl: tags for the "not" query parameter.

    Returns the anchor markup as a string. A parameter whose tag list is
    empty is left out, so with no filters the link target is just "?".
    """
    # Imported locally so this function does not rely on module-level imports.
    from html import escape
    from urllib.parse import quote

    # Percent-encode every tag: a raw "+", "&", "#" or space inside a tag
    # would otherwise be reinterpreted when the query string is parsed.
    wlp = "only=" + ",".join(quote(tag, safe="") for tag in wl)
    blp = "not=" + ",".join(quote(tag, safe="") for tag in bl)
    # Drop parameters that ended up with no value.
    params = [p for p in (wlp, blp) if not p.endswith("=")]
    query = "?{}".format("&".join(params))
    # Escape both pieces for their HTML context (attribute value and text).
    return '<a href="{1}">{0}</a>'.format(escape(text), escape(query, quote=True))
@app.template_filter("datetimeformat")
def datetimeformat(value):
    """
    Template filter that renders a timestamp via timestamp.stamp_to_readable.

    Returns an empty string when value is None.
    """
    if value is None:
        return ""
    return timestamp.stamp_to_readable(value)
@app.route("/")
def root():
    """Redirect the site root to the main feed view."""
    feed_url = url_for("feed")
    return redirect(feed_url)
@app.route("/feed/")
def feed():
    """Render the feed with no source restriction (source_names=None)."""
    everything = None
    return feed_for_sources(source_names=everything)
@app.route("/feed/<string:feed_name>/")
def subfeed(feed_name):
    """
    Render the feed for one named subfeed.

    Subfeed overrides take precedence over the configured subfeeds. The
    name "inquisitor" is built in and selects every source directory that
    is not listed in any subfeed. Unknown names yield a 404.
    """
    # Check for and apply subfeed overrides
    subfeed_config = get_subfeed_overrides() or subfeeds or {}

    # The built-in inquisitor subfeed contains sources not in another subfeed
    if feed_name == "inquisitor":
        unclaimed = [
            name
            for name in os.listdir(DUNGEON_PATH)
            if not any(name in members for members in subfeed_config.values())
        ]
        return feed_for_sources(unclaimed)

    if feed_name in subfeed_config:
        return feed_for_sources(subfeed_config[feed_name])
    return abort(404)
def feed_for_sources(source_names):
    """
    Render the feed page for the given sources.

    source_names is passed straight to loader.load_active_items. The
    "only" and "not" query parameters hold comma-separated tags used to
    hide items. The page shows at most the first 100 entries, preceded by
    a feed-control table (when any item is active) and a read-error entry
    (when the loader reported errors).
    """
    # Parse the tag filters from the query string
    only_arg = request.args.get("only")
    not_arg = request.args.get("not")
    wl = only_arg.split(",") if only_arg else []
    bl = not_arg.split(",") if not_arg else []

    # Each predicate returns True when an item must be hidden
    filters = []
    if wl:
        filters.append(lambda item: not any(tag in wl for tag in item["tags"]))
    if bl:
        filters.append(lambda item: any(tag in bl for tag in item["tags"]))

    # Get all active+filtered items and count the tags of all active items
    items, errors = loader.load_active_items(source_names)
    total = 0
    active_items = []
    active_tags = {}
    for item in items:
        if not item["active"]:
            continue
        for tag in item["tags"]:
            active_tags[tag] = active_tags.get(tag, 0) + 1
        total += 1
        if not any(hide(item) for hide in filters):
            active_items.append(item)

    def sort_stamp(entry):
        # Prefer the item's own time, then its creation time, then 0
        for field in ("time", "created"):
            if field in entry and entry[field]:
                return entry[field]
        return 0

    # Sort items by time
    active_items.sort(key=sort_stamp)

    logger.info("Returning {} of {} items".format(len(active_items), total))
    if errors:
        active_items.insert(
            0,
            {
                "title": "Read errors",
                "body": "<pre>{}</pre>".format("\n\n".join(errors)),
                "created": None,
            },
        )

    if total > 0:
        # Create the feed control item
        rows = [
            "<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
                total, make_query_link("all", [], [])
            )
        ]
        by_name = sorted(active_tags.items(), key=lambda pair: pair[0].lower())
        for tag, count in by_name:
            wl_without = [t for t in wl if t != tag]
            bl_without = [t for t in bl if t != tag]
            tag_link = make_query_link(tag, [tag], [])
            # Toggle for the "only" filter on this tag
            if tag in wl:
                only_link = make_query_link("-only", wl_without, bl)
            else:
                only_link = make_query_link("+only", wl + [tag], bl_without)
            # Toggle for the "not" filter on this tag
            if tag in bl:
                not_link = make_query_link("-not", wl, bl_without)
            else:
                not_link = make_query_link("+not", wl_without, bl + [tag])
            rows.append(
                "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(
                    count, tag_link, only_link, not_link
                )
            )
        active_items.insert(
            0,
            {
                "title": "Feed Control [{}/{}]".format(len(active_items), total),
                "body": '<table class="feed-control">{}</table>'.format(
                    "\n".join(rows)
                ),
            },
        )

    selection = active_items[:100]
    # Only entries with an id are included in the mass-deactivate list
    mass_deactivate_targets = [
        {"source": entry["source"], "itemid": entry["id"]}
        for entry in selection
        if "id" in entry
    ]
    return render_template(
        "feed.jinja2", items=selection, mdeac=mass_deactivate_targets
    )
@app.route("/deactivate/", methods=["POST"])
def deactivate():
    """
    Mark one item inactive.

    Expects a JSON object with "source" and "itemid". Responds with the
    item's resulting "active" flag, or 400 when the body is not an object
    or either key is missing.
    """
    params = request.get_json()
    # Both keys are required, so reject when either one is missing and stop
    # here instead of failing later on the lookup.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)

    item = loader.load_item(params["source"], params["itemid"])
    if item["active"]:
        logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
    item["active"] = False
    return jsonify({"active": item["active"]})
@app.route("/punt/", methods=["POST"])
def punt():
    """
    Set an item's "tts" so that it spans until 06:00 tomorrow.

    Expects a JSON object with "source" and "itemid". Responds with the
    item's data, or 400 when the body is not an object or either key is
    missing.
    """
    params = request.get_json()
    # Both keys are required, so reject when either one is missing and stop
    # here instead of failing later on the lookup.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)

    item = loader.load_item(params["source"], params["itemid"])
    tomorrow = datetime.now() + timedelta(days=1)
    morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
    # tts is stored relative to the item's creation time
    til_then = morning.timestamp() - item["created"]
    item["tts"] = til_then
    return jsonify(item.item)
@app.route("/mass-deactivate/", methods=["POST"])
def mass_deactivate():
    """
    Mark a batch of items inactive.

    Expects a JSON object whose "items" value is a list of objects with
    "source" and "itemid". A missing "items" key is logged and treated as
    an empty batch; a body that is not a JSON object yields 400. Responds
    with an empty JSON object.
    """
    params = request.get_json()
    if not isinstance(params, dict):
        # A null or non-object body cannot be queried for "items" at all.
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    if "items" not in params:
        logger.error("Bad request params: {}".format(params))

    for info in params.get("items", []):
        source = info["source"]
        itemid = info["itemid"]
        item = loader.load_item(source, itemid)
        if item["active"]:
            logger.debug(f"Deactivating {source}/{itemid}")
        item["active"] = False
    return jsonify({})
@app.route("/callback/", methods=["POST"])
def callback():
    """
    Run the source callback for one item.

    Expects a JSON object with "source" and "itemid", which are forwarded
    to sources.item_callback. Responds with an empty JSON object, or 400
    when the body is not an object or either key is missing.
    """
    params = request.get_json()
    # Both keys are required, so reject when either one is missing and stop
    # here instead of failing later on the lookup.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)

    logger.info(
        "Executing callback for {}/{}".format(params["source"], params["itemid"])
    )
    sources.item_callback(params["source"], params["itemid"])
    return jsonify({})
@app.route("/cache/<path:cache_path>")
def cache(cache_path):
    """
    Serve the raw bytes of a file stored under CACHE_PATH.

    Responds with 404 when the requested path does not name a regular file
    or would resolve to a location outside the cache directory.
    """
    root = os.path.abspath(CACHE_PATH)
    path = os.path.abspath(os.path.join(root, cache_path))
    # The path comes from the request URL: normalise it and refuse anything
    # (e.g. "../" segments) that lands outside the cache directory.
    if not path.startswith(os.path.join(root, "")):
        return abort(404)
    if not os.path.isfile(path):
        return abort(404)
    with open(path, "rb") as f:
        return f.read()
def wsgi():
    """Initialize default logging, then return the Flask application."""
    init_default_logging()
    application = app
    return application

View File

@ -11,288 +11,324 @@ from inquisitor.configs import logger, DUNGEON_PATH, SOURCES_PATH, add_logging_h
def command_test(args):
    """Echo config file values."""
    # The docstring above doubles as the command's help text; keep it as is.
    # Prints the resolved value of each setting and always returns 0.
    from inquisitor.configs.resolver import (
        config_path,
        CONFIG_DATA,
        data_path,
        CONFIG_SOURCES,
        source_path,
        CONFIG_CACHE,
        cache_path,
        CONFIG_LOGFILE,
        log_file,
        CONFIG_VERBOSE,
        is_verbose,
        CONFIG_SUBFEEDS,
        subfeeds,
    )

    # Flatten the subfeed mapping into "name: src src; name: src"
    if subfeeds:
        subfeed_summary = "; ".join(
            "{0}: {1}".format(name, " ".join(members))
            for name, members in subfeeds.items()
        )
    else:
        subfeed_summary = ""

    report = [
        f"Inquisitor configured from {config_path}",
        f" {CONFIG_DATA} = {data_path}",
        f" {CONFIG_SOURCES} = {source_path}",
        f" {CONFIG_CACHE} = {cache_path}",
        f" {CONFIG_LOGFILE} = {log_file}",
        f" {CONFIG_VERBOSE} = {is_verbose}",
        f" {CONFIG_SUBFEEDS} = {subfeed_summary}",
    ]
    for line in report:
        print(line)
    return 0
def command_update(args):
    """Fetch and store new items from the specified sources."""
    # The docstring above doubles as the command's help text; keep it as is.
    # args: list of CLI argument strings for this subcommand.
    # Returns 0 on success (or after printing help), -1 when a required
    # directory is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor update", description=command_update.__doc__, add_help=False
    )
    parser.add_argument("source", nargs="*", help="Sources to update.")
    args = parser.parse_args(args)

    if len(args.source) == 0:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1
    if not os.path.isdir(SOURCES_PATH):
        logger.error(
            "Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources"
        )
        # Without a sources directory there is nothing to update; fail like
        # the dungeon check above instead of carrying on.
        return -1

    # Update sources
    from inquisitor.sources import update_sources

    update_sources(*args.source)
    return 0
def command_deactivate(args):
    """Deactivate all items in the specified dungeon cells."""
    # The docstring above doubles as the command's help text; keep it as is.
    # args: list of CLI argument strings for this subcommand.
    # Returns 0 on success (or after printing help), -1 when the dungeon
    # directory is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor deactivate",
        description=command_deactivate.__doc__,
        add_help=False,
    )
    parser.add_argument("source", nargs="*", help="Cells to deactivate.")
    parser.add_argument("--tag", help="Only deactivate items with this tag")
    parser.add_argument(
        "--title", help="Only deactivate items with titles containing this substring"
    )
    args = parser.parse_args(args)

    if len(args.source) == 0:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    # Deactivate all items in each source.
    from inquisitor.loader import load_items

    for source_name in args.source:
        path = os.path.join(DUNGEON_PATH, source_name)
        if not os.path.isdir(path):
            logger.warning("'{}' is not an extant source".format(source_name))
            # Nothing to load for a source that does not exist; move on to
            # the next one rather than trying to read it.
            continue
        count = 0
        items, _ = load_items(source_name)
        for item in items.values():
            if args.tag and args.tag not in item["tags"]:
                continue
            if args.title and args.title not in item["title"]:
                continue
            if item["active"]:
                item["active"] = False
                count += 1
        logger.info("Deactivated {} items in '{}'".format(count, source_name))
    return 0
def command_add(args):
    """Creates an item."""
    # The docstring above doubles as the command's help text; keep it as is.
    # args: list of CLI argument strings for this subcommand.
    # Returns 0 on success (or after printing help), -1 when the dungeon or
    # the target source is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor add", description=command_add.__doc__, add_help=False
    )
    parser.add_argument("--id", help="String")
    parser.add_argument("--source", help="String")
    parser.add_argument("--title", help="String")
    parser.add_argument("--link", help="URL")
    parser.add_argument("--time", type=int, help="Unix timestamp")
    parser.add_argument("--author", help="String")
    parser.add_argument("--body", help="HTML")
    parser.add_argument("--tags", help="Comma-separated list")
    parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds")
    parser.add_argument("--ttd", type=int, help="Cleanup force in seconds")
    parser.add_argument("--tts", type=int, help="Display delay in seconds")
    parser.add_argument(
        "--create", action="store_true", help="Create source if it doesn't exist"
    )
    args = parser.parse_args(args)

    # A title is the one mandatory field
    if not args.title:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    source = args.source or "inquisitor"
    cell_path = os.path.join(DUNGEON_PATH, source)
    if args.create:
        from inquisitor.sources import ensure_cell

        ensure_cell(source)
    elif not os.path.isdir(cell_path):
        logger.error("Source '{}' does not exist".format(source))
        return -1

    item = {
        # Fall back to a random 64-bit hex id when none was given
        "id": args.id or "{:x}".format(random.getrandbits(16 * 4)),
        "source": source,
    }
    # Copy over only the optional fields that were actually supplied
    if args.title:
        item["title"] = str(args.title)
    if args.link:
        item["link"] = str(args.link)
    if args.time:
        item["time"] = int(args.time)
    if args.author:
        item["author"] = str(args.author)
    if args.body:
        item["body"] = str(args.body)
    if args.tags:
        item["tags"] = [str(tag) for tag in args.tags.split(",")]
    if args.ttl:
        item["ttl"] = int(args.ttl)
    if args.ttd:
        item["ttd"] = int(args.ttd)
    if args.tts:
        item["tts"] = int(args.tts)

    from inquisitor.loader import new_item

    saved_item = new_item(source, item)
    logger.info(saved_item)
    # Explicit success code, matching the other commands
    return 0
def command_feed(args):
    """Print the current feed."""
    # The docstring above doubles as the command's help text; keep it as is.
    # Prints every active item as an ASCII box at most 80 columns wide.
    # Returns 0 on success, -1 when the dungeon directory is missing.
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    import shutil
    import textwrap
    from inquisitor import loader
    from inquisitor import timestamp

    items, errors = loader.load_active_items(source_names=None)
    if not items and not errors:
        print("Feed is empty")
        return 0

    if errors:
        items.insert(
            0,
            {
                "title": "{} read errors: {}".format(len(errors), " ".join(errors)),
                "body": "\n".join(errors),
            },
        )

    size = shutil.get_terminal_size((80, 20))
    width = min(80, size.columns)

    for item in items:
        title = item["title"] if "title" in item else ""
        # Wrap the title to the box's inner width. textwrap also hard-breaks
        # words longer than a line, so a title without spaces cannot overflow
        # the box. An empty title still produces one blank line.
        titles = textwrap.wrap(title, width - 4) or [""]
        print("+" + (width - 2) * "-" + "+")
        for title in titles:
            print("| {0:<{1}} |".format(title, width - 4))
        print("|{0:<{1}}|".format("", width - 2))
        info1 = ""
        # Look the author up on the item, not in the title text
        if "author" in item and item["author"]:
            info1 += item["author"] + " "
        if "time" in item and item["time"]:
            info1 += timestamp.stamp_to_readable(item["time"])
        print("| {0:<{1}} |".format(info1, width - 4))
        created = (
            timestamp.stamp_to_readable(item["created"]) if "created" in item else ""
        )
        info2 = "{0} {1} {2}".format(
            item.get("source", ""), item.get("id", ""), created
        )
        print("| {0:<{1}} |".format(info2, width - 4))
        print("+" + (width - 2) * "-" + "+")
        print()
    # Explicit success code, matching the other commands
    return 0
def command_run(args):
    """Run the default Flask server."""
    # The docstring above doubles as the command's help text; keep it as is.
    # Returns 0 once the server stops, -1 if importing or running it raised.
    parser = argparse.ArgumentParser(
        prog="inquisitor run",
        description=command_run.__doc__,
        add_help=False,
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--port", type=int, default=5000)
    options = parser.parse_args(args)

    try:
        from inquisitor.app import app

        app.run(port=options.port, debug=options.debug)
    except Exception as e:
        logger.error(e)
        return -1
    return 0
def command_help(args):
    """Print this help message and exit."""
    # The docstring above doubles as the command's help text; keep it as is.
    # print_usage is a module global that main() binds to the parser's
    # print_help before any command is dispatched.
    show_usage = print_usage
    show_usage()
    return 0
def nocommand(args):
    # Fallback handler for when no known command was selected.
    # Prints a short notice and returns 0; args is ignored.
    notice = "command required"
    print(notice)
    return 0
def main():
    """
    CLI entry point.

    Collects every module-level function named command_* as a subcommand,
    parses sys.argv, sets up console logging and exits the process with
    the selected command's return value.
    """
    # Extract the usage print for command_help
    global print_usage

    # Enable piping
    from signal import signal, SIGPIPE, SIG_DFL

    signal(SIGPIPE, SIG_DFL)

    # Collect the commands from this module
    import inquisitor.cli

    prefix = "command_"
    commands = {}
    for attr, value in vars(inquisitor.cli).items():
        if attr.startswith(prefix):
            commands[attr[len(prefix) :]] = value
    # One help line per command, taken from its docstring
    description_lines = []
    for name, func in commands.items():
        description_lines.append("- {0}: {1}".format(name, func.__doc__))
    descriptions = "\n".join(description_lines)

    # Set up the parser
    parser = argparse.ArgumentParser(
        description="Available commands:\n{}\n".format(descriptions),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
    )
    parser.add_argument(
        "command",
        nargs="?",
        default="help",
        help="The command to execute",
        choices=commands,
        metavar="command",
    )
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Command arguments",
        metavar="args",
    )
    parser.add_argument(
        "-v",
        action="store_true",
        dest="verbose",
        help="Enable debug logging",
    )

    print_usage = parser.print_help

    parsed = parser.parse_args()

    # Initialize a console logger
    add_logging_handler(verbose=parsed.verbose, log_filename=None)

    # Execute command
    try:
        handler = commands.get(parsed.command, nocommand)
        exit_code = handler(parsed.args)
        sys.exit(exit_code)
    except BrokenPipeError:
        # See https://docs.python.org/3.10/library/signal.html#note-on-sigpipe
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)

View File

@ -1,10 +1,5 @@
from .resolver import data_path as DUNGEON_PATH from .resolver import data_path as DUNGEON_PATH
from .resolver import source_path as SOURCES_PATH from .resolver import source_path as SOURCES_PATH
from .resolver import cache_path as CACHE_PATH from .resolver import cache_path as CACHE_PATH
from .resolver import ( from .resolver import logger, subfeeds
logger, from .resolver import add_logging_handler, init_default_logging, get_subfeed_overrides
subfeeds)
from .resolver import (
add_logging_handler,
init_default_logging,
get_subfeed_overrides)

View File

@ -4,174 +4,172 @@ import logging
# Constants governing config resolution. Each CONFIG_* name is a key in the
# config file; the matching DEFAULT_* is the fallback value.

# Environment variable naming the config file, which contains key-value
# pairs of the other settings
CONFIG_ENVVAR = "INQUISITOR_CONFIG"
DEFAULT_CONFIG_PATH = "/etc/inquisitor.conf"

# Folder where items are stored
CONFIG_DATA = "DataPath"
DEFAULT_DATA_PATH = "/var/inquisitor/data/"

# Folder where source modules are stored
CONFIG_SOURCES = "SourcePath"
DEFAULT_SOURCES_PATH = "/var/inquisitor/sources/"

# Folder where cached files are stored
CONFIG_CACHE = "CachePath"
DEFAULT_CACHE_PATH = "/var/inquisitor/cache/"

# Log file that logging will be redirected to
CONFIG_LOGFILE = "LogFile"
DEFAULT_LOG_FILE = None

# Whether logging is verbose
CONFIG_VERBOSE = "Verbose"
DEFAULT_VERBOSITY = "false"

# Subfeed source lists: one subfeed definition per line, with the sources
# inside a subfeed separated by spaces
CONFIG_SUBFEEDS = "Subfeeds"
DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = "subfeeds.conf"
def read_config_file(config_path):
    """
    Parse a key-value config file into a dict of strings.

    Blank lines and lines whose first non-blank character is '#' are
    ignored. A line containing '=' starts a new entry, split at the first
    '='. Any other line continues the most recent entry and is appended to
    its value after a newline.

    Raises FileNotFoundError if config_path is not a file, and ValueError
    if a continuation line appears before any key.
    """
    if not os.path.isfile(config_path):
        raise FileNotFoundError(
            f"No config file found at {config_path}, try setting {CONFIG_ENVVAR}"
        )

    # Parse the config file into key-value pairs
    accumulated_configs = {}
    current_key = None
    with open(config_path, "r", encoding="utf8") as cfg:
        for line_no, raw_line in enumerate(cfg, start=1):
            text = raw_line.strip()
            # Skip blank lines and comments
            if not text or text.startswith("#"):
                continue
            key, separator, value = raw_line.partition("=")
            if separator:
                # "key = value" begins a new keyvalue pair
                current_key = key.strip()
                accumulated_configs[current_key] = value.strip()
                continue
            # If there's no '=' and no previous key, throw
            if not current_key:
                raise ValueError(f"Invalid config format on line {line_no}")
            accumulated_configs[current_key] += "\n" + text

    return accumulated_configs
def parse_subfeed_value(value):
    """
    Parses a multi-line subfeed setting into a mapping of subfeed name to
    the list of source names it contains.

    Each non-blank line has the form "name: source1 source2 ...". Raises
    ValueError for a non-blank line that contains no ':'.
    """
    parsed = {}
    for raw_line in value.split("\n"):
        sf_def = raw_line.strip()
        # Blank lines carry no definition
        if not sf_def:
            continue
        name, separator, members = sf_def.partition(":")
        if not separator:
            raise ValueError(f"Invalid subfeed definition: {sf_def}")
        # Whitespace-separated source names; split() already trims each one
        parsed[name.strip()] = members.split()
    return parsed
# Locate the config file: the environment variable takes priority and the
# default path is used when it is unset or empty.
config_path = os.path.abspath(os.environ.get(CONFIG_ENVVAR) or DEFAULT_CONFIG_PATH)

configs = read_config_file(config_path)

# Extract and validate config values. Every directory setting must be an
# absolute path to a directory that already exists.

data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
if not os.path.isabs(data_path):
    raise ValueError(f"Non-absolute data path: {data_path}")
if not os.path.isdir(data_path):
    raise FileNotFoundError(f"Cannot find directory {data_path}")

source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
if not os.path.isabs(source_path):
    raise ValueError(f"Non-absolute source path: {source_path}")
if not os.path.isdir(source_path):
    raise FileNotFoundError(f"Cannot find directory {source_path}")

cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
if not os.path.isabs(cache_path):
    raise ValueError(f"Non-absolute cache path: {cache_path}")
if not os.path.isdir(cache_path):
    raise FileNotFoundError(f"Cannot find directory {cache_path}")

# The log file is optional, but when given it must be an absolute path.
log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
if log_file and not os.path.isabs(log_file):
    raise ValueError(f"Non-absolute log file path: {log_file}")

# Verbosity is stored as the literal text "true"/"false" and converted to a
# bool only after it has been validated.
is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
if is_verbose not in ("true", "false"):
    raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
is_verbose = is_verbose == "true"

# Subfeeds stay as the raw falsy value when none are configured.
subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
if subfeeds:
    subfeeds = parse_subfeed_value(subfeeds)
def get_subfeed_overrides():
    """
    Check for and parse the secondary subfeed configuration file.

    Returns the parsed subfeed mapping, or None when the file is absent or
    does not define a non-empty subfeed value.
    """
    path = os.path.join(source_path, SUBFEED_CONFIG_FILE)
    if not os.path.isfile(path):
        return None

    # A missing key and an empty value are treated the same way
    value = read_config_file(path).get(CONFIG_SUBFEEDS)
    if not value:
        return None

    return parse_subfeed_value(value)
# Set up logging
logger = logging.getLogger("inquisitor")
# The logger itself lets everything through; each handler applies its own level.
logger.setLevel(logging.DEBUG)


def add_logging_handler(verbose, log_filename):
    """
    Adds a logging handler according to the given settings.

    A truthy `verbose` selects the detailed format and the DEBUG threshold;
    otherwise the short format and INFO are used. A truthy `log_filename`
    logs to a rotating file at that path; otherwise a stream handler is used.
    """
    if verbose:
        log_format = "[{asctime}] [{levelname}:{filename}:{lineno}] {message}"
        log_level = logging.DEBUG
    else:
        log_format = "[{levelname}] {message}"
        log_level = logging.INFO

    if log_filename:
        handler = logging.handlers.RotatingFileHandler(
            log_filename,
            encoding="utf8",
            maxBytes=2**22,  # 4 MB per log file
            backupCount=4,  # rotated files kept alongside the current one
        )
    else:
        handler = logging.StreamHandler()

    handler.setLevel(log_level)
    handler.setFormatter(logging.Formatter(log_format, style="{"))

    logger.addHandler(handler)
def init_default_logging():
    """
    Adds a logging handler using the verbosity and log file taken from the
    resolved configuration.
    """
    add_logging_handler(verbose=is_verbose, log_filename=log_file)

View File

@ -8,19 +8,20 @@ from inquisitor.configs import DUNGEON_PATH, logger
logger = logging.getLogger("inquisitor")


def as_item(title, body=None):
    """
    Records an error as an item in the "inquisitor" cell and logs it.

    `title` becomes the item title. `body`, when given, is stored as
    preformatted text; it is HTML-escaped first so that text such as the
    "<module>" frames in a traceback is kept verbatim instead of being
    read as markup.
    """
    # Local import keeps this block self-contained
    import html

    # 64 random bits rendered as hex serve as the item id
    iid = "{:x}".format(random.getrandbits(16 * 4))
    item = {
        "id": iid,
        "source": "inquisitor",
        "title": title,
        "active": True,
        "created": timestamp.now(),
        "tags": ["inquisitor", "error"],
    }
    if body is not None:
        item["body"] = "<pre>{}</pre>".format(html.escape(str(body), quote=False))

    # Error reporting must not fail just because the cell has not been
    # created yet, so make sure the directory exists before writing.
    cell_path = os.path.join(DUNGEON_PATH, "inquisitor")
    os.makedirs(cell_path, exist_ok=True)
    path = os.path.join(cell_path, iid + ".item")

    logger.error(json.dumps(item))
    with open(path, "w", encoding="utf8") as f:
        f.write(json.dumps(item, indent=2))

View File

@ -7,180 +7,180 @@ from inquisitor import error
from inquisitor import timestamp from inquisitor import timestamp
class WritethroughDict:
    """
    A wrapper for a dictionary saved to the file system.

    Every item assignment rewrites the backing JSON file, so the file always
    reflects the latest assigned values.
    """

    @staticmethod
    def create(path, item):
        """
        Creates a writethrough dictionary from a dictionary in memory and
        initializes a file to save it.

        Raises FileExistsError if a file already exists at `path`.
        """
        if os.path.isfile(path):
            raise FileExistsError(path)
        wd = WritethroughDict(path, item)
        wd.flush()
        return wd

    @staticmethod
    def load(path):
        """
        Creates a writethrough dictionary from an existing file in the
        file system.

        Raises FileNotFoundError if there is no file at `path`.
        """
        if not os.path.isfile(path):
            raise FileNotFoundError(path)
        # Read with the same encoding flush() writes with, rather than the
        # platform default.
        with open(path, encoding="utf8") as f:
            item = json.load(f)
        return WritethroughDict(path, item)

    def __init__(self, path, item):
        # path: location of the backing JSON file
        self.path = path
        # item: the wrapped in-memory dictionary
        self.item = item

    def __getitem__(self, key):
        return self.item[key]

    def get(self, *args, **kwargs):
        """Forwards to the wrapped dictionary's get()."""
        return self.item.get(*args, **kwargs)

    def __setitem__(self, key, value):
        # Write-through: persist immediately after every assignment
        self.item[key] = value
        self.flush()

    def __contains__(self, key):
        return key in self.item

    def __repr__(self):
        return repr(self.item)

    def __str__(self):
        return str(self.item)

    def flush(self):
        """Writes the wrapped dictionary to the backing file as JSON."""
        # Serialize before opening so a serialization error cannot leave a
        # truncated file behind.
        s = json.dumps(self.item, indent=2)
        with open(self.path, "w", encoding="utf8") as f:
            f.write(s)
def load_state(source_name):
    """Loads the persisted state dictionary from the source's cell."""
    return WritethroughDict.load(os.path.join(DUNGEON_PATH, source_name, "state"))
def load_item(source_name, item_id):
    """Loads the item with the given id from the source's cell."""
    item_filename = f"{item_id}.item"
    return WritethroughDict.load(os.path.join(DUNGEON_PATH, source_name, item_filename))
def item_exists(source_name, item_id):
    """
    Returns whether an item file for the given id exists in the source's cell.
    """
    item_filename = f"{item_id}.item"
    return os.path.isfile(os.path.join(DUNGEON_PATH, source_name, item_filename))
def get_item_ids(cell_name):
    """
    Returns the ids of the items in the given cell, taken from the names of
    its ".item" files.
    """
    suffix = ".item"
    ids = []
    for entry in os.listdir(os.path.join(DUNGEON_PATH, cell_name)):
        if entry.endswith(suffix):
            # The id is the file name without the suffix
            ids.append(entry[: -len(suffix)])
    return ids
def new_item(source_name, item):
    """
    Creates a new item from the fields in the provided dictionary, filling
    in defaults for the rest. The dictionary is modified in place.

    Raises an exception if the item has no id, and FileExistsError if the
    item file already exists.
    """
    # id is required
    if "id" not in item:
        raise Exception(f"Cannot create item with no id. Value = {item}")

    # A source set by the item itself wins, which lets a source create items
    # in another source's cell; otherwise source_name is used.
    item.setdefault("source", source_name)

    # New items are always active and stamped with the current time.
    item["active"] = True
    item["created"] = timestamp.now()

    # The title falls back to the id.
    item.setdefault("title", item["id"])

    # Tags default only when the key is missing; an empty list is kept.
    item.setdefault("tags", [source_name])

    # All other fields are optional.
    item_filename = f'{item["id"]}.item'
    return WritethroughDict.create(
        os.path.join(DUNGEON_PATH, item["source"], item_filename), item
    )
def delete_item(source_name, item_id):
    """
    Deletes the item's file from the source's cell.
    """
    item_filename = f"{item_id}.item"
    os.remove(os.path.join(DUNGEON_PATH, source_name, item_filename))
def load_items(source_name):
    """
    Returns a map of ids to items and a list of the file names that could
    not be read.
    """
    suffix = ".item"
    items, errors = {}, []
    cell_path = os.path.join(DUNGEON_PATH, source_name)
    item_files = [name for name in os.listdir(cell_path) if name.endswith(suffix)]
    for filename in item_files:
        try:
            loaded = load_item(source_name, filename[: -len(suffix)])
            items[loaded["id"]] = loaded
        except Exception:
            # A bad item is reported by file name instead of aborting the load
            errors.append(filename)
    return items, errors
def load_active_items(source_names):
    """
    Returns a list of active items and a list of unreadable item file names.
    If `source_names` is non-empty only those sources are loaded; otherwise
    every entry in the dungeon is checked.
    """
    suffix = ".item"
    items, errors = [], []
    now = timestamp.now()
    for source_name in source_names or os.listdir(DUNGEON_PATH):
        source_path = os.path.join(DUNGEON_PATH, source_name)
        if not os.path.isdir(source_path):
            logger.warning(f"Skipping nonexistent source {source_name}")
            continue
        for filename in os.listdir(source_path):
            if not filename.endswith(suffix):
                continue
            try:
                item = load_item(source_name, filename[: -len(suffix)])
                # The time-to-show field hides an item until that many time
                # units after its creation have passed.
                hidden = "tts" in item and now < item["created"] + item["tts"]
                # Hidden and inactive items are not shown
                if hidden or not item["active"]:
                    continue
                items.append(item)
            except Exception:
                errors.append(filename)
    return items, errors

View File

@ -10,229 +10,234 @@ from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
# Fields whose stored value is replaced by the newly fetched value when an
# existing item is fetched again. Every entry needs its own trailing comma:
# adjacent string literals are silently concatenated into a single name.
USE_NEWEST = (
    "title",
    "tags",
    "link",
    "time",
    "author",
    "body",
    "ttl",
    "ttd",
    "tts",
)
class InquisitorStubSource:
    """A dummy source-like object for clearing out ad-hoc inquisitor items"""

    def fetch_new(self, state):
        """Never produces items; always returns a fresh empty list."""
        return list()
def ensure_cell(name):
    """
    Makes sure the dungeon has a cell directory for `name` containing a
    state file. Safe to call repeatedly.
    """
    cell_path = os.path.join(DUNGEON_PATH, name)
    if not os.path.isdir(cell_path):
        logger.info(f'Creating cell for source "{name}"')
        os.mkdir(cell_path)

    state_path = os.path.join(cell_path, "state")
    if os.path.isfile(state_path):
        return
    # A fresh cell starts with an empty state object
    with open(state_path, "w", encoding="utf8") as state_file:
        state_file.write(json.dumps({}))
def update_sources(*source_names):
    """
    Attempts to update each given source in turn. A failure while importing,
    initializing, or updating a source is recorded as an error item and does
    not stop the remaining sources from being processed.
    """

    def report(message):
        # Called only from an except block, so format_exc() sees the
        # exception currently being handled.
        error.as_item(message, traceback.format_exc())

    for source_name in source_names:
        # Import the source
        try:
            source_module = load_source(source_name)
        except Exception:
            report(f'Error importing source "{source_name}"')
            continue

        # If it doesn't have a cell yet, create one
        try:
            ensure_cell(source_name)
        except Exception:
            report(f'Error initializing source "{source_name}"')
            continue

        # Update the source
        try:
            logger.info(f'Updating source "{source_name}"')
            update_source(source_name, source_module)
        except Exception:
            report(f'Error updating source "{source_name}"')
def load_source(source_name):
    """
    Attempts to load the source module with the given name.

    Returns a stub source for the name "inquisitor"; otherwise returns the
    module loaded from "<source_name>.py" in the sources directory.
    Raises FileNotFoundError if that file is missing, ImportError if the
    module has no fetch_new, and whatever the module raises on import.
    The working directory and sys.path are restored before returning.
    """
    if source_name == "inquisitor":
        return InquisitorStubSource()

    cwd = os.getcwd()
    # Remember whether this call added the sources directory to sys.path, so
    # that an entry that was already present is not removed on the way out.
    added_to_sys_path = False
    try:
        # Push the sources directory.
        os.chdir(SOURCES_PATH)
        # Make the sources directory importable while working with sources.
        if SOURCES_PATH not in sys.path:
            sys.path.insert(0, SOURCES_PATH)
            added_to_sys_path = True

        # Check if the named source is present.
        source_file_name = source_name + ".py"
        if not os.path.isfile(source_file_name):
            raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')

        # Import the source module by file path.
        logger.debug(f'Loading module "{source_file_name}"')
        spec = importlib.util.spec_from_file_location(source_name, source_file_name)
        itemsource = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(itemsource)
        # NOTE(review): the module is executed from its file above and then
        # imported again by name; the by-name import is the object returned
        # and is what places the module in sys.modules. Confirm whether the
        # first execution is still needed before removing either step.
        itemsource = importlib.import_module(source_name)

        # Require fetch_new().
        if not hasattr(itemsource, "fetch_new"):
            raise ImportError(f'Missing fetch_new in "{source_file_name}"')

        return itemsource

    finally:
        os.chdir(cwd)
        if added_to_sys_path and SOURCES_PATH in sys.path:
            sys.path.remove(SOURCES_PATH)
def update_source(source_name, source):
    """
    Attempts to update the given source. Raises an exception if the source does.

    Fetches items from `source.fetch_new(state)`, creates the ones that do
    not exist yet, refreshes the ones that do, and deletes prior items that
    were not fetched again and are inactive (subject to their ttl and ttd
    fields). The optional `on_create` and `on_delete` handlers of the source
    are called for created and deleted items.
    """
    # Get a list of item ids that already existed in this source's cell.
    prior_ids = loader.get_item_ids(source_name)
    logger.debug(f"Found {len(prior_ids)} prior items")

    # Get the feed items from the source's fetch method.
    state = loader.load_state(source_name)
    fetched = source.fetch_new(state)
    state.flush()
    logger.debug(f"Fetched {len(fetched)} items")

    # Determine which items are new and which are updates.
    # We query the file system here instead of checking against this source's
    # item ids from above because sources are allowed to generate in other
    # sources' cells.
    new_items = []
    updated_items = []
    for item in fetched:
        item_source = item.get("source", source_name)
        if loader.item_exists(item_source, item["id"]):
            updated_items.append(item)
        else:
            new_items.append(item)

    # Write all the new items to the source's cell.
    has_create_handler = hasattr(source, "on_create")
    for item in new_items:
        item_source = item.get("source", source_name)
        created_item = loader.new_item(item_source, item)
        if has_create_handler:
            # Because some sources do not return items more than once,
            # exceptions in the on-create handler must be squashed.
            try:
                source.on_create(state, created_item)
            except Exception:
                error.as_item(
                    f"Exception in {source_name}.on_create", traceback.format_exc()
                )

    # Update the other items using the fetched items' values.
    missing = object()  # marks a field the stored item does not have yet
    for fetched_item in updated_items:
        # The source field is optional on fetched items, so resolve it the
        # same way as when the item was classified above.
        item_source = fetched_item.get("source", source_name)
        old_item = loader.load_item(item_source, fetched_item["id"])
        for field in USE_NEWEST:
            if field not in fetched_item:
                continue
            # Assign only on change, since each assignment rewrites the file.
            # A field the stored item lacks always counts as changed.
            if old_item.get(field, missing) != fetched_item[field]:
                old_item[field] = fetched_item[field]
        if "callback" in fetched_item:
            old_callback = old_item.get("callback", {})
            # Because of the way this update happens, any fields that are set
            # in the callback when the item is new will keep their original
            # values, as those values reappear in the fetched item on
            # subsequent updates.
            old_item["callback"] = {**old_callback, **fetched_item["callback"]}

    # In general, items are removed when they are old (not found in the last
    # fetch) and inactive. Some item fields can change this basic behavior.
    del_count = 0
    now = timestamp.now()
    has_delete_handler = hasattr(source, "on_delete")
    fetched_ids = {item["id"] for item in updated_items}
    old_item_ids = [item_id for item_id in prior_ids if item_id not in fetched_ids]
    for item_id in old_item_ids:
        item = loader.load_item(source_name, item_id)
        remove = not item["active"]
        # The time-to-live field protects an item from removal until expiry.
        # This is mainly used to avoid old items resurfacing when their source
        # cannot guarantee monotonicity.
        if "ttl" in item:
            ttl_date = item["created"] + item["ttl"]
            if ttl_date > now:
                continue
        # The time-to-die field can force an active item to be removed.
        if "ttd" in item:
            ttd_date = item["created"] + item["ttd"]
            if ttd_date < now:
                remove = True
        # Items to be removed are deleted
        if remove:
            try:
                if has_delete_handler:
                    # Run the delete handler so exceptions prevent deletions
                    source.on_delete(state, item)
                loader.delete_item(source_name, item["id"])
                del_count += 1
            except Exception:
                error.as_item(
                    f'Failed to delete {source_name}/{item["id"]}',
                    traceback.format_exc(),
                )

    # Note update timestamp in state
    state["last_updated"] = timestamp.now()

    # Log counts
    logger.info(
        "{} new item{}, {} deleted item{}".format(
            len(new_items),
            "s" if len(new_items) != 1 else "",
            del_count,
            "s" if del_count != 1 else "",
        )
    )
def item_callback(source_name, itemid):
    """
    Runs the source's callback function on one of its items and saves the
    resulting item and state. Any failure is recorded as an error item
    instead of being raised.
    """
    try:
        # Load the module with the callback function
        source_module = load_source(source_name)
        if not hasattr(source_module, "callback"):
            raise ImportError(f"Missing callback in '{source_name}'")

        # Load the source state and the origin item, then run the callback
        state = loader.load_state(source_name)
        item = loader.load_item(source_name, itemid)
        source_module.callback(state, item)

        # Save any changes, item first and then state
        for changed in (item, state):
            changed.flush()

    except Exception:
        error.as_item(
            f"Error executing callback for {source_name}/{itemid}",
            traceback.format_exc(),
        )

View File

@ -17,212 +17,221 @@ import requests
# Module imports # Module imports
from inquisitor import CACHE_PATH from inquisitor import CACHE_PATH
logger = logging.getLogger("inquisitor.templates")


def cache_image(source, url, filename):
    """
    Downloads `url` into the cache folder of `source` under `filename` and
    returns the "/cache/<source>/<filename>" path of the cached copy.

    Raises requests.HTTPError when the server answers with an error status,
    so that an error page is never stored in place of the image.
    """
    # Define some paths
    path = os.path.join(CACHE_PATH, source)
    file_path = os.path.join(path, filename)
    cached_url = f"/cache/{source}/{filename}"
    # Ensure cache folder; tolerates an existing folder and creates any
    # missing parent directories
    os.makedirs(path, exist_ok=True)
    # Fetch url
    logger.info(f"Caching {url} to {file_path}")
    response = requests.get(url)
    response.raise_for_status()
    # Write file to disk
    with open(file_path, "wb") as f:
        f.write(response.content)
    # Return the inquisitor path to the file
    return cached_url
class LinearCrawler: class LinearCrawler:
""" """
An engine for generating items from web sources that link content An engine for generating items from web sources that link content
together in a linear fashion, such as webcomics. together in a linear fashion, such as webcomics.
""" """
def fetch_new(self, state):
items = []
max_iter = self.max_iterations() - 1
new = self.try_fetch(state)
items.extend(new)
for iter in range(max_iter):
sleep(1)
# If we've already gotten some items out of this fetch, we don't
# want to lose them and have the state still be set to the next
# page, so we wrap further calls in a try block and force return
# if we hit an error.
try:
new = self.try_fetch(state)
except:
new = []
items.extend(new)
# Cut out early if there was nothing returned
if not new:
break
return items
def try_fetch(self, state): def fetch_new(self, state):
# Check for whether a new page should be crawled items = []
if 'current_page' not in state: max_iter = self.max_iterations() - 1
next_page = self.get_start_url() new = self.try_fetch(state)
else: items.extend(new)
current = state['current_page'] for iter in range(max_iter):
response = requests.get(current) sleep(1)
soup = BeautifulSoup(response.text, features='html.parser') # If we've already gotten some items out of this fetch, we don't
next_page = self.get_next_page_url(current, soup) # want to lose them and have the state still be set to the next
if not next_page: # page, so we wrap further calls in a try block and force return
return [] # nothing new # if we hit an error.
try:
new = self.try_fetch(state)
except:
new = []
items.extend(new)
# Cut out early if there was nothing returned
if not new:
break
return items
def try_fetch(self, state):
    """Fetch the next uncrawled page, if there is one.

    Args:
        state: Mutable mapping. ``state["current_page"]`` holds the URL of
            the last page turned into an item; it is absent before the
            first crawl.

    Returns:
        A one-element list containing the item built from the next page,
        or an empty list when ``get_next_page_url`` reports no next page.
    """
    # Check for whether a new page should be crawled
    if "current_page" not in state:
        next_page = self.get_start_url()
    else:
        current = state["current_page"]
        # Re-download the last crawled page to look for a next-page link.
        response = requests.get(current)
        soup = BeautifulSoup(response.text, features="html.parser")
        next_page = self.get_next_page_url(current, soup)
        if not next_page:
            return []  # nothing new

    # Download the new page
    logger.info("Fetching " + next_page)
    response = requests.get(next_page)
    soup = BeautifulSoup(response.text, features="html.parser")

    # Create an item from the page
    item = self.make_item(next_page, soup)

    # Update the state and return the item. The state is advanced only
    # after make_item has succeeded, so a failure above leaves the crawl
    # position untouched.
    state["current_page"] = next_page
    return [item]
def max_iterations(self):
    """Return the maximum number of pages one ``fetch_new`` call may crawl.

    ``fetch_new`` makes one initial ``try_fetch`` call and then at most
    ``max_iterations() - 1`` further calls. Defaults to 3.
    """
    return 3
def get_start_url(self):
    """Return the URL of the first page to crawl.

    ``try_fetch`` calls this when the state has no ``"current_page"`` entry.
    This base implementation always raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("get_start_url is required")
def get_next_page_url(self, url, soup):
    """Return the URL of the page that follows ``url``.

    ``try_fetch`` treats a falsy return value as "no new page yet".
    This base implementation always raises.

    Args:
        url: URL of the most recently crawled page.
        soup: ``BeautifulSoup`` parse of that page's HTML.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("get_next_page_url is required")
def make_item(self, url, soup):
    """Build an item from a freshly downloaded page.

    ``try_fetch`` returns the result wrapped in a one-element list.
    This base implementation always raises.

    Args:
        url: URL of the page that was downloaded.
        soup: ``BeautifulSoup`` parse of that page's HTML.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("make_item is required")
class RedditScraper:
    """
    An engine for generating items from subreddits.
    Requires defining source, subreddit_name
    fetch new with RedditScraper.fetch_new(state, __name__, reddit)
    """

    @staticmethod
    def fetch_new(state, name, reddit):
        """Collect items from every RedditScraper subclass in a module.

        Args:
            state: Unused here; accepted so the signature matches the
                documented ``fetch_new(state, __name__, reddit)`` call.
            name: Key into ``sys.modules`` of the module to inspect.
            reddit: Client object handed to each scraper's constructor.

        Returns:
            The concatenated ``get_items()`` results of every subclass found.
        """
        items = []
        for _, obj in inspect.getmembers(sys.modules[name]):
            if (
                inspect.isclass(obj)
                and issubclass(obj, RedditScraper)
                and obj is not RedditScraper
            ):
                items.extend(obj(reddit).get_items())
        return items

    def __init__(self, reddit):
        """Store the client used to look up subreddits."""
        # NOTE(review): presumably a praw.Reddit instance; only
        # ``reddit.subreddit(name)`` is called on it here.
        self.reddit = reddit

    def get_items(self):
        """Return an item for each listed post accepted by ``filter_post``."""
        sub_name = self.subreddit_name
        logger.info(f"Fetching posts from r/{sub_name}")
        subreddit = self.reddit.subreddit(sub_name)
        posts = self.subreddit_page(subreddit)
        return [self.item_from_post(post) for post in posts if self.filter_post(post)]

    def item_from_post(self, post):
        """Convert one post into an item dict.

        The keys ``ttl``, ``ttd``, ``tts`` and ``callback`` are included only
        when the corresponding ``get_*`` hook returns something other than
        ``None``; each hook is called exactly once.
        """
        item = {
            "source": self.source,
            "id": post.id,
            "title": self.get_title(post),
            "link": self.get_link(post),
            "time": post.created_utc,
            "author": "/u/" + (post.author.name if post.author else "[deleted]"),
            "body": self.get_body(post),
            "tags": self.get_tags(post),
        }
        optional_fields = (
            ("ttl", self.get_ttl),
            ("ttd", self.get_ttd),
            ("tts", self.get_tts),
            ("callback", self.get_callback),
        )
        for key, getter in optional_fields:
            value = getter(post)
            if value is not None:
                item[key] = value
        return item

    def subreddit_page(self, subreddit):
        """Return the posts to consider: by default the top 25 of "hot"."""
        return subreddit.hot(limit=25)

    def filter_post(self, post):
        """Return True to keep ``post``; the default keeps every post."""
        return True

    def get_title(self, post):
        """Return the item title, prefixed with spoiler/NSFW markers."""
        s = "[S] " if post.spoiler else ""
        nsfw = "[NSFW] " if post.over_18 else ""
        return f"{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}"

    def get_link(self, post):
        """Return the absolute URL of the post's comment page."""
        return f"https://reddit.com{post.permalink}"

    @staticmethod
    def _widest_under(candidates, width_key, limit=800):
        """Return the widest candidate narrower than ``limit``, or None."""
        small = [c for c in candidates if c[width_key] < limit]
        if not small:
            return None
        return max(small, key=lambda c: c[width_key])

    @staticmethod
    def _preview_text(text, soft_limit=1024):
        """Shorten ``text`` to about ``soft_limit`` characters.

        Text no longer than ``soft_limit`` is returned unchanged. Longer text
        is cut at the first space at or after ``soft_limit`` (or exactly at
        ``soft_limit`` when no such space exists) and marked with "[...]".
        """
        if len(text) <= soft_limit:
            return text
        cut = text.find(" ", soft_limit)
        if cut == -1:
            cut = soft_limit
        return text[:cut] + "[...]"

    def get_body(self, post):
        """Build the HTML body: link, preview images and a text excerpt.

        Preview and gallery handling is best-effort: malformed data is logged
        at debug level and skipped rather than failing the whole item.
        """
        # NOTE(review): post fields are interpolated into HTML unescaped;
        # confirm whether the renderer sanitizes item bodies.
        parts = []
        if not post.is_self:
            parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
        if hasattr(post, "preview"):
            try:
                preview = self._widest_under(
                    post.preview["images"][0]["resolutions"], "width"
                )
                if preview is not None:
                    parts.append(f'<img src="{preview["url"]}">')
            except Exception:
                logger.debug("Skipping unreadable post preview", exc_info=True)
        if getattr(post, "is_gallery", False):
            try:
                gallery_items = list(post.gallery_data["items"])
            except Exception:
                logger.debug("Skipping unreadable gallery data", exc_info=True)
                gallery_items = []
            for gallery_item in gallery_items:
                # Each entry is handled on its own so that one bad entry
                # does not discard the rest of the gallery.
                try:
                    metadata = post.media_metadata[gallery_item["media_id"]]
                    preview = self._widest_under(metadata["p"], "x")
                    full_url = metadata["s"]["u"]
                    preview_url = None if preview is None else preview["u"]
                except Exception:
                    logger.debug("Skipping unreadable gallery entry", exc_info=True)
                    continue
                parts.append(f'<i>link:</i> <a href="{full_url}">{full_url}</a>')
                if preview_url is not None:
                    parts.append(f'<img src="{preview_url}">')
        if post.selftext:
            parts.append(f"<p>{self._preview_text(post.selftext)}</p>")
        return "<br><hr>".join(parts)

    def get_tags(self, post):
        """Return the item's tags: "reddit", the subreddit name, and "nsfw" if flagged."""
        # subreddit_name_prefixed carries a two-character prefix ("r/").
        tags = ["reddit", post.subreddit_name_prefixed[2:]]
        if post.over_18:
            tags.append("nsfw")
        return tags

    def get_ttl(self, post):
        """Return the item's ``ttl`` value; the default is one week in seconds."""
        return 60 * 60 * 24 * 7  # 1 week

    def get_ttd(self, post):
        """Return the item's ``ttd`` value, or None to omit the key."""
        return None

    def get_tts(self, post):
        """Return the item's ``tts`` value, or None to omit the key."""
        return None

    def get_callback(self, post):
        """Return the item's ``callback`` value, or None to omit the key."""
        return None

    def callback(self, state, item):
        """Hook that subclasses may override; the base version always raises."""
        raise NotImplementedError("callback")

    def on_create(self, state, item):
        """Hook that subclasses may override; the base version always raises."""
        raise NotImplementedError("on_create")

    def on_delete(self, state, item):
        """Hook that subclasses may override; the base version always raises."""
        raise NotImplementedError("on_delete")

View File

@ -1,9 +1,11 @@
import time import time
import datetime import datetime
def now():
    """Return the current Unix time truncated to whole seconds."""
    return int(time.time())
def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
    """Format a Unix timestamp as a string in the local timezone.

    Args:
        ts: Seconds since the epoch, as accepted by
            ``datetime.datetime.fromtimestamp``.
        formatstr: ``strftime`` format for the result.

    Returns:
        The formatted local date and time.
    """
    dt = datetime.datetime.fromtimestamp(ts)
    return dt.strftime(formatstr)