Initial black linting pass

This commit is contained in:
Tim Van Baak 2022-12-28 12:44:48 -08:00
parent 52e32917d5
commit 42544bd30b
11 changed files with 1099 additions and 1022 deletions

View File

@ -1,2 +1,3 @@
from cli import main from cli import main
main() main()

View File

@ -14,7 +14,8 @@ from inquisitor.configs import (
subfeeds, subfeeds,
get_subfeed_overrides, get_subfeed_overrides,
logger, logger,
init_default_logging) init_default_logging,
)
from inquisitor import sources, loader, timestamp from inquisitor import sources, loader, timestamp
# Globals # Globals
@ -28,18 +29,22 @@ def make_query_link(text, wl, bl):
query = "?{}".format("&".join(params)) query = "?{}".format("&".join(params))
return '<a href="{1}">{0}</a>'.format(text, query) return '<a href="{1}">{0}</a>'.format(text, query)
@app.template_filter("datetimeformat") @app.template_filter("datetimeformat")
def datetimeformat(value): def datetimeformat(value):
return timestamp.stamp_to_readable(value) if value is not None else "" return timestamp.stamp_to_readable(value) if value is not None else ""
@app.route("/") @app.route("/")
def root(): def root():
return redirect(url_for('feed')) return redirect(url_for("feed"))
@app.route("/feed/") @app.route("/feed/")
def feed(): def feed():
return feed_for_sources(source_names=None) return feed_for_sources(source_names=None)
@app.route("/feed/<string:feed_name>/") @app.route("/feed/<string:feed_name>/")
def subfeed(feed_name): def subfeed(feed_name):
# Check for and apply subfeed overrides # Check for and apply subfeed overrides
@ -47,7 +52,7 @@ def subfeed(feed_name):
subfeed_config = subfeed_overrides or subfeeds or {} subfeed_config = subfeed_overrides or subfeeds or {}
# The built-in inquisitor subfeed contains sources not in another subfeed # The built-in inquisitor subfeed contains sources not in another subfeed
if feed_name == 'inquisitor': if feed_name == "inquisitor":
all_sources = os.listdir(DUNGEON_PATH) all_sources = os.listdir(DUNGEON_PATH)
for subfeed, sources in subfeed_config.items(): for subfeed, sources in subfeed_config.items():
for source_name in sources: for source_name in sources:
@ -59,17 +64,18 @@ def subfeed(feed_name):
return abort(404) return abort(404)
return feed_for_sources(subfeed_config[feed_name]) return feed_for_sources(subfeed_config[feed_name])
def feed_for_sources(source_names): def feed_for_sources(source_names):
# Determine exclusion filters # Determine exclusion filters
filters = [] filters = []
wl_param = request.args.get('only') wl_param = request.args.get("only")
wl = wl_param.split(",") if wl_param else [] wl = wl_param.split(",") if wl_param else []
bl_param = request.args.get('not') bl_param = request.args.get("not")
bl = bl_param.split(",") if bl_param else [] bl = bl_param.split(",") if bl_param else []
if wl: if wl:
filters.append(lambda item: not any([tag in wl for tag in item['tags']])) filters.append(lambda item: not any([tag in wl for tag in item["tags"]]))
if bl: if bl:
filters.append(lambda item: any([tag in bl for tag in item['tags']])) filters.append(lambda item: any([tag in bl for tag in item["tags"]]))
# Get all active+filtered items and all active tags # Get all active+filtered items and all active tags
total = 0 total = 0
@ -77,30 +83,40 @@ def feed_for_sources(source_names):
active_items = [] active_items = []
active_tags = {} active_tags = {}
for item in items: for item in items:
if item['active']: if item["active"]:
for tag in item['tags']: for tag in item["tags"]:
if tag not in active_tags: active_tags[tag] = 0 if tag not in active_tags:
active_tags[tag] = 0
active_tags[tag] += 1 active_tags[tag] += 1
# active_tags |= set(item['tags']) # active_tags |= set(item['tags'])
total += 1 total += 1
if not any(map(lambda f: f(item), filters)): if not any(map(lambda f: f(item), filters)):
active_items.append(item) active_items.append(item)
# Sort items by time # Sort items by time
active_items.sort(key=lambda i: i['time'] if 'time' in i and i['time'] else i['created'] if 'created' in i and i['created'] else 0) active_items.sort(
key=lambda i: i["time"]
if "time" in i and i["time"]
else i["created"]
if "created" in i and i["created"]
else 0
)
logger.info("Returning {} of {} items".format(len(active_items), total)) logger.info("Returning {} of {} items".format(len(active_items), total))
if errors: if errors:
read_ex = { read_ex = {
'title': 'Read errors', "title": "Read errors",
'body': "<pre>{}</pre>".format("\n\n".join(errors)), "body": "<pre>{}</pre>".format("\n\n".join(errors)),
'created': None, "created": None,
} }
active_items.insert(0, read_ex) active_items.insert(0, read_ex)
if total > 0: if total > 0:
# Create the feed control item # Create the feed control item
link_table = ["<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format( link_table = [
total, make_query_link("all", [], []))] "<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
total, make_query_link("all", [], [])
)
]
for tag, count in sorted(active_tags.items(), key=lambda i: i[0].lower()): for tag, count in sorted(active_tags.items(), key=lambda i: i[0].lower()):
links = [count] links = [count]
links.append(make_query_link(tag, [tag], [])) links.append(make_query_link(tag, [tag], []))
@ -116,76 +132,90 @@ def feed_for_sources(source_names):
else: else:
new_wl = [t for t in wl if t != tag] new_wl = [t for t in wl if t != tag]
links.append(make_query_link("+not", new_wl, bl + [tag])) links.append(make_query_link("+not", new_wl, bl + [tag]))
link_table.append("<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(*links)) link_table.append(
"<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(
*links
)
)
body = '<table class="feed-control">{}</table>'.format("\n".join(link_table)) body = '<table class="feed-control">{}</table>'.format("\n".join(link_table))
feed_control = { feed_control = {
'title': 'Feed Control [{}/{}]'.format(len(active_items), total), "title": "Feed Control [{}/{}]".format(len(active_items), total),
'body': body, "body": body,
} }
active_items.insert(0, feed_control) active_items.insert(0, feed_control)
selection = active_items[:100] selection = active_items[:100]
return render_template("feed.jinja2", return render_template(
"feed.jinja2",
items=selection, items=selection,
mdeac=[ mdeac=[
{'source': item['source'], 'itemid': item['id']} {"source": item["source"], "itemid": item["id"]}
for item in selection for item in selection
if 'id' in item]) if "id" in item
],
)
@app.route("/deactivate/", methods=['POST'])
@app.route("/deactivate/", methods=["POST"])
def deactivate(): def deactivate():
params = request.get_json() params = request.get_json()
if 'source' not in params and 'itemid' not in params: if "source" not in params and "itemid" not in params:
logger.error("Bad request params: {}".format(params)) logger.error("Bad request params: {}".format(params))
item = loader.load_item(params['source'], params['itemid']) item = loader.load_item(params["source"], params["itemid"])
if item['active']: if item["active"]:
logger.debug(f"Deactivating {params['source']}/{params['itemid']}") logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
item['active'] = False item["active"] = False
return jsonify({'active': item['active']}) return jsonify({"active": item["active"]})
@app.route("/punt/", methods=['POST'])
@app.route("/punt/", methods=["POST"])
def punt(): def punt():
params = request.get_json() params = request.get_json()
if 'source' not in params and 'itemid' not in params: if "source" not in params and "itemid" not in params:
logger.error("Bad request params: {}".format(params)) logger.error("Bad request params: {}".format(params))
item = loader.load_item(params['source'], params['itemid']) item = loader.load_item(params["source"], params["itemid"])
tomorrow = datetime.now() + timedelta(days=1) tomorrow = datetime.now() + timedelta(days=1)
morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0) morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
til_then = morning.timestamp() - item['created'] til_then = morning.timestamp() - item["created"]
item['tts'] = til_then item["tts"] = til_then
return jsonify(item.item) return jsonify(item.item)
@app.route("/mass-deactivate/", methods=['POST'])
@app.route("/mass-deactivate/", methods=["POST"])
def mass_deactivate(): def mass_deactivate():
params = request.get_json() params = request.get_json()
if 'items' not in params: if "items" not in params:
logger.error("Bad request params: {}".format(params)) logger.error("Bad request params: {}".format(params))
for info in params.get('items', []): for info in params.get("items", []):
source = info['source'] source = info["source"]
itemid = info['itemid'] itemid = info["itemid"]
item = loader.load_item(source, itemid) item = loader.load_item(source, itemid)
if item['active']: if item["active"]:
logger.debug(f"Deactivating {info['source']}/{info['itemid']}") logger.debug(f"Deactivating {info['source']}/{info['itemid']}")
item['active'] = False item["active"] = False
return jsonify({}) return jsonify({})
@app.route("/callback/", methods=['POST'])
@app.route("/callback/", methods=["POST"])
def callback(): def callback():
params = request.get_json() params = request.get_json()
if 'source' not in params and 'itemid' not in params: if "source" not in params and "itemid" not in params:
logger.error("Bad request params: {}".format(params)) logger.error("Bad request params: {}".format(params))
logger.info('Executing callback for {}/{}'.format(params['source'], params['itemid'])) logger.info(
sources.item_callback(params['source'], params['itemid']) "Executing callback for {}/{}".format(params["source"], params["itemid"])
)
sources.item_callback(params["source"], params["itemid"])
return jsonify({}) return jsonify({})
@app.route('/cache/<path:cache_path>')
@app.route("/cache/<path:cache_path>")
def cache(cache_path): def cache(cache_path):
path = os.path.join(CACHE_PATH, cache_path) path = os.path.join(CACHE_PATH, cache_path)
if not os.path.isfile(path): if not os.path.isfile(path):
return abort(404) return abort(404)
with open(path, 'rb') as f: with open(path, "rb") as f:
return f.read() return f.read()

View File

@ -14,53 +14,62 @@ def command_test(args):
"""Echo config file values.""" """Echo config file values."""
from inquisitor.configs.resolver import ( from inquisitor.configs.resolver import (
config_path, config_path,
CONFIG_DATA, data_path, CONFIG_DATA,
CONFIG_SOURCES, source_path, data_path,
CONFIG_CACHE, cache_path, CONFIG_SOURCES,
CONFIG_LOGFILE, log_file, source_path,
CONFIG_VERBOSE, is_verbose, CONFIG_CACHE,
CONFIG_SUBFEEDS, subfeeds, cache_path,
CONFIG_LOGFILE,
log_file,
CONFIG_VERBOSE,
is_verbose,
CONFIG_SUBFEEDS,
subfeeds,
) )
subfeeds = '; '.join(
'{0}: {1}'.format( subfeeds = (
sf_name, "; ".join(
' '.join(sf_sources) "{0}: {1}".format(sf_name, " ".join(sf_sources))
for sf_name, sf_sources in subfeeds.items()
) )
for sf_name, sf_sources if subfeeds
in subfeeds.items() else ""
) if subfeeds else '' )
print(f'Inquisitor configured from {config_path}') print(f"Inquisitor configured from {config_path}")
print(f' {CONFIG_DATA} = {data_path}') print(f" {CONFIG_DATA} = {data_path}")
print(f' {CONFIG_SOURCES} = {source_path}') print(f" {CONFIG_SOURCES} = {source_path}")
print(f' {CONFIG_CACHE} = {cache_path}') print(f" {CONFIG_CACHE} = {cache_path}")
print(f' {CONFIG_LOGFILE} = {log_file}') print(f" {CONFIG_LOGFILE} = {log_file}")
print(f' {CONFIG_VERBOSE} = {is_verbose}') print(f" {CONFIG_VERBOSE} = {is_verbose}")
print(f' {CONFIG_SUBFEEDS} = {subfeeds}') print(f" {CONFIG_SUBFEEDS} = {subfeeds}")
return 0 return 0
def command_update(args): def command_update(args):
"""Fetch and store new items from the specified sources.""" """Fetch and store new items from the specified sources."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="inquisitor update", prog="inquisitor update", description=command_update.__doc__, add_help=False
description=command_update.__doc__, )
add_help=False) parser.add_argument("source", nargs="*", help="Sources to update.")
parser.add_argument("source",
nargs="*",
help="Sources to update.")
args = parser.parse_args(args) args = parser.parse_args(args)
if len(args.source) == 0: if len(args.source) == 0:
parser.print_help() parser.print_help()
return 0 return 0
if not os.path.isdir(DUNGEON_PATH): if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon") logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1 return -1
if not os.path.isdir(SOURCES_PATH): if not os.path.isdir(SOURCES_PATH):
logger.error("Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources") logger.error(
"Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources"
)
# Update sources # Update sources
from inquisitor.sources import update_sources from inquisitor.sources import update_sources
update_sources(*args.source) update_sources(*args.source)
return 0 return 0
@ -70,25 +79,27 @@ def command_deactivate(args):
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="inquisitor deactivate", prog="inquisitor deactivate",
description=command_deactivate.__doc__, description=command_deactivate.__doc__,
add_help=False) add_help=False,
parser.add_argument("source", )
nargs="*", parser.add_argument("source", nargs="*", help="Cells to deactivate.")
help="Cells to deactivate.") parser.add_argument("--tag", help="Only deactivate items with this tag")
parser.add_argument("--tag", parser.add_argument(
help="Only deactivate items with this tag") "--title", help="Only deactivate items with titles containing this substring"
parser.add_argument("--title", )
help="Only deactivate items with titles containing this substring")
args = parser.parse_args(args) args = parser.parse_args(args)
if len(args.source) == 0: if len(args.source) == 0:
parser.print_help() parser.print_help()
return 0 return 0
if not os.path.isdir(DUNGEON_PATH): if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon") logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1 return -1
# Deactivate all items in each source. # Deactivate all items in each source.
from inquisitor.loader import load_items from inquisitor.loader import load_items
for source_name in args.source: for source_name in args.source:
path = os.path.join(DUNGEON_PATH, source_name) path = os.path.join(DUNGEON_PATH, source_name)
if not os.path.isdir(path): if not os.path.isdir(path):
@ -96,12 +107,12 @@ def command_deactivate(args):
count = 0 count = 0
items, _ = load_items(source_name) items, _ = load_items(source_name)
for item in items.values(): for item in items.values():
if args.tag and args.tag not in item['tags']: if args.tag and args.tag not in item["tags"]:
continue continue
if args.title and args.title not in item['title']: if args.title and args.title not in item["title"]:
continue continue
if item['active']: if item["active"]:
item['active'] = False item["active"] = False
count += 1 count += 1
logger.info("Deactivated {} items in '{}'".format(count, source_name)) logger.info("Deactivated {} items in '{}'".format(count, source_name))
@ -111,9 +122,8 @@ def command_deactivate(args):
def command_add(args): def command_add(args):
"""Creates an item.""" """Creates an item."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="inquisitor add", prog="inquisitor add", description=command_add.__doc__, add_help=False
description=command_add.__doc__, )
add_help=False)
parser.add_argument("--id", help="String") parser.add_argument("--id", help="String")
parser.add_argument("--source", help="String") parser.add_argument("--source", help="String")
parser.add_argument("--title", help="String") parser.add_argument("--title", help="String")
@ -125,40 +135,55 @@ def command_add(args):
parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds") parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds")
parser.add_argument("--ttd", type=int, help="Cleanup force in seconds") parser.add_argument("--ttd", type=int, help="Cleanup force in seconds")
parser.add_argument("--tts", type=int, help="Display delay in seconds") parser.add_argument("--tts", type=int, help="Display delay in seconds")
parser.add_argument("--create", action="store_true", help="Create source if it doesn't exist") parser.add_argument(
"--create", action="store_true", help="Create source if it doesn't exist"
)
args = parser.parse_args(args) args = parser.parse_args(args)
if not args.title: if not args.title:
parser.print_help() parser.print_help()
return 0 return 0
if not os.path.isdir(DUNGEON_PATH): if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon") logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1 return -1
source = args.source or 'inquisitor' source = args.source or "inquisitor"
cell_path = os.path.join(DUNGEON_PATH, source) cell_path = os.path.join(DUNGEON_PATH, source)
if args.create: if args.create:
from inquisitor.sources import ensure_cell from inquisitor.sources import ensure_cell
ensure_cell(source) ensure_cell(source)
elif not os.path.isdir(cell_path): elif not os.path.isdir(cell_path):
logger.error("Source '{}' does not exist".format(source)) logger.error("Source '{}' does not exist".format(source))
return -1 return -1
item = { item = {
'id': args.id or '{:x}'.format(random.getrandbits(16 * 4)), "id": args.id or "{:x}".format(random.getrandbits(16 * 4)),
'source': source, "source": source,
} }
if args.title: item['title'] = str(args.title) if args.title:
if args.link: item['link'] = str(args.link) item["title"] = str(args.title)
if args.time: item['time'] = int(args.time) if args.link:
if args.author: item['author'] = str(args.author) item["link"] = str(args.link)
if args.body: item['body'] = str(args.body) if args.time:
if args.tags: item['tags'] = [str(tag) for tag in args.tags.split(",")] item["time"] = int(args.time)
if args.ttl: item['ttl'] = int(args.ttl) if args.author:
if args.ttd: item['ttd'] = int(args.ttd) item["author"] = str(args.author)
if args.tts: item['tts'] = int(args.tts) if args.body:
item["body"] = str(args.body)
if args.tags:
item["tags"] = [str(tag) for tag in args.tags.split(",")]
if args.ttl:
item["ttl"] = int(args.ttl)
if args.ttd:
item["ttd"] = int(args.ttd)
if args.tts:
item["tts"] = int(args.tts)
from inquisitor.loader import new_item from inquisitor.loader import new_item
saved_item = new_item(source, item) saved_item = new_item(source, item)
logger.info(saved_item) logger.info(saved_item)
@ -166,7 +191,9 @@ def command_add(args):
def command_feed(args): def command_feed(args):
"""Print the current feed.""" """Print the current feed."""
if not os.path.isdir(DUNGEON_PATH): if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon") logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1 return -1
import shutil import shutil
@ -179,50 +206,56 @@ def command_feed(args):
return 0 return 0
if errors: if errors:
items.insert(0, { items.insert(
'title': '{} read errors: {}'.format(len(errors), ' '.join(errors)), 0,
'body': "\n".join(errors) {
}) "title": "{} read errors: {}".format(len(errors), " ".join(errors)),
"body": "\n".join(errors),
},
)
size = shutil.get_terminal_size((80, 20)) size = shutil.get_terminal_size((80, 20))
width = min(80, size.columns) width = min(80, size.columns)
for item in items: for item in items:
title = item['title'] if 'title' in item else "" title = item["title"] if "title" in item else ""
titles = [title] titles = [title]
while len(titles[-1]) > width - 4: while len(titles[-1]) > width - 4:
i = titles[-1][:width - 4].rfind(' ') i = titles[-1][: width - 4].rfind(" ")
titles = titles[:-1] + [titles[-1][:i].strip(), titles[-1][i:].strip()] titles = titles[:-1] + [titles[-1][:i].strip(), titles[-1][i:].strip()]
print('+' + (width - 2) * '-' + '+') print("+" + (width - 2) * "-" + "+")
for title in titles: for title in titles:
print("| {0:<{1}} |".format(title, width - 4)) print("| {0:<{1}} |".format(title, width - 4))
print("|{0:<{1}}|".format("", width - 2)) print("|{0:<{1}}|".format("", width - 2))
info1 = "" info1 = ""
if 'author' in title and item['author']: if "author" in title and item["author"]:
info1 += item['author'] + " " info1 += item["author"] + " "
if 'time' in item and item['time']: if "time" in item and item["time"]:
info1 += timestamp.stamp_to_readable(item['time']) info1 += timestamp.stamp_to_readable(item["time"])
print("| {0:<{1}} |".format(info1, width - 4)) print("| {0:<{1}} |".format(info1, width - 4))
created = timestamp.stamp_to_readable(item['created']) if 'created' in item else "" created = (
timestamp.stamp_to_readable(item["created"]) if "created" in item else ""
)
info2 = "{0} {1} {2}".format( info2 = "{0} {1} {2}".format(
item.get('source', ''), item.get('id', ''), created) item.get("source", ""), item.get("id", ""), created
)
print("| {0:<{1}} |".format(info2, width - 4)) print("| {0:<{1}} |".format(info2, width - 4))
print('+' + (width - 2) * '-' + '+') print("+" + (width - 2) * "-" + "+")
print() print()
def command_run(args): def command_run(args):
"""Run the default Flask server.""" """Run the default Flask server."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="inquisitor run", prog="inquisitor run", description=command_run.__doc__, add_help=False
description=command_run.__doc__, )
add_help=False)
parser.add_argument("--debug", action="store_true") parser.add_argument("--debug", action="store_true")
parser.add_argument("--port", type=int, default=5000) parser.add_argument("--port", type=int, default=5000)
args = parser.parse_args(args) args = parser.parse_args(args)
try: try:
from inquisitor.app import app from inquisitor.app import app
app.run(port=args.port, debug=args.debug) app.run(port=args.port, debug=args.debug)
return 0 return 0
except Exception as e: except Exception as e:
@ -245,38 +278,41 @@ def main():
"""CLI entry point""" """CLI entry point"""
# Enable piping # Enable piping
from signal import signal, SIGPIPE, SIG_DFL from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL) signal(SIGPIPE, SIG_DFL)
# Collect the commands from this module # Collect the commands from this module
import inquisitor.cli import inquisitor.cli
commands = { commands = {
name[8:]: func name[8:]: func
for name, func in vars(inquisitor.cli).items() for name, func in vars(inquisitor.cli).items()
if name.startswith('command_') if name.startswith("command_")
} }
descriptions = "\n".join([ descriptions = "\n".join(
"- {0}: {1}".format(name, func.__doc__) ["- {0}: {1}".format(name, func.__doc__) for name, func in commands.items()]
for name, func in commands.items()]) )
# Set up the parser # Set up the parser
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Available commands:\n{}\n".format(descriptions), description="Available commands:\n{}\n".format(descriptions),
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
add_help=False) add_help=False,
parser.add_argument("command", )
parser.add_argument(
"command",
nargs="?", nargs="?",
default="help", default="help",
help="The command to execute", help="The command to execute",
choices=commands, choices=commands,
metavar="command") metavar="command",
parser.add_argument("args", )
nargs=argparse.REMAINDER, parser.add_argument(
help="Command arguments", "args", nargs=argparse.REMAINDER, help="Command arguments", metavar="args"
metavar="args") )
parser.add_argument("-v", parser.add_argument(
action="store_true", "-v", action="store_true", dest="verbose", help="Enable debug logging"
dest="verbose", )
help="Enable debug logging")
# Extract the usage print for command_help # Extract the usage print for command_help
global print_usage global print_usage

View File

@ -1,10 +1,5 @@
from .resolver import data_path as DUNGEON_PATH from .resolver import data_path as DUNGEON_PATH
from .resolver import source_path as SOURCES_PATH from .resolver import source_path as SOURCES_PATH
from .resolver import cache_path as CACHE_PATH from .resolver import cache_path as CACHE_PATH
from .resolver import ( from .resolver import logger, subfeeds
logger, from .resolver import add_logging_handler, init_default_logging, get_subfeed_overrides
subfeeds)
from .resolver import (
add_logging_handler,
init_default_logging,
get_subfeed_overrides)

View File

@ -4,34 +4,34 @@ import logging
# Constants governing config resolution: # Constants governing config resolution:
# Path to the config file, containing key-value pairs of the other settings # Path to the config file, containing key-value pairs of the other settings
CONFIG_ENVVAR = 'INQUISITOR_CONFIG' CONFIG_ENVVAR = "INQUISITOR_CONFIG"
DEFAULT_CONFIG_PATH = '/etc/inquisitor.conf' DEFAULT_CONFIG_PATH = "/etc/inquisitor.conf"
# Path to the folder where items are stored # Path to the folder where items are stored
CONFIG_DATA = 'DataPath' CONFIG_DATA = "DataPath"
DEFAULT_DATA_PATH = '/var/inquisitor/data/' DEFAULT_DATA_PATH = "/var/inquisitor/data/"
# Path to the folder where source modules are stored # Path to the folder where source modules are stored
CONFIG_SOURCES = 'SourcePath' CONFIG_SOURCES = "SourcePath"
DEFAULT_SOURCES_PATH = '/var/inquisitor/sources/' DEFAULT_SOURCES_PATH = "/var/inquisitor/sources/"
# Path to the folder where cached files are stored # Path to the folder where cached files are stored
CONFIG_CACHE = 'CachePath' CONFIG_CACHE = "CachePath"
DEFAULT_CACHE_PATH = '/var/inquisitor/cache/' DEFAULT_CACHE_PATH = "/var/inquisitor/cache/"
# Path to a log file where logging will be redirected # Path to a log file where logging will be redirected
CONFIG_LOGFILE = 'LogFile' CONFIG_LOGFILE = "LogFile"
DEFAULT_LOG_FILE = None DEFAULT_LOG_FILE = None
# Whether logging is verbose # Whether logging is verbose
CONFIG_VERBOSE = 'Verbose' CONFIG_VERBOSE = "Verbose"
DEFAULT_VERBOSITY = 'false' DEFAULT_VERBOSITY = "false"
# Subfeed source lists, with each subfeed config separated by lines and # Subfeed source lists, with each subfeed config separated by lines and
# sources within a subfeed separated by spaces # sources within a subfeed separated by spaces
CONFIG_SUBFEEDS = 'Subfeeds' CONFIG_SUBFEEDS = "Subfeeds"
DEFAULT_SUBFEEDS = None DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = 'subfeeds.conf' SUBFEED_CONFIG_FILE = "subfeeds.conf"
def read_config_file(config_path): def read_config_file(config_path):
@ -43,79 +43,78 @@ def read_config_file(config_path):
# Parse the config file into key-value pairs # Parse the config file into key-value pairs
if not os.path.isfile(config_path): if not os.path.isfile(config_path):
raise FileNotFoundError(f'No config file found at {config_path}, try setting {CONFIG_ENVVAR}') raise FileNotFoundError(
f"No config file found at {config_path}, try setting {CONFIG_ENVVAR}"
)
accumulated_configs = {} accumulated_configs = {}
current_key = None current_key = None
with open(config_path, 'r', encoding='utf8') as cfg: with open(config_path, "r", encoding="utf8") as cfg:
line_no = 0 line_no = 0
for line in cfg: for line in cfg:
line_no += 1 line_no += 1
# Skip blank lines and comments # Skip blank lines and comments
if not line.strip() or line.lstrip().startswith('#'): if not line.strip() or line.lstrip().startswith("#"):
continue continue
# Accumulate config keyvalue pairs # Accumulate config keyvalue pairs
if '=' in line: if "=" in line:
# "key = value" begins a new keyvalue pair # "key = value" begins a new keyvalue pair
current_key, value = line.split('=', maxsplit=1) current_key, value = line.split("=", maxsplit=1)
current_key = current_key.strip() current_key = current_key.strip()
accumulated_configs[current_key] = value.strip() accumulated_configs[current_key] = value.strip()
else: else:
# If there's no '=' and no previous key, throw # If there's no '=' and no previous key, throw
if not current_key: if not current_key:
raise ValueError(f'Invalid config format on line {line_no}') raise ValueError(f"Invalid config format on line {line_no}")
else: else:
accumulated_configs[current_key] += '\n' + line.strip() accumulated_configs[current_key] += "\n" + line.strip()
return accumulated_configs return accumulated_configs
def parse_subfeed_value(value): def parse_subfeed_value(value):
sf_defs = [sf.strip() for sf in value.split('\n') if sf.strip()] sf_defs = [sf.strip() for sf in value.split("\n") if sf.strip()]
subfeeds = {} subfeeds = {}
for sf_def in sf_defs: for sf_def in sf_defs:
if ':' not in sf_def: if ":" not in sf_def:
raise ValueError(f'Invalid subfeed definition: {sf_def}') raise ValueError(f"Invalid subfeed definition: {sf_def}")
sf_name, sf_sources = sf_def.split(':', maxsplit=1) sf_name, sf_sources = sf_def.split(":", maxsplit=1)
sf_sources = sf_sources.split() sf_sources = sf_sources.split()
subfeeds[sf_name.strip()] = [source.strip() for source in sf_sources] subfeeds[sf_name.strip()] = [source.strip() for source in sf_sources]
return subfeeds return subfeeds
# Read envvar for config file location, with fallback to default # Read envvar for config file location, with fallback to default
config_path = os.path.abspath( config_path = os.path.abspath(os.environ.get(CONFIG_ENVVAR) or DEFAULT_CONFIG_PATH)
os.environ.get(CONFIG_ENVVAR) or
DEFAULT_CONFIG_PATH
)
configs = read_config_file(config_path) configs = read_config_file(config_path)
# Extract and validate config values # Extract and validate config values
data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
if not os.path.isabs(data_path): if not os.path.isabs(data_path):
raise ValueError(f'Non-absolute data path: {data_path}') raise ValueError(f"Non-absolute data path: {data_path}")
if not os.path.isdir(data_path): if not os.path.isdir(data_path):
raise FileNotFoundError(f'Cannot find directory {data_path}') raise FileNotFoundError(f"Cannot find directory {data_path}")
source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
if not os.path.isabs(source_path): if not os.path.isabs(source_path):
raise ValueError(f'Non-absolute source path: {source_path}') raise ValueError(f"Non-absolute source path: {source_path}")
if not os.path.isdir(source_path): if not os.path.isdir(source_path):
raise FileNotFoundError(f'Cannot find directory {source_path}') raise FileNotFoundError(f"Cannot find directory {source_path}")
cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
if not os.path.isabs(cache_path): if not os.path.isabs(cache_path):
raise ValueError(f'Non-absolute cache path: {cache_path}') raise ValueError(f"Non-absolute cache path: {cache_path}")
if not os.path.isdir(cache_path): if not os.path.isdir(cache_path):
raise FileNotFoundError(f'Cannot find directory {cache_path}') raise FileNotFoundError(f"Cannot find directory {cache_path}")
log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
if log_file and not os.path.isabs(log_file): if log_file and not os.path.isabs(log_file):
raise ValueError(f'Non-absolute log file path: {log_file}') raise ValueError(f"Non-absolute log file path: {log_file}")
is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
if is_verbose != 'true' and is_verbose != 'false': if is_verbose != "true" and is_verbose != "false":
raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}') raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
is_verbose = (is_verbose == 'true') is_verbose = is_verbose == "true"
subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
if subfeeds: if subfeeds:
@ -143,35 +142,34 @@ def get_subfeed_overrides():
logger = logging.getLogger("inquisitor") logger = logging.getLogger("inquisitor")
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
def add_logging_handler(verbose, log_filename):
    """
    Attach one new handler to the module-level ``logger``.

    verbose: when truthy, records use the detailed format (timestamp, level,
        file name, line number) and the handler level is DEBUG; otherwise
        the short format is used and the handler level is INFO.
    log_filename: path of a rotating log file. When falsy, a plain
        ``StreamHandler`` is used instead.
    """
    if verbose:
        record_format = "[{asctime}] [{levelname}:{filename}:{lineno}] {message}"
        level = logging.DEBUG
    else:
        record_format = "[{levelname}] {message}"
        level = logging.INFO
    record_formatter = logging.Formatter(record_format, style="{")

    if log_filename:
        # Rotate at 4 MB per file, keeping four rotated backups.
        new_handler = logging.handlers.RotatingFileHandler(
            log_filename,
            encoding="utf8",
            maxBytes=2**22,
            backupCount=4,
        )
    else:
        new_handler = logging.StreamHandler()

    new_handler.setFormatter(record_formatter)
    new_handler.setLevel(level)
    logger.addHandler(new_handler)
def init_default_logging():
    """Add a logging handler built from the configured verbosity and log file."""
    add_logging_handler(verbose=is_verbose, log_filename=log_file)

View File

@ -8,19 +8,20 @@ from inquisitor.configs import DUNGEON_PATH, logger
logger = logging.getLogger("inquisitor") logger = logging.getLogger("inquisitor")
def as_item(title, body=None):
    """
    Record an error as an item in the built-in "inquisitor" cell.

    title: text used as the item's title.
    body: optional plain-text detail (callers pass formatted tracebacks).
        It is HTML-escaped and wrapped in a ``<pre>`` element.

    The item is logged at ERROR level as JSON and then written to
    ``<DUNGEON_PATH>/inquisitor/<id>.item``.
    """
    import html

    iid = "{:x}".format(random.getrandbits(16 * 4))
    item = {
        "id": iid,
        "source": "inquisitor",
        "title": title,
        "active": True,
        "created": timestamp.now(),
        "tags": ["inquisitor", "error"],
    }
    if body is not None:
        # Tracebacks contain text such as "<module>"; left unescaped it would
        # be parsed as markup instead of being shown inside the <pre> block.
        item["body"] = "<pre>{}</pre>".format(html.escape(str(body), quote=False))

    cell_path = os.path.join(DUNGEON_PATH, "inquisitor")
    # The error cell may not exist yet when the first error is reported;
    # failing here would lose the error that is being recorded.
    os.makedirs(cell_path, exist_ok=True)
    path = os.path.join(cell_path, iid + ".item")

    logger.error(json.dumps(item))
    # Explicit encoding, matching how the loader writes item files.
    with open(path, "w", encoding="utf8") as f:
        f.write(json.dumps(item, indent=2))

View File

@ -7,7 +7,7 @@ from inquisitor import error
from inquisitor import timestamp from inquisitor import timestamp
class WritethroughDict(): class WritethroughDict:
"""A wrapper for a dictionary saved to the file system.""" """A wrapper for a dictionary saved to the file system."""
@staticmethod @staticmethod
@ -59,7 +59,7 @@ class WritethroughDict():
def flush(self):
    """Write ``self.item`` to ``self.path`` as indented JSON, replacing the file."""
    # Serialize before opening the file so a failed dump never truncates it.
    serialized = json.dumps(self.item, indent=2)
    with open(self.path, mode="w", encoding="utf8") as out:
        out.write(serialized)
@ -71,7 +71,7 @@ def load_state(source_name):
def load_item(source_name, item_id):
    """Load item ``item_id`` from the cell of ``source_name``."""
    item_filename = "{}.item".format(item_id)
    return WritethroughDict.load(
        os.path.join(DUNGEON_PATH, source_name, item_filename)
    )
@ -79,7 +79,7 @@ def item_exists(source_name, item_id):
""" """
Checks for the existence of an item. Checks for the existence of an item.
""" """
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item') item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
return os.path.isfile(item_path) return os.path.isfile(item_path)
@ -91,7 +91,7 @@ def get_item_ids(cell_name):
return [ return [
filename[:-5] filename[:-5]
for filename in os.listdir(cell_path) for filename in os.listdir(cell_path)
if filename.endswith('.item') if filename.endswith(".item")
] ]
@ -101,30 +101,30 @@ def new_item(source_name, item):
Initializes other fields to their default values. Initializes other fields to their default values.
""" """
# id is required # id is required
if 'id' not in item: if "id" not in item:
raise Exception(f'Cannot create item with no id. Value = {item}') raise Exception(f"Cannot create item with no id. Value = {item}")
# source must be filled in, so if it is absent it is auto-populated with # source must be filled in, so if it is absent it is auto-populated with
# source_name. Note: this allows sources to fill in a different source. # source_name. Note: this allows sources to fill in a different source.
if 'source' not in item: if "source" not in item:
item['source'] = source_name item["source"] = source_name
# active is forced to True for new items # active is forced to True for new items
item['active'] = True item["active"] = True
# created is forced to the current timestamp # created is forced to the current timestamp
item['created'] = timestamp.now() item["created"] = timestamp.now()
# title is auto-populated with the id if missing # title is auto-populated with the id if missing
if 'title' not in item: if "title" not in item:
item['title'] = item['id'] item["title"] = item["id"]
# tags is auto-populated if missing (not if empty!) # tags is auto-populated if missing (not if empty!)
if 'tags' not in item: if "tags" not in item:
item['tags'] = [source_name] item["tags"] = [source_name]
# All other fields are optional. # All other fields are optional.
item_path = os.path.join(DUNGEON_PATH, item['source'], f'{item["id"]}.item') item_path = os.path.join(DUNGEON_PATH, item["source"], f'{item["id"]}.item')
return WritethroughDict.create(item_path, item) return WritethroughDict.create(item_path, item)
@ -132,7 +132,7 @@ def delete_item(source_name, item_id):
""" """
Delete an item. Delete an item.
""" """
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item') item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
os.remove(item_path) os.remove(item_path)
@ -144,10 +144,10 @@ def load_items(source_name):
items = {} items = {}
errors = [] errors = []
for filename in os.listdir(cell_path): for filename in os.listdir(cell_path):
if filename.endswith('.item'): if filename.endswith(".item"):
try: try:
item = load_item(source_name, filename[:-5]) item = load_item(source_name, filename[:-5])
items[item['id']] = item items[item["id"]] = item
except Exception: except Exception:
errors.append(filename) errors.append(filename)
return items, errors return items, errors
@ -165,20 +165,20 @@ def load_active_items(source_names):
for source_name in check_list: for source_name in check_list:
source_path = os.path.join(DUNGEON_PATH, source_name) source_path = os.path.join(DUNGEON_PATH, source_name)
if not os.path.isdir(source_path): if not os.path.isdir(source_path):
logger.warning(f'Skipping nonexistent source {source_name}') logger.warning(f"Skipping nonexistent source {source_name}")
continue continue
for filename in os.listdir(source_path): for filename in os.listdir(source_path):
if not filename.endswith('.item'): if not filename.endswith(".item"):
continue continue
try: try:
item = load_item(source_name, filename[:-5]) item = load_item(source_name, filename[:-5])
# The time-to-show field hides items until an expiry date. # The time-to-show field hides items until an expiry date.
if 'tts' in item: if "tts" in item:
tts_date = item['created'] + item['tts'] tts_date = item["created"] + item["tts"]
if now < tts_date: if now < tts_date:
continue continue
# Don't show inactive items # Don't show inactive items
if not item['active']: if not item["active"]:
continue continue
items.append(item) items.append(item)
except Exception: except Exception:

View File

@ -10,20 +10,20 @@ from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
# Item fields whose freshly fetched value overwrites the stored value when
# an existing item is updated.
# Each entry needs its own trailing comma: adjacent string literals are
# concatenated, which previously merged "time" and "author" into the single
# bogus field "timeauthor" so neither was ever refreshed.
USE_NEWEST = (
    "title",
    "tags",
    "link",
    "time",
    "author",
    "body",
    "ttl",
    "ttd",
    "tts",
)
class InquisitorStubSource: class InquisitorStubSource:
"""A dummy source-like object for clearing out ad-hoc inquisitor items""" """A dummy source-like object for clearing out ad-hoc inquisitor items"""
def fetch_new(self, state): def fetch_new(self, state):
return [] return []
@ -36,9 +36,9 @@ def ensure_cell(name):
if not os.path.isdir(cell_path): if not os.path.isdir(cell_path):
logger.info(f'Creating cell for source "{name}"') logger.info(f'Creating cell for source "{name}"')
os.mkdir(cell_path) os.mkdir(cell_path)
state_path = os.path.join(cell_path, 'state') state_path = os.path.join(cell_path, "state")
if not os.path.isfile(state_path): if not os.path.isfile(state_path):
with open(state_path, 'w', encoding='utf8') as state: with open(state_path, "w", encoding="utf8") as state:
json.dump({}, state) json.dump({}, state)
@ -52,8 +52,8 @@ def update_sources(*source_names):
source_module = load_source(source_name) source_module = load_source(source_name)
except Exception: except Exception:
error.as_item( error.as_item(
f'Error importing source "{source_name}"', f'Error importing source "{source_name}"', traceback.format_exc()
traceback.format_exc()) )
continue continue
# If it doesn't have a cell yet, create one # If it doesn't have a cell yet, create one
@ -61,8 +61,8 @@ def update_sources(*source_names):
ensure_cell(source_name) ensure_cell(source_name)
except Exception: except Exception:
error.as_item( error.as_item(
f'Error initializing source "{source_name}"', f'Error initializing source "{source_name}"', traceback.format_exc()
traceback.format_exc()) )
continue continue
# Update the source # Update the source
@ -71,8 +71,8 @@ def update_sources(*source_names):
update_source(source_name, source_module) update_source(source_name, source_module)
except Exception: except Exception:
error.as_item( error.as_item(
f'Error updating source "{source_name}"', f'Error updating source "{source_name}"', traceback.format_exc()
traceback.format_exc()) )
def load_source(source_name): def load_source(source_name):
@ -80,7 +80,7 @@ def load_source(source_name):
Attempts to load the source module with the given name. Attempts to load the source module with the given name.
Raises an exception on failure. Raises an exception on failure.
""" """
if source_name == 'inquisitor': if source_name == "inquisitor":
return InquisitorStubSource() return InquisitorStubSource()
cwd = os.getcwd() cwd = os.getcwd()
@ -92,7 +92,7 @@ def load_source(source_name):
sys.path.insert(0, SOURCES_PATH) sys.path.insert(0, SOURCES_PATH)
# Check if the named source is present. # Check if the named source is present.
source_file_name = source_name + '.py' source_file_name = source_name + ".py"
if not os.path.isfile(source_file_name): if not os.path.isfile(source_file_name):
raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"') raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')
@ -104,7 +104,7 @@ def load_source(source_name):
itemsource = importlib.import_module(source_name) itemsource = importlib.import_module(source_name)
# Require fetch_new(). # Require fetch_new().
if not hasattr(itemsource, 'fetch_new'): if not hasattr(itemsource, "fetch_new"):
raise ImportError(f'Missing fetch_new in "{source_file_name}"') raise ImportError(f'Missing fetch_new in "{source_file_name}"')
return itemsource return itemsource
@ -121,14 +121,14 @@ def update_source(source_name, source):
""" """
# Get a list of item ids that already existed in this source's cell. # Get a list of item ids that already existed in this source's cell.
prior_ids = loader.get_item_ids(source_name) prior_ids = loader.get_item_ids(source_name)
logger.debug(f'Found {len(prior_ids)} prior items') logger.debug(f"Found {len(prior_ids)} prior items")
# Get the feed items from the source's fetch method. # Get the feed items from the source's fetch method.
state = loader.load_state(source_name) state = loader.load_state(source_name)
fetched = source.fetch_new(state) fetched = source.fetch_new(state)
state.flush() state.flush()
logger.debug(f'Fetched {len(fetched)} items') logger.debug(f"Fetched {len(fetched)} items")
fetched_items = {item['id']: item for item in fetched} fetched_items = {item["id"]: item for item in fetched}
# Determine which items are new and which are updates. # Determine which items are new and which are updates.
# We query the file system here instead of checking against this source's # We query the file system here instead of checking against this source's
@ -137,16 +137,16 @@ def update_source(source_name, source):
new_items = [] new_items = []
updated_items = [] updated_items = []
for item in fetched: for item in fetched:
item_source = item.get('source', source_name) item_source = item.get("source", source_name)
if loader.item_exists(item_source, item['id']): if loader.item_exists(item_source, item["id"]):
updated_items.append(item) updated_items.append(item)
else: else:
new_items.append(item) new_items.append(item)
# Write all the new items to the source's cell. # Write all the new items to the source's cell.
has_create_handler = hasattr(source, 'on_create') has_create_handler = hasattr(source, "on_create")
for item in new_items: for item in new_items:
item_source = item.get('source', source_name) item_source = item.get("source", source_name)
created_item = loader.new_item(item_source, item) created_item = loader.new_item(item_source, item)
if has_create_handler: if has_create_handler:
# Because some sources do not return items more than once, # Because some sources do not return items more than once,
@ -155,45 +155,43 @@ def update_source(source_name, source):
source.on_create(state, created_item) source.on_create(state, created_item)
except: except:
error.as_item( error.as_item(
f'Exception in {source_name}.on_create', f"Exception in {source_name}.on_create", traceback.format_exc()
traceback.format_exc()) )
# Update the other items using the fetched items' values. # Update the other items using the fetched items' values.
for new_item in updated_items: for new_item in updated_items:
old_item = loader.load_item(new_item['source'], new_item['id']) old_item = loader.load_item(new_item["source"], new_item["id"])
for field in USE_NEWEST: for field in USE_NEWEST:
if field in new_item and old_item[field] != new_item[field]: if field in new_item and old_item[field] != new_item[field]:
old_item[field] = new_item[field] old_item[field] = new_item[field]
if 'callback' in new_item: if "callback" in new_item:
old_callback = old_item.get('callback', {}) old_callback = old_item.get("callback", {})
# Because of the way this update happens, any fields that are set # Because of the way this update happens, any fields that are set
# in the callback when the item is new will keep their original # in the callback when the item is new will keep their original
# values, as those values reappear in new_item on subsequent # values, as those values reappear in new_item on subsequent
# updates. # updates.
old_item['callback'] = {**old_item['callback'], **new_item['callback']} old_item["callback"] = {**old_item["callback"], **new_item["callback"]}
# In general, items are removed when they are old (not found in the last # In general, items are removed when they are old (not found in the last
# fetch) and inactive. Some item fields can change this basic behavior. # fetch) and inactive. Some item fields can change this basic behavior.
del_count = 0 del_count = 0
now = timestamp.now() now = timestamp.now()
has_delete_handler = hasattr(source, 'on_delete') has_delete_handler = hasattr(source, "on_delete")
fetched_ids = [item['id'] for item in updated_items] fetched_ids = [item["id"] for item in updated_items]
old_item_ids = [ old_item_ids = [item_id for item_id in prior_ids if item_id not in fetched_ids]
item_id for item_id in prior_ids
if item_id not in fetched_ids]
for item_id in old_item_ids: for item_id in old_item_ids:
item = loader.load_item(source_name, item_id) item = loader.load_item(source_name, item_id)
remove = not item['active'] remove = not item["active"]
# The time-to-live field protects an item from removal until expiry. # The time-to-live field protects an item from removal until expiry.
# This is mainly used to avoid old items resurfacing when their source # This is mainly used to avoid old items resurfacing when their source
# cannot guarantee monotonicity. # cannot guarantee monotonicity.
if 'ttl' in item: if "ttl" in item:
ttl_date = item['created'] + item['ttl'] ttl_date = item["created"] + item["ttl"]
if ttl_date > now: if ttl_date > now:
continue continue
# The time-to-die field can force an active item to be removed. # The time-to-die field can force an active item to be removed.
if 'ttd' in item: if "ttd" in item:
ttd_date = item['created'] + item['ttd'] ttd_date = item["created"] + item["ttd"]
if ttd_date < now: if ttd_date < now:
remove = True remove = True
# Items to be removed are deleted # Items to be removed are deleted
@ -202,27 +200,33 @@ def update_source(source_name, source):
if has_delete_handler: if has_delete_handler:
# Run the delete handler so exceptions prevent deletions # Run the delete handler so exceptions prevent deletions
source.on_delete(state, item) source.on_delete(state, item)
loader.delete_item(source_name, item['id']) loader.delete_item(source_name, item["id"])
del_count += 1 del_count += 1
except: except:
error.as_item( error.as_item(
f'Failed to delete {source_name}/{item["id"]}', f'Failed to delete {source_name}/{item["id"]}',
traceback.format_exc()) traceback.format_exc(),
)
# Note update timestamp in state # Note update timestamp in state
state['last_updated'] = timestamp.now() state["last_updated"] = timestamp.now()
# Log counts # Log counts
logger.info("{} new item{}, {} deleted item{}".format( logger.info(
len(new_items), "s" if len(new_items) != 1 else "", "{} new item{}, {} deleted item{}".format(
del_count, "s" if del_count != 1 else "")) len(new_items),
"s" if len(new_items) != 1 else "",
del_count,
"s" if del_count != 1 else "",
)
)
def item_callback(source_name, itemid): def item_callback(source_name, itemid):
try: try:
# Load the module with the callback function # Load the module with the callback function
source_module = load_source(source_name) source_module = load_source(source_name)
if not hasattr(source_module, 'callback'): if not hasattr(source_module, "callback"):
raise ImportError(f"Missing callback in '{source_name}'") raise ImportError(f"Missing callback in '{source_name}'")
# Load the source state and the origin item # Load the source state and the origin item
state = loader.load_state(source_name) state = loader.load_state(source_name)
@ -235,4 +239,5 @@ def item_callback(source_name, itemid):
except Exception: except Exception:
error.as_item( error.as_item(
f"Error executing callback for {source_name}/{itemid}", f"Error executing callback for {source_name}/{itemid}",
traceback.format_exc()) traceback.format_exc(),
)

View File

@ -17,22 +17,22 @@ import requests
# Module imports # Module imports
from inquisitor import CACHE_PATH from inquisitor import CACHE_PATH
logger = logging.getLogger('inquisitor.templates') logger = logging.getLogger("inquisitor.templates")
def cache_image(source, url, filename):
    """
    Download ``url`` into the cache folder of ``source``.

    source: name of the folder under CACHE_PATH that receives the file.
    url: address fetched with ``requests.get``.
    filename: name of the file written inside that folder.

    Returns the ``/cache/<source>/<filename>`` path of the stored file.
    """
    # Define some paths
    path = os.path.join(CACHE_PATH, source)
    file_path = os.path.join(path, filename)
    cached_url = f"/cache/{source}/{filename}"
    # Ensure cache folder. exist_ok removes the check-then-create race when
    # two fetches for the same source run at once.
    os.makedirs(path, exist_ok=True)
    # Fetch url
    logger.info(f"Caching {url} to {file_path}")
    response = requests.get(url)
    # NOTE(review): the response status is not checked, so an error page
    # body would be cached as if it were the image — confirm this is intended.
    # Write file to disk
    with open(file_path, "wb") as f:
        f.write(response.content)
    # Return the inquisitor path to the file
    return cached_url
@ -43,6 +43,7 @@ class LinearCrawler:
An engine for generating items from web sources that link content An engine for generating items from web sources that link content
together in a linear fashion, such as webcomics. together in a linear fashion, such as webcomics.
""" """
def fetch_new(self, state): def fetch_new(self, state):
items = [] items = []
max_iter = self.max_iterations() - 1 max_iter = self.max_iterations() - 1
@ -66,18 +67,18 @@ class LinearCrawler:
def try_fetch(self, state): def try_fetch(self, state):
# Check for whether a new page should be crawled # Check for whether a new page should be crawled
if 'current_page' not in state: if "current_page" not in state:
next_page = self.get_start_url() next_page = self.get_start_url()
else: else:
current = state['current_page'] current = state["current_page"]
response = requests.get(current) response = requests.get(current)
soup = BeautifulSoup(response.text, features='html.parser') soup = BeautifulSoup(response.text, features="html.parser")
next_page = self.get_next_page_url(current, soup) next_page = self.get_next_page_url(current, soup)
if not next_page: if not next_page:
return [] # nothing new return [] # nothing new
# Download the new page # Download the new page
logger.info('Fetching ' + next_page) logger.info("Fetching " + next_page)
response = requests.get(next_page) response = requests.get(next_page)
soup = BeautifulSoup(response.text, features="html.parser") soup = BeautifulSoup(response.text, features="html.parser")
@ -85,20 +86,20 @@ class LinearCrawler:
item = self.make_item(next_page, soup) item = self.make_item(next_page, soup)
# Update the state and return the item # Update the state and return the item
state['current_page'] = next_page state["current_page"] = next_page
return [item] return [item]
def max_iterations(self):
    """Return the iteration limit read by ``fetch_new``; the default is 3."""
    limit = 3
    return limit
def get_start_url(self):
    """Hook: return the first URL to crawl when the state has no current page."""
    message = "get_start_url is required"
    raise NotImplementedError(message)
def get_next_page_url(self, url, soup):
    """
    Hook: given the current page's URL and parsed soup, return the URL of
    the page that follows it. A falsy result means there is nothing new.
    """
    message = "get_next_page_url is required"
    raise NotImplementedError(message)
def make_item(self, url, soup):
    """Hook: build the item for the page at ``url`` from its parsed soup."""
    message = "make_item is required"
    raise NotImplementedError(message)
class RedditScraper: class RedditScraper:
@ -107,11 +108,13 @@ class RedditScraper:
Requires defining source, subreddit_name Requires defining source, subreddit_name
fetch new with RedditScraper.fetch_new(state, __name__, reddit) fetch new with RedditScraper.fetch_new(state, __name__, reddit)
""" """
@staticmethod @staticmethod
def fetch_new(state, name, reddit): def fetch_new(state, name, reddit):
items = [] items = []
for name, obj in inspect.getmembers(sys.modules[name]): for name, obj in inspect.getmembers(sys.modules[name]):
if (inspect.isclass(obj) if (
inspect.isclass(obj)
and issubclass(obj, RedditScraper) and issubclass(obj, RedditScraper)
and obj is not RedditScraper and obj is not RedditScraper
): ):
@ -124,7 +127,7 @@ class RedditScraper:
def get_items(self): def get_items(self):
sub_name = self.subreddit_name sub_name = self.subreddit_name
logger.info(f'Fetching posts from r/{sub_name}') logger.info(f"Fetching posts from r/{sub_name}")
subreddit = self.reddit.subreddit(sub_name) subreddit = self.reddit.subreddit(sub_name)
posts = self.subreddit_page(subreddit) posts = self.subreddit_page(subreddit)
items = [] items = []
@ -135,24 +138,28 @@ class RedditScraper:
def item_from_post(self, post):
    """
    Convert a reddit post into an item dict.

    The required fields (source, id, title, link, time, author, body, tags)
    are always present. The optional fields ttl, ttd, tts and callback are
    included only when the matching ``get_*`` hook returns something other
    than None.

    Previously "ttl" was also set unconditionally in the literal, so a
    ``None`` ttl ended up in every item and defeated the ``is not None``
    guard; consumers that compute ``created + ttl`` would then fail.
    """
    author_name = post.author.name if post.author else "[deleted]"
    item = {
        "source": self.source,
        "id": post.id,
        "title": self.get_title(post),
        "link": self.get_link(post),
        "time": post.created_utc,
        "author": "/u/" + author_name,
        "body": self.get_body(post),
        "tags": self.get_tags(post),
    }
    # Each hook is called exactly once, in the original order.
    optional_fields = (
        ("ttl", self.get_ttl),
        ("ttd", self.get_ttd),
        ("tts", self.get_tts),
        ("callback", self.get_callback),
    )
    for field, getter in optional_fields:
        value = getter(post)
        if value is not None:
            item[field] = value
    return item
def subreddit_page(self, subreddit): def subreddit_page(self, subreddit):
@ -162,48 +169,50 @@ class RedditScraper:
return True return True
def get_title(self, post):
    """
    Build the item title: an optional "[S] " marker for spoilers, an optional
    "[NSFW] " marker for over-18 posts, then "/<prefixed subreddit>: <title>".
    """
    markers = ""
    if post.spoiler:
        markers += "[S] "
    if post.over_18:
        markers += "[NSFW] "
    return "{}/{}: {}".format(markers, post.subreddit_name_prefixed, post.title)
def get_link(self, post):
    """Return the post's permalink as an absolute https://reddit.com URL."""
    return "https://reddit.com{}".format(post.permalink)
def get_body(self, post):
    """
    Build the HTML body for a post.

    The body is made of up to four kinds of parts, joined by "<br><hr>":
    a link to the target URL for non-self posts, the widest preview image
    narrower than 800px, a link plus preview image for each gallery entry,
    and the first ~1024 characters of the self text.

    Preview and gallery rendering is best effort: malformed or missing
    metadata is skipped instead of failing the whole item.

    NOTE(review): post-supplied values are interpolated into the HTML
    without escaping — confirm whether the API already escapes them before
    adding escaping here.
    """
    parts = []
    if not post.is_self:
        parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')

    if hasattr(post, "preview"):
        try:
            previews = post.preview["images"][0]["resolutions"]
            small_previews = [p for p in previews if p["width"] < 800]
            preview = max(small_previews, key=lambda p: p["width"])
            preview_url = preview["url"]
            parts.append(f'<img src="{preview_url}">')
        except Exception:
            # Best effort: skip the preview when its metadata is unusable.
            pass

    if getattr(post, "is_gallery", False):
        try:
            gallery_items = list(post.gallery_data["items"])
        except Exception:
            gallery_items = []
        for gallery_item in gallery_items:
            # Handle each entry on its own so one bad entry does not discard
            # the entries that follow it.
            try:
                metadata = post.media_metadata[gallery_item["media_id"]]
                small_previews = [p for p in metadata["p"] if p["x"] < 800]
                preview = max(small_previews, key=lambda p: p["x"])
                full_url = metadata["s"]["u"]
                preview_url = preview["u"]
            except Exception:
                continue
            parts.append(f'<i>link:</i> <a href="{full_url}">{full_url}</a>')
            parts.append(f'<img src="{preview_url}">')

    if post.selftext:
        text = post.selftext
        # Cut at the first space at or after character 1024. find() returns
        # -1 when there is none; using that value directly would slice at
        # 1023 and drop a character, so fall back to a cut at exactly 1024.
        offset = text[1024:].find(" ")
        cut = 1024 + offset if offset >= 0 else 1024
        preview_body = text[:cut]
        if len(preview_body) < len(text):
            preview_body += "[...]"
        parts.append(f"<p>{preview_body}</p>")

    return "<br><hr>".join(parts)
def get_tags(self, post):
    """
    Return the item's tags: "reddit", the prefixed subreddit name with its
    first two characters removed, and "nsfw" for over-18 posts.
    """
    subreddit = post.subreddit_name_prefixed[2:]
    if post.over_18:
        return ["reddit", subreddit, "nsfw"]
    return ["reddit", subreddit]
def get_ttl(self, post): def get_ttl(self, post):
@ -219,10 +228,10 @@ class RedditScraper:
return None return None
def callback(self, state, item):
    """Not implemented by the base scraper; always raises NotImplementedError."""
    message = "callback"
    raise NotImplementedError(message)
def on_create(self, state, item):
    """Not implemented by the base scraper; always raises NotImplementedError."""
    message = "on_create"
    raise NotImplementedError(message)
def on_delete(self, state, item):
    """Not implemented by the base scraper; always raises NotImplementedError."""
    message = "on_delete"
    raise NotImplementedError(message)

View File

@ -1,9 +1,11 @@
import time import time
import datetime import datetime
def now():
    """Return the current time as whole seconds since the epoch."""
    seconds = time.time()
    return int(seconds)
def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
    """
    Format the epoch timestamp ``ts`` with ``formatstr``.

    The timestamp is converted with ``datetime.fromtimestamp`` without a
    timezone argument, so the result is in local time.
    """
    return datetime.datetime.fromtimestamp(ts).strftime(formatstr)