Initial black linting pass

This commit is contained in:
Tim Van Baak 2022-12-28 12:44:48 -08:00
parent 52e32917d5
commit 42544bd30b
11 changed files with 1099 additions and 1022 deletions

View File

@ -1,2 +1,3 @@
from cli import main
main()

View File

@ -14,7 +14,8 @@ from inquisitor.configs import (
subfeeds,
get_subfeed_overrides,
logger,
init_default_logging)
init_default_logging,
)
from inquisitor import sources, loader, timestamp
# Globals
@ -28,18 +29,22 @@ def make_query_link(text, wl, bl):
query = "?{}".format("&".join(params))
return '<a href="{1}">{0}</a>'.format(text, query)
@app.template_filter("datetimeformat")
def datetimeformat(value):
    """Template filter "datetimeformat": format a stamp for display.

    Returns an empty string when value is None; otherwise returns whatever
    timestamp.stamp_to_readable produces for value.
    """
    if value is None:
        return ""
    return timestamp.stamp_to_readable(value)
@app.route("/")
def root():
    """Redirect requests for the site root to the URL of the "feed" endpoint."""
    feed_url = url_for("feed")
    return redirect(feed_url)
@app.route("/feed/")
def feed():
    """Serve the main feed by calling feed_for_sources with no source list."""
    response = feed_for_sources(source_names=None)
    return response
@app.route("/feed/<string:feed_name>/")
def subfeed(feed_name):
# Check for and apply subfeed overrides
@ -47,7 +52,7 @@ def subfeed(feed_name):
subfeed_config = subfeed_overrides or subfeeds or {}
# The built-in inquisitor subfeed contains sources not in another subfeed
if feed_name == 'inquisitor':
if feed_name == "inquisitor":
all_sources = os.listdir(DUNGEON_PATH)
for subfeed, sources in subfeed_config.items():
for source_name in sources:
@ -59,17 +64,18 @@ def subfeed(feed_name):
return abort(404)
return feed_for_sources(subfeed_config[feed_name])
def feed_for_sources(source_names):
# Determine exclusion filters
filters = []
wl_param = request.args.get('only')
wl_param = request.args.get("only")
wl = wl_param.split(",") if wl_param else []
bl_param = request.args.get('not')
bl_param = request.args.get("not")
bl = bl_param.split(",") if bl_param else []
if wl:
filters.append(lambda item: not any([tag in wl for tag in item['tags']]))
filters.append(lambda item: not any([tag in wl for tag in item["tags"]]))
if bl:
filters.append(lambda item: any([tag in bl for tag in item['tags']]))
filters.append(lambda item: any([tag in bl for tag in item["tags"]]))
# Get all active+filtered items and all active tags
total = 0
@ -77,30 +83,40 @@ def feed_for_sources(source_names):
active_items = []
active_tags = {}
for item in items:
if item['active']:
for tag in item['tags']:
if tag not in active_tags: active_tags[tag] = 0
if item["active"]:
for tag in item["tags"]:
if tag not in active_tags:
active_tags[tag] = 0
active_tags[tag] += 1
# active_tags |= set(item['tags'])
total += 1
if not any(map(lambda f: f(item), filters)):
active_items.append(item)
# Sort items by time
active_items.sort(key=lambda i: i['time'] if 'time' in i and i['time'] else i['created'] if 'created' in i and i['created'] else 0)
active_items.sort(
key=lambda i: i["time"]
if "time" in i and i["time"]
else i["created"]
if "created" in i and i["created"]
else 0
)
logger.info("Returning {} of {} items".format(len(active_items), total))
if errors:
read_ex = {
'title': 'Read errors',
'body': "<pre>{}</pre>".format("\n\n".join(errors)),
'created': None,
"title": "Read errors",
"body": "<pre>{}</pre>".format("\n\n".join(errors)),
"created": None,
}
active_items.insert(0, read_ex)
if total > 0:
# Create the feed control item
link_table = ["<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
total, make_query_link("all", [], []))]
link_table = [
"<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
total, make_query_link("all", [], [])
)
]
for tag, count in sorted(active_tags.items(), key=lambda i: i[0].lower()):
links = [count]
links.append(make_query_link(tag, [tag], []))
@ -116,76 +132,90 @@ def feed_for_sources(source_names):
else:
new_wl = [t for t in wl if t != tag]
links.append(make_query_link("+not", new_wl, bl + [tag]))
link_table.append("<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(*links))
link_table.append(
"<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(
*links
)
)
body = '<table class="feed-control">{}</table>'.format("\n".join(link_table))
feed_control = {
'title': 'Feed Control [{}/{}]'.format(len(active_items), total),
'body': body,
"title": "Feed Control [{}/{}]".format(len(active_items), total),
"body": body,
}
active_items.insert(0, feed_control)
selection = active_items[:100]
return render_template("feed.jinja2",
return render_template(
"feed.jinja2",
items=selection,
mdeac=[
{'source': item['source'], 'itemid': item['id']}
{"source": item["source"], "itemid": item["id"]}
for item in selection
if 'id' in item])
if "id" in item
],
)
@app.route("/deactivate/", methods=["POST"])
def deactivate():
    """Mark a single item inactive.

    Expects a JSON object body carrying "source" and "itemid". Responds with
    JSON {"active": <value>} read back from the item after the update, or
    aborts with 400 when the body is not an object containing both keys.
    """
    params = request.get_json()
    # Both keys are required. Reject the request here instead of failing
    # further down with a KeyError (or a TypeError on a non-object body).
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    if item["active"]:
        logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
    item["active"] = False
    return jsonify({"active": item["active"]})
@app.route("/punt/", methods=["POST"])
def punt():
    """Postpone an item until 06:00 tomorrow.

    Expects a JSON object body carrying "source" and "itemid". Sets the
    item's "tts" field to the number of seconds between the item's "created"
    stamp and 06:00 (naive local time) on the following day, then responds
    with the item's underlying dict as JSON. Aborts with 400 when the body
    is not an object containing both keys.
    """
    params = request.get_json()
    # Both keys are required. Reject the request here instead of failing
    # further down with a KeyError (or a TypeError on a non-object body).
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    tomorrow = datetime.now() + timedelta(days=1)
    morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
    # "tts" is stored as an offset from "created", not as an absolute time.
    til_then = morning.timestamp() - item["created"]
    item["tts"] = til_then
    return jsonify(item.item)
@app.route("/mass-deactivate/", methods=["POST"])
def mass_deactivate():
    """Mark every listed item inactive.

    Expects a JSON object body with an "items" list whose entries each carry
    "source" and "itemid". A body without "items" is logged as a bad request
    and treated as an empty list. Always responds with an empty JSON object.
    """
    params = request.get_json()
    if not isinstance(params, dict) or "items" not in params:
        logger.error("Bad request params: {}".format(params))
    # A non-object body (e.g. None) has no .get(); handle it like a body
    # that simply lacks "items" rather than raising.
    entries = params.get("items", []) if isinstance(params, dict) else []
    for info in entries:
        source = info["source"]
        itemid = info["itemid"]
        item = loader.load_item(source, itemid)
        if item["active"]:
            logger.debug(f"Deactivating {info['source']}/{info['itemid']}")
        item["active"] = False
    return jsonify({})
@app.route("/callback/", methods=["POST"])
def callback():
    """Run the source callback for one item.

    Expects a JSON object body carrying "source" and "itemid", passes both
    to sources.item_callback, and responds with an empty JSON object. Aborts
    with 400 when the body is not an object containing both keys.
    """
    params = request.get_json()
    # Both keys are required. Reject the request here instead of failing
    # further down with a KeyError (or a TypeError on a non-object body).
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    logger.info(
        "Executing callback for {}/{}".format(params["source"], params["itemid"])
    )
    sources.item_callback(params["source"], params["itemid"])
    return jsonify({})
@app.route("/cache/<path:cache_path>")
def cache(cache_path):
    """Serve the raw bytes of a file stored under CACHE_PATH.

    cache_path is taken from the request URL, so it is normalised and
    required to stay inside CACHE_PATH before anything is opened. Responds
    with 404 when the resolved path leaves the cache directory or is not a
    regular file.
    """
    cache_root = os.path.abspath(CACHE_PATH)
    path = os.path.abspath(os.path.join(cache_root, cache_path))
    # os.path.join(root, "") gives root with exactly one trailing separator,
    # so a sibling such as "<root>-other" cannot pass as a child of root.
    if not path.startswith(os.path.join(cache_root, "")):
        return abort(404)
    if not os.path.isfile(path):
        return abort(404)
    with open(path, "rb") as f:
        return f.read()

View File

@ -14,53 +14,62 @@ def command_test(args):
"""Echo config file values."""
from inquisitor.configs.resolver import (
config_path,
CONFIG_DATA, data_path,
CONFIG_SOURCES, source_path,
CONFIG_CACHE, cache_path,
CONFIG_LOGFILE, log_file,
CONFIG_VERBOSE, is_verbose,
CONFIG_SUBFEEDS, subfeeds,
CONFIG_DATA,
data_path,
CONFIG_SOURCES,
source_path,
CONFIG_CACHE,
cache_path,
CONFIG_LOGFILE,
log_file,
CONFIG_VERBOSE,
is_verbose,
CONFIG_SUBFEEDS,
subfeeds,
)
subfeeds = '; '.join(
'{0}: {1}'.format(
sf_name,
' '.join(sf_sources)
subfeeds = (
"; ".join(
"{0}: {1}".format(sf_name, " ".join(sf_sources))
for sf_name, sf_sources in subfeeds.items()
)
for sf_name, sf_sources
in subfeeds.items()
) if subfeeds else ''
print(f'Inquisitor configured from {config_path}')
print(f' {CONFIG_DATA} = {data_path}')
print(f' {CONFIG_SOURCES} = {source_path}')
print(f' {CONFIG_CACHE} = {cache_path}')
print(f' {CONFIG_LOGFILE} = {log_file}')
print(f' {CONFIG_VERBOSE} = {is_verbose}')
print(f' {CONFIG_SUBFEEDS} = {subfeeds}')
if subfeeds
else ""
)
print(f"Inquisitor configured from {config_path}")
print(f" {CONFIG_DATA} = {data_path}")
print(f" {CONFIG_SOURCES} = {source_path}")
print(f" {CONFIG_CACHE} = {cache_path}")
print(f" {CONFIG_LOGFILE} = {log_file}")
print(f" {CONFIG_VERBOSE} = {is_verbose}")
print(f" {CONFIG_SUBFEEDS} = {subfeeds}")
return 0
def command_update(args):
    """Fetch and store new items from the specified sources."""
    # NOTE: the docstring above doubles as the argparse description, so its
    # text is user-facing output.
    #
    # args: list of command-line tokens following "update".
    # Returns 0 on success (or after printing help when no source is named)
    # and -1 when the dungeon or sources directory is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor update", description=command_update.__doc__, add_help=False
    )
    parser.add_argument("source", nargs="*", help="Sources to update.")
    args = parser.parse_args(args)

    if not args.source:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1
    if not os.path.isdir(SOURCES_PATH):
        logger.error(
            "Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources"
        )
        # Without the sources directory there is nothing to update; stop
        # here just like the dungeon check above.
        return -1

    # Update sources (imported lazily, at the point of use)
    from inquisitor.sources import update_sources

    update_sources(*args.source)
    return 0
@ -70,25 +79,27 @@ def command_deactivate(args):
parser = argparse.ArgumentParser(
prog="inquisitor deactivate",
description=command_deactivate.__doc__,
add_help=False)
parser.add_argument("source",
nargs="*",
help="Cells to deactivate.")
parser.add_argument("--tag",
help="Only deactivate items with this tag")
parser.add_argument("--title",
help="Only deactivate items with titles containing this substring")
add_help=False,
)
parser.add_argument("source", nargs="*", help="Cells to deactivate.")
parser.add_argument("--tag", help="Only deactivate items with this tag")
parser.add_argument(
"--title", help="Only deactivate items with titles containing this substring"
)
args = parser.parse_args(args)
if len(args.source) == 0:
parser.print_help()
return 0
if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1
# Deactivate all items in each source.
from inquisitor.loader import load_items
for source_name in args.source:
path = os.path.join(DUNGEON_PATH, source_name)
if not os.path.isdir(path):
@ -96,12 +107,12 @@ def command_deactivate(args):
count = 0
items, _ = load_items(source_name)
for item in items.values():
if args.tag and args.tag not in item['tags']:
if args.tag and args.tag not in item["tags"]:
continue
if args.title and args.title not in item['title']:
if args.title and args.title not in item["title"]:
continue
if item['active']:
item['active'] = False
if item["active"]:
item["active"] = False
count += 1
logger.info("Deactivated {} items in '{}'".format(count, source_name))
@ -111,9 +122,8 @@ def command_deactivate(args):
def command_add(args):
"""Creates an item."""
parser = argparse.ArgumentParser(
prog="inquisitor add",
description=command_add.__doc__,
add_help=False)
prog="inquisitor add", description=command_add.__doc__, add_help=False
)
parser.add_argument("--id", help="String")
parser.add_argument("--source", help="String")
parser.add_argument("--title", help="String")
@ -125,40 +135,55 @@ def command_add(args):
parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds")
parser.add_argument("--ttd", type=int, help="Cleanup force in seconds")
parser.add_argument("--tts", type=int, help="Display delay in seconds")
parser.add_argument("--create", action="store_true", help="Create source if it doesn't exist")
parser.add_argument(
"--create", action="store_true", help="Create source if it doesn't exist"
)
args = parser.parse_args(args)
if not args.title:
parser.print_help()
return 0
if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1
source = args.source or 'inquisitor'
source = args.source or "inquisitor"
cell_path = os.path.join(DUNGEON_PATH, source)
if args.create:
from inquisitor.sources import ensure_cell
ensure_cell(source)
elif not os.path.isdir(cell_path):
logger.error("Source '{}' does not exist".format(source))
return -1
item = {
'id': args.id or '{:x}'.format(random.getrandbits(16 * 4)),
'source': source,
"id": args.id or "{:x}".format(random.getrandbits(16 * 4)),
"source": source,
}
if args.title: item['title'] = str(args.title)
if args.link: item['link'] = str(args.link)
if args.time: item['time'] = int(args.time)
if args.author: item['author'] = str(args.author)
if args.body: item['body'] = str(args.body)
if args.tags: item['tags'] = [str(tag) for tag in args.tags.split(",")]
if args.ttl: item['ttl'] = int(args.ttl)
if args.ttd: item['ttd'] = int(args.ttd)
if args.tts: item['tts'] = int(args.tts)
if args.title:
item["title"] = str(args.title)
if args.link:
item["link"] = str(args.link)
if args.time:
item["time"] = int(args.time)
if args.author:
item["author"] = str(args.author)
if args.body:
item["body"] = str(args.body)
if args.tags:
item["tags"] = [str(tag) for tag in args.tags.split(",")]
if args.ttl:
item["ttl"] = int(args.ttl)
if args.ttd:
item["ttd"] = int(args.ttd)
if args.tts:
item["tts"] = int(args.tts)
from inquisitor.loader import new_item
saved_item = new_item(source, item)
logger.info(saved_item)
@ -166,7 +191,9 @@ def command_add(args):
def command_feed(args):
"""Print the current feed."""
if not os.path.isdir(DUNGEON_PATH):
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
logger.error(
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
)
return -1
import shutil
@ -179,50 +206,56 @@ def command_feed(args):
return 0
if errors:
items.insert(0, {
'title': '{} read errors: {}'.format(len(errors), ' '.join(errors)),
'body': "\n".join(errors)
})
items.insert(
0,
{
"title": "{} read errors: {}".format(len(errors), " ".join(errors)),
"body": "\n".join(errors),
},
)
size = shutil.get_terminal_size((80, 20))
width = min(80, size.columns)
for item in items:
title = item['title'] if 'title' in item else ""
title = item["title"] if "title" in item else ""
titles = [title]
while len(titles[-1]) > width - 4:
i = titles[-1][:width - 4].rfind(' ')
i = titles[-1][: width - 4].rfind(" ")
titles = titles[:-1] + [titles[-1][:i].strip(), titles[-1][i:].strip()]
print('+' + (width - 2) * '-' + '+')
print("+" + (width - 2) * "-" + "+")
for title in titles:
print("| {0:<{1}} |".format(title, width - 4))
print("|{0:<{1}}|".format("", width - 2))
info1 = ""
if 'author' in title and item['author']:
info1 += item['author'] + " "
if 'time' in item and item['time']:
info1 += timestamp.stamp_to_readable(item['time'])
if "author" in title and item["author"]:
info1 += item["author"] + " "
if "time" in item and item["time"]:
info1 += timestamp.stamp_to_readable(item["time"])
print("| {0:<{1}} |".format(info1, width - 4))
created = timestamp.stamp_to_readable(item['created']) if 'created' in item else ""
created = (
timestamp.stamp_to_readable(item["created"]) if "created" in item else ""
)
info2 = "{0} {1} {2}".format(
item.get('source', ''), item.get('id', ''), created)
item.get("source", ""), item.get("id", ""), created
)
print("| {0:<{1}} |".format(info2, width - 4))
print('+' + (width - 2) * '-' + '+')
print("+" + (width - 2) * "-" + "+")
print()
def command_run(args):
"""Run the default Flask server."""
parser = argparse.ArgumentParser(
prog="inquisitor run",
description=command_run.__doc__,
add_help=False)
prog="inquisitor run", description=command_run.__doc__, add_help=False
)
parser.add_argument("--debug", action="store_true")
parser.add_argument("--port", type=int, default=5000)
args = parser.parse_args(args)
try:
from inquisitor.app import app
app.run(port=args.port, debug=args.debug)
return 0
except Exception as e:
@ -245,38 +278,41 @@ def main():
"""CLI entry point"""
# Enable piping
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)
# Collect the commands from this module
import inquisitor.cli
commands = {
name[8:]: func
for name, func in vars(inquisitor.cli).items()
if name.startswith('command_')
if name.startswith("command_")
}
descriptions = "\n".join([
"- {0}: {1}".format(name, func.__doc__)
for name, func in commands.items()])
descriptions = "\n".join(
["- {0}: {1}".format(name, func.__doc__) for name, func in commands.items()]
)
# Set up the parser
parser = argparse.ArgumentParser(
description="Available commands:\n{}\n".format(descriptions),
formatter_class=argparse.RawDescriptionHelpFormatter,
add_help=False)
parser.add_argument("command",
add_help=False,
)
parser.add_argument(
"command",
nargs="?",
default="help",
help="The command to execute",
choices=commands,
metavar="command")
parser.add_argument("args",
nargs=argparse.REMAINDER,
help="Command arguments",
metavar="args")
parser.add_argument("-v",
action="store_true",
dest="verbose",
help="Enable debug logging")
metavar="command",
)
parser.add_argument(
"args", nargs=argparse.REMAINDER, help="Command arguments", metavar="args"
)
parser.add_argument(
"-v", action="store_true", dest="verbose", help="Enable debug logging"
)
# Extract the usage print for command_help
global print_usage

View File

@ -1,10 +1,5 @@
from .resolver import data_path as DUNGEON_PATH
from .resolver import source_path as SOURCES_PATH
from .resolver import cache_path as CACHE_PATH
from .resolver import (
logger,
subfeeds)
from .resolver import (
add_logging_handler,
init_default_logging,
get_subfeed_overrides)
from .resolver import logger, subfeeds
from .resolver import add_logging_handler, init_default_logging, get_subfeed_overrides

View File

@ -4,34 +4,34 @@ import logging
# Constants governing config resolution:
# Path to the config file, containing key-value pairs of the other settings
CONFIG_ENVVAR = 'INQUISITOR_CONFIG'
DEFAULT_CONFIG_PATH = '/etc/inquisitor.conf'
CONFIG_ENVVAR = "INQUISITOR_CONFIG"
DEFAULT_CONFIG_PATH = "/etc/inquisitor.conf"
# Path to the folder where items are stored
CONFIG_DATA = 'DataPath'
DEFAULT_DATA_PATH = '/var/inquisitor/data/'
CONFIG_DATA = "DataPath"
DEFAULT_DATA_PATH = "/var/inquisitor/data/"
# Path to the folder where source modules are stored
CONFIG_SOURCES = 'SourcePath'
DEFAULT_SOURCES_PATH = '/var/inquisitor/sources/'
CONFIG_SOURCES = "SourcePath"
DEFAULT_SOURCES_PATH = "/var/inquisitor/sources/"
# Path to the folder where cached files are stored
CONFIG_CACHE = 'CachePath'
DEFAULT_CACHE_PATH = '/var/inquisitor/cache/'
CONFIG_CACHE = "CachePath"
DEFAULT_CACHE_PATH = "/var/inquisitor/cache/"
# Path to a log file where logging will be redirected
CONFIG_LOGFILE = 'LogFile'
CONFIG_LOGFILE = "LogFile"
DEFAULT_LOG_FILE = None
# Whether logging is verbose
CONFIG_VERBOSE = 'Verbose'
DEFAULT_VERBOSITY = 'false'
CONFIG_VERBOSE = "Verbose"
DEFAULT_VERBOSITY = "false"
# Subfeed source lists, with each subfeed config separated by lines and
# sources within a subfeed separated by spaces
CONFIG_SUBFEEDS = 'Subfeeds'
CONFIG_SUBFEEDS = "Subfeeds"
DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = 'subfeeds.conf'
SUBFEED_CONFIG_FILE = "subfeeds.conf"
def read_config_file(config_path):
@ -43,79 +43,78 @@ def read_config_file(config_path):
# Parse the config file into key-value pairs
if not os.path.isfile(config_path):
raise FileNotFoundError(f'No config file found at {config_path}, try setting {CONFIG_ENVVAR}')
raise FileNotFoundError(
f"No config file found at {config_path}, try setting {CONFIG_ENVVAR}"
)
accumulated_configs = {}
current_key = None
with open(config_path, 'r', encoding='utf8') as cfg:
with open(config_path, "r", encoding="utf8") as cfg:
line_no = 0
for line in cfg:
line_no += 1
# Skip blank lines and comments
if not line.strip() or line.lstrip().startswith('#'):
if not line.strip() or line.lstrip().startswith("#"):
continue
# Accumulate config keyvalue pairs
if '=' in line:
if "=" in line:
# "key = value" begins a new keyvalue pair
current_key, value = line.split('=', maxsplit=1)
current_key, value = line.split("=", maxsplit=1)
current_key = current_key.strip()
accumulated_configs[current_key] = value.strip()
else:
# If there's no '=' and no previous key, throw
if not current_key:
raise ValueError(f'Invalid config format on line {line_no}')
raise ValueError(f"Invalid config format on line {line_no}")
else:
accumulated_configs[current_key] += '\n' + line.strip()
accumulated_configs[current_key] += "\n" + line.strip()
return accumulated_configs
def parse_subfeed_value(value):
    """Parse a multi-line subfeed config value into a dict.

    Each non-blank line of value must look like "name: source1 source2 ...".
    The text before the first ":" (stripped) is the subfeed name; the rest
    is split on whitespace into the list of source names.

    Returns a dict mapping subfeed name to its list of sources.
    Raises ValueError for a non-blank line that contains no ":".
    """
    subfeeds = {}
    for line in value.split("\n"):
        sf_def = line.strip()
        if not sf_def:
            continue
        sf_name, colon, sf_sources = sf_def.partition(":")
        if not colon:
            raise ValueError(f"Invalid subfeed definition: {sf_def}")
        # str.split() with no argument already discards all surrounding
        # whitespace, so the tokens need no further stripping.
        subfeeds[sf_name.strip()] = sf_sources.split()
    return subfeeds
# Read envvar for config file location, with fallback to default
config_path = os.path.abspath(
os.environ.get(CONFIG_ENVVAR) or
DEFAULT_CONFIG_PATH
)
config_path = os.path.abspath(os.environ.get(CONFIG_ENVVAR) or DEFAULT_CONFIG_PATH)
configs = read_config_file(config_path)
# Extract and validate config values
data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
if not os.path.isabs(data_path):
raise ValueError(f'Non-absolute data path: {data_path}')
raise ValueError(f"Non-absolute data path: {data_path}")
if not os.path.isdir(data_path):
raise FileNotFoundError(f'Cannot find directory {data_path}')
raise FileNotFoundError(f"Cannot find directory {data_path}")
source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
if not os.path.isabs(source_path):
raise ValueError(f'Non-absolute source path: {source_path}')
raise ValueError(f"Non-absolute source path: {source_path}")
if not os.path.isdir(source_path):
raise FileNotFoundError(f'Cannot find directory {source_path}')
raise FileNotFoundError(f"Cannot find directory {source_path}")
cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
if not os.path.isabs(cache_path):
raise ValueError(f'Non-absolute cache path: {cache_path}')
raise ValueError(f"Non-absolute cache path: {cache_path}")
if not os.path.isdir(cache_path):
raise FileNotFoundError(f'Cannot find directory {cache_path}')
raise FileNotFoundError(f"Cannot find directory {cache_path}")
log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
if log_file and not os.path.isabs(log_file):
raise ValueError(f'Non-absolute log file path: {log_file}')
raise ValueError(f"Non-absolute log file path: {log_file}")
is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
if is_verbose != 'true' and is_verbose != 'false':
if is_verbose != "true" and is_verbose != "false":
raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
is_verbose = (is_verbose == 'true')
is_verbose = is_verbose == "true"
subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
if subfeeds:
@ -143,35 +142,34 @@ def get_subfeed_overrides():
logger = logging.getLogger("inquisitor")
logger.setLevel(logging.DEBUG)
def add_logging_handler(verbose, log_filename):
    """
    Adds a logging handler according to the given settings

    verbose: when true, use the detailed format (timestamp, file, line) and
        emit DEBUG and above; otherwise use the short format and emit INFO
        and above.
    log_filename: path of a log file to write through a rotating file
        handler; when falsy, a StreamHandler is used instead.
    """
    # logging.handlers is a submodule that a bare `import logging` does not
    # load, so import it explicitly before touching RotatingFileHandler.
    import logging.handlers

    log_format = (
        "[{asctime}] [{levelname}:{filename}:{lineno}] {message}"
        if verbose
        else "[{levelname}] {message}"
    )
    formatter = logging.Formatter(log_format, style="{")

    log_level = logging.DEBUG if verbose else logging.INFO

    handler = (
        logging.handlers.RotatingFileHandler(
            log_filename,
            encoding="utf8",
            maxBytes=2**22,  # 4 MB per log file
            backupCount=4,  # 16 MB of rotated backups besides the live file
        )
        if log_filename
        else logging.StreamHandler()
    )
    handler.setFormatter(formatter)
    handler.setLevel(log_level)

    logger.addHandler(handler)
def init_default_logging():
    """Add a logging handler built from the module-level is_verbose and log_file."""
    add_logging_handler(verbose=is_verbose, log_filename=log_file)

View File

@ -8,19 +8,20 @@ from inquisitor.configs import DUNGEON_PATH, logger
logger = logging.getLogger("inquisitor")
def as_item(title, body=None):
    """Record an error as a new item in the "inquisitor" cell.

    Builds an active item tagged "inquisitor" and "error" with a random
    64-bit hexadecimal id, logs its JSON at error level, and writes it to
    <DUNGEON_PATH>/inquisitor/<id>.item.

    title: title of the item.
    body: optional text (e.g. a formatted traceback); stored HTML-escaped
        inside a <pre> element.
    """
    import html

    iid = "{:x}".format(random.getrandbits(16 * 4))
    item = {
        "id": iid,
        "source": "inquisitor",
        "title": title,
        "active": True,
        "created": timestamp.now(),
        "tags": ["inquisitor", "error"],
    }
    if body is not None:
        # Escape before wrapping: traceback text contains spans such as
        # "<module>" that would otherwise be parsed as markup.
        item["body"] = "<pre>{}</pre>".format(html.escape(str(body)))
    path = os.path.join(DUNGEON_PATH, "inquisitor", iid + ".item")
    logger.error(json.dumps(item))
    # Explicit encoding so the file does not depend on the platform default.
    with open(path, "w", encoding="utf8") as f:
        f.write(json.dumps(item, indent=2))

View File

@ -7,7 +7,7 @@ from inquisitor import error
from inquisitor import timestamp
class WritethroughDict():
class WritethroughDict:
"""A wrapper for a dictionary saved to the file system."""
@staticmethod
@ -59,7 +59,7 @@ class WritethroughDict():
def flush(self):
    """Serialize self.item as indented JSON and overwrite the file at self.path."""
    # Serialize before opening so a serialization error cannot leave a
    # truncated file behind.
    serialized = json.dumps(self.item, indent=2)
    with open(self.path, mode="w", encoding="utf8") as handle:
        handle.write(serialized)
@ -71,7 +71,7 @@ def load_state(source_name):
def load_item(source_name, item_id):
    """Load the item stored at <DUNGEON_PATH>/<source_name>/<item_id>.item.

    Returns whatever WritethroughDict.load produces for that path.
    """
    item_filename = f"{item_id}.item"
    return WritethroughDict.load(
        os.path.join(DUNGEON_PATH, source_name, item_filename)
    )
@ -79,7 +79,7 @@ def item_exists(source_name, item_id):
"""
Checks for the existence of an item.
"""
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
return os.path.isfile(item_path)
@ -91,7 +91,7 @@ def get_item_ids(cell_name):
return [
filename[:-5]
for filename in os.listdir(cell_path)
if filename.endswith('.item')
if filename.endswith(".item")
]
@ -101,30 +101,30 @@ def new_item(source_name, item):
Initializes other fields to their default values.
"""
# id is required
if 'id' not in item:
raise Exception(f'Cannot create item with no id. Value = {item}')
if "id" not in item:
raise Exception(f"Cannot create item with no id. Value = {item}")
# source must be filled in, so if it is absent it is auto-populated with
# source_name. Note: this allows sources to fill in a different source.
if 'source' not in item:
item['source'] = source_name
if "source" not in item:
item["source"] = source_name
# active is forced to True for new items
item['active'] = True
item["active"] = True
# created is forced to the current timestamp
item['created'] = timestamp.now()
item["created"] = timestamp.now()
# title is auto-populated with the id if missing
if 'title' not in item:
item['title'] = item['id']
if "title" not in item:
item["title"] = item["id"]
# tags is auto-populated if missing (not if empty!)
if 'tags' not in item:
item['tags'] = [source_name]
if "tags" not in item:
item["tags"] = [source_name]
# All other fields are optional.
item_path = os.path.join(DUNGEON_PATH, item['source'], f'{item["id"]}.item')
item_path = os.path.join(DUNGEON_PATH, item["source"], f'{item["id"]}.item')
return WritethroughDict.create(item_path, item)
@ -132,7 +132,7 @@ def delete_item(source_name, item_id):
"""
Delete an item.
"""
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
os.remove(item_path)
@ -144,10 +144,10 @@ def load_items(source_name):
items = {}
errors = []
for filename in os.listdir(cell_path):
if filename.endswith('.item'):
if filename.endswith(".item"):
try:
item = load_item(source_name, filename[:-5])
items[item['id']] = item
items[item["id"]] = item
except Exception:
errors.append(filename)
return items, errors
@ -165,20 +165,20 @@ def load_active_items(source_names):
for source_name in check_list:
source_path = os.path.join(DUNGEON_PATH, source_name)
if not os.path.isdir(source_path):
logger.warning(f'Skipping nonexistent source {source_name}')
logger.warning(f"Skipping nonexistent source {source_name}")
continue
for filename in os.listdir(source_path):
if not filename.endswith('.item'):
if not filename.endswith(".item"):
continue
try:
item = load_item(source_name, filename[:-5])
# The time-to-show field hides items until an expiry date.
if 'tts' in item:
tts_date = item['created'] + item['tts']
if "tts" in item:
tts_date = item["created"] + item["tts"]
if now < tts_date:
continue
# Don't show inactive items
if not item['active']:
if not item["active"]:
continue
items.append(item)
except Exception:

View File

@ -10,20 +10,20 @@ from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
# Item field names; presumably the fields refreshed from the newest fetched
# copy of an item when it is updated -- confirm against update_source.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated ("time" "author" would become "timeauthor").
USE_NEWEST = (
    "title",
    "tags",
    "link",
    "time",
    "author",
    "body",
    "ttl",
    "ttd",
    "tts",
)
class InquisitorStubSource:
"""A dummy source-like object for clearing out ad-hoc inquisitor items"""
def fetch_new(self, state):
return []
@ -36,9 +36,9 @@ def ensure_cell(name):
if not os.path.isdir(cell_path):
logger.info(f'Creating cell for source "{name}"')
os.mkdir(cell_path)
state_path = os.path.join(cell_path, 'state')
state_path = os.path.join(cell_path, "state")
if not os.path.isfile(state_path):
with open(state_path, 'w', encoding='utf8') as state:
with open(state_path, "w", encoding="utf8") as state:
json.dump({}, state)
@ -52,8 +52,8 @@ def update_sources(*source_names):
source_module = load_source(source_name)
except Exception:
error.as_item(
f'Error importing source "{source_name}"',
traceback.format_exc())
f'Error importing source "{source_name}"', traceback.format_exc()
)
continue
# If it doesn't have a cell yet, create one
@ -61,8 +61,8 @@ def update_sources(*source_names):
ensure_cell(source_name)
except Exception:
error.as_item(
f'Error initializing source "{source_name}"',
traceback.format_exc())
f'Error initializing source "{source_name}"', traceback.format_exc()
)
continue
# Update the source
@ -71,8 +71,8 @@ def update_sources(*source_names):
update_source(source_name, source_module)
except Exception:
error.as_item(
f'Error updating source "{source_name}"',
traceback.format_exc())
f'Error updating source "{source_name}"', traceback.format_exc()
)
def load_source(source_name):
@ -80,7 +80,7 @@ def load_source(source_name):
Attempts to load the source module with the given name.
Raises an exception on failure.
"""
if source_name == 'inquisitor':
if source_name == "inquisitor":
return InquisitorStubSource()
cwd = os.getcwd()
@ -92,7 +92,7 @@ def load_source(source_name):
sys.path.insert(0, SOURCES_PATH)
# Check if the named source is present.
source_file_name = source_name + '.py'
source_file_name = source_name + ".py"
if not os.path.isfile(source_file_name):
raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')
@ -104,7 +104,7 @@ def load_source(source_name):
itemsource = importlib.import_module(source_name)
# Require fetch_new().
if not hasattr(itemsource, 'fetch_new'):
if not hasattr(itemsource, "fetch_new"):
raise ImportError(f'Missing fetch_new in "{source_file_name}"')
return itemsource
@ -121,14 +121,14 @@ def update_source(source_name, source):
"""
# Get a list of item ids that already existed in this source's cell.
prior_ids = loader.get_item_ids(source_name)
logger.debug(f'Found {len(prior_ids)} prior items')
logger.debug(f"Found {len(prior_ids)} prior items")
# Get the feed items from the source's fetch method.
state = loader.load_state(source_name)
fetched = source.fetch_new(state)
state.flush()
logger.debug(f'Fetched {len(fetched)} items')
fetched_items = {item['id']: item for item in fetched}
logger.debug(f"Fetched {len(fetched)} items")
fetched_items = {item["id"]: item for item in fetched}
# Determine which items are new and which are updates.
# We query the file system here instead of checking against this source's
@ -137,16 +137,16 @@ def update_source(source_name, source):
new_items = []
updated_items = []
for item in fetched:
item_source = item.get('source', source_name)
if loader.item_exists(item_source, item['id']):
item_source = item.get("source", source_name)
if loader.item_exists(item_source, item["id"]):
updated_items.append(item)
else:
new_items.append(item)
# Write all the new items to the source's cell.
has_create_handler = hasattr(source, 'on_create')
has_create_handler = hasattr(source, "on_create")
for item in new_items:
item_source = item.get('source', source_name)
item_source = item.get("source", source_name)
created_item = loader.new_item(item_source, item)
if has_create_handler:
# Because some sources do not return items more than once,
@ -155,45 +155,43 @@ def update_source(source_name, source):
source.on_create(state, created_item)
except:
error.as_item(
f'Exception in {source_name}.on_create',
traceback.format_exc())
f"Exception in {source_name}.on_create", traceback.format_exc()
)
# Update the other items using the fetched items' values.
for new_item in updated_items:
old_item = loader.load_item(new_item['source'], new_item['id'])
old_item = loader.load_item(new_item["source"], new_item["id"])
for field in USE_NEWEST:
if field in new_item and old_item[field] != new_item[field]:
old_item[field] = new_item[field]
if 'callback' in new_item:
old_callback = old_item.get('callback', {})
if "callback" in new_item:
old_callback = old_item.get("callback", {})
# Because of the way this update happens, any fields that are set
# in the callback when the item is new will keep their original
# values, as those values reappear in new_item on subsequent
# updates.
old_item['callback'] = {**old_item['callback'], **new_item['callback']}
old_item["callback"] = {**old_item["callback"], **new_item["callback"]}
# In general, items are removed when they are old (not found in the last
# fetch) and inactive. Some item fields can change this basic behavior.
del_count = 0
now = timestamp.now()
has_delete_handler = hasattr(source, 'on_delete')
fetched_ids = [item['id'] for item in updated_items]
old_item_ids = [
item_id for item_id in prior_ids
if item_id not in fetched_ids]
has_delete_handler = hasattr(source, "on_delete")
fetched_ids = [item["id"] for item in updated_items]
old_item_ids = [item_id for item_id in prior_ids if item_id not in fetched_ids]
for item_id in old_item_ids:
item = loader.load_item(source_name, item_id)
remove = not item['active']
remove = not item["active"]
# The time-to-live field protects an item from removal until expiry.
# This is mainly used to avoid old items resurfacing when their source
# cannot guarantee monotonicity.
if 'ttl' in item:
ttl_date = item['created'] + item['ttl']
if "ttl" in item:
ttl_date = item["created"] + item["ttl"]
if ttl_date > now:
continue
# The time-to-die field can force an active item to be removed.
if 'ttd' in item:
ttd_date = item['created'] + item['ttd']
if "ttd" in item:
ttd_date = item["created"] + item["ttd"]
if ttd_date < now:
remove = True
# Items to be removed are deleted
@ -202,27 +200,33 @@ def update_source(source_name, source):
if has_delete_handler:
# Run the delete handler so exceptions prevent deletions
source.on_delete(state, item)
loader.delete_item(source_name, item['id'])
loader.delete_item(source_name, item["id"])
del_count += 1
except:
error.as_item(
f'Failed to delete {source_name}/{item["id"]}',
traceback.format_exc())
traceback.format_exc(),
)
# Note update timestamp in state
state['last_updated'] = timestamp.now()
state["last_updated"] = timestamp.now()
# Log counts
logger.info("{} new item{}, {} deleted item{}".format(
len(new_items), "s" if len(new_items) != 1 else "",
del_count, "s" if del_count != 1 else ""))
logger.info(
"{} new item{}, {} deleted item{}".format(
len(new_items),
"s" if len(new_items) != 1 else "",
del_count,
"s" if del_count != 1 else "",
)
)
def item_callback(source_name, itemid):
try:
# Load the module with the callback function
source_module = load_source(source_name)
if not hasattr(source_module, 'callback'):
if not hasattr(source_module, "callback"):
raise ImportError(f"Missing callback in '{source_name}'")
# Load the source state and the origin item
state = loader.load_state(source_name)
@ -235,4 +239,5 @@ def item_callback(source_name, itemid):
except Exception:
error.as_item(
f"Error executing callback for {source_name}/{itemid}",
traceback.format_exc())
traceback.format_exc(),
)

View File

@ -17,22 +17,22 @@ import requests
# Module imports
from inquisitor import CACHE_PATH
logger = logging.getLogger('inquisitor.templates')
logger = logging.getLogger("inquisitor.templates")
def cache_image(source, url, filename):
    """Download ``url`` and store it in the cache folder of ``source``.

    The response body is written to ``CACHE_PATH/<source>/<filename>``.

    Args:
        source: Source name; used as the cache subfolder name and as part
            of the returned path.
        url: Address that is fetched with ``requests.get``.
        filename: Name of the file the downloaded bytes are written to.

    Returns:
        The string ``/cache/<source>/<filename>``.
    """
    # Define some paths
    path = os.path.join(CACHE_PATH, source)
    file_path = os.path.join(path, filename)
    cached_url = f"/cache/{source}/{filename}"
    # Ensure cache folder. makedirs also creates a missing CACHE_PATH and
    # does not raise when the folder already exists (e.g. created by a
    # concurrent run between a check and the creation).
    os.makedirs(path, exist_ok=True)
    # Fetch url
    logger.info(f"Caching {url} to {file_path}")
    # NOTE(review): no timeout is passed and the HTTP status is not checked,
    # so an error page body would be written to the cache as-is.
    response = requests.get(url)
    # Write file to disk
    with open(file_path, "wb") as f:
        f.write(response.content)
    # Return the inquisitor path to the file
    return cached_url
@ -43,6 +43,7 @@ class LinearCrawler:
An engine for generating items from web sources that link content
together in a linear fashion, such as webcomics.
"""
def fetch_new(self, state):
items = []
max_iter = self.max_iterations() - 1
@ -66,18 +67,18 @@ class LinearCrawler:
def try_fetch(self, state):
# Check for whether a new page should be crawled
if 'current_page' not in state:
if "current_page" not in state:
next_page = self.get_start_url()
else:
current = state['current_page']
current = state["current_page"]
response = requests.get(current)
soup = BeautifulSoup(response.text, features='html.parser')
soup = BeautifulSoup(response.text, features="html.parser")
next_page = self.get_next_page_url(current, soup)
if not next_page:
return [] # nothing new
# Download the new page
logger.info('Fetching ' + next_page)
logger.info("Fetching " + next_page)
response = requests.get(next_page)
soup = BeautifulSoup(response.text, features="html.parser")
@ -85,20 +86,20 @@ class LinearCrawler:
item = self.make_item(next_page, soup)
# Update the state and return the item
state['current_page'] = next_page
state["current_page"] = next_page
return [item]
def max_iterations(self):
    """Return the iteration limit that ``fetch_new`` reads; 3 by default."""
    return 3
def get_start_url(self):
    """Return the URL to crawl when the state has no ``current_page`` yet.

    Raises:
        NotImplementedError: Always; subclasses must override this hook.
    """
    raise NotImplementedError("get_start_url is required")
def get_next_page_url(self, url, soup):
    """Return the URL of the page that follows ``url``.

    ``try_fetch`` calls this with the current page URL and the parsed
    ``BeautifulSoup`` of that page; a falsy return value is treated as
    "nothing new".

    Raises:
        NotImplementedError: Always; subclasses must override this hook.
    """
    raise NotImplementedError("get_next_page_url is required")
def make_item(self, url, soup):
    """Build the feed item for a newly downloaded page.

    ``try_fetch`` calls this with the new page URL and the parsed
    ``BeautifulSoup`` of that page and returns the result in a list.

    Raises:
        NotImplementedError: Always; subclasses must override this hook.
    """
    raise NotImplementedError("make_item is required")
class RedditScraper:
@ -107,11 +108,13 @@ class RedditScraper:
Requires defining source, subreddit_name
fetch new with RedditScraper.fetch_new(state, __name__, reddit)
"""
@staticmethod
def fetch_new(state, name, reddit):
items = []
for name, obj in inspect.getmembers(sys.modules[name]):
if (inspect.isclass(obj)
if (
inspect.isclass(obj)
and issubclass(obj, RedditScraper)
and obj is not RedditScraper
):
@ -124,7 +127,7 @@ class RedditScraper:
def get_items(self):
sub_name = self.subreddit_name
logger.info(f'Fetching posts from r/{sub_name}')
logger.info(f"Fetching posts from r/{sub_name}")
subreddit = self.reddit.subreddit(sub_name)
posts = self.subreddit_page(subreddit)
items = []
@ -135,24 +138,28 @@ class RedditScraper:
def item_from_post(self, post):
    """Build the item dict for a single reddit post.

    The fields ``source``, ``id``, ``title``, ``link``, ``time``,
    ``author``, ``body`` and ``tags`` are always present. The fields
    ``ttl``, ``ttd``, ``tts`` and ``callback`` are added only when the
    matching ``get_*`` hook returns something other than ``None``.

    Args:
        post: The post object; ``id``, ``created_utc`` and ``author`` are
            read here, the rest is left to the ``get_*`` hooks.

    Returns:
        The item as a dict.
    """
    item = {
        "source": self.source,
        "id": post.id,
        "title": self.get_title(post),
        "link": self.get_link(post),
        "time": post.created_utc,
        # A falsy author (e.g. a deleted account) has no name to show
        "author": "/u/" + (post.author.name if post.author else "[deleted]"),
        "body": self.get_body(post),
        "tags": self.get_tags(post),
    }
    # "ttl" must not be placed in the literal above: that stored None when
    # the hook had no value, which defeats the guard below and breaks code
    # that checks `"ttl" in item` before doing arithmetic with it.
    optional_fields = (
        ("ttl", self.get_ttl),
        ("ttd", self.get_ttd),
        ("tts", self.get_tts),
        ("callback", self.get_callback),
    )
    for key, getter in optional_fields:
        value = getter(post)
        if value is not None:
            item[key] = value
    return item
def subreddit_page(self, subreddit):
@ -162,48 +169,50 @@ class RedditScraper:
return True
def get_title(self, post):
    """Return the item title for ``post``.

    The title is ``/<subreddit_name_prefixed>: <title>``, preceded by
    ``[S] `` when the post is a spoiler and ``[NSFW] `` when it is marked
    over 18 (in that order).
    """
    s = "[S] " if post.spoiler else ""
    nsfw = "[NSFW] " if post.over_18 else ""
    return f"{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}"
def get_link(self, post):
    """Return the absolute reddit.com URL built from ``post.permalink``."""
    return f"https://reddit.com{post.permalink}"
def get_body(self, post):
    """Render the HTML body of the item for ``post``.

    Up to four kinds of parts are produced, in this order, and joined with
    ``<br><hr>``:

    * a link to ``post.url`` when the post is not a self post,
    * the widest preview image narrower than 800 pixels,
    * for galleries, a link and a preview image per gallery entry,
    * the self text, cut at the first space at or after character 1024
      (or at character 1024 when there is no such space) and suffixed with
      ``[...]`` when something was cut off.

    Preview and gallery rendering is best effort: malformed or missing
    data in those structures is skipped instead of failing the item.

    NOTE(review): post values are interpolated into HTML without escaping;
    confirm that this is intended for this data before changing it.
    """
    # Failures expected from missing keys, empty lists or None placeholders
    # in the preview and gallery structures.
    bad_data = (AttributeError, LookupError, TypeError, ValueError)

    parts = []
    if not post.is_self:
        parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
    if hasattr(post, "preview"):
        try:
            previews = post.preview["images"][0]["resolutions"]
            small_previews = [p for p in previews if p["width"] < 800]
            # Widest candidate; max() raises ValueError when there is none
            preview = max(small_previews, key=lambda p: p["width"])
            parts.append(f'<img src="{preview["url"]}">')
        except bad_data:
            pass
    if getattr(post, "is_gallery", False):
        # One try around the whole loop: the first bad entry stops the
        # gallery, entries rendered before it are kept.
        try:
            for gallery_item in post.gallery_data["items"]:
                media_id = gallery_item["media_id"]
                metadata = post.media_metadata[media_id]
                small_previews = [p for p in metadata["p"] if p["x"] < 800]
                preview = max(small_previews, key=lambda p: p["x"])
                full_url = metadata["s"]["u"]
                parts.append(f'<i>link:</i> <a href="{full_url}">{full_url}</a>')
                parts.append(f'<img src="{preview["u"]}">')
        except bad_data:
            pass
    if post.selftext:
        text = post.selftext
        cut = text.find(" ", 1024)
        if cut == -1:
            # No space to break on. Cutting at 1024 keeps a text of up to
            # 1024 characters whole; a "not found" of -1 used as an offset
            # would drop one character too many.
            cut = 1024
        preview_body = text[:cut]
        if len(preview_body) < len(text):
            preview_body += "[...]"
        parts.append(f"<p>{preview_body}</p>")
    return "<br><hr>".join(parts)
def get_tags(self, post):
    """Return the tags for ``post``.

    The list holds ``reddit``, then ``post.subreddit_name_prefixed``
    without its first two characters, then ``nsfw`` when the post is
    marked over 18.
    """
    tags = ["reddit", post.subreddit_name_prefixed[2:]]
    if post.over_18:
        tags.append("nsfw")
    return tags
def get_ttl(self, post):
@ -219,10 +228,10 @@ class RedditScraper:
return None
def callback(self, state, item):
    """Placeholder for the item callback hook.

    Raises:
        NotImplementedError: Always; this base implementation does nothing
            else.
    """
    raise NotImplementedError("callback")
def on_create(self, state, item):
    """Placeholder for the item creation hook.

    Raises:
        NotImplementedError: Always; this base implementation does nothing
            else.
    """
    raise NotImplementedError("on_create")
def on_delete(self, state, item):
    """Placeholder for the item deletion hook.

    Raises:
        NotImplementedError: Always; this base implementation does nothing
            else.
    """
    raise NotImplementedError("on_delete")

View File

@ -1,9 +1,11 @@
import time
import datetime
def now():
    """Return the current ``time.time()`` value truncated to an int."""
    return int(time.time())
def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
    """Format the timestamp ``ts`` as text.

    Args:
        ts: Timestamp passed to ``datetime.datetime.fromtimestamp``; no
            time zone is given, so the result is in local time.
        formatstr: ``strftime`` format for the result.

    Returns:
        The formatted date and time as a string.
    """
    dt = datetime.datetime.fromtimestamp(ts)
    return dt.strftime(formatstr)