Compare commits

...

9 Commits

15 changed files with 1387 additions and 1139 deletions

View File

@ -13,24 +13,59 @@
systems = [ "aarch64-linux" "x86_64-linux" ];
each = system:
let
pkgs = nixpkgs.legacyPackages.${system};
in
{
packages.${system}.default = (import nixpkgs {
pkgs = (import nixpkgs {
inherit system;
overlays = [ self.overlays.default ];
}).inquisitor;
});
in {
packages.${system} = {
default = self.packages.${system}.inquisitor;
inquisitor = pkgs.inquisitor;
env = pkgs.inquisitor.dependencyEnv;
};
devShells.${system}.default = pkgs.mkShell {
buildInputs = [ (pkgs.python3.withPackages (p: [p.poetry])) ];
devShells.${system} = {
default = self.devShells.${system}.inquisitor;
inquisitor = pkgs.mkShell {
buildInputs = [ (pkgs.python3.withPackages (p: [p.poetry])) ];
shellHook = ''
PS1="(inquisitor) $PS1"
'';
};
sources = pkgs.mkShell {
buildInputs = [ self.packages.${system}.env ];
shellHook = ''
PS1="(sources) $PS1"
'';
};
};
checks.${system}.test-module = let
test-lib = import "${nixpkgs}/nixos/lib/testing-python.nix" {
inherit system;
};
in
test-lib.makeTest {
name = "inquisitor-test-module";
nodes = {
host = { ... }: {
imports = [ self.nixosModules.default ];
services.inquisitor.enable = true;
};
};
testScript = ''
start_all()
host.wait_for_unit("multi-user.target")
host.succeed("[ -e /var/lib/inquisitor ]")
'';
};
};
in (my-flake.outputs-for each systems) // {
overlays.default = final: prev: {
inquisitor = final.poetry2nix.mkPoetryApplication {
projectDir = ./.;
};
};
in (my-flake.outputs-for each systems) //
{
overlays.default = final: prev: {
inquisitor = (final.poetry2nix.mkPoetryApplication {
projectDir = ./.;
}).dependencyEnv;
};
nixosModules.default = import ./module.nix self;
};
}

View File

@ -1,2 +1,2 @@
from inquisitor.configs import CACHE_PATH
from inquisitor.templates import cache_image, LinearCrawler, RedditScraper
from inquisitor.templates import cache_image, LinearCrawler, RedditScraper

View File

@ -1,2 +1,3 @@
from cli import main
main()
main()

View File

@ -8,13 +8,14 @@ from flask import Flask, render_template, request, jsonify, abort, redirect, url
# Application imports
from inquisitor.configs import (
DUNGEON_PATH,
SOURCES_PATH,
CACHE_PATH,
subfeeds,
get_subfeed_overrides,
logger,
init_default_logging)
DUNGEON_PATH,
SOURCES_PATH,
CACHE_PATH,
subfeeds,
get_subfeed_overrides,
logger,
init_default_logging,
)
from inquisitor import sources, loader, timestamp
# Globals
@ -22,173 +23,202 @@ app = Flask(__name__)
def make_query_link(text, wl, bl):
    """Build an HTML anchor whose href is a tag-filter query string.

    Args:
        text: Link text; it is HTML-escaped before being placed in the anchor.
        wl: Tags for the ``only=`` parameter (comma-joined).
        bl: Tags for the ``not=`` parameter (comma-joined).

    Returns:
        A string of the form ``<a href="?only=a,b&not=c">text</a>``. A
        parameter whose joined value is empty is omitted; if both are
        omitted the href is just ``?``.
    """
    from html import escape
    from urllib.parse import quote

    params = []
    for key, tags in (("only", wl), ("not", bl)):
        # Percent-encode each tag so characters such as '&', '#', '+' or '='
        # cannot break out of (or silently drop) the parameter they belong to.
        value = ",".join(quote(str(tag), safe="") for tag in tags)
        if value:
            params.append("{}={}".format(key, value))
    query = "?{}".format("&".join(params))
    # After quoting, the query holds no characters that can terminate the
    # attribute; the visible text still needs HTML escaping.
    return '<a href="{1}">{0}</a>'.format(escape(str(text)), query)
@app.template_filter("datetimeformat")
def datetimeformat(value):
    """Template filter: format a timestamp for display.

    Returns an empty string when ``value`` is None; otherwise delegates to
    ``timestamp.stamp_to_readable``.
    """
    if value is None:
        return ""
    return timestamp.stamp_to_readable(value)
@app.route("/")
def root():
    """Redirect the site root to the ``feed`` endpoint."""
    feed_url = url_for("feed")
    return redirect(feed_url)
@app.route("/feed/")
def feed():
    """Render the main feed, i.e. the feed built with ``source_names=None``."""
    unrestricted = None
    return feed_for_sources(source_names=unrestricted)
@app.route("/feed/<string:feed_name>/")
def subfeed(feed_name):
    """Render a named subfeed, or 404 if the name is unknown.

    The subfeed mapping comes from ``get_subfeed_overrides()`` when that
    returns something truthy, otherwise from the configured ``subfeeds``,
    otherwise it is empty. The built-in name ``inquisitor`` selects every
    entry of the dungeon directory that no subfeed lists.
    """
    feed_map = get_subfeed_overrides() or subfeeds or {}

    if feed_name == "inquisitor":
        # Everything some subfeed already lists is excluded from the built-in feed.
        claimed = {name for members in feed_map.values() for name in members}
        unclaimed = [
            name for name in os.listdir(DUNGEON_PATH) if name not in claimed
        ]
        return feed_for_sources(unclaimed)

    if feed_name in feed_map:
        return feed_for_sources(feed_map[feed_name])
    return abort(404)
def feed_for_sources(source_names):
    """Render ``feed.jinja2`` for the given sources, applying tag filters.

    Query parameters read from the current request:
        only: comma-separated tags; items carrying none of them are hidden.
        not:  comma-separated tags; items carrying any of them are hidden.

    Args:
        source_names: Passed straight to ``loader.load_active_items``.

    Returns:
        The rendered template. At most the first 100 entries are shown,
        including the synthetic "Feed Control" and "Read errors" entries
        that are prepended when applicable.
    """
    from html import escape

    # Determine exclusion filters: each filter returns True to EXCLUDE an item.
    wl_param = request.args.get("only")
    wl = wl_param.split(",") if wl_param else []
    bl_param = request.args.get("not")
    bl = bl_param.split(",") if bl_param else []
    filters = []
    if wl:
        filters.append(lambda item: not any(tag in wl for tag in item["tags"]))
    if bl:
        filters.append(lambda item: any(tag in bl for tag in item["tags"]))

    # Get all active+filtered items and count how often each tag occurs
    # among active items (counted before filtering).
    total = 0
    items, errors = loader.load_active_items(source_names)
    active_items = []
    active_tags = {}
    for item in items:
        if item["active"]:
            for tag in item["tags"]:
                active_tags[tag] = active_tags.get(tag, 0) + 1
            total += 1
            if not any(excluded(item) for excluded in filters):
                active_items.append(item)

    def sort_key(entry):
        # Prefer a truthy "time", then a truthy "created", else sort first.
        if "time" in entry and entry["time"]:
            return entry["time"]
        if "created" in entry and entry["created"]:
            return entry["created"]
        return 0

    # Sort items by time
    active_items.sort(key=sort_key)

    logger.info("Returning {} of {} items".format(len(active_items), total))

    if errors:
        # Error text is escaped so that "<...>" inside a message is shown
        # literally instead of being parsed as markup inside the <pre>.
        read_ex = {
            "title": "Read errors",
            "body": "<pre>{}</pre>".format(escape("\n\n".join(errors))),
            "created": None,
        }
        active_items.insert(0, read_ex)

    if total > 0:
        # Create the feed control item: one table row per tag with links
        # that toggle the tag in the "only" / "not" lists.
        link_table = [
            "<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
                total, make_query_link("all", [], [])
            )
        ]
        for tag, count in sorted(active_tags.items(), key=lambda i: i[0].lower()):
            links = [count]
            links.append(make_query_link(tag, [tag], []))
            if tag in wl:
                new_wl = [t for t in wl if t != tag]
                links.append(make_query_link("-only", new_wl, bl))
            else:
                # Adding a tag to "only" also drops it from "not".
                new_bl = [t for t in bl if t != tag]
                links.append(make_query_link("+only", wl + [tag], new_bl))
            if tag in bl:
                new_bl = [t for t in bl if t != tag]
                links.append(make_query_link("-not", wl, new_bl))
            else:
                # Adding a tag to "not" also drops it from "only".
                new_wl = [t for t in wl if t != tag]
                links.append(make_query_link("+not", new_wl, bl + [tag]))
            link_table.append(
                "<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(
                    *links
                )
            )
        body = '<table class="feed-control">{}</table>'.format("\n".join(link_table))

        feed_control = {
            "title": "Feed Control [{}/{}]".format(len(active_items), total),
            "body": body,
        }
        active_items.insert(0, feed_control)

    selection = active_items[:100]

    return render_template(
        "feed.jinja2",
        items=selection,
        # Only real items carry an "id"; the synthetic entries are skipped.
        mdeac=[
            {"source": item["source"], "itemid": item["id"]}
            for item in selection
            if "id" in item
        ],
    )
@app.route("/deactivate/", methods=["POST"])
def deactivate():
    """Mark one item inactive.

    Expects a JSON object with ``source`` and ``itemid``. Responds with
    ``{"active": <bool>}`` read back from the item, or aborts with 400 when
    the body is not an object or either key is missing.
    """
    params = request.get_json()
    # Both keys are required; previously this only triggered when BOTH were
    # missing and then fell through to a KeyError.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    if item["active"]:
        logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
    # Written unconditionally; the resulting state is the same either way.
    item["active"] = False
    return jsonify({"active": item["active"]})
@app.route("/punt/", methods=["POST"])
def punt():
    """Set an item's ``tts`` so that it spans until 06:00 tomorrow.

    Expects a JSON object with ``source`` and ``itemid``. ``tts`` is set to
    the number of seconds between the item's ``created`` stamp and 06:00
    tomorrow (computed from ``datetime.now()``). Responds with the item's
    underlying ``item`` mapping as JSON, or aborts with 400 on a malformed
    request.
    """
    params = request.get_json()
    # Both keys are required; previously this only triggered when BOTH were
    # missing and then fell through to a KeyError.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    tomorrow = datetime.now() + timedelta(days=1)
    morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
    til_then = morning.timestamp() - item["created"]
    item["tts"] = til_then
    return jsonify(item.item)
@app.route("/mass-deactivate/", methods=["POST"])
def mass_deactivate():
    """Mark every item listed in the request inactive.

    Expects a JSON object whose ``items`` entry is a list of objects with
    ``source`` and ``itemid``. A missing ``items`` key is logged and treated
    as an empty list. Always responds with an empty JSON object.
    """
    params = request.get_json()
    if "items" not in params:
        logger.error("Bad request params: {}".format(params))

    for entry in params.get("items", []):
        source_name, item_id = entry["source"], entry["itemid"]
        target = loader.load_item(source_name, item_id)
        if target["active"]:
            logger.debug(f"Deactivating {source_name}/{item_id}")
        # Written unconditionally; the resulting state is the same either way.
        target["active"] = False

    return jsonify({})
@app.route("/callback/", methods=["POST"])
def callback():
    """Invoke the source callback for one item.

    Expects a JSON object with ``source`` and ``itemid`` and forwards both
    to ``sources.item_callback``. Responds with an empty JSON object, or
    aborts with 400 when the body is not an object or either key is missing.
    """
    params = request.get_json()
    # Both keys are required; previously this only triggered when BOTH were
    # missing and then fell through to a KeyError.
    if (
        not isinstance(params, dict)
        or "source" not in params
        or "itemid" not in params
    ):
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    logger.info(
        "Executing callback for {}/{}".format(params["source"], params["itemid"])
    )
    sources.item_callback(params["source"], params["itemid"])
    return jsonify({})
@app.route("/cache/<path:cache_path>")
def cache(cache_path):
    """Serve the raw bytes of a file stored under ``CACHE_PATH``.

    Aborts with 404 when the requested path does not resolve to a regular
    file inside the cache directory.
    """
    cache_root = os.path.realpath(CACHE_PATH)
    path = os.path.realpath(os.path.join(cache_root, cache_path))
    # Refuse anything that resolves outside the cache directory, e.g. via
    # ".." segments in the URL or a symlink pointing elsewhere.
    if os.path.commonpath([cache_root, path]) != cache_root:
        return abort(404)
    if not os.path.isfile(path):
        return abort(404)
    with open(path, "rb") as f:
        return f.read()
def wsgi():
    """Initialize default logging, then return the Flask ``app`` object."""
    init_default_logging()
    return app

View File

@ -11,280 +11,324 @@ from inquisitor.configs import logger, DUNGEON_PATH, SOURCES_PATH, add_logging_h
def command_test(args):
    """Echo config file values."""
    # The docstring above is printed verbatim in the top-level help listing
    # built by main(), so it is kept to a single line.
    from inquisitor.configs import resolver

    # Flatten the subfeed mapping to "name: src src; name: src" for display.
    if resolver.subfeeds:
        subfeed_summary = "; ".join(
            "{0}: {1}".format(name, " ".join(members))
            for name, members in resolver.subfeeds.items()
        )
    else:
        subfeed_summary = ""

    report = [
        f"Inquisitor configured from {resolver.config_path}",
        f" {resolver.CONFIG_DATA} = {resolver.data_path}",
        f" {resolver.CONFIG_SOURCES} = {resolver.source_path}",
        f" {resolver.CONFIG_CACHE} = {resolver.cache_path}",
        f" {resolver.CONFIG_LOGFILE} = {resolver.log_file}",
        f" {resolver.CONFIG_VERBOSE} = {resolver.is_verbose}",
        f" {resolver.CONFIG_SUBFEEDS} = {subfeed_summary}",
    ]
    for line in report:
        print(line)
    return 0
def command_update(args):
    """Fetch and store new items from the specified sources."""
    # The docstring above doubles as the argparse description and as this
    # command's entry in the top-level help listing, so it stays one line.
    #
    # Returns 0 on success (or after printing help when no source is given)
    # and -1 when the dungeon or sources directory is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor update", description=command_update.__doc__, add_help=False
    )
    parser.add_argument("source", nargs="*", help="Sources to update.")
    args = parser.parse_args(args)

    if len(args.source) == 0:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1
    if not os.path.isdir(SOURCES_PATH):
        logger.error(
            "Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources"
        )
        # Bail out like the dungeon check does instead of continuing into
        # an update that has no sources directory to read from.
        return -1

    # Update sources
    from inquisitor.sources import update_sources

    update_sources(*args.source)
    return 0
def command_deactivate(args):
    """Deactivate all items in the specified dungeon cells."""
    # The docstring above doubles as the argparse description and as this
    # command's entry in the top-level help listing, so it stays one line.
    #
    # Returns 0 on success (or after printing help when no cell is given)
    # and -1 when the dungeon directory is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor deactivate",
        description=command_deactivate.__doc__,
        add_help=False,
    )
    parser.add_argument("source", nargs="*", help="Cells to deactivate.")
    parser.add_argument("--tag", help="Only deactivate items with this tag")
    parser.add_argument(
        "--title", help="Only deactivate items with titles containing this substring"
    )
    args = parser.parse_args(args)

    if len(args.source) == 0:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    # Deactivate all items in each source.
    from inquisitor.loader import load_items

    for source_name in args.source:
        path = os.path.join(DUNGEON_PATH, source_name)
        if not os.path.isdir(path):
            logger.warning("'{}' is not an extant source".format(source_name))
            # Nothing to load for a cell that does not exist; move on to
            # the remaining cells rather than trying to read it.
            continue
        count = 0
        items, _ = load_items(source_name)
        for item in items.values():
            if args.tag and args.tag not in item["tags"]:
                continue
            if args.title and args.title not in item["title"]:
                continue
            if item["active"]:
                item["active"] = False
                count += 1
        logger.info("Deactivated {} items in '{}'".format(count, source_name))

    return 0
def command_add(args):
    """Creates an item."""
    # The docstring above doubles as the argparse description and as this
    # command's entry in the top-level help listing, so it stays one line.
    #
    # Returns 0 after the item is created (or after printing help when no
    # title is given) and -1 when the dungeon or the target cell is missing.
    parser = argparse.ArgumentParser(
        prog="inquisitor add", description=command_add.__doc__, add_help=False
    )
    parser.add_argument("--id", help="String")
    parser.add_argument("--source", help="String")
    parser.add_argument("--title", help="String")
    parser.add_argument("--link", help="URL")
    parser.add_argument("--time", type=int, help="Unix timestamp")
    parser.add_argument("--author", help="String")
    parser.add_argument("--body", help="HTML")
    parser.add_argument("--tags", help="Comma-separated list")
    parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds")
    parser.add_argument("--ttd", type=int, help="Cleanup force in seconds")
    parser.add_argument("--tts", type=int, help="Display delay in seconds")
    parser.add_argument(
        "--create", action="store_true", help="Create source if it doesn't exist"
    )
    args = parser.parse_args(args)

    if not args.title:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    source = args.source or "inquisitor"
    cell_path = os.path.join(DUNGEON_PATH, source)
    if args.create:
        from inquisitor.sources import ensure_cell

        ensure_cell(source)
    elif not os.path.isdir(cell_path):
        logger.error("Source '{}' does not exist".format(source))
        return -1

    item = {
        # Default id: 64 random bits rendered as lowercase hex.
        "id": args.id or "{:x}".format(random.getrandbits(16 * 4)),
        "source": source,
    }
    # Text options are copied only when non-empty.
    if args.title:
        item["title"] = str(args.title)
    if args.link:
        item["link"] = str(args.link)
    if args.author:
        item["author"] = str(args.author)
    if args.body:
        item["body"] = str(args.body)
    if args.tags:
        item["tags"] = [str(tag) for tag in args.tags.split(",")]
    # Numeric options are copied whenever they were given, so that an
    # explicit 0 is not silently discarded.
    if args.time is not None:
        item["time"] = int(args.time)
    if args.ttl is not None:
        item["ttl"] = int(args.ttl)
    if args.ttd is not None:
        item["ttd"] = int(args.ttd)
    if args.tts is not None:
        item["tts"] = int(args.tts)

    from inquisitor.loader import new_item

    saved_item = new_item(source, item)
    logger.info(saved_item)
    return 0
def command_feed(args):
    """Print the current feed."""
    # The docstring above is printed verbatim in the top-level help listing
    # built by main(), so it is kept to a single line.
    #
    # Each item is drawn as an ASCII box at most 80 columns wide holding
    # the wrapped title, an author/time line and a source/id/created line.
    # Returns 0 on success and -1 when the dungeon directory is missing.
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1

    import shutil
    from inquisitor import loader
    from inquisitor import timestamp

    items, errors = loader.load_active_items(source_names=None)
    if not items and not errors:
        print("Feed is empty")
        return 0

    if errors:
        items.insert(
            0,
            {
                "title": "{} read errors: {}".format(len(errors), " ".join(errors)),
                "body": "\n".join(errors),
            },
        )

    size = shutil.get_terminal_size((80, 20))
    width = min(80, size.columns)
    # Usable text width inside the box; never below 1 so wrapping always
    # makes progress even on an extremely narrow terminal.
    wrap = max(1, width - 4)

    for item in items:
        title = item["title"] if "title" in item else ""
        titles = [title]
        while len(titles[-1]) > wrap:
            i = titles[-1][:wrap].rfind(" ")
            if i < 0:
                # No space to break on (e.g. a long URL): hard-break at the
                # box width instead of splitting off only the last character.
                i = wrap
            titles = titles[:-1] + [titles[-1][:i].strip(), titles[-1][i:].strip()]
        print("+" + (width - 2) * "-" + "+")
        for title in titles:
            print("| {0:<{1}} |".format(title, width - 4))
        print("|{0:<{1}}|".format("", width - 2))
        info1 = ""
        # Look the author up on the item, not in the title text.
        if "author" in item and item["author"]:
            info1 += item["author"] + " "
        if "time" in item and item["time"]:
            info1 += timestamp.stamp_to_readable(item["time"])
        print("| {0:<{1}} |".format(info1, width - 4))
        created = (
            timestamp.stamp_to_readable(item["created"]) if "created" in item else ""
        )
        info2 = "{0} {1} {2}".format(
            item.get("source", ""), item.get("id", ""), created
        )
        print("| {0:<{1}} |".format(info2, width - 4))
        print("+" + (width - 2) * "-" + "+")
        print()

    return 0
def command_run(args):
    """Run the default Flask server."""
    # The docstring above doubles as the argparse description and as this
    # command's entry in the top-level help listing, so it stays one line.
    #
    # Returns 0 once the server exits normally; any exception raised while
    # importing or running the app is logged and turned into -1.
    parser = argparse.ArgumentParser(
        add_help=False,
        description=command_run.__doc__,
        prog="inquisitor run",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--port", type=int, default=5000)
    options = parser.parse_args(args)

    try:
        from inquisitor.app import app

        app.run(port=options.port, debug=options.debug)
    except Exception as e:
        logger.error(e)
        return -1
    else:
        return 0
def command_help(args):
    """Print this help message and exit."""
    # print_usage is a module global that main() binds to the top-level
    # parser's print_help before dispatching to a command.
    print_usage()
    return 0
def nocommand(args):
    """Fallback handler: report that a command is required and return 0."""
    message = "command required"
    print(message)
    return 0
def main():
    """CLI entry point"""
    # Enable piping: restore the default SIGPIPE disposition.
    from signal import signal, SIGPIPE, SIG_DFL

    signal(SIGPIPE, SIG_DFL)

    # Collect the commands from this module: every module-level name that
    # starts with "command_" becomes a subcommand named after the suffix.
    import inquisitor.cli

    commands = {}
    for name, func in vars(inquisitor.cli).items():
        if name.startswith("command_"):
            commands[name[8:]] = func
    description_lines = []
    for name, func in commands.items():
        description_lines.append("- {0}: {1}".format(name, func.__doc__))
    descriptions = "\n".join(description_lines)

    # Set up the parser
    parser = argparse.ArgumentParser(
        description="Available commands:\n{}\n".format(descriptions),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        add_help=False,
    )
    parser.add_argument(
        "command",
        nargs="?",
        default="help",
        help="The command to execute",
        choices=commands,
        metavar="command",
    )
    parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Command arguments",
        metavar="args",
    )
    parser.add_argument(
        "-v",
        action="store_true",
        dest="verbose",
        help="Enable debug logging",
    )

    # Extract the usage print for command_help
    global print_usage
    print_usage = parser.print_help

    parsed = parser.parse_args()

    # Initialize a console logger
    add_logging_handler(verbose=parsed.verbose, log_filename=None)

    # Execute command; its return value becomes the process exit status.
    try:
        handler = commands.get(parsed.command, nocommand)
        sys.exit(handler(parsed.args))
    except BrokenPipeError:
        # See https://docs.python.org/3.10/library/signal.html#note-on-sigpipe
        # Point stdout at devnull so nothing further is written to the
        # closed pipe, then exit with status 1.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)

View File

@ -1,10 +1,5 @@
from .resolver import data_path as DUNGEON_PATH
from .resolver import source_path as SOURCES_PATH
from .resolver import cache_path as CACHE_PATH
from .resolver import (
logger,
subfeeds)
from .resolver import (
add_logging_handler,
init_default_logging,
get_subfeed_overrides)
from .resolver import logger, subfeeds
from .resolver import add_logging_handler, init_default_logging, get_subfeed_overrides

View File

@ -4,174 +4,172 @@ import logging
# Constants governing config resolution.
# Each CONFIG_* value is a key looked up in the config file; the matching
# DEFAULT_* value is the fallback used when the key is absent.

# Environment variable naming the config file, and the default file location.
# The config file contains key-value pairs of the other settings.
CONFIG_ENVVAR = "INQUISITOR_CONFIG"
DEFAULT_CONFIG_PATH = "/etc/inquisitor.conf"

# Folder where items are stored
CONFIG_DATA = "DataPath"
DEFAULT_DATA_PATH = "/var/inquisitor/data/"

# Folder where source modules are stored
CONFIG_SOURCES = "SourcePath"
DEFAULT_SOURCES_PATH = "/var/inquisitor/sources/"

# Folder where cached files are stored
CONFIG_CACHE = "CachePath"
DEFAULT_CACHE_PATH = "/var/inquisitor/cache/"

# Log file to which logging will be redirected
CONFIG_LOGFILE = "LogFile"
DEFAULT_LOG_FILE = None

# Whether logging is verbose
CONFIG_VERBOSE = "Verbose"
DEFAULT_VERBOSITY = "false"

# Subfeed source lists: one subfeed definition per line, with the sources
# inside a subfeed separated by spaces
CONFIG_SUBFEEDS = "Subfeeds"
DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = "subfeeds.conf"
def read_config_file(config_path):
    """
    Parse a key-value config file into a dict of stripped strings.

    Blank lines and lines whose first non-blank character is '#' are
    ignored. A line containing '=' starts a new entry, split at the first
    '='. Any other line is a continuation: its stripped text is appended,
    after a newline, to the value of the most recent key.

    Raises FileNotFoundError if config_path is not a file, and ValueError
    if a continuation line appears before any key.
    """
    if not os.path.isfile(config_path):
        raise FileNotFoundError(
            f"No config file found at {config_path}, try setting {CONFIG_ENVVAR}"
        )

    parsed = {}
    active_key = None
    with open(config_path, "r", encoding="utf8") as cfg:
        for line_no, raw in enumerate(cfg, start=1):
            stripped = raw.strip()
            # Skip blank lines and comments
            if not stripped or stripped.startswith("#"):
                continue

            if "=" in raw:
                # "key = value" begins a new keyvalue pair
                key, _, value = raw.partition("=")
                active_key = key.strip()
                parsed[active_key] = value.strip()
            elif active_key:
                # Continuation of the previous key's value
                parsed[active_key] += "\n" + stripped
            else:
                # No '=' and no previous key to continue
                raise ValueError(f"Invalid config format on line {line_no}")

    return parsed
def parse_subfeed_value(value):
    """
    Parses a multi-line subfeed definition into a dictionary.

    Each non-blank line of `value` has the form "name: source source ...".
    Returns a dict mapping each stripped subfeed name to the list of
    whitespace-separated source names that follow the first ':'.

    Raises ValueError if a non-blank line contains no ':'.
    """
    subfeeds = {}
    for line in value.split("\n"):
        sf_def = line.strip()
        if not sf_def:
            continue
        if ":" not in sf_def:
            raise ValueError(f"Invalid subfeed definition: {sf_def}")
        sf_name, sf_sources = sf_def.split(":", maxsplit=1)
        # str.split() with no separator already discards all surrounding
        # whitespace, so the tokens need no further stripping.
        subfeeds[sf_name.strip()] = sf_sources.split()
    return subfeeds
# Locate the config file: the environment variable wins, with a fallback to
# the built-in default path.
config_path = os.path.abspath(os.environ.get(CONFIG_ENVVAR) or DEFAULT_CONFIG_PATH)
configs = read_config_file(config_path)

# Extract and validate config values. Each directory setting must be an
# absolute path to a directory that already exists.

data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
if not os.path.isabs(data_path):
    raise ValueError(f"Non-absolute data path: {data_path}")
if not os.path.isdir(data_path):
    raise FileNotFoundError(f"Cannot find directory {data_path}")

source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
if not os.path.isabs(source_path):
    raise ValueError(f"Non-absolute source path: {source_path}")
if not os.path.isdir(source_path):
    raise FileNotFoundError(f"Cannot find directory {source_path}")

cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
if not os.path.isabs(cache_path):
    raise ValueError(f"Non-absolute cache path: {cache_path}")
if not os.path.isdir(cache_path):
    raise FileNotFoundError(f"Cannot find directory {cache_path}")

# The log file is optional; when one is given it must be an absolute path.
log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
if log_file and not os.path.isabs(log_file):
    raise ValueError(f"Non-absolute log file path: {log_file}")

# Verbosity is spelled "true"/"false" in the config and converted to a bool.
is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
if is_verbose not in ("true", "false"):
    raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
is_verbose = is_verbose == "true"

# Subfeeds stay as the raw falsy default unless a definition is present.
subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
if subfeeds:
    subfeeds = parse_subfeed_value(subfeeds)
def get_subfeed_overrides():
    """
    Looks for the secondary subfeed configuration file in the sources
    directory and parses it.

    Returns the parsed subfeed dictionary, or None when the file is absent,
    has no subfeeds key, or has an empty subfeeds value.
    """
    override_file = os.path.join(source_path, SUBFEED_CONFIG_FILE)
    if not os.path.isfile(override_file):
        return None
    # A missing key and an empty value are both treated as "no overrides".
    raw_value = read_config_file(override_file).get(CONFIG_SUBFEEDS)
    if not raw_value:
        return None
    return parse_subfeed_value(raw_value)
# Set up logging: fetch the logger named "inquisitor" and let it pass every
# record from DEBUG upward; the handlers attached by add_logging_handler
# apply their own level.
logger = logging.getLogger("inquisitor")
logger.setLevel(logging.DEBUG)
def add_logging_handler(verbose, log_filename):
    """
    Attaches one handler to the module-level `logger`.

    verbose: when truthy, records are formatted with timestamp, level, file
        name and line number and the handler accepts DEBUG and above;
        otherwise the short "[level] message" format is used and the
        handler accepts INFO and above.
    log_filename: when truthy, records go to a rotating file at this path;
        otherwise they go to a StreamHandler.
    """
    if verbose:
        log_format = "[{asctime}] [{levelname}:{filename}:{lineno}] {message}"
        log_level = logging.DEBUG
    else:
        log_format = "[{levelname}] {message}"
        log_level = logging.INFO
    formatter = logging.Formatter(log_format, style="{")

    if log_filename:
        # Rotate at 4 MB per file and keep 4 rotated backups.
        handler = logging.handlers.RotatingFileHandler(
            log_filename,
            encoding="utf8",
            maxBytes=2**22,
            backupCount=4,
        )
    else:
        handler = logging.StreamHandler()

    handler.setFormatter(formatter)
    handler.setLevel(log_level)
    logger.addHandler(handler)
def init_default_logging():
    """Adds a logging handler using the verbosity and log file from the config."""
    add_logging_handler(verbose=is_verbose, log_filename=log_file)

View File

@ -8,19 +8,20 @@ from inquisitor.configs import DUNGEON_PATH, logger
# Logger named "inquisitor"; as_item below reports every error item through it.
logger = logging.getLogger("inquisitor")
def as_item(title, body=None):
    """
    Records an error as an item in the "inquisitor" cell and logs it.

    title: the item title.
    body: optional text (for example a formatted traceback). It is
        HTML-escaped and wrapped in a <pre> element.

    The item is logged at ERROR level as JSON and written, pretty-printed,
    to <DUNGEON_PATH>/inquisitor/<id>.item, where the id is 64 random bits
    rendered as lowercase hex.
    """
    import html  # local import: only needed to escape the body

    iid = "{:x}".format(random.getrandbits(16 * 4))
    item = {
        "id": iid,
        "source": "inquisitor",
        "title": title,
        "active": True,
        "created": timestamp.now(),
        "tags": ["inquisitor", "error"],
    }
    if body is not None:
        # Tracebacks contain text such as "<module>", which would be parsed
        # as markup inside the <pre> element if left unescaped.
        item["body"] = "<pre>{}</pre>".format(html.escape(str(body)))

    # Error reporting must not fail merely because the cell was never created.
    cell_path = os.path.join(DUNGEON_PATH, "inquisitor")
    os.makedirs(cell_path, exist_ok=True)
    path = os.path.join(cell_path, iid + ".item")

    logger.error(json.dumps(item))
    # Explicit encoding, matching how item files are written elsewhere.
    with open(path, "w", encoding="utf8") as f:
        f.write(json.dumps(item, indent=2))

View File

@ -7,180 +7,180 @@ from inquisitor import error
from inquisitor import timestamp
class WritethroughDict:
    """
    A wrapper for a dictionary saved to the file system.

    Assigning through `wd[key] = value` rewrites the backing JSON file
    immediately. In-place changes to nested values do not go through
    __setitem__, so call flush() to persist those.
    """

    @staticmethod
    def create(path, item):
        """
        Creates a writethrough dictionary from a dictionary in memory and
        initializes a file to save it.

        Raises FileExistsError if a file already exists at `path`.
        """
        if os.path.isfile(path):
            raise FileExistsError(path)
        wd = WritethroughDict(path, item)
        wd.flush()
        return wd

    @staticmethod
    def load(path):
        """
        Creates a writethrough dictionary from an existing file in the
        file system.

        Raises FileNotFoundError if there is no file at `path`.
        """
        if not os.path.isfile(path):
            raise FileNotFoundError(path)
        # Read with the same encoding flush() writes with, rather than the
        # platform default.
        with open(path, encoding="utf8") as f:
            item = json.load(f)
        return WritethroughDict(path, item)

    def __init__(self, path, item):
        """Wraps `item` without touching the file system."""
        self.path = path
        self.item = item

    def __getitem__(self, key):
        return self.item[key]

    def get(self, *args, **kwargs):
        """Delegates to dict.get on the wrapped dictionary."""
        return self.item.get(*args, **kwargs)

    def __setitem__(self, key, value):
        # Every assignment is persisted immediately.
        self.item[key] = value
        self.flush()

    def __contains__(self, key):
        return key in self.item

    def __repr__(self):
        return repr(self.item)

    def __str__(self):
        return str(self.item)

    def flush(self):
        """Writes the wrapped dictionary to `self.path` as indented JSON."""
        # Serialize before opening so a serialization error cannot leave a
        # truncated file behind.
        s = json.dumps(self.item, indent=2)
        with open(self.path, "w", encoding="utf8") as f:
            f.write(s)
def load_state(source_name):
    """Returns the writethrough dictionary stored in the source's "state" file."""
    return WritethroughDict.load(os.path.join(DUNGEON_PATH, source_name, "state"))
def load_item(source_name, item_id):
    """Returns the writethrough dictionary for "<item_id>.item" in the source's cell."""
    item_file = "{}.item".format(item_id)
    return WritethroughDict.load(os.path.join(DUNGEON_PATH, source_name, item_file))
def item_exists(source_name, item_id):
    """
    Reports whether "<item_id>.item" is an existing file in the source's cell.
    """
    item_file = "{}.item".format(item_id)
    return os.path.isfile(os.path.join(DUNGEON_PATH, source_name, item_file))
def get_item_ids(cell_name):
    """
    Lists the ids of the items in the given cell, i.e. the names of its
    ".item" files with that suffix removed.
    """
    suffix = ".item"
    item_ids = []
    for entry in os.listdir(os.path.join(DUNGEON_PATH, cell_name)):
        if entry.endswith(suffix):
            item_ids.append(entry[: -len(suffix)])
    return item_ids
def new_item(source_name, item):
    """
    Creates and saves a new item from the provided dictionary, filling in
    defaults for missing fields. The dictionary is modified in place.

    Raises Exception if the item has no id, and FileExistsError (from
    WritethroughDict.create) if the item file already exists.
    """
    # The id is the only field the caller must provide.
    if "id" not in item:
        raise Exception(f"Cannot create item with no id. Value = {item}")

    # A source may deliberately name a different source; only fill it in
    # when it is absent.
    item.setdefault("source", source_name)

    # New items are always active and stamped with the current time,
    # whatever the caller supplied.
    item["active"] = True
    item["created"] = timestamp.now()

    # The title falls back to the id. Tags fall back to the source name only
    # when the key is missing; an explicit empty list is kept.
    item.setdefault("title", item["id"])
    item.setdefault("tags", [source_name])

    # All other fields are optional.
    item_path = os.path.join(DUNGEON_PATH, item["source"], f'{item["id"]}.item')
    return WritethroughDict.create(item_path, item)
def delete_item(source_name, item_id):
    """
    Removes the file "<item_id>.item" from the source's cell.
    """
    item_file = "{}.item".format(item_id)
    os.remove(os.path.join(DUNGEON_PATH, source_name, item_file))
def load_items(source_name):
    """
    Loads every ".item" file in the source's cell.

    Returns a pair: a dict mapping item ids to items, and a list of the
    file names that could not be loaded.
    """
    loaded, unreadable = {}, []
    for filename in os.listdir(os.path.join(DUNGEON_PATH, source_name)):
        if not filename.endswith(".item"):
            continue
        try:
            item = load_item(source_name, filename[:-5])
            # Keyed by the id stored inside the item, not by the file name.
            loaded[item["id"]] = item
        except Exception:
            unreadable.append(filename)
    return loaded, unreadable
def load_active_items(source_names):
    """
    Collects the items that should currently be shown.

    source_names: the sources to read; when falsy, every entry in the
        dungeon directory is checked.

    Returns a pair: the list of visible items, and the list of file names
    that could not be loaded or inspected.
    """
    visible, unreadable = [], []
    now = timestamp.now()
    for source_name in source_names or os.listdir(DUNGEON_PATH):
        source_path = os.path.join(DUNGEON_PATH, source_name)
        if not os.path.isdir(source_path):
            logger.warning(f"Skipping nonexistent source {source_name}")
            continue
        item_files = [f for f in os.listdir(source_path) if f.endswith(".item")]
        for filename in item_files:
            try:
                item = load_item(source_name, filename[:-5])
                # The time-to-show field hides an item until created + tts.
                hidden = "tts" in item and now < item["created"] + item["tts"]
                # Inactive items are never shown.
                if hidden or not item["active"]:
                    continue
                visible.append(item)
            except Exception:
                unreadable.append(filename)
    return visible, unreadable

View File

@ -10,229 +10,234 @@ from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
# Item fields that update_source refreshes from a newly fetched copy of an
# item that already exists on disk.
USE_NEWEST = (
    "title",
    "tags",
    "link",
    # The comma after "time" is required: without it Python concatenates the
    # adjacent literals into the single bogus field name "timeauthor".
    "time",
    "author",
    "body",
    "ttl",
    "ttd",
    "tts",
)
class InquisitorStubSource:
    """
    A source-like stand-in returned for the name "inquisitor", used for
    clearing out ad-hoc inquisitor items. It never produces items.
    """

    def fetch_new(self, state):
        """Returns a new empty list; `state` is ignored."""
        return list()
def ensure_cell(name):
    """
    Makes sure the dungeon has a cell directory for `name` containing a
    "state" file. Existing directories and state files are left untouched,
    so the call is idempotent.
    """
    cell_path = os.path.join(DUNGEON_PATH, name)
    cell_missing = not os.path.isdir(cell_path)
    if cell_missing:
        logger.info(f'Creating cell for source "{name}"')
        os.mkdir(cell_path)

    state_path = os.path.join(cell_path, "state")
    if os.path.isfile(state_path):
        return
    # A fresh state is an empty JSON object.
    with open(state_path, "w", encoding="utf8") as state_file:
        state_file.write(json.dumps({}))
def update_sources(*source_names):
    """
    Attempts to update each given source in turn.

    A failure while importing, initializing or updating one source is
    recorded as an error item and does not stop the remaining sources.
    """
    for source_name in source_names:
        # `stage` names the step in progress so that a failure is reported
        # against the step that raised it.
        stage = "importing"
        try:
            source_module = load_source(source_name)

            # If it doesn't have a cell yet, create one.
            stage = "initializing"
            ensure_cell(source_name)

            stage = "updating"
            logger.info(f'Updating source "{source_name}"')
            update_source(source_name, source_module)
        except Exception:
            error.as_item(
                f'Error {stage} source "{source_name}"', traceback.format_exc()
            )
def load_source(source_name):
    """
    Attempts to load the source module with the given name.

    Returns an InquisitorStubSource for the name "inquisitor"; otherwise
    returns the module imported from "<source_name>.py" in SOURCES_PATH.

    Raises FileNotFoundError if the source file is missing and ImportError
    if the module does not define fetch_new. Exceptions raised while the
    module executes propagate to the caller.

    While loading, the working directory is switched to SOURCES_PATH and
    SOURCES_PATH is made importable; both are restored on exit.
    """
    if source_name == "inquisitor":
        return InquisitorStubSource()

    cwd = os.getcwd()
    # Only undo the sys.path change if this call made it, so an entry that
    # was already present before the call is left in place.
    added_to_path = False
    try:
        # Push the sources directory.
        os.chdir(SOURCES_PATH)
        # Make the sources directory importable while working with sources.
        if SOURCES_PATH not in sys.path:
            sys.path.insert(0, SOURCES_PATH)
            added_to_path = True

        # Check if the named source is present.
        source_file_name = source_name + ".py"
        if not os.path.isfile(source_file_name):
            raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')

        # Import the source module by file path.
        logger.debug(f'Loading module "{source_file_name}"')
        spec = importlib.util.spec_from_file_location(source_name, source_file_name)
        itemsource = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(itemsource)
        # NOTE(review): the module object executed above is discarded and the
        # module is obtained again through the regular import system, which
        # also registers it in sys.modules. The source's top-level code can
        # therefore run twice on first load - confirm whether both steps are
        # needed before removing either.
        itemsource = importlib.import_module(source_name)

        # Require fetch_new().
        if not hasattr(itemsource, "fetch_new"):
            raise ImportError(f'Missing fetch_new in "{source_file_name}"')

        return itemsource

    finally:
        os.chdir(cwd)
        if added_to_path and SOURCES_PATH in sys.path:
            sys.path.remove(SOURCES_PATH)
def update_source(source_name, source):
    """
    Attempts to update the given source. Raises an exception if the source does.

    source_name: the name of the source's cell.
    source: a source object providing fetch_new(state) and, optionally,
        on_create(state, item) and on_delete(state, item).

    The update fetches items, creates the ones that do not exist yet,
    refreshes the USE_NEWEST fields and callback data of the ones that do,
    deletes stale items from this source's cell, and records the update
    time in the source state.
    """
    # Get a list of item ids that already existed in this source's cell.
    prior_ids = loader.get_item_ids(source_name)
    logger.debug(f"Found {len(prior_ids)} prior items")

    # Get the feed items from the source's fetch method.
    state = loader.load_state(source_name)
    fetched = source.fetch_new(state)
    state.flush()
    logger.debug(f"Fetched {len(fetched)} items")

    # Determine which items are new and which are updates.
    # We query the file system here instead of checking against this source's
    # item ids from above because sources are allowed to generate in other
    # sources' cells.
    new_items = []
    updated_items = []
    for item in fetched:
        item_source = item.get("source", source_name)
        if loader.item_exists(item_source, item["id"]):
            updated_items.append(item)
        else:
            new_items.append(item)

    # Write all the new items to the source's cell.
    has_create_handler = hasattr(source, "on_create")
    for item in new_items:
        item_source = item.get("source", source_name)
        created_item = loader.new_item(item_source, item)
        if has_create_handler:
            # Because some sources do not return items more than once,
            # exceptions in the on-create handler must be squashed.
            try:
                source.on_create(state, created_item)
            except Exception:
                error.as_item(
                    f"Exception in {source_name}.on_create", traceback.format_exc()
                )

    # Update the other items using the fetched items' values.
    for new_item in updated_items:
        # Fetched items may omit "source"; fall back to this source exactly
        # as the new/updated classification above does.
        item_source = new_item.get("source", source_name)
        old_item = loader.load_item(item_source, new_item["id"])
        for field in USE_NEWEST:
            if field not in new_item:
                continue
            # The stored item may lack an optional field the fetch now has.
            if field not in old_item or old_item[field] != new_item[field]:
                old_item[field] = new_item[field]
        if "callback" in new_item:
            # The stored item may have no callback data yet.
            old_callback = old_item.get("callback", {})
            # Because of the way this update happens, any fields that are set
            # in the callback when the item is new will keep their original
            # values, as those values reappear in new_item on subsequent
            # updates.
            old_item["callback"] = {**old_callback, **new_item["callback"]}

    # In general, items are removed when they are old (not found in the last
    # fetch) and inactive. Some item fields can change this basic behavior.
    del_count = 0
    now = timestamp.now()
    has_delete_handler = hasattr(source, "on_delete")
    fetched_ids = {item["id"] for item in updated_items}
    old_item_ids = [item_id for item_id in prior_ids if item_id not in fetched_ids]
    for item_id in old_item_ids:
        item = loader.load_item(source_name, item_id)
        remove = not item["active"]
        # The time-to-live field protects an item from removal until expiry.
        # This is mainly used to avoid old items resurfacing when their source
        # cannot guarantee monotonicity.
        if "ttl" in item:
            ttl_date = item["created"] + item["ttl"]
            if ttl_date > now:
                continue
        # The time-to-die field can force an active item to be removed.
        if "ttd" in item:
            ttd_date = item["created"] + item["ttd"]
            if ttd_date < now:
                remove = True
        # Items to be removed are deleted
        if remove:
            try:
                if has_delete_handler:
                    # Run the delete handler so exceptions prevent deletions
                    source.on_delete(state, item)
                loader.delete_item(source_name, item["id"])
                del_count += 1
            except Exception:
                error.as_item(
                    f'Failed to delete {source_name}/{item["id"]}',
                    traceback.format_exc(),
                )

    # Note update timestamp in state
    state["last_updated"] = timestamp.now()

    # Log counts
    logger.info(
        "{} new item{}, {} deleted item{}".format(
            len(new_items),
            "s" if len(new_items) != 1 else "",
            del_count,
            "s" if del_count != 1 else "",
        )
    )
def item_callback(source_name, itemid):
    """
    Runs the source's callback(state, item) for one item and then saves
    both the item and the source state.

    Any exception, including a source that defines no callback, is recorded
    as an error item instead of being raised.
    """
    try:
        # Load the module with the callback function
        source_module = load_source(source_name)
        callback_missing = not hasattr(source_module, "callback")
        if callback_missing:
            raise ImportError(f"Missing callback in '{source_name}'")

        # Load the source state and the origin item, then execute the callback
        state = loader.load_state(source_name)
        item = loader.load_item(source_name, itemid)
        source_module.callback(state, item)

        # Save any changes the callback made in place
        item.flush()
        state.flush()
    except Exception:
        message = f"Error executing callback for {source_name}/{itemid}"
        error.as_item(message, traceback.format_exc())

View File

@ -17,212 +17,221 @@ import requests
# Module imports
from inquisitor import CACHE_PATH
# Logger used by the helpers and engines defined in this module.
logger = logging.getLogger("inquisitor.templates")
def cache_image(source, url, filename):
    """
    Downloads `url` into the cache and returns the path it is served from.

    source: name of the cache subfolder to store the file in.
    url: the address to download.
    filename: the file name to store the response body under.

    Returns the string "/cache/<source>/<filename>".
    """
    # Define some paths
    path = os.path.join(CACHE_PATH, source)
    file_path = os.path.join(path, filename)
    cached_url = f"/cache/{source}/{filename}"
    # Ensure cache folder. makedirs with exist_ok avoids the race between
    # checking for the folder and creating it, and creates missing parents.
    os.makedirs(path, exist_ok=True)
    # Fetch url
    logger.info(f"Caching {url} to {file_path}")
    # NOTE(review): the response status is not checked, so an error page
    # would be cached as if it were the image - confirm whether callers
    # rely on this best-effort behavior.
    response = requests.get(url)
    # Write file to disk
    with open(file_path, "wb") as f:
        f.write(response.content)
    # Return the inquisitor path to the file
    return cached_url
class LinearCrawler:
    """
    An engine for generating items from web sources that link content
    together in a linear fashion, such as webcomics.

    Subclasses must implement get_start_url, get_next_page_url and
    make_item, and may override max_iterations.
    """

    def fetch_new(self, state):
        """
        Crawls forward from the page recorded in `state`, at most
        max_iterations() pages, and returns the list of items produced.

        An exception from the first page fetch propagates. An exception from
        a later fetch is logged and ends the crawl, returning the items
        gathered so far.
        """
        items = []
        max_iter = self.max_iterations() - 1
        new = self.try_fetch(state)
        items.extend(new)
        for _ in range(max_iter):
            sleep(1)
            # If we've already gotten some items out of this fetch, we don't
            # want to lose them and have the state still be set to the next
            # page, so we wrap further calls in a try block and force return
            # if we hit an error.
            try:
                new = self.try_fetch(state)
            except Exception:
                # Catch Exception rather than everything so that interrupts
                # still stop the crawl, and leave a trace of what was skipped.
                logger.warning("Stopping crawl after fetch error", exc_info=True)
                new = []
            items.extend(new)
            # Cut out early if there was nothing returned
            if not new:
                break
        return items

    def try_fetch(self, state):
        """
        Fetches the page after state["current_page"], or the start page when
        the state has no current page, and turns it into an item.

        Returns a one-item list and advances state["current_page"], or
        returns an empty list when there is no next page.
        """
        # Check for whether a new page should be crawled
        if "current_page" not in state:
            next_page = self.get_start_url()
        else:
            # The current page is downloaded again to look for its next link.
            current = state["current_page"]
            response = requests.get(current)
            soup = BeautifulSoup(response.text, features="html.parser")
            next_page = self.get_next_page_url(current, soup)
            if not next_page:
                return []  # nothing new

        # Download the new page
        logger.info("Fetching " + next_page)
        response = requests.get(next_page)
        soup = BeautifulSoup(response.text, features="html.parser")

        # Create an item from the page
        item = self.make_item(next_page, soup)

        # Update the state and return the item. The state only advances once
        # the item has been built successfully.
        state["current_page"] = next_page
        return [item]

    def max_iterations(self):
        """Maximum number of pages to fetch in one fetch_new call."""
        return 3

    def get_start_url(self):
        """Returns the URL of the first page to crawl. Must be overridden."""
        raise NotImplementedError("get_start_url is required")

    def get_next_page_url(self, url, soup):
        """
        Returns the URL of the page after `url`, given its parsed content,
        or a falsy value when there is none. Must be overridden.
        """
        raise NotImplementedError("get_next_page_url is required")

    def make_item(self, url, soup):
        """Builds an item from a page URL and its parsed content. Must be overridden."""
        raise NotImplementedError("make_item is required")
class RedditScraper:
"""
An engine for generating items from subreddits.
Requires defining source, subreddit_name
fetch new with RedditScraper.fetch_new(state, __name__, reddit)
"""
@staticmethod
def fetch_new(state, name, reddit):
items = []
for name, obj in inspect.getmembers(sys.modules[name]):
if (inspect.isclass(obj)
and issubclass(obj, RedditScraper)
and obj is not RedditScraper
):
sub_items = obj(reddit).get_items()
items.extend(sub_items)
return items
"""
An engine for generating items from subreddits.
Requires defining source, subreddit_name
fetch new with RedditScraper.fetch_new(state, __name__, reddit)
"""
def __init__(self, reddit):
self.reddit = reddit
@staticmethod
def fetch_new(state, name, reddit):
items = []
for name, obj in inspect.getmembers(sys.modules[name]):
if (
inspect.isclass(obj)
and issubclass(obj, RedditScraper)
and obj is not RedditScraper
):
sub_items = obj(reddit).get_items()
items.extend(sub_items)
return items
def get_items(self):
sub_name = self.subreddit_name
logger.info(f'Fetching posts from r/{sub_name}')
subreddit = self.reddit.subreddit(sub_name)
posts = self.subreddit_page(subreddit)
items = []
for post in posts:
if self.filter_post(post):
items.append(self.item_from_post(post))
return items
def __init__(self, reddit):
self.reddit = reddit
def item_from_post(self, post):
item = {
'source': self.source,
'id': post.id,
'title': self.get_title(post),
'link': self.get_link(post),
'time': post.created_utc,
'author': '/u/' + (post.author.name if post.author else "[deleted]"),
'body': self.get_body(post),
'tags': self.get_tags(post),
'ttl': self.get_ttl(post),
}
ttl = self.get_ttl(post)
if ttl is not None: item['ttl'] = ttl
ttd = self.get_ttd(post)
if ttd is not None: item['ttd'] = ttd
tts = self.get_tts(post)
if tts is not None: item['tts'] = tts
callback = self.get_callback(post)
if callback is not None: item['callback'] = callback
return item
def get_items(self):
    """
    Return an item for each post on the subreddit page that passes
    filter_post.
    """
    sub_name = self.subreddit_name
    logger.info(f"Fetching posts from r/{sub_name}")
    listing = self.subreddit_page(self.reddit.subreddit(sub_name))
    return [
        self.item_from_post(entry)
        for entry in listing
        if self.filter_post(entry)
    ]
def subreddit_page(self, subreddit):
return subreddit.hot(limit=25)
def item_from_post(self, post):
    """
    Convert a reddit post into an item dict.

    The item always contains source, id, title, link, time, author, body,
    tags and ttl. The ttd, tts and callback keys are added only when the
    matching get_* hook returns something other than None.
    """
    item = {
        "source": self.source,
        "id": post.id,
        "title": self.get_title(post),
        "link": self.get_link(post),
        "time": post.created_utc,
        # A falsy post.author is rendered as a deleted account.
        "author": "/u/" + (post.author.name if post.author else "[deleted]"),
        "body": self.get_body(post),
        "tags": self.get_tags(post),
        # get_ttl is called exactly once; its value is stored even if None,
        # which matches what the previous double assignment produced.
        "ttl": self.get_ttl(post),
    }
    # Optional fields: only present when the hook supplies a value.
    optional_hooks = (
        ("ttd", self.get_ttd),
        ("tts", self.get_tts),
        ("callback", self.get_callback),
    )
    for key, hook in optional_hooks:
        value = hook(post)
        if value is not None:
            item[key] = value
    return item
def filter_post(self, post):
return True
def subreddit_page(self, subreddit):
return subreddit.hot(limit=25)
def get_title(self, post):
s = '[S] ' if post.spoiler else ''
nsfw = '[NSFW] ' if post.over_18 else ''
return f'{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}'
def filter_post(self, post):
return True
def get_link(self, post):
return f'https://reddit.com{post.permalink}'
def get_title(self, post):
    """
    Build the item title: optional "[S] " and "[NSFW] " markers, then the
    prefixed subreddit name and the post title.
    """
    markers = ""
    if post.spoiler:
        markers += "[S] "
    if post.over_18:
        markers += "[NSFW] "
    return f"{markers}/{post.subreddit_name_prefixed}: {post.title}"
def get_body(self, post):
parts = []
if not post.is_self:
parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
if hasattr(post, 'preview'):
try:
previews = post.preview['images'][0]['resolutions']
small_previews = [p for p in previews if p['width'] < 800]
preview = sorted(small_previews, key=lambda p:-p['width'])[0]
parts.append(f'<img src="{preview["url"]}">')
except:
pass
if getattr(post, 'is_gallery', False):
try:
for gallery_item in post.gallery_data['items']:
media_id = gallery_item['media_id']
metadata = post.media_metadata[media_id]
small_previews = [p for p in metadata['p'] if p['x'] < 800]
preview = sorted(small_previews, key=lambda p:-p['x'])[0]
parts.append(f'<i>link:</i> <a href="{metadata["s"]["u"]}">{metadata["s"]["u"]}</a>')
parts.append(f'<img src="{preview["u"]}">')
except:
pass
if post.selftext:
limit = post.selftext[1024:].find(' ')
preview_body = post.selftext[:1024 + limit]
if len(preview_body) < len(post.selftext):
preview_body += '[...]'
parts.append(f'<p>{preview_body}</p>')
return '<br><hr>'.join(parts)
def get_link(self, post):
return f"https://reddit.com{post.permalink}"
def get_tags(self, post):
tags = ['reddit', post.subreddit_name_prefixed[2:]]
if post.over_18:
tags.append('nsfw')
return tags
def get_body(self, post):
    """
    Render the HTML body for a post.

    Up to four sections are produced and joined with "<br><hr>": a link
    for non-self posts, a preview image, gallery links and images, and a
    truncated copy of the selftext.
    """
    parts = []
    if not post.is_self:
        parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
    if hasattr(post, "preview"):
        # Best effort: a missing or oddly shaped preview just means no image.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        try:
            previews = post.preview["images"][0]["resolutions"]
            small_previews = [p for p in previews if p["width"] < 800]
            # Widest preview that is still under 800 wide.
            preview = sorted(small_previews, key=lambda p: -p["width"])[0]
            parts.append(f'<img src="{preview["url"]}">')
        except Exception:
            pass
    if getattr(post, "is_gallery", False):
        # Best effort as above; entries added before a failure are kept.
        try:
            for gallery_item in post.gallery_data["items"]:
                media_id = gallery_item["media_id"]
                metadata = post.media_metadata[media_id]
                small_previews = [p for p in metadata["p"] if p["x"] < 800]
                preview = sorted(small_previews, key=lambda p: -p["x"])[0]
                parts.append(
                    f'<i>link:</i> <a href="{metadata["s"]["u"]}">{metadata["s"]["u"]}</a>'
                )
                parts.append(f'<img src="{preview["u"]}">')
        except Exception:
            pass
    if post.selftext:
        # Cut at the first space at or after offset 1024 so a word is not
        # split. find() returns -1 when there is no such space; treat that
        # as a cut exactly at 1024 instead of letting -1 shift the slice
        # back to 1023 and drop a character.
        limit = post.selftext[1024:].find(" ")
        if limit < 0:
            limit = 0
        preview_body = post.selftext[: 1024 + limit]
        if len(preview_body) < len(post.selftext):
            preview_body += "[...]"
        parts.append(f"<p>{preview_body}</p>")
    return "<br><hr>".join(parts)
def get_ttl(self, post):
return 60 * 60 * 24 * 7 # 1 week
def get_tags(self, post):
    """
    Return the tag list for a post: "reddit", the subreddit name, and
    "nsfw" when the post is flagged over_18.
    """
    # Drops the first two characters of the prefixed name
    # (presumably the "r/" prefix; confirm against the reddit client).
    subreddit_tag = post.subreddit_name_prefixed[2:]
    base_tags = ["reddit", subreddit_tag]
    return (base_tags + ["nsfw"]) if post.over_18 else base_tags
def get_ttd(self, post):
return None
def get_ttl(self, post):
return 60 * 60 * 24 * 7 # 1 week
def get_tts(self, post):
return None
def get_ttd(self, post):
return None
def get_callback(self, post):
return None
def get_tts(self, post):
return None
def callback(self, state, item):
raise NotImplementedError('callback')
def get_callback(self, post):
return None
def on_create(self, state, item):
raise NotImplementedError('on_create')
def callback(self, state, item):
raise NotImplementedError("callback")
def on_delete(self, state, item):
raise NotImplementedError('on_delete')
def on_create(self, state, item):
raise NotImplementedError("on_create")
def on_delete(self, state, item):
raise NotImplementedError("on_delete")

View File

@ -1,9 +1,11 @@
import time
import datetime
def now():
return int(time.time())
return int(time.time())
def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
dt = datetime.datetime.fromtimestamp(ts)
return dt.strftime(formatstr)
dt = datetime.datetime.fromtimestamp(ts)
return dt.strftime(formatstr)

154
module.nix Normal file
View File

@ -0,0 +1,154 @@
flake: { config, lib, pkgs, ... }:
let
inherit (lib) mkIf mkOption types;
cfg = config.services.inquisitor;
in
{
options = {
services.inquisitor = {
enable = mkOption {
type = types.bool;
default = true;
description = "Enable the Inquisitor aggregator.";
};
listen.addr = mkOption {
type = types.str;
default = "0.0.0.0";
description = "Listen address passed to nginx.";
};
listen.port = mkOption {
type = types.port;
default = 80;
description = "Listen port passed to nginx.";
};
};
};
config =
let
# Get the inquisitor package from the flake.
inquisitor = flake.packages.${pkgs.stdenv.hostPlatform.system}.default;
# Define the inquisitor state directory.
stateDir = "/var/lib/inquisitor";
# Define an scp helper for item callbacks to use.
scp-helper = pkgs.writeShellScriptBin "scp-helper" ''
${pkgs.openssh}/bin/scp -i ${stateDir}/.ssh/inquisitor.key -oStrictHostKeyChecking=no "$@"
'';
# Define the inquisitor service user.
svcUser = {
name = "inquisitor";
group = "inquisitor";
description = "Inquisitor service user";
isSystemUser = true;
shell = pkgs.bashInteractive;
packages = [ inquisitor pkgs.cron ];
};
# Create a config file pointing to the state directory.
inqConfig = pkgs.writeTextFile {
name = "inquisitor.conf";
text = ''
DataPath = ${stateDir}/data/
SourcePath = ${stateDir}/sources/
CachePath = ${stateDir}/cache/
Verbose = false
LogFile = ${stateDir}/inquisitor.log
'';
};
# Create a setup script to ensure the service directory state.
inqSetup = pkgs.writeShellScript "inquisitor-setup.sh" ''
  # Ensure the required directories exist.
  ${pkgs.coreutils}/bin/mkdir -p ${stateDir}/data/inquisitor/
  ${pkgs.coreutils}/bin/mkdir -p ${stateDir}/sources/
  ${pkgs.coreutils}/bin/mkdir -p ${stateDir}/cache/
  # Seed an empty state file on first run only.
  if [ ! -f ${stateDir}/data/inquisitor/state ]; then
    ${pkgs.coreutils}/bin/echo "{}" > ${stateDir}/data/inquisitor/state
  fi

  # Ensure the service owns the folders.
  ${pkgs.coreutils}/bin/chown -R ${svcUser.name} ${stateDir}

  # Ensure the scp helper is present.
  # The link path is passed as the destination: the -t flag of ln names a
  # directory to create links in, so passing the link path to -t fails.
  # -f replaces a previous link (including a dangling one left behind by
  # an older store path) and -T never treats the destination as a directory.
  ${pkgs.coreutils}/bin/ln -sfT ${scp-helper}/bin/scp-helper ${stateDir}/scp-helper
'';
# Create a run script for the service.
inqRun = pkgs.writeShellScript "inquisitor-run.sh" ''
cd ${stateDir}
${inquisitor}/bin/gunicorn \
--bind=localhost:24133 \
--workers=4 \
--timeout 120 \
--log-level debug \
"inquisitor.app:wsgi()"
'';
# Create a wrapper to execute the cli as the service user.
# (needed to avoid creating files in the state dir the service can't read)
inqWrapper = pkgs.writeShellScriptBin "inq" ''
sudo --user=${svcUser.name} ${inquisitor}/bin/inquisitor "$@"
'';
in mkIf cfg.enable
{
users.users.inquisitor = svcUser;
users.groups.inquisitor = {};
# Link the config in /etc to avoid envvar shenanigans
environment.etc."inquisitor.conf".source = inqConfig;
# Give all users the wrapper program.
environment.systemPackages = [ inqWrapper ];
# Allow the sudo in the cli wrapper without password.
security.sudo.extraRules = [{
commands = [{
command = "${inquisitor}/bin/inquisitor";
options = [ "NOPASSWD" ];
}];
runAs = svcUser.name;
groups = [ "users" ];
}];
# Run the setup script on activation.
system.activationScripts.inquisitorSetup = "${inqSetup}";
# Set up the inquisitor service.
systemd.services.inquisitor = {
description = "Inquisitor server";
script = "${inqRun}";
serviceConfig = {
User = svcUser.name;
Type = "simple";
};
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
enable = true;
};
# Set up the nginx reverse proxy to the server.
services.nginx.enable = true;
services.nginx.virtualHosts.inquisitorHost = {
listen = [ cfg.listen ];
locations."/".extraConfig = ''
access_log /var/log/nginx/access.inquisitor.log;
proxy_buffering off;
proxy_pass http://localhost:24133/;
'';
};
networking.firewall.allowedTCPPorts = [ cfg.listen.port ];
# Enable cron so the service can use it to schedule fetches.
services.cron.enable = true;
};
}

180
poetry.lock generated
View File

@ -13,6 +13,27 @@ soupsieve = ">1.2"
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "black"
version = "22.12.0"
description = "The uncompromising code formatter."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
click = ">=8.0.0"
mypy-extensions = ">=0.4.3"
pathspec = ">=0.9.0"
platformdirs = ">=2"
tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""}
[package.extras]
colorama = ["colorama (>=0.4.3)"]
d = ["aiohttp (>=3.7.4)"]
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
uvloop = ["uvloop (>=0.15.2)"]
[[package]]
name = "certifi"
version = "2022.12.7"
@ -51,17 +72,6 @@ category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
[[package]]
name = "feedparser"
version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = true
python-versions = ">=3.6"
[package.dependencies]
sgmllib3k = "*"
[[package]]
name = "flask"
version = "2.2.2"
@ -85,7 +95,7 @@ name = "gunicorn"
version = "20.1.0"
description = "WSGI HTTP Server for UNIX"
category = "main"
optional = true
optional = false
python-versions = ">=3.5"
[package.dependencies]
@ -136,41 +146,32 @@ optional = false
python-versions = ">=3.7"
[[package]]
name = "praw"
version = "7.6.1"
description = "PRAW, an acronym for \"Python Reddit API Wrapper\", is a python package that allows for simple access to Reddit's API."
category = "main"
optional = true
python-versions = "~=3.7"
[package.dependencies]
prawcore = ">=2.1,<3"
update-checker = ">=0.18"
websocket-client = ">=0.54.0"
[package.extras]
ci = ["coveralls"]
dev = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.3.0,<0.5)", "packaging", "pre-commit", "pytest (>=2.7.3)", "requests (>=2.20.1,<3)", "sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
lint = ["pre-commit", "sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
readthedocs = ["sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
test = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.3.0,<0.5)", "pytest (>=2.7.3)", "requests (>=2.20.1,<3)"]
name = "mypy-extensions"
version = "0.4.3"
description = "Experimental type system extensions for programs checked with the mypy typechecker."
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "prawcore"
version = "2.3.0"
description = "Low-level communication layer for PRAW 4+."
category = "main"
optional = true
python-versions = "~=3.6"
name = "pathspec"
version = "0.10.3"
description = "Utility library for gitignore style pattern matching of file paths."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.dependencies]
requests = ">=2.6.0,<3.0"
[[package]]
name = "platformdirs"
version = "2.6.2"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
python-versions = ">=3.7"
[package.extras]
ci = ["coveralls"]
dev = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.4.0,<0.5)", "betamax-serializers (>=0.2.0,<0.3)", "black", "flake8", "flynt", "mock (>=0.8)", "pre-commit", "pydocstyle", "pytest", "testfixtures (>4.13.2,<7)"]
lint = ["black", "flake8", "flynt", "pre-commit", "pydocstyle"]
test = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.4.0,<0.5)", "betamax-serializers (>=0.2.0,<0.3)", "mock (>=0.8)", "pytest", "testfixtures (>4.13.2,<7)"]
docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.5)"]
test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
[[package]]
name = "requests"
@ -195,7 +196,7 @@ name = "setuptools"
version = "65.6.3"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
category = "main"
optional = true
optional = false
python-versions = ">=3.7"
[package.extras]
@ -203,14 +204,6 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
[[package]]
name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
category = "main"
optional = true
python-versions = "*"
[[package]]
name = "soupsieve"
version = "2.3.2.post1"
@ -220,20 +213,12 @@ optional = false
python-versions = ">=3.6"
[[package]]
name = "update-checker"
version = "0.18.0"
description = "A python module that will check for package updates."
category = "main"
optional = true
python-versions = "*"
[package.dependencies]
requests = ">=2.3.0"
[package.extras]
dev = ["black", "flake8", "pytest (>=2.7.3)"]
lint = ["black", "flake8"]
test = ["pytest (>=2.7.3)"]
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
[[package]]
name = "urllib3"
@ -248,19 +233,6 @@ brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "websocket-client"
version = "1.4.2"
description = "WebSocket client for Python with low level API options"
category = "main"
optional = true
python-versions = ">=3.7"
[package.extras]
docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"]
optional = ["python-socks", "wsaccel"]
test = ["websockets"]
[[package]]
name = "werkzeug"
version = "2.2.2"
@ -275,19 +247,30 @@ MarkupSafe = ">=2.1.1"
[package.extras]
watchdog = ["watchdog"]
[extras]
dev = ["praw", "gunicorn", "feedparser"]
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "2d6e1c8843f9821ef246fdeeeef572bcdaa2db452bbaa74531e8af91f8d89bb5"
content-hash = "fbb022ff050d55a4b5814e1ce7abcf002c5e32cc552d4171cd780dc99d0aa640"
[metadata.files]
beautifulsoup4 = [
{file = "beautifulsoup4-4.11.1-py3-none-any.whl", hash = "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30"},
{file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"},
]
black = [
{file = "black-22.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eedd20838bd5d75b80c9f5487dbcb06836a43833a37846cf1d8c1cc01cef59d"},
{file = "black-22.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:159a46a4947f73387b4d83e87ea006dbb2337eab6c879620a3ba52699b1f4351"},
{file = "black-22.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30b212bffeb1e252b31dd269dfae69dd17e06d92b87ad26e23890f3efea366f"},
{file = "black-22.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:7412e75863aa5c5411886804678b7d083c7c28421210180d67dfd8cf1221e1f4"},
{file = "black-22.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c116eed0efb9ff870ded8b62fe9f28dd61ef6e9ddd28d83d7d264a38417dcee2"},
{file = "black-22.12.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1f58cbe16dfe8c12b7434e50ff889fa479072096d79f0a7f25e4ab8e94cd8350"},
{file = "black-22.12.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77d86c9f3db9b1bf6761244bc0b3572a546f5fe37917a044e02f3166d5aafa7d"},
{file = "black-22.12.0-cp38-cp38-win_amd64.whl", hash = "sha256:82d9fe8fee3401e02e79767016b4907820a7dc28d70d137eb397b92ef3cc5bfc"},
{file = "black-22.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c69b23df9b44247bd88e1d7e90154336ac4992502d4197bdac35dd7ee3320"},
{file = "black-22.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:559c7a1ba9a006226f09e4916060982fd27334ae1998e7a38b3f33a37f7a2148"},
{file = "black-22.12.0-py3-none-any.whl", hash = "sha256:436cc9167dd28040ad90d3b404aec22cedf24a6e4d7de221bec2730ec0c97bcf"},
{file = "black-22.12.0.tar.gz", hash = "sha256:229351e5a18ca30f447bf724d007f890f97e13af070bb6ad4c0a441cd7596a2f"},
]
certifi = [
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
@ -304,10 +287,6 @@ colorama = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
flask = [
{file = "Flask-2.2.2-py3-none-any.whl", hash = "sha256:b9c46cc36662a7949f34b52d8ec7bb59c0d74ba08ba6cb9ce9adc1d8676d9526"},
{file = "Flask-2.2.2.tar.gz", hash = "sha256:642c450d19c4ad482f96729bd2a8f6d32554aa1e231f4f6b4e7e5264b16cca2b"},
@ -370,13 +349,17 @@ markupsafe = [
{file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"},
{file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
]
praw = [
{file = "praw-7.6.1-py3-none-any.whl", hash = "sha256:33ac091cd061d9bd607ae231d4ea40025060a4ecb5e11baa5ce9a25c6d2f5a6b"},
{file = "praw-7.6.1.tar.gz", hash = "sha256:07fc95ffc52187351ab77b81fa6910c66e8fa084faf4b060b90864ad186dfb9e"},
mypy-extensions = [
{file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"},
{file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"},
]
prawcore = [
{file = "prawcore-2.3.0-py3-none-any.whl", hash = "sha256:48c17db447fa06a13ca3e722201f443031437708daa736c05a1df895fbcceff5"},
{file = "prawcore-2.3.0.tar.gz", hash = "sha256:daf1ccd4b7a80dc4e6567d48336d782e94a9a6dad83770fc2edf76dc9a84f56d"},
pathspec = [
{file = "pathspec-0.10.3-py3-none-any.whl", hash = "sha256:3c95343af8b756205e2aba76e843ba9520a24dd84f68c22b9f93251507509dd6"},
{file = "pathspec-0.10.3.tar.gz", hash = "sha256:56200de4077d9d0791465aa9095a01d421861e405b5096955051deefd697d6f6"},
]
platformdirs = [
{file = "platformdirs-2.6.2-py3-none-any.whl", hash = "sha256:83c8f6d04389165de7c9b6f0c682439697887bca0aa2f1c87ef1826be3584490"},
{file = "platformdirs-2.6.2.tar.gz", hash = "sha256:e1fea1fe471b9ff8332e229df3cb7de4f53eeea4998d3b6bfff542115e998bd2"},
]
requests = [
{file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"},
@ -386,25 +369,18 @@ setuptools = [
{file = "setuptools-65.6.3-py3-none-any.whl", hash = "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54"},
{file = "setuptools-65.6.3.tar.gz", hash = "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
soupsieve = [
{file = "soupsieve-2.3.2.post1-py3-none-any.whl", hash = "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759"},
{file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"},
]
update-checker = [
{file = "update_checker-0.18.0-py3-none-any.whl", hash = "sha256:cbba64760a36fe2640d80d85306e8fe82b6816659190993b7bdabadee4d4bbfd"},
{file = "update_checker-0.18.0.tar.gz", hash = "sha256:6a2d45bb4ac585884a6b03f9eade9161cedd9e8111545141e9aa9058932acb13"},
tomli = [
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
{file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
]
urllib3 = [
{file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"},
{file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"},
]
websocket-client = [
{file = "websocket-client-1.4.2.tar.gz", hash = "sha256:d6e8f90ca8e2dd4e8027c4561adeb9456b54044312dba655e7cae652ceb9ae59"},
{file = "websocket_client-1.4.2-py3-none-any.whl", hash = "sha256:d6b06432f184438d99ac1f456eaf22fe1ade524c3dd16e661142dc54e9cba574"},
]
werkzeug = [
{file = "Werkzeug-2.2.2-py3-none-any.whl", hash = "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5"},
{file = "Werkzeug-2.2.2.tar.gz", hash = "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f"},

View File

@ -11,16 +11,14 @@ python = "^3.10"
flask = "^2.2.2"
requests = "^2.28.1"
beautifulsoup4 = "^4.11.1"
praw = {version = "^7.6.1", optional = true}
gunicorn = {version = "^20.1.0", optional = true}
feedparser = {version = "^6.0.10", optional = true}
[tool.poetry.extras]
dev = ["praw", "gunicorn", "feedparser"]
gunicorn = "^20.1.0"
[tool.poetry.scripts]
inquisitor = "inquisitor.cli:main"
[tool.poetry.group.dev.dependencies]
black = "^22.12.0"
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"