Compare commits

...

52 Commits

Author SHA1 Message Date
Jaculabilis dd3a8016d3 Revert default.nix change 2022-12-10 23:52:40 +00:00
Jaculabilis 4b2d488dbb Move package def to flake and update to 22.11 2022-12-10 20:48:00 +00:00
Jaculabilis 37e1d6e182 Update optional dependencies 2022-12-10 20:46:55 +00:00
Tim Van Baak 74ab0d9542 Add the other default package output format for compat 2022-12-02 07:20:15 +00:00
Tim Van Baak c66a76add4 Update dependencies 2022-12-02 07:08:37 +00:00
Tim Van Baak 006a3b8560 Add flake 2022-12-02 06:01:36 +00:00
Tim Van Baak db05f08cf2 Update default.nix nixpkgs rev 2022-12-02 05:54:44 +00:00
Tim Van Baak a6d961aba9 Fix summary display 2021-03-17 20:59:48 -07:00
Tim Van Baak 8e2369eff9 Prevent dupliate callbacks 2021-03-17 20:59:31 -07:00
Tim Van Baak 5d0c5b1ae3 Add previews for reddit galleries 2021-03-08 13:13:41 -08:00
Tim Van Baak 77ad88124f Show link text in reddit link post body 2021-03-08 13:13:25 -08:00
Tim Van Baak 1416c2e518 Fix test error when no subfeeds are present 2021-03-08 13:12:59 -08:00
Tim Van Baak 5b675acd28 Fix default reddit source ttl type 2020-12-31 20:07:51 -08:00
Tim Van Baak 0172d5b70e Add feedparser as a dependency 2020-12-31 19:05:41 -08:00
Tim Van Baak a678b67019 Support subfeeds.conf and default subfeed 2020-12-30 14:01:31 -08:00
Tim Van Baak ec2382e1bd Change file handler to rotate logs 2020-12-30 13:06:12 -08:00
Tim Van Baak 193b445bd5 Initialize default logging in wsgi mode 2020-12-30 12:53:55 -08:00
Tim Van Baak 97bb18fc3e Add gunicorn to dev dependencies 2020-12-30 12:53:41 -08:00
Tim Van Baak 4320cb63e5 Add port argument to run cli 2020-12-30 12:22:49 -08:00
Tim Van Baak 1d44d0ea4a Add __main__ so flask debug works 2020-12-30 12:20:52 -08:00
Tim Van Baak 13a0e8c97b Add favicon and title 2020-12-30 10:33:27 -08:00
Tim Van Baak af7f84cae7 Rename feed.html 2020-12-30 10:13:27 -08:00
Tim Van Baak 933596d25c Fix reddit sources needing to be named "reddit" 2020-12-29 20:54:48 -08:00
Tim Van Baak d10b4f9205 Enable updating the inquisitor source 2020-12-29 20:50:25 -08:00
Tim Van Baak 4ded4ded42 Fix routing 2020-12-29 20:15:54 -08:00
Tim Van Baak 428cf8df35 Re-fix source importing 2020-12-29 20:15:54 -08:00
Tim Van Baak a1a15153c0 Make extra dependencies installable 2020-12-29 20:15:54 -08:00
Tim Van Baak 4315cfa7be Add praw and gunicorn as optional dependencies instead of dev 2020-12-29 16:24:04 -08:00
Tim Van Baak 9001bd8f92 Make package buildable by nix 2020-12-29 01:22:19 -08:00
Tim Van Baak 8aae7beecd Add poetry config and lock files 2020-12-29 00:11:12 -08:00
Tim Van Baak 9ca0670fbd Add shell.nix for using poetry 2020-12-28 23:13:17 -08:00
Tim Van Baak 79574e3241 Add wsgi interface 2020-12-28 21:30:31 -08:00
Tim Van Baak 27e04e601a Add subfeed routes 2020-12-28 17:47:36 -08:00
Tim Van Baak 9843fd93c4 Add subfeed configs 2020-12-28 17:07:34 -08:00
Tim Van Baak e0f4eec15a Tighten up Verbose options 2020-12-28 15:58:07 -08:00
Tim Van Baak b3293fe87b Fix source importing 2020-12-28 15:52:52 -08:00
Tim Van Baak 997df5f54f Move console script entry to cli module 2020-12-28 15:12:29 -08:00
Tim Van Baak 6c25692d3d Trim gitignore 2020-12-28 14:47:37 -08:00
Tim Van Baak 8ddc3751f1 Add console script 2020-12-28 14:47:25 -08:00
Tim Van Baak 5fa0c9d553 Add test command for checking the config file 2020-12-28 14:47:15 -08:00
Tim Van Baak 16188a3f3a Refactor configuration to use a file exclusively 2020-12-28 14:46:26 -08:00
Tim Van Baak 9e31bd74b1 Add reddit scraper to source templates 2020-08-14 11:12:49 -07:00
Tim Van Baak 7a85aa3dac Add source engine template for webcomics 2020-08-11 23:53:21 -07:00
Tim Van Baak 78c3f44735 Add cache for non-item content 2020-08-11 23:52:06 -07:00
Tim Van Baak 814de5f094 Add exceptions to item triggers 2020-08-11 23:52:06 -07:00
Tim Van Baak 364480c08d Add create and delete triggers 2020-08-06 23:48:10 -07:00
Tim Van Baak c116476487 Clean up source update code 2020-08-06 23:12:59 -07:00
Tim Van Baak 024d81336d Clean up item loading code 2020-08-06 15:38:24 -07:00
Tim Van Baak a9e313225f Add import demo source 2020-08-06 14:43:55 -07:00
Tim Van Baak d72c0326fb Clean up source loading code 2020-08-06 14:43:46 -07:00
Tim Van Baak 6009a23283 Rename importer to sources 2020-08-06 13:49:36 -07:00
Tim Van Baak c3a83a80cc Add requirements.txt 2020-08-06 13:33:11 -07:00
24 changed files with 1578 additions and 374 deletions

.gitignore

@@ -121,11 +121,3 @@ dmypy.json
 # Pyre type checker
 .pyre/
-
-# Praw files
-praw.ini
-
-# Inquisitor scratch directory
-dungeon/

default.nix (new file)

@@ -0,0 +1,8 @@
{ pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/4d2b37a84fad1091b9de401eb450aae66f1a741e.tar.gz") {}
}:
let
  app = pkgs.poetry2nix.mkPoetryApplication {
    projectDir = ./.;
  };
in app.dependencyEnv

flake.lock (new file)

@@ -0,0 +1,44 @@
{
  "nodes": {
    "flake-compat": {
      "flake": false,
      "locked": {
        "lastModified": 1668681692,
        "narHash": "sha256-Ht91NGdewz8IQLtWZ9LCeNXMSXHUss+9COoqu6JLmXU=",
        "owner": "edolstra",
        "repo": "flake-compat",
        "rev": "009399224d5e398d03b22badca40a37ac85412a1",
        "type": "github"
      },
      "original": {
        "owner": "edolstra",
        "repo": "flake-compat",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1669833724,
        "narHash": "sha256-/HEZNyGbnQecrgJnfE8d0WC5c1xuPSD2LUpB6YXlg4c=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "4d2b37a84fad1091b9de401eb450aae66f1a741e",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "refs/tags/22.11",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "flake-compat": "flake-compat",
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
}

flake.nix (new file)

@@ -0,0 +1,27 @@
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs?ref=refs/tags/22.11";
    flake-compat = {
      url = "github:edolstra/flake-compat";
      flake = false;
    };
  };
  outputs = { self, nixpkgs, flake-compat }:
    let
      system = "x86_64-linux";
      pkgs = nixpkgs.legacyPackages.${system};
    in
    {
      packages.${system}.default =
        (pkgs.poetry2nix.mkPoetryApplication {
          projectDir = ./.;
        }).dependencyEnv;
      defaultPackage.${system} = self.packages.${system}.default;
      devShell.${system} = pkgs.mkShell {
        buildInputs = [ (pkgs.python3.withPackages (p: [p.poetry])) ];
      };
    };
}

inquisitor/__init__.py

@@ -0,0 +1,2 @@
from inquisitor.configs import CACHE_PATH
from inquisitor.templates import cache_image, LinearCrawler, RedditScraper

inquisitor/__main__.py

@@ -1,67 +1,2 @@
-# Standard library imports
-import argparse
-
-# Application imports
-from inquisitor import cli
-from inquisitor import configs
-
-from signal import signal, SIGPIPE, SIG_DFL
-signal(SIGPIPE, SIG_DFL)
-
-def parse_args(valid_commands):
-    command_descs = "\n".join([
-        "- {0}: {1}".format(name, func.__doc__)
-        for name, func in valid_commands.items()])
-    parser = argparse.ArgumentParser(
-        description="Available commands:\n{}\n".format(command_descs),
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        add_help=False)
-    parser.add_argument("command",
-        nargs="?",
-        default="help",
-        help="The command to execute",
-        choices=valid_commands,
-        metavar="command")
-    parser.add_argument("args",
-        nargs=argparse.REMAINDER,
-        help="Command arguments",
-        metavar="args")
-    parser.add_argument("-v",
-        action="store_true",
-        dest="verbose",
-        help="Enable debug logging")
-
-    global print_usage
-    print_usage = parser.print_help
-
-    return parser.parse_args()
-
-def command_help(args):
-    """Print this help message and exit."""
-    print_usage()
-    return 0
-
-def main():
-    # Enumerate valid commands.
-    commands = {
-        name[8:] : func
-        for name, func in vars(cli).items()
-        if name.startswith("command_")}
-    commands['help'] = command_help
-
-    args = parse_args(commands)
-
-    # Configure logging.
-    if args.verbose:
-        configs.log_verbose()
-
-    # Execute command.
-    if args.command:
-        return commands[args.command](args.args)
-
-if __name__ == "__main__":
-    import sys
-    sys.exit(main())
+from cli import main
+main()

inquisitor/app.py

@@ -4,11 +4,18 @@ import os
 import traceback

 # Third party imports
-from flask import Flask, render_template, request, jsonify
+from flask import Flask, render_template, request, jsonify, abort, redirect, url_for

 # Application imports
-from inquisitor.configs import logger, DUNGEON_PATH
-from inquisitor import importer, loader, timestamp
+from inquisitor.configs import (
+    DUNGEON_PATH,
+    SOURCES_PATH,
+    CACHE_PATH,
+    subfeeds,
+    get_subfeed_overrides,
+    logger,
+    init_default_logging)
+from inquisitor import sources, loader, timestamp

 # Globals
 app = Flask(__name__)
@@ -27,6 +34,32 @@ def datetimeformat(value):
 @app.route("/")
 def root():
+    return redirect(url_for('feed'))
+
+@app.route("/feed/")
+def feed():
+    return feed_for_sources(source_names=None)
+
+@app.route("/feed/<string:feed_name>/")
+def subfeed(feed_name):
+    # Check for and apply subfeed overrides
+    subfeed_overrides = get_subfeed_overrides()
+    subfeed_config = subfeed_overrides or subfeeds or {}
+    # The built-in inquisitor subfeed contains sources not in another subfeed
+    if feed_name == 'inquisitor':
+        all_sources = os.listdir(DUNGEON_PATH)
+        for subfeed, sources in subfeed_config.items():
+            for source_name in sources:
+                if source_name in all_sources:
+                    all_sources.remove(source_name)
+        return feed_for_sources(all_sources)
+    if feed_name not in subfeed_config:
+        return abort(404)
+    return feed_for_sources(subfeed_config[feed_name])
+
+def feed_for_sources(source_names):
     # Determine exclusion filters
     filters = []
     wl_param = request.args.get('only')
@@ -40,7 +73,7 @@ def root():
     # Get all active+filtered items and all active tags
     total = 0
-    items, errors = loader.load_active_items()
+    items, errors = loader.load_active_items(source_names)
     active_items = []
     active_tags = {}
     for item in items:
@@ -94,7 +127,7 @@ def root():
     selection = active_items[:100]

-    return render_template("feed.html",
+    return render_template("feed.jinja2",
         items=selection,
         mdeac=[
             {'source': item['source'], 'itemid': item['id']}
@@ -106,8 +139,7 @@ def deactivate():
     params = request.get_json()
     if 'source' not in params and 'itemid' not in params:
         logger.error("Bad request params: {}".format(params))
-    item = loader.WritethroughDict(os.path.join(
-        DUNGEON_PATH, params['source'], params['itemid'] + '.item'))
+    item = loader.load_item(params['source'], params['itemid'])
     if item['active']:
         logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
         item['active'] = False
@@ -118,8 +150,7 @@ def punt():
     params = request.get_json()
     if 'source' not in params and 'itemid' not in params:
         logger.error("Bad request params: {}".format(params))
-    item = loader.WritethroughDict(os.path.join(
-        DUNGEON_PATH, params['source'], params['itemid'] + '.item'))
+    item = loader.load_item(params['source'], params['itemid'])
     tomorrow = datetime.now() + timedelta(days=1)
     morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
     til_then = morning.timestamp() - item['created']
@@ -134,8 +165,7 @@ def mass_deactivate():
     for info in params.get('items', []):
         source = info['source']
         itemid = info['itemid']
-        item = loader.WritethroughDict(os.path.join(
-            DUNGEON_PATH, source, itemid + ".item"))
+        item = loader.load_item(source, itemid)
         if item['active']:
             logger.debug(f"Deactivating {info['source']}/{info['itemid']}")
             item['active'] = False
@@ -146,5 +176,19 @@ def callback():
     params = request.get_json()
     if 'source' not in params and 'itemid' not in params:
         logger.error("Bad request params: {}".format(params))
-    importer.item_callback(params['source'], params['itemid'])
+    logger.info('Executing callback for {}/{}'.format(params['source'], params['itemid']))
+    sources.item_callback(params['source'], params['itemid'])
     return jsonify({})
+
+@app.route('/cache/<path:cache_path>')
+def cache(cache_path):
+    path = os.path.join(CACHE_PATH, cache_path)
+    if not os.path.isfile(path):
+        return abort(404)
+    with open(path, 'rb') as f:
+        return f.read()
+
+def wsgi():
+    init_default_logging()
+    return app
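All of the POST routes above (/deactivate/, /punt/, /mass-deactivate/, /callback/) take a JSON body naming the item by 'source' and 'itemid'. A minimal sketch of driving one of them from Python, assuming a local instance on the CLI's default port 5000 and a hypothetical item demo/abc123:

    import requests

    # Ask the server to deactivate one item; the route reads
    # 'source' and 'itemid' out of the JSON request body.
    resp = requests.post(
        'http://localhost:5000/deactivate/',
        json={'source': 'demo', 'itemid': 'abc123'})
    print(resp.status_code)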

inquisitor/cli.py

@@ -4,9 +4,39 @@ import json
 import logging
 import os
 import random
+import sys

 # Application imports
-from inquisitor.configs import logger, DUNGEON_PATH, SOURCES_PATH
+from inquisitor.configs import logger, DUNGEON_PATH, SOURCES_PATH, add_logging_handler
+
+def command_test(args):
+    """Echo config file values."""
+    from inquisitor.configs.resolver import (
+        config_path,
+        CONFIG_DATA, data_path,
+        CONFIG_SOURCES, source_path,
+        CONFIG_CACHE, cache_path,
+        CONFIG_LOGFILE, log_file,
+        CONFIG_VERBOSE, is_verbose,
+        CONFIG_SUBFEEDS, subfeeds,
+    )
+    subfeeds = '; '.join(
+        '{0}: {1}'.format(
+            sf_name,
+            ' '.join(sf_sources)
+        )
+        for sf_name, sf_sources
+        in subfeeds.items()
+    ) if subfeeds else ''
+    print(f'Inquisitor configured from {config_path}')
+    print(f' {CONFIG_DATA} = {data_path}')
+    print(f' {CONFIG_SOURCES} = {source_path}')
+    print(f' {CONFIG_CACHE} = {cache_path}')
+    print(f' {CONFIG_LOGFILE} = {log_file}')
+    print(f' {CONFIG_VERBOSE} = {is_verbose}')
+    print(f' {CONFIG_SUBFEEDS} = {subfeeds}')
+    return 0

 def command_update(args):
@@ -30,7 +60,7 @@ def command_update(args):
         logger.error("Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources")

     # Update sources
-    from inquisitor.importer import update_sources
+    from inquisitor.sources import update_sources
     update_sources(*args.source)
     return 0
@@ -107,23 +137,17 @@ def command_add(args):
     source = args.source or 'inquisitor'
     cell_path = os.path.join(DUNGEON_PATH, source)
-    if not os.path.isdir(cell_path):
-        if args.create:
-            os.mkdir(cell_path)
-            state_path = os.path.join(cell_path, "state")
-            with open(state_path, 'w', encoding='utf8') as f:
-                f.write(json.dumps({}))
-        else:
-            logger.error("Source '{}' does not exist".format(source))
-            return -1
-    from inquisitor.importer import populate_new
+    if args.create:
+        from inquisitor.sources import ensure_cell
+        ensure_cell(source)
+    elif not os.path.isdir(cell_path):
+        logger.error("Source '{}' does not exist".format(source))
+        return -1
     item = {
-        'id': '{:x}'.format(random.getrandbits(16 * 4)),
-        'source': 'inquisitor'
+        'id': args.id or '{:x}'.format(random.getrandbits(16 * 4)),
+        'source': source,
     }
-    if args.id: item['id'] = str(args.id)
-    if args.source: item['source'] = str(args.source)
     if args.title: item['title'] = str(args.title)
     if args.link: item['link'] = str(args.link)
     if args.time: item['time'] = int(args.time)
@@ -133,13 +157,10 @@ def command_add(args):
     if args.ttl: item['ttl'] = int(args.ttl)
     if args.ttd: item['ttd'] = int(args.ttd)
     if args.tts: item['tts'] = int(args.tts)
-    populate_new(item['source'], item)
-    s = json.dumps(item, indent=2)
-    path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + '.item')
-    with open(path, 'w', encoding='utf8') as f:
-        f.write(s)
-    logger.info(item)
+    from inquisitor.loader import new_item
+    saved_item = new_item(source, item)
+    logger.info(saved_item)

 def command_feed(args):
@@ -152,15 +173,14 @@ def command_feed(args):
     from inquisitor import loader
     from inquisitor import timestamp

-    items, errors = loader.load_active_items()
+    items, errors = loader.load_active_items(source_names=None)
     if not items and not errors:
         print("Feed is empty")
         return 0

     if errors:
         items.insert(0, {
-            'id': 'read-errors',
-            'title': '{} read errors'.format(len(errors)),
+            'title': '{} read errors: {}'.format(len(errors), ' '.join(errors)),
             'body': "\n".join(errors)
         })
@@ -185,7 +205,7 @@ def command_feed(args):
         print("| {0:<{1}} |".format(info1, width - 4))
         created = timestamp.stamp_to_readable(item['created']) if 'created' in item else ""
         info2 = "{0} {1} {2}".format(
-            item['source'], item['id'], created)
+            item.get('source', ''), item.get('id', ''), created)
         print("| {0:<{1}} |".format(info2, width - 4))
         print('+' + (width - 2) * '-' + '+')
         print()
@@ -198,12 +218,73 @@ def command_run(args):
         description=command_run.__doc__,
         add_help=False)
     parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--port", type=int, default=5000)
     args = parser.parse_args(args)

     try:
         from inquisitor.app import app
-        app.run(debug=args.debug)
+        app.run(port=args.port, debug=args.debug)
         return 0
     except Exception as e:
         logger.error(e)
         return -1
+
+def command_help(args):
+    """Print this help message and exit."""
+    print_usage()
+    return 0
+
+def main():
+    """CLI entry point"""
+    # Enable piping
+    from signal import signal, SIGPIPE, SIG_DFL
+    signal(SIGPIPE, SIG_DFL)
+
+    # Collect the commands from this module
+    import inquisitor.cli
+    commands = {
+        name[8:] : func
+        for name, func in vars(inquisitor.cli).items()
+        if name.startswith('command_')
+    }
+    descriptions = "\n".join([
+        "- {0}: {1}".format(name, func.__doc__)
+        for name, func in commands.items()])
+
+    # Set up the parser
+    parser = argparse.ArgumentParser(
+        description="Available commands:\n{}\n".format(descriptions),
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        add_help=False)
+    parser.add_argument("command",
+        nargs="?",
+        default="help",
+        help="The command to execute",
+        choices=commands,
+        metavar="command")
+    parser.add_argument("args",
+        nargs=argparse.REMAINDER,
+        help="Command arguments",
+        metavar="args")
+    parser.add_argument("-v",
+        action="store_true",
+        dest="verbose",
+        help="Enable debug logging")
+
+    # Extract the usage print for command_help
+    global print_usage
+    print_usage = parser.print_help
+
+    args = parser.parse_args()
+
+    # Initialize a console logger
+    add_logging_handler(verbose=args.verbose, log_filename=None)
+
+    # Execute command
+    if args.command:
+        sys.exit(commands[args.command](args.args))
+    else:
+        print("command required")
+        sys.exit(0)
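Note that main() builds its command table by scanning this module for functions named command_*, so the CLI is extended just by defining one more function in that style; the docstring becomes the help text. A hypothetical example (command_hello is illustrative, not part of this changeset):

    def command_hello(args):
        """Print a greeting and exit."""
        print('hello from inquisitor')
        return 0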

inquisitor/configs.py (deleted)

@@ -1,23 +0,0 @@
import os
import logging

DUNGEON_PATH = os.path.abspath(os.environ.get("INQUISITOR_DUNGEON") or "./dungeon")
SOURCES_PATH = os.path.abspath(os.environ.get("INQUISITOR_SOURCES") or "./sources")

logger = logging.getLogger("inquisitor")
handler = logging.StreamHandler()
logger.addHandler(handler)

def log_normal():
    logger.setLevel(logging.INFO)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('[{levelname}] {message}', style="{")
    handler.setFormatter(formatter)

def log_verbose():
    logger.setLevel(logging.DEBUG)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[{asctime}] [{levelname}:{filename}:{lineno}] {message}', style="{")
    handler.setFormatter(formatter)

log_normal()

inquisitor/configs/__init__.py (new file)

@@ -0,0 +1,10 @@
from .resolver import data_path as DUNGEON_PATH
from .resolver import source_path as SOURCES_PATH
from .resolver import cache_path as CACHE_PATH
from .resolver import (
    logger,
    subfeeds)
from .resolver import (
    add_logging_handler,
    init_default_logging,
    get_subfeed_overrides)

inquisitor/configs/resolver.py (new file)

@@ -0,0 +1,176 @@
import os
import logging
import logging.handlers  # explicit import needed for RotatingFileHandler below

# Constants governing config resolution:
# Path to the config file, containing key-value pairs of the other settings
CONFIG_ENVVAR = 'INQUISITOR_CONFIG'
DEFAULT_CONFIG_PATH = '/etc/inquisitor.conf'

# Path to the folder where items are stored
CONFIG_DATA = 'DataPath'
DEFAULT_DATA_PATH = '/var/inquisitor/data/'

# Path to the folder where source modules are stored
CONFIG_SOURCES = 'SourcePath'
DEFAULT_SOURCES_PATH = '/var/inquisitor/sources/'

# Path to the folder where cached files are stored
CONFIG_CACHE = 'CachePath'
DEFAULT_CACHE_PATH = '/var/inquisitor/cache/'

# Path to a log file where logging will be redirected
CONFIG_LOGFILE = 'LogFile'
DEFAULT_LOG_FILE = None

# Whether logging is verbose
CONFIG_VERBOSE = 'Verbose'
DEFAULT_VERBOSITY = 'false'

# Subfeed source lists, with each subfeed config separated by lines and
# sources within a subfeed separated by spaces
CONFIG_SUBFEEDS = 'Subfeeds'
DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = 'subfeeds.conf'

def read_config_file(config_path):
    """
    Reads a config file of key-value pairs, where non-blank lines are
    either comments beginning with the character '#' or keys and values
    separated by the character '='.
    """
    # Parse the config file into key-value pairs
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f'No config file found at {config_path}')
    accumulated_configs = {}
    current_key = None
    with open(config_path, 'r', encoding='utf8') as cfg:
        line_no = 0
        for line in cfg:
            line_no += 1
            # Skip blank lines and comments
            if not line.strip() or line.lstrip().startswith('#'):
                continue
            # Accumulate config keyvalue pairs
            if '=' in line:
                # "key = value" begins a new keyvalue pair
                current_key, value = line.split('=', maxsplit=1)
                current_key = current_key.strip()
                accumulated_configs[current_key] = value.strip()
            else:
                # If there's no '=' and no previous key, throw
                if not current_key:
                    raise ValueError(f'Invalid config format on line {line_no}')
                else:
                    accumulated_configs[current_key] += '\n' + line.strip()
    return accumulated_configs

def parse_subfeed_value(value):
    sf_defs = [sf.strip() for sf in value.split('\n') if sf.strip()]
    subfeeds = {}
    for sf_def in sf_defs:
        if ':' not in sf_def:
            raise ValueError(f'Invalid subfeed definition: {sf_def}')
        sf_name, sf_sources = sf_def.split(':', maxsplit=1)
        sf_sources = sf_sources.split()
        subfeeds[sf_name.strip()] = [source.strip() for source in sf_sources]
    return subfeeds

# Read envvar for config file location, with fallback to default
config_path = os.path.abspath(
    os.environ.get(CONFIG_ENVVAR) or
    DEFAULT_CONFIG_PATH
)
configs = read_config_file(config_path)

# Extract and validate config values
data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
if not os.path.isabs(data_path):
    raise ValueError(f'Non-absolute data path: {data_path}')
if not os.path.isdir(data_path):
    raise FileNotFoundError(f'Cannot find directory {data_path}')

source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
if not os.path.isabs(source_path):
    raise ValueError(f'Non-absolute source path: {source_path}')
if not os.path.isdir(source_path):
    raise FileNotFoundError(f'Cannot find directory {source_path}')

cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
if not os.path.isabs(cache_path):
    raise ValueError(f'Non-absolute cache path: {cache_path}')
if not os.path.isdir(cache_path):
    raise FileNotFoundError(f'Cannot find directory {cache_path}')

log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
if log_file and not os.path.isabs(log_file):
    raise ValueError(f'Non-absolute log file path: {log_file}')

is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
if is_verbose != 'true' and is_verbose != 'false':
    raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
is_verbose = (is_verbose == 'true')

subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
if subfeeds:
    subfeeds = parse_subfeed_value(subfeeds)

def get_subfeed_overrides():
    """
    Check for and parse the secondary subfeed configuration file
    """
    path = os.path.join(source_path, SUBFEED_CONFIG_FILE)
    if not os.path.isfile(path):
        return None
    overrides = read_config_file(path)
    if CONFIG_SUBFEEDS not in overrides:
        return None
    value = overrides[CONFIG_SUBFEEDS]
    if not value:
        return None
    parsed_value = parse_subfeed_value(value)
    return parsed_value

# Set up logging
logger = logging.getLogger("inquisitor")
logger.setLevel(logging.DEBUG)

def add_logging_handler(verbose, log_filename):
    """
    Adds a logging handler according to the given settings
    """
    log_format = (
        '[{asctime}] [{levelname}:{filename}:{lineno}] {message}'
        if verbose else
        '[{levelname}] {message}'
    )
    formatter = logging.Formatter(log_format, style='{')

    log_level = (
        logging.DEBUG
        if verbose else
        logging.INFO
    )

    handler = (
        logging.handlers.RotatingFileHandler(
            log_filename,
            encoding='utf8',
            maxBytes=2**22,  # 4 MB per log file
            backupCount=4)   # 16 MB total
        if log_filename else
        logging.StreamHandler()
    )
    handler.setFormatter(formatter)
    handler.setLevel(log_level)

    logger.addHandler(handler)

def init_default_logging():
    add_logging_handler(is_verbose, log_file)
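Pulling the resolver's rules together: settings are 'key = value' lines, a line without '=' continues the previous value, and the Subfeeds value is split into 'name: source source ...' definitions. A sketch of a config file this parser would accept (the subfeed and source names are invented for illustration):

    # /etc/inquisitor.conf
    DataPath = /var/inquisitor/data/
    SourcePath = /var/inquisitor/sources/
    CachePath = /var/inquisitor/cache/
    Verbose = false
    Subfeeds = news: somenews othernews
        comics: somecomic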

inquisitor/importer.py (deleted)

@@ -1,189 +0,0 @@
import os
import traceback
import importlib.util
import json
import sys

from inquisitor import loader, timestamp, error
from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger

def update_sources(*source_names):
    sys.path.append(SOURCES_PATH)
    for source_name in source_names:
        try:
            source_module = load_source(source_name)
        except Exception:
            error.as_item("Error importing source '{}'".format(source_name), traceback.format_exc())
            continue

        cell_path = os.path.join(DUNGEON_PATH, source_name)
        if not os.path.isdir(cell_path):
            try:
                logger.info("Creating cell for source '{}'".format(source_name))
                os.mkdir(cell_path)
                state_path = os.path.join(cell_path, "state")
                with open(state_path, 'w', encoding='utf8') as f:
                    f.write(json.dumps({}))
            except Exception:
                error.as_item("Error initializing source '{}'".format(source_name), traceback.format_exc())
                continue

        try:
            logger.info("Updating source '{}'".format(source_name))
            new_count, del_count = update_source(source_name, source_module.fetch_new)
            logger.info("{} new item{}, {} deleted item{}".format(
                new_count, "s" if new_count != 1 else "",
                del_count, "s" if del_count != 1 else ""))
        except Exception:
            error.as_item("Error updating source '{}'".format(source_name), traceback.format_exc())

def load_source(source_name):
    """
    Attempts to load the source module with the given name. Raises an exception on failure.
    """
    # Push the sources directory
    cwd = os.getcwd()
    try:
        os.chdir(SOURCES_PATH)
        # Check if the named source is present.
        source_file_name = source_name + ".py"
        if not os.path.isfile(source_file_name):
            raise FileNotFoundError("Missing '{}' in '{}'".format(source_name, SOURCES_PATH))
        # Try to import the source module.
        logger.debug("Loading module {}".format(source_file_name))
        spec = importlib.util.spec_from_file_location("itemsource", source_file_name)
        itemsource = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(itemsource)
        if not hasattr(itemsource, 'fetch_new'):
            raise ImportError("Missing fetch_new in '{}'".format(source_file_name))
        # Since the source is valid, get or create the source cell.
        return itemsource
    finally:
        os.chdir(cwd)

def update_source(source_name, fetch_new):
    """
    Attempts to update the given source. Raises an exception if the source does.
    """
    # Get the existing items from the source's cell.
    prior_items, errors = loader.load_items(source_name)
    if any(errors):
        raise Exception(f'Can\'t update source "{source_name}", some items are corrupt')
    logger.debug("Found {} prior items".format(len(prior_items)))

    # Get the feed items from the source's fetch method.
    state = loader.load_state(source_name)
    fetched = fetch_new(state)
    fetched_items = {item['id']: item for item in fetched}
    state.flush()

    # Populate all the fetched items with required or auto-generated fields.
    # This also provides an opportunity to throw if the source isn't returning
    # valid items.
    for item in fetched_items.values():
        populate_new(source_name, item)
    logger.debug("Fetched {} items".format(len(fetched_items)))

    # Write all the new fetched items to the source's cell.
    new_items = [
        item for item in fetched_items.values()
        if item['id'] not in prior_items]
    for item in new_items:
        s = json.dumps(item)
        path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + ".item")
        with open(path, 'w', encoding='utf8') as f:
            f.write(s)

    # Update the extant items using the fetched item's values.
    extant_items = [
        item for item in fetched_items.values()
        if item['id'] in prior_items]
    for item in extant_items:
        # The items in prior_items are writethrough dicts.
        prior_item = prior_items[item['id']]
        # Only bother updating active items.
        if prior_item['active']:
            populate_old(prior_item, item)

    # In general, items are removed when they are old (not found in the last
    # fetch) and inactive. Some item fields can change this basic behavior.
    del_count = 0
    now = timestamp.now()
    old_items = [
        item for item in prior_items.values()
        if item['id'] not in fetched_items]
    for item in old_items:
        remove = not item['active']
        # The time-to-live field protects an item from removal until expiry.
        # This is mainly used to avoid old items resurfacing when their source
        # cannot guarantee monotonicity.
        if 'ttl' in item:
            ttl_date = item['created'] + item['ttl']
            if ttl_date > now:
                continue
        # The time-to-die field can force an active item to be removed.
        if 'ttd' in item:
            ttd_date = item['created'] + item['ttd']
            if ttd_date < now:
                remove = True
        # Items to be removed are deleted
        if remove:
            del_count += 1
            file_path = os.path.join(DUNGEON_PATH, item['source'], item['id'] + ".item")
            try:
                os.remove(file_path)
            except:
                error.as_item("Failed to delete {}".format(file_path))

    # Note update timestamp in state
    state['last_updated'] = timestamp.now()

    # Return counts
    return len(new_items), del_count

def populate_new(source_name, item):
    # id is required
    if 'id' not in item:
        raise Exception(f'Source "{source_name}" returned an item with no id')
    # source is auto-populated with the source name if missing
    # Note: this allows sources to create items in other cells!
    if 'source' not in item: item['source'] = source_name
    # active is forced to True for new items
    item['active'] = True
    # created is forced to the current timestamp
    item['created'] = timestamp.now()
    # title is auto-populated with the id if missing
    if 'title' not in item: item['title'] = item['id']
    # tags is auto-populated if missing (not if empty!)
    if 'tags' not in item: item['tags'] = [source_name]
    # link, time, author, body, ttl, ttd, tts, callback are optional

def populate_old(prior, new):
    # Not updated: id, source, active, created
    if 'title' in new: prior['title'] = new['title']
    if 'tags' in new: prior['tags'] = new['tags']
    if 'link' in new: prior['link'] = new['link']
    if 'time' in new: prior['time'] = new['time']
    if 'author' in new: prior['author'] = new['author']
    if 'body' in new: prior['body'] = new['body']
    if 'ttl' in new: prior['ttl'] = new['ttl']
    if 'ttd' in new: prior['ttd'] = new['ttd']
    if 'tts' in new: prior['tts'] = new['tts']
    if 'callback' in new: prior['callback'] = new['callback']

def item_callback(source_name, itemid):
    try:
        # Load the module with the callback function
        source_module = load_source(source_name)
        if not hasattr(source_module, 'callback'):
            raise ImportError(f"Missing callback in '{source_name}'")
        # Load the source state and the origin item
        state = loader.load_state(source_name)
        item = loader.WritethroughDict(os.path.join(DUNGEON_PATH, source_name, itemid + ".item"))
        # Execute callback
        source_module.callback(state, item)
        # Save any changes
        item.flush()
        state.flush()
    except Exception:
        error.as_item(f"Error executing callback for {source_name}/{itemid}", traceback.format_exc())

inquisitor/loader.py

@@ -1,31 +1,53 @@
 import os
 import json

-from inquisitor.configs import DUNGEON_PATH
+from inquisitor.configs import DUNGEON_PATH, logger
 from inquisitor import error
 from inquisitor import timestamp

 class WritethroughDict():
-    """A wrapper for a dictionary saved to the disk."""
+    """A wrapper for a dictionary saved to the file system."""

-    def __init__(self, path):
+    @staticmethod
+    def create(path, item):
+        """
+        Creates a writethrough dictionary from a dictionary in memory and
+        initializes a file to save it.
+        """
+        if os.path.isfile(path):
+            raise FileExistsError(path)
+        wd = WritethroughDict(path, item)
+        wd.flush()
+        return wd
+
+    @staticmethod
+    def load(path):
+        """
+        Creates a writethrough dictionary from an existing file in the
+        file system.
+        """
         if not os.path.isfile(path):
             raise FileNotFoundError(path)
-        self.path = path
         with open(path) as f:
-            self.item = json.loads(f.read())
+            item = json.load(f)
+        return WritethroughDict(path, item)
+
+    def __init__(self, path, item):
+        self.path = path
+        self.item = item

     def __getitem__(self, key):
         return self.item[key]

+    def get(self, *args, **kwargs):
+        return self.item.get(*args, **kwargs)
+
     def __setitem__(self, key, value):
         self.item[key] = value
         self.flush()

+    def set(self, dict):
+        for key, value in dict.items():
+            self.item[key] = value
+        self.flush()
+
     def __contains__(self, key):
         return key in self.item
@@ -40,10 +62,79 @@ class WritethroughDict():
         with open(self.path, 'w', encoding="utf8") as f:
             f.write(s)

 def load_state(source_name):
     """Loads the state dictionary for a source."""
     state_path = os.path.join(DUNGEON_PATH, source_name, "state")
-    return WritethroughDict(state_path)
+    return WritethroughDict.load(state_path)
+
+def load_item(source_name, item_id):
+    """Loads an item from a source."""
+    item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
+    return WritethroughDict.load(item_path)
+
+def item_exists(source_name, item_id):
+    """
+    Checks for the existence of an item.
+    """
+    item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
+    return os.path.isfile(item_path)
+
+def get_item_ids(cell_name):
+    """
+    Returns a list of item ids in the given cell.
+    """
+    cell_path = os.path.join(DUNGEON_PATH, cell_name)
+    return [
+        filename[:-5]
+        for filename in os.listdir(cell_path)
+        if filename.endswith('.item')
+    ]
+
+def new_item(source_name, item):
+    """
+    Creates a new item with the fields in the provided dictionary.
+    Initializes other fields to their default values.
+    """
+    # id is required
+    if 'id' not in item:
+        raise Exception(f'Cannot create item with no id. Value = {item}')
+    # source must be filled in, so if it is absent it is auto-populated with
+    # source_name. Note: this allows sources to fill in a different source.
+    if 'source' not in item:
+        item['source'] = source_name
+    # active is forced to True for new items
+    item['active'] = True
+    # created is forced to the current timestamp
+    item['created'] = timestamp.now()
+    # title is auto-populated with the id if missing
+    if 'title' not in item:
+        item['title'] = item['id']
+    # tags is auto-populated if missing (not if empty!)
+    if 'tags' not in item:
+        item['tags'] = [source_name]
+    # All other fields are optional.
+    item_path = os.path.join(DUNGEON_PATH, item['source'], f'{item["id"]}.item')
+    return WritethroughDict.create(item_path, item)
+
+def delete_item(source_name, item_id):
+    """
+    Delete an item.
+    """
+    item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
+    os.remove(item_path)

 def load_items(source_name):
     """
@@ -55,36 +146,41 @@ def load_items(source_name):
     for filename in os.listdir(cell_path):
         if filename.endswith('.item'):
             try:
-                path = os.path.join(cell_path, filename)
-                item = WritethroughDict(path)
+                item = load_item(source_name, filename[:-5])
                 items[item['id']] = item
             except Exception:
                 errors.append(filename)
     return items, errors

-def load_active_items():
+def load_active_items(source_names):
     """
-    Returns a list of active items and a list of unreadable items.
+    Returns a list of active items and a list of unreadable items. If
+    `source_names` is defined, load only from sources in that list.
     """
     items = []
     errors = []
     now = timestamp.now()
-    for cell_name in os.listdir(DUNGEON_PATH):
-        cell_path = os.path.join(DUNGEON_PATH, cell_name)
-        for filename in os.listdir(cell_path):
-            if filename.endswith('.item'):
-                try:
-                    path = os.path.join(cell_path, filename)
-                    item = WritethroughDict(path)
-                    # The time-to-show field hides items until an expiry date.
-                    if 'tts' in item:
-                        tts_date = item['created'] + item['tts']
-                        if now < tts_date:
-                            continue
-                    # Don't show inactive items
-                    if not item['active']:
-                        continue
-                    items.append(item)
-                except Exception:
-                    errors.append(filename)
+    check_list = source_names or os.listdir(DUNGEON_PATH)
+    for source_name in check_list:
+        source_path = os.path.join(DUNGEON_PATH, source_name)
+        if not os.path.isdir(source_path):
+            logger.warning(f'Skipping nonexistent source {source_name}')
+            continue
+        for filename in os.listdir(source_path):
+            if not filename.endswith('.item'):
+                continue
+            try:
+                item = load_item(source_name, filename[:-5])
+                # The time-to-show field hides items until an expiry date.
+                if 'tts' in item:
+                    tts_date = item['created'] + item['tts']
+                    if now < tts_date:
+                        continue
+                # Don't show inactive items
+                if not item['active']:
+                    continue
+                items.append(item)
+            except Exception:
+                errors.append(filename)
     return items, errors
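Since every mutation of a WritethroughDict is flushed straight back to its JSON file, callers persist a change with a single assignment, which is exactly how the /deactivate/ route above works. A minimal sketch, assuming a cell named 'demo' already exists in the dungeon:

    from inquisitor import loader

    # new_item validates the dict, fills in active/created/title/tags,
    # and writes demo/abc123.item to disk
    item = loader.new_item('demo', {'id': 'abc123', 'body': 'Hello'})

    # __setitem__ writes the updated JSON back out before returning
    item['active'] = False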

inquisitor/sources.py (new file)

@@ -0,0 +1,238 @@
import os
import traceback
import importlib.util
import json
import sys

from inquisitor import loader, timestamp, error
from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger

USE_NEWEST = (
    'title',
    'tags',
    'link',
    'time',
    'author',
    'body',
    'ttl',
    'ttd',
    'tts',
)

class InquisitorStubSource:
    """A dummy source-like object for clearing out ad-hoc inquisitor items"""
    def fetch_new(self, state):
        return []

def ensure_cell(name):
    """
    Creates a cell in the dungeon. Idempotent.
    """
    cell_path = os.path.join(DUNGEON_PATH, name)
    if not os.path.isdir(cell_path):
        logger.info(f'Creating cell for source "{name}"')
        os.mkdir(cell_path)
    state_path = os.path.join(cell_path, 'state')
    if not os.path.isfile(state_path):
        with open(state_path, 'w', encoding='utf8') as state:
            json.dump({}, state)

def update_sources(*source_names):
    """
    Attempts to update each given source.
    """
    for source_name in source_names:
        # Import the source
        try:
            source_module = load_source(source_name)
        except Exception:
            error.as_item(
                f'Error importing source "{source_name}"',
                traceback.format_exc())
            continue

        # If it doesn't have a cell yet, create one
        try:
            ensure_cell(source_name)
        except Exception:
            error.as_item(
                f'Error initializing source "{source_name}"',
                traceback.format_exc())
            continue

        # Update the source
        try:
            logger.info(f'Updating source "{source_name}"')
            update_source(source_name, source_module)
        except Exception:
            error.as_item(
                f'Error updating source "{source_name}"',
                traceback.format_exc())

def load_source(source_name):
    """
    Attempts to load the source module with the given name.
    Raises an exception on failure.
    """
    if source_name == 'inquisitor':
        return InquisitorStubSource()

    cwd = os.getcwd()
    try:
        # Push the sources directory.
        os.chdir(SOURCES_PATH)
        # Make the sources directory importable while working with sources.
        if SOURCES_PATH not in sys.path:
            sys.path.insert(0, SOURCES_PATH)
        # Check if the named source is present.
        source_file_name = source_name + '.py'
        if not os.path.isfile(source_file_name):
            raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')
        # Import the source module by file path.
        logger.debug(f'Loading module "{source_file_name}"')
        spec = importlib.util.spec_from_file_location(source_name, source_file_name)
        itemsource = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(itemsource)
        itemsource = importlib.import_module(source_name)
        # Require fetch_new().
        if not hasattr(itemsource, 'fetch_new'):
            raise ImportError(f'Missing fetch_new in "{source_file_name}"')
        return itemsource
    finally:
        os.chdir(cwd)
        if SOURCES_PATH in sys.path:
            sys.path.remove(SOURCES_PATH)

def update_source(source_name, source):
    """
    Attempts to update the given source. Raises an exception if the source does.
    """
    # Get a list of item ids that already existed in this source's cell.
    prior_ids = loader.get_item_ids(source_name)
    logger.debug(f'Found {len(prior_ids)} prior items')

    # Get the feed items from the source's fetch method.
    state = loader.load_state(source_name)
    fetched = source.fetch_new(state)
    state.flush()
    logger.debug(f'Fetched {len(fetched)} items')
    fetched_items = {item['id']: item for item in fetched}

    # Determine which items are new and which are updates.
    # We query the file system here instead of checking against this source's
    # item ids from above because sources are allowed to generate in other
    # sources' cells.
    new_items = []
    updated_items = []
    for item in fetched:
        item_source = item.get('source', source_name)
        if loader.item_exists(item_source, item['id']):
            updated_items.append(item)
        else:
            new_items.append(item)

    # Write all the new items to the source's cell.
    has_create_handler = hasattr(source, 'on_create')
    for item in new_items:
        item_source = item.get('source', source_name)
        created_item = loader.new_item(item_source, item)
        if has_create_handler:
            # Because some sources do not return items more than once,
            # exceptions in the on-create handler must be squashed.
            try:
                source.on_create(state, created_item)
            except:
                error.as_item(
                    f'Exception in {source_name}.on_create',
                    traceback.format_exc())

    # Update the other items using the fetched items' values.
    for new_item in updated_items:
        old_item = loader.load_item(new_item['source'], new_item['id'])
        for field in USE_NEWEST:
            if field in new_item and old_item[field] != new_item[field]:
                old_item[field] = new_item[field]
        if 'callback' in new_item:
            old_callback = old_item.get('callback', {})
            # Because of the way this update happens, any fields that are set
            # in the callback when the item is new will keep their original
            # values, as those values reappear in new_item on subsequent
            # updates.
            old_item['callback'] = {**old_callback, **new_item['callback']}

    # In general, items are removed when they are old (not found in the last
    # fetch) and inactive. Some item fields can change this basic behavior.
    del_count = 0
    now = timestamp.now()
    has_delete_handler = hasattr(source, 'on_delete')
    fetched_ids = [item['id'] for item in updated_items]
    old_item_ids = [
        item_id for item_id in prior_ids
        if item_id not in fetched_ids]
    for item_id in old_item_ids:
        item = loader.load_item(source_name, item_id)
        remove = not item['active']
        # The time-to-live field protects an item from removal until expiry.
        # This is mainly used to avoid old items resurfacing when their source
        # cannot guarantee monotonicity.
        if 'ttl' in item:
            ttl_date = item['created'] + item['ttl']
            if ttl_date > now:
                continue
        # The time-to-die field can force an active item to be removed.
        if 'ttd' in item:
            ttd_date = item['created'] + item['ttd']
            if ttd_date < now:
                remove = True
        # Items to be removed are deleted
        if remove:
            try:
                if has_delete_handler:
                    # Run the delete handler so exceptions prevent deletions
                    source.on_delete(state, item)
                loader.delete_item(source_name, item['id'])
                del_count += 1
            except:
                error.as_item(
                    f'Failed to delete {source_name}/{item["id"]}',
                    traceback.format_exc())

    # Note update timestamp in state
    state['last_updated'] = timestamp.now()

    # Log counts
    logger.info("{} new item{}, {} deleted item{}".format(
        len(new_items), "s" if len(new_items) != 1 else "",
        del_count, "s" if del_count != 1 else ""))

def item_callback(source_name, itemid):
    try:
        # Load the module with the callback function
        source_module = load_source(source_name)
        if not hasattr(source_module, 'callback'):
            raise ImportError(f"Missing callback in '{source_name}'")
        # Load the source state and the origin item
        state = loader.load_state(source_name)
        item = loader.load_item(source_name, itemid)
        # Execute callback
        source_module.callback(state, item)
        # Save any changes
        item.flush()
        state.flush()
    except Exception:
        error.as_item(
            f"Error executing callback for {source_name}/{itemid}",
            traceback.format_exc())
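In other words, a source is just a module in SOURCES_PATH whose fetch_new(state) returns a list of item dicts carrying at least an 'id'; on_create and on_delete are optional hooks. A hypothetical sources/demo.py honoring that contract:

    # sources/demo.py (illustrative)
    def fetch_new(state):
        # state is a WritethroughDict, so values stored here
        # persist between update runs
        count = state.get('count', 0) + 1
        state['count'] = count
        return [{'id': f'demo-{count}', 'title': f'Demo item {count}'}]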

inquisitor/templates.py (new file)

@@ -0,0 +1,228 @@
"""
Generates a dummy item.
"""
# Standard library imports
from datetime import datetime
import inspect
import logging
import os
import random
from time import sleep
import sys
# Third-party library imports
from bs4 import BeautifulSoup
import requests
# Module imports
from inquisitor import CACHE_PATH
logger = logging.getLogger('inquisitor.templates')
def cache_image(source, url, filename):
# Define some paths
path = os.path.join(CACHE_PATH, source)
file_path = os.path.join(path, filename)
cached_url = f'/cache/{source}/{filename}'
# Ensure cache folder
if not os.path.isdir(path):
os.mkdir(path)
# Fetch url
logger.info(f'Caching {url} to {file_path}')
response = requests.get(url)
# Write file to disk
with open(file_path, 'wb') as f:
f.write(response.content)
# Return the inquisitor path to the file
return cached_url
class LinearCrawler:
"""
An engine for generating items from web sources that link content
together in a linear fashion, such as webcomics.
"""
def fetch_new(self, state):
items = []
max_iter = self.max_iterations() - 1
new = self.try_fetch(state)
items.extend(new)
for iter in range(max_iter):
sleep(1)
# If we've already gotten some items out of this fetch, we don't
# want to lose them and have the state still be set to the next
# page, so we wrap further calls in a try block and force return
# if we hit an error.
try:
new = self.try_fetch(state)
except:
new = []
items.extend(new)
# Cut out early if there was nothing returned
if not new:
break
return items
def try_fetch(self, state):
# Check for whether a new page should be crawled
if 'current_page' not in state:
next_page = self.get_start_url()
else:
current = state['current_page']
response = requests.get(current)
soup = BeautifulSoup(response.text, features='html.parser')
next_page = self.get_next_page_url(current, soup)
if not next_page:
return [] # nothing new
# Download the new page
logger.info('Fetching ' + next_page)
response = requests.get(next_page)
soup = BeautifulSoup(response.text, features="html.parser")
# Create an item from the page
item = self.make_item(next_page, soup)
# Update the state and return the item
state['current_page'] = next_page
return [item]
def max_iterations(self):
return 3
def get_start_url(self):
raise NotImplementedError('get_start_url is required')
def get_next_page_url(self, url, soup):
raise NotImplementedError('get_next_page_url is required')
def make_item(self, url, soup):
raise NotImplementedError('make_item is required')
class RedditScraper:
"""
An engine for generating items from subreddits.
Requires defining source, subreddit_name
fetch new with RedditScraper.fetch_new(state, __name__, reddit)
"""
@staticmethod
def fetch_new(state, name, reddit):
items = []
for name, obj in inspect.getmembers(sys.modules[name]):
if (inspect.isclass(obj)
and issubclass(obj, RedditScraper)
and obj is not RedditScraper
):
sub_items = obj(reddit).get_items()
items.extend(sub_items)
return items
def __init__(self, reddit):
self.reddit = reddit
def get_items(self):
sub_name = self.subreddit_name
logger.info(f'Fetching posts from r/{sub_name}')
subreddit = self.reddit.subreddit(sub_name)
posts = self.subreddit_page(subreddit)
items = []
for post in posts:
if self.filter_post(post):
items.append(self.item_from_post(post))
return items
def item_from_post(self, post):
item = {
'source': self.source,
'id': post.id,
'title': self.get_title(post),
'link': self.get_link(post),
'time': post.created_utc,
'author': '/u/' + (post.author.name if post.author else "[deleted]"),
'body': self.get_body(post),
'tags': self.get_tags(post),
'ttl': self.get_ttl(post),
}
ttl = self.get_ttl(post)
if ttl is not None: item['ttl'] = ttl
ttd = self.get_ttd(post)
if ttd is not None: item['ttd'] = ttd
tts = self.get_tts(post)
if tts is not None: item['tts'] = tts
callback = self.get_callback(post)
if callback is not None: item['callback'] = callback
return item
def subreddit_page(self, subreddit):
return subreddit.hot(limit=25)
def filter_post(self, post):
return True
def get_title(self, post):
s = '[S] ' if post.spoiler else ''
nsfw = '[NSFW] ' if post.over_18 else ''
return f'{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}'
def get_link(self, post):
return f'https://reddit.com{post.permalink}'
def get_body(self, post):
parts = []
if not post.is_self:
parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
if hasattr(post, 'preview'):
try:
previews = post.preview['images'][0]['resolutions']
small_previews = [p for p in previews if p['width'] < 800]
preview = sorted(small_previews, key=lambda p:-p['width'])[0]
parts.append(f'<img src="{preview["url"]}">')
except:
pass
if getattr(post, 'is_gallery', False):
try:
for gallery_item in post.gallery_data['items']:
media_id = gallery_item['media_id']
metadata = post.media_metadata[media_id]
small_previews = [p for p in metadata['p'] if p['x'] < 800]
preview = sorted(small_previews, key=lambda p:-p['x'])[0]
parts.append(f'<i>link:</i> <a href="{metadata["s"]["u"]}">{metadata["s"]["u"]}</a>')
parts.append(f'<img src="{preview["u"]}">')
except:
pass
if post.selftext:
limit = post.selftext[1024:].find(' ')
preview_body = post.selftext[:1024 + limit]
if len(preview_body) < len(post.selftext):
preview_body += '[...]'
parts.append(f'<p>{preview_body}</p>')
return '<br><hr>'.join(parts)
def get_tags(self, post):
tags = ['reddit', post.subreddit_name_prefixed[2:]]
if post.over_18:
tags.append('nsfw')
return tags
def get_ttl(self, post):
return 60 * 60 * 24 * 7 # 1 week
def get_ttd(self, post):
return None
def get_tts(self, post):
return None
def get_callback(self, post):
return None
def callback(self, state, item):
raise NotImplementedError('callback')
def on_create(self, state, item):
raise NotImplementedError('on_create')
def on_delete(self, state, item):
raise NotImplementedError('on_delete')
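LinearCrawler leaves get_start_url, get_next_page_url, and make_item abstract; a webcomic source would subclass it and, under the fetch_new(state) contract from sources.py, delegate its module-level fetch_new to an instance. A sketch against an invented site (the URL, selector, and names are assumptions):

    # sources/somecomic.py (illustrative)
    from inquisitor.templates import LinearCrawler

    class SomeComic(LinearCrawler):
        def get_start_url(self):
            return 'https://example.com/comic/1'

        def get_next_page_url(self, url, soup):
            # Follow the rel="next" link until the latest page
            nxt = soup.find('a', rel='next')
            return nxt['href'] if nxt else None

        def make_item(self, url, soup):
            return {
                'id': url.rsplit('/', 1)[-1],
                'source': 'somecomic',
                'title': soup.title.string,
                'link': url,
            }

    def fetch_new(state):
        return SomeComic().fetch_new(state)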

feed.jinja2 (renamed from feed.html)

@@ -1,6 +1,8 @@
 <html>
 <head>
 <meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Inquisitor{% if items %} ({{ items|length - 1 }}){% endif %}</title>
+<link rel="icon" type="image/png" href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAALGPC/xhBQAAAAlwSFlzAAAOwgAADsIBFShKgAAAABh0RVh0U29mdHdhcmUAcGFpbnQubmV0IDQuMS41ZEdYUgAAAGFJREFUOE+lkFEKwDAIxXrzXXB3ckMm9EnAV/YRCxFCcUXEL3Jc77NDjpDA/VGL3RFWYEICfeGC8oQc9IPuCAnQDcoRVmBCAn3hgvKEHPSD7ggJ0A3KEVZgQgJ94YLSJ9YDUzNGDXGZ/JEAAAAASUVORK5CYII=">
 <style>
 div#wrapper { max-width: 700px; margin: 0 auto; }
 .readable-item {
@@ -26,6 +28,7 @@
 .item-info { color: rgba(0, 0, 0, 0.7); }
 .readable-item img { max-width: 100%; }
 button, summary { cursor: pointer; }
+summary { display: block; }
 summary:focus { outline: 1px dotted gray; }
 .strikethru span, .strikethru p { text-decoration: line-through; }
 .fade span, .fade p { color: rgba(0, 0, 0, 0.2); }
@@ -34,7 +37,7 @@
 </style>
 <script>
 var deactivate = function (source, itemid) {
-    fetch('deactivate/', {
+    fetch('/deactivate/', {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json; charset=UTF-8',
@@ -50,7 +53,7 @@
     });
 };
 var punt = function (source, itemid) {
-    fetch('punt/', {
+    fetch('/punt/', {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json; charset=UTF-8',
@@ -66,7 +69,7 @@
     });
 };
 var mdeactivate = function (items) {
-    fetch('mass-deactivate/', {
+    fetch('/mass-deactivate/', {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json; charset=UTF-8',
@@ -74,11 +77,12 @@
         body: JSON.stringify({items: items}),
     })
     .then(function () {
-        location.reload()
+        location.reload();
     });
 };
 var callback = function (source, itemid) {
-    fetch('callback/', {
+    document.getElementById(source + "-" + itemid + "-callback").disabled = true;
+    fetch('/callback/', {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json; charset=UTF-8',
@@ -105,7 +109,7 @@
 <p>{{item.body|safe}}</p>
 {% endif %}
 {% if item.callback %}
-<p><button onclick="javascript:callback('{{item.source}}', '{{item.id}}')">Callback</button></p>
+<p><button id="{{item.source}}-{{item.id}}-callback" onclick="javascript:callback('{{item.source}}', '{{item.id}}')">Callback</button></p>
 {% endif %}
 </details>
 {% else %}<span class="item-title">{{item.title}}</span><br>

poetry.lock (generated, new file)

@@ -0,0 +1,411 @@
[[package]]
name = "beautifulsoup4"
version = "4.11.1"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = ">=3.6.0"
[package.dependencies]
soupsieve = ">1.2"
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "certifi"
version = "2022.12.7"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "charset-normalizer"
version = "2.1.1"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
python-versions = ">=3.6.0"
[package.extras]
unicode-backport = ["unicodedata2"]
[[package]]
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[[package]]
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
[[package]]
name = "feedparser"
version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = true
python-versions = ">=3.6"
[package.dependencies]
sgmllib3k = "*"
[[package]]
name = "flask"
version = "2.2.2"
description = "A simple framework for building complex web applications."
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
click = ">=8.0"
itsdangerous = ">=2.0"
Jinja2 = ">=3.0"
Werkzeug = ">=2.2.2"
[package.extras]
async = ["asgiref (>=3.2)"]
dotenv = ["python-dotenv"]
[[package]]
name = "gunicorn"
version = "20.1.0"
description = "WSGI HTTP Server for UNIX"
category = "main"
optional = true
python-versions = ">=3.5"
[package.dependencies]
setuptools = ">=3.0"
[package.extras]
eventlet = ["eventlet (>=0.24.1)"]
gevent = ["gevent (>=1.4.0)"]
setproctitle = ["setproctitle"]
tornado = ["tornado (>=0.2)"]
[[package]]
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "itsdangerous"
version = "2.1.2"
description = "Safely pass data to untrusted environments and back."
category = "main"
optional = false
python-versions = ">=3.7"
[[package]]
name = "jinja2"
version = "3.1.2"
description = "A very fast and expressive template engine."
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
MarkupSafe = ">=2.0"
[package.extras]
i18n = ["Babel (>=2.7)"]
[[package]]
name = "markupsafe"
version = "2.1.1"
description = "Safely add untrusted strings to HTML/XML markup."
category = "main"
optional = false
python-versions = ">=3.7"
[[package]]
name = "praw"
version = "7.6.1"
description = "PRAW, an acronym for \"Python Reddit API Wrapper\", is a python package that allows for simple access to Reddit's API."
category = "main"
optional = true
python-versions = "~=3.7"
[package.dependencies]
prawcore = ">=2.1,<3"
update-checker = ">=0.18"
websocket-client = ">=0.54.0"
[package.extras]
ci = ["coveralls"]
dev = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.3.0,<0.5)", "packaging", "pre-commit", "pytest (>=2.7.3)", "requests (>=2.20.1,<3)", "sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
lint = ["pre-commit", "sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
readthedocs = ["sphinx", "sphinx-rtd-dark-mode", "sphinx-rtd-theme"]
test = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.3.0,<0.5)", "pytest (>=2.7.3)", "requests (>=2.20.1,<3)"]
[[package]]
name = "prawcore"
version = "2.3.0"
description = "Low-level communication layer for PRAW 4+."
category = "main"
optional = true
python-versions = "~=3.6"
[package.dependencies]
requests = ">=2.6.0,<3.0"
[package.extras]
ci = ["coveralls"]
dev = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.4.0,<0.5)", "betamax-serializers (>=0.2.0,<0.3)", "black", "flake8", "flynt", "mock (>=0.8)", "pre-commit", "pydocstyle", "pytest", "testfixtures (>4.13.2,<7)"]
lint = ["black", "flake8", "flynt", "pre-commit", "pydocstyle"]
test = ["betamax (>=0.8,<0.9)", "betamax-matchers (>=0.4.0,<0.5)", "betamax-serializers (>=0.2.0,<0.3)", "mock (>=0.8)", "pytest", "testfixtures (>4.13.2,<7)"]
[[package]]
name = "requests"
version = "2.28.1"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=3.7, <4"
[package.dependencies]
certifi = ">=2017.4.17"
charset-normalizer = ">=2,<3"
idna = ">=2.5,<4"
urllib3 = ">=1.21.1,<1.27"
[package.extras]
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
[[package]]
name = "setuptools"
version = "65.6.3"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
category = "main"
optional = true
python-versions = ">=3.7"
[package.extras]
docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
[[package]]
name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
category = "main"
optional = true
python-versions = "*"
[[package]]
name = "soupsieve"
version = "2.3.2.post1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "update-checker"
version = "0.18.0"
description = "A python module that will check for package updates."
category = "main"
optional = true
python-versions = "*"
[package.dependencies]
requests = ">=2.3.0"
[package.extras]
dev = ["black", "flake8", "pytest (>=2.7.3)"]
lint = ["black", "flake8"]
test = ["pytest (>=2.7.3)"]
[[package]]
name = "urllib3"
version = "1.26.13"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[package.extras]
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[[package]]
name = "websocket-client"
version = "1.4.2"
description = "WebSocket client for Python with low level API options"
category = "main"
optional = true
python-versions = ">=3.7"
[package.extras]
docs = ["Sphinx (>=3.4)", "sphinx-rtd-theme (>=0.5)"]
optional = ["python-socks", "wsaccel"]
test = ["websockets"]
[[package]]
name = "werkzeug"
version = "2.2.2"
description = "The comprehensive WSGI web application library."
category = "main"
optional = false
python-versions = ">=3.7"
[package.dependencies]
MarkupSafe = ">=2.1.1"
[package.extras]
watchdog = ["watchdog"]
[extras]
dev = ["praw", "gunicorn", "feedparser"]
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "2d6e1c8843f9821ef246fdeeeef572bcdaa2db452bbaa74531e8af91f8d89bb5"
[metadata.files]
beautifulsoup4 = [
{file = "beautifulsoup4-4.11.1-py3-none-any.whl", hash = "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30"},
{file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"},
]
certifi = [
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
]
charset-normalizer = [
{file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"},
{file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"},
]
click = [
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
]
colorama = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
flask = [
{file = "Flask-2.2.2-py3-none-any.whl", hash = "sha256:b9c46cc36662a7949f34b52d8ec7bb59c0d74ba08ba6cb9ce9adc1d8676d9526"},
{file = "Flask-2.2.2.tar.gz", hash = "sha256:642c450d19c4ad482f96729bd2a8f6d32554aa1e231f4f6b4e7e5264b16cca2b"},
]
gunicorn = [
{file = "gunicorn-20.1.0-py3-none-any.whl", hash = "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e"},
{file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"},
]
idna = [
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]
itsdangerous = [
{file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"},
{file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"},
]
jinja2 = [
{file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
{file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
]
markupsafe = [
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812"},
{file = "MarkupSafe-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a"},
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e"},
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5"},
{file = "MarkupSafe-2.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4"},
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f"},
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e"},
{file = "MarkupSafe-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933"},
{file = "MarkupSafe-2.1.1-cp310-cp310-win32.whl", hash = "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6"},
{file = "MarkupSafe-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-win32.whl", hash = "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff"},
{file = "MarkupSafe-2.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a"},
{file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452"},
{file = "MarkupSafe-2.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003"},
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1"},
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601"},
{file = "MarkupSafe-2.1.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925"},
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f"},
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88"},
{file = "MarkupSafe-2.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63"},
{file = "MarkupSafe-2.1.1-cp38-cp38-win32.whl", hash = "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1"},
{file = "MarkupSafe-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7"},
{file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a"},
{file = "MarkupSafe-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f"},
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6"},
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77"},
{file = "MarkupSafe-2.1.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603"},
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7"},
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135"},
{file = "MarkupSafe-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96"},
{file = "MarkupSafe-2.1.1-cp39-cp39-win32.whl", hash = "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c"},
{file = "MarkupSafe-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247"},
{file = "MarkupSafe-2.1.1.tar.gz", hash = "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b"},
]
praw = [
{file = "praw-7.6.1-py3-none-any.whl", hash = "sha256:33ac091cd061d9bd607ae231d4ea40025060a4ecb5e11baa5ce9a25c6d2f5a6b"},
{file = "praw-7.6.1.tar.gz", hash = "sha256:07fc95ffc52187351ab77b81fa6910c66e8fa084faf4b060b90864ad186dfb9e"},
]
prawcore = [
{file = "prawcore-2.3.0-py3-none-any.whl", hash = "sha256:48c17db447fa06a13ca3e722201f443031437708daa736c05a1df895fbcceff5"},
{file = "prawcore-2.3.0.tar.gz", hash = "sha256:daf1ccd4b7a80dc4e6567d48336d782e94a9a6dad83770fc2edf76dc9a84f56d"},
]
requests = [
{file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"},
{file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"},
]
setuptools = [
{file = "setuptools-65.6.3-py3-none-any.whl", hash = "sha256:57f6f22bde4e042978bcd50176fdb381d7c21a9efa4041202288d3737a0c6a54"},
{file = "setuptools-65.6.3.tar.gz", hash = "sha256:a7620757bf984b58deaf32fc8a4577a9bbc0850cf92c20e1ce41c38c19e5fb75"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
soupsieve = [
{file = "soupsieve-2.3.2.post1-py3-none-any.whl", hash = "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759"},
{file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"},
]
update-checker = [
{file = "update_checker-0.18.0-py3-none-any.whl", hash = "sha256:cbba64760a36fe2640d80d85306e8fe82b6816659190993b7bdabadee4d4bbfd"},
{file = "update_checker-0.18.0.tar.gz", hash = "sha256:6a2d45bb4ac585884a6b03f9eade9161cedd9e8111545141e9aa9058932acb13"},
]
urllib3 = [
{file = "urllib3-1.26.13-py2.py3-none-any.whl", hash = "sha256:47cc05d99aaa09c9e72ed5809b60e7ba354e64b59c9c173ac3018642d8bb41fc"},
{file = "urllib3-1.26.13.tar.gz", hash = "sha256:c083dd0dce68dbfbe1129d5271cb90f9447dea7d52097c6e0126120c521ddea8"},
]
websocket-client = [
{file = "websocket-client-1.4.2.tar.gz", hash = "sha256:d6e8f90ca8e2dd4e8027c4561adeb9456b54044312dba655e7cae652ceb9ae59"},
{file = "websocket_client-1.4.2-py3-none-any.whl", hash = "sha256:d6b06432f184438d99ac1f456eaf22fe1ade524c3dd16e661142dc54e9cba574"},
]
werkzeug = [
{file = "Werkzeug-2.2.2-py3-none-any.whl", hash = "sha256:f979ab81f58d7318e064e99c4506445d60135ac5cd2e177a2de0089bfd4c9bd5"},
{file = "Werkzeug-2.2.2.tar.gz", hash = "sha256:7ea2d48322cc7c0f8b3a215ed73eabd7b5d75d0b50e31ab006286ccff9e00b8f"},
]

pyproject.toml Normal file

@@ -0,0 +1,26 @@
[tool.poetry]
name = "inquisitor"
version = "0.2.0"
description = "An arbitrary feed aggregator"
authors = ["Tim Van Baak <tim.vanbaak+inquisitor@gmail.com>"]
readme = "README.md"
repository = "https://github.com/Jaculabilis/Inquisitor"

[tool.poetry.dependencies]
python = "^3.10"
flask = "^2.2.2"
requests = "^2.28.1"
beautifulsoup4 = "^4.11.1"
praw = {version = "^7.6.1", optional = true}
gunicorn = {version = "^20.1.0", optional = true}
feedparser = {version = "^6.0.10", optional = true}

[tool.poetry.extras]
dev = ["praw", "gunicorn", "feedparser"]

[tool.poetry.scripts]
inquisitor = "inquisitor.cli:main"

[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
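Because praw, gunicorn, and feedparser are marked optional above and only installed via the dev extra, code that uses them has to tolerate their absence. A minimal guard sketch, illustrative only; the repo's own modules may structure this differently:

# Guarding an optional dependency declared under [tool.poetry.extras].
# The function and error text are illustrative, not taken from the repo.
try:
    import praw
except ImportError:
    praw = None

def make_reddit_client(client_id, client_secret, user_agent):
    if praw is None:
        raise RuntimeError(
            'praw is not installed; install the "dev" extra to use reddit sources')
    return praw.Reddit(
        client_id=client_id,
        client_secret=client_secret,
        user_agent=user_agent,
    )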

setup.py

@@ -1,10 +0,0 @@
import setuptools

setuptools.setup(
    name="inquisitor",
    version="0.0.1",
    author="Tim Van Baak",
    description="An arbitrary feed aggregator",
    packages=setuptools.find_packages(),
    python_requires=">=3.6",
)

shell.nix Normal file

@@ -0,0 +1,8 @@
{ pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/2ebb6c1e5ae402ba35cca5eec58385e5f1adea04.tar.gz") {}
}:

pkgs.mkShell {
  buildInputs = [
    (pkgs.python3.withPackages (p: [p.poetry]))
  ];
}

sources/deletetriggerexdemo.py Normal file

@@ -0,0 +1,29 @@
"""
Demonstrates the behavior of exceptions in create/delete triggers.
To allow for deletions, it alternates returning a single item and
returning nothing.
"""
# Standard library imports
from datetime import datetime
import json
import random
def fetch_new(state):
if state.get('return_item'):
state['return_item'] = False
return [{
'source': 'deletetriggerexdemo',
'id': 'deletetriggerexdemoitem',
'title': 'Delete trigger exception demo'
}]
else:
state['return_item'] = True
return []
def on_create(state, item):
raise Exception('on_create')
def on_delete(state, item):
raise Exception('on_delete')
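The point of this demo is that trigger exceptions must not take down an update. A plausible shape for that guard in the updater, with names assumed rather than taken from the repo:

# Hypothetical sketch of how an updater might isolate trigger
# exceptions so a failing on_create/on_delete does not abort the
# update or corrupt source state. All names here are assumptions.
import logging

logger = logging.getLogger(__name__)

def fire_trigger(source_module, trigger_name, state, item):
    trigger = getattr(source_module, trigger_name, None)
    if trigger is None:
        return
    try:
        trigger(state, item)
    except Exception:
        # Log and continue; the item is still created or deleted.
        logger.exception(
            'Error in %s trigger for item %s', trigger_name, item.get('id'))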

sources/importdemo.py Normal file

@@ -0,0 +1,20 @@
"""
Demonstrates the behavior of imports in sources.
"""
# Standard library imports
from datetime import datetime
import random
# Environment import
import flask
# Local import
from importdemohelper import secret
def fetch_new(state):
return [{
'source': 'importdemo',
'id': '{:x}'.format(random.getrandbits(16 * 4)),
'title': f'The secret is "{secret}"',
'body': f'And flask\'s name is "{flask.__name__}"',
}]

sources/importdemohelper.py Normal file

@@ -0,0 +1,6 @@
"""
Demonstrates the behavior of imports in sources.
This python file is not a source and will not
"""
secret = f'This string is from {__file__}'
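For `from importdemohelper import secret` to succeed inside a dynamically loaded source, the sources directory has to be importable. A minimal sketch of such a loader, with the function name and layout assumed; the repo's actual loader may differ:

# Hypothetical sketch of loading a source module by path, in the
# spirit of what lets importdemo.py import importdemohelper.
import importlib.util
import os
import sys

def load_source(sources_dir, name):
    # Make sibling modules like importdemohelper importable.
    if sources_dir not in sys.path:
        sys.path.insert(0, sources_dir)
    path = os.path.join(sources_dir, name + '.py')
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module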

sources/triggerdemo.py Normal file

@@ -0,0 +1,41 @@
"""
Demonstrates the behavior of the on_create and on_delete triggers.
The items it creates spawn dummy messages on creation and deletion.
It assumes the dungeon is located at ./dungeon.
"""
# Standard library imports
from datetime import datetime
import json
import random
def fetch_new(state):
if state.get('return_item'):
state['return_item'] = False
return [{
'source': 'triggerdemo',
'id': 'triggerdemoitem',
'title': 'This is the trigger demo item'
}]
else:
state['return_item'] = True
return []
def on_create(state, item):
with open('dungeon/inquisitor/triggerdemo_create.item', 'w') as f:
json.dump({
'source': 'inquisitor',
'id': 'triggerdemo_create',
'title': 'Trigger demo on_create item',
'active': True,
}, f)
def on_delete(state, item):
with open('dungeon/inquisitor/triggerdemo_delete.item', 'w') as f:
json.dump({
'source': 'inquisitor',
'id': 'triggerdemo_delete',
'title': 'Trigger demo on_delete item',
'active': True,
}, f)
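To watch the alternation by hand, the demo can be driven directly. This harness is illustrative and not part of the repo; per the docstring, it assumes ./dungeon/inquisitor exists in the working directory:

# Illustrative driver for triggerdemo.py; not part of the repo.
import triggerdemo

state = {}
first = triggerdemo.fetch_new(state)   # one item on the first call
second = triggerdemo.fetch_new(state)  # empty list on the second call
assert first and not second

for item in first:
    triggerdemo.on_create(state, item)  # writes triggerdemo_create.item
    triggerdemo.on_delete(state, item)  # writes triggerdemo_delete.item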