Initial black linting pass
This commit is contained in:
parent
52e32917d5
commit
42544bd30b
|
@ -1,2 +1,3 @@
|
|||
from cli import main
|
||||
|
||||
main()
|
|
@ -14,7 +14,8 @@ from inquisitor.configs import (
|
|||
subfeeds,
|
||||
get_subfeed_overrides,
|
||||
logger,
|
||||
init_default_logging)
|
||||
init_default_logging,
|
||||
)
|
||||
from inquisitor import sources, loader, timestamp
|
||||
|
||||
# Globals
|
||||
|
@ -28,18 +29,22 @@ def make_query_link(text, wl, bl):
|
|||
query = "?{}".format("&".join(params))
|
||||
return '<a href="{1}">{0}</a>'.format(text, query)
|
||||
|
||||
|
||||
@app.template_filter("datetimeformat")
def datetimeformat(value):
    """Template filter: readable form of a timestamp, or "" when it is None."""
    if value is None:
        return ""
    return timestamp.stamp_to_readable(value)
|
||||
|
||||
|
||||
@app.route("/")
def root():
    """Redirect the site root to the URL of the `feed` view."""
    feed_url = url_for("feed")
    return redirect(feed_url)
|
||||
|
||||
|
||||
@app.route("/feed/")
def feed():
    """Render the feed with no source restriction (source_names is None)."""
    unrestricted = None
    return feed_for_sources(source_names=unrestricted)
|
||||
|
||||
|
||||
@app.route("/feed/<string:feed_name>/")
|
||||
def subfeed(feed_name):
|
||||
# Check for and apply subfeed overrides
|
||||
|
@ -47,7 +52,7 @@ def subfeed(feed_name):
|
|||
subfeed_config = subfeed_overrides or subfeeds or {}
|
||||
|
||||
# The built-in inquisitor subfeed contains sources not in another subfeed
|
||||
if feed_name == 'inquisitor':
|
||||
if feed_name == "inquisitor":
|
||||
all_sources = os.listdir(DUNGEON_PATH)
|
||||
for subfeed, sources in subfeed_config.items():
|
||||
for source_name in sources:
|
||||
|
@ -59,17 +64,18 @@ def subfeed(feed_name):
|
|||
return abort(404)
|
||||
return feed_for_sources(subfeed_config[feed_name])
|
||||
|
||||
|
||||
def feed_for_sources(source_names):
|
||||
# Determine exclusion filters
|
||||
filters = []
|
||||
wl_param = request.args.get('only')
|
||||
wl_param = request.args.get("only")
|
||||
wl = wl_param.split(",") if wl_param else []
|
||||
bl_param = request.args.get('not')
|
||||
bl_param = request.args.get("not")
|
||||
bl = bl_param.split(",") if bl_param else []
|
||||
if wl:
|
||||
filters.append(lambda item: not any([tag in wl for tag in item['tags']]))
|
||||
filters.append(lambda item: not any([tag in wl for tag in item["tags"]]))
|
||||
if bl:
|
||||
filters.append(lambda item: any([tag in bl for tag in item['tags']]))
|
||||
filters.append(lambda item: any([tag in bl for tag in item["tags"]]))
|
||||
|
||||
# Get all active+filtered items and all active tags
|
||||
total = 0
|
||||
|
@ -77,30 +83,40 @@ def feed_for_sources(source_names):
|
|||
active_items = []
|
||||
active_tags = {}
|
||||
for item in items:
|
||||
if item['active']:
|
||||
for tag in item['tags']:
|
||||
if tag not in active_tags: active_tags[tag] = 0
|
||||
if item["active"]:
|
||||
for tag in item["tags"]:
|
||||
if tag not in active_tags:
|
||||
active_tags[tag] = 0
|
||||
active_tags[tag] += 1
|
||||
# active_tags |= set(item['tags'])
|
||||
total += 1
|
||||
if not any(map(lambda f: f(item), filters)):
|
||||
active_items.append(item)
|
||||
# Sort items by time
|
||||
active_items.sort(key=lambda i: i['time'] if 'time' in i and i['time'] else i['created'] if 'created' in i and i['created'] else 0)
|
||||
active_items.sort(
|
||||
key=lambda i: i["time"]
|
||||
if "time" in i and i["time"]
|
||||
else i["created"]
|
||||
if "created" in i and i["created"]
|
||||
else 0
|
||||
)
|
||||
|
||||
logger.info("Returning {} of {} items".format(len(active_items), total))
|
||||
if errors:
|
||||
read_ex = {
|
||||
'title': 'Read errors',
|
||||
'body': "<pre>{}</pre>".format("\n\n".join(errors)),
|
||||
'created': None,
|
||||
"title": "Read errors",
|
||||
"body": "<pre>{}</pre>".format("\n\n".join(errors)),
|
||||
"created": None,
|
||||
}
|
||||
active_items.insert(0, read_ex)
|
||||
|
||||
if total > 0:
|
||||
# Create the feed control item
|
||||
link_table = ["<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
|
||||
total, make_query_link("all", [], []))]
|
||||
link_table = [
|
||||
"<tr><td>{0}</td><td>{1}</td><td></td><td></td></tr>".format(
|
||||
total, make_query_link("all", [], [])
|
||||
)
|
||||
]
|
||||
for tag, count in sorted(active_tags.items(), key=lambda i: i[0].lower()):
|
||||
links = [count]
|
||||
links.append(make_query_link(tag, [tag], []))
|
||||
|
@ -116,76 +132,90 @@ def feed_for_sources(source_names):
|
|||
else:
|
||||
new_wl = [t for t in wl if t != tag]
|
||||
links.append(make_query_link("+not", new_wl, bl + [tag]))
|
||||
link_table.append("<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(*links))
|
||||
link_table.append(
|
||||
"<tr><td>{0}</td><td>{1}</td><td>{2}</td><td>{3}</td></tr>".format(
|
||||
*links
|
||||
)
|
||||
)
|
||||
body = '<table class="feed-control">{}</table>'.format("\n".join(link_table))
|
||||
|
||||
feed_control = {
|
||||
'title': 'Feed Control [{}/{}]'.format(len(active_items), total),
|
||||
'body': body,
|
||||
"title": "Feed Control [{}/{}]".format(len(active_items), total),
|
||||
"body": body,
|
||||
}
|
||||
active_items.insert(0, feed_control)
|
||||
|
||||
selection = active_items[:100]
|
||||
|
||||
return render_template("feed.jinja2",
|
||||
return render_template(
|
||||
"feed.jinja2",
|
||||
items=selection,
|
||||
mdeac=[
|
||||
{'source': item['source'], 'itemid': item['id']}
|
||||
{"source": item["source"], "itemid": item["id"]}
|
||||
for item in selection
|
||||
if 'id' in item])
|
||||
if "id" in item
|
||||
],
|
||||
)
|
||||
|
||||
@app.route("/deactivate/", methods=["POST"])
def deactivate():
    """Mark a single item inactive.

    Expects a JSON body with "source" and "itemid" keys and responds with a
    JSON object carrying the item's resulting "active" flag. A body that is
    empty or lacks either key is logged and rejected with HTTP 400.
    """
    params = request.get_json()
    # Both keys are required. The previous `and` check only fired when both
    # were absent, and even then fell through to a KeyError on the next line.
    if not params or "source" not in params or "itemid" not in params:
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    if item["active"]:
        logger.debug(f"Deactivating {params['source']}/{params['itemid']}")
    item["active"] = False
    return jsonify({"active": item["active"]})
|
||||
|
||||
@app.route("/punt/", methods=["POST"])
def punt():
    """Set an item's "tts" so that it points at 06:00 on the following day.

    Expects a JSON body with "source" and "itemid" keys and responds with the
    item's full JSON content. "Tomorrow" is computed from datetime.now(). A
    body that is empty or lacks either key is logged and rejected with
    HTTP 400.
    """
    params = request.get_json()
    # Both keys are required. The previous `and` check only fired when both
    # were absent, and even then fell through to a KeyError on the next line.
    if not params or "source" not in params or "itemid" not in params:
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    item = loader.load_item(params["source"], params["itemid"])
    tomorrow = datetime.now() + timedelta(days=1)
    morning = datetime(tomorrow.year, tomorrow.month, tomorrow.day, 6, 0, 0)
    # tts is stored as an offset in seconds from the item's "created" stamp
    item["tts"] = morning.timestamp() - item["created"]
    return jsonify(item.item)
|
||||
|
||||
@app.route("/mass-deactivate/", methods=["POST"])
def mass_deactivate():
    """Mark every item listed under "items" in the JSON body inactive.

    Each entry must carry "source" and "itemid". A body without "items" is
    logged as an error and then handled as an empty list. Always responds
    with an empty JSON object.
    """
    params = request.get_json()
    if "items" not in params:
        logger.error("Bad request params: {}".format(params))
    targets = params.get("items", [])
    for info in targets:
        source, itemid = info["source"], info["itemid"]
        item = loader.load_item(source, itemid)
        if item["active"]:
            logger.debug(f"Deactivating {info['source']}/{info['itemid']}")
        item["active"] = False
    return jsonify({})
|
||||
|
||||
@app.route("/callback/", methods=["POST"])
def callback():
    """Run the source-defined callback for a single item.

    Expects a JSON body with "source" and "itemid" keys, passes both to
    sources.item_callback and responds with an empty JSON object. A body that
    is empty or lacks either key is logged and rejected with HTTP 400.
    """
    params = request.get_json()
    # Both keys are required. The previous `and` check only fired when both
    # were absent, and even then fell through to a KeyError on the next line.
    if not params or "source" not in params or "itemid" not in params:
        logger.error("Bad request params: {}".format(params))
        return abort(400)
    logger.info(
        "Executing callback for {}/{}".format(params["source"], params["itemid"])
    )
    sources.item_callback(params["source"], params["itemid"])
    return jsonify({})
|
||||
|
||||
@app.route("/cache/<path:cache_path>")
def cache(cache_path):
    """Return the raw bytes of a file stored under CACHE_PATH.

    cache_path is taken from the request URL. Responds with 404 when the
    path does not name a regular file, or when it would resolve to a
    location outside CACHE_PATH (for example through ".." segments).
    """
    cache_root = os.path.abspath(CACHE_PATH)
    path = os.path.abspath(os.path.join(cache_root, cache_path))
    # cache_path is untrusted input: normalise it and make sure the result is
    # still inside the cache directory before touching the file system.
    try:
        inside_cache = os.path.commonpath([cache_root, path]) == cache_root
    except ValueError:
        # commonpath raises when the two paths cannot be compared
        # (e.g. different drives on Windows)
        inside_cache = False
    if not inside_cache or not os.path.isfile(path):
        return abort(404)
    with open(path, "rb") as f:
        return f.read()
|
||||
|
||||
|
||||
|
|
|
@ -14,53 +14,62 @@ def command_test(args):
|
|||
"""Echo config file values."""
|
||||
from inquisitor.configs.resolver import (
|
||||
config_path,
|
||||
CONFIG_DATA, data_path,
|
||||
CONFIG_SOURCES, source_path,
|
||||
CONFIG_CACHE, cache_path,
|
||||
CONFIG_LOGFILE, log_file,
|
||||
CONFIG_VERBOSE, is_verbose,
|
||||
CONFIG_SUBFEEDS, subfeeds,
|
||||
CONFIG_DATA,
|
||||
data_path,
|
||||
CONFIG_SOURCES,
|
||||
source_path,
|
||||
CONFIG_CACHE,
|
||||
cache_path,
|
||||
CONFIG_LOGFILE,
|
||||
log_file,
|
||||
CONFIG_VERBOSE,
|
||||
is_verbose,
|
||||
CONFIG_SUBFEEDS,
|
||||
subfeeds,
|
||||
)
|
||||
subfeeds = '; '.join(
|
||||
'{0}: {1}'.format(
|
||||
sf_name,
|
||||
' '.join(sf_sources)
|
||||
|
||||
subfeeds = (
|
||||
"; ".join(
|
||||
"{0}: {1}".format(sf_name, " ".join(sf_sources))
|
||||
for sf_name, sf_sources in subfeeds.items()
|
||||
)
|
||||
for sf_name, sf_sources
|
||||
in subfeeds.items()
|
||||
) if subfeeds else ''
|
||||
print(f'Inquisitor configured from {config_path}')
|
||||
print(f' {CONFIG_DATA} = {data_path}')
|
||||
print(f' {CONFIG_SOURCES} = {source_path}')
|
||||
print(f' {CONFIG_CACHE} = {cache_path}')
|
||||
print(f' {CONFIG_LOGFILE} = {log_file}')
|
||||
print(f' {CONFIG_VERBOSE} = {is_verbose}')
|
||||
print(f' {CONFIG_SUBFEEDS} = {subfeeds}')
|
||||
if subfeeds
|
||||
else ""
|
||||
)
|
||||
print(f"Inquisitor configured from {config_path}")
|
||||
print(f" {CONFIG_DATA} = {data_path}")
|
||||
print(f" {CONFIG_SOURCES} = {source_path}")
|
||||
print(f" {CONFIG_CACHE} = {cache_path}")
|
||||
print(f" {CONFIG_LOGFILE} = {log_file}")
|
||||
print(f" {CONFIG_VERBOSE} = {is_verbose}")
|
||||
print(f" {CONFIG_SUBFEEDS} = {subfeeds}")
|
||||
return 0
|
||||
|
||||
|
||||
def command_update(args):
    """Fetch and store new items from the specified sources."""
    # NOTE: the docstring above doubles as the argparse description, so it is
    # user-facing text and is kept verbatim.
    #
    # args is the list of command-line tokens following "update". Returns 0
    # on success (or after printing help when no source is named) and -1 when
    # the dungeon or sources directory cannot be found.
    parser = argparse.ArgumentParser(
        prog="inquisitor update", description=command_update.__doc__, add_help=False
    )
    parser.add_argument("source", nargs="*", help="Sources to update.")
    args = parser.parse_args(args)

    if not args.source:
        parser.print_help()
        return 0
    if not os.path.isdir(DUNGEON_PATH):
        logger.error(
            "Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
        )
        return -1
    if not os.path.isdir(SOURCES_PATH):
        logger.error(
            "Couldn't find sources. Set INQUISITOR_SOURCES or cd to parent folder of ./sources"
        )
        # Bail out like the dungeon check does instead of attempting an
        # update with no sources directory.
        return -1

    # Update sources (imported lazily, only once the paths are known good)
    from inquisitor.sources import update_sources

    update_sources(*args.source)
    return 0
|
||||
|
||||
|
@ -70,25 +79,27 @@ def command_deactivate(args):
|
|||
parser = argparse.ArgumentParser(
|
||||
prog="inquisitor deactivate",
|
||||
description=command_deactivate.__doc__,
|
||||
add_help=False)
|
||||
parser.add_argument("source",
|
||||
nargs="*",
|
||||
help="Cells to deactivate.")
|
||||
parser.add_argument("--tag",
|
||||
help="Only deactivate items with this tag")
|
||||
parser.add_argument("--title",
|
||||
help="Only deactivate items with titles containing this substring")
|
||||
add_help=False,
|
||||
)
|
||||
parser.add_argument("source", nargs="*", help="Cells to deactivate.")
|
||||
parser.add_argument("--tag", help="Only deactivate items with this tag")
|
||||
parser.add_argument(
|
||||
"--title", help="Only deactivate items with titles containing this substring"
|
||||
)
|
||||
args = parser.parse_args(args)
|
||||
|
||||
if len(args.source) == 0:
|
||||
parser.print_help()
|
||||
return 0
|
||||
if not os.path.isdir(DUNGEON_PATH):
|
||||
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
|
||||
logger.error(
|
||||
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
|
||||
)
|
||||
return -1
|
||||
|
||||
# Deactivate all items in each source.
|
||||
from inquisitor.loader import load_items
|
||||
|
||||
for source_name in args.source:
|
||||
path = os.path.join(DUNGEON_PATH, source_name)
|
||||
if not os.path.isdir(path):
|
||||
|
@ -96,12 +107,12 @@ def command_deactivate(args):
|
|||
count = 0
|
||||
items, _ = load_items(source_name)
|
||||
for item in items.values():
|
||||
if args.tag and args.tag not in item['tags']:
|
||||
if args.tag and args.tag not in item["tags"]:
|
||||
continue
|
||||
if args.title and args.title not in item['title']:
|
||||
if args.title and args.title not in item["title"]:
|
||||
continue
|
||||
if item['active']:
|
||||
item['active'] = False
|
||||
if item["active"]:
|
||||
item["active"] = False
|
||||
count += 1
|
||||
logger.info("Deactivated {} items in '{}'".format(count, source_name))
|
||||
|
||||
|
@ -111,9 +122,8 @@ def command_deactivate(args):
|
|||
def command_add(args):
|
||||
"""Creates an item."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="inquisitor add",
|
||||
description=command_add.__doc__,
|
||||
add_help=False)
|
||||
prog="inquisitor add", description=command_add.__doc__, add_help=False
|
||||
)
|
||||
parser.add_argument("--id", help="String")
|
||||
parser.add_argument("--source", help="String")
|
||||
parser.add_argument("--title", help="String")
|
||||
|
@ -125,40 +135,55 @@ def command_add(args):
|
|||
parser.add_argument("--ttl", type=int, help="Cleanup protection in seconds")
|
||||
parser.add_argument("--ttd", type=int, help="Cleanup force in seconds")
|
||||
parser.add_argument("--tts", type=int, help="Display delay in seconds")
|
||||
parser.add_argument("--create", action="store_true", help="Create source if it doesn't exist")
|
||||
parser.add_argument(
|
||||
"--create", action="store_true", help="Create source if it doesn't exist"
|
||||
)
|
||||
args = parser.parse_args(args)
|
||||
|
||||
if not args.title:
|
||||
parser.print_help()
|
||||
return 0
|
||||
if not os.path.isdir(DUNGEON_PATH):
|
||||
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
|
||||
logger.error(
|
||||
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
|
||||
)
|
||||
return -1
|
||||
|
||||
source = args.source or 'inquisitor'
|
||||
source = args.source or "inquisitor"
|
||||
cell_path = os.path.join(DUNGEON_PATH, source)
|
||||
if args.create:
|
||||
from inquisitor.sources import ensure_cell
|
||||
|
||||
ensure_cell(source)
|
||||
elif not os.path.isdir(cell_path):
|
||||
logger.error("Source '{}' does not exist".format(source))
|
||||
return -1
|
||||
|
||||
item = {
|
||||
'id': args.id or '{:x}'.format(random.getrandbits(16 * 4)),
|
||||
'source': source,
|
||||
"id": args.id or "{:x}".format(random.getrandbits(16 * 4)),
|
||||
"source": source,
|
||||
}
|
||||
if args.title: item['title'] = str(args.title)
|
||||
if args.link: item['link'] = str(args.link)
|
||||
if args.time: item['time'] = int(args.time)
|
||||
if args.author: item['author'] = str(args.author)
|
||||
if args.body: item['body'] = str(args.body)
|
||||
if args.tags: item['tags'] = [str(tag) for tag in args.tags.split(",")]
|
||||
if args.ttl: item['ttl'] = int(args.ttl)
|
||||
if args.ttd: item['ttd'] = int(args.ttd)
|
||||
if args.tts: item['tts'] = int(args.tts)
|
||||
if args.title:
|
||||
item["title"] = str(args.title)
|
||||
if args.link:
|
||||
item["link"] = str(args.link)
|
||||
if args.time:
|
||||
item["time"] = int(args.time)
|
||||
if args.author:
|
||||
item["author"] = str(args.author)
|
||||
if args.body:
|
||||
item["body"] = str(args.body)
|
||||
if args.tags:
|
||||
item["tags"] = [str(tag) for tag in args.tags.split(",")]
|
||||
if args.ttl:
|
||||
item["ttl"] = int(args.ttl)
|
||||
if args.ttd:
|
||||
item["ttd"] = int(args.ttd)
|
||||
if args.tts:
|
||||
item["tts"] = int(args.tts)
|
||||
|
||||
from inquisitor.loader import new_item
|
||||
|
||||
saved_item = new_item(source, item)
|
||||
logger.info(saved_item)
|
||||
|
||||
|
@ -166,7 +191,9 @@ def command_add(args):
|
|||
def command_feed(args):
|
||||
"""Print the current feed."""
|
||||
if not os.path.isdir(DUNGEON_PATH):
|
||||
logger.error("Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon")
|
||||
logger.error(
|
||||
"Couldn't find dungeon. Set INQUISITOR_DUNGEON or cd to parent folder of ./dungeon"
|
||||
)
|
||||
return -1
|
||||
|
||||
import shutil
|
||||
|
@ -179,50 +206,56 @@ def command_feed(args):
|
|||
return 0
|
||||
|
||||
if errors:
|
||||
items.insert(0, {
|
||||
'title': '{} read errors: {}'.format(len(errors), ' '.join(errors)),
|
||||
'body': "\n".join(errors)
|
||||
})
|
||||
items.insert(
|
||||
0,
|
||||
{
|
||||
"title": "{} read errors: {}".format(len(errors), " ".join(errors)),
|
||||
"body": "\n".join(errors),
|
||||
},
|
||||
)
|
||||
|
||||
size = shutil.get_terminal_size((80, 20))
|
||||
width = min(80, size.columns)
|
||||
|
||||
for item in items:
|
||||
title = item['title'] if 'title' in item else ""
|
||||
title = item["title"] if "title" in item else ""
|
||||
titles = [title]
|
||||
while len(titles[-1]) > width - 4:
|
||||
i = titles[-1][:width - 4].rfind(' ')
|
||||
i = titles[-1][: width - 4].rfind(" ")
|
||||
titles = titles[:-1] + [titles[-1][:i].strip(), titles[-1][i:].strip()]
|
||||
print('+' + (width - 2) * '-' + '+')
|
||||
print("+" + (width - 2) * "-" + "+")
|
||||
for title in titles:
|
||||
print("| {0:<{1}} |".format(title, width - 4))
|
||||
print("|{0:<{1}}|".format("", width - 2))
|
||||
info1 = ""
|
||||
if 'author' in title and item['author']:
|
||||
info1 += item['author'] + " "
|
||||
if 'time' in item and item['time']:
|
||||
info1 += timestamp.stamp_to_readable(item['time'])
|
||||
if "author" in title and item["author"]:
|
||||
info1 += item["author"] + " "
|
||||
if "time" in item and item["time"]:
|
||||
info1 += timestamp.stamp_to_readable(item["time"])
|
||||
print("| {0:<{1}} |".format(info1, width - 4))
|
||||
created = timestamp.stamp_to_readable(item['created']) if 'created' in item else ""
|
||||
created = (
|
||||
timestamp.stamp_to_readable(item["created"]) if "created" in item else ""
|
||||
)
|
||||
info2 = "{0} {1} {2}".format(
|
||||
item.get('source', ''), item.get('id', ''), created)
|
||||
item.get("source", ""), item.get("id", ""), created
|
||||
)
|
||||
print("| {0:<{1}} |".format(info2, width - 4))
|
||||
print('+' + (width - 2) * '-' + '+')
|
||||
print("+" + (width - 2) * "-" + "+")
|
||||
print()
|
||||
|
||||
|
||||
def command_run(args):
|
||||
"""Run the default Flask server."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="inquisitor run",
|
||||
description=command_run.__doc__,
|
||||
add_help=False)
|
||||
prog="inquisitor run", description=command_run.__doc__, add_help=False
|
||||
)
|
||||
parser.add_argument("--debug", action="store_true")
|
||||
parser.add_argument("--port", type=int, default=5000)
|
||||
args = parser.parse_args(args)
|
||||
|
||||
try:
|
||||
from inquisitor.app import app
|
||||
|
||||
app.run(port=args.port, debug=args.debug)
|
||||
return 0
|
||||
except Exception as e:
|
||||
|
@ -245,38 +278,41 @@ def main():
|
|||
"""CLI entry point"""
|
||||
# Enable piping
|
||||
from signal import signal, SIGPIPE, SIG_DFL
|
||||
|
||||
signal(SIGPIPE, SIG_DFL)
|
||||
|
||||
# Collect the commands from this module
|
||||
import inquisitor.cli
|
||||
|
||||
commands = {
|
||||
name[8:]: func
|
||||
for name, func in vars(inquisitor.cli).items()
|
||||
if name.startswith('command_')
|
||||
if name.startswith("command_")
|
||||
}
|
||||
descriptions = "\n".join([
|
||||
"- {0}: {1}".format(name, func.__doc__)
|
||||
for name, func in commands.items()])
|
||||
descriptions = "\n".join(
|
||||
["- {0}: {1}".format(name, func.__doc__) for name, func in commands.items()]
|
||||
)
|
||||
|
||||
# Set up the parser
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Available commands:\n{}\n".format(descriptions),
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
add_help=False)
|
||||
parser.add_argument("command",
|
||||
add_help=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"command",
|
||||
nargs="?",
|
||||
default="help",
|
||||
help="The command to execute",
|
||||
choices=commands,
|
||||
metavar="command")
|
||||
parser.add_argument("args",
|
||||
nargs=argparse.REMAINDER,
|
||||
help="Command arguments",
|
||||
metavar="args")
|
||||
parser.add_argument("-v",
|
||||
action="store_true",
|
||||
dest="verbose",
|
||||
help="Enable debug logging")
|
||||
metavar="command",
|
||||
)
|
||||
parser.add_argument(
|
||||
"args", nargs=argparse.REMAINDER, help="Command arguments", metavar="args"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-v", action="store_true", dest="verbose", help="Enable debug logging"
|
||||
)
|
||||
|
||||
# Extract the usage print for command_help
|
||||
global print_usage
|
||||
|
|
|
@ -1,10 +1,5 @@
|
|||
from .resolver import data_path as DUNGEON_PATH
|
||||
from .resolver import source_path as SOURCES_PATH
|
||||
from .resolver import cache_path as CACHE_PATH
|
||||
from .resolver import (
|
||||
logger,
|
||||
subfeeds)
|
||||
from .resolver import (
|
||||
add_logging_handler,
|
||||
init_default_logging,
|
||||
get_subfeed_overrides)
|
||||
from .resolver import logger, subfeeds
|
||||
from .resolver import add_logging_handler, init_default_logging, get_subfeed_overrides
|
||||
|
|
|
@ -4,34 +4,34 @@ import logging
|
|||
|
||||
# Constants governing config resolution:
# Name of the environment variable holding the path to the config file, and
# the path used when that variable is unset or empty. The config file
# contains key-value pairs of the other settings.
CONFIG_ENVVAR = "INQUISITOR_CONFIG"
DEFAULT_CONFIG_PATH = "/etc/inquisitor.conf"

# Path to the folder where items are stored
CONFIG_DATA = "DataPath"
DEFAULT_DATA_PATH = "/var/inquisitor/data/"

# Path to the folder where source modules are stored
CONFIG_SOURCES = "SourcePath"
DEFAULT_SOURCES_PATH = "/var/inquisitor/sources/"

# Path to the folder where cached files are stored
CONFIG_CACHE = "CachePath"
DEFAULT_CACHE_PATH = "/var/inquisitor/cache/"

# Path to a log file where logging will be redirected
CONFIG_LOGFILE = "LogFile"
DEFAULT_LOG_FILE = None

# Whether logging is verbose; the value is the string "true" or "false"
CONFIG_VERBOSE = "Verbose"
DEFAULT_VERBOSITY = "false"

# Subfeed source lists, with each subfeed config separated by lines and
# sources within a subfeed separated by spaces
CONFIG_SUBFEEDS = "Subfeeds"
DEFAULT_SUBFEEDS = None
SUBFEED_CONFIG_FILE = "subfeeds.conf"
|
||||
|
||||
|
||||
def read_config_file(config_path):
|
||||
|
@ -43,79 +43,78 @@ def read_config_file(config_path):
|
|||
# Parse the config file into key-value pairs
|
||||
if not os.path.isfile(config_path):
|
||||
|
||||
raise FileNotFoundError(f'No config file found at {config_path}, try setting {CONFIG_ENVVAR}')
|
||||
raise FileNotFoundError(
|
||||
f"No config file found at {config_path}, try setting {CONFIG_ENVVAR}"
|
||||
)
|
||||
accumulated_configs = {}
|
||||
current_key = None
|
||||
with open(config_path, 'r', encoding='utf8') as cfg:
|
||||
with open(config_path, "r", encoding="utf8") as cfg:
|
||||
line_no = 0
|
||||
for line in cfg:
|
||||
line_no += 1
|
||||
# Skip blank lines and comments
|
||||
if not line.strip() or line.lstrip().startswith('#'):
|
||||
if not line.strip() or line.lstrip().startswith("#"):
|
||||
continue
|
||||
# Accumulate config keyvalue pairs
|
||||
if '=' in line:
|
||||
if "=" in line:
|
||||
# "key = value" begins a new keyvalue pair
|
||||
current_key, value = line.split('=', maxsplit=1)
|
||||
current_key, value = line.split("=", maxsplit=1)
|
||||
current_key = current_key.strip()
|
||||
accumulated_configs[current_key] = value.strip()
|
||||
else:
|
||||
# If there's no '=' and no previous key, throw
|
||||
if not current_key:
|
||||
raise ValueError(f'Invalid config format on line {line_no}')
|
||||
raise ValueError(f"Invalid config format on line {line_no}")
|
||||
else:
|
||||
accumulated_configs[current_key] += '\n' + line.strip()
|
||||
accumulated_configs[current_key] += "\n" + line.strip()
|
||||
|
||||
return accumulated_configs
|
||||
|
||||
|
||||
def parse_subfeed_value(value):
    """Parse a multi-line subfeed config value into a dict.

    Each non-blank line of value has the form "name: source1 source2 ...".
    Returns a dict mapping each stripped subfeed name to its list of source
    names; only the first ":" on a line separates name from sources.

    Raises ValueError for a non-blank line that contains no ":".
    """
    subfeeds = {}
    for raw_line in value.split("\n"):
        sf_def = raw_line.strip()
        if not sf_def:
            continue
        if ":" not in sf_def:
            raise ValueError(f"Invalid subfeed definition: {sf_def}")
        sf_name, sf_sources = sf_def.split(":", maxsplit=1)
        # split() with no separator already drops all surrounding whitespace,
        # so the individual source names need no further stripping.
        subfeeds[sf_name.strip()] = sf_sources.split()
    return subfeeds
|
||||
|
||||
|
||||
# Read envvar for config file location, with fallback to default
|
||||
config_path = os.path.abspath(
|
||||
os.environ.get(CONFIG_ENVVAR) or
|
||||
DEFAULT_CONFIG_PATH
|
||||
)
|
||||
config_path = os.path.abspath(os.environ.get(CONFIG_ENVVAR) or DEFAULT_CONFIG_PATH)
|
||||
|
||||
configs = read_config_file(config_path)
|
||||
|
||||
# Extract and validate config values
|
||||
data_path = configs.get(CONFIG_DATA) or DEFAULT_DATA_PATH
|
||||
if not os.path.isabs(data_path):
|
||||
raise ValueError(f'Non-absolute data path: {data_path}')
|
||||
raise ValueError(f"Non-absolute data path: {data_path}")
|
||||
if not os.path.isdir(data_path):
|
||||
raise FileNotFoundError(f'Cannot find directory {data_path}')
|
||||
raise FileNotFoundError(f"Cannot find directory {data_path}")
|
||||
|
||||
source_path = configs.get(CONFIG_SOURCES) or DEFAULT_SOURCES_PATH
|
||||
if not os.path.isabs(source_path):
|
||||
raise ValueError(f'Non-absolute source path: {source_path}')
|
||||
raise ValueError(f"Non-absolute source path: {source_path}")
|
||||
if not os.path.isdir(source_path):
|
||||
raise FileNotFoundError(f'Cannot find directory {source_path}')
|
||||
raise FileNotFoundError(f"Cannot find directory {source_path}")
|
||||
|
||||
cache_path = configs.get(CONFIG_CACHE) or DEFAULT_CACHE_PATH
|
||||
if not os.path.isabs(cache_path):
|
||||
raise ValueError(f'Non-absolute cache path: {cache_path}')
|
||||
raise ValueError(f"Non-absolute cache path: {cache_path}")
|
||||
if not os.path.isdir(cache_path):
|
||||
raise FileNotFoundError(f'Cannot find directory {cache_path}')
|
||||
raise FileNotFoundError(f"Cannot find directory {cache_path}")
|
||||
|
||||
log_file = configs.get(CONFIG_LOGFILE) or DEFAULT_LOG_FILE
|
||||
if log_file and not os.path.isabs(log_file):
|
||||
raise ValueError(f'Non-absolute log file path: {log_file}')
|
||||
raise ValueError(f"Non-absolute log file path: {log_file}")
|
||||
|
||||
is_verbose = configs.get(CONFIG_VERBOSE) or DEFAULT_VERBOSITY
|
||||
if is_verbose != 'true' and is_verbose != 'false':
|
||||
if is_verbose != "true" and is_verbose != "false":
|
||||
raise ValueError(f'Invalid verbose value (must be "true" or "false"): {is_verbose}')
|
||||
is_verbose = (is_verbose == 'true')
|
||||
is_verbose = is_verbose == "true"
|
||||
|
||||
subfeeds = configs.get(CONFIG_SUBFEEDS) or DEFAULT_SUBFEEDS
|
||||
if subfeeds:
|
||||
|
@ -143,35 +142,34 @@ def get_subfeed_overrides():
|
|||
logger = logging.getLogger("inquisitor")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
def add_logging_handler(verbose, log_filename):
    """
    Adds a logging handler according to the given settings

    verbose: when truthy the handler emits DEBUG and above with a detailed
        format (timestamp, level, file name, line number); otherwise it
        emits INFO and above with a short "[level] message" format.
    log_filename: when truthy, records go to a rotating log file at that
        path; otherwise they go to a default StreamHandler.
    """
    # A bare `import logging` does not guarantee that the handlers submodule
    # has been loaded, so import it explicitly before using it.
    import logging.handlers

    if verbose:
        log_format = "[{asctime}] [{levelname}:{filename}:{lineno}] {message}"
        log_level = logging.DEBUG
    else:
        log_format = "[{levelname}] {message}"
        log_level = logging.INFO
    formatter = logging.Formatter(log_format, style="{")

    if log_filename:
        handler = logging.handlers.RotatingFileHandler(
            log_filename,
            encoding="utf8",
            maxBytes=2**22,  # 4 MB per log file
            backupCount=4,  # keep 4 rotated files next to the live one
        )
    else:
        handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    handler.setLevel(log_level)

    logger.addHandler(handler)
|
||||
|
||||
|
||||
def init_default_logging():
    """Add a logging handler built from the module-level is_verbose and log_file."""
    add_logging_handler(verbose=is_verbose, log_filename=log_file)
|
||||
|
|
|
@ -8,19 +8,20 @@ from inquisitor.configs import DUNGEON_PATH, logger
|
|||
|
||||
logger = logging.getLogger("inquisitor")
|
||||
|
||||
|
||||
def as_item(title, body=None):
    """Record a message as a new item in the "inquisitor" cell.

    Builds an active item tagged "inquisitor" and "error" with a random
    64-bit hexadecimal id and the current timestamp, logs its JSON form at
    error level, and writes it to DUNGEON_PATH/inquisitor/<id>.item.

    title: the item title.
    body: optional text; when not None it is stored wrapped in <pre> tags.
    """
    iid = format(random.getrandbits(16 * 4), "x")
    item = dict(
        id=iid,
        source="inquisitor",
        title=title,
        active=True,
        created=timestamp.now(),
        tags=["inquisitor", "error"],
    )
    if body is not None:
        item["body"] = "<pre>{}</pre>".format(body)

    item_file = iid + ".item"
    path = os.path.join(DUNGEON_PATH, "inquisitor", item_file)

    # Compact form goes to the log, indented form to disk
    logger.error(json.dumps(item))
    pretty = json.dumps(item, indent=2)
    with open(path, "w") as f:
        f.write(pretty)
|
||||
|
|
|
@ -7,7 +7,7 @@ from inquisitor import error
|
|||
from inquisitor import timestamp
|
||||
|
||||
|
||||
class WritethroughDict():
|
||||
class WritethroughDict:
|
||||
"""A wrapper for a dictionary saved to the file system."""
|
||||
|
||||
@staticmethod
|
||||
|
@ -59,7 +59,7 @@ class WritethroughDict():
|
|||
|
||||
def flush(self):
|
||||
s = json.dumps(self.item, indent=2)
|
||||
with open(self.path, 'w', encoding="utf8") as f:
|
||||
with open(self.path, "w", encoding="utf8") as f:
|
||||
f.write(s)
|
||||
|
||||
|
||||
|
@ -71,7 +71,7 @@ def load_state(source_name):
|
|||
|
||||
def load_item(source_name, item_id):
|
||||
"""Loads an item from a source."""
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
|
||||
return WritethroughDict.load(item_path)
|
||||
|
||||
|
||||
|
@ -79,7 +79,7 @@ def item_exists(source_name, item_id):
|
|||
"""
|
||||
Checks for the existence of an item.
|
||||
"""
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
|
||||
return os.path.isfile(item_path)
|
||||
|
||||
|
||||
|
@ -91,7 +91,7 @@ def get_item_ids(cell_name):
|
|||
return [
|
||||
filename[:-5]
|
||||
for filename in os.listdir(cell_path)
|
||||
if filename.endswith('.item')
|
||||
if filename.endswith(".item")
|
||||
]
|
||||
|
||||
|
||||
|
@ -101,30 +101,30 @@ def new_item(source_name, item):
|
|||
Initializes other fields to their default values.
|
||||
"""
|
||||
# id is required
|
||||
if 'id' not in item:
|
||||
raise Exception(f'Cannot create item with no id. Value = {item}')
|
||||
if "id" not in item:
|
||||
raise Exception(f"Cannot create item with no id. Value = {item}")
|
||||
|
||||
# source must be filled in, so if it is absent it is auto-populated with
|
||||
# source_name. Note: this allows sources to fill in a different source.
|
||||
if 'source' not in item:
|
||||
item['source'] = source_name
|
||||
if "source" not in item:
|
||||
item["source"] = source_name
|
||||
|
||||
# active is forced to True for new items
|
||||
item['active'] = True
|
||||
item["active"] = True
|
||||
|
||||
# created is forced to the current timestamp
|
||||
item['created'] = timestamp.now()
|
||||
item["created"] = timestamp.now()
|
||||
|
||||
# title is auto-populated with the id if missing
|
||||
if 'title' not in item:
|
||||
item['title'] = item['id']
|
||||
if "title" not in item:
|
||||
item["title"] = item["id"]
|
||||
|
||||
# tags is auto-populated if missing (not if empty!)
|
||||
if 'tags' not in item:
|
||||
item['tags'] = [source_name]
|
||||
if "tags" not in item:
|
||||
item["tags"] = [source_name]
|
||||
|
||||
# All other fields are optional.
|
||||
item_path = os.path.join(DUNGEON_PATH, item['source'], f'{item["id"]}.item')
|
||||
item_path = os.path.join(DUNGEON_PATH, item["source"], f'{item["id"]}.item')
|
||||
return WritethroughDict.create(item_path, item)
|
||||
|
||||
|
||||
|
@ -132,7 +132,7 @@ def delete_item(source_name, item_id):
|
|||
"""
|
||||
Delete an item.
|
||||
"""
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f'{item_id}.item')
|
||||
item_path = os.path.join(DUNGEON_PATH, source_name, f"{item_id}.item")
|
||||
os.remove(item_path)
|
||||
|
||||
|
||||
|
@ -144,10 +144,10 @@ def load_items(source_name):
|
|||
items = {}
|
||||
errors = []
|
||||
for filename in os.listdir(cell_path):
|
||||
if filename.endswith('.item'):
|
||||
if filename.endswith(".item"):
|
||||
try:
|
||||
item = load_item(source_name, filename[:-5])
|
||||
items[item['id']] = item
|
||||
items[item["id"]] = item
|
||||
except Exception:
|
||||
errors.append(filename)
|
||||
return items, errors
|
||||
|
@ -165,20 +165,20 @@ def load_active_items(source_names):
|
|||
for source_name in check_list:
|
||||
source_path = os.path.join(DUNGEON_PATH, source_name)
|
||||
if not os.path.isdir(source_path):
|
||||
logger.warning(f'Skipping nonexistent source {source_name}')
|
||||
logger.warning(f"Skipping nonexistent source {source_name}")
|
||||
continue
|
||||
for filename in os.listdir(source_path):
|
||||
if not filename.endswith('.item'):
|
||||
if not filename.endswith(".item"):
|
||||
continue
|
||||
try:
|
||||
item = load_item(source_name, filename[:-5])
|
||||
# The time-to-show field hides items until an expiry date.
|
||||
if 'tts' in item:
|
||||
tts_date = item['created'] + item['tts']
|
||||
if "tts" in item:
|
||||
tts_date = item["created"] + item["tts"]
|
||||
if now < tts_date:
|
||||
continue
|
||||
# Don't show inactive items
|
||||
if not item['active']:
|
||||
if not item["active"]:
|
||||
continue
|
||||
items.append(item)
|
||||
except Exception:
|
||||
|
|
|
@ -10,20 +10,20 @@ from inquisitor.configs import SOURCES_PATH, DUNGEON_PATH, logger
|
|||
|
||||
|
||||
USE_NEWEST = (
|
||||
'title',
|
||||
'tags',
|
||||
'link',
|
||||
'time'
|
||||
'author',
|
||||
'body',
|
||||
'ttl',
|
||||
'ttd',
|
||||
'tts',
|
||||
"title",
|
||||
"tags",
|
||||
"link",
|
||||
"time" "author",
|
||||
"body",
|
||||
"ttl",
|
||||
"ttd",
|
||||
"tts",
|
||||
)
|
||||
|
||||
|
||||
class InquisitorStubSource:
|
||||
"""A dummy source-like object for clearing out ad-hoc inquisitor items"""
|
||||
|
||||
def fetch_new(self, state):
|
||||
return []
|
||||
|
||||
|
@ -36,9 +36,9 @@ def ensure_cell(name):
|
|||
if not os.path.isdir(cell_path):
|
||||
logger.info(f'Creating cell for source "{name}"')
|
||||
os.mkdir(cell_path)
|
||||
state_path = os.path.join(cell_path, 'state')
|
||||
state_path = os.path.join(cell_path, "state")
|
||||
if not os.path.isfile(state_path):
|
||||
with open(state_path, 'w', encoding='utf8') as state:
|
||||
with open(state_path, "w", encoding="utf8") as state:
|
||||
json.dump({}, state)
|
||||
|
||||
|
||||
|
@ -52,8 +52,8 @@ def update_sources(*source_names):
|
|||
source_module = load_source(source_name)
|
||||
except Exception:
|
||||
error.as_item(
|
||||
f'Error importing source "{source_name}"',
|
||||
traceback.format_exc())
|
||||
f'Error importing source "{source_name}"', traceback.format_exc()
|
||||
)
|
||||
continue
|
||||
|
||||
# If it doesn't have a cell yet, create one
|
||||
|
@ -61,8 +61,8 @@ def update_sources(*source_names):
|
|||
ensure_cell(source_name)
|
||||
except Exception:
|
||||
error.as_item(
|
||||
f'Error initializing source "{source_name}"',
|
||||
traceback.format_exc())
|
||||
f'Error initializing source "{source_name}"', traceback.format_exc()
|
||||
)
|
||||
continue
|
||||
|
||||
# Update the source
|
||||
|
@ -71,8 +71,8 @@ def update_sources(*source_names):
|
|||
update_source(source_name, source_module)
|
||||
except Exception:
|
||||
error.as_item(
|
||||
f'Error updating source "{source_name}"',
|
||||
traceback.format_exc())
|
||||
f'Error updating source "{source_name}"', traceback.format_exc()
|
||||
)
|
||||
|
||||
|
||||
def load_source(source_name):
|
||||
|
@ -80,7 +80,7 @@ def load_source(source_name):
|
|||
Attempts to load the source module with the given name.
|
||||
Raises an exception on failure.
|
||||
"""
|
||||
if source_name == 'inquisitor':
|
||||
if source_name == "inquisitor":
|
||||
return InquisitorStubSource()
|
||||
|
||||
cwd = os.getcwd()
|
||||
|
@ -92,7 +92,7 @@ def load_source(source_name):
|
|||
sys.path.insert(0, SOURCES_PATH)
|
||||
|
||||
# Check if the named source is present.
|
||||
source_file_name = source_name + '.py'
|
||||
source_file_name = source_name + ".py"
|
||||
if not os.path.isfile(source_file_name):
|
||||
raise FileNotFoundError(f'Missing "{source_name}" in "{SOURCES_PATH}"')
|
||||
|
||||
|
@ -104,7 +104,7 @@ def load_source(source_name):
|
|||
itemsource = importlib.import_module(source_name)
|
||||
|
||||
# Require fetch_new().
|
||||
if not hasattr(itemsource, 'fetch_new'):
|
||||
if not hasattr(itemsource, "fetch_new"):
|
||||
raise ImportError(f'Missing fetch_new in "{source_file_name}"')
|
||||
|
||||
return itemsource
|
||||
|
@ -121,14 +121,14 @@ def update_source(source_name, source):
|
|||
"""
|
||||
# Get a list of item ids that already existed in this source's cell.
|
||||
prior_ids = loader.get_item_ids(source_name)
|
||||
logger.debug(f'Found {len(prior_ids)} prior items')
|
||||
logger.debug(f"Found {len(prior_ids)} prior items")
|
||||
|
||||
# Get the feed items from the source's fetch method.
|
||||
state = loader.load_state(source_name)
|
||||
fetched = source.fetch_new(state)
|
||||
state.flush()
|
||||
logger.debug(f'Fetched {len(fetched)} items')
|
||||
fetched_items = {item['id']: item for item in fetched}
|
||||
logger.debug(f"Fetched {len(fetched)} items")
|
||||
fetched_items = {item["id"]: item for item in fetched}
|
||||
|
||||
# Determine which items are new and which are updates.
|
||||
# We query the file system here instead of checking against this source's
|
||||
|
@ -137,16 +137,16 @@ def update_source(source_name, source):
|
|||
new_items = []
|
||||
updated_items = []
|
||||
for item in fetched:
|
||||
item_source = item.get('source', source_name)
|
||||
if loader.item_exists(item_source, item['id']):
|
||||
item_source = item.get("source", source_name)
|
||||
if loader.item_exists(item_source, item["id"]):
|
||||
updated_items.append(item)
|
||||
else:
|
||||
new_items.append(item)
|
||||
|
||||
# Write all the new items to the source's cell.
|
||||
has_create_handler = hasattr(source, 'on_create')
|
||||
has_create_handler = hasattr(source, "on_create")
|
||||
for item in new_items:
|
||||
item_source = item.get('source', source_name)
|
||||
item_source = item.get("source", source_name)
|
||||
created_item = loader.new_item(item_source, item)
|
||||
if has_create_handler:
|
||||
# Because some sources do not return items more than once,
|
||||
|
@ -155,45 +155,43 @@ def update_source(source_name, source):
|
|||
source.on_create(state, created_item)
|
||||
except:
|
||||
error.as_item(
|
||||
f'Exception in {source_name}.on_create',
|
||||
traceback.format_exc())
|
||||
f"Exception in {source_name}.on_create", traceback.format_exc()
|
||||
)
|
||||
|
||||
# Update the other items using the fetched items' values.
|
||||
for new_item in updated_items:
|
||||
old_item = loader.load_item(new_item['source'], new_item['id'])
|
||||
old_item = loader.load_item(new_item["source"], new_item["id"])
|
||||
for field in USE_NEWEST:
|
||||
if field in new_item and old_item[field] != new_item[field]:
|
||||
old_item[field] = new_item[field]
|
||||
if 'callback' in new_item:
|
||||
old_callback = old_item.get('callback', {})
|
||||
if "callback" in new_item:
|
||||
old_callback = old_item.get("callback", {})
|
||||
# Because of the way this update happens, any fields that are set
|
||||
# in the callback when the item is new will keep their original
|
||||
# values, as those values reappear in new_item on subsequent
|
||||
# updates.
|
||||
old_item['callback'] = {**old_item['callback'], **new_item['callback']}
|
||||
old_item["callback"] = {**old_item["callback"], **new_item["callback"]}
|
||||
|
||||
# In general, items are removed when they are old (not found in the last
|
||||
# fetch) and inactive. Some item fields can change this basic behavior.
|
||||
del_count = 0
|
||||
now = timestamp.now()
|
||||
has_delete_handler = hasattr(source, 'on_delete')
|
||||
fetched_ids = [item['id'] for item in updated_items]
|
||||
old_item_ids = [
|
||||
item_id for item_id in prior_ids
|
||||
if item_id not in fetched_ids]
|
||||
has_delete_handler = hasattr(source, "on_delete")
|
||||
fetched_ids = [item["id"] for item in updated_items]
|
||||
old_item_ids = [item_id for item_id in prior_ids if item_id not in fetched_ids]
|
||||
for item_id in old_item_ids:
|
||||
item = loader.load_item(source_name, item_id)
|
||||
remove = not item['active']
|
||||
remove = not item["active"]
|
||||
# The time-to-live field protects an item from removal until expiry.
|
||||
# This is mainly used to avoid old items resurfacing when their source
|
||||
# cannot guarantee monotonicity.
|
||||
if 'ttl' in item:
|
||||
ttl_date = item['created'] + item['ttl']
|
||||
if "ttl" in item:
|
||||
ttl_date = item["created"] + item["ttl"]
|
||||
if ttl_date > now:
|
||||
continue
|
||||
# The time-to-die field can force an active item to be removed.
|
||||
if 'ttd' in item:
|
||||
ttd_date = item['created'] + item['ttd']
|
||||
if "ttd" in item:
|
||||
ttd_date = item["created"] + item["ttd"]
|
||||
if ttd_date < now:
|
||||
remove = True
|
||||
# Items to be removed are deleted
|
||||
|
@ -202,27 +200,33 @@ def update_source(source_name, source):
|
|||
if has_delete_handler:
|
||||
# Run the delete handler so exceptions prevent deletions
|
||||
source.on_delete(state, item)
|
||||
loader.delete_item(source_name, item['id'])
|
||||
loader.delete_item(source_name, item["id"])
|
||||
del_count += 1
|
||||
except:
|
||||
error.as_item(
|
||||
f'Failed to delete {source_name}/{item["id"]}',
|
||||
traceback.format_exc())
|
||||
traceback.format_exc(),
|
||||
)
|
||||
|
||||
# Note update timestamp in state
|
||||
state['last_updated'] = timestamp.now()
|
||||
state["last_updated"] = timestamp.now()
|
||||
|
||||
# Log counts
|
||||
logger.info("{} new item{}, {} deleted item{}".format(
|
||||
len(new_items), "s" if len(new_items) != 1 else "",
|
||||
del_count, "s" if del_count != 1 else ""))
|
||||
logger.info(
|
||||
"{} new item{}, {} deleted item{}".format(
|
||||
len(new_items),
|
||||
"s" if len(new_items) != 1 else "",
|
||||
del_count,
|
||||
"s" if del_count != 1 else "",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def item_callback(source_name, itemid):
|
||||
try:
|
||||
# Load the module with the callback function
|
||||
source_module = load_source(source_name)
|
||||
if not hasattr(source_module, 'callback'):
|
||||
if not hasattr(source_module, "callback"):
|
||||
raise ImportError(f"Missing callback in '{source_name}'")
|
||||
# Load the source state and the origin item
|
||||
state = loader.load_state(source_name)
|
||||
|
@ -235,4 +239,5 @@ def item_callback(source_name, itemid):
|
|||
except Exception:
|
||||
error.as_item(
|
||||
f"Error executing callback for {source_name}/{itemid}",
|
||||
traceback.format_exc())
|
||||
traceback.format_exc(),
|
||||
)
|
||||
|
|
|
@ -17,22 +17,22 @@ import requests
|
|||
# Module imports
|
||||
from inquisitor import CACHE_PATH
|
||||
|
||||
logger = logging.getLogger('inquisitor.templates')
|
||||
logger = logging.getLogger("inquisitor.templates")
|
||||
|
||||
|
||||
def cache_image(source, url, filename):
|
||||
# Define some paths
|
||||
path = os.path.join(CACHE_PATH, source)
|
||||
file_path = os.path.join(path, filename)
|
||||
cached_url = f'/cache/{source}/{filename}'
|
||||
cached_url = f"/cache/{source}/{filename}"
|
||||
# Ensure cache folder
|
||||
if not os.path.isdir(path):
|
||||
os.mkdir(path)
|
||||
# Fetch url
|
||||
logger.info(f'Caching {url} to {file_path}')
|
||||
logger.info(f"Caching {url} to {file_path}")
|
||||
response = requests.get(url)
|
||||
# Write file to disk
|
||||
with open(file_path, 'wb') as f:
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
# Return the inquisitor path to the file
|
||||
return cached_url
|
||||
|
@ -43,6 +43,7 @@ class LinearCrawler:
|
|||
An engine for generating items from web sources that link content
|
||||
together in a linear fashion, such as webcomics.
|
||||
"""
|
||||
|
||||
def fetch_new(self, state):
|
||||
items = []
|
||||
max_iter = self.max_iterations() - 1
|
||||
|
@ -66,18 +67,18 @@ class LinearCrawler:
|
|||
|
||||
def try_fetch(self, state):
|
||||
# Check for whether a new page should be crawled
|
||||
if 'current_page' not in state:
|
||||
if "current_page" not in state:
|
||||
next_page = self.get_start_url()
|
||||
else:
|
||||
current = state['current_page']
|
||||
current = state["current_page"]
|
||||
response = requests.get(current)
|
||||
soup = BeautifulSoup(response.text, features='html.parser')
|
||||
soup = BeautifulSoup(response.text, features="html.parser")
|
||||
next_page = self.get_next_page_url(current, soup)
|
||||
if not next_page:
|
||||
return [] # nothing new
|
||||
|
||||
# Download the new page
|
||||
logger.info('Fetching ' + next_page)
|
||||
logger.info("Fetching " + next_page)
|
||||
response = requests.get(next_page)
|
||||
soup = BeautifulSoup(response.text, features="html.parser")
|
||||
|
||||
|
@ -85,20 +86,20 @@ class LinearCrawler:
|
|||
item = self.make_item(next_page, soup)
|
||||
|
||||
# Update the state and return the item
|
||||
state['current_page'] = next_page
|
||||
state["current_page"] = next_page
|
||||
return [item]
|
||||
|
||||
def max_iterations(self):
|
||||
return 3
|
||||
|
||||
def get_start_url(self):
|
||||
raise NotImplementedError('get_start_url is required')
|
||||
raise NotImplementedError("get_start_url is required")
|
||||
|
||||
def get_next_page_url(self, url, soup):
|
||||
raise NotImplementedError('get_next_page_url is required')
|
||||
raise NotImplementedError("get_next_page_url is required")
|
||||
|
||||
def make_item(self, url, soup):
|
||||
raise NotImplementedError('make_item is required')
|
||||
raise NotImplementedError("make_item is required")
|
||||
|
||||
|
||||
class RedditScraper:
|
||||
|
@ -107,11 +108,13 @@ class RedditScraper:
|
|||
Requires defining source, subreddit_name
|
||||
fetch new with RedditScraper.fetch_new(state, __name__, reddit)
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def fetch_new(state, name, reddit):
|
||||
items = []
|
||||
for name, obj in inspect.getmembers(sys.modules[name]):
|
||||
if (inspect.isclass(obj)
|
||||
if (
|
||||
inspect.isclass(obj)
|
||||
and issubclass(obj, RedditScraper)
|
||||
and obj is not RedditScraper
|
||||
):
|
||||
|
@ -124,7 +127,7 @@ class RedditScraper:
|
|||
|
||||
def get_items(self):
|
||||
sub_name = self.subreddit_name
|
||||
logger.info(f'Fetching posts from r/{sub_name}')
|
||||
logger.info(f"Fetching posts from r/{sub_name}")
|
||||
subreddit = self.reddit.subreddit(sub_name)
|
||||
posts = self.subreddit_page(subreddit)
|
||||
items = []
|
||||
|
@ -135,24 +138,28 @@ class RedditScraper:
|
|||
|
||||
def item_from_post(self, post):
|
||||
item = {
|
||||
'source': self.source,
|
||||
'id': post.id,
|
||||
'title': self.get_title(post),
|
||||
'link': self.get_link(post),
|
||||
'time': post.created_utc,
|
||||
'author': '/u/' + (post.author.name if post.author else "[deleted]"),
|
||||
'body': self.get_body(post),
|
||||
'tags': self.get_tags(post),
|
||||
'ttl': self.get_ttl(post),
|
||||
"source": self.source,
|
||||
"id": post.id,
|
||||
"title": self.get_title(post),
|
||||
"link": self.get_link(post),
|
||||
"time": post.created_utc,
|
||||
"author": "/u/" + (post.author.name if post.author else "[deleted]"),
|
||||
"body": self.get_body(post),
|
||||
"tags": self.get_tags(post),
|
||||
"ttl": self.get_ttl(post),
|
||||
}
|
||||
ttl = self.get_ttl(post)
|
||||
if ttl is not None: item['ttl'] = ttl
|
||||
if ttl is not None:
|
||||
item["ttl"] = ttl
|
||||
ttd = self.get_ttd(post)
|
||||
if ttd is not None: item['ttd'] = ttd
|
||||
if ttd is not None:
|
||||
item["ttd"] = ttd
|
||||
tts = self.get_tts(post)
|
||||
if tts is not None: item['tts'] = tts
|
||||
if tts is not None:
|
||||
item["tts"] = tts
|
||||
callback = self.get_callback(post)
|
||||
if callback is not None: item['callback'] = callback
|
||||
if callback is not None:
|
||||
item["callback"] = callback
|
||||
return item
|
||||
|
||||
def subreddit_page(self, subreddit):
|
||||
|
@ -162,48 +169,50 @@ class RedditScraper:
|
|||
return True
|
||||
|
||||
def get_title(self, post):
|
||||
s = '[S] ' if post.spoiler else ''
|
||||
nsfw = '[NSFW] ' if post.over_18 else ''
|
||||
return f'{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}'
|
||||
s = "[S] " if post.spoiler else ""
|
||||
nsfw = "[NSFW] " if post.over_18 else ""
|
||||
return f"{s}{nsfw}/{post.subreddit_name_prefixed}: {post.title}"
|
||||
|
||||
def get_link(self, post):
|
||||
return f'https://reddit.com{post.permalink}'
|
||||
return f"https://reddit.com{post.permalink}"
|
||||
|
||||
def get_body(self, post):
|
||||
parts = []
|
||||
if not post.is_self:
|
||||
parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
|
||||
if hasattr(post, 'preview'):
|
||||
if hasattr(post, "preview"):
|
||||
try:
|
||||
previews = post.preview['images'][0]['resolutions']
|
||||
small_previews = [p for p in previews if p['width'] < 800]
|
||||
preview = sorted(small_previews, key=lambda p:-p['width'])[0]
|
||||
previews = post.preview["images"][0]["resolutions"]
|
||||
small_previews = [p for p in previews if p["width"] < 800]
|
||||
preview = sorted(small_previews, key=lambda p: -p["width"])[0]
|
||||
parts.append(f'<img src="{preview["url"]}">')
|
||||
except:
|
||||
pass
|
||||
if getattr(post, 'is_gallery', False):
|
||||
if getattr(post, "is_gallery", False):
|
||||
try:
|
||||
for gallery_item in post.gallery_data['items']:
|
||||
media_id = gallery_item['media_id']
|
||||
for gallery_item in post.gallery_data["items"]:
|
||||
media_id = gallery_item["media_id"]
|
||||
metadata = post.media_metadata[media_id]
|
||||
small_previews = [p for p in metadata['p'] if p['x'] < 800]
|
||||
preview = sorted(small_previews, key=lambda p:-p['x'])[0]
|
||||
parts.append(f'<i>link:</i> <a href="{metadata["s"]["u"]}">{metadata["s"]["u"]}</a>')
|
||||
small_previews = [p for p in metadata["p"] if p["x"] < 800]
|
||||
preview = sorted(small_previews, key=lambda p: -p["x"])[0]
|
||||
parts.append(
|
||||
f'<i>link:</i> <a href="{metadata["s"]["u"]}">{metadata["s"]["u"]}</a>'
|
||||
)
|
||||
parts.append(f'<img src="{preview["u"]}">')
|
||||
except:
|
||||
pass
|
||||
if post.selftext:
|
||||
limit = post.selftext[1024:].find(' ')
|
||||
limit = post.selftext[1024:].find(" ")
|
||||
preview_body = post.selftext[: 1024 + limit]
|
||||
if len(preview_body) < len(post.selftext):
|
||||
preview_body += '[...]'
|
||||
parts.append(f'<p>{preview_body}</p>')
|
||||
return '<br><hr>'.join(parts)
|
||||
preview_body += "[...]"
|
||||
parts.append(f"<p>{preview_body}</p>")
|
||||
return "<br><hr>".join(parts)
|
||||
|
||||
def get_tags(self, post):
|
||||
tags = ['reddit', post.subreddit_name_prefixed[2:]]
|
||||
tags = ["reddit", post.subreddit_name_prefixed[2:]]
|
||||
if post.over_18:
|
||||
tags.append('nsfw')
|
||||
tags.append("nsfw")
|
||||
return tags
|
||||
|
||||
def get_ttl(self, post):
|
||||
|
@ -219,10 +228,10 @@ class RedditScraper:
|
|||
return None
|
||||
|
||||
def callback(self, state, item):
|
||||
raise NotImplementedError('callback')
|
||||
raise NotImplementedError("callback")
|
||||
|
||||
def on_create(self, state, item):
|
||||
raise NotImplementedError('on_create')
|
||||
raise NotImplementedError("on_create")
|
||||
|
||||
def on_delete(self, state, item):
|
||||
raise NotImplementedError('on_delete')
|
||||
raise NotImplementedError("on_delete")
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
import time
|
||||
import datetime
|
||||
|
||||
|
||||
def now():
|
||||
return int(time.time())
|
||||
|
||||
|
||||
def stamp_to_readable(ts, formatstr="%Y-%m-%d %H:%M:%S"):
|
||||
dt = datetime.datetime.fromtimestamp(ts)
|
||||
return dt.strftime(formatstr)
|
||||
|
|
Loading…
Reference in New Issue