Add submodule and cli for executing source fetches
This commit is contained in:
parent
8ed3b85cad
commit
7afd5e1484
55
README.md
55
README.md
|
@ -0,0 +1,55 @@
|
||||||
|
# intake
|
||||||
|
|
||||||
|
`intake` is an arbitrary feed aggregator.
|
||||||
|
|
||||||
|
## Feed Source Interface
|
||||||
|
|
||||||
|
The base `intake` directory is `$XDG_DATA_HOME/intake`. Each feed source's data is contained within a subdirectory of the base directory. The name of the feed source is the name of the subdirectory.
|
||||||
|
|
||||||
|
Feed source directories have the following structure:
|
||||||
|
|
||||||
|
```
|
||||||
|
intake
|
||||||
|
|- <source name>
|
||||||
|
| |- intake.json
|
||||||
|
| |- state
|
||||||
|
| |- <item id>.item
|
||||||
|
| |- <item id>.item
|
||||||
|
| |- ...
|
||||||
|
|- <source name>
|
||||||
|
| | ...
|
||||||
|
| ...
|
||||||
|
```
|
||||||
|
|
||||||
|
`intake.json` must be present; the other files are optional. Each `.item` file contains the data for one feed item. `state` provides a file for the feed source to write arbitrary data, e.g. JSON or binary data.
|
||||||
|
|
||||||
|
`intake.json` has the following structure:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"fetch": {
|
||||||
|
"exe": "<absolute path to program or name on intake's PATH>",
|
||||||
|
"args": ["list", "of", "program", "arguments"]
|
||||||
|
},
|
||||||
|
"action": {
|
||||||
|
"<action name>": {
|
||||||
|
"exe": "...",
|
||||||
|
"args": "..."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"env": { ... }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`fetch` is required. If `action` or `env` are absent, they will be treated as if they were empty.
|
||||||
|
|
||||||
|
When a feed source is updated, `fetch.exe` will be executed with `fetch.args` as arguments. The following environment variables will be set:
|
||||||
|
|
||||||
|
* `STATE_PATH` is set to the absolute path of `state`.
|
||||||
|
* Each key in `env` in `config.json` is passed with its value.
|
||||||
|
|
||||||
|
Each line written to the process's `stdout` will be parsed as a JSON object representing a feed item. Each line written to `stderr` will be logged by intake. `stdout` and `stderr` are decoded as UTF-8.
|
||||||
|
|
||||||
|
If invalid JSON is written, intake will consider the feed update to be a failure. If the exit code is nonzero, intake will consider the feed update to be a failure, even if valid JSON was received. No changes will happen to the feed state as a result of a failed update.
|
||||||
|
|
||||||
|
Item actions are performed by executing `action.<name>.exe` with `action.<name>.args` as arguments.
|
107
intake/cli.py
107
intake/cli.py
|
@ -1,9 +1,112 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
import os.path
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
def main():
|
from .source import fetch_items
|
||||||
|
from .types import InvalidConfigException, SourceUpdateException
|
||||||
|
|
||||||
|
|
||||||
|
def intake_data_dir() -> Path:
|
||||||
|
home = Path(os.environ["HOME"])
|
||||||
|
data_home = Path(os.environ.get("XDG_DATA_HOME", home / ".local" / "share"))
|
||||||
|
intake_data = data_home / "intake"
|
||||||
|
return intake_data
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_fetch(cmd_args):
|
||||||
|
"""Execute the fetch for a source."""
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog="intake fetch",
|
||||||
|
description=cmd_fetch.__doc__,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--base",
|
||||||
|
default=intake_data_dir(),
|
||||||
|
help="Path to the intake data directory containing source directories",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--source",
|
||||||
|
help="Source name to fetch",
|
||||||
|
)
|
||||||
|
args = parser.parse_args(cmd_args)
|
||||||
|
ret = 0
|
||||||
|
|
||||||
|
source_path = Path(args.base) / args.source
|
||||||
try:
|
try:
|
||||||
print("Hello, world!")
|
items = fetch_items(source_path)
|
||||||
|
for item in items:
|
||||||
|
print("Item:", item)
|
||||||
|
except InvalidConfigException as ex:
|
||||||
|
print("Could not fetch", args.source)
|
||||||
|
print(ex)
|
||||||
|
ret = 1
|
||||||
|
except SourceUpdateException as ex:
|
||||||
|
print("Error updating source", args.source)
|
||||||
|
print(ex)
|
||||||
|
ret = 1
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_help(_):
|
||||||
|
"""Print the help text."""
|
||||||
|
print_usage()
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def execute_cli():
|
||||||
|
"""
|
||||||
|
Internal entry point for CLI execution.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Collect the commands in this module.
|
||||||
|
cli = sys.modules[__name__]
|
||||||
|
commands = {
|
||||||
|
name[4:]: func for name, func in vars(cli).items() if name.startswith("cmd_")
|
||||||
|
}
|
||||||
|
names_width = max(map(len, commands.keys()))
|
||||||
|
desc_fmt = f" {{0:<{names_width}}} {{1}}"
|
||||||
|
descriptions = "\n".join(
|
||||||
|
[desc_fmt.format(name, func.__doc__) for name, func in commands.items()]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set up the top-level parser
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog="intake",
|
||||||
|
description=f"Available commands:\n{descriptions}\n",
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
# add_help=False,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"command",
|
||||||
|
nargs="?",
|
||||||
|
default="help",
|
||||||
|
help="The command to execute",
|
||||||
|
choices=commands,
|
||||||
|
metavar="command",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"args", nargs=argparse.REMAINDER, help="Command arguments", metavar="args"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract the usage print for command_help
|
||||||
|
global print_usage
|
||||||
|
print_usage = parser.print_help
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Execute command
|
||||||
|
sys.exit(commands[args.command](args.args))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""
|
||||||
|
Main entry point for CLI execution.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
execute_cli()
|
||||||
except BrokenPipeError:
|
except BrokenPipeError:
|
||||||
# See https://docs.python.org/3.10/library/signal.html#note-on-sigpipe
|
# See https://docs.python.org/3.10/library/signal.html#note-on-sigpipe
|
||||||
devnull = os.open(os.devnull, os.O_WRONLY)
|
devnull = os.open(os.devnull, os.O_WRONLY)
|
||||||
|
|
|
@ -0,0 +1,102 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from subprocess import Popen, PIPE, TimeoutExpired
|
||||||
|
from threading import Thread
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import os.path
|
||||||
|
|
||||||
|
from .types import InvalidConfigException, SourceUpdateException
|
||||||
|
|
||||||
|
|
||||||
|
def read_stdout(process: Popen, outs: list):
|
||||||
|
"""
|
||||||
|
Read the subprocess's stdout into memory.
|
||||||
|
This prevents the process from blocking when the pipe fills up.
|
||||||
|
"""
|
||||||
|
while True:
|
||||||
|
data = process.stdout.readline()
|
||||||
|
if data:
|
||||||
|
print(f"[stdout] <{repr(data)}>")
|
||||||
|
outs.append(data)
|
||||||
|
if process.poll() is not None:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
def read_stderr(process: Popen):
|
||||||
|
"""
|
||||||
|
Read the subprocess's stderr stream and pass it to logging.
|
||||||
|
This prevents the process from blocking when the pipe fills up.
|
||||||
|
"""
|
||||||
|
while True:
|
||||||
|
data = process.stderr.readline()
|
||||||
|
if data:
|
||||||
|
print(f"[stderr] <{repr(data)}>")
|
||||||
|
if process.poll() is not None:
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_items(source_path: Path, update_timeout=60):
|
||||||
|
"""
|
||||||
|
Execute the feed source and return the current feed items.
|
||||||
|
Returns a list of feed items on success.
|
||||||
|
Throws SourceUpdateException if the feed source update failed.
|
||||||
|
"""
|
||||||
|
# Load the source's config to get its update command
|
||||||
|
config_path = source_path / "intake.json"
|
||||||
|
with open(config_path, "r", encoding="utf8") as config_file:
|
||||||
|
config = json.load(config_file)
|
||||||
|
|
||||||
|
if "fetch" not in config:
|
||||||
|
raise InvalidConfigException("Missing exe")
|
||||||
|
|
||||||
|
exe_name = config["fetch"]["exe"]
|
||||||
|
exe_args = config["fetch"].get("args", [])
|
||||||
|
|
||||||
|
# Overlay the current env with the config env and intake-provided values
|
||||||
|
exe_env = {
|
||||||
|
**os.environ.copy(),
|
||||||
|
**config.get("env", {}),
|
||||||
|
"STATE_PATH": str((source_path / "state").absolute()),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Launch the update command
|
||||||
|
try:
|
||||||
|
process = Popen(
|
||||||
|
[exe_name, *exe_args],
|
||||||
|
stdout=PIPE,
|
||||||
|
stderr=PIPE,
|
||||||
|
cwd=source_path,
|
||||||
|
env=exe_env,
|
||||||
|
encoding="utf8",
|
||||||
|
)
|
||||||
|
except PermissionError:
|
||||||
|
raise SourceUpdateException("command not executable")
|
||||||
|
|
||||||
|
# While the update command is executing, watch its output
|
||||||
|
t_stderr = Thread(target=read_stderr, args=(process,), daemon=True)
|
||||||
|
t_stderr.start()
|
||||||
|
|
||||||
|
outs = []
|
||||||
|
t_stdout = Thread(target=read_stdout, args=(process, outs), daemon=True)
|
||||||
|
t_stdout.start()
|
||||||
|
|
||||||
|
# Time out the process if it takes too long
|
||||||
|
try:
|
||||||
|
process.wait(timeout=update_timeout)
|
||||||
|
except TimeoutExpired:
|
||||||
|
process.kill()
|
||||||
|
t_stdout.join(timeout=1)
|
||||||
|
t_stderr.join(timeout=1)
|
||||||
|
|
||||||
|
if process.poll():
|
||||||
|
raise SourceUpdateException("return code")
|
||||||
|
|
||||||
|
items = []
|
||||||
|
for line in outs:
|
||||||
|
try:
|
||||||
|
item = json.loads(line)
|
||||||
|
items.append(item)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
raise SourceUpdateException("invalid json")
|
||||||
|
|
||||||
|
return items
|
|
@ -0,0 +1,21 @@
|
||||||
|
"""
|
||||||
|
Common exception types.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class IntakeException(Exception):
|
||||||
|
"""
|
||||||
|
Base class for intake application exceptions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class InvalidConfigException(IntakeException):
|
||||||
|
"""
|
||||||
|
Could not interact with a source because the source's config was not valid.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class SourceUpdateException(Exception):
|
||||||
|
"""
|
||||||
|
The source update process did not return valid data and signal success.
|
||||||
|
"""
|
|
@ -0,0 +1,9 @@
|
||||||
|
{
|
||||||
|
"fetch": {
|
||||||
|
"exe": "python3",
|
||||||
|
"args": ["update.py"]
|
||||||
|
},
|
||||||
|
"env": {
|
||||||
|
"HELLO": "WORLD"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
for i in range(3):
|
||||||
|
sys.stderr.write(f"{i+1}...\n")
|
||||||
|
sys.stderr.flush()
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
item = json.dumps({"id": "helloworld", "title": "Hello = " + os.environ.get("HELLO", "MISSING")})
|
||||||
|
print(item)
|
Loading…
Reference in New Issue