Set cwd to data dir to enable custom scripts

This commit is contained in:
Tim Van Baak 2025-02-24 11:30:12 -08:00
parent d84fdd0e5a
commit f39f525294
4 changed files with 33 additions and 20 deletions

View File

@ -46,6 +46,7 @@ Existing items are updated with new values when a fetch or action produces them,
* Automatic fields cannot be changed.
* Source-level settings for `ttl`, `ttd`, or `tts` override the item's values.
* A source's batch setting overrides the item's `tts` value.
* Fields cannot be updated from a non-empty value to an empty value.
If a field's previous value is non-empty and the new value is empty, the old value is kept.
@ -57,6 +58,10 @@ A minimally functional source requires a `fetch` action that returns items.
TTL, TTD, and TTS can be configured at the source level by setting the environment variables `INTAKE_TTL`, `INTAKE_TTD`, or `INTAKE_TTS` to an integer value.
These values override any `ttl`, `ttd`, or `tts` value returned by a fetch or action.
TTS can additionally be configured by using the `INTAKE_BATCH` variable.
If this variable is set to a value of the form `HH:MM`, representing an hour and minute, then the `tts` of new items will be set such that they become visible at that time UTC.
This effectively "batches" a single 24h period of new items from the source.
Automatic fetching can be configured by setting the `INTAKE_FETCH` environment variable to a fetch schedule.
A fetch schedule may be:
- `every <duration>`, where `<duration>` is a Go duration string
@ -82,14 +87,17 @@ Examples:
The Intake action API defines how programs should behave to be used with Intake sources.
To execute an action, Intake executes the command specified by that action's `argv`.
If `argv[0]` contains no `/`, it is resolved from `PATH`.
If it is a relative path, it is resolved relative to intake's working directory, which is the data directory.
Writing update scripts in the data directory is thus an alternative to getting them onto `PATH` for both the daemon and command line.
The process's environment is as follows:
* `intake`'s environment is inherited.
* Each environment variable defined in the source is set.
* `STATE_PATH` is set to the absolute path of a file that the source can use for persistent state. This file can be used for any data in any format. Changes to the state file are only saved if the action succeeds.
The process inherits `intake`'s working directory, which may differ between CLI invocations and the service daemon.
Consequently, actions should use the state file for persistence and temporary directories for ephemeral files, rather than depending on the current working directory.
To avoid causing chaos in the data directory, actions should use the state file for persistence and temporary directories for ephemeral files, rather than the current working directory.
When an action receives an item as input, that item's JSON representation is written to that action's `stdin`.
When an action outputs an item, it should write the item's JSON representation to `stdout` on one line.
@ -121,11 +129,15 @@ Instead, the web interface can be locked behind a password set via `intake passw
Parity features
* [ ] source batching
* [x] source batching
* [x] web source add
* [x] first-party replacement for cron
* [x] NixOS module
* [x] NixOS vm demo
* [ ] Escape HTML in logs saved to error items
* [x] Add dbdir/bin to source PATH for custom update scripts
* [ ] Mark which sources have INTAKE_FETCH and which don't
* [ ] Source-level execution timeout
Future features

View File

@ -30,29 +30,25 @@ func init() {
rootCmd.SetHelpCommand(&cobra.Command{Hidden: true})
// All commands need to operate on a database
rootCmd.PersistentFlags().StringVarP(&dataPath, "data-dir", "d", "", "Path to the intake data directory containing the database")
// Setting the env-derived path as the default allows the command line to override the env
dbPathFromEnv := core.ResolveEnvDataDir()
rootCmd.PersistentFlags().StringVarP(&dataPath, "data-dir", "d", dbPathFromEnv, "Path to the intake data directory containing the database")
if dataPath == "" {
fmt.Println("error: no database specified")
fmt.Println("One of --data-dir, INTAKE_DATA_DIR, XDG_DATA_HOME, or HOME must be defined.")
os.Exit(1)
}
os.Chdir(dataPath)
}
//
// Common logic shared by multiple commands
//
// getDbPath returns the path of the database file inside the data directory.
// The value bound to the --data-dir flag (dataPath) takes precedence; when it
// is empty, the directory reported by core.ResolveDataDir is used instead.
// If neither yields a directory, an error is printed and the process exits
// with status 1.
func getDbPath() string {
	dir := dataPath
	if dir == "" {
		// No explicit flag value; fall back to the resolved data directory.
		dir = core.ResolveDataDir()
	}
	if dir == "" {
		fmt.Println("error: no database specified")
		fmt.Println("One of --data-dir, INTAKE_DATA_DIR, XDG_DATA_HOME, or HOME must be defined.")
		os.Exit(1)
	}
	return core.DatabasePath(dir)
}
// Attempt to open the specified database and exit with an error if it fails.
func openDb() core.DB {
dbPath := getDbPath()
dbPath := core.DatabasePath(dataPath)
db, err := core.OpenDb(dbPath)
if err != nil {
log.Fatalf("error: failed to open %s", dbPath)

View File

@ -5,7 +5,7 @@ import (
"path/filepath"
)
func ResolveDataDir() string {
func ResolveEnvDataDir() string {
if intakeData := os.Getenv("INTAKE_DATA_DIR"); intakeData != "" {
return intakeData
} else if xdgData := os.Getenv("XDG_DATA_HOME"); xdgData != "" {

View File

@ -108,12 +108,17 @@ func Execute(
return
}
// Set up the action environment:
// - intake's environment (os.Environ)
// - source's environment (env)
// - STATE_PATH (stateFile)
env = append(os.Environ(), env...)
env = append(env, "STATE_PATH="+stateFile.Name())
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
cmd := exec.CommandContext(ctx, argv[0], argv[1:]...)
cmd.Env = append(os.Environ(), env...)
cmd.Env = env
cmd.WaitDelay = time.Second * 5
// Open pipes to the command