intake-rss: Add FILTER_REGEX

This commit is contained in:
Tim Van Baak 2023-08-09 20:35:44 -07:00
parent 4f8b424416
commit 4cfa13f248
3 changed files with 9 additions and 1 deletions

View File

@ -12,6 +12,7 @@ Supported `env`:
- `FEED_URL`: Required. The URL of the RSS/Atom feed.
- `FEED_TITLE`: Override the feed `<title>`. Item titles are in the format "[feed title]: [item title]".
- `FILTER_REGEX`: Optional. A regular expression searched for anywhere in each entry title (not anchored to the start). Entries whose title matches are filtered out of the feed.
## intake-reddit

View File

@ -1,6 +1,7 @@
import hashlib import hashlib
import json import json
import os import os
import re
import sys import sys
import time import time
@ -24,6 +25,8 @@ def main():
feed_title = os.environ.get("FEED_TITLE") or feed.feed.get("title") feed_title = os.environ.get("FEED_TITLE") or feed.feed.get("title")
filter_regex = os.environ.get("FILTER_REGEX")
for entry in feed.entries: for entry in feed.entries:
item = {} item = {}
@ -31,6 +34,10 @@ def main():
id_basis = entry_link or entry.get("id") or str(entry) id_basis = entry_link or entry.get("id") or str(entry)
item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest() item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest()
if filter_regex and re.search(filter_regex, entry.get("title", "")):
stderr("Item matched filter regex, skipping")
continue
entry_title = entry.get("title", "(No title)") entry_title = entry.get("title", "(No title)")
if feed_title: if feed_title:
item["title"] = f"{feed_title}: {entry_title}" item["title"] = f"{feed_title}: {entry_title}"

View File

@ -1,6 +1,6 @@
[project] [project]
name = "intake-rss" name = "intake-rss"
version = "1.1.0" version = "1.2.0"
[project.scripts] [project.scripts]
intake-rss = "intake_rss.core:main" intake-rss = "intake_rss.core:main"