From 4cfa13f24807311ce2ce924eadb16e6d8607dcb9 Mon Sep 17 00:00:00 2001 From: Tim Van Baak Date: Wed, 9 Aug 2023 20:35:44 -0700 Subject: [PATCH] intake-rss: Add FILTER_REGEX --- README.md | 1 + intake-rss/intake_rss/core.py | 7 +++++++ intake-rss/pyproject.toml | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1eb908c..1181866 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Supported `env`: - `FEED_URL`: Required. The url of the RSS/Atom feed. - `FEED_TITLE`: Override the feed `<title>`. Item titles are in the format "[feed title]: [item title]". +- `FILTER_REGEX`: Applied as a regex to the entry title. Entries that match are filtered out of the feed. ## intake-reddit diff --git a/intake-rss/intake_rss/core.py b/intake-rss/intake_rss/core.py index a277ef4..c43b5f5 100644 --- a/intake-rss/intake_rss/core.py +++ b/intake-rss/intake_rss/core.py @@ -1,6 +1,7 @@ import hashlib import json import os +import re import sys import time @@ -24,6 +25,8 @@ def main(): feed_title = os.environ.get("FEED_TITLE") or feed.feed.get("title") + filter_regex = os.environ.get("FILTER_REGEX") + for entry in feed.entries: item = {} @@ -31,6 +34,10 @@ def main(): id_basis = entry_link or entry.get("id") or str(entry) item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest() + if filter_regex and re.search(filter_regex, entry.get("title", "")): + stderr("Item matched filter regex, skipping") + continue + entry_title = entry.get("title", "(No title)") if feed_title: item["title"] = f"{feed_title}: {entry_title}" diff --git a/intake-rss/pyproject.toml b/intake-rss/pyproject.toml index fd008b1..db48d34 100644 --- a/intake-rss/pyproject.toml +++ b/intake-rss/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "intake-rss" -version = "1.1.0" +version = "1.2.0" [project.scripts] intake-rss = "intake_rss.core:main"