intake-rss: Add FILTER_REGEX
This commit is contained in:
parent
4f8b424416
commit
4cfa13f248
|
@ -12,6 +12,7 @@ Supported `env`:
|
|||
|
||||
- `FEED_URL`: Required. The URL of the RSS/Atom feed.
|
||||
- `FEED_TITLE`: Override the feed `<title>`. Item titles are in the format "[feed title]: [item title]".
|
||||
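- `FILTER_REGEX`: Optional. A regular expression searched for anywhere in each entry's title. Entries whose title matches are filtered out of the feed. Entries with no title are tested against the empty string.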
|
||||
|
||||
## intake-reddit
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
@ -24,6 +25,8 @@ def main():
|
|||
|
||||
feed_title = os.environ.get("FEED_TITLE") or feed.feed.get("title")
|
||||
|
||||
filter_regex = os.environ.get("FILTER_REGEX")
|
||||
|
||||
for entry in feed.entries:
|
||||
item = {}
|
||||
|
||||
|
@ -31,6 +34,10 @@ def main():
|
|||
id_basis = entry_link or entry.get("id") or str(entry)
|
||||
item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest()
|
||||
|
||||
if filter_regex and re.search(filter_regex, entry.get("title", "")):
|
||||
stderr("Item matched filter regex, skipping")
|
||||
continue
|
||||
|
||||
entry_title = entry.get("title", "(No title)")
|
||||
if feed_title:
|
||||
item["title"] = f"{feed_title}: {entry_title}"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "intake-rss"
|
||||
version = "1.1.0"
|
||||
version = "1.2.0"
|
||||
|
||||
[project.scripts]
|
||||
intake-rss = "intake_rss.core:main"
|
||||
|
|
Loading…
Reference in New Issue