"""Emit the entries of a feed as JSON items, one object per line on stdout.

Configuration is read from the environment:

* ``FEED_URL`` (required): passed to ``feedparser.parse``.
* ``FEED_TITLE`` (optional): title prefix used when the feed has no title.
"""
# Derived from intake-rss/intake_rss/__init__.py as added by commit
# 01f9849aecafb7a9c5603842cf13a3899c389b87 ("Implement intake-rss",
# Tim Van Baak, 2023-06-10).

import calendar
import hashlib
import json
import os
import sys
from typing import Any, Dict, Mapping, Optional


def stderr(*args: object) -> None:
    """Print ``args`` to standard error, leaving stdout for the JSON items."""
    print(*args, file=sys.stderr)


def entry_to_item(
    entry: Mapping[str, Any], feed_title: Optional[str]
) -> Dict[str, Any]:
    """Convert one parsed feed entry into an item dictionary.

    Args:
        entry: A mapping with the entry's fields. The keys read are ``link``,
            ``id``, ``title``, ``published_parsed`` and ``summary``; all of
            them are optional.
        feed_title: Title prefixed to the entry title, or a falsy value for
            no prefix.

    Returns:
        A dict that always has ``id`` and ``title``, and has ``time``,
        ``body`` and ``link`` only when the entry provides the matching data.
    """
    item: Dict[str, Any] = {}

    # The id is a digest of the most stable identifier available: the link,
    # then the entry's own id, then the entry's string form as a last resort.
    entry_link = entry.get("link")
    id_basis = entry_link or entry.get("id") or str(entry)
    item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest()

    # `or` rather than a .get() default so an empty title is replaced too.
    entry_title = entry.get("title") or "(No title)"
    item["title"] = f"{feed_title}: {entry_title}" if feed_title else entry_title

    if entry_pubparsed := entry.get("published_parsed"):
        # feedparser documents *_parsed dates as UTC time tuples, so they must
        # be converted with timegm(); mktime() would read them as local time
        # and shift the timestamp by the host's UTC offset.
        item["time"] = int(calendar.timegm(entry_pubparsed))

    if entry_desc := entry.get("summary"):
        item["body"] = entry_desc

    if entry_link:
        item["link"] = entry_link

    return item


def main() -> int:
    """Fetch the feed named by ``FEED_URL`` and print its entries as JSON.

    Returns:
        ``0`` on success, ``1`` when ``FEED_URL`` is unset or the feed could
        not be parsed into any entries. Diagnostics go to stderr.
    """
    feed_url = os.environ.get("FEED_URL")
    if not feed_url:
        stderr("No FEED_URL defined")
        return 1

    # Imported here so a missing FEED_URL is reported before the third-party
    # dependency is needed, and so entry_to_item() stays importable without it.
    import feedparser

    feed = feedparser.parse(feed_url)
    if feed.bozo:
        # bozo only means the feed was not well-formed; feedparser may still
        # have recovered entries. Fail only when nothing usable came back.
        reason = feed.get("bozo_exception")
        if not feed.entries:
            if reason:
                stderr("Failed to parse feed", feed_url, reason)
            else:
                stderr("Failed to parse feed", feed_url)
            return 1
        stderr("Feed is not well-formed, using recovered entries", feed_url)

    feed_title = feed.feed.get("title") or os.environ.get("FEED_TITLE")

    for entry in feed.entries:
        print(json.dumps(entry_to_item(entry, feed_title)))

    return 0


if __name__ == "__main__":
    sys.exit(main())