2023-06-11 04:01:28 +00:00
|
|
|
import hashlib
|
|
|
|
import json
|
|
|
|
import os
|
2023-08-10 03:35:44 +00:00
|
|
|
import re
|
2023-06-11 04:01:28 +00:00
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
|
|
|
|
import feedparser
|
|
|
|
|
|
|
|
|
|
|
|
def stderr(*args):
    """Print the given values to standard error as a single line.

    Values are converted and separated exactly as ``print`` does by
    default (single spaces, trailing newline).
    """
    stream = sys.stderr
    print(*args, sep=" ", end="\n", file=stream)
|
|
|
|
|
|
|
|
|
2023-06-11 00:01:20 +00:00
|
|
|
def main():
    """Fetch a feed and print one JSON object per entry to stdout.

    Configuration is read from environment variables:

    * ``FEED_URL`` -- feed location handed to ``feedparser.parse`` (required).
    * ``FEED_TITLE`` -- optional title prefix; when unset or empty the
      feed's own ``title`` is used, if it has one.
    * ``FILTER_REGEX`` -- optional regular expression; entries whose title
      matches it (``re.search`` semantics) are skipped.

    Each emitted object always has ``id`` and ``title``; ``time``, ``body``
    and ``link`` are included only when the entry provides the data.

    Returns:
        ``1`` when ``FEED_URL`` is missing or feedparser flags the feed as
        malformed (``feed.bozo``); ``None`` after a successful run.

    Raises:
        re.error: if ``FILTER_REGEX`` is not a valid regular expression.
    """
    # Function-scope import so this block does not depend on the
    # module-level import list being changed.
    import calendar

    feed_url = os.environ.get("FEED_URL")
    if not feed_url:
        stderr("No FEED_URL defined")
        return 1

    feed = feedparser.parse(feed_url)
    if feed.bozo:
        stderr("Failed to parse feed", feed_url)
        return 1

    feed_title = os.environ.get("FEED_TITLE") or feed.feed.get("title")

    # Compile once up front: the pattern is loop-invariant, and an invalid
    # expression is reported immediately instead of on the first entry.
    filter_regex = os.environ.get("FILTER_REGEX")
    filter_pattern = re.compile(filter_regex) if filter_regex else None

    for entry in feed.entries:
        if filter_pattern and filter_pattern.search(entry.get("title", "")):
            stderr("Item matched filter regex, skipping")
            continue

        item = {}

        # Derive a stable id from the link, falling back to the entry's own
        # id and finally to its string form. MD5 is used only as a
        # fingerprint here, not for security.
        entry_link = entry.get("link")
        id_basis = entry_link or entry.get("id") or str(entry)
        item["id"] = hashlib.md5(id_basis.encode("utf8")).hexdigest()

        entry_title = entry.get("title", "(No title)")
        if feed_title:
            item["title"] = f"{feed_title}: {entry_title}"
        else:
            item["title"] = entry_title

        if entry_pubparsed := entry.get("published_parsed"):
            # feedparser normalizes parsed dates to UTC, so convert with
            # timegm; time.mktime would treat the tuple as local time and
            # shift the result by the host's UTC offset.
            item["time"] = int(calendar.timegm(entry_pubparsed))

        # Build the body from the summary followed by the full content,
        # separated by a horizontal rule. (Previously the content branch
        # overwrote the summary instead of appending to it.)
        body_parts = []
        if entry_desc := entry.get("summary"):
            body_parts.append(entry_desc)
        if entry_content := entry.get("content"):
            content = " ".join(c.value for c in entry_content)
            # Skip content that merely repeats the summary verbatim.
            if not body_parts or content != body_parts[0]:
                body_parts.append(content)
        if body_parts:
            item["body"] = "<hr>".join(body_parts)

        if entry_link:
            item["link"] = entry_link

        print(json.dumps(item))
|