From 98ad7f66fb3219674d56aad5b9100d56e762918b Mon Sep 17 00:00:00 2001 From: Tim Van Baak Date: Sun, 11 Jun 2023 20:56:51 -0700 Subject: [PATCH] Add intake-hackernews --- README.md | 9 +++ flake.nix | 1 + .../intake_hackernews/__main__.py | 3 + intake-hackernews/intake_hackernews/core.py | 69 +++++++++++++++++++ intake-hackernews/pyproject.toml | 9 +++ 5 files changed, 91 insertions(+) create mode 100644 intake-hackernews/intake_hackernews/__main__.py create mode 100644 intake-hackernews/intake_hackernews/core.py create mode 100644 intake-hackernews/pyproject.toml diff --git a/README.md b/README.md index a7bc36b..20886b7 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,12 @@ Supported `env`: - `MIN_SCORE`: Skip posts with scores below this number. - `TAGS`: Comma-separated list of tags to add to all items. - `AUTHOR_BLOCKLIST`: Comma-separated list of usernames. Posts by these users will be skipped. + +## intake-hackernews + +A feed source that returns stories from [Hacker News](https://news.ycombinator.com/). + +Supported `env`: +- `FETCH_COUNT`: Number of posts to fetch from the front page. Default 30. +- `REQUEST_RETRY`: Attempt count for fetching posts. Retries are done with exponential backoff. +- `MIN_SCORE`: Skip stories with scores below this number. 
import json
import os
import sys
import time
import urllib.request

# Root of the Hacker News API; every endpoint used below hangs off this URL.
HN_API_BASE = "https://hacker-news.firebaseio.com/v0"


def stderr(*args):
    """Print ``args`` to standard error.

    Standard output is reserved for the JSON items emitted by ``main``, so
    all diagnostics go through this function instead.
    """
    print(*args, file=sys.stderr)


def urlopen(url):
    """Open ``url``, retrying with exponential backoff when it fails.

    The number of attempts is read from the ``REQUEST_RETRY`` environment
    variable (default 6). The first retry waits 20 seconds and the wait
    doubles after every further failure.

    Returns:
        The open response object, which the caller is responsible for
        closing, or ``None`` if no attempt succeeded.
    """
    attempts = int(os.environ.get("REQUEST_RETRY", "6"))
    backoff = 20
    for attempt in range(attempts):
        try:
            return urllib.request.urlopen(url)
        except Exception as ex:
            # Deliberately broad: any failure to open the URL is logged and
            # retried rather than aborting the whole run.
            stderr(f"[{attempt + 1}/{attempts}] Error fetching", url)
            stderr(ex)
            if attempt < attempts - 1:
                stderr("Retrying in", backoff, "seconds")
                time.sleep(backoff)
                backoff *= 2
            else:
                stderr("Failed to fetch in", attempts, "tries")
    return None


def _fetch_json(url):
    """Return the decoded JSON body of ``url``, or ``None`` if it could not be opened."""
    response = urlopen(url)
    if response is None:
        return None
    # Close the connection as soon as the body has been parsed.
    with response:
        return json.load(response)


def _story_to_item(story):
    """Translate a Hacker News story dict into the item dict that is emitted."""
    item = {"id": story["id"]}

    if author := story.get("by"):
        item["author"] = author

    if posted := story.get("time"):
        item["time"] = int(posted)

    if text := story.get("text"):
        item["body"] = text

    if link := story.get("url"):
        item["link"] = link

    if title := story.get("title"):
        item["title"] = title

    return item


def main(api_base=HN_API_BASE):
    """Print the current top stories as one JSON object per line.

    Environment variables:
        FETCH_COUNT: how many stories to take from the top of the front-page
            list (default 30).
        MIN_SCORE: stories scoring below this value are skipped (default 0,
            which disables the filter).
        REQUEST_RETRY: attempts per HTTP request, see ``urlopen``.

    Args:
        api_base: root URL of the API, without a trailing slash. Defaults to
            the public Hacker News endpoint.

    Returns:
        0 on success, 1 if the list of top stories could not be loaded.
        Stories that cannot be fetched, or for which the API answers
        ``null``, are reported on stderr and skipped.
    """
    top_stories = _fetch_json(f"{api_base}/topstories.json")
    if not isinstance(top_stories, list):
        stderr("Could not load the list of top stories")
        return 1

    # Decide how many to fetch (the API returns up to 500).
    fetch_limit = int(os.environ.get("FETCH_COUNT", "30"))
    min_score = int(os.environ.get("MIN_SCORE", "0"))

    for story_id in top_stories[:fetch_limit]:
        story = _fetch_json(f"{api_base}/item/{story_id}.json")
        if not isinstance(story, dict):
            # Either every retry failed or the API returned null for this id.
            stderr("Skipping story", story_id, "- no data returned")
            continue

        if min_score and story.get("score", 0) < min_score:
            continue

        print(json.dumps(_story_to_item(story)))

    return 0