Add intake-hackernews

Tim Van Baak 2023-06-11 20:56:51 -07:00
parent a63fceceb4
commit 98ad7f66fb
5 changed files with 91 additions and 0 deletions

View File: README.md

@@ -27,3 +27,12 @@ Supported `env`:
- `MIN_SCORE`: Skip posts with scores below this number.
- `TAGS`: Comma-separated list of tags to add to all items.
- `AUTHOR_BLOCKLIST`: Comma-separated list of usernames. Posts by these users will be skipped.

## intake-hackernews

A feed source that returns stories from [Hacker News](https://news.ycombinator.com/).

Supported `env`:
- `FETCH_COUNT`: Number of posts to fetch from the front page. Default 30.
- `REQUEST_RETRY`: Attempt count for fetching posts. Default 6. Retries are done with exponential backoff.
- `MIN_SCORE`: Skip stories with scores below this number.
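
These settings are plain environment variables, so a caller can set them per invocation. A minimal sketch of driving the source this way, assuming the package is installed and run with `python -m` (per the `__main__.py` added below):

```python
import os
import subprocess

# Hypothetical invocation: fetch 10 front-page stories, keep those scoring >= 50.
env = dict(os.environ, FETCH_COUNT="10", MIN_SCORE="50")
result = subprocess.run(
    ["python", "-m", "intake_hackernews"],
    env=env,
    capture_output=True,
    text=True,
)
# Each line of stdout is one JSON-encoded item.
for line in result.stdout.splitlines():
    print(line)
```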

View File: flake.nix

@@ -18,6 +18,7 @@
  packages.${system} = {
    intake-rss = pythonPackage "intake-rss" ./intake-rss [ pypkgs.feedparser ];
    intake-reddit = pythonPackage "intake-reddit" ./intake-reddit [];
    intake-hackernews = pythonPackage "intake-hackernews" ./intake-hackernews [];
  };
  devShells.${system} = {

View File: intake-hackernews/intake_hackernews/__main__.py

@@ -0,0 +1,3 @@
import sys
from .core import main
sys.exit(main())

View File: intake-hackernews/intake_hackernews/core.py

@@ -0,0 +1,69 @@
import json
import os
import sys
import time
import urllib.request


def stderr(*args):
    print(*args, file=sys.stderr)


def urlopen(url):
    # Fetch a URL, retrying with exponential backoff on failure
    attempts = int(os.environ.get("REQUEST_RETRY", "6"))
    backoff = 20
    for attempt in range(attempts):
        try:
            return urllib.request.urlopen(url)
        except Exception as ex:
            stderr(f"[{attempt + 1}/{attempts}] Error fetching", url)
            stderr(ex)
            if attempt < attempts - 1:
                stderr("Retrying in", backoff, "seconds")
                time.sleep(backoff)
                backoff *= 2
            else:
                stderr("Failed to fetch in", attempts, "tries")
    return None


def main():
    # Get the ids of the stories currently on the front page
    top_stories_url = "https://hacker-news.firebaseio.com/v0/topstories.json"
    response = urlopen(top_stories_url)
    if response is None:
        return 1
    top_stories = json.load(response)

    # Decide how many to fetch (the API returns up to 500)
    fetch_limit = int(os.environ.get("FETCH_COUNT", "30"))
    to_fetch = top_stories[:fetch_limit]

    min_score = int(os.environ.get("MIN_SCORE", "0"))

    for story_id in to_fetch:
        story_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        story_resp = urlopen(story_url)
        if story_resp is None:
            continue
        story = json.load(story_resp)

        # Skip stories below the score threshold
        if min_score:
            story_score = story.get("score", 0)
            if story_score < min_score:
                continue

        # Map the story's fields onto an intake item, omitting absent fields
        item = {}
        item["id"] = story["id"]
        if story_by := story.get("by"):
            item["author"] = story_by
        if story_time := story.get("time"):
            item["time"] = int(story_time)
        if story_text := story.get("text"):
            item["body"] = story_text
        if story_link := story.get("url"):
            item["link"] = story_link
        if story_title := story.get("title"):
            item["title"] = story_title

        # Emit each item as one JSON object per line
        print(json.dumps(item))

    return 0
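
Each surviving story is emitted as a standalone JSON object on its own line, so a downstream consumer can process the stream incrementally. A minimal sketch of such a consumer, assuming the field names from the mapping loop above (only `id` is guaranteed to be present):

```python
import json
import sys

# Read items from stdin, e.g.:
#   python -m intake_hackernews | python consume.py
for line in sys.stdin:
    item = json.loads(line)
    # "id" is always set; the other fields are optional.
    print(item["id"], item.get("title", "(untitled)"), item.get("link", ""))
```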

View File: intake-hackernews/pyproject.toml

@@ -0,0 +1,9 @@
[project]
name = "intake-hackernews"
version = "0.1.0"

[project.scripts]
intake-hackernews = "intake_hackernews.core:main"

[tool.setuptools]
packages = ["intake_hackernews"]
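
The `[project.scripts]` entry points at the same `main` as `__main__.py`, so once the package is installed the source can be invoked either as the `intake-hackernews` console script or programmatically. A minimal sketch of the latter:

```python
# Equivalent to running the installed `intake-hackernews` console script.
from intake_hackernews.core import main

raise SystemExit(main())
```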