Add intake-hackernews
This commit is contained in:
parent
a63fceceb4
commit
98ad7f66fb
|
@ -27,3 +27,12 @@ Supported `env`:
|
|||
- `MIN_SCORE`: Skip posts with scores below this number.
|
||||
- `TAGS`: Comma-separated list of tags to add to all items.
|
||||
- `AUTHOR_BLOCKLIST`: Comma-separated list of usernames. Posts by these users will be skipped.
|
||||
|
||||
## intake-hackernews
|
||||
|
||||
A feed source that returns stories from [Hacker News](https://news.ycombinator.com/).
|
||||
|
||||
Supported `env`:
|
||||
- `FETCH_COUNT`: Number of posts to fetch from the front page. Default 30.
|
||||
- `REQUEST_RETRY`: Attempt count for fetching posts. Retries are done with exponential backoff. Default 6.
|
||||
- `MIN_SCORE`: Skip stories with scores below this number.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
packages.${system} = {
|
||||
intake-rss = pythonPackage "intake-rss" ./intake-rss [ pypkgs.feedparser ];
|
||||
intake-reddit = pythonPackage "intake-reddit" ./intake-reddit [];
|
||||
intake-hackernews = pythonPackage "intake-hackernews" ./intake-hackernews [];
|
||||
};
|
||||
|
||||
devShells.${system} = {
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
import sys
|
||||
from .core import main
|
||||
# Run the feed source and pass main()'s return value to sys.exit as the
# process exit status.
sys.exit(main())
|
|
@ -0,0 +1,69 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
|
||||
def stderr(*args):
    """Write the given values to standard error, space-separated, on one line."""
    line = " ".join(str(value) for value in args)
    print(line, file=sys.stderr)
|
||||
|
||||
|
||||
def urlopen(url):
    """Open *url*, retrying with exponential backoff when the request fails.

    The attempt count is read from the ``REQUEST_RETRY`` environment variable
    (default 6). At least one attempt is always made, so a value of 0 or less
    does not silently skip the request. The first retry waits 20 seconds and
    each later wait is double the previous one.

    Args:
        url: The URL to open.

    Returns:
        The response object returned by ``urllib.request.urlopen``, or
        ``None`` if every attempt failed. Callers must check for ``None``.
    """
    attempts = max(1, int(os.environ.get("REQUEST_RETRY", "6")))
    backoff = 20
    for attempt in range(1, attempts + 1):
        try:
            return urllib.request.urlopen(url)
        except Exception as ex:
            # Deliberately broad: this is a best-effort retry boundary, so
            # every failure is logged and retried rather than propagated.
            stderr(f"[{attempt}/{attempts}] Error fetching", url)
            stderr(ex)
        if attempt < attempts:
            stderr("Retrying in", backoff, "seconds")
            time.sleep(backoff)
            backoff *= 2
    stderr("Failed to fetch in", attempts, "tries")
    return None
|
||||
|
||||
|
||||
def main():
    """Fetch Hacker News top stories and print one JSON item per line.

    Reads these environment variables:

    - ``FETCH_COUNT``: how many story ids from the top-stories list to
      fetch (default 30).
    - ``MIN_SCORE``: when nonzero, stories whose score is below this value
      are skipped (default 0).

    Each printed item always has ``id`` and, when the story provides them,
    ``author``, ``time``, ``body``, ``link`` and ``title``.

    Returns:
        0 on success, or 1 if the top-stories list could not be fetched.
        Individual stories that fail to download are skipped.
    """
    # Get the front page
    top_stories_url = "https://hacker-news.firebaseio.com/v0/topstories.json"
    response = urlopen(top_stories_url)
    if response is None:
        # urlopen has already reported the failure on stderr.
        return 1
    with response:
        # Treat a JSON null body like an empty list so slicing stays valid.
        top_stories = json.load(response) or []

    # Decide how many to fetch (the API returns up to 500)
    fetch_limit = int(os.environ.get("FETCH_COUNT", "30"))
    to_fetch = top_stories[:fetch_limit]

    min_score = int(os.environ.get("MIN_SCORE", "0"))
    for story_id in to_fetch:
        item_url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
        story_resp = urlopen(item_url)
        if story_resp is None:
            # One unreachable story should not abort the whole feed.
            continue
        with story_resp:
            story = json.load(story_resp)

        if not story:
            # The body may decode to null (presumably when the id has no
            # item - confirm against the API docs); nothing to emit then.
            continue

        if min_score:
            story_score = story.get("score", 0)
            if story_score < min_score:
                continue

        item = {}
        item["id"] = story["id"]

        if story_by := story.get("by"):
            item["author"] = story_by

        if story_time := story.get("time"):
            item["time"] = int(story_time)

        if story_text := story.get("text"):
            item["body"] = story_text

        if story_link := story.get("url"):
            item["link"] = story_link

        if story_title := story.get("title"):
            item["title"] = story_title

        print(json.dumps(item))

    return 0
|
|
@ -0,0 +1,9 @@
|
|||
[project]
|
||||
name = "intake-hackernews"
|
||||
version = "0.1.0"
|
||||
|
||||
[project.scripts]
|
||||
intake-hackernews = "intake_hackernews.core:main"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["intake_hackernews"]
|
Loading…
Reference in New Issue