Add intake-praw
parent ec8b9166f9
commit 0bf444dfbf

README.md | 17
README.md
@@ -48,3 +48,20 @@ Supported `env`:
 - `TITLE`: The title of the item.
 - `BODY`: The body of the item.
 - `UNIQUE`: If set to a truthy value, the item id will be the hash of the title, so the same item will be generated until the message is changed.
+
+## intake-praw
+
+Like `intake-reddit`, but uses [PRAW](https://praw.readthedocs.io/en/stable/) to access Reddit instead of hitting the public JSON API directly.
+
+Supported `env`:
+
+- `CLIENT_ID`: Required. `client_id` for PRAW.
+- `CLIENT_SECRET`: Required. `client_secret` for PRAW.
+- `USER_AGENT`: User agent for PRAW. Defaults to `nixos:intake_praw:X.Y.Z`, where `X.Y.Z` is the current version number.
+- `SUBREDDIT_NAME`: Required. The subreddit name, with no `r/` prefix.
+- `SUBREDDIT_PAGE`: The listing page to fetch posts from. Defaults to `hot`. Listings that support multiple time ranges can be suffixed with one, e.g. `top_week`.
+- `POST_LIMIT`: The number of posts to fetch. Default `25`.
+- `FILTER_NSFW`: By default, NSFW posts are included. Set to a truthy value to skip them.
+- `FILTER_SPOILER`: As `FILTER_NSFW`, but for posts marked as spoilers.
+- `MIN_SCORE`: Skip posts with scores below this number.
+- `TAGS`: Comma-separated list of tags to add to all items.
+- `NO_VIDEO`: Set to a truthy value to filter out `v.redd.it` links.
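For orientation, a minimal sketch of invoking the source with the configuration above. Every value below is a placeholder, and the sketch assumes the `intake-praw` console script from this commit is installed on `PATH`:

import os
import subprocess

# Placeholder credentials and settings, for illustration only.
env = dict(
    os.environ,
    CLIENT_ID="my-client-id",
    CLIENT_SECRET="my-client-secret",
    SUBREDDIT_NAME="NixOS",
    SUBREDDIT_PAGE="top_week",  # "top" listing, "week" time range
    POST_LIMIT="10",
    FILTER_NSFW="1",
    TAGS="reddit,nixos",
)

# The source writes one JSON item per line to stdout and logs to stderr.
subprocess.run(["intake-praw"], env=env, check=True)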

flake.nix
@@ -14,13 +14,13 @@
     });
   in {
     default = pkgs.intakeSources;
-    inherit (pkgs) intake-rss intake-reddit intake-hackernews intake-echo;
+    inherit (pkgs) intake-rss intake-reddit intake-hackernews intake-echo intake-praw;
   };

   devShells.${system} = {
     python = let
       pkgs = nixpkgs.legacyPackages.${system};
-      pythonEnv = pkgs.python38.withPackages (pypkgs: with pypkgs; [ black feedparser ]);
+      pythonEnv = pkgs.python38.withPackages (pypkgs: with pypkgs; [ black feedparser praw ]);
     in pkgs.mkShell {
       packages = [ pythonEnv ];
       shellHook = ''
@@ -45,12 +45,14 @@
       final.intake-reddit
       final.intake-hackernews
       final.intake-echo
+      final.intake-praw
     ];
   };
   intake-rss = pythonPackage "intake-rss" ./intake-rss [ final.python38Packages.feedparser ];
   intake-reddit = pythonPackage "intake-reddit" ./intake-reddit [];
   intake-hackernews = pythonPackage "intake-hackernews" ./intake-hackernews [];
   intake-echo = pythonPackage "intake-echo" ./intake-echo [];
+  intake-praw = pythonPackage "intake-praw" ./intake-praw [ final.python38Packages.praw ];
 };

 nixosModules.default = {

intake-praw/intake_praw/__main__.py (new file; path inferred)
@@ -0,0 +1,4 @@
import sys
from .core import main

sys.exit(main())

intake-praw/intake_praw/core.py (new file; path inferred)
@@ -0,0 +1,159 @@
import importlib.metadata
import json
import os
import re
import sys

import praw


# Item TTL by listing time range.
SORT_TTL = {
    "hour": 60 * 60,
    "day": 60 * 60 * 24,
    "week": 60 * 60 * 24 * 7,
    "month": 60 * 60 * 24 * 31,
    "year": 60 * 60 * 24 * 366,
    "all": 60 * 60 * 24 * 366,
}

VERSION = importlib.metadata.version(__package__)


def stderr(*args, **kwargs):
    print(*args, **kwargs, file=sys.stderr)


def main():
    # Get the reddit client
    client_id = os.environ.get("CLIENT_ID")
    if not client_id:
        stderr("CLIENT_ID required")
        return 1

    client_secret = os.environ.get("CLIENT_SECRET")
    if not client_secret:
        stderr("CLIENT_SECRET required")
        return 1

    user_agent = os.environ.get("USER_AGENT", f"nixos:{__package__}:{VERSION}")

    reddit = praw.Reddit(
        client_id=client_id, client_secret=client_secret, user_agent=user_agent
    )

    # Get the subreddit
    sub_name = os.environ.get("SUBREDDIT_NAME")
    if not sub_name:
        stderr("No SUBREDDIT_NAME defined")
        return 1
    subreddit = reddit.subreddit(sub_name)
    post_limit = int(os.environ.get("POST_LIMIT", "25"))

    # Get the listing page and optional time range, e.g. "top_week"
    sub_page, sub_sort = os.environ.get("SUBREDDIT_PAGE", "hot"), ""
    if "_" in sub_page:
        sub_page, sub_sort = sub_page.split("_", 1)
    if sub_sort:
        stderr(f"Fetching {post_limit} posts from r/{sub_name}/{sub_page}?t={sub_sort}")
    else:
        stderr(f"Fetching {post_limit} posts from r/{sub_name}/{sub_page}")

    # PRAW rejects an empty time_filter, so fall back to its default of "all"
    if sub_page == "hot":
        posts = subreddit.hot(limit=post_limit)
    elif sub_page == "new":
        posts = subreddit.new(limit=post_limit)
    elif sub_page == "top":
        posts = subreddit.top(limit=post_limit, time_filter=sub_sort or "all")
    elif sub_page == "rising":
        posts = subreddit.rising(limit=post_limit)
    elif sub_page == "controversial":
        posts = subreddit.controversial(limit=post_limit, time_filter=sub_sort or "all")
    else:
        stderr("Invalid subreddit page", sub_page)
        return 1

    # Pull item configuration options from the environment
    filter_nsfw = os.environ.get("FILTER_NSFW", False)
    filter_spoiler = os.environ.get("FILTER_SPOILER", False)
    min_score = int(os.environ.get("MIN_SCORE", 0))
    tags = [tag for tag in os.environ.get("TAGS", "").split(",") if tag]
    no_video = os.environ.get("NO_VIDEO", False)

    stderr("filter nsfw =", bool(filter_nsfw))
    stderr("filter spoiler =", bool(filter_spoiler))
    stderr("min score =", min_score)
    stderr("tags =", ", ".join(tags))
    stderr("no video =", bool(no_video))

    for post in posts:
        # Fill in the basic item fields
        item = {}
        item["id"] = post.id
        item["title"] = f"/{post.subreddit_name_prefixed}: {post.title}"
        item["author"] = f"/u/{post.author.name}" if post.author else "[deleted]"
        item["link"] = f"https://reddit.com{post.permalink}"
        item["time"] = post.created_utc
        item["tags"] = list(tags)
        item["ttl"] = SORT_TTL.get(sub_sort, 60 * 60 * 24 * 8)

        # Special handling for native video posts
        if "v.redd" in post.url:
            if no_video:
                continue
            item["title"] = "[V] " + item["title"]

        # Special handling for NSFW
        if post.over_18:
            if filter_nsfw:
                continue
            item["title"] = "[NSFW] " + item["title"]
            item["tags"].append("nsfw")

        # Special handling for spoilers
        if post.spoiler:
            if filter_spoiler:
                continue
            item["title"] = "[S] " + item["title"]
            item["tags"].append("spoiler")

        # Post score
        if min_score and post.score < min_score:
            continue

        # Body
        parts = []
        if not post.is_self:
            parts.append(f'<i>link:</i> <a href="{post.url}">{post.url}</a>')
        if hasattr(post, "preview"):
            try:
                # Use the largest preview image under 800px wide
                previews = post.preview["images"][0]["resolutions"]
                small_previews = [p for p in previews if p["width"] < 800]
                preview = sorted(small_previews, key=lambda p: -p["width"])[0]
                parts.append(f'<img src="{preview["url"]}">')
            except Exception:
                pass
        if getattr(post, "is_gallery", False):
            try:
                for gallery_item in post.gallery_data["items"]:
                    media_id = gallery_item["media_id"]
                    metadata = post.media_metadata[media_id]
                    small_previews = [p for p in metadata["p"] if p["x"] < 800]
                    preview = sorted(small_previews, key=lambda p: -p["x"])[0]
                    preview_url = metadata["s"]["u"]
                    if match := re.search(r"redd\.it/([A-Za-z0-9]*\....)", preview_url):
                        preview_url = "https://i.redd.it/" + match.group(1)
                    parts.append(
                        f'<i>link:</i> <a href="{metadata["s"]["u"]}">{preview_url}</a>'
                    )
                    parts.append(f'<img src="{preview["u"]}">')
            except Exception:
                pass
        if post.selftext:
            # Truncate the self text at the first space after 1024 characters
            limit = post.selftext.find(" ", 1024)
            preview_body = post.selftext if limit == -1 else post.selftext[:limit]
            if len(preview_body) < len(post.selftext):
                preview_body += "[...]"
            parts.append(f"<p>{preview_body}</p>")
        item["body"] = "<br><hr>".join(parts)

        print(json.dumps(item))
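For orientation, a sketch of the shape of one emitted item, reconstructed from the field assignments above. Every value here is an invented placeholder, not real Reddit data:

# Hypothetical example of a single output line, shown as the dict core.py builds.
example_item = {
    "id": "abc123",
    "title": "[NSFW] /r/NixOS: Some post title",
    "author": "/u/someuser",
    "link": "https://reddit.com/r/NixOS/comments/abc123/some_post_title/",
    "time": 1700000000.0,  # post.created_utc, a UTC epoch timestamp
    "tags": ["reddit", "nsfw"],
    "ttl": 60 * 60 * 24 * 7,  # SORT_TTL["week"], from a top_week fetch
    "body": '<i>link:</i> <a href="https://example.com">https://example.com</a>',
}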

intake-praw/pyproject.toml (new file; path inferred)
@@ -0,0 +1,10 @@
[project]
name = "intake-praw"
version = "1.0.0"

[project.scripts]
intake-praw = "intake_praw.core:main"

[tool.setuptools]
packages = ["intake_praw"]