154 lines
6.5 KiB
Python
154 lines
6.5 KiB
Python
import praw
|
|
from app_log import LoggingManager
|
|
from models import Post
|
|
from api import PostManager, PostAnalyticsManager
|
|
from webhook import WebhookNotifier
|
|
|
|
|
|
class RedditMonitor:
    """
    Monitors Reddit submissions for a specific subreddit, streaming new submissions and
    re-fetching existing ones by ID. Utilizes PRAW (Python Reddit API Wrapper) to
    interact with Reddit's API.

    Attributes:
        reddit (praw.Reddit): An instance of the PRAW Reddit class for API interactions.
        subreddit (praw.models.Subreddit): The subreddit object for the specified subreddit.
        log_manager (LoggingManager): Manages logging for Reddit monitoring operations.
    """

    def __init__(
        self, client_id, client_secret, user_agent, username, password, subreddit_name
    ):
        """
        Initializes the RedditMonitor with credentials for Reddit API access and the target subreddit.

        Parameters:
            client_id (str): The client ID for the Reddit API application.
            client_secret (str): The client secret for the Reddit API application.
            user_agent (str): The user agent string identifying the application to Reddit.
            username (str): The Reddit account username for authentication.
            password (str): The Reddit account password for authentication.
            subreddit_name (str): The name of the subreddit to monitor.
        """
        self.reddit = praw.Reddit(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=user_agent,
            username=username,
            password=password,
        )
        self.subreddit = self.reddit.subreddit(subreddit_name)
        self.log_manager = LoggingManager("scraper.log")

    def stream_submissions(self):
        """
        Streams new submissions from the specified subreddit, yielding each submission
        as it becomes available.

        This is a generator: nothing runs (including the log call) until the
        caller starts iterating.

        Yields:
            praw.models.Submission: A submission object representing a Reddit post.
        """
        self.log_manager.info("Starting submission stream")
        # Pure pass-through of the PRAW stream; no filtering or transformation.
        yield from self.subreddit.stream.submissions()

    def update_submissions(self, posts_to_update):
        """
        Retrieves and yields submissions corresponding to a list of posts that need to be updated,
        identified by their Reddit IDs.

        This is a generator: nothing runs (including the log call) until the
        caller starts iterating. An empty ``posts_to_update`` yields nothing.

        Parameters:
            posts_to_update (list of dict): A list of dictionaries, each containing the
                'reddit_id' of a post to update.

        Yields:
            praw.models.Submission: A submission object for each post that needs to be updated.

        Raises:
            KeyError: If an entry in ``posts_to_update`` has no 'reddit_id' key.
        """
        self.log_manager.info("Updating submissions")
        for post in posts_to_update:
            yield self.reddit.submission(id=post["reddit_id"])
|
|
|
|
|
|
class SubmissionManager:
    """
    Manages the processing of Reddit submissions, including conversion to post objects,
    checking for updates, and notifying via webhook. It integrates closely with RedditMonitor,
    PostManager, and PostAnalyticsManager to streamline the handling of new and existing submissions.

    Attributes:
        reddit_monitor (RedditMonitor): Monitors and streams Reddit submissions.
        post_manager (PostManager): Manages post data interactions.
        post_analytics_manager (PostAnalyticsManager): Manages post analytics data.
        webhook_notifier (WebhookNotifier): Handles notifications for new posts.
        log_manager (LoggingManager): Manages logging for submission processing operations.
    """

    def __init__(
        self,
        reddit_monitor: RedditMonitor,
        post_manager: PostManager,
        post_analytics_manager: PostAnalyticsManager,
        webhook_notifier: WebhookNotifier,
    ):
        """
        Initializes the SubmissionManager with necessary components for processing submissions.

        Parameters:
            reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions.
            post_manager (PostManager): The component for managing post data.
            post_analytics_manager (PostAnalyticsManager): The component for managing post analytics.
            webhook_notifier (WebhookNotifier): The component for sending notifications about posts.
        """
        self.reddit_monitor = reddit_monitor
        self.post_manager = post_manager
        self.post_analytics_manager = post_analytics_manager
        self.webhook_notifier = webhook_notifier
        self.log_manager = LoggingManager("scraper.log")

    def convert_submission_to_post(self, submission):
        """
        Converts a Reddit submission object into a Post object suitable for database insertion
        or analytics processing.

        Parameters:
            submission (praw.models.Submission): The Reddit submission to convert.

        Returns:
            Post: A Post object populated with data from the Reddit submission.
        """
        # Field-for-field copy; note the submission's ``id`` is stored as ``reddit_id``.
        return Post(
            reddit_id=submission.id,
            title=submission.title,
            name=submission.name,
            url=submission.url,
            score=submission.score,
            num_comments=submission.num_comments,
            created_utc=submission.created_utc,
            selftext=submission.selftext,
            permalink=submission.permalink,
            upvote_ratio=submission.upvote_ratio,
        )

    def process_submissions(self, submissions, update_frequency=None):
        """
        Processes a stream of Reddit submissions, checking for their existence, updating analytics,
        and notifying via webhook if necessary. Optionally respects an update frequency to limit updates.

        For each submission:
          * already known and due for an update: analytics are refreshed;
          * already known and not due: nothing happens;
          * not yet known: the post is inserted, its analytics are recorded, and a
            webhook notification is sent.

        Parameters:
            submissions (Iterable[praw.models.Submission]): An iterable of Reddit submission objects to process.
            update_frequency (int, optional): The minimum frequency in seconds to update a post's analytics.
                Forwarded unchanged to ``check_update_requirements``.
        """
        for submission in submissions:
            if self.post_manager.post_exists(submission.id):
                # Known post: only refresh analytics when an update is due.
                if self.post_analytics_manager.check_update_requirements(
                    submission.id, update_frequency
                ):
                    post = self.convert_submission_to_post(submission)
                    self.post_analytics_manager.update_post_analytics(post)
            else:
                # New post: persist it first, then record analytics and notify.
                post = self.convert_submission_to_post(submission)
                self.post_manager.insert_post(post)
                self.post_analytics_manager.update_post_analytics(post)
                self.webhook_notifier.send_notification(post)
|