"""Reddit submission monitoring and processing built on PRAW."""

import praw

from .app_log import LoggingManager
from .models import Post
from .api import PostManager, PostAnalyticsManager
from .webhook import WebhookNotifier


class RedditMonitor:
    """
    Thin wrapper around a PRAW client bound to a single subreddit.

    It exposes two generators: one that follows the subreddit's live
    submission stream and one that re-fetches submissions by Reddit ID.

    Attributes:
        reddit (praw.Reddit): The PRAW client built from the supplied credentials.
        subreddit (praw.models.Subreddit): The subreddit handle obtained from
            ``reddit.subreddit(subreddit_name)``.
        log_manager (LoggingManager): Logger constructed with "scraper.log".
    """

    def __init__(
        self, client_id, client_secret, user_agent, username, password, subreddit_name
    ):
        """
        Builds the PRAW client and resolves the target subreddit.

        Parameters:
            client_id (str): Client ID of the Reddit API application.
            client_secret (str): Client secret of the Reddit API application.
            user_agent (str): User agent string sent to Reddit.
            username (str): Reddit account username.
            password (str): Reddit account password.
            subreddit_name (str): Name of the subreddit to monitor.
        """
        credentials = {
            "client_id": client_id,
            "client_secret": client_secret,
            "user_agent": user_agent,
            "username": username,
            "password": password,
        }
        self.reddit = praw.Reddit(**credentials)
        self.subreddit = self.reddit.subreddit(subreddit_name)
        self.log_manager = LoggingManager("scraper.log")

    def stream_submissions(self):
        """
        Yields submissions from the subreddit's submission stream.

        This is a generator, so the log entry is written when iteration
        starts, not when the method is called.

        Yields:
            praw.models.Submission: Each submission produced by
                ``subreddit.stream.submissions()``.
        """
        self.log_manager.info("Starting submission stream")
        yield from self.subreddit.stream.submissions()

    def update_submissions(self, posts_to_update):
        """
        Yields a submission object for every entry in ``posts_to_update``.

        Parameters:
            posts_to_update (iterable of dict): Entries that each provide a
                'reddit_id' key identifying the submission to look up.

        Yields:
            praw.models.Submission: The result of ``reddit.submission(id=...)``
                for each entry, in input order.
        """
        self.log_manager.info("Updating submissions")
        for entry in posts_to_update:
            yield self.reddit.submission(id=entry["reddit_id"])


class SubmissionManager:
    """
    Turns Reddit submissions into Post objects and routes them to storage,
    analytics and webhook notification.

    Attributes:
        reddit_monitor (RedditMonitor): Source of submissions; stored but not
            called by the methods of this class.
        post_manager (PostManager): Used to test for and insert posts.
        post_analytics_manager (PostAnalyticsManager): Used to decide whether
            analytics need refreshing and to record them.
        webhook_notifier (WebhookNotifier): Used to announce newly inserted posts.
        log_manager (LoggingManager): Logger constructed with "scraper.log".
    """

    def __init__(
        self,
        reddit_monitor: RedditMonitor,
        post_manager: PostManager,
        post_analytics_manager: PostAnalyticsManager,
        webhook_notifier: WebhookNotifier,
    ):
        """
        Stores the collaborating components and creates the logger.

        Parameters:
            reddit_monitor (RedditMonitor): Component that supplies submissions.
            post_manager (PostManager): Component that manages post data.
            post_analytics_manager (PostAnalyticsManager): Component that manages
                post analytics.
            webhook_notifier (WebhookNotifier): Component that sends notifications
                about posts.
        """
        self.reddit_monitor = reddit_monitor
        self.post_manager = post_manager
        self.post_analytics_manager = post_analytics_manager
        self.webhook_notifier = webhook_notifier
        self.log_manager = LoggingManager("scraper.log")

    def convert_submission_to_post(self, submission):
        """
        Builds a Post from the attributes of a Reddit submission.

        Parameters:
            submission (praw.models.Submission): The submission to copy from.

        Returns:
            Post: A Post whose ``reddit_id`` is ``submission.id`` and whose
                remaining fields are copied from the same-named attributes.
        """
        # Only reddit_id is named differently from the submission attribute it
        # comes from; every other field is copied under its own name.
        same_named_fields = (
            "title",
            "name",
            "url",
            "score",
            "num_comments",
            "created_utc",
            "selftext",
            "permalink",
            "upvote_ratio",
        )
        post_kwargs = {"reddit_id": submission.id}
        for field in same_named_fields:
            post_kwargs[field] = getattr(submission, field)
        return Post(**post_kwargs)

    def process_submissions(self, submissions, update_frequency=None):
        """
        Stores new submissions and refreshes analytics for known ones.

        A submission that is not yet stored is inserted, gets an analytics
        record and triggers a webhook notification. A submission that is
        already stored only gets a new analytics record, and only when
        ``check_update_requirements`` says one is due.

        Parameters:
            submissions (Iterable[praw.models.Submission]): Submissions to process.
            update_frequency (int, optional): Passed unchanged to
                ``check_update_requirements``. Originally documented as a
                minimum interval in seconds; the unit is not visible here.
        """
        for submission in submissions:
            if not self.post_manager.post_exists(submission.id):
                # First sighting: persist, record analytics, then announce.
                new_post = self.convert_submission_to_post(submission)
                self.post_manager.insert_post(new_post)
                self.post_analytics_manager.update_post_analytics(new_post)
                self.webhook_notifier.send_notification(new_post)
                continue

            # Known post: refresh analytics only when an update is due.
            update_due = self.post_analytics_manager.check_update_requirements(
                submission.id, update_frequency
            )
            if update_due:
                refreshed = self.convert_submission_to_post(submission)
                self.post_analytics_manager.update_post_analytics(refreshed)