documentation and mvp of scraper
This commit is contained in:
		
							
								
								
									
										277
									
								
								scraper/app.py
									
									
									
									
									
								
							
							
						
						
									
										277
									
								
								scraper/app.py
									
									
									
									
									
								
							| @@ -9,6 +9,16 @@ from threads import Scheduler, ThreadManager | |||||||
|  |  | ||||||
|  |  | ||||||
| class ApiRequestHandler: | class ApiRequestHandler: | ||||||
|  |     """ | ||||||
|  |     Handles API requests for the application. Supports basic HTTP methods: GET, POST, PUT, DELETE. | ||||||
|  |     Utilizes the `requests` library to send requests to a specified API URL and handles | ||||||
|  |     response validation and error logging. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         api_url (str): The base URL for the API to which requests are sent. | ||||||
|  |         log_manager (LoggingManager): Manages logging for API request operations. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def __init__(self, api_url: str): |     def __init__(self, api_url: str): | ||||||
|         self.api_url = api_url |         self.api_url = api_url | ||||||
|         self.log_manager = LoggingManager("scraper.log") |         self.log_manager = LoggingManager("scraper.log") | ||||||
| @@ -16,6 +26,23 @@ class ApiRequestHandler: | |||||||
|     def send_api_request( |     def send_api_request( | ||||||
|         self, method: str, api_url: str, data=None, params=None |         self, method: str, api_url: str, data=None, params=None | ||||||
|     ) -> dict: |     ) -> dict: | ||||||
|  |         """ | ||||||
|  |         Sends a request to the API using the specified HTTP method, URL, and optional data and parameters. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             method (str): The HTTP method to use for the request. Must be one of: GET, POST, PUT, DELETE. | ||||||
|  |             api_url (str): The URL endpoint to send the request to. | ||||||
|  |             data (dict, optional): The payload to send in the request body. | ||||||
|  |             params (dict, optional): The URL parameters to append to the request. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             dict: The JSON response from the API. | ||||||
|  |  | ||||||
|  |         Raises: | ||||||
|  |             InvalidMethodError: If the provided method is not supported. | ||||||
|  |             InvalidDataTypeError: If `data` or `params` is provided but is not a dictionary. | ||||||
|  |             APIRequestError: If the response from the API is not a success. | ||||||
|  |         """ | ||||||
|         if method not in ["GET", "POST", "PUT", "DELETE"]: |         if method not in ["GET", "POST", "PUT", "DELETE"]: | ||||||
|             raise InvalidMethodError(f"Invalid method: {method}") |             raise InvalidMethodError(f"Invalid method: {method}") | ||||||
|         if data is not None and not isinstance(data, dict): |         if data is not None and not isinstance(data, dict): | ||||||
| @@ -24,7 +51,11 @@ class ApiRequestHandler: | |||||||
|             raise InvalidDataTypeError( |             raise InvalidDataTypeError( | ||||||
|                 f"Invalid data type: {type(params)} expected dict" |                 f"Invalid data type: {type(params)} expected dict" | ||||||
|             ) |             ) | ||||||
|  |         try: | ||||||
|             response = requests.request(method, api_url, data=data, params=params) |             response = requests.request(method, api_url, data=data, params=params) | ||||||
|  |         except requests.RequestException as e: | ||||||
|  |             self.log_manager.error(f"API request failed: {e}") | ||||||
|  |             raise APIRequestError(0, str(e)) | ||||||
|         success_codes = [200, 201, 204] |         success_codes = [200, 201, 204] | ||||||
|         if response.status_code not in success_codes: |         if response.status_code not in success_codes: | ||||||
|             self.log_manager.error( |             self.log_manager.error( | ||||||
| @@ -35,11 +66,34 @@ class ApiRequestHandler: | |||||||
|  |  | ||||||
|  |  | ||||||
| class PostManager: | class PostManager: | ||||||
|  |     """ | ||||||
|  |     Manages operations related to posts, including retrieval and insertion of post data into a database via API requests. | ||||||
|  |     Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging operations. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         api_request_handler (ApiRequestHandler): Handles the API requests for interacting with post data. | ||||||
|  |         log_manager (LoggingManager): Manages logging for operations performed by PostManager. | ||||||
|  |     """ | ||||||
|     def __init__(self, api_request_handler: ApiRequestHandler): |     def __init__(self, api_request_handler: ApiRequestHandler): | ||||||
|  |         """ | ||||||
|  |         Initializes the PostManager with an API request handler for making API calls and a logging manager for logging. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             api_request_handler (ApiRequestHandler): The handler for making API requests. | ||||||
|  |         """ | ||||||
|         self.api_request_handler = api_request_handler |         self.api_request_handler = api_request_handler | ||||||
|         self.log_manager = LoggingManager("scraper.log") |         self.log_manager = LoggingManager("scraper.log") | ||||||
|  |  | ||||||
|     def get_post_by_reddit_id(self, reddit_id: str) -> dict: |     def get_post_by_reddit_id(self, reddit_id: str) -> dict: | ||||||
|  |         """ | ||||||
|  |         Retrieves a post by its Reddit ID from the database through an API call. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             reddit_id (str): The Reddit ID of the post to retrieve. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             dict: The response from the API containing the post data. | ||||||
|  |         """ | ||||||
|         self.log_manager.log(f"Getting post by reddit id: {reddit_id}") |         self.log_manager.log(f"Getting post by reddit id: {reddit_id}") | ||||||
|         response = self.api_request_handler.send_api_request( |         response = self.api_request_handler.send_api_request( | ||||||
|             "GET", f"{self.api_request_handler.api_url}posts/?reddit_id={reddit_id}" |             "GET", f"{self.api_request_handler.api_url}posts/?reddit_id={reddit_id}" | ||||||
| @@ -47,6 +101,15 @@ class PostManager: | |||||||
|         return response |         return response | ||||||
|  |  | ||||||
|     def post_exists(self, reddit_id: str) -> bool: |     def post_exists(self, reddit_id: str) -> bool: | ||||||
|  |         """ | ||||||
|  |         Checks if a post with the specified Reddit ID exists in the database. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             reddit_id (str): The Reddit ID of the post to check. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             bool: True if the post exists, False otherwise. | ||||||
|  |         """ | ||||||
|         self.log_manager.log(f"Checking if post exists: {reddit_id}") |         self.log_manager.log(f"Checking if post exists: {reddit_id}") | ||||||
|         response = self.get_post_by_reddit_id(reddit_id) |         response = self.get_post_by_reddit_id(reddit_id) | ||||||
|         if len(response) == 0: |         if len(response) == 0: | ||||||
| @@ -54,16 +117,24 @@ class PostManager: | |||||||
|         return True |         return True | ||||||
|  |  | ||||||
|     def insert_post(self, post) -> dict: |     def insert_post(self, post) -> dict: | ||||||
|  |         """ | ||||||
|  |         Inserts a new post into the database through an API call. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             post (Post): The Post object containing the data to insert. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             dict: The response from the API after attempting to insert the post data. | ||||||
|  |         """ | ||||||
|         self.log_manager.log(f"Inserting post: {post.reddit_id}") |         self.log_manager.log(f"Inserting post: {post.reddit_id}") | ||||||
|         self.post = post |  | ||||||
|         data = { |         data = { | ||||||
|             "reddit_id": self.post.reddit_id, |             "reddit_id": post.reddit_id, | ||||||
|             "title": self.post.title, |             "title": post.title, | ||||||
|             "name": self.post.name, |             "name": post.name, | ||||||
|             "url": self.post.url, |             "url": post.url, | ||||||
|             "created_utc": self.post.created_utc, |             "created_utc": post.created_utc, | ||||||
|             "selftext": self.post.selftext, |             "selftext": post.selftext, | ||||||
|             "permalink": self.post.permalink, |             "permalink": post.permalink, | ||||||
|         } |         } | ||||||
|         response = self.api_request_handler.send_api_request( |         response = self.api_request_handler.send_api_request( | ||||||
|             "POST", f"{self.api_request_handler.api_url}posts/", data=data |             "POST", f"{self.api_request_handler.api_url}posts/", data=data | ||||||
| @@ -71,6 +142,12 @@ class PostManager: | |||||||
|         return response |         return response | ||||||
|  |  | ||||||
|     def get_posts_from_last_7_days(self) -> dict: |     def get_posts_from_last_7_days(self) -> dict: | ||||||
|  |         """ | ||||||
|  |         Retrieves posts from the last 7 days from the database through an API call. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             dict: The response from the API containing the posts from the last 7 days. | ||||||
|  |         """ | ||||||
|         self.log_manager.log("Getting posts from last 7 days") |         self.log_manager.log("Getting posts from last 7 days") | ||||||
|         posts_from_last_7_days = self.api_request_handler.send_api_request( |         posts_from_last_7_days = self.api_request_handler.send_api_request( | ||||||
|             "GET", f"{self.api_request_handler.api_url}posts/?last_7_days=1" |             "GET", f"{self.api_request_handler.api_url}posts/?last_7_days=1" | ||||||
| @@ -79,26 +156,54 @@ class PostManager: | |||||||
|  |  | ||||||
|  |  | ||||||
| class PostAnalyticsManager: | class PostAnalyticsManager: | ||||||
|  |     """ | ||||||
|  |     Manages the analytics for posts by interfacing with the API to check for update requirements | ||||||
|  |     and update post analytics. This class leverages the ApiRequestHandler for API interactions | ||||||
|  |     and the PostManager for retrieving specific post information. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         api_request_handler (ApiRequestHandler): Handles API requests for analytics data. | ||||||
|  |         post_manager (PostManager): Manages post retrieval and existence checks. | ||||||
|  |         log_manager (LoggingManager): Manages logging for analytics operations. | ||||||
|  |     """ | ||||||
|     def __init__( |     def __init__( | ||||||
|         self, api_request_handler: ApiRequestHandler, post_manager: PostManager |         self, api_request_handler: ApiRequestHandler, post_manager: PostManager | ||||||
|     ): |     ): | ||||||
|  |         """ | ||||||
|  |         Initializes the PostAnalyticsManager with necessary handlers and managers. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             api_request_handler (ApiRequestHandler): The API request handler for making API calls. | ||||||
|  |             post_manager (PostManager): The manager for interacting with post data. | ||||||
|  |         """ | ||||||
|         self.api_request_handler = api_request_handler |         self.api_request_handler = api_request_handler | ||||||
|         self.post_manager = post_manager |         self.post_manager = post_manager | ||||||
|         self.log_manager = LoggingManager("scraper.log") |         self.log_manager = LoggingManager("scraper.log") | ||||||
|  |  | ||||||
|     def check_update_requirements(self, reddit_id: str) -> bool: |     def check_update_requirements(self, reddit_id: str, update_frequency: int) -> bool: | ||||||
|  |         """ | ||||||
|  |         Checks if the post identified by the given reddit_id meets the requirements for an update | ||||||
|  |         by analyzing the analytics data within the last x seconds (update_frequency). | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             reddit_id (str): The Reddit ID of the post to check. | ||||||
|  |             update_frequency (int): The frequency in seconds for updating post analytics. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             bool: True if the post meets update requirements, False otherwise. | ||||||
|  |         """ | ||||||
|         self.log_manager.log(f"Checking update requirements for {reddit_id}") |         self.log_manager.log(f"Checking update requirements for {reddit_id}") | ||||||
|  |  | ||||||
|         # Specify your desired timezone, e.g., UTC |         # Specify your desired timezone, e.g., UTC | ||||||
|         timezone = ZoneInfo("UTC") |         timezone = ZoneInfo("UTC") | ||||||
|  |  | ||||||
|         # Make your datetime objects timezone-aware |         # Make your datetime objects timezone-aware | ||||||
|         fifteen_minutes_ago = datetime.now(timezone) - timedelta(minutes=15) |         time_start = datetime.now(timezone) - timedelta(seconds=update_frequency) | ||||||
|         now = datetime.now(timezone) |         now = datetime.now(timezone) | ||||||
|  |  | ||||||
|         # Format datetime objects for the API request |         # Format datetime objects for the API request | ||||||
|         time_begin_str = fifteen_minutes_ago.isoformat(timespec="seconds") |         time_begin_str = time_start.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' | ||||||
|         time_end_str = now.isoformat(timespec="seconds") |         time_end_str = now.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' | ||||||
|  |  | ||||||
|         post_id = self.post_manager.get_post_by_reddit_id(reddit_id) |         post_id = self.post_manager.get_post_by_reddit_id(reddit_id) | ||||||
|         post_id = post_id[0]["id"] |         post_id = post_id[0]["id"] | ||||||
| @@ -119,6 +224,16 @@ class PostAnalyticsManager: | |||||||
|         return True |         return True | ||||||
|  |  | ||||||
|     def update_post_analytics(self, post: Post) -> dict: |     def update_post_analytics(self, post: Post) -> dict: | ||||||
|  |         """ | ||||||
|  |         Updates the analytics for a given post with new data such as score, number of comments, | ||||||
|  |         and upvote ratio. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             post (Post): The post object containing the new analytics data. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             dict: The response from the API after updating the post's analytics. | ||||||
|  |         """ | ||||||
|         self.log_manager.log(f"Updating post analytics for {post.reddit_id}") |         self.log_manager.log(f"Updating post analytics for {post.reddit_id}") | ||||||
|         post_id = self.post_manager.get_post_by_reddit_id(post.reddit_id) |         post_id = self.post_manager.get_post_by_reddit_id(post.reddit_id) | ||||||
|         post_id = post_id[0]["id"] |         post_id = post_id[0]["id"] | ||||||
| @@ -135,9 +250,29 @@ class PostAnalyticsManager: | |||||||
|  |  | ||||||
|  |  | ||||||
| class RedditMonitor: | class RedditMonitor: | ||||||
|  |     """ | ||||||
|  |     Monitors Reddit submissions for a specific subreddit, streaming new submissions and | ||||||
|  |     updating existing ones. Utilizes PRAW (Python Reddit API Wrapper) to interact with Reddit's API. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         reddit (praw.Reddit): An instance of the PRAW Reddit class for API interactions. | ||||||
|  |         subreddit (praw.models.Subreddit): The subreddit object for the specified subreddit. | ||||||
|  |         log_manager (LoggingManager): Manages logging for Reddit monitoring operations. | ||||||
|  |     """ | ||||||
|     def __init__( |     def __init__( | ||||||
|         self, client_id, client_secret, user_agent, username, password, subreddit_name |         self, client_id, client_secret, user_agent, username, password, subreddit_name | ||||||
|     ): |     ): | ||||||
|  |         """ | ||||||
|  |         Initializes the RedditMonitor with credentials for Reddit API access and the target subreddit. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             client_id (str): The client ID for the Reddit API application. | ||||||
|  |             client_secret (str): The client secret for the Reddit API application. | ||||||
|  |             user_agent (str): The user agent string identifying the application to Reddit. | ||||||
|  |             username (str): The Reddit account username for authentication. | ||||||
|  |             password (str): The Reddit account password for authentication. | ||||||
|  |             subreddit_name (str): The name of the subreddit to monitor. | ||||||
|  |         """ | ||||||
|         self.reddit = praw.Reddit( |         self.reddit = praw.Reddit( | ||||||
|             client_id=client_id, |             client_id=client_id, | ||||||
|             client_secret=client_secret, |             client_secret=client_secret, | ||||||
| @@ -149,11 +284,28 @@ class RedditMonitor: | |||||||
|         self.log_manager = LoggingManager("scraper.log") |         self.log_manager = LoggingManager("scraper.log") | ||||||
|  |  | ||||||
|     def stream_submissions(self): |     def stream_submissions(self): | ||||||
|  |         """ | ||||||
|  |         Streams new submissions from the specified subreddit, yielding each submission | ||||||
|  |         as it becomes available. | ||||||
|  |  | ||||||
|  |         Yields: | ||||||
|  |             praw.models.Submission: A submission object representing a Reddit post. | ||||||
|  |         """ | ||||||
|         self.log_manager.info("Starting submission stream") |         self.log_manager.info("Starting submission stream") | ||||||
|         for submission in self.subreddit.stream.submissions(): |         for submission in self.subreddit.stream.submissions(): | ||||||
|             yield submission |             yield submission | ||||||
|  |  | ||||||
|     def update_submissions(self, posts_to_update): |     def update_submissions(self, posts_to_update): | ||||||
|  |         """ | ||||||
|  |         Retrieves and yields submissions corresponding to a list of posts that need to be updated, | ||||||
|  |         identified by their Reddit IDs. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             posts_to_update (list of dict): A list of dictionaries, each containing the 'reddit_id' of a post to update. | ||||||
|  |  | ||||||
|  |         Yields: | ||||||
|  |             praw.models.Submission: A submission object for each post that needs to be updated. | ||||||
|  |         """ | ||||||
|         self.log_manager.info("Updating submissions") |         self.log_manager.info("Updating submissions") | ||||||
|         for post in posts_to_update: |         for post in posts_to_update: | ||||||
|             submission = self.reddit.submission(id=post["reddit_id"]) |             submission = self.reddit.submission(id=post["reddit_id"]) | ||||||
| @@ -161,6 +313,18 @@ class RedditMonitor: | |||||||
|  |  | ||||||
|  |  | ||||||
| class SubmissionManager: | class SubmissionManager: | ||||||
|  |     """ | ||||||
|  |     Manages the processing of Reddit submissions, including conversion to post objects, | ||||||
|  |     checking for updates, and notifying via webhook. It integrates closely with RedditMonitor, | ||||||
|  |     PostManager, and PostAnalyticsManager to streamline the handling of new and existing submissions. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         reddit_monitor (RedditMonitor): Monitors and streams Reddit submissions. | ||||||
|  |         post_manager (PostManager): Manages post data interactions. | ||||||
|  |         post_analytics_manager (PostAnalyticsManager): Manages post analytics data. | ||||||
|  |         webhook_notifier (WebhookNotifier): Handles notifications for new or updated posts. | ||||||
|  |         log_manager (LoggingManager): Manages logging for submission processing operations. | ||||||
|  |     """ | ||||||
|     def __init__( |     def __init__( | ||||||
|         self, |         self, | ||||||
|         reddit_monitor: RedditMonitor, |         reddit_monitor: RedditMonitor, | ||||||
| @@ -168,6 +332,15 @@ class SubmissionManager: | |||||||
|         post_analytics_manager: PostAnalyticsManager, |         post_analytics_manager: PostAnalyticsManager, | ||||||
|         WebhookNotifier, |         WebhookNotifier, | ||||||
|     ): |     ): | ||||||
|  |         """ | ||||||
|  |         Initializes the SubmissionManager with necessary components for processing submissions. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions. | ||||||
|  |             post_manager (PostManager): The component for managing post data. | ||||||
|  |             post_analytics_manager (PostAnalyticsManager): The component for managing post analytics. | ||||||
|  |             WebhookNotifier: The component for sending notifications about posts. | ||||||
|  |         """ | ||||||
|         self.reddit_monitor = reddit_monitor |         self.reddit_monitor = reddit_monitor | ||||||
|         self.post_manager = post_manager |         self.post_manager = post_manager | ||||||
|         self.post_analytics_manager = post_analytics_manager |         self.post_analytics_manager = post_analytics_manager | ||||||
| @@ -175,6 +348,16 @@ class SubmissionManager: | |||||||
|         self.log_manager = LoggingManager("scraper.log") |         self.log_manager = LoggingManager("scraper.log") | ||||||
|  |  | ||||||
|     def convert_submission_to_post(self, submission): |     def convert_submission_to_post(self, submission): | ||||||
|  |         """ | ||||||
|  |         Converts a Reddit submission object into a Post object suitable for database insertion | ||||||
|  |         or analytics processing. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             submission (praw.models.Submission): The Reddit submission to convert. | ||||||
|  |  | ||||||
|  |         Returns: | ||||||
|  |             Post: A Post object populated with data from the Reddit submission. | ||||||
|  |         """ | ||||||
|         post = Post( |         post = Post( | ||||||
|             reddit_id=submission.id, |             reddit_id=submission.id, | ||||||
|             title=submission.title, |             title=submission.title, | ||||||
| @@ -189,13 +372,21 @@ class SubmissionManager: | |||||||
|         ) |         ) | ||||||
|         return post |         return post | ||||||
|  |  | ||||||
|     def process_submissions(self, submissions): |     def process_submissions(self, submissions, update_frequency=None): | ||||||
|  |         """ | ||||||
|  |         Processes a stream of Reddit submissions, checking for their existence, updating analytics, | ||||||
|  |         and notifying via webhook if necessary. Optionally respects an update frequency to limit updates. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             submissions (Iterable[praw.models.Submission]): An iterable of Reddit submission objects to process. | ||||||
|  |             update_frequency (int, optional): The minimum frequency in seconds to update a post's analytics. | ||||||
|  |         """ | ||||||
|         for submission in submissions: |         for submission in submissions: | ||||||
|             self.log_manager.log(submission) |             self.log_manager.log(submission) | ||||||
|             if self.post_manager.post_exists(submission.id): |             if self.post_manager.post_exists(submission.id): | ||||||
|                 self.log_manager.log("Post exists") |                 self.log_manager.log("Post exists") | ||||||
|                 self.log_manager.log(f"post id: {submission.id}") |                 self.log_manager.log(f"post id: {submission.id}") | ||||||
|                 if self.post_analytics_manager.check_update_requirements(submission.id): |                 if self.post_analytics_manager.check_update_requirements(submission.id, update_frequency): | ||||||
|                     self.log_manager.log("Update requirements met") |                     self.log_manager.log("Update requirements met") | ||||||
|                     post = self.convert_submission_to_post(submission) |                     post = self.convert_submission_to_post(submission) | ||||||
|                     self.post_analytics_manager.update_post_analytics(post) |                     self.post_analytics_manager.update_post_analytics(post) | ||||||
| @@ -207,6 +398,23 @@ class SubmissionManager: | |||||||
|  |  | ||||||
|  |  | ||||||
| class Application: | class Application: | ||||||
|  |     """ | ||||||
|  |     Orchestrates the main application flow, including starting the submission stream, | ||||||
|  |     managing periodic updates of post analytics, and initializing all necessary components | ||||||
|  |     for the application to function. | ||||||
|  |  | ||||||
|  |     Attributes: | ||||||
|  |         reddit_monitor (RedditMonitor): Monitors Reddit for new or updated submissions. | ||||||
|  |         webhook_notifier: Notifies external services via webhooks when certain actions occur. | ||||||
|  |         api_conn: Manages API connections and requests. | ||||||
|  |         post_manager (PostManager): Manages CRUD operations for posts. | ||||||
|  |         post_analytics_manager (PostAnalyticsManager): Manages analytics for posts. | ||||||
|  |         submission_manager (SubmissionManager): Manages the processing of Reddit submissions. | ||||||
|  |         log_manager (LoggingManager): Centralized logging for the application. | ||||||
|  |         scheduler: Manages the scheduling of periodic updates. | ||||||
|  |         thread_manager: Manages threading for asynchronous operations. | ||||||
|  |         update_frequency (int): The frequency, in seconds, at which post analytics should be updated. | ||||||
|  |     """ | ||||||
|     def __init__( |     def __init__( | ||||||
|         self, |         self, | ||||||
|         reddit_monitor, |         reddit_monitor, | ||||||
| @@ -215,7 +423,20 @@ class Application: | |||||||
|         post_manager, |         post_manager, | ||||||
|         post_analytics_manager, |         post_analytics_manager, | ||||||
|         submission_manager, |         submission_manager, | ||||||
|  |         update_frequency | ||||||
|     ): |     ): | ||||||
|  |         """ | ||||||
|  |         Initializes the application with all necessary components. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions. | ||||||
|  |             webhook_notifier: The notifier for sending updates via webhooks. | ||||||
|  |             api_conn: The API connection manager. | ||||||
|  |             post_manager (PostManager): The manager for post operations. | ||||||
|  |             post_analytics_manager (PostAnalyticsManager): The manager for post analytics operations. | ||||||
|  |             submission_manager (SubmissionManager): The manager for processing Reddit submissions. | ||||||
|  |             update_frequency (int): The frequency, in seconds, at which to perform updates. | ||||||
|  |         """ | ||||||
|         self.reddit_monitor = reddit_monitor |         self.reddit_monitor = reddit_monitor | ||||||
|         self.webhook_notifier = webhook_notifier |         self.webhook_notifier = webhook_notifier | ||||||
|         self.api_conn = api_conn |         self.api_conn = api_conn | ||||||
| @@ -225,23 +446,37 @@ class Application: | |||||||
|         self.submission_manager = submission_manager |         self.submission_manager = submission_manager | ||||||
|         self.scheduler = None |         self.scheduler = None | ||||||
|         self.thread_manager = None |         self.thread_manager = None | ||||||
|  |         # how often should post analytics be updated (call for update and database update are separate) | ||||||
|  |         self.update_frequency = update_frequency | ||||||
|  |  | ||||||
|     def periodic_update(self): |     def periodic_update(self, update_frequency): | ||||||
|  |         """ | ||||||
|  |         Executes periodic updates for post analytics based on a predefined frequency. | ||||||
|  |  | ||||||
|  |         Parameters: | ||||||
|  |             update_frequency (int): The frequency, in seconds, at which to perform updates. | ||||||
|  |         """ | ||||||
|         self.log_manager.info("Running periodic update") |         self.log_manager.info("Running periodic update") | ||||||
|         to_be_updated = self.post_manager.get_posts_from_last_7_days() |         to_be_updated = self.post_manager.get_posts_from_last_7_days() | ||||||
|         submissions = self.reddit_monitor.update_submissions(to_be_updated) |         submissions = self.reddit_monitor.update_submissions(to_be_updated) | ||||||
|         self.submission_manager.process_submissions(submissions) |         self.submission_manager.process_submissions(submissions, update_frequency) | ||||||
|  |  | ||||||
|     def run_periodic_update(self, interval): |     def run_periodic_update(self): | ||||||
|         self.scheduler = Scheduler(interval, self.periodic_update) |         """ | ||||||
|  |         Initializes and runs the scheduler for periodic updates. | ||||||
|  |         """ | ||||||
|  |         self.scheduler = Scheduler(self.update_frequency, lambda: self.periodic_update(self.update_frequency)) | ||||||
|         self.scheduler.run() |         self.scheduler.run() | ||||||
|  |  | ||||||
|     def run(self): |     def run(self): | ||||||
|  |         """ | ||||||
|  |         Starts the main application process, including streaming submissions, running periodic updates, | ||||||
|  |         and processing submissions. | ||||||
|  |         """ | ||||||
|         self.log_manager.info("Application started") |         self.log_manager.info("Application started") | ||||||
|         update_frequency = 60 * 15  # 15 minutes in seconds |  | ||||||
|         self.thread_manager = ThreadManager( |         self.thread_manager = ThreadManager( | ||||||
|             target=self.run_periodic_update, args=(update_frequency,) |             target=self.run_periodic_update, args=() | ||||||
|         ) |         ) | ||||||
|         self.thread_manager.run() |         self.thread_manager.run() | ||||||
|         submissions = self.reddit_monitor.stream_submissions() |         submissions = self.reddit_monitor.stream_submissions() | ||||||
|         self.submission_manager.process_submissions(submissions) |         self.submission_manager.process_submissions(submissions, self.update_frequency) | ||||||
|   | |||||||
| @@ -12,3 +12,4 @@ class Config: | |||||||
|     USER_AGENT = "praw:zman.video_repost_bot:v0.1.0 (by u/jzman21)" |     USER_AGENT = "praw:zman.video_repost_bot:v0.1.0 (by u/jzman21)" | ||||||
|     DISABLE_WEBHOOK = False |     DISABLE_WEBHOOK = False | ||||||
|     API_URL = "http://server:8000/api/" |     API_URL = "http://server:8000/api/" | ||||||
|  |     UPDATE_FREQUENCY = 60 * 15 # 15 minutes | ||||||
|   | |||||||
| @@ -23,6 +23,7 @@ if __name__ == "__main__": | |||||||
|     disable_webhook = Config.DISABLE_WEBHOOK |     disable_webhook = Config.DISABLE_WEBHOOK | ||||||
|     pkmn_env = Config.PKMN_ENV |     pkmn_env = Config.PKMN_ENV | ||||||
|     api_url = Config.API_URL |     api_url = Config.API_URL | ||||||
|  |     update_frequency = Config.UPDATE_FREQUENCY | ||||||
|  |  | ||||||
|     reddit_monitor = RedditMonitor( |     reddit_monitor = RedditMonitor( | ||||||
|         client_id, client_secret, user_agent, username, password, subreddit_name |         client_id, client_secret, user_agent, username, password, subreddit_name | ||||||
| @@ -41,6 +42,7 @@ if __name__ == "__main__": | |||||||
|         post_manager, |         post_manager, | ||||||
|         post_analytics_manager, |         post_analytics_manager, | ||||||
|         submission_manager, |         submission_manager, | ||||||
|  |         update_frequency | ||||||
|     ) |     ) | ||||||
|     app.run() |     app.run() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| # Generated by Django 5.0.2 on 2024-03-04 05:15 | # Generated by Django 5.0.2 on 2024-03-04 16:07 | ||||||
|  |  | ||||||
| import django.db.models.deletion | import django.db.models.deletion | ||||||
| from django.db import migrations, models | from django.db import migrations, models | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user