pokemans/scraper/app.py
2024-03-05 17:03:40 -05:00

104 lines
4.7 KiB
Python

from app_log import LoggingManager
from threads import Scheduler
from costco import CostcoMonitor
class Application:
    """
    Orchestrates the main application flow: registers the periodic tasks
    (post analytics refresh and Costco scrape) with the scheduler, then
    processes the Reddit submission stream.

    Attributes:
        COSTCO_SEARCH_URL (str): Search page handed to CostcoMonitor on every scrape.
        reddit_monitor (RedditMonitor): Monitors Reddit for new or updated submissions.
        webhook_notifier: Notifies external services via webhooks when certain actions occur.
        api_conn: Manages API connections and requests.
        post_manager (PostManager): Manages CRUD operations for posts.
        post_analytics_manager (PostAnalyticsManager): Manages analytics for posts.
        submission_manager (SubmissionManager): Manages the processing of Reddit submissions.
        log_manager (LoggingManager): Centralized logging, constructed with "scraper.log".
        scheduler (Scheduler): Manages the scheduling of periodic tasks.
        update_analytics_frequency (int): Interval, in seconds, between post analytics updates.
        scrape_costco_frequency (int): Interval, in seconds, between Costco scrapes.
    """

    COSTCO_SEARCH_URL = "https://www.costco.com/CatalogSearch?dept=All&keyword=pokemon"

    def __init__(
        self,
        reddit_monitor,
        webhook_notifier,
        api_conn,
        post_manager,
        post_analytics_manager,
        submission_manager,
    ):
        """
        Initializes the application with all necessary components.

        The logging manager and the scheduler are created here rather than
        injected; the task intervals are fixed defaults set below.

        Parameters:
            reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions.
            webhook_notifier: The notifier for sending updates via webhooks.
            api_conn: The API connection manager.
            post_manager (PostManager): The manager for post operations.
            post_analytics_manager (PostAnalyticsManager): The manager for post analytics operations.
            submission_manager (SubmissionManager): The manager for processing Reddit submissions.
        """
        self.reddit_monitor = reddit_monitor
        self.webhook_notifier = webhook_notifier
        self.api_conn = api_conn
        self.post_manager = post_manager
        self.post_analytics_manager = post_analytics_manager
        self.log_manager = LoggingManager("scraper.log")
        self.submission_manager = submission_manager
        self.scheduler = Scheduler()
        # how often should post analytics be updated (call for update and database update are separate)
        self.update_analytics_frequency = 60 * 15
        # how often the Costco search page should be scraped
        self.scrape_costco_frequency = 60 * 60

    def update_analytics(self):
        """
        Refreshes analytics for recent posts.

        Fetches the posts returned by ``post_manager.get_posts_from_last_7_days()``,
        asks the Reddit monitor for their updated submissions, and hands those
        to the submission manager together with ``update_analytics_frequency``.
        """
        self.log_manager.info("Running periodic analytics update")
        to_be_updated = self.post_manager.get_posts_from_last_7_days()
        submissions = self.reddit_monitor.update_submissions(to_be_updated)
        self.submission_manager.process_submissions(submissions, self.update_analytics_frequency)

    def scrape_costco(self):
        """
        Scrapes the Costco search page and forwards the products to the webhook notifier.

        A new CostcoMonitor is created for each run and is always closed,
        even when fetching the products raises; the exception still
        propagates to the caller and no notification is sent in that case.
        """
        self.log_manager.info("Running periodic costco scrape")
        costco_monitor = CostcoMonitor(self.COSTCO_SEARCH_URL)
        try:
            products = costco_monitor.get_products()
        finally:
            # Release the monitor on the failure path too, so a failed scrape
            # does not leave it open until the next scheduled run.
            costco_monitor.close()
        self.log_manager.info(f"Found {len(products)} products on the page")
        self.log_manager.info(products)
        self.webhook_notifier.costco_notification(products)

    def add_scheduler_task(self, name, task, interval):
        """
        Registers a task with the scheduler to be run at a specified interval.

        Parameters:
            name (str): Name of the task.
            task (callable): The task function to be executed.
            interval (int): The frequency in seconds at which the task should be executed.
        """
        self.scheduler.add_task(name, task, interval)

    def run(self):
        """
        Starts the main application process.

        Registers the periodic analytics and Costco tasks with the scheduler,
        then passes the Reddit submission stream to the submission manager.
        NOTE(review): stream_submissions() presumably yields indefinitely, so
        this call is not expected to return during normal operation - confirm.
        """
        self.log_manager.info("Application started")
        # tasks
        self.add_scheduler_task("update_analytics", self.update_analytics, self.update_analytics_frequency)
        self.add_scheduler_task("scrape_costco", self.scrape_costco, self.scrape_costco_frequency)
        # Stream submissions and process them
        submissions = self.reddit_monitor.stream_submissions()
        self.submission_manager.process_submissions(submissions, self.update_analytics_frequency)