350 lines
13 KiB
Python
350 lines
13 KiB
Python
"""
|
|
Interacts with the API to handle requests for post and product data.
|
|
Utilizes the `requests` library to send HTTP requests to the API.
|
|
"""
|
|
|
|
from datetime import datetime, timedelta
|
|
from zoneinfo import ZoneInfo
|
|
import requests
|
|
from exceptions import APIRequestError, InvalidDataTypeError, InvalidMethodError
|
|
from app_log import LoggingManager
|
|
from models import Post
|
|
|
|
|
|
class ApiRequestHandler:
    """
    Handles API requests for the application. Supports basic HTTP methods: GET, POST, PUT, DELETE.
    Utilizes the `requests` library to send requests to a specified API URL and handles
    response validation and error logging.

    Attributes:
        api_url (str): The base URL for the API to which requests are sent.
        log_manager (LoggingManager): Manages logging for API request operations.
    """

    # HTTP verbs accepted by send_api_request; the comparison is case-sensitive.
    _SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")

    def __init__(self, api_url: str):
        """
        Stores the base API URL and creates the logger used for request failures.

        Parameters:
            api_url (str): The base URL for the API to which requests are sent.
        """
        self.api_url = api_url
        self.log_manager = LoggingManager("scraper.log")

    def send_api_request(
        self, method: str, api_url: str, data=None, params=None
    ) -> dict:
        """
        Sends a request to the API using the specified HTTP method, URL, and optional data and parameters.

        Arguments are validated before any network traffic happens. Transport failures and
        HTTP error statuses are logged and re-raised as APIRequestError.

        Parameters:
            method (str): The HTTP method to use for the request. Must be one of: GET, POST, PUT, DELETE.
            api_url (str): The URL endpoint to send the request to.
            data (dict, optional): The payload to send in the request body.
            params (dict, optional): The URL parameters to append to the request.

        Returns:
            dict: The parsed JSON response from the API. Depending on the endpoint the parsed
                value may also be a list. An empty dict is returned when a successful
                response carries no body (for example 204 No Content).

        Raises:
            InvalidMethodError: If the provided method is not supported.
            InvalidDataTypeError: If `data` or `params` is provided but is not a dictionary.
            APIRequestError: If the request could not be completed (status code 0) or the
                response from the API is not a success.
        """
        if method not in self._SUPPORTED_METHODS:
            raise InvalidMethodError(f"Invalid method: {method}")
        if data is not None and not isinstance(data, dict):
            raise InvalidDataTypeError(f"Invalid data type: {type(data)} expected dict")
        if params is not None and not isinstance(params, dict):
            raise InvalidDataTypeError(
                f"Invalid data type: {type(params)} expected dict"
            )

        try:
            response = requests.request(
                method, api_url, data=data, params=params, timeout=10
            )
        except requests.RequestException as e:
            # No HTTP response exists at this point, hence the status code 0.
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(0, str(e)) from e

        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(response.status_code, response.text) from e

        # A successful response may have no body at all (e.g. 204 No Content after a
        # DELETE); calling .json() on it would raise, so report "nothing" instead.
        if not response.content:
            return {}
        return response.json()
|
|
|
|
|
|
class PostManager:
    """
    Manages operations related to posts, including retrieval and insertion of post data into a database via API requests.
    Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with post data.
        log_manager (LoggingManager): Manages logging for operations performed by PostManager.
    """

    def __init__(self, api_request_handler: "ApiRequestHandler"):
        """
        Initializes the PostManager with an API request handler for making API calls and a logging manager for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def _posts_url(self) -> str:
        """Returns the URL of the posts collection endpoint."""
        return f"{self.api_request_handler.api_url}posts/"

    def get_post_by_reddit_id(self, reddit_id: str) -> list:
        """
        Retrieves the posts matching a Reddit ID from the database through an API call.

        Parameters:
            reddit_id (str): The Reddit ID of the post to retrieve.

        Returns:
            list: The response from the API. Other code in this module treats it as a
                sequence of post records (empty when nothing matches) and reads the
                "id" key of the first record.
        """
        # Hand the filter over as `params` so the value is URL-encoded by the HTTP
        # layer instead of being pasted verbatim into the query string.
        return self.api_request_handler.send_api_request(
            "GET", self._posts_url(), params={"reddit_id": reddit_id}
        )

    def post_exists(self, reddit_id: str) -> bool:
        """
        Checks if a post with the specified Reddit ID exists in the database.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.

        Returns:
            bool: True if the lookup returned at least one entry, False otherwise.
        """
        return len(self.get_post_by_reddit_id(reddit_id)) > 0

    def insert_post(self, post) -> dict:
        """
        Inserts a new post into the database through an API call.

        Parameters:
            post (Post): The Post object containing the data to insert. Its reddit_id,
                title, name, url, created_utc, selftext and permalink attributes are sent.

        Returns:
            dict: The response from the API after attempting to insert the post data.
        """
        data = {
            "reddit_id": post.reddit_id,
            "title": post.title,
            "name": post.name,
            "url": post.url,
            "created_utc": post.created_utc,
            "selftext": post.selftext,
            "permalink": post.permalink,
        }
        return self.api_request_handler.send_api_request(
            "POST", self._posts_url(), data=data
        )

    def get_posts_from_last_7_days(self) -> dict:
        """
        Retrieves posts from the last 7 days from the database through an API call.

        The time window itself is applied by the API (requested via `last_7_days=1`).

        Returns:
            dict: The response from the API containing the posts from the last 7 days.
        """
        self.log_manager.log("Getting posts from last 7 days")
        return self.api_request_handler.send_api_request(
            "GET", self._posts_url(), params={"last_7_days": 1}
        )
|
|
|
|
|
|
class PostAnalyticsManager:
    """
    Manages the analytics for posts by interfacing with the API to check for update requirements
    and update post analytics. This class leverages the ApiRequestHandler for API interactions
    and the PostManager for retrieving specific post information.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles API requests for analytics data.
        post_manager (PostManager): Manages post retrieval and existence checks.
        log_manager (LoggingManager): Manages logging for analytics operations.
    """

    def __init__(
        self, api_request_handler: "ApiRequestHandler", post_manager: "PostManager"
    ):
        """
        Initializes the PostAnalyticsManager with necessary handlers and managers.

        Parameters:
            api_request_handler (ApiRequestHandler): The API request handler for making API calls.
            post_manager (PostManager): The manager for interacting with post data.
        """
        self.api_request_handler = api_request_handler
        self.post_manager = post_manager
        self.log_manager = LoggingManager("scraper.log")

    @staticmethod
    def _format_timestamp(moment: datetime) -> str:
        """Formats a datetime as `YYYY-MM-DDTHH:MM:SS.mmmZ` (millisecond precision)."""
        # %f yields microseconds (6 digits); dropping the last 3 leaves milliseconds.
        return moment.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

    def _lookup_post_id(self, reddit_id: str):
        """Returns the "id" field of the first post the PostManager finds for `reddit_id`."""
        matches = self.post_manager.get_post_by_reddit_id(reddit_id)
        try:
            return matches[0]["id"]
        except IndexError as exc:
            # Same exception type as before, but with a message that says what was missing.
            raise IndexError(f"No post found for reddit_id {reddit_id!r}") from exc

    def check_update_requirements(self, reddit_id: str, update_frequency: int) -> bool:
        """
        Checks if the post identified by the given reddit_id meets the requirements for an update
        by analyzing the analytics data within the last x seconds (update_frequency).

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.
            update_frequency (int): The frequency in seconds for updating post analytics.

        Returns:
            bool: True if no analytics entry was returned for the window (the post should
                be updated), False otherwise.

        Raises:
            IndexError: If no post is found for `reddit_id`.
        """
        # Take a single timezone-aware "now" so the queried window is exactly
        # `update_frequency` seconds long.
        now = datetime.now(ZoneInfo("UTC"))
        window_start = now - timedelta(seconds=update_frequency)

        post_id = self._lookup_post_id(reddit_id)

        # Passing the filters as `params` lets the HTTP layer URL-encode them.
        recent_entries = self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}post_analytics/",
            params={
                "post": post_id,
                "time_begin": self._format_timestamp(window_start),
                "time_end": self._format_timestamp(now),
            },
        )

        # Any entry inside the window means the analytics are still fresh.
        return len(recent_entries) == 0

    def update_post_analytics(self, post: "Post") -> dict:
        """
        Updates the analytics for a given post with new data such as score, number of comments,
        and upvote ratio.

        Parameters:
            post (Post): The post object containing the new analytics data.

        Returns:
            dict: The response from the API after updating the post's analytics.

        Raises:
            IndexError: If no post is found for `post.reddit_id`.
        """
        data = {
            "post": self._lookup_post_id(post.reddit_id),
            "score": post.score,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}post_analytics/", data=data
        )
|
|
|
|
|
|
class CostcoProductManager:
    """
    Manages operations related to Costco products, including retrieval and insertion of product data into a database
    via API requests. Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging
    operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with Costco product data.
        log_manager (LoggingManager): Manages logging for operations performed by CostcoProductManager.
    """

    def __init__(self, api_request_handler: "ApiRequestHandler"):
        """
        Initializes the CostcoProductManager with an API request handler for making API calls and a logging manager
        for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def _products_url(self) -> str:
        """Returns the URL of the Costco products collection endpoint."""
        return f"{self.api_request_handler.api_url}costco_products/"

    @staticmethod
    def _product_payload(product) -> dict:
        """Builds the request body shared by the insert and update calls."""
        return {
            "sku": product.sku,
            "name": product.name,
            "price": product.price,
            "img_url": product.img_url,
            "product_link": product.product_link,
            "active": product.active,
        }

    def get_all_costco_products(self) -> list:
        """
        Retrieves all Costco products from the database through an API call.

        Returns:
            list: The response from the API containing all Costco products.
        """
        self.log_manager.log("Getting all Costco products")
        return self.api_request_handler.send_api_request("GET", self._products_url())

    def insert_costco_product(self, product) -> dict:
        """
        Inserts a new Costco product into the database through an API call.

        Parameters:
            product (CostcoProduct): The CostcoProduct object containing the data to insert.

        Returns:
            dict: The response from the API after attempting to insert the product data.
        """
        self.log_manager.log(f"Inserting Costco product: {product.sku}")
        return self.api_request_handler.send_api_request(
            "POST", self._products_url(), data=self._product_payload(product)
        )

    def update_costco_product(self, product) -> dict:
        """
        Updates an existing Costco product in the database through an API call.

        The product is addressed by its SKU in the URL path.

        Parameters:
            product (CostcoProduct): The CostcoProduct object containing the updated data.

        Returns:
            dict: The response from the API after attempting to update the product data.
        """
        self.log_manager.log(f"Updating Costco product: {product.sku}")
        return self.api_request_handler.send_api_request(
            "PUT",
            f"{self._products_url()}{product.sku}/",
            data=self._product_payload(product),
        )

    def get_costco_product_by_sku(self, sku: str) -> dict:
        """
        Retrieves a Costco product by its SKU from the database through an API call.

        Parameters:
            sku (str): The SKU of the product to retrieve.

        Returns:
            dict: The response from the API containing the product data.
                NOTE(review): this is a filtered query on the collection endpoint, so
                the API may actually answer with a list of matches - confirm.
        """
        self.log_manager.log(f"Getting Costco product by SKU: {sku}")
        # Pass the SKU as `params` so it is URL-encoded rather than pasted into the URL.
        return self.api_request_handler.send_api_request(
            "GET", self._products_url(), params={"sku": sku}
        )
|