# pokemans/scraper/api.py
# 2024-03-24 11:42:35 -04:00
#
# 350 lines
# 13 KiB
# Python

"""
Interacts with the API to handle requests for post and product data.
Utilizes the `requests` library to send requests.
"""
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo

import requests

from app_log import LoggingManager
from exceptions import APIRequestError, InvalidDataTypeError, InvalidMethodError
from models import Post
class ApiRequestHandler:
    """
    Handles API requests for the application. Supports basic HTTP methods: GET, POST, PUT, DELETE.

    Utilizes the `requests` library to send requests to a specified API URL and handles
    response validation and error logging.

    Attributes:
        api_url (str): The base URL for the API to which requests are sent.
        log_manager (LoggingManager): Manages logging for API request operations.
    """

    # HTTP methods accepted by send_api_request; the comparison is case-sensitive.
    _SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")
    # Timeout, in seconds, handed to `requests` for every call.
    _TIMEOUT_SECONDS = 10

    def __init__(self, api_url: str):
        """
        Stores the base URL and creates the logger used for request failures.

        Parameters:
            api_url (str): The base URL for the API.
        """
        self.api_url = api_url
        self.log_manager = LoggingManager("scraper.log")

    def send_api_request(
        self, method: str, api_url: str, data=None, params=None
    ) -> dict:
        """
        Sends a request to the API using the specified HTTP method, URL, and optional data and parameters.

        Parameters:
            method (str): The HTTP method to use for the request. Must be one of: GET, POST, PUT, DELETE.
            api_url (str): The URL endpoint to send the request to.
            data (dict, optional): The payload, passed to `requests` as `data=`.
            params (dict, optional): The URL parameters, passed to `requests` as `params=`.

        Returns:
            dict: The decoded JSON body of the response (whatever `response.json()`
            yields, so a JSON array comes back as a list). An empty response body
            yields an empty dict.

        Raises:
            InvalidMethodError: If the provided method is not supported.
            InvalidDataTypeError: If `data` or `params` is provided but is not a dictionary.
            APIRequestError: If the request could not be completed (status code 0),
                if the API answered with an error status, or if a non-empty
                response body is not valid JSON.
        """
        if method not in self._SUPPORTED_METHODS:
            raise InvalidMethodError(f"Invalid method: {method}")
        if data is not None and not isinstance(data, dict):
            raise InvalidDataTypeError(f"Invalid data type: {type(data)} expected dict")
        if params is not None and not isinstance(params, dict):
            raise InvalidDataTypeError(
                f"Invalid data type: {type(params)} expected dict"
            )

        try:
            response = requests.request(
                method,
                api_url,
                data=data,
                params=params,
                timeout=self._TIMEOUT_SECONDS,
            )
        except requests.RequestException as e:
            # No response object exists here, so 0 stands in for the status code.
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(0, str(e)) from e

        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(response.status_code, response.text) from e

        # A successful response may carry no body at all (e.g. 204 No Content);
        # decoding it would fail, so report it as an empty result instead.
        if not response.content:
            return {}

        try:
            return response.json()
        except ValueError as e:
            # The JSON decode errors raised by `requests` derive from ValueError.
            self.log_manager.error(f"API response was not valid JSON: {e}")
            raise APIRequestError(response.status_code, response.text) from e
class PostManager:
    """
    Manages operations related to posts: lookup, existence checks and insertion via API requests.

    Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with post data.
        log_manager (LoggingManager): Manages logging for operations performed by PostManager.
    """

    def __init__(self, api_request_handler: ApiRequestHandler):
        """
        Initializes the PostManager with an API request handler for making API calls and a logging manager for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def get_post_by_reddit_id(self, reddit_id: str) -> list:
        """
        Retrieves the post(s) matching a Reddit ID through an API call.

        Parameters:
            reddit_id (str): The Reddit ID of the post to retrieve.

        Returns:
            list: The API response for the `posts/` endpoint filtered by `reddit_id`.
            Other code in this file indexes it as a list of post dicts
            (`response[0]["id"]`); an empty response means no match.
        """
        # The ID goes through `params` so that it is URL-encoded rather than
        # spliced verbatim into the query string.
        return self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}posts/",
            params={"reddit_id": reddit_id},
        )

    def post_exists(self, reddit_id: str) -> bool:
        """
        Checks if a post with the specified Reddit ID is known to the API.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.

        Returns:
            bool: True if the lookup returned at least one entry, False otherwise.
        """
        return bool(self.get_post_by_reddit_id(reddit_id))

    def insert_post(self, post: Post) -> dict:
        """
        Inserts a new post through an API call.

        Parameters:
            post (Post): The Post object containing the data to insert. Its
                reddit_id, title, name, url, created_utc, selftext and permalink
                attributes are sent.

        Returns:
            dict: The response from the API after attempting to insert the post data.
        """
        data = {
            "reddit_id": post.reddit_id,
            "title": post.title,
            "name": post.name,
            "url": post.url,
            "created_utc": post.created_utc,
            "selftext": post.selftext,
            "permalink": post.permalink,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}posts/", data=data
        )

    def get_posts_from_last_7_days(self) -> dict:
        """
        Retrieves posts from the last 7 days through an API call.

        Returns:
            dict: The API response for `posts/?last_7_days=1`.
            NOTE(review): presumably a list of post dicts like the other
            `posts/` queries - confirm against the API.
        """
        self.log_manager.log("Getting posts from last 7 days")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}posts/?last_7_days=1"
        )
class PostAnalyticsManager:
    """
    Manages the analytics for posts by interfacing with the API to check for update requirements
    and to record new post analytics. This class leverages the ApiRequestHandler for API
    interactions and the PostManager for resolving a Reddit ID to the API's post ID.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles API requests for analytics data.
        post_manager (PostManager): Manages post retrieval and existence checks.
        log_manager (LoggingManager): Manages logging for analytics operations.
    """

    def __init__(
        self, api_request_handler: ApiRequestHandler, post_manager: PostManager
    ):
        """
        Initializes the PostAnalyticsManager with necessary handlers and managers.

        Parameters:
            api_request_handler (ApiRequestHandler): The API request handler for making API calls.
            post_manager (PostManager): The manager for interacting with post data.
        """
        self.api_request_handler = api_request_handler
        self.post_manager = post_manager
        self.log_manager = LoggingManager("scraper.log")

    @staticmethod
    def _format_timestamp(moment: datetime) -> str:
        """Formats a datetime as `YYYY-MM-DDTHH:MM:SS.mmmZ` (millisecond precision)."""
        # %f yields microseconds; dropping the last three digits leaves milliseconds.
        return moment.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

    def _get_post_id(self, reddit_id: str):
        """
        Resolves a Reddit ID to the `id` field of the first matching post.

        Raises:
            IndexError: If the lookup returns no entries.
        """
        matches = self.post_manager.get_post_by_reddit_id(reddit_id)
        if not matches:
            raise IndexError(f"No post found for reddit_id {reddit_id!r}")
        return matches[0]["id"]

    def check_update_requirements(self, reddit_id: str, update_frequency: int) -> bool:
        """
        Checks if the post identified by the given reddit_id meets the requirements for an update
        by looking for analytics entries recorded within the last `update_frequency` seconds.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.
            update_frequency (int): The frequency in seconds for updating post analytics.

        Returns:
            bool: True if the API returned no analytics entries inside the window
            (the post should be updated), False otherwise.

        Raises:
            IndexError: If no post matches `reddit_id`.
        """
        # Read the clock once so the window is exactly `update_frequency` wide.
        now = datetime.now(timezone.utc)
        window_start = now - timedelta(seconds=update_frequency)

        post_id = self._get_post_id(reddit_id)

        # Query values go through `params` so that they are URL-encoded.
        response = self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}post_analytics/",
            params={
                "post": post_id,
                "time_begin": self._format_timestamp(window_start),
                "time_end": self._format_timestamp(now),
            },
        )
        # Any entry inside the window means the post was updated recently enough.
        return not response

    def update_post_analytics(self, post: Post) -> dict:
        """
        Records new analytics for a given post: score, number of comments and upvote ratio.

        Parameters:
            post (Post): The post object containing the new analytics data.

        Returns:
            dict: The response from the API after posting the analytics entry.

        Raises:
            IndexError: If no post matches `post.reddit_id`.
        """
        data = {
            "post": self._get_post_id(post.reddit_id),
            "score": post.score,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}post_analytics/", data=data
        )
class CostcoProductManager:
    """
    Manages operations related to Costco products, including retrieval, insertion and update of
    product data via API requests. Utilizes an instance of ApiRequestHandler for API interactions
    and LoggingManager for logging operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with Costco product data.
        log_manager (LoggingManager): Manages logging for operations performed by CostcoProductManager.
    """

    def __init__(self, api_request_handler: ApiRequestHandler):
        """
        Initializes the CostcoProductManager with an API request handler for making API calls and a logging manager
        for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    @staticmethod
    def _product_payload(product) -> dict:
        """Builds the request payload shared by the insert and update calls."""
        return {
            "sku": product.sku,
            "name": product.name,
            "price": product.price,
            "img_url": product.img_url,
            "product_link": product.product_link,
            "active": product.active,
        }

    def get_all_costco_products(self) -> list:
        """
        Retrieves all Costco products through an API call.

        Returns:
            list: The response from the API for the `costco_products/` endpoint.
        """
        self.log_manager.log("Getting all Costco products")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}costco_products/"
        )

    def insert_costco_product(self, product) -> dict:
        """
        Inserts a new Costco product through an API call.

        Parameters:
            product (CostcoProduct): The CostcoProduct object containing the data to insert.

        Returns:
            dict: The response from the API after attempting to insert the product data.
        """
        self.log_manager.log(f"Inserting Costco product: {product.sku}")
        return self.api_request_handler.send_api_request(
            "POST",
            f"{self.api_request_handler.api_url}costco_products/",
            data=self._product_payload(product),
        )

    def update_costco_product(self, product) -> dict:
        """
        Updates an existing Costco product through an API call.

        The product is addressed by its SKU in the URL path.

        Parameters:
            product (CostcoProduct): The CostcoProduct object containing the updated data.

        Returns:
            dict: The response from the API after attempting to update the product data.
        """
        self.log_manager.log(f"Updating Costco product: {product.sku}")
        return self.api_request_handler.send_api_request(
            "PUT",
            f"{self.api_request_handler.api_url}costco_products/{product.sku}/",
            data=self._product_payload(product),
        )

    def get_costco_product_by_sku(self, sku: str) -> dict:
        """
        Retrieves a Costco product by its SKU through an API call.

        Parameters:
            sku (str): The SKU of the product to retrieve.

        Returns:
            dict: The response from the API for `costco_products/` filtered by `sku`.
            NOTE(review): a filtered listing may come back as a list - confirm
            against the API.
        """
        self.log_manager.log(f"Getting Costco product by SKU: {sku}")
        # The SKU goes through `params` so that it is URL-encoded rather than
        # spliced verbatim into the query string.
        return self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}costco_products/",
            params={"sku": sku},
        )