pokemans/scraper/api.py
2024-03-05 17:03:40 -05:00

250 lines
9.8 KiB
Python

import requests
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from exceptions import APIRequestError, InvalidDataTypeError, InvalidMethodError
from app_log import LoggingManager
from models import Post
class ApiRequestHandler:
    """
    Handles API requests for the application. Supports basic HTTP methods: GET, POST, PUT, DELETE.

    Utilizes the `requests` library to send requests to a specified API URL and handles
    response validation and error logging.

    Attributes:
        api_url (str): The base URL for the API to which requests are sent.
        log_manager (LoggingManager): Manages logging for API request operations.
    """

    # HTTP verbs accepted by send_api_request.
    SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")
    # Status codes treated as a successful response.
    SUCCESS_CODES = (200, 201, 204)
    # Seconds to wait for the server before giving up. Without an explicit
    # timeout `requests` waits indefinitely, which would hang the scraper.
    DEFAULT_TIMEOUT = 30

    def __init__(self, api_url: str):
        """
        Stores the base API URL and creates the logger used for request errors.

        Parameters:
            api_url (str): The base URL for the API.
        """
        self.api_url = api_url
        self.log_manager = LoggingManager("scraper.log")

    def send_api_request(
        self,
        method: str,
        api_url: str,
        data=None,
        params=None,
        timeout=DEFAULT_TIMEOUT,
    ) -> dict:
        """
        Sends a request to the API using the specified HTTP method, URL, and optional data and parameters.

        Parameters:
            method (str): The HTTP method to use for the request. Must be one of: GET, POST, PUT, DELETE.
            api_url (str): The URL endpoint to send the request to.
            data (dict, optional): The payload to send in the request body.
            params (dict, optional): The URL parameters to append to the request.
            timeout (float or tuple, optional): Passed through to `requests` as the
                request timeout in seconds. Defaults to DEFAULT_TIMEOUT.

        Returns:
            dict: The decoded JSON response from the API, or an empty dict when the
            successful response carries no body (e.g. 204 No Content).

        Raises:
            InvalidMethodError: If the provided method is not supported.
            InvalidDataTypeError: If `data` or `params` is provided but is not a dictionary.
            APIRequestError: If the request could not be completed (status code 0,
                including timeouts) or the response status is not a success code.
        """
        if method not in self.SUPPORTED_METHODS:
            raise InvalidMethodError(f"Invalid method: {method}")
        if data is not None and not isinstance(data, dict):
            raise InvalidDataTypeError(f"Invalid data type: {type(data)} expected dict")
        if params is not None and not isinstance(params, dict):
            raise InvalidDataTypeError(
                f"Invalid data type: {type(params)} expected dict"
            )

        try:
            response = requests.request(
                method, api_url, data=data, params=params, timeout=timeout
            )
        except requests.RequestException as e:
            self.log_manager.error(f"API request failed: {e}")
            # Chain the original exception so the transport-level cause is
            # still visible in tracebacks.
            raise APIRequestError(0, str(e)) from e

        if response.status_code not in self.SUCCESS_CODES:
            self.log_manager.error(
                f"API request failed: {response.status_code} - {response.text}"
            )
            raise APIRequestError(response.status_code, response.text)

        # 204 is an accepted status but has no body; calling .json() on an
        # empty body raises a decode error, so report "no data" instead.
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()
class PostManager:
    """
    Manages operations related to posts, including retrieval and insertion of post data into a database via API requests.

    Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with post data.
        log_manager (LoggingManager): Manages logging for operations performed by PostManager.
    """

    def __init__(self, api_request_handler: ApiRequestHandler):
        """
        Initializes the PostManager with an API request handler for making API calls and a logging manager for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def _posts_url(self) -> str:
        """Returns the URL of the `posts/` endpoint under the handler's base URL."""
        return f"{self.api_request_handler.api_url}posts/"

    def get_post_by_reddit_id(self, reddit_id: str) -> dict:
        """
        Retrieves a post by its Reddit ID from the database through an API call.

        Parameters:
            reddit_id (str): The Reddit ID of the post to retrieve.

        Returns:
            dict: The decoded API response for the `reddit_id` filter. Callers in
            this module treat it as a collection of matches (empty when none).
        """
        self.log_manager.log(f"Getting post by reddit id: {reddit_id}")
        # Pass the filter through `params` so the value is URL-encoded by the
        # HTTP library instead of being pasted raw into the query string.
        # str() keeps the same text an f-string interpolation would produce.
        return self.api_request_handler.send_api_request(
            "GET", self._posts_url(), params={"reddit_id": str(reddit_id)}
        )

    def post_exists(self, reddit_id: str) -> bool:
        """
        Checks if a post with the specified Reddit ID exists in the database.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.

        Returns:
            bool: True if the lookup returned at least one entry, False otherwise.
        """
        self.log_manager.log(f"Checking if post exists: {reddit_id}")
        return bool(self.get_post_by_reddit_id(reddit_id))

    def insert_post(self, post) -> dict:
        """
        Inserts a new post into the database through an API call.

        Parameters:
            post (Post): The Post object containing the data to insert. Its
                reddit_id, title, name, url, created_utc, selftext and permalink
                attributes are sent as the request payload.

        Returns:
            dict: The response from the API after attempting to insert the post data.
        """
        self.log_manager.log(f"Inserting post: {post.reddit_id}")
        payload = {
            "reddit_id": post.reddit_id,
            "title": post.title,
            "name": post.name,
            "url": post.url,
            "created_utc": post.created_utc,
            "selftext": post.selftext,
            "permalink": post.permalink,
        }
        return self.api_request_handler.send_api_request(
            "POST", self._posts_url(), data=payload
        )

    def get_posts_from_last_7_days(self) -> dict:
        """
        Retrieves posts from the last 7 days from the database through an API call.

        Returns:
            dict: The response from the API containing the posts from the last 7 days.
        """
        self.log_manager.log("Getting posts from last 7 days")
        return self.api_request_handler.send_api_request(
            "GET", self._posts_url(), params={"last_7_days": 1}
        )
class PostAnalyticsManager:
    """
    Manages the analytics for posts by interfacing with the API to check for update requirements
    and update post analytics. This class leverages the ApiRequestHandler for API interactions
    and the PostManager for retrieving specific post information.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles API requests for analytics data.
        post_manager (PostManager): Manages post retrieval and existence checks.
        log_manager (LoggingManager): Manages logging for analytics operations.
    """

    def __init__(
        self, api_request_handler: ApiRequestHandler, post_manager: PostManager
    ):
        """
        Initializes the PostAnalyticsManager with necessary handlers and managers.

        Parameters:
            api_request_handler (ApiRequestHandler): The API request handler for making API calls.
            post_manager (PostManager): The manager for interacting with post data.
        """
        self.api_request_handler = api_request_handler
        self.post_manager = post_manager
        self.log_manager = LoggingManager("scraper.log")

    @staticmethod
    def _format_timestamp(moment: datetime) -> str:
        """Formats a datetime as `YYYY-MM-DDTHH:MM:SS.mmmZ` (millisecond precision)."""
        # %f yields microseconds (6 digits); dropping the last 3 leaves milliseconds.
        return moment.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

    def _get_post_id(self, reddit_id: str):
        """
        Resolves a Reddit ID to the `id` field of the first matching post.

        Raises:
            IndexError: If the lookup returns no posts for `reddit_id`.
        """
        matches = self.post_manager.get_post_by_reddit_id(reddit_id)
        if not matches:
            # Same exception type that indexing an empty result would raise,
            # but with a message that identifies the missing post.
            message = f"No post found for reddit id: {reddit_id}"
            self.log_manager.error(message)
            raise IndexError(message)
        return matches[0]["id"]

    def check_update_requirements(self, reddit_id: str, update_frequency: int) -> bool:
        """
        Checks if the post identified by the given reddit_id meets the requirements for an update
        by analyzing the analytics data within the last x seconds (update_frequency).

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.
            update_frequency (int): The frequency in seconds for updating post analytics.

        Returns:
            bool: True if no analytics entries were returned for the window (the post
            should be updated), False if at least one entry exists.

        Raises:
            IndexError: If no post exists for `reddit_id`.
        """
        self.log_manager.log(f"Checking update requirements for {reddit_id}")

        # Take a single timezone-aware "now" so the window is exactly
        # `update_frequency` seconds long.
        window_end = datetime.now(ZoneInfo("UTC"))
        window_start = window_end - timedelta(seconds=update_frequency)
        time_begin_str = self._format_timestamp(window_start)
        time_end_str = self._format_timestamp(window_end)

        post_id = self._get_post_id(reddit_id)

        # Build the URL once so the logged URL is exactly the one requested.
        request_url = (
            f"{self.api_request_handler.api_url}post_analytics/"
            f"?post={post_id}&time_begin={time_begin_str}&time_end={time_end_str}"
        )
        self.log_manager.log(request_url)
        response = self.api_request_handler.send_api_request("GET", request_url)

        # Any analytics entry inside the window means an update is not due yet.
        return not response

    def update_post_analytics(self, post: Post) -> dict:
        """
        Updates the analytics for a given post with new data such as score, number of comments,
        and upvote ratio.

        Parameters:
            post (Post): The post object containing the new analytics data.

        Returns:
            dict: The response from the API after updating the post's analytics.

        Raises:
            IndexError: If no post exists for `post.reddit_id`.
        """
        self.log_manager.log(f"Updating post analytics for {post.reddit_id}")
        payload = {
            "post": self._get_post_id(post.reddit_id),
            "score": post.score,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}post_analytics/", data=payload
        )