# pokemans/scraper/app.py
# 2024-03-03 23:01:01 -05:00
#
# 103 lines
# 4.1 KiB
# Python

import threading
import time
from datetime import datetime
import requests
from models import Submission
import logging
# Configure the root logger when this module is imported so that the
# logging.info(...) calls below are emitted (level INFO and above).
logging.basicConfig(level=logging.INFO)
class Application:
    """Sync Reddit submissions into a REST API and announce new ones.

    The application pulls submissions from ``reddit_monitor``, mirrors them
    into the submissions API rooted at ``api_url`` and notifies
    ``webhook_notifier`` about every submission that was not stored yet.
    """

    def __init__(self, reddit_monitor, webhook_notifier, api_url):
        """Store the collaborators.

        Args:
            reddit_monitor: Object providing ``stream_submissions()`` and
                ``update_submissions(submissions)``.
            webhook_notifier: Object providing ``send_notification(submission)``.
            api_url: Base URL of the API. Endpoint paths are appended to it
                verbatim (``f"{api_url}submissions/"``), so it must end
                with a ``/``.
        """
        self.reddit_monitor = reddit_monitor
        self.webhook_notifier = webhook_notifier
        self.api_url = api_url

    def send_api_request(self, method, url, data=None, params=None, timeout=30):
        """Send an HTTP request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``"GET"``, ``"POST"``, ``"PATCH"``.
            url: Absolute URL to call.
            data: Optional form payload sent as the request body.
            params: Optional mapping of query-string parameters; ``requests``
                URL-encodes the values.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled API would block the caller
                indefinitely.

        Returns:
            Whatever ``response.json()`` decodes to.

        Raises:
            requests.exceptions.RequestException: On connection errors or
                timeouts.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.request(
            method, url, data=data, params=params, timeout=timeout
        )
        if not response.ok:
            # Keep returning the payload as before, but make failures visible.
            logging.warning(
                "API request %s %s returned HTTP %s",
                method,
                url,
                response.status_code,
            )
        return response.json()

    def get_submission_by_reddit_id(self, reddit_id):
        """Query the API for submissions matching ``reddit_id``.

        Returns:
            The decoded API response; the callers in this class treat it as
            a list of submission dicts (empty when nothing matches).
        """
        url = f"{self.api_url}submissions/"
        logging.info("Getting submission by reddit_id: %s", reddit_id)
        logging.info("%s?reddit_id=%s", url, reddit_id)
        # Pass the id via ``params`` so it is URL-encoded properly.
        response = self.send_api_request("GET", url, params={"reddit_id": reddit_id})
        logging.info(response)
        return response

    def submission_exists(self, reddit_id):
        """Return ``True`` if the API already holds a submission with this id."""
        if not self.get_submission_by_reddit_id(reddit_id):
            logging.info("Submission %s does not exist", reddit_id)
            return False
        return True

    def update_submission_analytics(self, submission):
        """PATCH the stored record with the submission's score and comment count.

        The API record is looked up by ``submission.reddit_id``. If no record
        is found the update is skipped with a warning instead of failing with
        an ``IndexError``.

        Args:
            submission: Object exposing ``reddit_id``, ``score`` and
                ``num_comments``.
        """
        matches = self.get_submission_by_reddit_id(submission.reddit_id)
        logging.info(matches)
        if not matches:
            logging.warning(
                "Cannot update analytics: submission %s not found",
                submission.reddit_id,
            )
            return
        submission_id = matches[0]["id"]
        data = {
            "id": submission_id,
            "score": submission.score,
            "num_comments": submission.num_comments,
        }
        self.send_api_request(
            "PATCH", f"{self.api_url}submissions/{submission_id}/", data=data
        )

    def get_submissions_to_update(self):
        """Fetch the submissions the API returns for the ``last_7_days=1`` filter.

        NOTE(review): presumably these are the submissions created within
        the last seven days -- confirm against the API implementation.
        """
        return self.send_api_request(
            "GET", f"{self.api_url}submissions/", params={"last_7_days": 1}
        )

    def insert_submission(self, submission):
        """POST a new submission record to the API and log the response."""
        data = {
            "reddit_id": submission.reddit_id,
            "title": submission.title,
            "name": submission.name,
            "url": submission.url,
            "created_utc": submission.created_utc,
            "selftext": submission.selftext,
            "permalink": submission.permalink,
            "upvote_ratio": submission.upvote_ratio,
        }
        logging.info("Inserting submission")
        response = self.send_api_request(
            "POST", f"{self.api_url}submissions/", data=data
        )
        logging.info(response)

    def process_submissions(self, submissions):
        """Store or refresh every submission yielded by ``submissions``.

        Known submissions only get their analytics refreshed. Unknown ones
        are inserted, get their analytics written, and are then announced
        through the webhook notifier.

        Args:
            submissions: Iterable of objects exposing ``id``, ``title``,
                ``name``, ``url``, ``score``, ``num_comments``,
                ``created_utc``, ``selftext``, ``permalink`` and
                ``upvote_ratio``.
        """
        for raw in submissions:
            submission = Submission(
                reddit_id=raw.id,
                title=raw.title,
                name=raw.name,
                url=raw.url,
                score=raw.score,
                num_comments=raw.num_comments,
                created_utc=raw.created_utc,
                selftext=raw.selftext,
                permalink=raw.permalink,
                upvote_ratio=raw.upvote_ratio,
            )
            if self.submission_exists(submission.reddit_id):
                self.update_submission_analytics(submission)
                continue
            # The insert payload carries no score/num_comments, so they are
            # written by a follow-up analytics update.
            self.insert_submission(submission)
            self.update_submission_analytics(submission)
            self.webhook_notifier.send_notification(submission)

    def periodic_update(self):
        """Refresh the submissions returned by ``get_submissions_to_update``."""
        to_be_updated = self.get_submissions_to_update()
        submissions = self.reddit_monitor.update_submissions(to_be_updated)
        self.process_submissions(submissions)

    def run_periodic_update(self, interval=3600):
        """Run ``periodic_update`` forever, sleeping ``interval`` seconds between runs."""
        while True:
            self.periodic_update()
            logging.info("Existing posts Updated at %s", datetime.now())
            time.sleep(interval)

    def run(self):
        """Process submissions from the monitor's stream.

        NOTE: the periodic refresh is currently disabled. To enable it,
        start ``run_periodic_update`` in a daemon ``threading.Thread``
        (e.g. with an interval of 3600 seconds) before consuming the stream.
        """
        submissions = self.reddit_monitor.stream_submissions()
        self.process_submissions(submissions)