broken bad code lole

This commit is contained in:
zman27 2024-03-03 23:01:01 -05:00
parent 91d55efd20
commit c16bbb5275
29 changed files with 538 additions and 154 deletions

View File

@ -1,20 +1,19 @@
version: '3.8' version: '3.8'
services: services:
web: scraper:
build: ./server build: ./scraper
ports:
- "5000:5000"
volumes: volumes:
- ./server:/app - ./scraper:/app
environment: environment:
- DEBUG=1
- POKEMANS_DB_URL - POKEMANS_DB_URL
- PRAW_CLIENT_ID - PRAW_CLIENT_ID
- PRAW_CLIENT_SECRET - PRAW_CLIENT_SECRET
- PRAW_USERNAME - PRAW_USERNAME
- PRAW_PASSWORD - PRAW_PASSWORD
- POKEMANS_WEBHOOK_URL - POKEMANS_WEBHOOK_URL
depends_on:
- db
command: command:
python main.py python main.py
@ -34,4 +33,17 @@ services:
volumes: volumes:
- ./client:/usr/share/nginx/html - ./client:/usr/share/nginx/html
depends_on: depends_on:
- web - server
server:
build: ./server
ports:
- "8000:8000"
volumes:
- ./server:/app
depends_on:
- db
- scraper
command:
python manage.py runserver 0.0.0.0:8000

14
scraper/Dockerfile Normal file
View File

@ -0,0 +1,14 @@
# Image for the Reddit scraper service.
FROM python:3.11

# Keep Python from writing .pyc files and make stdout/stderr unbuffered so
# log lines show up in `docker logs` immediately.
# (key=value form; the space-separated `ENV key value` form is legacy syntax.)
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Set the working directory in the container
WORKDIR /app

# Install any needed packages specified in requirements.txt
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# NOTE: only requirements.txt is copied into the image; the application source
# is expected to be supplied at runtime (docker-compose mounts ./scraper on /app).
CMD ["python", "main.py"]

103
scraper/app.py Normal file
View File

@ -0,0 +1,103 @@
import threading
import time
from datetime import datetime
import requests
from models import Submission
import logging
# logging
logging.basicConfig(level=logging.INFO)
class Application:
    """Feed Reddit submissions into the REST API and announce new ones.

    Each submission produced by ``reddit_monitor`` is looked up in the API by
    its Reddit id. Unknown submissions are inserted and passed to
    ``webhook_notifier``; in both cases a score / comment-count sample is
    recorded for the submission.
    """

    # Seconds to wait for the API. ``requests`` applies no timeout unless one
    # is given, so a stalled server would otherwise block the stream forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, reddit_monitor, webhook_notifier, api_url):
        """Store the collaborators.

        Args:
            reddit_monitor: provides ``stream_submissions()`` and
                ``update_submissions(submissions_to_update)``.
            webhook_notifier: provides ``send_notification(submission)``.
            api_url: base URL of the API; endpoint paths are appended
                directly, so it must end with a slash.
        """
        self.reddit_monitor = reddit_monitor
        self.webhook_notifier = webhook_notifier
        self.api_url = api_url

    def send_api_request(self, method, url, data=None, params=None, timeout=None):
        """Send one HTTP request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``"GET"``.
            url: absolute URL of the endpoint.
            data: optional form payload.
            params: optional query-string parameters.
            timeout: seconds to wait; defaults to ``REQUEST_TIMEOUT``.

        Returns:
            Whatever ``response.json()`` decodes. Error responses are logged
            but still decoded, so callers keep receiving the API's error body.
        """
        if timeout is None:
            timeout = self.REQUEST_TIMEOUT
        response = requests.request(
            method, url, data=data, params=params, timeout=timeout
        )
        if not response.ok:
            logging.warning("%s %s returned HTTP %s", method, url, response.status_code)
        return response.json()

    def get_submission_by_reddit_id(self, reddit_id):
        """Return the API's list of stored submissions matching ``reddit_id``."""
        url = f"{self.api_url}submissions/"
        logging.info("Getting submission by reddit_id: %s", reddit_id)
        # Passing the filter via ``params`` lets requests do the URL encoding.
        response = self.send_api_request("GET", url, params={"reddit_id": reddit_id})
        logging.info(response)
        return response

    def submission_exists(self, reddit_id):
        """Return True when the API already stores a submission with this id."""
        if not self.get_submission_by_reddit_id(reddit_id):
            logging.info("Submission %s does not exist", reddit_id)
            return False
        return True

    def update_submission_analytics(self, submission):
        """Record the submission's current score and comment count.

        The sample is created on the ``submission_analytics/`` endpoint and
        linked to the stored submission's primary key. The ``submissions/``
        resource has no ``score`` / ``num_comments`` fields, so patching them
        there would silently discard the values.

        If the submission cannot be found in the API (for example because the
        preceding insert was rejected) a warning is logged and nothing is sent.
        """
        matches = self.get_submission_by_reddit_id(submission.reddit_id)
        if not matches:
            logging.warning(
                "Cannot record analytics: submission %s is not stored",
                submission.reddit_id,
            )
            return
        data = {
            "submission": matches[0]["id"],
            "score": submission.score,
            "num_comments": submission.num_comments,
        }
        self.send_api_request("POST", f"{self.api_url}submission_analytics/", data=data)

    def get_submissions_to_update(self):
        """Return the stored submissions the API reports for the last 7 days."""
        return self.send_api_request(
            "GET", f"{self.api_url}submissions/", params={"last_7_days": 1}
        )

    def insert_submission(self, submission):
        """Create the submission in the API and log the API's reply."""
        data = {
            "reddit_id": submission.reddit_id,
            "title": submission.title,
            "name": submission.name,
            "url": submission.url,
            "created_utc": submission.created_utc,
            "selftext": submission.selftext,
            "permalink": submission.permalink,
            "upvote_ratio": submission.upvote_ratio,
        }
        response = self.send_api_request("POST", f"{self.api_url}submissions/", data=data)
        logging.info("Inserting submission")
        logging.info(response)

    def process_submissions(self, submissions):
        """Store, sample and (for new posts) announce every given submission.

        Args:
            submissions: iterable of objects exposing ``id``, ``title``,
                ``name``, ``url``, ``score``, ``num_comments``,
                ``created_utc``, ``selftext``, ``permalink`` and
                ``upvote_ratio``. It may be an endless stream.
        """
        for raw in submissions:
            record = Submission(
                reddit_id=raw.id,
                title=raw.title,
                name=raw.name,
                url=raw.url,
                score=raw.score,
                num_comments=raw.num_comments,
                created_utc=raw.created_utc,
                selftext=raw.selftext,
                permalink=raw.permalink,
                upvote_ratio=raw.upvote_ratio,
            )
            if self.submission_exists(record.reddit_id):
                self.update_submission_analytics(record)
                continue
            # First sighting: store it, take the first sample, then notify.
            self.insert_submission(record)
            self.update_submission_analytics(record)
            self.webhook_notifier.send_notification(record)

    def periodic_update(self):
        """Re-process the submissions the API lists for the last 7 days."""
        to_be_updated = self.get_submissions_to_update()
        submissions = self.reddit_monitor.update_submissions(to_be_updated)
        self.process_submissions(submissions)

    def run_periodic_update(self, interval=3600):
        """Run ``periodic_update`` forever, sleeping ``interval`` seconds between runs."""
        while True:
            self.periodic_update()
            print(f"Existing posts Updated at {datetime.now()}")
            time.sleep(interval)

    def run(self):
        """Process the live submission stream from the monitor.

        The background periodic-update thread is currently disabled; the
        lines below are kept so it can be switched back on.
        """
        #update_frequency = 3600 # 3600
        #update_thread = threading.Thread(target=self.run_periodic_update, args=(update_frequency, ))
        #update_thread.daemon = True
        #update_thread.start()
        submissions = self.reddit_monitor.stream_submissions()
        self.process_submissions(submissions)

View File

@ -2,7 +2,6 @@ import os
class Config: class Config:
POKEMANS_DB_URL = os.getenv("POKEMANS_DB_URL", "sqlite:///pokemans.db")
PRAW_CLIENT_ID = os.getenv("PRAW_CLIENT_ID") PRAW_CLIENT_ID = os.getenv("PRAW_CLIENT_ID")
PRAW_CLIENT_SECRET = os.getenv("PRAW_CLIENT_SECRET") PRAW_CLIENT_SECRET = os.getenv("PRAW_CLIENT_SECRET")
PRAW_USERNAME = os.getenv("PRAW_USERNAME") PRAW_USERNAME = os.getenv("PRAW_USERNAME")
@ -12,4 +11,4 @@ class Config:
SUBREDDIT_NAME = "pkmntcgdeals" SUBREDDIT_NAME = "pkmntcgdeals"
USER_AGENT = "praw:zman.video_repost_bot:v0.1.0 (by u/jzman21)" USER_AGENT = "praw:zman.video_repost_bot:v0.1.0 (by u/jzman21)"
DISABLE_WEBHOOK = False DISABLE_WEBHOOK = False
DESTROY_DB = False API_URL = "http://server:8000/api/"

View File

@ -1,8 +1,8 @@
from models import create_db, reset_db
from reddit_monitor import RedditMonitor from reddit_monitor import RedditMonitor
from webhook import WebhookNotifier from webhook import WebhookNotifier
from app import Application from app import Application
from config import Config from config import Config
import logging
if __name__ == "__main__": if __name__ == "__main__":
@ -14,25 +14,29 @@ if __name__ == "__main__":
subreddit_name = Config.SUBREDDIT_NAME subreddit_name = Config.SUBREDDIT_NAME
discord_webhook_url = Config.POKEMANS_WEBHOOK_URL discord_webhook_url = Config.POKEMANS_WEBHOOK_URL
disable_webhook = Config.DISABLE_WEBHOOK disable_webhook = Config.DISABLE_WEBHOOK
destroy_db = Config.DESTROY_DB
pkmn_env = Config.PKMN_ENV pkmn_env = Config.PKMN_ENV
api_url = Config.API_URL
if destroy_db and pkmn_env == 'dev': # logging
reset_db() logging.basicConfig(filename='scraper.log', level=logging.DEBUG)
else: logging.info('Starting scraper')
create_db()
reddit_monitor = RedditMonitor(client_id, client_secret, user_agent, username, password, subreddit_name) reddit_monitor = RedditMonitor(client_id, client_secret, user_agent, username, password, subreddit_name)
webhook_notifier = WebhookNotifier(discord_webhook_url, disable_webhook) webhook_notifier = WebhookNotifier(discord_webhook_url, disable_webhook)
app = Application(reddit_monitor, webhook_notifier) app = Application(reddit_monitor, webhook_notifier, api_url)
print("Starting app")
app.run() app.run()
""" """
TODO: TODO:
- implement django - django rest framework
- api for managing database
- remove scraper models
- connect scraper to django rest framework api
- pull upvote ratio into analytics?
- sqlite vs postgres figure out
- basic front end (react) - basic front end (react)
- tests - tests
- logging
- Filter out canadian/uk deals - Filter out canadian/uk deals
- track score and number of comments over time in db - track score and number of comments over time in db
- try to identify product, number of cards, price per card, etc - try to identify product, number of cards, price per card, etc

15
scraper/models.py Normal file
View File

@ -0,0 +1,15 @@
class Submission:
    """Plain holder for the Reddit post fields the scraper passes around."""

    def __init__(self, reddit_id, title, name, url, score, num_comments,
                 created_utc, selftext, permalink, upvote_ratio):
        """Copy every argument onto the instance under the same name."""
        # Identity and link fields.
        self.reddit_id, self.title, self.name, self.url = reddit_id, title, name, url
        # Popularity counters.
        self.score, self.num_comments = score, num_comments
        # Creation time and body text.
        self.created_utc, self.selftext = created_utc, selftext
        self.permalink, self.upvote_ratio = permalink, upvote_ratio

    def __str__(self):
        """Return all fields, in constructor order, separated by single spaces."""
        ordered_values = (
            self.reddit_id,
            self.title,
            self.name,
            self.url,
            self.score,
            self.num_comments,
            self.created_utc,
            self.selftext,
            self.permalink,
            self.upvote_ratio,
        )
        return " ".join(f"{value}" for value in ordered_values)

View File

@ -1,5 +1,4 @@
import praw import praw
from models import Submission, session_scope
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -18,10 +17,7 @@ class RedditMonitor:
for submission in self.subreddit.stream.submissions(): for submission in self.subreddit.stream.submissions():
yield submission yield submission
def update_submissions(self): def update_submissions(self, submissions_to_update):
with session_scope() as session: for submission in submissions_to_update:
one_week_ago = datetime.utcnow() - timedelta(weeks=1) praw_submission = self.reddit.submission(id=submission['reddit_id'])
submissions_to_update = session.query(Submission).filter(Submission.created_utc >= one_week_ago.timestamp()).all()
for db_submission in submissions_to_update:
praw_submission = self.reddit.submission(id=db_submission.id)
yield praw_submission yield praw_submission

16
scraper/requirements.txt Normal file
View File

@ -0,0 +1,16 @@
asgiref==3.7.2
certifi==2024.2.2
charset-normalizer==3.3.2
Django==5.0.2
djangorestframework==3.14.0
greenlet==3.0.3
idna==3.6
praw==7.7.1
prawcore==2.4.0
pytz==2024.1
requests==2.31.0
sqlparse==0.4.4
typing_extensions==4.10.0
update-checker==0.18.0
urllib3==2.2.1
websocket-client==1.7.0

View File

@ -1,3 +1,4 @@
# Use an official Python runtime as a base image
FROM python:3.11 FROM python:3.11
# Set environment variables # Set environment variables
@ -15,4 +16,5 @@ RUN pip install --no-cache-dir -r requirements.txt
EXPOSE 8000 EXPOSE 8000
# Run python manage.py runserver 0.0.0.0:8000 when the container launches # Run python manage.py runserver 0.0.0.0:8000 when the container launches
CMD ["python", "main.py"] CMD ["python", "manage.py", "runserver", "0.0.0.0:8000"]

View File

@ -1,49 +0,0 @@
import threading
import time
from datetime import datetime
from models import Submission, session_scope, submission_exists, update_submission, insert_submission
class Application:
def __init__(self, reddit_monitor, webhook_notifier):
self.reddit_monitor = reddit_monitor
self.webhook_notifier = webhook_notifier
def process_submissions(self, submissions):
with session_scope() as session:
for submission in submissions:
if submission_exists(session, submission.id):
update_submission(session, submission)
else:
submission = Submission(
id=submission.id,
title=submission.title,
name=submission.name,
url=submission.url,
score=submission.score,
num_comments=submission.num_comments,
created_utc=submission.created_utc,
selftext=submission.selftext,
permalink=submission.permalink,
upvote_ratio=submission.upvote_ratio
)
insert_submission(session, submission)
self.webhook_notifier.send_notification(submission)
def periodic_update(self):
submissions = self.reddit_monitor.update_submissions()
self.process_submissions(submissions)
def run_periodic_update(self, interval=3600):
while True:
self.periodic_update()
print(f"Existing posts Updated at {datetime.now()}")
time.sleep(interval)
def run(self):
update_frequency = 3600 # 3600
update_thread = threading.Thread(target=self.run_periodic_update, args=(update_frequency, ))
update_thread.daemon = True
update_thread.start()
submissions = self.reddit_monitor.stream_submissions()
self.process_submissions(submissions)

22
server/manage.py Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys
def main():
    """Select the project settings and hand the command line to Django."""
    # setdefault: a DJANGO_SETTINGS_MODULE already in the environment wins.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pokemans_django.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    execute_from_command_line(sys.argv)
if __name__ == "__main__":
main()

View File

@ -1,79 +0,0 @@
from sqlalchemy import create_engine, Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import os
from contextlib import contextmanager
Base = declarative_base()
Session = sessionmaker()
@contextmanager
def session_scope():
session = get_session()
try:
yield session
session.commit()
except:
session.rollback()
raise
finally:
session.close()
class Submission(Base):
__tablename__ = 'submissions'
id = Column(String, primary_key=True)
title = Column(String)
name = Column(String)
url = Column(String)
score = Column(Integer)
num_comments = Column(Integer)
created_utc = Column(Float)
selftext = Column(String)
permalink = Column(String)
upvote_ratio = Column(Float)
def get_engine(database_url=os.getenv("POKEMANS_DB_URL", "sqlite:///pokemans.db")):
engine = create_engine(database_url)
Session.configure(bind=engine)
return engine
def create_db():
engine = get_engine()
Base.metadata.create_all(engine)
def reset_db():
engine = get_engine()
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
def get_session():
return Session()
def insert_submission(session, submission):
session.add(submission)
session.commit()
def submission_exists(session, submission_id):
return session.query(Submission).filter(Submission.id == submission_id).first() is not None
def get_all_submissions(session):
return session.query(Submission).all()
def delete_submission(session, submission_id):
session.query(Submission).filter(Submission.id == submission_id).delete()
session.commit()
def update_submission(session, submission):
session.query(Submission).filter(Submission.id == submission.id).update({
'title': submission.title,
'name': submission.name,
'url': submission.url,
'score': submission.score,
'num_comments': submission.num_comments,
'created_utc': submission.created_utc,
'selftext': submission.selftext,
'permalink': submission.permalink,
'upvote_ratio': submission.upvote_ratio
})
session.commit()

View File

View File

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

View File

@ -0,0 +1,6 @@
from django.apps import AppConfig
class PokemansConfig(AppConfig):
    """Django application configuration for the ``pokemans_app`` package."""

    # Type used for implicitly created primary keys in this app. The models
    # shown for this app declare their ``id`` fields explicitly.
    default_auto_field = "django.db.models.BigAutoField"
    # Dotted path of the application this configuration belongs to.
    name = "pokemans_app"

View File

@ -0,0 +1,45 @@
# Generated by Django 5.0.2 on 2024-03-04 01:40
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial migration: creates the Submission and SubmissionAnalytics models."""

    initial = True
    # First migration of the app, so it depends on nothing.
    dependencies = []
    operations = [
        # One record per Reddit post; ``reddit_id`` must be unique.
        migrations.CreateModel(
            name="Submission",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                ("reddit_id", models.CharField(max_length=255, unique=True)),
                ("title", models.CharField(max_length=255)),
                ("name", models.CharField(max_length=255)),
                ("url", models.CharField(max_length=255)),
                ("created_utc", models.FloatField()),
                ("selftext", models.CharField(max_length=255)),
                ("permalink", models.CharField(max_length=255)),
                ("upvote_ratio", models.FloatField()),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
        ),
        # Score / comment-count samples, each tied to one Submission and
        # removed together with it (CASCADE).
        migrations.CreateModel(
            name="SubmissionAnalytics",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                ("num_comments", models.IntegerField()),
                ("score", models.IntegerField()),
                ("created_at", models.DateTimeField(auto_now=True)),
                (
                    "submission",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="pokemans_app.submission",
                    ),
                ),
            ],
        ),
    ]

View File

@ -0,0 +1,18 @@
# Generated by Django 5.0.2 on 2024-03-04 03:51
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter ``Submission.selftext``: allow blank values, max_length 1234."""

    # Must run after the migration that created the Submission model.
    dependencies = [
        ("pokemans_app", "0001_initial"),
    ]
    operations = [
        migrations.AlterField(
            model_name="submission",
            name="selftext",
            field=models.CharField(blank=True, max_length=1234),
        ),
    ]

View File

@ -0,0 +1,22 @@
from django.db import models
class Submission(models.Model):
    """A Reddit post as stored by the API."""

    # Internal primary key; distinct from Reddit's own identifier below.
    id = models.AutoField(primary_key=True)
    # Reddit's identifier for the post; unique across rows.
    reddit_id = models.CharField(max_length=255, unique=True)
    title = models.CharField(max_length=255)
    name = models.CharField(max_length=255)
    url = models.CharField(max_length=255)
    # Stored as a float; the submissions view compares it with a Unix timestamp.
    created_utc = models.FloatField()
    # The only text field that may be left blank.
    selftext = models.CharField(max_length=1234, blank=True)
    permalink = models.CharField(max_length=255)
    upvote_ratio = models.FloatField()
    # auto_now=True: refreshed every time the row is saved.
    updated_at = models.DateTimeField(auto_now=True)
class SubmissionAnalytics(models.Model):
    """A score / comment-count sample belonging to one Submission."""

    id = models.AutoField(primary_key=True)
    # Deleting the parent Submission deletes its samples as well (CASCADE).
    submission = models.ForeignKey(Submission, on_delete=models.CASCADE)
    num_comments = models.IntegerField()
    score = models.IntegerField()
    # NOTE(review): auto_now=True refreshes the value on every save, not only
    # on creation; auto_now_add=True may be what "created_at" intends —
    # confirm before changing (it would require a new migration).
    created_at = models.DateTimeField(auto_now=True)

View File

@ -0,0 +1,13 @@
from rest_framework import serializers
from .models import Submission, SubmissionAnalytics
class SubmissionSerializer(serializers.ModelSerializer):
    """Serializer exposing every field of the Submission model."""

    class Meta:
        model = Submission
        # '__all__' includes all model fields in the serialized representation.
        fields = '__all__'
class SubmissionAnalyticsSerializer(serializers.ModelSerializer):
    """Serializer exposing every field of the SubmissionAnalytics model."""

    class Meta:
        model = SubmissionAnalytics
        # '__all__' includes all model fields in the serialized representation.
        fields = '__all__'

View File

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View File

@ -0,0 +1,32 @@
from django.shortcuts import render
from rest_framework import viewsets
from .models import Submission, SubmissionAnalytics
from .serializers import SubmissionSerializer, SubmissionAnalyticsSerializer
from datetime import timedelta
from django.utils import timezone
class SubmissionViewSet(viewsets.ModelViewSet):
    """Model view set for submissions with two optional query filters.

    ``reddit_id`` keeps only the submission with that Reddit id.
    ``last_7_days`` (any value; only its presence is checked) keeps only
    submissions whose ``created_utc`` falls within the last seven days.
    Both filters may be combined.
    """

    queryset = Submission.objects.all()
    serializer_class = SubmissionSerializer

    def get_queryset(self):
        """Return the submissions narrowed by whichever filters were supplied."""
        matches = Submission.objects.all()
        wanted_reddit_id = self.request.query_params.get('reddit_id', None)
        recent_flag = self.request.query_params.get('last_7_days', None)

        if wanted_reddit_id is not None:
            matches = matches.filter(reddit_id=wanted_reddit_id)

        if recent_flag is None:
            return matches

        # created_utc is stored as a float, so the cutoff is converted to a
        # Unix timestamp before comparing.
        cutoff = (timezone.now() - timedelta(days=7)).timestamp()
        return matches.filter(created_utc__gte=cutoff)
class SubmissionAnalyticsViewSet(viewsets.ModelViewSet):
    """Model view set over all SubmissionAnalytics rows, without extra filtering."""

    queryset = SubmissionAnalytics.objects.all()
    serializer_class = SubmissionAnalyticsSerializer

View File

View File

@ -0,0 +1,16 @@
"""
ASGI config for pokemans_django project.
It exposes the ASGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
"""
import os

from django.core.asgi import get_asgi_application

# Use the same settings module as manage.py and the WSGI entry point. The
# previous value, "pokemans_server.settings", does not match the
# "pokemans_django" package that ROOT_URLCONF and WSGI_APPLICATION refer to.
# setdefault: a DJANGO_SETTINGS_MODULE already in the environment still wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pokemans_django.settings")

# Module-level ASGI callable.
application = get_asgi_application()

View File

@ -0,0 +1,125 @@
"""
Django settings for pokemans_django project.
Generated by 'django-admin startproject' using Django 4.2.4.
For more information on this file, see
https://docs.djangoproject.com/en/4.2/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.2/ref/settings/
"""
import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment to override; the literal below is
# only the development fallback and keeps existing setups working unchanged.
SECRET_KEY = os.environ.get(
    "DJANGO_SECRET_KEY",
    "django-insecure-$zr_vau$5sj8cpz1srj#hm37#h-48l571mwy!@x!p4jv)@5xwn",
)

# SECURITY WARNING: don't run with debug turned on in production!
# Debug stays on by default (the previous behaviour); set DJANGO_DEBUG to
# 0 / false / no / off to turn it off.
DEBUG = os.environ.get("DJANGO_DEBUG", "1").strip().lower() not in {
    "0",
    "false",
    "no",
    "off",
}

ALLOWED_HOSTS = ['server', 'localhost',]
# Application definition
INSTALLED_APPS = [
"django.contrib.admin",
"django.contrib.auth",
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
"rest_framework",
"pokemans_app",
]
MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
"django.contrib.auth.middleware.AuthenticationMiddleware",
"django.contrib.messages.middleware.MessageMiddleware",
"django.middleware.clickjacking.XFrameOptionsMiddleware",
]
ROOT_URLCONF = "pokemans_django.urls"
TEMPLATES = [
{
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": [],
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
"django.template.context_processors.debug",
"django.template.context_processors.request",
"django.contrib.auth.context_processors.auth",
"django.contrib.messages.context_processors.messages",
],
},
},
]
WSGI_APPLICATION = "pokemans_django.wsgi.application"
# Database
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
DATABASES = {
"default": {
"ENGINE": "django.db.backends.sqlite3",
"NAME": BASE_DIR / "db.sqlite3",
}
}
# Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
},
{
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
},
{
"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
},
{
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
},
]
# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/
LANGUAGE_CODE = "en-us"
TIME_ZONE = "UTC"
USE_I18N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.2/howto/static-files/
STATIC_URL = "static/"
# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

View File

@ -0,0 +1,30 @@
"""
URL configuration for pokemans_django project.
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/4.2/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from pokemans_app.views import SubmissionViewSet, SubmissionAnalyticsViewSet
# The router generates the URL patterns for both view sets.
router = DefaultRouter()
router.register(r"submissions", SubmissionViewSet)
router.register(r"submission_analytics", SubmissionAnalyticsViewSet)
# Admin site under admin/; every router-generated route under api/.
urlpatterns = [
    path("admin/", admin.site.urls),
    path("api/", include(router.urls)),
]

View File

@ -0,0 +1,16 @@
"""
WSGI config for pokemans_django project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Only a default: a DJANGO_SETTINGS_MODULE already set in the environment wins.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pokemans_django.settings")
# Module-level WSGI callable; presumably the object that the settings'
# WSGI_APPLICATION ("pokemans_django.wsgi.application") points at.
application = get_wsgi_application()

Binary file not shown.