finish scraper django integration

This commit is contained in:
2024-03-04 10:22:57 -05:00
parent c16bbb5275
commit 9a666df52c
16 changed files with 449 additions and 202 deletions

View File

@@ -1,4 +1,4 @@
# Generated by Django 5.0.2 on 2024-03-04 01:40
# Generated by Django 5.0.2 on 2024-03-04 05:15
import django.db.models.deletion
from django.db import migrations, models
@@ -8,38 +8,33 @@ class Migration(migrations.Migration):
initial = True
dependencies = []
dependencies = [
]
operations = [
migrations.CreateModel(
name="Submission",
name='Post',
fields=[
("id", models.AutoField(primary_key=True, serialize=False)),
("reddit_id", models.CharField(max_length=255, unique=True)),
("title", models.CharField(max_length=255)),
("name", models.CharField(max_length=255)),
("url", models.CharField(max_length=255)),
("created_utc", models.FloatField()),
("selftext", models.CharField(max_length=255)),
("permalink", models.CharField(max_length=255)),
("upvote_ratio", models.FloatField()),
("updated_at", models.DateTimeField(auto_now=True)),
('id', models.AutoField(primary_key=True, serialize=False)),
('reddit_id', models.CharField(max_length=255, unique=True)),
('title', models.CharField(max_length=255)),
('name', models.CharField(max_length=255)),
('url', models.CharField(max_length=555)),
('created_utc', models.FloatField()),
('selftext', models.CharField(blank=True, max_length=2555, null=True)),
('permalink', models.CharField(max_length=255)),
('updated_at', models.DateTimeField(auto_now=True)),
],
),
migrations.CreateModel(
name="SubmissionAnalytics",
name='PostAnalytics',
fields=[
("id", models.AutoField(primary_key=True, serialize=False)),
("num_comments", models.IntegerField()),
("score", models.IntegerField()),
("created_at", models.DateTimeField(auto_now=True)),
(
"submission",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="pokemans_app.submission",
),
),
('id', models.AutoField(primary_key=True, serialize=False)),
('num_comments', models.IntegerField()),
('score', models.IntegerField()),
('upvote_ratio', models.FloatField()),
('created_at', models.DateTimeField(auto_now=True)),
('post', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='pokemans_app.post')),
],
),
]

View File

@@ -1,18 +0,0 @@
# Generated by Django 5.0.2 on 2024-03-04 03:51
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("pokemans_app", "0001_initial"),
]
operations = [
migrations.AlterField(
model_name="submission",
name="selftext",
field=models.CharField(blank=True, max_length=1234),
),
]

View File

@@ -1,22 +1,22 @@
from django.db import models
class Submission(models.Model):
class Post(models.Model):
id = models.AutoField(primary_key=True)
reddit_id = models.CharField(max_length=255, unique=True)
title = models.CharField(max_length=255)
name = models.CharField(max_length=255)
url = models.CharField(max_length=255)
url = models.CharField(max_length=555)
created_utc = models.FloatField()
selftext = models.CharField(max_length=1234, blank=True)
selftext = models.CharField(max_length=2555, blank=True, null=True)
permalink = models.CharField(max_length=255)
upvote_ratio = models.FloatField()
updated_at = models.DateTimeField(auto_now=True)
class SubmissionAnalytics(models.Model):
class PostAnalytics(models.Model):
id = models.AutoField(primary_key=True)
submission = models.ForeignKey(Submission, on_delete=models.CASCADE)
post = models.ForeignKey(Post, on_delete=models.CASCADE)
num_comments = models.IntegerField()
score = models.IntegerField()
upvote_ratio = models.FloatField()
created_at = models.DateTimeField(auto_now=True)

View File

@@ -1,13 +1,13 @@
from rest_framework import serializers
from .models import Submission, SubmissionAnalytics
from .models import Post, PostAnalytics
class SubmissionSerializer(serializers.ModelSerializer):
class PostSerializer(serializers.ModelSerializer):
class Meta:
model = Submission
model = Post
fields = '__all__'
class SubmissionAnalyticsSerializer(serializers.ModelSerializer):
class PostAnalyticsSerializer(serializers.ModelSerializer):
class Meta:
model = SubmissionAnalytics
model = PostAnalytics
fields = '__all__'

View File

@@ -1,17 +1,18 @@
from django.shortcuts import render
from rest_framework import viewsets
from .models import Submission, SubmissionAnalytics
from .serializers import SubmissionSerializer, SubmissionAnalyticsSerializer
from .models import Post, PostAnalytics
from .serializers import PostSerializer, PostAnalyticsSerializer
from datetime import timedelta
from django.utils import timezone
from django.utils.dateparse import parse_datetime
class SubmissionViewSet(viewsets.ModelViewSet):
queryset = Submission.objects.all()
serializer_class = SubmissionSerializer
class PostViewSet(viewsets.ModelViewSet):
queryset = Post.objects.all()
serializer_class = PostSerializer
def get_queryset(self):
queryset = Submission.objects.all()
queryset = Post.objects.all()
reddit_id = self.request.query_params.get('reddit_id', None)
last_7_days = self.request.query_params.get('last_7_days', None)
@@ -27,6 +28,30 @@ class SubmissionViewSet(viewsets.ModelViewSet):
return queryset
class SubmissionAnalyticsViewSet(viewsets.ModelViewSet):
queryset = SubmissionAnalytics.objects.all()
serializer_class = SubmissionAnalyticsSerializer
class PostAnalyticsViewSet(viewsets.ModelViewSet):
    """Model viewset for ``PostAnalytics`` with optional query-string filters.

    Supported query parameters (all optional):

    * ``post`` -- keep only rows whose ``post`` foreign key equals this value.
    * ``time_begin`` / ``time_end`` -- keep only rows whose ``created_at``
      falls inside the inclusive range. The range is applied only when both
      values are supplied and both parse as datetimes; otherwise the time
      filter is skipped.
    """

    queryset = PostAnalytics.objects.all()
    serializer_class = PostAnalyticsSerializer

    def get_queryset(self):
        """Return the ``PostAnalytics`` queryset narrowed by the request's query parameters."""
        queryset = PostAnalytics.objects.all()
        params = self.request.query_params

        post_id = params.get('post', None)
        time_begin = params.get('time_begin', None)
        time_end = params.get('time_end', None)

        if post_id is not None:
            queryset = queryset.filter(post=post_id)

        if time_begin is not None and time_end is not None:
            try:
                time_begin_parsed = parse_datetime(time_begin)
                time_end_parsed = parse_datetime(time_end)
            except ValueError:
                # parse_datetime() returns None for badly formatted strings but
                # raises ValueError for well-formatted, impossible values
                # (e.g. "2024-02-30T00:00:00"). Treat both the same way so a
                # bad query string cannot turn into an unhandled server error.
                time_begin_parsed = None
                time_end_parsed = None

            # NOTE(review): this only checks that parsing succeeded. The parsed
            # values are timezone-aware only if the caller included a UTC
            # offset; naive values are handed to the ORM unchanged.
            if time_begin_parsed is not None and time_end_parsed is not None:
                queryset = queryset.filter(
                    created_at__gte=time_begin_parsed,
                    created_at__lte=time_end_parsed,
                )
            # Otherwise the bounds were invalid: deliberately best-effort, the
            # time filter is skipped and any ``post`` filter above still applies.

        return queryset

View File

@@ -17,12 +17,12 @@ Including another URLconf
from django.contrib import admin
from django.urls import path, include
from rest_framework.routers import DefaultRouter
from pokemans_app.views import SubmissionViewSet, SubmissionAnalyticsViewSet
from pokemans_app.views import PostViewSet, PostAnalyticsViewSet
router = DefaultRouter()
router.register(r"submissions", SubmissionViewSet)
router.register(r"submission_analytics", SubmissionAnalyticsViewSet)
router.register(r"posts", PostViewSet)
router.register(r"post_analytics", PostAnalyticsViewSet)
urlpatterns = [
path("admin/", admin.site.urls),