Compare commits
No commits in common. "main" and "django" have entirely different histories.
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,5 +2,4 @@
|
||||
__pycache__
|
||||
.venv
|
||||
*.sqlite3
|
||||
*.log*
|
||||
node_modules
|
||||
*.log
|
9
client/Dockerfile
Normal file
9
client/Dockerfile
Normal file
@ -0,0 +1,9 @@
|
||||
# Front-end image: serves the client's static files with the official nginx image (Alpine variant).
# NOTE(review): there is no COPY step in this file, so the HTML presumably reaches
# the container through a bind mount at run time -- confirm against docker-compose.
FROM nginx:alpine

# Document that the container listens on port 80
EXPOSE 80

# Start nginx in the foreground ("daemon off;") so it stays the container's running process
CMD ["nginx", "-g", "daemon off;"]
|
||||
|
28
client/index.html
Normal file
28
client/index.html
Normal file
@ -0,0 +1,28 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>My Website</title>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<ul>
|
||||
<li><a href="#">Home</a></li>
|
||||
<li><a href="#">About</a></li>
|
||||
<li><a href="#">Contact</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</header>
|
||||
<main>
|
||||
<section>
|
||||
<h1>Welcome to My Website</h1>
|
||||
<p>This is a paragraph of text to introduce visitors to the site and inform them about what they can find here.</p>
|
||||
</section>
|
||||
</main>
|
||||
<footer>
|
||||
<p>© 2024 My Website</p>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
@ -12,18 +12,26 @@ services:
|
||||
- PRAW_USERNAME
|
||||
- PRAW_PASSWORD
|
||||
- POKEMANS_WEBHOOK_URL
|
||||
depends_on:
|
||||
- db
|
||||
command:
|
||||
python main.py
|
||||
|
||||
frontend:
|
||||
build: ./pokemans-client
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- ./pokemans-client:/app
|
||||
- /app/node_modules
|
||||
db:
|
||||
image: postgres:12
|
||||
environment:
|
||||
- NODE_ENV=development
|
||||
POSTGRES_DB: pokemans
|
||||
POSTGRES_USER: pokemans
|
||||
POSTGRES_PASSWORD: pokemans
|
||||
ports:
|
||||
- "5432:5432"
|
||||
|
||||
frontend:
|
||||
build: ./client
|
||||
ports:
|
||||
- "3000:80"
|
||||
volumes:
|
||||
- ./client:/usr/share/nginx/html
|
||||
depends_on:
|
||||
- server
|
||||
|
||||
@ -34,6 +42,7 @@ services:
|
||||
volumes:
|
||||
- ./server:/app
|
||||
depends_on:
|
||||
- db
|
||||
- scraper
|
||||
command:
|
||||
python manage.py runserver 0.0.0.0:8000
|
||||
|
@ -1,2 +0,0 @@
|
||||
node_modules
|
||||
npm-debug.log
|
23
pokemans-client/.gitignore
vendored
23
pokemans-client/.gitignore
vendored
@ -1,23 +0,0 @@
|
||||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
||||
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
@ -1,17 +0,0 @@
|
||||
# Use an official Node runtime as a parent image
|
||||
FROM node:alpine
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package.json and package-lock.json to work directory
|
||||
COPY package*.json ./
|
||||
|
||||
# Install any dependencies
|
||||
RUN npm install
|
||||
|
||||
# Bundle app source inside the docker image
|
||||
COPY . .
|
||||
|
||||
# Command to run the app
|
||||
CMD ["npm", "start"]
|
@ -1,70 +0,0 @@
|
||||
# Getting Started with Create React App
|
||||
|
||||
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
|
||||
|
||||
## Available Scripts
|
||||
|
||||
In the project directory, you can run:
|
||||
|
||||
### `npm start`
|
||||
|
||||
Runs the app in the development mode.\
|
||||
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
|
||||
|
||||
The page will reload when you make changes.\
|
||||
You may also see any lint errors in the console.
|
||||
|
||||
### `npm test`
|
||||
|
||||
Launches the test runner in the interactive watch mode.\
|
||||
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
|
||||
|
||||
### `npm run build`
|
||||
|
||||
Builds the app for production to the `build` folder.\
|
||||
It correctly bundles React in production mode and optimizes the build for the best performance.
|
||||
|
||||
The build is minified and the filenames include the hashes.\
|
||||
Your app is ready to be deployed!
|
||||
|
||||
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
|
||||
|
||||
### `npm run eject`
|
||||
|
||||
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
|
||||
|
||||
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
|
||||
|
||||
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
|
||||
|
||||
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
|
||||
|
||||
## Learn More
|
||||
|
||||
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
|
||||
|
||||
To learn React, check out the [React documentation](https://reactjs.org/).
|
||||
|
||||
### Code Splitting
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
|
||||
|
||||
### Analyzing the Bundle Size
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
|
||||
|
||||
### Making a Progressive Web App
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
|
||||
|
||||
### Advanced Configuration
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
|
||||
|
||||
### Deployment
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
|
||||
|
||||
### `npm run build` fails to minify
|
||||
|
||||
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)
|
18413
pokemans-client/package-lock.json
generated
18413
pokemans-client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,42 +0,0 @@
|
||||
{
|
||||
"name": "pokemans-client",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@testing-library/jest-dom": "^5.17.0",
|
||||
"@testing-library/react": "^13.4.0",
|
||||
"@testing-library/user-event": "^13.5.0",
|
||||
"axios": "^1.6.7",
|
||||
"chart.js": "^4.4.2",
|
||||
"chartjs-adapter-date-fns": "^3.0.0",
|
||||
"react": "^18.2.0",
|
||||
"react-chartjs-2": "^5.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-scripts": "5.0.1",
|
||||
"web-vitals": "^2.1.4"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "WATCHPACK_POLLING=true react-scripts start",
|
||||
"build": "react-scripts build",
|
||||
"test": "react-scripts test",
|
||||
"eject": "react-scripts eject"
|
||||
},
|
||||
"eslintConfig": {
|
||||
"extends": [
|
||||
"react-app",
|
||||
"react-app/jest"
|
||||
]
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
">0.2%",
|
||||
"not dead",
|
||||
"not op_mini all"
|
||||
],
|
||||
"development": [
|
||||
"last 1 chrome version",
|
||||
"last 1 firefox version",
|
||||
"last 1 safari version"
|
||||
]
|
||||
}
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 3.8 KiB |
@ -1,43 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<meta name="theme-color" content="#000000" />
|
||||
<meta
|
||||
name="description"
|
||||
content="Web site created using create-react-app"
|
||||
/>
|
||||
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
|
||||
<!--
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
|
||||
<!--
|
||||
Notice the use of %PUBLIC_URL% in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
Only files inside the `public` folder can be referenced from the HTML.
|
||||
|
||||
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
|
||||
work correctly both with client-side routing and a non-root public URL.
|
||||
Learn how to configure a non-root public URL by running `npm run build`.
|
||||
-->
|
||||
<title>React App</title>
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
<div id="root"></div>
|
||||
<!--
|
||||
This HTML file is a template.
|
||||
If you open it directly in the browser, you will see an empty page.
|
||||
|
||||
You can add webfonts, meta tags, or analytics to this file.
|
||||
The build step will place the bundled scripts into the <body> tag.
|
||||
|
||||
To begin the development, run `npm start` or `yarn start`.
|
||||
To create a production bundle, use `npm run build` or `yarn build`.
|
||||
-->
|
||||
</body>
|
||||
</html>
|
Binary file not shown.
Before Width: | Height: | Size: 5.2 KiB |
Binary file not shown.
Before Width: | Height: | Size: 9.4 KiB |
@ -1,25 +0,0 @@
|
||||
{
|
||||
"short_name": "React App",
|
||||
"name": "Create React App Sample",
|
||||
"icons": [
|
||||
{
|
||||
"src": "favicon.ico",
|
||||
"sizes": "64x64 32x32 24x24 16x16",
|
||||
"type": "image/x-icon"
|
||||
},
|
||||
{
|
||||
"src": "logo192.png",
|
||||
"type": "image/png",
|
||||
"sizes": "192x192"
|
||||
},
|
||||
{
|
||||
"src": "logo512.png",
|
||||
"type": "image/png",
|
||||
"sizes": "512x512"
|
||||
}
|
||||
],
|
||||
"start_url": ".",
|
||||
"display": "standalone",
|
||||
"theme_color": "#000000",
|
||||
"background_color": "#ffffff"
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
# https://www.robotstxt.org/robotstxt.html
|
||||
User-agent: *
|
||||
Disallow:
|
@ -1,76 +0,0 @@
|
||||
// AnalyticsChart.js
|
||||
import React from 'react';
|
||||
import { Line } from 'react-chartjs-2';
|
||||
import {
|
||||
Chart as ChartJS,
|
||||
CategoryScale,
|
||||
LinearScale,
|
||||
PointElement,
|
||||
LineElement,
|
||||
Title,
|
||||
Tooltip,
|
||||
Legend,
|
||||
TimeScale,
|
||||
TimeSeriesScale,
|
||||
} from 'chart.js';
|
||||
import 'chartjs-adapter-date-fns';
|
||||
|
||||
|
||||
// Register every Chart.js scale, element and plugin imported above with the
// shared ChartJS instance before any chart in this module is rendered.
ChartJS.register(
  CategoryScale,
  LinearScale,
  PointElement,
  LineElement,
  Title,
  Tooltip,
  Legend,
  TimeScale,
  TimeSeriesScale
);
|
||||
|
||||
const AnalyticsChart = ({ analyticsData, label, dataKey }) => {
|
||||
const data = {
|
||||
labels: analyticsData.map((data) => data.created_at),
|
||||
datasets: [
|
||||
{
|
||||
label: label,
|
||||
data: analyticsData.map((data) => data[dataKey]),
|
||||
fill: false,
|
||||
borderColor: 'rgb(75, 192, 192)',
|
||||
tension: 0.1,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const options = {
|
||||
scales: {
|
||||
x: {
|
||||
type: 'time',
|
||||
time: {
|
||||
parser: "yyyy-MM-dd'T'HH:mm:ss.SSSSSSxxx", // Adjusted date format parser for Date-fns
|
||||
unit: 'day',
|
||||
tooltipFormat: 'yyyy-MM-dd HH:mm',
|
||||
},
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Date',
|
||||
},
|
||||
},
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
title: {
|
||||
display: true,
|
||||
text: label,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// Log the analyticsData to the console right here
|
||||
console.log("Logging analyticsData:", analyticsData);
|
||||
return <div className="chart-container">
|
||||
<Line data={data} options={options} />
|
||||
</div>;
|
||||
};
|
||||
|
||||
export default AnalyticsChart;
|
@ -1,50 +0,0 @@
|
||||
.App {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.App-logo {
|
||||
height: 40vmin;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
@media (prefers-reduced-motion: no-preference) {
|
||||
.App-logo {
|
||||
animation: App-logo-spin infinite 20s linear;
|
||||
}
|
||||
}
|
||||
|
||||
.App-header {
|
||||
background-color: #282c34;
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-size: calc(10px + 2vmin);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.App-link {
|
||||
color: #61dafb;
|
||||
}
|
||||
|
||||
@keyframes App-logo-spin {
|
||||
from {
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
to {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
|
||||
.post-image {
|
||||
max-width: 500px;
|
||||
max-height: 300px;
|
||||
width: auto; /* ensures aspect ratio is maintained */
|
||||
height: auto; /* ensures aspect ratio is maintained */
|
||||
}
|
||||
|
||||
.chart-container {
|
||||
width: 600px;
|
||||
height: 400px;
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
import React, { useEffect, useState } from 'react';
|
||||
import './App.css';
|
||||
import AnalyticsChart from './AnalyticsChart'; // Adjust the path as necessary
|
||||
|
||||
function App() {
|
||||
const [posts, setPosts] = useState([]);
|
||||
const [currentIndex, setCurrentIndex] = useState(0);
|
||||
const [analyticsData, setAnalyticsData] = useState(null); // New state for analytics data
|
||||
|
||||
useEffect(() => {
|
||||
fetch('http://localhost:8000/api/posts/')
|
||||
.then(response => response.json())
|
||||
.then(data => setPosts(data))
|
||||
.catch(error => console.error('Error:', error));
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
// Assuming each post has a unique ID, fetch analytics data for the current post
|
||||
const postId = posts[currentIndex]?.id; // Safe navigation operator to avoid undefined errors
|
||||
if (postId) {
|
||||
fetch(`http://localhost:8000/api/post_analytics/?post=${postId}`)
|
||||
.then(response => response.json())
|
||||
.then(data => setAnalyticsData(data))
|
||||
.catch(error => console.error('Error:', error));
|
||||
}
|
||||
}, [currentIndex, posts]); // Fetch analytics data when currentIndex or posts array changes
|
||||
|
||||
const goToNextPost = () => setCurrentIndex(prevIndex => (prevIndex + 1) % posts.length);
|
||||
const goToPrevPost = () => setCurrentIndex(prevIndex => (prevIndex - 1 + posts.length) % posts.length);
|
||||
|
||||
return (
|
||||
<div>
|
||||
<h1>Data from Django</h1>
|
||||
{posts.length > 0 ? (
|
||||
<div>
|
||||
<div key={posts[currentIndex].id}>
|
||||
<h2>{posts[currentIndex].title}</h2>
|
||||
<img src={posts[currentIndex].url} alt={posts[currentIndex].title} className="post-image" />
|
||||
<p>Posted on: {new Date(posts[currentIndex].created_utc * 1000).toLocaleDateString()}</p>
|
||||
<a href={`https://old.reddit.com${posts[currentIndex].permalink}`}>View on Reddit</a>
|
||||
</div>
|
||||
{analyticsData && (
|
||||
<>
|
||||
<AnalyticsChart analyticsData={analyticsData} label="Score Over Time" dataKey="score" />
|
||||
<AnalyticsChart analyticsData={analyticsData} label="Number of Comments Over Time" dataKey="num_comments" />
|
||||
</>
|
||||
)}
|
||||
<button onClick={goToPrevPost}>Prev</button>
|
||||
<button onClick={goToNextPost}>Next</button>
|
||||
</div>
|
||||
) : (
|
||||
<p>Loading...</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default App;
|
@ -1,8 +0,0 @@
|
||||
import { render, screen } from '@testing-library/react';
|
||||
import App from './App';
|
||||
|
||||
test('renders learn react link', () => {
|
||||
render(<App />);
|
||||
const linkElement = screen.getByText(/learn react/i);
|
||||
expect(linkElement).toBeInTheDocument();
|
||||
});
|
@ -1,13 +0,0 @@
|
||||
body {
|
||||
margin: 0;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
|
||||
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
|
||||
sans-serif;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
|
||||
monospace;
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
import React from 'react';
|
||||
import ReactDOM from 'react-dom/client';
|
||||
import './index.css';
|
||||
import App from './App';
|
||||
import reportWebVitals from './reportWebVitals';
|
||||
|
||||
// Application entry point: create a React root on the element with id "root"
// and render <App /> inside React.StrictMode.
const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);

// If you want to start measuring performance in your app, pass a function
// to log results (for example: reportWebVitals(console.log))
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
// Called without a callback here, so reportWebVitals returns without collecting anything.
reportWebVitals();
|
@ -1 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 
41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>
|
Before Width: | Height: | Size: 2.6 KiB |
@ -1,13 +0,0 @@
|
||||
/**
 * Lazily loads `web-vitals` and hands `onPerfEntry` to each metric collector
 * (CLS, FID, FCP, LCP, TTFB, in that order).
 *
 * Does nothing unless `onPerfEntry` is a function.
 */
const reportWebVitals = onPerfEntry => {
  // Guard clause: anything that is not a function (including null/undefined) is ignored.
  if (!(onPerfEntry instanceof Function)) {
    return;
  }

  import('web-vitals').then(vitals => {
    const collectors = [
      vitals.getCLS,
      vitals.getFID,
      vitals.getFCP,
      vitals.getLCP,
      vitals.getTTFB,
    ];
    collectors.forEach(collect => {
      collect(onPerfEntry);
    });
  });
};
|
||||
|
||||
export default reportWebVitals;
|
@ -1,5 +0,0 @@
|
||||
// jest-dom adds custom jest matchers for asserting on DOM nodes.
|
||||
// allows you to do things like:
|
||||
// expect(element).toHaveTextContent(/react/i)
|
||||
// learn more: https://github.com/testing-library/jest-dom
|
||||
import '@testing-library/jest-dom';
|
@ -3,31 +3,6 @@ FROM python:3.11
|
||||
# Set environment variables
|
||||
ENV PYTHONDONTWRITEBYTECODE 1
|
||||
ENV PYTHONUNBUFFERED 1
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# Install dependencies for Chrome
|
||||
RUN apt-get update && apt-get install -y wget gnupg2 ca-certificates unzip \
|
||||
&& wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
||||
&& echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list \
|
||||
&& apt-get update
|
||||
|
||||
# Install Google Chrome
|
||||
RUN apt-get install -y google-chrome-stable
|
||||
|
||||
# Install specific version of ChromeDriver
|
||||
ARG CHROMEDRIVER_VERSION=122.0.6261.94
|
||||
RUN wget -N https://storage.googleapis.com/chrome-for-testing-public/$CHROMEDRIVER_VERSION/linux64/chromedriver-linux64.zip -P ~/ \
|
||||
&& unzip ~/chromedriver-linux64.zip -d ~/ \
|
||||
&& rm ~/chromedriver-linux64.zip \
|
||||
&& mv -f ~/chromedriver-linux64/chromedriver /usr/local/bin/chromedriver \
|
||||
&& chown root:root /usr/local/bin/chromedriver \
|
||||
&& chmod 0755 /usr/local/bin/chromedriver
|
||||
|
||||
# Set display port to avoid crash
|
||||
ENV DISPLAY=:99
|
||||
|
||||
# Upgrade pip
|
||||
RUN pip install --upgrade pip
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /app
|
||||
|
349
scraper/api.py
349
scraper/api.py
@ -1,349 +0,0 @@
|
||||
"""
|
||||
Interacts with the API to handle requests for post and product data.
|
||||
Utilizes the `requests` library to send requests
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
import requests
|
||||
from exceptions import APIRequestError, InvalidDataTypeError, InvalidMethodError
|
||||
from app_log import LoggingManager
|
||||
from models import Post
|
||||
|
||||
|
||||
class ApiRequestHandler:
    """
    Handles API requests for the application. Supports basic HTTP methods: GET, POST, PUT, DELETE.
    Utilizes the `requests` library to send requests to a specified API URL and handles
    response validation and error logging.

    Attributes:
        api_url (str): The base URL for the API to which requests are sent.
        timeout (float): Seconds to wait for the API before the request is abandoned.
        log_manager (LoggingManager): Manages logging for API request operations.
    """

    # HTTP methods accepted by send_api_request.
    SUPPORTED_METHODS = ("GET", "POST", "PUT", "DELETE")

    def __init__(self, api_url: str, timeout: float = 10):
        """
        Initializes the handler.

        Parameters:
            api_url (str): The base URL of the API.
            timeout (float, optional): Request timeout in seconds. Defaults to 10,
                the value that was previously hard-coded.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.log_manager = LoggingManager("scraper.log")

    def send_api_request(
        self, method: str, api_url: str, data=None, params=None
    ) -> dict:
        """
        Sends a request to the API using the specified HTTP method, URL, and optional data and parameters.

        Parameters:
            method (str): The HTTP method to use for the request. Must be one of: GET, POST, PUT, DELETE.
            api_url (str): The URL endpoint to send the request to.
            data (dict, optional): The payload to send in the request body.
            params (dict, optional): The URL parameters to append to the request.

        Returns:
            dict: The parsed JSON body of the response (whatever JSON value the API
            returned), or an empty dict when a successful response has no body,
            e.g. 204 No Content.

        Raises:
            InvalidMethodError: If the provided method is not supported.
            InvalidDataTypeError: If `data` or `params` is provided but is not a dictionary.
            APIRequestError: If the request could not be sent (status code 0) or the
                response status indicates an error.
        """
        if method not in self.SUPPORTED_METHODS:
            raise InvalidMethodError(f"Invalid method: {method}")
        if data is not None and not isinstance(data, dict):
            raise InvalidDataTypeError(f"Invalid data type: {type(data)} expected dict")
        if params is not None and not isinstance(params, dict):
            raise InvalidDataTypeError(
                f"Invalid data type: {type(params)} expected dict"
            )

        # Transport-level failures (connection refused, timeout, ...) carry no HTTP
        # status, so they are reported with status code 0.
        try:
            response = requests.request(
                method, api_url, data=data, params=params, timeout=self.timeout
            )
        except requests.RequestException as e:
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(0, str(e)) from e

        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            self.log_manager.error(f"API request failed: {e}")
            raise APIRequestError(response.status_code, response.text) from e

        # A successful response may have no body at all; calling .json() on it
        # would raise instead of returning a value.
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()
|
||||
|
||||
|
||||
class PostManager:
    """
    Manages operations related to posts, including retrieval and insertion of post data into a database via API requests.
    Utilizes an instance of ApiRequestHandler for API interactions and LoggingManager for logging operations.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for interacting with post data.
        log_manager (LoggingManager): Manages logging for operations performed by PostManager.
    """

    def __init__(self, api_request_handler: "ApiRequestHandler"):
        """
        Initializes the PostManager with an API request handler for making API calls and a logging manager for logging.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def get_post_by_reddit_id(self, reddit_id: str) -> dict:
        """
        Retrieves a post by its Reddit ID from the database through an API call.

        Parameters:
            reddit_id (str): The Reddit ID of the post to retrieve.

        Returns:
            dict: The parsed API response for the `posts/` endpoint filtered by
            `reddit_id`. It is empty when nothing matches (see `post_exists`).
        """
        # Pass the filter through `params` so it is URL-encoded by the request
        # layer instead of being interpolated into the URL by hand.
        return self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}posts/",
            params={"reddit_id": reddit_id},
        )

    def post_exists(self, reddit_id: str) -> bool:
        """
        Checks if a post with the specified Reddit ID exists in the database.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.

        Returns:
            bool: True if the post exists, False otherwise.
        """
        # An empty response means no post matched the Reddit ID.
        return bool(self.get_post_by_reddit_id(reddit_id))

    def insert_post(self, post) -> dict:
        """
        Inserts a new post into the database through an API call.

        Parameters:
            post (Post): The Post object containing the data to insert.

        Returns:
            dict: The response from the API after attempting to insert the post data.
        """
        data = {
            "reddit_id": post.reddit_id,
            "title": post.title,
            "name": post.name,
            "url": post.url,
            "created_utc": post.created_utc,
            "selftext": post.selftext,
            "permalink": post.permalink,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}posts/", data=data
        )

    def get_posts_from_last_7_days(self) -> dict:
        """
        Retrieves posts from the last 7 days from the database through an API call.

        Returns:
            dict: The parsed API response for the `posts/` endpoint queried with
            `last_7_days=1`.
        """
        self.log_manager.log("Getting posts from last 7 days")
        return self.api_request_handler.send_api_request(
            "GET",
            f"{self.api_request_handler.api_url}posts/",
            params={"last_7_days": 1},
        )
|
||||
|
||||
|
||||
class PostAnalyticsManager:
|
||||
"""
|
||||
Manages the analytics for posts by interfacing with the API to check for update requirements
|
||||
and update post analytics. This class leverages the ApiRequestHandler for API interactions
|
||||
and the PostManager for retrieving specific post information.
|
||||
|
||||
Attributes:
|
||||
api_request_handler (ApiRequestHandler): Handles API requests for analytics data.
|
||||
post_manager (PostManager): Manages post retrieval and existence checks.
|
||||
log_manager (LoggingManager): Manages logging for analytics operations.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, api_request_handler: ApiRequestHandler, post_manager: PostManager
|
||||
):
|
||||
"""
|
||||
Initializes the PostAnalyticsManager with necessary handlers and managers.
|
||||
|
||||
Parameters:
|
||||
api_request_handler (ApiRequestHandler): The API request handler for making API calls.
|
||||
post_manager (PostManager): The manager for interacting with post data.
|
||||
"""
|
||||
self.api_request_handler = api_request_handler
|
||||
self.post_manager = post_manager
|
||||
self.log_manager = LoggingManager("scraper.log")
|
||||
|
||||
def check_update_requirements(self, reddit_id: str, update_frequency: int) -> bool:
|
||||
"""
|
||||
Checks if the post identified by the given reddit_id meets the requirements for an update
|
||||
by analyzing the analytics data within the last x seconds (update_frequency).
|
||||
|
||||
Parameters:
|
||||
reddit_id (str): The Reddit ID of the post to check.
|
||||
update_frequency (int): The frequency in seconds for updating post analytics.
|
||||
|
||||
Returns:
|
||||
bool: True if the post meets update requirements, False otherwise.
|
||||
"""
|
||||
|
||||
# Specify your desired timezone, e.g., UTC
|
||||
timezone = ZoneInfo("UTC")
|
||||
|
||||
# Make your datetime objects timezone-aware
|
||||
time_start = datetime.now(timezone) - timedelta(seconds=update_frequency)
|
||||
now = datetime.now(timezone)
|
||||
|
||||
# Format datetime objects for the API request
|
||||
time_begin_str = time_start.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
||||
time_end_str = now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
||||
|
||||
post_id = self.post_manager.get_post_by_reddit_id(reddit_id)
|
||||
post_id = post_id[0]["id"]
|
||||
|
||||
response = self.api_request_handler.send_api_request(
|
||||
"GET",
|
||||
f"{self.api_request_handler.api_url}post_analytics/?post={post_id}&time_begin={time_begin_str}&time_end={time_end_str}",
|
||||
)
|
||||
|
||||
if len(response) > 0:
|
||||
# post should not be updated
|
||||
return False
|
||||
|
||||
# post should be updated
|
||||
return True
|
||||
|
||||
def update_post_analytics(self, post: Post) -> dict:
    """
    Records a new analytics snapshot (score, number of comments, upvote ratio) for a post.

    Parameters:
        post (Post): The post object carrying the current analytics values.

    Returns:
        dict: The response from the API after posting the analytics record.
    """
    # Resolve the database id of the post from its Reddit id (first matching record).
    matching_posts = self.post_manager.get_post_by_reddit_id(post.reddit_id)
    payload = {
        "post": matching_posts[0]["id"],
        "score": post.score,
        "num_comments": post.num_comments,
        "upvote_ratio": post.upvote_ratio,
    }
    endpoint = f"{self.api_request_handler.api_url}post_analytics/"
    return self.api_request_handler.send_api_request("POST", endpoint, data=payload)
|
||||
|
||||
|
||||
class CostcoProductManager:
    """
    Manages Costco product records through the API: listing, lookup by SKU, insertion and update.

    Products may be supplied either as objects exposing sku, name, price, img_url, product_link
    and active attributes, or as dicts with those keys. Application.scrape_costco passes the
    dict rows it received from get_all_costco_products back into update_costco_product, so both
    shapes must be accepted.

    Attributes:
        api_request_handler (ApiRequestHandler): Handles the API requests for Costco product data.
        log_manager (LoggingManager): Manages logging for operations performed by this class.
    """

    # Fields sent to the API, in payload order.
    _PRODUCT_FIELDS = ("sku", "name", "price", "img_url", "product_link", "active")

    def __init__(self, api_request_handler: ApiRequestHandler):
        """
        Initializes the manager with an API request handler and a logging manager.

        Parameters:
            api_request_handler (ApiRequestHandler): The handler for making API requests.
        """
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    @staticmethod
    def _field(product, name):
        """Reads one field from a product given as a dict (API row) or as an attribute object."""
        if isinstance(product, dict):
            return product[name]
        return getattr(product, name)

    def _payload(self, product) -> dict:
        """Builds the request body shared by insert and update."""
        return {name: self._field(product, name) for name in self._PRODUCT_FIELDS}

    def get_all_costco_products(self) -> list:
        """
        Retrieves all Costco products from the database through an API call.

        Returns:
            list: The API response; callers iterate it as a list of product dicts.
        """
        self.log_manager.log("Getting all Costco products")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}costco_products/"
        )

    def insert_costco_product(self, product) -> dict:
        """
        Inserts a new Costco product into the database through an API call.

        Parameters:
            product: The product to insert (attribute object or dict, see class docstring).

        Returns:
            dict: The response from the API after attempting to insert the product data.
        """
        self.log_manager.log(f"Inserting Costco product: {self._field(product, 'sku')}")
        return self.api_request_handler.send_api_request(
            "POST",
            f"{self.api_request_handler.api_url}costco_products/",
            data=self._payload(product),
        )

    def update_costco_product(self, product) -> dict:
        """
        Updates an existing Costco product in the database through an API call.

        Parameters:
            product: The product holding the updated data (attribute object or dict, see
                class docstring). Its SKU selects the record to update.

        Returns:
            dict: The response from the API after attempting to update the product data.
        """
        sku = self._field(product, "sku")
        self.log_manager.log(f"Updating Costco product: {sku}")
        return self.api_request_handler.send_api_request(
            "PUT",
            f"{self.api_request_handler.api_url}costco_products/{sku}/",
            data=self._payload(product),
        )

    def get_costco_product_by_sku(self, sku: str) -> dict:
        """
        Retrieves a Costco product by its SKU from the database through an API call.

        Parameters:
            sku (str): The SKU of the product to retrieve.

        Returns:
            dict: The response from the API containing the product data.
        """
        self.log_manager.log(f"Getting Costco product by SKU: {sku}")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}costco_products/?sku={sku}"
        )
|
353
scraper/app.py
353
scraper/app.py
@ -1,26 +1,212 @@
|
||||
from datetime import datetime, timedelta
|
||||
import requests
|
||||
from models import Post
|
||||
import praw
|
||||
from zoneinfo import ZoneInfo
|
||||
from exceptions import InvalidMethodError, InvalidDataTypeError, APIRequestError
|
||||
from app_log import LoggingManager
|
||||
from threads import Scheduler
|
||||
from costco import CostcoMonitor
|
||||
from threads import Scheduler, ThreadManager
|
||||
|
||||
|
||||
class ApiRequestHandler:
    """
    Thin wrapper around requests for talking to the backend API.

    Attributes:
        api_url (str): Base URL that callers prepend to endpoint paths.
        timeout (float): Seconds to wait for the server before giving up on a request.
        log_manager (LoggingManager): Logger used to record failed requests.
    """

    def __init__(self, api_url: str, timeout: float = 30):
        """
        Parameters:
            api_url (str): Base URL of the API.
            timeout (float): Per-request timeout in seconds. Without one, requests waits
                indefinitely on an unresponsive server and would stall the scraper.
        """
        self.api_url = api_url
        self.timeout = timeout
        self.log_manager = LoggingManager("scraper.log")

    def send_api_request(
        self, method: str, api_url: str, data=None, params=None
    ) -> dict:
        """
        Sends one HTTP request and returns the decoded JSON body.

        Parameters:
            method (str): One of "GET", "POST", "PUT", "DELETE".
            api_url (str): Full URL to call.
            data (dict, optional): Request body passed to requests as data.
            params (dict, optional): Query-string parameters (URL-encoded by requests).

        Returns:
            dict: The decoded JSON response, or an empty dict when the server sent no body
            (for example 204 No Content).

        Raises:
            InvalidMethodError: If method is not one of the supported verbs.
            InvalidDataTypeError: If data or params is given but is not a dict.
            APIRequestError: If the response status is not 200, 201 or 204.
        """
        if method not in ("GET", "POST", "PUT", "DELETE"):
            raise InvalidMethodError(f"Invalid method: {method}")
        if data is not None and not isinstance(data, dict):
            raise InvalidDataTypeError(f"Invalid data type: {type(data)} expected dict")
        if params is not None and not isinstance(params, dict):
            raise InvalidDataTypeError(
                f"Invalid data type: {type(params)} expected dict"
            )

        response = requests.request(
            method, api_url, data=data, params=params, timeout=self.timeout
        )

        if response.status_code not in (200, 201, 204):
            self.log_manager.error(
                f"API request failed: {response.status_code} - {response.text}"
            )
            raise APIRequestError(response.status_code, response.text)

        # 204 is accepted as success but carries no body; response.json() would raise on it.
        if response.status_code == 204 or not response.content:
            return {}
        return response.json()
|
||||
|
||||
|
||||
class PostManager:
    """
    Reads and writes post records through the API.

    Attributes:
        api_request_handler (ApiRequestHandler): Sends the HTTP requests.
        log_manager (LoggingManager): Logger used for operation messages.
    """

    def __init__(self, api_request_handler: ApiRequestHandler):
        self.api_request_handler = api_request_handler
        self.log_manager = LoggingManager("scraper.log")

    def get_post_by_reddit_id(self, reddit_id: str) -> dict:
        """
        Looks up posts by Reddit id.

        Returns:
            The API response. Callers in this file treat it as a sequence of post records
            (len(...) and [0]["id"]), despite the dict annotation.
        """
        self.log_manager.log(f"Getting post by reddit id: {reddit_id}")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}posts/?reddit_id={reddit_id}"
        )

    def post_exists(self, reddit_id: str) -> bool:
        """Returns True when the API knows at least one post with this Reddit id."""
        self.log_manager.log(f"Checking if post exists: {reddit_id}")
        return len(self.get_post_by_reddit_id(reddit_id)) != 0

    def insert_post(self, post) -> dict:
        """
        Creates a post record from a Post-like object.

        Parameters:
            post: Object exposing reddit_id, title, name, url, created_utc, selftext
                and permalink.

        Returns:
            dict: The API response to the POST request.
        """
        self.log_manager.log(f"Inserting post: {post.reddit_id}")
        # Kept for backward compatibility with any reader of the attribute. The payload is
        # built from the local argument, because this manager is shared by the stream loop and
        # the periodic-update thread, and reading back self.post could mix two posts.
        self.post = post
        data = {
            "reddit_id": post.reddit_id,
            "title": post.title,
            "name": post.name,
            "url": post.url,
            "created_utc": post.created_utc,
            "selftext": post.selftext,
            "permalink": post.permalink,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}posts/", data=data
        )

    def get_posts_from_last_7_days(self) -> dict:
        """Fetches the posts the API reports for the last_7_days=1 filter."""
        self.log_manager.log("Getting posts from last 7 days")
        return self.api_request_handler.send_api_request(
            "GET", f"{self.api_request_handler.api_url}posts/?last_7_days=1"
        )
|
||||
|
||||
|
||||
class PostAnalyticsManager:
    """
    Creates analytics records for posts and decides when a post is due for a new one.

    Attributes:
        api_request_handler (ApiRequestHandler): Sends the HTTP requests.
        post_manager (PostManager): Resolves Reddit ids to database post ids.
        log_manager (LoggingManager): Logger used for operation messages.
    """

    def __init__(
        self, api_request_handler: ApiRequestHandler, post_manager: PostManager
    ):
        self.api_request_handler = api_request_handler
        self.post_manager = post_manager
        self.log_manager = LoggingManager("scraper.log")

    def check_update_requirements(self, reddit_id: str) -> bool:
        """
        Returns True when the post has no analytics record within the last 15 minutes.

        Parameters:
            reddit_id (str): The Reddit ID of the post to check.

        Returns:
            bool: True if the post should get a new analytics record, False otherwise.
        """
        self.log_manager.log(f"Checking update requirements for {reddit_id}")

        # One timezone-aware reading so both ends of the window share the same clock value.
        now = datetime.now(ZoneInfo("UTC"))
        fifteen_minutes_ago = now - timedelta(minutes=15)

        post_id = self.post_manager.get_post_by_reddit_id(reddit_id)[0]["id"]

        # isoformat() ends in "+00:00". Placed raw in a query string, the "+" is decoded as a
        # space by the server, so the values go through params and requests percent-encodes them.
        query = {
            "post": post_id,
            "time_begin": fifteen_minutes_ago.isoformat(timespec="seconds"),
            "time_end": now.isoformat(timespec="seconds"),
        }
        endpoint = f"{self.api_request_handler.api_url}post_analytics/"
        self.log_manager.log(f"{endpoint} params={query}")

        response = self.api_request_handler.send_api_request(
            "GET", endpoint, params=query
        )

        # Any record inside the window means the post was updated recently enough.
        return len(response) == 0

    def update_post_analytics(self, post: Post) -> dict:
        """
        Posts a new analytics record (score, num_comments, upvote_ratio) for the given post.

        Parameters:
            post (Post): The post object carrying the current analytics values.

        Returns:
            dict: The API response to the POST request.
        """
        self.log_manager.log(f"Updating post analytics for {post.reddit_id}")
        post_id = self.post_manager.get_post_by_reddit_id(post.reddit_id)[0]["id"]
        data = {
            "post": post_id,
            "score": post.score,
            "num_comments": post.num_comments,
            "upvote_ratio": post.upvote_ratio,
        }
        return self.api_request_handler.send_api_request(
            "POST", f"{self.api_request_handler.api_url}post_analytics/", data=data
        )
|
||||
|
||||
|
||||
class RedditMonitor:
    """
    Wraps a PRAW client bound to one subreddit and exposes its submissions as generators.
    """

    def __init__(
        self, client_id, client_secret, user_agent, username, password, subreddit_name
    ):
        credentials = {
            "client_id": client_id,
            "client_secret": client_secret,
            "user_agent": user_agent,
            "username": username,
            "password": password,
        }
        self.reddit = praw.Reddit(**credentials)
        self.subreddit = self.reddit.subreddit(subreddit_name)
        self.log_manager = LoggingManager("scraper.log")

    def stream_submissions(self):
        """Yields submissions from the subreddit's submission stream as they arrive."""
        self.log_manager.info("Starting submission stream")
        yield from self.subreddit.stream.submissions()

    def update_submissions(self, posts_to_update):
        """Yields one submission per entry in posts_to_update, looked up by its "reddit_id"."""
        self.log_manager.info("Updating submissions")
        for stored_post in posts_to_update:
            yield self.reddit.submission(id=stored_post["reddit_id"])
|
||||
|
||||
|
||||
class SubmissionManager:
    """
    Turns Reddit submissions into Post objects and pushes them through the post manager,
    the analytics manager and the webhook notifier.
    """

    def __init__(
        self,
        reddit_monitor: RedditMonitor,
        post_manager: PostManager,
        post_analytics_manager: PostAnalyticsManager,
        WebhookNotifier,
    ):
        self.log_manager = LoggingManager("scraper.log")
        self.reddit_monitor = reddit_monitor
        self.post_manager = post_manager
        self.post_analytics_manager = post_analytics_manager
        # The parameter is named like the class but holds the notifier instance to call.
        self.webhook_notifier = WebhookNotifier

    def convert_submission_to_post(self, submission):
        """Copies the relevant submission attributes into a new Post."""
        same_named = (
            "title",
            "name",
            "url",
            "score",
            "num_comments",
            "created_utc",
            "selftext",
            "permalink",
            "upvote_ratio",
        )
        # Only the id is renamed; every other field keeps the submission's attribute name.
        fields = {"reddit_id": submission.id}
        fields.update((attr, getattr(submission, attr)) for attr in same_named)
        return Post(**fields)

    def process_submissions(self, submissions):
        """
        Handles each submission: new posts are inserted, given a first analytics record and
        announced via webhook; known posts get a new analytics record only when
        check_update_requirements allows it.
        """
        for submission in submissions:
            self.log_manager.log(submission)

            if not self.post_manager.post_exists(submission.id):
                # First sighting: store, snapshot, notify.
                new_post = self.convert_submission_to_post(submission)
                self.post_manager.insert_post(new_post)
                self.post_analytics_manager.update_post_analytics(new_post)
                self.webhook_notifier.send_notification(new_post)
                continue

            self.log_manager.log("Post exists")
            self.log_manager.log(f"post id: {submission.id}")
            if not self.post_analytics_manager.check_update_requirements(submission.id):
                continue

            self.log_manager.log("Update requirements met")
            known_post = self.convert_submission_to_post(submission)
            self.post_analytics_manager.update_post_analytics(known_post)
|
||||
|
||||
|
||||
class Application:
|
||||
"""
|
||||
Orchestrates the main application flow, including starting the submission stream,
|
||||
managing periodic updates of post analytics, and initializing all necessary components
|
||||
for the application to function.
|
||||
|
||||
Attributes:
|
||||
reddit_monitor (RedditMonitor): Monitors Reddit for new or updated submissions.
|
||||
webhook_notifier: Notifies external services via webhooks when certain actions occur.
|
||||
api_conn: Manages API connections and requests.
|
||||
post_manager (PostManager): Manages CRUD operations for posts.
|
||||
post_analytics_manager (PostAnalyticsManager): Manages analytics for posts.
|
||||
submission_manager (SubmissionManager): Manages the processing of Reddit submissions.
|
||||
log_manager (LoggingManager): Centralized logging for the application.
|
||||
scheduler: Manages the scheduling of periodic updates.
|
||||
costco_manager (CostcoManager): Manages Costco product data.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
reddit_monitor,
|
||||
@ -29,136 +215,33 @@ class Application:
|
||||
post_manager,
|
||||
post_analytics_manager,
|
||||
submission_manager,
|
||||
costco_manager,
|
||||
):
|
||||
"""
|
||||
Initializes the application with all necessary components.
|
||||
|
||||
Parameters:
|
||||
reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions.
|
||||
webhook_notifier: The notifier for sending updates via webhooks.
|
||||
api_conn: The API connection manager.
|
||||
post_manager (PostManager): The manager for post operations.
|
||||
post_analytics_manager (PostAnalyticsManager): The manager for post analytics operations.
|
||||
submission_manager (SubmissionManager): The manager for processing Reddit submissions.
|
||||
"""
|
||||
self.reddit_monitor = reddit_monitor
|
||||
self.webhook_notifier = webhook_notifier
|
||||
self.api_conn = api_conn
|
||||
self.post_manager = post_manager
|
||||
self.post_analytics_manager = post_analytics_manager
|
||||
self.costco_manager = costco_manager
|
||||
self.log_manager = LoggingManager("scraper.log")
|
||||
self.submission_manager = submission_manager
|
||||
self.scheduler = Scheduler()
|
||||
# how often should post analytics be updated (call for update and database update are separate)
|
||||
self.update_analytics_frequency = 60 * 15 # every 15 minutes
|
||||
self.scrape_costco_frequency = 60 * 60 * 4 # every 4 hours
|
||||
self.scheduler = None
|
||||
self.thread_manager = None
|
||||
|
||||
def update_analytics(self):
|
||||
"""
|
||||
Executes periodic updates for post analytics based on the predefined frequency.
|
||||
"""
|
||||
self.log_manager.info("Running periodic analytics update")
|
||||
def periodic_update(self):
|
||||
self.log_manager.info("Running periodic update")
|
||||
to_be_updated = self.post_manager.get_posts_from_last_7_days()
|
||||
submissions = self.reddit_monitor.update_submissions(to_be_updated)
|
||||
self.submission_manager.process_submissions(
|
||||
submissions, self.update_analytics_frequency
|
||||
)
|
||||
self.submission_manager.process_submissions(submissions)
|
||||
|
||||
def scrape_costco(self):
    """
    Scrapes the Costco Pokemon search page and reconciles the result with the database.

    Fetched products already known by SKU are updated when their price or active flag
    changed, unknown ones are inserted, active database products missing from the fetch
    are deactivated, and a webhook notification is sent for every newly inserted product.
    """
    self.log_manager.info("Running periodic Costco scrape")
    costco_monitor = CostcoMonitor(
        "https://www.costco.com/CatalogSearch?dept=All&keyword=pokemon"
    )
    try:
        fetched_products = costco_monitor.get_products()
    finally:
        # Always shut the browser down, even when the scrape raises; otherwise every failed
        # scheduled run would leave a driver behind.
        costco_monitor.close()

    # Fetch existing products from the database, assuming it returns a list directly
    existing_products = self.costco_manager.get_all_costco_products()

    # Mapping existing products for quick lookup
    existing_products_map = {
        product["sku"]: product for product in existing_products
    }

    products_to_update = []
    products_to_insert = []

    for product in fetched_products:
        existing_product = existing_products_map.get(product.sku)

        if not existing_product:
            self.log_manager.log(f"Adding new product: {product.sku}")
            products_to_insert.append(product)
            continue

        self.log_manager.log(f"Found existing product: {product.sku}")
        needs_update = False
        # Only price changes and activation/deactivation trigger an update.
        for field in ("price", "active"):
            fetched_value = getattr(product, field)
            if existing_product[field] != fetched_value:
                existing_product[field] = fetched_value
                needs_update = True
        if needs_update:
            # NOTE(review): this queues the API dict row, not a Product object, so
            # update_costco_product must accept dict input - confirm.
            products_to_update.append(existing_product)

    # Update existing products in the database if necessary
    for product in products_to_update:
        self.costco_manager.update_costco_product(product)

    # Insert new products into the database
    for product in products_to_insert:
        self.costco_manager.insert_costco_product(product)

    # Deactivate products that are active in the database but absent from the latest fetch
    skus_fetched = {product.sku for product in fetched_products}
    for product in existing_products:
        if product["sku"] not in skus_fetched and product["active"]:
            product["active"] = False
            self.costco_manager.update_costco_product(product)

    # Send notifications for new products
    for product in products_to_insert:
        self.webhook_notifier.costco_notification(product)
|
||||
|
||||
def add_scheduler_task(self, name, task, interval):
    """
    Hands a task to the application's scheduler.

    Parameters:
        name (str): Name of the task.
        task (callable): The function the scheduler should execute.
        interval (int): How often the task should run, in seconds.
    """
    scheduler = self.scheduler
    scheduler.add_task(name, task, interval)
|
||||
def run_periodic_update(self, interval):
    """
    Creates a Scheduler for periodic_update with the given interval, stores it on the
    application and runs it.
    """
    scheduler = Scheduler(interval, self.periodic_update)
    self.scheduler = scheduler
    scheduler.run()
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Starts the main application process, including streaming submissions, running periodic updates,
|
||||
and processing submissions.
|
||||
"""
|
||||
self.log_manager.info("Application started")
|
||||
|
||||
# tasks
|
||||
self.add_scheduler_task(
|
||||
"update_analytics", self.update_analytics, self.update_analytics_frequency
|
||||
update_frequency = 60 * 15 # 15 minutes in seconds
|
||||
self.thread_manager = ThreadManager(
|
||||
target=self.run_periodic_update, args=(update_frequency,)
|
||||
)
|
||||
self.add_scheduler_task(
|
||||
"scrape_costco", self.scrape_costco, self.scrape_costco_frequency
|
||||
)
|
||||
|
||||
# Stream submissions and process them
|
||||
self.thread_manager.run()
|
||||
submissions = self.reddit_monitor.stream_submissions()
|
||||
self.submission_manager.process_submissions(
|
||||
submissions, self.update_analytics_frequency
|
||||
)
|
||||
self.submission_manager.process_submissions(submissions)
|
||||
|
@ -1,15 +1,9 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
import sys
|
||||
import logging
|
||||
|
||||
|
||||
class SingletonMeta(type):
|
||||
"""
|
||||
A metaclass that creates a Singleton base class when called
|
||||
"""
|
||||
_instances = {}
|
||||
|
||||
def __call__(cls, *args, **kwargs):
|
||||
@ -19,9 +13,6 @@ class SingletonMeta(type):
|
||||
|
||||
|
||||
class LoggingManager(metaclass=SingletonMeta):
|
||||
"""
|
||||
A class that creates a logger object and sets up the logger with file and stream handlers
|
||||
"""
|
||||
def __init__(self, log_file):
|
||||
if not hasattr(self, "logger"):
|
||||
self.log_file = log_file
|
||||
|
@ -1,93 +0,0 @@
|
||||
import os
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from app_log import LoggingManager
|
||||
from models import Product
|
||||
|
||||
|
||||
|
||||
class CostcoMonitor:
    """
    Loads a Costco catalog search page in headless Chrome and extracts the listed products.

    Attributes:
        url (str): The search page to load.
        driver: The Selenium Chrome driver; release it with close().
        log_manager (LoggingManager): Logger used for progress and error messages.
    """

    def __init__(self, url):
        self.url = url
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Remove this line if you want to see the browser
        chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3")
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.add_argument("--log-level=3")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        if os.name == "nt":
            chrome_options.add_argument("--disable-gpu")
        self.driver = webdriver.Chrome(options=chrome_options)
        self.log_manager = LoggingManager("scraper.log")

    def wait_for_page_load(self):
        """Waits up to 20 seconds for document.readyState to become "complete"; logs on timeout."""
        try:
            WebDriverWait(self.driver, 20).until(
                lambda driver: driver.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            self.log_manager.error("Timed out waiting for page to load")

    def get_products(self, retries=0) -> list[Product]:
        """
        Loads the page and returns the products found in its product list.

        Parameters:
            retries (int): Number of attempts already made; used internally when the product
                list does not appear in time.

        Returns:
            list[Product]: The parsed products, or an empty list when the product list still
            has not loaded after 3 retries.
        """
        self.log_manager.info(f"Loading Costco page: {self.url}")
        self.driver.get(self.url)
        self.wait_for_page_load()  # Wait for the page to fully load

        # Wait for the product list to be visible on the page
        self.log_manager.info("Waiting for product")
        try:
            WebDriverWait(self.driver, 20).until(
                EC.visibility_of_element_located((By.XPATH, "//div[@automation-id='productList']"))
            )
        except TimeoutException:
            self.log_manager.error("Timed out waiting for product list to load")
            if retries < 3:
                self.log_manager.info("Retrying...")
                # Return the retry's result. Discarding it would fall through and scrape
                # the page that just timed out.
                return self.get_products(retries + 1)
            self.log_manager.error("Failed to load product list after 3 retries")
            return []

        products = self.driver.find_elements(By.XPATH, "//div[@automation-id='productList']/div[contains(@class, 'product')]")
        self.log_manager.info(f"Found {len(products)} products on the page")

        product_detail_list = []
        for product in products:
            # Best effort per tile: one malformed entry must not abort the whole scrape.
            try:
                parsed = self._parse_product(product)
            except Exception as e:
                self.log_manager.error(f"Error processing product: {e}")
                continue
            product_detail_list.append(parsed)
            self.log_manager.log(f"SKU: {parsed.sku}, Name: {parsed.name}, Price: {parsed.price}, Image URL: {parsed.img_url}, Product Link: {parsed.product_link}")

        return product_detail_list

    def _parse_product(self, product):
        """Builds a Product from one product tile element; missing values become empty strings."""
        product_sku = product.find_element(By.CSS_SELECTOR, "input[id^='product_sku_']").get_attribute('value')
        product_name = product.find_element(By.CSS_SELECTOR, "input[id^='product_name_']").get_attribute('value')
        price = product.find_element(By.CSS_SELECTOR, "div[class*='price']").text
        img_url = product.find_element(By.CSS_SELECTOR, "a.product-image-url img.img-responsive").get_attribute('src')
        product_link = product.find_element(By.CSS_SELECTOR, "a.product-image-url").get_attribute('href')
        # find_element raises when nothing matches, so only the extracted values can be empty.
        return Product(
            product_sku or "",
            product_name or "",
            price or "",
            img_url or "",
            product_link or "",
        )

    def close(self):
        """Quits the browser and logs that it was closed."""
        self.driver.quit()
        self.log_manager.info("Browser closed")
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: scrape one search page and always release the browser afterwards.
    url = "https://www.costco.com/CatalogSearch?dept=All&keyword=bagels"
    monitor = CostcoMonitor(url)
    try:
        monitor.get_products()
    finally:
        monitor.close()
|
@ -1,7 +1,12 @@
|
||||
from webhook import WebhookNotifier
|
||||
from app import Application
|
||||
from api import ApiRequestHandler, PostManager, PostAnalyticsManager, CostcoProductManager
|
||||
from reddit import RedditMonitor, SubmissionManager
|
||||
from app import (
|
||||
Application,
|
||||
RedditMonitor,
|
||||
ApiRequestHandler,
|
||||
PostManager,
|
||||
PostAnalyticsManager,
|
||||
SubmissionManager,
|
||||
)
|
||||
from config import Config
|
||||
from app_log import LoggingManager
|
||||
|
||||
@ -26,7 +31,6 @@ if __name__ == "__main__":
|
||||
api_conn = ApiRequestHandler(api_url)
|
||||
post_manager = PostManager(api_conn)
|
||||
post_analytics_manager = PostAnalyticsManager(api_conn, post_manager)
|
||||
costco_manager = CostcoProductManager(api_conn)
|
||||
submission_manager = SubmissionManager(
|
||||
reddit_monitor, post_manager, post_analytics_manager, webhook_notifier
|
||||
)
|
||||
@ -37,14 +41,16 @@ if __name__ == "__main__":
|
||||
post_manager,
|
||||
post_analytics_manager,
|
||||
submission_manager,
|
||||
costco_manager,
|
||||
)
|
||||
app.run()
|
||||
|
||||
"""
|
||||
TODO:
|
||||
- pull upvote ratio into analytics?
|
||||
- sqlite vs postgres figure out
|
||||
- basic front end (react)
|
||||
- tests
|
||||
- logging
|
||||
- Filter out canadian/uk deals
|
||||
- track score and number of comments over time in db
|
||||
- try to identify product, number of cards, price per card, etc
|
||||
@ -52,5 +58,4 @@ TODO:
|
||||
- try to identify platform ie. costco for gift card, tiktok for coupons, etc.
|
||||
- support for craigslist, ebay, etc.
|
||||
- front end - visualization, classification, lookup, etc.
|
||||
- postgres
|
||||
"""
|
||||
|
@ -25,16 +25,3 @@ class Post:
|
||||
|
||||
def __str__(self):
    """Returns all post fields separated by single spaces."""
    values = (
        self.reddit_id,
        self.title,
        self.name,
        self.url,
        self.score,
        self.num_comments,
        self.created_utc,
        self.selftext,
        self.permalink,
        self.upvote_ratio,
    )
    return " ".join(f"{value}" for value in values)
|
||||
|
||||
|
||||
class Product:
    """Plain record for one scraped product: SKU, name, price, image URL, link and active flag."""

    def __init__(self, sku, name, price, img_url, product_link, active=True):
        self.sku, self.name, self.price = sku, name, price
        self.img_url, self.product_link = img_url, product_link
        self.active = active

    def __str__(self):
        """Returns all fields separated by single spaces."""
        values = (
            self.sku,
            self.name,
            self.price,
            self.img_url,
            self.product_link,
            self.active,
        )
        return " ".join(f"{value}" for value in values)
|
||||
|
@ -1,153 +0,0 @@
|
||||
import praw
|
||||
from app_log import LoggingManager
|
||||
from models import Post
|
||||
from api import PostManager, PostAnalyticsManager
|
||||
from webhook import WebhookNotifier
|
||||
|
||||
|
||||
class RedditMonitor:
    """
    Watches one subreddit through PRAW (Python Reddit API Wrapper), streaming new submissions
    and re-fetching known ones.

    Attributes:
        reddit (praw.Reddit): The PRAW client used for API interactions.
        subreddit (praw.models.Subreddit): The subreddit being monitored.
        log_manager (LoggingManager): Manages logging for Reddit monitoring operations.
    """

    def __init__(
        self, client_id, client_secret, user_agent, username, password, subreddit_name
    ):
        """
        Creates the PRAW client from the given credentials and binds the target subreddit.

        Parameters:
            client_id (str): The client ID for the Reddit API application.
            client_secret (str): The client secret for the Reddit API application.
            user_agent (str): The user agent string identifying the application to Reddit.
            username (str): The Reddit account username for authentication.
            password (str): The Reddit account password for authentication.
            subreddit_name (str): The name of the subreddit to monitor.
        """
        client = praw.Reddit(
            client_id=client_id,
            client_secret=client_secret,
            user_agent=user_agent,
            username=username,
            password=password,
        )
        self.reddit = client
        self.subreddit = client.subreddit(subreddit_name)
        self.log_manager = LoggingManager("scraper.log")

    def stream_submissions(self):
        """
        Streams submissions from the monitored subreddit.

        Yields:
            praw.models.Submission: Each submission as the stream delivers it.
        """
        self.log_manager.info("Starting submission stream")
        yield from self.subreddit.stream.submissions()

    def update_submissions(self, posts_to_update):
        """
        Looks up the submission for every stored post that needs refreshing.

        Parameters:
            posts_to_update (list of dict): Dictionaries each containing the 'reddit_id' of a post.

        Yields:
            praw.models.Submission: The submission for each listed post, in input order.
        """
        self.log_manager.info("Updating submissions")
        for stored_post in posts_to_update:
            yield self.reddit.submission(id=stored_post["reddit_id"])
|
||||
|
||||
|
||||
class SubmissionManager:
    """
    Manages the processing of Reddit submissions: conversion to Post objects, throttled
    analytics updates for known posts, and insertion plus webhook notification for new ones.

    Attributes:
        reddit_monitor (RedditMonitor): Monitors and streams Reddit submissions.
        post_manager (PostManager): Manages post data interactions.
        post_analytics_manager (PostAnalyticsManager): Manages post analytics data.
        webhook_notifier (WebhookNotifier): Sends notifications for newly inserted posts.
        log_manager (LoggingManager): Manages logging for submission processing operations.
    """

    def __init__(
        self,
        reddit_monitor: RedditMonitor,
        post_manager: PostManager,
        post_analytics_manager: PostAnalyticsManager,
        webhook_notifier: WebhookNotifier,
    ):
        """
        Initializes the SubmissionManager with the components needed to process submissions.

        Parameters:
            reddit_monitor (RedditMonitor): The component for monitoring Reddit submissions.
            post_manager (PostManager): The component for managing post data.
            post_analytics_manager (PostAnalyticsManager): The component for managing post analytics.
            webhook_notifier (WebhookNotifier): The component for sending notifications about posts.
        """
        self.reddit_monitor = reddit_monitor
        self.post_manager = post_manager
        self.post_analytics_manager = post_analytics_manager
        self.webhook_notifier = webhook_notifier
        self.log_manager = LoggingManager("scraper.log")

    def convert_submission_to_post(self, submission):
        """
        Converts a Reddit submission object into a Post object.

        Parameters:
            submission (praw.models.Submission): The Reddit submission to convert.

        Returns:
            Post: A Post object populated with data from the Reddit submission.
        """
        return Post(
            reddit_id=submission.id,
            title=submission.title,
            name=submission.name,
            url=submission.url,
            score=submission.score,
            num_comments=submission.num_comments,
            created_utc=submission.created_utc,
            selftext=submission.selftext,
            permalink=submission.permalink,
            upvote_ratio=submission.upvote_ratio,
        )

    def process_submissions(self, submissions, update_frequency=None):
        """
        Processes Reddit submissions one by one.

        Known posts get a new analytics record when they are due; unknown posts are inserted,
        given a first analytics record and announced through the webhook notifier.

        Parameters:
            submissions (Iterable[praw.models.Submission]): The submissions to process.
            update_frequency (int, optional): Minimum number of seconds between analytics
                records for a known post. None (the default) disables throttling, so every
                known post is updated.
        """
        for submission in submissions:
            if not self.post_manager.post_exists(submission.id):
                post = self.convert_submission_to_post(submission)
                self.post_manager.insert_post(post)
                self.post_analytics_manager.update_post_analytics(post)
                self.webhook_notifier.send_notification(post)
                continue

            # With no frequency there is nothing to throttle against. Forwarding None would
            # make the check build timedelta(seconds=None) and raise TypeError.
            is_due = (
                update_frequency is None
                or self.post_analytics_manager.check_update_requirements(
                    submission.id, update_frequency
                )
            )
            if is_due:
                post = self.convert_submission_to_post(submission)
                self.post_analytics_manager.update_post_analytics(post)
|
Binary file not shown.
@ -1,52 +1,26 @@
|
||||
import threading
|
||||
|
||||
|
||||
class Scheduler:
|
||||
def __init__(self):
|
||||
self.tasks = {}
|
||||
def __init__(self, interval, function):
|
||||
self.interval = interval
|
||||
self.function = function
|
||||
self.stop_event = threading.Event()
|
||||
|
||||
def add_task(self, task_name, function, interval):
|
||||
"""
|
||||
Adds a new task to the scheduler.
|
||||
def run(self):
|
||||
while not self.stop_event.wait(self.interval):
|
||||
self.function()
|
||||
|
||||
Parameters:
|
||||
task_name (str): Unique name for the task.
|
||||
function (callable): The function to run for this task.
|
||||
interval (int): Time in seconds between each execution of the task.
|
||||
"""
|
||||
task = {
|
||||
"interval": interval,
|
||||
"function": function,
|
||||
"stop_event": threading.Event(),
|
||||
"thread": threading.Thread(target=self.run_task, args=(task_name,), daemon=True)
|
||||
}
|
||||
self.tasks[task_name] = task
|
||||
task["thread"].start()
|
||||
def stop(self):
|
||||
self.stop_event.set()
|
||||
|
||||
def run_task(self, task_name):
|
||||
"""
|
||||
Executes the task in a loop until its stop event is set.
|
||||
|
||||
Parameters:
|
||||
task_name (str): The name of the task to run.
|
||||
"""
|
||||
task = self.tasks[task_name]
|
||||
while not task["stop_event"].is_set():
|
||||
task["function"]()
|
||||
task["stop_event"].wait(task["interval"])
|
||||
class ThreadManager:
|
||||
def __init__(self, target, args: tuple = ()) -> None:
|
||||
self.target = target
|
||||
self.args = args
|
||||
|
||||
def stop_task(self, task_name):
|
||||
"""
|
||||
Stops the specified task.
|
||||
|
||||
Parameters:
|
||||
task_name (str): The name of the task to stop.
|
||||
"""
|
||||
if task_name in self.tasks:
|
||||
self.tasks[task_name]["stop_event"].set()
|
||||
|
||||
def stop_all_tasks(self):
|
||||
"""
|
||||
Stops all tasks managed by the scheduler.
|
||||
"""
|
||||
for task_name in self.tasks.keys():
|
||||
self.stop_task(task_name)
|
||||
def run(self):
|
||||
thread = threading.Thread(target=self.target, args=self.args)
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
|
@ -1,6 +1,5 @@
|
||||
import requests
|
||||
from app_log import LoggingManager
|
||||
from models import Product, Post
|
||||
|
||||
|
||||
class WebhookNotifier:
|
||||
@ -9,7 +8,7 @@ class WebhookNotifier:
|
||||
self.disable_webhook = disable_webhook
|
||||
self.log_manager = LoggingManager("scraper.log")
|
||||
|
||||
def send_notification(self, post: Post):
|
||||
def send_notification(self, post):
|
||||
title = post.title
|
||||
url = post.url
|
||||
permalink = post.permalink
|
||||
@ -23,25 +22,6 @@ class WebhookNotifier:
|
||||
if not self.disable_webhook:
|
||||
self.log_manager.log(f"Sending notification to {self.webhook_url}")
|
||||
try:
|
||||
requests.post(self.webhook_url, data={"content": content}, timeout=5)
|
||||
except Exception as e:
|
||||
self.log_manager.error(f"Failed to send notification: {e}")
|
||||
|
||||
def costco_notification(self, product : Product):
|
||||
name = product.name
|
||||
price = product.price
|
||||
product_link = product.product_link
|
||||
img_url = product.img_url
|
||||
|
||||
content = f"""
|
||||
**Costco has a new item!**
|
||||
**Name:** {name}
|
||||
**Price:** {price}
|
||||
**Link:** {product_link}
|
||||
{img_url}"""
|
||||
if not self.disable_webhook:
|
||||
self.log_manager.log(f"Sending notification to {self.webhook_url}")
|
||||
try:
|
||||
requests.post(self.webhook_url, data={"content": content}, timeout=5)
|
||||
requests.post(self.webhook_url, data={"content": content})
|
||||
except Exception as e:
|
||||
self.log_manager.error(f"Failed to send notification: {e}")
|
@ -1,4 +1,4 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-04 16:07
|
||||
# Generated by Django 5.0.2 on 2024-03-04 05:15
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
@ -1,27 +0,0 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 00:59
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Adds the ``CostcoProduct`` model on top of the initial migration."""

    dependencies = [("pokemans_app", "0001_initial")]

    operations = [
        migrations.CreateModel(
            name="CostcoProduct",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                ("sku", models.CharField(max_length=255)),
                ("name", models.CharField(max_length=255)),
                # Price is stored as text, not as a numeric column.
                ("price", models.CharField(max_length=255)),
                ("img_url", models.CharField(max_length=555)),
                ("product_link", models.CharField(max_length=555)),
                ("active", models.BooleanField(default=True)),
                ("created_at", models.DateTimeField(auto_now=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
        ),
    ]
|
@ -1,18 +0,0 @@
|
||||
# Generated by Django 5.0.2 on 2024-03-06 02:26
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Makes ``CostcoProduct.sku`` unique."""

    dependencies = [("pokemans_app", "0002_costcoproduct")]

    operations = [
        migrations.AlterField(
            model_name="costcoproduct",
            name="sku",
            field=models.CharField(max_length=255, unique=True),
        ),
    ]
|
@ -20,15 +20,3 @@ class PostAnalytics(models.Model):
|
||||
score = models.IntegerField()
|
||||
upvote_ratio = models.FloatField()
|
||||
created_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
|
||||
class CostcoProduct(models.Model):
    """A Costco product record, identified by a unique SKU."""

    id = models.AutoField(primary_key=True)
    # One row per SKU.
    sku = models.CharField(max_length=255, unique=True)
    name = models.CharField(max_length=255)
    # Price is kept as text rather than as a numeric field.
    price = models.CharField(max_length=255)
    img_url = models.CharField(max_length=555)
    product_link = models.CharField(max_length=555)
    active = models.BooleanField(default=True)
    # NOTE(review): auto_now=True refreshes this on every save, same as
    # updated_at; auto_now_add=True may have been intended -- confirm.
    created_at = models.DateTimeField(auto_now=True)
    updated_at = models.DateTimeField(auto_now=True)
|
@ -1,5 +1,5 @@
|
||||
from rest_framework import serializers
|
||||
from .models import Post, PostAnalytics, CostcoProduct
|
||||
from .models import Post, PostAnalytics
|
||||
|
||||
|
||||
class PostSerializer(serializers.ModelSerializer):
|
||||
@ -11,9 +11,3 @@ class PostAnalyticsSerializer(serializers.ModelSerializer):
|
||||
class Meta:
|
||||
model = PostAnalytics
|
||||
fields = '__all__'
|
||||
|
||||
|
||||
class CostcoProductSerializer(serializers.ModelSerializer):
    """Model serializer exposing every field of ``CostcoProduct``."""

    class Meta:
        fields = "__all__"
        model = CostcoProduct
|
@ -1,7 +1,7 @@
|
||||
from django.shortcuts import render
|
||||
from rest_framework import viewsets
|
||||
from .models import Post, PostAnalytics, CostcoProduct
|
||||
from .serializers import PostSerializer, PostAnalyticsSerializer, CostcoProductSerializer
|
||||
from .models import Post, PostAnalytics
|
||||
from .serializers import PostSerializer, PostAnalyticsSerializer
|
||||
from datetime import timedelta
|
||||
from django.utils import timezone
|
||||
from django.utils.dateparse import parse_datetime
|
||||
@ -55,21 +55,3 @@ class PostAnalyticsViewSet(viewsets.ModelViewSet):
|
||||
pass
|
||||
|
||||
return queryset
|
||||
|
||||
|
||||
class CostcoProductViewSet(viewsets.ModelViewSet):
    """
    Model viewset for ``CostcoProduct`` records.

    The queryset can be narrowed with two optional query parameters:

    - ``sku``: exact match on the product SKU.
    - ``active``: boolean flag, matched case-insensitively against
      ``true``/``false``, ``t``/``f``, ``1``/``0``, ``yes``/``no``,
      ``y``/``n`` and ``on``/``off``.
    """

    queryset = CostcoProduct.objects.all()
    serializer_class = CostcoProductSerializer

    # Spellings accepted for ``active``; the incoming value is lower-cased
    # before lookup, so "True", "TRUE" and "true" are all equivalent.
    _TRUE_STRINGS = frozenset({"1", "t", "true", "y", "yes", "on"})
    _FALSE_STRINGS = frozenset({"0", "f", "false", "n", "no", "off"})

    def get_queryset(self):
        """
        Returns the products matching the ``sku`` and ``active`` query parameters.

        Returns:
            QuerySet: All products, filtered by whichever parameters were supplied.

        Raises:
            rest_framework.exceptions.ValidationError: If ``active`` is supplied
                but is not a recognised boolean spelling (HTTP 400).
        """
        # Imported locally so the module's import block does not need to change.
        from rest_framework.exceptions import ValidationError

        queryset = CostcoProduct.objects.all()
        params = self.request.query_params

        sku = params.get("sku")
        if sku is not None:
            queryset = queryset.filter(sku=sku)

        active = params.get("active")
        if active is not None:
            # Django's BooleanField only coerces "t"/"True"/"1"/"f"/"False"/"0".
            # Passing the raw string (e.g. "true") raises a Django
            # ValidationError that surfaces as HTTP 500, so the flag is
            # parsed here and bad input is reported as a client error.
            normalized = active.strip().lower()
            if normalized in self._TRUE_STRINGS:
                queryset = queryset.filter(active=True)
            elif normalized in self._FALSE_STRINGS:
                queryset = queryset.filter(active=False)
            else:
                raise ValidationError(
                    {"active": "Expected a boolean value such as 'true' or 'false'."}
                )

        return queryset
|
@ -39,11 +39,9 @@ INSTALLED_APPS = [
|
||||
"django.contrib.staticfiles",
|
||||
"rest_framework",
|
||||
"pokemans_app",
|
||||
"corsheaders",
|
||||
]
|
||||
|
||||
MIDDLEWARE = [
|
||||
"corsheaders.middleware.CorsMiddleware",
|
||||
"django.middleware.security.SecurityMiddleware",
|
||||
"django.contrib.sessions.middleware.SessionMiddleware",
|
||||
"django.middleware.common.CommonMiddleware",
|
||||
@ -125,10 +123,3 @@ STATIC_URL = "static/"
|
||||
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
|
||||
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||
|
||||
|
||||
# cors
|
||||
|
||||
CORS_ALLOWED_ORIGINS = [
|
||||
"http://localhost:3000"
|
||||
]
|
@ -17,13 +17,12 @@ Including another URLconf
|
||||
from django.contrib import admin
|
||||
from django.urls import path, include
|
||||
from rest_framework.routers import DefaultRouter
|
||||
from pokemans_app.views import PostViewSet, PostAnalyticsViewSet, CostcoProductViewSet
|
||||
from pokemans_app.views import PostViewSet, PostAnalyticsViewSet
|
||||
|
||||
|
||||
router = DefaultRouter()
|
||||
router.register(r"posts", PostViewSet)
|
||||
router.register(r"post_analytics", PostAnalyticsViewSet)
|
||||
router.register(r"costco_products", CostcoProductViewSet)
|
||||
|
||||
urlpatterns = [
|
||||
path("admin/", admin.site.urls),
|
||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user