Data initialization refactor and related service changes

2025-04-18 15:19:57 -04:00
parent 8f35cedb4a
commit 03b43ce3ab
28 changed files with 3378 additions and 810 deletions


@@ -3,11 +3,33 @@ from app.services.service_manager import ServiceManager
from app.services.file_processing_service import FileProcessingService
from app.services.inventory_service import InventoryService
from app.services.file_service import FileService
from app.services.data_initialization import DataInitializationService
from app.services.external_api.tcgcsv.tcgcsv_service import TCGCSVService
from app.services.external_api.mtgjson.mtgjson_service import MTGJSONService
from app.services.label_printer_service import LabelPrinterService
from app.services.regular_printer_service import RegularPrinterService
from app.services.address_label_service import AddressLabelService
from app.services.pull_sheet_service import PullSheetService
from app.services.set_label_service import SetLabelService
from app.services.scheduler.scheduler_service import SchedulerService
from app.services.external_api.tcgplayer.order_management_service import OrderManagementService
from app.services.external_api.tcgplayer.tcgplayer_inventory_service import TCGPlayerInventoryService
__all__ = [
'BaseService',
'ServiceManager',
'FileProcessingService',
'InventoryService',
'FileService'
'FileService',
'DataInitializationService',
'TCGCSVService',
'MTGJSONService',
'LabelPrinterService',
'RegularPrinterService',
'AddressLabelService',
'PullSheetService',
'SetLabelService',
'SchedulerService',
'OrderManagementService',
'TCGPlayerInventoryService'
]


@@ -1,171 +1,171 @@
import os
import json
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from typing import Optional, List, Dict, Any, Union, Generator, Callable
from sqlalchemy.orm import Session
from app.services.external_api.tcgcsv.tcgcsv_service import TCGCSVService
from app.services.external_api.mtgjson.mtgjson_service import MTGJSONService
from app.models.tcgplayer_group import TCGPlayerGroup
from app.models.tcgplayer_product import TCGPlayerProduct
from app.models.tcgplayer_category import TCGPlayerCategory
from app.services.base_service import BaseService
from app.schemas.file import FileInDB
from app.db.database import transaction
import logging
from app.models.tcgplayer_price_history import TCGPlayerPriceHistory
from sqlalchemy import and_, bindparam, update, insert
import py7zr
import shutil
class DataInitializationService:
def __init__(self, cache_dir: str = "app/data/cache/tcgcsv"):
self.cache_dir = cache_dir
self.categories_dir = os.path.join(cache_dir, "categories")
self.groups_dir = os.path.join(cache_dir, "groups")
self.products_dir = os.path.join(cache_dir, "products")
self.tcgcsv_service = TCGCSVService()
self.mtgjson_service = MTGJSONService()
# Create all necessary directories
os.makedirs(cache_dir, exist_ok=True)
os.makedirs(self.categories_dir, exist_ok=True)
os.makedirs(self.groups_dir, exist_ok=True)
os.makedirs(self.products_dir, exist_ok=True)
logger = logging.getLogger(__name__)
def _get_cache_path(self, filename: str, subdir: str) -> str:
"""Get the full path for a cached file in the specified subdirectory"""
return os.path.join(self.cache_dir, subdir, filename)
async def _cache_categories(self, categories_data: dict):
"""Cache categories data to a JSON file"""
cache_path = self._get_cache_path("categories.json", "categories")
with open(cache_path, 'w') as f:
json.dump(categories_data, f, indent=2)
class DataInitializationService(BaseService):
def __init__(self):
super().__init__(None)
async def _cache_groups(self, game_ids: List[int], groups_data: dict):
for game_id in game_ids:
cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
with open(cache_path, 'w') as f:
json.dump(groups_data, f, default=str)
async def _cache_products(self, game_ids: List[int], group_id: int, products_data: list):
for game_id in game_ids:
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
with open(cache_path, 'w') as f:
json.dump(products_data, f, default=str)
async def _load_cached_categories(self) -> Optional[dict]:
cache_path = self._get_cache_path("categories.json", "categories")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
return None
async def _load_cached_groups(self, game_ids: List[int]) -> Optional[dict]:
# Try to load cached data for any of the game IDs
for game_id in game_ids:
cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
return None
async def _load_cached_products(self, game_ids: List[int], group_id: int) -> Optional[list]:
# Try to load cached data for any of the game IDs
for game_id in game_ids:
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
return None
async def initialize_data(
async def _cache_data(
self,
db: Session,
game_ids: List[int],
use_cache: bool = True,
init_categories: bool = True,
init_groups: bool = True,
init_products: bool = True,
init_archived_prices: bool = False,
archived_prices_start_date: Optional[str] = None,
archived_prices_end_date: Optional[str] = None,
init_mtgjson: bool = True
) -> Dict[str, Any]:
"""Initialize TCGPlayer data with configurable steps"""
print("Initializing TCGPlayer data...")
results = {
"categories": 0,
"groups": {},
"products": {},
"archived_prices": False,
"mtgjson": {}
}
data: Union[dict, list],
filename: str,
subdir: str,
default_str: bool = False,
file_type: str = "json",
content_type: str = "application/json",
metadata: Optional[Dict] = None
) -> FileInDB:
"""Generic function to cache data to a JSON file"""
file_data = json.dumps(data, default=str if default_str else None, indent=2)
return await self.file_service.save_file(
db,
file_data,
filename,
subdir,
file_type=file_type,
content_type=content_type,
metadata=metadata
)
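# Illustrative usage sketch (not part of this diff). It assumes a live
# SQLAlchemy Session and a DataInitializationService wired to a FileService;
# the payload and metadata values are hypothetical.
async def example_cache_categories(svc: "DataInitializationService", db: Session) -> FileInDB:
    return await svc._cache_data(
        db,
        {"success": True, "results": []},  # hypothetical categories payload
        "categories.json",
        "tcgcsv/categories",
        metadata={"source": "tcgcsv"},
    )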
if init_categories:
print("\nInitializing categories...")
categories_data = None
if use_cache:
categories_data = await self._load_cached_categories()
async def _load_cached_data(
self,
db: Session,
filename: str
) -> Optional[Dict[str, Any]]:
"""Generic function to load cached data from a JSON file with 7-day expiration"""
file_record = await self.file_service.get_file_by_filename(db, filename)
if file_record:
# Check if cache is expired (7 days)
cache_age = datetime.now() - file_record.created_at
if cache_age.days < 7:
with open(file_record.path, 'r') as f:
return json.load(f)
else:
logger.info(f"Cache expired for {filename}, age: {cache_age.days} days")
# Delete the expired cache file
await self.file_service.delete_file(db, file_record.id)
return None
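# Minimal standalone sketch of the 7-day expiry rule used above (not part of
# this diff; datetime and timedelta come from the imports at the top of this module).
def is_cache_expired(created_at: datetime, max_age_days: int = 7) -> bool:
    """Return True once a cached file is older than max_age_days."""
    return (datetime.now() - created_at).days >= max_age_days
# e.g. is_cache_expired(datetime.now() - timedelta(days=8)) -> True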
async def sync_categories(self, db: Session, categories_data: dict):
"""Sync categories data to the database using streaming for large datasets"""
categories = categories_data.get("results", [])
batch_size = 1000 # Process in batches of 1000
total_categories = len(categories)
with transaction(db):
for i in range(0, total_categories, batch_size):
batch = categories[i:i + batch_size]
for category_data in batch:
existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
if existing_category:
# Update existing category
for key, value in {
"name": category_data["name"],
"display_name": category_data.get("displayName"),
"seo_category_name": category_data.get("seoCategoryName"),
"category_description": category_data.get("categoryDescription"),
"category_page_title": category_data.get("categoryPageTitle"),
"sealed_label": category_data.get("sealedLabel"),
"non_sealed_label": category_data.get("nonSealedLabel"),
"condition_guide_url": category_data.get("conditionGuideUrl"),
"is_scannable": category_data.get("isScannable", False),
"popularity": category_data.get("popularity", 0),
"is_direct": category_data.get("isDirect", False),
"modified_on": datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
}.items():
setattr(existing_category, key, value)
else:
new_category = TCGPlayerCategory(
category_id=category_data["categoryId"],
name=category_data["name"],
display_name=category_data.get("displayName"),
seo_category_name=category_data.get("seoCategoryName"),
category_description=category_data.get("categoryDescription"),
category_page_title=category_data.get("categoryPageTitle"),
sealed_label=category_data.get("sealedLabel"),
non_sealed_label=category_data.get("nonSealedLabel"),
condition_guide_url=category_data.get("conditionGuideUrl"),
is_scannable=category_data.get("isScannable", False),
popularity=category_data.get("popularity", 0),
is_direct=category_data.get("isDirect", False),
modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
)
db.add(new_category)
# Commit after each batch
db.commit()
logger.info(f"Processed {min(i + batch_size, total_categories)}/{total_categories} categories")
if not categories_data:
print("Fetching categories from API...")
categories_data = await self.tcgcsv_service.get_categories()
if use_cache:
await self._cache_categories(categories_data)
if not categories_data.get("success"):
raise Exception(f"Failed to fetch categories: {categories_data.get('errors')}")
# Sync categories to database
categories = categories_data.get("results", [])
synced_categories = []
for category_data in categories:
existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
if existing_category:
synced_categories.append(existing_category)
else:
new_category = TCGPlayerCategory(
category_id=category_data["categoryId"],
name=category_data["name"],
display_name=category_data.get("displayName"),
seo_category_name=category_data.get("seoCategoryName"),
category_description=category_data.get("categoryDescription"),
category_page_title=category_data.get("categoryPageTitle"),
sealed_label=category_data.get("sealedLabel"),
non_sealed_label=category_data.get("nonSealedLabel"),
condition_guide_url=category_data.get("conditionGuideUrl"),
is_scannable=category_data.get("isScannable", False),
popularity=category_data.get("popularity", 0),
is_direct=category_data.get("isDirect", False),
modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
)
db.add(new_category)
synced_categories.append(new_category)
db.commit()
results["categories"] = len(synced_categories)
print(f"Synced {len(synced_categories)} categories")
# Process each game ID separately
for game_id in game_ids:
print(f"\nProcessing game ID: {game_id}")
results["groups"][game_id] = 0
results["products"][game_id] = {}
if init_groups:
print(f"Initializing groups for game ID {game_id}...")
groups_data = None
if use_cache:
groups_data = await self._load_cached_groups([game_id])
if not groups_data:
print(f"Fetching groups for game ID {game_id} from API...")
groups_data = await self.tcgcsv_service.get_groups([game_id])
if use_cache:
await self._cache_groups([game_id], groups_data)
if not groups_data.get("success"):
raise Exception(f"Failed to fetch groups for game ID {game_id}: {groups_data.get('errors')}")
# Sync groups to database
groups = groups_data.get("results", [])
synced_groups = []
for group_data in groups:
async def init_categories(self, db: Session, use_cache: bool = True) -> bool:
"""Initialize categories data"""
logger.info("Starting categories initialization")
if use_cache:
categories_data = await self._load_cached_data(db, "categories.json")
if categories_data:
await self.sync_categories(db, categories_data)
logger.info("Categories initialized from cache")
return True
else:
logger.warning("No cached categories data found")
return False
else:
tcgcsv_service = self.get_service('tcgcsv')
categories_data = await tcgcsv_service.get_categories()
# Save the categories data
await self._cache_data(
db,
categories_data,
"categories.json",
"tcgcsv/categories",
file_type="json",
content_type="application/json"
)
await self.sync_categories(db, categories_data)
logger.info("Categories initialized from API")
return True
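# Hypothetical call site (not part of this diff): try the cache first, fall
# back to the API when nothing cached is usable.
#
#   if not await service.init_categories(db, use_cache=True):
#       await service.init_categories(db, use_cache=False)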
async def sync_groups(self, db: Session, groups_data: dict):
"""Sync groups data to the database using streaming for large datasets"""
groups = groups_data.get("results", [])
batch_size = 1000 # Process in batches of 1000
total_groups = len(groups)
with transaction(db):
for i in range(0, total_groups, batch_size):
batch = groups[i:i + batch_size]
for group_data in batch:
existing_group = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.group_id == group_data["groupId"]).first()
if existing_group:
synced_groups.append(existing_group)
# Update existing group
for key, value in {
"name": group_data["name"],
"abbreviation": group_data.get("abbreviation"),
"is_supplemental": group_data.get("isSupplemental", False),
"published_on": datetime.fromisoformat(group_data["publishedOn"].replace("Z", "+00:00")) if group_data.get("publishedOn") else None,
"modified_on": datetime.fromisoformat(group_data["modifiedOn"].replace("Z", "+00:00")) if group_data.get("modifiedOn") else None,
"category_id": group_data.get("categoryId")
}.items():
setattr(existing_group, key, value)
else:
new_group = TCGPlayerGroup(
group_id=group_data["groupId"],
@@ -177,88 +177,561 @@ class DataInitializationService:
category_id=group_data.get("categoryId")
)
db.add(new_group)
synced_groups.append(new_group)
# Commit after each batch
db.commit()
results["groups"][game_id] = len(synced_groups)
print(f"Synced {len(synced_groups)} groups for game ID {game_id}")
logger.info(f"Processed {min(i + batch_size, total_groups)}/{total_groups} groups")
if init_products:
# Handle products for each group in this game ID
for group in synced_groups:
print(f"Initializing products for group {group.name} (game ID {game_id})...")
products_data = None
if use_cache:
products_data = await self._load_cached_products([game_id], group.group_id)
async def init_groups(self, db: Session, use_cache: bool = True, game_ids: List[int] = None) -> bool:
"""Initialize groups data"""
logger.info(f"Starting groups initialization for game IDs: {game_ids}")
tcgcsv_service = self.get_service('tcgcsv')
for game_id in game_ids:
if use_cache:
groups_data = await self._load_cached_data(db, f"groups_{game_id}.json")
if groups_data:
await self.sync_groups(db, groups_data)
logger.info(f"Groups initialized from cache for game ID {game_id}")
else:
logger.warning(f"No cached groups data found for game ID {game_id}")
return False
else:
groups_data = await tcgcsv_service.get_groups(game_id)
# Save the groups data
await self._cache_data(
db,
groups_data,
f"groups_{game_id}.json",
"tcgcsv/groups",
file_type="json",
content_type="application/json"
)
await self.sync_groups(db, groups_data)
logger.info(f"Groups initialized from API for game ID {game_id}")
return True
async def sync_products(self, db: Session, products_data: str):
"""Sync products data to the database using streaming for large datasets"""
import csv
import io
# Parse CSV data
csv_reader = csv.DictReader(io.StringIO(products_data))
products_list = list(csv_reader)
batch_size = 1000 # Process in batches of 1000
total_products = len(products_list)
with transaction(db):
for i in range(0, total_products, batch_size):
batch = products_list[i:i + batch_size]
for product_data in batch:
existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == product_data["productId"]).first()
if existing_product:
# Update existing product
for key, value in {
"name": product_data["name"],
"clean_name": product_data.get("cleanName"),
"image_url": product_data.get("imageUrl"),
"category_id": product_data.get("categoryId"),
"group_id": product_data.get("groupId"),
"url": product_data.get("url"),
"modified_on": datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
"image_count": product_data.get("imageCount", 0),
"ext_rarity": product_data.get("extRarity"),
"ext_number": product_data.get("extNumber"),
"low_price": float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
"mid_price": float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
"high_price": float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
"market_price": float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
"direct_low_price": float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
"sub_type_name": product_data.get("subTypeName")
}.items():
setattr(existing_product, key, value)
else:
new_product = TCGPlayerProduct(
product_id=product_data["productId"],
name=product_data["name"],
clean_name=product_data.get("cleanName"),
image_url=product_data.get("imageUrl"),
category_id=product_data.get("categoryId"),
group_id=product_data.get("groupId"),
url=product_data.get("url"),
modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
image_count=product_data.get("imageCount", 0),
ext_rarity=product_data.get("extRarity"),
ext_subtype=product_data.get("extSubtype"),
ext_oracle_text=product_data.get("extOracleText"),
ext_number=product_data.get("extNumber"),
low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
sub_type_name=product_data.get("subTypeName"),
ext_power=product_data.get("extPower"),
ext_toughness=product_data.get("extToughness"),
ext_flavor_text=product_data.get("extFlavorText")
)
db.add(new_product)
# Commit after each batch
db.commit()
logger.info(f"Processed {min(i + batch_size, total_products)}/{total_products} products")
if not products_data:
print(f"Fetching products for group {group.name} (game ID {game_id}) from API...")
products_data = await self.tcgcsv_service.get_products_and_prices([game_id], group.group_id)
if use_cache:
await self._cache_products([game_id], group.group_id, products_data)
async def init_products(self, db: Session, use_cache: bool = True, game_ids: List[int] = None) -> bool:
"""Initialize products data"""
logger.info(f"Starting products initialization for game IDs: {game_ids}")
tcgcsv_service = self.get_service('tcgcsv')
for game_id in game_ids:
groups = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.category_id == game_id).all()
logger.info(f"Processing {len(groups)} groups for game ID {game_id}")
for group in groups:
if use_cache:
products_data = await self._load_cached_data(db, f"products_{game_id}_{group.group_id}.json")
if products_data:
await self.sync_products(db, products_data)
logger.info(f"Products initialized from cache for group {group.group_id}")
else:
logger.warning(f"No cached products data found for group {group.group_id}")
continue
else:
# Get CSV data from API
csv_data = await tcgcsv_service.get_products_and_prices(game_id, group.group_id)
# Save the CSV file
await self.file_service.save_file(
db,
csv_data,
f"products_{game_id}_{group.group_id}.csv",
"tcgcsv/products",
file_type="csv",
content_type="text/csv"
)
# Parse and sync the CSV data
await self.sync_products(db, csv_data)
logger.info(f"Products initialized from API for group {group.group_id}")
return True
# Sync products to database
synced_products = []
for product_data in products_data:
existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == int(product_data["productId"])).first()
if existing_product:
synced_products.append(existing_product)
else:
new_product = TCGPlayerProduct(
product_id=int(product_data["productId"]),
name=product_data["name"],
clean_name=product_data.get("cleanName"),
image_url=product_data.get("imageUrl"),
category_id=int(product_data["categoryId"]),
group_id=int(product_data["groupId"]),
url=product_data.get("url"),
modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
image_count=int(product_data.get("imageCount", 0)),
ext_rarity=product_data.get("extRarity"),
ext_number=product_data.get("extNumber"),
low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
sub_type_name=product_data.get("subTypeName")
)
db.add(new_product)
synced_products.append(new_product)
db.commit()
results["products"][game_id][group.group_id] = len(synced_products)
print(f"Synced {len(synced_products)} products for group {group.name} (game ID {game_id})")
if init_archived_prices:
if not archived_prices_start_date or not archived_prices_end_date:
raise ValueError("Both start_date and end_date are required for archived prices initialization")
print(f"\nInitializing archived prices from {archived_prices_start_date} to {archived_prices_end_date}...")
await self.tcgcsv_service.get_archived_prices_for_date_range(archived_prices_start_date, archived_prices_end_date)
results["archived_prices"] = True
print("Archived prices initialization completed")
if init_mtgjson:
print("\nInitializing MTGJSON data...")
identifiers_result = await self.mtgjson_service.download_and_process_identifiers(db)
skus_result = await self.mtgjson_service.download_and_process_skus(db)
results["mtgjson"] = {
"cards_processed": identifiers_result["cards_processed"],
"skus_processed": skus_result["skus_processed"]
async def sync_archived_prices(self, db: Session, archived_prices_data: dict, date: datetime):
"""Sync archived prices data to the database using bulk operations.
Note: Historical prices are never updated, only new records are inserted."""
from sqlalchemy import insert
from app.models.tcgplayer_price_history import TCGPlayerPriceHistory
# Prepare data for bulk operations
price_records = []
for price_data in archived_prices_data.get("results", []):
record = {
"product_id": price_data["productId"],
"date": date,
"sub_type_name": price_data["subTypeName"],
"low_price": price_data.get("lowPrice"),
"mid_price": price_data.get("midPrice"),
"high_price": price_data.get("highPrice"),
"market_price": price_data.get("marketPrice"),
"direct_low_price": price_data.get("directLowPrice")
}
price_records.append(record)
if not price_records:
return
# Get existing records in bulk to avoid duplicates
product_ids = [r["product_id"] for r in price_records]
sub_type_names = [r["sub_type_name"] for r in price_records]
existing_records = db.query(TCGPlayerPriceHistory).filter(
TCGPlayerPriceHistory.product_id.in_(product_ids),
TCGPlayerPriceHistory.date == date,
TCGPlayerPriceHistory.sub_type_name.in_(sub_type_names)
).all()
# Filter out existing records
existing_keys = {(r.product_id, r.date, r.sub_type_name) for r in existing_records}
to_insert = [
record for record in price_records
if (record["product_id"], record["date"], record["sub_type_name"]) not in existing_keys
]
# Perform bulk insert for new records only
if to_insert:
stmt = insert(TCGPlayerPriceHistory)
db.execute(stmt, to_insert)
db.commit()
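# Hypothetical usage (not part of this diff); the payload mirrors the
# "results" shape consumed above, and the IDs and prices are illustrative only.
#
#   await service.sync_archived_prices(
#       db,
#       {"success": True, "results": [
#           {"productId": 12345, "subTypeName": "Normal", "marketPrice": 1.23},
#       ]},
#       datetime(2025, 3, 1),
#   )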
async def init_archived_prices(self, db: Session, start_date: datetime, end_date: datetime, use_cache: bool = True, game_ids: List[int] = None) -> bool:
"""Initialize archived prices data"""
logger.info(f"Starting archived prices initialization from {start_date} to {end_date}")
tcgcsv_service = self.get_service('tcgcsv')
processed_dates = await tcgcsv_service.get_tcgcsv_date_range(start_date, end_date)
logger.info(f"Processing {len(processed_dates)} dates")
# Convert game_ids to set for faster lookups
desired_game_ids = set(game_ids) if game_ids else set()
for date in processed_dates:
date_path = f"app/data/cache/tcgcsv/prices/{date}"
# Check if we already have the data for this date
if use_cache and os.path.exists(date_path):
logger.info(f"Using cached price data for {date}")
else:
logger.info(f"Downloading and processing archived prices for {date}")
# Download and extract the archive
archive_data = await tcgcsv_service.get_archived_prices_for_date(date)
# Save the archive file
file_record = await self.file_service.save_file(
db,
archive_data,
f"prices-{date}.ppmd.7z",
"tcgcsv/prices/zip",
file_type="application/x-7z-compressed",
content_type="application/x-7z-compressed"
)
# Extract the 7z file to a temporary directory
temp_extract_path = f"app/data/cache/tcgcsv/prices/temp_{date}"
os.makedirs(temp_extract_path, exist_ok=True)
with py7zr.SevenZipFile(file_record.path, 'r') as archive:
archive.extractall(path=temp_extract_path)
# Find the date subdirectory in the temp directory
date_subdir = os.path.join(temp_extract_path, str(date))
if os.path.exists(date_subdir):
# Remove existing directory if it exists
if os.path.exists(date_path):
shutil.rmtree(date_path)
# Create the destination directory
os.makedirs(date_path, exist_ok=True)
# Move contents from the date subdirectory to the final path
for item in os.listdir(date_subdir):
src = os.path.join(date_subdir, item)
dst = os.path.join(date_path, item)
os.rename(src, dst)
# Clean up the temporary directory
os.rmdir(date_subdir)
os.rmdir(temp_extract_path)
# Process each category directory
for category_id in os.listdir(date_path):
# Skip categories that aren't in our desired game IDs
if int(category_id) not in desired_game_ids:
continue
category_path = os.path.join(date_path, category_id)
if not os.path.isdir(category_path):
continue
# Process each group directory
for group_id in os.listdir(category_path):
group_path = os.path.join(category_path, group_id)
if not os.path.isdir(group_path):
continue
# Process the prices file
prices_file = os.path.join(group_path, "prices")
if not os.path.exists(prices_file):
continue
try:
with open(prices_file, 'r') as f:
price_data = json.load(f)
if price_data.get("success"):
await self.sync_archived_prices(db, price_data, datetime.strptime(date, "%Y-%m-%d"))
logger.info(f"Processed prices for category {category_id}, group {group_id} on {date}")
except Exception as e:
logger.error(f"Error processing prices file {prices_file}: {str(e)}")
continue
return True
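# Hypothetical call (not part of this diff), passing ISO date strings the same
# way initialize_data does further below; the game ID is illustrative only.
#
#   await service.init_archived_prices(db, "2025-03-01", "2025-03-07",
#                                      use_cache=True, game_ids=[1])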
async def init_mtgjson(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
"""Initialize MTGJSON data"""
logger.info("Starting MTGJSON initialization")
mtgjson_service = self.get_service('mtgjson')
identifiers_count = 0
skus_count = 0
# Process identifiers
if use_cache:
cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_identifiers.json")
if cached_file and os.path.exists(cached_file.path):
logger.info("MTGJSON identifiers initialized from cache")
identifiers_count = await self._process_streamed_data(
db,
self._stream_json_file(cached_file.path),
"mtgjson_identifiers.json",
"mtgjson",
self.sync_mtgjson_identifiers
)
else:
logger.info("Downloading MTGJSON identifiers from API")
identifiers_count = await self._process_streamed_data(
db,
await mtgjson_service.get_identifiers(db),
"mtgjson_identifiers.json",
"mtgjson",
self.sync_mtgjson_identifiers
)
else:
logger.info("Downloading MTGJSON identifiers from API")
identifiers_count = await self._process_streamed_data(
db,
await mtgjson_service.get_identifiers(db),
"mtgjson_identifiers.json",
"mtgjson",
self.sync_mtgjson_identifiers
)
# Process SKUs
if use_cache:
cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_skus.json")
if cached_file and os.path.exists(cached_file.path):
logger.info("MTGJSON SKUs initialized from cache")
skus_count = await self._process_streamed_data(
db,
self._stream_json_file(cached_file.path),
"mtgjson_skus.json",
"mtgjson",
self.sync_mtgjson_skus
)
else:
logger.info("Downloading MTGJSON SKUs from API")
skus_count = await self._process_streamed_data(
db,
await mtgjson_service.get_skus(db),
"mtgjson_skus.json",
"mtgjson",
self.sync_mtgjson_skus
)
else:
logger.info("Downloading MTGJSON SKUs from API")
skus_count = await self._process_streamed_data(
db,
await mtgjson_service.get_skus(db),
"mtgjson_skus.json",
"mtgjson",
self.sync_mtgjson_skus
)
return {
"identifiers_processed": identifiers_count,
"skus_processed": skus_count
}
async def _process_streamed_data(
self,
db: Session,
data_stream: Generator[Dict[str, Any], None, None],
filename: str,
subdir: str,
sync_func: Callable
) -> int:
"""Process streamed data and sync to database"""
count = 0
items = []
batch_size = 1000
for item in data_stream:
if item["type"] == "meta":
# Handle meta data separately
continue
count += 1
items.append(item["data"])
# Process in batches
if len(items) >= batch_size:
await sync_func(db, items)
items = []
# Process any remaining items
if items:
await sync_func(db, items)
return count
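# Sketch of driving _process_streamed_data with a hand-rolled generator
# (hypothetical, not part of this diff). Meta entries are skipped, so only the
# single item below is counted and handed to the sync callback.
def _fake_stream() -> Generator[Dict[str, Any], None, None]:
    yield {"type": "meta", "data": {"version": "5.x"}}
    yield {"type": "item", "data": {"example-uuid": {"name": "Example Card"}}}
#   count = await service._process_streamed_data(
#       db, _fake_stream(), "mtgjson_identifiers.json", "mtgjson",
#       service.sync_mtgjson_identifiers)   # count == 1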
async def sync_mtgjson_identifiers(self, db: Session, identifiers_data: dict):
"""Sync MTGJSON identifiers data to the database"""
from app.models.mtgjson_card import MTGJSONCard
with transaction(db):
for card_id, card_data in identifiers_data.items():
existing_card = db.query(MTGJSONCard).filter(MTGJSONCard.card_id == card_id).first()
if existing_card:
# Update existing card
for key, value in {
"name": card_data.get("name"),
"set_code": card_data.get("setCode"),
"uuid": card_data.get("uuid"),
"abu_id": card_data.get("identifiers", {}).get("abuId"),
"card_kingdom_etched_id": card_data.get("identifiers", {}).get("cardKingdomEtchedId"),
"card_kingdom_foil_id": card_data.get("identifiers", {}).get("cardKingdomFoilId"),
"card_kingdom_id": card_data.get("identifiers", {}).get("cardKingdomId"),
"cardsphere_id": card_data.get("identifiers", {}).get("cardsphereId"),
"cardsphere_foil_id": card_data.get("identifiers", {}).get("cardsphereFoilId"),
"cardtrader_id": card_data.get("identifiers", {}).get("cardtraderId"),
"csi_id": card_data.get("identifiers", {}).get("csiId"),
"mcm_id": card_data.get("identifiers", {}).get("mcmId"),
"mcm_meta_id": card_data.get("identifiers", {}).get("mcmMetaId"),
"miniaturemarket_id": card_data.get("identifiers", {}).get("miniaturemarketId"),
"mtg_arena_id": card_data.get("identifiers", {}).get("mtgArenaId"),
"mtgjson_foil_version_id": card_data.get("identifiers", {}).get("mtgjsonFoilVersionId"),
"mtgjson_non_foil_version_id": card_data.get("identifiers", {}).get("mtgjsonNonFoilVersionId"),
"mtgjson_v4_id": card_data.get("identifiers", {}).get("mtgjsonV4Id"),
"mtgo_foil_id": card_data.get("identifiers", {}).get("mtgoFoilId"),
"mtgo_id": card_data.get("identifiers", {}).get("mtgoId"),
"multiverse_id": card_data.get("identifiers", {}).get("multiverseId"),
"scg_id": card_data.get("identifiers", {}).get("scgId"),
"scryfall_id": card_data.get("identifiers", {}).get("scryfallId"),
"scryfall_card_back_id": card_data.get("identifiers", {}).get("scryfallCardBackId"),
"scryfall_oracle_id": card_data.get("identifiers", {}).get("scryfallOracleId"),
"scryfall_illustration_id": card_data.get("identifiers", {}).get("scryfallIllustrationId"),
"tcgplayer_product_id": card_data.get("identifiers", {}).get("tcgplayerProductId"),
"tcgplayer_etched_product_id": card_data.get("identifiers", {}).get("tcgplayerEtchedProductId"),
"tnt_id": card_data.get("identifiers", {}).get("tntId")
}.items():
setattr(existing_card, key, value)
else:
new_card = MTGJSONCard(
card_id=card_id,
name=card_data.get("name"),
set_code=card_data.get("setCode"),
uuid=card_data.get("uuid"),
abu_id=card_data.get("identifiers", {}).get("abuId"),
card_kingdom_etched_id=card_data.get("identifiers", {}).get("cardKingdomEtchedId"),
card_kingdom_foil_id=card_data.get("identifiers", {}).get("cardKingdomFoilId"),
card_kingdom_id=card_data.get("identifiers", {}).get("cardKingdomId"),
cardsphere_id=card_data.get("identifiers", {}).get("cardsphereId"),
cardsphere_foil_id=card_data.get("identifiers", {}).get("cardsphereFoilId"),
cardtrader_id=card_data.get("identifiers", {}).get("cardtraderId"),
csi_id=card_data.get("identifiers", {}).get("csiId"),
mcm_id=card_data.get("identifiers", {}).get("mcmId"),
mcm_meta_id=card_data.get("identifiers", {}).get("mcmMetaId"),
miniaturemarket_id=card_data.get("identifiers", {}).get("miniaturemarketId"),
mtg_arena_id=card_data.get("identifiers", {}).get("mtgArenaId"),
mtgjson_foil_version_id=card_data.get("identifiers", {}).get("mtgjsonFoilVersionId"),
mtgjson_non_foil_version_id=card_data.get("identifiers", {}).get("mtgjsonNonFoilVersionId"),
mtgjson_v4_id=card_data.get("identifiers", {}).get("mtgjsonV4Id"),
mtgo_foil_id=card_data.get("identifiers", {}).get("mtgoFoilId"),
mtgo_id=card_data.get("identifiers", {}).get("mtgoId"),
multiverse_id=card_data.get("identifiers", {}).get("multiverseId"),
scg_id=card_data.get("identifiers", {}).get("scgId"),
scryfall_id=card_data.get("identifiers", {}).get("scryfallId"),
scryfall_card_back_id=card_data.get("identifiers", {}).get("scryfallCardBackId"),
scryfall_oracle_id=card_data.get("identifiers", {}).get("scryfallOracleId"),
scryfall_illustration_id=card_data.get("identifiers", {}).get("scryfallIllustrationId"),
tcgplayer_product_id=card_data.get("identifiers", {}).get("tcgplayerProductId"),
tcgplayer_etched_product_id=card_data.get("identifiers", {}).get("tcgplayerEtchedProductId"),
tnt_id=card_data.get("identifiers", {}).get("tntId")
)
db.add(new_card)
async def sync_mtgjson_skus(self, db: Session, skus_data: dict):
"""Sync MTGJSON SKUs data to the database"""
from app.models.mtgjson_sku import MTGJSONSKU
with transaction(db):
for card_uuid, sku_list in skus_data.items():
for sku in sku_list:
# Handle case where sku is a string (skuId)
if isinstance(sku, str):
sku_id = sku
existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
if existing_sku:
# Update existing SKU
existing_sku.card_id = card_uuid
else:
new_sku = MTGJSONSKU(
sku_id=sku_id,
card_id=card_uuid
)
db.add(new_sku)
# Handle case where sku is a dictionary
else:
sku_id = str(sku.get("skuId"))
existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
if existing_sku:
# Update existing SKU
for key, value in {
"product_id": str(sku.get("productId")),
"condition": sku.get("condition"),
"finish": sku.get("finish"),
"language": sku.get("language"),
"printing": sku.get("printing"),
"card_id": card_uuid
}.items():
setattr(existing_sku, key, value)
else:
new_sku = MTGJSONSKU(
sku_id=sku_id,
product_id=str(sku.get("productId")),
condition=sku.get("condition"),
finish=sku.get("finish"),
language=sku.get("language"),
printing=sku.get("printing"),
card_id=card_uuid
)
db.add(new_sku)
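# Hypothetical input shape for sync_mtgjson_skus (not part of this diff): the
# TcgplayerSkus payload maps a card UUID to a list of SKU dicts, matching the
# dictionary branch handled above. The UUID and IDs are illustrative only.
#
#   await service.sync_mtgjson_skus(db, {
#       "00000000-0000-0000-0000-000000000000": [
#           {"skuId": 123, "productId": 456, "condition": "NEAR MINT",
#            "finish": "NON FOIL", "language": "ENGLISH", "printing": "NON FOIL"},
#       ],
#   })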
async def initialize_data(
self,
db: Session,
game_ids: List[int],
use_cache: bool = False,
init_categories: bool = True,
init_groups: bool = True,
init_products: bool = True,
init_archived_prices: bool = True,
archived_prices_start_date: Optional[str] = None,
archived_prices_end_date: Optional[str] = None,
init_mtgjson: bool = True
) -> Dict[str, Any]:
"""Initialize 3rd party API data loads with configurable steps"""
logger.info("Starting data initialization process")
results = {}
if init_categories:
logger.info("Initializing categories...")
results["categories"] = await self.init_categories(db, use_cache)
if init_groups:
logger.info("Initializing groups...")
results["groups"] = await self.init_groups(db, use_cache, game_ids)
if init_products:
logger.info("Initializing products...")
results["products"] = await self.init_products(db, use_cache, game_ids)
if init_archived_prices:
logger.info("Initializing archived prices...")
results["archived_prices"] = await self.init_archived_prices(
db,
archived_prices_start_date,
archived_prices_end_date,
use_cache,
game_ids
)
if init_mtgjson:
logger.info("Initializing MTGJSON data...")
results["mtgjson"] = await self.init_mtgjson(db, use_cache)
logger.info("Data initialization completed")
return results
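# Hypothetical end-to-end call (not part of this diff); game IDs and dates are
# illustrative only.
#
#   results = await service.initialize_data(
#       db,
#       game_ids=[1],
#       use_cache=False,
#       init_archived_prices=True,
#       archived_prices_start_date="2025-03-01",
#       archived_prices_end_date="2025-03-07",
#   )
#   # e.g. {"categories": True, "groups": True, "products": True,
#   #       "archived_prices": True, "mtgjson": {"identifiers_processed": ...,
#   #                                            "skus_processed": ...}}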
async def clear_cache(self) -> None:
async def clear_cache(self, db: Session) -> None:
"""Clear all cached data"""
# Delete all files in categories, groups, and products directories
for subdir in ["categories", "groups", "products"]:
dir_path = os.path.join(self.cache_dir, subdir)
if os.path.exists(dir_path):
for filename in os.listdir(dir_path):
file_path = os.path.join(dir_path, filename)
if os.path.isfile(file_path):
os.unlink(file_path)
files = await self.file_service.list_files(db, file_type="json")
for file in files:
if file.path.startswith(subdir):
await self.file_service.delete_file(db, file.id)
await self.mtgjson_service.clear_cache()
print("Cache cleared")
async def close(self):
await self.tcgcsv_service.close()
print("Cache cleared")


@@ -92,24 +92,3 @@ class BaseExternalService:
def file_service(self):
"""Convenience property for file service"""
return self.get_service('file')
async def save_file(self, db: Session, file_data: Union[bytes, list[dict]], file_name: str, subdir: str, file_type: Optional[str] = None) -> FileInDB:
"""Save a file using the FileService"""
if isinstance(file_data, list):
# Convert list of dictionaries to CSV bytes
output = io.StringIO()
writer = csv.DictWriter(output, fieldnames=file_data[0].keys())
writer.writeheader()
writer.writerows(file_data)
file_data = output.getvalue().encode('utf-8')
file_type = file_type or 'text/csv'
# Use FileService to save the file
file_service = self.get_service('file')
return await file_service.save_file(
db=db,
file_data=file_data,
filename=file_name,
subdir=subdir,
file_type=file_type
)
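# Hypothetical usage of the list-of-dicts branch above (not part of this
# diff): the rows are serialized to CSV bytes via csv.DictWriter before being
# handed to the FileService.
#
#   rows = [{"productId": 1, "name": "Example Product"}]
#   file_record = await external_service.save_file(
#       db, rows, "products_example.csv", "tcgcsv/products")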


@@ -1,29 +1,24 @@
import os
import json
import zipfile
import aiohttp
import asyncio
import time
import sys
from typing import Dict, Any, Optional, Generator
from sqlalchemy.orm import Session
from datetime import datetime
from app.models.mtgjson_card import MTGJSONCard
from app.models.mtgjson_sku import MTGJSONSKU
from app.db.database import get_db, transaction
from app.services.external_api.base_external_service import BaseExternalService
from app.schemas.file import FileInDB
import logging
logger = logging.getLogger(__name__)
class MTGJSONService(BaseExternalService):
def __init__(self, cache_dir: str = "app/data/cache/mtgjson", batch_size: int = 1000):
def __init__(self, cache_dir: str = "app/data/cache/mtgjson"):
super().__init__(base_url="https://mtgjson.com/api/v5/")
# Ensure the cache directory exists
os.makedirs(cache_dir, exist_ok=True)
self.cache_dir = cache_dir
self.identifiers_dir = os.path.join(cache_dir, "identifiers")
self.skus_dir = os.path.join(cache_dir, "skus")
self.batch_size = batch_size
# Create necessary directories
os.makedirs(cache_dir, exist_ok=True)
# Ensure subdirectories exist
os.makedirs(self.identifiers_dir, exist_ok=True)
os.makedirs(self.skus_dir, exist_ok=True)
@@ -46,112 +41,133 @@ class MTGJSONService(BaseExternalService):
print(f"Downloading {url}...")
start_time = time.time()
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
if response.status == 200:
file_data = await response.read()
return await self.save_file(
db=db,
file_data=file_data,
file_name=filename,
subdir=f"mtgjson/{subdir}",
file_type=response.headers.get('content-type', 'application/octet-stream')
)
else:
raise Exception(f"Failed to download file from {url}. Status: {response.status}")
# Use the base external service's _make_request method
file_data = await self._make_request(
method="GET",
endpoint=url.replace(self.base_url, ""),
binary=True
)
# Save the file using the file service
return await self.file_service.save_file(
db=db,
file_data=file_data,
filename=filename,
subdir=f"mtgjson/{subdir}",
file_type="application/zip",
content_type="application/zip"
)
async def _unzip_file(self, zip_path: str, extract_dir: str) -> str:
"""Unzip a file to the specified directory and return the path to the extracted JSON file"""
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
json_filename = zip_ref.namelist()[0]
zip_ref.extractall(extract_dir)
return os.path.join(extract_dir, json_filename)
async def _unzip_file(self, file_record: FileInDB, subdir: str, db: Session) -> str:
"""Unzip a file to the specified subdirectory and return the path to the extracted JSON file"""
try:
# Use the appropriate subdirectory based on the type
extract_path = self.identifiers_dir if subdir == "identifiers" else self.skus_dir
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(file_record.path, 'r') as zip_ref:
json_filename = zip_ref.namelist()[0]
zip_ref.extractall(extract_path)
json_path = os.path.join(extract_path, json_filename)
# Create a file record for the extracted JSON file
with open(json_path, 'r') as f:
json_data = f.read()
json_file_record = await self.file_service.save_file(
db=db,
file_data=json_data,
filename=json_filename,
subdir=f"mtgjson/{subdir}",
file_type="application/json",
content_type="application/json"
)
return str(json_file_record.path)
except Exception as e:
logger.error(f"Error unzipping file: {e}")
raise
def _stream_json_file(self, file_path: str) -> Generator[Dict[str, Any], None, None]:
"""Stream a JSON file and yield items one at a time"""
print(f"Starting to stream JSON file: {file_path}")
with open(file_path, 'r') as f:
# Load the entire file since MTGJSON uses a specific format
data = json.load(f)
# First yield the meta data
if "meta" in data:
yield {"type": "meta", "data": data["meta"]}
# Then yield each item in the data section
if "data" in data:
for key, value in data["data"].items():
yield {"type": "item", "data": {key: value}}
async def _process_batch(self, db: Session, items: list, model_class) -> int:
"""Process a batch of items and add them to the database"""
processed = 0
with transaction(db):
for item in items:
if model_class == MTGJSONCard:
# Check if card already exists
existing_card = db.query(MTGJSONCard).filter(MTGJSONCard.card_id == item["card_id"]).first()
if existing_card:
"""Stream a JSON file and yield items one at a time using a streaming parser"""
logger.info(f"Starting to stream JSON file: {file_path}")
try:
with open(file_path, 'r') as f:
# First, we need to find the start of the data section
data_started = False
current_key = None
current_value = []
brace_count = 0
for line in f:
line = line.strip()
if not line:
continue
new_item = MTGJSONCard(
card_id=item["card_id"],
name=item["name"],
set_code=item["set_code"],
uuid=item["uuid"],
abu_id=item.get("abu_id"),
card_kingdom_etched_id=item.get("card_kingdom_etched_id"),
card_kingdom_foil_id=item.get("card_kingdom_foil_id"),
card_kingdom_id=item.get("card_kingdom_id"),
cardsphere_id=item.get("cardsphere_id"),
cardsphere_foil_id=item.get("cardsphere_foil_id"),
cardtrader_id=item.get("cardtrader_id"),
csi_id=item.get("csi_id"),
mcm_id=item.get("mcm_id"),
mcm_meta_id=item.get("mcm_meta_id"),
miniaturemarket_id=item.get("miniaturemarket_id"),
mtg_arena_id=item.get("mtg_arena_id"),
mtgjson_foil_version_id=item.get("mtgjson_foil_version_id"),
mtgjson_non_foil_version_id=item.get("mtgjson_non_foil_version_id"),
mtgjson_v4_id=item.get("mtgjson_v4_id"),
mtgo_foil_id=item.get("mtgo_foil_id"),
mtgo_id=item.get("mtgo_id"),
multiverse_id=item.get("multiverse_id"),
scg_id=item.get("scg_id"),
scryfall_id=item.get("scryfall_id"),
scryfall_card_back_id=item.get("scryfall_card_back_id"),
scryfall_oracle_id=item.get("scryfall_oracle_id"),
scryfall_illustration_id=item.get("scryfall_illustration_id"),
tcgplayer_product_id=item.get("tcgplayer_product_id"),
tcgplayer_etched_product_id=item.get("tcgplayer_etched_product_id"),
tnt_id=item.get("tnt_id")
)
else: # MTGJSONSKU
# Check if SKU already exists
existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == item["sku_id"]).first()
if existing_sku:
continue
new_item = MTGJSONSKU(
sku_id=str(item["sku_id"]),
product_id=str(item["product_id"]),
condition=item["condition"],
finish=item["finish"],
language=item["language"],
printing=item["printing"],
card_id=item["card_id"]
)
db.add(new_item)
processed += 1
if not data_started:
if '"data":' in line:
data_started = True
# Skip the opening brace of the data object
line = line[line.find('"data":') + 7:].strip()
if line.startswith('{'):
line = line[1:].strip()
else:
# Yield meta data if found
if '"meta":' in line:
meta_start = line.find('"meta":') + 7
meta_end = line.rfind('}')
if meta_end > meta_start:
meta_json = line[meta_start:meta_end + 1]
try:
meta_data = json.loads(meta_json)
yield {"type": "meta", "data": meta_data}
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse meta data: {e}")
continue
# Process the data section
if data_started:
if not current_key:
# Look for a new key
if '"' in line:
key_start = line.find('"') + 1
key_end = line.find('"', key_start)
if key_end > key_start:
current_key = line[key_start:key_end]
# Get the rest of the line after the key
line = line[key_end + 1:].strip()
if ':' in line:
line = line[line.find(':') + 1:].strip()
if current_key:
# Accumulate the value
current_value.append(line)
brace_count += line.count('{') - line.count('}')
if brace_count == 0 and line.endswith(','):
# We have a complete value
value_str = ''.join(current_value).rstrip(',')
try:
value = json.loads(value_str)
yield {"type": "item", "data": {current_key: value}}
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse value for key {current_key}: {e}")
current_key = None
current_value = []
except Exception as e:
logger.error(f"Error streaming JSON file: {e}")
raise
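# Alternative sketch (an assumption, not used by this commit): the third-party
# ijson package can stream the same {"meta": ..., "data": {...}} layout
# without hand-rolled brace counting.
def _stream_data_items_with_ijson(path: str) -> Generator[Dict[str, Any], None, None]:
    import ijson  # hypothetical extra dependency
    with open(path, "rb") as f:
        for key, value in ijson.kvitems(f, "data"):
            yield {"type": "item", "data": {key: value}}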
return processed
async def download_and_process_identifiers(self, db: Session) -> Dict[str, int]:
"""Download, unzip and process AllIdentifiers.json.zip using streaming"""
self._print_progress("Starting MTGJSON identifiers processing...")
start_time = time.time()
async def get_identifiers(self, db: Session) -> Generator[Dict[str, Any], None, None]:
"""Download and get MTGJSON identifiers data"""
# Check if we have a cached version
cached_file = await self.file_service.get_file_by_filename(db, "AllIdentifiers.json")
if cached_file:
# Ensure the file exists at the path
if os.path.exists(cached_file.path):
return self._stream_json_file(cached_file.path)
# Download the file using FileService
# Download and process the file
file_record = await self._download_file(
db=db,
url="https://mtgjson.com/api/v5/AllIdentifiers.json.zip",
@@ -159,87 +175,22 @@ class MTGJSONService(BaseExternalService):
subdir="identifiers"
)
# Get the file path from the database record
zip_path = file_record.path
# Unzip and process the file
json_path = await self._unzip_file(file_record, "identifiers", db)
cards_processed = 0
current_batch = []
total_cards = 0
last_progress_time = time.time()
self._print_progress("Processing cards...")
try:
for item in self._stream_json_file(zip_path):
if item["type"] == "meta":
self._print_progress(f"Processing MTGJSON data version {item['data'].get('version')} from {item['data'].get('date')}")
continue
card_data = item["data"]
card_id = list(card_data.keys())[0]
card_info = card_data[card_id]
total_cards += 1
current_batch.append({
"card_id": card_id,
"name": card_info.get("name"),
"set_code": card_info.get("setCode"),
"uuid": card_info.get("uuid"),
"abu_id": card_info.get("identifiers", {}).get("abuId"),
"card_kingdom_etched_id": card_info.get("identifiers", {}).get("cardKingdomEtchedId"),
"card_kingdom_foil_id": card_info.get("identifiers", {}).get("cardKingdomFoilId"),
"card_kingdom_id": card_info.get("identifiers", {}).get("cardKingdomId"),
"cardsphere_id": card_info.get("identifiers", {}).get("cardsphereId"),
"cardsphere_foil_id": card_info.get("identifiers", {}).get("cardsphereFoilId"),
"cardtrader_id": card_info.get("identifiers", {}).get("cardtraderId"),
"csi_id": card_info.get("identifiers", {}).get("csiId"),
"mcm_id": card_info.get("identifiers", {}).get("mcmId"),
"mcm_meta_id": card_info.get("identifiers", {}).get("mcmMetaId"),
"miniaturemarket_id": card_info.get("identifiers", {}).get("miniaturemarketId"),
"mtg_arena_id": card_info.get("identifiers", {}).get("mtgArenaId"),
"mtgjson_foil_version_id": card_info.get("identifiers", {}).get("mtgjsonFoilVersionId"),
"mtgjson_non_foil_version_id": card_info.get("identifiers", {}).get("mtgjsonNonFoilVersionId"),
"mtgjson_v4_id": card_info.get("identifiers", {}).get("mtgjsonV4Id"),
"mtgo_foil_id": card_info.get("identifiers", {}).get("mtgoFoilId"),
"mtgo_id": card_info.get("identifiers", {}).get("mtgoId"),
"multiverse_id": card_info.get("identifiers", {}).get("multiverseId"),
"scg_id": card_info.get("identifiers", {}).get("scgId"),
"scryfall_id": card_info.get("identifiers", {}).get("scryfallId"),
"scryfall_card_back_id": card_info.get("identifiers", {}).get("scryfallCardBackId"),
"scryfall_oracle_id": card_info.get("identifiers", {}).get("scryfallOracleId"),
"scryfall_illustration_id": card_info.get("identifiers", {}).get("scryfallIllustrationId"),
"tcgplayer_product_id": card_info.get("identifiers", {}).get("tcgplayerProductId"),
"tcgplayer_etched_product_id": card_info.get("identifiers", {}).get("tcgplayerEtchedProductId"),
"tnt_id": card_info.get("identifiers", {}).get("tntId"),
"data": card_info
})
if len(current_batch) >= self.batch_size:
batch_processed = await self._process_batch(db, current_batch, MTGJSONCard)
cards_processed += batch_processed
current_batch = []
current_time = time.time()
if current_time - last_progress_time >= 1.0: # Update progress every second
self._print_progress(f"\r{self._format_progress(cards_processed, total_cards, start_time)}", end="")
last_progress_time = current_time
except Exception as e:
self._print_progress(f"\nError during processing: {str(e)}")
raise
# Process remaining items
if current_batch:
batch_processed = await self._process_batch(db, current_batch, MTGJSONCard)
cards_processed += batch_processed
total_time = time.time() - start_time
self._print_progress(f"\nProcessing complete! Processed {cards_processed} cards in {total_time:.1f} seconds")
return {"cards_processed": cards_processed}
# Return a generator that streams the JSON file
return self._stream_json_file(json_path)
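# Hypothetical consumption (not part of this diff): get_identifiers returns a
# synchronous generator, so it is awaited once and then iterated normally.
#
#   stream = await mtgjson_service.get_identifiers(db)
#   for item in stream:
#       if item["type"] == "item":
#           card_id, card_info = next(iter(item["data"].items()))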
async def download_and_process_skus(self, db: Session) -> Dict[str, int]:
"""Download, unzip and process TcgplayerSkus.json.zip using streaming"""
self._print_progress("Starting MTGJSON SKUs processing...")
start_time = time.time()
async def get_skus(self, db: Session) -> Generator[Dict[str, Any], None, None]:
"""Download and get MTGJSON SKUs data"""
# Check if we have a cached version
cached_file = await self.file_service.get_file_by_filename(db, "TcgplayerSkus.json")
if cached_file:
# Ensure the file exists at the path
if os.path.exists(cached_file.path):
return self._stream_json_file(cached_file.path)
# Download the file using FileService
# Download and process the file
file_record = await self._download_file(
db=db,
url="https://mtgjson.com/api/v5/TcgplayerSkus.json.zip",
@@ -247,64 +198,21 @@ class MTGJSONService(BaseExternalService):
subdir="skus"
)
# Get the file path from the database record
zip_path = file_record.path
# Unzip and process the file
json_path = await self._unzip_file(file_record, "skus", db)
skus_processed = 0
current_batch = []
total_skus = 0
last_progress_time = time.time()
self._print_progress("Processing SKUs...")
try:
for item in self._stream_json_file(zip_path):
if item["type"] == "meta":
self._print_progress(f"Processing MTGJSON SKUs version {item['data'].get('version')} from {item['data'].get('date')}")
continue
# The data structure is {card_uuid: [sku1, sku2, ...]}
for card_uuid, sku_list in item["data"].items():
for sku in sku_list:
total_skus += 1
current_batch.append({
"sku_id": str(sku.get("skuId")),
"product_id": str(sku.get("productId")),
"condition": sku.get("condition"),
"finish": sku.get("finish"),
"language": sku.get("language"),
"printing": sku.get("printing"),
"card_id": card_uuid,
"data": sku
})
if len(current_batch) >= self.batch_size:
batch_processed = await self._process_batch(db, current_batch, MTGJSONSKU)
skus_processed += batch_processed
current_batch = []
current_time = time.time()
if current_time - last_progress_time >= 1.0: # Update progress every second
self._print_progress(f"\r{self._format_progress(skus_processed, total_skus, start_time)}", end="")
last_progress_time = current_time
except Exception as e:
self._print_progress(f"\nError during processing: {str(e)}")
raise
# Process remaining items
if current_batch:
batch_processed = await self._process_batch(db, current_batch, MTGJSONSKU)
skus_processed += batch_processed
total_time = time.time() - start_time
self._print_progress(f"\nProcessing complete! Processed {skus_processed} SKUs in {total_time:.1f} seconds")
return {"skus_processed": skus_processed}
# Return a generator that streams the JSON file
return self._stream_json_file(json_path)
async def clear_cache(self) -> None:
async def clear_cache(self, db: Session) -> None:
"""Clear all cached data"""
for subdir in ["identifiers", "skus"]:
dir_path = os.path.join(self.cache_dir, subdir)
if os.path.exists(dir_path):
for filename in os.listdir(dir_path):
file_path = os.path.join(dir_path, filename)
if os.path.isfile(file_path):
os.unlink(file_path)
print("MTGJSON cache cleared")
try:
# Delete all files in the mtgjson subdirectory
files = await self.file_service.list_files(db, file_type=["json", "zip"])
for file in files:
if file.path.startswith("mtgjson/"):
await self.file_service.delete_file(db, file.id)
logger.info("MTGJSON cache cleared")
except Exception as e:
logger.error(f"Error clearing cache: {e}")
raise


@@ -3,256 +3,49 @@ from datetime import datetime, timedelta
import csv
import io
from app.services.external_api.base_external_service import BaseExternalService
from app.models.tcgplayer_group import TCGPlayerGroup
from app.models.tcgplayer_product import TCGPlayerProduct
from app.models.tcgplayer_category import TCGPlayerCategory
from app.db.database import get_db, transaction
from sqlalchemy.orm import Session
import py7zr
import os
from app.schemas.file import FileInDB
class TCGCSVService(BaseExternalService):
def __init__(self):
super().__init__(base_url="https://tcgcsv.com/")
async def get_groups(self, game_ids: List[int]) -> Dict[str, Any]:
async def get_groups(self, game_id: int) -> Dict[str, Any]:
"""Fetch groups for specific game IDs from TCGCSV API"""
game_ids_str = ",".join(map(str, game_ids))
endpoint = f"tcgplayer/{game_ids_str}/groups"
endpoint = f"tcgplayer/{game_id}/groups"
return await self._make_request("GET", endpoint)
async def get_products_and_prices(self, game_ids: List[int], group_id: int) -> List[Dict[str, Any]]:
async def get_products_and_prices(self, game_id: str, group_id: int) -> str:
"""Fetch products and prices for a specific group from TCGCSV API"""
game_ids_str = ",".join(map(str, game_ids))
endpoint = f"tcgplayer/{game_ids_str}/{group_id}/ProductsAndPrices.csv"
response = await self._make_request("GET", endpoint, headers={"Accept": "text/csv"})
# Parse CSV response
csv_data = io.StringIO(response)
reader = csv.DictReader(csv_data)
return list(reader)
endpoint = f"tcgplayer/{game_id}/{group_id}/ProductsAndPrices.csv"
return await self._make_request("GET", endpoint, headers={"Accept": "text/csv"})
async def get_categories(self) -> Dict[str, Any]:
"""Fetch all categories from TCGCSV API"""
endpoint = "tcgplayer/categories"
return await self._make_request("GET", endpoint)
async def get_archived_prices_for_date(self, db: Session, date_str: str) -> str:
async def get_archived_prices_for_date(self, date_str: str) -> bytes:
"""Fetch archived prices from TCGCSV API"""
# Download the archive file
endpoint = f"archive/tcgplayer/prices-{date_str}.ppmd.7z"
response = await self._make_request("GET", endpoint, binary=True)
# Save the archive file using FileService
file_record = await self.save_file(
db=db,
file_data=response,
file_name=f"prices-{date_str}.ppmd.7z",
subdir=f"tcgcsv/prices/zip",
file_type="application/x-7z-compressed"
)
# Extract the 7z file
with py7zr.SevenZipFile(file_record.path, 'r') as archive:
# Extract to a directory named after the date
extract_path = f"app/data/cache/tcgcsv/prices/{date_str}"
os.makedirs(extract_path, exist_ok=True)
archive.extractall(path=extract_path)
return date_str
return await self._make_request("GET", endpoint, binary=True)
async def get_archived_prices_for_date_range(self, start_date: str, end_date: str):
"""Fetch archived prices for a date range from TCGCSV API"""
# Convert string dates to datetime objects
async def get_tcgcsv_date_range(self, start_date: datetime, end_date: datetime) -> List[datetime]:
"""Get a date range for a given start and end date"""
start_dt = datetime.strptime(start_date, "%Y-%m-%d")
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
# Set minimum start date
min_start_date = datetime.strptime("2025-02-08", "%Y-%m-%d")
min_start_date = datetime.strptime("2024-02-08", "%Y-%m-%d")
max_end_date = datetime.now()
if start_dt < min_start_date:
start_dt = min_start_date
# Set maximum end date to today
today = datetime.now()
if end_dt > today:
end_dt = today
# Generate date range
if end_dt > max_end_date:
end_dt = max_end_date
date_range = []
current_dt = start_dt
while current_dt <= end_dt:
date_range.append(current_dt.strftime("%Y-%m-%d"))
current_dt += timedelta(days=1)
# Process each date
for date_str in date_range:
await self.get_archived_prices_for_date(date_str)
async def sync_groups_to_db(self, db: Session, game_ids: List[int]) -> List[TCGPlayerGroup]:
"""Fetch groups from API and sync them to the database"""
response = await self.get_groups(game_ids)
if not response.get("success"):
raise Exception(f"Failed to fetch groups: {response.get('errors')}")
groups = response.get("results", [])
synced_groups = []
with transaction(db):
for group_data in groups:
# Convert string dates to datetime objects
published_on = datetime.fromisoformat(group_data["publishedOn"].replace("Z", "+00:00")) if group_data.get("publishedOn") else None
modified_on = datetime.fromisoformat(group_data["modifiedOn"].replace("Z", "+00:00")) if group_data.get("modifiedOn") else None
# Check if group already exists
existing_group = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.group_id == group_data["groupId"]).first()
if existing_group:
# Update existing group
for key, value in {
"name": group_data["name"],
"abbreviation": group_data.get("abbreviation"),
"is_supplemental": group_data.get("isSupplemental", False),
"published_on": published_on,
"modified_on": modified_on,
"category_id": group_data.get("categoryId")
}.items():
setattr(existing_group, key, value)
synced_groups.append(existing_group)
else:
# Create new group
new_group = TCGPlayerGroup(
group_id=group_data["groupId"],
name=group_data["name"],
abbreviation=group_data.get("abbreviation"),
is_supplemental=group_data.get("isSupplemental", False),
published_on=published_on,
modified_on=modified_on,
category_id=group_data.get("categoryId")
)
db.add(new_group)
synced_groups.append(new_group)
return synced_groups
async def sync_products_to_db(self, db: Session, game_id: int, group_id: int) -> List[TCGPlayerProduct]:
"""Fetch products and prices for a group and sync them to the database"""
products_data = await self.get_products_and_prices(game_id, group_id)
synced_products = []
for product_data in products_data:
# Convert string dates to datetime objects
modified_on = datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None
# Convert price strings to floats, handling empty strings
def parse_price(price_str):
return float(price_str) if price_str else None
# Check if product already exists
existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == int(product_data["productId"])).first()
if existing_product:
# Update existing product
for key, value in {
"name": product_data["name"],
"clean_name": product_data.get("cleanName"),
"image_url": product_data.get("imageUrl"),
"category_id": int(product_data["categoryId"]),
"group_id": int(product_data["groupId"]),
"url": product_data.get("url"),
"modified_on": modified_on,
"image_count": int(product_data.get("imageCount", 0)),
"ext_rarity": product_data.get("extRarity"),
"ext_number": product_data.get("extNumber"),
"low_price": parse_price(product_data.get("lowPrice")),
"mid_price": parse_price(product_data.get("midPrice")),
"high_price": parse_price(product_data.get("highPrice")),
"market_price": parse_price(product_data.get("marketPrice")),
"direct_low_price": parse_price(product_data.get("directLowPrice")),
"sub_type_name": product_data.get("subTypeName")
}.items():
setattr(existing_product, key, value)
synced_products.append(existing_product)
else:
# Create new product
with transaction(db):
new_product = TCGPlayerProduct(
product_id=int(product_data["productId"]),
name=product_data["name"],
clean_name=product_data.get("cleanName"),
image_url=product_data.get("imageUrl"),
category_id=int(product_data["categoryId"]),
group_id=int(product_data["groupId"]),
url=product_data.get("url"),
modified_on=modified_on,
image_count=int(product_data.get("imageCount", 0)),
ext_rarity=product_data.get("extRarity"),
ext_number=product_data.get("extNumber"),
low_price=parse_price(product_data.get("lowPrice")),
mid_price=parse_price(product_data.get("midPrice")),
high_price=parse_price(product_data.get("highPrice")),
market_price=parse_price(product_data.get("marketPrice")),
direct_low_price=parse_price(product_data.get("directLowPrice")),
sub_type_name=product_data.get("subTypeName")
)
db.add(new_product)
synced_products.append(new_product)
return synced_products
async def sync_categories_to_db(self, db: Session) -> List[TCGPlayerCategory]:
"""Fetch categories from API and sync them to the database"""
response = await self.get_categories()
if not response.get("success"):
raise Exception(f"Failed to fetch categories: {response.get('errors')}")
categories = response.get("results", [])
synced_categories = []
with transaction(db):
for category_data in categories:
# Convert string dates to datetime objects
modified_on = datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
# Check if category already exists
existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
if existing_category:
# Update existing category
for key, value in {
"name": category_data["name"],
"display_name": category_data.get("displayName"),
"seo_category_name": category_data.get("seoCategoryName"),
"category_description": category_data.get("categoryDescription"),
"category_page_title": category_data.get("categoryPageTitle"),
"sealed_label": category_data.get("sealedLabel"),
"non_sealed_label": category_data.get("nonSealedLabel"),
"condition_guide_url": category_data.get("conditionGuideUrl"),
"is_scannable": category_data.get("isScannable", False),
"popularity": category_data.get("popularity", 0),
"is_direct": category_data.get("isDirect", False),
"modified_on": modified_on
}.items():
setattr(existing_category, key, value)
synced_categories.append(existing_category)
else:
# Create new category
new_category = TCGPlayerCategory(
category_id=category_data["categoryId"],
name=category_data["name"],
display_name=category_data.get("displayName"),
seo_category_name=category_data.get("seoCategoryName"),
category_description=category_data.get("categoryDescription"),
category_page_title=category_data.get("categoryPageTitle"),
sealed_label=category_data.get("sealedLabel"),
non_sealed_label=category_data.get("nonSealedLabel"),
condition_guide_url=category_data.get("conditionGuideUrl"),
is_scannable=category_data.get("isScannable", False),
popularity=category_data.get("popularity", 0),
is_direct=category_data.get("isDirect", False),
modified_on=modified_on
)
db.add(new_category)
synced_categories.append(new_category)
return synced_categories
return date_range
async def get_archived_prices_for_date_range(self, start_date: str, end_date: str) -> List[str]:
"""Get the list of TCGCSV archive dates to fetch for the given date range"""
date_range = await self.get_tcgcsv_date_range(start_date, end_date)
return date_range
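
For illustration, a minimal sketch of how the date clamping above behaves when called from an async caller (string dates are assumed, matching the strptime/strftime calls; the variable names are not from this codebase):
svc = TCGCSVService()
dates = await svc.get_tcgcsv_date_range("2023-01-01", "2030-01-01")
# The start is clamped up to the minimum archive date and the end down to today,
# yielding one "YYYY-MM-DD" string per day in between.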

View File

@@ -150,3 +150,10 @@ class FileService:
return FileInDB.model_validate(file_record)
else:
return None
async def get_file_by_filename(self, db: Session, filename: str) -> Optional[FileInDB]:
"""Get a file record from the database by filename"""
file_record = db.query(File).filter(File.name == filename).first()
if file_record:
return FileInDB.model_validate(file_record)
return None
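
A hedged usage sketch of the filename lookup (the file_service and db variables and the filename itself are illustrative only):
file_record = await file_service.get_file_by_filename(db, "prices-2024-02-08.ppmd.7z")
if file_record is not None:
    print(file_record.path)  # FileInDB exposes the stored path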

View File

@@ -142,13 +142,14 @@ class LabelPrinterService:
logger.error(f"Unexpected error in _send_print_request: {e}")
return False
async def print_file(self, file_path: Union[str, Path, FileInDB], label_size: Literal["dk1201", "dk1241"], label_type: Optional[Literal["address_label", "packing_slip", "set_label"]] = None) -> bool:
async def print_file(self, file_path: Union[str, Path, FileInDB], label_size: Literal["dk1201", "dk1241"], label_type: Optional[Literal["address_label", "packing_slip", "set_label", "return_label", "pirate_ship_label"]] = None, copies: Optional[int] = None) -> bool:
"""Print a PDF or PNG file to the label printer.
Args:
file_path: Path to the PDF or PNG file, or a FileInDB object
label_size: Size of label to use ("dk1201" or "dk1241")
label_type: Type of label to use ("address_label", "packing_slip", "set_label", "return_label", or "pirate_ship_label")
copies: Optional number of copies to print. If None, prints once.
Returns:
bool: True if print was successful, False otherwise
@@ -206,7 +207,7 @@ class LabelPrinterService:
resized_image = resized_image.resize((991, 306), Image.Resampling.LANCZOS)
# rotate the image 90 degrees for address, set, and return labels
if label_type == "address_label" or label_type == "set_label":
if label_type == "address_label" or label_type == "set_label" or label_type == "return_label":
rotate = "90"
cut = False
else:
@@ -240,16 +241,30 @@ class LabelPrinterService:
with open(cache_path, "wb") as f:
f.write(converted_image)
# Send to API
if not await self._send_print_request(cache_path):
logger.error(f"Failed to print page {i+1}")
return False
# Wait for printer to be ready before processing next page
if i < len(images) - 1: # Don't wait after the last page
if not await self._wait_for_printer_ready():
logger.error("Printer not ready for next page")
if copies:
# Send to API for each copy
for copy in range(copies):
logger.info(f"Printing copy {copy + 1} of {copies}")
if not await self._send_print_request(cache_path):
logger.error(f"Failed to print page {i+1}, copy {copy + 1}")
return False
# Wait for printer to be ready before next copy or page
if copy < copies - 1 or i < len(images) - 1:
if not await self._wait_for_printer_ready():
logger.error("Printer not ready for next copy/page")
return False
else:
# Send to API once (original behavior)
if not await self._send_print_request(cache_path):
logger.error(f"Failed to print page {i+1}")
return False
# Wait for printer to be ready before processing next page
if i < len(images) - 1: # Don't wait after the last page
if not await self._wait_for_printer_ready():
logger.error("Printer not ready for next page")
return False
return True
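
A minimal usage sketch of the copies-aware path from an async caller (the printer instance and the file path are assumed; the keyword arguments follow the signature above):
ok = await label_printer.print_file(
    "app/data/cache/labels/return_label.pdf",  # example path, not a real file in this repo
    label_size="dk1201",
    label_type="return_label",
    copies=2,  # each rendered page is sent to the printer twice
)
if not ok:
    logger.error("Label print failed")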

View File

@@ -1,4 +1,4 @@
from app.db.database import transaction, get_db
from app.db.database import transaction
from app.services.scheduler.base_scheduler import BaseScheduler
import logging
@@ -17,11 +17,10 @@ class SchedulerService:
self._service_manager = ServiceManager()
return self._service_manager
async def update_open_orders_hourly(self):
async def update_open_orders_hourly(self, db):
"""
Hourly update of orders from TCGPlayer API to database
"""
db = next(get_db())
try:
logger.info("Starting hourly order update")
# Get order management service
@@ -39,14 +38,11 @@ class SchedulerService:
except Exception as e:
logger.error(f"Error updating open orders: {str(e)}")
raise
finally:
db.close()
async def update_all_orders_daily(self):
async def update_all_orders_daily(self, db):
"""
Daily update of all orders from TCGPlayer API to database
"""
db = next(get_db())
try:
logger.info("Starting daily order update")
# Get order management service
@@ -64,21 +60,19 @@ class SchedulerService:
except Exception as e:
logger.error(f"Error updating all orders: {str(e)}")
raise
finally:
db.close()
async def start_scheduled_tasks(self):
async def start_scheduled_tasks(self, db):
"""Start all scheduled tasks"""
# Schedule open orders update to run every hour
await self.scheduler.schedule_task(
task_name="update_open_orders_hourly",
func=self.update_open_orders_hourly,
func=lambda: self.update_open_orders_hourly(db),
interval_seconds=60 * 60, # 1 hour
)
# Schedule all orders update to run once every 24 hours
await self.scheduler.schedule_task(
task_name="update_all_orders_daily",
func=self.update_all_orders_daily,
func=lambda: self.update_all_orders_daily(db),
interval_seconds=24 * 60 * 60, # 24 hours
)
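
A rough sketch of wiring the scheduler up at startup (how the session is obtained is an assumption; only start_scheduled_tasks(db) is taken from the code above):
db = next(get_db())  # assumed call site; get_db lives in app.db.database
scheduler_service = SchedulerService()  # constructor arguments, if any, are not shown here
await scheduler_service.start_scheduled_tasks(db)
Because the scheduled lambdas capture db, that session has to stay usable for as long as the tasks keep running.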

View File

@@ -26,7 +26,9 @@ class ServiceManager:
'set_label': 'app.services.set_label_service.SetLabelService',
'data_initialization': 'app.services.data_initialization.DataInitializationService',
'scheduler': 'app.services.scheduler.scheduler_service.SchedulerService',
'file': 'app.services.file_service.FileService'
'file': 'app.services.file_service.FileService',
'tcgcsv': 'app.services.external_api.tcgcsv.tcgcsv_service.TCGCSVService',
'mtgjson': 'app.services.external_api.mtgjson.mtgjson_service.MTGJSONService'
}
self._service_configs = {
'label_printer': {'printer_api_url': "http://192.168.1.110:8000"},