This commit is contained in:
2025-04-12 23:51:17 -04:00
parent df6490cab0
commit 56c2d1de47
2870 changed files with 3700 additions and 206 deletions

View File

@ -1,9 +1,10 @@
import os
import json
from datetime import datetime
from typing import Optional, List
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from sqlalchemy.orm import Session
from app.services.external_api.tcgcsv.tcgcsv_service import TCGCSVService
from app.services.external_api.mtgjson.mtgjson_service import MTGJSONService
from app.models.tcgplayer_group import TCGPlayerGroup
from app.models.tcgplayer_product import TCGPlayerProduct
from app.models.tcgplayer_category import TCGPlayerCategory
@ -11,32 +12,42 @@ from app.models.tcgplayer_category import TCGPlayerCategory
class DataInitializationService:
def __init__(self, cache_dir: str = "app/data/cache/tcgcsv"):
self.cache_dir = cache_dir
self.categories_dir = os.path.join(cache_dir, "categories")
self.groups_dir = os.path.join(cache_dir, "groups")
self.products_dir = os.path.join(cache_dir, "products")
self.tcgcsv_service = TCGCSVService()
self.mtgjson_service = MTGJSONService()
# Create all necessary directories
os.makedirs(cache_dir, exist_ok=True)
os.makedirs(self.categories_dir, exist_ok=True)
os.makedirs(self.groups_dir, exist_ok=True)
os.makedirs(self.products_dir, exist_ok=True)
def _get_cache_path(self, filename: str) -> str:
return os.path.join(self.cache_dir, filename)
def _get_cache_path(self, filename: str, subdir: str) -> str:
"""Get the full path for a cached file in the specified subdirectory"""
return os.path.join(self.cache_dir, subdir, filename)
async def _cache_categories(self, categories_data: dict):
"""Cache categories data to a JSON file"""
cache_path = self._get_cache_path("categories.json")
cache_path = self._get_cache_path("categories.json", "categories")
with open(cache_path, 'w') as f:
json.dump(categories_data, f, indent=2)
async def _cache_groups(self, game_ids: List[int], groups_data: dict):
for game_id in game_ids:
cache_path = self._get_cache_path(f"groups_{game_id}.json")
cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
with open(cache_path, 'w') as f:
json.dump(groups_data, f, default=str)
async def _cache_products(self, game_ids: List[int], group_id: int, products_data: list):
for game_id in game_ids:
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json")
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
with open(cache_path, 'w') as f:
json.dump(products_data, f, default=str)
async def _load_cached_categories(self) -> Optional[dict]:
cache_path = self._get_cache_path("categories.json")
cache_path = self._get_cache_path("categories.json", "categories")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
@ -45,7 +56,7 @@ class DataInitializationService:
async def _load_cached_groups(self, game_ids: List[int]) -> Optional[dict]:
# Try to load cached data for any of the game IDs
for game_id in game_ids:
cache_path = self._get_cache_path(f"groups_{game_id}.json")
cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
@ -54,147 +65,199 @@ class DataInitializationService:
async def _load_cached_products(self, game_ids: List[int], group_id: int) -> Optional[list]:
# Try to load cached data for any of the game IDs
for game_id in game_ids:
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json")
cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
if os.path.exists(cache_path):
with open(cache_path, 'r') as f:
return json.load(f)
return None
async def initialize_data(self, db: Session, game_ids: List[int], use_cache: bool = True) -> None:
"""Initialize TCGPlayer data, using cache if available and requested"""
async def initialize_data(
self,
db: Session,
game_ids: List[int],
use_cache: bool = True,
init_categories: bool = True,
init_groups: bool = True,
init_products: bool = True,
init_archived_prices: bool = False,
archived_prices_start_date: Optional[str] = None,
archived_prices_end_date: Optional[str] = None,
init_mtgjson: bool = True
) -> Dict[str, Any]:
"""Initialize TCGPlayer data with configurable steps"""
print("Initializing TCGPlayer data...")
results = {
"categories": 0,
"groups": {},
"products": {},
"archived_prices": False,
"mtgjson": {}
}
# Handle categories
categories_data = None
if use_cache:
categories_data = await self._load_cached_categories()
if not categories_data:
print("Fetching categories from API...")
categories_data = await self.tcgcsv_service.get_categories()
if init_categories:
print("\nInitializing categories...")
categories_data = None
if use_cache:
await self._cache_categories(categories_data)
categories_data = await self._load_cached_categories()
if not categories_data.get("success"):
raise Exception(f"Failed to fetch categories: {categories_data.get('errors')}")
if not categories_data:
print("Fetching categories from API...")
categories_data = await self.tcgcsv_service.get_categories()
if use_cache:
await self._cache_categories(categories_data)
# Sync categories to database
categories = categories_data.get("results", [])
synced_categories = []
for category_data in categories:
existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
if existing_category:
synced_categories.append(existing_category)
else:
new_category = TCGPlayerCategory(
category_id=category_data["categoryId"],
name=category_data["name"],
display_name=category_data.get("displayName"),
seo_category_name=category_data.get("seoCategoryName"),
category_description=category_data.get("categoryDescription"),
category_page_title=category_data.get("categoryPageTitle"),
sealed_label=category_data.get("sealedLabel"),
non_sealed_label=category_data.get("nonSealedLabel"),
condition_guide_url=category_data.get("conditionGuideUrl"),
is_scannable=category_data.get("isScannable", False),
popularity=category_data.get("popularity", 0),
is_direct=category_data.get("isDirect", False),
modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
)
db.add(new_category)
synced_categories.append(new_category)
db.commit()
print(f"Synced {len(synced_categories)} categories")
if not categories_data.get("success"):
raise Exception(f"Failed to fetch categories: {categories_data.get('errors')}")
# Sync categories to database
categories = categories_data.get("results", [])
synced_categories = []
for category_data in categories:
existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
if existing_category:
synced_categories.append(existing_category)
else:
new_category = TCGPlayerCategory(
category_id=category_data["categoryId"],
name=category_data["name"],
display_name=category_data.get("displayName"),
seo_category_name=category_data.get("seoCategoryName"),
category_description=category_data.get("categoryDescription"),
category_page_title=category_data.get("categoryPageTitle"),
sealed_label=category_data.get("sealedLabel"),
non_sealed_label=category_data.get("nonSealedLabel"),
condition_guide_url=category_data.get("conditionGuideUrl"),
is_scannable=category_data.get("isScannable", False),
popularity=category_data.get("popularity", 0),
is_direct=category_data.get("isDirect", False),
modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
)
db.add(new_category)
synced_categories.append(new_category)
db.commit()
results["categories"] = len(synced_categories)
print(f"Synced {len(synced_categories)} categories")
# Process each game ID separately
for game_id in game_ids:
print(f"\nProcessing game ID: {game_id}")
results["groups"][game_id] = 0
results["products"][game_id] = {}
# Handle groups for this game ID
groups_data = None
if use_cache:
groups_data = await self._load_cached_groups([game_id])
if not groups_data:
print(f"Fetching groups for game ID {game_id} from API...")
groups_data = await self.tcgcsv_service.get_groups([game_id])
if init_groups:
print(f"Initializing groups for game ID {game_id}...")
groups_data = None
if use_cache:
await self._cache_groups([game_id], groups_data)
groups_data = await self._load_cached_groups([game_id])
if not groups_data.get("success"):
raise Exception(f"Failed to fetch groups for game ID {game_id}: {groups_data.get('errors')}")
# Sync groups to database
groups = groups_data.get("results", [])
synced_groups = []
for group_data in groups:
existing_group = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.group_id == group_data["groupId"]).first()
if existing_group:
synced_groups.append(existing_group)
else:
new_group = TCGPlayerGroup(
group_id=group_data["groupId"],
name=group_data["name"],
abbreviation=group_data.get("abbreviation"),
is_supplemental=group_data.get("isSupplemental", False),
published_on=datetime.fromisoformat(group_data["publishedOn"].replace("Z", "+00:00")) if group_data.get("publishedOn") else None,
modified_on=datetime.fromisoformat(group_data["modifiedOn"].replace("Z", "+00:00")) if group_data.get("modifiedOn") else None,
category_id=group_data.get("categoryId")
)
db.add(new_group)
synced_groups.append(new_group)
db.commit()
print(f"Synced {len(synced_groups)} groups for game ID {game_id}")
# Handle products for each group in this game ID
for group in synced_groups:
products_data = None
if use_cache:
products_data = await self._load_cached_products([game_id], group.group_id)
if not products_data:
print(f"Fetching products for group {group.name} (game ID {game_id}) from API...")
products_data = await self.tcgcsv_service.get_products_and_prices([game_id], group.group_id)
if not groups_data:
print(f"Fetching groups for game ID {game_id} from API...")
groups_data = await self.tcgcsv_service.get_groups([game_id])
if use_cache:
await self._cache_products([game_id], group.group_id, products_data)
await self._cache_groups([game_id], groups_data)
# Sync products to database
synced_products = []
for product_data in products_data:
existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == int(product_data["productId"])).first()
if existing_product:
synced_products.append(existing_product)
if not groups_data.get("success"):
raise Exception(f"Failed to fetch groups for game ID {game_id}: {groups_data.get('errors')}")
# Sync groups to database
groups = groups_data.get("results", [])
synced_groups = []
for group_data in groups:
existing_group = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.group_id == group_data["groupId"]).first()
if existing_group:
synced_groups.append(existing_group)
else:
new_product = TCGPlayerProduct(
product_id=int(product_data["productId"]),
name=product_data["name"],
clean_name=product_data.get("cleanName"),
image_url=product_data.get("imageUrl"),
category_id=int(product_data["categoryId"]),
group_id=int(product_data["groupId"]),
url=product_data.get("url"),
modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
image_count=int(product_data.get("imageCount", 0)),
ext_rarity=product_data.get("extRarity"),
ext_number=product_data.get("extNumber"),
low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
sub_type_name=product_data.get("subTypeName")
new_group = TCGPlayerGroup(
group_id=group_data["groupId"],
name=group_data["name"],
abbreviation=group_data.get("abbreviation"),
is_supplemental=group_data.get("isSupplemental", False),
published_on=datetime.fromisoformat(group_data["publishedOn"].replace("Z", "+00:00")) if group_data.get("publishedOn") else None,
modified_on=datetime.fromisoformat(group_data["modifiedOn"].replace("Z", "+00:00")) if group_data.get("modifiedOn") else None,
category_id=group_data.get("categoryId")
)
db.add(new_product)
synced_products.append(new_product)
db.add(new_group)
synced_groups.append(new_group)
db.commit()
print(f"Synced {len(synced_products)} products for group {group.name} (game ID {game_id})")
results["groups"][game_id] = len(synced_groups)
print(f"Synced {len(synced_groups)} groups for game ID {game_id}")
if init_products:
# Handle products for each group in this game ID
for group in synced_groups:
print(f"Initializing products for group {group.name} (game ID {game_id})...")
products_data = None
if use_cache:
products_data = await self._load_cached_products([game_id], group.group_id)
if not products_data:
print(f"Fetching products for group {group.name} (game ID {game_id}) from API...")
products_data = await self.tcgcsv_service.get_products_and_prices([game_id], group.group_id)
if use_cache:
await self._cache_products([game_id], group.group_id, products_data)
# Sync products to database
synced_products = []
for product_data in products_data:
existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == int(product_data["productId"])).first()
if existing_product:
synced_products.append(existing_product)
else:
new_product = TCGPlayerProduct(
product_id=int(product_data["productId"]),
name=product_data["name"],
clean_name=product_data.get("cleanName"),
image_url=product_data.get("imageUrl"),
category_id=int(product_data["categoryId"]),
group_id=int(product_data["groupId"]),
url=product_data.get("url"),
modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
image_count=int(product_data.get("imageCount", 0)),
ext_rarity=product_data.get("extRarity"),
ext_number=product_data.get("extNumber"),
low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
sub_type_name=product_data.get("subTypeName")
)
db.add(new_product)
synced_products.append(new_product)
db.commit()
results["products"][game_id][group.group_id] = len(synced_products)
print(f"Synced {len(synced_products)} products for group {group.name} (game ID {game_id})")
if init_archived_prices:
if not archived_prices_start_date or not archived_prices_end_date:
raise ValueError("Both start_date and end_date are required for archived prices initialization")
print(f"\nInitializing archived prices from {archived_prices_start_date} to {archived_prices_end_date}...")
await self.tcgcsv_service.get_archived_prices_for_date_range(archived_prices_start_date, archived_prices_end_date)
results["archived_prices"] = True
print("Archived prices initialization completed")
if init_mtgjson:
print("\nInitializing MTGJSON data...")
identifiers_result = await self.mtgjson_service.download_and_process_identifiers(db)
skus_result = await self.mtgjson_service.download_and_process_skus(db)
results["mtgjson"] = {
"cards_processed": identifiers_result["cards_processed"],
"skus_processed": skus_result["skus_processed"]
}
return results
async def clear_cache(self) -> None:
"""Clear all cached data"""
for filename in os.listdir(self.cache_dir):
file_path = os.path.join(self.cache_dir, filename)
if os.path.isfile(file_path):
os.unlink(file_path)
for subdir in ["categories", "groups", "products"]:
dir_path = os.path.join(self.cache_dir, subdir)
if os.path.exists(dir_path):
for filename in os.listdir(dir_path):
file_path = os.path.join(dir_path, filename)
if os.path.isfile(file_path):
os.unlink(file_path)
await self.mtgjson_service.clear_cache()
print("Cache cleared")
async def close(self):