Data initialization: file-backed caching, batched syncs, archived price and MTGJSON processing
@@ -1,171 +1,171 @@
import os
import json
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from typing import Optional, List, Dict, Any, Union, Generator, Callable
from sqlalchemy.orm import Session
from app.services.external_api.tcgcsv.tcgcsv_service import TCGCSVService
from app.services.external_api.mtgjson.mtgjson_service import MTGJSONService
from app.models.tcgplayer_group import TCGPlayerGroup
from app.models.tcgplayer_product import TCGPlayerProduct
from app.models.tcgplayer_category import TCGPlayerCategory
from app.services.base_service import BaseService
from app.schemas.file import FileInDB
from app.db.database import transaction
import logging
from app.models.tcgplayer_price_history import TCGPlayerPriceHistory
from sqlalchemy import and_, bindparam, update, insert
import py7zr
import shutil

class DataInitializationService:
    def __init__(self, cache_dir: str = "app/data/cache/tcgcsv"):
        self.cache_dir = cache_dir
        self.categories_dir = os.path.join(cache_dir, "categories")
        self.groups_dir = os.path.join(cache_dir, "groups")
        self.products_dir = os.path.join(cache_dir, "products")
        self.tcgcsv_service = TCGCSVService()
        self.mtgjson_service = MTGJSONService()

        # Create all necessary directories
        os.makedirs(cache_dir, exist_ok=True)
        os.makedirs(self.categories_dir, exist_ok=True)
        os.makedirs(self.groups_dir, exist_ok=True)
        os.makedirs(self.products_dir, exist_ok=True)
logger = logging.getLogger(__name__)

    def _get_cache_path(self, filename: str, subdir: str) -> str:
        """Get the full path for a cached file in the specified subdirectory"""
        return os.path.join(self.cache_dir, subdir, filename)

    async def _cache_categories(self, categories_data: dict):
        """Cache categories data to a JSON file"""
        cache_path = self._get_cache_path("categories.json", "categories")
        with open(cache_path, 'w') as f:
            json.dump(categories_data, f, indent=2)
class DataInitializationService(BaseService):
    def __init__(self):
        super().__init__(None)

    async def _cache_groups(self, game_ids: List[int], groups_data: dict):
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
            with open(cache_path, 'w') as f:
                json.dump(groups_data, f, default=str)

    async def _cache_products(self, game_ids: List[int], group_id: int, products_data: list):
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
            with open(cache_path, 'w') as f:
                json.dump(products_data, f, default=str)

    async def _load_cached_categories(self) -> Optional[dict]:
        cache_path = self._get_cache_path("categories.json", "categories")
        if os.path.exists(cache_path):
            with open(cache_path, 'r') as f:
                return json.load(f)
        return None

    async def _load_cached_groups(self, game_ids: List[int]) -> Optional[dict]:
        # Try to load cached data for any of the game IDs
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
            if os.path.exists(cache_path):
                with open(cache_path, 'r') as f:
                    return json.load(f)
        return None

    async def _load_cached_products(self, game_ids: List[int], group_id: int) -> Optional[list]:
        # Try to load cached data for any of the game IDs
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
            if os.path.exists(cache_path):
                with open(cache_path, 'r') as f:
                    return json.load(f)
        return None

    async def initialize_data(
    async def _cache_data(
        self,
        db: Session,
        game_ids: List[int],
        use_cache: bool = True,
        init_categories: bool = True,
        init_groups: bool = True,
        init_products: bool = True,
        init_archived_prices: bool = False,
        archived_prices_start_date: Optional[str] = None,
        archived_prices_end_date: Optional[str] = None,
        init_mtgjson: bool = True
    ) -> Dict[str, Any]:
        """Initialize TCGPlayer data with configurable steps"""
        print("Initializing TCGPlayer data...")
        results = {
            "categories": 0,
            "groups": {},
            "products": {},
            "archived_prices": False,
            "mtgjson": {}
        }
        data: Union[dict, list],
        filename: str,
        subdir: str,
        default_str: bool = False,
        file_type: str = "json",
        content_type: str = "application/json",
        metadata: Optional[Dict] = None
    ) -> FileInDB:
        """Generic function to cache data to a JSON file"""
        file_data = json.dumps(data, default=str if default_str else None, indent=2)
        return await self.file_service.save_file(
            db,
            file_data,
            filename,
            subdir,
            file_type=file_type,
            content_type=content_type,
            metadata=metadata
        )

        if init_categories:
            print("\nInitializing categories...")
            categories_data = None
            if use_cache:
                categories_data = await self._load_cached_categories()
    async def _load_cached_data(
        self,
        db: Session,
        filename: str
    ) -> Optional[Dict[str, Any]]:
        """Generic function to load cached data from a JSON file with 7-day expiration"""
        file_record = await self.file_service.get_file_by_filename(db, filename)
        if file_record:
            # Check if cache is expired (7 days)
            cache_age = datetime.now() - file_record.created_at
            if cache_age.days < 7:
                with open(file_record.path, 'r') as f:
                    return json.load(f)
            else:
                logger.info(f"Cache expired for {filename}, age: {cache_age.days} days")
                # Delete the expired cache file
                await self.file_service.delete_file(db, file_record.id)
        return None

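A minimal sketch of how the two generic helpers above are meant to pair up, assuming self is a wired-up DataInitializationService and db is an open Session; the filename, subdirectory, and payload are placeholders, not names used elsewhere in this commit:

async def refresh_example_cache(self, db: Session) -> dict:
    # Serve from the file-backed cache while it is younger than 7 days...
    cached = await self._load_cached_data(db, "example.json")
    if cached is not None:
        return cached
    # ...otherwise rebuild the payload and write it back through _cache_data
    fresh = {"results": []}  # placeholder payload
    await self._cache_data(db, fresh, "example.json", "tcgcsv/example")
    return fresh
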
    async def sync_categories(self, db: Session, categories_data: dict):
        """Sync categories data to the database using streaming for large datasets"""
        categories = categories_data.get("results", [])
        batch_size = 1000  # Process in batches of 1000
        total_categories = len(categories)

        with transaction(db):
            for i in range(0, total_categories, batch_size):
                batch = categories[i:i + batch_size]
                for category_data in batch:
                    existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
                    if existing_category:
                        # Update existing category
                        for key, value in {
                            "name": category_data["name"],
                            "display_name": category_data.get("displayName"),
                            "seo_category_name": category_data.get("seoCategoryName"),
                            "category_description": category_data.get("categoryDescription"),
                            "category_page_title": category_data.get("categoryPageTitle"),
                            "sealed_label": category_data.get("sealedLabel"),
                            "non_sealed_label": category_data.get("nonSealedLabel"),
                            "condition_guide_url": category_data.get("conditionGuideUrl"),
                            "is_scannable": category_data.get("isScannable", False),
                            "popularity": category_data.get("popularity", 0),
                            "is_direct": category_data.get("isDirect", False),
                            "modified_on": datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
                        }.items():
                            setattr(existing_category, key, value)
                    else:
                        new_category = TCGPlayerCategory(
                            category_id=category_data["categoryId"],
                            name=category_data["name"],
                            display_name=category_data.get("displayName"),
                            seo_category_name=category_data.get("seoCategoryName"),
                            category_description=category_data.get("categoryDescription"),
                            category_page_title=category_data.get("categoryPageTitle"),
                            sealed_label=category_data.get("sealedLabel"),
                            non_sealed_label=category_data.get("nonSealedLabel"),
                            condition_guide_url=category_data.get("conditionGuideUrl"),
                            is_scannable=category_data.get("isScannable", False),
                            popularity=category_data.get("popularity", 0),
                            is_direct=category_data.get("isDirect", False),
                            modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
                        )
                        db.add(new_category)

                # Commit after each batch
                db.commit()
                logger.info(f"Processed {min(i + batch_size, total_categories)}/{total_categories} categories")

            if not categories_data:
                print("Fetching categories from API...")
                categories_data = await self.tcgcsv_service.get_categories()
                if use_cache:
                    await self._cache_categories(categories_data)

            if not categories_data.get("success"):
                raise Exception(f"Failed to fetch categories: {categories_data.get('errors')}")

            # Sync categories to database
            categories = categories_data.get("results", [])
            synced_categories = []
            for category_data in categories:
                existing_category = db.query(TCGPlayerCategory).filter(TCGPlayerCategory.category_id == category_data["categoryId"]).first()
                if existing_category:
                    synced_categories.append(existing_category)
                else:
                    new_category = TCGPlayerCategory(
                        category_id=category_data["categoryId"],
                        name=category_data["name"],
                        display_name=category_data.get("displayName"),
                        seo_category_name=category_data.get("seoCategoryName"),
                        category_description=category_data.get("categoryDescription"),
                        category_page_title=category_data.get("categoryPageTitle"),
                        sealed_label=category_data.get("sealedLabel"),
                        non_sealed_label=category_data.get("nonSealedLabel"),
                        condition_guide_url=category_data.get("conditionGuideUrl"),
                        is_scannable=category_data.get("isScannable", False),
                        popularity=category_data.get("popularity", 0),
                        is_direct=category_data.get("isDirect", False),
                        modified_on=datetime.fromisoformat(category_data["modifiedOn"].replace("Z", "+00:00")) if category_data.get("modifiedOn") else None
                    )
                    db.add(new_category)
                    synced_categories.append(new_category)
            db.commit()
            results["categories"] = len(synced_categories)
            print(f"Synced {len(synced_categories)} categories")

        # Process each game ID separately
        for game_id in game_ids:
            print(f"\nProcessing game ID: {game_id}")
            results["groups"][game_id] = 0
            results["products"][game_id] = {}

            if init_groups:
                print(f"Initializing groups for game ID {game_id}...")
                groups_data = None
                if use_cache:
                    groups_data = await self._load_cached_groups([game_id])

                if not groups_data:
                    print(f"Fetching groups for game ID {game_id} from API...")
                    groups_data = await self.tcgcsv_service.get_groups([game_id])
                    if use_cache:
                        await self._cache_groups([game_id], groups_data)

                if not groups_data.get("success"):
                    raise Exception(f"Failed to fetch groups for game ID {game_id}: {groups_data.get('errors')}")

                # Sync groups to database
                groups = groups_data.get("results", [])
                synced_groups = []
                for group_data in groups:
    async def init_categories(self, db: Session, use_cache: bool = True) -> bool:
        """Initialize categories data"""
        logger.info("Starting categories initialization")
        if use_cache:
            categories_data = await self._load_cached_data(db, "categories.json")
            if categories_data:
                await self.sync_categories(db, categories_data)
                logger.info("Categories initialized from cache")
                return True
            else:
                logger.warning("No cached categories data found")
                return False
        else:
            tcgcsv_service = self.get_service('tcgcsv')
            categories_data = await tcgcsv_service.get_categories()

            # Save the categories data
            await self._cache_data(
                db,
                categories_data,
                "categories.json",
                "tcgcsv/categories",
                file_type="json",
                content_type="application/json"
            )

            await self.sync_categories(db, categories_data)
            logger.info("Categories initialized from API")
            return True

    async def sync_groups(self, db: Session, groups_data: dict):
        """Sync groups data to the database using streaming for large datasets"""
        groups = groups_data.get("results", [])
        batch_size = 1000  # Process in batches of 1000
        total_groups = len(groups)

        with transaction(db):
            for i in range(0, total_groups, batch_size):
                batch = groups[i:i + batch_size]
                for group_data in batch:
                    existing_group = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.group_id == group_data["groupId"]).first()
                    if existing_group:
                        synced_groups.append(existing_group)
                        # Update existing group
                        for key, value in {
                            "name": group_data["name"],
                            "abbreviation": group_data.get("abbreviation"),
                            "is_supplemental": group_data.get("isSupplemental", False),
                            "published_on": datetime.fromisoformat(group_data["publishedOn"].replace("Z", "+00:00")) if group_data.get("publishedOn") else None,
                            "modified_on": datetime.fromisoformat(group_data["modifiedOn"].replace("Z", "+00:00")) if group_data.get("modifiedOn") else None,
                            "category_id": group_data.get("categoryId")
                        }.items():
                            setattr(existing_group, key, value)
                    else:
                        new_group = TCGPlayerGroup(
                            group_id=group_data["groupId"],
@@ -177,88 +177,561 @@ class DataInitializationService:
                            category_id=group_data.get("categoryId")
                        )
                        db.add(new_group)
                        synced_groups.append(new_group)

                # Commit after each batch
                db.commit()
                results["groups"][game_id] = len(synced_groups)
                print(f"Synced {len(synced_groups)} groups for game ID {game_id}")
                logger.info(f"Processed {min(i + batch_size, total_groups)}/{total_groups} groups")

            if init_products:
                # Handle products for each group in this game ID
                for group in synced_groups:
                    print(f"Initializing products for group {group.name} (game ID {game_id})...")
                    products_data = None
                    if use_cache:
                        products_data = await self._load_cached_products([game_id], group.group_id)
    async def init_groups(self, db: Session, use_cache: bool = True, game_ids: List[int] = None) -> bool:
        """Initialize groups data"""
        logger.info(f"Starting groups initialization for game IDs: {game_ids}")
        tcgcsv_service = self.get_service('tcgcsv')
        for game_id in game_ids:
            if use_cache:
                groups_data = await self._load_cached_data(db, f"groups_{game_id}.json")
                if groups_data:
                    await self.sync_groups(db, groups_data)
                    logger.info(f"Groups initialized from cache for game ID {game_id}")
                else:
                    logger.warning(f"No cached groups data found for game ID {game_id}")
                    return False
            else:
                groups_data = await tcgcsv_service.get_groups(game_id)

                # Save the groups data
                await self._cache_data(
                    db,
                    groups_data,
                    f"groups_{game_id}.json",
                    "tcgcsv/groups",
                    file_type="json",
                    content_type="application/json"
                )

                await self.sync_groups(db, groups_data)
                logger.info(f"Groups initialized from API for game ID {game_id}")
        return True

    async def sync_products(self, db: Session, products_data: str):
        """Sync products data to the database using streaming for large datasets"""
        import csv
        import io

        # Parse CSV data
        csv_reader = csv.DictReader(io.StringIO(products_data))
        products_list = list(csv_reader)
        batch_size = 1000  # Process in batches of 1000
        total_products = len(products_list)

        with transaction(db):
            for i in range(0, total_products, batch_size):
                batch = products_list[i:i + batch_size]
                for product_data in batch:
                    existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == product_data["productId"]).first()
                    if existing_product:
                        # Update existing product
                        for key, value in {
                            "name": product_data["name"],
                            "clean_name": product_data.get("cleanName"),
                            "image_url": product_data.get("imageUrl"),
                            "category_id": product_data.get("categoryId"),
                            "group_id": product_data.get("groupId"),
                            "url": product_data.get("url"),
                            "modified_on": datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
                            "image_count": product_data.get("imageCount", 0),
                            "ext_rarity": product_data.get("extRarity"),
                            "ext_number": product_data.get("extNumber"),
                            "low_price": float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
                            "mid_price": float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
                            "high_price": float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
                            "market_price": float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
                            "direct_low_price": float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
                            "sub_type_name": product_data.get("subTypeName")
                        }.items():
                            setattr(existing_product, key, value)
                    else:
                        new_product = TCGPlayerProduct(
                            product_id=product_data["productId"],
                            name=product_data["name"],
                            clean_name=product_data.get("cleanName"),
                            image_url=product_data.get("imageUrl"),
                            category_id=product_data.get("categoryId"),
                            group_id=product_data.get("groupId"),
                            url=product_data.get("url"),
                            modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
                            image_count=product_data.get("imageCount", 0),
                            ext_rarity=product_data.get("extRarity"),
                            ext_subtype=product_data.get("extSubtype"),
                            ext_oracle_text=product_data.get("extOracleText"),
                            ext_number=product_data.get("extNumber"),
                            low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
                            mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
                            high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
                            market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
                            direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
                            sub_type_name=product_data.get("subTypeName"),
                            ext_power=product_data.get("extPower"),
                            ext_toughness=product_data.get("extToughness"),
                            ext_flavor_text=product_data.get("extFlavorText")
                        )
                        db.add(new_product)

                # Commit after each batch
                db.commit()
                logger.info(f"Processed {min(i + batch_size, total_products)}/{total_products} products")

                    if not products_data:
                        print(f"Fetching products for group {group.name} (game ID {game_id}) from API...")
                        products_data = await self.tcgcsv_service.get_products_and_prices([game_id], group.group_id)
                        if use_cache:
                            await self._cache_products([game_id], group.group_id, products_data)
    async def init_products(self, db: Session, use_cache: bool = True, game_ids: List[int] = None) -> bool:
        """Initialize products data"""
        logger.info(f"Starting products initialization for game IDs: {game_ids}")
        tcgcsv_service = self.get_service('tcgcsv')
        for game_id in game_ids:
            groups = db.query(TCGPlayerGroup).filter(TCGPlayerGroup.category_id == game_id).all()
            logger.info(f"Processing {len(groups)} groups for game ID {game_id}")
            for group in groups:
                if use_cache:
                    products_data = await self._load_cached_data(db, f"products_{game_id}_{group.group_id}.json")
                    if products_data:
                        await self.sync_products(db, products_data)
                        logger.info(f"Products initialized from cache for group {group.group_id}")
                    else:
                        logger.warning(f"No cached products data found for group {group.group_id}")
                        continue
                else:
                    # Get CSV data from API
                    csv_data = await tcgcsv_service.get_products_and_prices(game_id, group.group_id)

                    # Save the CSV file
                    await self.file_service.save_file(
                        db,
                        csv_data,
                        f"products_{game_id}_{group.group_id}.csv",
                        "tcgcsv/products",
                        file_type="csv",
                        content_type="text/csv"
                    )

                    # Parse and sync the CSV data
                    await self.sync_products(db, csv_data)
                    logger.info(f"Products initialized from API for group {group.group_id}")
        return True

                    # Sync products to database
                    synced_products = []
                    for product_data in products_data:
                        existing_product = db.query(TCGPlayerProduct).filter(TCGPlayerProduct.product_id == int(product_data["productId"])).first()
                        if existing_product:
                            synced_products.append(existing_product)
                        else:
                            new_product = TCGPlayerProduct(
                                product_id=int(product_data["productId"]),
                                name=product_data["name"],
                                clean_name=product_data.get("cleanName"),
                                image_url=product_data.get("imageUrl"),
                                category_id=int(product_data["categoryId"]),
                                group_id=int(product_data["groupId"]),
                                url=product_data.get("url"),
                                modified_on=datetime.fromisoformat(product_data["modifiedOn"].replace("Z", "+00:00")) if product_data.get("modifiedOn") else None,
                                image_count=int(product_data.get("imageCount", 0)),
                                ext_rarity=product_data.get("extRarity"),
                                ext_number=product_data.get("extNumber"),
                                low_price=float(product_data.get("lowPrice")) if product_data.get("lowPrice") else None,
                                mid_price=float(product_data.get("midPrice")) if product_data.get("midPrice") else None,
                                high_price=float(product_data.get("highPrice")) if product_data.get("highPrice") else None,
                                market_price=float(product_data.get("marketPrice")) if product_data.get("marketPrice") else None,
                                direct_low_price=float(product_data.get("directLowPrice")) if product_data.get("directLowPrice") else None,
                                sub_type_name=product_data.get("subTypeName")
                            )
                            db.add(new_product)
                            synced_products.append(new_product)
                    db.commit()
                    results["products"][game_id][group.group_id] = len(synced_products)
                    print(f"Synced {len(synced_products)} products for group {group.name} (game ID {game_id})")

        if init_archived_prices:
            if not archived_prices_start_date or not archived_prices_end_date:
                raise ValueError("Both start_date and end_date are required for archived prices initialization")

            print(f"\nInitializing archived prices from {archived_prices_start_date} to {archived_prices_end_date}...")
            await self.tcgcsv_service.get_archived_prices_for_date_range(archived_prices_start_date, archived_prices_end_date)
            results["archived_prices"] = True
            print("Archived prices initialization completed")

        if init_mtgjson:
            print("\nInitializing MTGJSON data...")
            identifiers_result = await self.mtgjson_service.download_and_process_identifiers(db)
            skus_result = await self.mtgjson_service.download_and_process_skus(db)
            results["mtgjson"] = {
                "cards_processed": identifiers_result["cards_processed"],
                "skus_processed": skus_result["skus_processed"]
    async def sync_archived_prices(self, db: Session, archived_prices_data: dict, date: datetime):
        """Sync archived prices data to the database using bulk operations.
        Note: Historical prices are never updated, only new records are inserted."""
        from sqlalchemy import insert
        from app.models.tcgplayer_price_history import TCGPlayerPriceHistory

        # Prepare data for bulk operations
        price_records = []

        for price_data in archived_prices_data.get("results", []):
            record = {
                "product_id": price_data["productId"],
                "date": date,
                "sub_type_name": price_data["subTypeName"],
                "low_price": price_data.get("lowPrice"),
                "mid_price": price_data.get("midPrice"),
                "high_price": price_data.get("highPrice"),
                "market_price": price_data.get("marketPrice"),
                "direct_low_price": price_data.get("directLowPrice")
            }
            price_records.append(record)

        if not price_records:
            return

        # Get existing records in bulk to avoid duplicates
        product_ids = [r["product_id"] for r in price_records]
        sub_type_names = [r["sub_type_name"] for r in price_records]

        existing_records = db.query(TCGPlayerPriceHistory).filter(
            TCGPlayerPriceHistory.product_id.in_(product_ids),
            TCGPlayerPriceHistory.date == date,
            TCGPlayerPriceHistory.sub_type_name.in_(sub_type_names)
        ).all()

        # Filter out existing records
        existing_keys = {(r.product_id, r.date, r.sub_type_name) for r in existing_records}
        to_insert = [
            record for record in price_records
            if (record["product_id"], record["date"], record["sub_type_name"]) not in existing_keys
        ]

        # Perform bulk insert for new records only
        if to_insert:
            stmt = insert(TCGPlayerPriceHistory)
            db.execute(stmt, to_insert)
            db.commit()

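For reference, a minimal sketch of the payload shape sync_archived_prices appears to expect, inferred from the parsing above; the product ID and prices are placeholders, and service/db stand for a wired-up DataInitializationService and an open Session:

sample_prices = {
    "success": True,
    "results": [
        {
            "productId": 12345,        # placeholder TCGPlayer product ID
            "subTypeName": "Normal",
            "lowPrice": 0.25,
            "midPrice": 0.75,
            "highPrice": 2.00,
            "marketPrice": 0.60,
            "directLowPrice": None,
        }
    ],
}
# await service.sync_archived_prices(db, sample_prices, datetime(2024, 1, 1))
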
    async def init_archived_prices(self, db: Session, start_date: datetime, end_date: datetime, use_cache: bool = True, game_ids: List[int] = None) -> bool:
        """Initialize archived prices data"""
        logger.info(f"Starting archived prices initialization from {start_date} to {end_date}")
        tcgcsv_service = self.get_service('tcgcsv')
        processed_dates = await tcgcsv_service.get_tcgcsv_date_range(start_date, end_date)
        logger.info(f"Processing {len(processed_dates)} dates")

        # Convert game_ids to set for faster lookups
        desired_game_ids = set(game_ids) if game_ids else set()

        for date in processed_dates:
            date_path = f"app/data/cache/tcgcsv/prices/{date}"

            # Check if we already have the data for this date
            if use_cache and os.path.exists(date_path):
                logger.info(f"Using cached price data for {date}")
            else:
                logger.info(f"Downloading and processing archived prices for {date}")
                # Download and extract the archive
                archive_data = await tcgcsv_service.get_archived_prices_for_date(date)

                # Save the archive file
                file_record = await self.file_service.save_file(
                    db,
                    archive_data,
                    f"prices-{date}.ppmd.7z",
                    "tcgcsv/prices/zip",
                    file_type="application/x-7z-compressed",
                    content_type="application/x-7z-compressed"
                )

                # Extract the 7z file to a temporary directory
                temp_extract_path = f"app/data/cache/tcgcsv/prices/temp_{date}"
                os.makedirs(temp_extract_path, exist_ok=True)

                with py7zr.SevenZipFile(file_record.path, 'r') as archive:
                    archive.extractall(path=temp_extract_path)

                # Find the date subdirectory in the temp directory
                date_subdir = os.path.join(temp_extract_path, str(date))
                if os.path.exists(date_subdir):
                    # Remove existing directory if it exists
                    if os.path.exists(date_path):
                        shutil.rmtree(date_path)

                    # Create the destination directory
                    os.makedirs(date_path, exist_ok=True)

                    # Move contents from the date subdirectory to the final path
                    for item in os.listdir(date_subdir):
                        src = os.path.join(date_subdir, item)
                        dst = os.path.join(date_path, item)
                        os.rename(src, dst)

                    # Clean up the temporary directory
                    os.rmdir(date_subdir)
                    os.rmdir(temp_extract_path)

            # Process each category directory
            for category_id in os.listdir(date_path):
                # Skip categories that aren't in our desired game IDs
                if int(category_id) not in desired_game_ids:
                    continue

                category_path = os.path.join(date_path, category_id)
                if not os.path.isdir(category_path):
                    continue

                # Process each group directory
                for group_id in os.listdir(category_path):
                    group_path = os.path.join(category_path, group_id)
                    if not os.path.isdir(group_path):
                        continue

                    # Process the prices file
                    prices_file = os.path.join(group_path, "prices")
                    if not os.path.exists(prices_file):
                        continue

                    try:
                        with open(prices_file, 'r') as f:
                            price_data = json.load(f)
                            if price_data.get("success"):
                                await self.sync_archived_prices(db, price_data, datetime.strptime(date, "%Y-%m-%d"))
                                logger.info(f"Processed prices for category {category_id}, group {group_id} on {date}")
                    except Exception as e:
                        logger.error(f"Error processing prices file {prices_file}: {str(e)}")
                        continue

        return True

    async def init_mtgjson(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
        """Initialize MTGJSON data"""
        logger.info("Starting MTGJSON initialization")
        mtgjson_service = self.get_service('mtgjson')
        identifiers_count = 0
        skus_count = 0

        # Process identifiers
        if use_cache:
            cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_identifiers.json")
            if cached_file and os.path.exists(cached_file.path):
                logger.info("MTGJSON identifiers initialized from cache")
                identifiers_count = await self._process_streamed_data(
                    db,
                    self._stream_json_file(cached_file.path),
                    "mtgjson_identifiers.json",
                    "mtgjson",
                    self.sync_mtgjson_identifiers
                )
            else:
                logger.info("Downloading MTGJSON identifiers from API")
                identifiers_count = await self._process_streamed_data(
                    db,
                    await mtgjson_service.get_identifiers(db),
                    "mtgjson_identifiers.json",
                    "mtgjson",
                    self.sync_mtgjson_identifiers
                )
        else:
            logger.info("Downloading MTGJSON identifiers from API")
            identifiers_count = await self._process_streamed_data(
                db,
                await mtgjson_service.get_identifiers(db),
                "mtgjson_identifiers.json",
                "mtgjson",
                self.sync_mtgjson_identifiers
            )

        # Process SKUs
        if use_cache:
            cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_skus.json")
            if cached_file and os.path.exists(cached_file.path):
                logger.info("MTGJSON SKUs initialized from cache")
                skus_count = await self._process_streamed_data(
                    db,
                    self._stream_json_file(cached_file.path),
                    "mtgjson_skus.json",
                    "mtgjson",
                    self.sync_mtgjson_skus
                )
            else:
                logger.info("Downloading MTGJSON SKUs from API")
                skus_count = await self._process_streamed_data(
                    db,
                    await mtgjson_service.get_skus(db),
                    "mtgjson_skus.json",
                    "mtgjson",
                    self.sync_mtgjson_skus
                )
        else:
            logger.info("Downloading MTGJSON SKUs from API")
            skus_count = await self._process_streamed_data(
                db,
                await mtgjson_service.get_skus(db),
                "mtgjson_skus.json",
                "mtgjson",
                self.sync_mtgjson_skus
            )

        return {
            "identifiers_processed": identifiers_count,
            "skus_processed": skus_count
        }

    async def _process_streamed_data(
        self,
        db: Session,
        data_stream: Generator[Dict[str, Any], None, None],
        filename: str,
        subdir: str,
        sync_func: Callable
    ) -> int:
        """Process streamed data and sync to database"""
        count = 0
        items = []
        batch_size = 1000

        for item in data_stream:
            if item["type"] == "meta":
                # Handle meta data separately
                continue

            count += 1
            items.append(item["data"])

            # Process in batches
            if len(items) >= batch_size:
                await sync_func(db, items)
                items = []

        # Process any remaining items
        if items:
            await sync_func(db, items)

        return count

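A minimal sketch of the stream contract _process_streamed_data appears to assume: each yielded item is a dict with a "type" key, "meta" items are skipped, and every other item contributes its "data" payload to the current batch. The generator and sync function below are hypothetical stand-ins, not part of the MTGJSON service:

def example_stream() -> Generator[Dict[str, Any], None, None]:
    # Skipped by _process_streamed_data
    yield {"type": "meta", "data": {"version": "example"}}
    # Counted and batched by _process_streamed_data
    yield {"type": "card", "data": {"example-card-id": {"name": "Example Card"}}}

async def example_sync_func(db: Session, items: List[Dict[str, Any]]) -> None:
    # Stand-in for sync_mtgjson_identifiers / sync_mtgjson_skus
    logger.info(f"Would sync {len(items)} items")

# count = await self._process_streamed_data(db, example_stream(), "example.json", "mtgjson", example_sync_func)
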
    async def sync_mtgjson_identifiers(self, db: Session, identifiers_data: dict):
        """Sync MTGJSON identifiers data to the database"""
        from app.models.mtgjson_card import MTGJSONCard

        with transaction(db):
            for card_id, card_data in identifiers_data.items():
                existing_card = db.query(MTGJSONCard).filter(MTGJSONCard.card_id == card_id).first()
                if existing_card:
                    # Update existing card
                    for key, value in {
                        "name": card_data.get("name"),
                        "set_code": card_data.get("setCode"),
                        "uuid": card_data.get("uuid"),
                        "abu_id": card_data.get("identifiers", {}).get("abuId"),
                        "card_kingdom_etched_id": card_data.get("identifiers", {}).get("cardKingdomEtchedId"),
                        "card_kingdom_foil_id": card_data.get("identifiers", {}).get("cardKingdomFoilId"),
                        "card_kingdom_id": card_data.get("identifiers", {}).get("cardKingdomId"),
                        "cardsphere_id": card_data.get("identifiers", {}).get("cardsphereId"),
                        "cardsphere_foil_id": card_data.get("identifiers", {}).get("cardsphereFoilId"),
                        "cardtrader_id": card_data.get("identifiers", {}).get("cardtraderId"),
                        "csi_id": card_data.get("identifiers", {}).get("csiId"),
                        "mcm_id": card_data.get("identifiers", {}).get("mcmId"),
                        "mcm_meta_id": card_data.get("identifiers", {}).get("mcmMetaId"),
                        "miniaturemarket_id": card_data.get("identifiers", {}).get("miniaturemarketId"),
                        "mtg_arena_id": card_data.get("identifiers", {}).get("mtgArenaId"),
                        "mtgjson_foil_version_id": card_data.get("identifiers", {}).get("mtgjsonFoilVersionId"),
                        "mtgjson_non_foil_version_id": card_data.get("identifiers", {}).get("mtgjsonNonFoilVersionId"),
                        "mtgjson_v4_id": card_data.get("identifiers", {}).get("mtgjsonV4Id"),
                        "mtgo_foil_id": card_data.get("identifiers", {}).get("mtgoFoilId"),
                        "mtgo_id": card_data.get("identifiers", {}).get("mtgoId"),
                        "multiverse_id": card_data.get("identifiers", {}).get("multiverseId"),
                        "scg_id": card_data.get("identifiers", {}).get("scgId"),
                        "scryfall_id": card_data.get("identifiers", {}).get("scryfallId"),
                        "scryfall_card_back_id": card_data.get("identifiers", {}).get("scryfallCardBackId"),
                        "scryfall_oracle_id": card_data.get("identifiers", {}).get("scryfallOracleId"),
                        "scryfall_illustration_id": card_data.get("identifiers", {}).get("scryfallIllustrationId"),
                        "tcgplayer_product_id": card_data.get("identifiers", {}).get("tcgplayerProductId"),
                        "tcgplayer_etched_product_id": card_data.get("identifiers", {}).get("tcgplayerEtchedProductId"),
                        "tnt_id": card_data.get("identifiers", {}).get("tntId")
                    }.items():
                        setattr(existing_card, key, value)
                else:
                    new_card = MTGJSONCard(
                        card_id=card_id,
                        name=card_data.get("name"),
                        set_code=card_data.get("setCode"),
                        uuid=card_data.get("uuid"),
                        abu_id=card_data.get("identifiers", {}).get("abuId"),
                        card_kingdom_etched_id=card_data.get("identifiers", {}).get("cardKingdomEtchedId"),
                        card_kingdom_foil_id=card_data.get("identifiers", {}).get("cardKingdomFoilId"),
                        card_kingdom_id=card_data.get("identifiers", {}).get("cardKingdomId"),
                        cardsphere_id=card_data.get("identifiers", {}).get("cardsphereId"),
                        cardsphere_foil_id=card_data.get("identifiers", {}).get("cardsphereFoilId"),
                        cardtrader_id=card_data.get("identifiers", {}).get("cardtraderId"),
                        csi_id=card_data.get("identifiers", {}).get("csiId"),
                        mcm_id=card_data.get("identifiers", {}).get("mcmId"),
                        mcm_meta_id=card_data.get("identifiers", {}).get("mcmMetaId"),
                        miniaturemarket_id=card_data.get("identifiers", {}).get("miniaturemarketId"),
                        mtg_arena_id=card_data.get("identifiers", {}).get("mtgArenaId"),
                        mtgjson_foil_version_id=card_data.get("identifiers", {}).get("mtgjsonFoilVersionId"),
                        mtgjson_non_foil_version_id=card_data.get("identifiers", {}).get("mtgjsonNonFoilVersionId"),
                        mtgjson_v4_id=card_data.get("identifiers", {}).get("mtgjsonV4Id"),
                        mtgo_foil_id=card_data.get("identifiers", {}).get("mtgoFoilId"),
                        mtgo_id=card_data.get("identifiers", {}).get("mtgoId"),
                        multiverse_id=card_data.get("identifiers", {}).get("multiverseId"),
                        scg_id=card_data.get("identifiers", {}).get("scgId"),
                        scryfall_id=card_data.get("identifiers", {}).get("scryfallId"),
                        scryfall_card_back_id=card_data.get("identifiers", {}).get("scryfallCardBackId"),
                        scryfall_oracle_id=card_data.get("identifiers", {}).get("scryfallOracleId"),
                        scryfall_illustration_id=card_data.get("identifiers", {}).get("scryfallIllustrationId"),
                        tcgplayer_product_id=card_data.get("identifiers", {}).get("tcgplayerProductId"),
                        tcgplayer_etched_product_id=card_data.get("identifiers", {}).get("tcgplayerEtchedProductId"),
                        tnt_id=card_data.get("identifiers", {}).get("tntId")
                    )
                    db.add(new_card)

    async def sync_mtgjson_skus(self, db: Session, skus_data: dict):
        """Sync MTGJSON SKUs data to the database"""
        from app.models.mtgjson_sku import MTGJSONSKU

        with transaction(db):
            for card_uuid, sku_list in skus_data.items():
                for sku in sku_list:
                    # Handle case where sku is a string (skuId)
                    if isinstance(sku, str):
                        sku_id = sku
                        existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
                        if existing_sku:
                            # Update existing SKU
                            existing_sku.card_id = card_uuid
                        else:
                            new_sku = MTGJSONSKU(
                                sku_id=sku_id,
                                card_id=card_uuid
                            )
                            db.add(new_sku)
                    # Handle case where sku is a dictionary
                    else:
                        sku_id = str(sku.get("skuId"))
                        existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
                        if existing_sku:
                            # Update existing SKU
                            for key, value in {
                                "product_id": str(sku.get("productId")),
                                "condition": sku.get("condition"),
                                "finish": sku.get("finish"),
                                "language": sku.get("language"),
                                "printing": sku.get("printing"),
                                "card_id": card_uuid
                            }.items():
                                setattr(existing_sku, key, value)
                        else:
                            new_sku = MTGJSONSKU(
                                sku_id=sku_id,
                                product_id=str(sku.get("productId")),
                                condition=sku.get("condition"),
                                finish=sku.get("finish"),
                                language=sku.get("language"),
                                printing=sku.get("printing"),
                                card_id=card_uuid
                            )
                            db.add(new_sku)

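For context, the two SKU shapes the branches above handle look roughly like this; the card UUID, SKU IDs, and field values are placeholders:

example_skus_data = {
    "example-card-uuid": [
        "1234567",                 # bare skuId string
        {                          # full SKU object
            "skuId": 1234568,
            "productId": 98765,
            "condition": "NEAR MINT",
            "finish": "NON FOIL",
            "language": "ENGLISH",
            "printing": "NON FOIL",
        },
    ],
}
# await self.sync_mtgjson_skus(db, example_skus_data)
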
    async def initialize_data(
        self,
        db: Session,
        game_ids: List[int],
        use_cache: bool = False,
        init_categories: bool = True,
        init_groups: bool = True,
        init_products: bool = True,
        init_archived_prices: bool = True,
        archived_prices_start_date: Optional[str] = None,
        archived_prices_end_date: Optional[str] = None,
        init_mtgjson: bool = True
    ) -> Dict[str, Any]:
        """Initialize 3rd party API data loads with configurable steps"""
        logger.info("Starting data initialization process")
        results = {}
        if init_categories:
            logger.info("Initializing categories...")
            results["categories"] = await self.init_categories(db, use_cache)
        if init_groups:
            logger.info("Initializing groups...")
            results["groups"] = await self.init_groups(db, use_cache, game_ids)
        if init_products:
            logger.info("Initializing products...")
            results["products"] = await self.init_products(db, use_cache, game_ids)
        if init_archived_prices:
            logger.info("Initializing archived prices...")
            results["archived_prices"] = await self.init_archived_prices(
                db,
                archived_prices_start_date,
                archived_prices_end_date,
                use_cache,
                game_ids
            )
        if init_mtgjson:
            logger.info("Initializing MTGJSON data...")
            results["mtgjson"] = await self.init_mtgjson(db, use_cache)

        logger.info("Data initialization completed")
        return results

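A usage sketch for the new entry point, assuming an async context and a session factory such as SessionLocal (not shown in this file); the game ID and date range are placeholders:

import asyncio
from app.db.database import SessionLocal  # assumed session factory; adjust to the project's actual one

async def run_initialization():
    service = DataInitializationService()
    db = SessionLocal()
    try:
        results = await service.initialize_data(
            db,
            game_ids=[1],                              # placeholder game/category ID
            use_cache=True,
            archived_prices_start_date="2024-01-01",   # placeholder date range
            archived_prices_end_date="2024-01-07",
        )
        logger.info(results)
    finally:
        db.close()

asyncio.run(run_initialization())
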
    async def clear_cache(self) -> None:
    async def clear_cache(self, db: Session) -> None:
        """Clear all cached data"""
        # Delete all files in categories, groups, and products directories
        for subdir in ["categories", "groups", "products"]:
            dir_path = os.path.join(self.cache_dir, subdir)
            if os.path.exists(dir_path):
                for filename in os.listdir(dir_path):
                    file_path = os.path.join(dir_path, filename)
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
            files = await self.file_service.list_files(db, file_type="json")
            for file in files:
                if file.path.startswith(subdir):
                    await self.file_service.delete_file(db, file.id)
        await self.mtgjson_service.clear_cache()
        print("Cache cleared")

    async def close(self):
        await self.tcgcsv_service.close()
        print("Cache cleared")