import os
import json
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any

from sqlalchemy.orm import Session

from app.services.external_api.tcgcsv.tcgcsv_service import TCGCSVService
from app.services.external_api.mtgjson.mtgjson_service import MTGJSONService
from app.models.tcgplayer_group import TCGPlayerGroup
from app.models.tcgplayer_product import TCGPlayerProduct
from app.models.tcgplayer_category import TCGPlayerCategory


class DataInitializationService:
    """Load TCGPlayer categories, groups and products into the database.

    Data is fetched through ``TCGCSVService`` and can optionally be read from
    and written to JSON files under ``cache_dir``. MTGJSON processing is
    delegated to ``MTGJSONService``.
    """

    def __init__(self, cache_dir: str = "app/data/cache/tcgcsv"):
        """Create the service clients and make sure the cache folders exist.

        Args:
            cache_dir: Root directory for the JSON cache. The ``categories``,
                ``groups`` and ``products`` subdirectories are created in it.
        """
        self.cache_dir = cache_dir
        self.categories_dir = os.path.join(cache_dir, "categories")
        self.groups_dir = os.path.join(cache_dir, "groups")
        self.products_dir = os.path.join(cache_dir, "products")
        self.tcgcsv_service = TCGCSVService()
        self.mtgjson_service = MTGJSONService()

        # Create all necessary directories
        os.makedirs(cache_dir, exist_ok=True)
        os.makedirs(self.categories_dir, exist_ok=True)
        os.makedirs(self.groups_dir, exist_ok=True)
        os.makedirs(self.products_dir, exist_ok=True)

    # ------------------------------------------------------------------
    # Small conversion helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_timestamp(value: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 string (a trailing ``Z`` is accepted).

        Returns ``None`` for a missing or empty value.
        """
        if not value:
            return None
        # datetime.fromisoformat() only understands "Z" from Python 3.11 on,
        # so it is rewritten as an explicit UTC offset.
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    @staticmethod
    def _parse_price(value: Any) -> Optional[float]:
        """Convert a price field to ``float``; any falsy value becomes ``None``."""
        if not value:
            return None
        return float(value)

    # ------------------------------------------------------------------
    # Cache helpers
    # ------------------------------------------------------------------

    def _get_cache_path(self, filename: str, subdir: str) -> str:
        """Get the full path for a cached file in the specified subdirectory"""
        return os.path.join(self.cache_dir, subdir, filename)

    async def _cache_categories(self, categories_data: dict):
        """Cache categories data to a JSON file"""
        cache_path = self._get_cache_path("categories.json", "categories")
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(categories_data, f, indent=2)

    async def _cache_groups(self, game_ids: List[int], groups_data: dict):
        """Write the same groups payload to one cache file per game ID."""
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
            with open(cache_path, "w", encoding="utf-8") as f:
                # default=str keeps non-JSON values (e.g. datetimes) serializable
                json.dump(groups_data, f, default=str)

    async def _cache_products(self, game_ids: List[int], group_id: int, products_data: list):
        """Write the products of one group to one cache file per game ID."""
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(products_data, f, default=str)

    async def _load_cached_categories(self) -> Optional[dict]:
        """Return the cached categories payload, or ``None`` if there is none."""
        cache_path = self._get_cache_path("categories.json", "categories")
        if os.path.exists(cache_path):
            with open(cache_path, "r", encoding="utf-8") as f:
                return json.load(f)
        return None

    async def _load_cached_groups(self, game_ids: List[int]) -> Optional[dict]:
        """Return the first groups cache file found for any of ``game_ids``."""
        # Try to load cached data for any of the game IDs
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"groups_{game_id}.json", "groups")
            if os.path.exists(cache_path):
                with open(cache_path, "r", encoding="utf-8") as f:
                    return json.load(f)
        return None

    async def _load_cached_products(self, game_ids: List[int], group_id: int) -> Optional[list]:
        """Return the first products cache file found for ``group_id``."""
        # Try to load cached data for any of the game IDs
        for game_id in game_ids:
            cache_path = self._get_cache_path(f"products_{game_id}_{group_id}.json", "products")
            if os.path.exists(cache_path):
                with open(cache_path, "r", encoding="utf-8") as f:
                    return json.load(f)
        return None

    # ------------------------------------------------------------------
    # Sync steps used by initialize_data
    # ------------------------------------------------------------------

    async def _sync_categories(self, db: Session, use_cache: bool) -> int:
        """Insert categories that are missing from the database.

        Returns:
            Number of categories seen (already present plus newly added).

        Raises:
            Exception: If the API response does not report success.
        """
        print("\nInitializing categories...")
        cached = await self._load_cached_categories() if use_cache else None
        if cached and cached.get("success"):
            categories_data = cached
        else:
            # Either nothing is cached or the cached payload is a failed
            # response; in both cases go back to the API.
            print("Fetching categories from API...")
            categories_data = await self.tcgcsv_service.get_categories()
            if not categories_data.get("success"):
                raise Exception(f"Failed to fetch categories: {categories_data.get('errors')}")
            # Only a validated response is cached, so a failure is never replayed.
            if use_cache:
                await self._cache_categories(categories_data)

        # Sync categories to database
        synced_count = 0
        for category_data in categories_data.get("results", []):
            existing_category = (
                db.query(TCGPlayerCategory)
                .filter(TCGPlayerCategory.category_id == category_data["categoryId"])
                .first()
            )
            if not existing_category:
                db.add(
                    TCGPlayerCategory(
                        category_id=category_data["categoryId"],
                        name=category_data["name"],
                        display_name=category_data.get("displayName"),
                        seo_category_name=category_data.get("seoCategoryName"),
                        category_description=category_data.get("categoryDescription"),
                        category_page_title=category_data.get("categoryPageTitle"),
                        sealed_label=category_data.get("sealedLabel"),
                        non_sealed_label=category_data.get("nonSealedLabel"),
                        condition_guide_url=category_data.get("conditionGuideUrl"),
                        is_scannable=category_data.get("isScannable", False),
                        popularity=category_data.get("popularity", 0),
                        is_direct=category_data.get("isDirect", False),
                        modified_on=self._parse_timestamp(category_data.get("modifiedOn")),
                    )
                )
            synced_count += 1
        db.commit()
        print(f"Synced {synced_count} categories")
        return synced_count

    async def _sync_groups(self, db: Session, game_id: int, use_cache: bool) -> List[TCGPlayerGroup]:
        """Insert the groups of one game ID that are missing from the database.

        Returns:
            The group rows for this game ID (already present plus newly added).

        Raises:
            Exception: If the API response does not report success.
        """
        print(f"Initializing groups for game ID {game_id}...")
        cached = await self._load_cached_groups([game_id]) if use_cache else None
        if cached and cached.get("success"):
            groups_data = cached
        else:
            print(f"Fetching groups for game ID {game_id} from API...")
            groups_data = await self.tcgcsv_service.get_groups([game_id])
            if not groups_data.get("success"):
                raise Exception(f"Failed to fetch groups for game ID {game_id}: {groups_data.get('errors')}")
            # Only a validated response is cached, so a failure is never replayed.
            if use_cache:
                await self._cache_groups([game_id], groups_data)

        # Sync groups to database
        synced_groups = []
        for group_data in groups_data.get("results", []):
            group = (
                db.query(TCGPlayerGroup)
                .filter(TCGPlayerGroup.group_id == group_data["groupId"])
                .first()
            )
            if not group:
                group = TCGPlayerGroup(
                    group_id=group_data["groupId"],
                    name=group_data["name"],
                    abbreviation=group_data.get("abbreviation"),
                    is_supplemental=group_data.get("isSupplemental", False),
                    published_on=self._parse_timestamp(group_data.get("publishedOn")),
                    modified_on=self._parse_timestamp(group_data.get("modifiedOn")),
                    category_id=group_data.get("categoryId"),
                )
                db.add(group)
            synced_groups.append(group)
        db.commit()
        print(f"Synced {len(synced_groups)} groups for game ID {game_id}")
        return synced_groups

    async def _sync_products(self, db: Session, game_id: int, group: TCGPlayerGroup, use_cache: bool) -> int:
        """Insert the products of one group that are missing from the database.

        Returns:
            Number of products seen (already present plus newly added).
        """
        print(f"Initializing products for group {group.name} (game ID {game_id})...")
        products_data = None
        if use_cache:
            products_data = await self._load_cached_products([game_id], group.group_id)

        if not products_data:
            print(f"Fetching products for group {group.name} (game ID {game_id}) from API...")
            products_data = await self.tcgcsv_service.get_products_and_prices([game_id], group.group_id)
            if use_cache:
                await self._cache_products([game_id], group.group_id, products_data)

        # Sync products to database
        synced_count = 0
        for product_data in products_data:
            product_id = int(product_data["productId"])
            existing_product = (
                db.query(TCGPlayerProduct)
                .filter(TCGPlayerProduct.product_id == product_id)
                .first()
            )
            if not existing_product:
                db.add(
                    TCGPlayerProduct(
                        product_id=product_id,
                        name=product_data["name"],
                        clean_name=product_data.get("cleanName"),
                        image_url=product_data.get("imageUrl"),
                        category_id=int(product_data["categoryId"]),
                        group_id=int(product_data["groupId"]),
                        url=product_data.get("url"),
                        modified_on=self._parse_timestamp(product_data.get("modifiedOn")),
                        image_count=int(product_data.get("imageCount", 0)),
                        ext_rarity=product_data.get("extRarity"),
                        ext_number=product_data.get("extNumber"),
                        low_price=self._parse_price(product_data.get("lowPrice")),
                        mid_price=self._parse_price(product_data.get("midPrice")),
                        high_price=self._parse_price(product_data.get("highPrice")),
                        market_price=self._parse_price(product_data.get("marketPrice")),
                        direct_low_price=self._parse_price(product_data.get("directLowPrice")),
                        sub_type_name=product_data.get("subTypeName"),
                    )
                )
            synced_count += 1
        db.commit()
        print(f"Synced {synced_count} products for group {group.name} (game ID {game_id})")
        return synced_count

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def initialize_data(
        self,
        db: Session,
        game_ids: List[int],
        use_cache: bool = True,
        init_categories: bool = True,
        init_groups: bool = True,
        init_products: bool = True,
        init_archived_prices: bool = False,
        archived_prices_start_date: Optional[str] = None,
        archived_prices_end_date: Optional[str] = None,
        init_mtgjson: bool = True
    ) -> Dict[str, Any]:
        """Initialize TCGPlayer data with configurable steps

        Args:
            db: Database session used for lookups, inserts and commits.
            game_ids: Game IDs whose groups and products are processed.
            use_cache: Read from and write to the JSON cache when True.
            init_categories: Sync categories.
            init_groups: Sync groups for every game ID.
            init_products: Sync products for the groups synced in this run;
                skipped for a game ID when ``init_groups`` is False.
            init_archived_prices: Request archived prices for the date range.
            archived_prices_start_date: Range start, passed through unchanged
                to ``TCGCSVService``.
            archived_prices_end_date: Range end, passed through unchanged.
            init_mtgjson: Run the MTGJSON identifier and SKU processing.

        Returns:
            A dict with the keys ``categories`` (count), ``groups``
            (count per game ID), ``products`` (count per game ID and group
            ID), ``archived_prices`` (bool) and ``mtgjson`` (processing
            counts, empty when skipped).

        Raises:
            ValueError: If archived prices are requested without both dates.
            Exception: If the categories or groups API reports a failure.
        """
        # Validate arguments before any network or database work is done.
        if init_archived_prices and not (archived_prices_start_date and archived_prices_end_date):
            raise ValueError("Both start_date and end_date are required for archived prices initialization")

        print("Initializing TCGPlayer data...")
        results = {
            "categories": 0,
            "groups": {},
            "products": {},
            "archived_prices": False,
            "mtgjson": {}
        }

        if init_categories:
            results["categories"] = await self._sync_categories(db, use_cache)

        # Process each game ID separately
        for game_id in game_ids:
            print(f"\nProcessing game ID: {game_id}")
            results["groups"][game_id] = 0
            results["products"][game_id] = {}

            # Reset per game so groups of a previous game ID are never reused.
            synced_groups: List[TCGPlayerGroup] = []
            if init_groups:
                synced_groups = await self._sync_groups(db, game_id, use_cache)
                results["groups"][game_id] = len(synced_groups)

            if init_products:
                if not init_groups:
                    print(f"Skipping products for game ID {game_id}: groups were not initialized")
                # Handle products for each group in this game ID
                for group in synced_groups:
                    results["products"][game_id][group.group_id] = await self._sync_products(
                        db, game_id, group, use_cache
                    )

        if init_archived_prices:
            print(f"\nInitializing archived prices from {archived_prices_start_date} to {archived_prices_end_date}...")
            await self.tcgcsv_service.get_archived_prices_for_date_range(archived_prices_start_date, archived_prices_end_date)
            results["archived_prices"] = True
            print("Archived prices initialization completed")

        if init_mtgjson:
            print("\nInitializing MTGJSON data...")
            identifiers_result = await self.mtgjson_service.download_and_process_identifiers(db)
            skus_result = await self.mtgjson_service.download_and_process_skus(db)
            results["mtgjson"] = {
                "cards_processed": identifiers_result["cards_processed"],
                "skus_processed": skus_result["skus_processed"]
            }

        return results

    async def clear_cache(self) -> None:
        """Clear all cached data"""
        for subdir in ["categories", "groups", "products"]:
            dir_path = os.path.join(self.cache_dir, subdir)
            if os.path.exists(dir_path):
                for filename in os.listdir(dir_path):
                    file_path = os.path.join(dir_path, filename)
                    # Only regular files are removed; nested directories are kept.
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
        await self.mtgjson_service.clear_cache()
        print("Cache cleared")

    async def close(self):
        """Close the TCGCSV service client."""
        await self.tcgcsv_service.close()