god help me
@@ -1,7 +1,7 @@
 import os
 import json
 from datetime import datetime, timedelta
-from typing import Optional, List, Dict, Any, Union, Generator, Callable
+from typing import Optional, List, Dict, Any, Union, Generator, Callable, AsyncGenerator
 from sqlalchemy.orm import Session
 from app.models.tcgplayer_group import TCGPlayerGroup
 from app.models.tcgplayer_product import TCGPlayerProduct
@@ -462,111 +462,37 @@ class DataInitializationService(BaseService):
         identifiers_count = 0
         skus_count = 0
 
-        # Process identifiers
-        if use_cache:
-            cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_identifiers.json")
-            if cached_file and os.path.exists(cached_file.path):
-                logger.info("MTGJSON identifiers initialized from cache")
-                identifiers_count = await self._process_streamed_data(
-                    db,
-                    self._stream_json_file(cached_file.path),
-                    "mtgjson_identifiers.json",
-                    "mtgjson",
-                    self.sync_mtgjson_identifiers
-                )
-            else:
-                logger.info("Downloading MTGJSON identifiers from API")
-                identifiers_count = await self._process_streamed_data(
-                    db,
-                    await mtgjson_service.get_identifiers(db),
-                    "mtgjson_identifiers.json",
-                    "mtgjson",
-                    self.sync_mtgjson_identifiers
-                )
-        else:
-            logger.info("Downloading MTGJSON identifiers from API")
-            identifiers_count = await self._process_streamed_data(
-                db,
-                await mtgjson_service.get_identifiers(db),
-                "mtgjson_identifiers.json",
-                "mtgjson",
-                self.sync_mtgjson_identifiers
-            )
+        # Get identifiers data
+        identifiers_data = await mtgjson_service.get_identifiers(db, use_cache)
+        if identifiers_data and "data" in identifiers_data:
+            identifiers_count = await self.sync_mtgjson_identifiers(db, list(identifiers_data["data"].values()))
 
-        # Process SKUs
-        if use_cache:
-            cached_file = await self.file_service.get_file_by_filename(db, "mtgjson_skus.json")
-            if cached_file and os.path.exists(cached_file.path):
-                logger.info("MTGJSON SKUs initialized from cache")
-                skus_count = await self._process_streamed_data(
-                    db,
-                    self._stream_json_file(cached_file.path),
-                    "mtgjson_skus.json",
-                    "mtgjson",
-                    self.sync_mtgjson_skus
-                )
-            else:
-                logger.info("Downloading MTGJSON SKUs from API")
-                skus_count = await self._process_streamed_data(
-                    db,
-                    await mtgjson_service.get_skus(db),
-                    "mtgjson_skus.json",
-                    "mtgjson",
-                    self.sync_mtgjson_skus
-                )
-        else:
-            logger.info("Downloading MTGJSON SKUs from API")
-            skus_count = await self._process_streamed_data(
-                db,
-                await mtgjson_service.get_skus(db),
-                "mtgjson_skus.json",
-                "mtgjson",
-                self.sync_mtgjson_skus
-            )
+        # Get SKUs data
+        skus_data = await mtgjson_service.get_skus(db, use_cache)
+        if skus_data and "data" in skus_data:
+            skus_count = await self.sync_mtgjson_skus(db, list(skus_data["data"].values()))
 
         return {
             "identifiers_processed": identifiers_count,
             "skus_processed": skus_count
         }
 
-    async def _process_streamed_data(
-        self,
-        db: Session,
-        data_stream: Generator[Dict[str, Any], None, None],
-        filename: str,
-        subdir: str,
-        sync_func: Callable
-    ) -> int:
-        """Process streamed data and sync to database"""
-        count = 0
-        items = []
-        batch_size = 1000
-
-        for item in data_stream:
-            if item["type"] == "meta":
-                # Handle meta data separately
-                continue
-
-            count += 1
-            items.append(item["data"])
-
-            # Process in batches
-            if len(items) >= batch_size:
-                await sync_func(db, items)
-                items = []
-
-        # Process any remaining items
-        if items:
-            await sync_func(db, items)
-
-        return count
-
-    async def sync_mtgjson_identifiers(self, db: Session, identifiers_data: dict):
+    async def sync_mtgjson_identifiers(self, db: Session, identifiers_data: List[dict]) -> int:
         """Sync MTGJSON identifiers data to the database"""
         from app.models.mtgjson_card import MTGJSONCard
 
         count = 0
         with transaction(db):
-            for card_id, card_data in identifiers_data.items():
+            for card_data in identifiers_data:
+                if not isinstance(card_data, dict):
+                    logger.debug(f"Skipping non-dict item: {card_data}")
+                    continue
+
+                card_id = card_data.get("uuid")
+                if not card_id:
+                    logger.debug(f"Skipping item without UUID: {card_data}")
+                    continue
+
                 existing_card = db.query(MTGJSONCard).filter(MTGJSONCard.card_id == card_id).first()
                 if existing_card:
                     # Update existing card
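For reference, a minimal sketch of the item shape the new sync_mtgjson_identifiers expects. The values below are illustrative assumptions, not taken from a real file; "uuid" and the nested "identifiers" object are the only fields the code above actually reads.

    # Hypothetical AllIdentifiers.json entry, reduced to the fields this diff touches.
    identifiers_payload = [
        {
            "uuid": "00000000-0000-0000-0000-000000000000",  # placeholder card UUID
            "identifiers": {"tntId": "12345"},               # read via .get() chains above
        },
    ]
    # Inside the service this would be invoked roughly as:
    # count = await self.sync_mtgjson_identifiers(db, identifiers_payload)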
@@ -636,53 +562,47 @@ class DataInitializationService(BaseService):
                         tnt_id=card_data.get("identifiers", {}).get("tntId")
                     )
                     db.add(new_card)
                 count += 1
 
-    async def sync_mtgjson_skus(self, db: Session, skus_data: dict):
+        return count
+
+    async def sync_mtgjson_skus(self, db: Session, skus_data: List[List[dict]]) -> int:
         """Sync MTGJSON SKUs data to the database"""
         from app.models.mtgjson_sku import MTGJSONSKU
 
         count = 0
         with transaction(db):
-            for card_uuid, sku_list in skus_data.items():
-                for sku in sku_list:
-                    # Handle case where sku is a string (skuId)
-                    if isinstance(sku, str):
-                        sku_id = sku
-                        existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
-                        if existing_sku:
-                            # Update existing SKU
-                            existing_sku.card_id = card_uuid
-                        else:
-                            new_sku = MTGJSONSKU(
-                                sku_id=sku_id,
-                                card_id=card_uuid
-                            )
-                            db.add(new_sku)
-                    # Handle case where sku is a dictionary
-                    else:
-                        sku_id = str(sku.get("skuId"))
-                        existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == sku_id).first()
-                        if existing_sku:
-                            # Update existing SKU
-                            for key, value in {
-                                "product_id": str(sku.get("productId")),
-                                "condition": sku.get("condition"),
-                                "finish": sku.get("finish"),
-                                "language": sku.get("language"),
-                                "printing": sku.get("printing"),
-                                "card_id": card_uuid
-                            }.items():
-                                setattr(existing_sku, key, value)
-                        else:
-                            new_sku = MTGJSONSKU(
-                                sku_id=sku_id,
-                                product_id=str(sku.get("productId")),
-                                condition=sku.get("condition"),
-                                finish=sku.get("finish"),
-                                language=sku.get("language"),
-                                printing=sku.get("printing"),
-                                card_id=card_uuid
-                            )
-                            db.add(new_sku)
+            for product_data in skus_data:
+                for sku_data in product_data:
+                    sku_id = sku_data.get("skuId")
+                    if not sku_id:
+                        logger.debug(f"Skipping item without SKU ID: {sku_data}")
+                        continue
+
+                    existing_sku = db.query(MTGJSONSKU).filter(MTGJSONSKU.sku_id == str(sku_id)).first()
+                    if existing_sku:
+                        # Update existing SKU
+                        for key, value in {
+                            "product_id": sku_data.get("productId"),
+                            "condition": sku_data.get("condition"),
+                            "finish": sku_data.get("finish"),
+                            "language": sku_data.get("language"),
+                            "printing": sku_data.get("printing"),
+                        }.items():
+                            setattr(existing_sku, key, value)
+                    else:
+                        new_sku = MTGJSONSKU(
+                            sku_id=sku_id,
+                            product_id=sku_data.get("productId"),
+                            condition=sku_data.get("condition"),
+                            finish=sku_data.get("finish"),
+                            language=sku_data.get("language"),
+                            printing=sku_data.get("printing"),
+                        )
+                        db.add(new_sku)
+                    count += 1
+
+        return count
 
     async def initialize_data(
         self,
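The List[List[dict]] annotation is easiest to see from the payload it comes from. A minimal sketch with assumed field values: TcgplayerSkus.json keeps one list of SKU dicts per card UUID under "data", so list(skus_data["data"].values()) hands the sync method one inner list per card.

    # Hypothetical TcgplayerSkus.json excerpt; only the keys the sync code reads.
    skus_data = {
        "meta": {"date": "2025-01-01", "version": "5.x"},  # assumed meta fields
        "data": {
            "00000000-0000-0000-0000-000000000000": [      # placeholder card UUID
                {"skuId": 111, "productId": 222, "condition": "NEAR MINT",
                 "finish": "NON FOIL", "language": "ENGLISH", "printing": "NON FOIL"},
            ],
        },
    }
    payload = list(skus_data["data"].values())             # -> List[List[dict]]
    # skus_count = await self.sync_mtgjson_skus(db, payload)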
@@ -2,7 +2,8 @@ import os
 import json
 import zipfile
 import time
-from typing import Dict, Any, Optional, Generator
+import shutil
+from typing import Dict, Any, Optional
 from sqlalchemy.orm import Session
 from app.services.external_api.base_external_service import BaseExternalService
 from app.schemas.file import FileInDB
@@ -11,32 +12,10 @@ import logging
 logger = logging.getLogger(__name__)
 
 class MTGJSONService(BaseExternalService):
-    def __init__(self, cache_dir: str = "app/data/cache/mtgjson"):
+    def __init__(self):
         super().__init__(base_url="https://mtgjson.com/api/v5/")
-        # Ensure the cache directory exists
-        os.makedirs(cache_dir, exist_ok=True)
-        self.cache_dir = cache_dir
-        self.identifiers_dir = os.path.join(cache_dir, "identifiers")
-        self.skus_dir = os.path.join(cache_dir, "skus")
-        # Ensure subdirectories exist
-        os.makedirs(self.identifiers_dir, exist_ok=True)
-        os.makedirs(self.skus_dir, exist_ok=True)
-
-    def _format_progress(self, current: int, total: int, start_time: float) -> str:
-        """Format a progress message with percentage and timing information"""
-        elapsed = time.time() - start_time
-        if total > 0:
-            percent = (current / total) * 100
-            items_per_second = current / elapsed if elapsed > 0 else 0
-            eta = (total - current) / items_per_second if items_per_second > 0 else 0
-            return f"[{current}/{total} ({percent:.1f}%)] {items_per_second:.1f} items/sec, ETA: {eta:.1f}s"
-        return f"[{current} items] {current/elapsed:.1f} items/sec"
-
-    def _print_progress(self, message: str, end: str = "\n") -> None:
-        """Print progress message with flush"""
-        print(message, end=end, flush=True)
-
-    async def _download_file(self, db: Session, url: str, filename: str, subdir: str) -> FileInDB:
+    async def _download_and_unzip_file(self, db: Session, url: str, filename: str, subdir: str) -> FileInDB:
         """Download a file from the given URL and save it using FileService"""
         print(f"Downloading {url}...")
         start_time = time.time()
@@ -49,7 +28,7 @@ class MTGJSONService(BaseExternalService):
         )
 
         # Save the file using the file service
-        return await self.file_service.save_file(
+        file_record = await self.file_service.save_file(
             db=db,
             file_data=file_data,
             filename=filename,
@@ -57,18 +36,24 @@ class MTGJSONService(BaseExternalService):
             file_type="application/zip",
             content_type="application/zip"
         )
 
+        # Unzip the file and hand back the record of the extracted JSON,
+        # since callers open file_record.path with json.load()
+        json_file_record = await self._unzip_file(file_record, subdir, db)
+
+        return json_file_record
+
-    async def _unzip_file(self, file_record: FileInDB, subdir: str, db: Session) -> str:
+    async def _unzip_file(self, file_record: FileInDB, subdir: str, db: Session) -> FileInDB:
         """Unzip a file to the specified subdirectory and return the extracted JSON file"""
         try:
-            # Use the appropriate subdirectory based on the type
-            extract_path = self.identifiers_dir if subdir == "identifiers" else self.skus_dir
-            os.makedirs(extract_path, exist_ok=True)
-
+            file_service = self.get_service('file')
+            cache_dir = file_service.base_cache_dir
+            temp_dir = os.path.join(cache_dir, 'mtgjson', subdir, 'temp')
+            os.makedirs(temp_dir, exist_ok=True)
             with zipfile.ZipFile(file_record.path, 'r') as zip_ref:
                 json_filename = zip_ref.namelist()[0]
-                zip_ref.extractall(extract_path)
-                json_path = os.path.join(extract_path, json_filename)
+                zip_ref.extractall(temp_dir)
+                json_path = os.path.join(temp_dir, json_filename)
 
             # Create a file record for the extracted JSON file
             with open(json_path, 'r') as f:
@@ -82,127 +67,57 @@ class MTGJSONService(BaseExternalService):
                     content_type="application/json"
                 )
 
-            return str(json_file_record.path)
+            # remove the temp directory
+            shutil.rmtree(temp_dir)
+
+            return json_file_record
         except Exception as e:
             logger.error(f"Error unzipping file: {e}")
             raise
 
-    def _stream_json_file(self, file_path: str) -> Generator[Dict[str, Any], None, None]:
-        """Stream a JSON file and yield items one at a time using a streaming parser"""
-        logger.info(f"Starting to stream JSON file: {file_path}")
-        try:
-            with open(file_path, 'r') as f:
-                # First, we need to find the start of the data section
-                data_started = False
-                current_key = None
-                current_value = []
-                brace_count = 0
-
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
-
-                    if not data_started:
-                        if '"data":' in line:
-                            data_started = True
-                            # Skip the opening brace of the data object
-                            line = line[line.find('"data":') + 7:].strip()
-                            if line.startswith('{'):
-                                line = line[1:].strip()
-                        else:
-                            # Yield meta data if found
-                            if '"meta":' in line:
-                                meta_start = line.find('"meta":') + 7
-                                meta_end = line.rfind('}')
-                                if meta_end > meta_start:
-                                    meta_json = line[meta_start:meta_end + 1]
-                                    try:
-                                        meta_data = json.loads(meta_json)
-                                        yield {"type": "meta", "data": meta_data}
-                                    except json.JSONDecodeError as e:
-                                        logger.warning(f"Failed to parse meta data: {e}")
-                            continue
-
-                    # Process the data section
-                    if data_started:
-                        if not current_key:
-                            # Look for a new key
-                            if '"' in line:
-                                key_start = line.find('"') + 1
-                                key_end = line.find('"', key_start)
-                                if key_end > key_start:
-                                    current_key = line[key_start:key_end]
-                                    # Get the rest of the line after the key
-                                    line = line[key_end + 1:].strip()
-                                    if ':' in line:
-                                        line = line[line.find(':') + 1:].strip()
-
-                        if current_key:
-                            # Accumulate the value
-                            current_value.append(line)
-                            brace_count += line.count('{') - line.count('}')
-
-                            if brace_count == 0 and line.endswith(','):
-                                # We have a complete value
-                                value_str = ''.join(current_value).rstrip(',')
-                                try:
-                                    value = json.loads(value_str)
-                                    yield {"type": "item", "data": {current_key: value}}
-                                except json.JSONDecodeError as e:
-                                    logger.warning(f"Failed to parse value for key {current_key}: {e}")
-                                current_key = None
-                                current_value = []
-
-        except Exception as e:
-            logger.error(f"Error streaming JSON file: {e}")
-            raise
-
-    async def get_identifiers(self, db: Session) -> Generator[Dict[str, Any], None, None]:
+    async def get_identifiers(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
         """Download and get MTGJSON identifiers data"""
         # Check if we have a cached version
         cached_file = await self.file_service.get_file_by_filename(db, "AllIdentifiers.json")
-        if cached_file:
-            # Ensure the file exists at the path
-            if os.path.exists(cached_file.path):
-                return self._stream_json_file(cached_file.path)
-
-        # Download and process the file
-        file_record = await self._download_file(
-            db=db,
-            url="https://mtgjson.com/api/v5/AllIdentifiers.json.zip",
-            filename="AllIdentifiers.json.zip",
-            subdir="identifiers"
-        )
-
-        # Unzip and process the file
-        json_path = await self._unzip_file(file_record, "identifiers", db)
-
-        # Return a generator that streams the JSON file
-        return self._stream_json_file(json_path)
+        if cached_file and os.path.exists(cached_file.path) and use_cache:
+            with open(cached_file.path, 'r') as f:
+                logger.debug(f"Loaded identifiers from cache: {cached_file.path}")
+                return json.load(f)
+        else:
+            # Download and process the file
+            logger.debug("Downloading identifiers from MTGJSON")
+            file_record = await self._download_and_unzip_file(
+                db=db,
+                url="https://mtgjson.com/api/v5/AllIdentifiers.json.zip",
+                filename="AllIdentifiers.json.zip",
+                subdir="identifiers"
+            )
+
+            with open(file_record.path, 'r') as f:
+                logger.debug(f"Loaded identifiers from MTGJSON: {file_record.path}")
+                return json.load(f)
 
-    async def get_skus(self, db: Session) -> Generator[Dict[str, Any], None, None]:
+    async def get_skus(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
         """Download and get MTGJSON SKUs data"""
         # Check if we have a cached version
         cached_file = await self.file_service.get_file_by_filename(db, "TcgplayerSkus.json")
-        if cached_file:
-            # Ensure the file exists at the path
-            if os.path.exists(cached_file.path):
-                return self._stream_json_file(cached_file.path)
-
-        # Download and process the file
-        file_record = await self._download_file(
-            db=db,
-            url="https://mtgjson.com/api/v5/TcgplayerSkus.json.zip",
-            filename="TcgplayerSkus.json.zip",
-            subdir="skus"
-        )
-
-        # Unzip and process the file
-        json_path = await self._unzip_file(file_record, "skus", db)
-
-        # Return a generator that streams the JSON file
-        return self._stream_json_file(json_path)
+        if cached_file and os.path.exists(cached_file.path) and use_cache:
+            with open(cached_file.path, 'r') as f:
+                logger.debug(f"Loaded SKUs from cache: {cached_file.path}")
+                return json.load(f)
+        else:
+            # Download and process the file
+            logger.debug("Downloading SKUs from MTGJSON")
+            file_record = await self._download_and_unzip_file(
+                db=db,
+                url="https://mtgjson.com/api/v5/TcgplayerSkus.json.zip",
+                filename="TcgplayerSkus.json.zip",
+                subdir="skus"
+            )
+
+            with open(file_record.path, 'r') as f:
+                logger.debug(f"Loaded SKUs from MTGJSON: {file_record.path}")
+                return json.load(f)
 
     async def clear_cache(self, db: Session) -> None:
         """Clear all cached data"""
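The deleted _stream_json_file parsed JSON by counting braces line by line, which breaks on minified or reformatted files; this commit trades it for json.load, which holds the whole AllIdentifiers/TcgplayerSkus payload in memory at once. If streaming is ever needed again, an incremental parser is sturdier than brace counting. A minimal sketch using the third-party ijson library (an assumption, this repo does not currently depend on it) that yields the same {"type": ..., "data": ...} events the old generator produced:

    import ijson  # assumed third-party dependency, not part of this commit

    def stream_json_file(file_path: str):
        """Yield one {"type": "item", "data": {key: value}} event per entry in
        the top-level "data" object, without loading the whole file."""
        with open(file_path, "rb") as f:
            # kvitems streams (key, value) pairs of the object at the given prefix
            for key, value in ijson.kvitems(f, "data"):
                yield {"type": "item", "data": {key: value}}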
@@ -153,7 +153,8 @@ class FileService:
 
     async def get_file_by_filename(self, db: Session, filename: str) -> Optional[FileInDB]:
         """Get a file record from the database by filename"""
-        file_record = db.query(File).filter(File.name == filename).first()
+        # get most recent file by filename
+        file_record = db.query(File).filter(File.name == filename).order_by(File.created_at.desc()).first()
         if file_record:
             return FileInDB.model_validate(file_record)
         return None
app/services/manabox_service.py (new file, +15)
@@ -0,0 +1,15 @@
+from app.services.base_service import BaseService
+from sqlalchemy.orm import Session
+from app.schemas.file import FileInDB
+from typing import Dict, Any
+import csv
+
+class ManaboxService(BaseService):
+    def __init__(self):
+        super().__init__(None)
+
+    async def process_manabox_csv(self, db: Session, csv_file: FileInDB) -> bool:
+
+        return True
+
+    # Name,Set code,Set name,Collector number,Foil,Rarity,Quantity,ManaBox ID,Scryfall ID,Purchase price,Misprint,Altered,Condition,Language,Purchase price currency
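process_manabox_csv is still a stub. A minimal sketch of one way it might consume the columns in the comment above; the DictReader keys come from that header list, csv_file.path mirrors how cached files are opened elsewhere in this commit, and the persistence step is an assumption about future work:

    import csv

    async def process_manabox_csv(self, db: Session, csv_file: FileInDB) -> bool:
        # Hypothetical implementation sketch, not part of the commit.
        with open(csv_file.path, newline="") as f:
            for row in csv.DictReader(f):
                name = row["Name"]                    # card name column
                quantity = int(row["Quantity"] or 0)  # guard against empty cells
                scryfall_id = row["Scryfall ID"]      # stable key for card lookup
                # TODO: match against card/SKU tables and persist the rows
        return True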