god help me
@@ -2,7 +2,8 @@ import os
 import json
 import zipfile
 import time
-from typing import Dict, Any, Optional, Generator
+import shutil
+from typing import Dict, Any, Optional
 from sqlalchemy.orm import Session
 from app.services.external_api.base_external_service import BaseExternalService
 from app.schemas.file import FileInDB
@@ -11,32 +12,10 @@ import logging
 logger = logging.getLogger(__name__)
 
 class MTGJSONService(BaseExternalService):
-    def __init__(self, cache_dir: str = "app/data/cache/mtgjson"):
+    def __init__(self):
         super().__init__(base_url="https://mtgjson.com/api/v5/")
-        # Ensure the cache directory exists
-        os.makedirs(cache_dir, exist_ok=True)
-        self.cache_dir = cache_dir
-        self.identifiers_dir = os.path.join(cache_dir, "identifiers")
-        self.skus_dir = os.path.join(cache_dir, "skus")
-        # Ensure subdirectories exist
-        os.makedirs(self.identifiers_dir, exist_ok=True)
-        os.makedirs(self.skus_dir, exist_ok=True)
-
-    def _format_progress(self, current: int, total: int, start_time: float) -> str:
-        """Format a progress message with percentage and timing information"""
-        elapsed = time.time() - start_time
-        if total > 0:
-            percent = (current / total) * 100
-            items_per_second = current / elapsed if elapsed > 0 else 0
-            eta = (total - current) / items_per_second if items_per_second > 0 else 0
-            return f"[{current}/{total} ({percent:.1f}%)] {items_per_second:.1f} items/sec, ETA: {eta:.1f}s"
-        return f"[{current} items] {current/elapsed:.1f} items/sec"
-
-    def _print_progress(self, message: str, end: str = "\n") -> None:
-        """Print progress message with flush"""
-        print(message, end=end, flush=True)
 
-    async def _download_file(self, db: Session, url: str, filename: str, subdir: str) -> FileInDB:
+    async def _download_and_unzip_file(self, db: Session, url: str, filename: str, subdir: str) -> FileInDB:
        """Download a file from the given URL and save it using FileService"""
        print(f"Downloading {url}...")
        start_time = time.time()
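
Aside: the deleted _format_progress helper is self-contained arithmetic: throughput is items processed divided by elapsed seconds, and the ETA is the remaining item count divided by that throughput. A standalone sketch of the same computation, using the same formulas as the removed code, renamed without the leading underscore so it runs outside the class:

import time

def format_progress(current: int, total: int, start_time: float) -> str:
    """Throughput/ETA arithmetic taken from the removed _format_progress helper."""
    elapsed = time.time() - start_time
    if total > 0:
        percent = (current / total) * 100
        items_per_second = current / elapsed if elapsed > 0 else 0
        eta = (total - current) / items_per_second if items_per_second > 0 else 0
        return f"[{current}/{total} ({percent:.1f}%)] {items_per_second:.1f} items/sec, ETA: {eta:.1f}s"
    return f"[{current} items] {current / elapsed:.1f} items/sec"

# 250 of 1000 items after roughly 5s: "[250/1000 (25.0%)] 50.0 items/sec, ETA: 15.0s"
print(format_progress(250, 1000, time.time() - 5))
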
@@ -49,7 +28,7 @@ class MTGJSONService(BaseExternalService):
         )
 
         # Save the file using the file service
-        return await self.file_service.save_file(
+        file_record = await self.file_service.save_file(
             db=db,
             file_data=file_data,
             filename=filename,
@@ -57,18 +36,24 @@ class MTGJSONService(BaseExternalService):
             file_type="application/zip",
             content_type="application/zip"
         )
+
+        # Unzip the file
+        await self._unzip_file(file_record, subdir, db)
+
+        return file_record
 
-    async def _unzip_file(self, file_record: FileInDB, subdir: str, db: Session) -> str:
+    async def _unzip_file(self, file_record: FileInDB, subdir: str, db: Session) -> FileInDB:
         """Unzip a file to the specified subdirectory and return the path to the extracted JSON file"""
         try:
-            # Use the appropriate subdirectory based on the type
-            extract_path = self.identifiers_dir if subdir == "identifiers" else self.skus_dir
-            os.makedirs(extract_path, exist_ok=True)
-
+            file_service = self.get_service('file')
+            cache_dir = file_service.base_cache_dir
+            temp_dir = os.path.join(cache_dir,'mtgjson', subdir, 'temp')
+            os.makedirs(temp_dir, exist_ok=True)
             with zipfile.ZipFile(file_record.path, 'r') as zip_ref:
                 json_filename = zip_ref.namelist()[0]
-                zip_ref.extractall(extract_path)
-                json_path = os.path.join(extract_path, json_filename)
+                zip_ref.extractall(temp_dir)
+                json_path = os.path.join(temp_dir, json_filename)
 
             # Create a file record for the extracted JSON file
             with open(json_path, 'r') as f:
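
Aside: the reworked _unzip_file extracts into a scratch directory under the file service's cache and removes it once the JSON is registered (the shutil.rmtree(temp_dir) call appears in the next hunk). A minimal stdlib-only sketch of the same extract-to-temp pattern, assuming a single-member archive as the MTGJSON zips are; the extract_single_json helper and the tempfile usage are illustrative, not part of this codebase:

import json
import os
import shutil
import tempfile
import zipfile

def extract_single_json(zip_path: str) -> dict:
    """Extract the archive's only member to a scratch dir, parse it, then clean up."""
    temp_dir = tempfile.mkdtemp(prefix="mtgjson-")
    try:
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            json_filename = zip_ref.namelist()[0]  # MTGJSON archives hold one JSON file
            zip_ref.extractall(temp_dir)
        with open(os.path.join(temp_dir, json_filename), "r") as f:
            return json.load(f)
    finally:
        shutil.rmtree(temp_dir)  # unlike the service code, cleanup also runs on failure
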
@@ -82,127 +67,57 @@ class MTGJSONService(BaseExternalService):
                 content_type="application/json"
             )
 
-            return str(json_file_record.path)
+            # remove the temp directory
+            shutil.rmtree(temp_dir)
+
+            return json_file_record
         except Exception as e:
             logger.error(f"Error unzipping file: {e}")
             raise
 
-    def _stream_json_file(self, file_path: str) -> Generator[Dict[str, Any], None, None]:
-        """Stream a JSON file and yield items one at a time using a streaming parser"""
-        logger.info(f"Starting to stream JSON file: {file_path}")
-        try:
-            with open(file_path, 'r') as f:
-                # First, we need to find the start of the data section
-                data_started = False
-                current_key = None
-                current_value = []
-                brace_count = 0
-
-                for line in f:
-                    line = line.strip()
-                    if not line:
-                        continue
-
-                    if not data_started:
-                        if '"data":' in line:
-                            data_started = True
-                            # Skip the opening brace of the data object
-                            line = line[line.find('"data":') + 7:].strip()
-                            if line.startswith('{'):
-                                line = line[1:].strip()
-                        else:
-                            # Yield meta data if found
-                            if '"meta":' in line:
-                                meta_start = line.find('"meta":') + 7
-                                meta_end = line.rfind('}')
-                                if meta_end > meta_start:
-                                    meta_json = line[meta_start:meta_end + 1]
-                                    try:
-                                        meta_data = json.loads(meta_json)
-                                        yield {"type": "meta", "data": meta_data}
-                                    except json.JSONDecodeError as e:
-                                        logger.warning(f"Failed to parse meta data: {e}")
-                            continue
-
-                    # Process the data section
-                    if data_started:
-                        if not current_key:
-                            # Look for a new key
-                            if '"' in line:
-                                key_start = line.find('"') + 1
-                                key_end = line.find('"', key_start)
-                                if key_end > key_start:
-                                    current_key = line[key_start:key_end]
-                                    # Get the rest of the line after the key
-                                    line = line[key_end + 1:].strip()
-                                    if ':' in line:
-                                        line = line[line.find(':') + 1:].strip()
-
-                        if current_key:
-                            # Accumulate the value
-                            current_value.append(line)
-                            brace_count += line.count('{') - line.count('}')
-
-                            if brace_count == 0 and line.endswith(','):
-                                # We have a complete value
-                                value_str = ''.join(current_value).rstrip(',')
-                                try:
-                                    value = json.loads(value_str)
-                                    yield {"type": "item", "data": {current_key: value}}
-                                except json.JSONDecodeError as e:
-                                    logger.warning(f"Failed to parse value for key {current_key}: {e}")
-                                current_key = None
-                                current_value = []
-
-        except Exception as e:
-            logger.error(f"Error streaming JSON file: {e}")
-            raise
-
-    async def get_identifiers(self, db: Session) -> Generator[Dict[str, Any], None, None]:
+    async def get_identifiers(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
         """Download and get MTGJSON identifiers data"""
         # Check if we have a cached version
         cached_file = await self.file_service.get_file_by_filename(db, "AllIdentifiers.json")
-        if cached_file:
-            # Ensure the file exists at the path
-            if os.path.exists(cached_file.path):
-                return self._stream_json_file(cached_file.path)
-
-        # Download and process the file
-        file_record = await self._download_file(
-            db=db,
-            url="https://mtgjson.com/api/v5/AllIdentifiers.json.zip",
-            filename="AllIdentifiers.json.zip",
-            subdir="identifiers"
-        )
-
-        # Unzip and process the file
-        json_path = await self._unzip_file(file_record, "identifiers", db)
-
-        # Return a generator that streams the JSON file
-        return self._stream_json_file(json_path)
-
-    async def get_skus(self, db: Session) -> Generator[Dict[str, Any], None, None]:
+        if cached_file and os.path.exists(cached_file.path) and use_cache:
+            with open(cached_file.path, 'r') as f:
+                logger.debug(f"Loaded identifiers from cache: {cached_file.path}")
+                return json.load(f)
+        else:
+            # Download and process the file
+            logger.debug(f"Downloading identifiers from MTGJSON")
+            file_record = await self._download_and_unzip_file(
+                db=db,
+                url="https://mtgjson.com/api/v5/AllIdentifiers.json.zip",
+                filename="AllIdentifiers.json.zip",
+                subdir="identifiers"
+            )
+
+            with open(file_record.path, 'r') as f:
+                logger.debug(f"Loaded identifiers from MTGJSON: {file_record.path}")
+                return json.load(f)
+
+    async def get_skus(self, db: Session, use_cache: bool = True) -> Dict[str, Any]:
         """Download and get MTGJSON SKUs data"""
         # Check if we have a cached version
         cached_file = await self.file_service.get_file_by_filename(db, "TcgplayerSkus.json")
-        if cached_file:
-            # Ensure the file exists at the path
-            if os.path.exists(cached_file.path):
-                return self._stream_json_file(cached_file.path)
-
-        # Download and process the file
-        file_record = await self._download_file(
-            db=db,
-            url="https://mtgjson.com/api/v5/TcgplayerSkus.json.zip",
-            filename="TcgplayerSkus.json.zip",
-            subdir="skus"
-        )
-
-        # Unzip and process the file
-        json_path = await self._unzip_file(file_record, "skus", db)
-
-        # Return a generator that streams the JSON file
-        return self._stream_json_file(json_path)
+        if cached_file and os.path.exists(cached_file.path) and use_cache:
+            with open(cached_file.path, 'r') as f:
+                logger.debug(f"Loaded SKUs from cache: {cached_file.path}")
+                return json.load(f)
+        else:
+            # Download and process the file
+            logger.debug(f"Downloading SKUs from MTGJSON")
+            file_record = await self._download_and_unzip_file(
+                db=db,
+                url="https://mtgjson.com/api/v5/TcgplayerSkus.json.zip",
+                filename="TcgplayerSkus.json.zip",
+                subdir="skus"
+            )
+
+            with open(file_record.path, 'r') as f:
+                logger.debug(f"Loaded SKUs from MTGJSON: {file_record.path}")
+                return json.load(f)
 
     async def clear_cache(self, db: Session) -> None:
         """Clear all cached data"""
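
Aside: the deleted _stream_json_file scanned the file line by line and assumed each entry of the top-level "data" object sits on its own physical line, which breaks on reformatted or minified JSON; this commit trades it for a plain json.load of the whole document, at the cost of holding it all in memory. If incremental parsing is ever needed again, an event-based parser avoids the layout assumption. A sketch using the third-party ijson package (an assumption for illustration; it is not a dependency of this commit):

import ijson  # third-party: pip install ijson

def stream_data_items(path: str):
    """Yield one (key, value) pair at a time from the top-level "data" object."""
    with open(path, "rb") as f:
        for key, value in ijson.kvitems(f, "data"):
            yield key, value
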
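
Aside: after this commit, get_identifiers and get_skus return the parsed MTGJSON payload as a dict instead of a generator, with use_cache=False forcing a fresh download. A hedged usage sketch; obtaining the SQLAlchemy Session is app-specific and elided here:

async def refresh_all(db):  # db: a SQLAlchemy Session from the app's session factory
    service = MTGJSONService()
    identifiers = await service.get_identifiers(db)     # served from cache when present
    skus = await service.get_skus(db, use_cache=False)  # always re-download
    # MTGJSON documents keep their entries under a top-level "data" key
    print(f"{len(identifiers['data'])} identifiers, {len(skus['data'])} SKU entries")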