2025-09-09 12:43:38 -04:00
parent 698ec83c96
commit a73ec73921
14 changed files with 2646 additions and 0 deletions

View File

@@ -0,0 +1,189 @@
description: ''
value: |-
  # Global table strategy (can be overridden per table)
  table_strategy: drop_and_recreate
  schema:
    name: ccr_etl_raw
    version: 1.0.0
    description: CCR ETL Raw Data Schema
    tables:
      - name: mtgjson_skus
        strategy: drop_and_recreate
        columns:
          - name: id
            type: integer
            description: internal database id
            primary_key: true
            autoincrement: true
          - name: uuid
            type: string
            description: The UUID of the MTGJSON SKU
          - name: condition
            type: string
            description: The condition of the MTGJSON SKU
          - name: language
            type: string
            description: The language of the MTGJSON SKU
          - name: printing
            type: string
            description: The printing of the MTGJSON SKU
          - name: finish
            type: string
            description: The finish of the MTGJSON SKU
          - name: productId
            type: string
            description: The TCGplayer product ID of the MTGJSON SKU
          - name: skuId
            type: string
            description: The TCGplayer SKU ID of the MTGJSON SKU
      - name: mtgjson_identifiers
        strategy: drop_and_recreate
        columns:
          - name: id
            type: integer
            description: internal database id
            primary_key: true
            autoincrement: true
          - name: uuid
            type: string
            description: The UUID of the MTGJSON identifier
          - name: name
            type: string
            description: The name of the MTGJSON identifier
          - name: setCode
            type: string
            description: The set code of the MTGJSON identifier
          - name: abuId
            type: string
            description: The ABU Games ID
          - name: cardKingdomEtchedId
            type: string
            description: The Card Kingdom etched ID
          - name: cardKingdomFoilId
            type: string
            description: The Card Kingdom foil ID
          - name: cardKingdomId
            type: string
            description: The Card Kingdom ID
          - name: cardsphereId
            type: string
            description: The Cardsphere ID
          - name: cardsphereFoilId
            type: string
            description: The Cardsphere foil ID
          - name: cardtraderId
            type: string
            description: The Cardtrader ID
          - name: csiId
            type: string
            description: The Cool Stuff Inc ID
          - name: mcmId
            type: string
            description: The Cardmarket ID
          - name: mcmMetaId
            type: string
            description: The Cardmarket meta ID
          - name: miniaturemarketId
            type: string
            description: The Miniature Market ID
          - name: mtgArenaId
            type: string
            description: The MTG Arena ID
          - name: mtgjsonFoilVersionId
            type: string
            description: The UUID generated by MTGJSON for the foil version of a card
          - name: mtgjsonNonFoilVersionId
            type: string
            description: The UUID generated by MTGJSON for the non-foil version of a card
          - name: mtgjsonV4Id
            type: string
            description: The UUID generated by MTGJSON v4 for a card
          - name: mtgoFoilId
            type: string
            description: The MTGO foil ID
          - name: mtgoId
            type: string
            description: The MTGO ID
          - name: multiverseId
            type: string
            description: The multiverse ID used by WotC for Gatherer
          - name: scgId
            type: string
            description: The Star City Games ID
          - name: scryfallId
            type: string
            description: The Scryfall ID
          - name: scryfallCardBackId
            type: string
            description: The Scryfall card back ID
          - name: scryfallOracleId
            type: string
            description: The Scryfall oracle ID
          - name: scryfallIllustrationId
            type: string
            description: The Scryfall illustration ID
          - name: tcgplayerProductId
            type: string
            description: The TCGplayer product ID
          - name: tcgplayerEtchedProductId
            type: string
            description: The TCGplayer etched product ID
          - name: tntId
            type: string
            description: The Troll and Toad ID
      - name: tcgcsv_categories
        strategy: drop_and_recreate
        columns:
          - name: id
            type: integer
            description: internal database id
            primary_key: true
            autoincrement: true
          - name: categoryId
            type: integer
          - name: name
            type: string
          - name: modifiedOn
            type: string
          - name: displayName
            type: string
          - name: seoCategoryName
            type: string
          - name: categoryDescription
            type: string
          - name: categoryPageTitle
            type: string
          - name: sealedLabel
            type: string
          - name: nonSealedLabel
            type: string
          - name: conditionGuideUrl
            type: string
          - name: isScannable
            type: boolean
          - name: popularity
            type: integer
          - name: isDirect
            type: boolean
      - name: tcgcsv_groups
        strategy: drop_and_recreate
        columns:
          - name: id
            type: integer
            primary_key: true
            autoincrement: true
          - name: groupId
            type: integer
          - name: name
            type: string
          - name: abbreviation
            type: string
          - name: isSupplemental
            type: boolean
          - name: publishedOn
            type: string
          - name: modifiedOn
            type: string
          - name: categoryId
            type: integer
is_secret: false
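
The comment at the top of the value block describes a two-level precedence: a table's own strategy wins, otherwise the global table_strategy applies. A minimal standalone sketch of that resolution (a hypothetical snippet, mirroring what the init script below does):

# Hypothetical sketch of the strategy precedence (not part of this commit).
config = {
    "table_strategy": "drop_and_recreate",  # global default
    "schema": {"tables": [
        {"name": "mtgjson_skus", "strategy": "drop_and_recreate"},  # explicit per-table override
        {"name": "uses_global_default"},                            # falls back to the global value
    ]},
}
for table in config["schema"]["tables"]:
    strategy = table.get("strategy", config.get("table_strategy", "drop_and_recreate"))
    print(f"{table['name']} -> {strategy}")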

View File

@@ -0,0 +1,89 @@
import wmill
import yaml
import psycopg2  # PostgreSQL driver used by SQLAlchemy's psycopg2 dialect
from sqlalchemy import create_engine, text, MetaData, Table, Column, Integer, String, Boolean, inspect
from sqlalchemy.engine import Engine

# You can import any PyPI package.
# See here for more info: https://www.windmill.dev/docs/advanced/dependencies_in_python
# You can use typed resources by doing a type alias to dict:
# postgresql = dict

DB_RESOURCE_PATH = 'u/joshuakrzemien/slick_postgresql'
DB_CONFIG_PATH = 'f/CCR_ETL/ccr_db_config'


def create_db_engine(db: dict) -> Engine:
    """Create a SQLAlchemy engine and verify that it can connect."""
    db_url = f"postgresql+psycopg2://postgres:{db['password']}@{db['host']}:{db['port']}/{db['dbname']}"
    engine = create_engine(db_url)
    # Open and close a connection up front so a bad resource fails fast
    with engine.connect():
        pass
    return engine


def table_exists(engine: Engine, table_name: str) -> bool:
    """Check if a table exists in the database."""
    inspector = inspect(engine)
    return table_name in inspector.get_table_names()


def create_table(engine: Engine, table: dict, strategy: str = "create_if_not_exists"):
    """Create a table from its config definition, honoring the given strategy."""
    table_name = table['name']
    try:
        columns = table['columns']

        # Handle the different table strategies
        if strategy == "drop_and_recreate":
            if table_exists(engine, table_name):
                print(f"Dropping existing table: {table_name}")
                with engine.connect() as conn:
                    conn.execute(text(f'DROP TABLE IF EXISTS "{table_name}" CASCADE'))
                    conn.commit()
        elif strategy == "create_if_not_exists":
            if table_exists(engine, table_name):
                print(f"Table {table_name} already exists, skipping creation")
                return
        else:
            raise ValueError(f"Unknown table strategy: {strategy}")

        # Map config types to SQLAlchemy types
        # ('boolean' covers the tcgcsv tables; unknown types fall back to String)
        type_mapping = {
            'integer': Integer,
            'string': String,
            'boolean': Boolean,
        }

        # Build SQLAlchemy columns from the config definitions
        sqlalchemy_columns = []
        for column in columns:
            col_type = type_mapping.get(column['type'], String)
            sqlalchemy_columns.append(Column(
                column['name'],
                col_type,
                primary_key=column.get('primary_key', False),
                nullable=column.get('nullable', True),
                index=column.get('index', False),
                autoincrement=column.get('autoincrement', False),
            ))

        # Create the table using SQLAlchemy Core
        metadata = MetaData()
        Table(table_name, metadata, *sqlalchemy_columns)
        metadata.create_all(engine)
        print(f"Successfully created table: {table_name}")
    except Exception as e:
        print(f"Error creating table {table_name}: {e}")
        raise


def main():
    db = wmill.client.get_resource(DB_RESOURCE_PATH)
    config_yaml = wmill.get_variable(DB_CONFIG_PATH)
    config = yaml.safe_load(config_yaml)
    engine = create_db_engine(db)

    # Global table strategy from config (defaults to drop_and_recreate)
    table_strategy = config.get('table_strategy', 'drop_and_recreate')
    print(f"Using table strategy: {table_strategy}")

    for table in config['schema']['tables']:
        # Allow a per-table strategy override
        table_specific_strategy = table.get('strategy', table_strategy)
        create_table(engine, table, table_specific_strategy)

    return {"status": "success"}
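
A quick way to sanity-check create_table outside Windmill is to point it at a throwaway SQLite engine; SQLAlchemy Core renders the same column definitions through either dialect. A minimal sketch, assuming it runs in the same module as the script above (the table definition here is invented for the test):

# Hypothetical local smoke test (not part of this commit); SQLite stands in for Postgres.
from sqlalchemy import create_engine, inspect

test_engine = create_engine("sqlite:///:memory:")
test_table = {
    "name": "smoke_test",
    "columns": [
        {"name": "id", "type": "integer", "primary_key": True, "autoincrement": True},
        {"name": "uuid", "type": "string"},
    ],
}
create_table(test_engine, test_table, strategy="create_if_not_exists")
print(inspect(test_engine).get_columns("smoke_test"))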

View File

@@ -0,0 +1,14 @@
# py: 3.11
anyio==4.10.0
certifi==2025.8.3
greenlet==3.2.4
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
idna==3.10
psycopg2-binary==2.9.10
pyyaml==6.0.2
sniffio==1.3.1
sqlalchemy==2.0.43
typing-extensions==4.15.0
wmill==1.538.0

View File

@@ -0,0 +1,9 @@
summary: ''
description: ''
lock: '!inline f/CCR_ETL/ccr_etl_db_init.script.lock'
kind: script
schema:
  $schema: 'https://json-schema.org/draft/2020-12/schema'
  type: object
  properties: {}
  required: []

View File

@@ -0,0 +1,377 @@
"""
CCR ETL MTGJSON Processing Script
This script handles the extraction, transformation, and loading of MTGJSON data
into a PostgreSQL database. It supports downloading, unzipping, preprocessing,
and batch inserting of various data formats.
"""
import json
import yaml
from typing import Union
from zipfile import ZipFile

import psycopg2  # PostgreSQL driver used by SQLAlchemy's psycopg2 dialect
import requests
import wmill
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine

# Configuration paths
DB_RESOURCE_PATH = 'u/joshuakrzemien/slick_postgresql'
DB_CONFIG_PATH = 'f/CCR_ETL/ccr_db_config'
EXTRACT_CONFIG_PATH = 'f/CCR_ETL/ccr_extract_config'
DOWNLOAD_DIR = './shared/'

# Default processing parameters
DEFAULT_BATCH_SIZE = 1000
def validate_response_headers(response: requests.Response, outer_file_type: str) -> None:
    """Validate that the response content type matches the expected file type."""
    content_type = response.headers.get('Content-Type', '')
    # Compare on the media type only; servers often append parameters such as "; charset=utf-8"
    if not content_type.startswith(f'application/{outer_file_type}'):
        raise ValueError(f"Expected {outer_file_type} response, got {content_type}")


def download_request(url: str, filename: str, outer_file_type: str) -> bytes:
    """Download a file from the given URL and validate its content type."""
    print(f"🔽 Downloading {filename} from {url}")
    response = requests.get(url)
    response.raise_for_status()
    validate_response_headers(response, outer_file_type)
    print(f"✅ Download successful ({response.headers.get('Content-Length', 'unknown')} bytes)")
    return response.content
def generate_download_queue(url: str, filename: str, outer_file_type: str, iterables: dict) -> list:
    """
    Generate a queue of download items based on URL templates and iterable values.

    Example:
        url = "https://tcgcsv.com/tcgplayer/{game_id}/groups"
        iterables = {'game_id': [1, 3, 65, 71, 86]}
    """
    queue = []
    for key, values in iterables.items():
        for item in values:
            # Substitute the iterable's own key (e.g. {game_id}) into the templates
            queue_item = {
                'url': url.format(**{key: item}),
                'filename': filename.format(**{key: item}),
                'outer_file_type': outer_file_type,
            }
            queue.append(queue_item)
    return queue
def save_file(content: bytes, filename: str) -> None:
    """Save binary content to a file in the download directory."""
    filepath = DOWNLOAD_DIR + filename
    with open(filepath, 'wb') as f:
        f.write(content)
    print(f"💾 Saved {len(content)} bytes to {filename}")


def unzip_file(filename: str) -> str:
    """Extract a zip file and return the name of the extracted content."""
    new_filename = filename.replace('.zip', '')
    zip_path = DOWNLOAD_DIR + filename
    with ZipFile(zip_path, 'r') as zip_ref:
        file_list = zip_ref.namelist()
        print(f"📦 Extracting {len(file_list)} files from {filename}")
        zip_ref.extractall(DOWNLOAD_DIR)
    return new_filename


def load_file(filename: str, file_type: str) -> Union[dict, list]:
    """Load and parse a file from the download directory."""
    filepath = DOWNLOAD_DIR + filename
    if file_type == 'json':
        with open(filepath, 'r') as f:
            data = json.load(f)
        print(f"📖 Loaded {file_type} file: {filename}")
        return data
    raise ValueError(f"Unsupported file type: {file_type}")
def build_record_from_config(source_data: dict, expected_columns: list, additional_data: dict = None) -> dict:
    """
    Build a record using the structure defined in the extract config.

    Args:
        source_data: The source data dictionary
        expected_columns: List of column definitions from config
        additional_data: Optional additional data to merge (e.g., parent UUID)

    Returns:
        Dictionary representing a single database record
    """
    if additional_data is None:
        additional_data = {}

    # Merge source data with additional data (like the uuid from the parent structure)
    combined_data = {**source_data, **additional_data}

    record = {}
    for column in expected_columns:
        col_name = column['name']
        # Skip auto-increment columns (like 'id'); the database populates them
        if column.get('autoincrement', False):
            continue
        # Get the value from the combined data, defaulting to an empty string
        record[col_name] = combined_data.get(col_name, '')
    return record
def create_db_engine(db: dict) -> Engine:
    """Create and connection-test a database engine."""
    db_url = f"postgresql+psycopg2://postgres:{db['password']}@{db['host']}:{db['port']}/{db['dbname']}"
    engine = create_engine(db_url)
    # Test the connection up front so a bad resource fails fast
    with engine.connect():
        pass
    print(f"🔌 Connected to database: {db['host']}:{db['port']}/{db['dbname']}")
    return engine


def get_db_engine() -> Engine:
    """Get a database engine using the configured resource."""
    db = wmill.client.get_resource(DB_RESOURCE_PATH)
    return create_db_engine(db)
def generic_preprocess(
    data: Union[dict, list],
    expected_columns: list,
    config: dict
) -> list:
    """
    Generic data preprocessing function that handles various data structures.

    Args:
        data: Source data (dict or list)
        expected_columns: List of column definitions
        config: Preprocessing configuration

    Returns:
        List of processed records
    """
    # Step 1: Follow the configured data path into the document
    data_path = config.get("data_path", [])
    for key in data_path:
        if not isinstance(data, dict):
            raise ValueError(f"Expected dict while navigating path, got {type(data)} at key '{key}'")
        data = data.get(key)
        if data is None:
            raise ValueError(f"Missing key '{key}' in data path: {data_path}")

    # Step 2: Handle nested structures
    nested = config.get("nested", False)
    nested_key = config.get("nested_key", None)
    id_key = config.get("id_key", None)
    flatten = config.get("flatten", False)

    records = []
    if isinstance(data, dict):
        items = data.items()
    elif isinstance(data, list):
        items = enumerate(data)
    else:
        raise ValueError(f"Unsupported data structure: {type(data)}")

    for outer_key, outer_value in items:
        if nested:
            # e.g. TcgplayerSkus: a dict of {uuid: [sku, sku, ...]}
            if not isinstance(outer_value, list):
                continue
            for inner_value in outer_value:
                if id_key and not inner_value.get(id_key):
                    continue
                additional_data = {nested_key: outer_key} if nested_key else {}
                record = build_record_from_config(inner_value, expected_columns, additional_data)
                records.append(record)
        else:
            if not isinstance(outer_value, dict):
                continue
            if id_key and not outer_value.get(id_key):
                continue
            if flatten:
                # e.g. AllIdentifiers: hoist the nested 'identifiers' dict up beside the uuid
                nested_data = outer_value.get("identifiers", {})
                combined = {**nested_data, "uuid": outer_value.get("uuid")}
                record = build_record_from_config(combined, expected_columns)
            else:
                record = build_record_from_config(outer_value, expected_columns)
            records.append(record)

    print(f"🔄 Processed {len(records)} records")
    return records
def control_batch(data: list, batch_size: int = DEFAULT_BATCH_SIZE):
    """Split data into batches for processing."""
    for i in range(0, len(data), batch_size):
        yield data[i:i + batch_size]


def insert_data_into_table_batch(records: list, table: str, engine: Engine, batch_size: int = DEFAULT_BATCH_SIZE) -> None:
    """Insert records into a database table in batches."""
    if not records:
        print("⚠️ No records to insert, skipping database operation")
        return

    print(f"💾 Inserting {len(records)} records into {table} (batch size: {batch_size})")

    # Derive the column list from the first record
    columns = list(records[0].keys())
    column_names = ', '.join(f'"{col}"' for col in columns)
    placeholders = ', '.join(f':{col}' for col in columns)
    insert_sql = f"INSERT INTO {table} ({column_names}) VALUES ({placeholders})"

    with engine.connect() as conn:
        batch_count = 0
        total_inserted = 0
        for batch in control_batch(records, batch_size):
            batch_count += 1
            # Passing a list of dicts makes SQLAlchemy run an executemany
            conn.execute(text(insert_sql), batch)
            total_inserted += len(batch)
            if batch_count % 10 == 0:
                print(f"⏳ Inserted {total_inserted}/{len(records)} records...")
        conn.commit()
    print(f"✅ Inserted {total_inserted} records in {batch_count} batches")
def process_job(job: dict) -> dict:
    """
    Process a single ETL job.

    Args:
        job: Job configuration dictionary

    Returns:
        Dictionary with job processing results
    """
    # Extract job parameters
    url = job.get('url')
    filename = job.get('filename')
    outer_file_type = job.get('outer_file_type')
    inner_file_type = job.get('inner_file_type')
    table = job.get('table')
    expected_columns = job.get('expected_columns')
    batch_size = job.get('batch_size', DEFAULT_BATCH_SIZE)
    preprocess_function_name = job.get('preprocess_function', 'generic_preprocess')
    preprocess_config = job.get('preprocess_config')
    active = job.get('active')
    iterables = job.get('iterables')

    print(f"\n🚀 Processing job for table '{table}'")
    if not active:
        print("⚠️ Job is not active, skipping")
        return {"status": "skipped", "table": table}

    # Resolve the preprocessing function by name
    preprocess_function = globals().get(preprocess_function_name)
    if not callable(preprocess_function):
        raise ValueError(f"Preprocessing function '{preprocess_function_name}' not found or not callable.")

    # Get a database engine
    engine = get_db_engine()

    # Populate the download queue (iterables expand the URL/filename templates)
    if iterables:
        queue = generate_download_queue(url, filename, outer_file_type, iterables)
    else:
        queue = [{
            'url': url,
            'filename': filename,
            'outer_file_type': outer_file_type,
        }]

    # Process every item in the download queue
    records = []
    processed_filenames = []
    for queue_item in queue:
        content = download_request(queue_item['url'], queue_item['filename'], queue_item['outer_file_type'])
        save_file(content, queue_item['filename'])

        # Handle file extraction if needed; each item keeps its own concrete filename
        saved_filename = queue_item['filename']
        if queue_item['outer_file_type'] == 'zip':
            saved_filename = unzip_file(saved_filename)
        processed_filenames.append(saved_filename)

        # Load and preprocess this item's data
        data = load_file(saved_filename, inner_file_type)
        records.extend(preprocess_function(data, expected_columns, preprocess_config))

    # Insert all collected records into the database
    insert_data_into_table_batch(records, table, engine, batch_size)

    result = {
        "status": "success",
        "table": table,
        "records_processed": len(records),
        "filenames": processed_filenames,
    }
    print(f"✅ Job complete: {len(records)} records processed for {table}")
    return result
def main() -> dict:
    """
    Main ETL processing function.

    Returns:
        Dictionary with overall processing results
    """
    print("🎯 ETL Process Starting")
    print("=" * 50)

    # Load configuration
    config_yaml = wmill.get_variable(EXTRACT_CONFIG_PATH)
    config = yaml.safe_load(config_yaml)
    print(f"📋 Processing {len(config['jobs'])} jobs")

    results = []
    successful_jobs = 0
    failed_jobs = 0
    for i, job in enumerate(config['jobs'], 1):
        print(f"\n--- Job {i}/{len(config['jobs'])} ---")
        try:
            result = process_job(job)
            results.append(result)
            successful_jobs += 1
        except Exception as e:
            error_result = {
                "status": "error",
                "table": job.get('table', 'unknown'),
                "error": str(e),
                "filename": job.get('filename', 'unknown')
            }
            results.append(error_result)
            failed_jobs += 1
            print(f"❌ Job {i} failed: {e}")

    print("\n🏁 ETL Process Complete")
    print(f"✅ Successful: {successful_jobs} | ❌ Failed: {failed_jobs} | 📋 Total: {len(results)}")
    return {
        "status": "completed",
        "jobs_processed": len(results),
        "successful_jobs": successful_jobs,
        "failed_jobs": failed_jobs,
        "results": results
    }
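
To see what generic_preprocess produces for the nested TcgplayerSkus shape, here is a hypothetical miniature of the MTGJSON payload run through the same preprocess_config the skus job uses (all sample values invented for illustration):

# Hypothetical miniature of the TcgplayerSkus payload (sample values invented).
sample = {"data": {"abcd-1234": [
    {"condition": "NEAR MINT", "language": "ENGLISH", "printing": "NON FOIL",
     "productId": 42, "skuId": 999},
]}}
columns = [{"name": "uuid"}, {"name": "skuId"}, {"name": "condition"}]
cfg = {"data_path": ["data"], "nested": True, "nested_key": "uuid", "id_key": "skuId"}
print(generic_preprocess(sample, columns, cfg))
# [{'uuid': 'abcd-1234', 'skuId': 999, 'condition': 'NEAR MINT'}]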

View File

@@ -0,0 +1,17 @@
# py: 3.11
anyio==4.10.0
certifi==2025.8.3
charset-normalizer==3.4.3
greenlet==3.2.4
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
idna==3.10
psycopg2-binary==2.9.10
pyyaml==6.0.2
requests==2.32.5
sniffio==1.3.1
sqlalchemy==2.0.43
typing-extensions==4.15.0
urllib3==2.5.0
wmill==1.539.1

View File

@@ -0,0 +1,9 @@
summary: CCR ETL MTGJSON
description: ''
lock: '!inline f/CCR_ETL/ccr_etl_mtgjson.script.lock'
kind: script
schema:
  $schema: 'https://json-schema.org/draft/2020-12/schema'
  type: object
  properties: {}
  required: []

View File

@@ -0,0 +1,162 @@
description: ''
value: |-
  jobs:
    - name: mtgjson_skus
      active: true
      url: https://mtgjson.com/api/v5/TcgplayerSkus.json.zip
      filename: TcgplayerSkus.json.zip
      outer_file_type: zip
      inner_file_type: json
      preprocess_config:
        data_path: ["data"]
        nested: true
        nested_key: "uuid"
        id_key: "skuId"
      table: mtgjson_skus
      batch_size: 1000
      expected_columns:
        - name: uuid
          type: string
        - name: condition
          type: string
        - name: language
          type: string
        - name: printing
          type: string
        - name: finish
          type: string
        - name: productId
          type: string
        - name: skuId
          type: string
      cache:
        status: true
        ttl: 86400
    - name: mtgjson_identifiers
      active: true
      url: https://mtgjson.com/api/v5/AllIdentifiers.json.zip
      filename: AllIdentifiers.json.zip
      outer_file_type: zip
      inner_file_type: json
      preprocess_config:
        data_path: ["data"]
        nested: false
        flatten: true
        id_key: "uuid"
      table: mtgjson_identifiers
      batch_size: 1000
      expected_columns:
        - name: uuid
          type: string
        - name: name
          type: string
        - name: setCode
          type: string
        - name: abuId
          type: string
        - name: cardKingdomEtchedId
          type: string
        - name: cardKingdomFoilId
          type: string
        - name: cardKingdomId
          type: string
        - name: cardsphereId
          type: string
        - name: cardsphereFoilId
          type: string
        - name: cardtraderId
          type: string
        - name: csiId
          type: string
        - name: mcmId
          type: string
        - name: mcmMetaId
          type: string
        - name: miniaturemarketId
          type: string
        - name: mtgArenaId
          type: string
        - name: mtgjsonFoilVersionId
          type: string
        - name: mtgjsonNonFoilVersionId
          type: string
        - name: mtgjsonV4Id
          type: string
        - name: mtgoFoilId
          type: string
        - name: mtgoId
          type: string
        - name: multiverseId
          type: string
        - name: scgId
          type: string
        - name: scryfallId
          type: string
        - name: scryfallCardBackId
          type: string
        - name: scryfallOracleId
          type: string
        - name: scryfallIllustrationId
          type: string
        - name: tcgplayerProductId
          type: string
        - name: tcgplayerEtchedProductId
          type: string
        - name: tntId
          type: string
      cache:
        status: true
        ttl: 86400
    - name: tcgcsv_categories
      active: true
      url: https://tcgcsv.com/tcgplayer/categories
      filename: tcgplayer_categories.json
      outer_file_type: json
      inner_file_type: json
      table: tcgcsv_categories
      preprocess_config:
        data_path: ["results"]
        nested: false
      expected_columns:
        - name: categoryId
          type: integer
        - name: name
          type: string
        - name: modifiedOn
          type: string
        - name: displayName
          type: string
        - name: seoCategoryName
          type: string
        - name: categoryDescription
          type: string
        - name: categoryPageTitle
          type: string
        - name: sealedLabel
          type: string
        - name: nonSealedLabel
          type: string
        - name: conditionGuideUrl
          type: string
        - name: isScannable
          type: boolean
        - name: popularity
          type: integer
        - name: isDirect
          type: boolean
    - name: tcgcsv_groups
      active: true
      url: https://tcgcsv.com/tcgplayer/{game_id}/groups
      filename: tcgplayer_{game_id}_groups.json
      outer_file_type: json
      inner_file_type: json
      table: tcgcsv_groups
      preprocess_config:
        data_path: ["results"]
        nested: false
      expected_columns:
        - name: groupId
          type: integer
        - name: name
          type: string
        - name: abbreviation
          type: string
        - name: isSupplemental
          type: boolean
        - name: publishedOn
          type: string
        - name: modifiedOn
          type: string
        - name: categoryId
          type: integer
      iterables:
        game_id: [1, 3, 65, 71, 86]
is_secret: false
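
The iterables block on tcgcsv_groups is what fans the templated url and filename out into one download per game. A sketch of the expansion, assuming the generate_download_queue from ccr_etl_mtgjson above:

# Sketch of the iterables expansion (uses generate_download_queue from the script above).
queue = generate_download_queue(
    url="https://tcgcsv.com/tcgplayer/{game_id}/groups",
    filename="tcgplayer_{game_id}_groups.json",
    outer_file_type="json",
    iterables={"game_id": [1, 3, 65, 71, 86]},
)
for item in queue:
    print(item["url"], "->", item["filename"])
# https://tcgcsv.com/tcgplayer/1/groups -> tcgplayer_1_groups.json
# ...and so on for game_ids 3, 65, 71, 86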

View File

@@ -0,0 +1,6 @@
summary: null
display_name: CCR_ETL
extra_perms:
  u/joshuakrzemien: true
owners:
  - u/joshuakrzemien