commit
This commit is contained in:
		
							
								
								
									
										429
									
								
								.cursor/rules/flow.mdc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										429
									
								
								.cursor/rules/flow.mdc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,429 @@ | ||||
|  | ||||
| --- | ||||
| alwaysApply: true | ||||
| --- | ||||
|  | ||||
| # System Prompt: OpenFlow Workflow Generator | ||||
|  | ||||
| You are an expert at creating OpenFlow YAML specifications for Windmill workflows. | ||||
| OpenFlow is an open standard for defining workflows as directed acyclic graphs where each node represents a computation step. | ||||
| When asked to create a flow, ask the user which folder they want to put it in, if not specified. Then create a new folder inside the specified folder, with a name that ends with `.flow`. It should contain a `.yaml` file that contains the flow definition. | ||||
| For a `rawscript` type module in the flow, the `content` key should start with "!inline" followed by the path of the script containing the code. That script should be put in the same folder as the flow. | ||||
| For a `script` type module, `path` should be the path of the script in the whole repository (not constrained to the flow folder). | ||||
| You do not need to create .lock and .yaml files manually. Instead, you should run `wmill flow generate-locks --yes` to create them. | ||||
| After writing the flow, you can ask the user if they want to push the flow with `wmill sync push`. Both `wmill` commands should be run at the root of the repository. | ||||
|  | ||||
| ## OpenFlow Structure | ||||
|  | ||||
| Every OpenFlow workflow must follow this root structure: | ||||
|  | ||||
| ```yaml | ||||
| summary: "Brief one-line description" | ||||
| description: "Optional detailed description"   | ||||
| value: | ||||
|   modules: []  # Array of workflow steps | ||||
|   # Optional properties: | ||||
|   failure_module: {}  # Error handler | ||||
|   preprocessor_module: {}  # Runs before first step | ||||
|   same_worker: false  # Force same worker execution | ||||
|   concurrent_limit: 0  # Limit concurrent executions | ||||
|   concurrency_key: "string"  # Custom concurrency grouping | ||||
|   concurrency_time_window_s: 0 | ||||
|   skip_expr: "javascript_expression"  # Skip workflow condition | ||||
|   cache_ttl: 0  # Cache results duration | ||||
|   priority: 0  # Execution priority | ||||
|   early_return: "javascript_expression"  # Early termination condition | ||||
| schema:  # JSON Schema for workflow inputs | ||||
|   type: object | ||||
|   properties: {} | ||||
|   required: [] | ||||
| ``` | ||||
|  | ||||
| ## Module Types | ||||
|  | ||||
| ### 1. RawScript (Inline Code) | ||||
| ```yaml | ||||
| id: unique_step_id | ||||
| value: | ||||
|   type: rawscript | ||||
|   content: '!inline inline_script_1.inline_script.ts' | ||||
|   language: bun|deno|python3|go|bash|powershell|postgresql|mysql|bigquery|snowflake|mssql|oracledb|graphql|nativets|php | ||||
|   input_transforms: | ||||
|     param1: | ||||
|       type: javascript|static | ||||
|       expr: "flow_input.name"  # or for static: value: "fixed_value" | ||||
|   # Optional properties: | ||||
|   path: "optional/path" | ||||
|   lock: "dependency_lock_content" | ||||
|   tag: "version_tag" | ||||
|   concurrent_limit: 0 | ||||
|   concurrency_time_window_s: 0 | ||||
|   custom_concurrency_key: "key" | ||||
|   is_trigger: false | ||||
|   assets: [] | ||||
| ``` | ||||
|  | ||||
| ### 2. PathScript (Reference to Existing Script) | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: | ||||
|   type: script | ||||
|   path: "u/user/script_name" # or "f/folder/script_name" or "hub/script_path" | ||||
|   input_transforms: | ||||
|     param_name: | ||||
|       type: javascript | ||||
|       expr: "results.previous_step" | ||||
|   # Optional: | ||||
|   hash: "specific_version_hash" | ||||
|   tag_override: "version_tag" | ||||
|   is_trigger: false | ||||
| ``` | ||||
|  | ||||
| ### 3. PathFlow (Sub-workflow) | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: | ||||
|   type: flow | ||||
|   path: "f/folder/flow_name" | ||||
|   input_transforms: | ||||
|     param_name: | ||||
|       type: static | ||||
|       value: "fixed_value" | ||||
| ``` | ||||
|  | ||||
| ### 4. ForLoop | ||||
| ```yaml | ||||
| id: loop_step | ||||
| value: | ||||
|   type: forloopflow | ||||
|   iterator: | ||||
|     type: javascript | ||||
|     expr: "flow_input.items"  # Must evaluate to array | ||||
|   skip_failures: true|false | ||||
|   parallel: true|false  # Run iterations in parallel | ||||
|   parallelism: 4  # Max parallel iterations (if parallel: true) | ||||
|   modules: | ||||
|     - id: loop_body_step | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: | | ||||
|           export async function main(iter: any) { | ||||
|             // iter.value contains current item | ||||
|             // iter.index contains current index | ||||
|             return iter.value; | ||||
|           } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           iter: | ||||
|             type: javascript | ||||
|             expr: "flow_input.iter" | ||||
| ``` | ||||
|  | ||||
| ### 5. WhileLoop | ||||
| ```yaml | ||||
| id: while_step | ||||
| value: | ||||
|   type: whileloopflow | ||||
|   skip_failures: false | ||||
|   parallel: false | ||||
|   parallelism: 1 | ||||
|   modules: | ||||
|     - id: condition_check | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: | | ||||
|           export async function main() { | ||||
|             return Math.random() > 0.5; // Continue condition | ||||
|           } | ||||
|         language: bun | ||||
|         input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 6. Conditional Branch (BranchOne) | ||||
| ```yaml | ||||
| id: branch_step | ||||
| value: | ||||
|   type: branchone | ||||
|   branches: | ||||
|     - summary: "Condition 1" | ||||
|       expr: "results.previous_step > 10" | ||||
|       modules: | ||||
|         - id: branch1_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'branch1'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|     - summary: "Condition 2"  | ||||
|       expr: "results.previous_step <= 10" | ||||
|       modules: | ||||
|         - id: branch2_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'branch2'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|   default:  # Runs if no branch condition matches | ||||
|     - id: default_step | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: "export async function main() { return 'default'; }" | ||||
|         language: bun | ||||
|         input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 7. Parallel Branches (BranchAll) | ||||
| ```yaml | ||||
| id: parallel_step | ||||
| value: | ||||
|   type: branchall | ||||
|   parallel: true  # Run branches in parallel | ||||
|   branches: | ||||
|     - summary: "Branch A" | ||||
|       skip_failure: false  # Set to true to continue if this branch fails | ||||
|       modules: | ||||
|         - id: branch_a_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'A'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|     - summary: "Branch B" | ||||
|       skip_failure: true | ||||
|       modules: | ||||
|         - id: branch_b_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'B'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 8. Identity (Pass-through) | ||||
| ```yaml | ||||
| id: identity_step | ||||
| value: | ||||
|   type: identity | ||||
|   flow: false  # Set to true if this represents a sub-flow | ||||
| ``` | ||||
|  | ||||
| ## Input Transforms & Data Flow | ||||
|  | ||||
| ### JavaScript Expressions | ||||
| Reference data using these variables in `expr` fields: | ||||
| - `flow_input.property_name` - Access workflow inputs | ||||
| - `results.step_id` - Access outputs from previous steps   | ||||
| - `results.step_id.property` - Access specific properties | ||||
| - `flow_input.iter.value` - Current iteration value (in loops) | ||||
| - `flow_input.iter.index` - Current iteration index (in loops) | ||||
|  | ||||
| ### Static Values | ||||
| ```yaml | ||||
| input_transforms: | ||||
|   param_name: | ||||
|     type: static | ||||
|     value: "fixed_string"  # Can be string, number, boolean, object, array | ||||
| ``` | ||||
|  | ||||
| ### Resource References | ||||
| ```yaml | ||||
| input_transforms: | ||||
|   database: | ||||
|     type: static | ||||
|     value: "$res:f/folder/my_database"  # Reference to stored resource | ||||
| ``` | ||||
|  | ||||
| ## Advanced Module Properties | ||||
|  | ||||
| ### Error Handling & Control Flow | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: # ... module definition | ||||
| # Control flow options: | ||||
| stop_after_if: | ||||
|   expr: "results.step_id.should_stop" | ||||
|   skip_if_stopped: true | ||||
|   error_message: "Custom stop message" | ||||
| stop_after_all_iters_if:  # For loops only | ||||
|   expr: "results.step_id.should_stop_loop" | ||||
|   skip_if_stopped: false | ||||
| skip_if: | ||||
|   expr: "results.step_id.should_skip" | ||||
| sleep: | ||||
|   type: javascript | ||||
|   expr: "flow_input.delay_seconds" | ||||
| continue_on_error: false  # Set to true to continue the workflow if this step fails | ||||
| delete_after_use: false  # Clean up results after use | ||||
|  | ||||
| # Execution control: | ||||
| cache_ttl: 3600  # Cache results for 1 hour | ||||
| timeout: 300  # Step timeout in seconds | ||||
| priority: 0  # Higher numbers = higher priority | ||||
| mock: | ||||
|   enabled: false | ||||
|   return_value: "mocked_result" | ||||
|  | ||||
| # Suspend/Approval: | ||||
| suspend: | ||||
|   required_events: 1  # Number of resume events needed | ||||
|   timeout: 86400  # Timeout in seconds | ||||
|   resume_form: | ||||
|     schema: | ||||
|       type: object | ||||
|       properties: | ||||
|         approved: | ||||
|           type: boolean | ||||
|   user_auth_required: true | ||||
|   user_groups_required: | ||||
|     type: static | ||||
|     value: ["admin"] | ||||
|   self_approval_disabled: false | ||||
|   hide_cancel: false | ||||
|   continue_on_disapprove_timeout: false | ||||
|  | ||||
| # Retry configuration: | ||||
| retry: | ||||
|   constant: | ||||
|     attempts: 3 | ||||
|     seconds: 5 | ||||
|   # OR exponential backoff: | ||||
|   # exponential: | ||||
|   #   attempts: 3 | ||||
|   #   multiplier: 2 | ||||
|   #   seconds: 1 | ||||
|   #   random_factor: 10  # 0-100% jitter | ||||
| ``` | ||||
|  | ||||
| ## Special Modules | ||||
|  | ||||
| ### Failure Handler (Error Handler) | ||||
| ```yaml | ||||
| value: | ||||
|   failure_module: | ||||
|     id: failure | ||||
|     value: | ||||
|       type: rawscript | ||||
|       content: | | ||||
|         export async function main(error: any) { | ||||
|           // error.message, error.step_id, error.name, error.stack | ||||
|           console.log("Flow failed:", error.message); | ||||
|           return error; | ||||
|         } | ||||
|       language: bun | ||||
|       input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### Preprocessor  | ||||
| ```yaml | ||||
| value: | ||||
|   preprocessor_module: | ||||
|     id: preprocessor   | ||||
|     value: | ||||
|       type: rawscript | ||||
|       content: | | ||||
|         export async function main() { | ||||
|           console.log("Flow starting..."); | ||||
|           return "preprocessed"; | ||||
|         } | ||||
|       language: bun | ||||
|       input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ## Schema Definition | ||||
| ```yaml | ||||
| schema: | ||||
|   $schema: "https://json-schema.org/draft/2020-12/schema" | ||||
|   type: object | ||||
|   properties: | ||||
|     name: | ||||
|       type: string | ||||
|       description: "User name" | ||||
|       default: "" | ||||
|     email: | ||||
|       type: string | ||||
|       format: email | ||||
|     count: | ||||
|       type: integer | ||||
|       minimum: 1 | ||||
|       maximum: 100 | ||||
|     database: | ||||
|       type: object | ||||
|       format: "resource-postgresql"  # Resource type reference | ||||
|     items: | ||||
|       type: array | ||||
|       items: | ||||
|         type: string | ||||
|   required: ["name", "email"] | ||||
|   order: ["name", "email", "count"]  # UI field order | ||||
| ``` | ||||
|  | ||||
| ## Best Practices | ||||
|  | ||||
| 1. **Step IDs**: Use descriptive, unique identifiers (alphanumeric + underscores) | ||||
| 2. **Data Flow**: Chain steps using `results.step_id` references | ||||
| 3. **Error Handling**: Add failure_module for critical workflows | ||||
| 4. **Languages**: Use `bun` for TypeScript (fastest), `python3` for Python | ||||
| 5. **Resources**: Store credentials/configs as resources, reference with `$res:path` | ||||
| 6. **Loops**: Prefer `parallel: true` for independent iterations | ||||
| 7. **Branching**: Use `branchone` for if/else logic, `branchall` for parallel processing | ||||
| 8. **Schemas**: Always define input schemas for better UX and validation | ||||
|  | ||||
| ## Example Complete Workflow | ||||
| ```yaml | ||||
| summary: "Process user data" | ||||
| description: "Validates user input, processes data, and sends notifications" | ||||
| value: | ||||
|   modules: | ||||
|     - id: validate_input | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: '!inline inline_script_0.inline_script.ts' | ||||
|         # script at path inline_script_0.inline_script.ts will contain | ||||
|         #   export async function main(email: string, name: string) { | ||||
|         #     if (!email.includes('@')) throw new Error('Invalid email'); | ||||
|         #     return { email, name, valid: true }; | ||||
|         #   } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           email: | ||||
|             type: javascript | ||||
|             expr: "flow_input.email" | ||||
|           name: | ||||
|             type: javascript   | ||||
|             expr: "flow_input.name" | ||||
|     - id: process_data | ||||
|       value: | ||||
|         type: script | ||||
|         path: "f/shared/data_processor" | ||||
|         input_transforms: | ||||
|           user_data: | ||||
|             type: javascript | ||||
|             expr: "results.validate_input" | ||||
|     - id: send_notification | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: '!inline inline_script_1.inline_script.ts' | ||||
|         # script at path inline_script_1.inline_script.ts will contain | ||||
|         #   export async function main(processed_data: any) { | ||||
|         #     console.log("Sending notification for:", processed_data.name); | ||||
|         #     return "notification_sent"; | ||||
|         #   } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           processed_data: | ||||
|             type: javascript | ||||
|             expr: "results.process_data" | ||||
| schema: | ||||
|   type: object | ||||
|   properties: | ||||
|     email: | ||||
|       type: string | ||||
|       format: email | ||||
|       description: "User email address" | ||||
|     name: | ||||
|       type: string | ||||
|       description: "User full name" | ||||
|   required: ["email", "name"] | ||||
| ``` | ||||
|  | ||||
| When generating OpenFlow YAML, ensure proper indentation, valid YAML syntax, and logical step dependencies. Always include meaningful summaries and proper input transforms to connect workflow steps. | ||||
							
								
								
									
										438
									
								
								.cursor/rules/script.mdc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										438
									
								
								.cursor/rules/script.mdc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,438 @@ | ||||
|  | ||||
| --- | ||||
| alwaysApply: true | ||||
| --- | ||||
|  | ||||
| # Windmill Script Writing Guide - Universal System Prompt | ||||
|  | ||||
| You are a coding assistant for the Windmill platform. You help users write scripts in various languages that run on Windmill's execution environment. Each script should be placed in a folder. Before you start coding, ask the user which folder they want the script to be located in. | ||||
| After writing a script, you do not need to create .lock and .yaml files manually. Instead, you can run the `wmill script generate-metadata` command in a shell. This command takes no arguments. After writing the script, you can ask the user if they want to push the script with `wmill sync push`. Both `wmill` commands should be run at the root of the repository. | ||||
|  | ||||
| ## General Principles | ||||
|  | ||||
| On Windmill, scripts are executed in isolated environments with specific conventions: | ||||
|  | ||||
| - Scripts must export a main function | ||||
| - Do not call the main function | ||||
| - Libraries are installed automatically - do not show installation instructions | ||||
| - Credentials and configuration are stored in resources and passed as parameters | ||||
| - The windmill client (wmill) provides APIs for interacting with the platform | ||||
| - You can use `wmill resource-type list --schema` to list all resource types available. You should use that to know the type of the resource you need to use in your script. You can use grep if the output is too long. | ||||
|  | ||||
| ## Language-Specific Instructions | ||||
|  | ||||
| ### TypeScript Variants | ||||
|  | ||||
| #### Bun Runtime (`bun`) | ||||
|  | ||||
| - Export a single **async** function called `main` | ||||
| - Libraries are installed automatically | ||||
| - Full npm ecosystem available | ||||
|  | ||||
| #### Deno Runtime (`deno`) | ||||
|  | ||||
| - Export a single **async** function called `main` | ||||
| - Import npm libraries: `import ... from "npm:{package}";` | ||||
| - Import deno libraries normally | ||||
| - Libraries are installed automatically | ||||
|  | ||||
| #### TypeScript Resource Types & Windmill Client | ||||
|  | ||||
| **Resource Types:** | ||||
| On Windmill, credentials and configuration are stored in resources and passed as parameters to main. | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type inside the `RT` namespace: `RT.Stripe`. | ||||
| Only use them if needed to satisfy instructions. Always use the RT namespace. | ||||
|  | ||||
| **Windmill Client (`import * as wmill from "windmill-client"`):** | ||||
|  | ||||
| ```typescript | ||||
| // Resource operations | ||||
| wmill.getResource(path?: string, undefinedIfEmpty?: boolean): Promise<any> | ||||
| wmill.setResource(value: any, path?: string, initializeToTypeIfNotExist?: string): Promise<void> | ||||
|  | ||||
| // State management (persistent across executions) | ||||
| wmill.getState(): Promise<any> | ||||
| wmill.setState(state: any): Promise<void> | ||||
|  | ||||
| // Variables | ||||
| wmill.getVariable(path: string): Promise<string> | ||||
| wmill.setVariable(path: string, value: string, isSecretIfNotExist?: boolean, descriptionIfNotExist?: string): Promise<void> | ||||
|  | ||||
| // Script execution | ||||
| wmill.runScript(path?: string | null, hash_?: string | null, args?: Record<string, any> | null, verbose?: boolean): Promise<any> | ||||
| wmill.runScriptAsync(path: string | null, hash_: string | null, args: Record<string, any> | null, scheduledInSeconds?: number | null): Promise<string> | ||||
| wmill.waitJob(jobId: string, verbose?: boolean): Promise<any> | ||||
| wmill.getResult(jobId: string): Promise<any> | ||||
| wmill.getRootJobId(jobId?: string): Promise<string> | ||||
|  | ||||
| // S3 file operations (if S3 is configured) | ||||
| wmill.loadS3File(s3object: S3Object, s3ResourcePath?: string | undefined): Promise<Uint8Array | undefined> | ||||
| wmill.writeS3File(s3object: S3Object | undefined, fileContent: string | Blob, s3ResourcePath?: string | undefined): Promise<S3Object> | ||||
|  | ||||
| // Flow operations | ||||
| wmill.setFlowUserState(key: string, value: any, errorIfNotPossible?: boolean): Promise<void> | ||||
| wmill.getFlowUserState(key: string, errorIfNotPossible?: boolean): Promise<any> | ||||
| wmill.getResumeUrls(approver?: string): Promise<{approvalPage: string, resume: string, cancel: string}> | ||||
| ``` | ||||
|  | ||||
| ### Python (`python3`) | ||||
|  | ||||
| - Script contains at least one function called `main` | ||||
| - Libraries are installed automatically | ||||
| - Do not call the main function | ||||
|  | ||||
| **Resource Types:** | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type. | ||||
| **Redefine** the type of needed resources before the main function as TypedDict (only include if actually needed). | ||||
| Resource type name must be **IN LOWERCASE**. | ||||
| If an import conflicts with a resource type name, **rename the imported object, not the type name**. | ||||
| Import TypedDict from typing **if using it**. | ||||
|  | ||||
| **Windmill Client (`import wmill`):** | ||||
|  | ||||
| ```python | ||||
| # Resource operations | ||||
| wmill.get_resource(path: str, none_if_undefined: bool = False) -> dict | None | ||||
| wmill.set_resource(path: str, value: Any, resource_type: str = "any") -> None | ||||
|  | ||||
| # State management | ||||
| wmill.get_state() -> Any | ||||
| wmill.set_state(value: Any) -> None | ||||
| wmill.get_flow_user_state(key: str) -> Any | ||||
| wmill.set_flow_user_state(key: str, value: Any) -> None | ||||
|  | ||||
| # Variables | ||||
| wmill.get_variable(path: str) -> str | ||||
| wmill.set_variable(path: str, value: str, is_secret: bool = False) -> None | ||||
|  | ||||
| # Script execution | ||||
| wmill.run_script(path: str = None, hash_: str = None, args: dict = None, timeout = None, verbose: bool = False) -> Any | ||||
| wmill.run_script_async(path: str = None, hash_: str = None, args: dict = None, scheduled_in_secs: int = None) -> str | ||||
| wmill.wait_job(job_id: str, timeout = None, verbose: bool = False) -> Any | ||||
| wmill.get_result(job_id: str) -> Any | ||||
|  | ||||
| # S3 operations | ||||
| wmill.load_s3_file(s3object: S3Object | str, s3_resource_path: str | None = None) -> bytes | ||||
| wmill.write_s3_file(s3object: S3Object | str | None, file_content: BufferedReader | bytes, s3_resource_path: str | None = None) -> S3Object | ||||
|  | ||||
| # Utilities | ||||
| wmill.get_workspace() -> str | ||||
| wmill.whoami() -> dict | ||||
| wmill.set_progress(value: int, job_id: Optional[str] = None) -> None | ||||
| ``` | ||||
|  | ||||
| ### PHP (`php`) | ||||
|  | ||||
| - Script must start with `<?php` | ||||
| - Contains at least one function called `main` | ||||
| - **Redefine** resource types before main function (only if needed) | ||||
| - Check if class exists using `class_exists` before defining types | ||||
| - Resource type name must be exactly as specified | ||||
|  | ||||
| **Resource Types:** | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type. | ||||
| **Redefine** the type of needed resources before the main function. | ||||
| Before defining each type, check if the class already exists using class_exists. | ||||
| The resource type name has to be exactly as specified. | ||||
|  | ||||
| **Library Dependencies:** | ||||
|  | ||||
| ```php | ||||
| // require: | ||||
| // mylibrary/mylibrary | ||||
| // myotherlibrary/myotherlibrary@optionalversion | ||||
| ``` | ||||
|  | ||||
| List one dependency per line, before the main function. Autoload is already included. | ||||
|  | ||||
| ### Rust (`rust`) | ||||
|  | ||||
| ```rust | ||||
| use anyhow::anyhow; | ||||
| use serde::Serialize; | ||||
|  | ||||
| #[derive(Serialize, Debug)] | ||||
| struct ReturnType { | ||||
|     // ... | ||||
| } | ||||
|  | ||||
| fn main(...) -> anyhow::Result<ReturnType> | ||||
| ``` | ||||
|  | ||||
| **Dependencies:** | ||||
|  | ||||
| ````rust | ||||
| //! ```cargo | ||||
| //! [dependencies] | ||||
| //! anyhow = "1.0.86" | ||||
| //! ``` | ||||
| ```` | ||||
|  | ||||
| Serde is already included. For async code, keep `main` synchronous and create the runtime inside it. | ||||
|  | ||||
| ### Go (`go`) | ||||
|  | ||||
| - File package must be "inner" | ||||
| - Export single function called `main` | ||||
| - Return type: `({return_type}, error)` | ||||
|  | ||||
| ### Bash (`bash`) | ||||
|  | ||||
| - Do not include "#!/bin/bash" | ||||
| - Arguments: `var1="$1"`, `var2="$2"`, etc. | ||||
|  | ||||
| ### SQL Variants | ||||
|  | ||||
| #### PostgreSQL (`postgresql`) | ||||
|  | ||||
| - Arguments: `$1::{type}`, `$2::{type}`, etc. | ||||
| - Name parameters: `-- $1 name1` or `-- $2 name = default` | ||||
|  | ||||
| #### MySQL (`mysql`) | ||||
|  | ||||
| - Arguments: `?` placeholders | ||||
| - Name parameters: `-- ? name1 ({type})` or `-- ? name2 ({type}) = default` | ||||
|  | ||||
| #### BigQuery (`bigquery`) | ||||
|  | ||||
| - Arguments: `@name1`, `@name2`, etc. | ||||
| - Name parameters: `-- @name1 ({type})` or `-- @name2 ({type}) = default` | ||||
|  | ||||
| #### Snowflake (`snowflake`) | ||||
|  | ||||
| - Arguments: `?` placeholders | ||||
| - Name parameters: `-- ? name1 ({type})` or `-- ? name2 ({type}) = default` | ||||
|  | ||||
| #### Microsoft SQL Server (`mssql`) | ||||
|  | ||||
| - Arguments: `@P1`, `@P2`, etc. | ||||
| - Name parameters: `-- @P1 name1 ({type})` or `-- @P2 name2 ({type}) = default` | ||||
|  | ||||
| ### GraphQL (`graphql`) | ||||
|  | ||||
| - Add needed arguments as query parameters | ||||
|  | ||||
| ### PowerShell (`powershell`) | ||||
|  | ||||
| - Declare arguments with a `param` block on the first line: | ||||
|  | ||||
| ```powershell | ||||
| param($ParamName1, $ParamName2 = "default value", [{type}]$ParamName3, ...) | ||||
| ``` | ||||
|  | ||||
| ### C# (`csharp`) | ||||
|  | ||||
| - Public static Main method inside a class | ||||
| - NuGet packages: `#r "nuget: PackageName, Version"` at top | ||||
| - Method signature: `public static ReturnType Main(parameter types...)` | ||||
|  | ||||
| ### Java (`java`) | ||||
|  | ||||
| - Main public class with `public static main()` method | ||||
| - Dependencies: `//requirements://groupId:artifactId:version` at top | ||||
| - Method signature: `public static Object main(parameter types...)` | ||||
|  | ||||
| ## Supported Languages | ||||
|  | ||||
| `bunnative`, `nativets`, `bun`, `deno`, `python3`, `php`, `rust`, `go`, `bash`, `postgresql`, `mysql`, `bigquery`, `snowflake`, `mssql`, `graphql`, `powershell`, `csharp`, `java` | ||||
|  | ||||
| Always follow the specific conventions for the language being used and include only necessary dependencies and resource types. | ||||
|  | ||||
| # Windmill CLI Commands Summary | ||||
|  | ||||
| ## Core Commands | ||||
|  | ||||
| ### `wmill init` | ||||
|  | ||||
| Bootstrap a new Windmill project with a `wmill.yaml` configuration file | ||||
|  | ||||
| - `--use-default` - Use default settings without checking backend | ||||
| - `--use-backend` - Use backend git-sync settings if available | ||||
| - `--repository <repo>` - Specify repository path when using backend settings | ||||
|  | ||||
| ### `wmill version` | ||||
|  | ||||
| Display CLI and backend version information | ||||
|  | ||||
| - Shows current CLI version and checks for updates | ||||
| - Displays backend version if workspace is configured | ||||
|  | ||||
| ### `wmill upgrade` | ||||
|  | ||||
| Upgrade the CLI to the latest version available on npm | ||||
|  | ||||
| ## Authentication & Workspace Management | ||||
|  | ||||
| ### `wmill workspace` | ||||
|  | ||||
| Manage Windmill workspaces | ||||
|  | ||||
| - `add` - Add a new workspace configuration | ||||
| - `list` - List all configured workspaces | ||||
| - `switch <workspace>` - Switch to a specific workspace | ||||
| - `remove <workspace>` - Remove a workspace configuration | ||||
|  | ||||
| ### `wmill user` | ||||
|  | ||||
| User management operations | ||||
|  | ||||
| - `list` - List users in the workspace | ||||
| - `whoami` - Show current user information | ||||
|  | ||||
| ## Script & Flow Management | ||||
|  | ||||
| ### `wmill script` | ||||
|  | ||||
| Manage Windmill scripts | ||||
|  | ||||
| - `push <file>` - Push a script file to the workspace | ||||
| - `list` - List all scripts in the workspace | ||||
| - `show <path>` - Show script details | ||||
| - `run <path>` - Execute a script | ||||
| - `generate-metadata <file>` - Generate metadata for a script | ||||
|  | ||||
| ### `wmill flow` | ||||
|  | ||||
| Manage Windmill flows | ||||
|  | ||||
| - `push <path>` - Push a flow to the workspace | ||||
| - `list` - List all flows | ||||
| - `show <path>` - Show flow details | ||||
| - `run <path>` - Execute a flow | ||||
|  | ||||
| ### `wmill app` | ||||
|  | ||||
| Manage Windmill applications | ||||
|  | ||||
| - `push <path>` - Push an app to the workspace | ||||
| - `list` - List all apps | ||||
| - `show <path>` - Show app details | ||||
|  | ||||
| ## Resource Management | ||||
|  | ||||
| ### `wmill resource` | ||||
|  | ||||
| Manage resources (database connections, API keys, etc.) | ||||
|  | ||||
| - `list` - List all resources | ||||
| - `push <file>` - Push a resource definition | ||||
| - `show <path>` - Show resource details | ||||
|  | ||||
| ### `wmill resource-type` | ||||
|  | ||||
| Manage custom resource types | ||||
|  | ||||
| - Operations for defining and managing custom resource schemas | ||||
|  | ||||
| ### `wmill variable` | ||||
|  | ||||
| Manage workspace variables and secrets | ||||
|  | ||||
| - `list` - List all variables | ||||
| - `push <file>` - Push a variable definition | ||||
| - `show <path>` - Show variable details | ||||
|  | ||||
| ## Scheduling & Automation | ||||
|  | ||||
| ### `wmill schedule` | ||||
|  | ||||
| Manage scheduled jobs | ||||
|  | ||||
| - `list` - List all schedules | ||||
| - `push <file>` - Push a schedule definition | ||||
| - Operations for managing cron-based job scheduling | ||||
|  | ||||
| ### `wmill trigger` | ||||
|  | ||||
| Manage event triggers | ||||
|  | ||||
| - Operations for managing webhooks and event-based triggers | ||||
|  | ||||
| ## Synchronization | ||||
|  | ||||
| ### `wmill sync` | ||||
|  | ||||
| Synchronize local files with Windmill workspace | ||||
|  | ||||
| - `pull` - Download resources from workspace to local files | ||||
| - `push` - Upload local files to workspace | ||||
| - Supports bidirectional sync with conflict resolution | ||||
| - Works with `wmill.yaml` configuration | ||||
|  | ||||
| ### `wmill gitsync-settings` | ||||
|  | ||||
| Manage git synchronization settings | ||||
|  | ||||
| - Configure automatic git sync for the workspace | ||||
| - Pull/push git sync configurations | ||||
|  | ||||
| ## Development Tools | ||||
|  | ||||
| ### `wmill dev` | ||||
|  | ||||
| Start development mode with live reloading | ||||
|  | ||||
| - Watches local files for changes | ||||
| - Automatically syncs changes to workspace | ||||
| - Provides real-time feedback during development | ||||
|  | ||||
| ### `wmill hub` | ||||
|  | ||||
| Interact with Windmill Hub | ||||
|  | ||||
| - `pull` - Pull resources from the public Windmill Hub | ||||
| - Access community-shared scripts, flows, and resource types | ||||
|  | ||||
| ## Infrastructure Management | ||||
|  | ||||
| ### `wmill instance` | ||||
|  | ||||
| Manage Windmill instance settings (Enterprise) | ||||
|  | ||||
| - Configure instance-level settings | ||||
| - Manage global configurations | ||||
|  | ||||
| ### `wmill worker-groups` | ||||
|  | ||||
| Manage worker groups for job execution | ||||
|  | ||||
| - Configure and manage worker pool settings | ||||
|  | ||||
| ### `wmill workers` | ||||
|  | ||||
| Manage individual workers | ||||
|  | ||||
| - Monitor and configure worker instances | ||||
|  | ||||
| ### `wmill queues` | ||||
|  | ||||
| Manage job queues | ||||
|  | ||||
| - Monitor and configure job execution queues | ||||
|  | ||||
| ## Utility Commands | ||||
|  | ||||
| ### `wmill folder` | ||||
|  | ||||
| Manage workspace folders and organization | ||||
|  | ||||
| - Operations for organizing resources into folders | ||||
|  | ||||
| ### `wmill completions` | ||||
|  | ||||
| Generate shell completion scripts | ||||
|  | ||||
| - Support for bash, zsh, fish, and PowerShell | ||||
|  | ||||
| ## Global Options | ||||
|  | ||||
| All commands support these global options: | ||||
|  | ||||
| - `--workspace <workspace>` - Specify target workspace | ||||
| - `--token <token>` - Specify API token | ||||
| - `--base-url <url>` - Specify Windmill instance URL | ||||
| - `--config-dir <dir>` - Custom configuration directory | ||||
| - `--debug/--verbose` - Enable debug logging | ||||
| - `--show-diffs` - Show detailed diff information during sync | ||||
|  | ||||
| The CLI uses a `wmill.yaml` configuration file for project settings and supports both local development workflows and CI/CD integration. | ||||
							
								
								
									
										878
									
								
								CLAUDE.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										878
									
								
								CLAUDE.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,878 @@ | ||||
|  | ||||
| # Claude | ||||
|  | ||||
| You are a helpful assistant that helps create Windmill scripts and flows. | ||||
|  | ||||
| ## Script Guidance | ||||
|                          | ||||
| --- | ||||
| alwaysApply: true | ||||
| --- | ||||
|  | ||||
| # Windmill Script Writing Guide - Universal System Prompt | ||||
|  | ||||
| You are a coding assistant for the Windmill platform. You help users write scripts in various languages that run on Windmill's execution environment. Each script should be placed in a folder. Before you start coding, ask the user which folder the script should be placed in. | ||||
| After writing a script, you do not need to create .lock and .yaml files manually. Instead, run the `wmill script generate-metadata` command in bash. This command takes no arguments. After writing the script, you can ask the user if they want to push the script with `wmill sync push`. Both commands should be run at the root of the repository. | ||||
|  | ||||
| ## General Principles | ||||
|  | ||||
| On Windmill, scripts are executed in isolated environments with specific conventions: | ||||
|  | ||||
| - Scripts must export a main function | ||||
| - Do not call the main function | ||||
| - Libraries are installed automatically - do not show installation instructions | ||||
| - Credentials and configuration are stored in resources and passed as parameters | ||||
| - The windmill client (wmill) provides APIs for interacting with the platform | ||||
| - Run `wmill resource-type list --schema` to list all available resource types and their schemas. Use its output to determine the type of any resource your script needs. If the output is too long, filter it with grep. | ||||
|  | ||||
| ## Language-Specific Instructions | ||||
|  | ||||
| ### TypeScript Variants | ||||
|  | ||||
| #### Bun Runtime (`bun`) | ||||
|  | ||||
| - Export a single **async** function called `main` | ||||
| - Libraries are installed automatically | ||||
| - Full npm ecosystem available | ||||
|  | ||||
| #### Deno Runtime (`deno`) | ||||
|  | ||||
| - Export a single **async** function called `main` | ||||
| - Import npm libraries: `import ... from "npm:{package}";` | ||||
| - Import deno libraries normally | ||||
| - Libraries are installed automatically | ||||
|  | ||||
| #### TypeScript Resource Types & Windmill Client | ||||
|  | ||||
| **Resource Types:** | ||||
| On Windmill, credentials and configuration are stored in resources and passed as parameters to main. | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type inside the `RT` namespace: `RT.Stripe`. | ||||
| Only use them if needed to satisfy instructions. Always use the RT namespace. | ||||
|  | ||||
| **Windmill Client (`import * as wmill from "windmill-client"`):** | ||||
|  | ||||
| ```typescript | ||||
| // Resource operations | ||||
| wmill.getResource(path?: string, undefinedIfEmpty?: boolean): Promise<any> | ||||
| wmill.setResource(value: any, path?: string, initializeToTypeIfNotExist?: string): Promise<void> | ||||
|  | ||||
| // State management (persistent across executions) | ||||
| wmill.getState(): Promise<any> | ||||
| wmill.setState(state: any): Promise<void> | ||||
|  | ||||
| // Variables | ||||
| wmill.getVariable(path: string): Promise<string> | ||||
| wmill.setVariable(path: string, value: string, isSecretIfNotExist?: boolean, descriptionIfNotExist?: string): Promise<void> | ||||
|  | ||||
| // Script execution | ||||
| wmill.runScript(path?: string | null, hash_?: string | null, args?: Record<string, any> | null, verbose?: boolean): Promise<any> | ||||
| wmill.runScriptAsync(path: string | null, hash_: string | null, args: Record<string, any> | null, scheduledInSeconds?: number | null): Promise<string> | ||||
| wmill.waitJob(jobId: string, verbose?: boolean): Promise<any> | ||||
| wmill.getResult(jobId: string): Promise<any> | ||||
| wmill.getRootJobId(jobId?: string): Promise<string> | ||||
|  | ||||
| // S3 file operations (if S3 is configured) | ||||
| wmill.loadS3File(s3object: S3Object, s3ResourcePath?: string | undefined): Promise<Uint8Array | undefined> | ||||
| wmill.writeS3File(s3object: S3Object | undefined, fileContent: string | Blob, s3ResourcePath?: string | undefined): Promise<S3Object> | ||||
|  | ||||
| // Flow operations | ||||
| wmill.setFlowUserState(key: string, value: any, errorIfNotPossible?: boolean): Promise<void> | ||||
| wmill.getFlowUserState(key: string, errorIfNotPossible?: boolean): Promise<any> | ||||
| wmill.getResumeUrls(approver?: string): Promise<{approvalPage: string, resume: string, cancel: string}> | ||||
| ``` | ||||
|  | ||||
| ### Python (`python3`) | ||||
|  | ||||
| - Script contains at least one function called `main` | ||||
| - Libraries are installed automatically | ||||
| - Do not call the main function | ||||
|  | ||||
| **Resource Types:** | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type. | ||||
| **Redefine** the type of needed resources before the main function as TypedDict (only include if actually needed). | ||||
| Resource type name must be **IN LOWERCASE**. | ||||
| If an import conflicts with a resource type name, **rename the imported object, not the type name**. | ||||
| Import TypedDict from typing **if using it**. | ||||
|  | ||||
| **Windmill Client (`import wmill`):** | ||||
|  | ||||
| ```python | ||||
| # Resource operations | ||||
| wmill.get_resource(path: str, none_if_undefined: bool = False) -> dict | None | ||||
| wmill.set_resource(path: str, value: Any, resource_type: str = "any") -> None | ||||
|  | ||||
| # State management | ||||
| wmill.get_state() -> Any | ||||
| wmill.set_state(value: Any) -> None | ||||
| wmill.get_flow_user_state(key: str) -> Any | ||||
| wmill.set_flow_user_state(key: str, value: Any) -> None | ||||
|  | ||||
| # Variables | ||||
| wmill.get_variable(path: str) -> str | ||||
| wmill.set_variable(path: str, value: str, is_secret: bool = False) -> None | ||||
|  | ||||
| # Script execution | ||||
| wmill.run_script(path: str = None, hash_: str = None, args: dict = None, timeout = None, verbose: bool = False) -> Any | ||||
| wmill.run_script_async(path: str = None, hash_: str = None, args: dict = None, scheduled_in_secs: int = None) -> str | ||||
| wmill.wait_job(job_id: str, timeout = None, verbose: bool = False) -> Any | ||||
| wmill.get_result(job_id: str) -> Any | ||||
|  | ||||
| # S3 operations | ||||
| wmill.load_s3_file(s3object: S3Object | str, s3_resource_path: str | None = None) -> bytes | ||||
| wmill.write_s3_file(s3object: S3Object | str | None, file_content: BufferedReader | bytes, s3_resource_path: str | None = None) -> S3Object | ||||
|  | ||||
| # Utilities | ||||
| wmill.get_workspace() -> str | ||||
| wmill.whoami() -> dict | ||||
| wmill.set_progress(value: int, job_id: Optional[str] = None) -> None | ||||
| ``` | ||||
|  | ||||
| ### PHP (`php`) | ||||
|  | ||||
| - Script must start with `<?php` | ||||
| - Contains at least one function called `main` | ||||
| - **Redefine** resource types before main function (only if needed) | ||||
| - Check if class exists using `class_exists` before defining types | ||||
| - Resource type name must be exactly as specified | ||||
|  | ||||
| **Resource Types:** | ||||
| If you need credentials, add a parameter to `main` with the corresponding resource type. | ||||
| **Redefine** the type of needed resources before the main function. | ||||
| Before defining each type, check if the class already exists using class_exists. | ||||
| The resource type name has to be exactly as specified. | ||||
|  | ||||
| **Library Dependencies:** | ||||
|  | ||||
| ```php | ||||
| // require: | ||||
| // mylibrary/mylibrary | ||||
| // myotherlibrary/myotherlibrary@optionalversion | ||||
| ``` | ||||
|  | ||||
| List one dependency per line, before the main function. The autoloader is already included. | ||||
|  | ||||
| ### Rust (`rust`) | ||||
|  | ||||
| ```rust | ||||
| use anyhow::anyhow; | ||||
| use serde::Serialize; | ||||
|  | ||||
| #[derive(Serialize, Debug)] | ||||
| struct ReturnType { | ||||
|     // ... | ||||
| } | ||||
|  | ||||
| fn main(...) -> anyhow::Result<ReturnType> | ||||
| ``` | ||||
|  | ||||
| **Dependencies:** | ||||
|  | ||||
| ````rust | ||||
| //! ```cargo | ||||
| //! [dependencies] | ||||
| //! anyhow = "1.0.86" | ||||
| //! ``` | ||||
| ```` | ||||
|  | ||||
| Serde is already included. For async code, keep `main` synchronous and create the async runtime inside it. | ||||
|  | ||||
| ### Go (`go`) | ||||
|  | ||||
| - File package must be "inner" | ||||
| - Export single function called `main` | ||||
| - Return type: `({return_type}, error)` | ||||
|  | ||||
| ### Bash (`bash`) | ||||
|  | ||||
| - Do not include "#!/bin/bash" | ||||
| - Arguments: `var1="$1"`, `var2="$2"`, etc. | ||||
|  | ||||
| ### SQL Variants | ||||
|  | ||||
| #### PostgreSQL (`postgresql`) | ||||
|  | ||||
| - Arguments: `$1::{type}`, `$2::{type}`, etc. | ||||
| - Name parameters: `-- $1 name1` or `-- $2 name = default` | ||||
|  | ||||
| #### MySQL (`mysql`) | ||||
|  | ||||
| - Arguments: `?` placeholders | ||||
| - Name parameters: `-- ? name1 ({type})` or `-- ? name2 ({type}) = default` | ||||
|  | ||||
| #### BigQuery (`bigquery`) | ||||
|  | ||||
| - Arguments: `@name1`, `@name2`, etc. | ||||
| - Name parameters: `-- @name1 ({type})` or `-- @name2 ({type}) = default` | ||||
|  | ||||
| #### Snowflake (`snowflake`) | ||||
|  | ||||
| - Arguments: `?` placeholders | ||||
| - Name parameters: `-- ? name1 ({type})` or `-- ? name2 ({type}) = default` | ||||
|  | ||||
| #### Microsoft SQL Server (`mssql`) | ||||
|  | ||||
| - Arguments: `@P1`, `@P2`, etc. | ||||
| - Name parameters: `-- @P1 name1 ({type})` or `-- @P2 name2 ({type}) = default` | ||||
|  | ||||
| ### GraphQL (`graphql`) | ||||
|  | ||||
| - Add needed arguments as query parameters | ||||
|  | ||||
| ### PowerShell (`powershell`) | ||||
|  | ||||
| - Declare arguments with a `param(...)` block on the first line: | ||||
|  | ||||
| ```powershell | ||||
| param($ParamName1, $ParamName2 = "default value", [{type}]$ParamName3, ...) | ||||
| ``` | ||||
|  | ||||
| ### C# (`csharp`) | ||||
|  | ||||
| - Public static Main method inside a class | ||||
| - NuGet packages: `#r "nuget: PackageName, Version"` at top | ||||
| - Method signature: `public static ReturnType Main(parameter types...)` | ||||
|  | ||||
| ### Java (`java`) | ||||
|  | ||||
| - Main public class with `public static main()` method | ||||
| - Dependencies: `//requirements://groupId:artifactId:version` at top | ||||
| - Method signature: `public static Object main(parameter types...)` | ||||
|  | ||||
| ## Supported Languages | ||||
|  | ||||
| `bunnative`, `nativets`, `bun`, `deno`, `python3`, `php`, `rust`, `go`, `bash`, `postgresql`, `mysql`, `bigquery`, `snowflake`, `mssql`, `graphql`, `powershell`, `csharp`, `java` | ||||
|  | ||||
| Always follow the specific conventions for the language being used and include only necessary dependencies and resource types. | ||||
|  | ||||
| # Windmill CLI Commands Summary | ||||
|  | ||||
| ## Core Commands | ||||
|  | ||||
| ### `wmill init` | ||||
|  | ||||
| Bootstrap a new Windmill project with a `wmill.yaml` configuration file | ||||
|  | ||||
| - `--use-default` - Use default settings without checking backend | ||||
| - `--use-backend` - Use backend git-sync settings if available | ||||
| - `--repository <repo>` - Specify repository path when using backend settings | ||||
|  | ||||
| ### `wmill version` | ||||
|  | ||||
| Display CLI and backend version information | ||||
|  | ||||
| - Shows current CLI version and checks for updates | ||||
| - Displays backend version if workspace is configured | ||||
|  | ||||
| ### `wmill upgrade` | ||||
|  | ||||
| Upgrade the CLI to the latest version available on npm | ||||
|  | ||||
| ## Authentication & Workspace Management | ||||
|  | ||||
| ### `wmill workspace` | ||||
|  | ||||
| Manage Windmill workspaces | ||||
|  | ||||
| - `add` - Add a new workspace configuration | ||||
| - `list` - List all configured workspaces | ||||
| - `switch <workspace>` - Switch to a specific workspace | ||||
| - `remove <workspace>` - Remove a workspace configuration | ||||
|  | ||||
| ### `wmill user` | ||||
|  | ||||
| User management operations | ||||
|  | ||||
| - `list` - List users in the workspace | ||||
| - `whoami` - Show current user information | ||||
|  | ||||
| ## Script & Flow Management | ||||
|  | ||||
| ### `wmill script` | ||||
|  | ||||
| Manage Windmill scripts | ||||
|  | ||||
| - `push <file>` - Push a script file to the workspace | ||||
| - `list` - List all scripts in the workspace | ||||
| - `show <path>` - Show script details | ||||
| - `run <path>` - Execute a script | ||||
| - `generate-metadata [file]` - Generate metadata for a script (the file argument is optional) | ||||
|  | ||||
| ### `wmill flow` | ||||
|  | ||||
| Manage Windmill flows | ||||
|  | ||||
| - `push <path>` - Push a flow to the workspace | ||||
| - `list` - List all flows | ||||
| - `show <path>` - Show flow details | ||||
| - `run <path>` - Execute a flow | ||||
|  | ||||
| ### `wmill app` | ||||
|  | ||||
| Manage Windmill applications | ||||
|  | ||||
| - `push <path>` - Push an app to the workspace | ||||
| - `list` - List all apps | ||||
| - `show <path>` - Show app details | ||||
|  | ||||
| ## Resource Management | ||||
|  | ||||
| ### `wmill resource` | ||||
|  | ||||
| Manage resources (database connections, API keys, etc.) | ||||
|  | ||||
| - `list` - List all resources | ||||
| - `push <file>` - Push a resource definition | ||||
| - `show <path>` - Show resource details | ||||
|  | ||||
| ### `wmill resource-type` | ||||
|  | ||||
| Manage custom resource types | ||||
|  | ||||
| - Operations for defining and managing custom resource schemas | ||||
|  | ||||
| ### `wmill variable` | ||||
|  | ||||
| Manage workspace variables and secrets | ||||
|  | ||||
| - `list` - List all variables | ||||
| - `push <file>` - Push a variable definition | ||||
| - `show <path>` - Show variable details | ||||
|  | ||||
| ## Scheduling & Automation | ||||
|  | ||||
| ### `wmill schedule` | ||||
|  | ||||
| Manage scheduled jobs | ||||
|  | ||||
| - `list` - List all schedules | ||||
| - `push <file>` - Push a schedule definition | ||||
| - Operations for managing cron-based job scheduling | ||||
|  | ||||
| ### `wmill trigger` | ||||
|  | ||||
| Manage event triggers | ||||
|  | ||||
| - Operations for managing webhooks and event-based triggers | ||||
|  | ||||
| ## Synchronization | ||||
|  | ||||
| ### `wmill sync` | ||||
|  | ||||
| Synchronize local files with Windmill workspace | ||||
|  | ||||
| - `pull` - Download resources from workspace to local files | ||||
| - `push` - Upload local files to workspace | ||||
| - Supports bidirectional sync with conflict resolution | ||||
| - Works with `wmill.yaml` configuration | ||||
|  | ||||
| ### `wmill gitsync-settings` | ||||
|  | ||||
| Manage git synchronization settings | ||||
|  | ||||
| - Configure automatic git sync for the workspace | ||||
| - Pull/push git sync configurations | ||||
|  | ||||
| ## Development Tools | ||||
|  | ||||
| ### `wmill dev` | ||||
|  | ||||
| Start development mode with live reloading | ||||
|  | ||||
| - Watches local files for changes | ||||
| - Automatically syncs changes to workspace | ||||
| - Provides real-time feedback during development | ||||
|  | ||||
| ### `wmill hub` | ||||
|  | ||||
| Interact with Windmill Hub | ||||
|  | ||||
| - `pull` - Pull resources from the public Windmill Hub | ||||
| - Access community-shared scripts, flows, and resource types | ||||
|  | ||||
| ## Infrastructure Management | ||||
|  | ||||
| ### `wmill instance` | ||||
|  | ||||
| Manage Windmill instance settings (Enterprise) | ||||
|  | ||||
| - Configure instance-level settings | ||||
| - Manage global configurations | ||||
|  | ||||
| ### `wmill worker-groups` | ||||
|  | ||||
| Manage worker groups for job execution | ||||
|  | ||||
| - Configure and manage worker pool settings | ||||
|  | ||||
| ### `wmill workers` | ||||
|  | ||||
| Manage individual workers | ||||
|  | ||||
| - Monitor and configure worker instances | ||||
|  | ||||
| ### `wmill queues` | ||||
|  | ||||
| Manage job queues | ||||
|  | ||||
| - Monitor and configure job execution queues | ||||
|  | ||||
| ## Utility Commands | ||||
|  | ||||
| ### `wmill folder` | ||||
|  | ||||
| Manage workspace folders and organization | ||||
|  | ||||
| - Operations for organizing resources into folders | ||||
|  | ||||
| ### `wmill completions` | ||||
|  | ||||
| Generate shell completion scripts | ||||
|  | ||||
| - Support for bash, zsh, fish, and PowerShell | ||||
|  | ||||
| ## Global Options | ||||
|  | ||||
| All commands support these global options: | ||||
|  | ||||
| - `--workspace <workspace>` - Specify target workspace | ||||
| - `--token <token>` - Specify API token | ||||
| - `--base-url <url>` - Specify Windmill instance URL | ||||
| - `--config-dir <dir>` - Custom configuration directory | ||||
| - `--debug/--verbose` - Enable debug logging | ||||
| - `--show-diffs` - Show detailed diff information during sync | ||||
|  | ||||
| The CLI uses a `wmill.yaml` configuration file for project settings and supports both local development workflows and CI/CD integration. | ||||
|  | ||||
|  | ||||
| ## Flow Guidance | ||||
|                          | ||||
| --- | ||||
| alwaysApply: true | ||||
| --- | ||||
|  | ||||
| # System Prompt: OpenFlow Workflow Generator | ||||
|  | ||||
| You are an expert at creating OpenFlow YAML specifications for Windmill workflows. | ||||
| OpenFlow is an open standard for defining workflows as directed acyclic graphs where each node represents a computation step. | ||||
| When asked to create a flow, ask the user which folder it should go in if they have not specified one. Then create a new folder inside that folder whose name ends with `.flow`. It should contain a `.yaml` file that contains the flow definition. | ||||
| For rawscript type modules in the flow, the content key should start with "!inline" followed by the path of the script file containing the code. That script file should be placed in the same folder as the flow. | ||||
| For script type modules, path should be the path of the script in the whole repository (not constrained to the flow folder). | ||||
| You do not need to create .lock and .yaml files manually. Instead, you should run `wmill flow generate-locks --yes` to create them. | ||||
| After writing the flow, you can ask the user if they want to push the flow with `wmill sync push`. Both commands should be run at the root of the repository. | ||||
|  | ||||
| ## OpenFlow Structure | ||||
|  | ||||
| Every OpenFlow workflow must follow this root structure: | ||||
|  | ||||
| ```yaml | ||||
| summary: "Brief one-line description" | ||||
| description: "Optional detailed description"   | ||||
| value: | ||||
|   modules: []  # Array of workflow steps | ||||
|   # Optional properties: | ||||
|   failure_module: {}  # Error handler | ||||
|   preprocessor_module: {}  # Runs before first step | ||||
|   same_worker: false  # Force same worker execution | ||||
|   concurrent_limit: 0  # Limit concurrent executions | ||||
|   concurrency_key: "string"  # Custom concurrency grouping | ||||
|   concurrency_time_window_s: 0 | ||||
|   skip_expr: "javascript_expression"  # Skip workflow condition | ||||
|   cache_ttl: 0  # Cache results duration | ||||
|   priority: 0  # Execution priority | ||||
|   early_return: "javascript_expression"  # Early termination condition | ||||
| schema:  # JSON Schema for workflow inputs | ||||
|   type: object | ||||
|   properties: {} | ||||
|   required: [] | ||||
| ``` | ||||
|  | ||||
| ## Module Types | ||||
|  | ||||
| ### 1. RawScript (Inline Code) | ||||
| ```yaml | ||||
| id: unique_step_id | ||||
| value: | ||||
|   type: rawscript | ||||
|   content: '!inline inline_script_1.inline_script.ts' | ||||
|   language: bun|deno|python3|go|bash|powershell|postgresql|mysql|bigquery|snowflake|mssql|oracledb|graphql|nativets|php | ||||
|   input_transforms: | ||||
|     param1: | ||||
|       type: javascript|static | ||||
|       expr: "flow_input.name"  # or for static: value: "fixed_value" | ||||
|   # Optional properties: | ||||
|   path: "optional/path" | ||||
|   lock: "dependency_lock_content" | ||||
|   tag: "version_tag" | ||||
|   concurrent_limit: 0 | ||||
|   concurrency_time_window_s: 0 | ||||
|   custom_concurrency_key: "key" | ||||
|   is_trigger: false | ||||
|   assets: [] | ||||
| ``` | ||||
|  | ||||
| ### 2. PathScript (Reference to Existing Script) | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: | ||||
|   type: script | ||||
|   path: "u/user/script_name" # or "f/folder/script_name" or "hub/script_path" | ||||
|   input_transforms: | ||||
|     param_name: | ||||
|       type: javascript | ||||
|       expr: "results.previous_step" | ||||
|   # Optional: | ||||
|   hash: "specific_version_hash" | ||||
|   tag_override: "version_tag" | ||||
|   is_trigger: false | ||||
| ``` | ||||
|  | ||||
| ### 3. PathFlow (Sub-workflow) | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: | ||||
|   type: flow | ||||
|   path: "f/folder/flow_name" | ||||
|   input_transforms: | ||||
|     param_name: | ||||
|       type: static | ||||
|       value: "fixed_value" | ||||
| ``` | ||||
|  | ||||
| ### 4. ForLoop | ||||
| ```yaml | ||||
| id: loop_step | ||||
| value: | ||||
|   type: forloopflow | ||||
|   iterator: | ||||
|     type: javascript | ||||
|     expr: "flow_input.items"  # Must evaluate to array | ||||
|   skip_failures: true|false | ||||
|   parallel: true|false  # Run iterations in parallel | ||||
|   parallelism: 4  # Max parallel iterations (if parallel: true) | ||||
|   modules: | ||||
|     - id: loop_body_step | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: | | ||||
|           export async function main(iter: any) { | ||||
|             // iter.value contains current item | ||||
|             // iter.index contains current index | ||||
|             return iter.value; | ||||
|           } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           iter: | ||||
|             type: javascript | ||||
|             expr: "flow_input.iter" | ||||
| ``` | ||||
|  | ||||
| ### 5. WhileLoop | ||||
| ```yaml | ||||
| id: while_step | ||||
| value: | ||||
|   type: whileloopflow | ||||
|   skip_failures: false | ||||
|   parallel: false | ||||
|   parallelism: 1 | ||||
|   modules: | ||||
|     - id: condition_check | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: | | ||||
|           export async function main() { | ||||
|             return Math.random() > 0.5; // Continue condition | ||||
|           } | ||||
|         language: bun | ||||
|         input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 6. Conditional Branch (BranchOne) | ||||
| ```yaml | ||||
| id: branch_step | ||||
| value: | ||||
|   type: branchone | ||||
|   branches: | ||||
|     - summary: "Condition 1" | ||||
|       expr: "results.previous_step > 10" | ||||
|       modules: | ||||
|         - id: branch1_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'branch1'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|     - summary: "Condition 2"  | ||||
|       expr: "results.previous_step <= 10" | ||||
|       modules: | ||||
|         - id: branch2_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'branch2'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|   default:  # Runs if no branch condition matches | ||||
|     - id: default_step | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: "export async function main() { return 'default'; }" | ||||
|         language: bun | ||||
|         input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 7. Parallel Branches (BranchAll) | ||||
| ```yaml | ||||
| id: parallel_step | ||||
| value: | ||||
|   type: branchall | ||||
|   parallel: true  # Run branches in parallel | ||||
|   branches: | ||||
|     - summary: "Branch A" | ||||
|       skip_failure: false  # If true, the flow continues even if this branch fails | ||||
|       modules: | ||||
|         - id: branch_a_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'A'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
|     - summary: "Branch B" | ||||
|       skip_failure: true | ||||
|       modules: | ||||
|         - id: branch_b_step | ||||
|           value: | ||||
|             type: rawscript | ||||
|             content: "export async function main() { return 'B'; }" | ||||
|             language: bun | ||||
|             input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### 8. Identity (Pass-through) | ||||
| ```yaml | ||||
| id: identity_step | ||||
| value: | ||||
|   type: identity | ||||
|   flow: false  # Set to true if this represents a sub-flow | ||||
| ``` | ||||
|  | ||||
| ## Input Transforms & Data Flow | ||||
|  | ||||
| ### JavaScript Expressions | ||||
| Reference data using these variables in `expr` fields: | ||||
| - `flow_input.property_name` - Access workflow inputs | ||||
| - `results.step_id` - Access outputs from previous steps   | ||||
| - `results.step_id.property` - Access specific properties | ||||
| - `flow_input.iter.value` - Current iteration value (in loops) | ||||
| - `flow_input.iter.index` - Current iteration index (in loops) | ||||
|  | ||||
| ### Static Values | ||||
| ```yaml | ||||
| input_transforms: | ||||
|   param_name: | ||||
|     type: static | ||||
|     value: "fixed_string"  # Can be string, number, boolean, object, array | ||||
| ``` | ||||
|  | ||||
| ### Resource References | ||||
| ```yaml | ||||
| input_transforms: | ||||
|   database: | ||||
|     type: static | ||||
|     value: "$res:f/folder/my_database"  # Reference to stored resource | ||||
| ``` | ||||
|  | ||||
| ## Advanced Module Properties | ||||
|  | ||||
| ### Error Handling & Control Flow | ||||
| ```yaml | ||||
| id: step_id | ||||
| value: # ... module definition | ||||
| # Control flow options: | ||||
| stop_after_if: | ||||
|   expr: "results.step_id.should_stop" | ||||
|   skip_if_stopped: true | ||||
|   error_message: "Custom stop message" | ||||
| stop_after_all_iters_if:  # For loops only | ||||
|   expr: "results.step_id.should_stop_loop" | ||||
|   skip_if_stopped: false | ||||
| skip_if: | ||||
|   expr: "results.step_id.should_skip" | ||||
| sleep: | ||||
|   type: javascript | ||||
|   expr: "flow_input.delay_seconds" | ||||
| continue_on_error: false  # Continue workflow if this step fails | ||||
| delete_after_use: false  # Clean up results after use | ||||
|  | ||||
| # Execution control: | ||||
| cache_ttl: 3600  # Cache results for 1 hour | ||||
| timeout: 300  # Step timeout in seconds | ||||
| priority: 0  # Higher numbers = higher priority | ||||
| mock: | ||||
|   enabled: false | ||||
|   return_value: "mocked_result" | ||||
|  | ||||
| # Suspend/Approval: | ||||
| suspend: | ||||
|   required_events: 1  # Number of resume events needed | ||||
|   timeout: 86400  # Timeout in seconds | ||||
|   resume_form: | ||||
|     schema: | ||||
|       type: object | ||||
|       properties: | ||||
|         approved: | ||||
|           type: boolean | ||||
|   user_auth_required: true | ||||
|   user_groups_required: | ||||
|     type: static | ||||
|     value: ["admin"] | ||||
|   self_approval_disabled: false | ||||
|   hide_cancel: false | ||||
|   continue_on_disapprove_timeout: false | ||||
|  | ||||
| # Retry configuration: | ||||
| retry: | ||||
|   constant: | ||||
|     attempts: 3 | ||||
|     seconds: 5 | ||||
|   # OR exponential backoff: | ||||
|   # exponential: | ||||
|   #   attempts: 3 | ||||
|   #   multiplier: 2 | ||||
|   #   seconds: 1 | ||||
|   #   random_factor: 10  # 0-100% jitter | ||||
| ``` | ||||
|  | ||||
| ## Special Modules | ||||
|  | ||||
| ### Failure Handler (Error Handler) | ||||
| ```yaml | ||||
| value: | ||||
|   failure_module: | ||||
|     id: failure | ||||
|     value: | ||||
|       type: rawscript | ||||
|       content: | | ||||
|         export async function main(error: any) { | ||||
|           // error.message, error.step_id, error.name, error.stack | ||||
|           console.log("Flow failed:", error.message); | ||||
|           return error; | ||||
|         } | ||||
|       language: bun | ||||
|       input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ### Preprocessor  | ||||
| ```yaml | ||||
| value: | ||||
|   preprocessor_module: | ||||
|     id: preprocessor   | ||||
|     value: | ||||
|       type: rawscript | ||||
|       content: | | ||||
|         export async function main() { | ||||
|           console.log("Flow starting..."); | ||||
|           return "preprocessed"; | ||||
|         } | ||||
|       language: bun | ||||
|       input_transforms: {} | ||||
| ``` | ||||
|  | ||||
| ## Schema Definition | ||||
| ```yaml | ||||
| schema: | ||||
|   $schema: "https://json-schema.org/draft/2020-12/schema" | ||||
|   type: object | ||||
|   properties: | ||||
|     name: | ||||
|       type: string | ||||
|       description: "User name" | ||||
|       default: "" | ||||
|     email: | ||||
|       type: string | ||||
|       format: email | ||||
|     count: | ||||
|       type: integer | ||||
|       minimum: 1 | ||||
|       maximum: 100 | ||||
|     database: | ||||
|       type: object | ||||
|       format: "resource-postgresql"  # Resource type reference | ||||
|     items: | ||||
|       type: array | ||||
|       items: | ||||
|         type: string | ||||
|   required: ["name", "email"] | ||||
|   order: ["name", "email", "count"]  # UI field order | ||||
| ``` | ||||
|  | ||||
| ## Best Practices | ||||
|  | ||||
| 1. **Step IDs**: Use descriptive, unique identifiers (alphanumeric + underscores) | ||||
| 2. **Data Flow**: Chain steps using `results.step_id` references | ||||
| 3. **Error Handling**: Add failure_module for critical workflows | ||||
| 4. **Languages**: Use `bun` for TypeScript (fastest), `python3` for Python | ||||
| 5. **Resources**: Store credentials/configs as resources, reference with `$res:path` | ||||
| 6. **Loops**: Prefer `parallel: true` for independent iterations | ||||
| 7. **Branching**: Use `branchone` for if/else logic, `branchall` for parallel processing | ||||
| 8. **Schemas**: Always define input schemas for better UX and validation | ||||
|  | ||||
| ## Example Complete Workflow | ||||
| ```yaml | ||||
| summary: "Process user data" | ||||
| description: "Validates user input, processes data, and sends notifications" | ||||
| value: | ||||
|   modules: | ||||
|     - id: validate_input | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: '!inline inline_script_0.inline_script.ts' | ||||
|         # script at path inline_script_0.inline_script.ts will contain | ||||
|         #   export async function main(email: string, name: string) { | ||||
|         #     if (!email.includes('@')) throw new Error('Invalid email'); | ||||
|         #     return { email, name, valid: true }; | ||||
|         #   } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           email: | ||||
|             type: javascript | ||||
|             expr: "flow_input.email" | ||||
|           name: | ||||
|             type: javascript   | ||||
|             expr: "flow_input.name" | ||||
|     - id: process_data | ||||
|       value: | ||||
|         type: script | ||||
|         path: "f/shared/data_processor" | ||||
|         input_transforms: | ||||
|           user_data: | ||||
|             type: javascript | ||||
|             expr: "results.validate_input" | ||||
|     - id: send_notification | ||||
|       value: | ||||
|         type: rawscript | ||||
|         content: '!inline inline_script_1.inline_script.ts' | ||||
|         # script at path inline_script_1.inline_script.ts will contain | ||||
|         #   export async function main(processed_data: any) { | ||||
|         #     console.log("Sending notification for:", processed_data.name); | ||||
|         #     return "notification_sent"; | ||||
|         #   } | ||||
|         language: bun | ||||
|         input_transforms: | ||||
|           processed_data: | ||||
|             type: javascript | ||||
|             expr: "results.process_data" | ||||
| schema: | ||||
|   type: object | ||||
|   properties: | ||||
|     email: | ||||
|       type: string | ||||
|       format: email | ||||
|       description: "User email address" | ||||
|     name: | ||||
|       type: string | ||||
|       description: "User full name" | ||||
|   required: ["email", "name"] | ||||
| ``` | ||||
|  | ||||
| When generating OpenFlow YAML, ensure proper indentation, valid YAML syntax, and logical step dependencies. Always include meaningful summaries and proper input transforms to connect workflow steps. | ||||
|  | ||||
|                      | ||||
							
								
								
									
										189
									
								
								f/CCR_ETL/ccr_db_config.variable.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										189
									
								
								f/CCR_ETL/ccr_db_config.variable.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,189 @@ | ||||
| description: '' | ||||
| value: |- | ||||
|   # Global table strategy (can be overridden per table) | ||||
|   table_strategy: drop_and_recreate | ||||
|   schema: | ||||
|     name: ccr_etl_raw | ||||
|     version: 1.0.0 | ||||
|     description: CCR ETL Raw Data Schema | ||||
|     tables: | ||||
|       - name: mtgjson_skus | ||||
|         strategy: drop_and_recreate | ||||
|         columns: | ||||
|           - name: id | ||||
|             type: integer | ||||
|             description: internal database id | ||||
|             primary_key: true | ||||
|             autoincrement: true | ||||
|           - name: uuid | ||||
|             type: string | ||||
|             description: The UUID of the MTGJSON SKU | ||||
|           - name: condition | ||||
|             type: string | ||||
|             description: The condition of the MTGJSON SKU | ||||
|           - name: language | ||||
|             type: string | ||||
|             description: The language of the MTGJSON SKU | ||||
|           - name: printing | ||||
|             type: string | ||||
|             description: The printing of the MTGJSON SKU | ||||
|           - name: finish | ||||
|             type: string | ||||
|             description: The finish of the MTGJSON SKU | ||||
|           - name: productId | ||||
|             type: string | ||||
|             description: The tcgplayer product ID of the MTGJSON SKU | ||||
|           - name: skuId | ||||
|             type: string | ||||
|             description: The tcgplayer SKU ID of the MTGJSON SKU | ||||
|       - name: mtgjson_identifiers | ||||
|         strategy: drop_and_recreate | ||||
|         columns: | ||||
|           - name: id | ||||
|             type: integer | ||||
|             description: internal database id | ||||
|             primary_key: true | ||||
|             autoincrement: true | ||||
|           - name: uuid | ||||
|             type: string | ||||
|             description: The UUID of the MTGJSON Identifier | ||||
|           - name: name | ||||
|             type: string | ||||
|             description: The name of the MTGJSON Identifier | ||||
|           - name: setCode | ||||
|             type: string | ||||
|             description: The set code of the MTGJSON Identifier | ||||
|           - name: abuId | ||||
|             type: string | ||||
|             description: The Abu Games ID | ||||
|           - name: cardKingdomEtchedId | ||||
|             type: string | ||||
|             description: The Card Kingdom Etched ID | ||||
|           - name: cardKingdomFoilId | ||||
|             type: string | ||||
|             description: The Card Kingdom Foil ID | ||||
|           - name: cardKingdomId | ||||
|             type: string | ||||
|             description: The Card Kingdom ID | ||||
|           - name: cardsphereId | ||||
|             type: string | ||||
|             description: The Cardsphere ID | ||||
|           - name: cardsphereFoilId | ||||
|             type: string | ||||
|             description: The Cardsphere Foil ID | ||||
|           - name: cardtraderId | ||||
|             type: string | ||||
|             description: The Cardtrader ID | ||||
|           - name: csiId | ||||
|             type: string | ||||
|             description: The cool stuff inc ID | ||||
|           - name: mcmId | ||||
|             type: string | ||||
|             description: The cardmarket ID | ||||
|           - name: mcmMetaId | ||||
|             type: string | ||||
|             description: The cardmarket meta ID | ||||
|           - name: miniaturemarketId | ||||
|             type: string | ||||
|             description: The miniaturemarket ID | ||||
|           - name: mtgArenaId | ||||
|             type: string | ||||
|             description: The mtg arena ID | ||||
|           - name: mtgjsonFoilVersionId | ||||
|             type: string | ||||
|             description: The uuid generated by mtgjson for the foil version of a card | ||||
|           - name: mtgjsonNonFoilVersionId | ||||
|             type: string | ||||
|             description: The uuid generated by mtgjson for the non-foil version of a card | ||||
|           - name: mtgjsonV4Id | ||||
|             type: string | ||||
|             description: The uuid generated by mtgjson for a card | ||||
|           - name: mtgoFoilId | ||||
|             type: string | ||||
|             description: The mtgo foil ID | ||||
|           - name: mtgoId | ||||
|             type: string | ||||
|             description: The mtgo ID | ||||
|           - name: multiverseId | ||||
|             type: string | ||||
|             description: The multiverse ID used by wotc for gatherer | ||||
|           - name: scgId | ||||
|             type: string | ||||
|             description: The starcitygames ID | ||||
|           - name: scryfallId | ||||
|             type: string | ||||
|             description: The scryfall ID | ||||
|           - name: scryfallCardBackId | ||||
|             type: string | ||||
|             description: The scryfall card back ID | ||||
|           - name: scryfallOracleId | ||||
|             type: string | ||||
|             description: The scryfall oracle ID | ||||
|           - name: scryfallIllustrationId | ||||
|             type: string | ||||
|             description: The scryfall illustration ID | ||||
|           - name: tcgplayerProductId | ||||
|             type: string | ||||
|             description: The tcgplayer product ID | ||||
|           - name: tcgplayerEtchedProductId | ||||
|             type: string | ||||
|             description: The tcgplayer etched product ID | ||||
|           - name: tntId | ||||
|             type: string | ||||
|             description: The troll and toad ID | ||||
|       - name: tcgcsv_categories | ||||
|         strategy: drop_and_recreate | ||||
|         columns: | ||||
|           - name: id | ||||
|             type: integer | ||||
|             description: internal database id | ||||
|             primary_key: true | ||||
|             autoincrement: true | ||||
|           - name: categoryId | ||||
|             type: integer | ||||
|           - name: name | ||||
|             type: string | ||||
|           - name: modifiedOn | ||||
|             type: string | ||||
|           - name: displayName | ||||
|             type: string | ||||
|           - name: seoCategoryName | ||||
|             type: string | ||||
|           - name: categoryDescription | ||||
|             type: string | ||||
|           - name: categoryPageTitle | ||||
|             type: string | ||||
|           - name: sealedLabel | ||||
|             type: string | ||||
|           - name: nonSealedLabel | ||||
|             type: string | ||||
|           - name: conditionGuideUrl | ||||
|             type: string | ||||
|           - name: isScannable | ||||
|             type: boolean | ||||
|           - name: popularity | ||||
|             type: integer | ||||
|           - name: isDirect | ||||
|             type: boolean | ||||
|       - name: tcgcsv_groups | ||||
|         strategy: drop_and_recreate | ||||
|         columns: | ||||
|           - name: id | ||||
|             type: integer | ||||
|             primary_key: true | ||||
|             autoincrement: true | ||||
|           - name: groupId | ||||
|             type: integer | ||||
|           - name: name | ||||
|             type: string | ||||
|           - name: abbreviation | ||||
|             type: string | ||||
|           - name: isSupplemental | ||||
|             type: boolean | ||||
|           - name: publishedOn | ||||
|             type: string | ||||
|           - name: modifiedOn | ||||
|             type: string | ||||
|           - name: categoryId | ||||
|             type: integer | ||||
| is_secret: false | ||||
							
								
								
									
										89
									
								
								f/CCR_ETL/ccr_etl_db_init.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								f/CCR_ETL/ccr_etl_db_init.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| import os | ||||
| import wmill | ||||
| import yaml | ||||
| from sqlalchemy import create_engine, text, MetaData, Table, Column, Integer, String, inspect | ||||
| from sqlalchemy.engine import Engine | ||||
| import psycopg2 | ||||
|  | ||||
| # You can import any PyPi package.  | ||||
| # See here for more info: https://www.windmill.dev/docs/advanced/dependencies_in_python | ||||
|  | ||||
| # you can use typed resources by doing a type alias to dict | ||||
| #postgresql = dict | ||||
|  | ||||
| DB_RESOURCE_PATH = 'u/joshuakrzemien/slick_postgresql' | ||||
| DB_CONFIG_PATH = 'f/CCR_ETL/ccr_db_config' | ||||
|  | ||||
def create_db_engine(db: dict):
    """Build a SQLAlchemy engine for the PostgreSQL resource and verify it connects.

    Args:
        db: Resource dictionary. The keys ``password``, ``host``, ``port`` and
            ``dbname`` are read; the user name is fixed to ``postgres``.

    Returns:
        The created engine.

    Raises:
        KeyError: If one of the required keys is missing from ``db``.
        Exception: Whatever ``engine.connect()`` raises when the test
            connection cannot be established.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import quote_plus

    # Characters such as '@', ':' or '/' in a password would otherwise be
    # parsed as URL delimiters.
    password = quote_plus(str(db['password']))
    db_url = f"postgresql+psycopg2://postgres:{password}@{db['host']}:{db['port']}/{db['dbname']}"
    engine = create_engine(db_url)

    # Open and immediately release a connection: fails fast on bad settings
    # without leaving the test connection checked out.
    with engine.connect():
        pass
    return engine
|  | ||||
def table_exists(engine: Engine, table_name: str) -> bool:
    """Return True when ``table_name`` is among the table names the inspector reports."""
    existing_tables = inspect(engine).get_table_names()
    return table_name in existing_tables
|  | ||||
def create_table(engine: Engine, table: dict, strategy: str = "create_if_not_exists"):
    """Create one table from its config definition, honouring the table strategy.

    Args:
        engine: Engine used to inspect, drop and create the table.
        table: Table definition with a ``name`` and a list of ``columns``.
            Each column needs ``name`` and ``type``; ``primary_key``,
            ``nullable``, ``index`` and ``autoincrement`` are optional.
        strategy: ``"drop_and_recreate"`` drops an existing table first;
            ``"create_if_not_exists"`` returns early when the table exists.

    Raises:
        ValueError: If ``strategy`` is not one of the two known values.
        Exception: Any other error is logged and re-raised unchanged.
    """
    # Resolve the name before entering the try block so the error handler can
    # always reference it, even when the definition has no 'name' key.
    table_name = table.get('name')
    try:
        table_name = table['name']
        columns = table['columns']

        # Handle different table strategies
        if strategy == "drop_and_recreate":
            if table_exists(engine, table_name):
                print(f"Dropping existing table: {table_name}")
                # Quote the identifier the way the dialect does when it creates
                # the table, so mixed-case or reserved names are dropped too.
                quoted_name = engine.dialect.identifier_preparer.quote(table_name)
                with engine.connect() as conn:
                    conn.execute(text(f"DROP TABLE IF EXISTS {quoted_name} CASCADE"))
                    conn.commit()

        elif strategy == "create_if_not_exists":
            if table_exists(engine, table_name):
                print(f"Table {table_name} already exists, skipping creation")
                return

        else:
            raise ValueError(f"Unknown table strategy: {strategy}")

        # Map config types to SQLAlchemy types.
        # NOTE(review): any type not listed here falls back to String below;
        # confirm that is intended for every type used in the config.
        type_mapping = {
            'integer': Integer,
            'string': String
        }

        # Build SQLAlchemy columns
        sqlalchemy_columns = [
            Column(
                column['name'],
                type_mapping.get(column['type'], String),
                primary_key=column.get('primary_key', False),
                nullable=column.get('nullable', True),
                index=column.get('index', False),
                autoincrement=column.get('autoincrement', False),
            )
            for column in columns
        ]

        # Registering the Table on the MetaData is what create_all() acts on.
        metadata = MetaData()
        Table(table_name, metadata, *sqlalchemy_columns)
        metadata.create_all(engine)

        print(f"Successfully created table: {table_name}")

    except Exception as e:
        print(f"Error creating table {table_name}: {str(e)}")
        raise
|  | ||||
def main():
    """Create every table listed in the database config variable.

    Reads the database resource and the YAML config variable, builds an
    engine, then calls ``create_table`` for each entry of
    ``config['schema']['tables']``.

    Returns:
        ``{"status": "success"}`` once all tables have been processed.
    """
    resource = wmill.client.get_resource(DB_RESOURCE_PATH)
    raw_config = wmill.get_variable(DB_CONFIG_PATH)
    config = yaml.safe_load(raw_config)
    engine = create_db_engine(resource)

    # Global strategy; used whenever a table does not set its own.
    default_strategy = config.get('table_strategy', 'drop_and_recreate')
    print(f"Using table strategy: {default_strategy}")

    for table_def in config['schema']['tables']:
        create_table(engine, table_def, table_def.get('strategy', default_strategy))

    return {"status": "success"}
							
								
								
									
										14
									
								
								f/CCR_ETL/ccr_etl_db_init.script.lock
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								f/CCR_ETL/ccr_etl_db_init.script.lock
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| # py: 3.11 | ||||
| anyio==4.10.0 | ||||
| certifi==2025.8.3 | ||||
| greenlet==3.2.4 | ||||
| h11==0.16.0 | ||||
| httpcore==1.0.9 | ||||
| httpx==0.28.1 | ||||
| idna==3.10 | ||||
| psycopg2-binary==2.9.10 | ||||
| pyyaml==6.0.2 | ||||
| sniffio==1.3.1 | ||||
| sqlalchemy==2.0.43 | ||||
| typing-extensions==4.15.0 | ||||
| wmill==1.538.0 | ||||
							
								
								
									
										9
									
								
								f/CCR_ETL/ccr_etl_db_init.script.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								f/CCR_ETL/ccr_etl_db_init.script.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| summary: '' | ||||
| description: '' | ||||
| lock: '!inline f/CCR_ETL/ccr_etl_db_init.script.lock' | ||||
| kind: script | ||||
| schema: | ||||
|   $schema: 'https://json-schema.org/draft/2020-12/schema' | ||||
|   type: object | ||||
|   properties: {} | ||||
|   required: [] | ||||
							
								
								
									
										377
									
								
								f/CCR_ETL/ccr_etl_mtgjson.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										377
									
								
								f/CCR_ETL/ccr_etl_mtgjson.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,377 @@ | ||||
| """ | ||||
| CCR ETL MTGJSON Processing Script | ||||
|  | ||||
| This script handles the extraction, transformation, and loading of MTGJSON data | ||||
| into a PostgreSQL database. It supports downloading, unzipping, preprocessing, | ||||
| and batch inserting of various data formats. | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import os | ||||
| import yaml | ||||
| from typing import Union | ||||
| from zipfile import ZipFile | ||||
|  | ||||
| import psycopg2 | ||||
| import requests | ||||
| import wmill | ||||
| from sqlalchemy import create_engine, text | ||||
| from sqlalchemy.engine import Engine | ||||
|  | ||||
| # Configuration paths | ||||
| DB_RESOURCE_PATH = 'u/joshuakrzemien/slick_postgresql' | ||||
| DB_CONFIG_PATH = 'f/CCR_ETL/ccr_db_config' | ||||
| EXTRACT_CONFIG_PATH = 'f/CCR_ETL/ccr_extract_config' | ||||
| DOWNLOAD_CONFIG_PATH = './shared/' | ||||
|  | ||||
| # Default processing parameters | ||||
| DEFAULT_BATCH_SIZE = 1000 | ||||
|  | ||||
def validate_response_headers(response: "requests.Response", outer_file_type: str) -> None:
    """Validate that the response media type matches the expected file type.

    Only the media type is compared: parameters such as ``; charset=utf-8``
    are ignored and the comparison is case-insensitive.

    Args:
        response: Response whose ``headers`` mapping is inspected.
        outer_file_type: Expected subtype, e.g. ``"json"`` or ``"zip"``; the
            expected media type is ``application/<outer_file_type>``.

    Raises:
        ValueError: If the Content-Type header is missing or does not match.
    """
    content_type = response.headers.get('Content-Type', '')
    media_type = content_type.split(';', 1)[0].strip().lower()
    expected = f'application/{outer_file_type}'.lower()
    if media_type != expected:
        raise ValueError(f"Expected {outer_file_type} response, got {content_type or '<missing Content-Type>'}")
|  | ||||
|  | ||||
def download_request(url: str, filename: str, outer_file_type: str, timeout: float = 60.0) -> bytes:
    """Download a file from the given URL and validate its content type.

    Args:
        url: Address to fetch.
        filename: Name used only in the log output.
        outer_file_type: Expected ``application/<type>`` subtype of the response.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the job indefinitely.

    Returns:
        The raw response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the content type does not match ``outer_file_type``.
    """
    print(f"🔽 Downloading {filename} from {url}")

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    validate_response_headers(response, outer_file_type)

    print(f"✅ Download successful ({response.headers.get('Content-Length', 'Unknown')} bytes)")
    return response.content
|  | ||||
def generate_download_queue(url: str, filename: str, outer_file_type: str, iterables: dict) -> list:
    """
    Generate a queue of download items based on URL templates and iterable values.

    Templates may reference the iterable by its own name (``{game_id}``) or
    through the generic ``{key}`` / ``{value}`` placeholders.

    Example:
        url = "https://tcgcsv.com/tcgplayer/{game_id}/groups"
        iterables = {'game_id': [1,3,65,71,86]}

    Args:
        url: URL template.
        filename: Filename template, formatted with the same fields as ``url``.
        outer_file_type: Copied unchanged into every queue item.
        iterables: Mapping of placeholder name to the values to iterate over.

    Returns:
        One dict per value with the keys ``url``, ``filename`` and
        ``outer_file_type``.
    """
    queue = []
    for key, values in iterables.items():
        for item in values:
            # The named placeholder comes first so the generic 'key'/'value'
            # fields keep their meaning if an iterable shares those names.
            fields = {key: item, 'key': key, 'value': item}
            queue.append({
                'url': url.format(**fields),
                'filename': filename.format(**fields),
                'outer_file_type': outer_file_type,
            })
    return queue
|  | ||||
|  | ||||
def save_file(content: bytes, filename: str) -> None:
    """Write ``content`` to ``filename`` under the download directory and log the size."""
    target_path = "".join((DOWNLOAD_CONFIG_PATH, filename))
    with open(target_path, 'wb') as out_file:
        out_file.write(content)
    print(f"💾 Saved {len(content)} bytes to {filename}")
|  | ||||
|  | ||||
def unzip_file(filename: str) -> str:
    """Extract an archive from the download directory into that same directory.

    Returns:
        ``filename`` with every ``.zip`` occurrence removed.
    """
    extracted_name = filename.replace('.zip', '')
    archive_path = "".join((DOWNLOAD_CONFIG_PATH, filename))

    with ZipFile(archive_path, 'r') as archive:
        members = archive.namelist()
        print(f"📦 Extracting {len(members)} files from {filename}")
        archive.extractall(DOWNLOAD_CONFIG_PATH)

    return extracted_name
|  | ||||
def load_file(filename: str, file_type: str) -> Union[dict, list]:
    """Load and parse a file from the download directory.

    Args:
        filename: Name of the file relative to the download directory.
        file_type: Format of the file; only ``"json"`` is supported.

    Returns:
        The parsed JSON document.

    Raises:
        ValueError: If ``file_type`` is anything other than ``"json"``.
    """
    if file_type != 'json':
        raise ValueError(f"Unsupported file type: {file_type}")

    filepath = DOWNLOAD_CONFIG_PATH + filename
    # Read as UTF-8 explicitly rather than relying on the platform default
    # encoding, which may not be UTF-8.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print(f"📖 Loaded {file_type} file: {filename}")
    return data
|  | ||||
def build_record_from_config(source_data: dict, expected_columns: list, additional_data: dict = None) -> dict:
    """
    Build a record using the structure defined in the extract config.

    Args:
        source_data: The source data dictionary
        expected_columns: List of column definitions from config. Columns
            flagged with ``auto_increment`` or ``autoincrement`` are left out
            of the record.
        additional_data: Optional additional data to merge (e.g., parent UUID);
            its values take precedence over ``source_data``.

    Returns:
        Dictionary representing a single database record. Columns missing from
        the combined data get an empty string.
    """
    if additional_data is None:
        additional_data = {}

    # Merge source data with additional data (like uuid from parent structure)
    combined_data = {**source_data, **additional_data}

    record = {}
    for column in expected_columns:
        # Accept both spellings of the flag: 'auto_increment' is what this
        # function originally checked, 'autoincrement' is the spelling used
        # when the tables are created.
        if column.get('auto_increment', False) or column.get('autoincrement', False):
            continue
        col_name = column['name']
        record[col_name] = combined_data.get(col_name, '')

    return record
|  | ||||
def create_db_engine(db: dict) -> Engine:
    """Create and test a database engine connection.

    Args:
        db: Resource dictionary. The keys ``password``, ``host``, ``port`` and
            ``dbname`` are read; the user name is fixed to ``postgres``.

    Returns:
        The created engine.

    Raises:
        KeyError: If one of the required keys is missing from ``db``.
        Exception: Whatever ``engine.connect()`` raises when the test
            connection cannot be established.
    """
    # Function-scope import so this block does not depend on file-level imports.
    from urllib.parse import quote_plus

    # Characters such as '@', ':' or '/' in a password would otherwise be
    # parsed as URL delimiters.
    password = quote_plus(str(db['password']))
    db_url = f"postgresql+psycopg2://postgres:{password}@{db['host']}:{db['port']}/{db['dbname']}"
    engine = create_engine(db_url)

    # Test connection; the context manager releases it immediately.
    with engine.connect():
        pass
    print(f"🔌 Connected to database: {db['host']}:{db['port']}/{db['dbname']}")

    return engine
|  | ||||
|  | ||||
def get_db_engine() -> Engine:
    """Fetch the configured database resource and build an engine from it."""
    return create_db_engine(wmill.client.get_resource(DB_RESOURCE_PATH))
|  | ||||
def generic_preprocess(
    data: Union[dict, list],
    expected_columns: list,
    config: dict
) -> list:
    """
    Generic data preprocessing function that handles various data structures.

    Args:
        data: Source data (dict or list)
        expected_columns: List of column definitions
        config: Preprocessing configuration; ``None`` is treated as an empty
            configuration. Recognised keys are ``data_path``, ``nested``,
            ``nested_key``, ``id_key`` and ``flatten``.

    Returns:
        List of processed records

    Raises:
        ValueError: If ``data_path`` cannot be followed or the data reached is
            neither a dict nor a list.
    """
    # A job without a preprocess_config passes None here.
    if config is None:
        config = {}

    # Step 1: Follow data path
    data_path = config.get("data_path", [])
    for key in data_path:
        if not isinstance(data, dict):
            raise ValueError(f"Expected dict while navigating path, got {type(data)} at key '{key}'")
        data = data.get(key)
        if data is None:
            raise ValueError(f"Missing key '{key}' in data path: {data_path}")

    # Step 2: Handle nested structure
    nested = config.get("nested", False)
    nested_key = config.get("nested_key", None)
    id_key = config.get("id_key", None)
    flatten = config.get("flatten", False)

    records = []

    if isinstance(data, dict):
        items = data.items()
    elif isinstance(data, list):
        items = enumerate(data)
    else:
        raise ValueError(f"Unsupported data structure: {type(data)}")

    for outer_key, outer_value in items:
        if nested:
            # Nested mode: each outer value is a list of row dicts.
            if not isinstance(outer_value, list):
                continue
            for inner_value in outer_value:
                # Skip malformed rows the same way the flat branch does.
                if not isinstance(inner_value, dict):
                    continue
                if id_key and not inner_value.get(id_key):
                    continue
                # Optionally carry the outer key into each row under nested_key.
                additional_data = {nested_key: outer_key} if nested_key else {}
                record = build_record_from_config(inner_value, expected_columns, additional_data)
                records.append(record)
        else:
            if not isinstance(outer_value, dict):
                continue
            if id_key and not outer_value.get(id_key):
                continue
            if flatten:
                # Flatten lifts the "identifiers" sub-dict to the top level and
                # keeps the row's "uuid"; a null sub-dict counts as empty.
                nested_data = outer_value.get("identifiers") or {}
                combined = {**nested_data, "uuid": outer_value.get("uuid")}
                record = build_record_from_config(combined, expected_columns)
            else:
                record = build_record_from_config(outer_value, expected_columns)
            records.append(record)

    print(f"🔄 Processed {len(records)} records")
    return records
|  | ||||
def control_batch(data: list, batch_size: int = DEFAULT_BATCH_SIZE):
    """Yield consecutive slices of ``data``, each holding at most ``batch_size`` items."""
    yield from (
        data[offset:offset + batch_size]
        for offset in range(0, len(data), batch_size)
    )
|  | ||||
|  | ||||
def insert_data_into_table_batch(records: list, table: str, engine: Engine, batch_size: int = DEFAULT_BATCH_SIZE) -> None:
    """Insert ``records`` into ``table`` batch by batch and commit once at the end.

    The column list is taken from the keys of the first record. Nothing is
    executed when ``records`` is empty.
    """
    if not records:
        print("⚠️ No records to insert, skipping database operation")
        return

    print(f"💾 Inserting {len(records)} records into {table} (batch size: {batch_size})")

    # The first record defines which columns are written.
    field_names = list(records[0].keys())
    quoted_columns = ', '.join(f'"{field}"' for field in field_names)
    bind_params = ', '.join(f':{field}' for field in field_names)
    statement = f"INSERT INTO {table} ({quoted_columns}) VALUES ({bind_params})"

    with engine.connect() as connection:
        batches_done = 0
        rows_inserted = 0

        for batches_done, chunk in enumerate(control_batch(records, batch_size), start=1):
            connection.execute(text(statement), chunk)
            rows_inserted += len(chunk)

            # Progress line every tenth batch.
            if batches_done % 10 == 0:
                print(f"⏳ Inserted {rows_inserted}/{len(records)} records...")

        connection.commit()
        print(f"✅ Inserted {rows_inserted} records in {batches_done} batches")
|  | ||||
|  | ||||
def process_job(job: dict) -> dict:
    """
    Process a single ETL job.

    Every file in the download queue is downloaded, saved, unzipped when the
    outer type is ``zip``, loaded and preprocessed. The records of all files
    are then inserted into the target table together.

    Args:
        job: Job configuration dictionary. Keys read: ``url``, ``filename``,
            ``outer_file_type``, ``inner_file_type``, ``table``,
            ``expected_columns``, ``batch_size``, ``preprocess_function``
            (name of a module-level function, or a callable),
            ``preprocess_config``, ``active`` and ``iterables``.

    Returns:
        ``{"status": "skipped"}`` for inactive jobs, otherwise a dictionary
        with ``status``, ``table``, ``records_processed`` and ``filename``
        (the last file that was loaded).

    Raises:
        ValueError: If the preprocessing function cannot be resolved to a callable.
    """
    table = job.get('table')
    active = job.get('active')

    print(f"\n🚀 Processing job for table '{table}'")

    # Bail out before reading anything else so inactive jobs stay cheap.
    if not active:
        print(f"⚠️ Job is not active, skipping")
        return {"status": "skipped"}

    # Extract remaining job parameters
    url = job.get('url')
    filename = job.get('filename')
    outer_file_type = job.get('outer_file_type')
    inner_file_type = job.get('inner_file_type')
    expected_columns = job.get('expected_columns')
    batch_size = job.get('batch_size', DEFAULT_BATCH_SIZE)
    preprocess_function_name = job.get('preprocess_function', 'generic_preprocess')
    preprocess_config = job.get('preprocess_config')
    iterables = job.get('iterables')

    # Resolve the preprocessing function; a non-string value is used as given
    # so the callable check below always has something to inspect.
    if isinstance(preprocess_function_name, str):
        preprocess_function = globals().get(preprocess_function_name)
    else:
        preprocess_function = preprocess_function_name
    if not callable(preprocess_function):
        raise ValueError(f"Preprocessing function '{preprocess_function_name}' not found or not callable.")

    # Get database engine
    engine = get_db_engine()

    # Populate download queue
    if iterables:
        queue = generate_download_queue(url, filename, outer_file_type, iterables)
    else:
        queue = [{
            'url': url,
            'filename': filename,
            'outer_file_type': outer_file_type,
        }]

    # Handle each queue item end to end so templated downloads are loaded
    # under their formatted filenames rather than the raw template.
    records = []
    saved_filename = filename
    for queue_item in queue:
        item_filename = queue_item.get('filename')
        item_file_type = queue_item.get('outer_file_type')

        content = download_request(queue_item.get('url'), item_filename, item_file_type)
        save_file(content, item_filename)

        # Handle file extraction if needed
        saved_filename = item_filename
        if item_file_type == 'zip':
            saved_filename = unzip_file(item_filename)

        # Load and preprocess data
        data = load_file(saved_filename, inner_file_type)
        records.extend(preprocess_function(data, expected_columns, preprocess_config))

    # Insert data into database
    insert_data_into_table_batch(records, table, engine, batch_size)

    result = {
        "status": "success",
        "table": table,
        "records_processed": len(records),
        "filename": saved_filename
    }

    print(f"✅ Job complete: {len(records)} records processed for {table}")
    return result
|  | ||||
|  | ||||
|  | ||||
| def main() -> dict: | ||||
|     """ | ||||
|     Main ETL processing function. | ||||
|      | ||||
|     Returns: | ||||
|         Dictionary with overall processing results | ||||
|     """ | ||||
|     print("🎯 ETL Process Starting") | ||||
|     print("=" * 50) | ||||
|      | ||||
|     # Load configuration | ||||
|     config_yaml = wmill.get_variable(EXTRACT_CONFIG_PATH) | ||||
|     config = yaml.safe_load(config_yaml) | ||||
|     print(f"📋 Processing {len(config['jobs'])} jobs") | ||||
|      | ||||
|     results = [] | ||||
|     successful_jobs = 0 | ||||
|     failed_jobs = 0 | ||||
|      | ||||
|     for i, job in enumerate(config['jobs'], 1): | ||||
|         print(f"\n--- Job {i}/{len(config['jobs'])} ---") | ||||
|          | ||||
|         try: | ||||
|             result = process_job(job) | ||||
|             results.append(result) | ||||
|             successful_jobs += 1 | ||||
|         except Exception as e: | ||||
|             error_result = { | ||||
|                 "status": "error", | ||||
|                 "table": job.get('table', 'unknown'), | ||||
|                 "error": str(e), | ||||
|                 "filename": job.get('filename', 'unknown') | ||||
|             } | ||||
|             results.append(error_result) | ||||
|             failed_jobs += 1 | ||||
|             print(f"❌ Job {i} failed: {str(e)}") | ||||
|      | ||||
|     print(f"\n🏁 ETL Process Complete") | ||||
|     print(f"✅ Successful: {successful_jobs} | ❌ Failed: {failed_jobs} | 📋 Total: {len(results)}") | ||||
|      | ||||
|     return { | ||||
|         "status": "completed", | ||||
|         "jobs_processed": len(results), | ||||
|         "successful_jobs": successful_jobs, | ||||
|         "failed_jobs": failed_jobs, | ||||
|         "results": results | ||||
|     } | ||||
							
								
								
									
										17
									
								
								f/CCR_ETL/ccr_etl_mtgjson.script.lock
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								f/CCR_ETL/ccr_etl_mtgjson.script.lock
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,17 @@ | ||||
| # py: 3.11 | ||||
| anyio==4.10.0 | ||||
| certifi==2025.8.3 | ||||
| charset-normalizer==3.4.3 | ||||
| greenlet==3.2.4 | ||||
| h11==0.16.0 | ||||
| httpcore==1.0.9 | ||||
| httpx==0.28.1 | ||||
| idna==3.10 | ||||
| psycopg2-binary==2.9.10 | ||||
| pyyaml==6.0.2 | ||||
| requests==2.32.5 | ||||
| sniffio==1.3.1 | ||||
| sqlalchemy==2.0.43 | ||||
| typing-extensions==4.15.0 | ||||
| urllib3==2.5.0 | ||||
| wmill==1.539.1 | ||||
							
								
								
									
										9
									
								
								f/CCR_ETL/ccr_etl_mtgjson.script.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								f/CCR_ETL/ccr_etl_mtgjson.script.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| summary: CCR ETL MTGJSON | ||||
| description: '' | ||||
| lock: '!inline f/CCR_ETL/ccr_etl_mtgjson.script.lock' | ||||
| kind: script | ||||
| schema: | ||||
|   $schema: 'https://json-schema.org/draft/2020-12/schema' | ||||
|   type: object | ||||
|   properties: {} | ||||
|   required: [] | ||||
							
								
								
									
										162
									
								
								f/CCR_ETL/ccr_extract_config.variable.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								f/CCR_ETL/ccr_extract_config.variable.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| description: '' | ||||
| value: |- | ||||
|   jobs: | ||||
|     - name: mtgjson_skus | ||||
|       active: true | ||||
|       url: https://mtgjson.com/api/v5/TcgplayerSkus.json.zip | ||||
|       filename: TcgplayerSkus.json.zip | ||||
|       outer_file_type: zip | ||||
|       inner_file_type: json | ||||
|       preprocess_config: | ||||
|         data_path: ["data"] | ||||
|         nested: true | ||||
|         nested_key: "uuid" | ||||
|         id_key: "skuId" | ||||
|       table: mtgjson_skus | ||||
|       batch_size: 1000 | ||||
|       expected_columns: | ||||
|         - name: uuid | ||||
|           type: string | ||||
|         - name: condition | ||||
|           type: string | ||||
|         - name: language | ||||
|           type: string | ||||
|         - name: printing | ||||
|           type: string | ||||
|         - name: finish | ||||
|           type: string | ||||
|         - name: productId | ||||
|           type: string | ||||
|         - name: skuId | ||||
|           type: string | ||||
|       cache: | ||||
|         status: true | ||||
|         ttl: 86400 | ||||
|     - name: mtgjson_identifiers | ||||
|       active: true | ||||
|       url: https://mtgjson.com/api/v5/AllIdentifiers.json.zip | ||||
|       filename: AllIdentifiers.json.zip | ||||
|       outer_file_type: zip | ||||
|       inner_file_type: json | ||||
|       preprocess_config: | ||||
|         data_path: ["data"] | ||||
|         nested: false | ||||
|         flatten: true | ||||
|         id_key: "uuid" | ||||
|       table: mtgjson_identifiers | ||||
|       batch_size: 1000 | ||||
|       expected_columns: | ||||
|         - name: uuid | ||||
|           type: string | ||||
|         - name: name | ||||
|           type: string | ||||
|         - name: setCode | ||||
|           type: string | ||||
|         - name: abuId | ||||
|           type: string | ||||
|         - name: cardKingdomEtchedId | ||||
|           type: string | ||||
|         - name: cardKingdomFoilId | ||||
|           type: string | ||||
|         - name: cardKingdomId | ||||
|           type: string | ||||
|         - name: cardsphereId | ||||
|           type: string | ||||
|         - name: cardsphereFoilId | ||||
|           type: string | ||||
|         - name: cardtraderId | ||||
|           type: string | ||||
|         - name: csiId | ||||
|           type: string | ||||
|         - name: mcmId | ||||
|           type: string | ||||
|         - name: mcmMetaId | ||||
|           type: string | ||||
|         - name: miniaturemarketId | ||||
|           type: string | ||||
|         - name: mtgArenaId | ||||
|           type: string | ||||
|         - name: mtgjsonFoilVersionId | ||||
|           type: string | ||||
|         - name: mtgjsonNonFoilVersionId | ||||
|           type: string | ||||
|         - name: mtgjsonV4Id | ||||
|           type: string | ||||
|         - name: mtgoFoilId | ||||
|           type: string | ||||
|         - name: mtgoId | ||||
|           type: string | ||||
|         - name: multiverseId | ||||
|           type: string | ||||
|         - name: scgId | ||||
|           type: string | ||||
|         - name: scryfallId | ||||
|           type: string | ||||
|         - name: scryfallCardBackId | ||||
|           type: string | ||||
|         - name: scryfallOracleId | ||||
|           type: string | ||||
|         - name: scryfallIllustrationId | ||||
|           type: string | ||||
|         - name: tcgplayerProductId | ||||
|           type: string | ||||
|         - name: tcgplayerEtchedProductId | ||||
|           type: string | ||||
|         - name: tntId | ||||
|           type: string | ||||
|       cache: | ||||
|         status: true | ||||
|         ttl: 86400 | ||||
|     - name: tcgcsv_categories | ||||
|       active: true | ||||
|       url: https://tcgcsv.com/tcgplayer/categories | ||||
|       outer_file_type: json | ||||
|       preprocess_config: | ||||
|         data_path: ["results"] | ||||
|         nested: false | ||||
|       filename: tcgplayer_categories.json | ||||
|       expected_columns: | ||||
|         - name: categoryId | ||||
|           type: integer | ||||
|         - name: name | ||||
|           type: string | ||||
|         - name: modifiedOn | ||||
|           type: string | ||||
|         - name: displayName | ||||
|           type: string | ||||
|         - name: seoCategoryName | ||||
|           type: string | ||||
|         - name: categoryDescription | ||||
|           type: string | ||||
|         - name: categoryPageTitle | ||||
|           type: string | ||||
|         - name: sealedLabel | ||||
|           type: string | ||||
|         - name: nonSealedLabel | ||||
|           type: string | ||||
|         - name: conditionGuideUrl | ||||
|           type: string | ||||
|         - name: isScannable | ||||
|           type: boolean | ||||
|         - name: popularity | ||||
|           type: integer | ||||
|         - name: isDirect | ||||
|           type: boolean | ||||
|     - name: tcgcsv_groups | ||||
|       active: true | ||||
|       url: https://tcgcsv.com/tcgplayer/{game_id}/groups | ||||
|       outer_file_type: json | ||||
|       preprocess_config: | ||||
|         data_path: ["results"] | ||||
|         nested: false | ||||
|       filename: tcgplayer_{game_id}_groups.json | ||||
|       expected_columns: | ||||
|         - name: groupId | ||||
|           type: integer | ||||
|         - name: name | ||||
|           type: string | ||||
|         - name: modifiedOn | ||||
|           type: string | ||||
|       iterables: | ||||
|         game_id: [1,3,65,71,86] | ||||
| is_secret: false | ||||
							
								
								
									
										6
									
								
								f/CCR_ETL/folder.meta.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								f/CCR_ETL/folder.meta.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| summary: null | ||||
| display_name: CCR_ETL | ||||
| extra_perms: | ||||
|   u/joshuakrzemien: true | ||||
| owners: | ||||
|   - u/joshuakrzemien | ||||
| @@ -1,5 +1,10 @@ | ||||
| version: v2 | ||||
| locks: | ||||
|   f/CCR_ETL/ccr_etl_db_config+672b195893fa2357771ffa3cedc08fd5c8a89b2e831453c694fa2e6491f3b13b: 672b195893fa2357771ffa3cedc08fd5c8a89b2e831453c694fa2e6491f3b13b | ||||
|   f/CCR_ETL/ccr_etl_db_config+92fd8458ec5f2ae9db765da90a8b5f68086769e62767933a095ce03f154e2863: 92fd8458ec5f2ae9db765da90a8b5f68086769e62767933a095ce03f154e2863 | ||||
|   f/CCR_ETL/ccr_etl_db_init+55def58b140529028aec966f2f8ff7a98380842e3edd0cca985bad0e18dd3533: 55def58b140529028aec966f2f8ff7a98380842e3edd0cca985bad0e18dd3533 | ||||
|   f/CCR_ETL/ccr_etl_mtgjson+7b4ff1f872736a4935a108283f1cebac0a8ef84f174eeccd797652eb1e9f004b: 7b4ff1f872736a4935a108283f1cebac0a8ef84f174eeccd797652eb1e9f004b | ||||
|   f/CCR_ETL/ccr_etl_mtgjson+c18f5bd64c559e4765379b2e0bcbfd06df1b8becf3a47bfd6707017878ffc610: c18f5bd64c559e4765379b2e0bcbfd06df1b8becf3a47bfd6707017878ffc610 | ||||
|   u/joshuakrzemien/db_test+972c08a79a04b0017b517504a61b5d5069e82b199171e0569f281508306d8c46: 972c08a79a04b0017b517504a61b5d5069e82b199171e0569f281508306d8c46 | ||||
|   u/joshuakrzemien/mtg_json_download+f31539309b4d4b4ef529e47352f676f64e58e2a1b2797c2a630acab8a7c40260: f31539309b4d4b4ef529e47352f676f64e58e2a1b2797c2a630acab8a7c40260 | ||||
|   u/joshuakrzemien/mtgjson_unzip+0bdcf659018721653979c2de3bfd9ce2f70cf0e76ef6993226ee76e9d6c73dc4: 0bdcf659018721653979c2de3bfd9ce2f70cf0e76ef6993226ee76e9d6c73dc4 | ||||
|   | ||||
							
								
								
									
										24
									
								
								wmill.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								wmill.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,24 @@ | ||||
| defaultTs: bun | ||||
| includes: | ||||
|   - f/** | ||||
| excludes: [] | ||||
| codebases: [] | ||||
| skipVariables: false | ||||
| skipResources: false | ||||
| skipResourceTypes: false | ||||
| skipSecrets: true | ||||
| skipScripts: false | ||||
| skipFlows: false | ||||
| skipApps: false | ||||
| skipFolders: false | ||||
| includeSchedules: false | ||||
| includeTriggers: false | ||||
| includeUsers: false | ||||
| includeGroups: false | ||||
| includeSettings: false | ||||
| includeKey: false | ||||
| gitBranches: | ||||
|   master: | ||||
|     overrides: {} | ||||
|     baseUrl: 'http://192.168.1.41:8009/' | ||||
|     workspaceId: ccr-cards | ||||
		Reference in New Issue
	
	Block a user