first commit
This commit is contained in:
7
app_factory/__init__.py
Normal file
7
app_factory/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""App Factory - Autonomous multi-agent orchestration framework."""
|
||||
|
||||
from app_factory.core.graph import AppFactoryOrchestrator
|
||||
from app_factory.core.workspace import WorkspaceManager
|
||||
from app_factory.core.observability import ObservabilityManager
|
||||
|
||||
__all__ = ["AppFactoryOrchestrator", "WorkspaceManager", "ObservabilityManager"]
|
||||
8
app_factory/agents/__init__.py
Normal file
8
app_factory/agents/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Agent modules for the App Factory orchestration framework."""
|
||||
|
||||
from app_factory.agents.pm_agent import PMAgent
|
||||
from app_factory.agents.task_agent import TaskMasterAgent
|
||||
from app_factory.agents.dev_agent import DevAgentManager
|
||||
from app_factory.agents.qa_agent import QAAgent
|
||||
|
||||
__all__ = ["PMAgent", "TaskMasterAgent", "DevAgentManager", "QAAgent"]
|
||||
205
app_factory/agents/dev_agent.py
Normal file
205
app_factory/agents/dev_agent.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Dev Agent Manager - Spawns Dev Agents in Docker containers via Claude Code."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pexpect
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PROMPT_TEMPLATE_PATH = Path(__file__).resolve().parent.parent / "prompts" / "dev_task_execution.txt"
|
||||
|
||||
|
||||
class DevAgentManager:
    """Spawns Dev Agents in Docker containers, interfaces with Claude Code via pexpect."""

    def __init__(self, docker_client=None, max_retries: int = 3, timeout: int = 1800):
        """Initialize DevAgentManager.

        Args:
            docker_client: Docker client instance (or None to create from env).
            max_retries: Maximum Dev-QA bounce retries per task.
            timeout: Timeout in seconds for Claude Code execution (default 30 min).
        """
        if docker_client is not None:
            self.docker_client = docker_client
        else:
            # Imported lazily so callers that inject a client don't need docker installed.
            import docker
            self.docker_client = docker.from_env()

        self.max_retries = max_retries
        self.timeout = timeout
        # Per-task attempt counter, keyed by task id (maintained by execute_with_retry).
        self._retry_counts: dict[str, int] = {}

    def prepare_task_prompt(self, task: dict, global_arch: str = "") -> str:
        """Build a prompt string for the Dev Agent from the template.

        Args:
            task: Task dict with keys task_id, title, description, details, testStrategy.
            global_arch: Optional global architecture summary.

        Returns:
            Formatted prompt string.
        """
        template = PROMPT_TEMPLATE_PATH.read_text()
        return template.format(
            task_id=task.get("task_id", task.get("id", "")),
            title=task.get("title", ""),
            description=task.get("description", ""),
            details=task.get("details", ""),
            test_strategy=task.get("testStrategy", ""),
            global_architecture=global_arch or "No architecture context provided.",
        )

    async def execute_task(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task inside a Docker container using Claude Code.

        Args:
            task: Task dict.
            container_id: Docker container ID to exec into.
            worktree_path: Host path to the worktree (mounted at /workspace).
            global_arch: Optional architecture context.

        Returns:
            Dict with status, output, files_changed, and exit_code.
        """
        prompt = self.prepare_task_prompt(task, global_arch)

        # Write prompt to temp file in worktree so it's visible inside the container
        prompt_file = os.path.join(worktree_path, ".task_prompt.txt")
        with open(prompt_file, "w") as f:
            f.write(prompt)

        cmd = f"docker exec {container_id} claude --print --prompt-file /workspace/.task_prompt.txt"

        # Bug fix: bind child before the try so the TIMEOUT handler cannot hit an
        # unbound local if pexpect.spawn itself raises.
        child = None
        try:
            child = pexpect.spawn(cmd, timeout=self.timeout, encoding="utf-8")
            child.expect(pexpect.EOF, timeout=self.timeout)
            output = child.before or ""
            child.close()
            exit_code = child.exitstatus if child.exitstatus is not None else -1
        except pexpect.TIMEOUT:
            if child is not None:
                try:
                    child.close(force=True)
                except Exception:
                    pass
            return {
                "status": "failed",
                "output": "timeout",
                "files_changed": [],
                "exit_code": -1,
            }
        finally:
            # Clean up prompt file
            try:
                os.remove(prompt_file)
            except OSError:
                pass

        parsed = self.parse_claude_output(output)
        status = "success" if exit_code == 0 else "failed"

        return {
            "status": status,
            "output": output,
            "files_changed": parsed["files_changed"],
            "exit_code": exit_code,
        }

    def parse_claude_output(self, output: str) -> dict:
        """Parse Claude Code output to extract structured info.

        Args:
            output: Raw stdout from Claude Code.

        Returns:
            Dict with files_changed, test_results, and errors.
        """
        # Extract file paths (common patterns: Created/Modified/Updated path/to/file.py)
        file_patterns = re.findall(
            r"(?:(?:Creat|Modifi|Updat|Edit|Writ)(?:ed|ing)\s+)([^\s]+\.\w+)",
            output,
        )
        # Also catch paths that look like source files mentioned standalone
        standalone_paths = re.findall(
            r"(?:^|\s)([\w./]+\.(?:py|js|ts|yaml|yml|json|txt|md|toml|cfg))\b",
            output,
        )
        all_files = list(dict.fromkeys(file_patterns + standalone_paths))  # dedupe, preserve order

        # Extract test results (e.g. "5 passed", "2 failed" from a pytest run)
        test_results = {}
        passed_match = re.search(r"(\d+)\s+passed", output)
        failed_match = re.search(r"(\d+)\s+failed", output)
        if passed_match:
            test_results["passed"] = int(passed_match.group(1))
        if failed_match:
            test_results["failed"] = int(failed_match.group(1))

        # Extract error messages
        errors = re.findall(r"(?:Error|Exception|FAILED)[:\s]+(.*?)(?:\n|$)", output, re.IGNORECASE)

        return {
            "files_changed": all_files,
            "test_results": test_results,
            "errors": errors,
        }

    async def execute_with_retry(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task with retry logic.

        Retries up to max_retries times on failure. If all retries are exhausted,
        returns a result with status 'needs_clarification'.

        Args:
            task: Task dict.
            container_id: Docker container ID.
            worktree_path: Host worktree path.
            global_arch: Optional architecture context.

        Returns:
            Final execution result dict.
        """
        task_id = str(task.get("task_id", task.get("id", "")))

        # Bug fix: initialize result so a max_retries <= 0 configuration cannot
        # raise NameError on the fall-through return below.
        result: dict = {}
        for attempt in range(self.max_retries):
            self._retry_counts[task_id] = attempt + 1
            result = await self.execute_task(task, container_id, worktree_path, global_arch)
            if result["status"] == "success":
                return result

        # All retries exhausted
        return {
            "status": "needs_clarification",
            "output": result.get("output", ""),
            "files_changed": result.get("files_changed", []),
            "exit_code": result.get("exit_code", -1),
        }

    def get_retry_count(self, task_id: str) -> int:
        """Return current retry count for a task."""
        return self._retry_counts.get(task_id, 0)

    def reset_retry_count(self, task_id: str):
        """Reset retry counter for a task (after clarification resolved)."""
        self._retry_counts.pop(task_id, None)
|
||||
136
app_factory/agents/pm_agent.py
Normal file
136
app_factory/agents/pm_agent.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Project Manager Agent - Expands user prompts into structured PRDs and handles clarification requests."""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
|
||||
class PMAgent:
|
||||
"""Agent responsible for PRD generation, clarification handling, and project planning."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str = None,
|
||||
auth_token: str = None,
|
||||
model: str = "claude-opus-4-6",
|
||||
debug: bool = False,
|
||||
observability=None,
|
||||
):
|
||||
self.model = model
|
||||
self.input_tokens = 0
|
||||
self.output_tokens = 0
|
||||
self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
|
||||
self.observability = observability
|
||||
|
||||
resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
||||
resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
|
||||
self.client = ClaudeSDKClient(
|
||||
api_key=resolved_key,
|
||||
auth_token=resolved_auth,
|
||||
enable_debug=debug,
|
||||
)
|
||||
|
||||
def _load_template(self, template_name: str) -> str:
|
||||
"""Load a prompt template file from app_factory/prompts/."""
|
||||
path = self._prompts_dir / template_name
|
||||
return path.read_text()
|
||||
|
||||
async def expand_prompt_to_prd(self, user_input: str) -> str:
|
||||
"""Expand a user prompt into a structured PRD using Claude.
|
||||
|
||||
Returns markdown with sections: Objective, Core Requirements,
|
||||
Technical Architecture, Tech Stack, Success Criteria, Non-Functional Requirements.
|
||||
"""
|
||||
system_prompt = self._load_template("pm_prd_expansion.txt")
|
||||
|
||||
response = await self.client.complete(
|
||||
prompt=user_input,
|
||||
model=self.model,
|
||||
system_prompt=system_prompt,
|
||||
max_turns=100,
|
||||
observability=self.observability,
|
||||
agent_name="pm_agent",
|
||||
task_id="expand_prd",
|
||||
)
|
||||
|
||||
self.input_tokens += response.input_tokens
|
||||
self.output_tokens += response.output_tokens
|
||||
if self.observability:
|
||||
self.observability.log_token_usage(
|
||||
"pm_agent",
|
||||
"expand_prd",
|
||||
input_tokens=response.input_tokens,
|
||||
output_tokens=response.output_tokens,
|
||||
model=self.model,
|
||||
)
|
||||
|
||||
return response.text
|
||||
|
||||
async def handle_clarification_request(self, clarification: dict) -> str:
|
||||
"""Handle a clarification request from a downstream agent.
|
||||
|
||||
Args:
|
||||
clarification: dict with keys requesting_agent, task_id, question, context.
|
||||
|
||||
Returns:
|
||||
Clarification response string. If the question requires human input,
|
||||
prompts the user and returns their answer.
|
||||
"""
|
||||
template = self._load_template("pm_clarification.txt")
|
||||
prompt = template.format(
|
||||
requesting_agent=clarification.get("requesting_agent", "unknown"),
|
||||
task_id=clarification.get("task_id", "N/A"),
|
||||
question=clarification.get("question", ""),
|
||||
context=clarification.get("context", ""),
|
||||
)
|
||||
|
||||
response = await self.client.complete(
|
||||
prompt=prompt,
|
||||
model=self.model,
|
||||
max_turns=100,
|
||||
observability=self.observability,
|
||||
agent_name="pm_agent",
|
||||
task_id=f"clarification:{clarification.get('task_id', 'N/A')}",
|
||||
)
|
||||
|
||||
self.input_tokens += response.input_tokens
|
||||
self.output_tokens += response.output_tokens
|
||||
if self.observability:
|
||||
self.observability.log_token_usage(
|
||||
"pm_agent",
|
||||
f"clarification:{clarification.get('task_id', 'N/A')}",
|
||||
input_tokens=response.input_tokens,
|
||||
output_tokens=response.output_tokens,
|
||||
model=self.model,
|
||||
)
|
||||
|
||||
answer = response.text.strip()
|
||||
|
||||
if "ESCALATE_TO_HUMAN" in answer:
|
||||
human_answer = input(
|
||||
f"[PMAgent] Clarification needed for {clarification.get('requesting_agent', 'agent')} "
|
||||
f"(task {clarification.get('task_id', 'N/A')}): "
|
||||
f"{clarification.get('question', '')}\n> "
|
||||
)
|
||||
return human_answer
|
||||
|
||||
return answer
|
||||
|
||||
def update_prd(self, prd_path: str, updates: str):
|
||||
"""Append updates to an existing PRD file with a versioned header."""
|
||||
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
header = f"\n\n---\n## PRD Update - {timestamp}\n\n"
|
||||
|
||||
with open(prd_path, "a") as f:
|
||||
f.write(header)
|
||||
f.write(updates)
|
||||
|
||||
def get_token_usage(self) -> dict:
|
||||
"""Return cumulative token usage."""
|
||||
return {
|
||||
"input_tokens": self.input_tokens,
|
||||
"output_tokens": self.output_tokens,
|
||||
"total_tokens": self.input_tokens + self.output_tokens,
|
||||
}
|
||||
383
app_factory/agents/qa_agent.py
Normal file
383
app_factory/agents/qa_agent.py
Normal file
@@ -0,0 +1,383 @@
|
||||
"""QA Agent - Handles code review, testing, linting, and merge operations."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import git
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
|
||||
class QAAgent:
|
||||
"""Reviews code, runs tests, handles merge conflicts, merges worktrees to main."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
repo_path: str,
|
||||
api_key: str = None,
|
||||
auth_token: str = None,
|
||||
max_retries: int = 3,
|
||||
debug: bool = False,
|
||||
observability=None,
|
||||
):
|
||||
"""Initialize QAAgent.
|
||||
|
||||
Args:
|
||||
repo_path: Path to the git repository.
|
||||
api_key: Optional API key. Falls back to ANTHROPIC_API_KEY env var.
|
||||
max_retries: Maximum QA-Dev bounce retries per task.
|
||||
"""
|
||||
self.repo = git.Repo(repo_path)
|
||||
self.repo_path = Path(repo_path).resolve()
|
||||
self.max_retries = max_retries
|
||||
self._retry_counts: dict[str, int] = {}
|
||||
self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
|
||||
self.observability = observability
|
||||
|
||||
resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
||||
resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
|
||||
self.client = ClaudeSDKClient(
|
||||
api_key=resolved_key,
|
||||
auth_token=resolved_auth,
|
||||
enable_debug=debug,
|
||||
)
|
||||
|
||||
async def review_and_merge(self, task_id: str, worktree_path: str, task: dict = None) -> dict:
|
||||
"""Full QA pipeline: rebase, lint, test, review, merge.
|
||||
|
||||
Returns:
|
||||
dict with status and details. Status is one of:
|
||||
'merged', 'rebase_failed', 'lint_failed', 'tests_failed', 'review_failed'.
|
||||
"""
|
||||
# 1. Rebase feature branch onto main
|
||||
rebase_result = await self.rebase_onto_main(worktree_path, task_id)
|
||||
if not rebase_result["success"]:
|
||||
self._increment_retry(task_id)
|
||||
return {
|
||||
"status": "rebase_failed",
|
||||
"conflicts": rebase_result.get("conflicts", []),
|
||||
"retry_count": self.get_retry_count(task_id),
|
||||
}
|
||||
|
||||
# 2. Run linting
|
||||
lint_result = self.run_linter(worktree_path)
|
||||
if not lint_result["passed"]:
|
||||
self._increment_retry(task_id)
|
||||
return {
|
||||
"status": "lint_failed",
|
||||
"errors": lint_result["errors"],
|
||||
"warnings": lint_result["warnings"],
|
||||
"retry_count": self.get_retry_count(task_id),
|
||||
}
|
||||
|
||||
# 3. Run tests
|
||||
test_result = self.run_tests(worktree_path)
|
||||
if not test_result["passed"]:
|
||||
self._increment_retry(task_id)
|
||||
return {
|
||||
"status": "tests_failed",
|
||||
"total": test_result["total"],
|
||||
"failures": test_result["failures"],
|
||||
"errors": test_result["errors"],
|
||||
"output": test_result["output"],
|
||||
"retry_count": self.get_retry_count(task_id),
|
||||
}
|
||||
|
||||
# 4. Code review via Claude
|
||||
wt_repo = git.Repo(worktree_path)
|
||||
diff = wt_repo.git.diff("main", "--", ".")
|
||||
review_result = await self.code_review(diff, task=task)
|
||||
if not review_result["approved"]:
|
||||
self._increment_retry(task_id)
|
||||
return {
|
||||
"status": "review_failed",
|
||||
"issues": review_result["issues"],
|
||||
"summary": review_result["summary"],
|
||||
"retry_count": self.get_retry_count(task_id),
|
||||
}
|
||||
|
||||
# 5. Merge to main
|
||||
merge_result = self.merge_to_main(worktree_path, task_id)
|
||||
if not merge_result["success"]:
|
||||
return {
|
||||
"status": "merge_failed",
|
||||
"error": merge_result.get("error", "Unknown merge error"),
|
||||
}
|
||||
|
||||
return {
|
||||
"status": "merged",
|
||||
"commit_sha": merge_result["commit_sha"],
|
||||
"review_summary": review_result["summary"],
|
||||
}
|
||||
|
||||
async def rebase_onto_main(self, worktree_path: str, task_id: str) -> dict:
|
||||
"""Rebase the feature branch in the worktree onto main.
|
||||
|
||||
Returns:
|
||||
dict with success bool and conflicts list.
|
||||
"""
|
||||
wt_repo = git.Repo(worktree_path)
|
||||
try:
|
||||
wt_repo.git.fetch("origin", "main")
|
||||
except git.GitCommandError:
|
||||
pass # fetch may fail in local-only repos; continue with local main
|
||||
|
||||
try:
|
||||
wt_repo.git.rebase("main")
|
||||
return {"success": True, "conflicts": []}
|
||||
except git.GitCommandError:
|
||||
# Rebase failed — check for conflicts
|
||||
conflicts = self._get_conflict_files(wt_repo)
|
||||
if conflicts and self.auto_resolve_conflicts(worktree_path):
|
||||
return {"success": True, "conflicts": []}
|
||||
# Abort the failed rebase
|
||||
try:
|
||||
wt_repo.git.rebase("--abort")
|
||||
except git.GitCommandError:
|
||||
pass
|
||||
return {"success": False, "conflicts": conflicts}
|
||||
|
||||
def run_linter(self, worktree_path: str) -> dict:
|
||||
"""Run ruff linter on the worktree.
|
||||
|
||||
Returns:
|
||||
dict with passed bool, errors list, and warnings list.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ruff", "check", "."],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return {"passed": True, "errors": [], "warnings": ["ruff not found, skipping lint"]}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {"passed": False, "errors": ["Linter timed out"], "warnings": []}
|
||||
|
||||
errors = []
|
||||
warnings = []
|
||||
for line in result.stdout.splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("Found") or line.startswith("All checks"):
|
||||
continue
|
||||
# ruff output lines contain error codes like E501, W291, etc.
|
||||
if re.search(r"\b[A-Z]\d{3,4}\b", line):
|
||||
errors.append(line)
|
||||
elif line:
|
||||
warnings.append(line)
|
||||
|
||||
passed = result.returncode == 0
|
||||
return {"passed": passed, "errors": errors, "warnings": warnings}
|
||||
|
||||
def run_tests(self, worktree_path: str) -> dict:
|
||||
"""Run pytest in the worktree.
|
||||
|
||||
Returns:
|
||||
dict with passed bool, total/failures/errors counts, and raw output.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["python", "-m", "pytest", "-v", "--tb=short"],
|
||||
cwd=worktree_path,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return {"passed": False, "total": 0, "failures": 0, "errors": 1,
|
||||
"output": "pytest not found"}
|
||||
except subprocess.TimeoutExpired:
|
||||
return {"passed": False, "total": 0, "failures": 0, "errors": 1,
|
||||
"output": "Test execution timed out"}
|
||||
|
||||
output = result.stdout + result.stderr
|
||||
parsed = self.parse_test_results(output)
|
||||
parsed["output"] = output
|
||||
return parsed
|
||||
|
||||
async def code_review(self, diff: str, task: dict = None) -> dict:
|
||||
"""Review a diff using Claude for quality and security issues.
|
||||
|
||||
Returns:
|
||||
dict with approved bool, issues list, and summary string.
|
||||
"""
|
||||
template = self._load_template("qa_review.txt")
|
||||
task_context = ""
|
||||
if task:
|
||||
task_context = (
|
||||
f"Task ID: {task.get('id', 'N/A')}\n"
|
||||
f"Title: {task.get('title', 'N/A')}\n"
|
||||
f"Description: {task.get('description', 'N/A')}"
|
||||
)
|
||||
|
||||
prompt = template.format(task_context=task_context, diff=diff)
|
||||
|
||||
response = await self.client.complete(
|
||||
prompt=prompt,
|
||||
model="claude-sonnet-4-6",
|
||||
max_turns=100,
|
||||
observability=self.observability,
|
||||
agent_name="qa_agent",
|
||||
task_id=str(task.get("id", task.get("task_id", "review"))) if task else "review",
|
||||
)
|
||||
if self.observability:
|
||||
self.observability.log_token_usage(
|
||||
"qa_agent",
|
||||
str(task.get("id", task.get("task_id", "review"))) if task else "review",
|
||||
input_tokens=response.input_tokens,
|
||||
output_tokens=response.output_tokens,
|
||||
model="claude-sonnet-4-6",
|
||||
)
|
||||
|
||||
text = response.text
|
||||
return self._parse_review_response(text)
|
||||
|
||||
def merge_to_main(self, worktree_path: str, task_id: str) -> dict:
|
||||
"""Merge the feature branch into main with --no-ff.
|
||||
|
||||
Returns:
|
||||
dict with success bool and commit_sha.
|
||||
"""
|
||||
branch_name = f"feature/task-{task_id}"
|
||||
try:
|
||||
self.repo.git.checkout("main")
|
||||
self.repo.git.merge("--no-ff", branch_name, m=f"Merge {branch_name}")
|
||||
commit_sha = self.repo.head.commit.hexsha
|
||||
return {"success": True, "commit_sha": commit_sha}
|
||||
except git.GitCommandError as e:
|
||||
return {"success": False, "commit_sha": None, "error": str(e)}
|
||||
|
||||
def auto_resolve_conflicts(self, worktree_path: str) -> bool:
|
||||
"""Try to auto-resolve simple merge conflicts.
|
||||
|
||||
Returns True if all conflicts were resolved.
|
||||
"""
|
||||
wt_repo = git.Repo(worktree_path)
|
||||
unmerged = wt_repo.index.unmerged_blobs()
|
||||
if not unmerged:
|
||||
return True
|
||||
|
||||
for path in unmerged:
|
||||
file_path = os.path.join(worktree_path, path)
|
||||
if not os.path.exists(file_path):
|
||||
continue
|
||||
try:
|
||||
with open(file_path) as f:
|
||||
content = f.read()
|
||||
# Accept "theirs" (incoming) for simple conflicts
|
||||
if "<<<<<<< " in content and "=======" in content and ">>>>>>> " in content:
|
||||
resolved = re.sub(
|
||||
r"<<<<<<< [^\n]*\n.*?=======\n(.*?)>>>>>>> [^\n]*\n",
|
||||
r"\1",
|
||||
content,
|
||||
flags=re.DOTALL,
|
||||
)
|
||||
with open(file_path, "w") as f:
|
||||
f.write(resolved)
|
||||
wt_repo.index.add([path])
|
||||
else:
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
try:
|
||||
wt_repo.git.rebase("--continue")
|
||||
return True
|
||||
except git.GitCommandError:
|
||||
return False
|
||||
|
||||
def parse_test_results(self, output: str) -> dict:
|
||||
"""Parse pytest output into structured results.
|
||||
|
||||
Returns:
|
||||
dict with passed bool, total int, failures int, errors int.
|
||||
"""
|
||||
# Match pytest summary line like "5 passed, 2 failed, 1 error"
|
||||
passed_count = 0
|
||||
failed_count = 0
|
||||
error_count = 0
|
||||
|
||||
# Look for the summary line
|
||||
summary_match = re.search(
|
||||
r"=+\s*(.*?)\s*=+\s*$",
|
||||
output,
|
||||
re.MULTILINE,
|
||||
)
|
||||
if summary_match:
|
||||
summary_line = summary_match.group(1)
|
||||
p = re.search(r"(\d+)\s+passed", summary_line)
|
||||
f = re.search(r"(\d+)\s+failed", summary_line)
|
||||
e = re.search(r"(\d+)\s+error", summary_line)
|
||||
if p:
|
||||
passed_count = int(p.group(1))
|
||||
if f:
|
||||
failed_count = int(f.group(1))
|
||||
if e:
|
||||
error_count = int(e.group(1))
|
||||
|
||||
total = passed_count + failed_count + error_count
|
||||
all_passed = failed_count == 0 and error_count == 0 and total > 0
|
||||
|
||||
return {
|
||||
"passed": all_passed,
|
||||
"total": total,
|
||||
"failures": failed_count,
|
||||
"errors": error_count,
|
||||
}
|
||||
|
||||
def get_retry_count(self, task_id: str) -> int:
|
||||
"""Return QA retry count for a task."""
|
||||
return self._retry_counts.get(task_id, 0)
|
||||
|
||||
def _increment_retry(self, task_id: str):
|
||||
"""Increment the retry counter for a task."""
|
||||
self._retry_counts[task_id] = self._retry_counts.get(task_id, 0) + 1
|
||||
|
||||
def _load_template(self, template_name: str) -> str:
|
||||
"""Load a prompt template file from app_factory/prompts/."""
|
||||
path = self._prompts_dir / template_name
|
||||
return path.read_text()
|
||||
|
||||
def _get_conflict_files(self, repo: git.Repo) -> list[str]:
|
||||
"""Get list of conflicting files from a repo."""
|
||||
try:
|
||||
status_output = repo.git.status("--porcelain")
|
||||
conflicts = []
|
||||
for line in status_output.splitlines():
|
||||
if line.startswith("UU ") or line.startswith("AA "):
|
||||
conflicts.append(line[3:].strip())
|
||||
return conflicts
|
||||
except git.GitCommandError:
|
||||
return []
|
||||
|
||||
def _parse_review_response(self, text: str) -> dict:
|
||||
"""Parse Claude's review response into structured data."""
|
||||
approved = False
|
||||
issues = []
|
||||
summary = ""
|
||||
|
||||
for line in text.splitlines():
|
||||
line = line.strip()
|
||||
if line.upper().startswith("APPROVED:"):
|
||||
value = line.split(":", 1)[1].strip().lower()
|
||||
approved = value in ("true", "yes")
|
||||
elif line.startswith("- ["):
|
||||
# Parse issue lines like "- [severity: critical] description"
|
||||
issue_match = re.match(
|
||||
r"-\s*\[severity:\s*(critical|warning|info)\]\s*(.*)",
|
||||
line,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if issue_match:
|
||||
issues.append({
|
||||
"severity": issue_match.group(1).lower(),
|
||||
"description": issue_match.group(2).strip(),
|
||||
})
|
||||
elif line.upper().startswith("SUMMARY:"):
|
||||
summary = line.split(":", 1)[1].strip()
|
||||
|
||||
return {"approved": approved, "issues": issues, "summary": summary}
|
||||
180
app_factory/agents/task_agent.py
Normal file
180
app_factory/agents/task_agent.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Task Master Agent - Bridge to claude-task-master for task graph management."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskMasterAgent:
|
||||
"""Bridge to claude-task-master for task graph management and dependency resolution."""
|
||||
|
||||
def __init__(self, project_root: str, mcp_client=None):
|
||||
self.project_root = str(project_root)
|
||||
self.mcp_client = mcp_client
|
||||
self.max_retries = 3
|
||||
self.base_delay = 1.0
|
||||
|
||||
async def parse_prd(self, prd_content: str, num_tasks: int = 10) -> dict:
|
||||
"""Write PRD content to disk and invoke task-master parse-prd."""
|
||||
docs_dir = Path(self.project_root) / ".taskmaster" / "docs"
|
||||
docs_dir.mkdir(parents=True, exist_ok=True)
|
||||
prd_path = docs_dir / "prd.md"
|
||||
prd_path.write_text(prd_content)
|
||||
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"parse-prd",
|
||||
str(prd_path),
|
||||
"--num-tasks",
|
||||
str(num_tasks),
|
||||
"--force",
|
||||
)
|
||||
return result
|
||||
|
||||
async def get_unblocked_tasks(self) -> list:
|
||||
"""Get all pending tasks whose dependencies are all done."""
|
||||
result = await self._call_with_retry(self._run_cli, "list", "--json")
|
||||
tasks = result.get("tasks", [])
|
||||
|
||||
done_ids = {
|
||||
str(t["id"]) for t in tasks if t.get("status") == "done"
|
||||
}
|
||||
|
||||
unblocked = []
|
||||
for task in tasks:
|
||||
if task.get("status") != "pending":
|
||||
continue
|
||||
deps = [str(d) for d in task.get("dependencies", [])]
|
||||
if all(d in done_ids for d in deps):
|
||||
unblocked.append(task)
|
||||
|
||||
return unblocked
|
||||
|
||||
async def update_task_status(
|
||||
self, task_id: str, status: str, notes: str = ""
|
||||
):
|
||||
"""Update a task's status and optionally add implementation notes."""
|
||||
await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"set-status",
|
||||
f"--id={task_id}",
|
||||
f"--status={status}",
|
||||
)
|
||||
if notes:
|
||||
await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"update-subtask",
|
||||
f"--id={task_id}",
|
||||
f"--prompt={notes}",
|
||||
)
|
||||
|
||||
async def get_task_details(self, task_id: str) -> dict:
|
||||
"""Get full details for a specific task."""
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli, "show", str(task_id), "--json"
|
||||
)
|
||||
task = result.get("task", result)
|
||||
return {
|
||||
"id": task.get("id"),
|
||||
"title": task.get("title", ""),
|
||||
"description": task.get("description", ""),
|
||||
"details": task.get("details", ""),
|
||||
"testStrategy": task.get("testStrategy", ""),
|
||||
"dependencies": task.get("dependencies", []),
|
||||
"subtasks": task.get("subtasks", []),
|
||||
"status": task.get("status", "pending"),
|
||||
"priority": task.get("priority", ""),
|
||||
}
|
||||
|
||||
async def get_next_task(self) -> dict | None:
|
||||
"""Get the highest-priority unblocked task, or None."""
|
||||
try:
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli, "next", "--json"
|
||||
)
|
||||
task = result.get("task", result)
|
||||
if task and task.get("id"):
|
||||
return task
|
||||
except RuntimeError:
|
||||
logger.debug("next_task command failed, falling back to manual selection")
|
||||
|
||||
unblocked = await self.get_unblocked_tasks()
|
||||
if not unblocked:
|
||||
return None
|
||||
|
||||
priority_order = {"high": 0, "medium": 1, "low": 2}
|
||||
unblocked.sort(
|
||||
key=lambda t: (
|
||||
priority_order.get(t.get("priority", "medium"), 1),
|
||||
t.get("id", 0),
|
||||
)
|
||||
)
|
||||
return unblocked[0]
|
||||
|
||||
async def expand_task(self, task_id: str, num_subtasks: int = 5) -> dict:
|
||||
"""Break a task into subtasks."""
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"expand",
|
||||
f"--id={task_id}",
|
||||
f"--num={num_subtasks}",
|
||||
"--force",
|
||||
)
|
||||
return result
|
||||
|
||||
async def _call_with_retry(self, func, *args, **kwargs):
|
||||
"""Retry with exponential backoff."""
|
||||
last_exc = None
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
return await func(*args, **kwargs)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if attempt < self.max_retries - 1:
|
||||
delay = self.base_delay * (2 ** attempt)
|
||||
logger.warning(
|
||||
"Attempt %d/%d failed: %s. Retrying in %.1fs",
|
||||
attempt + 1,
|
||||
self.max_retries,
|
||||
exc,
|
||||
delay,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
raise RuntimeError(
|
||||
f"All {self.max_retries} attempts failed. Last error: {last_exc}"
|
||||
) from last_exc
|
||||
|
||||
async def _run_cli(self, *args: str) -> dict:
|
||||
"""Execute a task-master CLI command and return parsed JSON output."""
|
||||
cmd = ["task-master", *args]
|
||||
logger.debug("Running CLI: %s", " ".join(cmd))
|
||||
|
||||
proc = await asyncio.get_event_loop().run_in_executor(
|
||||
None,
|
||||
lambda: subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=self.project_root,
|
||||
timeout=120,
|
||||
),
|
||||
)
|
||||
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"task-master {args[0]} failed (rc={proc.returncode}): {proc.stderr.strip()}"
|
||||
)
|
||||
|
||||
stdout = proc.stdout.strip()
|
||||
if not stdout:
|
||||
return {}
|
||||
|
||||
try:
|
||||
return json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {"raw_output": stdout}
|
||||
8
app_factory/core/__init__.py
Normal file
8
app_factory/core/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Core modules for the App Factory orchestration framework."""
|
||||
|
||||
from app_factory.core.graph import AppFactoryOrchestrator, AppFactoryState
|
||||
from app_factory.core.workspace import WorkspaceManager
|
||||
from app_factory.core.observability import ObservabilityManager
|
||||
from app_factory.core.architecture_tracker import ArchitectureTracker
|
||||
|
||||
__all__ = ["AppFactoryOrchestrator", "AppFactoryState", "WorkspaceManager", "ObservabilityManager", "ArchitectureTracker"]
|
||||
300
app_factory/core/architecture_tracker.py
Normal file
300
app_factory/core/architecture_tracker.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""Architecture Tracker - Tracks global architecture to prevent context starvation and code duplication."""
|
||||
|
||||
import ast
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ArchitectureTracker:
|
||||
"""Tracks global architecture to prevent Dev Agent context starvation and code duplication."""
|
||||
|
||||
    def __init__(
        self,
        data_dir: str = "app_factory/data",
        api_key: Optional[str] = None,
        auth_token: Optional[str] = None,
        debug: bool = False,
        observability=None,
    ):
        """Initialize ArchitectureTracker.

        Args:
            data_dir: Directory for storing global_architecture.json
                (created if missing).
            api_key: Optional API key for AI-powered summarization; falls
                back to the ANTHROPIC_API_KEY environment variable.
            auth_token: Optional auth token; falls back to the
                ANTHROPIC_AUTH_TOKEN environment variable.
            debug: When True, enable debug capture on the Claude SDK client.
            observability: Optional observability sink forwarded to Claude
                SDK calls (presumably an ObservabilityManager — confirm
                against callers).
        """
        self.data_dir = Path(data_dir)
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self._arch_path = self.data_dir / "global_architecture.json"
        self.observability = observability

        # AI extraction is optional: without credentials (explicit or from
        # the environment) the tracker falls back to AST-based extraction.
        self._client = None
        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        if resolved_key or resolved_auth:
            try:
                self._client = ClaudeSDKClient(
                    api_key=resolved_key,
                    auth_token=resolved_auth,
                    enable_debug=debug,
                )
            except Exception as exc:
                # Degrade gracefully: keep running with AST-only extraction.
                logger.warning("Claude SDK unavailable (%s). AI summarization disabled.", exc)

        # Load the persisted architecture (or the default schema) eagerly.
        self._architecture = self.load_architecture()
|
||||
|
||||
def _default_architecture(self) -> dict:
|
||||
"""Return default architecture schema."""
|
||||
return {
|
||||
"modules": [],
|
||||
"utilities": [],
|
||||
"design_patterns": [],
|
||||
"naming_conventions": {
|
||||
"variables": "snake_case",
|
||||
"classes": "PascalCase",
|
||||
"functions": "snake_case",
|
||||
"constants": "UPPER_SNAKE_CASE",
|
||||
},
|
||||
"tech_stack": {
|
||||
"language": "Python",
|
||||
"framework": "LangGraph",
|
||||
},
|
||||
"version": 1,
|
||||
"last_updated": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
def load_architecture(self) -> dict:
|
||||
"""Load from global_architecture.json or return default."""
|
||||
if self._arch_path.exists():
|
||||
try:
|
||||
with open(self._arch_path, "r") as f:
|
||||
return json.load(f)
|
||||
except (json.JSONDecodeError, OSError) as exc:
|
||||
logger.warning("Failed to load architecture file (%s). Using default.", exc)
|
||||
return self._default_architecture()
|
||||
|
||||
def save_architecture(self, data: dict):
|
||||
"""Save to global_architecture.json with timestamp update."""
|
||||
data["last_updated"] = datetime.now(timezone.utc).isoformat()
|
||||
with open(self._arch_path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
self._architecture = data
|
||||
|
||||
async def update_architecture(self, completed_task: dict, files_changed: list):
|
||||
"""Update architecture based on completed task and changed files.
|
||||
|
||||
Args:
|
||||
completed_task: Dict with task info (e.g. title, description).
|
||||
files_changed: List of file paths that were modified.
|
||||
"""
|
||||
new_modules = []
|
||||
new_utilities = []
|
||||
|
||||
for file_path in files_changed:
|
||||
if not os.path.exists(file_path) or not file_path.endswith(".py"):
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(file_path, "r") as f:
|
||||
source = f.read()
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
if self._client:
|
||||
await self._ai_extract(source, file_path, new_modules, new_utilities)
|
||||
else:
|
||||
self._basic_extract(source, file_path, new_modules, new_utilities)
|
||||
|
||||
existing_module_names = {m["name"] for m in self._architecture["modules"]}
|
||||
for mod in new_modules:
|
||||
if mod["name"] not in existing_module_names:
|
||||
self._architecture["modules"].append(mod)
|
||||
existing_module_names.add(mod["name"])
|
||||
|
||||
existing_utility_names = {u["name"] for u in self._architecture["utilities"]}
|
||||
for util in new_utilities:
|
||||
if util["name"] not in existing_utility_names:
|
||||
self._architecture["utilities"].append(util)
|
||||
existing_utility_names.add(util["name"])
|
||||
|
||||
self.save_architecture(self._architecture)
|
||||
|
||||
    async def _ai_extract(
        self, source: str, file_path: str, modules: list, utilities: list
    ):
        """Use Claude to extract architecture info from source code.

        Appends class entries to ``modules`` and function entries to
        ``utilities`` (both mutated in place). On any failure the method
        logs a warning and falls back to AST-based ``_basic_extract``.

        Args:
            source: File contents; only the first 4000 characters are sent.
            file_path: Path recorded on every extracted entry.
            modules: Output list for class entries.
            utilities: Output list for function entries.
        """
        prompt = (
            "Analyze this Python source file and extract:\n"
            "1. Module-level classes (name, purpose)\n"
            "2. Utility functions (name, description)\n"
            "Respond ONLY with valid JSON: "
            '{"classes": [{"name": "...", "purpose": "..."}], '
            '"functions": [{"name": "...", "description": "..."}]}\n\n'
            f"File: {file_path}\n```python\n{source[:4000]}\n```"
        )
        try:
            response = await self._client.complete(
                prompt=prompt,
                model="claude-sonnet-4-6",
                max_turns=100,
                observability=self.observability,
                agent_name="architecture_tracker",
                task_id=f"ai_extract:{Path(file_path).name}",
            )
            # Account token usage per analyzed file when a sink is attached.
            if self.observability:
                self.observability.log_token_usage(
                    "architecture_tracker",
                    f"ai_extract:{Path(file_path).name}",
                    input_tokens=response.input_tokens,
                    output_tokens=response.output_tokens,
                    model="claude-sonnet-4-6",
                )
            text = response.text
            # Extract JSON from response: take the outermost {...} span so
            # that any prose the model adds around the JSON does not break
            # parsing.
            start = text.find("{")
            end = text.rfind("}") + 1
            if start >= 0 and end > start:
                data = json.loads(text[start:end])
                for cls in data.get("classes", []):
                    modules.append({
                        "name": cls["name"],
                        "purpose": cls.get("purpose", ""),
                        "file_path": file_path,
                    })
                for func in data.get("functions", []):
                    utilities.append({
                        "name": func["name"],
                        "description": func.get("description", ""),
                        "file_path": file_path,
                    })
        except Exception as exc:
            # Broad catch is deliberate: AI extraction is best-effort and the
            # AST fallback must always run on failure.
            logger.warning("AI extraction failed (%s). Falling back to basic.", exc)
            self._basic_extract(source, file_path, modules, utilities)
|
||||
|
||||
def _basic_extract(
|
||||
self, source: str, file_path: str, modules: list, utilities: list
|
||||
):
|
||||
"""Extract architecture info using AST parsing."""
|
||||
try:
|
||||
tree = ast.parse(source)
|
||||
except SyntaxError:
|
||||
return
|
||||
|
||||
for node in ast.iter_child_nodes(tree):
|
||||
if isinstance(node, ast.ClassDef):
|
||||
docstring = ast.get_docstring(node) or ""
|
||||
modules.append({
|
||||
"name": node.name,
|
||||
"purpose": docstring.split("\n")[0] if docstring else "",
|
||||
"file_path": file_path,
|
||||
})
|
||||
elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
|
||||
if node.name.startswith("_"):
|
||||
continue
|
||||
docstring = ast.get_docstring(node) or ""
|
||||
utilities.append({
|
||||
"name": node.name,
|
||||
"description": docstring.split("\n")[0] if docstring else "",
|
||||
"file_path": file_path,
|
||||
})
|
||||
|
||||
def get_architecture_summary(self, max_tokens: int = 2000) -> str:
|
||||
"""Generate concise text summary from architecture data.
|
||||
|
||||
Args:
|
||||
max_tokens: Approximate max tokens for the summary (~4 chars per token).
|
||||
|
||||
Returns:
|
||||
Formatted string for injection into Dev Agent prompts.
|
||||
"""
|
||||
max_chars = max_tokens * 4
|
||||
parts = []
|
||||
|
||||
parts.append("## Project Architecture Summary")
|
||||
parts.append("")
|
||||
|
||||
# Tech stack
|
||||
tech = self._architecture.get("tech_stack", {})
|
||||
if tech:
|
||||
parts.append("### Tech Stack")
|
||||
for key, value in tech.items():
|
||||
parts.append(f"- {key}: {value}")
|
||||
parts.append("")
|
||||
|
||||
# Modules
|
||||
mods = self._architecture.get("modules", [])
|
||||
if mods:
|
||||
parts.append("### Modules")
|
||||
for m in mods:
|
||||
line = f"- **{m['name']}** ({m.get('file_path', '')}): {m.get('purpose', '')}"
|
||||
parts.append(line)
|
||||
parts.append("")
|
||||
|
||||
# Utilities
|
||||
utils = self._architecture.get("utilities", [])
|
||||
if utils:
|
||||
parts.append("### Shared Utilities")
|
||||
for u in utils:
|
||||
line = f"- **{u['name']}** ({u.get('file_path', '')}): {u.get('description', '')}"
|
||||
parts.append(line)
|
||||
parts.append("")
|
||||
|
||||
# Design patterns
|
||||
patterns = self._architecture.get("design_patterns", [])
|
||||
if patterns:
|
||||
parts.append("### Design Patterns")
|
||||
for p in patterns:
|
||||
parts.append(f"- {p.get('pattern', '')}: {p.get('usage', '')}")
|
||||
parts.append("")
|
||||
|
||||
# Naming conventions
|
||||
conventions = self._architecture.get("naming_conventions", {})
|
||||
if conventions:
|
||||
parts.append("### Naming Conventions")
|
||||
for key, value in conventions.items():
|
||||
parts.append(f"- {key}: {value}")
|
||||
parts.append("")
|
||||
|
||||
summary = "\n".join(parts)
|
||||
|
||||
if len(summary) > max_chars:
|
||||
summary = summary[:max_chars - 3] + "..."
|
||||
|
||||
return summary
|
||||
|
||||
def add_module(self, name: str, purpose: str, file_path: str):
|
||||
"""Manually add a module to the architecture.
|
||||
|
||||
Args:
|
||||
name: Module/class name.
|
||||
purpose: Brief description of what it does.
|
||||
file_path: Path to the source file.
|
||||
"""
|
||||
self._architecture["modules"].append({
|
||||
"name": name,
|
||||
"purpose": purpose,
|
||||
"file_path": file_path,
|
||||
})
|
||||
self.save_architecture(self._architecture)
|
||||
|
||||
def add_utility(self, name: str, description: str, file_path: str):
|
||||
"""Manually add a utility function to the architecture.
|
||||
|
||||
Args:
|
||||
name: Function name.
|
||||
description: Brief description of what it does.
|
||||
file_path: Path to the source file.
|
||||
"""
|
||||
self._architecture["utilities"].append({
|
||||
"name": name,
|
||||
"description": description,
|
||||
"file_path": file_path,
|
||||
})
|
||||
self.save_architecture(self._architecture)
|
||||
721
app_factory/core/claude_client.py
Normal file
721
app_factory/core/claude_client.py
Normal file
@@ -0,0 +1,721 @@
|
||||
"""Shared Claude Agent SDK client wrapper."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from importlib import import_module
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ClaudeCompletion:
    """Normalized completion result from Claude Agent SDK."""

    # Final assistant output: the result-message text when present,
    # otherwise the joined assistant text blocks (see complete()).
    text: str
    # Token counts extracted from the SDK usage payload; 0 when unknown.
    input_tokens: int = 0
    output_tokens: int = 0
    # The unmodified usage dict as reported by the SDK, if any.
    raw_usage: dict[str, Any] | None = None
|
||||
|
||||
|
||||
def _load_sdk() -> tuple[Any, Any]:
    """Import the Claude Agent SDK and return ``(query, ClaudeAgentOptions)``.

    Raises:
        ImportError: When the 'claude-agent-sdk' package cannot be imported
            or does not expose the expected symbols.
    """
    try:
        sdk = import_module("claude_agent_sdk")
        symbols = (sdk.query, sdk.ClaudeAgentOptions)
    except Exception as exc:
        raise ImportError(
            "Claude Agent SDK is not installed. Install 'claude-agent-sdk'."
        ) from exc
    return symbols
|
||||
|
||||
|
||||
class ClaudeSDKClient:
|
||||
"""Small adapter over Claude Agent SDK query() streaming API."""
|
||||
|
||||
    # Cumulative time marks, in seconds since the first failure, at which a
    # rate-limited request is retried (see _should_retry_rate_limit_error);
    # attempts beyond the last mark are not retried.
    _RATE_LIMIT_RETRY_TIME_MARKS_SECONDS: tuple[float, ...] = (0.2, 1.0, 5.0)
    # Substrings that mark a payload key as sensitive; matching keys have
    # their values replaced with "[REDACTED]" before being logged
    # (see _is_sensitive_key / _sanitize_payload).
    _SENSITIVE_KEY_TOKENS: tuple[str, ...] = (
        "api_key",
        "apikey",
        "auth",
        "token",
        "secret",
        "password",
        "authorization",
        "cookie",
    )
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
auth_token: str | None = None,
|
||||
enable_debug: bool = False,
|
||||
):
|
||||
self._query: Any | None = None
|
||||
self._options_cls: Any | None = None
|
||||
self._env: dict[str, str] = {}
|
||||
self._enable_debug = enable_debug
|
||||
if api_key:
|
||||
self._env["ANTHROPIC_API_KEY"] = api_key
|
||||
if auth_token:
|
||||
self._env["ANTHROPIC_AUTH_TOKEN"] = auth_token
|
||||
|
||||
    async def complete(
        self,
        prompt: str,
        *,
        model: str | None = None,
        system_prompt: str | None = None,
        max_turns: int = 100,
        cwd: str | None = None,
        env: dict[str, str] | None = None,
        observability: Any | None = None,
        agent_name: str = "claude_sdk",
        task_id: str = "-",
    ) -> ClaudeCompletion:
        """Run a single-turn completion and normalize text/token usage.

        Args:
            prompt: Prompt forwarded to the SDK ``query`` call.
            model: Optional model identifier override.
            system_prompt: Optional system prompt.
            max_turns: Turn budget passed to the SDK options.
            cwd: Working directory for the SDK execution.
            env: Extra environment variables merged over the client's own.
            observability: Optional sink with a ``log_claude_event`` method.
            agent_name: Label attached to observability events.
            task_id: Task label attached to observability events.

        Returns:
            ClaudeCompletion with the final text and best-effort token counts.

        Raises:
            RuntimeError: On SDK failure, an error result subtype, or an
                empty response, once the rate-limit retry budget is spent.
        """
        self._ensure_sdk_loaded()
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "request_start",
            {
                "model": model,
                "max_turns": max_turns,
                "cwd": cwd,
                "prompt_chars": len(prompt),
                "system_prompt_chars": len(system_prompt) if system_prompt else 0,
            },
        )
        # Build the base SDK options shared by every attempt.
        options_kwargs: dict[str, Any] = {"max_turns": max_turns}
        if model:
            options_kwargs["model"] = model
        if system_prompt:
            options_kwargs["system_prompt"] = system_prompt
        if cwd:
            options_kwargs["cwd"] = cwd

        effective_env = dict(self._env)
        if env:
            effective_env.update(env)
        # Repoint HOME at a writable fallback when ~/.claude is not usable.
        effective_env = self._ensure_claude_home_writable(effective_env, cwd=cwd)
        if effective_env:
            options_kwargs["env"] = effective_env
        # One initial attempt plus one retry per rate-limit time mark.
        total_attempts = len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1
        for attempt in range(total_attempts):
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "attempt_start",
                {"attempt": attempt + 1, "total_attempts": total_attempts},
            )
            # Per-attempt debug capture file (only when debugging is enabled).
            debug_stderr = None
            if self._enable_debug:
                debug_stderr = tempfile.TemporaryFile(mode="w+t", encoding="utf-8")
            attempt_options_kwargs = dict(options_kwargs)
            if debug_stderr is not None:
                attempt_options_kwargs["debug_stderr"] = debug_stderr
                attempt_options_kwargs["extra_args"] = {"debug-to-stderr": None}
            options = self._options_cls(**attempt_options_kwargs)
            # Per-attempt accumulators, reset on every retry.
            assistant_parts: list[str] = []
            result_text: str | None = None
            usage: dict[str, Any] | None = None
            error_text: str | None = None
            result_subtype: str | None = None
            session_id: str | None = None
            stderr_detail = ""

            try:
                async for msg in self._query(prompt=prompt, options=options):
                    session_id = self._record_stream_message(
                        msg=msg,
                        observability=observability,
                        agent_name=agent_name,
                        task_id=task_id,
                        current_session_id=session_id,
                    )
                    content = getattr(msg, "content", None)
                    # Only assistant messages contain model output content.
                    if content and hasattr(msg, "model"):
                        for block in content:
                            text = getattr(block, "text", None)
                            if text:
                                assistant_parts.append(text)

                    msg_result = getattr(msg, "result", None)
                    if isinstance(msg_result, str) and msg_result.strip():
                        result_text = msg_result

                    msg_subtype = getattr(msg, "subtype", None)
                    if isinstance(msg_subtype, str):
                        result_subtype = msg_subtype

                    msg_usage = getattr(msg, "usage", None)
                    if isinstance(msg_usage, dict):
                        usage = msg_usage

                    if getattr(msg, "is_error", False):
                        error_text = msg_result if isinstance(msg_result, str) else "Claude SDK error"
                stderr_detail = self._combine_stderr_details(self._read_debug_stderr(debug_stderr))
            except Exception as exc:
                # Stream failed: gather stderr from both the debug capture
                # and the exception itself, then decide on retry.
                stderr_detail = self._combine_stderr_details(
                    self._read_debug_stderr(debug_stderr),
                    self._extract_exception_stderr(exc),
                )
                error_message = self._format_error(
                    f"Claude SDK query failed: {exc}",
                    stderr_detail,
                    add_hint=True,
                )
                should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "request_error",
                    {
                        "attempt": attempt + 1,
                        "error": self._truncate_text(str(exc)),
                        "retrying": should_retry,
                        "stderr": self._truncate_text(stderr_detail),
                    },
                )
                if should_retry:
                    continue
                raise RuntimeError(error_message) from exc
            finally:
                if debug_stderr is not None:
                    debug_stderr.close()

            # Stream completed but the SDK flagged an error message.
            if error_text:
                error_message = self._format_error(error_text, stderr_detail, add_hint=True)
                should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "request_error",
                    {
                        "attempt": attempt + 1,
                        "error": self._truncate_text(error_text),
                        "retrying": should_retry,
                        "stderr": self._truncate_text(stderr_detail),
                    },
                )
                if should_retry:
                    continue
                raise RuntimeError(error_message)

            # The final result message reported an error subtype.
            if result_subtype and "error" in result_subtype.lower():
                error_message = self._format_error(
                    f"Claude SDK execution ended with subtype '{result_subtype}'.",
                    stderr_detail,
                    add_hint=True,
                )
                should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "request_error",
                    {
                        "attempt": attempt + 1,
                        "error": f"result subtype={result_subtype}",
                        "retrying": should_retry,
                        "stderr": self._truncate_text(stderr_detail),
                    },
                )
                if should_retry:
                    continue
                raise RuntimeError(error_message)

            # Prefer the explicit result text; fall back to the joined
            # assistant text blocks.
            text = (result_text or "\n".join(assistant_parts)).strip()
            if not text:
                error_message = self._format_error(
                    "Claude SDK returned empty response",
                    stderr_detail,
                    add_hint=True,
                )
                should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "request_error",
                    {
                        "attempt": attempt + 1,
                        "error": "empty response",
                        "retrying": should_retry,
                        "stderr": self._truncate_text(stderr_detail),
                    },
                )
                if should_retry:
                    continue
                raise RuntimeError(error_message)

            input_tokens, output_tokens = self._extract_token_counts(usage)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_complete",
                {
                    "attempt": attempt + 1,
                    "session_id": session_id,
                    "result_subtype": result_subtype,
                    "result_preview": self._truncate_text(text, max_chars=180),
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "usage": self._sanitize_payload(usage),
                },
            )
            return ClaudeCompletion(
                text=text,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                raw_usage=usage,
            )

        # Unreachable in practice: every loop path either returns, raises,
        # or continues, and the final attempt never continues.
        raise RuntimeError("Claude SDK retry loop exhausted unexpectedly")
|
||||
|
||||
def _ensure_sdk_loaded(self) -> None:
|
||||
if self._query is not None and self._options_cls is not None:
|
||||
return
|
||||
self._query, self._options_cls = _load_sdk()
|
||||
|
||||
def _extract_token_counts(self, usage: dict[str, Any] | None) -> tuple[int, int]:
|
||||
"""Best-effort token extraction across SDK usage payload variants."""
|
||||
if not isinstance(usage, dict):
|
||||
return 0, 0
|
||||
|
||||
input_tokens = self._to_int(
|
||||
usage.get("input_tokens") or usage.get("inputTokens")
|
||||
)
|
||||
output_tokens = self._to_int(
|
||||
usage.get("output_tokens") or usage.get("outputTokens")
|
||||
)
|
||||
|
||||
if input_tokens == 0:
|
||||
input_tokens = sum(
|
||||
self._to_int(v)
|
||||
for k, v in usage.items()
|
||||
if "input" in k.lower() and "output" not in k.lower()
|
||||
)
|
||||
|
||||
if output_tokens == 0:
|
||||
output_tokens = sum(
|
||||
self._to_int(v) for k, v in usage.items() if "output" in k.lower()
|
||||
)
|
||||
|
||||
return input_tokens, output_tokens
|
||||
|
||||
    def _ensure_claude_home_writable(
        self,
        env: dict[str, str],
        *,
        cwd: str | None = None,
    ) -> dict[str, str]:
        """Fallback to a project-local HOME when ~/.claude paths are not writable.

        Args:
            env: Environment overrides to adjust (the input is not mutated).
            cwd: Base directory for the fallback HOME; defaults to the
                current working directory.

        Returns:
            A copy of ``env``; its HOME points at
            ``<cwd>/.app_factory/claude_home`` when the current home fails
            the writability probe, otherwise it is unchanged.
        """
        effective = dict(env)
        # Prefer an explicit HOME override in env; otherwise the real home.
        current_home = Path(effective.get("HOME") or str(Path.home())).expanduser()

        if self._claude_home_is_writable(current_home):
            return effective

        fallback_root = Path(cwd or os.getcwd()) / ".app_factory" / "claude_home"
        fallback_home = self._prepare_fallback_claude_home(
            source_home=current_home,
            fallback_home=fallback_root,
        )
        effective["HOME"] = str(fallback_home)
        logger.warning(
            "Claude home '%s' is not writable; using fallback HOME at '%s'.",
            current_home,
            fallback_home,
        )
        return effective
|
||||
|
||||
@staticmethod
|
||||
def _claude_home_is_writable(home: Path) -> bool:
|
||||
claude_dir = home / ".claude"
|
||||
required_dirs = [claude_dir, claude_dir / "todos", claude_dir / "debug"]
|
||||
config_file = home / ".claude.json"
|
||||
|
||||
try:
|
||||
for directory in required_dirs:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
probe = directory / ".app_factory_write_probe"
|
||||
probe.write_text("ok", encoding="utf-8")
|
||||
probe.unlink()
|
||||
|
||||
config_file.touch(exist_ok=True)
|
||||
with open(config_file, "a", encoding="utf-8"):
|
||||
pass
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
    @staticmethod
    def _prepare_fallback_claude_home(source_home: Path, fallback_home: Path) -> Path:
        """Create a fallback HOME seeded from the original Claude config.

        Builds ``.claude/todos`` and ``.claude/debug`` under
        ``fallback_home`` and best-effort copies ``.claude.json`` and the
        ``.claude`` directory from ``source_home`` when they are readable;
        copy failures are swallowed deliberately.

        Returns:
            ``fallback_home`` (now populated).
        """
        fallback_home.mkdir(parents=True, exist_ok=True)

        fallback_claude_dir = fallback_home / ".claude"
        (fallback_claude_dir / "todos").mkdir(parents=True, exist_ok=True)
        (fallback_claude_dir / "debug").mkdir(parents=True, exist_ok=True)

        source_claude_dir = source_home / ".claude"
        source_config = source_home / ".claude.json"
        target_config = fallback_home / ".claude.json"

        # Seed the config from the source home when readable; otherwise
        # start with an empty file so the SDK has something to write to.
        if source_config.exists() and source_config.is_file() and os.access(source_config, os.R_OK):
            try:
                shutil.copy2(source_config, target_config)
            except OSError:
                pass
        else:
            target_config.touch(exist_ok=True)

        # Best-effort copy of the existing .claude state (sessions, etc.).
        if source_claude_dir.exists() and source_claude_dir.is_dir() and os.access(
            source_claude_dir, os.R_OK
        ):
            try:
                shutil.copytree(source_claude_dir, fallback_claude_dir, dirs_exist_ok=True)
            except OSError:
                pass

        return fallback_home
|
||||
|
||||
@staticmethod
|
||||
def _to_int(value: Any) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _read_debug_stderr(debug_stderr: Any) -> str:
|
||||
if debug_stderr is None:
|
||||
return ""
|
||||
try:
|
||||
debug_stderr.flush()
|
||||
debug_stderr.seek(0)
|
||||
value = debug_stderr.read()
|
||||
if isinstance(value, str):
|
||||
return value.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_exception_stderr(exc: Exception) -> str:
|
||||
stderr = getattr(exc, "stderr", None)
|
||||
return stderr.strip() if isinstance(stderr, str) else ""
|
||||
|
||||
@staticmethod
|
||||
def _combine_stderr_details(*details: str) -> str:
|
||||
merged: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for detail in details:
|
||||
value = detail.strip() if isinstance(detail, str) else ""
|
||||
if not value or value in seen:
|
||||
continue
|
||||
seen.add(value)
|
||||
merged.append(value)
|
||||
|
||||
if not merged:
|
||||
return ""
|
||||
|
||||
placeholder = "Check stderr output for details"
|
||||
non_placeholder = [detail for detail in merged if placeholder not in detail]
|
||||
preferred = non_placeholder if non_placeholder else merged
|
||||
return "\n\n".join(preferred)
|
||||
|
||||
async def _should_retry_rate_limit_error(self, error_message: str, attempt: int) -> bool:
|
||||
if attempt >= len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS):
|
||||
return False
|
||||
|
||||
text = error_message.lower()
|
||||
retryable_tokens = (
|
||||
"rate_limit_event",
|
||||
"rate limit",
|
||||
"rate-limited",
|
||||
"too many requests",
|
||||
"status code: 429",
|
||||
"status code 429",
|
||||
)
|
||||
if not any(token in text for token in retryable_tokens):
|
||||
return False
|
||||
|
||||
time_marks = self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS
|
||||
target_mark = time_marks[attempt]
|
||||
previous_mark = time_marks[attempt - 1] if attempt > 0 else 0.0
|
||||
delay = max(target_mark - previous_mark, 0.0)
|
||||
logger.warning(
|
||||
"Claude SDK rate limit/transient event detected (attempt %d/%d). "
|
||||
"Retrying in %.1fs (target %.1fs from first failure).",
|
||||
attempt + 1,
|
||||
len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1,
|
||||
delay,
|
||||
target_mark,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _format_error(message: str, stderr_detail: str, add_hint: bool = False) -> str:
|
||||
hint = ""
|
||||
if add_hint:
|
||||
hint = (
|
||||
"\nHint: verify Claude auth is available (ANTHROPIC_API_KEY or "
|
||||
"ANTHROPIC_AUTH_TOKEN, or a valid Claude Code OAuth session) and that the "
|
||||
"process can write ~/.claude and ~/.claude.json."
|
||||
)
|
||||
if stderr_detail:
|
||||
return f"{message}\nSDK stderr:\n{stderr_detail}{hint}"
|
||||
return f"{message}{hint}"
|
||||
|
||||
    def _record_stream_message(
        self,
        *,
        msg: Any,
        observability: Any | None,
        agent_name: str,
        task_id: str,
        current_session_id: str | None,
    ) -> str | None:
        """Emit observability events describing one streamed SDK message.

        The message type is duck-typed by attribute presence (stream event,
        assistant/user message, result message, system message), since SDK
        message classes are not imported here.

        Returns:
            The session id carried by ``msg``, or ``current_session_id``
            when the message has none.
        """
        session_id = getattr(msg, "session_id", None) or current_session_id
        parent_tool_use_id = getattr(msg, "parent_tool_use_id", None)

        # Partial stream events (carrying .event and .uuid) are logged and
        # short-circuit the remaining classification.
        stream_event = getattr(msg, "event", None)
        if stream_event is not None and hasattr(msg, "uuid"):
            stream_event_type = None
            if isinstance(stream_event, dict):
                stream_event_type = stream_event.get("type") or stream_event.get("event")
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "stream_event",
                {
                    "session_id": session_id,
                    "stream_event_type": stream_event_type,
                    "parent_tool_use_id": parent_tool_use_id,
                },
            )
            return session_id

        content = getattr(msg, "content", None)
        if content:
            # Only assistant messages carry a .model attribute.
            is_assistant_message = hasattr(msg, "model")
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "assistant_message" if is_assistant_message else "user_message",
                {
                    "session_id": session_id,
                    "parent_tool_use_id": parent_tool_use_id,
                    "model": getattr(msg, "model", None),
                    "content_block_count": len(content) if isinstance(content, list) else 1,
                },
            )
            # Log each content block (tool use/result, text, thinking).
            if isinstance(content, list):
                for block in content:
                    self._record_content_block(
                        block=block,
                        observability=observability,
                        agent_name=agent_name,
                        task_id=task_id,
                        session_id=session_id,
                        parent_tool_use_id=parent_tool_use_id,
                    )

        subtype = getattr(msg, "subtype", None)
        if isinstance(subtype, str):
            # Result messages carry timing fields; system messages carry .data.
            if hasattr(msg, "duration_ms"):
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "result_message",
                    {
                        "session_id": session_id,
                        "subtype": subtype,
                        "is_error": bool(getattr(msg, "is_error", False)),
                        "num_turns": getattr(msg, "num_turns", None),
                        "duration_ms": getattr(msg, "duration_ms", None),
                        "duration_api_ms": getattr(msg, "duration_api_ms", None),
                        "total_cost_usd": getattr(msg, "total_cost_usd", None),
                        "usage": self._sanitize_payload(getattr(msg, "usage", None)),
                    },
                )
            elif hasattr(msg, "data"):
                self._emit_observability_event(
                    observability,
                    agent_name,
                    task_id,
                    "system_message",
                    {
                        "session_id": session_id,
                        "subtype": subtype,
                        "data": self._sanitize_payload(getattr(msg, "data", None)),
                    },
                )

        return session_id
|
||||
|
||||
    def _record_content_block(
        self,
        *,
        block: Any,
        observability: Any | None,
        agent_name: str,
        task_id: str,
        session_id: str | None,
        parent_tool_use_id: str | None,
    ) -> None:
        """Emit one observability event for a single message content block.

        Classifies the block by attribute shape, in priority order:
        tool use (name + input + id), tool result (tool_use_id), text
        block, then thinking block. Unrecognized blocks are ignored.
        """
        block_name = getattr(block, "name", None)
        block_input = getattr(block, "input", None)
        block_id = getattr(block, "id", None)
        if block_name is not None and block_input is not None and block_id is not None:
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "tool_use",
                {
                    "session_id": session_id,
                    "tool_use_id": block_id,
                    "parent_tool_use_id": parent_tool_use_id,
                    "tool_name": str(block_name),
                    "tool_input": self._sanitize_payload(block_input),
                },
            )
            return

        tool_use_id = getattr(block, "tool_use_id", None)
        if tool_use_id is not None:
            content = getattr(block, "content", None)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "tool_result",
                {
                    "session_id": session_id,
                    "tool_use_id": tool_use_id,
                    "parent_tool_use_id": parent_tool_use_id,
                    "is_error": bool(getattr(block, "is_error", False)),
                    "content": self._sanitize_payload(content),
                },
            )
            return

        text = getattr(block, "text", None)
        if isinstance(text, str) and text:
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "text_block",
                {
                    "session_id": session_id,
                    "chars": len(text),
                    "preview": self._truncate_text(text),
                },
            )
            return

        # Thinking blocks log only the size, never the content.
        thinking = getattr(block, "thinking", None)
        if isinstance(thinking, str) and thinking:
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "thinking_block",
                {
                    "session_id": session_id,
                    "chars": len(thinking),
                },
            )
|
||||
def _emit_observability_event(
|
||||
self,
|
||||
observability: Any | None,
|
||||
agent_name: str,
|
||||
task_id: str,
|
||||
event_type: str,
|
||||
payload: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
if observability is None:
|
||||
return
|
||||
log_method = getattr(observability, "log_claude_event", None)
|
||||
if not callable(log_method):
|
||||
return
|
||||
try:
|
||||
log_method(
|
||||
agent_name=agent_name,
|
||||
task_id=task_id,
|
||||
event_type=event_type,
|
||||
payload=self._sanitize_payload(payload),
|
||||
)
|
||||
except Exception:
|
||||
# Observability should never break execution.
|
||||
logger.debug("Failed to emit observability event", exc_info=True)
|
||||
|
||||
@classmethod
def _is_sensitive_key(cls, key: Any) -> bool:
    """Return True when *key* is a string containing any sensitive token.

    Matching is case-insensitive and substring-based against
    ``cls._SENSITIVE_KEY_TOKENS``; non-string keys are never sensitive.
    """
    if isinstance(key, str):
        folded = key.lower()
        for token in cls._SENSITIVE_KEY_TOKENS:
            if token in folded:
                return True
    return False
|
||||
|
||||
@classmethod
def _sanitize_payload(cls, value: Any, *, _depth: int = 0) -> Any:
    """Return a JSON-friendly, size-bounded, secret-free copy of *value*.

    Recursion is capped at depth 4, mappings and sequences are capped at
    40 entries, keys that look sensitive are replaced with "[REDACTED]",
    and long strings / arbitrary objects are rendered as truncated text.
    """
    if _depth >= 4:
        return "[truncated]"

    if isinstance(value, dict):
        cleaned: dict[str, Any] = {}
        for position, (raw_key, raw_val) in enumerate(value.items()):
            if position >= 40:
                # Record how many entries were dropped instead of emitting them.
                cleaned["__truncated_items__"] = len(value) - 40
                break
            name = str(raw_key)
            if cls._is_sensitive_key(name):
                cleaned[name] = "[REDACTED]"
            else:
                cleaned[name] = cls._sanitize_payload(raw_val, _depth=_depth + 1)
        return cleaned

    if isinstance(value, (list, tuple)):
        head = [cls._sanitize_payload(entry, _depth=_depth + 1) for entry in value[:40]]
        if len(value) > 40:
            head.append(f"...({len(value) - 40} more)")
        return head

    if isinstance(value, str):
        return cls._truncate_text(value)

    if value is None or isinstance(value, (int, float, bool)):
        return value

    # Fallback: render unknown objects as bounded text.
    return cls._truncate_text(str(value))
|
||||
|
||||
@staticmethod
|
||||
def _truncate_text(value: str, max_chars: int = 400) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
trimmed = value.strip()
|
||||
if len(trimmed) <= max_chars:
|
||||
return trimmed
|
||||
return f"{trimmed[:max_chars]}...({len(trimmed) - max_chars} more chars)"
|
||||
444
app_factory/core/graph.py
Normal file
444
app_factory/core/graph.py
Normal file
@@ -0,0 +1,444 @@
|
||||
"""Graph Orchestrator - LangGraph-based multi-agent workflow orchestration."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import TypedDict
|
||||
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AppFactoryState(TypedDict):
    """Global state passed through the orchestration graph.

    LangGraph merges the partial dicts returned by each node into this
    state, so every node reads from and returns subsets of these keys.
    """

    user_input: str  # Raw user prompt that seeds the whole run
    prd: str  # Product requirements document produced by the PM agent
    tasks: list  # All tasks from task-master
    active_tasks: dict  # task_id -> {status, container_id, worktree_path}
    completed_tasks: list  # List of completed task_ids
    blocked_tasks: dict  # task_id -> reason
    clarification_requests: list  # Pending clarification dicts
    global_architecture: str  # Architecture summary for dev agents
    iteration_count: int  # Safety counter to prevent infinite loops
    max_iterations: int  # Max loop iterations (default 50)
    errors: list  # Error log
|
||||
|
||||
|
||||
class AppFactoryOrchestrator:
    """Main LangGraph state machine for the App Factory.

    Wires the PM, task, dev, and QA agents into a StateGraph and drives a
    run from a single user prompt.  Any collaborator may be ``None``, in
    which case the corresponding node falls back to mock behavior (used
    for testing the graph wiring in isolation).
    """

    def __init__(
        self,
        pm_agent=None,
        task_agent=None,
        dev_manager=None,
        qa_agent=None,
        workspace_manager=None,
        observability=None,
    ):
        # Collaborators are stored as-is; each node checks for None before use.
        self.pm_agent = pm_agent  # Expands prompts into PRDs, answers clarifications
        self.task_agent = task_agent  # task-master wrapper: parse PRD, track statuses
        self.dev_manager = dev_manager  # Spawns containerized dev agents per task
        self.qa_agent = qa_agent  # Reviews and merges completed work
        self.workspace_manager = workspace_manager  # Git worktrees + Docker clean rooms
        self.observability = observability  # Optional tracing/logging sink
|
||||
|
||||
def build_graph(self) -> StateGraph:
    """Assemble and compile the orchestration StateGraph.

    Flow: START -> pm -> task -> dev_dispatch -> qa, with conditional
    routing after the pm/task/qa nodes and a clarification detour that
    always returns to the task node.
    """
    graph = StateGraph(AppFactoryState)

    # Register every node up front; routing is attached below.
    for node_name, handler in (
        ("pm_node", self._pm_node),
        ("task_node", self._task_node),
        ("dev_dispatch_node", self._dev_dispatch_node),
        ("qa_node", self._qa_node),
        ("clarification_node", self._clarification_node),
    ):
        graph.add_node(node_name, handler)

    graph.add_edge(START, "pm_node")
    graph.add_conditional_edges(
        "pm_node",
        self._should_continue_after_pm,
        {"task_node": "task_node", "end": END},
    )
    graph.add_conditional_edges(
        "task_node",
        self._should_continue_after_tasks,
        {
            "dev_dispatch": "dev_dispatch_node",
            "end": END,
            "clarification": "clarification_node",
        },
    )
    graph.add_edge("dev_dispatch_node", "qa_node")
    graph.add_conditional_edges(
        "qa_node",
        self._should_continue_after_qa,
        {
            "task_node": "task_node",
            "clarification": "clarification_node",
            "end": END,
        },
    )
    graph.add_edge("clarification_node", "task_node")

    return graph.compile()
|
||||
|
||||
def _should_continue_after_pm(self, state: dict) -> str:
|
||||
"""Routing function after pm_node: 'task_node' | 'end'."""
|
||||
prd = state.get("prd", "")
|
||||
if prd and prd.strip():
|
||||
return "task_node"
|
||||
|
||||
# PM failure (or empty prompt) yields no PRD and should terminate cleanly.
|
||||
return "end"
|
||||
|
||||
def _should_continue_after_tasks(self, state: dict) -> str:
|
||||
"""Routing function after task_node: 'dev_dispatch' | 'end' | 'clarification'."""
|
||||
if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
|
||||
return "end"
|
||||
|
||||
tasks = state.get("tasks", [])
|
||||
completed = set(state.get("completed_tasks", []))
|
||||
all_task_ids = {str(t.get("id", "")) for t in tasks}
|
||||
|
||||
# Check if all tasks are done
|
||||
if all_task_ids and all_task_ids <= completed:
|
||||
return "end"
|
||||
|
||||
# Check for unblocked tasks (pending tasks with all deps done)
|
||||
unblocked = []
|
||||
for t in tasks:
|
||||
if str(t.get("id", "")) in completed:
|
||||
continue
|
||||
if t.get("status") == "done":
|
||||
continue
|
||||
deps = [str(d) for d in t.get("dependencies", [])]
|
||||
if all(d in completed for d in deps):
|
||||
unblocked.append(t)
|
||||
|
||||
if unblocked:
|
||||
return "dev_dispatch"
|
||||
|
||||
# No unblocked tasks - if there are blocked ones, try clarification
|
||||
if state.get("blocked_tasks") or state.get("clarification_requests"):
|
||||
return "clarification"
|
||||
|
||||
# No tasks at all or nothing left to do
|
||||
return "end"
|
||||
|
||||
def _should_continue_after_qa(self, state: dict) -> str:
|
||||
"""Routing function after qa_node: 'task_node' | 'clarification' | 'end'."""
|
||||
if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
|
||||
return "end"
|
||||
|
||||
if state.get("clarification_requests"):
|
||||
return "clarification"
|
||||
|
||||
# Loop back to check for newly unblocked tasks
|
||||
return "task_node"
|
||||
|
||||
async def _pm_node(self, state: dict) -> dict:
|
||||
"""Call PM agent to expand user input into a PRD."""
|
||||
if self.observability:
|
||||
self.observability.log_state_transition("start", "pm_node")
|
||||
|
||||
user_input = state.get("user_input", "")
|
||||
if not user_input:
|
||||
return {"prd": "", "errors": state.get("errors", []) + ["No user input provided"]}
|
||||
|
||||
if self.pm_agent is None:
|
||||
return {"prd": f"Mock PRD for: {user_input}"}
|
||||
|
||||
try:
|
||||
prd = await self.pm_agent.expand_prompt_to_prd(user_input)
|
||||
return {"prd": prd}
|
||||
except Exception as e:
|
||||
logger.error("PM agent failed: %s", e)
|
||||
return {"prd": "", "errors": state.get("errors", []) + [f"PM agent error: {e}"]}
|
||||
|
||||
async def _task_node(self, state: dict) -> dict:
|
||||
"""Parse PRD into tasks or get unblocked tasks. Increments iteration_count."""
|
||||
if self.observability:
|
||||
self.observability.log_state_transition("pm_node/qa_node/clarification_node", "task_node")
|
||||
|
||||
iteration_count = state.get("iteration_count", 0) + 1
|
||||
updates = {"iteration_count": iteration_count}
|
||||
|
||||
if iteration_count >= state.get("max_iterations", 50):
|
||||
updates["errors"] = state.get("errors", []) + ["Max iterations reached"]
|
||||
return updates
|
||||
|
||||
if self.task_agent is None:
|
||||
return updates
|
||||
|
||||
try:
|
||||
existing_tasks = state.get("tasks", [])
|
||||
if not existing_tasks:
|
||||
# First pass - parse the PRD
|
||||
prd = state.get("prd", "")
|
||||
if prd:
|
||||
await self.task_agent.parse_prd(prd)
|
||||
unblocked = await self.task_agent.get_unblocked_tasks()
|
||||
updates["tasks"] = unblocked
|
||||
else:
|
||||
# Subsequent passes - refresh unblocked tasks
|
||||
unblocked = await self.task_agent.get_unblocked_tasks()
|
||||
updates["tasks"] = unblocked
|
||||
except Exception as e:
|
||||
logger.error("Task agent failed: %s", e)
|
||||
updates["errors"] = state.get("errors", []) + [f"Task agent error: {e}"]
|
||||
|
||||
return updates
|
||||
|
||||
async def _dev_dispatch_node(self, state: dict) -> dict:
    """Dispatch dev agents concurrently for unblocked tasks.

    For each task whose dependencies are all completed and which is not
    already active, a git worktree and a Docker clean room are created and
    the dev manager executes the task (with its internal retry loop).
    Successes are appended to completed_tasks; tasks that report
    "needs_clarification" raise a clarification request instead.

    Returns a partial state update, or {} when nothing is runnable.
    """
    if self.observability:
        self.observability.log_state_transition("task_node", "dev_dispatch_node")

    # Work on copies so the returned partial update is self-contained.
    tasks = state.get("tasks", [])
    completed = set(state.get("completed_tasks", []))
    active_tasks = dict(state.get("active_tasks", {}))
    errors = list(state.get("errors", []))
    clarification_requests = list(state.get("clarification_requests", []))
    global_arch = state.get("global_architecture", "")

    # Filter to unblocked, not-yet-completed tasks
    to_execute = []
    for t in tasks:
        tid = str(t.get("id", ""))
        if tid in completed or tid in active_tasks:
            continue
        deps = [str(d) for d in t.get("dependencies", [])]
        if all(d in completed for d in deps):
            to_execute.append(t)

    if not to_execute:
        return {}

    if self.dev_manager is None or self.workspace_manager is None:
        # Mock execution for testing
        # NOTE(review): `completed` is a set here, so the resulting list
        # order is unspecified — confirm downstream consumers don't rely
        # on completed_tasks ordering.
        new_completed = list(completed)
        for t in to_execute:
            tid = str(t.get("id", ""))
            active_tasks[tid] = {"status": "success", "container_id": "mock", "worktree_path": "/mock"}
            new_completed.append(tid)
        return {"active_tasks": active_tasks, "completed_tasks": new_completed}

    async def _execute_single(task):
        # Provision workspace + container, then run the dev agent for one
        # task; never raises — failures are folded into a result dict.
        tid = str(task.get("id", ""))
        worktree_path = None
        container = None
        try:
            worktree_path = await self.workspace_manager.create_worktree(tid)
            container = await self.workspace_manager.spin_up_clean_room(worktree_path, tid)
            container_id = container.id

            if self.task_agent:
                await self.task_agent.update_task_status(tid, "in-progress")

            result = await self.dev_manager.execute_with_retry(
                task, container_id, worktree_path, global_arch
            )
            return tid, result, worktree_path
        except Exception as e:
            logger.error("Dev dispatch failed for task %s: %s", tid, e)
            return tid, {"status": "failed", "output": str(e), "files_changed": [], "exit_code": -1}, worktree_path

    # Execute concurrently
    results = await asyncio.gather(*[_execute_single(t) for t in to_execute], return_exceptions=True)

    new_completed = list(completed)
    for item in results:
        # _execute_single catches its own errors, but gather may still
        # surface e.g. CancelledError-adjacent exceptions.
        if isinstance(item, Exception):
            errors.append(f"Dev dispatch exception: {item}")
            continue

        tid, result, worktree_path = item
        status = result.get("status", "failed")
        active_tasks[tid] = {
            "status": status,
            "container_id": result.get("container_id", ""),
            "worktree_path": worktree_path or "",
        }

        if status == "success":
            new_completed.append(tid)
        elif status == "needs_clarification":
            clarification_requests.append({
                "requesting_agent": "dev_agent",
                "task_id": tid,
                "question": f"Task {tid} failed after retries. Output: {result.get('output', '')[:500]}",
                "context": result.get("output", "")[:1000],
            })

    return {
        "active_tasks": active_tasks,
        "completed_tasks": new_completed,
        "errors": errors,
        "clarification_requests": clarification_requests,
    }
|
||||
|
||||
async def _qa_node(self, state: dict) -> dict:
    """Run QA on completed dev tasks.

    For every active task that dev marked "success", the QA agent reviews
    the worktree and either merges it (task becomes "done") or rejects
    it.  Rejected tasks are re-queued (removed from completed_tasks and
    noted in blocked_tasks) until the QA retry budget is exhausted, at
    which point a clarification request is raised instead.  The task's
    workspace is cleaned up after each review.

    Returns a partial state update, or {} when there is nothing to review.
    """
    if self.observability:
        self.observability.log_state_transition("dev_dispatch_node", "qa_node")

    # Work on copies so the returned partial update is self-contained.
    active_tasks = dict(state.get("active_tasks", {}))
    completed = list(state.get("completed_tasks", []))
    errors = list(state.get("errors", []))
    clarification_requests = list(state.get("clarification_requests", []))
    blocked_tasks = dict(state.get("blocked_tasks", {}))

    # Find tasks that were successfully completed by dev and need QA
    tasks_for_qa = []
    for tid, info in active_tasks.items():
        if info.get("status") == "success" and tid in completed:
            tasks_for_qa.append((tid, info))

    if not tasks_for_qa or self.qa_agent is None:
        return {}

    for tid, info in tasks_for_qa:
        worktree_path = info.get("worktree_path", "")
        if not worktree_path:
            # Nothing to review without a worktree (e.g. mock execution).
            continue

        try:
            # Find the task dict for context
            task_dict = None
            for t in state.get("tasks", []):
                if str(t.get("id", "")) == tid:
                    task_dict = t
                    break

            qa_result = await self.qa_agent.review_and_merge(tid, worktree_path, task=task_dict)
            qa_status = qa_result.get("status", "")

            if qa_status == "merged":
                # Successfully merged - update task status
                if self.task_agent:
                    await self.task_agent.update_task_status(tid, "done")
                active_tasks[tid]["status"] = "merged"
            else:
                # QA failed - may need clarification or retry
                retry_count = qa_result.get("retry_count", 0)
                if retry_count >= (self.qa_agent.max_retries if self.qa_agent else 3):
                    # Retry budget exhausted: escalate to the PM agent.
                    clarification_requests.append({
                        "requesting_agent": "qa_agent",
                        "task_id": tid,
                        "question": f"QA failed for task {tid} with status '{qa_status}'",
                        "context": str(qa_result),
                    })
                else:
                    blocked_tasks[tid] = f"QA {qa_status}: {qa_result}"
                    # Remove from completed so it can be retried
                    if tid in completed:
                        completed.remove(tid)
                active_tasks[tid]["status"] = qa_status

            # Cleanup workspace after QA
            if self.workspace_manager:
                try:
                    await self.workspace_manager.cleanup_workspace(tid)
                except Exception as e:
                    # Best-effort: a failed cleanup is logged, not fatal.
                    logger.warning("Workspace cleanup failed for task %s: %s", tid, e)

        except Exception as e:
            logger.error("QA failed for task %s: %s", tid, e)
            errors.append(f"QA error for task {tid}: {e}")

    return {
        "active_tasks": active_tasks,
        "completed_tasks": completed,
        "errors": errors,
        "clarification_requests": clarification_requests,
        "blocked_tasks": blocked_tasks,
    }
|
||||
|
||||
async def _clarification_node(self, state: dict) -> dict:
|
||||
"""Handle clarification requests via PM agent."""
|
||||
if self.observability:
|
||||
self.observability.log_state_transition("task_node/qa_node", "clarification_node")
|
||||
|
||||
requests = list(state.get("clarification_requests", []))
|
||||
blocked_tasks = dict(state.get("blocked_tasks", {}))
|
||||
errors = list(state.get("errors", []))
|
||||
|
||||
if not requests:
|
||||
return {"clarification_requests": []}
|
||||
|
||||
if self.pm_agent is None:
|
||||
# Clear requests without processing for testing
|
||||
return {"clarification_requests": [], "blocked_tasks": {}}
|
||||
|
||||
resolved = []
|
||||
remaining = []
|
||||
|
||||
for req in requests:
|
||||
try:
|
||||
answer = await self.pm_agent.handle_clarification_request(req)
|
||||
tid = req.get("task_id", "")
|
||||
if tid and tid in blocked_tasks:
|
||||
del blocked_tasks[tid]
|
||||
resolved.append({"request": req, "answer": answer})
|
||||
except Exception as e:
|
||||
logger.error("Clarification failed: %s", e)
|
||||
errors.append(f"Clarification error: {e}")
|
||||
remaining.append(req)
|
||||
|
||||
return {
|
||||
"clarification_requests": remaining,
|
||||
"blocked_tasks": blocked_tasks,
|
||||
"errors": errors,
|
||||
}
|
||||
|
||||
async def run(self, user_input: str) -> dict:
    """Build graph and execute with initial state.

    Args:
        user_input: The raw user prompt that seeds the run.

    Returns:
        The final orchestration state after the graph terminates; it is
        also persisted to disk via :meth:`save_state`.
    """
    compiled = self.build_graph()

    # Initial values for every AppFactoryState key; nodes merge partial
    # updates into this dict as the graph executes.
    initial_state = {
        "user_input": user_input,
        "prd": "",
        "tasks": [],
        "active_tasks": {},
        "completed_tasks": [],
        "blocked_tasks": {},
        "clarification_requests": [],
        "global_architecture": "",
        "iteration_count": 0,
        "max_iterations": 50,
        "errors": [],
    }

    if self.observability:
        self.observability.log_state_transition("init", "run")

    result = await compiled.ainvoke(initial_state)

    # Persist the final state so a run can be inspected after the fact.
    self.save_state(result)
    return result
|
||||
|
||||
def save_state(self, state: dict, path: str = "app_factory/data/state.json"):
    """Persist state to disk as JSON.

    Values that are not JSON-serializable are stored as their ``str()``
    representation so a best-effort snapshot is always written.

    Args:
        state: Orchestration state mapping to persist.
        path: Destination file; parent directories are created as needed.
    """
    parent = os.path.dirname(path)
    # Guard: a bare filename has no directory component, and
    # os.makedirs("") raises FileNotFoundError.
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Convert to a JSON-serializable form, falling back to str() per value.
    serializable = {}
    for key, value in state.items():
        try:
            json.dumps(value)
            serializable[key] = value
        except (TypeError, ValueError):
            serializable[key] = str(value)

    # Explicit UTF-8 so the snapshot is portable across locales.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(serializable, f, indent=2)
|
||||
|
||||
def load_state(self, path: str = "app_factory/data/state.json") -> dict:
    """Load persisted orchestration state from disk.

    Args:
        path: JSON file previously written by :meth:`save_state`.

    Returns:
        The deserialized state mapping.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly to match save_state and avoid locale issues.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
||||
83
app_factory/core/logging_utils.py
Normal file
83
app_factory/core/logging_utils.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""Logging formatters and helpers for colorized terminal output."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, TextIO
|
||||
|
||||
|
||||
# ANSI SGR escape codes used to style terminal output.
RESET = "\033[0m"  # Clear all styling
DIM = "\033[2m"
BOLD = "\033[1m"
FG_BLUE = "\033[34m"
FG_CYAN = "\033[36m"
FG_GREEN = "\033[32m"
FG_MAGENTA = "\033[35m"
FG_YELLOW = "\033[33m"
FG_RED = "\033[31m"

# Style applied to each log level's name; unlisted levels get no color.
LEVEL_COLORS = {
    logging.DEBUG: f"{DIM}{FG_CYAN}",
    logging.INFO: FG_GREEN,
    logging.WARNING: FG_YELLOW,
    logging.ERROR: FG_RED,
    logging.CRITICAL: f"{BOLD}{FG_RED}",
}
|
||||
|
||||
|
||||
def should_use_color(stream: Optional[TextIO] = None, use_color: Optional[bool] = None) -> bool:
    """Decide whether ANSI colors should be emitted for *stream*.

    Precedence: an explicit *use_color* override, then the NO_COLOR and
    FORCE_COLOR environment variables, then TERM=dumb, and finally
    whether the target stream (default: stderr) is a TTY.
    """
    if use_color is not None:
        return use_color

    # NO_COLOR set (even to the empty string) always wins.
    if os.getenv("NO_COLOR") is not None:
        return False

    forced = os.getenv("FORCE_COLOR", "").strip().lower()
    if forced not in {"", "0", "false", "no"}:
        return True

    if os.getenv("TERM", "").lower() == "dumb":
        return False

    target = stream or sys.stderr
    isatty = getattr(target, "isatty", None)
    return bool(isatty and isatty())
|
||||
|
||||
|
||||
def colorize(text: str, style: str, enabled: bool) -> str:
    """Wrap *text* in *style* followed by RESET when coloring is enabled.

    Returns *text* untouched when disabled or when *style* is empty.
    """
    if enabled and style:
        return f"{style}{text}{RESET}"
    return text
|
||||
|
||||
|
||||
class LevelColorFormatter(logging.Formatter):
    """Formatter that colors only the log level token.

    All other formatting is delegated to :class:`logging.Formatter`; the
    record's ``levelname`` is temporarily swapped for a colored version
    and always restored afterwards, since records may be shared between
    handlers.
    """

    def __init__(
        self,
        fmt: Optional[str] = None,
        datefmt: Optional[str] = None,
        style: str = "%",
        *,
        stream: Optional[TextIO] = None,
        use_color: Optional[bool] = None,
    ):
        super().__init__(fmt=fmt, datefmt=datefmt, style=style)
        # Resolve the color decision once, at construction time.
        self._use_color = should_use_color(stream=stream, use_color=use_color)

    def format(self, record: logging.LogRecord) -> str:
        if not self._use_color:
            return super().format(record)

        plain_level = record.levelname
        record.levelname = colorize(
            plain_level,
            LEVEL_COLORS.get(record.levelno, ""),
            enabled=True,
        )
        try:
            return super().format(record)
        finally:
            # Restore so other handlers see the uncolored level name.
            record.levelname = plain_level
|
||||
572
app_factory/core/observability.py
Normal file
572
app_factory/core/observability.py
Normal file
@@ -0,0 +1,572 @@
|
||||
"""Observability Manager - LangSmith tracing, logging, and monitoring."""
|
||||
|
||||
import contextlib
|
||||
import functools
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from app_factory.core.logging_utils import (
|
||||
FG_BLUE,
|
||||
FG_CYAN,
|
||||
FG_MAGENTA,
|
||||
LEVEL_COLORS,
|
||||
colorize,
|
||||
should_use_color,
|
||||
)
|
||||
|
||||
|
||||
class _StructuredFormatter(logging.Formatter):
    """Custom formatter: [ISO_TIMESTAMP] [AGENT] [TASK] [LEVEL] message"""

    # Message-prefix -> color style, applied to known event messages.
    _EVENT_COLORS = {
        "State transition": FG_MAGENTA,
        "Token usage": FG_BLUE,
        "Claude event": FG_BLUE,
        "Trace started": FG_CYAN,
        "Trace ended": FG_CYAN,
    }

    def __init__(self, use_color: Optional[bool] = None):
        super().__init__()
        self._use_color = should_use_color(use_color=use_color)

    def _colorize_message(self, message: str) -> str:
        """Color the whole message when it starts with a known event prefix."""
        for known_prefix, known_style in self._EVENT_COLORS.items():
            if message.startswith(known_prefix):
                return colorize(message, known_style, self._use_color)
        return message

    def format(self, record: logging.LogRecord) -> str:
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
        agent = getattr(record, "agent_name", "SYSTEM")
        task = getattr(record, "task_id", "-")
        level = record.levelname
        message = record.getMessage()

        if self._use_color:
            stamp = colorize(stamp, FG_BLUE, enabled=True)
            agent = colorize(agent, FG_CYAN, enabled=True)
            task = colorize(task, FG_MAGENTA, enabled=True)
            level = colorize(level, LEVEL_COLORS.get(record.levelno, ""), enabled=True)
            message = self._colorize_message(message)

        return f"[{stamp}] [{agent}] [{task}] [{level}] {message}"
|
||||
|
||||
|
||||
class _TraceContext:
|
||||
"""Async context manager for trace_context()."""
|
||||
|
||||
def __init__(self, manager: "ObservabilityManager", agent_name: str, task_id: str):
|
||||
self._manager = manager
|
||||
self._agent_name = agent_name
|
||||
self._task_id = task_id
|
||||
self._run_id: Optional[str] = None
|
||||
|
||||
async def __aenter__(self) -> str:
|
||||
self._run_id = self._manager.start_trace(self._agent_name, self._task_id)
|
||||
return self._run_id
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
|
||||
if exc_val is not None:
|
||||
self._manager.end_trace(
|
||||
self._run_id, error=f"{exc_type.__name__}: {exc_val}"
|
||||
)
|
||||
else:
|
||||
self._manager.end_trace(self._run_id)
|
||||
return False # do not suppress exceptions
|
||||
|
||||
|
||||
class ObservabilityManager:
    """Wraps LangSmith client for tracing and structured logging."""

    # Per-mode allowlists of Claude event types that get logged.
    # A value of None means "log everything"; an empty set logs nothing.
    _CLAUDE_EVENT_FILTERS = {
        "quiet": {
            "request_start",
            "request_error",
            "request_complete",
            "tool_use",
            "tool_result",
        },
        "focused": {
            "request_start",
            "request_error",
            "request_complete",
            "tool_use",
            "tool_result",
            "thinking_block",
            "result_message",
        },
        "verbose": None,  # no filtering
        "off": set(),
    }
|
||||
|
||||
def __init__(self, project_name: str | None = None, claude_event_mode: str | None = None):
    """Initialize tracing, logging, and metric accumulators.

    Args:
        project_name: LangSmith project name; falls back to the
            LANGSMITH_PROJECT env var, then "app-factory".
        claude_event_mode: Verbosity for Claude event logging
            ("quiet" | "focused" | "verbose" | "off"); falls back to the
            APP_FACTORY_CLAUDE_EVENT_MODE env var, then "quiet".
            Unrecognized or non-string values resolve to "focused".
    """
    self.project_name = project_name or os.getenv("LANGSMITH_PROJECT", "app-factory")
    requested_mode = (
        claude_event_mode
        or os.getenv("APP_FACTORY_CLAUDE_EVENT_MODE", "quiet")
    )
    # Non-string or unknown modes fall back to "focused".
    normalized_mode = requested_mode.strip().lower() if isinstance(requested_mode, str) else "focused"
    self._claude_event_mode = (
        normalized_mode if normalized_mode in self._CLAUDE_EVENT_FILTERS else "focused"
    )

    # --- LangSmith client (optional) ---
    self._client = None
    try:
        from langsmith import Client  # noqa: F811

        self._client = Client()
    except Exception as exc:
        # LangSmith not configured or unreachable -- degrade gracefully
        logging.getLogger(__name__).warning(
            "LangSmith unavailable (%s). Tracing disabled.", exc
        )

    # --- Structured logger ---
    # Dedicated logger per project; the handler guard prevents duplicate
    # handlers when multiple managers are constructed.
    self.logger = logging.getLogger(f"app_factory.{self.project_name}")
    if not self.logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(_StructuredFormatter())
        self.logger.addHandler(handler)
    self.logger.setLevel(logging.DEBUG)
    self.logger.propagate = False
    # Maps tool_use_id -> tool name / input summary, so tool_result
    # events can be correlated with the tool_use that produced them.
    self._tool_name_by_use_id: dict[str, str] = {}
    self._tool_summary_by_use_id: dict[str, str] = {}

    # --- Internal metrics ---
    self._active_runs: dict[str, dict] = {}
    self._metrics = {
        "total_tokens": 0,
        "total_traces": 0,
        "total_errors": 0,
        "total_claude_events": 0,
        "total_tool_calls": 0,
        "per_agent": defaultdict(lambda: {
            "tokens": 0,
            "traces": 0,
            "errors": 0,
            "claude_events": 0,
            "tool_calls": 0,
        }),
    }
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tracing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def start_trace(self, agent_name: str, task_id: str, inputs: dict = None) -> str:
    """Open a new trace run and return its run_id.

    Records local metrics and, when a LangSmith client is available,
    mirrors the run there; LangSmith failures are logged but never raised.
    """
    run_id = uuid.uuid4().hex
    self._metrics["total_traces"] += 1
    self._metrics["per_agent"][agent_name]["traces"] += 1

    self._active_runs[run_id] = {
        "agent_name": agent_name,
        "task_id": task_id,
        "start_time": time.time(),
    }

    log_extra = {"agent_name": agent_name, "task_id": task_id}
    self.logger.info("Trace started: run_id=%s", run_id, extra=log_extra)

    try:
        if self._client is not None:
            self._client.create_run(
                name=f"{agent_name}:{task_id}",
                run_type="chain",
                inputs=inputs or {},
                id=run_id,
                project_name=self.project_name,
            )
    except Exception as exc:
        self.logger.warning("LangSmith create_run failed: %s", exc, extra=log_extra)

    return run_id
|
||||
|
||||
def end_trace(self, run_id: str, outputs: dict = None, error: str = None):
    """Close a trace run, recording either its outputs or an error.

    Unknown run_ids are tolerated (attributed to agent "unknown"); any
    LangSmith failure is logged and swallowed.
    """
    run_info = self._active_runs.pop(run_id, {})
    agent_name = run_info.get("agent_name", "unknown")
    task_id = run_info.get("task_id", "-")
    log_extra = {"agent_name": agent_name, "task_id": task_id}

    if error:
        self._metrics["total_errors"] += 1
        self._metrics["per_agent"][agent_name]["errors"] += 1
        self.logger.error("Trace error: run_id=%s error=%s", run_id, error, extra=log_extra)
    else:
        self.logger.info("Trace ended: run_id=%s", run_id, extra=log_extra)

    try:
        if self._client is not None:
            update_kwargs: dict[str, Any] = {"end_time": datetime.now(timezone.utc)}
            if outputs:
                update_kwargs["outputs"] = outputs
            if error:
                update_kwargs["error"] = error
            self._client.update_run(run_id, **update_kwargs)
    except Exception as exc:
        self.logger.warning("LangSmith update_run failed: %s", exc, extra=log_extra)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Decorator
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def trace_agent_execution(self, agent_name: str, task_id: str):
    """Decorator factory that wraps a sync or async callable in a trace.

    Call arguments and the stringified result (or exception) are recorded
    via start_trace/end_trace; exceptions are re-raised unchanged.
    """

    def decorator(func: Callable):
        def _begin(args, kwargs):
            # Stringify inputs — arbitrary args may not be serializable.
            return self.start_trace(
                agent_name, task_id, inputs={"args": str(args), "kwargs": str(kwargs)}
            )

        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            run_id = _begin(args, kwargs)
            try:
                result = await func(*args, **kwargs)
                self.end_trace(run_id, outputs={"result": str(result)})
                return result
            except Exception as exc:
                self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                raise

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            run_id = _begin(args, kwargs)
            try:
                result = func(*args, **kwargs)
                self.end_trace(run_id, outputs={"result": str(result)})
                return result
            except Exception as exc:
                self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                raise

        return async_wrapper if inspect.iscoroutinefunction(func) else sync_wrapper

    return decorator
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Async helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def trace_agent(self, agent_name: str, task_id: str, func: Callable):
    """Await *func* (a zero-argument coroutine factory) within a trace.

    The stringified result is recorded as the trace output; exceptions
    are recorded as the trace error and re-raised.
    """
    trace_id = self.start_trace(agent_name, task_id)
    try:
        outcome = await func()
        self.end_trace(trace_id, outputs={"result": str(outcome)})
        return outcome
    except Exception as exc:
        self.end_trace(trace_id, error=f"{type(exc).__name__}: {exc}")
        raise
|
||||
|
||||
def trace_context(self, agent_name: str, task_id: str) -> _TraceContext:
    """Return an async context manager for tracing.

    The context starts a trace on entry (binding the run id) and ends it
    on exit; exceptions raised inside the block are recorded as the trace
    error and re-raised.

    Usage::

        async with obs.trace_context("agent", "task_id") as run_id:
            ...
    """
    return _TraceContext(self, agent_name, task_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Logging helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def log_state_transition(self, from_state: str, to_state: str, metadata: dict = None):
    """Log a state machine transition, with optional metadata."""
    parts = [f"State transition: {from_state} -> {to_state}"]
    if metadata:
        parts.append(f"metadata={metadata}")
    self.logger.info(" ".join(parts), extra={"agent_name": "STATE_MACHINE", "task_id": "-"})
|
||||
|
||||
def log_token_usage(
    self,
    agent_name: str,
    task_id: str,
    input_tokens: int,
    output_tokens: int,
    model: str = None,
):
    """Record token consumption in the metrics and the structured log."""
    combined = input_tokens + output_tokens
    self._metrics["total_tokens"] += combined
    self._metrics["per_agent"][agent_name]["tokens"] += combined

    message = f"Token usage: input={input_tokens} output={output_tokens} total={combined}"
    if model:
        message += f" model={model}"
    self.logger.info(message, extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
def log_error(self, agent_name: str, task_id: str, error: Exception, context: dict = None):
|
||||
"""Log an error with full stack trace."""
|
||||
self._metrics["total_errors"] += 1
|
||||
self._metrics["per_agent"][agent_name]["errors"] += 1
|
||||
|
||||
tb = traceback.format_exception(type(error), error, error.__traceback__)
|
||||
msg = f"Error: {error}\n{''.join(tb)}"
|
||||
if context:
|
||||
msg += f" context={context}"
|
||||
self.logger.error(msg, extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
    def log_claude_event(
        self,
        agent_name: str,
        task_id: str,
        event_type: str,
        payload: dict | None = None,
    ):
        """Log a Claude SDK/CLI event in structured form.

        Metrics are updated for EVERY event, before any log filtering, so
        counters stay accurate even when the event itself is suppressed.
        ``tool_use`` events cache the tool name and an input summary keyed by
        ``tool_use_id``; the matching ``tool_result`` pops that cache to
        enrich its own payload.
        """
        self._metrics["total_claude_events"] += 1
        self._metrics["per_agent"][agent_name]["claude_events"] += 1

        # Defensive normalization: tolerate None/empty/odd-cased event types.
        normalized_event = (event_type or "unknown").strip().lower()
        # Copy so enrichment below never mutates the caller's dict.
        normalized_payload = dict(payload or {})
        if normalized_event == "tool_use":
            self._metrics["total_tool_calls"] += 1
            self._metrics["per_agent"][agent_name]["tool_calls"] += 1
            tool_use_id = normalized_payload.get("tool_use_id")
            tool_name = normalized_payload.get("tool_name")
            tool_input = normalized_payload.get("tool_input")
            if isinstance(tool_use_id, str) and isinstance(tool_name, str):
                # Remember name + summary so the later tool_result (which may
                # lack both) can be logged with context.
                self._tool_name_by_use_id[tool_use_id] = tool_name
                self._tool_summary_by_use_id[tool_use_id] = self._summarize_tool_input(
                    str(tool_name),
                    tool_input,
                )

        if normalized_event == "tool_result":
            tool_use_id = normalized_payload.get("tool_use_id")
            if isinstance(tool_use_id, str):
                # pop() keeps the pairing caches bounded: one use, one result.
                tool_name = self._tool_name_by_use_id.pop(tool_use_id, None)
                tool_summary = self._tool_summary_by_use_id.pop(tool_use_id, None)
                if "tool_name" not in normalized_payload and tool_name:
                    normalized_payload["tool_name"] = tool_name
                if "tool_input_summary" not in normalized_payload and tool_summary:
                    normalized_payload["tool_input_summary"] = tool_summary

        # Filtering happens only after metrics/caches are updated.
        if not self._should_log_claude_event(normalized_event):
            return

        # An empty message means the formatter chose to suppress this event
        # (e.g. successful results from noisy tools).
        msg = self._format_claude_event_message(normalized_event, normalized_payload)
        if not msg:
            return

        self.logger.debug(msg, extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
def _should_log_claude_event(self, event_type: str) -> bool:
|
||||
allowed = self._CLAUDE_EVENT_FILTERS.get(self._claude_event_mode)
|
||||
if allowed is None:
|
||||
return True
|
||||
return event_type in allowed
|
||||
|
||||
    def _format_claude_event_message(self, event_type: str, payload: dict[str, Any]) -> str:
        """Render one human-readable log line for a normalized Claude event.

        Returns an empty string to signal "suppress this event" (used for
        successful results from noisy tools). Unknown event types fall
        through to a generic line with the compacted payload.
        """
        session_id = payload.get("session_id")
        # Appended to every message so lines can be correlated per session.
        session_suffix = f" session={session_id}" if session_id else ""

        if event_type == "request_start":
            model = payload.get("model") or "default"
            prompt_chars = payload.get("prompt_chars", 0)
            return f"Claude request started: model={model} prompt_chars={prompt_chars}{session_suffix}"

        if event_type == "request_complete":
            inp = payload.get("input_tokens", 0)
            out = payload.get("output_tokens", 0)
            subtype = payload.get("result_subtype") or "unknown"
            preview = self._shorten_text(payload.get("result_preview"), max_chars=140)
            preview_fragment = f' result="{preview}"' if preview else ""
            return (
                f"Claude request completed: subtype={subtype} "
                f"tokens={inp}->{out}{preview_fragment}{session_suffix}"
            )

        if event_type == "request_error":
            err = self._shorten_text(payload.get("error"))
            retrying = payload.get("retrying")
            retry_fragment = " retrying=true" if retrying else ""
            return f"Claude request error: {err}{retry_fragment}{session_suffix}"

        if event_type == "tool_use":
            tool_name = payload.get("tool_name", "unknown_tool")
            tool_input = payload.get("tool_input")
            input_summary = self._summarize_tool_input(str(tool_name), tool_input)
            return f"Claude tool call: {tool_name} {input_summary}{session_suffix}"

        if event_type == "tool_result":
            tool_name = payload.get("tool_name", "tool")
            is_error = bool(payload.get("is_error", False))
            content = payload.get("content")
            # May have been injected by log_claude_event from the paired
            # tool_use event.
            input_summary = payload.get("tool_input_summary")
            input_fragment = f" {input_summary}" if input_summary else ""
            status = "error" if is_error else "ok"
            # Noisy tools (read/bash/grep/...): drop successful results
            # entirely, keep failures but with a larger error preview.
            if self._is_noisy_tool_name(str(tool_name)) and not is_error:
                return ""
            if self._is_noisy_tool_name(str(tool_name)) and is_error:
                error_preview = self._shorten_text(content, max_chars=420)
                error_fragment = f" error={error_preview}" if error_preview else ""
                return (
                    f"Claude tool result: {tool_name} status={status}"
                    f"{input_fragment}{error_fragment}{session_suffix}"
                )
            content_preview = self._compact_json(content, max_chars=420)
            return (
                f"Claude tool result: {tool_name} status={status}"
                f"{input_fragment} content={content_preview}{session_suffix}"
            )

        if event_type == "text_block":
            preview = self._shorten_text(payload.get("preview"))
            return f"Claude says: {preview}{session_suffix}"

        if event_type == "thinking_block":
            # Only the size is logged; thinking content itself is not echoed.
            chars = payload.get("chars", 0)
            return f"Claude thinking block: chars={chars}{session_suffix}"

        if event_type == "result_message":
            subtype = payload.get("subtype", "unknown")
            turns = payload.get("num_turns", 0)
            duration_ms = payload.get("duration_ms")
            duration_fragment = f" duration_ms={duration_ms}" if duration_ms is not None else ""
            return f"Claude result message: subtype={subtype} turns={turns}{duration_fragment}{session_suffix}"

        # Unknown event type: log it generically rather than dropping it.
        payload_json = self._compact_json(payload)
        return f"Claude event: type={event_type} payload={payload_json}{session_suffix}"
|
||||
|
||||
@staticmethod
|
||||
def _shorten_text(value: Any, max_chars: int = 220) -> str:
|
||||
text = str(value) if value is not None else ""
|
||||
text = text.strip().replace("\n", " ")
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
return f"{text[:max_chars]}..."
|
||||
|
||||
@staticmethod
|
||||
def _compact_json(value: Any, max_chars: int = 300) -> str:
|
||||
with contextlib.suppress(TypeError, ValueError):
|
||||
rendered = json.dumps(value, sort_keys=True, default=str)
|
||||
if len(rendered) <= max_chars:
|
||||
return rendered
|
||||
return f"{rendered[:max_chars]}..."
|
||||
return ObservabilityManager._shorten_text(value, max_chars=max_chars)
|
||||
|
||||
@staticmethod
|
||||
def _is_noisy_tool_name(tool_name: str) -> bool:
|
||||
return tool_name.lower() in {"read", "bash", "grep", "glob", "find", "ls"}
|
||||
|
||||
@classmethod
|
||||
def _summarize_tool_input(cls, tool_name: str, tool_input: Any) -> str:
|
||||
if not isinstance(tool_input, dict):
|
||||
return f"input={cls._compact_json(tool_input, max_chars=140)}"
|
||||
|
||||
normalized_name = tool_name.lower()
|
||||
if normalized_name == "read":
|
||||
path = tool_input.get("file_path") or tool_input.get("path")
|
||||
return f"path={cls._shorten_path(path, max_chars=120)}"
|
||||
|
||||
if normalized_name == "bash":
|
||||
cmd = tool_input.get("command")
|
||||
compact_cmd = cls._abbreviate_workspace_paths(cmd)
|
||||
return f"command={cls._shorten_text(compact_cmd, max_chars=160)}"
|
||||
|
||||
description = tool_input.get("description")
|
||||
if isinstance(description, str) and description.strip():
|
||||
return f"description={cls._shorten_text(description, max_chars=140)}"
|
||||
|
||||
summary_keys = ("file_path", "path", "pattern", "query", "command", "name")
|
||||
summary: dict[str, Any] = {}
|
||||
for key in summary_keys:
|
||||
if key in tool_input:
|
||||
value = tool_input[key]
|
||||
if key in {"file_path", "path"}:
|
||||
value = cls._shorten_path(value, max_chars=120)
|
||||
summary[key] = value
|
||||
if summary:
|
||||
return f"input={cls._compact_json(summary, max_chars=160)}"
|
||||
return f"input={cls._compact_json(tool_input, max_chars=160)}"
|
||||
|
||||
@classmethod
|
||||
def _shorten_path(cls, value: Any, max_chars: int = 120) -> str:
|
||||
text = str(value).strip() if value is not None else ""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
normalized = text
|
||||
with contextlib.suppress(Exception):
|
||||
cwd = os.path.abspath(os.getcwd())
|
||||
if os.path.isabs(text):
|
||||
abs_path = os.path.abspath(text)
|
||||
if abs_path == cwd:
|
||||
normalized = "."
|
||||
elif abs_path.startswith(f"{cwd}{os.sep}"):
|
||||
normalized = os.path.relpath(abs_path, cwd)
|
||||
else:
|
||||
normalized = text.replace(f"{cwd}{os.sep}", "")
|
||||
|
||||
return cls._shorten_text(normalized, max_chars=max_chars)
|
||||
|
||||
@staticmethod
|
||||
def _abbreviate_workspace_paths(value: Any) -> str:
|
||||
text = str(value).strip() if value is not None else ""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
compact = text
|
||||
with contextlib.suppress(Exception):
|
||||
cwd = os.path.abspath(os.getcwd())
|
||||
compact = compact.replace(f"{cwd}{os.sep}", "")
|
||||
compact = compact.replace(cwd, ".")
|
||||
|
||||
return compact
|
||||
|
||||
@classmethod
|
||||
def _estimate_chars(cls, value: Any) -> int:
|
||||
if value is None:
|
||||
return 0
|
||||
if isinstance(value, str):
|
||||
return len(value)
|
||||
with contextlib.suppress(TypeError, ValueError):
|
||||
return len(json.dumps(value, default=str))
|
||||
return len(str(value))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Metrics
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_metrics(self) -> dict:
|
||||
"""Return accumulated metrics (total tokens, traces, errors)."""
|
||||
return {
|
||||
"total_tokens": self._metrics["total_tokens"],
|
||||
"total_traces": self._metrics["total_traces"],
|
||||
"total_errors": self._metrics["total_errors"],
|
||||
"total_claude_events": self._metrics["total_claude_events"],
|
||||
"total_tool_calls": self._metrics["total_tool_calls"],
|
||||
"per_agent": dict(self._metrics["per_agent"]),
|
||||
}
|
||||
230
app_factory/core/workspace.py
Normal file
230
app_factory/core/workspace.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Workspace Manager - Handles git worktrees and Docker containers for isolated execution."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import docker
|
||||
import git
|
||||
|
||||
|
||||
class WorkspaceError(Exception):
    """Root of the workspace exception hierarchy."""
|
||||
|
||||
|
||||
class GitWorktreeError(WorkspaceError):
    """Raised when a git worktree operation fails."""
|
||||
|
||||
|
||||
class DockerProvisionError(WorkspaceError):
    """Raised when Docker container provisioning fails."""
|
||||
|
||||
|
||||
class WorkspaceManager:
    """Manages git worktrees and Docker containers for isolated Dev Agent execution.

    Each task gets its own git worktree (a sibling ``worktrees/<task_id>``
    directory next to the repo) on a fresh ``feature/task-<task_id>`` branch,
    mounted into a network-less Docker container as ``/workspace``.

    NOTE(review): the ``async`` methods below make blocking git/Docker calls
    directly; presumably callers accept the event-loop stall — confirm, or
    run them via an executor.
    """

    def __init__(self, repo_path: str, docker_image: str = "python:3.11-slim"):
        """Initialize WorkspaceManager.

        Args:
            repo_path: Path to the git repository.
            docker_image: Docker image to use for clean room containers.

        Raises:
            GitWorktreeError: If repo_path is missing or not a git repository.
            DockerProvisionError: If the Docker daemon cannot be reached.
        """
        try:
            self.repo = git.Repo(repo_path)
        except git.InvalidGitRepositoryError as e:
            raise GitWorktreeError(f"Invalid git repository: {repo_path}") from e
        except git.NoSuchPathError as e:
            raise GitWorktreeError(f"Repository path not found: {repo_path}") from e

        self.repo_path = Path(repo_path).resolve()
        self.docker_image = docker_image
        # task_id -> {"task_id", "worktree_path", "container_id", "container"}
        self.active_workspaces: dict[str, dict] = {}

        # Fail fast at construction time if Docker is unreachable, instead of
        # on the first spin_up_clean_room() call.
        try:
            self.docker_client = docker.from_env()
        except docker.errors.DockerException as e:
            raise DockerProvisionError(
                "Failed to connect to Docker daemon. Is Docker running?"
            ) from e

    async def create_worktree(self, task_id: str, base_branch: str = "main") -> str:
        """Create a git worktree for a task.

        Args:
            task_id: Unique identifier for the task.
            base_branch: Branch to base the worktree on.

        Returns:
            Absolute path to the created worktree.

        Raises:
            GitWorktreeError: If worktree creation fails.
        """
        branch_name = f"feature/task-{task_id}"
        # Worktrees live beside the repo so they never pollute the repo tree.
        worktree_path = str(self.repo_path.parent / "worktrees" / task_id)

        # Validate base branch exists
        try:
            self.repo.git.rev_parse("--verify", base_branch)
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Base branch '{base_branch}' does not exist"
            ) from e

        # Check if worktree path already exists
        if os.path.exists(worktree_path):
            raise GitWorktreeError(
                f"Worktree path already exists: {worktree_path}"
            )

        # Check if branch already exists
        if branch_name in [ref.name for ref in self.repo.branches]:
            raise GitWorktreeError(
                f"Branch already exists: {branch_name}"
            )

        try:
            # Ensure the parent "worktrees" directory exists before `git
            # worktree add` creates the task directory inside it.
            os.makedirs(os.path.dirname(worktree_path), exist_ok=True)
            self.repo.git.worktree(
                "add", worktree_path, "-b", branch_name, base_branch
            )
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Failed to create worktree for task {task_id}: {e}"
            ) from e

        return str(Path(worktree_path).resolve())

    async def spin_up_clean_room(self, worktree_path: str, task_id: str):
        """Create an isolated Docker container for a task.

        Args:
            worktree_path: Path to the git worktree to mount.
            task_id: Unique identifier for the task.

        Returns:
            Container object with metadata.

        Raises:
            DockerProvisionError: If container creation fails.
        """
        # NOTE(review): this pulls the image on every call; looks intentional
        # (always-fresh image) but is slow — confirm a cached-image check
        # isn't preferred.
        try:
            self.docker_client.images.pull(self.docker_image)
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to pull image '{self.docker_image}': {e}"
            ) from e

        try:
            container = self.docker_client.containers.create(
                image=self.docker_image,
                name=f"appfactory-task-{task_id}",
                volumes={
                    worktree_path: {"bind": "/workspace", "mode": "rw"}
                },
                working_dir="/workspace",
                # Clean room: no network access from inside the container.
                network_mode="none",
                auto_remove=False,
                detach=True,
                # Keep the container alive so commands can be exec'd into it.
                command="sleep infinity",
            )
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to create container for task {task_id}: {e}"
            ) from e

        # Register so cleanup_workspace()/cleanup_all() can find it later.
        self.active_workspaces[task_id] = {
            "task_id": task_id,
            "worktree_path": worktree_path,
            "container_id": container.id,
            "container": container,
        }

        return container

    async def cleanup_workspace(self, task_id: str, container=None):
        """Clean up a workspace by removing its container and worktree.

        Best-effort: each step's failure is collected rather than aborting,
        so a dead container never strands a worktree (and vice versa).

        Args:
            task_id: Unique identifier for the task.
            container: Optional container object. If None, uses the registered one.

        Raises:
            WorkspaceError: If cleanup fails completely.
        """
        workspace = self.active_workspaces.get(task_id, {})
        errors = []

        # Resolve container from the registry when not supplied explicitly.
        if container is None:
            container = workspace.get("container")

        # Stop and remove container
        if container is not None:
            try:
                container.stop(timeout=5)
            except Exception:
                pass  # Container may already be stopped
            try:
                container.remove(force=True)
            except Exception as e:
                errors.append(f"Container removal failed: {e}")

        # Remove worktree; reconstruct the conventional path if the task was
        # never registered (e.g. cleanup after a partial create_worktree).
        worktree_path = workspace.get("worktree_path")
        if worktree_path is None:
            worktree_path = str(self.repo_path.parent / "worktrees" / task_id)

        try:
            self.repo.git.worktree("remove", worktree_path, "--force")
        except git.GitCommandError:
            # Worktree may already be removed; try cleaning up the directory
            if os.path.exists(worktree_path):
                try:
                    shutil.rmtree(worktree_path)
                except OSError as e:
                    errors.append(f"Worktree directory removal failed: {e}")

        # Prune stale worktree references left behind by manual deletion.
        try:
            self.repo.git.worktree("prune")
        except git.GitCommandError:
            pass

        # Remove from registry even when some steps failed, so retries don't
        # see a phantom workspace.
        self.active_workspaces.pop(task_id, None)

        if errors:
            raise WorkspaceError(
                f"Cleanup completed with errors for task {task_id}: {'; '.join(errors)}"
            )

    def get_active_workspaces(self) -> list:
        """Return list of active workspace info dicts.

        Returns:
            List of dicts with task_id, worktree_path, and container_id.
        """
        # Deliberately excludes the live "container" object from the output.
        return [
            {
                "task_id": info["task_id"],
                "worktree_path": info["worktree_path"],
                "container_id": info["container_id"],
            }
            for info in self.active_workspaces.values()
        ]

    async def cleanup_all(self):
        """Cleanup all active workspaces. Used for graceful shutdown.

        Raises:
            WorkspaceError: Aggregated message if any individual cleanup failed.
        """
        # Snapshot the keys: cleanup_workspace mutates active_workspaces.
        task_ids = list(self.active_workspaces.keys())
        errors = []
        for task_id in task_ids:
            try:
                await self.cleanup_workspace(task_id)
            except WorkspaceError as e:
                errors.append(str(e))
        if errors:
            raise WorkspaceError(
                f"Cleanup all completed with errors: {'; '.join(errors)}"
            )
|
||||
1
app_factory/data/__init__.py
Normal file
1
app_factory/data/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Data models and schemas for App Factory."""
|
||||
15
app_factory/data/state.json
Normal file
15
app_factory/data/state.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"user_input": "please review the project in the app_factory directory and create an api middleware to sit between the app_factory and the ui. if the core app does not have the required features, they will be added according to the middleware spec. the middleware should support sending + receiving data (start/stop jobs, respond to pm questions, etc), tracking progress, errors, logs, etc. visualizing the graph, tracking multiple projects running at a time",
|
||||
"prd": "",
|
||||
"tasks": [],
|
||||
"active_tasks": {},
|
||||
"completed_tasks": [],
|
||||
"blocked_tasks": {},
|
||||
"clarification_requests": [],
|
||||
"global_architecture": "",
|
||||
"iteration_count": 0,
|
||||
"max_iterations": 50,
|
||||
"errors": [
|
||||
"PM agent error: Claude SDK query failed: Command failed with exit code -9 (exit code: -9)\nError output: Check stderr output for details\nHint: verify Claude auth is available (ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN, or a valid Claude Code OAuth session) and that the process can write ~/.claude and ~/.claude.json."
|
||||
]
|
||||
}
|
||||
1
app_factory/prompts/__init__.py
Normal file
1
app_factory/prompts/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Prompt templates and management for App Factory agents."""
|
||||
22
app_factory/prompts/dev_task_execution.txt
Normal file
22
app_factory/prompts/dev_task_execution.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
You are a Dev Agent working on a specific task in an automated software factory.
|
||||
|
||||
## YOUR TASK
|
||||
- Task ID: {task_id}
|
||||
- Title: {title}
|
||||
- Description: {description}
|
||||
|
||||
## DETAILED REQUIREMENTS
|
||||
{details}
|
||||
|
||||
## TEST STRATEGY
|
||||
{test_strategy}
|
||||
|
||||
## GLOBAL ARCHITECTURE (Read-Only Context)
|
||||
{global_architecture}
|
||||
|
||||
## STRICT INSTRUCTIONS
|
||||
1. Implement ONLY this task. Do not make changes unrelated to this task.
|
||||
2. Follow existing code patterns and conventions from the architecture summary.
|
||||
3. Create or update test files as specified in the test strategy.
|
||||
4. All tests must pass before you consider the task complete.
|
||||
5. Do not modify files outside the scope of this task.
|
||||
11
app_factory/prompts/pm_clarification.txt
Normal file
11
app_factory/prompts/pm_clarification.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
You are a Product Manager resolving a clarification request from a downstream agent.
|
||||
|
||||
Agent: {requesting_agent}
|
||||
Task ID: {task_id}
|
||||
Question: {question}
|
||||
Context: {context}
|
||||
|
||||
If you can answer this question based on the PRD and general best practices, provide a clear, specific answer.
|
||||
If the question requires human input (business decision, external dependency, or ambiguous requirement), respond with exactly: ESCALATE_TO_HUMAN
|
||||
|
||||
Provide only the answer, no preamble.
|
||||
12
app_factory/prompts/pm_prd_expansion.txt
Normal file
12
app_factory/prompts/pm_prd_expansion.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
You are an expert Product Manager. Analyze the user's project description and expand it into a comprehensive Product Requirements Document (PRD).
|
||||
|
||||
Your PRD must include these sections:
|
||||
1. **Objective** - Clear project goal and vision
|
||||
2. **Core Requirements** - Detailed functional requirements (numbered list)
|
||||
3. **Technical Architecture** - System design, components, data flow
|
||||
4. **Tech Stack** - Languages, frameworks, databases, infrastructure
|
||||
5. **Success Criteria** - Measurable outcomes for project completion
|
||||
6. **Non-Functional Requirements** - Performance, security, scalability constraints
|
||||
|
||||
Be specific and actionable. Include edge cases and error handling requirements.
|
||||
Fill in reasonable technical decisions where the user hasn't specified.
|
||||
20
app_factory/prompts/qa_review.txt
Normal file
20
app_factory/prompts/qa_review.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
You are a QA code reviewer in an automated software factory. Review the following code changes for quality and security.
|
||||
|
||||
## Task Context
|
||||
{task_context}
|
||||
|
||||
## Code Diff
|
||||
{diff}
|
||||
|
||||
## Review Checklist
|
||||
1. **Security**: Check for OWASP Top 10 vulnerabilities (SQL injection, XSS, command injection, path traversal)
|
||||
2. **Code Quality**: Proper error handling, no dead code, clear naming, appropriate abstractions
|
||||
3. **Task Adherence**: Changes match the task requirements, no scope creep
|
||||
4. **Testing**: Adequate test coverage for the changes
|
||||
5. **Potential Bugs**: Race conditions, edge cases, null/None handling
|
||||
|
||||
Respond in this format:
|
||||
APPROVED: true/false
|
||||
ISSUES:
|
||||
- [severity: critical/warning/info] description
|
||||
SUMMARY: One sentence summary of review
|
||||
Reference in New Issue
Block a user