first commit
This commit is contained in:
8
app_factory/core/__init__.py
Normal file
8
app_factory/core/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Core modules for the App Factory orchestration framework."""
|
||||
|
||||
from app_factory.core.graph import AppFactoryOrchestrator, AppFactoryState
|
||||
from app_factory.core.workspace import WorkspaceManager
|
||||
from app_factory.core.observability import ObservabilityManager
|
||||
from app_factory.core.architecture_tracker import ArchitectureTracker
|
||||
|
||||
__all__ = ["AppFactoryOrchestrator", "AppFactoryState", "WorkspaceManager", "ObservabilityManager", "ArchitectureTracker"]
|
||||
300
app_factory/core/architecture_tracker.py
Normal file
300
app_factory/core/architecture_tracker.py
Normal file
@@ -0,0 +1,300 @@
|
||||
"""Architecture Tracker - Tracks global architecture to prevent context starvation and code duplication."""
|
||||
|
||||
import ast
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ArchitectureTracker:
    """Tracks global architecture to prevent Dev Agent context starvation and code duplication."""

    def __init__(
        self,
        data_dir: str = "app_factory/data",
        api_key: Optional[str] = None,
        auth_token: Optional[str] = None,
        debug: bool = False,
        observability=None,
    ):
        """Initialize ArchitectureTracker.

        Args:
            data_dir: Directory for storing global_architecture.json.
            api_key: Optional API key for AI-powered summarization.
            auth_token: Optional auth token used instead of an API key.
            debug: Enable debug mode on the Claude SDK client.
            observability: Optional manager used for token-usage logging.
        """
        self.data_dir = Path(data_dir)
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self._arch_path = self.data_dir / "global_architecture.json"
        self.observability = observability

        # AI extraction is opt-in: only build a client when credentials exist,
        # and degrade gracefully to AST extraction if the SDK is unavailable.
        self._client = None
        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        if resolved_key or resolved_auth:
            try:
                self._client = ClaudeSDKClient(
                    api_key=resolved_key,
                    auth_token=resolved_auth,
                    enable_debug=debug,
                )
            except Exception as exc:
                logger.warning("Claude SDK unavailable (%s). AI summarization disabled.", exc)

        self._architecture = self.load_architecture()

    def _default_architecture(self) -> dict:
        """Return default architecture schema."""
        return {
            "modules": [],
            "utilities": [],
            "design_patterns": [],
            "naming_conventions": {
                "variables": "snake_case",
                "classes": "PascalCase",
                "functions": "snake_case",
                "constants": "UPPER_SNAKE_CASE",
            },
            "tech_stack": {
                "language": "Python",
                "framework": "LangGraph",
            },
            "version": 1,
            "last_updated": datetime.now(timezone.utc).isoformat(),
        }

    def load_architecture(self) -> dict:
        """Load from global_architecture.json or return default."""
        if self._arch_path.exists():
            try:
                # Explicit encoding avoids platform-dependent defaults.
                with open(self._arch_path, "r", encoding="utf-8") as f:
                    return json.load(f)
            except (json.JSONDecodeError, OSError) as exc:
                logger.warning("Failed to load architecture file (%s). Using default.", exc)
        return self._default_architecture()

    def save_architecture(self, data: dict):
        """Save to global_architecture.json with timestamp update."""
        data["last_updated"] = datetime.now(timezone.utc).isoformat()
        with open(self._arch_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        self._architecture = data

    async def update_architecture(self, completed_task: dict, files_changed: list):
        """Update architecture based on completed task and changed files.

        Args:
            completed_task: Dict with task info (e.g. title, description).
            files_changed: List of file paths that were modified.
        """
        new_modules = []
        new_utilities = []

        for file_path in files_changed:
            # Only analyze Python files that still exist on disk.
            if not os.path.exists(file_path) or not file_path.endswith(".py"):
                continue

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    source = f.read()
            except OSError:
                continue

            if self._client:
                await self._ai_extract(source, file_path, new_modules, new_utilities)
            else:
                self._basic_extract(source, file_path, new_modules, new_utilities)

        # Merge by name so repeated runs do not create duplicate entries.
        # setdefault guards against hand-edited files missing these keys.
        modules = self._architecture.setdefault("modules", [])
        existing_module_names = {m["name"] for m in modules}
        for mod in new_modules:
            if mod["name"] not in existing_module_names:
                modules.append(mod)
                existing_module_names.add(mod["name"])

        utilities = self._architecture.setdefault("utilities", [])
        existing_utility_names = {u["name"] for u in utilities}
        for util in new_utilities:
            if util["name"] not in existing_utility_names:
                utilities.append(util)
                existing_utility_names.add(util["name"])

        self.save_architecture(self._architecture)

    async def _ai_extract(
        self, source: str, file_path: str, modules: list, utilities: list
    ):
        """Use Claude to extract architecture info from source code.

        Appends discovered classes to *modules* and functions to *utilities*
        in place. Falls back to AST extraction on any failure.
        """
        prompt = (
            "Analyze this Python source file and extract:\n"
            "1. Module-level classes (name, purpose)\n"
            "2. Utility functions (name, description)\n"
            "Respond ONLY with valid JSON: "
            '{"classes": [{"name": "...", "purpose": "..."}], '
            '"functions": [{"name": "...", "description": "..."}]}\n\n'
            f"File: {file_path}\n```python\n{source[:4000]}\n```"
        )
        try:
            response = await self._client.complete(
                prompt=prompt,
                model="claude-sonnet-4-6",
                max_turns=100,
                observability=self.observability,
                agent_name="architecture_tracker",
                task_id=f"ai_extract:{Path(file_path).name}",
            )
            if self.observability:
                self.observability.log_token_usage(
                    "architecture_tracker",
                    f"ai_extract:{Path(file_path).name}",
                    input_tokens=response.input_tokens,
                    output_tokens=response.output_tokens,
                    model="claude-sonnet-4-6",
                )
            text = response.text
            # Extract the outermost JSON object from the response text.
            start = text.find("{")
            end = text.rfind("}") + 1
            if start >= 0 and end > start:
                data = json.loads(text[start:end])
                for cls in data.get("classes", []):
                    modules.append({
                        "name": cls["name"],
                        "purpose": cls.get("purpose", ""),
                        "file_path": file_path,
                    })
                for func in data.get("functions", []):
                    utilities.append({
                        "name": func["name"],
                        "description": func.get("description", ""),
                        "file_path": file_path,
                    })
        except Exception as exc:
            logger.warning("AI extraction failed (%s). Falling back to basic.", exc)
            self._basic_extract(source, file_path, modules, utilities)

    def _basic_extract(
        self, source: str, file_path: str, modules: list, utilities: list
    ):
        """Extract architecture info using AST parsing.

        Appends top-level classes to *modules* and public top-level functions
        (sync or async) to *utilities*; silently skips unparsable source.
        """
        try:
            tree = ast.parse(source)
        except SyntaxError:
            return

        for node in ast.iter_child_nodes(tree):
            if isinstance(node, ast.ClassDef):
                docstring = ast.get_docstring(node) or ""
                modules.append({
                    "name": node.name,
                    "purpose": docstring.split("\n")[0] if docstring else "",
                    "file_path": file_path,
                })
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # Private helpers (leading underscore) are not shared utilities.
                if node.name.startswith("_"):
                    continue
                docstring = ast.get_docstring(node) or ""
                utilities.append({
                    "name": node.name,
                    "description": docstring.split("\n")[0] if docstring else "",
                    "file_path": file_path,
                })

    def get_architecture_summary(self, max_tokens: int = 2000) -> str:
        """Generate concise text summary from architecture data.

        Args:
            max_tokens: Approximate max tokens for the summary (~4 chars per token).

        Returns:
            Formatted string for injection into Dev Agent prompts.
        """
        max_chars = max_tokens * 4
        parts = []

        parts.append("## Project Architecture Summary")
        parts.append("")

        # Tech stack
        tech = self._architecture.get("tech_stack", {})
        if tech:
            parts.append("### Tech Stack")
            for key, value in tech.items():
                parts.append(f"- {key}: {value}")
            parts.append("")

        # Modules
        mods = self._architecture.get("modules", [])
        if mods:
            parts.append("### Modules")
            for m in mods:
                line = f"- **{m['name']}** ({m.get('file_path', '')}): {m.get('purpose', '')}"
                parts.append(line)
            parts.append("")

        # Utilities
        utils = self._architecture.get("utilities", [])
        if utils:
            parts.append("### Shared Utilities")
            for u in utils:
                line = f"- **{u['name']}** ({u.get('file_path', '')}): {u.get('description', '')}"
                parts.append(line)
            parts.append("")

        # Design patterns
        patterns = self._architecture.get("design_patterns", [])
        if patterns:
            parts.append("### Design Patterns")
            for p in patterns:
                parts.append(f"- {p.get('pattern', '')}: {p.get('usage', '')}")
            parts.append("")

        # Naming conventions
        conventions = self._architecture.get("naming_conventions", {})
        if conventions:
            parts.append("### Naming Conventions")
            for key, value in conventions.items():
                parts.append(f"- {key}: {value}")
            parts.append("")

        summary = "\n".join(parts)

        # Hard cap for prompt budgets; "..." marks the truncation.
        if len(summary) > max_chars:
            summary = summary[:max_chars - 3] + "..."

        return summary

    def add_module(self, name: str, purpose: str, file_path: str):
        """Manually add a module to the architecture.

        Args:
            name: Module/class name.
            purpose: Brief description of what it does.
            file_path: Path to the source file.
        """
        self._architecture.setdefault("modules", []).append({
            "name": name,
            "purpose": purpose,
            "file_path": file_path,
        })
        self.save_architecture(self._architecture)

    def add_utility(self, name: str, description: str, file_path: str):
        """Manually add a utility function to the architecture.

        Args:
            name: Function name.
            description: Brief description of what it does.
            file_path: Path to the source file.
        """
        self._architecture.setdefault("utilities", []).append({
            "name": name,
            "description": description,
            "file_path": file_path,
        })
        self.save_architecture(self._architecture)
|
||||
721
app_factory/core/claude_client.py
Normal file
721
app_factory/core/claude_client.py
Normal file
@@ -0,0 +1,721 @@
|
||||
"""Shared Claude Agent SDK client wrapper."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from importlib import import_module
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ClaudeCompletion:
    """Normalized completion result from Claude Agent SDK."""

    # Final response text: the SDK result string, or concatenated assistant
    # text blocks when no explicit result was streamed.
    text: str
    # Token counts extracted best-effort from the SDK usage payload.
    input_tokens: int = 0
    output_tokens: int = 0
    # Raw usage dict as reported by the SDK, kept for diagnostics.
    raw_usage: dict[str, Any] | None = None
|
||||
|
||||
|
||||
def _load_sdk() -> tuple[Any, Any]:
|
||||
"""Load Claude Agent SDK symbols."""
|
||||
try:
|
||||
mod = import_module("claude_agent_sdk")
|
||||
return mod.query, mod.ClaudeAgentOptions
|
||||
except Exception as exc:
|
||||
raise ImportError(
|
||||
"Claude Agent SDK is not installed. Install 'claude-agent-sdk'."
|
||||
) from exc
|
||||
|
||||
|
||||
class ClaudeSDKClient:
|
||||
"""Small adapter over Claude Agent SDK query() streaming API."""
|
||||
|
||||
_RATE_LIMIT_RETRY_TIME_MARKS_SECONDS: tuple[float, ...] = (0.2, 1.0, 5.0)
|
||||
_SENSITIVE_KEY_TOKENS: tuple[str, ...] = (
|
||||
"api_key",
|
||||
"apikey",
|
||||
"auth",
|
||||
"token",
|
||||
"secret",
|
||||
"password",
|
||||
"authorization",
|
||||
"cookie",
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str | None = None,
|
||||
auth_token: str | None = None,
|
||||
enable_debug: bool = False,
|
||||
):
|
||||
self._query: Any | None = None
|
||||
self._options_cls: Any | None = None
|
||||
self._env: dict[str, str] = {}
|
||||
self._enable_debug = enable_debug
|
||||
if api_key:
|
||||
self._env["ANTHROPIC_API_KEY"] = api_key
|
||||
if auth_token:
|
||||
self._env["ANTHROPIC_AUTH_TOKEN"] = auth_token
|
||||
|
||||
async def complete(
    self,
    prompt: str,
    *,
    model: str | None = None,
    system_prompt: str | None = None,
    max_turns: int = 100,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
    observability: Any | None = None,
    agent_name: str = "claude_sdk",
    task_id: str = "-",
) -> ClaudeCompletion:
    """Run a single-turn completion and normalize text/token usage.

    Args:
        prompt: Prompt text passed to the SDK query.
        model: Optional model identifier forwarded to the SDK options.
        system_prompt: Optional system prompt forwarded to the SDK options.
        max_turns: Maximum agent turns allowed for the query.
        cwd: Optional working directory forwarded to the SDK options.
        env: Extra environment overrides merged over the stored credentials.
        observability: Optional event sink for structured progress events.
        agent_name: Label attached to emitted observability events.
        task_id: Task label attached to emitted observability events.

    Returns:
        ClaudeCompletion with the final text and best-effort token counts.

    Raises:
        RuntimeError: When the SDK query fails, reports an error result, or
            returns an empty response after all rate-limit retries.
    """
    self._ensure_sdk_loaded()
    self._emit_observability_event(
        observability,
        agent_name,
        task_id,
        "request_start",
        {
            "model": model,
            "max_turns": max_turns,
            "cwd": cwd,
            "prompt_chars": len(prompt),
            "system_prompt_chars": len(system_prompt) if system_prompt else 0,
        },
    )
    # Build SDK options, only including keys that were actually provided.
    options_kwargs: dict[str, Any] = {"max_turns": max_turns}
    if model:
        options_kwargs["model"] = model
    if system_prompt:
        options_kwargs["system_prompt"] = system_prompt
    if cwd:
        options_kwargs["cwd"] = cwd

    # Layer the environment: stored credentials, then per-call overrides,
    # then a writable-HOME fallback for the CLI's state files.
    effective_env = dict(self._env)
    if env:
        effective_env.update(env)
    effective_env = self._ensure_claude_home_writable(effective_env, cwd=cwd)
    if effective_env:
        options_kwargs["env"] = effective_env
    # One initial attempt plus one retry per rate-limit time mark.
    total_attempts = len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1
    for attempt in range(total_attempts):
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "attempt_start",
            {"attempt": attempt + 1, "total_attempts": total_attempts},
        )
        # In debug mode, capture SDK stderr into a temp file for diagnostics.
        debug_stderr = None
        if self._enable_debug:
            debug_stderr = tempfile.TemporaryFile(mode="w+t", encoding="utf-8")
        attempt_options_kwargs = dict(options_kwargs)
        if debug_stderr is not None:
            attempt_options_kwargs["debug_stderr"] = debug_stderr
            attempt_options_kwargs["extra_args"] = {"debug-to-stderr": None}
        options = self._options_cls(**attempt_options_kwargs)
        # Per-attempt accumulators for the streamed response.
        assistant_parts: list[str] = []
        result_text: str | None = None
        usage: dict[str, Any] | None = None
        error_text: str | None = None
        result_subtype: str | None = None
        session_id: str | None = None
        stderr_detail = ""

        try:
            async for msg in self._query(prompt=prompt, options=options):
                session_id = self._record_stream_message(
                    msg=msg,
                    observability=observability,
                    agent_name=agent_name,
                    task_id=task_id,
                    current_session_id=session_id,
                )
                content = getattr(msg, "content", None)
                # Only assistant messages contain model output content.
                if content and hasattr(msg, "model"):
                    for block in content:
                        text = getattr(block, "text", None)
                        if text:
                            assistant_parts.append(text)

                # A non-empty `result` string is the run's final text.
                msg_result = getattr(msg, "result", None)
                if isinstance(msg_result, str) and msg_result.strip():
                    result_text = msg_result

                # Track the latest subtype/usage seen on the stream.
                msg_subtype = getattr(msg, "subtype", None)
                if isinstance(msg_subtype, str):
                    result_subtype = msg_subtype

                msg_usage = getattr(msg, "usage", None)
                if isinstance(msg_usage, dict):
                    usage = msg_usage

                # Capture the error plus debug stderr while it is still readable.
                if getattr(msg, "is_error", False):
                    error_text = msg_result if isinstance(msg_result, str) else "Claude SDK error"
                    stderr_detail = self._combine_stderr_details(self._read_debug_stderr(debug_stderr))
        except Exception as exc:
            stderr_detail = self._combine_stderr_details(
                self._read_debug_stderr(debug_stderr),
                self._extract_exception_stderr(exc),
            )
            error_message = self._format_error(
                f"Claude SDK query failed: {exc}",
                stderr_detail,
                add_hint=True,
            )
            # Rate-limit-looking failures are retried (with backoff sleep
            # performed inside the helper); anything else is fatal.
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": self._truncate_text(str(exc)),
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message) from exc
        finally:
            # Always release this attempt's temp stderr capture.
            if debug_stderr is not None:
                debug_stderr.close()

        # The stream finished but a message flagged an explicit error.
        if error_text:
            error_message = self._format_error(error_text, stderr_detail, add_hint=True)
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": self._truncate_text(error_text),
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)

        # A result subtype containing "error" is also treated as a failed run.
        if result_subtype and "error" in result_subtype.lower():
            error_message = self._format_error(
                f"Claude SDK execution ended with subtype '{result_subtype}'.",
                stderr_detail,
                add_hint=True,
            )
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": f"result subtype={result_subtype}",
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)

        # Prefer the explicit result text; otherwise join assistant blocks.
        text = (result_text or "\n".join(assistant_parts)).strip()
        if not text:
            error_message = self._format_error(
                "Claude SDK returned empty response",
                stderr_detail,
                add_hint=True,
            )
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": "empty response",
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)

        input_tokens, output_tokens = self._extract_token_counts(usage)
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "request_complete",
            {
                "attempt": attempt + 1,
                "session_id": session_id,
                "result_subtype": result_subtype,
                "result_preview": self._truncate_text(text, max_chars=180),
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "usage": self._sanitize_payload(usage),
            },
        )
        return ClaudeCompletion(
            text=text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            raw_usage=usage,
        )

    # Unreachable in practice: every iteration returns, continues, or raises.
    raise RuntimeError("Claude SDK retry loop exhausted unexpectedly")
|
||||
|
||||
def _ensure_sdk_loaded(self) -> None:
|
||||
if self._query is not None and self._options_cls is not None:
|
||||
return
|
||||
self._query, self._options_cls = _load_sdk()
|
||||
|
||||
def _extract_token_counts(self, usage: dict[str, Any] | None) -> tuple[int, int]:
|
||||
"""Best-effort token extraction across SDK usage payload variants."""
|
||||
if not isinstance(usage, dict):
|
||||
return 0, 0
|
||||
|
||||
input_tokens = self._to_int(
|
||||
usage.get("input_tokens") or usage.get("inputTokens")
|
||||
)
|
||||
output_tokens = self._to_int(
|
||||
usage.get("output_tokens") or usage.get("outputTokens")
|
||||
)
|
||||
|
||||
if input_tokens == 0:
|
||||
input_tokens = sum(
|
||||
self._to_int(v)
|
||||
for k, v in usage.items()
|
||||
if "input" in k.lower() and "output" not in k.lower()
|
||||
)
|
||||
|
||||
if output_tokens == 0:
|
||||
output_tokens = sum(
|
||||
self._to_int(v) for k, v in usage.items() if "output" in k.lower()
|
||||
)
|
||||
|
||||
return input_tokens, output_tokens
|
||||
|
||||
def _ensure_claude_home_writable(
|
||||
self,
|
||||
env: dict[str, str],
|
||||
*,
|
||||
cwd: str | None = None,
|
||||
) -> dict[str, str]:
|
||||
"""Fallback to a project-local HOME when ~/.claude paths are not writable."""
|
||||
effective = dict(env)
|
||||
current_home = Path(effective.get("HOME") or str(Path.home())).expanduser()
|
||||
|
||||
if self._claude_home_is_writable(current_home):
|
||||
return effective
|
||||
|
||||
fallback_root = Path(cwd or os.getcwd()) / ".app_factory" / "claude_home"
|
||||
fallback_home = self._prepare_fallback_claude_home(
|
||||
source_home=current_home,
|
||||
fallback_home=fallback_root,
|
||||
)
|
||||
effective["HOME"] = str(fallback_home)
|
||||
logger.warning(
|
||||
"Claude home '%s' is not writable; using fallback HOME at '%s'.",
|
||||
current_home,
|
||||
fallback_home,
|
||||
)
|
||||
return effective
|
||||
|
||||
@staticmethod
|
||||
def _claude_home_is_writable(home: Path) -> bool:
|
||||
claude_dir = home / ".claude"
|
||||
required_dirs = [claude_dir, claude_dir / "todos", claude_dir / "debug"]
|
||||
config_file = home / ".claude.json"
|
||||
|
||||
try:
|
||||
for directory in required_dirs:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
probe = directory / ".app_factory_write_probe"
|
||||
probe.write_text("ok", encoding="utf-8")
|
||||
probe.unlink()
|
||||
|
||||
config_file.touch(exist_ok=True)
|
||||
with open(config_file, "a", encoding="utf-8"):
|
||||
pass
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _prepare_fallback_claude_home(source_home: Path, fallback_home: Path) -> Path:
|
||||
fallback_home.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
fallback_claude_dir = fallback_home / ".claude"
|
||||
(fallback_claude_dir / "todos").mkdir(parents=True, exist_ok=True)
|
||||
(fallback_claude_dir / "debug").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
source_claude_dir = source_home / ".claude"
|
||||
source_config = source_home / ".claude.json"
|
||||
target_config = fallback_home / ".claude.json"
|
||||
|
||||
if source_config.exists() and source_config.is_file() and os.access(source_config, os.R_OK):
|
||||
try:
|
||||
shutil.copy2(source_config, target_config)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
target_config.touch(exist_ok=True)
|
||||
|
||||
if source_claude_dir.exists() and source_claude_dir.is_dir() and os.access(
|
||||
source_claude_dir, os.R_OK
|
||||
):
|
||||
try:
|
||||
shutil.copytree(source_claude_dir, fallback_claude_dir, dirs_exist_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return fallback_home
|
||||
|
||||
@staticmethod
|
||||
def _to_int(value: Any) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
@staticmethod
|
||||
def _read_debug_stderr(debug_stderr: Any) -> str:
|
||||
if debug_stderr is None:
|
||||
return ""
|
||||
try:
|
||||
debug_stderr.flush()
|
||||
debug_stderr.seek(0)
|
||||
value = debug_stderr.read()
|
||||
if isinstance(value, str):
|
||||
return value.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _extract_exception_stderr(exc: Exception) -> str:
|
||||
stderr = getattr(exc, "stderr", None)
|
||||
return stderr.strip() if isinstance(stderr, str) else ""
|
||||
|
||||
@staticmethod
|
||||
def _combine_stderr_details(*details: str) -> str:
|
||||
merged: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for detail in details:
|
||||
value = detail.strip() if isinstance(detail, str) else ""
|
||||
if not value or value in seen:
|
||||
continue
|
||||
seen.add(value)
|
||||
merged.append(value)
|
||||
|
||||
if not merged:
|
||||
return ""
|
||||
|
||||
placeholder = "Check stderr output for details"
|
||||
non_placeholder = [detail for detail in merged if placeholder not in detail]
|
||||
preferred = non_placeholder if non_placeholder else merged
|
||||
return "\n\n".join(preferred)
|
||||
|
||||
async def _should_retry_rate_limit_error(self, error_message: str, attempt: int) -> bool:
|
||||
if attempt >= len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS):
|
||||
return False
|
||||
|
||||
text = error_message.lower()
|
||||
retryable_tokens = (
|
||||
"rate_limit_event",
|
||||
"rate limit",
|
||||
"rate-limited",
|
||||
"too many requests",
|
||||
"status code: 429",
|
||||
"status code 429",
|
||||
)
|
||||
if not any(token in text for token in retryable_tokens):
|
||||
return False
|
||||
|
||||
time_marks = self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS
|
||||
target_mark = time_marks[attempt]
|
||||
previous_mark = time_marks[attempt - 1] if attempt > 0 else 0.0
|
||||
delay = max(target_mark - previous_mark, 0.0)
|
||||
logger.warning(
|
||||
"Claude SDK rate limit/transient event detected (attempt %d/%d). "
|
||||
"Retrying in %.1fs (target %.1fs from first failure).",
|
||||
attempt + 1,
|
||||
len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1,
|
||||
delay,
|
||||
target_mark,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _format_error(message: str, stderr_detail: str, add_hint: bool = False) -> str:
|
||||
hint = ""
|
||||
if add_hint:
|
||||
hint = (
|
||||
"\nHint: verify Claude auth is available (ANTHROPIC_API_KEY or "
|
||||
"ANTHROPIC_AUTH_TOKEN, or a valid Claude Code OAuth session) and that the "
|
||||
"process can write ~/.claude and ~/.claude.json."
|
||||
)
|
||||
if stderr_detail:
|
||||
return f"{message}\nSDK stderr:\n{stderr_detail}{hint}"
|
||||
return f"{message}{hint}"
|
||||
|
||||
def _record_stream_message(
    self,
    *,
    msg: Any,
    observability: Any | None,
    agent_name: str,
    task_id: str,
    current_session_id: str | None,
) -> str | None:
    """Emit observability events describing one streamed SDK message.

    The message kind is detected by duck-typing its attributes; the check
    order matters: stream event first, then content, then subtype-bearing
    result/system messages.

    Args:
        msg: Streamed message object produced by the SDK query.
        observability: Optional event sink (events are dropped when None).
        agent_name: Label attached to emitted events.
        task_id: Task label attached to emitted events.
        current_session_id: Session id carried over from earlier messages.

    Returns:
        The session id to carry forward: this message's own id when present,
        otherwise the current one.
    """
    session_id = getattr(msg, "session_id", None) or current_session_id
    parent_tool_use_id = getattr(msg, "parent_tool_use_id", None)

    # Partial stream events expose both `event` and `uuid`; they are logged
    # and short-circuit the rest of the classification.
    stream_event = getattr(msg, "event", None)
    if stream_event is not None and hasattr(msg, "uuid"):
        stream_event_type = None
        if isinstance(stream_event, dict):
            stream_event_type = stream_event.get("type") or stream_event.get("event")
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "stream_event",
            {
                "session_id": session_id,
                "stream_event_type": stream_event_type,
                "parent_tool_use_id": parent_tool_use_id,
            },
        )
        return session_id

    # Content-bearing messages: assistant messages carry a `model`
    # attribute, user messages do not.
    content = getattr(msg, "content", None)
    if content:
        is_assistant_message = hasattr(msg, "model")
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "assistant_message" if is_assistant_message else "user_message",
            {
                "session_id": session_id,
                "parent_tool_use_id": parent_tool_use_id,
                "model": getattr(msg, "model", None),
                "content_block_count": len(content) if isinstance(content, list) else 1,
            },
        )
        # Log each content block (tool use/result, text, thinking) too.
        if isinstance(content, list):
            for block in content:
                self._record_content_block(
                    block=block,
                    observability=observability,
                    agent_name=agent_name,
                    task_id=task_id,
                    session_id=session_id,
                    parent_tool_use_id=parent_tool_use_id,
                )

    # Messages with a string `subtype` are either final results (they carry
    # timing fields like `duration_ms`) or system messages (they carry `data`).
    subtype = getattr(msg, "subtype", None)
    if isinstance(subtype, str):
        if hasattr(msg, "duration_ms"):
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "result_message",
                {
                    "session_id": session_id,
                    "subtype": subtype,
                    "is_error": bool(getattr(msg, "is_error", False)),
                    "num_turns": getattr(msg, "num_turns", None),
                    "duration_ms": getattr(msg, "duration_ms", None),
                    "duration_api_ms": getattr(msg, "duration_api_ms", None),
                    "total_cost_usd": getattr(msg, "total_cost_usd", None),
                    "usage": self._sanitize_payload(getattr(msg, "usage", None)),
                },
            )
        elif hasattr(msg, "data"):
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "system_message",
                {
                    "session_id": session_id,
                    "subtype": subtype,
                    "data": self._sanitize_payload(getattr(msg, "data", None)),
                },
            )

    return session_id
|
||||
|
||||
def _record_content_block(
|
||||
self,
|
||||
*,
|
||||
block: Any,
|
||||
observability: Any | None,
|
||||
agent_name: str,
|
||||
task_id: str,
|
||||
session_id: str | None,
|
||||
parent_tool_use_id: str | None,
|
||||
) -> None:
|
||||
block_name = getattr(block, "name", None)
|
||||
block_input = getattr(block, "input", None)
|
||||
block_id = getattr(block, "id", None)
|
||||
if block_name is not None and block_input is not None and block_id is not None:
|
||||
self._emit_observability_event(
|
||||
observability,
|
||||
agent_name,
|
||||
task_id,
|
||||
"tool_use",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"tool_use_id": block_id,
|
||||
"parent_tool_use_id": parent_tool_use_id,
|
||||
"tool_name": str(block_name),
|
||||
"tool_input": self._sanitize_payload(block_input),
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
tool_use_id = getattr(block, "tool_use_id", None)
|
||||
if tool_use_id is not None:
|
||||
content = getattr(block, "content", None)
|
||||
self._emit_observability_event(
|
||||
observability,
|
||||
agent_name,
|
||||
task_id,
|
||||
"tool_result",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"tool_use_id": tool_use_id,
|
||||
"parent_tool_use_id": parent_tool_use_id,
|
||||
"is_error": bool(getattr(block, "is_error", False)),
|
||||
"content": self._sanitize_payload(content),
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
text = getattr(block, "text", None)
|
||||
if isinstance(text, str) and text:
|
||||
self._emit_observability_event(
|
||||
observability,
|
||||
agent_name,
|
||||
task_id,
|
||||
"text_block",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"chars": len(text),
|
||||
"preview": self._truncate_text(text),
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
thinking = getattr(block, "thinking", None)
|
||||
if isinstance(thinking, str) and thinking:
|
||||
self._emit_observability_event(
|
||||
observability,
|
||||
agent_name,
|
||||
task_id,
|
||||
"thinking_block",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"chars": len(thinking),
|
||||
},
|
||||
)
|
||||
|
||||
def _emit_observability_event(
    self,
    observability: Any | None,
    agent_name: str,
    task_id: str,
    event_type: str,
    payload: dict[str, Any] | None = None,
) -> None:
    """Forward one structured event to the observability sink, if attached.

    No-ops when no sink is configured or the sink lacks a callable
    ``log_claude_event``. The payload is sanitized before emission, and
    any sink failure is swallowed and logged at DEBUG level.
    """
    if observability is None:
        return
    emit = getattr(observability, "log_claude_event", None)
    if not callable(emit):
        return
    try:
        emit(
            agent_name=agent_name,
            task_id=task_id,
            event_type=event_type,
            payload=self._sanitize_payload(payload),
        )
    except Exception:
        # Observability should never break execution.
        logger.debug("Failed to emit observability event", exc_info=True)
|
||||
|
||||
@classmethod
def _is_sensitive_key(cls, key: Any) -> bool:
    """Return True when *key* is a string containing a sensitive token."""
    if not isinstance(key, str):
        return False
    folded = key.lower()
    for token in cls._SENSITIVE_KEY_TOKENS:
        if token in folded:
            return True
    return False
|
||||
|
||||
@classmethod
def _sanitize_payload(cls, value: Any, *, _depth: int = 0) -> Any:
    """Recursively redact sensitive keys and clip oversized payloads.

    Dicts and sequences are capped at 40 entries, strings are truncated,
    recursion stops at depth 4, and non-primitive values are stringified
    before truncation.
    """
    if _depth >= 4:
        return "[truncated]"

    if isinstance(value, dict):
        cleaned: dict[str, Any] = {}
        for position, (raw_key, item) in enumerate(value.items()):
            if position >= 40:
                # Record how many entries were dropped by the cap.
                cleaned["__truncated_items__"] = len(value) - 40
                break
            name = str(raw_key)
            cleaned[name] = (
                "[REDACTED]"
                if cls._is_sensitive_key(name)
                else cls._sanitize_payload(item, _depth=_depth + 1)
            )
        return cleaned

    if isinstance(value, (list, tuple)):
        clipped = [cls._sanitize_payload(item, _depth=_depth + 1) for item in value[:40]]
        if len(value) > 40:
            clipped.append(f"...({len(value) - 40} more)")
        return clipped

    if isinstance(value, str):
        return cls._truncate_text(value)

    if isinstance(value, (int, float, bool)) or value is None:
        return value

    # Fallback: stringify unknown objects, then truncate.
    return cls._truncate_text(str(value))
|
||||
|
||||
@staticmethod
|
||||
def _truncate_text(value: str, max_chars: int = 400) -> str:
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
trimmed = value.strip()
|
||||
if len(trimmed) <= max_chars:
|
||||
return trimmed
|
||||
return f"{trimmed[:max_chars]}...({len(trimmed) - max_chars} more chars)"
|
||||
444
app_factory/core/graph.py
Normal file
444
app_factory/core/graph.py
Normal file
@@ -0,0 +1,444 @@
|
||||
"""Graph Orchestrator - LangGraph-based multi-agent workflow orchestration."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import TypedDict
|
||||
|
||||
from langgraph.graph import END, START, StateGraph
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AppFactoryState(TypedDict):
    """Global state passed through the orchestration graph."""

    user_input: str  # Raw user prompt handed to the PM agent
    prd: str  # Product requirements document produced by the PM agent
    tasks: list  # All tasks from task-master
    active_tasks: dict  # task_id -> {status, container_id, worktree_path}
    completed_tasks: list  # List of completed task_ids
    blocked_tasks: dict  # task_id -> reason
    clarification_requests: list  # Pending clarification dicts
    global_architecture: str  # Architecture summary for dev agents
    iteration_count: int  # Safety counter to prevent infinite loops
    max_iterations: int  # Max loop iterations (default 50)
    errors: list  # Error log
|
||||
|
||||
|
||||
class AppFactoryOrchestrator:
    """Main LangGraph state machine for the App Factory.

    Wires the PM, task, dev, and QA agents into a LangGraph ``StateGraph``
    and drives the plan -> implement -> review loop until every task is
    completed, progress is blocked pending clarification, or the
    ``max_iterations`` safety cap is reached.
    """

    def __init__(
        self,
        pm_agent=None,
        task_agent=None,
        dev_manager=None,
        qa_agent=None,
        workspace_manager=None,
        observability=None,
    ):
        # Every collaborator is optional; when one is None the corresponding
        # node falls back to a mock/no-op path (keeps the graph testable).
        self.pm_agent = pm_agent
        self.task_agent = task_agent
        self.dev_manager = dev_manager
        self.qa_agent = qa_agent
        self.workspace_manager = workspace_manager
        self.observability = observability

    def build_graph(self) -> StateGraph:
        """Build and compile the LangGraph StateGraph with nodes and edges."""
        graph = StateGraph(AppFactoryState)

        graph.add_node("pm_node", self._pm_node)
        graph.add_node("task_node", self._task_node)
        graph.add_node("dev_dispatch_node", self._dev_dispatch_node)
        graph.add_node("qa_node", self._qa_node)
        graph.add_node("clarification_node", self._clarification_node)

        # Flow: PM -> tasks -> dev dispatch -> QA -> back to tasks, with
        # clarification as a detour that always re-enters at task_node.
        graph.add_edge(START, "pm_node")
        graph.add_conditional_edges(
            "pm_node",
            self._should_continue_after_pm,
            {
                "task_node": "task_node",
                "end": END,
            },
        )
        graph.add_conditional_edges(
            "task_node",
            self._should_continue_after_tasks,
            {
                "dev_dispatch": "dev_dispatch_node",
                "end": END,
                "clarification": "clarification_node",
            },
        )
        graph.add_edge("dev_dispatch_node", "qa_node")
        graph.add_conditional_edges(
            "qa_node",
            self._should_continue_after_qa,
            {
                "task_node": "task_node",
                "clarification": "clarification_node",
                "end": END,
            },
        )
        graph.add_edge("clarification_node", "task_node")

        return graph.compile()

    def _should_continue_after_pm(self, state: dict) -> str:
        """Routing function after pm_node: 'task_node' | 'end'."""
        prd = state.get("prd", "")
        if prd and prd.strip():
            return "task_node"

        # PM failure (or empty prompt) yields no PRD and should terminate cleanly.
        return "end"

    def _should_continue_after_tasks(self, state: dict) -> str:
        """Routing function after task_node: 'dev_dispatch' | 'end' | 'clarification'."""
        if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
            return "end"

        tasks = state.get("tasks", [])
        completed = set(state.get("completed_tasks", []))
        all_task_ids = {str(t.get("id", "")) for t in tasks}

        # Check if all tasks are done
        if all_task_ids and all_task_ids <= completed:
            return "end"

        # Check for unblocked tasks (pending tasks with all deps done)
        unblocked = []
        for t in tasks:
            if str(t.get("id", "")) in completed:
                continue
            if t.get("status") == "done":
                continue
            deps = [str(d) for d in t.get("dependencies", [])]
            if all(d in completed for d in deps):
                unblocked.append(t)

        if unblocked:
            return "dev_dispatch"

        # No unblocked tasks - if there are blocked ones, try clarification
        if state.get("blocked_tasks") or state.get("clarification_requests"):
            return "clarification"

        # No tasks at all or nothing left to do
        return "end"

    def _should_continue_after_qa(self, state: dict) -> str:
        """Routing function after qa_node: 'task_node' | 'clarification' | 'end'."""
        if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
            return "end"

        if state.get("clarification_requests"):
            return "clarification"

        # Loop back to check for newly unblocked tasks
        return "task_node"

    async def _pm_node(self, state: dict) -> dict:
        """Call PM agent to expand user input into a PRD.

        Returns a partial state update: ``prd`` (empty on failure) and,
        on error, an appended ``errors`` list.
        """
        if self.observability:
            self.observability.log_state_transition("start", "pm_node")

        user_input = state.get("user_input", "")
        if not user_input:
            return {"prd": "", "errors": state.get("errors", []) + ["No user input provided"]}

        if self.pm_agent is None:
            # No PM agent wired in: return a placeholder PRD (test mode).
            return {"prd": f"Mock PRD for: {user_input}"}

        try:
            prd = await self.pm_agent.expand_prompt_to_prd(user_input)
            return {"prd": prd}
        except Exception as e:
            logger.error("PM agent failed: %s", e)
            return {"prd": "", "errors": state.get("errors", []) + [f"PM agent error: {e}"]}

    async def _task_node(self, state: dict) -> dict:
        """Parse PRD into tasks or get unblocked tasks. Increments iteration_count."""
        if self.observability:
            self.observability.log_state_transition("pm_node/qa_node/clarification_node", "task_node")

        iteration_count = state.get("iteration_count", 0) + 1
        updates = {"iteration_count": iteration_count}

        if iteration_count >= state.get("max_iterations", 50):
            updates["errors"] = state.get("errors", []) + ["Max iterations reached"]
            return updates

        if self.task_agent is None:
            return updates

        try:
            existing_tasks = state.get("tasks", [])
            if not existing_tasks:
                # First pass - parse the PRD
                prd = state.get("prd", "")
                if prd:
                    await self.task_agent.parse_prd(prd)
                # NOTE(review): state "tasks" is replaced with only the
                # unblocked subset, so downstream "all done" checks iterate
                # the unblocked tasks rather than the full task list -
                # confirm this is intended.
                unblocked = await self.task_agent.get_unblocked_tasks()
                updates["tasks"] = unblocked
            else:
                # Subsequent passes - refresh unblocked tasks
                unblocked = await self.task_agent.get_unblocked_tasks()
                updates["tasks"] = unblocked
        except Exception as e:
            logger.error("Task agent failed: %s", e)
            updates["errors"] = state.get("errors", []) + [f"Task agent error: {e}"]

        return updates

    async def _dev_dispatch_node(self, state: dict) -> dict:
        """Dispatch dev agents concurrently for unblocked tasks.

        For each runnable task: creates a worktree, spins up a clean-room
        container, and runs the dev manager with retries. Results update
        ``active_tasks``/``completed_tasks``; hard failures become
        clarification requests.
        """
        if self.observability:
            self.observability.log_state_transition("task_node", "dev_dispatch_node")

        tasks = state.get("tasks", [])
        completed = set(state.get("completed_tasks", []))
        active_tasks = dict(state.get("active_tasks", {}))
        errors = list(state.get("errors", []))
        clarification_requests = list(state.get("clarification_requests", []))
        global_arch = state.get("global_architecture", "")

        # Filter to unblocked, not-yet-completed tasks
        to_execute = []
        for t in tasks:
            tid = str(t.get("id", ""))
            if tid in completed or tid in active_tasks:
                continue
            deps = [str(d) for d in t.get("dependencies", [])]
            if all(d in completed for d in deps):
                to_execute.append(t)

        if not to_execute:
            return {}

        if self.dev_manager is None or self.workspace_manager is None:
            # Mock execution for testing
            new_completed = list(completed)
            for t in to_execute:
                tid = str(t.get("id", ""))
                active_tasks[tid] = {"status": "success", "container_id": "mock", "worktree_path": "/mock"}
                new_completed.append(tid)
            return {"active_tasks": active_tasks, "completed_tasks": new_completed}

        async def _execute_single(task):
            # Run one task end-to-end; never raises - failures are folded
            # into a synthetic "failed" result dict.
            tid = str(task.get("id", ""))
            worktree_path = None
            container = None
            try:
                worktree_path = await self.workspace_manager.create_worktree(tid)
                container = await self.workspace_manager.spin_up_clean_room(worktree_path, tid)
                container_id = container.id

                if self.task_agent:
                    await self.task_agent.update_task_status(tid, "in-progress")

                result = await self.dev_manager.execute_with_retry(
                    task, container_id, worktree_path, global_arch
                )
                return tid, result, worktree_path
            except Exception as e:
                logger.error("Dev dispatch failed for task %s: %s", tid, e)
                return tid, {"status": "failed", "output": str(e), "files_changed": [], "exit_code": -1}, worktree_path

        # Execute concurrently
        results = await asyncio.gather(*[_execute_single(t) for t in to_execute], return_exceptions=True)

        new_completed = list(completed)
        for item in results:
            if isinstance(item, Exception):
                errors.append(f"Dev dispatch exception: {item}")
                continue

            tid, result, worktree_path = item
            status = result.get("status", "failed")
            active_tasks[tid] = {
                "status": status,
                "container_id": result.get("container_id", ""),
                "worktree_path": worktree_path or "",
            }

            if status == "success":
                new_completed.append(tid)
            elif status == "needs_clarification":
                clarification_requests.append({
                    "requesting_agent": "dev_agent",
                    "task_id": tid,
                    "question": f"Task {tid} failed after retries. Output: {result.get('output', '')[:500]}",
                    "context": result.get("output", "")[:1000],
                })

        return {
            "active_tasks": active_tasks,
            "completed_tasks": new_completed,
            "errors": errors,
            "clarification_requests": clarification_requests,
        }

    async def _qa_node(self, state: dict) -> dict:
        """Run QA on completed dev tasks.

        Merged tasks are marked "done"; QA failures either spawn a
        clarification request (retries exhausted) or are re-queued by
        removing the task from ``completed_tasks``. Workspaces are
        cleaned up best-effort after each review.
        """
        if self.observability:
            self.observability.log_state_transition("dev_dispatch_node", "qa_node")

        active_tasks = dict(state.get("active_tasks", {}))
        completed = list(state.get("completed_tasks", []))
        errors = list(state.get("errors", []))
        clarification_requests = list(state.get("clarification_requests", []))
        blocked_tasks = dict(state.get("blocked_tasks", {}))

        # Find tasks that were successfully completed by dev and need QA
        tasks_for_qa = []
        for tid, info in active_tasks.items():
            if info.get("status") == "success" and tid in completed:
                tasks_for_qa.append((tid, info))

        if not tasks_for_qa or self.qa_agent is None:
            return {}

        for tid, info in tasks_for_qa:
            worktree_path = info.get("worktree_path", "")
            if not worktree_path:
                continue

            try:
                # Find the task dict for context
                task_dict = None
                for t in state.get("tasks", []):
                    if str(t.get("id", "")) == tid:
                        task_dict = t
                        break

                qa_result = await self.qa_agent.review_and_merge(tid, worktree_path, task=task_dict)
                qa_status = qa_result.get("status", "")

                if qa_status == "merged":
                    # Successfully merged - update task status
                    if self.task_agent:
                        await self.task_agent.update_task_status(tid, "done")
                    active_tasks[tid]["status"] = "merged"
                else:
                    # QA failed - may need clarification or retry
                    retry_count = qa_result.get("retry_count", 0)
                    if retry_count >= (self.qa_agent.max_retries if self.qa_agent else 3):
                        clarification_requests.append({
                            "requesting_agent": "qa_agent",
                            "task_id": tid,
                            "question": f"QA failed for task {tid} with status '{qa_status}'",
                            "context": str(qa_result),
                        })
                    else:
                        blocked_tasks[tid] = f"QA {qa_status}: {qa_result}"
                    # Remove from completed so it can be retried
                    if tid in completed:
                        completed.remove(tid)
                    active_tasks[tid]["status"] = qa_status

                # Cleanup workspace after QA
                if self.workspace_manager:
                    try:
                        await self.workspace_manager.cleanup_workspace(tid)
                    except Exception as e:
                        logger.warning("Workspace cleanup failed for task %s: %s", tid, e)

            except Exception as e:
                logger.error("QA failed for task %s: %s", tid, e)
                errors.append(f"QA error for task {tid}: {e}")

        return {
            "active_tasks": active_tasks,
            "completed_tasks": completed,
            "errors": errors,
            "clarification_requests": clarification_requests,
            "blocked_tasks": blocked_tasks,
        }

    async def _clarification_node(self, state: dict) -> dict:
        """Handle clarification requests via PM agent.

        Resolved requests unblock their task; failed resolutions remain
        queued and are logged in ``errors``.
        """
        if self.observability:
            self.observability.log_state_transition("task_node/qa_node", "clarification_node")

        requests = list(state.get("clarification_requests", []))
        blocked_tasks = dict(state.get("blocked_tasks", {}))
        errors = list(state.get("errors", []))

        if not requests:
            return {"clarification_requests": []}

        if self.pm_agent is None:
            # Clear requests without processing for testing
            return {"clarification_requests": [], "blocked_tasks": {}}

        resolved = []
        remaining = []

        for req in requests:
            try:
                answer = await self.pm_agent.handle_clarification_request(req)
                tid = req.get("task_id", "")
                if tid and tid in blocked_tasks:
                    del blocked_tasks[tid]
                resolved.append({"request": req, "answer": answer})
            except Exception as e:
                logger.error("Clarification failed: %s", e)
                errors.append(f"Clarification error: {e}")
                remaining.append(req)

        return {
            "clarification_requests": remaining,
            "blocked_tasks": blocked_tasks,
            "errors": errors,
        }

    async def run(self, user_input: str) -> dict:
        """Build graph and execute with initial state.

        Returns the final orchestration state and persists it to disk.
        """
        compiled = self.build_graph()

        initial_state = {
            "user_input": user_input,
            "prd": "",
            "tasks": [],
            "active_tasks": {},
            "completed_tasks": [],
            "blocked_tasks": {},
            "clarification_requests": [],
            "global_architecture": "",
            "iteration_count": 0,
            "max_iterations": 50,
            "errors": [],
        }

        if self.observability:
            self.observability.log_state_transition("init", "run")

        result = await compiled.ainvoke(initial_state)

        self.save_state(result)
        return result

    def save_state(self, state: dict, path: str = "app_factory/data/state.json"):
        """Persist state to disk.

        Values that are not JSON-serializable are stringified rather than
        dropped, so the file always round-trips through ``json``.
        """
        # NOTE(review): a bare filename (no directory component) makes
        # os.path.dirname(path) empty and os.makedirs("") raises - confirm
        # callers always pass a path with a directory.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Convert to JSON-serializable form
        serializable = {}
        for k, v in state.items():
            try:
                json.dumps(v)
                serializable[k] = v
            except (TypeError, ValueError):
                serializable[k] = str(v)

        with open(path, "w") as f:
            json.dump(serializable, f, indent=2)

    def load_state(self, path: str = "app_factory/data/state.json") -> dict:
        """Load state from disk.

        Raises FileNotFoundError / json.JSONDecodeError on a missing or
        corrupt state file.
        """
        with open(path) as f:
            return json.load(f)
|
||||
83
app_factory/core/logging_utils.py
Normal file
83
app_factory/core/logging_utils.py
Normal file
@@ -0,0 +1,83 @@
|
||||
"""Logging formatters and helpers for colorized terminal output."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, TextIO
|
||||
|
||||
|
||||
# ANSI escape sequences used for terminal styling.
RESET = "\033[0m"
DIM = "\033[2m"
BOLD = "\033[1m"
FG_BLUE = "\033[34m"
FG_CYAN = "\033[36m"
FG_GREEN = "\033[32m"
FG_MAGENTA = "\033[35m"
FG_YELLOW = "\033[33m"
FG_RED = "\033[31m"

# Style applied to each logging level's name by the formatters below.
LEVEL_COLORS = {
    logging.DEBUG: f"{DIM}{FG_CYAN}",
    logging.INFO: FG_GREEN,
    logging.WARNING: FG_YELLOW,
    logging.ERROR: FG_RED,
    logging.CRITICAL: f"{BOLD}{FG_RED}",
}
|
||||
|
||||
|
||||
def should_use_color(stream: Optional[TextIO] = None, use_color: Optional[bool] = None) -> bool:
    """Return whether ANSI colors should be used for the given stream.

    Precedence: explicit *use_color* override, then the ``NO_COLOR`` and
    ``FORCE_COLOR`` environment variables, then ``TERM=dumb``, and finally
    whether the target stream (stderr by default) is a TTY.
    """
    if use_color is not None:
        return use_color

    if os.getenv("NO_COLOR") is not None:
        return False

    forced = os.getenv("FORCE_COLOR", "").strip().lower()
    if forced and forced not in {"0", "false", "no"}:
        return True

    if os.getenv("TERM", "").lower() == "dumb":
        return False

    target = stream or sys.stderr
    probe = getattr(target, "isatty", None)
    return bool(probe and probe())
|
||||
|
||||
|
||||
def colorize(text: str, style: str, enabled: bool) -> str:
    """Apply ANSI style to text when enabled."""
    if enabled and style:
        return f"{style}{text}{RESET}"
    return text
|
||||
|
||||
|
||||
class LevelColorFormatter(logging.Formatter):
    """Formatter that colors only the log level token.

    All rendering is delegated to :class:`logging.Formatter`; the record's
    ``levelname`` is temporarily swapped for a colored version while the
    base class formats the message, then restored.
    """

    def __init__(
        self,
        fmt: Optional[str] = None,
        datefmt: Optional[str] = None,
        style: str = "%",
        *,
        stream: Optional[TextIO] = None,
        use_color: Optional[bool] = None,
    ):
        super().__init__(fmt=fmt, datefmt=datefmt, style=style)
        # Decide once, at construction time, whether color is appropriate.
        self._use_color = should_use_color(stream=stream, use_color=use_color)

    def format(self, record: logging.LogRecord) -> str:
        if not self._use_color:
            return super().format(record)

        saved_levelname = record.levelname
        record.levelname = colorize(
            saved_levelname,
            LEVEL_COLORS.get(record.levelno, ""),
            enabled=True,
        )
        try:
            return super().format(record)
        finally:
            # Restore the record so other handlers see the plain level name.
            record.levelname = saved_levelname
|
||||
572
app_factory/core/observability.py
Normal file
572
app_factory/core/observability.py
Normal file
@@ -0,0 +1,572 @@
|
||||
"""Observability Manager - LangSmith tracing, logging, and monitoring."""
|
||||
|
||||
import contextlib
|
||||
import functools
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from app_factory.core.logging_utils import (
|
||||
FG_BLUE,
|
||||
FG_CYAN,
|
||||
FG_MAGENTA,
|
||||
LEVEL_COLORS,
|
||||
colorize,
|
||||
should_use_color,
|
||||
)
|
||||
|
||||
|
||||
class _StructuredFormatter(logging.Formatter):
    """Custom formatter: [ISO_TIMESTAMP] [AGENT] [TASK] [LEVEL] message"""

    # Message prefixes mapped to the style applied to the whole message.
    _EVENT_COLORS = {
        "State transition": FG_MAGENTA,
        "Token usage": FG_BLUE,
        "Claude event": FG_BLUE,
        "Trace started": FG_CYAN,
        "Trace ended": FG_CYAN,
    }

    def __init__(self, use_color: Optional[bool] = None):
        super().__init__()
        self._use_color = should_use_color(use_color=use_color)

    def _colorize_message(self, message: str) -> str:
        # First matching prefix wins; unmatched messages pass through.
        for prefix, style in self._EVENT_COLORS.items():
            if message.startswith(prefix):
                return colorize(message, style, self._use_color)
        return message

    def format(self, record: logging.LogRecord) -> str:
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
        agent = getattr(record, "agent_name", "SYSTEM")
        task = getattr(record, "task_id", "-")
        level = record.levelname
        message = record.getMessage()

        if self._use_color:
            timestamp = colorize(timestamp, FG_BLUE, enabled=True)
            agent = colorize(agent, FG_CYAN, enabled=True)
            task = colorize(task, FG_MAGENTA, enabled=True)
            level = colorize(level, LEVEL_COLORS.get(record.levelno, ""), enabled=True)
            message = self._colorize_message(message)

        return f"[{timestamp}] [{agent}] [{task}] [{level}] {message}"
|
||||
|
||||
|
||||
class _TraceContext:
    """Async context manager returned by ``trace_context()``.

    Starts a trace on entry, yields the run id, and ends the trace on
    exit -- recording the exception, if one escaped the body.
    """

    def __init__(self, manager: "ObservabilityManager", agent_name: str, task_id: str):
        self._manager = manager
        self._agent_name = agent_name
        self._task_id = task_id
        self._run_id: Optional[str] = None

    async def __aenter__(self) -> str:
        self._run_id = self._manager.start_trace(self._agent_name, self._task_id)
        return self._run_id

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
        if exc_val is None:
            self._manager.end_trace(self._run_id)
        else:
            self._manager.end_trace(
                self._run_id, error=f"{exc_type.__name__}: {exc_val}"
            )
        return False  # do not suppress exceptions
|
||||
|
||||
|
||||
class ObservabilityManager:
|
||||
"""Wraps LangSmith client for tracing and structured logging."""
|
||||
|
||||
_CLAUDE_EVENT_FILTERS = {
|
||||
"quiet": {
|
||||
"request_start",
|
||||
"request_error",
|
||||
"request_complete",
|
||||
"tool_use",
|
||||
"tool_result",
|
||||
},
|
||||
"focused": {
|
||||
"request_start",
|
||||
"request_error",
|
||||
"request_complete",
|
||||
"tool_use",
|
||||
"tool_result",
|
||||
"thinking_block",
|
||||
"result_message",
|
||||
},
|
||||
"verbose": None, # no filtering
|
||||
"off": set(),
|
||||
}
|
||||
|
||||
def __init__(self, project_name: str = None, claude_event_mode: str | None = None):
    """Initialize the manager.

    Args:
        project_name: LangSmith project name; defaults to the
            ``LANGSMITH_PROJECT`` env var, then ``"app-factory"``.
        claude_event_mode: Claude event verbosity filter, one of
            ``quiet``/``focused``/``verbose``/``off``; defaults to the
            ``APP_FACTORY_CLAUDE_EVENT_MODE`` env var, then ``"quiet"``.
    """
    self.project_name = project_name or os.getenv("LANGSMITH_PROJECT", "app-factory")
    requested_mode = (
        claude_event_mode
        or os.getenv("APP_FACTORY_CLAUDE_EVENT_MODE", "quiet")
    )
    # NOTE(review): non-string or unrecognized modes fall back to "focused"
    # while the env default above is "quiet" - confirm the asymmetry is intended.
    normalized_mode = requested_mode.strip().lower() if isinstance(requested_mode, str) else "focused"
    self._claude_event_mode = (
        normalized_mode if normalized_mode in self._CLAUDE_EVENT_FILTERS else "focused"
    )

    # --- LangSmith client (optional) ---
    self._client = None
    try:
        from langsmith import Client  # noqa: F811

        self._client = Client()
    except Exception as exc:
        # LangSmith not configured or unreachable -- degrade gracefully
        logging.getLogger(__name__).warning(
            "LangSmith unavailable (%s). Tracing disabled.", exc
        )

    # --- Structured logger ---
    # Attach the handler only once so repeated instantiation does not
    # duplicate log lines; propagation is disabled to avoid double output.
    self.logger = logging.getLogger(f"app_factory.{self.project_name}")
    if not self.logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(_StructuredFormatter())
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)
        self.logger.propagate = False
    # Correlation maps: tool_use_id -> tool name / input summary so later
    # tool_result events can reference the originating tool_use.
    self._tool_name_by_use_id: dict[str, str] = {}
    self._tool_summary_by_use_id: dict[str, str] = {}

    # --- Internal metrics ---
    # run_id -> {agent_name, task_id, start_time} for in-flight traces.
    self._active_runs: dict[str, dict] = {}
    self._metrics = {
        "total_tokens": 0,
        "total_traces": 0,
        "total_errors": 0,
        "total_claude_events": 0,
        "total_tool_calls": 0,
        "per_agent": defaultdict(lambda: {
            "tokens": 0,
            "traces": 0,
            "errors": 0,
            "claude_events": 0,
            "tool_calls": 0,
        }),
    }
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tracing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def start_trace(self, agent_name: str, task_id: str, inputs: dict = None) -> str:
    """Start a new trace run, return run_id.

    Updates local metrics and logs unconditionally; the LangSmith
    ``create_run`` call is best-effort and a failure only logs a warning.
    """
    run_id = uuid.uuid4().hex
    self._metrics["total_traces"] += 1
    self._metrics["per_agent"][agent_name]["traces"] += 1

    # Track the in-flight run so end_trace() can recover its context.
    self._active_runs[run_id] = {
        "agent_name": agent_name,
        "task_id": task_id,
        "start_time": time.time(),
    }

    self.logger.info(
        "Trace started: run_id=%s",
        run_id,
        extra={"agent_name": agent_name, "task_id": task_id},
    )

    try:
        if self._client is not None:
            self._client.create_run(
                name=f"{agent_name}:{task_id}",
                run_type="chain",
                inputs=inputs or {},
                id=run_id,
                project_name=self.project_name,
            )
    except Exception as exc:
        # Tracing backend errors must not interrupt the caller.
        self.logger.warning(
            "LangSmith create_run failed: %s",
            exc,
            extra={"agent_name": agent_name, "task_id": task_id},
        )

    return run_id
|
||||
|
||||
def end_trace(self, run_id: str, outputs: dict = None, error: str = None):
    """End a trace run with outputs or error.

    Unknown run_ids are tolerated (context falls back to "unknown"/"-").
    The LangSmith ``update_run`` call is best-effort; failures only warn.
    """
    run_info = self._active_runs.pop(run_id, {})
    agent_name = run_info.get("agent_name", "unknown")
    task_id = run_info.get("task_id", "-")

    if error:
        self._metrics["total_errors"] += 1
        self._metrics["per_agent"][agent_name]["errors"] += 1
        self.logger.error(
            "Trace error: run_id=%s error=%s",
            run_id,
            error,
            extra={"agent_name": agent_name, "task_id": task_id},
        )
    else:
        self.logger.info(
            "Trace ended: run_id=%s",
            run_id,
            extra={"agent_name": agent_name, "task_id": task_id},
        )

    try:
        if self._client is not None:
            update_kwargs: dict[str, Any] = {"end_time": datetime.now(timezone.utc)}
            if outputs:
                update_kwargs["outputs"] = outputs
            if error:
                update_kwargs["error"] = error
            self._client.update_run(run_id, **update_kwargs)
    except Exception as exc:
        # Tracing backend errors must not interrupt the caller.
        self.logger.warning(
            "LangSmith update_run failed: %s",
            exc,
            extra={"agent_name": agent_name, "task_id": task_id},
        )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Decorator
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def trace_agent_execution(self, agent_name: str, task_id: str):
    """Decorator for tracking agent calls with context.

    Works on both sync and async callables: the wrapped function runs
    inside a trace whose inputs/outputs are the stringified arguments
    and result; exceptions end the trace with an error and re-raise.
    """

    def decorator(func: Callable):
        @functools.wraps(func)
        async def async_wrapper(*args, **kwargs):
            run_id = self.start_trace(agent_name, task_id, inputs={"args": str(args), "kwargs": str(kwargs)})
            try:
                result = await func(*args, **kwargs)
                self.end_trace(run_id, outputs={"result": str(result)})
                return result
            except Exception as exc:
                self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                raise

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            run_id = self.start_trace(agent_name, task_id, inputs={"args": str(args), "kwargs": str(kwargs)})
            try:
                result = func(*args, **kwargs)
                self.end_trace(run_id, outputs={"result": str(result)})
                return result
            except Exception as exc:
                self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                raise

        # Pick the wrapper matching the decorated function's flavor.
        if inspect.iscoroutinefunction(func):
            return async_wrapper
        return sync_wrapper

    return decorator
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Async helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def trace_agent(self, agent_name: str, task_id: str, func: Callable):
    """Async helper to run a function within a trace context.

    *func* is awaited with no arguments; its result is recorded as the
    trace output, and any exception ends the trace with an error before
    being re-raised.
    """
    run_id = self.start_trace(agent_name, task_id)
    try:
        result = await func()
        self.end_trace(run_id, outputs={"result": str(result)})
        return result
    except Exception as exc:
        self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
        raise
|
||||
|
||||
def trace_context(self, agent_name: str, task_id: str) -> _TraceContext:
    """Return an async context manager for tracing.

    Usage::

        async with obs.trace_context("agent", "task_id") as run_id:
            ...
    """
    return _TraceContext(self, agent_name, task_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Logging helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def log_state_transition(self, from_state: str, to_state: str, metadata: dict = None):
    """Log a state machine transition, optionally with metadata."""
    suffix = f" metadata={metadata}" if metadata else ""
    self.logger.info(
        f"State transition: {from_state} -> {to_state}{suffix}",
        extra={"agent_name": "STATE_MACHINE", "task_id": "-"},
    )
|
||||
|
||||
def log_token_usage(
    self,
    agent_name: str,
    task_id: str,
    input_tokens: int,
    output_tokens: int,
    model: str = None,
):
    """Log token usage for cost monitoring and accumulate totals."""
    combined = input_tokens + output_tokens
    self._metrics["total_tokens"] += combined
    self._metrics["per_agent"][agent_name]["tokens"] += combined

    parts = [f"Token usage: input={input_tokens} output={output_tokens} total={combined}"]
    if model:
        parts.append(f"model={model}")
    self.logger.info(" ".join(parts), extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
def log_error(self, agent_name: str, task_id: str, error: Exception, context: dict = None):
|
||||
"""Log an error with full stack trace."""
|
||||
self._metrics["total_errors"] += 1
|
||||
self._metrics["per_agent"][agent_name]["errors"] += 1
|
||||
|
||||
tb = traceback.format_exception(type(error), error, error.__traceback__)
|
||||
msg = f"Error: {error}\n{''.join(tb)}"
|
||||
if context:
|
||||
msg += f" context={context}"
|
||||
self.logger.error(msg, extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
    def log_claude_event(
        self,
        agent_name: str,
        task_id: str,
        event_type: str,
        payload: dict | None = None,
    ):
        """Log a Claude SDK/CLI event in structured form.

        Besides logging, this method maintains two pieces of state:
        counters in ``self._metrics`` and the ``tool_use_id`` correlation
        maps that let a later ``tool_result`` event be annotated with the
        name and input summary of the ``tool_use`` that triggered it.
        """
        self._metrics["total_claude_events"] += 1
        self._metrics["per_agent"][agent_name]["claude_events"] += 1

        # Normalize inputs defensively: event_type may be None/empty, and the
        # payload is copied so annotations below never mutate the caller's dict.
        normalized_event = (event_type or "unknown").strip().lower()
        normalized_payload = dict(payload or {})
        if normalized_event == "tool_use":
            self._metrics["total_tool_calls"] += 1
            self._metrics["per_agent"][agent_name]["tool_calls"] += 1
            tool_use_id = normalized_payload.get("tool_use_id")
            tool_name = normalized_payload.get("tool_name")
            tool_input = normalized_payload.get("tool_input")
            # Remember name + input summary so the matching tool_result
            # (which carries only the id) can be labelled later.
            if isinstance(tool_use_id, str) and isinstance(tool_name, str):
                self._tool_name_by_use_id[tool_use_id] = tool_name
                self._tool_summary_by_use_id[tool_use_id] = self._summarize_tool_input(
                    str(tool_name),
                    tool_input,
                )

        if normalized_event == "tool_result":
            tool_use_id = normalized_payload.get("tool_use_id")
            if isinstance(tool_use_id, str):
                # pop(): each correlation entry is consumed exactly once,
                # so the maps cannot grow without bound.
                tool_name = self._tool_name_by_use_id.pop(tool_use_id, None)
                tool_summary = self._tool_summary_by_use_id.pop(tool_use_id, None)
                # Only fill in fields the payload does not already provide.
                if "tool_name" not in normalized_payload and tool_name:
                    normalized_payload["tool_name"] = tool_name
                if "tool_input_summary" not in normalized_payload and tool_summary:
                    normalized_payload["tool_input_summary"] = tool_summary

        # Metrics/correlation above always run; only the log line is filtered.
        if not self._should_log_claude_event(normalized_event):
            return

        msg = self._format_claude_event_message(normalized_event, normalized_payload)
        if not msg:
            # Formatter returning "" means "suppress this event" (noisy tools).
            return

        self.logger.debug(msg, extra={"agent_name": agent_name, "task_id": task_id})
|
||||
|
||||
def _should_log_claude_event(self, event_type: str) -> bool:
|
||||
allowed = self._CLAUDE_EVENT_FILTERS.get(self._claude_event_mode)
|
||||
if allowed is None:
|
||||
return True
|
||||
return event_type in allowed
|
||||
|
||||
    def _format_claude_event_message(self, event_type: str, payload: dict[str, Any]) -> str:
        """Render one Claude event as a single human-readable log line.

        Returns "" to signal that the event should be suppressed entirely
        (successful results of noisy read-only tools). Unknown event types
        fall through to a generic JSON dump of the payload.
        """
        session_id = payload.get("session_id")
        session_suffix = f" session={session_id}" if session_id else ""

        if event_type == "request_start":
            model = payload.get("model") or "default"
            prompt_chars = payload.get("prompt_chars", 0)
            return f"Claude request started: model={model} prompt_chars={prompt_chars}{session_suffix}"

        if event_type == "request_complete":
            inp = payload.get("input_tokens", 0)
            out = payload.get("output_tokens", 0)
            subtype = payload.get("result_subtype") or "unknown"
            preview = self._shorten_text(payload.get("result_preview"), max_chars=140)
            preview_fragment = f' result="{preview}"' if preview else ""
            return (
                f"Claude request completed: subtype={subtype} "
                f"tokens={inp}->{out}{preview_fragment}{session_suffix}"
            )

        if event_type == "request_error":
            err = self._shorten_text(payload.get("error"))
            retrying = payload.get("retrying")
            retry_fragment = " retrying=true" if retrying else ""
            return f"Claude request error: {err}{retry_fragment}{session_suffix}"

        if event_type == "tool_use":
            tool_name = payload.get("tool_name", "unknown_tool")
            tool_input = payload.get("tool_input")
            input_summary = self._summarize_tool_input(str(tool_name), tool_input)
            return f"Claude tool call: {tool_name} {input_summary}{session_suffix}"

        if event_type == "tool_result":
            # tool_name / tool_input_summary may have been back-filled by
            # log_claude_event from the matching tool_use event.
            tool_name = payload.get("tool_name", "tool")
            is_error = bool(payload.get("is_error", False))
            content = payload.get("content")
            input_summary = payload.get("tool_input_summary")
            input_fragment = f" {input_summary}" if input_summary else ""
            status = "error" if is_error else "ok"
            # Successful results of noisy tools (read/bash/grep/...) are dropped.
            if self._is_noisy_tool_name(str(tool_name)) and not is_error:
                return ""
            # Failed noisy-tool results are kept, but with a trimmed error
            # preview instead of the full JSON content dump.
            if self._is_noisy_tool_name(str(tool_name)) and is_error:
                error_preview = self._shorten_text(content, max_chars=420)
                error_fragment = f" error={error_preview}" if error_preview else ""
                return (
                    f"Claude tool result: {tool_name} status={status}"
                    f"{input_fragment}{error_fragment}{session_suffix}"
                )
            content_preview = self._compact_json(content, max_chars=420)
            return (
                f"Claude tool result: {tool_name} status={status}"
                f"{input_fragment} content={content_preview}{session_suffix}"
            )

        if event_type == "text_block":
            preview = self._shorten_text(payload.get("preview"))
            return f"Claude says: {preview}{session_suffix}"

        if event_type == "thinking_block":
            # Thinking content itself is not logged, only its size.
            chars = payload.get("chars", 0)
            return f"Claude thinking block: chars={chars}{session_suffix}"

        if event_type == "result_message":
            subtype = payload.get("subtype", "unknown")
            turns = payload.get("num_turns", 0)
            duration_ms = payload.get("duration_ms")
            duration_fragment = f" duration_ms={duration_ms}" if duration_ms is not None else ""
            return f"Claude result message: subtype={subtype} turns={turns}{duration_fragment}{session_suffix}"

        # Fallback for event types with no dedicated formatter.
        payload_json = self._compact_json(payload)
        return f"Claude event: type={event_type} payload={payload_json}{session_suffix}"
|
||||
|
||||
@staticmethod
|
||||
def _shorten_text(value: Any, max_chars: int = 220) -> str:
|
||||
text = str(value) if value is not None else ""
|
||||
text = text.strip().replace("\n", " ")
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
return f"{text[:max_chars]}..."
|
||||
|
||||
@staticmethod
|
||||
def _compact_json(value: Any, max_chars: int = 300) -> str:
|
||||
with contextlib.suppress(TypeError, ValueError):
|
||||
rendered = json.dumps(value, sort_keys=True, default=str)
|
||||
if len(rendered) <= max_chars:
|
||||
return rendered
|
||||
return f"{rendered[:max_chars]}..."
|
||||
return ObservabilityManager._shorten_text(value, max_chars=max_chars)
|
||||
|
||||
@staticmethod
|
||||
def _is_noisy_tool_name(tool_name: str) -> bool:
|
||||
return tool_name.lower() in {"read", "bash", "grep", "glob", "find", "ls"}
|
||||
|
||||
@classmethod
|
||||
def _summarize_tool_input(cls, tool_name: str, tool_input: Any) -> str:
|
||||
if not isinstance(tool_input, dict):
|
||||
return f"input={cls._compact_json(tool_input, max_chars=140)}"
|
||||
|
||||
normalized_name = tool_name.lower()
|
||||
if normalized_name == "read":
|
||||
path = tool_input.get("file_path") or tool_input.get("path")
|
||||
return f"path={cls._shorten_path(path, max_chars=120)}"
|
||||
|
||||
if normalized_name == "bash":
|
||||
cmd = tool_input.get("command")
|
||||
compact_cmd = cls._abbreviate_workspace_paths(cmd)
|
||||
return f"command={cls._shorten_text(compact_cmd, max_chars=160)}"
|
||||
|
||||
description = tool_input.get("description")
|
||||
if isinstance(description, str) and description.strip():
|
||||
return f"description={cls._shorten_text(description, max_chars=140)}"
|
||||
|
||||
summary_keys = ("file_path", "path", "pattern", "query", "command", "name")
|
||||
summary: dict[str, Any] = {}
|
||||
for key in summary_keys:
|
||||
if key in tool_input:
|
||||
value = tool_input[key]
|
||||
if key in {"file_path", "path"}:
|
||||
value = cls._shorten_path(value, max_chars=120)
|
||||
summary[key] = value
|
||||
if summary:
|
||||
return f"input={cls._compact_json(summary, max_chars=160)}"
|
||||
return f"input={cls._compact_json(tool_input, max_chars=160)}"
|
||||
|
||||
    @classmethod
    def _shorten_path(cls, value: Any, max_chars: int = 120) -> str:
        """Render a filesystem path relative to the CWD, then truncate it.

        Returns "" for None/empty input. Relative paths pass through
        unchanged; absolute paths under the CWD become relative.
        """
        text = str(value).strip() if value is not None else ""
        if not text:
            return ""

        normalized = text
        # Best-effort normalization: any OS error leaves the path untouched.
        with contextlib.suppress(Exception):
            cwd = os.path.abspath(os.getcwd())
            if os.path.isabs(text):
                abs_path = os.path.abspath(text)
                if abs_path == cwd:
                    normalized = "."
                elif abs_path.startswith(f"{cwd}{os.sep}"):
                    normalized = os.path.relpath(abs_path, cwd)
                else:
                    # Absolute path outside the CWD: strip any embedded
                    # CWD prefix occurrences (usually a no-op).
                    normalized = text.replace(f"{cwd}{os.sep}", "")

        return cls._shorten_text(normalized, max_chars=max_chars)
|
||||
|
||||
@staticmethod
|
||||
def _abbreviate_workspace_paths(value: Any) -> str:
|
||||
text = str(value).strip() if value is not None else ""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
compact = text
|
||||
with contextlib.suppress(Exception):
|
||||
cwd = os.path.abspath(os.getcwd())
|
||||
compact = compact.replace(f"{cwd}{os.sep}", "")
|
||||
compact = compact.replace(cwd, ".")
|
||||
|
||||
return compact
|
||||
|
||||
@classmethod
|
||||
def _estimate_chars(cls, value: Any) -> int:
|
||||
if value is None:
|
||||
return 0
|
||||
if isinstance(value, str):
|
||||
return len(value)
|
||||
with contextlib.suppress(TypeError, ValueError):
|
||||
return len(json.dumps(value, default=str))
|
||||
return len(str(value))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Metrics
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_metrics(self) -> dict:
|
||||
"""Return accumulated metrics (total tokens, traces, errors)."""
|
||||
return {
|
||||
"total_tokens": self._metrics["total_tokens"],
|
||||
"total_traces": self._metrics["total_traces"],
|
||||
"total_errors": self._metrics["total_errors"],
|
||||
"total_claude_events": self._metrics["total_claude_events"],
|
||||
"total_tool_calls": self._metrics["total_tool_calls"],
|
||||
"per_agent": dict(self._metrics["per_agent"]),
|
||||
}
|
||||
230
app_factory/core/workspace.py
Normal file
230
app_factory/core/workspace.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Workspace Manager - Handles git worktrees and Docker containers for isolated execution."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import docker
|
||||
import git
|
||||
|
||||
|
||||
class WorkspaceError(Exception):
    """Base exception for workspace operations (git worktrees and Docker clean rooms)."""
|
||||
|
||||
|
||||
class GitWorktreeError(WorkspaceError):
    """Raised when creating, validating, or removing a git worktree fails."""
|
||||
|
||||
|
||||
class DockerProvisionError(WorkspaceError):
    """Raised when the Docker daemon is unreachable or container provisioning fails."""
|
||||
|
||||
|
||||
class WorkspaceManager:
    """Manages git worktrees and Docker containers for isolated Dev Agent execution."""

    def __init__(self, repo_path: str, docker_image: str = "python:3.11-slim"):
        """Initialize WorkspaceManager.

        Args:
            repo_path: Path to the git repository.
            docker_image: Docker image to use for clean room containers.

        Raises:
            GitWorktreeError: If repo_path is not a usable git repository.
            DockerProvisionError: If the Docker daemon is unreachable.
        """
        try:
            self.repo = git.Repo(repo_path)
        except git.InvalidGitRepositoryError as e:
            raise GitWorktreeError(f"Invalid git repository: {repo_path}") from e
        except git.NoSuchPathError as e:
            raise GitWorktreeError(f"Repository path not found: {repo_path}") from e

        self.repo_path = Path(repo_path).resolve()
        self.docker_image = docker_image
        # task_id -> {"task_id", "worktree_path", "container_id", "container"}
        self.active_workspaces: dict[str, dict] = {}

        try:
            self.docker_client = docker.from_env()
        except docker.errors.DockerException as e:
            raise DockerProvisionError(
                "Failed to connect to Docker daemon. Is Docker running?"
            ) from e

    async def create_worktree(self, task_id: str, base_branch: str = "main") -> str:
        """Create a git worktree (and feature branch) for a task.

        Args:
            task_id: Unique identifier for the task.
            base_branch: Branch to base the worktree on.

        Returns:
            Absolute path to the created worktree.

        Raises:
            GitWorktreeError: If validation or worktree creation fails.
        """
        branch_name = f"feature/task-{task_id}"
        # Worktrees live beside the repository, not inside it, so they
        # never show up as untracked files in the main checkout.
        worktree_path = str(self.repo_path.parent / "worktrees" / task_id)

        # Validate base branch exists before touching the filesystem.
        try:
            self.repo.git.rev_parse("--verify", base_branch)
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Base branch '{base_branch}' does not exist"
            ) from e

        # Refuse to reuse an existing worktree path.
        if os.path.exists(worktree_path):
            raise GitWorktreeError(
                f"Worktree path already exists: {worktree_path}"
            )

        # Refuse to reuse an existing feature branch.
        if branch_name in (ref.name for ref in self.repo.branches):
            raise GitWorktreeError(
                f"Branch already exists: {branch_name}"
            )

        try:
            os.makedirs(os.path.dirname(worktree_path), exist_ok=True)
            self.repo.git.worktree(
                "add", worktree_path, "-b", branch_name, base_branch
            )
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Failed to create worktree for task {task_id}: {e}"
            ) from e

        return str(Path(worktree_path).resolve())

    async def spin_up_clean_room(self, worktree_path: str, task_id: str):
        """Create an isolated Docker container for a task.

        Args:
            worktree_path: Path to the git worktree to mount.
            task_id: Unique identifier for the task.

        Returns:
            Container object with metadata.

        Raises:
            DockerProvisionError: If image resolution or container creation fails.
        """
        # Only pull when the image is absent locally: this avoids a network
        # round trip per task and lets cached images work offline.
        try:
            self.docker_client.images.get(self.docker_image)
        except docker.errors.ImageNotFound:
            try:
                self.docker_client.images.pull(self.docker_image)
            except docker.errors.APIError as e:
                raise DockerProvisionError(
                    f"Failed to pull image '{self.docker_image}': {e}"
                ) from e
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to inspect image '{self.docker_image}': {e}"
            ) from e

        try:
            container = self.docker_client.containers.create(
                image=self.docker_image,
                name=f"appfactory-task-{task_id}",
                volumes={
                    worktree_path: {"bind": "/workspace", "mode": "rw"}
                },
                working_dir="/workspace",
                # No network: the clean room is fully isolated.
                network_mode="none",
                auto_remove=False,
                detach=True,
                # Keep the container alive so commands can be exec'd into it.
                command="sleep infinity",
            )
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to create container for task {task_id}: {e}"
            ) from e

        self.active_workspaces[task_id] = {
            "task_id": task_id,
            "worktree_path": worktree_path,
            "container_id": container.id,
            "container": container,
        }

        return container

    async def cleanup_workspace(self, task_id: str, container=None):
        """Clean up a workspace by removing its container and worktree.

        Cleanup is best-effort: every step is attempted even if an earlier
        one fails, and failures are aggregated into one exception.

        Args:
            task_id: Unique identifier for the task.
            container: Optional container object. If None, uses the registered one.

        Raises:
            WorkspaceError: If any cleanup step ultimately failed.
        """
        workspace = self.active_workspaces.get(task_id, {})
        errors = []

        # Resolve container from the registry when not supplied.
        if container is None:
            container = workspace.get("container")

        # Stop and remove container.
        if container is not None:
            try:
                container.stop(timeout=5)
            except Exception:
                pass  # Container may already be stopped
            try:
                container.remove(force=True)
            except Exception as e:
                errors.append(f"Container removal failed: {e}")

        # Remove worktree; reconstruct the conventional path if unregistered.
        worktree_path = workspace.get("worktree_path")
        if worktree_path is None:
            worktree_path = str(self.repo_path.parent / "worktrees" / task_id)

        try:
            self.repo.git.worktree("remove", worktree_path, "--force")
        except git.GitCommandError:
            # Worktree may already be removed; try cleaning up the directory.
            if os.path.exists(worktree_path):
                try:
                    shutil.rmtree(worktree_path)
                except OSError as e:
                    errors.append(f"Worktree directory removal failed: {e}")

        # Prune stale worktree references left behind by manual deletion.
        try:
            self.repo.git.worktree("prune")
        except git.GitCommandError:
            pass

        # Remove from registry regardless of partial failures above.
        self.active_workspaces.pop(task_id, None)

        if errors:
            raise WorkspaceError(
                f"Cleanup completed with errors for task {task_id}: {'; '.join(errors)}"
            )

    def get_active_workspaces(self) -> list:
        """Return list of active workspace info dicts.

        Returns:
            List of dicts with task_id, worktree_path, and container_id
            (the live container object is deliberately excluded).
        """
        return [
            {
                "task_id": info["task_id"],
                "worktree_path": info["worktree_path"],
                "container_id": info["container_id"],
            }
            for info in self.active_workspaces.values()
        ]

    async def cleanup_all(self):
        """Cleanup all active workspaces. Used for graceful shutdown.

        Raises:
            WorkspaceError: Aggregated errors if any individual cleanup failed.
        """
        task_ids = list(self.active_workspaces.keys())
        errors = []
        for task_id in task_ids:
            try:
                await self.cleanup_workspace(task_id)
            except WorkspaceError as e:
                errors.append(str(e))
        if errors:
            raise WorkspaceError(
                f"Cleanup all completed with errors: {'; '.join(errors)}"
            )
|
||||
Reference in New Issue
Block a user